Add HTTP headers customization parameters. Fix for #3

2023-04-16 10:33:29 +02:00 · 2023-04-16 10:33:29 +02:00 · acf8b71e5b
commit acf8b71e5b
parent 50fe9b7476
2 changed files with 74 additions and 25 deletions
--- a/README.md
+++ b/README.md
@ -9,6 +9,8 @@ Customizable script for exporting notes from BookStack through API
 - customizable path for placing exported notes
 - configure replacing any characters in filenames with "_" for any filesystem compatibility
 - authorization token is loaded from txt file
+- Set custom HTTP User-Agent header to bypass filtering based on that header (like in CloudFlare tunnels)
+- Set arbitrary custom headers through parameter

 Requirements:
 - Python at least in version 3.6
@ -26,26 +28,38 @@ python exporter.py \
    --force-update-files \
    -t ./token.txt \
    -V debug \
-    -p ./ 
+    -p ./ \
+    --user-agent "Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/112.0"
+    --additional-headers "Header1: value1" "Header2: value2"  
 ```

 Customization:
 ```text
 options:
+  -h, --help            show this help message and exit
  -p PATH, --path PATH  Path where exported files will be placed.
  -t TOKEN_FILE, --token-file TOKEN_FILE
                        File containing authorization token in format TOKEN_ID:TOKEN_SECRET
  -H HOST, --host HOST  Your domain with protocol prefix, example: https://example.com
  -f {markdown,plaintext,pdf,html} [{markdown,plaintext,pdf,html} ...], 
-                       --formats {markdown,plaintext,pdf,html} [{markdown,plaintext,pdf,html} ...]
+                --formats {markdown,plaintext,pdf,html} [{markdown,plaintext,pdf,html} ...]
                        Space separated list of formats to use for export.
  -c FORBIDDEN_CHARS [FORBIDDEN_CHARS ...], --forbidden-chars FORBIDDEN_CHARS [FORBIDDEN_CHARS ...]
                        Space separated list of symbols to be replaced with "_" in filenames.
-  -l {pages,chapters,books} [{pages,chapters,books} ...], --level {pages,chapters,books} [{pages,chapters,books} ...]
+  -u USER_AGENT, --user-agent USER_AGENT
+                        User agent header content. In situations where requests are blocked  
+                        because of bad client/unrecognized web browser/etc (like with CloudFlare tunnels),  
+                        change to some typical web browser user agent header.
+  --additional-headers ADDITIONAL_HEADERS [ADDITIONAL_HEADERS ...]
+                        List of arbitrary additional HTTP headers to be sent with every HTTP request.  
+                        They can override default ones, including Authorization header.  
+                        Example: -u "Header1: value1" "Header2": value2
+  -l {pages,chapters,books} [{pages,chapters,books} ...], 
+                --level {pages,chapters,books} [{pages,chapters,books} ...]
                        Space separated list of levels at which should be export performed.
-  --force-update-files  Set this option to skip checking local files timestamps against remote last edit
-                        timestamps.This will cause overwriting local files, even if they seem to be already in
-                        newest version.
+  --force-update-files  Set this option to skip checking local files timestamps against  
+                        remote last edit timestamps. This will cause overwriting local files,  
+                        even if they seem to be already in newest version.
  -V {debug,info,warning,error}, --log-level {debug,info,warning,error}
                        Set verbosity level.
 ```
--- a/exporter.py
+++ b/exporter.py
@ -6,12 +6,12 @@ from datetime import datetime
 from logging import info, error, debug
 from pathlib import Path
 import sys
-from typing import Union
+from typing import Dict, List, Union
 from urllib.request import urlopen, Request
 import urllib.parse

 # (formatName, fileExtension)
-FORMATS: dict['str', 'str'] = {
+FORMATS: Dict['str', 'str'] = {
    'markdown': 'md',
    'plaintext': 'txt',
    'pdf': 'pdf',
@ -20,7 +20,7 @@ FORMATS: dict['str', 'str'] = {

 LEVELS = ['pages', 'chapters', 'books']

-LOG_LEVEL: dict = {
+LOG_LEVEL: Dict = {
    'debug': logging.DEBUG,
    'info': logging.INFO,
    'warning': logging.WARNING,
@ -28,7 +28,7 @@ LOG_LEVEL: dict = {
 }

 # Characters in filenames to be replaced with "_"
-FORBIDDEN_CHARS: list[str] = ["/", "#"]
+FORBIDDEN_CHARS: List[str] = ["/", "#"]

 parser = argparse.ArgumentParser(description='BookStack exporter')
 parser.add_argument('-p',
@ -62,6 +62,23 @@ parser.add_argument('-c',
                    nargs="+",
                    help='Space separated list of symbols to be replaced '
                    'with "_" in filenames.')
+parser.add_argument('-u',
+                    '--user-agent',
+                    type=str,
+                    default="BookStack exporter",
+                    help='User agent header content. In situations'
+                    ' where requests are blocked because of bad client/'
+                    'unrecognized web browser/etc (like with CloudFlare'
+                    ' tunnels), change to some typical '
+                    'web browser user agent header.')
+parser.add_argument('--additional-headers',
+                    type=str,
+                    nargs="+",
+                    default=[],
+                    help='List of arbitrary additional HTTP headers to be '
+                    'sent with every HTTP request. They can override default'
+                    ' ones, including Authorization header. '
+                    'Example: -u "Header1: value1" "Header2": value2')
 parser.add_argument(
    '-l',
    '--level',
@ -87,10 +104,18 @@ parser.add_argument('-V',

 args = parser.parse_args()

+
+def removesuffix(text, suffix):
+    """Remove suffix from text if matched."""
+    if text.endswith(suffix):
+        return text[:len(text) - len(suffix)]
+    return text
+
+
 logging.basicConfig(format='%(levelname)s :: %(message)s',
                    level=LOG_LEVEL.get(args.log_level))

-formats: list[str] = args.formats
+formats: List[str] = args.formats
 FORBIDDEN_CHARS = args.forbidden_chars

 for frmt in formats:
@ -99,21 +124,28 @@ for frmt in formats:
              "check api docs for current version of your BookStack")
        sys.exit(1)

-API_PREFIX: str = f"{args.host.removesuffix(os.path.sep)}/api"
-FS_PATH: str = args.path.removesuffix(os.path.sep)
-LEVEL_CHOICE: list[str] = args.level
+API_PREFIX: str = f"{removesuffix(args.host, os.path.sep)}/api"
+FS_PATH: str = removesuffix(args.path, os.path.sep)
+LEVEL_CHOICE: List[str] = args.level
 for lvl in LEVEL_CHOICE:
    if lvl not in LEVELS:
        error(f"Level {lvl} is not supported, can be only one of {LEVELS}")
        sys.exit(1)

 with open(args.token_file, 'r', encoding='utf-8') as f:
-    TOKEN: str = f.readline().removesuffix('\n')
+    TOKEN: str = removesuffix(f.readline(), '\n')

 HEADERS = {
    'Content-Type': 'application/json; charset=utf-8',
-    'Authorization': f"Token {TOKEN}"
+    'Authorization': f"Token {TOKEN}",
+    'User-Agent': args.user_agent
 }
+for header in args.additional_headers:
+    values = header.split(':', 1)
+    if len(values) < 2:
+        raise ValueError(f"Improper HTTP header specification: {header}")
+    HEADERS[values[0]] = values[1]
+
 SKIP_TIMESTAMPS: bool = args.force_update_files


@ -125,7 +157,7 @@ class Node:
        for char in FORBIDDEN_CHARS:
            name = name.replace(char, "_")
        self.__name: str = name
-        self.__children: list['Node'] = []
+        self.__children: List['Node'] = []

        self.__parent: Union['Node', None] = parent
        if parent is not None:
@ -136,15 +168,18 @@ class Node:

    @property
    def name(self) -> str:
+        """Return name of this Shelf/Book/Chapter/Page."""
        return self.__name

    @property
    def parent(self) -> Union['Node', None]:
+        """Return parent Node or None if there isn't any."""
        return self.__parent

    def changed_since(self, timestamp: datetime) -> int:
        """
-        Check if remote version have changed after given timestamp, including its children
+        Check if remote version have changed after given timestamp,
+        including its children
        :param timestamp:
        :return: amount of changed documents at level of this document Node
        """
@ -175,11 +210,11 @@ class Node:
        return self.__node_id


-shelves: dict[int, Node] = {}
-books: dict[int, Node] = {}
-chapters: dict[int, Node] = {}
-pages: dict[int, Node] = {}
-pages_not_in_chapter: dict[int, Node] = {}
+shelves: Dict[int, Node] = {}
+books: Dict[int, Node] = {}
+chapters: Dict[int, Node] = {}
+pages: Dict[int, Node] = {}
+pages_not_in_chapter: Dict[int, Node] = {}


 def api_timestamp_string_to_datetime(timestamp: str) -> datetime:
@ -271,7 +306,7 @@ def check_if_update_needed(file_path: str, document: Node) -> bool:
    return False


-def export(documents: list[Node], level: str):
+def export(documents: List[Node], level: str):
    """Save Node to file."""
    for document in documents:
        make_dir(f"{FS_PATH}{os.path.sep}{document.get_path()}")
@ -370,7 +405,7 @@ for page_data in api_get_listing('pages'):
          f"last edit: {page.get_last_edit_timestamp()}")
    pages[page.get_id()] = page

-files: list[Node] = []
+files: List[Node] = []
 EXPORT_PAGES_NOT_IN_CHAPTER: bool = False

 for lvl in LEVEL_CHOICE: