Add HTTP headers customization parameters. Fix for #3

This commit is contained in:
Maciej Lebiest 2023-04-16 10:33:29 +02:00
parent 50fe9b7476
commit acf8b71e5b
2 changed files with 74 additions and 25 deletions

View file

@ -9,6 +9,8 @@ Customizable script for exporting notes from BookStack through API
- customizable path for placing exported notes - customizable path for placing exported notes
- configure replacing any characters in filenames with "_" for any filesystem compatibility - configure replacing any characters in filenames with "_" for any filesystem compatibility
- authorization token is loaded from txt file - authorization token is loaded from txt file
- Set custom HTTP User-Agent header to bypass filtering based on that header (like in CloudFlare tunnels)
- Set arbitrary custom headers through parameter
Requirements: Requirements:
- Python at least in version 3.6 - Python at least in version 3.6
@ -26,12 +28,15 @@ python exporter.py \
--force-update-files \ --force-update-files \
-t ./token.txt \ -t ./token.txt \
-V debug \ -V debug \
-p ./ -p ./ \
--user-agent "Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/112.0" \
--additional-headers "Header1: value1" "Header2: value2"
``` ```
Customization: Customization:
```text ```text
options: options:
-h, --help show this help message and exit
-p PATH, --path PATH Path where exported files will be placed. -p PATH, --path PATH Path where exported files will be placed.
-t TOKEN_FILE, --token-file TOKEN_FILE -t TOKEN_FILE, --token-file TOKEN_FILE
File containing authorization token in format TOKEN_ID:TOKEN_SECRET File containing authorization token in format TOKEN_ID:TOKEN_SECRET
@ -41,11 +46,20 @@ options:
Space separated list of formats to use for export. Space separated list of formats to use for export.
-c FORBIDDEN_CHARS [FORBIDDEN_CHARS ...], --forbidden-chars FORBIDDEN_CHARS [FORBIDDEN_CHARS ...] -c FORBIDDEN_CHARS [FORBIDDEN_CHARS ...], --forbidden-chars FORBIDDEN_CHARS [FORBIDDEN_CHARS ...]
Space separated list of symbols to be replaced with "_" in filenames. Space separated list of symbols to be replaced with "_" in filenames.
-l {pages,chapters,books} [{pages,chapters,books} ...], --level {pages,chapters,books} [{pages,chapters,books} ...] -u USER_AGENT, --user-agent USER_AGENT
User agent header content. In situations where requests are blocked
because of bad client/unrecognized web browser/etc (like with CloudFlare tunnels),
change to some typical web browser user agent header.
--additional-headers ADDITIONAL_HEADERS [ADDITIONAL_HEADERS ...]
List of arbitrary additional HTTP headers to be sent with every HTTP request.
They can override default ones, including Authorization header.
Example: --additional-headers "Header1: value1" "Header2: value2"
-l {pages,chapters,books} [{pages,chapters,books} ...],
--level {pages,chapters,books} [{pages,chapters,books} ...]
Space separated list of levels at which should be export performed. Space separated list of levels at which should be export performed.
--force-update-files Set this option to skip checking local files timestamps against remote last edit --force-update-files Set this option to skip checking local files timestamps against
timestamps.This will cause overwriting local files, even if they seem to be already in remote last edit timestamps. This will cause overwriting local files,
newest version. even if they seem to be already in newest version.
-V {debug,info,warning,error}, --log-level {debug,info,warning,error} -V {debug,info,warning,error}, --log-level {debug,info,warning,error}
Set verbosity level. Set verbosity level.
``` ```

View file

@ -6,12 +6,12 @@ from datetime import datetime
from logging import info, error, debug from logging import info, error, debug
from pathlib import Path from pathlib import Path
import sys import sys
from typing import Union from typing import Dict, List, Union
from urllib.request import urlopen, Request from urllib.request import urlopen, Request
import urllib.parse import urllib.parse
# (formatName, fileExtension) # (formatName, fileExtension)
FORMATS: dict['str', 'str'] = { FORMATS: Dict['str', 'str'] = {
'markdown': 'md', 'markdown': 'md',
'plaintext': 'txt', 'plaintext': 'txt',
'pdf': 'pdf', 'pdf': 'pdf',
@ -20,7 +20,7 @@ FORMATS: dict['str', 'str'] = {
LEVELS = ['pages', 'chapters', 'books'] LEVELS = ['pages', 'chapters', 'books']
LOG_LEVEL: dict = { LOG_LEVEL: Dict = {
'debug': logging.DEBUG, 'debug': logging.DEBUG,
'info': logging.INFO, 'info': logging.INFO,
'warning': logging.WARNING, 'warning': logging.WARNING,
@ -28,7 +28,7 @@ LOG_LEVEL: dict = {
} }
# Characters in filenames to be replaced with "_" # Characters in filenames to be replaced with "_"
FORBIDDEN_CHARS: list[str] = ["/", "#"] FORBIDDEN_CHARS: List[str] = ["/", "#"]
parser = argparse.ArgumentParser(description='BookStack exporter') parser = argparse.ArgumentParser(description='BookStack exporter')
parser.add_argument('-p', parser.add_argument('-p',
@ -62,6 +62,23 @@ parser.add_argument('-c',
nargs="+", nargs="+",
help='Space separated list of symbols to be replaced ' help='Space separated list of symbols to be replaced '
'with "_" in filenames.') 'with "_" in filenames.')
# Value for the HTTP User-Agent request header. The default identifies this
# script; it can be replaced with a typical browser string when requests get
# rejected because of an unrecognized client (see the help text below).
parser.add_argument('-u',
'--user-agent',
type=str,
default="BookStack exporter",
help='User agent header content. In situations'
' where requests are blocked because of bad client/'
'unrecognized web browser/etc (like with CloudFlare'
' tunnels), change to some typical '
'web browser user agent header.')
# Extra request headers, each given as one "Name: value" string. They are
# applied on top of the default headers, so they can replace any of them.
parser.add_argument('--additional-headers',
                    type=str,
                    nargs="+",
                    default=[],
                    help='List of arbitrary additional HTTP headers to be '
                    'sent with every HTTP request. They can override default'
                    ' ones, including Authorization header. '
                    'Example: --additional-headers "Header1: value1" '
                    '"Header2: value2"')
parser.add_argument( parser.add_argument(
'-l', '-l',
'--level', '--level',
@ -87,10 +104,18 @@ parser.add_argument('-V',
args = parser.parse_args() args = parser.parse_args()
def removesuffix(text: str, suffix: str) -> str:
    """
    Return text with the given trailing suffix removed, if it is present.

    Stand-in for str.removesuffix (added in Python 3.9), so the script
    also runs on older interpreters.

    :param text: string to remove the suffix from
    :param suffix: suffix to strip; an empty suffix leaves text unchanged
    :return: text without the suffix, or text unchanged when it does not
        end with suffix
    """
    # Guard against an empty suffix explicitly: text[:-0] would be "".
    if suffix and text.endswith(suffix):
        return text[:-len(suffix)]
    return text
logging.basicConfig(format='%(levelname)s :: %(message)s', logging.basicConfig(format='%(levelname)s :: %(message)s',
level=LOG_LEVEL.get(args.log_level)) level=LOG_LEVEL.get(args.log_level))
formats: list[str] = args.formats formats: List[str] = args.formats
FORBIDDEN_CHARS = args.forbidden_chars FORBIDDEN_CHARS = args.forbidden_chars
for frmt in formats: for frmt in formats:
@ -99,21 +124,28 @@ for frmt in formats:
"check api docs for current version of your BookStack") "check api docs for current version of your BookStack")
sys.exit(1) sys.exit(1)
API_PREFIX: str = f"{args.host.removesuffix(os.path.sep)}/api" API_PREFIX: str = f"{removesuffix(args.host, os.path.sep)}/api"
FS_PATH: str = args.path.removesuffix(os.path.sep) FS_PATH: str = removesuffix(args.path, os.path.sep)
LEVEL_CHOICE: list[str] = args.level LEVEL_CHOICE: List[str] = args.level
for lvl in LEVEL_CHOICE: for lvl in LEVEL_CHOICE:
if lvl not in LEVELS: if lvl not in LEVELS:
error(f"Level {lvl} is not supported, can be only one of {LEVELS}") error(f"Level {lvl} is not supported, can be only one of {LEVELS}")
sys.exit(1) sys.exit(1)
with open(args.token_file, 'r', encoding='utf-8') as f: with open(args.token_file, 'r', encoding='utf-8') as f:
TOKEN: str = f.readline().removesuffix('\n') TOKEN: str = removesuffix(f.readline(), '\n')
HEADERS = { HEADERS = {
'Content-Type': 'application/json; charset=utf-8', 'Content-Type': 'application/json; charset=utf-8',
'Authorization': f"Token {TOKEN}" 'Authorization': f"Token {TOKEN}",
'User-Agent': args.user_agent
} }
# Merge user-supplied "Name: value" headers into the default request headers.
for header in args.additional_headers:
    # Split on the first colon only, so the value itself may contain ':'.
    name, separator, value = header.partition(':')
    name = name.strip()
    if not separator or not name:
        raise ValueError(f"Improper HTTP header specification: {header}")
    # Strip surrounding whitespace so "Name: value" stores "value" rather
    # than " value", and a padded name still overrides the matching default.
    HEADERS[name] = value.strip()
SKIP_TIMESTAMPS: bool = args.force_update_files SKIP_TIMESTAMPS: bool = args.force_update_files
@ -125,7 +157,7 @@ class Node:
for char in FORBIDDEN_CHARS: for char in FORBIDDEN_CHARS:
name = name.replace(char, "_") name = name.replace(char, "_")
self.__name: str = name self.__name: str = name
self.__children: list['Node'] = [] self.__children: List['Node'] = []
self.__parent: Union['Node', None] = parent self.__parent: Union['Node', None] = parent
if parent is not None: if parent is not None:
@ -136,15 +168,18 @@ class Node:
@property @property
def name(self) -> str: def name(self) -> str:
"""Return name of this Shelf/Book/Chapter/Page."""
return self.__name return self.__name
@property @property
def parent(self) -> Union['Node', None]: def parent(self) -> Union['Node', None]:
"""Return parent Node or None if there isn't any."""
return self.__parent return self.__parent
def changed_since(self, timestamp: datetime) -> int: def changed_since(self, timestamp: datetime) -> int:
""" """
Check if remote version have changed after given timestamp, including its children Check if remote version have changed after given timestamp,
including its children
:param timestamp: :param timestamp:
:return: amount of changed documents at level of this document Node :return: amount of changed documents at level of this document Node
""" """
@ -175,11 +210,11 @@ class Node:
return self.__node_id return self.__node_id
shelves: dict[int, Node] = {} shelves: Dict[int, Node] = {}
books: dict[int, Node] = {} books: Dict[int, Node] = {}
chapters: dict[int, Node] = {} chapters: Dict[int, Node] = {}
pages: dict[int, Node] = {} pages: Dict[int, Node] = {}
pages_not_in_chapter: dict[int, Node] = {} pages_not_in_chapter: Dict[int, Node] = {}
def api_timestamp_string_to_datetime(timestamp: str) -> datetime: def api_timestamp_string_to_datetime(timestamp: str) -> datetime:
@ -271,7 +306,7 @@ def check_if_update_needed(file_path: str, document: Node) -> bool:
return False return False
def export(documents: list[Node], level: str): def export(documents: List[Node], level: str):
"""Save Node to file.""" """Save Node to file."""
for document in documents: for document in documents:
make_dir(f"{FS_PATH}{os.path.sep}{document.get_path()}") make_dir(f"{FS_PATH}{os.path.sep}{document.get_path()}")
@ -370,7 +405,7 @@ for page_data in api_get_listing('pages'):
f"last edit: {page.get_last_edit_timestamp()}") f"last edit: {page.get_last_edit_timestamp()}")
pages[page.get_id()] = page pages[page.get_id()] = page
files: list[Node] = [] files: List[Node] = []
EXPORT_PAGES_NOT_IN_CHAPTER: bool = False EXPORT_PAGES_NOT_IN_CHAPTER: bool = False
for lvl in LEVEL_CHOICE: for lvl in LEVEL_CHOICE: