Add HTTP headers customization parameters. Fix for #3

parent 50fe9b7476, commit acf8b71e5b

2 changed files with 74 additions and 25 deletions

README.md (26 changes)
@@ -9,6 +9,8 @@ Customizable script for exporting notes from BookStack through API
 - customizable path for placing exported notes
 - configure replacing any characters in filenames with "_" for any filesystem compatibility
 - authorization token is loaded from txt file
+- Set custom HTTP User-Agent header to bypass filtering based on that header (like in CloudFlare tunnels)
+- Set arbitrary custom headers through parameter
 
 Requirements:
 - Python at least in version 3.6
@@ -26,26 +28,38 @@ python exporter.py \
     --force-update-files \
     -t ./token.txt \
     -V debug \
-    -p ./
+    -p ./ \
+    --user-agent "Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/112.0" \
+    --additional-headers "Header1: value1" "Header2: value2"
 ```
 
 Customization:
 ```text
 options:
+  -h, --help            show this help message and exit
   -p PATH, --path PATH  Path where exported files will be placed.
   -t TOKEN_FILE, --token-file TOKEN_FILE
                         File containing authorization token in format TOKEN_ID:TOKEN_SECRET
   -H HOST, --host HOST  Your domain with protocol prefix, example: https://example.com
   -f {markdown,plaintext,pdf,html} [{markdown,plaintext,pdf,html} ...],
     --formats {markdown,plaintext,pdf,html} [{markdown,plaintext,pdf,html} ...]
                         Space separated list of formats to use for export.
   -c FORBIDDEN_CHARS [FORBIDDEN_CHARS ...], --forbidden-chars FORBIDDEN_CHARS [FORBIDDEN_CHARS ...]
                         Space separated list of symbols to be replaced with "_" in filenames.
-  -l {pages,chapters,books} [{pages,chapters,books} ...], --level {pages,chapters,books} [{pages,chapters,books} ...]
+  -u USER_AGENT, --user-agent USER_AGENT
+                        User agent header content. In situations where requests are blocked
+                        because of bad client/unrecognized web browser/etc (like with CloudFlare tunnels),
+                        change to some typical web browser user agent header.
+  --additional-headers ADDITIONAL_HEADERS [ADDITIONAL_HEADERS ...]
+                        List of arbitrary additional HTTP headers to be sent with every HTTP request.
+                        They can override default ones, including Authorization header.
+                        Example: --additional-headers "Header1: value1" "Header2: value2"
+  -l {pages,chapters,books} [{pages,chapters,books} ...],
+    --level {pages,chapters,books} [{pages,chapters,books} ...]
                         Space separated list of levels at which should be export performed.
-  --force-update-files  Set this option to skip checking local files timestamps against remote last edit
-                        timestamps.This will cause overwriting local files, even if they seem to be already in
-                        newest version.
+  --force-update-files  Set this option to skip checking local files timestamps against
+                        remote last edit timestamps. This will cause overwriting local files,
+                        even if they seem to be already in newest version.
   -V {debug,info,warning,error}, --log-level {debug,info,warning,error}
                         Set verbosity level.
 ```
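For concreteness, the request headers produced by the example invocation above would look roughly like this. This is a sketch derived from the HEADERS construction added in exporter.py below; the token value is a placeholder.

```python
# Sketch of the headers the example command above would send (token shown is a placeholder).
HEADERS = {
    'Content-Type': 'application/json; charset=utf-8',
    'Authorization': 'Token TOKEN_ID:TOKEN_SECRET',
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/112.0',
    'Header1': ' value1',  # from --additional-headers "Header1: value1"
    'Header2': ' value2',  # from --additional-headers "Header2: value2"
}
```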
exporter.py (73 changes)
@@ -6,12 +6,12 @@ from datetime import datetime
 from logging import info, error, debug
 from pathlib import Path
 import sys
-from typing import Union
+from typing import Dict, List, Union
 from urllib.request import urlopen, Request
 import urllib.parse
 
 # (formatName, fileExtension)
-FORMATS: dict['str', 'str'] = {
+FORMATS: Dict['str', 'str'] = {
     'markdown': 'md',
     'plaintext': 'txt',
     'pdf': 'pdf',
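Note: the typing changes in this hunk (and the remaining Dict/List swaps below), together with the removesuffix() helper added further down, appear to be what keeps the script on the Python 3.6 minimum stated in the README: subscripting built-in types in annotations (PEP 585) and str.removesuffix() both require Python 3.9. A minimal sketch, separate from exporter.py, assuming a pre-3.9 interpreter:

```python
# Illustrative only, not part of exporter.py.
from typing import Dict

FORMATS_COMPAT: Dict[str, str] = {'markdown': 'md'}  # valid on Python 3.6+

# The pre-change spelling fails at import time on Python < 3.9, because the
# annotation expression is evaluated and built-in `dict` is not subscriptable there:
#   FORMATS_OLD: dict[str, str] = {'markdown': 'md'}
#   -> TypeError: 'type' object is not subscriptable
```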
@@ -20,7 +20,7 @@ FORMATS: dict['str', 'str'] = {
 
 LEVELS = ['pages', 'chapters', 'books']
 
-LOG_LEVEL: dict = {
+LOG_LEVEL: Dict = {
     'debug': logging.DEBUG,
     'info': logging.INFO,
     'warning': logging.WARNING,
@@ -28,7 +28,7 @@ LOG_LEVEL: dict = {
 }
 
 # Characters in filenames to be replaced with "_"
-FORBIDDEN_CHARS: list[str] = ["/", "#"]
+FORBIDDEN_CHARS: List[str] = ["/", "#"]
 
 parser = argparse.ArgumentParser(description='BookStack exporter')
 parser.add_argument('-p',
@@ -62,6 +62,23 @@ parser.add_argument('-c',
                     nargs="+",
                     help='Space separated list of symbols to be replaced '
                     'with "_" in filenames.')
+parser.add_argument('-u',
+                    '--user-agent',
+                    type=str,
+                    default="BookStack exporter",
+                    help='User agent header content. In situations'
+                    ' where requests are blocked because of bad client/'
+                    'unrecognized web browser/etc (like with CloudFlare'
+                    ' tunnels), change to some typical '
+                    'web browser user agent header.')
+parser.add_argument('--additional-headers',
+                    type=str,
+                    nargs="+",
+                    default=[],
+                    help='List of arbitrary additional HTTP headers to be '
+                    'sent with every HTTP request. They can override default'
+                    ' ones, including Authorization header. '
+                    'Example: --additional-headers "Header1: value1" "Header2: value2"')
 parser.add_argument(
     '-l',
     '--level',
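For readers unfamiliar with nargs="+", here is a stripped-down sketch (a separate throwaway parser, not the exporter's own) of how the two new options come out of argparse:

```python
# Illustrative only: behaviour of the two new options with a minimal parser.
import argparse

p = argparse.ArgumentParser()
p.add_argument('-u', '--user-agent', type=str, default="BookStack exporter")
p.add_argument('--additional-headers', type=str, nargs="+", default=[])

a = p.parse_args(['--additional-headers', 'Header1: value1', 'Header2: value2'])
print(a.user_agent)          # BookStack exporter  (default kept when -u is omitted)
print(a.additional_headers)  # ['Header1: value1', 'Header2: value2']  (nargs="+" -> list)
```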
@@ -87,10 +104,18 @@ parser.add_argument('-V',
 
 args = parser.parse_args()
 
+
+def removesuffix(text, suffix):
+    """Remove suffix from text if matched."""
+    if text.endswith(suffix):
+        return text[:len(text) - len(suffix)]
+    return text
+
+
 logging.basicConfig(format='%(levelname)s :: %(message)s',
                     level=LOG_LEVEL.get(args.log_level))
 
-formats: list[str] = args.formats
+formats: List[str] = args.formats
 FORBIDDEN_CHARS = args.forbidden_chars
 
 for frmt in formats:
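The helper above backports str.removesuffix(), which only exists since Python 3.9, so the call sites changed in the next hunk keep working on Python 3.6. A standalone illustration of its behaviour (sample values are made up):

```python
# Standalone copy of the helper with the cases the exporter relies on.
def removesuffix(text, suffix):
    """Remove suffix from text if matched."""
    if text.endswith(suffix):
        return text[:len(text) - len(suffix)]
    return text

assert removesuffix("https://example.com/", "/") == "https://example.com"         # host normalization
assert removesuffix("token_id:token_secret\n", "\n") == "token_id:token_secret"   # token file line
assert removesuffix("notes", "/") == "notes"                                      # no-op when suffix absent
```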
@@ -99,21 +124,28 @@ for frmt in formats:
           "check api docs for current version of your BookStack")
     sys.exit(1)
 
-API_PREFIX: str = f"{args.host.removesuffix(os.path.sep)}/api"
-FS_PATH: str = args.path.removesuffix(os.path.sep)
-LEVEL_CHOICE: list[str] = args.level
+API_PREFIX: str = f"{removesuffix(args.host, os.path.sep)}/api"
+FS_PATH: str = removesuffix(args.path, os.path.sep)
+LEVEL_CHOICE: List[str] = args.level
 for lvl in LEVEL_CHOICE:
     if lvl not in LEVELS:
         error(f"Level {lvl} is not supported, can be only one of {LEVELS}")
         sys.exit(1)
 
 with open(args.token_file, 'r', encoding='utf-8') as f:
-    TOKEN: str = f.readline().removesuffix('\n')
+    TOKEN: str = removesuffix(f.readline(), '\n')
 
 HEADERS = {
     'Content-Type': 'application/json; charset=utf-8',
-    'Authorization': f"Token {TOKEN}"
+    'Authorization': f"Token {TOKEN}",
+    'User-Agent': args.user_agent
 }
+for header in args.additional_headers:
+    values = header.split(':', 1)
+    if len(values) < 2:
+        raise ValueError(f"Improper HTTP header specification: {header}")
+    HEADERS[values[0]] = values[1]
+
 SKIP_TIMESTAMPS: bool = args.force_update_files
 
 
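Each --additional-headers value is split once on the first colon and written into HEADERS with plain dict assignment, which is why a user-supplied header of the same name (e.g. Authorization) replaces the default. A minimal sketch of that split, with hypothetical values:

```python
# Illustrative only, not part of exporter.py.
header = "X-Custom-Header: value1"       # hypothetical --additional-headers value
values = header.split(':', 1)
# values == ['X-Custom-Header', ' value1']
# -> HEADERS['X-Custom-Header'] = ' value1'  (leading space is harmless optional whitespace)

# "NoColonHere".split(':', 1) == ['NoColonHere'], so len(values) < 2 and the loop
# above raises ValueError("Improper HTTP header specification: NoColonHere").
```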
@@ -125,7 +157,7 @@ class Node:
         for char in FORBIDDEN_CHARS:
             name = name.replace(char, "_")
         self.__name: str = name
-        self.__children: list['Node'] = []
+        self.__children: List['Node'] = []
 
         self.__parent: Union['Node', None] = parent
         if parent is not None:
@@ -136,15 +168,18 @@ class Node:
 
     @property
     def name(self) -> str:
+        """Return name of this Shelf/Book/Chapter/Page."""
         return self.__name
 
     @property
     def parent(self) -> Union['Node', None]:
+        """Return parent Node or None if there isn't any."""
        return self.__parent
 
     def changed_since(self, timestamp: datetime) -> int:
         """
-        Check if remote version have changed after given timestamp, including its children
+        Check if remote version have changed after given timestamp,
+        including its children
         :param timestamp:
         :return: amount of changed documents at level of this document Node
         """
@@ -175,11 +210,11 @@ class Node:
         return self.__node_id
 
 
-shelves: dict[int, Node] = {}
-books: dict[int, Node] = {}
-chapters: dict[int, Node] = {}
-pages: dict[int, Node] = {}
-pages_not_in_chapter: dict[int, Node] = {}
+shelves: Dict[int, Node] = {}
+books: Dict[int, Node] = {}
+chapters: Dict[int, Node] = {}
+pages: Dict[int, Node] = {}
+pages_not_in_chapter: Dict[int, Node] = {}
 
 
 def api_timestamp_string_to_datetime(timestamp: str) -> datetime:
@@ -271,7 +306,7 @@ def check_if_update_needed(file_path: str, document: Node) -> bool:
         return False
 
 
-def export(documents: list[Node], level: str):
+def export(documents: List[Node], level: str):
     """Save Node to file."""
     for document in documents:
         make_dir(f"{FS_PATH}{os.path.sep}{document.get_path()}")
@@ -370,7 +405,7 @@ for page_data in api_get_listing('pages'):
              f"last edit: {page.get_last_edit_timestamp()}")
     pages[page.get_id()] = page
 
-files: list[Node] = []
+files: List[Node] = []
 EXPORT_PAGES_NOT_IN_CHAPTER: bool = False
 
 for lvl in LEVEL_CHOICE: