From 029b5d920f2a1b7352addd20dd61d1360add5e7e Mon Sep 17 00:00:00 2001
From: Maciej Lebiest
Date: Fri, 13 May 2022 20:44:05 +0200
Subject: [PATCH] Initial commit

---
Reviewer note: exporter.py and one README typo were revised after this commit
was recorded, so the blob ids on the "index" lines of README.md and exporter.py
are the original ones and no longer match the content below.

 .gitignore  |   2 +
 README.md   |  30 ++++++++
 exporter.py | 200 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 232 insertions(+)
 create mode 100644 exporter.py

diff --git a/.gitignore b/.gitignore
index b6e4761..24c1e3f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,6 +3,8 @@ __pycache__/
 *.py[cod]
 *$py.class
 
+.idea/
+token.txt
 # C extensions
 *.so
 
diff --git a/README.md b/README.md
index efca5fa..f11b2dd 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,32 @@
 # BookStack-Python-exporter
 Customizable script for exporting notes from BookStack through API
+
+This script allows you to export Pages in any format available in BookStack's API (can choose multiple formats at once).
+
+Full example on how to use the script:
+1. Clone the repo
+2. next to the script place token.txt file containing token id and token secret in format: TOKEN_ID:TOKEN_SECRET
+3. in the same directory run the command, specifying your app domain with https prefix:
+```bash
+python exporter.py -H https://wiki.example.com -f pdf,md,plaintext,html -t ./token.txt -p ./
+```
+
+Customization:
+```text
+  -p PATH, --path PATH  Path where exported files will be placed.
+                        Default: .
+  -t TOKEN_FILE, --token-file TOKEN_FILE
+                        File containing authorization token in format TOKEN_ID:TOKEN_SECRET
+                        Default: ./token.txt
+  -H HOST, --host HOST  Your domain with protocol prefix, example: https://example.com
+                        Default: https://localhost
+  -f FORMATS, --formats FORMATS
+                        Coma separated list of formats to use for export. Available ones: md,plaintext,pdf,html
+                        Default: md
+```
+
+### TODO:
+- choosing verbosity level through command line parameter
+- choosing on what level should the notes be exported (Books, Chapters, Pages)
+- (optional) choosing if update note file only if the last edit timestamp from API is later than the local file timestamp
+- suggestions?
diff --git a/exporter.py b/exporter.py
new file mode 100644
index 0000000..9543774
--- /dev/null
+++ b/exporter.py
@@ -0,0 +1,200 @@
+import argparse
+import json
+import logging
+import os
+from logging import info, error
+from pathlib import Path
+
+import requests
+from requests import Response
+
+logging.basicConfig(format='%(levelname)s :: %(message)s', level=logging.INFO)
+
+# (formatName, fileExtension)
+FORMATS: dict[str, str] = {
+    'md': 'md',
+    'plaintext': 'txt',
+    'pdf': 'pdf',
+    'html': 'html'
+}
+
+# The export endpoint of the API names some formats differently than FORMATS does:
+# markdown is served from pages/{id}/export/markdown, not .../export/md.
+API_FORMAT_NAMES: dict[str, str] = {'md': 'markdown'}
+
+# How many items to ask for in one call when reading a paginated listing.
+LISTING_PAGE_SIZE: int = 100
+
+parser = argparse.ArgumentParser(description='BookStack exporter')
+parser.add_argument('-p', '--path', type=str, default='.',
+                    help='Path where exported files will be placed.')
+parser.add_argument('-t', '--token-file', type=str, default=f'.{os.path.sep}token.txt',
+                    help='File containing authorization token in format TOKEN_ID:TOKEN_SECRET')
+parser.add_argument('-H', '--host', type=str, default='https://localhost',
+                    help='Your domain with protocol prefix, example: https://example.com')
+parser.add_argument('-f', '--formats', type=str, default='md',
+                    help=f'Coma separated list of formats to use for export.'
+                         f' Available ones: {",".join(FORMATS)}')
+args = parser.parse_args()
+
+formats = [name.strip() for name in args.formats.split(',')]
+unknown_formats = [name for name in formats if name not in FORMATS]
+if unknown_formats:
+    # Reject a bad --formats value up front rather than with a KeyError after all the API calls.
+    parser.error(f"Unknown format(s): {', '.join(map(repr, unknown_formats))}."
+                 f" Available ones: {','.join(FORMATS)}")
+
+# The host is a URL, so its trailing separator is always '/' (os.path.sep is a backslash on Windows).
+API_PREFIX: str = f"{args.host.rstrip('/')}/api"
+FS_PATH: str = args.path.removesuffix(os.path.sep)
+
+with open(args.token_file, 'r') as f:
+    # strip() also removes the '\r' of a CRLF line ending, which would corrupt the header.
+    TOKEN: str = f.readline().strip()
+
+HEADERS = {'Content-Type': 'application/json; charset=utf-8',
+           'Authorization': f"Token {TOKEN}"}
+
+
+def fs_name(name: str) -> str:
+    """Return `name` with path separators replaced by '_' so it is a single path component."""
+    return name.replace('/', '_').replace(os.path.sep, '_')
+
+
+class Node:
+    """A shelf, book, chapter or page together with its parent in the BookStack hierarchy."""
+
+    def __init__(self, name: str, parent: 'Node | None', node_id: int):
+        self.__name: str = name
+        self.__parent: 'Node | None' = parent
+        self.__node_id: int = node_id
+
+    def get_name(self) -> str:
+        return self.__name
+
+    def get_parent(self) -> 'Node | None':
+        return self.__parent
+
+    def set_parent(self, parent: 'Node'):
+        self.__parent = parent
+
+    def get_path(self) -> str:
+        """Return the directory of this node, relative to the export root: '.' plus all ancestor names."""
+        if self.__parent is None:
+            return "."
+        return self.__parent.get_path() + os.path.sep + fs_name(self.__parent.get_name())
+
+    def get_id(self) -> int:
+        return self.__node_id
+
+
+shelves: dict[int, Node] = {}
+books: dict[int, Node] = {}
+chapters: dict[int, Node] = {}
+pages: dict[int, Node] = {}
+
+
+def make_dir(path: str):
+    """Create directory `path` with any missing parents; do nothing when it already exists."""
+    path_obj = Path(path)
+    if path_obj.exists():
+        return
+    info(f"Creating dir {path}")
+    path_obj.mkdir(exist_ok=True, parents=True)
+
+
+def api_get(path: str) -> dict:
+    """GET `path` below the API root and return the decoded JSON body.
+
+    Exits the script on 403; any other error status raises requests.HTTPError
+    instead of letting an error page be parsed as JSON.
+    """
+    response: Response = requests.get(f'{API_PREFIX}/{path}', headers=HEADERS)
+
+    if response.status_code == 403:
+        error("403 Forbidden, check your token!")
+        exit(response.status_code)
+    response.raise_for_status()
+
+    data: dict = json.loads(response.text)
+    return data
+
+
+def api_get_listing(path: str) -> list:
+    """Return all items of the listing endpoint `path`, reading it batch by batch.
+
+    One call returns a limited number of items, so the next batch is requested
+    (using the 'count' and 'offset' query parameters) until 'total' items are collected.
+    """
+    items: list = []
+    while True:
+        listing = api_get(f'{path}?count={LISTING_PAGE_SIZE}&offset={len(items)}')
+        batch = listing.get('data') or []
+        items.extend(batch)
+        # Stopping on an empty batch keeps the loop finite even if 'total' is missing or wrong.
+        if not batch or len(items) >= (listing.get('total') or 0):
+            return items
+
+
+info("Getting info about Shelves and their Books")
+
+for shelf_data in api_get_listing('shelves'):
+    shelf = Node(shelf_data.get('name'), None, shelf_data.get('id'))
+    shelves[shelf.get_id()] = shelf
+
+    shelf_details = api_get(f'shelves/{shelf.get_id()}')
+
+    for book_data in shelf_details.get('books') or []:
+        book = Node(book_data.get('name'), shelf, book_data.get('id'))
+        books[book.get_id()] = book
+
+info("Getting info about Books not belonging to any shelf")
+
+for book_data in api_get_listing('books'):
+    # Books registered while walking the shelves already have their parent set.
+    if book_data.get('id') in books:
+        continue
+    book = Node(book_data.get('name'), None, book_data.get('id'))
+    info(f"Book \"{book.get_name()}\" has no shelf assigned.")
+    books[book.get_id()] = book
+
+info("Getting info about Chapters")
+
+for chapter_data in api_get_listing('chapters'):
+    chapter = Node(chapter_data.get('name'), books.get(chapter_data.get('book_id')), chapter_data.get('id'))
+    chapters[chapter.get_id()] = chapter
+
+info("Getting info about Pages")
+for page_data in api_get_listing('pages'):
+    chapter_id = page_data.get('chapter_id')
+    if chapter_id:
+        parent = chapters.get(chapter_id)
+    else:
+        # chapter_id 0 means the page sits directly in its book.
+        parent = books.get(page_data.get('book_id'))
+        if parent is not None:
+            info(f"Page \"{page_data.get('name')}\" is not in any chapter, "
+                 f"using Book \"{parent.get_name()}\" as a parent.")
+    page = Node(page_data.get('name'), parent, page_data.get('id'))
+    pages[page.get_id()] = page
+
+for page in pages.values():
+    page_dir: str = f"{FS_PATH}{os.path.sep}{page.get_path()}"
+    make_dir(page_dir)
+
+    for frmt in formats:
+        file_name: str = f"{fs_name(page.get_name())}.{FORMATS[frmt]}"
+        path: str = f"{page_dir}{os.path.sep}{file_name}"
+
+        export_name: str = API_FORMAT_NAMES.get(frmt, frmt)
+        result: Response = requests.get(f'{API_PREFIX}/pages/{page.get_id()}/export/{export_name}', headers=HEADERS)
+        if not result.ok:
+            # Do not overwrite a previous export with the body of an error response.
+            error(f"Exporting page \"{page.get_name()}\" as {frmt} failed with status {result.status_code}")
+            continue
+        if os.path.exists(path):
+            info(f"Updating file with page \"{page.get_name()}.{FORMATS[frmt]}\"")
+        else:
+            info(f"Saving new file with page \"{page.get_name()}.{FORMATS[frmt]}\"")
+        with open(path, 'wb') as f:
+            f.write(result.content)