From 029b5d920f2a1b7352addd20dd61d1360add5e7e Mon Sep 17 00:00:00 2001
From: Maciej Lebiest <maciejlebiest@protonmail.com>
Date: Fri, 13 May 2022 20:44:05 +0200
Subject: [PATCH] Initial commit

---
 .gitignore  |   2 +
 README.md   |  30 +++++++++++
 exporter.py | 149 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 181 insertions(+)
 create mode 100644 exporter.py

diff --git a/.gitignore b/.gitignore
index b6e4761..24c1e3f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,6 +3,8 @@ __pycache__/
 *.py[cod]
 *$py.class
 
+.idea/
+token.txt
 # C extensions
 *.so
 
diff --git a/README.md b/README.md
index efca5fa..f11b2dd 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,32 @@
 # BookStack-Python-exporter
 Customizable script for exporting notes from BookStack through API
+
+This script allows you to export Pages in any format available in BookStack's API (you can choose multiple formats at once).  
+
+Full example on how to use the script:
+1. Clone the repo.
+2. Next to the script, place a token.txt file containing the token ID and token secret in the format: TOKEN_ID:TOKEN_SECRET
+3. In the same directory, run the command below, specifying your app domain with the https prefix:
+```bash
+python exporter.py -H https://wiki.example.com -f pdf,md,plaintext,html -t ./token.txt -p ./
+```
+
+Customization:
+```text
+  -p PATH, --path PATH  Path where exported files will be placed.
+                        Default: .
+  -t TOKEN_FILE, --token-file TOKEN_FILE
+                        File containing authorization token in format TOKEN_ID:TOKEN_SECRET
+                        Default: ./token.txt
+  -H HOST, --host HOST  Your domain with protocol prefix, example: https://example.com
+                        Default: https://localhost
+  -f FORMATS, --formats FORMATS
+                        Comma separated list of formats to use for export. Available ones: md,plaintext,pdf,html
+                        Default: md
+```
+
+### TODO:
+- choosing verbosity level through command line parameter
+- choosing on what level should the notes be exported (Books, Chapters, Pages)
+- (optional) choosing whether to update a note file only if the last edit timestamp from the API is later than the local file's timestamp
+- suggestions?
diff --git a/exporter.py b/exporter.py
new file mode 100644
index 0000000..9543774
--- /dev/null
+++ b/exporter.py
@@ -0,0 +1,149 @@
+import argparse
+import json
+import logging
+import os
+from logging import info, error
+from pathlib import Path
+
+import requests
+from requests import Response
+
+logging.basicConfig(format='%(levelname)s :: %(message)s', level=logging.INFO)
+
+# Maps the export format name used in the API URL to the file extension used for the saved file.
+FORMATS: dict[str, str] = {
+    'md': 'md',
+    'plaintext': 'txt',
+    'pdf': 'pdf',
+    'html': 'html'
+}
+
+parser = argparse.ArgumentParser(description='BookStack exporter')
+parser.add_argument('-p', '--path', type=str, default='.',
+                    help='Path where exported files will be placed.')
+parser.add_argument('-t', '--token-file', type=str, default=f'.{os.path.sep}token.txt',
+                    help='File containing authorization token in format TOKEN_ID:TOKEN_SECRET')
+parser.add_argument('-H', '--host', type=str, default='https://localhost',
+                    help='Your domain with protocol prefix, example: https://example.com')
+parser.add_argument('-f', '--formats', type=str, default='md',
+                    help=f'Coma separated list of formats to use for export.'
+                         f' Available ones: {",".join([f for f in FORMATS.keys()])}')
+args = parser.parse_args()
+
+formats = args.formats.split(',')
+
+API_PREFIX: str = f"{args.host.removesuffix(os.path.sep)}/api"
+FS_PATH: str = args.path.removesuffix(os.path.sep)
+
+with open(args.token_file, 'r') as f:
+    TOKEN: str = f.readline().removesuffix('\n')
+
+HEADERS = {'Content-Type': 'application/json; charset=utf-8',
+           'Authorization': f"Token {TOKEN}"}
+
+
+class Node:  # one entry (shelf, book, chapter or page) of the export tree; parent links define its directory
+    def __init__(self, name: str, parent: 'Node | None', node_id: int):
+        self.__name: str = name
+        self.__parent: 'Node | None' = parent  # None marks a node without a parent
+        self.__node_id: int = node_id
+
+    def get_name(self) -> str:
+        return self.__name
+
+    def get_parent(self) -> 'Node | None':
+        return self.__parent
+
+    def set_parent(self, parent: 'Node'):
+        self.__parent = parent
+
+    def get_path(self) -> str:  # relative directory built from the ancestors' names; this node's own name is excluded
+        if self.__parent is None:
+            return "."
+        return self.__parent.get_path() + os.path.sep + self.__parent.get_name()
+
+    def get_id(self) -> int:
+        return self.__node_id
+
+
+shelves: dict[int, Node] = {}  # shelf id -> Node
+books: dict[int, Node] = {}  # book id -> Node (parent is its shelf, or None)
+chapters: dict[int, Node] = {}  # chapter id -> Node (parent is its book)
+pages: dict[int, Node] = {}  # page id -> Node (parent is its chapter or its book)
+
+
+def make_dir(path: str):
+    path_obj = Path(path)
+    if path_obj.exists():
+        return
+    info(f"Creating dir {path}")
+    path_obj.mkdir(exist_ok=True, parents=True)
+
+
+def api_get(path: str) -> dict:
+    response: Response = requests.get(f'{API_PREFIX}/{path}', headers=HEADERS)
+
+    if response.status_code == 403:
+        error("403 Forbidden, check your token!")
+        exit(response.status_code)
+
+    data: dict = json.loads(response.text)
+    return data
+
+
+info("Getting info about Shelves and their Books")
+
+for shelf_data in api_get('shelves').get('data'):
+    shelf = Node(shelf_data.get('name'), None, shelf_data.get('id'))
+    shelves[shelf.get_id()] = shelf
+
+    shelf_details = json.loads(requests.get(f'{API_PREFIX}/shelves/{shelf.get_id()}', headers=HEADERS).text)
+
+    if shelf_details.get('books') is None:
+        continue
+    for book_data in shelf_details.get('books'):
+        book = Node(book_data.get('name'), shelf, book_data.get('id'))
+        books[book.get_id()] = book
+
+info("Getting info about Books not belonging to any shelf")
+
+for book_data in api_get('books').get('data'):
+    if book_data.get('id') != 0:
+        continue
+    book = Node(book_data.get('name'), None, book_data.get('id'))
+    info(f"Book \"{book.get_name()} has no shelf assigned.\"")
+    books[book.get_id()] = book
+
+info("Getting info about Chapters")
+
+for chapter_data in api_get('chapters').get('data'):
+    chapter = Node(chapter_data.get('name'), books.get(chapter_data.get('book_id')), chapter_data.get('id'))
+    chapters[chapter.get_id()] = chapter
+
+info("Getting info about Pages")
+for page_data in api_get('pages').get('data'):
+    parent_id = page_data.get('chapter_id')
+    if parent_id == 0:
+        parent_id = page_data.get('book_id')
+        info(f"Page \"{page_data.get('name')}\" is not in any chapter, "
+             f"using Book \"{books.get(parent_id).get_name()}\" as a parent.")
+        page = Node(page_data.get('name'), books.get(parent_id), page_data.get('id'))
+        pages[page.get_id()] = page
+        continue
+
+    page = Node(page_data.get('name'), chapters.get(parent_id), page_data.get('id'))
+    pages[page.get_id()] = page
+
+for page in pages.values():
+    make_dir(f"{FS_PATH}{os.path.sep}{page.get_path()}")
+
+    for frmt in formats:
+        path: str = f"{FS_PATH}{os.path.sep}{page.get_path()}{os.path.sep}{page.get_name()}.{FORMATS[frmt]}"
+
+        result: Response = requests.get(f'{API_PREFIX}/pages/{page.get_id()}/export/{frmt}', headers=HEADERS)
+        if os.path.exists(path):
+            info(f"Updating file with page \"{page.get_name()}.{FORMATS[frmt]}\"")
+        else:
+            info(f"Saving new file with page \"{page.get_name()}.{FORMATS[frmt]}\"")
+        with open(path, 'wb') as f:
+            f.write(result.content)