# BookStack-Python-exporter/exporter.py
#
# 205 lines
# 6.9 KiB
# Python

import argparse
import json
import logging
import os
from datetime import datetime
from logging import info, error, debug
from pathlib import Path
from urllib.request import urlopen, Request
# Emit every record from DEBUG upwards as "<LEVEL> :: <message>".
logging.basicConfig(level=logging.DEBUG, format='%(levelname)s :: %(message)s')

# Export format name -> file extension used for the saved file.
FORMATS: dict[str, str] = {'markdown': 'md', 'plaintext': 'txt', 'pdf': 'pdf', 'html': 'html'}

# Entity levels at which an export can be requested.
LEVELS = ['pages', 'chapters', 'books']
# ---------------------------------------------------------------------------
# Command-line interface and run configuration.
# ---------------------------------------------------------------------------
parser = argparse.ArgumentParser(description='BookStack exporter')
parser.add_argument('-p', '--path', type=str, default='.',
                    help='Path where exported files will be placed.')
parser.add_argument('-t', '--token-file', type=str, default=f'.{os.path.sep}token.txt',
                    help='File containing authorization token in format TOKEN_ID:TOKEN_SECRET')
parser.add_argument('-H', '--host', type=str, default='https://localhost',
                    help='Your domain with protocol prefix, example: https://example.com')
# The default has to be a format *name* (a key of FORMATS). The former default
# 'md' is a file extension and was always rejected by the validation below.
parser.add_argument('-f', '--formats', type=str, default='markdown',
                    help=f'Coma separated list of formats to use for export.'
                         f' Available ones: {",".join(FORMATS)}')
parser.add_argument('-l', '--level', type=str, default='pages',
                    help=f'Coma separated list of levels at which should be export performed. '
                         f'Available levels: {LEVELS}')
args = parser.parse_args()

# Requested export formats; each one must be a known format name.
formats = args.formats.split(',')
for frmt in formats:
    if frmt not in FORMATS:
        error("Unknown format name (NOT file extension), "
              "check api docs for current version of your BookStack")
        exit(1)

# URLs are always '/'-separated, regardless of the platform's path separator.
API_PREFIX: str = f"{args.host.removesuffix('/')}/api"
FS_PATH: str = args.path.removesuffix(os.path.sep)

# Requested export levels; each one must be listed in LEVELS.
LEVEL_CHOICE: list[str] = args.level.split(',')
for lvl in LEVEL_CHOICE:
    if lvl not in LEVELS:
        error(f"Level {lvl} is not supported, can be only one of {LEVELS}")
        exit(1)

# Only the first line of the token file is used. strip() also removes a
# trailing '\r' (CRLF files) that would otherwise corrupt the header value.
with open(args.token_file, 'r') as f:
    TOKEN: str = f.readline().strip()

# Headers sent with every API request.
HEADERS = {'Content-Type': 'application/json; charset=utf-8',
           'Authorization': f"Token {TOKEN}"}
class Node:
    """One entity of the exported hierarchy, linked to its parent.

    A node stores its display name, its numeric id and an optional parent
    node. A node without a parent is a root of the hierarchy.
    """

    def __init__(self, name: str, parent: 'Node | None', node_id: int):
        """Create a node.

        :param name: display name of the entity
        :param parent: parent node, or ``None`` for a root node
        :param node_id: numeric identifier of the entity
        """
        self._name: str = name
        self._parent: 'Node | None' = parent
        self._node_id: int = node_id

    def __repr__(self) -> str:
        return f"{type(self).__name__}(name={self._name!r}, node_id={self._node_id!r})"

    def get_name(self) -> str:
        """Return the node's name."""
        return self._name

    def get_parent(self) -> 'Node | None':
        """Return the parent node, or ``None`` for a root node."""
        return self._parent

    def set_parent(self, parent: 'Node | None'):
        """Replace the parent node."""
        self._parent = parent

    def get_path(self) -> str:
        """Return the relative directory path of this node.

        The path is built from the names of all ancestors, starting at ``"."``
        and joined with ``os.path.sep``. The node's own name is NOT included,
        so a root node yields ``"."``.
        """
        if self._parent is None:
            return "."
        return self._parent.get_path() + os.path.sep + self._parent.get_name()

    def get_id(self) -> int:
        """Return the node's numeric id."""
        return self._node_id
# Registries of every known entity, keyed by the entity's id.
# Filled by the discovery code further down in this script.
shelves: dict[int, Node] = {}
books: dict[int, Node] = {}
chapters: dict[int, Node] = {}
pages: dict[int, Node] = {}
# Subset of `pages` whose parent is a book rather than a chapter.
pages_not_in_chapter: dict[int, Node] = {}
def make_dir(path: str):
    """Create directory ``path`` (with any missing parents) unless it exists.

    Nothing is logged or created when ``path`` already exists.
    """
    target = Path(path)
    if not target.exists():
        info(f"Creating dir {path}")
        target.mkdir(exist_ok=True, parents=True)
def api_get_bytes(path: str) -> bytes:
    """Send a GET request to ``{API_PREFIX}/{path}`` and return the raw body.

    :param path: API path relative to ``API_PREFIX`` (no leading slash)
    :return: the response body as bytes
    :raises urllib.error.HTTPError: for any HTTP error status other than 403
    :raises SystemExit: on 403 Forbidden, after logging a hint about the token
    """
    # Local import: the file's import block only pulls in urllib.request names.
    from urllib.error import HTTPError

    request: Request = Request(f'{API_PREFIX}/{path}', headers=HEADERS)
    try:
        with urlopen(request) as response:
            return response.read()
    except HTTPError as err:
        # urlopen() raises HTTPError for 4xx/5xx responses instead of returning
        # them, so the 403 check has to live here to ever be reached.
        if err.code == 403:
            error("403 Forbidden, check your token!")
            exit(err.code)
        raise
def api_get_dict(path: str) -> dict:
    """Fetch ``path`` through ``api_get_bytes`` and parse the body as JSON."""
    raw: bytes = api_get_bytes(path)
    text: str = raw.decode()
    return json.loads(text)
def check_if_update_needed(file: str, remote_last_edit: datetime) -> bool:
    """Tell whether the local copy ``file`` is older than the remote entity.

    :param file: path of the local exported file
    :param remote_last_edit: last edit time reported by the server; a naive
        value is interpreted as UTC (the caller parses it from a ``...Z`` string)
    :return: ``True`` when ``file`` does not exist yet or was modified before
        ``remote_last_edit``, ``False`` otherwise
    """
    # Local import: the file's import block only pulls in `datetime` itself.
    from datetime import timezone

    try:
        local_mtime: float = os.path.getmtime(file)
    except FileNotFoundError:
        # Nothing exported yet, so the file definitely has to be written.
        debug(f"Local file {file} does not exist yet")
        return True

    if remote_last_edit.tzinfo is None:
        # A naive datetime's .timestamp() assumes local time, which would shift
        # the comparison by the machine's UTC offset.
        remote_last_edit = remote_last_edit.replace(tzinfo=timezone.utc)

    local_last_edit: datetime = datetime.fromtimestamp(local_mtime)
    debug(f"Local file creation timestamp: {local_last_edit.date()} {local_last_edit.time()}, "
          f"remote edit timestamp: {remote_last_edit.date()} {remote_last_edit.time()}")
    return local_mtime < remote_last_edit.timestamp()
def export(files: list[Node], level: str):
    """Export every node in ``files`` in each of the requested ``formats``.

    For each node the target directory is created, the node's details are
    fetched from ``{level}/{id}`` to learn its ``updated_at`` time, and each
    format is downloaded and written only when ``check_if_update_needed``
    reports the local file as outdated.

    :param files: nodes to export
    :param level: API collection the nodes belong to, used to build request paths
    """
    for node in files:
        target_dir: str = f"{FS_PATH}{os.path.sep}{node.get_path()}"
        make_dir(target_dir)

        details: dict = api_get_dict(f'{level}/{node.get_id()}')
        remote_edit: datetime = datetime.strptime(details['updated_at'], '%Y-%m-%dT%H:%M:%S.%fZ')

        for format_name in formats:
            path: str = f"{target_dir}{os.path.sep}{node.get_name()}.{FORMATS[format_name]}"
            debug(f"Checking for update for file {path}")
            if check_if_update_needed(path, remote_edit):
                payload: bytes = api_get_bytes(f'{level}/{node.get_id()}/export/{format_name}')
                with open(path, 'wb') as out:
                    info(f"Saving {path}")
                    out.write(payload)
            else:
                debug("Already updated")
# ---------------------------------------------------------------------------
# Discovery: build the shelf -> book -> chapter -> page hierarchy.
# ---------------------------------------------------------------------------
info("Getting info about Shelves and their Books")
for shelf_data in api_get_dict('shelves').get('data'):
    shelf = Node(shelf_data.get('name'), None, shelf_data.get('id'))
    shelves[shelf.get_id()] = shelf

    shelf_details = api_get_dict(f'shelves/{shelf.get_id()}')
    # `or []` covers a missing or null "books" entry.
    for book_data in shelf_details.get('books') or []:
        book = Node(book_data.get('name'), shelf, book_data.get('id'))
        books[book.get_id()] = book

info("Getting info about Books not belonging to any shelf")
for book_data in api_get_dict('books').get('data'):
    if book_data.get('id') in books:
        # Already registered through one of the shelves above.
        continue
    book = Node(book_data.get('name'), None, book_data.get('id'))
    # The closing quote belongs right after the name, not after the sentence.
    info(f"Book \"{book.get_name()}\" has no shelf assigned.")
    books[book.get_id()] = book

info("Getting info about Chapters")
for chapter_data in api_get_dict('chapters').get('data'):
    chapter = Node(chapter_data.get('name'), books.get(chapter_data.get('book_id')), chapter_data.get('id'))
    chapters[chapter.get_id()] = chapter

info("Getting info about Pages")
for page_data in api_get_dict('pages').get('data'):
    parent_chapter = chapters.get(page_data.get('chapter_id'))
    if parent_chapter is not None:
        page = Node(page_data.get('name'), parent_chapter, page_data.get('id'))
        pages[page.get_id()] = page
        continue

    # No known chapter: attach the page directly to its book.
    parent_book = books.get(page_data.get('book_id'))
    if parent_book is None:
        # Previously this crashed with AttributeError on None.get_name().
        info(f"Page \"{page_data.get('name')}\" is not in any chapter "
             f"and its Book is unknown, exporting it without a parent.")
    else:
        info(f"Page \"{page_data.get('name')}\" is not in any chapter, "
             f"using Book \"{parent_book.get_name()}\" as a parent.")
    page = Node(page_data.get('name'), parent_book, page_data.get('id'))
    pages[page.get_id()] = page
    pages_not_in_chapter[page.get_id()] = page

# ---------------------------------------------------------------------------
# Export: run once per requested level.
# ---------------------------------------------------------------------------
nodes_by_level: dict[str, dict[int, Node]] = {
    'pages': pages,
    'chapters': chapters,
    'books': books,
}
files: list[Node] = []
for lvl in LEVEL_CHOICE:
    files = list(nodes_by_level[lvl].values())
    export(files, lvl)

# A chapter-level export does not contain pages that sit directly in a book, so
# they are exported separately, unless the 'pages' level already covered them.
export_pages_not_in_chapter: bool = 'chapters' in LEVEL_CHOICE and 'pages' not in LEVEL_CHOICE
if export_pages_not_in_chapter:
    info("Exporting pages that are not in chapter...")
    export(list(pages_not_in_chapter.values()), 'pages')