2022-05-13 18:44:05 +00:00
|
|
|
import argparse
|
|
|
|
import json
|
|
|
|
import logging
|
|
|
|
import os
|
2022-06-22 17:24:15 +00:00
|
|
|
from datetime import datetime
|
|
|
|
from logging import info, error, debug
|
2022-05-13 18:44:05 +00:00
|
|
|
from pathlib import Path
|
2022-06-22 11:52:36 +00:00
|
|
|
from urllib.request import urlopen, Request
|
2022-05-13 18:44:05 +00:00
|
|
|
|
2022-06-22 17:24:15 +00:00
|
|
|
# Log everything down to DEBUG, each line prefixed with its severity name.
logging.basicConfig(level=logging.DEBUG, format='%(levelname)s :: %(message)s')

# Supported export formats: formatName -> fileExtension.
# The name is what goes into the export URL, the extension is used for the
# file written to disk.
FORMATS: dict['str', 'str'] = dict(
    markdown='md',
    plaintext='txt',
    pdf='pdf',
    html='html',
)

# Hierarchy levels that can be selected for export.
LEVELS = ['pages', 'chapters', 'books']
|
|
|
|
|
2022-05-13 18:44:05 +00:00
|
|
|
# Command-line interface of the exporter.
parser = argparse.ArgumentParser(description='BookStack exporter')
parser.add_argument('-p', '--path', type=str, default='.',
                    help='Path where exported files will be placed.')
parser.add_argument('-t', '--token-file', type=str, default=f'.{os.path.sep}token.txt',
                    help='File containing authorization token in format TOKEN_ID:TOKEN_SECRET')
parser.add_argument('-H', '--host', type=str, default='https://localhost',
                    help='Your domain with protocol prefix, example: https://example.com')
# The default has to be a format *name* (a key of FORMATS), not a file
# extension: the parsed value is validated against FORMATS, so the previous
# default of 'md' made a run without -f fail with "Unknown format name".
parser.add_argument('-f', '--formats', type=str, default='markdown',
                    help='Coma separated list of formats to use for export.'
                         f' Available ones: {",".join(FORMATS)}')
parser.add_argument('-l', '--level', type=str, default='pages',
                    help=f'Coma separated list of levels at which should be export performed. '
                         f'Available levels: {LEVELS}')

# Parsed once at start-up; the rest of the script reads its settings from here.
args = parser.parse_args()
|
|
|
|
|
|
|
|
# Requested export formats, as given on the command line.
formats = args.formats.split(',')

# Refuse to start when any requested entry is not a known format name.
if any(requested not in FORMATS for requested in formats):
    error("Unknown format name (NOT file extension), "
          "check api docs for current version of your BookStack")
    exit(1)
|
2022-05-13 18:44:05 +00:00
|
|
|
|
|
|
|
# Base URL of the API. URLs are always separated by '/', whatever the
# platform's filesystem separator is, so the trailing slash of the host is
# stripped with '/' rather than os.path.sep (which is '\\' on Windows).
API_PREFIX: str = f"{args.host.rstrip('/')}/api"

# Root directory of the export, without a trailing path separator.
FS_PATH: str = args.path.removesuffix(os.path.sep)

# Levels selected for export; every entry must be one of LEVELS.
LEVEL_CHOICE: list[str] = args.level.split(',')
for lvl in LEVEL_CHOICE:
    if lvl not in LEVELS:
        error(f"Level {lvl} is not supported, can be only one of {LEVELS}")
        exit(1)
|
2022-05-13 18:44:05 +00:00
|
|
|
|
|
|
|
# The token is the first line of the token file, without its line break.
with open(args.token_file, 'r') as token_file:
    first_line = token_file.readline()
    TOKEN: str = first_line.removesuffix('\n')

# Headers sent with every API request.
HEADERS = {
    'Content-Type': 'application/json; charset=utf-8',
    'Authorization': f"Token {TOKEN}",
}
|
|
|
|
|
|
|
|
|
|
|
|
class Node:
    """A named item of the exported hierarchy, linked to the item containing it.

    A node stores its name, its numeric id and an optional parent node. The
    chain of parents determines the directory the node is placed in (see
    `get_path`).
    """

    def __init__(self, name: str, parent: 'Node | None', node_id: int):
        """Create a node.

        :param name: display name; also used as a path component for children
        :param parent: containing node, or None for a top-level node
        :param node_id: numeric identifier of the item
        """
        self.__name: str = name
        self.__parent: 'Node | None' = parent
        self.__node_id: int = node_id

    def __repr__(self) -> str:
        return f"{type(self).__name__}(name={self.__name!r}, node_id={self.__node_id!r})"

    def get_name(self) -> str:
        """Return the node's name."""
        return self.__name

    def get_parent(self) -> 'Node | None':
        """Return the containing node, or None for a top-level node."""
        return self.__parent

    def set_parent(self, parent: 'Node'):
        """Attach this node to a different parent."""
        self.__parent = parent

    def get_path(self) -> str:
        """Return the relative directory this node is placed in.

        A node without a parent lives in ".". Otherwise the result is the
        parent's own path followed by the parent's name, i.e. the node's own
        name is NOT part of the returned path.
        """
        if self.__parent is None:
            return "."
        return self.__parent.get_path() + os.path.sep + self.__parent.get_name()

    def get_id(self) -> int:
        """Return the node's numeric id."""
        return self.__node_id
|
|
|
|
|
|
|
|
|
|
|
|
# Registries of every discovered item, each keyed by the item's id.
shelves: dict[int, Node] = dict()
books: dict[int, Node] = dict()
chapters: dict[int, Node] = dict()
pages: dict[int, Node] = dict()
# Subset of `pages` whose parent is a book rather than a chapter.
pages_not_in_chapter: dict[int, Node] = dict()
|
2022-05-13 18:44:05 +00:00
|
|
|
|
|
|
|
|
|
|
|
def make_dir(path: str):
    """Create directory *path*, including missing parents, unless it exists.

    Nothing is done (and nothing is logged) when something already exists at
    *path*.
    """
    target = Path(path)
    if not target.exists():
        info(f"Creating dir {path}")
        target.mkdir(exist_ok=True, parents=True)
|
|
|
|
|
|
|
|
|
2022-06-22 11:52:36 +00:00
|
|
|
def api_get_bytes(path: str) -> bytes:
    """Perform an authorized GET on the API and return the raw response body.

    :param path: endpoint path relative to API_PREFIX (no leading slash)
    :return: response body as bytes
    :raises urllib.error.HTTPError: for any HTTP error status other than 403
    :raises SystemExit: on 403, after logging a hint about the token
    """
    # Imported locally so the function does not rely on the module's import list.
    from urllib.error import HTTPError

    request: Request = Request(f'{API_PREFIX}/{path}', headers=HEADERS)

    try:
        with urlopen(request) as response:
            return response.read()
    except HTTPError as http_error:
        # urlopen() reports 4xx/5xx statuses by raising HTTPError instead of
        # returning a response, so a rejected token has to be detected here.
        if http_error.code == 403:
            error("403 Forbidden, check your token!")
            exit(http_error.code)
        raise
|
2022-05-13 18:44:05 +00:00
|
|
|
|
|
|
|
|
2022-06-22 11:52:36 +00:00
|
|
|
def api_get_dict(path: str) -> dict:
    """Fetch *path* from the API and return its body parsed as JSON."""
    payload: bytes = api_get_bytes(path)
    return json.loads(payload.decode())
|
2022-05-13 18:44:05 +00:00
|
|
|
|
|
|
|
|
2022-06-22 17:24:15 +00:00
|
|
|
def check_if_update_needed(file: str, remote_last_edit: datetime) -> bool:
    """Tell whether the local copy of a document is older than the remote one.

    :param file: path of the local file
    :param remote_last_edit: last modification time reported by the server;
        a naive value is interpreted as UTC (the caller in this file parses a
        timestamp ending in a literal 'Z')
    :return: True when the file is missing or was modified before
        *remote_last_edit*, False otherwise
    """
    # Imported locally so the function does not rely on the module's import list.
    from datetime import timezone

    try:
        local_mtime: float = os.path.getmtime(file)
    except FileNotFoundError:
        # First export of this document: there is nothing to compare against.
        debug(f"Local file {file} does not exist yet, update needed")
        return True

    # datetime.timestamp() treats a naive value as *local* time, which would
    # shift the remote time by the machine's UTC offset; pin it to UTC first.
    if remote_last_edit.tzinfo is None:
        remote_last_edit = remote_last_edit.replace(tzinfo=timezone.utc)

    # Expressed in UTC so both values in the log line are directly comparable.
    local_last_edit: datetime = datetime.fromtimestamp(local_mtime, tz=timezone.utc)
    debug(f"Local file creation timestamp: {local_last_edit.date()} {local_last_edit.time()}, "
          f"remote edit timestamp: {remote_last_edit.date()} {remote_last_edit.time()}")
    return local_mtime < remote_last_edit.timestamp()
|
|
|
|
|
|
|
|
|
2022-06-22 13:52:37 +00:00
|
|
|
def export(files: list[Node], level: str):
    """Export every node of *files* in each of the selected formats.

    For each node the target directory is created, the node's details are
    fetched to learn its last edit time, and every format whose local file is
    reported as outdated is downloaded and written to disk.

    :param files: nodes to export
    :param level: API collection the nodes belong to; used as the first
        segment of the request path
    """
    for node in files:
        target_dir: str = f"{FS_PATH}{os.path.sep}{node.get_path()}"
        make_dir(target_dir)

        details: dict = api_get_dict(f'{level}/{node.get_id()}')
        remote_edit: datetime = datetime.strptime(details['updated_at'], '%Y-%m-%dT%H:%M:%S.%fZ')

        for frmt in formats:
            path: str = f"{target_dir}{os.path.sep}{node.get_name()}.{FORMATS[frmt]}"
            debug(f"Checking for update for file {path}")

            if check_if_update_needed(path, remote_edit):
                content: bytes = api_get_bytes(f'{level}/{node.get_id()}/export/{frmt}')
                with open(path, 'wb') as out:
                    info(f"Saving {path}")
                    out.write(content)
            else:
                debug("Already updated")
|
|
|
|
|
|
|
|
|
2022-05-13 18:44:05 +00:00
|
|
|
def _api_get_listing(path: str, page_size: int = 100) -> list:
    """Return all items of the listing endpoint *path*, following pagination.

    Per the BookStack API documentation, listing endpoints return a limited
    number of items per request and accept `count` / `offset` query
    parameters, reporting the overall number of items as `total`. Requests are
    repeated until `total` items were collected. Collection also stops on an
    empty batch or a missing `total`, so an unexpected response cannot cause
    an endless loop.
    """
    items: list = []
    while True:
        listing: dict = api_get_dict(f'{path}?count={page_size}&offset={len(items)}')
        batch = listing.get('data') or []
        items.extend(batch)
        total = listing.get('total')
        if not batch or total is None or len(items) >= total:
            return items


info("Getting info about Shelves and their Books")

for shelf_data in _api_get_listing('shelves'):
    shelf = Node(shelf_data.get('name'), None, shelf_data.get('id'))
    shelves[shelf.get_id()] = shelf

    # The listing does not carry the shelf's books; the detail endpoint does.
    shelf_details = api_get_dict(f'shelves/{shelf.get_id()}')

    # A shelf without a 'books' entry simply contributes no books.
    for book_data in shelf_details.get('books') or []:
        book = Node(book_data.get('name'), shelf, book_data.get('id'))
        books[book.get_id()] = book

info("Getting info about Books not belonging to any shelf")

for book_data in _api_get_listing('books'):
    # Books already registered through a shelf keep that shelf as parent.
    if book_data.get('id') in books:
        continue
    book = Node(book_data.get('name'), None, book_data.get('id'))
    info(f"Book \"{book.get_name()}\" has no shelf assigned.")
    books[book.get_id()] = book

info("Getting info about Chapters")

for chapter_data in _api_get_listing('chapters'):
    chapter = Node(chapter_data.get('name'), books.get(chapter_data.get('book_id')), chapter_data.get('id'))
    chapters[chapter.get_id()] = chapter

info("Getting info about Pages")

for page_data in _api_get_listing('pages'):
    chapter = chapters.get(page_data.get('chapter_id'))
    if chapter is not None:
        page = Node(page_data.get('name'), chapter, page_data.get('id'))
        pages[page.get_id()] = page
        continue

    # No known chapter: attach the page directly to its book and remember it,
    # so it can still be exported when exporting at chapter level.
    book = books.get(page_data.get('book_id'))
    if book is None:
        # Without a known book the page becomes a top-level node instead of
        # crashing on the missing parent.
        logging.warning(f"Page \"{page_data.get('name')}\" refers to an unknown Book, "
                        f"placing it at the top level.")
    else:
        info(f"Page \"{page_data.get('name')}\" is not in any chapter, "
             f"using Book \"{book.get_name()}\" as a parent.")
    page = Node(page_data.get('name'), book, page_data.get('id'))
    pages[page.get_id()] = page
    pages_not_in_chapter[page.get_id()] = page
|
|
|
|
|
2022-06-22 13:52:37 +00:00
|
|
|
# Registry to export for each supported level. LEVEL_CHOICE has already been
# validated against LEVELS, so every selected level has an entry here.
level_registries = {
    'pages': pages,
    'chapters': chapters,
    'books': books,
}

for lvl in LEVEL_CHOICE:
    export(level_registries[lvl].values(), lvl)

# A chapter-level export does not contain pages attached directly to a book,
# so those are exported separately as individual pages afterwards.
export_pages_not_in_chapter: bool = 'chapters' in LEVEL_CHOICE
if export_pages_not_in_chapter:
    info("Exporting pages that are not in chapter...")
    export(pages_not_in_chapter.values(), 'pages')
|