2022-05-13 18:44:05 +00:00
|
|
|
import argparse
|
|
|
|
import json
|
|
|
|
import logging
|
|
|
|
import os
|
2022-06-22 17:24:15 +00:00
|
|
|
from datetime import datetime
|
|
|
|
from logging import info, error, debug
|
2022-05-13 18:44:05 +00:00
|
|
|
from pathlib import Path
|
2023-02-08 10:13:47 +00:00
|
|
|
import sys
|
2023-04-16 08:33:29 +00:00
|
|
|
from typing import Dict, List, Union
|
2022-06-22 11:52:36 +00:00
|
|
|
from urllib.request import urlopen, Request
|
2022-06-23 15:14:45 +00:00
|
|
|
import urllib.parse
|
2023-12-31 15:20:27 +00:00
|
|
|
import base64
|
2022-05-13 18:44:05 +00:00
|
|
|
|
|
|
|
# Maps export format name (as used in the API export path) to the file
# extension used for the saved file: {formatName: fileExtension}
FORMATS: Dict[str, str] = {
    'markdown': 'md',
    'plaintext': 'txt',
    'pdf': 'pdf',
    'html': 'html',
}

# Levels of the document tree at which export can be performed.
LEVELS = ['pages', 'chapters', 'books']

# Maps the --log-level command line choice to the logging module level.
LOG_LEVEL: Dict[str, int] = {
    'debug': logging.DEBUG,
    'info': logging.INFO,
    'warning': logging.WARNING,
    'error': logging.ERROR,
}

# Characters in filenames to be replaced with "_"
FORBIDDEN_CHARS: List[str] = ["/", "#"]
|
2023-02-08 10:13:47 +00:00
|
|
|
|
2022-05-13 18:44:05 +00:00
|
|
|
# Command line interface of the exporter. Parsing happens at import time,
# the resulting ``args`` namespace is read by the rest of the script.
parser = argparse.ArgumentParser(description='BookStack exporter')
parser.add_argument('-p',
                    '--path',
                    type=str,
                    default='.',
                    help='Path where exported files will be placed.')
parser.add_argument(
    '-t',
    '--token-file',
    type=str,
    default=f'.{os.path.sep}token.txt',
    help='File containing authorization token in format TOKEN_ID:TOKEN_SECRET')
parser.add_argument(
    '-H',
    '--host',
    type=str,
    default='https://localhost',
    help='Your domain with protocol prefix, example: https://example.com')
parser.add_argument('-f',
                    '--formats',
                    type=str,
                    default=['markdown'],
                    nargs="+",
                    help='Space separated list of formats to use for export.',
                    choices=FORMATS.keys())
parser.add_argument('-c',
                    '--forbidden-chars',
                    type=str,
                    default=FORBIDDEN_CHARS,
                    nargs="+",
                    help='Space separated list of symbols to be replaced '
                    'with "_" in filenames.')
parser.add_argument('-u',
                    '--user-agent',
                    type=str,
                    default="BookStack exporter",
                    help='User agent header content. In situations'
                    ' where requests are blocked because of bad client/'
                    'unrecognized web browser/etc (like with CloudFlare'
                    ' tunnels), change to some typical '
                    'web browser user agent header.')
# The example in the help text must name this option (not -u, which is the
# user agent option) and needs a space after the preceding sentence.
parser.add_argument('--additional-headers',
                    type=str,
                    nargs="+",
                    default=[],
                    help='List of arbitrary additional HTTP headers to be '
                    'sent with every HTTP request. They can override default'
                    ' ones, including Authorization header. IMPORTANT: '
                    'these headers are also sent when downloading external '
                    'attachments! Don\'t put here any private data. '
                    'Example: --additional-headers '
                    '"Header1: value1" "Header2: value2"')
parser.add_argument(
    '-l',
    '--level',
    type=str,
    default=['pages'],
    nargs="+",
    help="Space separated list of levels at which should be export "
    "performed. ",
    choices=LEVELS)
parser.add_argument(
    '--force-update-files',
    action='store_true',
    help="Set this option to skip checking local files timestamps against "
    "remote last edit timestamps. This will cause overwriting local files,"
    " even if they seem to be already in newest version.")
parser.add_argument(
    '--dont-export-attachments',
    action='store_true',
    help=
    "Set this to prevent exporting attachments that were uploaded to BookStack."
)
parser.add_argument(
    '--dont-export-external-attachments',
    action='store_true',
    help="Set this to prevent exporting external attachments (from links).")
parser.set_defaults(force_update_files=False)
parser.set_defaults(dont_export_attachments=False)
parser.set_defaults(dont_export_external_attachments=False)
parser.add_argument('-V',
                    '--log-level',
                    type=str,
                    default='info',
                    help='Set verbosity level.',
                    choices=LOG_LEVEL.keys())

args = parser.parse_args()
|
|
|
|
|
2023-04-16 08:33:29 +00:00
|
|
|
|
|
|
|
def removesuffix(text, suffix):
    """Return ``text`` without a trailing ``suffix``.

    When ``text`` does not end with ``suffix`` it is returned unchanged.
    At most one occurrence of the suffix is removed.
    """
    if not text.endswith(suffix):
        return text
    cut_at = len(text) - len(suffix)
    return text[:cut_at]
|
|
|
|
|
|
|
|
|
2023-02-08 10:13:47 +00:00
|
|
|
# Runtime configuration derived from the parsed command line arguments.
logging.basicConfig(format='%(levelname)s :: %(message)s',
                    level=LOG_LEVEL.get(args.log_level))

formats: List[str] = args.formats
FORBIDDEN_CHARS = args.forbidden_chars

for frmt in formats:
    if frmt not in FORMATS:
        error("Unknown format name (NOT file extension), "
              "check api docs for current version of your BookStack")
        sys.exit(1)

# URLs are always separated with "/", independent of the platform's
# filesystem separator, so strip "/" (not os.path.sep) from the host.
API_PREFIX: str = f"{removesuffix(args.host, '/')}/api"
FS_PATH: str = removesuffix(args.path, os.path.sep)
LEVEL_CHOICE: List[str] = args.level
for lvl in LEVEL_CHOICE:
    if lvl not in LEVELS:
        error(f"Level {lvl} is not supported, can be only one of {LEVELS}")
        sys.exit(1)

with open(args.token_file, 'r', encoding='utf-8') as f:
    # strip() also removes "\r" left by CRLF line endings and stray spaces,
    # which would otherwise corrupt the Authorization header.
    TOKEN: str = f.readline().strip()

# Headers sent with every request to the BookStack API.
HEADERS = {
    'Content-Type': 'application/json; charset=utf-8',
    'Authorization': f"Token {TOKEN}",
    'User-Agent': args.user_agent
}
# Headers used when downloading external (link) attachments; the API token
# is deliberately left out.
HEADERS_NO_TOKEN = {
    'Content-Type': 'application/json; charset=utf-8',
    'User-Agent': args.user_agent
}

for header in args.additional_headers:
    values = header.split(':', 1)
    if len(values) < 2:
        raise ValueError(f"Improper HTTP header specification: {header}")
    # "Name: value" is split on the first colon; drop the whitespace around
    # both parts so the value does not start with a space.
    header_name = values[0].strip()
    header_value = values[1].strip()
    HEADERS[header_name] = header_value
    HEADERS_NO_TOKEN[header_name] = header_value

# When True, timestamp comparison is skipped and files are always rewritten.
SKIP_TIMESTAMPS: bool = args.force_update_files
|
2022-05-13 18:44:05 +00:00
|
|
|
|
|
|
|
|
|
|
|
class Node:
    """Class representing any node in whole bookstack documents "tree".

    A node keeps its sanitized name, its API id, the remote last edit
    timestamp, a reference to its parent and the list of its children.
    """

    def __init__(self, name: str, parent: Union['Node', None], node_id: int,
                 last_edit_timestamp: datetime):
        """Create a node and register it as a child of ``parent`` (if any).

        Every character listed in the module-level FORBIDDEN_CHARS is
        replaced with "_" in ``name``.
        """
        for char in FORBIDDEN_CHARS:
            name = name.replace(char, "_")
        self.__name: str = name
        self.__children: List['Node'] = []

        self.__parent: Union['Node', None] = parent
        if parent is not None:
            parent.add_child(self)

        self.__last_edit_timestamp: datetime = last_edit_timestamp
        self.__node_id = node_id

    @property
    def name(self) -> str:
        """Return name of this Shelf/Book/Chapter/Page."""
        return self.__name

    @property
    def parent(self) -> Union['Node', None]:
        """Return parent Node or None if there isn't any."""
        return self.__parent

    def changed_since(self, timestamp: datetime) -> int:
        """
        Check if remote version have changed after given timestamp,
        including its children

        :param timestamp: moment to compare remote edit timestamps against
        :return: amount of changed documents at level of this document Node
        """
        result: int = 0
        if self.__last_edit_timestamp > timestamp:
            result += 1
        for child in self.__children:
            result += child.changed_since(timestamp)

        return result

    def get_last_edit_timestamp(self) -> datetime:
        """Return the remote last edit timestamp of this node."""
        return self.__last_edit_timestamp

    def set_parent(self, parent: 'Node'):
        """Move this node under ``parent``.

        The node is removed from the children of its previous parent, and
        setting the same parent again is a no-op. Otherwise the node would
        be listed (and counted by changed_since) more than once.
        """
        previous = self.__parent
        if previous is parent:
            # Already registered as a child of this parent.
            return
        if previous is not None and self in previous.__children:
            previous.__children.remove(self)
        self.__parent = parent
        parent.add_child(self)

    def add_child(self, child: 'Node'):
        """Append ``child`` to the list of children of this node."""
        self.__children.append(child)

    def get_path(self) -> str:
        """Return the relative directory path built from ancestors' names.

        A node without a parent yields ".". The node's own name is not part
        of the returned path.
        """
        if self.__parent is None:
            return "."
        return self.__parent.get_path() + os.path.sep + self.__parent.name

    def get_id(self) -> int:
        """Return the id given to this node on creation."""
        return self.__node_id
|
|
|
|
|
|
|
|
|
2023-04-16 08:33:29 +00:00
|
|
|
# Registries of the Nodes built from API data, one per kind of entity.
# Each one is filled further down in the script, keyed by the node id.
shelves: Dict[int, Node] = dict()
books: Dict[int, Node] = dict()
chapters: Dict[int, Node] = dict()
pages: Dict[int, Node] = dict()
# Pages whose parent is a Book rather than a Chapter.
pages_not_in_chapter: Dict[int, Node] = dict()
attachments: Dict[int, Node] = dict()
|
2022-05-13 18:44:05 +00:00
|
|
|
|
|
|
|
|
2022-07-26 15:30:08 +00:00
|
|
|
def api_timestamp_string_to_datetime(timestamp: str) -> datetime:
    """Parse an API timestamp such as ``2022-05-13T18:44:05.000000Z``.

    The trailing "Z" is matched literally, so the returned datetime carries
    no timezone information (it is naive).
    """
    api_format = '%Y-%m-%dT%H:%M:%S.%fZ'
    return datetime.strptime(timestamp, api_format)
|
|
|
|
|
|
|
|
|
2022-05-13 18:44:05 +00:00
|
|
|
def make_dir(path: str):
    """Create directory ``path`` with all missing parents.

    Nothing is done (and nothing is logged) when the path already exists.
    """
    target = Path(path)
    if not target.exists():
        info(f"Creating dir {path}")
        target.mkdir(exist_ok=True, parents=True)
|
|
|
|
|
|
|
|
|
2022-06-23 15:14:45 +00:00
|
|
|
def api_get_bytes(path: str, **kwargs) -> bytes:
    """Make a GET request to the API and return the raw response body.

    :param path: API path relative to API_PREFIX, without leading slash
    :param kwargs: query string parameters, url-encoded and appended to
        the request path when present
    :return: response body as bytes
    :raises urllib.error.HTTPError: for HTTP error responses other than 403
    """
    # urlopen() reports 4xx/5xx responses by raising HTTPError instead of
    # returning a response, so the 403 case has to be handled via except.
    from urllib.error import HTTPError

    request_path: str = f'{API_PREFIX}/{path}'

    if kwargs:
        params: str = urllib.parse.urlencode(kwargs)
        request_path += f"?{params}"

    debug(f"Making http request: {request_path}")

    request: Request = Request(request_path, headers=HEADERS)

    try:
        with urlopen(request) as response:
            return response.read()
    except HTTPError as exc:
        if exc.code == 403:
            error("403 Forbidden, check your token!")
            # HTTP status codes do not fit into a process exit status,
            # use a plain failure code.
            sys.exit(1)
        raise
|
2022-05-13 18:44:05 +00:00
|
|
|
|
|
|
|
|
2022-06-22 11:52:36 +00:00
|
|
|
def api_get_dict(path: str) -> dict:
    """Request ``path`` from the API and return the parsed JSON response."""
    raw: bytes = api_get_bytes(path)
    text: str = raw.decode()
    return json.loads(text)
|
2022-05-13 18:44:05 +00:00
|
|
|
|
|
|
|
|
2022-06-23 15:14:45 +00:00
|
|
|
def api_get_listing(path: str) -> list:
    """Retrieve whole lists through api.

    Request for another 50 until have collected "total" amount.

    :param path: API path of the listing endpoint, e.g. 'pages'
    :return: list with the items of every fetched batch, in order
    """
    count: int = 50
    # Start with a total of one batch so that the first request is made;
    # the real total comes from the first response.
    total: int = count

    result: list = []

    while total > len(result):
        data: dict = json.loads(
            api_get_bytes(path, count=count, offset=len(result)))
        total = data['total']
        batch: list = data['data']
        if not batch:
            # The API reports more items than it returns; requesting the
            # same offset again would never end, so stop here.
            logging.warning(
                f"API listing '{path}' returned no more items, got "
                f"{len(result)} out of {total} reported")
            break
        result += batch

        debug(f"API listing got {len(result)} items out of {total}")

    return result
|
|
|
|
|
|
|
|
|
2022-07-26 15:30:08 +00:00
|
|
|
def check_if_update_needed(file_path: str, document: Node) -> bool:
    """Check if a Node need updating on disk, according to timestamps.

    :param file_path: path of the local file the document is exported to
    :param document: Node whose remote edit timestamps are checked
        (including its children, see Node.changed_since)
    :return: True when the file is missing, outdated, or when timestamp
        checking is disabled by SKIP_TIMESTAMPS; False otherwise
    """
    from datetime import timezone

    if SKIP_TIMESTAMPS:
        return True
    debug(f"Checking for update for file {file_path}")

    if not os.path.exists(file_path):
        debug(f"Document {file_path} is missing on disk, update needed.")
        return True
    # Remote timestamps are parsed from strings with a "Z" (UTC) suffix
    # into naive datetimes. Express the local modification time as naive
    # UTC as well, otherwise the comparison is off by the local UTC offset.
    local_last_edit: datetime = datetime.fromtimestamp(
        os.path.getmtime(file_path), tz=timezone.utc).replace(tzinfo=None)
    remote_last_edit: datetime = document.get_last_edit_timestamp()

    debug("Local file modification timestamp (UTC): "
          f"{local_last_edit.date()} {local_last_edit.time()}, "
          "remote edit timestamp: "
          f"{remote_last_edit.date()} {remote_last_edit.time()}")
    changes: int = document.changed_since(local_last_edit)

    if changes > 0:
        info(f"Document \"{file_path}\" consists of {changes} "
             "outdated documents, update needed.")
        return True

    debug(f"Document \"{file_path}\" consists of {changes} "
          "outdated documents, skipping updating.")
    return False
|
2022-06-22 13:52:37 +00:00
|
|
|
|
2022-07-26 15:30:08 +00:00
|
|
|
|
2023-12-31 15:20:27 +00:00
|
|
|
def export_doc(documents: List[Node], level: str):
    """Save document-like Nodes to files.

    For every document and every selected format one file is written below
    FS_PATH, unless check_if_update_needed() reports it as up to date.

    :param documents: Nodes to export
    :param level: API path segment of the exported entity kind
    """
    separator = os.path.sep
    for document in documents:
        target_dir = f"{FS_PATH}{separator}{document.get_path()}"
        make_dir(target_dir)

        for v_format in formats:
            extension = FORMATS[v_format]
            path: str = f"{target_dir}{separator}{document.name}.{extension}"

            if check_if_update_needed(path, document):
                data: bytes = api_get_bytes(
                    f'{level}/{document.get_id()}/export/{v_format}')
                with open(path, 'wb') as file:
                    info(f"Saving {path}")
                    file.write(data)
|
2022-06-22 13:52:37 +00:00
|
|
|
|
|
|
|
|
2023-12-31 15:20:27 +00:00
|
|
|
def export_attachments(attachments: List[Node]):
    """Save attachment Nodes to files.

    The 'content' field of an attachment is either an URL (link-type
    attachment, downloaded unless --dont-export-external-attachments is
    set) or base64 encoded file content.

    :param attachments: attachment Nodes to export
    """
    # urlopen() raises HTTPError for error responses instead of returning
    # them, so failed downloads have to be detected via except.
    from urllib.error import HTTPError

    for attachment in attachments:

        base_path = attachment.get_path()
        if attachment.parent is None:
            base_path = f'__ATTACHMENTS_FROM_DELETED_PAGES__{os.path.sep}{base_path}'

        make_dir(f"{FS_PATH}{os.path.sep}{base_path}")

        path: str = f"{FS_PATH}{os.path.sep}{base_path}" + \
            f"{os.path.sep}{attachment.name}"

        if not check_if_update_needed(path, attachment):
            continue

        data = api_get_bytes(f'attachments/{attachment.get_id()}')
        data = json.loads(data)
        content = data['content']
        content_url = urllib.parse.urlparse(content)

        if content_url.scheme:
            # Content with an URL scheme is treated as an external link.
            if args.dont_export_external_attachments:
                continue
            info(f"Downloading attachment from url: {content_url.geturl()}")
            request: Request = Request(content_url.geturl(),
                                       headers=HEADERS_NO_TOKEN)

            try:
                with urlopen(request) as response:
                    payload: bytes = response.read()
            except HTTPError as exc:
                error("Could not download link-type attachment from "
                      f"'{content_url.geturl()}', got code {exc.code}!")
                # HTTP status codes do not fit into a process exit status.
                sys.exit(1)
        else:
            payload = base64.b64decode(content)

        # The file is opened only after the content is available, so a
        # failed download does not leave a truncated file behind.
        with open(path, 'wb') as file:
            info(f"Saving {path}")
            file.write(payload)
|
|
|
|
|
|
|
|
|
|
|
|
#########################
# Gathering data from api
#########################
# Builds the Node tree (shelves -> books -> chapters -> pages -> attachments)
# and fills the module-level registries keyed by node id.

info("Getting info about Shelves and their Books")

for shelf_data in api_get_listing('shelves'):

    last_edit_ts: datetime = api_timestamp_string_to_datetime(
        shelf_data['updated_at'])
    shelf = Node(shelf_data.get('name'), None, shelf_data.get('id'),
                 last_edit_ts)

    debug(f"Shelf: \"{shelf.name}\", ID: {shelf.get_id()}")
    shelves[shelf.get_id()] = shelf

    # The listing is followed by a request for shelf details, from which
    # the shelf's books are read.
    shelf_details = api_get_dict(f'shelves/{shelf.get_id()}')

    if shelf_details.get('books') is None:
        continue
    for book_data in shelf_details['books']:

        last_edit_ts: datetime = api_timestamp_string_to_datetime(
            book_data['updated_at'])
        book = Node(book_data.get('name'), shelf, book_data.get('id'),
                    last_edit_ts)
        debug(f"Book: \"{book.name}\", ID: {book.get_id()}")
        books[book.get_id()] = book

info("Getting info about Books not belonging to any shelf")

for book_data in api_get_listing('books'):
    if book_data.get('id') in books:
        # Already registered through one of the shelves.
        continue

    last_edit_ts: datetime = api_timestamp_string_to_datetime(
        book_data['updated_at'])
    book = Node(book_data.get('name'), None, book_data.get('id'), last_edit_ts)

    debug(f"Book: \"{book.name}\", ID: {book.get_id()}, "
          f"last edit: {book.get_last_edit_timestamp()}")
    info(f"Book \"{book.name}\" has no shelf assigned.")
    books[book.get_id()] = book

info("Getting info about Chapters")

for chapter_data in api_get_listing('chapters'):
    last_edit_ts: datetime = api_timestamp_string_to_datetime(
        chapter_data['updated_at'])
    chapter = Node(chapter_data.get('name'),
                   books.get(chapter_data.get('book_id')),
                   chapter_data.get('id'), last_edit_ts)
    debug(f"Chapter: \"{chapter.name}\", ID: {chapter.get_id()},"
          f" last edit: {chapter.get_last_edit_timestamp()}")
    chapters[chapter.get_id()] = chapter

info("Getting info about Pages")

for page_data in api_get_listing('pages'):
    parent_id = page_data.get('chapter_id')

    last_edit_ts: datetime = api_timestamp_string_to_datetime(
        page_data['updated_at'])

    if parent_id not in chapters:
        # No known chapter for this page: attach it directly to its book.
        parent = books[page_data['book_id']]
        page = Node(page_data.get('name'), parent, page_data.get('id'),
                    last_edit_ts)

        info(f"Page \"{page.name}\" is not in any chapter, "
             f"using Book \"{parent.name}\" as a parent.")

        debug(f"Page: \"{page.name}\", ID: {page.get_id()},"
              f" last edit: {page.get_last_edit_timestamp()}")
        pages[page.get_id()] = page
        pages_not_in_chapter[page.get_id()] = page
        continue

    page = Node(page_data.get('name'), chapters.get(parent_id),
                page_data.get('id'), last_edit_ts)
    debug(f"Page: \"{page.name}\", ID: {page.get_id()}, "
          f"last edit: {page.get_last_edit_timestamp()}")
    pages[page.get_id()] = page

if not args.dont_export_attachments:
    info("Getting info about Attachments.")

    # Lookup of every known page by id. It does not change while the
    # attachments are processed, so it is built once instead of once per
    # attachment.
    all_pages = {}
    all_pages.update(pages)
    all_pages.update(pages_not_in_chapter)

    for attachment_data in api_get_listing('attachments'):
        last_edit_ts: datetime = api_timestamp_string_to_datetime(
            attachment_data['updated_at'])
        # An attachment whose page is unknown gets no parent (None).
        attachment = Node(attachment_data.get('name'),
                          all_pages.get(attachment_data.get('uploaded_to')),
                          attachment_data.get('id'), last_edit_ts)
        debug(f"Attachment: \"{attachment.name}\", ID: {attachment.get_id()},"
              f" last edit: {attachment.get_last_edit_timestamp()}")
        attachments[attachment.get_id()] = attachment
|
|
|
|
|
|
|
#########################
# Exporting data from api
#########################
# Runs the export for every selected level, then the pages without a
# chapter (only when chapters were exported), then the attachments.

files: List[Node] = []
EXPORT_PAGES_NOT_IN_CHAPTER: bool = False

# Registry holding the nodes of each exportable level.
_NODES_BY_LEVEL = {'pages': pages, 'chapters': chapters, 'books': books}

for lvl in LEVEL_CHOICE:
    if lvl in _NODES_BY_LEVEL:
        files = list(_NODES_BY_LEVEL[lvl].values())
    if lvl == 'chapters':
        # A chapter export does not contain pages placed directly in books.
        EXPORT_PAGES_NOT_IN_CHAPTER = True

    export_doc(files, lvl)

if EXPORT_PAGES_NOT_IN_CHAPTER:
    info("Exporting pages that are not in chapter...")
    export_doc(list(pages_not_in_chapter.values()), 'pages')

if not args.dont_export_attachments:
    export_attachments(list(attachments.values()))

info("Finished")
sys.exit(0)
|