Initial commit

This commit is contained in:
Maciej Lebiest 2022-05-13 20:44:05 +02:00
parent 879330a901
commit 029b5d920f
3 changed files with 181 additions and 0 deletions

2
.gitignore vendored
View file

@ -3,6 +3,8 @@ __pycache__/
*.py[cod]
*$py.class
.idea/
token.txt
# C extensions
*.so

View file

@ -1,2 +1,32 @@
# BookStack-Python-exporter
Customizable script for exporting notes from BookStack through API
This script allows you to export Pages in any format available in BookStack's API (can choose multiple formats at once).
Full example on how to use the script:
1. Clone the repo
2. next to the script place token.txt file containing token id and token secret in format: TOKEN_ID:TOKEN_SECRET
3. in the same directory run the command, specifying your app domain with https prefix:
```bash
python exporter.py -H https://wiki.example.com -f pdf,md,plaintext,html -t ./token.txt -p ./
```
Customization:
```text
-p PATH, --path PATH Path where exported files will be placed.
Default: .
-t TOKEN_FILE, --token-file TOKEN_FILE
File containing authorization token in format TOKEN_ID:TOKEN_SECRET
Default: ./token.txt
-H HOST, --host HOST Your domain with protocol prefix, example: https://example.com
Default: https://localhost
-f FORMATS, --formats FORMATS
Coma separated list of formats to use for export. Available ones: md,plaintext,pdf,html
Default: md
```
### TODO:
- choosing verbosity level through command line parameter
- choosing on what level should the notes be exported (Books, Chapters, Pages)
- (optional) choosing if update note file only if the last edit timestamp from API is later that the local file timestamp
- suggestions?

149
exporter.py Normal file
View file

@ -0,0 +1,149 @@
import argparse
import json
import logging
import os
from logging import info, error
from pathlib import Path
import requests
from requests import Response
# Log every message as "<LEVEL> :: <message>", showing INFO and above.
logging.basicConfig(format='%(levelname)s :: %(message)s', level=logging.INFO)

# Maps the export format name (as passed to --formats and used in the export
# URL) to the file extension given to the saved file.
FORMATS: dict[str, str] = {
    'md': 'md',
    'plaintext': 'txt',
    'pdf': 'pdf',
    'html': 'html',
}

parser = argparse.ArgumentParser(description='BookStack exporter')
parser.add_argument('-p', '--path', type=str, default='.',
                    help='Path where exported files will be placed.')
parser.add_argument('-t', '--token-file', type=str, default=f'.{os.path.sep}token.txt',
                    help='File containing authorization token in format TOKEN_ID:TOKEN_SECRET')
parser.add_argument('-H', '--host', type=str, default='https://localhost',
                    help='Your domain with protocol prefix, example: https://example.com')
parser.add_argument('-f', '--formats', type=str, default='md',
                    help=f'Coma separated list of formats to use for export.'
                         f' Available ones: {",".join(FORMATS)}')
args = parser.parse_args()

# Validate the requested formats up front, so a typo is reported immediately
# instead of surfacing as a KeyError after all the API calls have been made.
formats = [fmt.strip() for fmt in args.formats.split(',') if fmt.strip()]
unknown_formats = [fmt for fmt in formats if fmt not in FORMATS]
if unknown_formats:
    parser.error(f"Unknown format(s): {','.join(unknown_formats)}."
                 f" Available ones: {','.join(FORMATS)}")
if not formats:
    parser.error(f"No export format given. Available ones: {','.join(FORMATS)}")

# URLs always use '/', regardless of the platform's filesystem separator.
API_PREFIX: str = f"{args.host.removesuffix('/')}/api"
FS_PATH: str = args.path.removesuffix(os.path.sep)

# Only the first line of the token file is used; surrounding whitespace
# (including a trailing "\r\n") is dropped so it cannot corrupt the header.
with open(args.token_file, 'r') as f:
    TOKEN: str = f.readline().strip()

HEADERS = {'Content-Type': 'application/json; charset=utf-8',
           'Authorization': f"Token {TOKEN}"}
class Node:
    """A named item of the exported hierarchy (shelf, book, chapter or page).

    Every node remembers its parent, so the directory in which the node
    should be placed can be derived by walking up the parent chain.
    """

    def __init__(self, name: str, parent: 'Node | None', node_id: int):
        """Create a node.

        :param name: display name, also used as directory/file name
        :param parent: node one level up, or None for a top-level node
        :param node_id: identifier of the item
        """
        self.__name: str = name
        self.__parent: 'Node | None' = parent
        self.__node_id: int = node_id

    def __repr__(self) -> str:
        return f"Node(name={self.__name!r}, node_id={self.__node_id!r})"

    def get_name(self) -> str:
        """Return the node's name."""
        return self.__name

    def get_parent(self) -> 'Node | None':
        """Return the parent node, or None for a top-level node."""
        return self.__parent

    def set_parent(self, parent: 'Node'):
        """Replace the parent node."""
        self.__parent = parent

    def get_path(self) -> str:
        """Return the relative directory that contains this node.

        A top-level node lives in ".". Any other node lives in its parent's
        directory extended by the parent's name, so the node's own name is
        never part of the result.
        """
        if self.__parent is None:
            return "."
        return self.__parent.get_path() + os.path.sep + self.__parent.get_name()

    def get_id(self) -> int:
        """Return the node's identifier."""
        return self.__node_id
# Registries of the discovered nodes, one per hierarchy level.
# Each maps a node id to its Node object.
shelves: dict[int, Node] = dict()
books: dict[int, Node] = dict()
chapters: dict[int, Node] = dict()
pages: dict[int, Node] = dict()
def make_dir(path: str):
    """Create directory ``path`` with any missing parents.

    Does nothing (and logs nothing) when something already exists at ``path``.
    """
    target = Path(path)
    if not target.exists():
        info(f"Creating dir {path}")
        target.mkdir(exist_ok=True, parents=True)
def api_get(path: str) -> dict:
    """GET ``{API_PREFIX}/{path}`` and return the decoded JSON body.

    When the response looks like a paginated listing (a ``data`` list together
    with an integer ``total``), the remaining records are fetched with the
    ``offset`` query parameter and appended to ``data``, so callers always see
    the complete listing. Responses without these keys are returned unchanged.

    :param path: endpoint path relative to the API prefix, e.g. ``'books'``
    :return: parsed JSON document
    :raises SystemExit: on 403 (exit status is the HTTP code, as before) and
        on any other unsuccessful response (exit status 1)
    """

    def fetch(params: 'dict | None' = None) -> dict:
        # One request; aborts the script instead of handing an error
        # document to callers that expect real data.
        response: Response = requests.get(f'{API_PREFIX}/{path}', headers=HEADERS, params=params)
        if response.status_code == 403:
            error("403 Forbidden, check your token!")
            raise SystemExit(response.status_code)
        if not response.ok:
            error(f"Request to {response.url} failed with status {response.status_code}")
            raise SystemExit(1)
        return response.json()

    data: dict = fetch()
    items = data.get('data')
    total = data.get('total')
    if isinstance(items, list) and isinstance(total, int):
        while len(items) < total:
            more = fetch({'offset': len(items)}).get('data')
            if not more:
                # Server returned fewer records than announced; stop rather
                # than loop forever.
                break
            items.extend(more)
    return data
# --- Build the Shelf -> Book -> Chapter -> Page tree from the API listings ---

info("Getting info about Shelves and their Books")
for shelf_data in api_get('shelves').get('data'):
    shelf = Node(shelf_data.get('name'), None, shelf_data.get('id'))
    shelves[shelf.get_id()] = shelf
    # The shelf listing does not contain the books, so each shelf is fetched
    # individually (through api_get, for uniform error handling).
    shelf_details = api_get(f'shelves/{shelf.get_id()}')
    for book_data in shelf_details.get('books') or []:
        book = Node(book_data.get('name'), shelf, book_data.get('id'))
        books[book.get_id()] = book

info("Getting info about Books not belonging to any shelf")
for book_data in api_get('books').get('data'):
    # Books already registered above belong to a shelf; only the remaining
    # ones are added here as top-level nodes.
    if book_data.get('id') in books:
        continue
    book = Node(book_data.get('name'), None, book_data.get('id'))
    info(f"Book \"{book.get_name()}\" has no shelf assigned.")
    books[book.get_id()] = book

info("Getting info about Chapters")
for chapter_data in api_get('chapters').get('data'):
    chapter = Node(chapter_data.get('name'), books.get(chapter_data.get('book_id')), chapter_data.get('id'))
    chapters[chapter.get_id()] = chapter

info("Getting info about Pages")
for page_data in api_get('pages').get('data'):
    chapter_id = page_data.get('chapter_id')
    if chapter_id:
        parent = chapters.get(chapter_id)
    else:
        # A falsy chapter_id (0 or missing) means the page sits directly in a book.
        parent = books.get(page_data.get('book_id'))
        if parent is not None:
            info(f"Page \"{page_data.get('name')}\" is not in any chapter, "
                 f"using Book \"{parent.get_name()}\" as a parent.")
    page = Node(page_data.get('name'), parent, page_data.get('id'))
    pages[page.get_id()] = page

# --- Export every page in every requested format ---

for page in pages.values():
    page_dir = f"{FS_PATH}{os.path.sep}{page.get_path()}"
    make_dir(page_dir)
    # A path separator inside the page name would point the output file into
    # a directory that does not exist, so it is replaced in the file name.
    safe_name = page.get_name().replace(os.path.sep, '_')
    if os.path.altsep:
        safe_name = safe_name.replace(os.path.altsep, '_')
    for frmt in formats:
        file_name = f"{safe_name}.{FORMATS[frmt]}"
        path: str = f"{page_dir}{os.path.sep}{file_name}"
        result: Response = requests.get(f'{API_PREFIX}/pages/{page.get_id()}/export/{frmt}', headers=HEADERS)
        if not result.ok:
            # Do not save an error response as if it were the page content.
            error(f"Export of page \"{page.get_name()}\" to {frmt} failed "
                  f"with status {result.status_code}, skipping.")
            continue
        if os.path.exists(path):
            info(f"Updating file with page \"{page.get_name()}.{FORMATS[frmt]}\"")
        else:
            info(f"Saving new file with page \"{page.get_name()}.{FORMATS[frmt]}\"")
        with open(path, 'wb') as f:
            f.write(result.content)