Add attachments exporting, fix for #4
This commit is contained in:
parent
acf8b71e5b
commit
cf1bb98dbd
2 changed files with 136 additions and 22 deletions
43
README.md
43
README.md
|
@ -2,7 +2,7 @@
|
||||||
Customizable script for exporting notes from BookStack through API
|
Customizable script for exporting notes from BookStack through API
|
||||||
|
|
||||||
#### Features:
|
#### Features:
|
||||||
- export keeping the tree structure by making folders from Shelves, Books and Chapters
|
- export keeping the tree structure by making folders from Shelves, Books, Chapters and attachments (including attachments from external links)
|
||||||
- export multiple formats at once
|
- export multiple formats at once
|
||||||
- export at multiple levels at once (export Books or/and Chapters or/and Pages as files)
|
- export at multiple levels at once (export Books or/and Chapters or/and Pages as files)
|
||||||
- choose if local files should be updated only if their edit timestamp is older than remote document last edit, or timestamps should be ignored and files will always be overwritten with the newest version
|
- choose if local files should be updated only if their edit timestamp is older than remote document last edit, or timestamps should be ignored and files will always be overwritten with the newest version
|
||||||
|
@ -35,31 +35,47 @@ python exporter.py \
|
||||||
|
|
||||||
Customization:
|
Customization:
|
||||||
```text
|
```text
|
||||||
|
usage: exporter.py [-h] [-p PATH] [-t TOKEN_FILE] [-H HOST]
|
||||||
|
[-f {markdown,plaintext,pdf,html} [{markdown,plaintext,pdf,html} ...]]
|
||||||
|
[-c FORBIDDEN_CHARS [FORBIDDEN_CHARS ...]] [-u USER_AGENT]
|
||||||
|
[--additional-headers ADDITIONAL_HEADERS [ADDITIONAL_HEADERS ...]]
|
||||||
|
[-l {pages,chapters,books} [{pages,chapters,books} ...]]
|
||||||
|
[--force-update-files] [--dont-export-attachments]
|
||||||
|
[--dont-export-external-attachments] [-V {debug,info,warning,error}]
|
||||||
|
|
||||||
|
BookStack exporter
|
||||||
|
|
||||||
options:
|
options:
|
||||||
-h, --help show this help message and exit
|
-h, --help show this help message and exit
|
||||||
-p PATH, --path PATH Path where exported files will be placed.
|
-p PATH, --path PATH Path where exported files will be placed.
|
||||||
-t TOKEN_FILE, --token-file TOKEN_FILE
|
-t TOKEN_FILE, --token-file TOKEN_FILE
|
||||||
File containing authorization token in format TOKEN_ID:TOKEN_SECRET
|
File containing authorization token in format TOKEN_ID:TOKEN_SECRET
|
||||||
-H HOST, --host HOST Your domain with protocol prefix, example: https://example.com
|
-H HOST, --host HOST Your domain with protocol prefix, example: https://example.com
|
||||||
-f {markdown,plaintext,pdf,html} [{markdown,plaintext,pdf,html} ...],
|
-f {markdown,plaintext,pdf,html} [{markdown,plaintext,pdf,html} ...], --formats {markdown,plaintext,pdf,html} [{markdown,plaintext,pdf,html} ...]
|
||||||
--formats {markdown,plaintext,pdf,html} [{markdown,plaintext,pdf,html} ...]
|
|
||||||
Space separated list of formats to use for export.
|
Space separated list of formats to use for export.
|
||||||
-c FORBIDDEN_CHARS [FORBIDDEN_CHARS ...], --forbidden-chars FORBIDDEN_CHARS [FORBIDDEN_CHARS ...]
|
-c FORBIDDEN_CHARS [FORBIDDEN_CHARS ...], --forbidden-chars FORBIDDEN_CHARS [FORBIDDEN_CHARS ...]
|
||||||
Space separated list of symbols to be replaced with "_" in filenames.
|
Space separated list of symbols to be replaced with "_" in filenames.
|
||||||
-u USER_AGENT, --user-agent USER_AGENT
|
-u USER_AGENT, --user-agent USER_AGENT
|
||||||
User agent header content. In situations where requests are blocked
|
User agent header content. In situations where requests are blocked
|
||||||
because of bad client/unrecognized web browser/etc (like with CloudFlare tunnels),
|
because of bad client/unrecognized web browser/etc (like with
|
||||||
change to some typical web browser user agent header.
|
CloudFlare tunnels), change to some typical web browser user agent
|
||||||
|
header.
|
||||||
--additional-headers ADDITIONAL_HEADERS [ADDITIONAL_HEADERS ...]
|
--additional-headers ADDITIONAL_HEADERS [ADDITIONAL_HEADERS ...]
|
||||||
List of arbitrary additional HTTP headers to be sent with every HTTP request.
|
List of arbitrary additional HTTP headers to be sent with every HTTP
|
||||||
They can override default ones, including Authorization header.
|
request. They can override default ones, including Authorization
|
||||||
Example: -u "Header1: value1" "Header2": value2
|
header. IMPORTANT: these headers are also sent when downloading
|
||||||
-l {pages,chapters,books} [{pages,chapters,books} ...],
|
external attachments! Don't put here any private data. Example: -u
|
||||||
--level {pages,chapters,books} [{pages,chapters,books} ...]
|
"Header1: value1" "Header2: value2"
|
||||||
|
-l {pages,chapters,books} [{pages,chapters,books} ...], --level {pages,chapters,books} [{pages,chapters,books} ...]
|
||||||
Space separated list of levels at which should be export performed.
|
Space separated list of levels at which should be export performed.
|
||||||
--force-update-files Set this option to skip checking local files timestamps against
|
--force-update-files Set this option to skip checking local files timestamps against remote
|
||||||
remote last edit timestamps. This will cause overwriting local files,
|
last edit timestamps. This will cause overwriting local files, even if
|
||||||
even if they seem to be already in newest version.
|
they seem to be already in newest version.
|
||||||
|
--dont-export-attachments
|
||||||
|
Set this to prevent exporting attachments that were uploaded to
|
||||||
|
BookStack.
|
||||||
|
--dont-export-external-attachments
|
||||||
|
Set this to prevent exporting external attachments (from links).
|
||||||
-V {debug,info,warning,error}, --log-level {debug,info,warning,error}
|
-V {debug,info,warning,error}, --log-level {debug,info,warning,error}
|
||||||
Set verbosity level.
|
Set verbosity level.
|
||||||
```
|
```
|
||||||
|
@ -68,4 +84,5 @@ options:
|
||||||
- [x] ~~choosing verbosity level through command line parameter~~ Done
|
- [x] ~~choosing verbosity level through command line parameter~~ Done
|
||||||
- [x] ~~choosing on what level should the notes be exported (Books, Chapters, Pages)~~ Done
|
- [x] ~~choosing on what level should the notes be exported (Books, Chapters, Pages)~~ Done
|
||||||
- [x] ~~choosing if update note file only if the last edit timestamp from API is later than the local file timestamp~~ Done
|
- [x] ~~choosing if update note file only if the last edit timestamp from API is later than the local file timestamp~~ Done
|
||||||
|
- [x] ~~exporting attachments~~
|
||||||
- [ ] suggestions?
|
- [ ] suggestions?
|
||||||
|
|
113
exporter.py
113
exporter.py
|
@ -9,6 +9,7 @@ import sys
|
||||||
from typing import Dict, List, Union
|
from typing import Dict, List, Union
|
||||||
from urllib.request import urlopen, Request
|
from urllib.request import urlopen, Request
|
||||||
import urllib.parse
|
import urllib.parse
|
||||||
|
import base64
|
||||||
|
|
||||||
# (formatName, fileExtension)
|
# (formatName, fileExtension)
|
||||||
FORMATS: Dict['str', 'str'] = {
|
FORMATS: Dict['str', 'str'] = {
|
||||||
|
@ -77,8 +78,10 @@ parser.add_argument('--additional-headers',
|
||||||
default=[],
|
default=[],
|
||||||
help='List of arbitrary additional HTTP headers to be '
|
help='List of arbitrary additional HTTP headers to be '
|
||||||
'sent with every HTTP request. They can override default'
|
'sent with every HTTP request. They can override default'
|
||||||
' ones, including Authorization header. '
|
' ones, including Authorization header. IMPORTANT: '
|
||||||
'Example: -u "Header1: value1" "Header2": value2')
|
'these headers are also sent when downloading external '
|
||||||
|
'attachments! Don\'t put here any private data.'
|
||||||
|
'Example: -u "Header1: value1" "Header2: value2"')
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
'-l',
|
'-l',
|
||||||
'--level',
|
'--level',
|
||||||
|
@ -94,7 +97,19 @@ parser.add_argument(
|
||||||
help="Set this option to skip checking local files timestamps against "
|
help="Set this option to skip checking local files timestamps against "
|
||||||
"remote last edit timestamps. This will cause overwriting local files,"
|
"remote last edit timestamps. This will cause overwriting local files,"
|
||||||
" even if they seem to be already in newest version.")
|
" even if they seem to be already in newest version.")
|
||||||
|
parser.add_argument(
|
||||||
|
'--dont-export-attachments',
|
||||||
|
action='store_true',
|
||||||
|
help=
|
||||||
|
"Set this to prevent exporting attachments that were uploaded to BookStack."
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
'--dont-export-external-attachments',
|
||||||
|
action='store_true',
|
||||||
|
help="Set this to prevent exporting external attachments (from links).")
|
||||||
parser.set_defaults(force_update_files=False)
|
parser.set_defaults(force_update_files=False)
|
||||||
|
parser.set_defaults(dont_export_attachments=False)
|
||||||
|
parser.set_defaults(dont_export_external_attachments=False)
|
||||||
parser.add_argument('-V',
|
parser.add_argument('-V',
|
||||||
'--log-level',
|
'--log-level',
|
||||||
type=str,
|
type=str,
|
||||||
|
@ -140,11 +155,17 @@ HEADERS = {
|
||||||
'Authorization': f"Token {TOKEN}",
|
'Authorization': f"Token {TOKEN}",
|
||||||
'User-Agent': args.user_agent
|
'User-Agent': args.user_agent
|
||||||
}
|
}
|
||||||
|
HEADERS_NO_TOKEN = {
|
||||||
|
'Content-Type': 'application/json; charset=utf-8',
|
||||||
|
'User-Agent': args.user_agent
|
||||||
|
}
|
||||||
|
|
||||||
for header in args.additional_headers:
|
for header in args.additional_headers:
|
||||||
values = header.split(':', 1)
|
values = header.split(':', 1)
|
||||||
if len(values) < 2:
|
if len(values) < 2:
|
||||||
raise ValueError(f"Improper HTTP header specification: {header}")
|
raise ValueError(f"Improper HTTP header specification: {header}")
|
||||||
HEADERS[values[0]] = values[1]
|
HEADERS[values[0]] = values[1]
|
||||||
|
HEADERS_NO_TOKEN[values[0]] = values[1]
|
||||||
|
|
||||||
SKIP_TIMESTAMPS: bool = args.force_update_files
|
SKIP_TIMESTAMPS: bool = args.force_update_files
|
||||||
|
|
||||||
|
@ -215,6 +236,7 @@ books: Dict[int, Node] = {}
|
||||||
chapters: Dict[int, Node] = {}
|
chapters: Dict[int, Node] = {}
|
||||||
pages: Dict[int, Node] = {}
|
pages: Dict[int, Node] = {}
|
||||||
pages_not_in_chapter: Dict[int, Node] = {}
|
pages_not_in_chapter: Dict[int, Node] = {}
|
||||||
|
attachments: Dict[int, Node] = {}
|
||||||
|
|
||||||
|
|
||||||
def api_timestamp_string_to_datetime(timestamp: str) -> datetime:
|
def api_timestamp_string_to_datetime(timestamp: str) -> datetime:
|
||||||
|
@ -272,7 +294,7 @@ def api_get_listing(path: str) -> list:
|
||||||
total = data['total']
|
total = data['total']
|
||||||
result += data['data']
|
result += data['data']
|
||||||
|
|
||||||
debug(f"API listing got {total} items out of maximum {count}")
|
debug(f"API listing got {len(result)} items out of maximum {count}")
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
|
@ -286,7 +308,7 @@ def check_if_update_needed(file_path: str, document: Node) -> bool:
|
||||||
if not os.path.exists(file_path):
|
if not os.path.exists(file_path):
|
||||||
debug(f"Document {file_path} is missing on disk, update needed.")
|
debug(f"Document {file_path} is missing on disk, update needed.")
|
||||||
return True
|
return True
|
||||||
local_last_edit: datetime = datetime.utcfromtimestamp(
|
local_last_edit: datetime = datetime.fromtimestamp(
|
||||||
os.path.getmtime(file_path))
|
os.path.getmtime(file_path))
|
||||||
remote_last_edit: datetime = document.get_last_edit_timestamp()
|
remote_last_edit: datetime = document.get_last_edit_timestamp()
|
||||||
|
|
||||||
|
@ -306,8 +328,8 @@ def check_if_update_needed(file_path: str, document: Node) -> bool:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
def export(documents: List[Node], level: str):
|
def export_doc(documents: List[Node], level: str):
|
||||||
"""Save Node to file."""
|
"""Save document-like Nodes to files."""
|
||||||
for document in documents:
|
for document in documents:
|
||||||
make_dir(f"{FS_PATH}{os.path.sep}{document.get_path()}")
|
make_dir(f"{FS_PATH}{os.path.sep}{document.get_path()}")
|
||||||
|
|
||||||
|
@ -325,6 +347,55 @@ def export(documents: List[Node], level: str):
|
||||||
file.write(data)
|
file.write(data)
|
||||||
|
|
||||||
|
|
||||||
|
def _abort_attachment_download(url: str, status: int):
    """Log a failed link-type attachment download and terminate the script."""
    error("Could not download link-type attachment from "
          f"'{url}', got code {status}!")
    sys.exit(status)


def export_attachments(attachments: List[Node]):
    """Save attachment Nodes to files.

    Each attachment is written to
    ``FS_PATH/<attachment.get_path()>/<attachment.name>``. Attachments
    without a parent node are grouped under the
    ``__ATTACHMENTS_FROM_DELETED_PAGES__`` directory instead.

    The ``content`` field returned by ``attachments/<id>`` is treated as an
    external link when it parses as a URL with a scheme, and as base64
    encoded file data otherwise. External links are downloaded with
    ``HEADERS_NO_TOKEN`` (no Authorization token) and are skipped entirely
    when ``--dont-export-external-attachments`` is set.

    Args:
        attachments: attachment nodes to export.

    Raises:
        SystemExit: when an external attachment download is answered with
            an HTTP error status.
    """
    # Function-scope import keeps this block independent of the file's
    # top-level import list.
    from urllib.error import HTTPError

    for attachment in attachments:
        base_path = attachment.get_path()
        if attachment.parent is None:
            base_path = ('__ATTACHMENTS_FROM_DELETED_PAGES__'
                         f'{os.path.sep}{base_path}')

        target_dir: str = f"{FS_PATH}{os.path.sep}{base_path}"
        path: str = f"{target_dir}{os.path.sep}{attachment.name}"

        if not check_if_update_needed(path, attachment):
            continue

        data = json.loads(api_get_bytes(f'attachments/{attachment.get_id()}'))
        content = data['content']
        content_url = urllib.parse.urlparse(content)

        if not content_url.scheme:
            # No URL scheme: the content is the base64 encoded file itself.
            make_dir(target_dir)
            with open(path, 'wb') as file:
                info(f"Saving {path}")
                file.write(base64.b64decode(content))
            continue

        if args.dont_export_external_attachments:
            # Skipped before make_dir() so no empty directory is left behind.
            continue

        url: str = content_url.geturl()
        if content_url.scheme not in ('http', 'https'):
            # urlopen() would also accept e.g. file:// links, which must not
            # be followed for content coming from the remote server; such
            # responses carry no HTTP status to validate either.
            error("Skipping link-type attachment with unsupported URL "
                  f"scheme: '{url}'")
            continue

        info(f"Downloading attachment from url: {url}")
        request: Request = Request(url, headers=HEADERS_NO_TOKEN)

        try:
            with urlopen(request) as response:
                # urlopen() normally raises HTTPError for error statuses;
                # the explicit check is kept as a safety net.
                if response.status >= 300:
                    _abort_attachment_download(url, response.status)
                payload: bytes = response.read()
        except HTTPError as err:
            _abort_attachment_download(url, err.code)

        make_dir(target_dir)
        with open(path, 'wb') as file:
            info(f"Saving {path}")
            file.write(payload)
|
||||||
|
|
||||||
|
|
||||||
|
#########################
|
||||||
|
# Gathering data from api
|
||||||
|
#########################
|
||||||
|
|
||||||
info("Getting info about Shelves and their Books")
|
info("Getting info about Shelves and their Books")
|
||||||
|
|
||||||
for shelf_data in api_get_listing('shelves'):
|
for shelf_data in api_get_listing('shelves'):
|
||||||
|
@ -405,6 +476,26 @@ for page_data in api_get_listing('pages'):
|
||||||
f"last edit: {page.get_last_edit_timestamp()}")
|
f"last edit: {page.get_last_edit_timestamp()}")
|
||||||
pages[page.get_id()] = page
|
pages[page.get_id()] = page
|
||||||
|
|
||||||
|
if not args.dont_export_attachments:
    info("Getting info about Attachments.")

    # The parent page of an attachment may live in either page map, so the
    # lookup uses their union. It is built once, because neither map changes
    # while the attachments are being listed.
    all_pages = {**pages, **pages_not_in_chapter}

    for attachment_data in api_get_listing('attachments'):
        last_edit_ts: datetime = api_timestamp_string_to_datetime(
            attachment_data['updated_at'])
        # An 'uploaded_to' id that matches no known page gives a None parent.
        attachment = Node(attachment_data.get('name'),
                          all_pages.get(attachment_data.get('uploaded_to')),
                          attachment_data.get('id'), last_edit_ts)
        debug(f"Attachment: \"{attachment.name}\", ID: {attachment.get_id()},"
              f" last edit: {attachment.get_last_edit_timestamp()}")
        attachments[attachment.get_id()] = attachment
|
||||||
|
|
||||||
|
#########################
|
||||||
|
# Exporting data from api
|
||||||
|
#########################
|
||||||
|
|
||||||
files: List[Node] = []
|
files: List[Node] = []
|
||||||
EXPORT_PAGES_NOT_IN_CHAPTER: bool = False
|
EXPORT_PAGES_NOT_IN_CHAPTER: bool = False
|
||||||
|
|
||||||
|
@ -417,8 +508,14 @@ for lvl in LEVEL_CHOICE:
|
||||||
elif lvl == 'books':
|
elif lvl == 'books':
|
||||||
files = list(books.values())
|
files = list(books.values())
|
||||||
|
|
||||||
export(files, lvl)
|
export_doc(files, lvl)
|
||||||
|
|
||||||
if EXPORT_PAGES_NOT_IN_CHAPTER:
|
if EXPORT_PAGES_NOT_IN_CHAPTER:
|
||||||
info("Exporting pages that are not in chapter...")
|
info("Exporting pages that are not in chapter...")
|
||||||
export(list(pages_not_in_chapter.values()), 'pages')
|
export_doc(list(pages_not_in_chapter.values()), 'pages')
|
||||||
|
|
||||||
|
if not args.dont_export_attachments:
|
||||||
|
export_attachments(list(attachments.values()))
|
||||||
|
|
||||||
|
info("Finished")
|
||||||
|
sys.exit(0)
|
||||||
|
|
Loading…
Reference in a new issue