Add api request rate limiting, fix for #5

This commit is contained in:
Maciej Lebiest 2024-01-02 15:02:34 +01:00
parent cf1bb98dbd
commit 1fa56298da
2 changed files with 72 additions and 28 deletions

View file

@ -18,12 +18,13 @@ Requirements:
Full example on how to use the script: Full example on how to use the script:
1. Clone the repo 1. Clone the repo
2. next to the script place token.txt file containing token id and token secret in format: TOKEN_ID:TOKEN_SECRET 2. next to the script place token.txt file containing token id and token secret in format: TOKEN_ID:TOKEN_SECRET
3. in the same directory run the command, specifying your app domain with https prefix (every parameter is optional as each has a default value, this is a full possible example): 3. in the same directory run the command, specifying your app domain with https prefix (every parameter is optional as each has a default value, this is an example):
```bash ```bash
python exporter.py \ python exporter.py \
-H https://wiki.example.com \ -H https://wiki.example.com \
-f pdf markdown plaintext html \ -f pdf markdown plaintext html \
-l pages chapters books \ -l pages chapters books \
--rate-limit 180 \
-c "/" "#" \ -c "/" "#" \
--force-update-files \ --force-update-files \
-t ./token.txt \ -t ./token.txt \
@ -37,45 +38,57 @@ Customization:
```text ```text
usage: exporter.py [-h] [-p PATH] [-t TOKEN_FILE] [-H HOST] usage: exporter.py [-h] [-p PATH] [-t TOKEN_FILE] [-H HOST]
[-f {markdown,plaintext,pdf,html} [{markdown,plaintext,pdf,html} ...]] [-f {markdown,plaintext,pdf,html} [{markdown,plaintext,pdf,html} ...]]
[--rate-limit RATE_LIMIT]
[-c FORBIDDEN_CHARS [FORBIDDEN_CHARS ...]] [-u USER_AGENT] [-c FORBIDDEN_CHARS [FORBIDDEN_CHARS ...]] [-u USER_AGENT]
[--additional-headers ADDITIONAL_HEADERS [ADDITIONAL_HEADERS ...]] [--additional-headers ADDITIONAL_HEADERS [ADDITIONAL_HEADERS ...]]
[-l {pages,chapters,books} [{pages,chapters,books} ...]] [-l {pages,chapters,books} [{pages,chapters,books} ...]]
[--force-update-files] [--dont-export-attachments] [--force-update-files] [--dont-export-attachments]
[--dont-export-external-attachments] [-V {debug,info,warning,error}] [--dont-export-external-attachments]
[-V {debug,info,warning,error}]
BookStack exporter BookStack exporter
options: optional arguments:
-h, --help show this help message and exit -h, --help show this help message and exit
-p PATH, --path PATH Path where exported files will be placed. -p PATH, --path PATH Path where exported files will be placed.
-t TOKEN_FILE, --token-file TOKEN_FILE -t TOKEN_FILE, --token-file TOKEN_FILE
File containing authorization token in format TOKEN_ID:TOKEN_SECRET File containing authorization token in format
-H HOST, --host HOST Your domain with protocol prefix, example: https://example.com TOKEN_ID:TOKEN_SECRET
-H HOST, --host HOST Your domain with protocol prefix, example:
https://example.com
-f {markdown,plaintext,pdf,html} [{markdown,plaintext,pdf,html} ...], --formats {markdown,plaintext,pdf,html} [{markdown,plaintext,pdf,html} ...] -f {markdown,plaintext,pdf,html} [{markdown,plaintext,pdf,html} ...], --formats {markdown,plaintext,pdf,html} [{markdown,plaintext,pdf,html} ...]
Space separated list of formats to use for export. Space separated list of formats to use for export.
--rate-limit RATE_LIMIT
How many api requests can be made in a minute. Default
is 180 (BookStack defaults)
-c FORBIDDEN_CHARS [FORBIDDEN_CHARS ...], --forbidden-chars FORBIDDEN_CHARS [FORBIDDEN_CHARS ...] -c FORBIDDEN_CHARS [FORBIDDEN_CHARS ...], --forbidden-chars FORBIDDEN_CHARS [FORBIDDEN_CHARS ...]
Space separated list of symbols to be replaced with "_" in filenames. Space separated list of symbols to be replaced with
"_" in filenames.
-u USER_AGENT, --user-agent USER_AGENT -u USER_AGENT, --user-agent USER_AGENT
User agent header content. In situations where requests are blocked User agent header content. In situations where
because of bad client/unrecognized web browser/etc (like with requests are blocked because of bad
CloudFlare tunnels), change to some typical web browser user agent client/unrecognized web browser/etc (like with
header. CloudFlare tunnels), change to some typical web
browser user agent header.
--additional-headers ADDITIONAL_HEADERS [ADDITIONAL_HEADERS ...] --additional-headers ADDITIONAL_HEADERS [ADDITIONAL_HEADERS ...]
List of arbitrary additional HTTP headers to be sent with every HTTP List of arbitrary additional HTTP headers to be sent
request. They can override default ones, including Authorization with every HTTP request. They can override default
header. IMPORTANT: these headers are also sent when downloading ones, including Authorization header. IMPORTANT: these
external attachments! Don't put here any private data.Example: -u headers are also sent when downloading external
"Header1: value1" "Header2: value2" attachments! Don't put here any private data.Example:
-u "Header1: value1" "Header2: value2"
-l {pages,chapters,books} [{pages,chapters,books} ...], --level {pages,chapters,books} [{pages,chapters,books} ...] -l {pages,chapters,books} [{pages,chapters,books} ...], --level {pages,chapters,books} [{pages,chapters,books} ...]
Space separated list of levels at which should be export performed. Space separated list of levels at which should be
--force-update-files Set this option to skip checking local files timestamps against remote export performed.
last edit timestamps. This will cause overwriting local files, even if --force-update-files Set this option to skip checking local files
they seem to be already in newest version. timestamps against remote last edit timestamps. This
will cause overwriting local files, even if they seem
to be already in newest version.
--dont-export-attachments --dont-export-attachments
Set this to prevent exporting attachments that were uploaded to Set this to prevent exporting any attachments.
BookStack.
--dont-export-external-attachments --dont-export-external-attachments
Set this to prevent exporting external attachments (from links). Set this to prevent exporting external attachments
(from links).
-V {debug,info,warning,error}, --log-level {debug,info,warning,error} -V {debug,info,warning,error}, --log-level {debug,info,warning,error}
Set verbosity level. Set verbosity level.
``` ```
@ -85,4 +98,5 @@ options:
- [x] ~~choosing on what level should the notes be exported (Books, Chapters, Pages)~~ Done - [x] ~~choosing on what level should the notes be exported (Books, Chapters, Pages)~~ Done
- [x] ~~choosing if update note file only if the last edit timestamp from API is later than the local file timestamp~~ Done - [x] ~~choosing if update note file only if the last edit timestamp from API is later than the local file timestamp~~ Done
- [x] ~~exporting attachments~~ - [x] ~~exporting attachments~~
- [x] ~~api rate limiting~~
- [ ] suggestions? - [ ] suggestions?

View file

@ -10,6 +10,8 @@ from typing import Dict, List, Union
from urllib.request import urlopen, Request from urllib.request import urlopen, Request
import urllib.parse import urllib.parse
import base64 import base64
from time import time
from time import sleep
# (formatName, fileExtension) # (formatName, fileExtension)
FORMATS: Dict['str', 'str'] = { FORMATS: Dict['str', 'str'] = {
@ -56,6 +58,11 @@ parser.add_argument('-f',
nargs="+", nargs="+",
help='Space separated list of formats to use for export.', help='Space separated list of formats to use for export.',
choices=FORMATS.keys()) choices=FORMATS.keys())
# Maximum number of API requests per minute (integer, defaults to 180).
# The parsed value is handed to ApiRateLimiter further down in this file.
parser.add_argument('--rate-limit',
type=int,
default=180,
help='How many api requests can be made in a minute. '
'Default is 180 (BookStack defaults)')
parser.add_argument('-c', parser.add_argument('-c',
'--forbidden-chars', '--forbidden-chars',
type=str, type=str,
@ -97,12 +104,9 @@ parser.add_argument(
help="Set this option to skip checking local files timestamps against " help="Set this option to skip checking local files timestamps against "
"remote last edit timestamps. This will cause overwriting local files," "remote last edit timestamps. This will cause overwriting local files,"
" even if they seem to be already in newest version.") " even if they seem to be already in newest version.")
parser.add_argument( parser.add_argument('--dont-export-attachments',
'--dont-export-attachments',
action='store_true', action='store_true',
help= help="Set this to prevent exporting any attachments.")
"Set this to prevent exporting attachments that were uploaded to BookStack."
)
parser.add_argument( parser.add_argument(
'--dont-export-external-attachments', '--dont-export-external-attachments',
action='store_true', action='store_true',
@ -170,6 +174,31 @@ for header in args.additional_headers:
SKIP_TIMESTAMPS: bool = args.force_update_files SKIP_TIMESTAMPS: bool = args.force_update_files
class ApiRateLimiter:
    """Client-side throttle keeping API calls under a per-minute limit.

    Timestamps of the requests made during the last 60 seconds are
    remembered. When one more request would exceed ``rate_limit``, the
    caller is blocked until the oldest remembered request leaves the
    60-second window.
    """

    def __init__(self, rate_limit: int) -> None:
        """Store the limit and start with an empty request history.

        Args:
            rate_limit: maximum number of requests allowed per minute.
        """
        self.__rate_limit = rate_limit
        info(f"API rate limit: {self.__rate_limit}/min")
        self.__requests_times: List[float] = []

    def limit_rate_request(self) -> None:
        """Count another request and wait minimal required time if limit is reached.

        Call this right before sending a request. It returns immediately
        while the limit is not exceeded, otherwise it sleeps first.
        """
        current_time = time()
        # keep only the requests made within the last 60s
        self.__requests_times = [
            stamp for stamp in self.__requests_times
            if current_time - stamp <= 60
        ]
        self.__requests_times.append(current_time)

        if len(self.__requests_times) <= self.__rate_limit:
            return

        # sleep until oldest remembered request is more than 60s ago;
        # clamp at 0 because float rounding could yield a tiny negative
        # value and sleep() rejects negative arguments
        wait_time = max(0.0,
                        self.__requests_times[0] + 60 - current_time)
        info(f"API Rate limit reached, waiting {round(wait_time, 2)}s")
        sleep(wait_time)
        # The request is really sent only after the sleep, so remember that
        # moment. Keeping the pre-sleep timestamp would make the request
        # look older than it is and let later calls exceed the limit.
        self.__requests_times[-1] = time()
# Single module-level limiter built from the --rate-limit argument;
# api_get_bytes calls limit_rate_request() on it before opening a request.
api_rate_limiter = ApiRateLimiter(args.rate_limit)
class Node: class Node:
"""Clas representing any node in whole bookstack documents "tree".""" """Clas representing any node in whole bookstack documents "tree"."""
@ -262,6 +291,7 @@ def api_get_bytes(path: str, **kwargs) -> bytes:
request: Request = Request(request_path, headers=HEADERS) request: Request = Request(request_path, headers=HEADERS)
api_rate_limiter.limit_rate_request()
with urlopen(request) as response: with urlopen(request) as response:
if response.status == 403: if response.status == 403:
error("403 Forbidden, check your token!") error("403 Forbidden, check your token!")