add timestamps checking to prevent update of files that contain newest version

This commit is contained in:
Maciej Lebiest 2022-07-26 17:30:08 +02:00
parent e2bc12eb60
commit ff31611d46

View file

@ -5,6 +5,7 @@ import os
from datetime import datetime from datetime import datetime
from logging import info, error, debug from logging import info, error, debug
from pathlib import Path from pathlib import Path
from typing import Union
from urllib.request import urlopen, Request from urllib.request import urlopen, Request
import urllib.parse import urllib.parse
@ -37,14 +38,11 @@ parser.add_argument('-t', '--token-file', type=str, default=f'.{os.path.sep}toke
parser.add_argument('-H', '--host', type=str, default='https://localhost', parser.add_argument('-H', '--host', type=str, default='https://localhost',
help='Your domain with protocol prefix, example: https://example.com') help='Your domain with protocol prefix, example: https://example.com')
parser.add_argument('-f', '--formats', type=str, default='markdown', parser.add_argument('-f', '--formats', type=str, default='markdown',
help=f'Coma separated list of formats to use for export.' help=f'Coma separated list of formats to use for export.', choices=FORMATS.keys())
f' Available ones: {",".join([f for f in FORMATS.keys()])}')
parser.add_argument('-l', '--level', type=str, default='pages', parser.add_argument('-l', '--level', type=str, default='pages',
help=f'Coma separated list of levels at which should be export performed. ' help=f'Coma separated list of levels at which should be export performed. ', choices=LEVELS)
f'Available levels: {LEVELS}')
parser.add_argument('-V', '--log-level', type=str, default='info', parser.add_argument('-V', '--log-level', type=str, default='info',
help=f'Set verbosity level. ' help=f'Set verbosity level. ', choices=LOG_LEVEL.keys())
f'Available levels: {LOG_LEVEL.keys()}')
args = parser.parse_args() args = parser.parse_args()
@ -77,19 +75,49 @@ HEADERS = {'Content-Type': 'application/json; charset=utf-8',
class Node: class Node:
def __init__(self, name: str, parent: ['Node', None], node_id: int): def __init__(self, name: str,
parent: Union['Node', None],
node_id: int,
last_edit_timestamp: datetime):
self.__name: str = name self.__name: str = name
self.__parent: ['Node', None] = parent self.__children: list['Node'] = []
self.__parent: Union['Node', None] = parent
if parent is not None:
parent.add_child(self)
self.__last_edit_timestamp: datetime = last_edit_timestamp
self.__node_id = node_id self.__node_id = node_id
def get_name(self) -> str: def get_name(self) -> str:
return self.__name return self.__name
def get_parent(self) -> ['Node', None]: def get_parent(self) -> Union['Node', None]:
return self.__parent return self.__parent
def changed_since(self, timestamp: datetime) -> int:
"""
Check if remote version have changed after given timestamp, including its children
:param timestamp:
:return: amount of changed documents at level of this document Node
"""
result: int = 0
if self.__last_edit_timestamp > timestamp:
result += 1
for child in self.__children:
result += child.changed_since(timestamp)
return result
def get_last_edit_timestamp(self) -> datetime:
return self.__last_edit_timestamp
def set_parent(self, parent: 'Node'): def set_parent(self, parent: 'Node'):
self.__parent = parent self.__parent = parent
parent.add_child(self)
def add_child(self, child: 'Node'):
self.__children.append(child)
def get_path(self) -> str: def get_path(self) -> str:
if self.__parent is None: if self.__parent is None:
@ -107,6 +135,10 @@ pages: dict[int, Node] = {}
pages_not_in_chapter: dict[int, Node] = {} pages_not_in_chapter: dict[int, Node] = {}
def api_timestamp_string_to_datetime(timestamp: str) -> datetime:
return datetime.strptime(timestamp, '%Y-%m-%dT%H:%M:%S.%fZ')
def make_dir(path: str): def make_dir(path: str):
path_obj = Path(path) path_obj = Path(path)
if path_obj.exists(): if path_obj.exists():
@ -161,30 +193,37 @@ def api_get_listing(path: str) -> list:
return result return result
def check_if_update_needed(file: str, remote_last_edit: datetime) -> bool: def check_if_update_needed(file_path: str, document: Node) -> bool:
if not os.path.exists(file): if not os.path.exists(file_path):
debug(f"Document {file_path} is missing on disk, update needed.")
return True return True
local_last_edit: datetime = datetime.fromtimestamp(os.path.getmtime(file)) local_last_edit: datetime = datetime.utcfromtimestamp(os.path.getmtime(file_path))
remote_last_edit: datetime = document.get_last_edit_timestamp()
debug(f"Local file creation timestamp: {local_last_edit.date()} {local_last_edit.time()}, " debug(f"Local file creation timestamp: {local_last_edit.date()} {local_last_edit.time()}, "
f"remote edit timestamp: {remote_last_edit.date()} {remote_last_edit.time()}") f"remote edit timestamp: {remote_last_edit.date()} {remote_last_edit.time()}")
return local_last_edit.timestamp() < remote_last_edit.timestamp() changes: int = document.changed_since(local_last_edit)
if changes > 0:
info(f"Document \"{document.get_name()}\" consists of {changes} outdated documents, update needed.")
return True
debug(f"Document \"{document.get_name()}\" consists of {changes} outdated documents.")
return False
def export(files: list[Node], level: str): def export(documents: list[Node], level: str):
for file in files: for document in documents:
make_dir(f"{FS_PATH}{os.path.sep}{file.get_path()}") make_dir(f"{FS_PATH}{os.path.sep}{document.get_path()}")
file_info: dict = api_get_dict(f'{level}/{file.get_id()}')
last_edit_time: datetime = datetime.strptime(file_info['updated_at'], '%Y-%m-%dT%H:%M:%S.%fZ')
for frmt in formats: for frmt in formats:
path: str = f"{FS_PATH}{os.path.sep}{file.get_path()}{os.path.sep}{file.get_name()}.{FORMATS[frmt]}" path: str = f"{FS_PATH}{os.path.sep}{document.get_path()}{os.path.sep}{document.get_name()}.{FORMATS[frmt]}"
debug(f"Checking for update for file {path}") debug(f"Checking for update for file {path}")
if not check_if_update_needed(path, last_edit_time): if not check_if_update_needed(path, document):
debug("Already updated") debug("Already updated")
continue continue
data: bytes = api_get_bytes(f'{level}/{file.get_id()}/export/{frmt}') data: bytes = api_get_bytes(f'{level}/{document.get_id()}/export/{frmt}')
with open(path, 'wb') as f: with open(path, 'wb') as f:
info(f"Saving {path}") info(f"Saving {path}")
f.write(data) f.write(data)
@ -193,7 +232,10 @@ def export(files: list[Node], level: str):
info("Getting info about Shelves and their Books") info("Getting info about Shelves and their Books")
for shelf_data in api_get_listing('shelves'): for shelf_data in api_get_listing('shelves'):
shelf = Node(shelf_data.get('name'), None, shelf_data.get('id'))
last_edit_timestamp: datetime = api_timestamp_string_to_datetime(shelf_data['updated_at'])
shelf = Node(shelf_data.get('name'), None, shelf_data.get('id'), last_edit_timestamp)
debug(f"Shelf: \"{shelf.get_name()}\", ID: {shelf.get_id()}") debug(f"Shelf: \"{shelf.get_name()}\", ID: {shelf.get_id()}")
shelves[shelf.get_id()] = shelf shelves[shelf.get_id()] = shelf
@ -202,7 +244,9 @@ for shelf_data in api_get_listing('shelves'):
if shelf_details.get('books') is None: if shelf_details.get('books') is None:
continue continue
for book_data in shelf_details.get('books'): for book_data in shelf_details.get('books'):
book = Node(book_data.get('name'), shelf, book_data.get('id'))
last_edit_timestamp: datetime = api_timestamp_string_to_datetime(book_data['updated_at'])
book = Node(book_data.get('name'), shelf, book_data.get('id'), last_edit_timestamp)
debug(f"Book: \"{book.get_name()}\", ID: {book.get_id()}") debug(f"Book: \"{book.get_name()}\", ID: {book.get_id()}")
books[book.get_id()] = book books[book.get_id()] = book
@ -211,16 +255,23 @@ info("Getting info about Books not belonging to any shelf")
for book_data in api_get_listing('books'): for book_data in api_get_listing('books'):
if book_data.get('id') in books.keys(): if book_data.get('id') in books.keys():
continue continue
book = Node(book_data.get('name'), None, book_data.get('id'))
debug(f"Book: \"{book.get_name()}\", ID: {book.get_id()}") last_edit_timestamp: datetime = api_timestamp_string_to_datetime(book_data['updated_at'])
book = Node(book_data.get('name'), None, book_data.get('id'), last_edit_timestamp)
debug(f"Book: \"{book.get_name()}\", ID: {book.get_id()}, last edit: {book.get_last_edit_timestamp()}")
info(f"Book \"{book.get_name()} has no shelf assigned.\"") info(f"Book \"{book.get_name()} has no shelf assigned.\"")
books[book.get_id()] = book books[book.get_id()] = book
info("Getting info about Chapters") info("Getting info about Chapters")
for chapter_data in api_get_listing('chapters'): for chapter_data in api_get_listing('chapters'):
chapter = Node(chapter_data.get('name'), books.get(chapter_data.get('book_id')), chapter_data.get('id')) last_edit_timestamp: datetime = api_timestamp_string_to_datetime(chapter_data['updated_at'])
debug(f"Chapter: \"{chapter.get_name()}\", ID: {chapter.get_id()}") chapter = Node(chapter_data.get('name'),
books.get(chapter_data.get('book_id')),
chapter_data.get('id'),
last_edit_timestamp)
debug(f"Chapter: \"{chapter.get_name()}\", ID: {chapter.get_id()}, last edit: {chapter.get_last_edit_timestamp()}")
chapters[chapter.get_id()] = chapter chapters[chapter.get_id()] = chapter
info("Getting info about Pages") info("Getting info about Pages")
@ -228,18 +279,22 @@ info("Getting info about Pages")
for page_data in api_get_listing('pages'): for page_data in api_get_listing('pages'):
parent_id = page_data.get('chapter_id') parent_id = page_data.get('chapter_id')
last_edit_timestamp: datetime = api_timestamp_string_to_datetime(page_data['updated_at'])
if parent_id not in chapters.keys(): if parent_id not in chapters.keys():
parent_id = page_data.get('book_id') parent = books.get(page_data.get('book_id'))
info(f"Page \"{page_data.get('name')}\" is not in any chapter, " page = Node(page_data.get('name'), parent, page_data.get('id'), last_edit_timestamp)
f"using Book \"{books.get(parent_id).get_name()}\" as a parent.")
page = Node(page_data.get('name'), books.get(parent_id), page_data.get('id')) info(f"Page \"{page.get_name()}\" is not in any chapter, "
debug(f"Page: \"{page.get_name()}\", ID: {page.get_id()}") f"using Book \"{parent.get_name()}\" as a parent.")
debug(f"Page: \"{page.get_name()}\", ID: {page.get_id()}, last edit: {page.get_last_edit_timestamp()}")
pages[page.get_id()] = page pages[page.get_id()] = page
pages_not_in_chapter[page.get_id()] = page pages_not_in_chapter[page.get_id()] = page
continue continue
page = Node(page_data.get('name'), chapters.get(parent_id), page_data.get('id')) page = Node(page_data.get('name'), chapters.get(parent_id), page_data.get('id'), last_edit_timestamp)
debug(f"Page: \"{page.get_name()}\", ID: {page.get_id()}") debug(f"Page: \"{page.get_name()}\", ID: {page.get_id()}, last edit: {page.get_last_edit_timestamp()}")
pages[page.get_id()] = page pages[page.get_id()] = page
files: list[Node] = [] files: list[Node] = []