From ca0ee5d391655db413e49fd3fcf90ac4883ddd61 Mon Sep 17 00:00:00 2001 From: Ozzieisaacs Date: Sat, 10 Sep 2022 18:26:52 +0200 Subject: [PATCH] backup metadata first step --- cps/db.py | 22 +++++ cps/editbooks.py | 4 + cps/gdriveutils.py | 24 +++++ cps/helper.py | 4 +- cps/schedule.py | 7 +- cps/tasks/metadata_backup.py | 174 +++++++++++++++++++++++++++++++++++ 6 files changed, 232 insertions(+), 3 deletions(-) create mode 100644 cps/tasks/metadata_backup.py diff --git a/cps/db.py b/cps/db.py index 9136675a..f276e5c8 100644 --- a/cps/db.py +++ b/cps/db.py @@ -322,6 +322,15 @@ class Data(Base): return u"".format(self.book, self.format, self.uncompressed_size, self.name) +class Metadata_Dirtied(Base): + __tablename__ = 'metadata_dirtied' + id = Column(Integer, primary_key=True, autoincrement=True) + book = Column(Integer, ForeignKey('books.id'), nullable=False, unique=True) + + def __init__(self, book): + self.book = book + + class Books(Base): __tablename__ = 'books' @@ -390,6 +399,9 @@ class CustomColumns(Base): display_dict = json.loads(self.display) return display_dict + def to_json(self): + pass + class AlchemyEncoder(json.JSONEncoder): @@ -642,6 +654,16 @@ class CalibreDB: def get_book_format(self, book_id, file_format): return self.session.query(Data).filter(Data.book == book_id).filter(Data.format == file_format).first() + def set_metadata_dirty(self, book_id): + if not self.session.query(Metadata_Dirtied).filter(Metadata_Dirtied.book==book_id).one_or_none(): + self.session.add(Metadata_Dirtied(book_id)) + def delete_dirty_metadata(self, book_id): + try: + self.session.query(Metadata_Dirtied).filter(Metadata_Dirtied.book==book_id).delete() + self.session.commit() + except (OperationalError) as e: + self.session.rollback() + log.error("Database error: {}".format(e)) # Language and content filters for displaying in the UI def common_filters(self, allow_show_archived=False, return_all_languages=False): diff --git a/cps/editbooks.py b/cps/editbooks.py index 
0e7588ea..a22664f4 100755 --- a/cps/editbooks.py +++ b/cps/editbooks.py @@ -203,6 +203,7 @@ def edit_book(book_id): if modify_date: book.last_modified = datetime.utcnow() kobo_sync_status.remove_synced_book(edited_books_id, all=True) + calibre_db.set_metadata_dirty(book.id) calibre_db.session.merge(book) calibre_db.session.commit() @@ -277,6 +278,8 @@ def upload(): move_coverfile(meta, db_book) + if modify_date: + calibre_db.set_metadata_dirty(book_id) # save data to database, reread data calibre_db.session.commit() @@ -555,6 +558,7 @@ def table_xchange_author_title(): renamed_author=renamed) if modify_date: book.last_modified = datetime.utcnow() + calibre_db.set_metadata_dirty(book.id) try: calibre_db.session.commit() except (OperationalError, IntegrityError, StaleDataError) as e: diff --git a/cps/gdriveutils.py b/cps/gdriveutils.py index 866defaa..f83bf775 100644 --- a/cps/gdriveutils.py +++ b/cps/gdriveutils.py @@ -578,6 +578,7 @@ def deleteDatabaseEntry(ID): # Gets cover file from gdrive +# ToDo: Check is this right everyone get read permissions on cover files? 
def get_cover_via_gdrive(cover_path): df = getFileFromEbooksFolder(cover_path, 'cover.jpg') if df: @@ -600,6 +601,29 @@ def get_cover_via_gdrive(cover_path): else: return None +# Gets cover file from gdrive +def get_metadata_backup_via_gdrive(metadata_path): + df = getFileFromEbooksFolder(metadata_path, 'metadata.opf') + if df: + if not session.query(PermissionAdded).filter(PermissionAdded.gdrive_id == df['id']).first(): + df.GetPermissions() + df.InsertPermission({ + 'type': 'anyone', + 'value': 'anyone', + 'role': 'writer', # ToDo needs write access + 'withLink': True}) + permissionAdded = PermissionAdded() + permissionAdded.gdrive_id = df['id'] + session.add(permissionAdded) + try: + session.commit() + except OperationalError as ex: + log.error_or_exception('Database error: {}'.format(ex)) + session.rollback() + return df.metadata.get('webContentLink') + else: + return None + # Creates chunks for downloading big files def partial(total_byte_len, part_size_limit): s = [] diff --git a/cps/helper.py b/cps/helper.py index 5c929944..be15d404 100755 --- a/cps/helper.py +++ b/cps/helper.py @@ -19,7 +19,6 @@ import os import io -import sys import mimetypes import re import shutil @@ -685,7 +684,8 @@ def update_dir_structure(book_id, def delete_book(book, calibrepath, book_format): if not book_format: - clear_cover_thumbnail_cache(book.id) ## here it breaks + clear_cover_thumbnail_cache(book.id) ## here it breaks + calibre_db.delete_dirty_metadata(book.id) if config.config_use_google_drive: return delete_book_gdrive(book, book_format) else: diff --git a/cps/schedule.py b/cps/schedule.py index faadfb7e..228a7f5d 100644 --- a/cps/schedule.py +++ b/cps/schedule.py @@ -23,7 +23,7 @@ from .services.background_scheduler import BackgroundScheduler, use_APScheduler from .tasks.database import TaskReconnectDatabase from .tasks.thumbnail import TaskGenerateCoverThumbnails, TaskGenerateSeriesThumbnails, TaskClearCoverThumbnailCache from .services.worker import WorkerThread - +from 
.tasks.metadata_backup import TaskBackupMetadata def get_scheduled_tasks(reconnect=True): tasks = list() @@ -32,6 +32,10 @@ def get_scheduled_tasks(reconnect=True): if reconnect: tasks.append([lambda: TaskReconnectDatabase(), 'reconnect', False]) + # ToDo make configurable. Generate metadata.opf file for each changed book + if True: + tasks.append([lambda: TaskBackupMetadata(), 'backup metadata', False]) + # Generate all missing book cover thumbnails if config.schedule_generate_book_covers: tasks.append([lambda: TaskClearCoverThumbnailCache(0), 'delete superfluous book covers', True]) @@ -91,6 +95,7 @@ def should_task_be_running(start, duration): end_time = start_time + datetime.timedelta(hours=duration // 60, minutes=duration % 60) return start_time < now < end_time + def calclulate_end_time(start, duration): start_time = datetime.datetime.now().replace(hour=start, minute=0) return start_time + datetime.timedelta(hours=duration // 60, minutes=duration % 60) diff --git a/cps/tasks/metadata_backup.py b/cps/tasks/metadata_backup.py new file mode 100644 index 00000000..4cb7a79d --- /dev/null +++ b/cps/tasks/metadata_backup.py @@ -0,0 +1,174 @@ +# -*- coding: utf-8 -*- + +# This file is part of the Calibre-Web (https://github.com/janeczku/calibre-web) +# Copyright (C) 2020 monkey +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . 
import datetime
import os
import json
from urllib.request import urlopen
from lxml import etree
from html import escape

from cps import config, db, fs, gdriveutils, logger, ub
from cps.services.worker import CalibreTask, STAT_CANCELLED, STAT_ENDED
from flask_babel import lazy_gettext as N_

OPF_NAMESPACE = "http://www.idpf.org/2007/opf"
PURL_NAMESPACE = "http://purl.org/dc/elements/1.1/"

# Clark-notation prefixes ("{namespace}tag") as expected by lxml
OPF = "{%s}" % OPF_NAMESPACE
PURL = "{%s}" % PURL_NAMESPACE

etree.register_namespace("opf", OPF_NAMESPACE)
etree.register_namespace("dc", PURL_NAMESPACE)

OPF_NS = {None: OPF_NAMESPACE}  # the default namespace (no prefix)
NSMAP = {'dc': PURL_NAMESPACE, 'opf': OPF_NAMESPACE}


def _timestamp_text(value):
    """Format a datetime as ``YYYY-MM-DDTHH:MM:SS+00:00``.

    The fields are formatted explicitly instead of via ``strftime`` because
    ``%Y`` is platform dependent for years below 1000 (not zero padded on
    some platforms, rejected on others).
    """
    return ("{d.year:04}-{d.month:02}-{d.day:02}T"
            "{d.hour:02}:{d.minute:02}:{d.second:02}+00:00").format(d=value)


def _row_text(row, attribute="name"):
    """Return ``row.<attribute>`` as text, falling back to ``str(row)``.

    NOTE(review): assumes the rows reachable via book.authors / book.tags /
    book.series expose ``name`` and the rows in book.languages expose
    ``lang_code`` - confirm against cps/db.py. Rows without the attribute keep
    the previous ``str(row)`` behaviour.
    """
    value = getattr(row, attribute, None)
    return str(row) if value is None else str(value)


class TaskBackupMetadata(CalibreTask):
    """Background task writing a ``metadata.opf`` for every book flagged in ``Metadata_Dirtied``."""

    def __init__(self, task_message=N_('Backing up Metadata')):
        super(TaskBackupMetadata, self).__init__(task_message)
        self.log = logger.create()
        self.db_session = db.CalibreDB(expire_on_commit=False, init=True).session

    def run(self, worker_thread):
        """Process all dirty-metadata entries and report the task result exactly once.

        :param worker_thread: worker executing this task (not used here)

        Books referenced by a dirty entry but missing from the database are
        logged and reported as a task error after all other books were handled.
        Any other exception aborts the run, is reported as a task error and the
        session is rolled back.
        """
        missing_book_ids = []
        try:
            metadata_backup = self.db_session.query(db.Metadata_Dirtied).all()
            custom_columns = self.db_session.query(db.CustomColumns).all()
            for backup in metadata_backup:
                book = self.db_session.query(db.Books).filter(db.Books.id == backup.book).one_or_none()
                if book is None:
                    self.log.error("Book {} not found in database".format(backup.book))
                    missing_book_ids.append(backup.book)
                    continue
                # TODO: remove the Metadata_Dirtied row for backup.book once open_metadata() really
                # updates an already existing metadata.opf; removing it earlier would lose the flag.
                self.open_metadata(book, custom_columns)

            # Report the outcome once, after the loop: doing it per book ended the task (and tore down
            # the session) after the first entry and never reported anything for an empty list.
            if missing_book_ids:
                self._handleError(
                    "Book {} not found in database".format(", ".join(str(i) for i in missing_book_ids)))
            else:
                self._handleSuccess()
        except Exception as ex:
            self.log.error('Error creating metadata backup: ' + str(ex))
            self._handleError('Error creating metadata backup: ' + str(ex))
            self.db_session.rollback()
        finally:
            # single cleanup point for every exit path
            self.db_session.remove()

    def open_metadata(self, book, custom_columns):
        """Make sure a metadata backup exists for ``book``.

        :param book: book row; ``book.path`` locates the book folder
        :param custom_columns: custom column rows handed on to the OPF writer
        :return: always ``None``
        :raises Exception: Google Drive is configured but not ready, or no link for metadata.opf was found
        :raises Exception: whatever ``urlopen``, ``etree.parse`` or writing the file raises

        Google Drive: only checks that the remote metadata.opf can be opened.
        Local storage: creates metadata.opf if it is missing, otherwise only verifies
        that the existing file is well-formed XML.
        """
        if config.config_use_google_drive:
            if not gdriveutils.is_gdrive_ready():
                raise Exception('Google Drive is configured but not ready')

            web_content_link = gdriveutils.get_metadata_backup_via_gdrive(book.path)
            if not web_content_link:
                raise Exception('Google Drive metadata.opf url not found')

            try:
                # ToDo: content is not evaluated yet, the stream is only opened and closed again
                with urlopen(web_content_link):
                    pass
            except Exception as ex:
                # Bubble exception to calling function
                self.log.debug('Error reading metadata.opf: ' + str(ex))
                raise
            return None

        book_metadata_filepath = os.path.join(config.config_calibre_dir, book.path, 'metadata.opf')
        if not os.path.isfile(book_metadata_filepath):
            # The file is complete after this call. It must not be re-opened with mode "w":
            # that truncates the backup that was just written.
            self.create_new_metadata_backup(book, custom_columns, book_metadata_filepath)
        else:
            # ToDo: merge the changed metadata into the existing file; for now it is only parsed
            etree.parse(book_metadata_filepath)
        return None

    def create_new_metadata_backup(self, book, custom_columns, book_metadata_filepath):
        """Write a new OPF 2.0 metadata file for ``book`` to ``book_metadata_filepath``.

        :param book: book row providing id, uuid, title, authors, dates, languages, tags and series
        :param custom_columns: custom column rows; ``label`` and ``get_display_dict()`` are exported
        :param book_metadata_filepath: target path, an existing file is overwritten
        """
        package = self._build_package(book, custom_columns)
        # prepare finalize everything and output
        doc = etree.ElementTree(package)
        with open(book_metadata_filepath, 'wb') as f:
            doc.write(f, xml_declaration=True, encoding='utf-8', pretty_print=True)

    def _build_package(self, book, custom_columns):
        """Return the OPF ``package`` element (metadata and guide) describing ``book``."""
        # generate root package element
        package = etree.Element(OPF + "package", nsmap=OPF_NS)
        package.set("unique-identifier", "uuid_id")
        package.set("version", "2.0")

        # generate metadata element and all subelements of it
        metadata = etree.SubElement(package, "metadata", nsmap=NSMAP)
        identifier = etree.SubElement(metadata, PURL + "identifier", id="calibre_id", nsmap=NSMAP)
        identifier.set(OPF + "scheme", "calibre")
        identifier.text = str(book.id)
        identifier2 = etree.SubElement(metadata, PURL + "identifier", id="uuid_id", nsmap=NSMAP)
        identifier2.set(OPF + "scheme", "uuid")
        identifier2.text = book.uuid
        title = etree.SubElement(metadata, PURL + "title", nsmap=NSMAP)
        title.text = book.title

        author_names = [_row_text(author) for author in book.authors]
        for author_name in author_names:
            creator = etree.SubElement(metadata, PURL + "creator", nsmap=NSMAP)
            creator.text = author_name
            creator.set(OPF + "file-as", book.author_sort or "")  # ToDo Check
            creator.set(OPF + "role", "aut")

        contributor = etree.SubElement(metadata, PURL + "contributor", nsmap=NSMAP)
        contributor.text = "calibre (5.7.2) [https://calibre-ebook.com]"
        contributor.set(OPF + "file-as", "calibre")  # ToDo Check
        contributor.set(OPF + "role", "bpk")

        if book.pubdate is not None:
            date = etree.SubElement(metadata, PURL + "date", nsmap=NSMAP)
            date.text = _timestamp_text(book.pubdate)

        if book.languages:
            # one element per language instead of the repr of the whole collection
            for lang in book.languages:
                language = etree.SubElement(metadata, PURL + "language", nsmap=NSMAP)
                language.text = _row_text(lang, "lang_code")
        else:
            language = etree.SubElement(metadata, PURL + "language", nsmap=NSMAP)
            language.text = ""  # ToDo: insert locale (2 letter code)

        for tag in book.tags or []:
            subject = etree.SubElement(metadata, PURL + "subject", nsmap=NSMAP)
            subject.text = _row_text(tag)

        # lxml escapes attribute values on serialization, so the JSON text is passed unescaped;
        # escaping it here as well would store "&amp;quot;" instead of a quote character.
        etree.SubElement(metadata, "meta", name="calibre:author_link_map",
                         content=json.dumps({name: "" for name in author_names}, ensure_ascii=False),
                         nsmap=NSMAP)
        for series in book.series or []:
            etree.SubElement(metadata, "meta", name="calibre:series",
                             content=_row_text(series),
                             nsmap=NSMAP)
        if book.series:
            etree.SubElement(metadata, "meta", name="calibre:series_index",
                             content=str(book.series_index),
                             nsmap=NSMAP)
        if book.timestamp is not None:
            etree.SubElement(metadata, "meta", name="calibre:timestamp",
                             content=_timestamp_text(book.timestamp),
                             nsmap=NSMAP)
        etree.SubElement(metadata, "meta", name="calibre:title_sort",
                         content=book.sort or "",
                         nsmap=NSMAP)
        for cc in custom_columns:
            # get_display_dict() returns a dict: serialize it as JSON (html.escape only accepts str)
            etree.SubElement(metadata, "meta", name="calibre:user_metadata:#{}".format(cc.label),
                             content=json.dumps(cc.get_display_dict(), ensure_ascii=False),
                             nsmap=NSMAP)

        # generate guide element and all sub elements of it
        guide = etree.SubElement(package, "guide")
        etree.SubElement(guide, "reference", type="cover", title="Titelbild", href="cover.jpg")
        return package

    @property
    def name(self):
        return "Backing up Metadata"

    @property
    def is_cancellable(self):
        # NOTE(review): run() never checks for a cancel request - confirm whether this should stay True
        return True