From ac10e607bb356d3c7734ef9ed9fd8f7f37ccd5dd Mon Sep 17 00:00:00 2001 From: Nose Date: Wed, 14 Jan 2026 22:21:19 -0800 Subject: [PATCH] h --- API/folder.py | 135 ++++++++++++++++++++++++--------------------- Store/Folder.py | 7 ++- cmdlet/add_file.py | 21 ++++++- 3 files changed, 95 insertions(+), 68 deletions(-) diff --git a/API/folder.py b/API/folder.py index c46319f..7c6713a 100644 --- a/API/folder.py +++ b/API/folder.py @@ -2028,39 +2028,40 @@ class API_folder_store: pipe: Optional[str] = None, ) -> int: """Insert a new worker entry into the database.""" - try: - cursor = self.connection.cursor() - cursor.execute( - """ - INSERT INTO worker (worker_id, worker_type, pipe, status, title, description, total_steps) - VALUES (?, ?, ?, ?, ?, ?, ?) - """, - ( - worker_id, - worker_type, - pipe, - "running", - title, - description, - total_steps - ), - ) - worker_rowid = cursor.lastrowid or 0 - - # Prune occasionally (1 in 50 chance) or just run it to keep it clean - # Running it every time might be overkill, but let's do a light version - cursor.execute( - "DELETE FROM worker WHERE status != 'running' AND id < (SELECT MAX(id) - ? FROM worker)", - (MAX_FINISHED_WORKERS * 2,) - ) + with self._db_lock: + try: + cursor = self.connection.cursor() + cursor.execute( + """ + INSERT INTO worker (worker_id, worker_type, pipe, status, title, description, total_steps) + VALUES (?, ?, ?, ?, ?, ?, ?) + """, + ( + worker_id, + worker_type, + pipe, + "running", + title, + description, + total_steps + ), + ) + worker_rowid = cursor.lastrowid or 0 + + # Prune occasionally (1 in 50 chance) or just run it to keep it clean + # Running it every time might be overkill, but let's do a light version + cursor.execute( + "DELETE FROM worker WHERE status != 'running' AND id < (SELECT MAX(id) - ? FROM worker)", + (MAX_FINISHED_WORKERS * 2,) + ) - self.connection.commit() - return worker_rowid - except sqlite3.IntegrityError: - return self.update_worker_status(worker_id, "running") - except Exception as e: - logger.error(f"Error inserting worker: {e}", exc_info=True) - return 0 + self.connection.commit() + return worker_rowid + except sqlite3.IntegrityError: + return self.update_worker_status(worker_id, "running") + except Exception as e: + logger.error(f"Error inserting worker: {e}", exc_info=True) + return 0 def update_worker(self, worker_id: str, **kwargs) -> bool: """Update worker entry with given fields.""" @@ -2107,40 +2108,41 @@ class API_folder_store: def update_worker_status(self, worker_id: str, status: str) -> int: """Update worker status and return its database ID.""" - try: - cursor = self.connection.cursor() + with self._db_lock: + try: + cursor = self.connection.cursor() - if status in ("completed", "error"): - cursor.execute( - """ - UPDATE worker - SET status = ?, completed_at = CURRENT_TIMESTAMP, last_updated = CURRENT_TIMESTAMP - WHERE worker_id = ? - """, - (status, - worker_id), - ) - else: - cursor.execute( - """ - UPDATE worker - SET status = ?, last_updated = CURRENT_TIMESTAMP - WHERE worker_id = ? - """, - (status, - worker_id), - ) + if status in ("completed", "error"): + cursor.execute( + """ + UPDATE worker + SET status = ?, completed_at = CURRENT_TIMESTAMP, last_updated = CURRENT_TIMESTAMP + WHERE worker_id = ? + """, + (status, + worker_id), + ) + else: + cursor.execute( + """ + UPDATE worker + SET status = ?, last_updated = CURRENT_TIMESTAMP + WHERE worker_id = ? + """, + (status, + worker_id), + ) - self.connection.commit() + self.connection.commit() - cursor.execute("SELECT id FROM worker WHERE worker_id = ?", - (worker_id, - )) - row = cursor.fetchone() - return row[0] if row else 0 - except Exception as e: - logger.error(f"Error updating worker status: {e}", exc_info=True) - return 0 + cursor.execute("SELECT id FROM worker WHERE worker_id = ?", + (worker_id, + )) + row = cursor.fetchone() + return row[0] if row else 0 + except Exception as e: + logger.error(f"Error updating worker status: {e}", exc_info=True) + return 0 def get_worker(self, worker_id: str) -> Optional[Dict[str, Any]]: """Retrieve a worker entry by ID.""" @@ -2484,10 +2486,15 @@ class API_folder_store: logger.error(f"Error closing database: {e}", exc_info=True) def __enter__(self): + if not self.connection: + self._init_db() return self def __exit__(self, exc_type, exc_val, exc_tb): - self.close() + try: + self.close() + finally: + pass # ============================================================================ diff --git a/Store/Folder.py b/Store/Folder.py index 5b0dbcd..9f3f927 100644 --- a/Store/Folder.py +++ b/Store/Folder.py @@ -372,6 +372,7 @@ class Folder(Store): tag: Optional list of tag values to add url: Optional list of url to associate with the file title: Optional title (will be added as 'title:value' tag) + file_hash: Optional pre-calculated SHA256 hash (skips re-hashing) Returns: File hash (SHA256 hex string) as identifier @@ -380,6 +381,7 @@ class Folder(Store): tag_list = kwargs.get("tag", []) url = kwargs.get("url", []) title = kwargs.get("title") + file_hash = kwargs.get("file_hash") # Extract title from tags if not explicitly provided if not title: @@ -400,7 +402,10 @@ class Folder(Store): tag_list = [title_tag] + list(tag_list) try: - file_hash = sha256_file(file_path) + if not file_hash or len(str(file_hash)) != 64: + debug(f"[folder] Re-hashing file: {file_path}", file=sys.stderr) + file_hash = sha256_file(file_path) + debug(f"File hash: {file_hash}", file=sys.stderr) # Preserve extension in the stored filename diff --git a/cmdlet/add_file.py b/cmdlet/add_file.py index 4ee7b1f..3046e57 100644 --- a/cmdlet/add_file.py +++ b/cmdlet/add_file.py @@ -546,7 +546,7 @@ class Add_File(Cmdlet): delete_after_item = delete_after try: if use_steps and (not steps_started): - progress.begin_steps(4) + progress.begin_steps(5) progress.step("resolving source") steps_started = True @@ -560,6 +560,19 @@ class Add_File(Cmdlet): failures += 1 continue + if use_steps and steps_started: + progress.step("hashing file") + + # Update pipe_obj with resolved path + pipe_obj.path = str(media_path) + + # When using -path (filesystem export), allow all file types. + # When using -store (backend), restrict to SUPPORTED_MEDIA_EXTENSIONS. + allow_all_files = not (location and is_storage_backend_location) + if not self._validate_source(media_path, allow_all_extensions=allow_all_files): + failures += 1 + continue + if use_steps and steps_started and (not step2_done): progress.step("ingesting file") step2_done = True @@ -1973,16 +1986,18 @@ class Add_File(Cmdlet): debug("[add-file] Deferring tag application until after Hydrus upload") debug( - f"[add-file] Storing into backend '{backend_name}' path='{media_path}' title='{title}'" + f"[add-file] Storing into backend '{backend_name}' path='{media_path}' title='{title}' hash='{f_hash[:12] if f_hash else 'N/A'}'" ) # Call backend's add_file with full metadata - # Backend returns hash as identifier + # Backend returns hash as identifier. If we already know the hash from _resolve_source + # (which came from download-file emit), pass it to skip re-hashing the 4GB file. file_identifier = backend.add_file( media_path, title=title, tag=upload_tags, url=[] if (defer_url_association and url) else url, + file_hash=f_hash, ) debug( f"[add-file] backend.add_file returned identifier {file_identifier} (len={len(str(file_identifier)) if file_identifier is not None else 'None'})"