This commit is contained in:
2026-01-15 00:45:42 -08:00
parent ac10e607bb
commit 3a02a52863
5 changed files with 837 additions and 784 deletions

View File

@@ -261,6 +261,7 @@ class API_folder_store:
def _init_db(self) -> None:
"""Initialize database connection and create tables if needed."""
with self._db_lock:
try:
# Ensure the library root exists; sqlite cannot create parent dirs.
try:
@@ -352,14 +353,21 @@ class API_folder_store:
# Global cleanup of old workers and logs regardless of size
self._global_cleanup()
# If the database is larger than 30MB, run a vacuum to ensure space is reclaimed.
# We only do this on startup to minimize performance impact.
# If the database is larger than 64MB, check if a vacuum is worth the time.
# We only do this check on startup to minimize performance impact.
file_stats = self.db_path.stat()
size_mb = file_stats.st_size / (1024 * 1024)
if size_mb > 30:
logger.debug(f"Database size ({size_mb:.1f}MB) exceeds maintenance threshold. Vacuuming...")
# We use a cursor to avoid blocking the main connection state if possible
if size_mb > 64:
# Check fragmentation (freelist count)
try:
freelist = self.connection.execute("PRAGMA freelist_count").fetchone()[0]
page_size = self.connection.execute("PRAGMA page_size").fetchone()[0]
free_mb = (freelist * page_size) / (1024 * 1024)
# If more than 10MB of the file is free space, it's worth a VACUUM.
# NOTE(review): since this branch only runs when size_mb > 64, the 25%
# test below is implied by >16MB free, so the >10MB check dominates and
# the ratio clause is effectively dead — confirm intent or drop it.
if free_mb > 10 or (free_mb / size_mb) > 0.25:
logger.debug(f"Database size ({size_mb:.1f}MB) has {free_mb:.1f}MB free. Vacuuming...")
self.connection.execute("VACUUM")
# Also optimize the query planner indices
self.connection.execute("ANALYZE")
@@ -368,6 +376,11 @@ class API_folder_store:
reduction = size_mb - new_size_mb
if reduction > 1.0:
logger.info(f"Maintenance reclaimed {reduction:.1f}MB. Current size: {new_size_mb:.1f}MB")
except Exception as inner_e:
logger.debug(f"Refined maintenance check failed: {inner_e}")
# Fallback to simple size threshold if PRAGMA fails
if size_mb > 128:
self.connection.execute("VACUUM")
except Exception as e:
# Maintenance should never block application startup
logger.warning(f"Database maintenance skipped: {e}")
@@ -710,6 +723,7 @@ class API_folder_store:
@_db_retry()
def _update_metadata_modified_time(self, file_hash: str) -> None:
"""Update the time_modified timestamp for a file's metadata."""
with self._db_lock:
try:
cursor = self.connection.cursor()
cursor.execute(
@@ -1149,6 +1163,7 @@ class API_folder_store:
tags: List[str]
) -> None:
"""Save metadata and tags for a file in a single transaction."""
with self._db_lock:
try:
abs_path = self._normalize_input_path(file_path)
db_path = self._to_db_file_path(abs_path)
@@ -1206,9 +1221,7 @@ class API_folder_store:
# 2. Save Tags
# We assume tags list is complete and includes title if needed
cursor.execute("DELETE FROM tag WHERE hash = ?",
(file_hash,
))
cursor.execute("DELETE FROM tag WHERE hash = ?", (file_hash, ))
for tag in tags:
tag = tag.strip()
@@ -1218,8 +1231,7 @@ class API_folder_store:
INSERT OR IGNORE INTO tag (hash, tag)
VALUES (?, ?)
""",
(file_hash,
tag),
(file_hash, tag),
)
self.connection.commit()
@@ -1230,8 +1242,7 @@ class API_folder_store:
except Exception as e:
logger.error(
f"[save_file_info] ❌ Error saving file info for {file_path}: {e}",
exc_info=True
)
exc_info=True)
raise
def get_tags(self, file_hash: str) -> List[str]:
@@ -1346,6 +1357,7 @@ class API_folder_store:
@_db_retry()
def add_tags(self, file_path: Path, tags: List[str]) -> None:
"""Add tags to a file."""
with self._db_lock:
try:
file_hash = self.get_or_create_file_entry(file_path)
cursor = self.connection.cursor()
@@ -1413,6 +1425,7 @@ class API_folder_store:
@_db_retry()
def remove_tags(self, file_path: Path, tags: List[str]) -> None:
"""Remove specific tags from a file."""
with self._db_lock:
try:
file_hash = self.get_or_create_file_entry(file_path)
cursor = self.connection.cursor()
@@ -1439,6 +1452,7 @@ class API_folder_store:
@_db_retry()
def add_tags_to_hash(self, file_hash: str, tags: List[str]) -> None:
"""Add tags to a file by hash."""
with self._db_lock:
try:
cursor = self.connection.cursor()
@@ -1481,6 +1495,7 @@ class API_folder_store:
@_db_retry()
def remove_tags_from_hash(self, file_hash: str, tags: List[str]) -> None:
"""Remove specific tags from a file by hash."""
with self._db_lock:
try:
cursor = self.connection.cursor()
@@ -1514,6 +1529,7 @@ class API_folder_store:
Any]
) -> None:
"""Update metadata for a file by hash."""
with self._db_lock:
try:
cursor = self.connection.cursor()
@@ -1566,6 +1582,7 @@ class API_folder_store:
related_file_path: Path to the related file
rel_type: Type of relationship ('king', 'alt', 'related')
"""
with self._db_lock:
try:
str_path = str(file_path.resolve())
str_related_path = str(related_file_path.resolve())
@@ -1757,6 +1774,7 @@ class API_folder_store:
def set_note(self, file_path: Path, name: str, note: str) -> None:
"""Set a named note for a file."""
with self._db_lock:
try:
note_name = str(name or "").strip()
if not note_name:
@@ -1784,6 +1802,7 @@ class API_folder_store:
def delete_note(self, file_hash: str, name: str) -> None:
"""Delete a named note for a file by hash."""
with self._db_lock:
try:
note_name = str(name or "").strip()
if not note_name:
@@ -1920,6 +1939,7 @@ class API_folder_store:
backlinks in other files so no file retains dangling references to the
deleted hash.
"""
with self._db_lock:
try:
abs_path = self._normalize_input_path(file_path)
str_path = self._to_db_file_path(abs_path)
@@ -2065,6 +2085,7 @@ class API_folder_store:
def update_worker(self, worker_id: str, **kwargs) -> bool:
"""Update worker entry with given fields."""
with self._db_lock:
try:
allowed_fields = {
"status",
@@ -2187,6 +2208,7 @@ class API_folder_store:
def delete_worker(self, worker_id: str) -> bool:
"""Delete a worker entry."""
with self._db_lock:
try:
cursor = self.connection.cursor()
cursor.execute("DELETE FROM worker WHERE worker_id = ?",
@@ -2294,6 +2316,7 @@ class API_folder_store:
"""Append text to a worker's stdout log and timeline."""
if not text:
return True
with self._db_lock:
try:
# Check if connection is valid
if not self.connection:

View File

@@ -7,6 +7,7 @@ import os
import shutil
import sys
import time
from threading import RLock
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Protocol, TextIO
@@ -755,6 +756,7 @@ class PipelineLiveProgress:
def __init__(self, pipe_labels: List[str], *, enabled: bool = True) -> None:
self._enabled = bool(enabled)
self._pipe_labels = [str(x) for x in (pipe_labels or [])]
self._lock = RLock()
self._console: Optional[Console] = None
self._live: Optional[Live] = None
@@ -826,6 +828,7 @@ class PipelineLiveProgress:
the spinner without needing manual Live.update() calls.
"""
with self._lock:
pipe_progress = self._pipe_progress
status = self._status
transfers = self._transfers
@@ -1029,6 +1032,8 @@ class PipelineLiveProgress:
return
if not self._ensure_pipe(int(pipe_index)):
return
with self._lock:
prog = self._status
if prog is None:
return
@@ -1061,6 +1066,10 @@ class PipelineLiveProgress:
pass
def clear_pipe_status_text(self, pipe_index: int) -> None:
if not self._enabled:
return
with self._lock:
prog = self._status
if prog is None:
return
@@ -1095,6 +1104,31 @@ class PipelineLiveProgress:
pct = max(0, min(100, int(percent)))
pipe_task = self._pipe_tasks[pidx]
pipe_progress.update(pipe_task, completed=pct, total=100, refresh=True)
self._update_overall()
except Exception:
pass
def _update_overall(self) -> None:
"""Update the overall pipeline progress task."""
if self._overall is None or self._overall_task is None:
return
completed = 0
try:
# Count a pipe as completed if its 'done' count matches or exceeds the advertised total.
completed = sum(
1 for i in range(len(self._pipe_labels))
if self._pipe_done[i] >= max(1, self._pipe_totals[i])
)
except Exception:
completed = 0
try:
self._overall.update(
self._overall_task,
completed=min(completed, max(1, len(self._pipe_labels))),
description=f"Pipeline: {completed}/{len(self._pipe_labels)} pipes completed",
)
except Exception:
pass
@@ -1108,6 +1142,7 @@ class PipelineLiveProgress:
if not self._ensure_pipe(int(pipe_index)):
return
with self._lock:
try:
pidx = int(pipe_index)
tot = max(1, int(total_steps))
@@ -1287,6 +1322,8 @@ class PipelineLiveProgress:
except Exception:
pass
self._update_overall()
labels: List[str] = []
if isinstance(items_preview, list) and items_preview:
labels = [_pipeline_progress_item_label(x) for x in items_preview]
@@ -1372,6 +1409,8 @@ class PipelineLiveProgress:
else:
pipe_progress.update(pipe_task, completed=done)
self._update_overall()
# Clear any status line now that it emitted.
try:
self.clear_pipe_status_text(pipe_index)
@@ -1452,23 +1491,7 @@ class PipelineLiveProgress:
except Exception:
pass
if self._overall_task is not None:
completed = 0
try:
completed = sum(
1 for i in range(len(self._pipe_labels))
if self._pipe_done[i] >= max(1, self._pipe_totals[i])
)
except Exception:
completed = 0
overall.update(
self._overall_task,
completed=min(completed,
max(1,
len(self._pipe_labels))),
description=
f"Pipeline: {completed}/{len(self._pipe_labels)} pipes completed",
)
self._update_overall()
class PipelineStageContext:

View File

@@ -325,8 +325,11 @@ class HydrusNetwork(Store):
]
try:
# Compute file hash
# Compute file hash (or use hint from kwargs to avoid redundant IO)
file_hash = kwargs.get("hash") or kwargs.get("file_hash")
if not file_hash:
file_hash = sha256_file(file_path)
debug(f"{self._log_prefix()} file hash: {file_hash}")
# Use persistent client with session key

View File

@@ -369,11 +369,12 @@ class Add_File(Cmdlet):
# Many add-file flows don't emit intermediate items, so without steps the pipe can look "stuck".
use_steps = False
steps_started = False
step2_done = False
step3_done = False
try:
ui, _ = progress.ui_and_pipe_index()
use_steps = (ui is not None) and (len(items_to_process) == 1)
if use_steps:
progress.begin_steps(5)
steps_started = True
except Exception:
use_steps = False
@@ -545,10 +546,8 @@ class Add_File(Cmdlet):
temp_dir_to_cleanup: Optional[Path] = None
delete_after_item = delete_after
try:
if use_steps and (not steps_started):
progress.begin_steps(5)
if use_steps and steps_started:
progress.step("resolving source")
steps_started = True
media_path, file_hash, temp_dir_to_cleanup = self._resolve_source(
item, path_arg, pipe_obj, config, store_instance=storage_registry
@@ -560,32 +559,20 @@ class Add_File(Cmdlet):
failures += 1
continue
# Update pipe_obj with resolved path
pipe_obj.path = str(media_path)
# When using -path (filesystem export), allow all file types.
# When using -store (backend), restrict to SUPPORTED_MEDIA_EXTENSIONS.
allow_all_files = not (location and is_storage_backend_location)
if not self._validate_source(media_path, allow_all_extensions=allow_all_files):
failures += 1
continue
if use_steps and steps_started:
if not file_hash:
progress.step("hashing file")
# Update pipe_obj with resolved path
pipe_obj.path = str(media_path)
# When using -path (filesystem export), allow all file types.
# When using -store (backend), restrict to SUPPORTED_MEDIA_EXTENSIONS.
allow_all_files = not (location and is_storage_backend_location)
if not self._validate_source(media_path, allow_all_extensions=allow_all_files):
failures += 1
continue
if use_steps and steps_started and (not step2_done):
progress.step("ingesting file")
step2_done = True
# Update pipe_obj with resolved path
pipe_obj.path = str(media_path)
# When using -path (filesystem export), allow all file types.
# When using -store (backend), restrict to SUPPORTED_MEDIA_EXTENSIONS.
allow_all_files = not (location and is_storage_backend_location)
if not self._validate_source(media_path, allow_all_extensions=allow_all_files):
failures += 1
continue
if provider_name:
if str(provider_name).strip().lower() == "matrix":
@@ -690,9 +677,8 @@ class Add_File(Cmdlet):
seen: set[str] = set()
hashes = [h for h in hashes if not (h in seen or seen.add(h))]
if use_steps and steps_started and (not step3_done):
if use_steps and steps_started:
progress.step("refreshing display")
step3_done = True
refreshed_items = Add_File._try_emit_search_file_by_hashes(
store=str(location),
@@ -700,6 +686,7 @@ class Add_File(Cmdlet):
config=config,
store_instance=storage_registry,
)
debug(f"[add-file] Internal refresh returned refreshed_items count={len(refreshed_items) if refreshed_items else 0}")
if not refreshed_items:
# Fallback: at least show the add-file payloads as a display overlay
from SYS.result_table import ResultTable
@@ -756,7 +743,7 @@ class Add_File(Cmdlet):
from cmdlet.search_file import CMDLET as search_file_cmdlet
query = "hash:" + ",".join(hashes)
args = ["-store", str(store), query]
args = ["-store", str(store), "-internal-refresh", query]
debug(f'[add-file] Refresh: search-file -store {store} "{query}"')
# Run search-file under a temporary stage context so its ctx.emit() calls

View File

@@ -258,9 +258,6 @@ class search_file(Cmdlet):
try:
results_list: List[Dict[str, Any]] = []
from SYS import result_table
importlib.reload(result_table)
from SYS.result_table import ResultTable
provider_text = str(provider_name or "").strip()
@@ -453,8 +450,8 @@ class search_file(Cmdlet):
args_list = [str(arg) for arg in (args or [])]
refresh_mode = any(
str(a).strip().lower() in {"--refresh",
"-refresh"} for a in args_list
str(a).strip().lower() in {"--refresh", "-refresh", "-internal-refresh"}
for a in args_list
)
def _format_command_title(command: str, raw_args: List[str]) -> str:
@@ -470,7 +467,7 @@ class search_file(Cmdlet):
cleaned = [
str(a) for a in (raw_args or [])
if str(a).strip().lower() not in {"--refresh", "-refresh"}
if str(a).strip().lower() not in {"--refresh", "-refresh", "-internal-refresh"}
]
if not cleaned:
return command
@@ -626,6 +623,10 @@ class search_file(Cmdlet):
continue
if not library_root:
# Internal refreshes should not trigger config panels or stop progress.
if "-internal-refresh" in args_list:
return 1
from SYS import pipeline as ctx_mod
progress = None
if hasattr(ctx_mod, "get_pipeline_state"):
@@ -641,6 +642,7 @@ class search_file(Cmdlet):
# Use context manager to ensure database is always closed
with API_folder_store(library_root) as db:
try:
if "-internal-refresh" not in args_list:
db.insert_worker(
worker_id,
"search-file",
@@ -650,10 +652,6 @@ class search_file(Cmdlet):
)
results_list = []
from SYS import result_table
import importlib
importlib.reload(result_table)
from SYS.result_table import ResultTable
table = ResultTable(command_title)
@@ -802,6 +800,16 @@ class search_file(Cmdlet):
if found_any:
table.title = command_title
# Add-file refresh quality-of-life: if exactly 1 item is being refreshed,
# show the detailed item panel instead of a single-row table.
if refresh_mode and len(results_list) == 1:
try:
from SYS.rich_display import render_item_details_panel
render_item_details_panel(results_list[0])
table._rendered_by_cmdlet = True
except Exception:
pass
if refresh_mode:
ctx.set_last_result_table_preserve_history(
table,
@@ -918,6 +926,15 @@ class search_file(Cmdlet):
table.title = command_title
# If exactly 1 item is being refreshed, show the detailed item panel.
if refresh_mode and len(results_list) == 1:
try:
from SYS.rich_display import render_item_details_panel
render_item_details_panel(results_list[0])
table._rendered_by_cmdlet = True
except Exception:
pass
if refresh_mode:
ctx.set_last_result_table_preserve_history(table, results_list)
else: