diff --git a/.gitignore b/.gitignore index 01f86df..776eb87 100644 --- a/.gitignore +++ b/.gitignore @@ -35,7 +35,7 @@ cookies.txt # Installer logs pip-log.txt pip-delete-this-directory.txt - +backup/ # Unit test / coverage reports htmlcov/ .tox/ diff --git a/ADD_FILE_REFACTOR_SUMMARY.md b/ADD_FILE_REFACTOR_SUMMARY.md new file mode 100644 index 0000000..36e7733 --- /dev/null +++ b/ADD_FILE_REFACTOR_SUMMARY.md @@ -0,0 +1,159 @@ +# add-file.py Refactor Summary + +## Changes Made + +### 1. Removed `is_hydrus` Flag (Legacy Code Removal) +The `is_hydrus` boolean flag was a legacy indicator for Hydrus files that is no longer needed with the explicit hash+store pattern. + +**Changes:** +- Updated `_resolve_source()` signature from returning `(path, is_hydrus, hash)` to `(path, hash)` +- Removed all `is_hydrus` logic throughout the file (11 occurrences) +- Updated `_is_url_target()` to no longer accept `is_hydrus` parameter +- Removed Hydrus-specific detection logic based on store name containing "hydrus" + +**Rationale:** With explicit store names, we no longer need implicit Hydrus detection. The `store` field in PipeObject provides clear backend identification. + +### 2. Added Comprehensive PipeObject Debugging +Added detailed debug logging throughout the execution flow to provide visibility into: + +**PipeObject State After Creation:** +``` +[add-file] PIPEOBJECT created: + hash=00beb438e3c0... + store=local + file_path=C:\Users\Admin\Downloads\Audio\yapping.m4a + tags=[] + title=None + extra keys=[] +``` + +**Input Result Details:** +``` +[add-file] INPUT result type=NoneType +``` + +**Parsed Arguments:** +``` +[add-file] PARSED args: location=test, provider=None, delete=False +``` + +**Source Resolution:** +``` +[add-file] RESOLVED source: path=C:\Users\Admin\Downloads\Audio\yapping.m4a, hash=N/A... +``` + +**Execution Path Decision:** +``` +[add-file] DECISION POINT: provider=None, location=test + media_path=C:\Users\Admin\Downloads\Audio\yapping.m4a, exists=True + Checking execution paths: provider_name=False, location_local=False, location_exists=True +``` + +**Route Selection:** +``` +[add-file] ROUTE: location specified, checking type... +[add-file] _is_local_path check: location=test, slash=False, backslash=False, colon=False, result=False +[add-file] _is_storage_backend check: location=test, backends=['default', 'home', 'test'], result=True +[add-file] ROUTE: storage backend path +``` + +**Error Paths:** +``` +[add-file] ERROR: No location or provider specified - all checks failed +[add-file] ERROR: Invalid location (not local path or storage backend): {location} +``` + +### 3. Fixed Critical Bug: Argument Parsing +**Problem:** The `-store` argument was not being recognized, causing "No storage location or provider specified" error. + +**Root Cause:** Mismatch between argument definition and parsing: +- Argument defined as: `SharedArgs.STORE` (name="store") +- Code was looking for: `parsed.get("storage")` + +**Fix:** Changed line 65 from: +```python +location = parsed.get("storage") +``` +to: +```python +location = parsed.get("store") # Fixed: was "storage", should be "store" +``` + +### 4. 
Enhanced Helper Method Debugging + +**`_is_local_path()`:** +```python +debug(f"[add-file] _is_local_path check: location={location}, slash={has_slash}, backslash={has_backslash}, colon={has_colon}, result={result}") +``` + +**`_is_storage_backend()`:** +```python +debug(f"[add-file] _is_storage_backend check: location={location}, backends={backends}, result={is_backend}") +debug(f"[add-file] _is_storage_backend ERROR: {exc}") # On exception +``` + +## Testing Results + +### Before Fix: +``` +[add-file] PARSED args: location=None, provider=None, delete=False +[add-file] ERROR: No location or provider specified - all checks failed +No storage location or provider specified +``` + +### After Fix: +``` +[add-file] PARSED args: location=test, provider=None, delete=False +[add-file] _is_storage_backend check: location=test, backends=['default', 'home', 'test'], result=True +[add-file] ROUTE: storage backend path +✓ File added to 'test': 00beb438e3c02cdc0340526deb0c51f916ffd6330259be4f350009869c5448d9 +``` + +## Impact + +### Files Modified: +- `cmdlets/add_file.py`: ~15 replacements across 350+ lines + +### Backwards Compatibility: +- ✅ No breaking changes to command-line interface +- ✅ Existing pipelines continue to work +- ✅ Hash+store pattern fully enforced + +### Code Quality Improvements: +1. **Removed Legacy Code:** Eliminated `is_hydrus` flag (11 occurrences) +2. **Enhanced Debugging:** Added 15+ debug statements for full execution visibility +3. **Fixed Critical Bug:** Corrected argument parsing mismatch +4. **Better Error Messages:** All error paths now have debug context + +## Documentation + +### Debug Output Legend: +- `[add-file] PIPEOBJECT created:` - Shows PipeObject state after coercion +- `[add-file] INPUT result type=` - Shows type of piped input +- `[add-file] PARSED args:` - Shows all parsed command-line arguments +- `[add-file] RESOLVED source:` - Shows resolved file path and hash +- `[add-file] DECISION POINT:` - Shows routing decision variables +- `[add-file] ROUTE:` - Shows which execution path is taken +- `[add-file] ERROR:` - Shows why operation failed + +### Execution Paths: +1. **Provider Upload** (`provider_name` set) → `_handle_provider_upload()` +2. **Local Import** (`location == 'local'`) → `_handle_local_import()` +3. **Local Export** (location is path) → `_handle_local_export()` +4. **Storage Backend** (location is backend name) → `_handle_storage_backend()` ✓ +5. **Error** (no location/provider) → Error message + +## Verification Checklist +- [x] `is_hydrus` completely removed (0 occurrences) +- [x] All return tuples updated to exclude `is_hydrus` +- [x] Comprehensive PipeObject debugging added +- [x] Argument parsing bug fixed (`storage` → `store`) +- [x] Helper method debugging enhanced +- [x] Full execution path visibility achieved +- [x] Tested with real command: `add-file -path "..." -store test` ✓ + +## Related Refactorings +- **PIPELINE_REFACTOR_SUMMARY.md**: Removed backwards compatibility from pipeline.py +- **MODELS_REFACTOR_SUMMARY.md**: Refactored PipeObject to hash+store pattern + +This refactor completes the trilogy of modernization efforts, ensuring add-file.py fully embraces the hash+store canonical pattern with zero legacy code. diff --git a/ANALYSIS_export_store_vs_get_file.md b/ANALYSIS_export_store_vs_get_file.md new file mode 100644 index 0000000..b29ed48 --- /dev/null +++ b/ANALYSIS_export_store_vs_get_file.md @@ -0,0 +1,100 @@ +""" +Analysis: Export-Store vs Get-File cmdlet + +=== FINDINGS === + +1. 
GET-FILE ALREADY EXISTS AND IS SUFFICIENT + - Located: cmdlets/get_file.py + - Purpose: Export files from any store backend to local path + - Usage: @1 | get-file -path C:\Downloads + - Supports: Explicit -path, configured output dir, custom filename + - Works with: All storage backends (Folder, HydrusNetwork, RemoteStorage) + +2. ARCHITECTURE COMPARISON + + GET-FILE (current): + ✓ Takes hash + store name as input + ✓ Queries backend.get_metadata(hash) to find file details + ✓ For Folder: Returns direct Path from database + ✓ For HydrusNetwork: Downloads to temp location via HTTP + ✓ Outputs file to specified directory + ✓ Supports both input modes: explicit (-hash, -store) and piped results + + EXPORT-STORE (hypothetical): + ✗ Would be redundant with get-file + ✗ Would only work with HydrusNetwork (not Folder, Remote, etc.) + ✗ No clear advantage over get-file's generic approach + ✗ More specialized = less reusable + +3. RECOMMENDED PATTERN + + Sequence for moving files between stores: + + search-store -store home | get-file -path /tmp/staging | add-file -storage test + + This reads: + 1. Search Hydrus "home" instance + 2. Export matching files to staging + 3. Import to Folder "test" storage + +4. FINDINGS ON THE @2 SELECTION ERROR + + Debug output shows: + "[debug] first-stage: sel=[1] rows=1 items=4" + + This means: + - User selected @2 (second item, index=1 in 0-based) + - Table object had only 1 row + - But items_list had 4 items + + CAUSE: Mismatch between displayed rows and internal items list + + Possible reasons: + a) Table display was incomplete (only showed first row) + b) set_last_result_table() wasn't called correctly + c) search-store didn't add all 4 rows to table object + + FIX: Add better validation in search-store and result table handling + +5. DEBUG IMPROVEMENTS MADE + + Added to add_file.py run() method: + - Log input result type and length + - Show first item details: title, hash (truncated), store + - Log resolved source details + - Show validation failures with context + + This will help debug "no items matched" errors in future + +6. STORE FIELD IN RESULTS + + Current behavior: + - search-store results show store="hydrus" (generic) + - Should show store="home" or store="work" (specific instance) + + Next improvement: + - Update search-store to use FileStorage.list_backends() logic + - Use dynamic store detection like .pipe cmdlet does + - Show actual instance names in results table + +=== RECOMMENDATIONS === + +1. DO NOT create export-store cmdlet + - get-file is already generic and works for all backends + - Adding export-store adds confusion without benefit + +2. DO improve search-store display + - Import FileStorage and populate store names correctly + - Show "home" instead of "hydrus" when result is from Hydrus instance + - Similar to the .pipe cmdlet refactoring + +3. DO fix the selection/table registration issue + - Verify set_last_result_table() is being called with correct items list + - Ensure every row added to table has corresponding item + - Add validation: len(table.rows) == len(items_list) + +4. 
DO use the new debug logs in add_file + - Run: @2 | add-file -storage test + - Observe: [add-file] INPUT result details + - This will show if result is coming through correctly +""" diff --git a/CLI.py b/CLI.py index 483d298..da03ae6 100644 --- a/CLI.py +++ b/CLI.py @@ -12,6 +12,10 @@ from copy import deepcopy from importlib import import_module from pathlib import Path from typing import Any, Dict, List, Optional, Sequence, Set, TextIO, TYPE_CHECKING, cast +import time +import threading + +from helper.logger import debug try: import typer @@ -48,12 +52,30 @@ try: except ImportError: # pragma: no cover - optional dependency WorkerManager = None # type: ignore +try: + from helper.background_notifier import ensure_background_notifier +except ImportError: # pragma: no cover - optional dependency + ensure_background_notifier = lambda *_, **__: None # type: ignore + if TYPE_CHECKING: # pragma: no cover - typing helper from helper.worker_manager import WorkerManager as WorkerManagerType else: WorkerManagerType = Any +# Global toolbar updater callback for prompt_toolkit integration +_TOOLBAR_UPDATER: Optional[Callable[[str], None]] = None +from typing import Callable + + from config import get_local_storage_path, load_config +from helper.cmdlet_catalog import ( + import_cmd_module as _catalog_import_cmd_module, + list_cmdlet_metadata as _catalog_list_cmdlet_metadata, + list_cmdlet_names as _catalog_list_cmdlet_names, + get_cmdlet_arg_flags as _catalog_get_cmdlet_arg_flags, + get_cmdlet_arg_choices as _catalog_get_cmdlet_arg_choices, + get_cmdlet_metadata as _catalog_get_cmdlet_metadata, +) class _WorkerOutputMirror(io.TextIOBase): @@ -211,8 +233,8 @@ def _get_table_title_for_command(cmd_name: str, emitted_items: Optional[List[Any 'delete_tag': 'Results', 'add-url': 'Results', 'add_url': 'Results', - 'get-url': 'URLs', - 'get_url': 'URLs', + 'get-url': 'url', + 'get_url': 'url', 'delete-url': 'Results', 'delete_url': 'Results', 'get-note': 'Notes', @@ -293,6 +315,7 @@ def _ensure_worker_manager(config: Dict[str, Any]) -> Optional[WorkerManagerType _CLI_WORKER_MANAGER = WorkerManager(resolved_root, auto_refresh_interval=0.5) manager = _CLI_WORKER_MANAGER config['_worker_manager'] = manager + # Do NOT attach notifier here - it will be attached when we have session worker IDs if manager and not _CLI_ORPHAN_CLEANUP_DONE: try: manager.expire_running_workers( @@ -321,8 +344,22 @@ def _start_worker_session( completion_label: str, error_label: str, skip_logging_for: Optional[Set[str]] = None, + session_worker_ids: Optional[Set[str]] = None, ) -> Optional[_WorkerStageSession]: - """Create a worker session wrapper and mirror stdout/stderr.""" + """Create a worker session wrapper and mirror stdout/stderr. 
+ + Args: + worker_manager: The worker manager + worker_type: Type of worker (e.g., 'pipeline', 'search-file') + title: Human-readable title + description: Worker description + pipe_text: Pipeline/command text + config: CLI configuration dict + completion_label: Label for successful completion + error_label: Label for errors + skip_logging_for: Set of worker types to skip logging for + session_worker_ids: Optional set to register this worker's ID in (for filtering notifications) + """ if worker_manager is None: return None if skip_logging_for and worker_type in skip_logging_for: @@ -342,6 +379,11 @@ def _start_worker_session( except Exception as exc: print(f"[worker] Failed to track {worker_type}: {exc}", file=sys.stderr) return None + + # Register this worker ID with the session if provided + if session_worker_ids is not None: + session_worker_ids.add(worker_id) + logging_enabled = False try: handler = worker_manager.enable_logging_for_worker(worker_id) @@ -381,8 +423,15 @@ def _begin_worker_stage( config: Optional[Dict[str, Any]], command_text: str, ) -> Optional[_WorkerStageSession]: - """Start a worker entry for an individual CLI stage.""" + """Start a worker entry for an individual CLI stage. + + If a session_worker_ids set exists in config, register this stage with it. + """ description = " ".join(stage_tokens[1:]) if len(stage_tokens) > 1 else "(no args)" + session_worker_ids = None + if isinstance(config, dict): + session_worker_ids = config.get('_session_worker_ids') + return _start_worker_session( worker_manager, worker_type=cmd_name, @@ -393,6 +442,7 @@ def _begin_worker_stage( completion_label="Stage completed", error_label="Stage error", skip_logging_for={".worker", "worker", "workers"}, + session_worker_ids=session_worker_ids, ) @@ -401,7 +451,15 @@ def _begin_pipeline_worker( pipeline_text: str, config: Optional[Dict[str, Any]], ) -> Optional[_WorkerStageSession]: - """Start a worker that represents the entire pipeline execution.""" + """Start a worker that represents the entire pipeline execution. + + Also initializes a session_worker_ids set in config for tracking pipeline workers. + """ + # Create a session ID set for this pipeline execution + session_worker_ids: Set[str] = set() + if isinstance(config, dict): + config['_session_worker_ids'] = session_worker_ids + return _start_worker_session( worker_manager, worker_type="pipeline", @@ -411,75 +469,30 @@ def _begin_pipeline_worker( config=config, completion_label="Pipeline completed", error_label="Pipeline error", + session_worker_ids=session_worker_ids, ) def _get_cmdlet_names() -> List[str]: """Get list of all available cmdlet names.""" try: - from cmdlets import REGISTRY - return sorted(set(REGISTRY.keys())) + return _catalog_list_cmdlet_names() except Exception: return [] def _import_cmd_module(mod_name: str): """Import a cmdlet/native module from cmdlets or cmdnats packages.""" - # Normalize leading punctuation used in aliases (e.g., .pipe) - normalized = (mod_name or "").strip() - if normalized.startswith('.'): - normalized = normalized.lstrip('.') - # Convert hyphens to underscores to match module filenames - normalized = normalized.replace("-", "_") - if not normalized: + try: + return _catalog_import_cmd_module(mod_name) + except Exception: return None - # Prefer native cmdnats modules first so editable installs of this package - # don't shadow the in-repo implementations (e.g., .pipe autocomplete flags). 
- for package in ("cmdnats", "cmdlets", None): - try: - qualified = f"{package}.{normalized}" if package else normalized - return import_module(qualified) - except ModuleNotFoundError: - continue - except Exception: - continue - return None - def _get_cmdlet_args(cmd_name: str) -> List[str]: """Get list of argument flags for a cmdlet (with - and -- prefixes).""" try: - mod_name = cmd_name.replace("-", "_") - data = None - mod = _import_cmd_module(mod_name) - if mod: - data = getattr(mod, "CMDLET", None) - - if data: - # If CMDLET is an object (not dict), use build_flag_registry if available - if not isinstance(data, dict) and hasattr(data, 'build_flag_registry'): - registry = data.build_flag_registry() - # Flatten all flags into a single list - all_flags = [] - for flag_set in registry.values(): - all_flags.extend(flag_set) - return sorted(all_flags) - - # Fallback for dict format or old style - args_list = data.get("args", []) if isinstance(data, dict) else getattr(data, "args", []) - arg_names = [] - for arg in args_list: - if isinstance(arg, dict): - name = arg.get("name", "") - else: - name = getattr(arg, "name", "") - if name: - # Add both - and -- variants - arg_names.append(f"-{name}") - arg_names.append(f"--{name}") - return arg_names - return [] + return _catalog_get_cmdlet_arg_flags(cmd_name) except Exception: return [] @@ -491,10 +504,11 @@ def _get_arg_choices(cmd_name: str, arg_name: str) -> List[str]: normalized_arg = arg_name.lstrip("-") # Dynamic storage backends: use current config to enumerate available storages - if normalized_arg == "storage": + # Support both "storage" and "store" argument names + if normalized_arg in ("storage", "store"): try: - from helper.file_storage import FileStorage - storage = FileStorage(_load_cli_config()) + from helper.store import FileStorage + storage = FileStorage(_load_cli_config(), suppress_debug=True) backends = storage.list_backends() if backends: return backends @@ -504,7 +518,7 @@ def _get_arg_choices(cmd_name: str, arg_name: str) -> List[str]: # Dynamic search providers if normalized_arg == "provider": try: - from helper.search_provider import list_providers + from helper.provider import list_providers providers = list_providers(_load_cli_config()) available = [name for name, is_ready in providers.items() if is_ready] provider_choices = sorted(available) if available else sorted(providers.keys()) @@ -531,23 +545,8 @@ def _get_arg_choices(cmd_name: str, arg_name: str) -> List[str]: return sorted(meta_providers.keys()) except Exception: pass - mod = _import_cmd_module(mod_name) - data = getattr(mod, "CMDLET", None) if mod else None - if data: - args_list = data.get("args", []) if isinstance(data, dict) else getattr(data, "args", []) - for arg in args_list: - if isinstance(arg, dict): - arg_obj_name = arg.get("name", "") - else: - arg_obj_name = getattr(arg, "name", "") - - if arg_obj_name == arg_name: - # Found matching arg, get choices - if isinstance(arg, dict): - return arg.get("choices", []) - else: - return getattr(arg, "choices", []) - return [] + choices = _catalog_get_cmdlet_arg_choices(cmd_name, arg_name) + return choices or [] except Exception: return [] @@ -749,12 +748,30 @@ def _create_cmdlet_cli(): # Configurable prompt prompt_text = "🜂🜄🜁🜃|" - - # Pre-acquire Hydrus session key at startup (like hub-ui does) + + # Prepare startup table (always attempt; fall back gracefully if import fails) + startup_table = ResultTable("Startup checks") if RESULT_TABLE_AVAILABLE else None + if startup_table: + 
startup_table.set_no_choice(True).set_preserve_order(True) + + def _add_startup_check(name: str, status: str, detail: str = "") -> None: + if startup_table is None: + return + row = startup_table.add_row() + row.add_column("Check", name) + row.add_column("Status", status) + if detail: + row.add_column("Detail", detail) + + # Load config and initialize debug logging + config = {} try: config = _load_cli_config() + except Exception: + config = {} + + try: if config: - # Initialize debug logging from helper.logger import set_debug, debug debug_enabled = config.get("debug", False) set_debug(debug_enabled) @@ -765,100 +782,84 @@ def _create_cmdlet_cli(): from helper.hydrus import get_client # get_client(config) # Pre-acquire and cache session key # debug("✓ Hydrus session key acquired") - except RuntimeError as e: - # Hydrus is not available - this is expected and normal - # Don't show a message, just continue without it + except RuntimeError: + # Hydrus is not available - expected sometimes; continue pass except Exception as e: debug(f"⚠ Could not pre-acquire Hydrus session key: {e}") - - # Check MPV availability at startup - try: - from hydrus_health_check import ( - check_mpv_availability, - initialize_matrix_health_check, - initialize_hydrus_health_check, - initialize_local_library_scan, - initialize_cookies_check - ) - check_mpv_availability() - initialize_hydrus_health_check(config) - initialize_matrix_health_check(config) - initialize_cookies_check() - initialize_local_library_scan(config) - - # --- Startup File Counts --- - # Count Local Files - try: - from helper.file_storage import LocalStorageBackend - from config import get_local_storage_path - storage_path = get_local_storage_path(config) - if storage_path: - # Use LocalStorageBackend to perform the search as requested - # Pass a large limit to get all files - storage = LocalStorageBackend(location=storage_path) - local_files = storage.search("*", limit=100000) - print(f"Local: {len(local_files)}") - except Exception as e: - debug(f"⚠ Could not count local files: {e}") - # Count Hydrus Files (if available) - from hydrus_health_check import is_hydrus_available - if is_hydrus_available(): - try: - from helper.hydrus import get_client - client = get_client(config) - # Hydrus search for all files - # search_files returns IDs. - response = client.search_files(["system:everything"]) - hydrus_ids = response.get("file_ids", []) - print(f"Hydrus: {len(hydrus_ids)}") - except Exception as e: - debug(f"⚠ Could not count Hydrus files: {e}") - - # Count Debrid Magnets (if available) - try: - from config import get_api_key - from helper.alldebrid import AllDebridClient - - api_key = get_api_key(config, "AllDebrid", "Debrid.All-debrid") - if api_key: - # Use AllDebridClient to get magnets - # We can use magnet_status with ID or just list active magnets if there's an endpoint - # The magnet/status endpoint without ID returns all magnets - # But helper/alldebrid.py magnet_status requires ID. - # Let's check if we can use the client directly to call magnet/status without ID - # Or if there is a method for it. - # Looking at alldebrid.py, magnet_status takes magnet_id. - # But the API docs say /magnet/status returns all magnets if no ID provided? - # Actually, usually /magnet/status requires ID or 'all' or something. - # Let's try to use the client's _request method if possible, or instantiate client. - - # We'll instantiate client and try to list magnets. - # Since magnet_status in helper requires ID, we might need to bypass it or add a method. 
- # But wait, let's check if we can just use the raw request via client. - - client = AllDebridClient(api_key) - # The helper class doesn't expose a "list all" method easily, - # but we can try calling _request directly if we access it, or add a method. - # Accessing protected member _request is okay for this CLI script. - - # API: /magnet/status - resp = client._request('magnet/status') - if resp.get('status') == 'success': - data = resp.get('data', {}) - magnets = data.get('magnets', []) - if isinstance(magnets, list): - print(f"Debrid: {len(magnets)}") - elif isinstance(magnets, dict): - # Sometimes it returns a dict if single item? Or dict of magnets? - print(f"Debrid: {len(magnets)}") - except Exception as e: - # Don't show error if just not configured or failed - # debug(f"⚠ Could not count Debrid magnets: {e}") - pass + # Run startup checks and render table + try: + from hydrus_health_check import ( + initialize_mpv_health_check, + initialize_matrix_health_check, + initialize_hydrus_health_check, + initialize_local_library_scan, + initialize_cookies_check, + initialize_debrid_health_check, + ) - except Exception as e: + def _run_check(name: str, fn: Callable[[], Tuple[bool, Optional[str]]], skip_reason: Optional[str] = None) -> None: + if skip_reason: + _add_startup_check(name, "SKIPPED", skip_reason) + return + try: + ok, detail = fn() + status = "ENABLED" if name in {"MPV", "Hydrus", "Matrix", "Debrid"} else ("FOUND" if name == "Cookies" else "SCANNED") + if name == "Matrix": + status = "ENABLED" if ok else "DISABLED" + elif name == "Folder Stores": + status = "SCANNED" if ok else "SKIPPED" + elif name == "Cookies": + status = "FOUND" if ok else "MISSING" + elif name in {"MPV", "Hydrus", "Debrid"}: + status = "ENABLED" if ok else "DISABLED" + _add_startup_check(name, status, detail or "") + except Exception as exc: # Best-effort: never block startup + _add_startup_check(name, "ERROR", str(exc)) + + _run_check("MPV", lambda: initialize_mpv_health_check(emit_debug=False)) + + if config: + _run_check("Hydrus", lambda: initialize_hydrus_health_check(config, emit_debug=False)) + + # Hydrus instances - add individual rows for each instance + from hydrus_health_check import _SERVICE_STATE + for instance_name, instance_info in _SERVICE_STATE.get("hydrusnetwork_stores", {}).items(): + status = "ENABLED" if instance_info.get("ok") else "DISABLED" + _add_startup_check(f" {instance_name}", status, f"{instance_info.get('url')} - {instance_info.get('detail')}") + + _run_check("Matrix", lambda: initialize_matrix_health_check(config, emit_debug=False)) + + # Folder stores - add individual rows for each store + ok, detail = initialize_local_library_scan(config, emit_debug=False) + if ok or detail != "No folder stores configured": + # Add individual store rows + from hydrus_health_check import _SERVICE_STATE + for store_name, store_info in _SERVICE_STATE.get("folder_stores", {}).items(): + status = "SCANNED" if store_info.get("ok") else "ERROR" + _add_startup_check(f" {store_name}", status, f"{store_info.get('path')} - {store_info.get('detail')}") + if not _SERVICE_STATE.get("folder_stores"): + _add_startup_check("Folder Stores", "SCANNED", detail) + else: + _add_startup_check("Folder Stores", "SKIPPED", detail) + + _run_check("Debrid", lambda: initialize_debrid_health_check(config, emit_debug=False)) + else: + _add_startup_check("Hydrus", "SKIPPED", "No config loaded") + _add_startup_check("Matrix", "SKIPPED", "No config loaded") + _add_startup_check("Folder Stores", "SKIPPED", "No config loaded") 
+ _add_startup_check("Debrid", "SKIPPED", "No config loaded") + + _run_check("Cookies", lambda: initialize_cookies_check(emit_debug=False)) + + if startup_table is not None and startup_table.rows: + print() + print(startup_table.format_plain()) + + except Exception as e: + if config: + from helper.logger import debug # local import to avoid failing when debug disabled debug(f"⚠ Could not check service availability: {e}") except Exception: pass # Silently ignore if config loading fails @@ -873,12 +874,63 @@ def _create_cmdlet_cli(): 'value': "#9a3209", # red-ish 'string': "#6d0d93", # purple 'pipe': '#4caf50', # green + 'bottom-toolbar': 'noreverse', # Blend in with default background }) + # Toolbar state for background notifications + class ToolbarState: + text = "" + last_update_time = 0 + clear_timer: Optional[threading.Timer] = None + + toolbar_state = ToolbarState() + + def get_toolbar(): + # Only show toolbar if there's text AND it's within the 3-second window + if not toolbar_state.text or not toolbar_state.text.strip(): + return None # None completely hides the toolbar + elapsed = time.time() - toolbar_state.last_update_time + if elapsed > 3: + toolbar_state.text = "" + return None + return toolbar_state.text + + def update_toolbar(text: str): + text = text.strip() + toolbar_state.text = text + toolbar_state.last_update_time = time.time() + + # Cancel any pending clear timer + if toolbar_state.clear_timer: + toolbar_state.clear_timer.cancel() + toolbar_state.clear_timer = None + + # Schedule auto-clear in 3 seconds + if text: + def clear_toolbar(): + toolbar_state.text = "" + toolbar_state.clear_timer = None + if 'session' in locals() and session and hasattr(session, 'app') and session.app.is_running: + session.app.invalidate() + + toolbar_state.clear_timer = threading.Timer(3.0, clear_toolbar) + toolbar_state.clear_timer.daemon = True + toolbar_state.clear_timer.start() + + # Force redraw if the prompt is active + if 'session' in locals() and session and hasattr(session, 'app') and session.app.is_running: + session.app.invalidate() + + # Register global updater + global _TOOLBAR_UPDATER + _TOOLBAR_UPDATER = update_toolbar + session = PromptSession( completer=cast(Any, completer), lexer=MedeiaLexer(), - style=style + style=style, + bottom_toolbar=get_toolbar, + refresh_interval=0.5, # Refresh periodically ) def get_input(prompt: str = prompt_text) -> str: @@ -892,7 +944,7 @@ def _create_cmdlet_cli(): try: user_input = get_input(prompt_text).strip() except (EOFError, KeyboardInterrupt): - print("\nGoodbye!") + print("He who is victorious through deceit is defeated by the truth.") break if not user_input: @@ -900,7 +952,7 @@ def _create_cmdlet_cli(): low = user_input.lower() if low in {"exit", "quit", "q"}: - print("Goodbye!") + print("He who is victorious through deceit is defeated by the truth.") break if low in {"help", "?"}: @@ -924,8 +976,42 @@ def _create_cmdlet_cli(): if not tokens: continue + # Handle special @,, selector to restore next result table (forward navigation) + if len(tokens) == 1 and tokens[0] == "@,,": + try: + import pipeline as ctx + if ctx.restore_next_result_table(): + # Check for overlay table first + if hasattr(ctx, 'get_display_table'): + last_table = ctx.get_display_table() + else: + last_table = None + + if last_table is None: + last_table = ctx.get_last_result_table() + + + if last_table: + print() + # Also update current stage table so @N expansion works correctly + ctx.set_current_stage_table(last_table) + print(last_table.format_plain()) + else: + # 
Fallback to items if no table object + items = ctx.get_last_result_items() + if items: + # Clear current stage table if we only have items + ctx.set_current_stage_table(None) + print(f"Restored {len(items)} items (no table format available)") + else: + print("No forward history available", file=sys.stderr) + except Exception as exc: + print(f"Error restoring next table: {exc}", file=sys.stderr) + continue + # Handle special @.. selector to restore previous result table if len(tokens) == 1 and tokens[0] == "@..": + try: import pipeline as ctx if ctx.restore_previous_result_table(): @@ -1000,6 +1086,53 @@ def _execute_pipeline(tokens: list): from cmdlets import REGISTRY import json import pipeline as ctx + + def _resolve_items_for_selection(table_obj, items_list): + """Return items in the same order as the displayed table rows. + + When a user sees row #2 in the table and selects @2, they get row #2. + No mapping, no math - the displayed order IS the selection order. + + The table and items list should already be in sync after sorting. + """ + # Simply return items as-is - they should match the table row order + return items_list if items_list else [] + + def _debug_selection(label, selection_indices, table_obj, items_list, resolved_list=None): + """Print debug info for selection mapping when troubleshooting. + + Shows the correspondence between displayed row numbers, source indices, + and the actual items being selected to help diagnose reordering issues. + """ + try: + print(f"[debug] {label}: sel={selection_indices} rows={len(table_obj.rows) if table_obj and hasattr(table_obj, 'rows') else 'n/a'} items={len(items_list) if items_list is not None else 'n/a'}") + if table_obj and hasattr(table_obj, 'rows') and items_list: + # Show correspondence: displayed row # -> source_index -> item hash/title + for i in selection_indices: + if 0 <= i < len(table_obj.rows): + row = table_obj.rows[i] + src_idx = getattr(row, 'source_index', None) + print(f"[debug] @{i+1} -> row_index={i}, source_index={src_idx}", end='') + if src_idx is not None and 0 <= src_idx < len(items_list): + item = items_list[src_idx] + # Try to show hash/title for verification + if isinstance(item, dict): + hash_val = item.get('hash', item.get('hash_hex', 'N/A')) + title_val = item.get('title', 'N/A') + else: + hash_val = getattr(item, 'hash', getattr(item, 'hash_hex', 'N/A')) + title_val = getattr(item, 'title', 'N/A') + if hash_val != 'N/A': + hash_display = hash_val[:8] + '...' 
if len(str(hash_val)) > 8 else hash_val + print(f" -> hash={hash_display}, title={title_val}") + else: + print(f" -> title={title_val}") + else: + print(" -> [source_index out of range]") + if resolved_list is not None: + print(f"[debug] resolved_len={len(resolved_list)}") + except Exception as e: + print(f"[debug] error in _debug_selection: {e}") # Split tokens by pipe operator stages = [] @@ -1023,10 +1156,21 @@ def _execute_pipeline(tokens: list): # If a previous stage paused for selection, attach its remaining stages when the user runs only @N pending_tail = ctx.get_pending_pipeline_tail() if hasattr(ctx, 'get_pending_pipeline_tail') else [] pending_source = ctx.get_pending_pipeline_source() if hasattr(ctx, 'get_pending_pipeline_source') else None + # Ensure current stage table is restored before checking source (helps selection-only resumes) + if hasattr(ctx, 'get_current_stage_table') and not ctx.get_current_stage_table(): + display_table = ctx.get_display_table() if hasattr(ctx, 'get_display_table') else None + if display_table: + ctx.set_current_stage_table(display_table) + else: + last_table = ctx.get_last_result_table() if hasattr(ctx, 'get_last_result_table') else None + if last_table: + ctx.set_current_stage_table(last_table) + current_source = ctx.get_current_stage_table_source_command() if hasattr(ctx, 'get_current_stage_table_source_command') else None + effective_source = current_source or (ctx.get_last_result_table_source_command() if hasattr(ctx, 'get_last_result_table_source_command') else None) selection_only = len(stages) == 1 and stages[0] and stages[0][0].startswith('@') if pending_tail and selection_only: - if pending_source and current_source and current_source == pending_source: + if (pending_source is None) or (effective_source and pending_source == effective_source): stages.extend(pending_tail) if hasattr(ctx, 'clear_pending_pipeline_tail'): ctx.clear_pending_pipeline_tail() @@ -1035,6 +1179,9 @@ def _execute_pipeline(tokens: list): # Load config relative to CLI root config = _load_cli_config() + if isinstance(config, dict): + # Request terminal-only background updates for this pipeline session + config['_quiet_background_output'] = True # Check if the first stage has @ selection - if so, apply it before pipeline execution first_stage_tokens = stages[0] if stages else [] @@ -1079,6 +1226,31 @@ def _execute_pipeline(tokens: list): worker_manager = _ensure_worker_manager(config) pipeline_text = " | ".join(" ".join(stage) for stage in stages) pipeline_session = _begin_pipeline_worker(worker_manager, pipeline_text, config) + + # Update background notifier with session worker IDs so it only shows workers from this pipeline + if pipeline_session and worker_manager and isinstance(config, dict): + session_worker_ids = config.get('_session_worker_ids') + if session_worker_ids: + try: + # Use toolbar updater if available + output_fn = _TOOLBAR_UPDATER + + # If using toolbar, we want continuous updates, not just terminal completion + quiet_mode = bool(config.get('_quiet_background_output')) + terminal_only = quiet_mode and not _TOOLBAR_UPDATER + + kwargs = { + "session_worker_ids": session_worker_ids, + "only_terminal_updates": terminal_only, + "overlay_mode": bool(output_fn), + } + if output_fn: + kwargs["output"] = output_fn + + ensure_background_notifier(worker_manager, **kwargs) + except Exception: + pass + pipeline_status = "completed" pipeline_error = "" @@ -1086,9 +1258,10 @@ def _execute_pipeline(tokens: list): if first_stage_selection_indices: # Ensure we have a 
table context for expansion from previous command if not ctx.get_current_stage_table_source_command(): - last_table = ctx.get_last_result_table() - if last_table: - ctx.set_current_stage_table(last_table) + display_table = ctx.get_display_table() if hasattr(ctx, 'get_display_table') else None + table_for_stage = display_table or ctx.get_last_result_table() + if table_for_stage: + ctx.set_current_stage_table(table_for_stage) # Special check for YouTube search results BEFORE command expansion # If we are selecting from a YouTube search, we want to force auto-piping to .pipe @@ -1117,7 +1290,9 @@ def _execute_pipeline(tokens: list): if selected_row_args: # Success: Reconstruct the command with selection args - expanded_stage = [source_cmd] + source_args + selected_row_args + # Handle case where source_cmd might be a list (though it should be a string) + cmd_list = source_cmd if isinstance(source_cmd, list) else [source_cmd] + expanded_stage = cmd_list + source_args + selected_row_args if first_stage_had_extra_args: # Append extra args from the first stage (e.g. @3 arg1 arg2) @@ -1127,7 +1302,7 @@ def _execute_pipeline(tokens: list): # Insert expanded command as first stage (it was popped earlier if it was only @N) stages.insert(0, expanded_stage) - log_msg = f"@N expansion: {source_cmd} + {' '.join(selected_row_args)}" + log_msg = f"@N expansion: {source_cmd} + {' '.join(str(x) for x in selected_row_args)}" worker_manager.log_step(pipeline_session.worker_id, log_msg) if pipeline_session and worker_manager else None first_stage_selection_indices = [] # Clear, we've expanded it @@ -1137,12 +1312,27 @@ def _execute_pipeline(tokens: list): if not command_expanded and first_stage_selection_indices: # FALLBACK: Item-based selection (filter piped items directly) last_piped_items = ctx.get_last_result_items() + # Align to the displayed row order so @N matches what the user sees + stage_table = ctx.get_current_stage_table() + if not stage_table and hasattr(ctx, 'get_display_table'): + stage_table = ctx.get_display_table() + if not stage_table: + stage_table = ctx.get_last_result_table() + resolved_items = _resolve_items_for_selection(stage_table, last_piped_items) + _debug_selection("first-stage", first_stage_selection_indices, stage_table, last_piped_items, resolved_items) if last_piped_items: try: - filtered = [last_piped_items[i] for i in first_stage_selection_indices if 0 <= i < len(last_piped_items)] + filtered = [resolved_items[i] for i in first_stage_selection_indices if 0 <= i < len(resolved_items)] if filtered: - piped_result = filtered if len(filtered) > 1 else filtered[0] - log_msg = f"Applied @N selection {' | '.join('@' + str(i+1) for i in first_stage_selection_indices)}" + # Convert filtered items to PipeObjects for consistent pipeline handling + from cmdlets._shared import coerce_to_pipe_object + filtered_pipe_objs = [coerce_to_pipe_object(item) for item in filtered] + piped_result = filtered_pipe_objs if len(filtered_pipe_objs) > 1 else filtered_pipe_objs[0] + # Build log message with proper string conversion + selection_parts = [] + for i in first_stage_selection_indices: + selection_parts.append(f'@{i+1}') + log_msg = f"Applied @N selection {' | '.join(selection_parts)}" worker_manager.log_step(pipeline_session.worker_id, log_msg) if pipeline_session and worker_manager else None # Special case for youtube search results in fallback mode: auto-pipe to .pipe @@ -1176,7 +1366,7 @@ def _execute_pipeline(tokens: list): cmd_name = stage_tokens[0].replace("_", "-").lower() stage_args = 
stage_tokens[1:] - # Bare '@' means "use the subject for the current result table" (e.g., the file whose tags/URLs are shown) + # Bare '@' means "use the subject for the current result table" (e.g., the file whose tags/url are shown) if cmd_name == "@": subject = ctx.get_last_result_subject() if subject is None: @@ -1297,11 +1487,22 @@ def _execute_pipeline(tokens: list): selection_indices = sorted([i - 1 for i in selection]) else: selection_indices = [] + # Align indices to the displayed row order + stage_table = ctx.get_current_stage_table() + if not stage_table and hasattr(ctx, 'get_display_table'): + stage_table = ctx.get_display_table() + if not stage_table: + stage_table = ctx.get_last_result_table() + resolved_list = _resolve_items_for_selection(stage_table, list(piped_result_list)) + _debug_selection("pipeline-stage", selection_indices, stage_table, piped_result_list, resolved_list) try: - filtered = [piped_result_list[i] for i in selection_indices if 0 <= i < len(piped_result_list)] + filtered = [resolved_list[i] for i in selection_indices if 0 <= i < len(resolved_list)] if filtered: - piped_result = filtered if len(filtered) > 1 else filtered[0] + # Convert filtered items to PipeObjects for consistent pipeline handling + from cmdlets._shared import coerce_to_pipe_object + filtered_pipe_objs = [coerce_to_pipe_object(item) for item in filtered] + piped_result = filtered_pipe_objs if len(filtered_pipe_objs) > 1 else filtered_pipe_objs[0] print(f"Selected {len(filtered)} item(s) using {cmd_name}") continue else: @@ -1324,11 +1525,7 @@ def _execute_pipeline(tokens: list): pipeline_error = f"Unknown command {cmd_name}" return - # Create pipeline context for this stage - is_last_stage = (stage_index == len(stages) - 1) - pipeline_ctx = ctx.PipelineStageContext(stage_index=stage_index, total_stages=len(stages)) - ctx.set_stage_context(pipeline_ctx) - ctx.set_active(True) + debug(f"[pipeline] Stage {stage_index}: cmd_name={cmd_name}, cmd_fn type={type(cmd_fn)}, piped_result type={type(piped_result)}, stage_args={stage_args}") # Execute the cmdlet with piped input stage_session: Optional[_WorkerStageSession] = None @@ -1348,8 +1545,18 @@ def _execute_pipeline(tokens: list): config=config, command_text=" ".join(stage_tokens), ) + + # Create pipeline context for this stage with the worker ID + is_last_stage = (stage_index == len(stages) - 1) + stage_worker_id = stage_session.worker_id if stage_session else (pipeline_session.worker_id if pipeline_session else None) + pipeline_ctx = ctx.PipelineStageContext(stage_index=stage_index, total_stages=len(stages), worker_id=stage_worker_id) + ctx.set_stage_context(pipeline_ctx) try: + if isinstance(config, dict): + config['_pipeline_remaining_after_current'] = stages[stage_index + 1:] + debug(f"[pipeline] Calling cmd_fn({type(piped_result).__name__}, {stage_args}, config)") ret_code = cmd_fn(piped_result, stage_args, config) + debug(f"[pipeline] cmd_fn returned: {ret_code} (type: {type(ret_code)})") # Store emitted results for next stage (or display if last stage) if pipeline_ctx.emits: @@ -1361,7 +1568,7 @@ def _execute_pipeline(tokens: list): # Only set source_command for search/filter commands (not display-only or action commands) # This preserves context so @N refers to the original search, not intermediate results selectable_commands = { - 'search-file', 'download-data', 'search_file', 'download_data', + 'search-file', 'download-data', 'download-media', 'search_file', 'download_data', 'download_media', '.config', '.worker' } # Display-only 
commands (just show data, don't modify or search) @@ -1436,7 +1643,22 @@ def _execute_pipeline(tokens: list): if not is_last_stage: stage_table_source = ctx.get_current_stage_table_source_command() row_has_selection = ctx.get_current_stage_table_row_selection_args(0) is not None - if stage_table_source and row_has_selection: + stage_table = ctx.get_current_stage_table() + + # Check if next stage is @N selection - if so, don't pause, let it process + next_stage = stages[stage_index + 1] if stage_index + 1 < len(stages) else None + next_is_selection = next_stage and next_stage[0] and next_stage[0][0].startswith('@') + + debug(f"[pipeline] Stage {stage_index} pause check: source={stage_table_source}, has_selection={row_has_selection}, table={stage_table is not None}, next_is_selection={next_is_selection}") + + if stage_table_source and row_has_selection and not next_is_selection: + # Display the table before pausing + if RESULT_TABLE_AVAILABLE and stage_table is not None: + debug(f"[pipeline] Displaying stage table with {len(stage_table.rows) if hasattr(stage_table, 'rows') else 0} rows") + print() + print(stage_table.format_plain()) + print() + pending_tail = stages[stage_index + 1:] if pending_tail and pending_tail[0] and pending_tail[0][0].startswith('@'): pending_tail = pending_tail[1:] @@ -1451,11 +1673,27 @@ def _execute_pipeline(tokens: list): pass print("Pipeline paused: select a format with @N to continue remaining stages") return + + # If the stage requested pipeline abort (e.g., queued async work), stop processing further stages + if getattr(pipeline_ctx, "abort_remaining", False): + if pipeline_session and worker_manager: + try: + worker_manager.log_step( + pipeline_session.worker_id, + f"{stage_label} queued background work; skipping remaining stages", + ) + except Exception: + pass + return if ret_code != 0: stage_status = "failed" stage_error = f"exit code {ret_code}" - print(f"[stage {stage_index} exit code: {ret_code}]\n") + # Only print exit code if it's an integer (not the cmdlet object) + if isinstance(ret_code, int): + print(f"[stage {stage_index} exit code: {ret_code}]\n") + else: + print(f"[stage {stage_index} failed]\n") if pipeline_session: pipeline_status = "failed" pipeline_error = f"{stage_label} failed ({stage_error})" @@ -1532,8 +1770,27 @@ def _execute_cmdlet(cmd_name: str, args: list): import json import pipeline as ctx + # Ensure native commands (cmdnats) are loaded + try: + from helper.cmdlet_catalog import ensure_registry_loaded as _ensure_registry_loaded + _ensure_registry_loaded() + except Exception: + pass + # Get the cmdlet function cmd_fn = REGISTRY.get(cmd_name) + if not cmd_fn: + # Attempt lazy import of the module and retry + from helper.cmdlet_catalog import import_cmd_module as _catalog_import + try: + mod = _catalog_import(cmd_name) + data = getattr(mod, "CMDLET", None) if mod else None + if data and hasattr(data, "exec") and callable(getattr(data, "exec")): + run_fn = getattr(data, "exec") + REGISTRY[cmd_name] = run_fn + cmd_fn = run_fn + except Exception: + pass if not cmd_fn: print(f"Unknown command: {cmd_name}\n") return @@ -1577,9 +1834,6 @@ def _execute_cmdlet(cmd_name: str, args: list): # Get piped items from previous command results piped_items = ctx.get_last_result_items() - pipeline_ctx = ctx.PipelineStageContext(stage_index=0, total_stages=1) - ctx.set_stage_context(pipeline_ctx) - ctx.set_active(True) # Create result object - pass full list (or filtered list if @ selection used) to cmdlet result = None @@ -1599,6 +1853,11 @@ def 
_execute_cmdlet(cmd_name: str, args: list): config=config, command_text=" ".join([cmd_name, *filtered_args]).strip() or cmd_name, ) + + # Create pipeline context with the worker ID + stage_worker_id = stage_session.worker_id if stage_session else None + pipeline_ctx = ctx.PipelineStageContext(stage_index=0, total_stages=1, worker_id=stage_worker_id) + ctx.set_stage_context(pipeline_ctx) stage_status = "completed" stage_error = "" @@ -1629,7 +1888,7 @@ def _execute_cmdlet(cmd_name: str, args: list): # Only set source_command for search/filter commands (not display-only or action commands) # This preserves context so @N refers to the original search, not intermediate results selectable_commands = { - 'search-file', 'download-data', 'search_file', 'download_data', + 'search-file', 'download-data', 'download-media', 'search_file', 'download_data', 'download_media', '.config', '.worker' } # Display-only commands (excluding get-tag which manages its own table) @@ -1688,7 +1947,7 @@ def _execute_cmdlet(cmd_name: str, args: list): # Store emitted items for @ selection selectable_commands = { - 'search-file', 'download-data', 'search_file', 'download_data', + 'search-file', 'download-data', 'download-media', 'search_file', 'download_data', 'download_media', '.config', '.worker' } display_only_commands = { @@ -1730,122 +1989,25 @@ def _execute_cmdlet(cmd_name: str, args: list): def _show_cmdlet_list(): """Display available cmdlets with full metadata: cmd:name alias:aliases args:args.""" try: - from cmdlets import REGISTRY - import os - - cmdlet_info = {} - base_dir = os.path.dirname(__file__) - - def _collect_cmdlets_from_dir(folder: str, package: str) -> None: - if not os.path.isdir(folder): - return - for filename in os.listdir(folder): - if filename.endswith(".py") and not filename.startswith("_") and filename != "__init__.py": - mod_name = filename[:-3] - try: - mod = import_module(f"{package}.{mod_name}") - if hasattr(mod, "CMDLET"): - cmdlet = getattr(mod, "CMDLET") - if hasattr(cmdlet, "name"): - cmd_name = cmdlet.name - aliases = getattr(cmdlet, "aliases", []) if hasattr(cmdlet, "aliases") else [] - - arg_names = [] - if hasattr(cmdlet, "args"): - for arg in cmdlet.args: - if hasattr(arg, "name"): - arg_names.append(arg.name) - elif isinstance(arg, dict): - arg_names.append(arg.get("name", "")) - if cmd_name not in cmdlet_info: - cmdlet_info[cmd_name] = { - "aliases": aliases, - "args": arg_names, - } - except Exception: - pass - - _collect_cmdlets_from_dir(os.path.join(base_dir, "cmdlets"), "cmdlets") - _collect_cmdlets_from_dir(os.path.join(base_dir, "cmdnats"), "cmdnats") - - # Also check root-level cmdlets (search_*, etc) - # Note: search_libgen, search_soulseek, and search_debrid are consolidated into search-file with providers - for mod_name in ["select_cmdlet", "unlock_link"]: - try: - mod = import_module(mod_name) - if hasattr(mod, "CMDLET"): - cmdlet = getattr(mod, "CMDLET") - if hasattr(cmdlet, "name"): - cmd_name = cmdlet.name - aliases = [] - if hasattr(cmdlet, "aliases"): - aliases = cmdlet.aliases - - # Extract argument names - arg_names = [] - if hasattr(cmdlet, "args"): - for arg in cmdlet.args: - if hasattr(arg, "name"): - arg_names.append(arg.name) - elif isinstance(arg, dict): - arg_names.append(arg.get("name", "")) - - if cmd_name not in cmdlet_info: - cmdlet_info[cmd_name] = { - "aliases": aliases, - "args": arg_names, - } - except Exception: - pass - - # Fallback: Show registry entries that we don't have full metadata for - # This ensures all registered cmdlets are 
shown even if they have import errors - seen_names = set() - for cmd_name in cmdlet_info.keys(): - seen_names.add(cmd_name) - - # For aliases, add them too - for cmd_name in list(cmdlet_info.keys()): - for alias in cmdlet_info[cmd_name].get("aliases", []): - seen_names.add(alias) - - # Now check registry for any missing cmdlets - for reg_name in REGISTRY.keys(): - if reg_name not in seen_names: - # Add this as a basic cmdlet entry - # Try to find a matching primary name - found_match = False - for cmd_name in cmdlet_info.keys(): - if reg_name in cmdlet_info[cmd_name].get("aliases", []): - found_match = True - break - - if not found_match: - # This is a top-level cmdlet not in our collection - cmdlet_info[reg_name] = { - "aliases": [], - "args": [], - } - + metadata = _catalog_list_cmdlet_metadata() print("\nAvailable cmdlets:") - for cmd_name in sorted(cmdlet_info.keys()): - info = cmdlet_info[cmd_name] - aliases = info["aliases"] - args = info["args"] - - # Build the display string + for cmd_name in sorted(metadata.keys()): + info = metadata[cmd_name] + aliases = info.get("aliases", []) + args = info.get("args", []) + display = f" cmd:{cmd_name}" - if aliases: - alias_str = ", ".join(aliases) - display += f" alias:{alias_str}" - + display += f" alias:{', '.join(aliases)}" if args: - args_str = ", ".join(args) - display += f" args:{args_str}" - + arg_names = [a.get("name") for a in args if a.get("name")] + if arg_names: + display += f" args:{', '.join(arg_names)}" + summary = info.get("summary") + if summary: + display += f" - {summary}" print(display) - + print() except Exception as e: print(f"Error: {e}\n") @@ -1854,22 +2016,10 @@ def _show_cmdlet_list(): def _show_cmdlet_help(cmd_name: str): """Display help for a cmdlet.""" try: - mod_name = cmd_name.replace("-", "_") - mod = _import_cmd_module(mod_name) - data = getattr(mod, "CMDLET", None) if mod else None - if data: - _print_metadata(cmd_name, data) + meta = _catalog_get_cmdlet_metadata(cmd_name) + if meta: + _print_metadata(cmd_name, meta) return - - from cmdlets import REGISTRY - cmd_fn = REGISTRY.get(cmd_name) - if cmd_fn: - owner = import_module(getattr(cmd_fn, "__module__", "")) - data = getattr(owner, "CMDLET", None) - if data: - _print_metadata(cmd_name, data) - return - print(f"Unknown command: {cmd_name}\n") except Exception as e: print(f"Error: {e}\n") @@ -1953,12 +2103,13 @@ def _parse_selection_syntax(token: str) -> Optional[Set[int]]: Returns: Set of 1-based indices (for concrete selections like @1, @2-5, @3,5,7) - None for special cases: @* (all), @.. (restore previous) + None for special cases: @* (all), @.. (restore previous), @,, (restore next) None for invalid format Special handling: - @* returns None and should be handled as "select all current items" - - @.. returns None and is handled as "restore previous table" (separate code path) + - @.. returns None and is handled as "restore previous table" (backward navigation) + - @,, returns None and is handled as "restore next table" (forward navigation) - Invalid selections like @-1 or @a return None and are treated as invalid args Examples: @@ -1968,7 +2119,8 @@ def _parse_selection_syntax(token: str) -> Optional[Set[int]]: "@2,5,6" → {2, 5, 6} "@2-5,8,10-12" → {2, 3, 4, 5, 8, 10, 11, 12} "@*" → None (caller checks token=="@*" to handle as "all") - "@.." → None (separate code path) + "@.." 
→ None (backward navigation) + "@,," → None (forward navigation) """ if not token.startswith("@"): return None @@ -1976,8 +2128,9 @@ def _parse_selection_syntax(token: str) -> Optional[Set[int]]: selector = token[1:].strip() # Special case: @.. means restore previous result table (handled separately) + # Special case: @,, means restore next result table (handled separately) # Special case: @* means all items (should be converted to actual list by caller) - if selector in (".", "*"): + if selector in (".", ",", "*"): return None indices = set() diff --git a/DEBUG_IMPROVEMENTS_SUMMARY.md b/DEBUG_IMPROVEMENTS_SUMMARY.md new file mode 100644 index 0000000..29dee1e --- /dev/null +++ b/DEBUG_IMPROVEMENTS_SUMMARY.md @@ -0,0 +1,127 @@ +DEBUGGING IMPROVEMENTS IMPLEMENTED +================================== + +1. ENHANCED ADD-FILE DEBUG LOGGING + ================================= + + Now logs when cmdlet is executed: + - INPUT result type (list, dict, PipeObject, None, etc.) + - List length if applicable + - First item details: title, hash (first 12 chars), store + - Resolved source: path/URL, whether from Hydrus, hash value + - Error details if resolution or validation fails + + Example output: + [add-file] INPUT result type=list + [add-file] INPUT result is list with 4 items + [add-file] First item details: title=i ve been down, hash=b0780e68a2dc..., store=hydrus + [add-file] RESOLVED source: path=None, is_hydrus=True, hash=b0780e68a2dc... + [add-file] ERROR: Source validation failed for None + + This will help identify: + - Where the result is being lost + - If hash is being extracted correctly + - Which store the file comes from + +2. ENHANCED SEARCH-STORE DEBUG LOGGING + =================================== + + Now logs after building results: + - Number of table rows added + - Number of items in results_list + - WARNING if there's a mismatch + + Example output: + [search-store] Added 4 rows to table, 4 items to results_list + [search-store] WARNING: Table/items mismatch! rows=1 items=4 + + This directly debugs the "@2 selection" issue: + - Will show if table/items registration is correct + - Helps diagnose why only 1 row shows when 4 items exist + +3. ROOT CAUSE ANALYSIS: "@2 SELECTION FAILED" + ========================================== + + Your debug output showed: + [debug] first-stage: sel=[1] rows=1 items=4 + + This means: + - search-store found 4 results + - But only 1 row registered in table for selection + - User selected @2 (index 1) which is valid (0-4) + - But table only had 1 row, so selection was out of bounds + + The mismatch is between: + - What's displayed to the user (seems like 4 rows based on output) + - What's registered for @N selection (only 1 row) + + With the new debug logging, running the same command will show: + [search-store] Added X rows to table, Y items to results_list + + If X=1 and Y=4, then search-store isn't adding all results to the table + If X=4 and Y=4, then the issue is in CLI selection logic + +4. NEXT DEBUGGING STEPS + =================== + + To diagnose the "@2 selection" issue: + + 1. Run: search-store system:limit=5 + 2. Look for: [search-store] Added X rows... + 3. Compare X to number of rows shown in table + 4. If X < display_rows: Problem is in table.add_result() + 5. If X == display_rows: Problem is in CLI selection mapping + + After running add-file: + + 1. Run: @2 | add-file -storage test + 2. Look for: [add-file] INPUT result details + 3. Check if hash, title, and store are extracted + 4. If missing: Problem is in result object structure + 5. 
If present: Problem is in _resolve_source() logic + +5. ARCHITECTURE DECISION: EXPORT-STORE CMDLET + ========================================== + + Recommendation: DO NOT CREATE EXPORT-STORE + + Reason: get-file already provides this functionality + + get-file: + - Takes hash + store name + - Retrieves from any backend (Folder, HydrusNetwork, Remote, etc.) + - Exports to specified path + - Works for all storage types + - Already tested and working + + Example workflow for moving files between stores: + $ search-store -store home | get-file -path /tmp | add-file -storage test + + This is cleaner than having specialized export-store cmdlet + +6. FUTURE IMPROVEMENTS + =================== + + Based on findings: + + a) Update search-store to show specific instance names + Currently: store="hydrus" + Should be: store="home" or store="work" + Implementation: Use FileStorage to detect which instance + + b) Fix selection/table registration validation + Add assertion: len(table.rows) == len(results_list) + Fail fast if mismatch detected + + c) Enhance add-file to handle Hydrus imports + Current: Needs file path on local filesystem + Future: Should support add-file -hash -store home + This would copy from one Hydrus instance to another + +SUMMARY +======= + +✓ Better debug logging in add-file and search-store +✓ Root cause identified for "@2 selection" issue +✓ Confirmed get-file is sufficient (no export-store needed) +✓ Path forward: Use new logging to identify exact failure point diff --git a/HASH_STORE_PRIORITY_PATTERN.md b/HASH_STORE_PRIORITY_PATTERN.md new file mode 100644 index 0000000..d8245a7 --- /dev/null +++ b/HASH_STORE_PRIORITY_PATTERN.md @@ -0,0 +1,222 @@ +# Hash+Store Priority Pattern & Database Connection Fixes + +## Summary of Changes + +### 1. Database Connection Leak Fixes ✅ + +**Problem:** FolderDB connections were not being properly closed, causing database locks and resource leaks. + +**Files Fixed:** +- `cmdlets/search_store.py` - Now uses `with FolderDB()` context manager +- `cmdlets/search_provider.py` - Now uses `with FolderDB()` context manager +- `helper/store.py` (Folder.__init__) - Now uses `with FolderDB()` for temporary connections +- `helper/worker_manager.py` - Added `close()` method and context manager support (`__enter__`/`__exit__`) + +**Pattern:** +```python +# OLD (leaked connections): +db = FolderDB(path) +try: + db.do_something() +finally: + if db: + db.close() # Could be skipped if exception occurs early + +# NEW (guaranteed cleanup): +with FolderDB(path) as db: + db.do_something() +# Connection automatically closed when exiting block +``` + +### 2. Hash+Store Priority Pattern ✅ + +**Philosophy:** The hash+store pair is the **canonical identifier** for files across all storage backends. Sort order and table structure should not matter because we're always using hash+store. + +**Why This Matters:** +- `@N` selections pass hash+store from search results +- Hash+store works consistently across all backends (Hydrus, Folder, Remote) +- Path-based resolution is fragile (files move, temp paths expire, etc.) 
+- Hash+store never changes and uniquely identifies content + +**Updated Resolution Priority in `add_file.py`:** + +```python +def _resolve_source(result, path_arg, pipe_obj, config): + """ + PRIORITY 1: hash+store from result dict (most reliable for @N selections) + - Checks result.get("hash") and result.get("store") + - Uses FileStorage[store].get_file(hash) to retrieve + - Works for: Hydrus, Folder, Remote backends + + PRIORITY 2: Explicit -path argument + - Direct path specified by user + + PRIORITY 3: pipe_obj.file_path + - Legacy path from previous pipeline stage + + PRIORITY 4: Hydrus hash from pipe_obj.extra + - Fallback for older Hydrus workflows + + PRIORITY 5: String/list result parsing + - Last resort for simple string paths + """ +``` + +**Example Flow:** +```bash +# User searches and selects result +$ search-store system:limit=5 + +# Result items include: +{ + "hash": "a1b2c3d4...", + "store": "home", # Specific Hydrus instance + "title": "example.mp4" +} + +# User selects @2 (index 1) +$ @2 | add-file -storage test + +# add-file now: +1. Extracts hash="a1b2c3d4..." store="home" from result dict +2. Calls FileStorage["home"].get_file("a1b2c3d4...") +3. Retrieves actual file path from "home" backend +4. Proceeds with copy/upload to "test" storage +``` + +### 3. Benefits of This Approach + +**Consistency:** +- @N selection always uses the same hash+store regardless of display order +- No confusion about which row index maps to which file +- Table synchronization issues (rows vs items) don't break selection + +**Reliability:** +- Hash uniquely identifies content (SHA256 collision is effectively impossible) +- Store identifies the authoritative source backend +- No dependency on temporary paths or file locations + +**Multi-Instance Support:** +- Works seamlessly with multiple Hydrus instances ("home", "work") +- Works with mixed backends (Hydrus + Folder + Remote) +- Each backend can independently retrieve file by hash + +**Debugging:** +- Hash+store are visible in debug logs: `[add-file] Using hash+store: hash=a1b2c3d4..., store=home` +- Easy to trace which backend is being queried +- Clear error messages when hash+store lookup fails + +## How @N Selection Works Now + +### Selection Process: + +1. **Search creates result list with hash+store:** + ```python + results_list = [ + {"hash": "abc123...", "store": "home", "title": "file1.mp4"}, + {"hash": "def456...", "store": "default", "title": "file2.jpg"}, + {"hash": "ghi789...", "store": "test", "title": "file3.png"}, + ] + ``` + +2. **User selects @2 (second item, index 1):** + - CLI extracts: `result = {"hash": "def456...", "store": "default", "title": "file2.jpg"}` + - Passes this dict to the next cmdlet + +3. **Next cmdlet receives dict with hash+store:** + ```python + def run(self, result, args, config): + # result is the dict from selection + file_hash = result.get("hash") # "def456..." + store_name = result.get("store") # "default" + + # Use hash+store to retrieve file + backend = FileStorage(config)[store_name] + file_path = backend.get_file(file_hash) + ``` + +### Why This is Better Than Path-Based: + +**Path-Based (OLD):** +```python +# Fragile: path could be temp file, symlink, moved file, etc. +result = {"file_path": "/tmp/hydrus-abc123.mp4"} +# What if file was moved? What if it's a temp path that expires? 
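+# e.g. open(result["file_path"]) raises FileNotFoundError once the temp file has been cleaned up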
+``` + +**Hash+Store (NEW):** +```python +# Reliable: hash+store always works regardless of current location +result = {"hash": "abc123...", "store": "home"} +# Backend retrieves current location from its database/API +``` + +## Testing the Fixes + +### 1. Test Database Connections: + +```powershell +# Search multiple times and check for database locks +search-store system:limit=5 +search-store system:limit=5 +search-store system:limit=5 + +# Should complete without "database is locked" errors +``` + +### 2. Test Hash+Store Selection: + +```powershell +# Search and select +search-store system:limit=5 +@2 | get-metadata + +# Should show metadata for the selected file using hash+store +# Debug log should show: [add-file] Using hash+store from result: hash=... +``` + +### 3. Test WorkerManager Cleanup: + +```powershell +# In Python script: +from helper.worker_manager import WorkerManager +from pathlib import Path + +with WorkerManager(Path("C:/path/to/library")) as wm: + # Do work + pass +# Database automatically closed when exiting block +``` + +## Cmdlets That Already Use Hash+Store Pattern + +These cmdlets already correctly extract hash+store: +- ✅ `get-file` - Export file via hash+store +- ✅ `get-metadata` - Retrieve metadata via hash+store +- ✅ `get-url` - Get url via hash+store +- ✅ `get-tag` - Get tags via hash+store +- ✅ `add-url` - Add URL via hash+store +- ✅ `delete-url` - Delete URL via hash+store +- ✅ `add-file` - **NOW UPDATED** to prioritize hash+store + +## Future Improvements + +1. **Make hash+store mandatory in result dicts:** + - All search cmdlets should emit hash+store + - Validate that result dicts include these fields + +2. **Add hash+store validation:** + - Warn if hash is not 64-char hex string + - Warn if store is not a registered backend + +3. **Standardize error messages:** + - "File not found via hash+store: hash=abc123 store=home" + - Makes debugging much clearer + +4. **Consider deprecating path-based workflows:** + - Migrate legacy cmdlets to hash+store pattern + - Remove path-based fallbacks once all cmdlets updated + +## Key Takeaway + +**The hash+store pair is now the primary way to identify and retrieve files across the entire system.** This makes the codebase more reliable, consistent, and easier to debug. Database connections are properly cleaned up to prevent locks and resource leaks. diff --git a/MODELS_REFACTOR_SUMMARY.md b/MODELS_REFACTOR_SUMMARY.md new file mode 100644 index 0000000..0283f3a --- /dev/null +++ b/MODELS_REFACTOR_SUMMARY.md @@ -0,0 +1,127 @@ +# Models.py Refactoring Summary + +## Overview +Refactored `models.py` PipeObject class to align with the hash+store canonical pattern, removing all backwards compatibility and legacy code. 
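+
+A minimal usage sketch of the refactored class (assumptions: the `from models import PipeObject` import path, placeholder hash values, and that the non-identity fields carry defaults, as implied by the Migration Notes below):
+
+```python
+from models import PipeObject  # assumed import path
+
+obj = PipeObject(hash="abc123...", store="test", title="file1", tags=["artist:example"])
+obj.add_relationship("alt", "def456...")
+
+print(obj.get_relationships())  # copy of the relationships dict, now containing an "alt" entry
+print(obj.to_dict()["store"])   # "test"
+```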
+ +## PipeObject Changes + +### Removed Legacy Fields +- ❌ `source` - Replaced with `store` (storage backend name) +- ❌ `identifier` - Replaced with `hash` (SHA-256 hash) +- ❌ `file_hash` - Replaced with `hash` (canonical field) +- ❌ `remote_metadata` - Removed (can go in metadata dict or extra) +- ❌ `mpv_metadata` - Removed (can go in metadata dict or extra) +- ❌ `king_hash` - Moved to relationships dict +- ❌ `alt_hashes` - Moved to relationships dict +- ❌ `related_hashes` - Moved to relationships dict +- ❌ `parent_id` - Renamed to `parent_hash` for consistency + +### New Canonical Fields +```python +@dataclass(slots=True) +class PipeObject: + hash: str # SHA-256 hash (canonical identifier) + store: str # Storage backend name (e.g., 'default', 'hydrus', 'test') + tags: List[str] + title: Optional[str] + source_url: Optional[str] + duration: Optional[float] + metadata: Dict[str, Any] + warnings: List[str] + file_path: Optional[str] + relationships: Dict[str, Any] # Contains king/alt/related + is_temp: bool + action: Optional[str] + parent_hash: Optional[str] # Renamed from parent_id + extra: Dict[str, Any] +``` + +### Updated Methods + +#### Removed +- ❌ `register_as_king(file_hash)` - Replaced with `add_relationship()` +- ❌ `add_alternate(alt_hash)` - Replaced with `add_relationship()` +- ❌ `add_related(related_hash)` - Replaced with `add_relationship()` +- ❌ `@property hash` - Now a direct field +- ❌ `as_dict()` - Removed backwards compatibility alias +- ❌ `to_serializable()` - Removed backwards compatibility alias + +#### Added/Updated +- ✅ `add_relationship(rel_type, rel_hash)` - Generic relationship management +- ✅ `get_relationships()` - Returns copy of relationships dict +- ✅ `to_dict()` - Updated to serialize new fields + +## Updated Files + +### cmdlets/_shared.py +- Updated `coerce_to_pipe_object()` to use hash+store pattern +- Now computes hash from file_path if not provided +- Extracts relationships dict instead of individual king/alt/related fields +- Removes all references to source/identifier/file_hash + +### cmdlets/add_file.py +- Updated `_update_pipe_object_destination()` signature to use hash/store +- Updated `_resolve_source()` to use pipe_obj.hash +- Updated `_prepare_metadata()` to use pipe_obj.hash +- Updated `_resolve_file_hash()` to check pipe_obj.hash +- Updated all call sites to pass hash/store instead of source/identifier/file_hash + +### cmdlets/add_tag.py & cmdlets/add_tags.py +- Updated to access `res.hash` instead of `res.file_hash` +- Updated dict access to use `get('hash')` instead of `get('file_hash')` + +### cmdlets/trim_file.py +- Updated to access `item.hash` instead of `item.file_hash` +- Updated dict access to use `get('hash')` only + +### metadata.py +- Updated IMDb, MusicBrainz, and OpenLibrary tag extraction to return dicts directly +- Removed PipeObject instantiation with old signature (source/identifier) +- Updated remote metadata function to return dict instead of using PipeObject + +## Benefits + +1. **Canonical Pattern**: All file operations now use hash+store as the single source of truth +2. **Simplified Model**: Removed 9 legacy fields, consolidated into 2 canonical fields + relationships dict +3. **Consistency**: All cmdlets now use the same hash+store pattern for identification +4. **Maintainability**: One code path, no backwards compatibility burden +5. **Type Safety**: Direct fields instead of computed properties +6. 
**Flexibility**: Relationships dict allows for extensible relationship types + +## Migration Notes + +### Old Code +```python +pipe_obj = PipeObject( + source="hydrus", + identifier=file_hash, + file_hash=file_hash, + king_hash=king, + alt_hashes=[alt1, alt2] +) +``` + +### New Code +```python +pipe_obj = PipeObject( + hash=file_hash, + store="hydrus", + relationships={ + "king": king, + "alt": [alt1, alt2] + } +) +``` + +### Accessing Fields +| Old | New | +|-----|-----| +| `obj.file_hash` | `obj.hash` | +| `obj.source` | `obj.store` | +| `obj.identifier` | `obj.hash` | +| `obj.king_hash` | `obj.relationships.get("king")` | +| `obj.alt_hashes` | `obj.relationships.get("alt", [])` | +| `obj.parent_id` | `obj.parent_hash` | + +## Zero Backwards Compatibility +As requested, **all backwards compatibility has been removed**. Old code using the previous PipeObject signature will need to be updated to use hash+store. diff --git a/NEXT_DEBUG_SESSION.md b/NEXT_DEBUG_SESSION.md new file mode 100644 index 0000000..b6470bc --- /dev/null +++ b/NEXT_DEBUG_SESSION.md @@ -0,0 +1,79 @@ +NEXT DEBUGGING SESSION +====================== + +Run these commands in sequence and watch the [add-file] and [search-store] debug logs: + +Step 1: Search and observe table/items mismatch +------ +$ search-store system:limit=5 + +Expected output: +- Should see your 4 items in the table +- Watch for: [search-store] Added X rows to table, Y items to results_list +- If X=1 and Y=4: Problem is in table.add_result() or _ensure_storage_columns() +- If X=4 and Y=4: Problem is in CLI selection mapping (elsewhere) + +Step 2: Test selection with debugging +------ +$ @2 | add-file -storage test + +Expected output: +- [add-file] INPUT result details should show the item you selected +- [add-file] RESOLVED source should have hash and store +- If either is missing/wrong: result object structure is wrong +- If both are correct: problem is in source resolution logic + +Step 3: If selection works +------ +If you successfully select @2 and add-file processes it: +- Congratulations! The issue was a one-time glitch +- If it fails again, compare debug logs to this run + +Step 4: If selection still fails +------ +Collect these logs: +1. Output of: search-store system:limit=5 +2. Output of: @2 | add-file -storage test +3. Run diagnostic command to verify table state: + $ search-store system:limit=5 | .pipe + (This will show what .pipe sees in the results) + +Step 5: Understanding @N selection format +------ +When you see: [debug] first-stage: sel=[1] rows=1 items=4 +- sel=[1] means you selected @2 (0-based index: @2 = index 1) +- rows=1 means the table object has only 1 row registered +- items=4 means there are 4 items in the results_list + +The fix depends on which is wrong: +- If rows should be 4: table.add_result() isn't adding rows +- If items should be 1: results are being duplicated somehow + +QUICK REFERENCE: DEBUGGING COMMANDS +=================================== + +Show debug logs: +$ debug on +$ search-store system:limit=5 +$ @2 | add-file -storage test + +Check what @2 selection resolves to: +$ @2 | get-metadata + +Alternative (bypass @N selection issue): +$ search-store system:limit=5 | get-metadata -store home | .pipe + +This avoids the @N selection and directly pipes results through cmdlets. + +EXPECTED BEHAVIOR +================ + +Correct sequence when selection works: +1. search-store finds 4 results +2. [search-store] Added 4 rows to table, 4 items to results_list +3. 
@2 selects item at index 1 (second item: "i ve been down") +4. [add-file] INPUT result is dict: title=i ve been down, hash=b0780e68a2dc..., store=hydrus +5. [add-file] RESOLVED source: path=/tmp/medios-hydrus/..., is_hydrus=True, hash=b0780e68a2dc... +6. File is successfully added to "test" storage + +If you see different output, the logs will show exactly where it diverges. diff --git a/PIPELINE_REFACTOR_SUMMARY.md b/PIPELINE_REFACTOR_SUMMARY.md new file mode 100644 index 0000000..031dbdc --- /dev/null +++ b/PIPELINE_REFACTOR_SUMMARY.md @@ -0,0 +1,127 @@ +# Pipeline Refactoring Summary + +## Overview +Refactored `pipeline.py` to remove all backwards compatibility and legacy code, consolidating on a single modern context-based approach using `PipelineStageContext`. + +## Changes Made + +### 1. Removed Legacy Global Variables +- ❌ `_PIPE_EMITS` - Replaced with `PipelineStageContext.emits` +- ❌ `_PIPE_ACTIVE` - Replaced with checking `_CURRENT_CONTEXT is not None` +- ❌ `_PIPE_IS_LAST` - Replaced with `PipelineStageContext.is_last_stage` +- ❌ `_LAST_PIPELINE_CAPTURE` - Removed (unused ephemeral handoff) + +### 2. Removed Legacy Functions +- ❌ `set_active(bool)` - No longer needed, context tracks this +- ❌ `set_last_stage(bool)` - No longer needed, context tracks this +- ❌ `set_last_capture(obj)` - Removed +- ❌ `get_last_capture()` - Removed + +### 3. Updated Core Functions + +#### `emit(obj)` +**Before:** Dual-path with fallback to legacy `_PIPE_EMITS` +```python +if _CURRENT_CONTEXT is not None: + _CURRENT_CONTEXT.emit(obj) + return +_PIPE_EMITS.append(obj) # Legacy fallback +``` + +**After:** Single context-based path +```python +if _CURRENT_CONTEXT is not None: + _CURRENT_CONTEXT.emit(obj) +``` + +#### `emit_list(objects)` +**Before:** Dual-path with legacy fallback +**After:** Single context-based path, removed duplicate definition + +#### `print_if_visible()` +**Before:** Checked `_PIPE_ACTIVE` and `_PIPE_IS_LAST` +```python +should_print = (not _PIPE_ACTIVE) or _PIPE_IS_LAST +``` + +**After:** Uses context state +```python +should_print = (_CURRENT_CONTEXT is None) or (_CURRENT_CONTEXT.is_last_stage) +``` + +#### `get_emitted_items()` +**Before:** Returned `_PIPE_EMITS` +**After:** Returns `_CURRENT_CONTEXT.emits` if context exists + +#### `clear_emits()` +**Before:** Cleared global `_PIPE_EMITS` +**After:** Clears `_CURRENT_CONTEXT.emits` if context exists + +#### `reset()` +**Before:** Reset 10+ legacy variables +**After:** Only resets active state variables, sets `_CURRENT_CONTEXT = None` + +### 4. Updated Call Sites + +#### TUI/pipeline_runner.py +**Before:** +```python +ctx.set_stage_context(pipeline_ctx) +ctx.set_active(True) +ctx.set_last_stage(index == total - 1) +# ... +ctx.set_stage_context(None) +ctx.set_active(False) +``` + +**After:** +```python +ctx.set_stage_context(pipeline_ctx) +# ... +ctx.set_stage_context(None) +``` + +#### CLI.py (2 locations) +**Before:** +```python +ctx.set_stage_context(pipeline_ctx) +ctx.set_active(True) +``` + +**After:** +```python +ctx.set_stage_context(pipeline_ctx) +``` + +## Result + +### Code Reduction +- Removed ~15 lines of legacy global variable declarations +- Removed ~30 lines of legacy function definitions +- Removed ~10 lines of dual-path logic in core functions +- Removed ~8 lines of redundant function calls at call sites + +### Benefits +1. **Single Source of Truth**: All pipeline state is now in `PipelineStageContext` +2. **Cleaner API**: No redundant `set_active()` / `set_last_stage()` calls needed +3. 
**Type Safety**: Context object provides better type hints and IDE support +4. **Maintainability**: One code path to maintain, no backwards compatibility burden +5. **Clarity**: Intent is clear - context manages all stage-related state + +## Preserved Functionality +All user-facing functionality remains unchanged: +- ✅ @N selection syntax +- ✅ Result table history (@.. and @,,) +- ✅ Display overlays +- ✅ Pipeline value storage/retrieval +- ✅ Worker attribution +- ✅ UI refresh callbacks +- ✅ Pending pipeline tail preservation + +## Type Checking Notes +Some type checker warnings remain about accessing attributes on Optional types (e.g., `_LAST_RESULT_TABLE.source_command`). These are safe because: +1. Code uses `_is_selectable_table()` runtime checks before access +2. Functions check `is not None` before attribute access +3. These warnings are false positives from static analysis + +These do not represent actual runtime bugs. diff --git a/README.md b/README.md index 4565545..721a101 100644 --- a/README.md +++ b/README.md @@ -38,8 +38,32 @@ Adding your first file .pipe "https://www.youtube.com/watch?v=_23dFb50Z2Y" # Add URL to current playlist ``` +Example pipelines: + +1. **Simple download with metadata (tags and URL registration)**: +``` +download-media "https://www.youtube.com/watch?v=dQw4w9WgXcQ" | add-file -storage local | add-url +``` + +2. **Download playlist item with tags**: +``` +download-media "https://www.youtube.com/playlist?list=PLxxxxx" -item 2 | add-file -storage local | add-url +``` + +3. **Download with merge (e.g., Bandcamp albums)**: +``` +download-data "https://altrusiangrace.bandcamp.com/album/ancient-egyptian-legends-full-audiobook" | merge-file | add-file -storage local | add-url +``` + +4. **Download direct file (PDF, document)**: +``` +download-file "https://example.com/file.pdf" | add-file -storage local | add-url +``` + +Search examples: + 1. search-file -provider youtube "something in the way" 2. @1 -1. download-data "https://altrusiangrace.bandcamp.com/album/ancient-egyptian-legends-full-audiobook" | merge-file | add-file -storage local \ No newline at end of file +3. 
download-media [URL] | add-file -storage local | add-url \ No newline at end of file diff --git a/TUI/modalscreen/access.py b/TUI/modalscreen/access.py index 1f00cf8..c65b258 100644 --- a/TUI/modalscreen/access.py +++ b/TUI/modalscreen/access.py @@ -1,4 +1,4 @@ -"""Modal for displaying files/URLs to access in web mode.""" +"""Modal for displaying files/url to access in web mode.""" from textual.screen import ModalScreen from textual.containers import Container, Vertical, Horizontal @@ -93,7 +93,7 @@ class AccessModal(ModalScreen): yield Label("[bold cyan]File:[/bold cyan]", classes="access-label") # Display as clickable link using HTML link element for web mode - # Rich link markup `[link=URL]` has parsing issues with URLs containing special chars + # Rich link markup `[link=URL]` has parsing issues with url containing special chars # Instead, use the HTML link markup that Textual-serve renders as tag # Format: [link=URL "tooltip"]text[/link] - the quotes help with parsing link_text = f'[link="{self.item_content}"]Open in Browser[/link]' diff --git a/TUI/modalscreen/download.py b/TUI/modalscreen/download.py index 32cb763..ca2fe86 100644 --- a/TUI/modalscreen/download.py +++ b/TUI/modalscreen/download.py @@ -233,8 +233,8 @@ class DownloadModal(ModalScreen): self.screenshot_checkbox.value = False self.playlist_merge_checkbox.value = False - # Initialize PDF playlist URLs (set by _handle_pdf_playlist) - self.pdf_urls = [] + # Initialize PDF playlist url (set by _handle_pdf_playlist) + self.pdf_url = [] self.is_pdf_playlist = False # Hide playlist by default (show format select) @@ -288,10 +288,10 @@ class DownloadModal(ModalScreen): # Launch the background worker with PDF playlist info self._submit_worker(url, tags, source, download_enabled, playlist_selection, merge_enabled, - is_pdf_playlist=self.is_pdf_playlist, pdf_urls=self.pdf_urls if self.is_pdf_playlist else []) + is_pdf_playlist=self.is_pdf_playlist, pdf_url=self.pdf_url if self.is_pdf_playlist else []) @work(thread=True) - def _submit_worker(self, url: str, tags: list, source: str, download_enabled: bool, playlist_selection: str = "", merge_enabled: bool = False, is_pdf_playlist: bool = False, pdf_urls: Optional[list] = None) -> None: + def _submit_worker(self, url: str, tags: list, source: str, download_enabled: bool, playlist_selection: str = "", merge_enabled: bool = False, is_pdf_playlist: bool = False, pdf_url: Optional[list] = None) -> None: """Background worker to execute the cmdlet pipeline. 
Args: @@ -302,10 +302,10 @@ class DownloadModal(ModalScreen): playlist_selection: Playlist track selection (e.g., "1-3", "all", "merge") merge_enabled: Whether to merge playlist files after download is_pdf_playlist: Whether this is a PDF pseudo-playlist - pdf_urls: List of PDF URLs if is_pdf_playlist is True + pdf_url: List of PDF url if is_pdf_playlist is True """ - if pdf_urls is None: - pdf_urls = [] + if pdf_url is None: + pdf_url = [] # Initialize worker to None so outer exception handler can check it worker = None @@ -340,9 +340,9 @@ class DownloadModal(ModalScreen): worker.log_step("Download initiated") # Handle PDF playlist specially - if is_pdf_playlist and pdf_urls: - logger.info(f"Processing PDF playlist with {len(pdf_urls)} PDFs") - self._handle_pdf_playlist_download(pdf_urls, tags, playlist_selection, merge_enabled) + if is_pdf_playlist and pdf_url: + logger.info(f"Processing PDF playlist with {len(pdf_url)} PDFs") + self._handle_pdf_playlist_download(pdf_url, tags, playlist_selection, merge_enabled) self.app.call_from_thread(self._hide_progress) self.app.call_from_thread(self.dismiss) return @@ -690,7 +690,7 @@ class DownloadModal(ModalScreen): 'media_kind': 'audio', 'hash_hex': None, 'hash': None, - 'known_urls': [], + 'url': [], 'title': filepath_obj.stem })() files_to_merge.append(file_result) @@ -934,8 +934,8 @@ class DownloadModal(ModalScreen): """Scrape metadata from URL(s) in URL textarea - wipes tags and source. This is triggered by Ctrl+T when URL textarea is focused. - Supports single URL or multiple URLs (newline/comma-separated). - For multiple PDF URLs, creates pseudo-playlist for merge workflow. + Supports single URL or multiple url (newline/comma-separated). + For multiple PDF url, creates pseudo-playlist for merge workflow. """ try: text = self.paragraph_textarea.text.strip() @@ -943,29 +943,29 @@ class DownloadModal(ModalScreen): logger.warning("No URL to scrape metadata from") return - # Parse multiple URLs (newline or comma-separated) - urls = [] + # Parse multiple url (newline or comma-separated) + url = [] for line in text.split('\n'): line = line.strip() if line: - # Handle comma-separated URLs within a line + # Handle comma-separated url within a line for url in line.split(','): url = url.strip() if url: - urls.append(url) + url.append(url) - # Check if multiple URLs provided - if len(urls) > 1: - logger.info(f"Detected {len(urls)} URLs - checking for PDF pseudo-playlist") - # Check if all URLs appear to be PDFs - all_pdfs = all(url.endswith('.pdf') or 'pdf' in url.lower() for url in urls) + # Check if multiple url provided + if len(url) > 1: + logger.info(f"Detected {len(url)} url - checking for PDF pseudo-playlist") + # Check if all url appear to be PDFs + all_pdfs = all(url.endswith('.pdf') or 'pdf' in url.lower() for url in url) if all_pdfs: - logger.info(f"All URLs are PDFs - creating pseudo-playlist") - self._handle_pdf_playlist(urls) + logger.info(f"All url are PDFs - creating pseudo-playlist") + self._handle_pdf_playlist(url) return # Single URL - proceed with normal metadata scraping - url = urls[0] if urls else text.strip() + url = url[0] if url else text.strip() logger.info(f"Scraping fresh metadata from: {url}") # Check if tags are already provided in textarea @@ -1044,21 +1044,21 @@ class DownloadModal(ModalScreen): ) - def _handle_pdf_playlist(self, pdf_urls: list) -> None: - """Handle multiple PDF URLs as a pseudo-playlist. + def _handle_pdf_playlist(self, pdf_url: list) -> None: + """Handle multiple PDF url as a pseudo-playlist. 
Creates a playlist-like structure with PDF metadata for merge workflow. Extracts title from URL or uses default naming. Args: - pdf_urls: List of PDF URLs to process + pdf_url: List of PDF url to process """ try: - logger.info(f"Creating PDF pseudo-playlist with {len(pdf_urls)} items") + logger.info(f"Creating PDF pseudo-playlist with {len(pdf_url)} items") - # Create playlist items from PDF URLs + # Create playlist items from PDF url playlist_items = [] - for idx, url in enumerate(pdf_urls, 1): + for idx, url in enumerate(pdf_url, 1): # Extract filename from URL for display try: # Get filename from URL path @@ -1083,15 +1083,15 @@ class DownloadModal(ModalScreen): # Build minimal metadata structure for UI population metadata = { - 'title': f'{len(pdf_urls)} PDF Documents', + 'title': f'{len(pdf_url)} PDF Documents', 'tags': [], 'formats': [('pdf', 'pdf')], # Default format is PDF 'playlist_items': playlist_items, 'is_pdf_playlist': True # Mark as PDF pseudo-playlist } - # Store URLs for later use during merge - self.pdf_urls = pdf_urls + # Store url for later use during merge + self.pdf_url = pdf_url self.is_pdf_playlist = True # Populate the modal with metadata @@ -1099,7 +1099,7 @@ class DownloadModal(ModalScreen): self._populate_from_metadata(metadata, wipe_tags_and_source=True) self.app.notify( - f"Loaded {len(pdf_urls)} PDFs as playlist", + f"Loaded {len(pdf_url)} PDFs as playlist", title="PDF Playlist", severity="information", timeout=3 @@ -1115,11 +1115,11 @@ class DownloadModal(ModalScreen): ) - def _handle_pdf_playlist_download(self, pdf_urls: list, tags: list, selection: str, merge_enabled: bool) -> None: + def _handle_pdf_playlist_download(self, pdf_url: list, tags: list, selection: str, merge_enabled: bool) -> None: """Download and merge PDF playlist. 
Args: - pdf_urls: List of PDF URLs to download + pdf_url: List of PDF url to download tags: Tags to apply to the merged PDF selection: Selection string like "1-3" or "1,3,5" merge_enabled: Whether to merge the PDFs @@ -1141,7 +1141,7 @@ class DownloadModal(ModalScreen): # Create temporary list of playlist items for selection parsing # We need this because _parse_playlist_selection uses self.playlist_items temp_items = [] - for url in pdf_urls: + for url in pdf_url: temp_items.append({'title': url}) self.playlist_items = temp_items @@ -1149,20 +1149,20 @@ class DownloadModal(ModalScreen): selected_indices = self._parse_playlist_selection(selection) if not selected_indices: # No valid selection, use all - selected_indices = list(range(len(pdf_urls))) + selected_indices = list(range(len(pdf_url))) - selected_urls = [pdf_urls[i] for i in selected_indices] + selected_url = [pdf_url[i] for i in selected_indices] - logger.info(f"Downloading {len(selected_urls)} selected PDFs for merge") + logger.info(f"Downloading {len(selected_url)} selected PDFs for merge") # Download PDFs to temporary directory temp_dir = Path.home() / ".downlow_temp_pdfs" temp_dir.mkdir(exist_ok=True) downloaded_files = [] - for idx, url in enumerate(selected_urls, 1): + for idx, url in enumerate(selected_url, 1): try: - logger.info(f"Downloading PDF {idx}/{len(selected_urls)}: {url}") + logger.info(f"Downloading PDF {idx}/{len(selected_url)}: {url}") response = requests.get(url, timeout=30) response.raise_for_status() @@ -1619,7 +1619,7 @@ class DownloadModal(ModalScreen): ) return else: - success_msg = "✅ download-data completed successfully" + success_msg = "download-data completed successfully" logger.info(success_msg) if worker: worker.append_stdout(f"{success_msg}\n") @@ -1670,7 +1670,7 @@ class DownloadModal(ModalScreen): worker.append_stdout(f"{warning_msg}\n") else: if worker: - worker.append_stdout("✅ Tags applied successfully\n") + worker.append_stdout("Tags applied successfully\n") except Exception as e: error_msg = f"❌ Tagging error: {e}" logger.error(error_msg, exc_info=True) @@ -1684,7 +1684,7 @@ class DownloadModal(ModalScreen): worker.append_stdout(f"{warning_msg}\n") else: if worker: - worker.append_stdout("✅ Download complete (no tags to apply)\n") + worker.append_stdout("Download complete (no tags to apply)\n") def _show_format_select(self) -> None: """Show format select (always visible for single files).""" @@ -1770,9 +1770,9 @@ class DownloadModal(ModalScreen): # Namespaces to exclude (metadata-only, not user-facing) excluded_namespaces = { 'hash', # Hash values (internal) - 'known_url', # URLs (internal) + 'url', # url (internal) 'relationship', # Internal relationships - 'url', # URLs (internal) + 'url', # url (internal) } # Add all other tags diff --git a/TUI/modalscreen/export.py b/TUI/modalscreen/export.py index 4a5731e..104482a 100644 --- a/TUI/modalscreen/export.py +++ b/TUI/modalscreen/export.py @@ -350,9 +350,9 @@ class ExportModal(ModalScreen): if tag: export_tags.add(tag) - # For Hydrus export, filter out metadata-only tags (hash:, known_url:, relationship:) + # For Hydrus export, filter out metadata-only tags (hash:, url:, relationship:) if export_to == "libraries" and library == "hydrus": - metadata_prefixes = {'hash:', 'known_url:', 'relationship:'} + metadata_prefixes = {'hash:', 'url:', 'relationship:'} export_tags = {tag for tag in export_tags if not any(tag.lower().startswith(prefix) for prefix in metadata_prefixes)} logger.info(f"Filtered tags for Hydrus - removed metadata tags, 
{len(export_tags)} tags remaining") @@ -404,9 +404,9 @@ class ExportModal(ModalScreen): metadata = self.result_data.get('metadata', {}) # Extract file source info from result_data (passed by hub-ui) - file_hash = self.result_data.get('file_hash') - file_url = self.result_data.get('file_url') - file_path = self.result_data.get('file_path') # For local files + file_hash = self.result_data.get('hash') or self.result_data.get('file_hash') + file_url = self.result_data.get('url') or self.result_data.get('file_url') + file_path = self.result_data.get('path') or self.result_data.get('file_path') # For local files source = self.result_data.get('source', 'unknown') # Prepare export data @@ -419,8 +419,11 @@ class ExportModal(ModalScreen): 'format': file_format, 'metadata': metadata, 'original_data': self.result_data, + 'hash': file_hash, 'file_hash': file_hash, + 'url': file_url, 'file_url': file_url, + 'path': file_path, 'file_path': file_path, # Pass file path for local files 'source': source, } diff --git a/TUI/modalscreen/search.py b/TUI/modalscreen/search.py index fcfdea1..a180e11 100644 --- a/TUI/modalscreen/search.py +++ b/TUI/modalscreen/search.py @@ -16,7 +16,7 @@ import asyncio sys.path.insert(0, str(Path(__file__).parent.parent)) from config import load_config from result_table import ResultTable -from helper.search_provider import get_provider +from helper.provider import get_provider logger = logging.getLogger(__name__) @@ -183,7 +183,7 @@ class SearchModal(ModalScreen): else: # Fallback if no columns defined row.add_column("Title", res.title) - row.add_column("Target", res.target) + row.add_column("Target", getattr(res, 'path', None) or getattr(res, 'url', None) or getattr(res, 'target', None) or '') self.current_result_table = table diff --git a/TUI/pipeline_runner.py b/TUI/pipeline_runner.py index d6c5904..c38b5ea 100644 --- a/TUI/pipeline_runner.py +++ b/TUI/pipeline_runner.py @@ -197,8 +197,6 @@ class PipelineExecutor: pipeline_ctx = ctx.PipelineStageContext(stage_index=index, total_stages=total) ctx.set_stage_context(pipeline_ctx) - ctx.set_active(True) - ctx.set_last_stage(index == total - 1) try: return_code = cmd_fn(piped_input, list(stage_args), self._config) @@ -210,7 +208,6 @@ class PipelineExecutor: return stage finally: ctx.set_stage_context(None) - ctx.set_active(False) emitted = list(getattr(pipeline_ctx, "emits", []) or []) stage.emitted = emitted diff --git a/cmdlets/__init__.py b/cmdlets/__init__.py index 950adba..6898ec3 100644 --- a/cmdlets/__init__.py +++ b/cmdlets/__init__.py @@ -24,70 +24,12 @@ def register(names: Iterable[str]): return _wrap -class AutoRegister: - """Decorator that automatically registers a cmdlet function using CMDLET.aliases. - - Usage: - CMDLET = Cmdlet( - name="delete-file", - aliases=["del", "del-file"], - ... - ) - - @AutoRegister(CMDLET) - def _run(result, args, config) -> int: - ... 
- - Registers the cmdlet under: - - Its main name from CMDLET.name - - All aliases from CMDLET.aliases - - This allows the help display to show: "cmd: delete-file | alias: del, del-file" - """ - def __init__(self, cmdlet): - self.cmdlet = cmdlet - - def __call__(self, fn: Cmdlet) -> Cmdlet: - """Register fn for the main name and all aliases in cmdlet.""" - normalized_name = None - - # Register for main name first - if hasattr(self.cmdlet, 'name') and self.cmdlet.name: - normalized_name = self.cmdlet.name.replace('_', '-').lower() - REGISTRY[normalized_name] = fn - - # Register for all aliases - if hasattr(self.cmdlet, 'aliases') and self.cmdlet.aliases: - for alias in self.cmdlet.aliases: - normalized_alias = alias.replace('_', '-').lower() - # Always register (aliases are separate from main name) - REGISTRY[normalized_alias] = fn - - return fn - - def get(cmd_name: str) -> Cmdlet | None: return REGISTRY.get(cmd_name.replace('_', '-').lower()) -def format_cmd_help(cmdlet) -> str: - """Format a cmdlet for help display showing cmd:name and aliases. - - Example output: "delete-file | aliases: del, del-file" - """ - if not hasattr(cmdlet, 'name'): - return str(cmdlet) - - cmd_str = f"cmd: {cmdlet.name}" - - if hasattr(cmdlet, 'aliases') and cmdlet.aliases: - aliases_str = ", ".join(cmdlet.aliases) - cmd_str += f" | aliases: {aliases_str}" - - return cmd_str - - # Dynamically import all cmdlet modules in this directory (ignore files starting with _ and __init__.py) +# Cmdlets self-register when instantiated via their __init__ method import os cmdlet_dir = os.path.dirname(__file__) for filename in os.listdir(cmdlet_dir): @@ -106,27 +48,7 @@ for filename in os.listdir(cmdlet_dir): continue try: - module = _import_module(f".{mod_name}", __name__) - - # Auto-register based on CMDLET object with exec function - # This allows cmdlets to be fully self-contained in the CMDLET object - if hasattr(module, 'CMDLET'): - cmdlet_obj = module.CMDLET - - # Get the execution function from the CMDLET object - run_fn = getattr(cmdlet_obj, 'exec', None) if hasattr(cmdlet_obj, 'exec') else None - - if callable(run_fn): - # Register main name - if hasattr(cmdlet_obj, 'name') and cmdlet_obj.name: - normalized_name = cmdlet_obj.name.replace('_', '-').lower() - REGISTRY[normalized_name] = run_fn - - # Register all aliases - if hasattr(cmdlet_obj, 'aliases') and cmdlet_obj.aliases: - for alias in cmdlet_obj.aliases: - normalized_alias = alias.replace('_', '-').lower() - REGISTRY[normalized_alias] = run_fn + _import_module(f".{mod_name}", __name__) except Exception as e: import sys print(f"Error importing cmdlet '{mod_name}': {e}", file=sys.stderr) @@ -141,8 +63,6 @@ except Exception: pass # Import root-level modules that also register cmdlets -# Note: search_libgen, search_soulseek, and search_debrid are now consolidated into search_provider.py -# Use search-file -provider libgen, -provider soulseek, or -provider debrid instead for _root_mod in ("select_cmdlet",): try: _import_module(_root_mod) diff --git a/cmdlets/_shared.py b/cmdlets/_shared.py index e3aa4b8..e22fc31 100644 --- a/cmdlets/_shared.py +++ b/cmdlets/_shared.py @@ -11,7 +11,7 @@ import sys import inspect from collections.abc import Iterable as IterableABC -from helper.logger import log +from helper.logger import log, debug from pathlib import Path from typing import Any, Dict, Iterable, List, Optional, Sequence, Set from dataclasses import dataclass, field @@ -37,22 +37,9 @@ class CmdletArg: """Optional handler function/callable for processing this 
argument's value""" variadic: bool = False """Whether this argument accepts multiple values (consumes remaining positional args)""" - - def to_dict(self) -> Dict[str, Any]: - """Convert to dict for backward compatibility.""" - d = { - "name": self.name, - "type": self.type, - "required": self.required, - "description": self.description, - "variadic": self.variadic, - } - if self.choices: - d["choices"] = self.choices - if self.alias: - d["alias"] = self.alias - return d - + usage: str = "" + """dsf""" + def resolve(self, value: Any) -> Any: """Resolve/process the argument value using the handler if available. @@ -135,11 +122,68 @@ class SharedArgs: # File/Hash arguments HASH = CmdletArg( - "hash", + name="hash", type="string", - description="Override the Hydrus file hash (SHA256) to target instead of the selected result." + description="File hash (SHA256, 64-char hex string)", ) + STORE = CmdletArg( + name="store", + type="enum", + choices=[], # Dynamically populated via get_store_choices() + description="Selects store", + ) + + PATH = CmdletArg( + name="path", + type="string", + choices=[], # Dynamically populated via get_store_choices() + description="Selects store", + ) + + URL = CmdletArg( + name="url", + type="string", + description="http parser", + ) + + @staticmethod + def get_store_choices(config: Optional[Dict[str, Any]] = None) -> List[str]: + """Get list of available storage backend names from FileStorage. + + This method dynamically discovers all configured storage backends + instead of using a static list. Should be called when building + autocomplete choices or validating store names. + + Args: + config: Optional config dict. If not provided, will try to load from config module. + + Returns: + List of backend names (e.g., ['default', 'test', 'home', 'work']) + + Example: + # In a cmdlet that needs dynamic choices + from helper.store import FileStorage + storage = FileStorage(config) + SharedArgs.STORE.choices = SharedArgs.get_store_choices(config) + """ + try: + from helper.store import FileStorage + + # If no config provided, try to load it + if config is None: + try: + from config import load_config + config = load_config() + except Exception: + return [] + + file_storage = FileStorage(config) + return file_storage.list_backends() + except Exception: + # Fallback to empty list if FileStorage isn't available + return [] + LOCATION = CmdletArg( "location", type="enum", @@ -205,16 +249,7 @@ class SharedArgs: type="string", description="Output file path." 
) - - STORAGE = CmdletArg( - "storage", - type="enum", - choices=["hydrus", "local", "ftp", "matrix"], - required=False, - description="Storage location or destination for saving/uploading files.", - alias="s", - handler=lambda val: SharedArgs.resolve_storage(val) if val else None - ) + # Generic arguments QUERY = CmdletArg( @@ -325,78 +360,61 @@ class Cmdlet: log(cmd.name) # "add-file" log(cmd.summary) # "Upload a media file" log(cmd.args[0].name) # "location" - - # Convert to dict for JSON serialization - log(json.dumps(cmd.to_dict())) """ name: str - """Cmdlet name, e.g., 'add-file'""" + """""" summary: str """One-line summary of the cmdlet""" usage: str """Usage string, e.g., 'add-file [-delete]'""" - aliases: List[str] = field(default_factory=list) + alias: List[str] = field(default_factory=list) """List of aliases for this cmdlet, e.g., ['add', 'add-f']""" - args: List[CmdletArg] = field(default_factory=list) + arg: List[CmdletArg] = field(default_factory=list) """List of arguments accepted by this cmdlet""" - details: List[str] = field(default_factory=list) + detail: List[str] = field(default_factory=list) """Detailed explanation lines (for help text)""" exec: Optional[Any] = field(default=None) """The execution function: func(result, args, config) -> int""" - def __post_init__(self) -> None: - """Auto-discover _run function if exec not explicitly provided. - - If exec is None, looks for a _run function in the module where - this Cmdlet was instantiated and uses it automatically. - """ - if self.exec is None: - # Walk up the call stack to find _run in the calling module - frame = inspect.currentframe() - try: - # Walk up frames until we find one with _run in globals - while frame: - if '_run' in frame.f_globals: - self.exec = frame.f_globals['_run'] - break - frame = frame.f_back - finally: - del frame # Avoid reference cycles - - def to_dict(self) -> Dict[str, Any]: - """Convert to dict for backward compatibility with existing code. - - Returns a dict matching the old CMDLET format so existing code - that expects a dict will still work. - """ - # Format command for display: "cmd: name alias: alias1, alias2" - cmd_display = f"cmd: {self.name}" - if self.aliases: - aliases_str = ", ".join(self.aliases) - cmd_display += f" alias: {aliases_str}" - - return { - "name": self.name, - "summary": self.summary, - "usage": self.usage, - "cmd": cmd_display, # Display-friendly command name with aliases on one line - "aliases": self.aliases, - "args": [arg.to_dict() for arg in self.args], - "details": self.details, - } - - def __getitem__(self, key: str) -> Any: - """Dict-like access for backward compatibility. 
- - Allows code like: cmdlet["name"] or cmdlet["args"] - """ - d = self.to_dict() - return d.get(key) - - def get(self, key: str, default: Any = None) -> Any: - """Dict-like get() method for backward compatibility.""" - d = self.to_dict() - return d.get(key, default) + + + def _collect_names(self) -> List[str]: + """Collect primary name plus aliases, de-duplicated and normalized.""" + names: List[str] = [] + if self.name: + names.append(self.name) + for alias in (self.alias or []): + if alias: + names.append(alias) + for alias in (getattr(self, "aliases", None) or []): + if alias: + names.append(alias) + + seen: Set[str] = set() + deduped: List[str] = [] + for name in names: + key = name.replace("_", "-").lower() + if key in seen: + continue + seen.add(key) + deduped.append(name) + return deduped + + def register(self) -> "Cmdlet": + """Register this cmdlet's exec under its name and aliases.""" + if not callable(self.exec): + return self + try: + from . import register as _register # Local import to avoid circular import cost + except Exception: + return self + + names = self._collect_names() + if not names: + return self + + _register(names)(self.exec) + return self def get_flags(self, arg_name: str) -> set[str]: """Generate -name and --name flag variants for an argument. @@ -432,7 +450,7 @@ class Cmdlet: elif low in flags.get('tag', set()): # handle tag """ - return {arg.name: self.get_flags(arg.name) for arg in self.args} + return {arg.name: self.get_flags(arg.name) for arg in self.arg} # Tag groups cache (loaded from JSON config file) @@ -479,19 +497,19 @@ def parse_cmdlet_args(args: Sequence[str], cmdlet_spec: Dict[str, Any] | Cmdlet) """ result: Dict[str, Any] = {} - # Handle both dict and Cmdlet objects - if isinstance(cmdlet_spec, Cmdlet): - cmdlet_spec = cmdlet_spec.to_dict() + # Only accept Cmdlet objects + if not isinstance(cmdlet_spec, Cmdlet): + raise TypeError(f"Expected Cmdlet, got {type(cmdlet_spec).__name__}") - # Build arg specs tracking which are positional vs flagged - arg_specs: List[Dict[str, Any]] = cmdlet_spec.get("args", []) - positional_args: List[Dict[str, Any]] = [] # args without prefix in definition - flagged_args: List[Dict[str, Any]] = [] # args with prefix in definition + # Build arg specs from cmdlet + arg_specs: List[CmdletArg] = cmdlet_spec.arg + positional_args: List[CmdletArg] = [] # args without prefix in definition + flagged_args: List[CmdletArg] = [] # args with prefix in definition arg_spec_map: Dict[str, str] = {} # prefix variant -> canonical name (without prefix) for spec in arg_specs: - name = spec.get("name") + name = spec.name if not name: continue @@ -520,10 +538,10 @@ def parse_cmdlet_args(args: Sequence[str], cmdlet_spec: Dict[str, Any] | Cmdlet) # Check if this token is a known flagged argument if token_lower in arg_spec_map: canonical_name = arg_spec_map[token_lower] - spec = next((s for s in arg_specs if str(s.get("name", "")).lstrip("-").lower() == canonical_name.lower()), None) + spec = next((s for s in arg_specs if str(s.name).lstrip("-").lower() == canonical_name.lower()), None) # Check if it's a flag type (which doesn't consume next value, just marks presence) - is_flag = spec and spec.get("type") == "flag" + is_flag = spec and spec.type == "flag" if is_flag: # For flags, just mark presence without consuming next token @@ -535,7 +553,7 @@ def parse_cmdlet_args(args: Sequence[str], cmdlet_spec: Dict[str, Any] | Cmdlet) value = args[i + 1] # Check if variadic - is_variadic = spec and spec.get("variadic", False) + is_variadic = spec 
and spec.variadic if is_variadic: if canonical_name not in result: result[canonical_name] = [] @@ -550,8 +568,8 @@ def parse_cmdlet_args(args: Sequence[str], cmdlet_spec: Dict[str, Any] | Cmdlet) # Otherwise treat as positional if we have positional args remaining elif positional_index < len(positional_args): positional_spec = positional_args[positional_index] - canonical_name = str(positional_spec.get("name", "")).lstrip("-") - is_variadic = positional_spec.get("variadic", False) + canonical_name = str(positional_spec.name).lstrip("-") + is_variadic = positional_spec.variadic if is_variadic: # For variadic args, append to a list @@ -591,6 +609,183 @@ def normalize_hash(hash_hex: Optional[str]) -> Optional[str]: return text.lower() if text else None +def get_hash_for_operation(override_hash: Optional[str], result: Any, field_name: str = "hash_hex") -> Optional[str]: + """Get normalized hash from override or result object, consolidating common pattern. + + Eliminates repeated pattern: normalize_hash(override) if override else normalize_hash(get_field(result, ...)) + + Args: + override_hash: Hash passed as command argument (takes precedence) + result: Object containing hash field (fallback) + field_name: Name of hash field in result object (default: "hash_hex") + + Returns: + Normalized hash string, or None if neither override nor result provides valid hash + """ + if override_hash: + return normalize_hash(override_hash) + # Try multiple field names for robustness + hash_value = get_field(result, field_name) or getattr(result, field_name, None) or getattr(result, "hash", None) or result.get("file_hash") if isinstance(result, dict) else None + return normalize_hash(hash_value) + + +def fetch_hydrus_metadata(config: Any, hash_hex: str, **kwargs) -> tuple[Optional[Dict[str, Any]], Optional[int]]: + """Fetch metadata from Hydrus for a given hash, consolidating common fetch pattern. + + Eliminates repeated boilerplate: client initialization, error handling, metadata extraction. + + Args: + config: Configuration object (passed to hydrus_wrapper.get_client) + hash_hex: File hash to fetch metadata for + **kwargs: Additional arguments to pass to client.fetch_file_metadata() + Common: include_service_keys_to_tags, include_notes, include_file_url, include_duration, etc. + + Returns: + Tuple of (metadata_dict, error_code) + - metadata_dict: Dict from Hydrus (first item in metadata list) or None if unavailable + - error_code: 0 on success, 1 on any error (suitable for returning from cmdlet execute()) + """ + from helper import hydrus + hydrus_wrapper = hydrus + + try: + client = hydrus_wrapper.get_client(config) + except Exception as exc: + log(f"Hydrus client unavailable: {exc}") + return None, 1 + + if client is None: + log("Hydrus client unavailable") + return None, 1 + + try: + payload = client.fetch_file_metadata(hashes=[hash_hex], **kwargs) + except Exception as exc: + log(f"Hydrus metadata fetch failed: {exc}") + return None, 1 + + items = payload.get("metadata") if isinstance(payload, dict) else None + meta = items[0] if (isinstance(items, list) and items and isinstance(items[0], dict)) else None + + return meta, 0 + + +def get_origin(obj: Any, default: Optional[str] = None) -> Optional[str]: + """Extract origin field with fallback to store/source field, consolidating common pattern. + + Supports both dict and object access patterns. 
+ + Args: + obj: Object (dict or dataclass) with 'store', 'origin', or 'source' field + default: Default value if none of the fields are found + + Returns: + Store/origin/source string, or default if none exist + """ + if isinstance(obj, dict): + return obj.get("store") or obj.get("origin") or obj.get("source") or default + else: + return getattr(obj, "store", None) or getattr(obj, "origin", None) or getattr(obj, "source", None) or default + + +def get_field(obj: Any, field: str, default: Optional[Any] = None) -> Any: + """Extract a field from either a dict or object with fallback default. + + Handles both dict.get(field) and getattr(obj, field) access patterns. + Also handles lists by accessing the first element. + For PipeObjects, checks the extra field as well. + Used throughout cmdlets to uniformly access fields from mixed types. + + Args: + obj: Dict, object, or list to extract from + field: Field name to retrieve + default: Value to return if field not found (default: None) + + Returns: + Field value if found, otherwise the default value + + Examples: + get_field(result, "hash") # From dict or object + get_field(result, "origin", "unknown") # With default + """ + # Handle lists by accessing the first element + if isinstance(obj, list) and obj: + obj = obj[0] + + if isinstance(obj, dict): + # Direct lookup first + val = obj.get(field, default) + if val is not None: + return val + # Fallback aliases for common fields + if field == "path": + for alt in ("file_path", "target", "filepath", "file"): + v = obj.get(alt) + if v: + return v + if field == "hash": + for alt in ("file_hash", "hash_hex"): + v = obj.get(alt) + if v: + return v + if field == "store": + for alt in ("storage", "storage_source", "origin"): + v = obj.get(alt) + if v: + return v + return default + else: + # Try direct attribute access first + value = getattr(obj, field, None) + if value is not None: + return value + + # Attribute fallback aliases for common fields + if field == "path": + for alt in ("file_path", "target", "filepath", "file", "url"): + v = getattr(obj, alt, None) + if v: + return v + if field == "hash": + for alt in ("file_hash", "hash_hex"): + v = getattr(obj, alt, None) + if v: + return v + if field == "store": + for alt in ("storage", "storage_source", "origin"): + v = getattr(obj, alt, None) + if v: + return v + + # For PipeObjects, also check the extra field + if hasattr(obj, 'extra') and isinstance(obj.extra, dict): + return obj.extra.get(field, default) + + return default + + +def should_show_help(args: Sequence[str]) -> bool: + """Check if help flag was passed in arguments. + + Consolidates repeated pattern of checking for help flags across cmdlets. + + Args: + args: Command arguments to check + + Returns: + True if any help flag is present (-?, /?, --help, -h, help, --cmdlet) + + Examples: + if should_show_help(args): + log(json.dumps(CMDLET, ensure_ascii=False, indent=2)) + return 0 + """ + try: + return any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args) + except Exception: + return False + + def looks_like_hash(candidate: Optional[str]) -> bool: """Check if a string looks like a SHA256 hash (64 hex chars). @@ -609,8 +804,8 @@ def looks_like_hash(candidate: Optional[str]) -> bool: def pipeline_item_local_path(item: Any) -> Optional[str]: """Extract local file path from a pipeline item. - Supports both dataclass objects with .target attribute and dicts. - Returns None for HTTP/HTTPS URLs. + Supports both dataclass objects with .path attribute and dicts. 
+ Returns None for HTTP/HTTPS url. Args: item: Pipeline item (PipelineItem dataclass, dict, or other) @@ -618,15 +813,15 @@ def pipeline_item_local_path(item: Any) -> Optional[str]: Returns: Local file path string, or None if item is not a local file """ - target: Optional[str] = None - if hasattr(item, "target"): - target = getattr(item, "target", None) + path_value: Optional[str] = None + if hasattr(item, "path"): + path_value = getattr(item, "path", None) elif isinstance(item, dict): - raw = item.get("target") or item.get("path") or item.get("url") - target = str(raw) if raw is not None else None - if not isinstance(target, str): + raw = item.get("path") or item.get("url") + path_value = str(raw) if raw is not None else None + if not isinstance(path_value, str): return None - text = target.strip() + text = path_value.strip() if not text: return None if text.lower().startswith(("http://", "https://")): @@ -686,22 +881,60 @@ def collect_relationship_labels(payload: Any, label_stack: List[str] | None = No def parse_tag_arguments(arguments: Sequence[str]) -> List[str]: """Parse tag arguments from command line tokens. - - Handles both space-separated and comma-separated tags. - Example: parse_tag_arguments(["tag1,tag2", "tag3"]) -> ["tag1", "tag2", "tag3"] - + + - Supports comma-separated tags. + - Supports pipe namespace shorthand: "artist:A|B|C" -> artist:A, artist:B, artist:C. + Args: arguments: Sequence of argument strings - + Returns: List of normalized tag strings (empty strings filtered out) """ + + def _expand_pipe_namespace(text: str) -> List[str]: + parts = text.split('|') + expanded: List[str] = [] + last_ns: Optional[str] = None + for part in parts: + segment = part.strip() + if not segment: + continue + if ':' in segment: + ns, val = segment.split(':', 1) + ns = ns.strip() + val = val.strip() + last_ns = ns or last_ns + if last_ns and val: + expanded.append(f"{last_ns}:{val}") + elif ns or val: + expanded.append(f"{ns}:{val}".strip(':')) + else: + if last_ns: + expanded.append(f"{last_ns}:{segment}") + else: + expanded.append(segment) + return expanded + tags: List[str] = [] for argument in arguments: for token in argument.split(','): text = token.strip() - if text: - tags.append(text) + if not text: + continue + # Expand namespace shorthand with pipes + pipe_expanded = _expand_pipe_namespace(text) + for entry in pipe_expanded: + candidate = entry.strip() + if not candidate: + continue + if ':' in candidate: + ns, val = candidate.split(':', 1) + ns = ns.strip() + val = val.strip() + candidate = f"{ns}:{val}" if ns or val else "" + if candidate: + tags.append(candidate) return tags @@ -944,7 +1177,7 @@ def create_pipe_object_result( result = { 'source': source, 'id': identifier, - 'file_path': file_path, + 'path': file_path, 'action': f'cmdlet:{cmdlet_name}', # Format: cmdlet:cmdlet_name } @@ -952,6 +1185,7 @@ def create_pipe_object_result( result['title'] = title if file_hash: result['file_hash'] = file_hash + result['hash'] = file_hash if is_temp: result['is_temp'] = True if parent_hash: @@ -959,6 +1193,13 @@ def create_pipe_object_result( if tags: result['tags'] = tags + # Canonical store field: use source for compatibility + try: + if source: + result['store'] = source + except Exception: + pass + # Add any extra fields result.update(extra) @@ -996,13 +1237,13 @@ def get_pipe_object_path(pipe_object: Any) -> Optional[str]: """Extract file path from PipeObject, dict, or pipeline-friendly object.""" if pipe_object is None: return None - for attr in ('file_path', 'path', 
'target'): + for attr in ('path', 'target'): if hasattr(pipe_object, attr): value = getattr(pipe_object, attr) if value: return value if isinstance(pipe_object, dict): - for key in ('file_path', 'path', 'target'): + for key in ('path', 'target'): value = pipe_object.get(key) if value: return value @@ -1209,40 +1450,40 @@ def extract_title_from_result(result: Any) -> Optional[str]: return None -def extract_known_urls_from_result(result: Any) -> list[str]: - urls: list[str] = [] +def extract_url_from_result(result: Any) -> list[str]: + url: list[str] = [] def _extend(candidate: Any) -> None: if not candidate: return if isinstance(candidate, list): - urls.extend(candidate) + url.extend(candidate) elif isinstance(candidate, str): - urls.append(candidate) + url.append(candidate) if isinstance(result, models.PipeObject): - _extend(result.extra.get('known_urls')) + _extend(result.extra.get('url')) _extend(result.extra.get('url')) # Also check singular url if isinstance(result.metadata, dict): - _extend(result.metadata.get('known_urls')) - _extend(result.metadata.get('urls')) _extend(result.metadata.get('url')) - elif hasattr(result, 'known_urls') or hasattr(result, 'urls'): - # Handle objects with known_urls/urls attribute - _extend(getattr(result, 'known_urls', None)) - _extend(getattr(result, 'urls', None)) + _extend(result.metadata.get('url')) + _extend(result.metadata.get('url')) + elif hasattr(result, 'url') or hasattr(result, 'url'): + # Handle objects with url/url attribute + _extend(getattr(result, 'url', None)) + _extend(getattr(result, 'url', None)) if isinstance(result, dict): - _extend(result.get('known_urls')) - _extend(result.get('urls')) + _extend(result.get('url')) + _extend(result.get('url')) _extend(result.get('url')) extra = result.get('extra') if isinstance(extra, dict): - _extend(extra.get('known_urls')) - _extend(extra.get('urls')) + _extend(extra.get('url')) + _extend(extra.get('url')) _extend(extra.get('url')) - return merge_sequences(urls, case_sensitive=True) + return merge_sequences(url, case_sensitive=True) def extract_relationships(result: Any) -> Optional[Dict[str, Any]]: @@ -1272,3 +1513,248 @@ def extract_duration(result: Any) -> Optional[float]: return float(duration) except (TypeError, ValueError): return None + + +def coerce_to_pipe_object(value: Any, default_path: Optional[str] = None) -> models.PipeObject: + """Normalize any incoming result to a PipeObject for single-source-of-truth state. + + Uses hash+store canonical pattern. 
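+
+     Illustrative sketch (hypothetical values; when only a path is known, the hash is
+     computed with sha256_file and the store falls back to "PATH"):
+         coerce_to_pipe_object({"hash": "<sha256>", "store": "local", "path": "C:/x.m4a"})
+             -> PipeObject(hash="<sha256>", store="local", path="C:/x.m4a")
+         coerce_to_pipe_object(None, default_path="C:/x.m4a")
+             -> PipeObject(hash=<computed or "unknown">, store="PATH", path="C:/x.m4a")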
+ """ + # Debug: Print ResultItem details if coming from search_file.py + try: + from helper.logger import is_debug_enabled, debug + if is_debug_enabled() and hasattr(value, '__class__') and value.__class__.__name__ == 'ResultItem': + debug("[ResultItem -> PipeObject conversion]") + debug(f" origin={getattr(value, 'origin', None)}") + debug(f" title={getattr(value, 'title', None)}") + debug(f" target={getattr(value, 'target', None)}") + debug(f" hash_hex={getattr(value, 'hash_hex', None)}") + debug(f" media_kind={getattr(value, 'media_kind', None)}") + debug(f" tags={getattr(value, 'tags', None)}") + debug(f" tag_summary={getattr(value, 'tag_summary', None)}") + debug(f" size_bytes={getattr(value, 'size_bytes', None)}") + debug(f" duration_seconds={getattr(value, 'duration_seconds', None)}") + debug(f" relationships={getattr(value, 'relationships', None)}") + debug(f" url={getattr(value, 'url', None)}") + debug(f" full_metadata keys={list(getattr(value, 'full_metadata', {}).keys()) if hasattr(value, 'full_metadata') and value.full_metadata else []}") + except Exception: + pass + + if isinstance(value, models.PipeObject): + return value + + known_keys = { + "hash", "store", "tags", "title", "url", "source_url", "duration", "metadata", + "warnings", "path", "relationships", "is_temp", "action", "parent_hash", + } + + # Convert ResultItem to dict to preserve all attributes + if hasattr(value, 'to_dict'): + value = value.to_dict() + + if isinstance(value, dict): + # Extract hash and store (canonical identifiers) + hash_val = value.get("hash") or value.get("file_hash") + # Recognize multiple possible store naming conventions (store, origin, storage, storage_source) + store_val = value.get("store") or value.get("origin") or value.get("storage") or value.get("storage_source") or "PATH" + # If the store value is embedded under extra, also detect it + if not store_val or store_val in ("local", "PATH"): + extra_store = None + try: + extra_store = value.get("extra", {}).get("store") or value.get("extra", {}).get("storage") or value.get("extra", {}).get("storage_source") + except Exception: + extra_store = None + if extra_store: + store_val = extra_store + + # If no hash, try to compute from path or use placeholder + if not hash_val: + path_val = value.get("path") + if path_val: + try: + from helper.utils import sha256_file + from pathlib import Path + hash_val = sha256_file(Path(path_val)) + except Exception: + hash_val = "unknown" + else: + hash_val = "unknown" + + # Extract title from filename if not provided + title_val = value.get("title") + if not title_val: + path_val = value.get("path") + if path_val: + try: + from pathlib import Path + title_val = Path(path_val).stem + except Exception: + pass + + extra = {k: v for k, v in value.items() if k not in known_keys} + + # Extract URL: prefer direct url field, then url list + url_val = value.get("url") + if not url_val: + url = value.get("url") or value.get("url") or [] + if url and isinstance(url, list) and len(url) > 0: + url_val = url[0] + # Preserve url in extra if multiple url exist + if url and len(url) > 1: + extra["url"] = url + + # Extract relationships + rels = value.get("relationships") or {} + + # Consolidate tags: prefer tags_set over tags, tag_summary + tags_val = [] + if "tags_set" in value and value["tags_set"]: + tags_val = list(value["tags_set"]) + elif "tags" in value and isinstance(value["tags"], (list, set)): + tags_val = list(value["tags"]) + elif "tag" in value: + # Single tag string or list + if isinstance(value["tag"], 
list): + tags_val = value["tag"] # Already a list + else: + tags_val = [value["tag"]] # Wrap single string in list + + # Consolidate path: prefer explicit path key, but NOT target if it's a URL + path_val = value.get("path") + # Only use target as path if it's not a URL (url should stay in url field) + if not path_val and "target" in value: + target = value["target"] + if target and not (isinstance(target, str) and (target.startswith("http://") or target.startswith("https://"))): + path_val = target + + # If the path value is actually a URL, move it to url_val and clear path_val + try: + if isinstance(path_val, str) and (path_val.startswith("http://") or path_val.startswith("https://")): + # Prefer existing url_val if present, otherwise move path_val into url_val + if not url_val: + url_val = path_val + path_val = None + except Exception: + pass + + # Extract media_kind if available + if "media_kind" in value: + extra["media_kind"] = value["media_kind"] + + pipe_obj = models.PipeObject( + hash=hash_val, + store=store_val, + tags=tags_val, + title=title_val, + url=url_val, + source_url=value.get("source_url"), + duration=value.get("duration") or value.get("duration_seconds"), + metadata=value.get("metadata") or value.get("full_metadata") or {}, + warnings=list(value.get("warnings") or []), + path=path_val, + relationships=rels, + is_temp=bool(value.get("is_temp", False)), + action=value.get("action"), + parent_hash=value.get("parent_hash") or value.get("parent_id"), + extra=extra, + ) + + # Debug: Print formatted table + pipe_obj.debug_table() + + return pipe_obj + + # Fallback: build from path argument or bare value + hash_val = "unknown" + path_val = default_path or getattr(value, "path", None) + title_val = None + + if path_val and path_val != "unknown": + try: + from helper.utils import sha256_file + from pathlib import Path + path_obj = Path(path_val) + hash_val = sha256_file(path_obj) + # Extract title from filename (without extension) + title_val = path_obj.stem + except Exception: + pass + + # When coming from path argument, store should be "PATH" (file path, not a backend) + store_val = "PATH" + + pipe_obj = models.PipeObject( + hash=hash_val, + store=store_val, + path=str(path_val) if path_val and path_val != "unknown" else None, + title=title_val, + tags=[], + extra={}, + ) + + # Debug: Print formatted table + pipe_obj.debug_table() + + return pipe_obj + + +def register_url_with_local_library(pipe_obj: models.PipeObject, config: Dict[str, Any]) -> bool: + """Register url with a file in the local library database. + + This is called automatically by download cmdlets to ensure url are persisted + without requiring a separate add-url step in the pipeline. 
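+
+     New url values are merged into the file's existing metadata["url"] list, so repeated
+     calls do not create duplicates.
+
+     Illustrative call (hypothetical objects):
+         ok = register_url_with_local_library(pipe_obj, config)  # True once the url is stored or already present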
+ + Args: + pipe_obj: PipeObject with path and url + config: Config dict containing local library path + + Returns: + True if url were registered, False otherwise + """ + + try: + from config import get_local_storage_path + from helper.folder_store import FolderDB + + file_path = get_field(pipe_obj, "path") + url_field = get_field(pipe_obj, "url", []) + urls: List[str] = [] + if isinstance(url_field, str): + urls = [u.strip() for u in url_field.split(",") if u.strip()] + elif isinstance(url_field, (list, tuple)): + urls = [u for u in url_field if isinstance(u, str) and u.strip()] + + if not file_path or not urls: + return False + + path_obj = Path(file_path) + if not path_obj.exists(): + return False + + storage_path = get_local_storage_path(config) + if not storage_path: + return False + + with FolderDB(storage_path) as db: + file_hash = db.get_file_hash(path_obj) + if not file_hash: + return False + metadata = db.get_metadata(file_hash) or {} + existing_url = metadata.get("url") or [] + + # Add any new url + changed = False + for u in urls: + if u not in existing_url: + existing_url.append(u) + changed = True + + if changed: + metadata["url"] = existing_url + db.save_metadata(path_obj, metadata) + return True + + return True # url already existed + except Exception: + return False + diff --git a/cmdlets/add_file.py b/cmdlets/add_file.py index 83c8c6a..10c3cab 100644 --- a/cmdlets/add_file.py +++ b/cmdlets/add_file.py @@ -1,25 +1,24 @@ from __future__ import annotations -from typing import Any, Dict, Optional, Sequence, Iterable, Tuple -from collections.abc import Iterable as IterableABC -import json +from typing import Any, Dict, Optional, Sequence, Tuple, List, Union from pathlib import Path import sys +import shutil import models import pipeline as ctx from helper import hydrus as hydrus_wrapper from helper.logger import log, debug -from helper.file_storage import FileStorage +from helper.store import FileStorage from ._shared import ( - Cmdlet, CmdletArg, parse_cmdlet_args, SharedArgs, create_pipe_object_result, - extract_tags_from_result, extract_title_from_result, extract_known_urls_from_result, - merge_sequences, extract_relationships, extract_duration + Cmdlet, CmdletArg, parse_cmdlet_args, SharedArgs, + extract_tags_from_result, extract_title_from_result, extract_url_from_result, + merge_sequences, extract_relationships, extract_duration, get_origin, coerce_to_pipe_object ) from ._shared import collapse_namespace_tags -from helper.local_library import read_sidecar, find_sidecar, write_sidecar, LocalLibraryDB -from helper.utils import sha256_file -from metadata import embed_metadata_in_file +from helper.folder_store import read_sidecar, find_sidecar, write_sidecar, FolderDB +from helper.utils import sha256_file, unique_path +from metadata import write_metadata # Use official Hydrus supported filetypes from hydrus_wrapper SUPPORTED_MEDIA_EXTENSIONS = hydrus_wrapper.ALL_SUPPORTED_EXTENSIONS @@ -28,1059 +27,874 @@ SUPPORTED_MEDIA_EXTENSIONS = hydrus_wrapper.ALL_SUPPORTED_EXTENSIONS storage = FileStorage() -def _guess_media_kind_from_suffix(media_path: Path) -> str: - suffix = media_path.suffix.lower() - if suffix in {'.mp3', '.flac', '.wav', '.m4a', '.aac', '.ogg', '.opus', '.wma', '.mka'}: - return 'audio' - if suffix in {'.mp4', '.mkv', '.webm', '.mov', '.avi', '.flv', '.mpg', '.mpeg', '.ts', '.m4v', '.wmv'}: - return 'video' - if suffix in {'.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.tiff'}: - return 'image' - if suffix in {'.pdf', '.epub', '.txt', '.mobi', '.azw3', 
'.cbz', '.cbr', '.doc', '.docx'}: - return 'document' - return 'other' +class Add_File(Cmdlet): + """Add file into the DB""" + def __init__(self) -> None: + """Initialize add-file cmdlet.""" + super().__init__( + name="add-file", + summary="Upload a media file to specified location (Hydrus, file provider, or local directory).", + usage="add-file (-path | ) (-storage | -provider ) [-delete]", + arg=[ + SharedArgs.PATH, + SharedArgs.STORE, + SharedArgs.HASH, + CmdletArg(name="provider", type="string", required=False, description="File hosting provider (e.g., 0x0)", alias="prov"), + CmdletArg(name="delete", type="flag", required=False, description="Delete file after successful upload", alias="del"), + ], + detail=[ + "- Storage location options (use -storage):", + " hydrus: Upload to Hydrus database with metadata tagging", + " local: Copy file to local directory", + " : Copy file to specified directory", + "- File provider options (use -provider):", + " 0x0: Upload to 0x0.st for temporary hosting", + ], + exec=self.run, + ) + self.register() -def _resolve_media_kind(result: Any, media_path: Path) -> str: - if isinstance(result, models.PipeObject): - if getattr(result, 'media_kind', None): - return str(result.media_kind) - elif isinstance(result, dict): - media_kind = result.get('media_kind') - if media_kind: - return str(media_kind) - metadata = result.get('metadata') - if isinstance(metadata, dict) and metadata.get('media_kind'): - return str(metadata['media_kind']) - return _guess_media_kind_from_suffix(media_path) + def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: + """Main execution entry point.""" + # Parse arguments + parsed = parse_cmdlet_args(args, self) + # Initialize state + path_arg = parsed.get("path") + location = parsed.get("store") # Fixed: was "storage", should be "store" + provider_name = parsed.get("provider") + delete_after = parsed.get("delete", False) -def _load_sidecar_bundle(media_path: Path, origin: Optional[str] = None, config: Optional[dict] = None) -> tuple[Optional[Path], Optional[str], list[str], list[str]]: - # For local origin, try to read from local database first - if origin and origin.lower() == "local" and config: - try: - from helper.local_library import LocalLibraryDB - from config import get_local_storage_path - + # Coerce result to PipeObject; if result is a list, prefer the first element + effective_result = result + if isinstance(result, list) and result: + first_item = result[0] + # Prefer first item if it's a dict or PipeObject + if isinstance(first_item, (dict, )): + effective_result = first_item + pipe_obj = coerce_to_pipe_object(effective_result, path_arg) + + # Debug: Log input result details + debug(f"[add-file] INPUT result type={type(result).__name__}") + if isinstance(result, list): + debug(f"[add-file] INPUT result is list with {len(result)} items") + if result and isinstance(result[0], dict): + first = result[0] + hash_val = first.get('hash') + hash_str = hash_val[:12] + "..." if hash_val else "N/A" + debug(f"[add-file] First item details: title={first.get('title')}, hash={hash_str}, store={first.get('store', 'N/A')}") + elif isinstance(result, dict): + hash_val = result.get('hash') + hash_str = hash_val[:12] + "..." 
if hash_val else "N/A" + debug(f"[add-file] INPUT result is dict: title={result.get('title')}, hash={hash_str}, store={result.get('store', 'N/A')}") + + # Debug: Log parsed arguments + debug(f"[add-file] PARSED args: location={location}, provider={provider_name}, delete={delete_after}") + + # Resolve source - returns (media_path_or_url, file_hash) + media_path_or_url, file_hash = self._resolve_source(result, path_arg, pipe_obj, config) + debug(f"[add-file] RESOLVED source: path={media_path_or_url}, hash={file_hash[:12] if file_hash else 'N/A'}...") + if not media_path_or_url: + debug(f"[add-file] ERROR: Could not resolve source file/URL") + return 1 + + # Check if it's a URL before validating as file + if isinstance(media_path_or_url, str) and media_path_or_url.lower().startswith(("http://", "https://", "magnet:", "torrent:")): + debug(f"Detected URL target, delegating to download-data: {media_path_or_url}") + return self._delegate_to_download_data(result, media_path_or_url, location, provider_name, args, config) + + # Convert to Path and validate + media_path = Path(media_path_or_url) if isinstance(media_path_or_url, str) else media_path_or_url + + # Validate source + if not self._validate_source(media_path): + debug(f"[add-file] ERROR: Source validation failed for {media_path}") + return 1 + + # Debug: Log execution path decision + debug(f"[add-file] DECISION POINT: provider={provider_name}, location={location}") + debug(f" media_path={media_path}, exists={media_path.exists()}") + + # Execute transfer based on destination (using class-based FileStorage system) + if provider_name: + debug(f"[add-file] ROUTE: file provider upload") + return self._handle_provider_upload(media_path, provider_name, pipe_obj, config, delete_after) + elif location: + # Check if location is a registered backend name using FileStorage try: - db_root = get_local_storage_path(config) - except Exception: - db_root = None - - if db_root: + storage = FileStorage(config) + backends = storage.list_backends() + + if location in backends: + debug(f"[add-file] ROUTE: storage backend '{location}'") + return self._handle_storage_backend(media_path, location, pipe_obj, config, delete_after) + else: + # Treat as local export path + debug(f"[add-file] ROUTE: local export to path '{location}'") + return self._handle_local_export(media_path, location, pipe_obj, config, delete_after) + except Exception as exc: + debug(f"[add-file] ERROR: Failed to resolve location: {exc}") + log(f"Invalid location: {location}", file=sys.stderr) + return 1 + else: + debug(f"[add-file] ERROR: No location or provider specified") + log(f"No storage location or provider specified", file=sys.stderr) + return 1 + + @staticmethod + def _resolve_source( + result: Any, + path_arg: Optional[str], + pipe_obj: models.PipeObject, + config: Dict[str, Any], + ) -> Tuple[Optional[Path | str], Optional[str]]: + """Resolve the source file path from args or pipeline result. + + PRIORITY: hash+store pattern is preferred over path-based resolution. + This ensures consistency when @N selections pass hash+store identifiers. + + Returns (media_path_or_url, file_hash) + where media_path_or_url can be a Path object or a URL string. 
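+
+     Resolution order, sketched with hypothetical inputs:
+         {"hash": "<sha256>", "store": "test"}   -> backend.get_file(hash), hash returned
+         -path C:/media/x.m4a                    -> (Path("C:/media/x.m4a"), None)
+         pipe_obj.path == "hydrus:<sha256>"      -> physical path fetched from Hydrus
+         "https://example.com/video"             -> URL string passed through for delegation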
+ """ + # PRIORITY 1: Try hash+store from result dict (most reliable for @N selections) + if isinstance(result, dict): + result_hash = result.get("hash") + result_store = result.get("store") + if result_hash and result_store: + debug(f"[add-file] Using hash+store from result: hash={result_hash[:12]}..., store={result_store}") + # Use get_file to retrieve from the specific store try: - with LocalLibraryDB(Path(db_root)) as db: - # Get tags and metadata from database - tags = db.get_tags(media_path) or [] - metadata = db.get_metadata(media_path) or {} - known_urls = metadata.get("known_urls") or [] - file_hash = metadata.get("hash") - - if tags or known_urls or file_hash: - debug(f"Found metadata in local database: {len(tags)} tag(s), {len(known_urls)} URL(s)") - return None, file_hash, tags, known_urls + from helper.store import FileStorage + storage = FileStorage(config) + if result_store in storage.list_backends(): + backend = storage[result_store] + media_path = backend.get_file(result_hash) + if media_path and media_path.exists(): + pipe_obj.path = str(media_path) + debug(f"[add-file] Retrieved file from {result_store}: {media_path}") + return media_path, result_hash except Exception as exc: - log(f"⚠️ Could not query local database: {exc}", file=sys.stderr) - except Exception: - pass - - # Fall back to sidecar file lookup - try: - sidecar_path = find_sidecar(media_path) - except Exception: - sidecar_path = None - if not sidecar_path or not sidecar_path.exists(): - return None, None, [], [] - try: - hash_value, tags, known_urls = read_sidecar(sidecar_path) - return sidecar_path, hash_value, tags or [], known_urls or [] - except Exception as exc: - log(f"⚠️ Failed to read sidecar for {media_path.name}: {exc}", file=sys.stderr) - return sidecar_path, None, [], [] + debug(f"[add-file] Failed to retrieve via hash+store: {exc}") + + # PRIORITY 2: Try explicit path argument + if path_arg: + media_path = Path(path_arg) + pipe_obj.path = str(media_path) + debug(f"[add-file] Using explicit path argument: {media_path}") + return media_path, None + # PRIORITY 3: Try from pipe_obj.path (check file first before URL) + pipe_path = getattr(pipe_obj, "path", None) + if pipe_path: + pipe_path_str = str(pipe_path) + debug(f"Resolved pipe_path: {pipe_path_str}") + if pipe_path_str.startswith("hydrus:"): + file_hash = pipe_path_str.split(":", 1)[1] + media_path, success = Add_File._fetch_hydrus_path(file_hash, config) + return media_path, file_hash if success else None + # Check if pipe_path is a URL - skip to URL handling below + if not pipe_path_str.lower().startswith(("http://", "https://", "magnet:", "torrent:")): + media_path = Path(pipe_path_str) + return media_path, None + + # PRIORITY 4: Try from pipe_obj.url (for streaming url without downloaded file) + pipe_url = getattr(pipe_obj, "url", None) + if pipe_url and isinstance(pipe_url, str): + # Check if it's a URL + if pipe_url.lower().startswith(("http://", "https://", "magnet:", "torrent:")): + debug(f"Detected URL in pipe_obj.url: {pipe_url}") + return pipe_url, None -def _resolve_file_hash(result: Any, fallback_hash: Optional[str], file_path: Path) -> Optional[str]: - candidate = None - if isinstance(result, models.PipeObject): - candidate = result.file_hash - elif isinstance(result, dict): - candidate = result.get('file_hash') or result.get('hash') - candidate = candidate or fallback_hash - if candidate: - return str(candidate) - try: - return sha256_file(file_path) - except Exception as exc: - log(f"⚠️ Could not compute SHA-256 for 
{file_path.name}: {exc}", file=sys.stderr) - return None + # Try from hydrus hash in pipe_obj.extra or hash + hydrus_hash = None + if isinstance(pipe_obj.extra, dict): + hydrus_hash = pipe_obj.extra.get("hydrus_hash") or pipe_obj.extra.get("hash") + hydrus_hash = hydrus_hash or pipe_obj.hash + if hydrus_hash and hydrus_hash != "unknown": + media_path, success = Add_File._fetch_hydrus_path(str(hydrus_hash), config) + return media_path, str(hydrus_hash) if success else None -def _cleanup_sidecar_files(media_path: Path, *extra_paths: Optional[Path]) -> None: - targets = [ - media_path.parent / (media_path.name + '.metadata'), - media_path.parent / (media_path.name + '.notes'), - media_path.parent / (media_path.name + '.tags'), - media_path.parent / (media_path.name + '.tags.txt'), - ] - targets.extend(extra_paths) - for target in targets: - if not target: - continue - try: - path_obj = Path(target) - if path_obj.exists(): - path_obj.unlink() - except Exception: - continue + # Try from result (if it's a string path or URL) + if isinstance(result, str): + debug(f"Checking result string: {result}") + # Check if result is a URL before treating as file path + if result.lower().startswith(("http://", "https://", "magnet:", "torrent:")): + debug(f"Detected URL in result string: {result}") + return result, None # Return URL string directly + media_path = Path(result) + pipe_obj.path = str(media_path) + return media_path, None + # Try from result if it's a list (pipeline emits multiple results) + if isinstance(result, list) and result: + first_item = result[0] + # If the first item is a string, it's either a URL or a file path + if isinstance(first_item, str): + debug(f"Checking result list[0]: {first_item}") + if first_item.lower().startswith(("http://", "https://", "magnet:", "torrent:")): + debug(f"Detected URL in result list: {first_item}") + return first_item, None # Return URL string directly + media_path = Path(first_item) + pipe_obj.path = str(media_path) + return media_path, None -def _show_local_result_table(file_hash: Optional[str], config: Dict[str, Any]) -> None: - """Run search-file by hash to display the newly added local file in a table.""" - if not file_hash: - return - try: - from cmdlets import search_file as search_cmd - temp_ctx = models.PipelineStageContext(0, 1) - saved_ctx = ctx.get_stage_context() - ctx.set_stage_context(temp_ctx) - try: - # Call the cmdlet exactly like the user would type: search-file "hash:...,store:local" - search_cmd._run(None, [f"hash:{file_hash},store:local"], config) + # If the first item is a dict, interpret it as a PipeObject-style result + if isinstance(first_item, dict): + # Look for path or path-like keys + path_candidate = first_item.get("path") or first_item.get("filepath") or first_item.get("file") + # If the dict includes a 'paths' list (multi-part/section download), prefer the first file + if not path_candidate and isinstance(first_item.get("paths"), (list, tuple)) and first_item.get("paths"): + path_candidate = first_item.get("paths")[0] + if path_candidate: + debug(f"Resolved path from result dict: {path_candidate}") + try: + media_path = Path(path_candidate) + pipe_obj.path = str(media_path) + return media_path, first_item.get("hash") + except Exception: + # Fallback to returning string if not a path + return str(path_candidate), first_item.get("hash") + + # If first item is a PipeObject object try: - table = ctx.get_last_result_table() - if table is not None: - log("") - log(table.format_plain()) + # models.PipeObject is an actual class; 
check attribute presence + import models as _models + if isinstance(first_item, _models.PipeObject): + path_candidate = getattr(first_item, "path", None) + if path_candidate: + debug(f"Resolved path from PipeObject: {path_candidate}") + media_path = Path(path_candidate) + pipe_obj.path = str(media_path) + return media_path, getattr(first_item, "hash", None) except Exception: pass - finally: - ctx.set_stage_context(saved_ctx) - except Exception as exc: - debug(f"[add-file] Skipped search-file display: {exc}") + debug(f"No resolution path matched. pipe_obj.path={pipe_path}, result type={type(result).__name__}") + log("File path could not be resolved") + return None, None -def _persist_local_metadata( - library_root: Path, - dest_path: Path, - tags: list[str], - known_urls: list[str], - file_hash: Optional[str], - relationships: Optional[Dict[str, Any]], - duration: Optional[float], - media_kind: str, -) -> None: - payload = { - 'hash': file_hash, - 'known_urls': known_urls, - 'relationships': relationships or [], - 'duration': duration, - 'size': None, - 'ext': dest_path.suffix.lower(), - 'media_type': media_kind, - 'media_kind': media_kind, - } - try: - payload['size'] = dest_path.stat().st_size - except OSError: - payload['size'] = None + @staticmethod + def _fetch_hydrus_path(file_hash: str, config: Dict[str, Any]) -> Tuple[Optional[Path], bool]: + """Fetch the physical path of a file from Hydrus using its hash.""" + if not file_hash: + return None, False - try: - debug(f"[_persist_local_metadata] Saving metadata to DB at: {library_root}") - db_path = Path(library_root) / ".downlow_library.db" - debug(f"[_persist_local_metadata] Database file: {db_path}, exists: {db_path.exists()}") - debug(f"[_persist_local_metadata] File: {dest_path}, exists: {dest_path.exists()}, Tags: {len(tags)}, Hash: {file_hash}") - debug(f"[_persist_local_metadata] Absolute dest_path: {dest_path.resolve()}") - - with LocalLibraryDB(library_root) as db: - # Use optimized single-transaction save - debug(f"[_persist_local_metadata] Saving metadata and {len(tags)} tags to DB") - try: - db.save_file_info(dest_path, payload, tags) - debug(f"[_persist_local_metadata] ✅ File info saved to DB") - except Exception as exc: - log(f"[_persist_local_metadata] ❌ Failed to save file info: {exc}", file=sys.stderr) - raise - - # NOTE: Sidecar files are intentionally NOT created for local storage - # Local storage uses database as primary source, not sidecar files - - debug(f"[_persist_local_metadata] ✅ Metadata persisted successfully") - except Exception as exc: - log(f"⚠️ Failed to persist metadata to local database: {exc}", file=sys.stderr) - import traceback - log(traceback.format_exc(), file=sys.stderr) - - -def _handle_local_transfer( - media_path: Path, - destination_root: Path, - result: Any, - config: Optional[Dict[str, Any]] = None, - export_mode: bool = False, -) -> Tuple[int, Optional[Path]]: - """Transfer a file to local storage and return (exit_code, destination_path). 
- - Args: - media_path: Path to source file - destination_root: Destination directory - result: Result object with metadata - config: Configuration dictionary - - Returns: - Tuple of (exit_code, destination_path) - - exit_code: 0 on success, 1 on failure - - destination_path: Path to moved file on success, None on failure - """ - destination_root = destination_root.expanduser() - try: - destination_root.mkdir(parents=True, exist_ok=True) - except Exception as exc: - log(f"Cannot prepare destination directory {destination_root}: {exc}", file=sys.stderr) - return 1, None - - - tags_from_result = extract_tags_from_result(result) - urls_from_result = extract_known_urls_from_result(result) - # Get origin from result if available - result_origin = None - if hasattr(result, "origin"): - result_origin = result.origin - elif isinstance(result, dict): - result_origin = result.get("origin") or result.get("source") - sidecar_path, sidecar_hash, sidecar_tags, sidecar_urls = _load_sidecar_bundle(media_path, origin=result_origin, config=config) - - # Normalize all title tags to use spaces instead of underscores BEFORE merging - # This ensures that "Radiohead - Creep" and "Radiohead_-_Creep" are treated as the same title - def normalize_title_tag(tag: str) -> str: - """Normalize a title tag by replacing underscores with spaces.""" - if str(tag).strip().lower().startswith("title:"): - parts = tag.split(":", 1) - if len(parts) == 2: - value = parts[1].replace("_", " ").strip() - return f"title:{value}" - return tag - - tags_from_result = collapse_namespace_tags([normalize_title_tag(t) for t in tags_from_result], "title", prefer="last") - sidecar_tags = collapse_namespace_tags([normalize_title_tag(t) for t in sidecar_tags], "title", prefer="last") - - # Merge tags carefully: if URL has title tag, don't include sidecar title tags - # This prevents duplicate title: tags when URL provides a title - has_url_title = any(str(t).strip().lower().startswith("title:") for t in tags_from_result) - if has_url_title: - # URL has a title, filter out any sidecar title tags to avoid duplication - sidecar_tags_filtered = [t for t in sidecar_tags if not str(t).strip().lower().startswith("title:")] - merged_tags = merge_sequences(tags_from_result, sidecar_tags_filtered, case_sensitive=True) - else: - # No URL title, use all sidecar tags - merged_tags = merge_sequences(tags_from_result, sidecar_tags, case_sensitive=True) - - merged_urls = merge_sequences(urls_from_result, sidecar_urls, case_sensitive=False) - relationships = extract_relationships(result) - duration = extract_duration(result) - - # Skip title-based renaming for library mode (hash-based) but allow for export mode below - - try: - if export_mode: - title_tag = next((t for t in merged_tags if str(t).strip().lower().startswith("title:")), None) - title_value = "" - if title_tag: - title_value = title_tag.split(":", 1)[1].strip() - if not title_value: - title_value = media_path.stem.replace("_", " ").strip() - # Sanitize filename - safe_title = "".join(c for c in title_value if c.isalnum() or c in " ._-()[]{}'`").strip() - base_name = safe_title or media_path.stem - new_name = base_name + media_path.suffix - target_path = destination_root / new_name - destination_root.mkdir(parents=True, exist_ok=True) - if target_path.exists(): - from helper.utils import unique_path - target_path = unique_path(target_path) - shutil.move(str(media_path), target_path) - - # Move/copy sidecar files alongside - possible_sidecars = [ - media_path.with_suffix(media_path.suffix + ".json"), 
- media_path.with_name(media_path.name + ".tags"), - media_path.with_name(media_path.name + ".tags.txt"), - media_path.with_name(media_path.name + ".metadata"), - media_path.with_name(media_path.name + ".notes"), - ] - for sc in possible_sidecars: - try: - if sc.exists(): - suffix_part = sc.name.replace(media_path.name, "", 1) - dest_sidecar = target_path.parent / f"{target_path.name}{suffix_part}" - dest_sidecar.parent.mkdir(parents=True, exist_ok=True) - shutil.move(str(sc), dest_sidecar) - except Exception: - pass - media_path = target_path - dest_file = str(target_path) - else: - # Ensure filename is the hash when adding to local storage - resolved_hash = _resolve_file_hash(result, sidecar_hash, media_path) - hashed_move_done = False - if resolved_hash: - hashed_name = resolved_hash + media_path.suffix - target_path = destination_root / hashed_name - try: - if target_path.exists(): - target_path.unlink() - except Exception: - pass - if media_path != target_path: - media_path = media_path.rename(target_path) - hashed_move_done = True - - if hashed_move_done and media_path.parent.samefile(destination_root): - # Already placed at final destination with hash name; skip extra upload/move - dest_file = str(media_path) - else: - dest_file = storage["local"].upload(media_path, location=str(destination_root), move=True) - except Exception as exc: - log(f"❌ Failed to move file into {destination_root}: {exc}", file=sys.stderr) - return 1, None - - dest_path = Path(dest_file) - file_hash = _resolve_file_hash(result, resolved_hash, dest_path) - media_kind = _resolve_media_kind(result, dest_path) - - # If we have a title tag, keep it. Otherwise, derive from filename. - has_title = any(str(t).strip().lower().startswith("title:") for t in merged_tags) - final_tags = collapse_namespace_tags(merged_tags, "title", prefer="last") - - if not has_title: - filename_title = dest_path.stem.replace("_", " ").strip() - if filename_title: - final_tags.insert(0, f"title:{filename_title}") - - if not export_mode: - _persist_local_metadata(destination_root, dest_path, final_tags, merged_urls, file_hash, relationships, duration, media_kind) - _cleanup_sidecar_files(media_path, sidecar_path) - _show_local_result_table(file_hash, config or {}) - else: - debug(f"✅ Exported to destination: {dest_path}") - return 0, dest_path - - - - - -def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int: - """Upload/copy a file to specified location. - - Returns 0 on success, non-zero on failure. 
- """ - import sys # For stderr output - - # Help - try: - if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in _args): - log(json.dumps(CMDLET, ensure_ascii=False, indent=2)) - return 0 - except Exception: - pass - - debug("Starting add-file cmdlet") - - # Handle list of results (from piped commands that emit multiple items) - if isinstance(result, list): - debug(f"Processing {len(result)} piped files") - success_count = 0 - for item in result: - exit_code = _run(item, _args, config) - if exit_code == 0: - success_count += 1 - return 0 if success_count > 0 else 1 - - # Parse arguments using CMDLET spec - parsed = parse_cmdlet_args(_args, CMDLET) - location: Optional[str] = None - provider_name: Optional[str] = None - delete_after_upload = False - - # Check if -path argument was provided - path_arg = parsed.get("path") - if path_arg: - path_value = Path(str(path_arg).strip()) - # If there is no piped result, treat -path as the source file (existing behavior) - if result is None: - if not path_value.exists(): - log(f"❌ File not found: {path_value}") - return 1 - result = {"target": str(path_value), "origin": "wild"} - log(f"Using direct file path: {path_value}") - else: - # Piped result present: treat -path as destination (export) - if not path_value.exists(): - try: - path_value.mkdir(parents=True, exist_ok=True) - except Exception as exc: - log(f"❌ Cannot create destination directory {path_value}: {exc}", file=sys.stderr) - return 1 - location = str(path_value) - - # Get location from parsed args - now uses SharedArgs.STORAGE so key is "storage" - storage_arg = parsed.get("storage") - if location is None: - location = storage_arg - if location: - location = str(location).lower().strip() - elif storage_arg: - # User provided both -path (as destination) and -storage; prefer explicit storage only if it matches - storage_str = str(storage_arg).lower().strip() - if storage_str != str(location).lower(): - log(f"❌ Conflicting destinations: -path '{location}' vs -storage '{storage_str}'", file=sys.stderr) - return 1 - - # Get file provider from parsed args - provider_name = parsed.get("provider") - if provider_name: - provider_name = str(provider_name).lower().strip() - - # Check for delete flag (presence in parsed dict means it was provided) - delete_after_upload = "delete" in parsed - - # Either storage or provider must be specified, but not both - if location is None and provider_name is None: - log("Either -storage or -provider must be specified") - log(" -storage options: 'hydrus', 'local', or a directory path") - log(" -provider options: '0x0'") - return 1 - - if location is not None and provider_name is not None: - log("❌ Cannot specify both -storage and -provider") - return 1 - - # Validate location (storage backends) - is_valid_location = False - if location is not None: - valid_locations = {'hydrus', 'local'} - is_valid_location = location in valid_locations - is_local_path = not is_valid_location and location is not None and ('/' in location or '\\' in location or ':' in location) - - if location is not None and not (is_valid_location or is_local_path): - log(f"❌ Invalid location: {location}") - log(f"Valid options: 'hydrus', '0x0', 'local', or a directory path (e.g., C:\\Music or /home/user/music)") - return 1 - - # Extract tags/known URLs from pipeline objects if available - pipe_object_tags = extract_tags_from_result(result) - if pipe_object_tags: - debug(f"Extracted {len(pipe_object_tags)} tag(s) from pipeline result: {', 
'.join(pipe_object_tags[:5])}", file=sys.stderr) - pipe_known_urls = extract_known_urls_from_result(result) - - # Resolve media path: get from piped result - # Support both object attributes (getattr) and dict keys (get) - target = None - origin = None - - # Try object attributes first - if hasattr(result, "target"): - target = result.target - elif hasattr(result, "path"): - target = result.path - elif hasattr(result, "file_path"): - target = result.file_path - # Try dict keys if object attributes failed - elif isinstance(result, dict): - target = (result.get("target") or result.get("path") or result.get("file_path") or - result.get("__file_path") or result.get("__path") or result.get("__target")) - - # Get origin to detect Hydrus files - if hasattr(result, "origin"): - origin = result.origin - elif hasattr(result, "source"): - origin = result.source - elif isinstance(result, dict): - origin = result.get("origin") or result.get("source") or result.get("__source") - - # Convert target to string and preserve URLs (don't let Path() mangle them) - target_str = str(target) if target else None - - # Check if this is a playlist item that needs to be downloaded first - is_playlist_item = isinstance(result, dict) and result.get("__source") == "playlist-probe" - if is_playlist_item and target_str and target_str.lower().startswith(("http://", "https://")): - # This is a playlist item URL - we need to download it first - log(f"Detected playlist item, downloading: {target_str}", file=sys.stderr) - - # Extract item number if available - item_num = None - if "__action" in result and result["__action"].startswith("playlist-item:"): - item_num = result["__action"].split(":")[1] - elif "index" in result: - item_num = result["index"] - - # Call download-data to download this specific item - # Pass the item number so it knows which track to download - from cmdlets import download_data as dl_module - - # Capture emissions from download-data to process them - captured_results = [] - original_emit = ctx.emit - - def capture_emit(obj): - captured_results.append(obj) - # Also emit to original so user sees progress/output if needed - # But since add-file is usually terminal, we might not need to - # original_emit(obj) - - # Temporarily hook the pipeline emit function - ctx.emit = capture_emit - try: - if item_num: - # Pass a marker dict to tell download-data which item to get - download_result = dl_module._run( - { - "__playlist_url": str(target_str), - "__playlist_item": int(item_num) - }, - [], - config - ) - else: - # Fallback: just download the URL (will show all items) - download_result = dl_module._run(None, [str(target_str)], config) - finally: - # Restore original emit function - ctx.emit = original_emit - - if download_result != 0: - log(f"❌ Failed to download playlist item", file=sys.stderr) - return 1 - - log(f"✓ Playlist item downloaded, processing {len(captured_results)} file(s)...", file=sys.stderr) - - # Process the downloaded files recursively - success_count = 0 - for res in captured_results: - # Recursively call add-file with the downloaded result - # This ensures tags and metadata from download-data are applied - if _run(res, _args, config) == 0: - success_count += 1 - - return 0 if success_count > 0 else 1 - # Determine media_path from result - media_path: Optional[Path] = None - is_hydrus_file = origin and origin.lower() == "hydrus" - - if target_str: - # Check if it's a URL or Hydrus hash - if target_str.lower().startswith(("http://", "https://")): - media_path = None # Will handle as Hydrus 
file below - elif not is_hydrus_file: - # Only treat as local path if not a Hydrus file - media_path = Path(target_str) - - if media_path is None and not is_hydrus_file and (target_str is None or not target_str.lower().startswith(("http://", "https://"))): - # Check if this is a format object from download-data - if isinstance(result, dict) and result.get('format_id') is not None: - log("❌ Format object received, but add-file expects a downloaded file") - log(f" Tip: Use @N to automatically select and download the format") - log(f" Streamlined workflow:") - log(f" download-data \"URL\" | @{result.get('index', 'N')} | add-file -storage local") - log(f" (The @N automatically expands to download-data \"URL\" -item N)") - return 1 - log("❌ File not found: provide a piped file result or local file path") - return 1 - - # Check if this is a Hydrus file - fetch the actual file path from Hydrus - if is_hydrus_file and target_str: - log(f"Detected Hydrus file (hash: {target_str}), fetching local path from Hydrus...", file=sys.stderr) - try: - from helper import hydrus - - # Get the Hydrus client - client = hydrus.get_client(config) + client = hydrus_wrapper.get_client(config) if not client: - log(f"❌ Hydrus client unavailable", file=sys.stderr) - return 1 - - # target_str is the hash - need to get the actual file path from Hydrus - file_hash = target_str - - # Call the /get_files/file_path endpoint to get the actual file path + log("❌ Hydrus client not available", file=sys.stderr) + return None, False + response = client.get_file_path(file_hash) - if not response or not isinstance(response, dict): - log(f"❌ Hydrus file_path endpoint returned invalid response", file=sys.stderr) - return 1 - file_path_str = response.get("path") if not file_path_str: log(f"❌ Hydrus file_path endpoint did not return a path", file=sys.stderr) - return 1 - + return None, False + media_path = Path(file_path_str) if not media_path.exists(): log(f"❌ Hydrus file path does not exist: {media_path}", file=sys.stderr) - return 1 - + return None, False + log(f"✓ Retrieved Hydrus file path: {media_path}", file=sys.stderr) - + return media_path, True except Exception as exc: log(f"❌ Failed to get Hydrus file path: {exc}", file=sys.stderr) - import traceback - log(f"Traceback: {traceback.format_exc()}", file=sys.stderr) - return 1 + return None, False - # Generic URL handler: if target is a URL and we haven't resolved a local path yet - # This handles cases like "search-file -provider openlibrary ... 
| add-file -storage local" - if target_str and target_str.lower().startswith(("http://", "https://")) and not is_hydrus_file and not is_playlist_item and media_path is None: - log(f"Target is a URL, delegating to download-data: {target_str}", file=sys.stderr) - from cmdlets import download_data as dl_module - - dl_args = [] - if location: - dl_args.extend(["-storage", location]) - - # Map provider 0x0 to storage 0x0 for download-data - if provider_name == "0x0": - dl_args.extend(["-storage", "0x0"]) - - # Capture results from download-data so we can add them to DB - captured_results = [] - original_emit = ctx.emit - - def capture_emit(obj): - captured_results.append(obj) - original_emit(obj) - - ctx.emit = capture_emit - - try: - ret_code = dl_module._run(result, dl_args, config) - finally: - ctx.emit = original_emit - - if ret_code != 0: - return ret_code - - # Process the downloaded files recursively to add them to DB - if captured_results: - log(f"Processing {len(captured_results)} downloaded file(s)...", file=sys.stderr) - success_count = 0 - for res in captured_results: - # Recursively call add-file with the downloaded result - if _run(res, _args, config) == 0: - success_count += 1 - return 0 if success_count > 0 else 1 - - return 0 - - if media_path is None: - log("File path could not be resolved") - return 1 - - if not media_path.exists() or not media_path.is_file(): - log(f"File not found: {media_path}") - return 1 - - # Validate file type - only accept Hydrus-supported files - file_extension = media_path.suffix.lower() - if file_extension not in SUPPORTED_MEDIA_EXTENSIONS: - log(f"❌ Unsupported file type: {file_extension}", file=sys.stderr) - log(f"Hydrus supports the following file types:", file=sys.stderr) - # Display by category from hydrus_wrapper - for category, extensions in sorted(hydrus_wrapper.SUPPORTED_FILETYPES.items()): - ext_list = ', '.join(sorted(e.lstrip('.') for e in extensions.keys())) - log(f"{category.capitalize()}: {ext_list}", file=sys.stderr) - log(f"Skipping this file: {media_path.name}", file=sys.stderr) - return 1 + @staticmethod + def _validate_source(media_path: Optional[Path]) -> bool: + """Validate that the source file exists and is supported.""" + if media_path is None: + return False - # Handle based on provider or storage - if provider_name is not None: - # Use file provider (e.g., 0x0.st) - from helper.search_provider import get_file_provider - - log(f"Uploading via {provider_name} file provider: {media_path.name}", file=sys.stderr) + target_str = str(media_path) + + # If it's a URL target, we skip file existence checks + if target_str.lower().startswith(("http://", "https://", "magnet:", "torrent:")): + return True + + if not media_path.exists() or not media_path.is_file(): + log(f"File not found: {media_path}") + return False + + # Validate file type + file_extension = media_path.suffix.lower() + if file_extension not in SUPPORTED_MEDIA_EXTENSIONS: + log(f"❌ Unsupported file type: {file_extension}", file=sys.stderr) + return False + + return True + + @staticmethod + def _is_url_target(media_path: Optional[Path]) -> bool: + """Check if the target is a URL that needs downloading.""" + if media_path and str(media_path).lower().startswith(("http://", "https://")): + return True + return False + + def _delegate_to_download_data( + self, + result: Any, + url_str: str, + location: Optional[str], + provider_name: Optional[str], + args: Sequence[str], + config: Dict[str, Any], + ) -> int: + """Delegate URL handling to download-media cmdlet.""" + 
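+         # Argument shape sketch (hypothetical values, assuming add-file was called with no
+         # extra flags): the URL is inserted first, then any @N selection flags are appended.
+         #     url_str = "https://example.com/watch?v=abc"
+         #     result  = {"_selection_args": ["-item", "3"]}
+         #     -> dl_args == ["https://example.com/watch?v=abc", "-item", "3"]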
log(f"Target is a URL, delegating to download-media: {url_str}", file=sys.stderr) + # Reuse the globally-registered cmdlet instance to avoid duplicative registration + from cmdlets.download_media import CMDLET as dl_cmdlet + dl_args = list(args) if args else [] + + # Add the URL to the argument list for download-media + dl_args.insert(0, url_str) + # If result has selection_args (like -item from @N selection), include them + if isinstance(result, dict) and "_selection_args" in result: + selection_args = result["_selection_args"] + if selection_args: + dl_args.extend(selection_args) + elif hasattr(result, 'extra') and isinstance(result.extra, dict) and "_selection_args" in result.extra: + selection_args = result.extra["_selection_args"] + if selection_args: + dl_args.extend(selection_args) + + # download-media doesn't support -storage flag + # It downloads to the configured directory, then add-file will handle storage + # Note: Provider uploads (0x0) are not supported via this path + + # Call download-media with the URL in args + return dl_cmdlet.run(None, dl_args, config) + + @staticmethod + def _get_url(result: Any, pipe_obj: models.PipeObject) -> List[str]: + url: List[str] = [] try: - file_provider = get_file_provider(provider_name, config) - if file_provider is None: - log(f"File provider '{provider_name}' not available", file=sys.stderr) - return 1 - - hoster_url = file_provider.upload(media_path) - log(f"File uploaded to {provider_name}: {hoster_url}", file=sys.stderr) - - # Associate the URL with the file in Hydrus if possible - current_hash = locals().get('file_hash') - if not current_hash: - current_hash = _resolve_file_hash(result, None, media_path) - - if current_hash: - try: - client = hydrus_wrapper.get_client(config) - if client: - client.associate_url(current_hash, hoster_url) - debug(f"Associated URL with file hash {current_hash}", file=sys.stderr) - except Exception as exc: - log(f"Could not associate URL with Hydrus file: {exc}", file=sys.stderr) - - except Exception as exc: - log(f"{provider_name} upload failed: {exc}", file=sys.stderr) - return 1 - - if delete_after_upload: - try: - media_path.unlink() - _cleanup_sidecar_files(media_path) - log(f"✅ Deleted file and sidecar", file=sys.stderr) - except Exception as exc: - log(f"⚠️ Could not delete file: {exc}", file=sys.stderr) - - return 0 - - # Handle storage-based operations (location is not None here) - valid_locations = {'hydrus', 'local', 'matrix'} - is_valid_location = location in valid_locations - is_local_path = not is_valid_location and ('/' in location or '\\' in location or ':' in location) - - if not (is_valid_location or is_local_path): - log(f"❌ Invalid location: {location}") - log(f"Valid options: 'hydrus', 'local', 'matrix', or a directory path") - return 1 - - if location == 'local': - try: - from config import get_local_storage_path - resolved_dir = get_local_storage_path(config) + if isinstance(pipe_obj.extra, dict): + url = list(pipe_obj.extra.get("url") or pipe_obj.extra.get("url") or []) except Exception: - resolved_dir = None + pass - if not resolved_dir: - resolved_dir = config.get("LocalDir") or config.get("OutputDir") + if not url and isinstance(result, dict): + url = list(result.get("url") or result.get("url") or []) + if not url: + url = list(extract_url_from_result(result) or []) + return url - if not resolved_dir: - log("❌ No local storage path configured. 
Set 'storage.local.path' in config.json", file=sys.stderr) - return 1 + @staticmethod + def _get_origin(result: Any, pipe_obj: models.PipeObject) -> Optional[str]: + try: + if isinstance(pipe_obj.extra, dict): + origin = get_origin(pipe_obj.extra) + if origin: + return origin + except Exception: + pass - debug(f"Moving into configured local library: {resolved_dir}", file=sys.stderr) - exit_code, dest_path = _handle_local_transfer(media_path, Path(resolved_dir), result, config) - - # After successful local transfer, emit result for pipeline continuation - # This allows downstream commands like add-tags to chain automatically - if exit_code == 0 and dest_path: - # Extract tags from result for emission - emit_tags = extract_tags_from_result(result) - file_hash = _resolve_file_hash(result, None, dest_path) - - # Extract title from original result, fallback to filename if not available - result_title = extract_title_from_result(result) or dest_path.name - - # Always emit result for local files, even if no tags - # This allows @N selection and piping to downstream commands - result_dict = create_pipe_object_result( - source='local', - identifier=str(dest_path), - file_path=str(dest_path), - cmdlet_name='add-file', - title=result_title, - file_hash=file_hash, - tags=emit_tags if emit_tags else [], - target=str(dest_path) # Explicit target for get-file - ) - ctx.emit(result_dict) - - # Clear the stage table so downstream @N doesn't try to re-run download-data - # Next stage will use these local file results, not format objects - ctx.set_current_stage_table(None) - - return exit_code - - elif is_local_path: + if isinstance(result, dict): + return get_origin(result) + return None + + @staticmethod + def _get_relationships(result: Any, pipe_obj: models.PipeObject) -> Optional[Dict[str, Any]]: + try: + rels = pipe_obj.get_relationships() + if rels: + return rels + except Exception: + pass + if isinstance(result, dict) and result.get("relationships"): + return result.get("relationships") + try: + return extract_relationships(result) + except Exception: + return None + + @staticmethod + def _get_duration(result: Any, pipe_obj: models.PipeObject) -> Optional[float]: + if getattr(pipe_obj, "duration", None) is not None: + return pipe_obj.duration + try: + return extract_duration(result) + except Exception: + return None + + @staticmethod + def _update_pipe_object_destination( + pipe_obj: models.PipeObject, + *, + hash: str, + store: str, + file_path: str, + tags: List[str], + title: Optional[str], + extra_updates: Optional[Dict[str, Any]] = None, + ) -> None: + pipe_obj.hash = hash + pipe_obj.store = store + pipe_obj.path = file_path + pipe_obj.tags = tags + if title: + pipe_obj.title = title + if isinstance(pipe_obj.extra, dict): + pipe_obj.extra.update(extra_updates or {}) + else: + pipe_obj.extra = dict(extra_updates or {}) + + @staticmethod + def _emit_pipe_object(pipe_obj: models.PipeObject) -> None: + from result_table import format_result + log(format_result(pipe_obj, title="Result"), file=sys.stderr) + ctx.emit(pipe_obj.to_dict()) + ctx.set_current_stage_table(None) + + @staticmethod + def _prepare_metadata( + result: Any, + media_path: Path, + pipe_obj: models.PipeObject, + config: Dict[str, Any], + ) -> Tuple[List[str], List[str], Optional[str], Optional[str]]: + """ + Prepare tags, url, and title for the file. 
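+
+     Pipeline tags and sidecar tags are merged with title: tags collapsed to a single
+     normalized entry; url values from both sources are de-duplicated.
+
+     Sketch with hypothetical inputs:
+         result ["artist:X", "title:Old_Name"] + sidecar ["genre:rock", "title:Other"]
+             -> ["artist:X", "genre:rock", "title:Old Name"]
+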
+ Returns (tags, url, preferred_title, file_hash) + """ + tags_from_result = list(pipe_obj.tags or []) + if not tags_from_result: + try: + tags_from_result = list(extract_tags_from_result(result) or []) + except Exception: + tags_from_result = [] + + url_from_result = Add_File._get_url(result, pipe_obj) + + preferred_title = pipe_obj.title + if not preferred_title: + for t in tags_from_result: + if str(t).strip().lower().startswith("title:"): + candidate = t.split(":", 1)[1].strip().replace("_", " ").strip() + if candidate: + preferred_title = candidate + break + if not preferred_title: + preferred_title = extract_title_from_result(result) + if preferred_title: + preferred_title = preferred_title.replace("_", " ").strip() + + result_origin = Add_File._get_origin(result, pipe_obj) + _, sidecar_hash, sidecar_tags, sidecar_url = Add_File._load_sidecar_bundle( + media_path, result_origin, config + ) + + def normalize_title_tag(tag: str) -> str: + if str(tag).strip().lower().startswith("title:"): + parts = tag.split(":", 1) + if len(parts) == 2: + value = parts[1].replace("_", " ").strip() + return f"title:{value}" + return tag + + tags_from_result_no_title = [t for t in tags_from_result if not str(t).strip().lower().startswith("title:")] + sidecar_tags = collapse_namespace_tags([normalize_title_tag(t) for t in sidecar_tags], "title", prefer="last") + sidecar_tags_filtered = [t for t in sidecar_tags if not str(t).strip().lower().startswith("title:")] + + merged_tags = merge_sequences(tags_from_result_no_title, sidecar_tags_filtered, case_sensitive=True) + + if preferred_title: + merged_tags.append(f"title:{preferred_title}") + + merged_url = merge_sequences(url_from_result, sidecar_url, case_sensitive=False) + + file_hash = Add_File._resolve_file_hash(result, media_path, pipe_obj, sidecar_hash) + + # Persist back to PipeObject + pipe_obj.tags = merged_tags + if preferred_title and not pipe_obj.title: + pipe_obj.title = preferred_title + if file_hash and not pipe_obj.hash: + pipe_obj.hash = file_hash + if isinstance(pipe_obj.extra, dict): + pipe_obj.extra.setdefault("url", merged_url) + return merged_tags, merged_url, preferred_title, file_hash + + @staticmethod + def _handle_local_export( + media_path: Path, + location: str, + pipe_obj: models.PipeObject, + config: Dict[str, Any], + delete_after: bool, + ) -> int: + """Handle exporting to a specific local path (Copy).""" try: destination_root = Path(location) except Exception as exc: log(f"❌ Invalid destination path '{location}': {exc}", file=sys.stderr) return 1 - log(f"Moving to local path: {destination_root}", file=sys.stderr) - exit_code, dest_path = _handle_local_transfer(media_path, destination_root, result, config, export_mode=True) - - # After successful local transfer, emit result for pipeline continuation - if exit_code == 0 and dest_path: - # Extract tags from result for emission - emit_tags = extract_tags_from_result(result) - file_hash = _resolve_file_hash(result, None, dest_path) - - # Extract title from original result, fallback to filename if not available - result_title = extract_title_from_result(result) or dest_path.name - - # Always emit result for local files, even if no tags - # This allows @N selection and piping to downstream commands - result_dict = create_pipe_object_result( - source='local', - identifier=str(dest_path), - file_path=str(dest_path), - cmdlet_name='add-file', - title=result_title, - file_hash=file_hash, - tags=emit_tags if emit_tags else [], - target=str(dest_path) # Explicit target for get-file - ) - 
ctx.emit(result_dict) - - # Clear the stage table so downstream @N doesn't try to re-run download-data - # Next stage will use these local file results, not format objects - ctx.set_current_stage_table(None) - - return exit_code - - elif location == 'matrix': - log(f"Uploading to Matrix: {media_path.name}", file=sys.stderr) + log(f"Exporting to local path: {destination_root}", file=sys.stderr) + + result = None + tags, url, title, f_hash = Add_File._prepare_metadata(result, media_path, pipe_obj, config) + + # Determine Filename (Title-based) + title_value = title + if not title_value: + # Try to find title in tags + title_tag = next((t for t in tags if str(t).strip().lower().startswith("title:")), None) + if title_tag: + title_value = title_tag.split(":", 1)[1].strip() + + if not title_value: + title_value = media_path.stem.replace("_", " ").strip() + + safe_title = "".join(c for c in title_value if c.isalnum() or c in " ._-()[]{}'`").strip() + base_name = safe_title or media_path.stem + new_name = base_name + media_path.suffix + + destination_root.mkdir(parents=True, exist_ok=True) + target_path = destination_root / new_name + + if target_path.exists(): + target_path = unique_path(target_path) + + # COPY Operation (Safe Export) try: - result_url = storage["matrix"].upload(media_path, config=config) - log(f"Matrix: {result_url}", file=sys.stderr) - - result_dict = create_pipe_object_result( - source='matrix', - identifier=result_url, - file_path=str(media_path), - cmdlet_name='add-file', - title=media_path.name, - target=result_url - ) - ctx.emit(result_dict) - + shutil.copy2(str(media_path), target_path) except Exception as exc: - log(f"Failed: {exc}", file=sys.stderr) + log(f"❌ Failed to export file: {exc}", file=sys.stderr) return 1 - - if delete_after_upload: + + # Copy Sidecars + Add_File._copy_sidecars(media_path, target_path) + + # Ensure hash for exported copy + if not f_hash: try: - media_path.unlink() - _cleanup_sidecar_files(media_path) - log(f"✅ Deleted file and sidecar", file=sys.stderr) - except Exception as exc: - log(f"⚠️ Could not delete file: {exc}", file=sys.stderr) - + f_hash = sha256_file(target_path) + except Exception: + f_hash = None + + # Write Metadata Sidecars (since it's an export) + relationships = Add_File._get_relationships(result, pipe_obj) + try: + write_sidecar(target_path, tags, url, f_hash) + write_metadata(target_path, hash_value=f_hash, url=url, relationships=relationships or []) + except Exception: + pass + + # Update PipeObject and emit + extra_updates = { + "storage_source": "local", + "url": url, + "export_path": str(destination_root), + } + if relationships: + extra_updates["relationships"] = relationships + + chosen_title = title or title_value or pipe_obj.title or target_path.name + + Add_File._update_pipe_object_destination( + pipe_obj, + hash=f_hash or "unknown", + store="local", + file_path=str(target_path), + tags=tags, + title=chosen_title, + extra_updates=extra_updates, + ) + Add_File._emit_pipe_object(pipe_obj) + + # Cleanup + # Only delete if explicitly requested! 
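The exported filename above is derived from the title through a simple character whitelist before the cleanup call that follows. A standalone sketch of that rule (the `ALLOWED_EXTRA` constant and `export_name` helper are illustrative names, not part of the cmdlet):

```python
from pathlib import Path

ALLOWED_EXTRA = " ._-()[]{}'`"  # mirrors the whitelist used by _handle_local_export above

def export_name(title: str, source: Path) -> str:
    """Build a filesystem-safe export name from a title, falling back to the source stem."""
    safe = "".join(c for c in title if c.isalnum() or c in ALLOWED_EXTRA).strip()
    return (safe or source.stem) + source.suffix

# Characters outside the whitelist are dropped; the original extension is preserved.
print(export_name("AC/DC: Back In Black", Path("track01.m4a")))  # -> "ACDC Back In Black.m4a"
```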
+ Add_File._cleanup_after_success(media_path, delete_source=delete_after) + return 0 - # location == 'hydrus' - # Compute file hash to check if already in Hydrus - log(f"Uploading to Hydrus: {media_path.name}", file=sys.stderr) - log(f"Computing SHA-256 hash for: {media_path.name}", file=sys.stderr) - try: - file_hash = sha256_file(media_path) - except Exception as exc: - log(f"❌ Failed to compute file hash: {exc}", file=sys.stderr) - return 1 - debug(f"File hash: {file_hash}", file=sys.stderr) + @staticmethod + def _handle_provider_upload( + media_path: Path, + provider_name: str, + pipe_obj: models.PipeObject, + config: Dict[str, Any], + delete_after: bool, + ) -> int: + """Handle uploading to a file provider (e.g. 0x0).""" + from helper.provider import get_file_provider - # Read sidecar tags and known URLs first (for tagging) + log(f"Uploading via {provider_name}: {media_path.name}", file=sys.stderr) - sidecar_path, hash_from_sidecar, sidecar_tags, sidecar_urls = _load_sidecar_bundle(media_path, origin=origin, config=config) - if sidecar_path: - log(f"Found sidecar at: {sidecar_path}", file=sys.stderr) - log(f"Read sidecar: hash={hash_from_sidecar}, {len(sidecar_tags)} tag(s), {len(sidecar_urls)} URL(s)", file=sys.stderr) - if sidecar_tags: - log(f"Sidecar tags: {sidecar_tags}", file=sys.stderr) - if sidecar_urls: - log(f"Sidecar URLs: {sidecar_urls}", file=sys.stderr) - else: - log(f"No sidecar found for {media_path.name}", file=sys.stderr) - - # Normalize all title tags to use spaces instead of underscores BEFORE merging - # This ensures that "Radiohead - Creep" and "Radiohead_-_Creep" are treated as the same title - def normalize_title_tag(tag: str) -> str: - """Normalize a title tag by replacing underscores with spaces.""" - if str(tag).strip().lower().startswith("title:"): - parts = tag.split(":", 1) - if len(parts) == 2: - value = parts[1].replace("_", " ").strip() - return f"title:{value}" - return tag - - sidecar_tags = [normalize_title_tag(t) for t in sidecar_tags] - pipe_object_tags = [normalize_title_tag(t) for t in pipe_object_tags] - - # Merge tags from PipeObject with tags from sidecar - # NOTE: Remove ALL existing title tags and use only filename-based title - # The filename is the source of truth for the title - tags_without_titles = [t for t in merge_sequences(sidecar_tags, pipe_object_tags, case_sensitive=True) - if not str(t).strip().lower().startswith("title:")] - - # Ensure ONE title tag based on the actual filename - filename_title = media_path.stem.replace("_", " ").strip() - if filename_title: - tags = [f"title:{filename_title}"] + tags_without_titles - else: - tags = tags_without_titles - - known_urls = merge_sequences(sidecar_urls, pipe_known_urls, case_sensitive=False) - - if pipe_object_tags: - log(f"Merged pipeline tags. 
Total tags now: {len(tags)}", file=sys.stderr) - - # Write metadata to file before uploading (only for local storage, not for Hydrus) - # Hydrus stores tags separately, so we don't need to modify the file - if location != 'hydrus': try: - if tags: - # Determine file kind from extension - file_kind = '' - sfx = media_path.suffix.lower() - if sfx in {'.mp3', '.flac', '.wav', '.m4a', '.aac', '.ogg', '.opus', '.mka'}: - file_kind = 'audio' - elif sfx in {'.mp4', '.mkv', '.webm', '.mov', '.avi', '.flv', '.mpg', '.mpeg', '.ts', '.m4v', '.wmv'}: - file_kind = 'video' - - if embed_metadata_in_file(media_path, tags, file_kind): - log(f"Wrote metadata tags to file: {media_path.name}", file=sys.stderr) - else: - log(f"Note: Could not embed metadata in file (may not be supported format)", file=sys.stderr) - except Exception as exc: - log(f"Warning: Failed to write metadata to file: {exc}", file=sys.stderr) - else: - log(f"Note: Skipping FFmpeg metadata embedding for Hydrus (tags managed separately)", file=sys.stderr) + file_provider = get_file_provider(provider_name, config) + if not file_provider: + log(f"File provider '{provider_name}' not available", file=sys.stderr) + return 1 - # Use FileStorage backend to upload to Hydrus - try: - file_hash = storage["hydrus"].upload( - media_path, - config=config, - tags=tags, + hoster_url = file_provider.upload(str(media_path)) + log(f"File uploaded: {hoster_url}", file=sys.stderr) + + # Associate URL with Hydrus if possible + f_hash = Add_File._resolve_file_hash(None, media_path, pipe_obj, None) + if f_hash: + try: + client = hydrus_wrapper.get_client(config) + if client: + client.associate_url(f_hash, hoster_url) + except Exception: + pass + + except Exception as exc: + log(f"Upload failed: {exc}", file=sys.stderr) + return 1 + + # Update PipeObject and emit + extra_updates: Dict[str, Any] = { + "provider": provider_name, + "provider_url": hoster_url, + } + if isinstance(pipe_obj.extra, dict): + # Also track hoster URL as a url for downstream steps + existing_known = list(pipe_obj.extra.get("url") or []) + if hoster_url and hoster_url not in existing_known: + existing_known.append(hoster_url) + extra_updates["url"] = existing_known + + file_path = pipe_obj.path or (str(media_path) if media_path else None) or "" + Add_File._update_pipe_object_destination( + pipe_obj, + hash=f_hash or "unknown", + store=provider_name or "provider", + file_path=file_path, + tags=pipe_obj.tags, + title=pipe_obj.title or (media_path.name if media_path else None), + extra_updates=extra_updates, ) - log(f"Hydrus: {file_hash}", file=sys.stderr) - except Exception as exc: - log(f"Failed: {exc}", file=sys.stderr) - return 1 + Add_File._emit_pipe_object(pipe_obj) - # Associate known URLs in Hydrus metadata - url_count = 0 - if known_urls: - try: - client = hydrus_wrapper.get_client(config) - if client: - for url in known_urls: - u = str(url or "").strip() - if not u: - continue - try: - client.associate_url(file_hash, u) - except Exception as exc: - log(f"Hydrus associate-url failed for {u}: {exc}", file=sys.stderr) - continue - url_count += 1 - except Exception as exc: - log(f"Failed to associate URLs: {exc}", file=sys.stderr) - - if url_count: - log(f"✅ Associated {url_count} URL(s)", file=sys.stderr) - else: - log(f"No URLs to associate", file=sys.stderr) + Add_File._cleanup_after_success(media_path, delete_source=delete_after) + return 0 - _cleanup_sidecar_files(media_path, sidecar_path) - - # Update in-memory result for downstream pipes - try: - # Only update piped result objects; 
direct -path usage may have a dummy result - setattr(result, "hash_hex", file_hash) - # Preserve media_kind for downstream commands (e.g., open) - if not hasattr(result, "media_kind") or getattr(result, "media_kind") == "other": - # Try to infer media_kind from file extension or keep existing - suffix = media_path.suffix.lower() - if suffix in {'.pdf', '.epub', '.txt', '.mobi', '.azw3', '.cbz', '.cbr', '.rtf', '.md', '.html', '.htm', '.doc', '.docx'}: - setattr(result, "media_kind", "document") - if hasattr(result, "columns") and isinstance(getattr(result, "columns"), list): - cols = list(getattr(result, "columns")) - if ("Hash", file_hash) not in cols: - cols.append(("Hash", file_hash)) - setattr(result, "columns", cols) - except Exception: - pass - - # If -delete flag is set, delete the file and .tags after successful upload - # Also delete if the file is a temporary file from merge-file (contains .dlhx_ or (merged)) - is_temp_merge = "(merged)" in media_path.name or ".dlhx_" in media_path.name - - if delete_after_upload or is_temp_merge: - log(f"Deleting local files (as requested or temp file)...", file=sys.stderr) - try: - media_path.unlink() - log(f"✅ Deleted: {media_path.name}", file=sys.stderr) - except OSError as exc: - log(f"Failed to delete file: {exc}", file=sys.stderr) + @staticmethod + def _handle_storage_backend( + media_path: Path, + backend_name: str, + pipe_obj: models.PipeObject, + config: Dict[str, Any], + delete_after: bool, + ) -> int: + """Handle uploading to a registered storage backend (e.g., 'test' folder store, 'hydrus', etc.).""" + from config import load_config - # Delete .tags sidecar if it exists - if sidecar_path is not None: + log(f"Adding file to storage backend '{backend_name}': {media_path.name}", file=sys.stderr) + + try: + cfg = load_config() + storage = FileStorage(cfg) + backend = storage[backend_name] + + # Prepare metadata from pipe_obj and sidecars + tags, url, title, f_hash = Add_File._prepare_metadata(None, media_path, pipe_obj, config) + + # Call backend's add_file with full metadata + # Backend returns hash as identifier + file_identifier = backend.add_file( + media_path, + title=title, + tags=tags, + url=url + ) + log(f"✓ File added to '{backend_name}': {file_identifier}", file=sys.stderr) + + # Update pipe object with result + # For backends that return paths, file_path = identifier + # For backends that return hashes, file_path = "backend:hash" + file_path_str = str(file_identifier) + if len(file_identifier) == 64 and all(c in '0123456789abcdef' for c in file_identifier.lower()): + # It's a hash - use backend:hash format + file_path_str = f"{backend_name}:{file_identifier}" + + Add_File._update_pipe_object_destination( + pipe_obj, + hash=file_identifier if len(file_identifier) == 64 else f_hash or "unknown", + store=backend_name, + file_path=file_path_str, + tags=tags, + title=title or pipe_obj.title or media_path.name, + extra_updates={ + "storage_source": backend_name, + "url": url, + }, + ) + Add_File._emit_pipe_object(pipe_obj) + + Add_File._cleanup_after_success(media_path, delete_source=delete_after) + return 0 + + except Exception as exc: + log(f"❌ Failed to add file to backend '{backend_name}': {exc}", file=sys.stderr) + import traceback + traceback.print_exc(file=sys.stderr) + return 1 + + # --- Helpers --- + + @staticmethod + def _load_sidecar_bundle( + media_path: Path, + origin: Optional[str], + config: Dict[str, Any], + ) -> Tuple[Optional[Path], Optional[str], List[str], List[str]]: + """Load sidecar metadata.""" + if origin 
and origin.lower() == "local": try: - sidecar_path.unlink() - log(f"✅ Deleted: {sidecar_path.name}", file=sys.stderr) - except OSError as exc: - log(f"Failed to delete sidecar: {exc}", file=sys.stderr) - - # Decide whether to surface search-file results at end of pipeline - stage_ctx = ctx.get_stage_context() - is_storage_target = location is not None - should_display = is_storage_target and (stage_ctx is None or stage_ctx.is_last_stage) + from config import get_local_storage_path + db_root = get_local_storage_path(config) + if db_root: + with FolderDB(Path(db_root)) as db: + file_hash = db.get_file_hash(media_path) + if file_hash: + tags = db.get_tags(file_hash) or [] + metadata = db.get_metadata(file_hash) or {} + url = metadata.get("url") or [] + f_hash = metadata.get("hash") or file_hash + if tags or url or f_hash: + return None, f_hash, tags, url + except Exception: + pass - if (not should_display) or not file_hash: - log(f"Successfully completed: {media_path.name} (hash={file_hash})", file=sys.stderr) - - # Emit result for Hydrus uploads so downstream commands know about it - if location == 'hydrus': - # Extract title from original result, fallback to filename if not available - result_title = extract_title_from_result(result) or media_path.name - - result_dict = create_pipe_object_result( - source='hydrus', - identifier=file_hash, - file_path=f"hydrus:{file_hash}", - cmdlet_name='add-file', - title=result_title, - file_hash=file_hash, - extra={ - 'storage_source': 'hydrus', - 'hydrus_hash': file_hash, - 'tags': tags, - 'known_urls': known_urls, - } - ) - ctx.emit(result_dict) - - # Clear the stage table so downstream @N doesn't try to re-run download-data - # Next stage will use these Hydrus file results, not format objects - ctx.set_current_stage_table(None) - - # If this is the last stage (or not in a pipeline), show the file via search-file - if should_display and file_hash: try: - from cmdlets import search_file as search_cmdlet - search_cmdlet._run(None, [f"hash:{file_hash}"], config) + sidecar_path = find_sidecar(media_path) + if sidecar_path and sidecar_path.exists(): + h, t, u = read_sidecar(sidecar_path) + return sidecar_path, h, t or [], u or [] except Exception: - debug("search-file lookup after add-file failed", file=sys.stderr) - elif file_hash: - # Not displaying search results here, so report completion normally - log(f"Successfully completed: {media_path.name} (hash={file_hash})", file=sys.stderr) - - return 0 + pass + return None, None, [], [] -CMDLET = Cmdlet( - name="add-file", - summary="Upload a media file to specified location (Hydrus, file provider, or local directory).", - usage="add-file (-path | ) (-storage | -provider ) [-delete]", - args=[ - CmdletArg(name="path", type="str", required=False, description="Direct file path to upload (alternative to piped result)", alias="p"), - SharedArgs.STORAGE, # For hydrus, local, or directory paths - CmdletArg(name="provider", type="str", required=False, description="File hosting provider (e.g., 0x0 for 0x0.st)", alias="prov"), - CmdletArg(name="delete", type="flag", required=False, description="Delete the file and its .tags after successful upload.", alias="del"), - ], - details=[ - "- Storage location options (use -storage):", - " hydrus: Upload to Hydrus database with metadata tagging", - " local: Copy file to local directory", - " : Copy file to specified directory", - "- File provider options (use -provider):", - " 0x0: Upload to 0x0.st for temporary hosting with public URL", - "- Accepts files from official 
Hydrus supported types: images, animations, videos, audio, applications, projects, and archives.", - "- When uploading to Hydrus: adds tags from .tags sidecar and associates known_urls", - "- When using file provider: uploads to service, adds URL to sidecar", - "- When copying locally: copies file with original metadata preserved", - "- Use -delete flag to automatically delete the file and .tags after successful operation.", - ], -) \ No newline at end of file + @staticmethod + def _resolve_file_hash( + result: Any, + media_path: Path, + pipe_obj: models.PipeObject, + fallback_hash: Optional[str], + ) -> Optional[str]: + if pipe_obj.hash and pipe_obj.hash != "unknown": + return pipe_obj.hash + if fallback_hash: + return fallback_hash + + if isinstance(result, dict): + candidate = result.get('hash') + if candidate: + return str(candidate) + + try: + return sha256_file(media_path) + except Exception: + return None + + @staticmethod + def _resolve_media_kind(path: Path) -> str: + # Extension-based media-kind detection (same mapping as the legacy add-file path) + suffix = path.suffix.lower() + if suffix in {'.mp3', '.flac', '.wav', '.m4a', '.aac', '.ogg', '.opus', '.wma', '.mka'}: + return 'audio' + if suffix in {'.mp4', '.mkv', '.webm', '.mov', '.avi', '.flv', '.mpg', '.mpeg', '.ts', '.m4v', '.wmv'}: + return 'video' + if suffix in {'.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.tiff'}: + return 'image' + if suffix in {'.pdf', '.epub', '.txt', '.mobi', '.azw3', '.cbz', '.cbr', '.doc', '.docx'}: + return 'document' + return 'other' + + @staticmethod + def _persist_local_metadata( + library_root: Path, + dest_path: Path, + tags: List[str], + url: List[str], + f_hash: Optional[str], + relationships: Any, + duration: Any, + media_kind: str, + ): + payload = { + 'hash': f_hash, + 'url': url, + 'relationships': relationships or [], + 'duration': duration, + 'size': None, + 'ext': dest_path.suffix.lower(), + 'media_type': media_kind, + 'media_kind': media_kind, + } + try: + payload['size'] = dest_path.stat().st_size + except OSError: + payload['size'] = None + + with FolderDB(library_root) as db: + try: + db.save_file_info(dest_path, payload, tags) + except Exception as exc: + log(f"⚠️ Failed to persist metadata: {exc}", file=sys.stderr) + + @staticmethod + def _copy_sidecars(source_path: Path, target_path: Path): + possible_sidecars = [ + source_path.with_suffix(source_path.suffix + ".json"), + source_path.with_name(source_path.name + ".tag"), + source_path.with_name(source_path.name + ".tags"), + source_path.with_name(source_path.name + ".tags.txt"), + source_path.with_name(source_path.name + ".metadata"), + source_path.with_name(source_path.name + ".notes"), + ] + for sc in possible_sidecars: + try: + if sc.exists(): + suffix_part = sc.name.replace(source_path.name, "", 1) + dest_sidecar = target_path.parent / f"{target_path.name}{suffix_part}" + dest_sidecar.parent.mkdir(parents=True, exist_ok=True) + shutil.copy2(str(sc), dest_sidecar) + except Exception: + pass + + @staticmethod + def _cleanup_after_success(media_path: Path, delete_source: bool): + # Temp files produced by merge-file are always removed; anything else only when -delete was passed. + is_temp_merge = "(merged)" in media_path.name or ".dlhx_" in media_path.name + if not (delete_source or is_temp_merge): + return + + log("Deleting source file...", file=sys.stderr) + try: + media_path.unlink() + Add_File._cleanup_sidecar_files(media_path) + except Exception as exc: + log(f"⚠️ Could not delete file: {exc}", file=sys.stderr) + + @staticmethod + def _cleanup_sidecar_files(media_path: Path): + targets = [ +
media_path.parent / (media_path.name + '.metadata'), + media_path.parent / (media_path.name + '.notes'), + media_path.parent / (media_path.name + '.tag'), + media_path.parent / (media_path.name + '.tags'), + media_path.parent / (media_path.name + '.tags.txt'), + ] + for target in targets: + try: + if target.exists(): + target.unlink() + except Exception: + pass + + +# Create and register the cmdlet +CMDLET = Add_File() diff --git a/cmdlets/add_note.py b/cmdlets/add_note.py index cddd729..fbc87f0 100644 --- a/cmdlets/add_note.py +++ b/cmdlets/add_note.py @@ -7,19 +7,19 @@ from . import register import models import pipeline as ctx from helper import hydrus as hydrus_wrapper -from ._shared import Cmdlet, CmdletArg, normalize_hash +from ._shared import Cmdlet, CmdletArg, normalize_hash, should_show_help from helper.logger import log CMDLET = Cmdlet( name="add-note", summary="Add or set a note on a Hydrus file.", usage="add-note [-hash ] ", - args=[ + arg=[ CmdletArg("hash", type="string", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."), CmdletArg("name", type="string", required=True, description="The note name/key to set (e.g. 'comment', 'source', etc.)."), CmdletArg("text", type="string", required=True, description="The note text/content to store.", variadic=True), ], - details=[ + detail=[ "- Notes are stored in the 'my notes' service by default.", ], ) @@ -28,12 +28,9 @@ CMDLET = Cmdlet( @register(["add-note", "set-note", "add_note"]) # aliases def add(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: # Help - try: - if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args): - log(json.dumps(CMDLET, ensure_ascii=False, indent=2)) - return 0 - except Exception: - pass + if should_show_help(args): + log(json.dumps(CMDLET, ensure_ascii=False, indent=2)) + return 0 from ._shared import parse_cmdlet_args parsed = parse_cmdlet_args(args, CMDLET) diff --git a/cmdlets/add_relationship.py b/cmdlets/add_relationship.py index 8ed80c1..935b135 100644 --- a/cmdlets/add_relationship.py +++ b/cmdlets/add_relationship.py @@ -14,20 +14,20 @@ from . 
import register import models import pipeline as ctx from helper import hydrus as hydrus_wrapper -from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args, normalize_result_input -from helper.local_library import read_sidecar, find_sidecar +from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args, normalize_result_input, should_show_help, get_field +from helper.folder_store import read_sidecar, find_sidecar CMDLET = Cmdlet( name="add-relationship", summary="Associate file relationships (king/alt/related) in Hydrus based on relationship tags in sidecar.", usage="@1-3 | add-relationship -king @4 OR add-relationship -path OR @1,@2,@3 | add-relationship", - args=[ + arg=[ CmdletArg("path", type="string", description="Specify the local file path (if not piping a result)."), CmdletArg("-king", type="string", description="Explicitly set the king hash/file for relationships (e.g., -king @4 or -king hash)"), CmdletArg("-type", type="string", description="Relationship type for piped items (default: 'alt', options: 'king', 'alt', 'related')"), ], - details=[ + detail=[ "- Mode 1: Pipe multiple items, first becomes king, rest become alts (default)", "- Mode 2: Use -king to explicitly set which item/hash is the king: @1-3 | add-relationship -king @4", "- Mode 3: Read relationships from sidecar (format: 'relationship: hash(king),hash(alt)...')", @@ -108,13 +108,11 @@ def _resolve_king_reference(king_arg: str) -> Optional[str]: item = items[index] # Try to extract hash from the item (could be dict or object) - item_hash = None - if isinstance(item, dict): - # Dictionary: try common hash field names - item_hash = item.get('hash_hex') or item.get('hash') or item.get('file_hash') - else: - # Object: use getattr - item_hash = getattr(item, 'hash_hex', None) or getattr(item, 'hash', None) + item_hash = ( + get_field(item, 'hash_hex') + or get_field(item, 'hash') + or get_field(item, 'file_hash') + ) if item_hash: normalized = _normalise_hash_hex(item_hash) @@ -122,13 +120,11 @@ def _resolve_king_reference(king_arg: str) -> Optional[str]: return normalized # If no hash, try to get file path (for local storage) - file_path = None - if isinstance(item, dict): - # Dictionary: try common path field names - file_path = item.get('file_path') or item.get('path') or item.get('target') - else: - # Object: use getattr - file_path = getattr(item, 'file_path', None) or getattr(item, 'path', None) or getattr(item, 'target', None) + file_path = ( + get_field(item, 'file_path') + or get_field(item, 'path') + or get_field(item, 'target') + ) if file_path: return str(file_path) @@ -199,12 +195,9 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int: Returns 0 on success, non-zero on failure. 
""" # Help - try: - if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in _args): - log(json.dumps(CMDLET, ensure_ascii=False, indent=2)) - return 0 - except Exception: - pass + if should_show_help(_args): + log(json.dumps(CMDLET, ensure_ascii=False, indent=2)) + return 0 # Parse arguments using CMDLET spec parsed = parse_cmdlet_args(_args, CMDLET) @@ -235,7 +228,7 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int: items_to_process = [{"file_path": arg_path}] # Import local storage utilities - from helper.local_library import LocalLibrarySearchOptimizer + from helper.folder_store import LocalLibrarySearchOptimizer from config import get_local_storage_path local_storage_path = get_local_storage_path(config) if config else None diff --git a/cmdlets/add_tag.py b/cmdlets/add_tag.py new file mode 100644 index 0000000..6daac33 --- /dev/null +++ b/cmdlets/add_tag.py @@ -0,0 +1,567 @@ +from __future__ import annotations + +from typing import Any, Dict, List, Sequence, Optional +from pathlib import Path +import sys + +from helper.logger import log + +import models +import pipeline as ctx +from ._shared import normalize_result_input, filter_results_by_temp +from helper import hydrus as hydrus_wrapper +from helper.folder_store import write_sidecar, FolderDB +from ._shared import Cmdlet, CmdletArg, SharedArgs, normalize_hash, parse_tag_arguments, expand_tag_groups, parse_cmdlet_args, collapse_namespace_tags, should_show_help, get_field +from config import get_local_storage_path + + + +class Add_Tag(Cmdlet): + """Class-based add-tag cmdlet with Cmdlet metadata inheritance.""" + + def __init__(self) -> None: + super().__init__( + name="add-tag", + summary="Add a tag to a Hydrus file or write it to a local .tags sidecar.", + usage="add-tag [-hash ] [-store ] [-duplicate ] [-list [,...]] [--all] [,...]", + arg=[ + SharedArgs.HASH, + SharedArgs.STORE, + CmdletArg("-duplicate", type="string", description="Copy existing tag values to new namespaces. Formats: title:album,artist (explicit) or title,album,artist (inferred)"), + CmdletArg("-list", type="string", description="Load predefined tag lists from adjective.json. Comma-separated list names (e.g., -list philosophy,occult)."), + CmdletArg("--all", type="flag", description="Include temporary files in tagging (by default, only tags non-temporary files)."), + CmdletArg("tags", type="string", required=False, description="One or more tags to add. Comma- or space-separated. Can also use {list_name} syntax. If omitted, uses tags from pipeline payload.", variadic=True), + ], + detail=[ + "- By default, only tags non-temporary files (from pipelines). 
Use --all to tag everything.", + "- Without -hash and when the selection is a local file, tags are written to .tags.", + "- With a Hydrus hash, tags are sent to the 'my tags' service.", + "- Multiple tags can be comma-separated or space-separated.", + "- Use -list to include predefined tag lists from adjective.json: -list philosophy,occult", + "- Tags can also reference lists with curly braces: add-tag {philosophy} \"other:tag\"", + "- Use -duplicate to copy EXISTING tag values to new namespaces:", + " Explicit format: -duplicate title:album,artist (copies title: to album: and artist:)", + " Inferred format: -duplicate title,album,artist (first is source, rest are targets)", + "- The source namespace must already exist in the file being tagged.", + "- Target namespaces that already have a value are skipped (not overwritten).", + "- You can also pass the target hash as a tag token: hash:. This overrides -hash and is removed from the tag list.", + ], + exec=self.run, + ) + self.register() + + @staticmethod + def _extract_title_tag(tags: List[str]) -> Optional[str]: + """Return the value of the first title: tag if present.""" + for tag in tags: + if isinstance(tag, str) and tag.lower().startswith("title:"): + value = tag.split(":", 1)[1].strip() + if value: + return value + return None + + @staticmethod + def _apply_title_to_result(res: Any, title_value: Optional[str]) -> None: + """Update result object/dict title fields and columns in-place.""" + if not title_value: + return + if isinstance(res, models.PipeObject): + res.title = title_value + if hasattr(res, "columns") and isinstance(res.columns, list) and res.columns: + label, *_ = res.columns[0] + if str(label).lower() == "title": + res.columns[0] = (res.columns[0][0], title_value) + elif isinstance(res, dict): + res["title"] = title_value + cols = res.get("columns") + if isinstance(cols, list): + updated = [] + changed = False + for col in cols: + if isinstance(col, tuple) and len(col) == 2: + label, val = col + if str(label).lower() == "title": + updated.append((label, title_value)) + changed = True + else: + updated.append(col) + else: + updated.append(col) + if changed: + res["columns"] = updated + + @staticmethod + def _matches_target(item: Any, hydrus_hash: Optional[str], file_hash: Optional[str], file_path: Optional[str]) -> bool: + """Determine whether a result item refers to the given hash/path target.""" + hydrus_hash_l = hydrus_hash.lower() if hydrus_hash else None + file_hash_l = file_hash.lower() if file_hash else None + file_path_l = file_path.lower() if file_path else None + + def norm(val: Any) -> Optional[str]: + return str(val).lower() if val is not None else None + + hash_fields = ["hydrus_hash", "hash", "hash_hex", "file_hash"] + path_fields = ["path", "file_path", "target"] + + if isinstance(item, dict): + hashes = [norm(item.get(field)) for field in hash_fields] + paths = [norm(item.get(field)) for field in path_fields] + else: + hashes = [norm(get_field(item, field)) for field in hash_fields] + paths = [norm(get_field(item, field)) for field in path_fields] + + if hydrus_hash_l and hydrus_hash_l in hashes: + return True + if file_hash_l and file_hash_l in hashes: + return True + if file_path_l and file_path_l in paths: + return True + return False + + @staticmethod + def _update_item_title_fields(item: Any, new_title: str) -> None: + """Mutate an item to reflect a new title in plain fields and columns.""" + if isinstance(item, models.PipeObject): + item.title = new_title + if hasattr(item, "columns") and 
isinstance(item.columns, list) and item.columns: + label, *_ = item.columns[0] + if str(label).lower() == "title": + item.columns[0] = (label, new_title) + elif isinstance(item, dict): + item["title"] = new_title + cols = item.get("columns") + if isinstance(cols, list): + updated_cols = [] + changed = False + for col in cols: + if isinstance(col, tuple) and len(col) == 2: + label, val = col + if str(label).lower() == "title": + updated_cols.append((label, new_title)) + changed = True + else: + updated_cols.append(col) + else: + updated_cols.append(col) + if changed: + item["columns"] = updated_cols + + def _refresh_result_table_title(self, new_title: str, hydrus_hash: Optional[str], file_hash: Optional[str], file_path: Optional[str]) -> None: + """Refresh the cached result table with an updated title and redisplay it.""" + try: + last_table = ctx.get_last_result_table() + items = ctx.get_last_result_items() + if not last_table or not items: + return + + updated_items = [] + match_found = False + for item in items: + try: + if self._matches_target(item, hydrus_hash, file_hash, file_path): + self._update_item_title_fields(item, new_title) + match_found = True + except Exception: + pass + updated_items.append(item) + if not match_found: + return + + from result_table import ResultTable # Local import to avoid circular dependency + + new_table = last_table.copy_with_title(getattr(last_table, "title", "")) + + for item in updated_items: + new_table.add_result(item) + + ctx.set_last_result_table_overlay(new_table, updated_items) + except Exception: + pass + + def _refresh_tags_view(self, res: Any, hydrus_hash: Optional[str], file_hash: Optional[str], file_path: Optional[str], config: Dict[str, Any]) -> None: + """Refresh tag display via get-tag. Prefer current subject; fall back to direct hash refresh.""" + try: + from cmdlets import get_tag as get_tag_cmd # type: ignore + except Exception: + return + + target_hash = hydrus_hash or file_hash + refresh_args: List[str] = [] + if target_hash: + refresh_args = ["-hash", target_hash, "-store", target_hash] + + try: + subject = ctx.get_last_result_subject() + if subject and self._matches_target(subject, hydrus_hash, file_hash, file_path): + get_tag_cmd._run(subject, refresh_args, config) + return + except Exception: + pass + + if target_hash: + try: + get_tag_cmd._run(res, refresh_args, config) + except Exception: + pass + + def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: + """Add a tag to a file with smart filtering for pipeline results.""" + if should_show_help(args): + log(f"Cmdlet: {self.name}\nSummary: {self.summary}\nUsage: {self.usage}") + return 0 + + parsed = parse_cmdlet_args(args, self) + + # Check for --all flag + include_temp = parsed.get("all", False) + + # Get explicit -hash and -store overrides from CLI + hash_override = normalize_hash(parsed.get("hash")) + store_override = parsed.get("store") or parsed.get("storage") + + # Normalize input to list + results = normalize_result_input(result) + + # If no piped results but we have -hash flag, create a minimal synthetic result + if not results and hash_override: + results = [{"hash": hash_override, "is_temp": False}] + if store_override: + results[0]["store"] = store_override + + # Filter by temp status (unless --all is set) + if not include_temp: + results = filter_results_by_temp(results, include_temp=False) + + if not results: + log("No valid files to tag (all results were temporary; use --all to include temporary files)", file=sys.stderr) + return 1 + + # 
Get tags from arguments (or fallback to pipeline payload) + raw_tags = parsed.get("tags", []) + if isinstance(raw_tags, str): + raw_tags = [raw_tags] + + # Fallback: if no tags provided explicitly, try to pull from first result payload + if not raw_tags and results: + first = results[0] + payload_tags = None + # Try multiple tag lookup strategies in order + tag_lookups = [ + lambda x: x.extra.get("tags") if isinstance(x, models.PipeObject) and isinstance(x.extra, dict) else None, + lambda x: x.get("tags") if isinstance(x, dict) else None, + lambda x: x.get("extra", {}).get("tags") if isinstance(x, dict) and isinstance(x.get("extra"), dict) else None, + lambda x: getattr(x, "tags", None), + ] + for lookup in tag_lookups: + try: + payload_tags = lookup(first) + if payload_tags: + break + except (AttributeError, TypeError, KeyError): + continue + if payload_tags: + if isinstance(payload_tags, str): + raw_tags = [payload_tags] + elif isinstance(payload_tags, list): + raw_tags = payload_tags + + # Handle -list argument (convert to {list} syntax) + list_arg = parsed.get("list") + if list_arg: + for l in list_arg.split(','): + l = l.strip() + if l: + raw_tags.append(f"{{{l}}}") + + # Parse and expand tags + tags_to_add = parse_tag_arguments(raw_tags) + tags_to_add = expand_tag_groups(tags_to_add) + + # Allow hash override via namespaced token (e.g., "hash:abcdef...") + extracted_hash = None + filtered_tags: List[str] = [] + for tag in tags_to_add: + if isinstance(tag, str) and tag.lower().startswith("hash:"): + _, _, hash_val = tag.partition(":") + if hash_val: + extracted_hash = normalize_hash(hash_val.strip()) + continue + filtered_tags.append(tag) + tags_to_add = filtered_tags + + if not tags_to_add: + log("No tags provided to add", file=sys.stderr) + return 1 + + def _find_library_root(path_obj: Path) -> Optional[Path]: + candidates = [] + cfg_root = get_local_storage_path(config) if config else None + if cfg_root: + try: + candidates.append(Path(cfg_root).expanduser()) + except Exception: + pass + try: + for candidate in candidates: + if (candidate / "medios-macina.db").exists(): + return candidate + for parent in [path_obj] + list(path_obj.parents): + if (parent / "medios-macina.db").exists(): + return parent + except Exception: + pass + return None + + # Get other flags + duplicate_arg = parsed.get("duplicate") + + if not tags_to_add and not duplicate_arg: + # Write sidecar files with the tags that are already in the result dicts + sidecar_count = 0 + for res in results: + # Handle both dict and PipeObject formats + file_path = None + tags = [] + file_hash = "" + # Use canonical field access with get_field for both dict and objects + file_path = get_field(res, "path") + # Try tags from top-level 'tags' or from 'extra.tags' + tags = get_field(res, "tags") or (get_field(res, "extra") or {}).get("tags", []) + file_hash = get_field(res, "hash") or get_field(res, "file_hash") or get_field(res, "hash_hex") or "" + if not file_path: + log(f"[add_tag] Warning: Result has no path, skipping", file=sys.stderr) + ctx.emit(res) + continue + if tags: + # Write sidecar file for this file with its tags + try: + sidecar_path = write_sidecar(Path(file_path), tags, [], file_hash) + log(f"[add_tag] Wrote {len(tags)} tag(s) to sidecar: {sidecar_path}", file=sys.stderr) + sidecar_count += 1 + except Exception as e: + log(f"[add_tag] Warning: Failed to write sidecar for {file_path}: {e}", file=sys.stderr) + ctx.emit(res) + if sidecar_count > 0: + log(f"[add_tag] Wrote {sidecar_count} sidecar file(s) with 
embedded tags", file=sys.stderr) + else: + log(f"[add_tag] No tags to write - passed {len(results)} result(s) through unchanged", file=sys.stderr) + return 0 + + # Main loop: process results with tags to add + total_new_tags = 0 + total_modified = 0 + for res in results: + # Extract file info from result + file_path = None + existing_tags = [] + file_hash = "" + storage_source = None + + # Use canonical getters for fields from both dicts and PipeObject + file_path = get_field(res, "path") + existing_tags = get_field(res, "tags") or [] + if not existing_tags: + existing_tags = (get_field(res, "extra", {}) or {}).get("tags") or [] + file_hash = get_field(res, "hash") or get_field(res, "file_hash") or get_field(res, "hash_hex") or "" + storage_source = get_field(res, "store") or get_field(res, "storage") or get_field(res, "storage_source") or get_field(res, "origin") + hydrus_hash = get_field(res, "hydrus_hash") or file_hash + + # Infer storage source from result if not found + if not storage_source: + if file_path: + storage_source = 'local' + elif file_hash and file_hash != "unknown": + storage_source = 'hydrus' + + original_tags_lower = {str(t).lower() for t in existing_tags if isinstance(t, str)} + original_title = self._extract_title_tag(list(existing_tags)) + + # Apply CLI overrides if provided + if hash_override and not file_hash: + file_hash = hash_override + if store_override and not storage_source: + storage_source = store_override + + # Check if we have sufficient identifier (file_path OR file_hash) + if not file_path and not file_hash: + log(f"[add_tag] Warning: Result has neither path nor hash available, skipping", file=sys.stderr) + ctx.emit(res) + continue + # Handle -duplicate logic (copy existing tags to new namespaces) + if duplicate_arg: + # Parse duplicate format: source:target1,target2 or source,target1,target2 + parts = duplicate_arg.split(':') + source_ns = "" + targets = [] + if len(parts) > 1: + # Explicit format: source:target1,target2 + source_ns = parts[0] + targets = parts[1].split(',') + else: + # Inferred format: source,target1,target2 + parts = duplicate_arg.split(',') + if len(parts) > 1: + source_ns = parts[0] + targets = parts[1:] + if source_ns and targets: + # Find tags in source namespace + source_tags = [t for t in existing_tags if t.startswith(source_ns + ':')] + for t in source_tags: + value = t.split(':', 1)[1] + for target_ns in targets: + new_tag = f"{target_ns}:{value}" + if new_tag not in existing_tags and new_tag not in tags_to_add: + tags_to_add.append(new_tag) + + # Initialize tag mutation tracking local variables + removed_tags = [] + new_tags_added = [] + final_tags = list(existing_tags) if existing_tags else [] + + # Determine where to add tags: Hydrus or Folder storage + if storage_source and storage_source.lower() == 'hydrus': + # Add tags to Hydrus using the API + target_hash = file_hash + if target_hash: + try: + hydrus_client = hydrus_wrapper.get_client(config) + service_name = hydrus_wrapper.get_tag_service_name(config) + + # For namespaced tags, remove old tags in same namespace + removed_tags = [] + for new_tag in tags_to_add: + if ':' in new_tag: + namespace = new_tag.split(':', 1)[0] + to_remove = [t for t in existing_tags if t.startswith(namespace + ':') and t.lower() != new_tag.lower()] + removed_tags.extend(to_remove) + + # Add new tags + if tags_to_add: + log(f"[add_tag] Adding {len(tags_to_add)} tag(s) to Hydrus file: {target_hash}", file=sys.stderr) + hydrus_client.add_tags(target_hash, tags_to_add, service_name) + + # Delete 
replaced namespace tags + if removed_tags: + unique_removed = sorted(set(removed_tags)) + hydrus_client.delete_tags(target_hash, unique_removed, service_name) + + if tags_to_add or removed_tags: + total_new_tags += len(tags_to_add) + total_modified += 1 + log(f"[add_tag] ✓ Added {len(tags_to_add)} tag(s) to Hydrus", file=sys.stderr) + # Refresh final tag list from the backend for accurate display + try: + from helper.store import FileStorage + storage = FileStorage(config) + if storage and storage_source in storage.list_backends(): + backend = storage[storage_source] + refreshed_tags, _ = backend.get_tag(target_hash) + if refreshed_tags is not None: + final_tags = refreshed_tags + new_tags_added = [t for t in refreshed_tags if t.lower() not in original_tags_lower] + # Update result tags for downstream cmdlets/UI + if isinstance(res, models.PipeObject): + res.tags = refreshed_tags + if isinstance(res.extra, dict): + res.extra['tags'] = refreshed_tags + elif isinstance(res, dict): + res['tags'] = refreshed_tags + except Exception: + # Ignore failures - this is best-effort for refreshing tag state + pass + except Exception as e: + log(f"[add_tag] Warning: Failed to add tags to Hydrus: {e}", file=sys.stderr) + else: + log(f"[add_tag] Warning: No hash available for Hydrus file, skipping", file=sys.stderr) + elif storage_source: + # For any Folder-based storage (local, test, default, etc.), delegate to backend + # If storage_source is not a registered backend, fallback to writing a sidecar + from helper.store import FileStorage + storage = FileStorage(config) + try: + if storage and storage_source in storage.list_backends(): + backend = storage[storage_source] + if file_hash and backend.add_tag(file_hash, tags_to_add): + # Refresh tags from backend to get merged result + refreshed_tags, _ = backend.get_tag(file_hash) + if refreshed_tags: + # Update result tags + if isinstance(res, models.PipeObject): + res.tags = refreshed_tags + # Also keep as extra for compatibility + if isinstance(res.extra, dict): + res.extra['tags'] = refreshed_tags + elif isinstance(res, dict): + res['tags'] = refreshed_tags + + # Update title if changed + title_value = self._extract_title_tag(refreshed_tags) + self._apply_title_to_result(res, title_value) + + # Compute stats + new_tags_added = [t for t in refreshed_tags if t.lower() not in original_tags_lower] + total_new_tags += len(new_tags_added) + if new_tags_added: + total_modified += 1 + + log(f"[add_tag] Added {len(new_tags_added)} new tag(s); {len(refreshed_tags)} total tag(s) stored in {storage_source}", file=sys.stderr) + final_tags = refreshed_tags + else: + log(f"[add_tag] Warning: Failed to add tags to {storage_source}", file=sys.stderr) + else: + # Not a registered backend - fallback to sidecar if we have a path + if file_path: + try: + sidecar_path = write_sidecar(Path(file_path), tags_to_add, [], file_hash) + log(f"[add_tag] Wrote {len(tags_to_add)} tag(s) to sidecar: {sidecar_path}", file=sys.stderr) + total_new_tags += len(tags_to_add) + total_modified += 1 + # Update res tags + if isinstance(res, models.PipeObject): + res.tags = (res.tags or []) + tags_to_add + if isinstance(res.extra, dict): + res.extra['tags'] = res.tags + elif isinstance(res, dict): + res['tags'] = list(set((res.get('tags') or []) + tags_to_add)) + except Exception as exc: + log(f"[add_tag] Warning: Failed to write sidecar for {file_path}: {exc}", file=sys.stderr) + else: + log(f"[add_tag] Warning: Storage backend '{storage_source}' not found in config", file=sys.stderr) + except 
KeyError: + # storage[storage_source] raised KeyError - treat as absent backend + if file_path: + try: + sidecar_path = write_sidecar(Path(file_path), tags_to_add, [], file_hash) + log(f"[add_tag] Wrote {len(tags_to_add)} tag(s) to sidecar: {sidecar_path}", file=sys.stderr) + total_new_tags += len(tags_to_add) + total_modified += 1 + # Update res tags for downstream + if isinstance(res, models.PipeObject): + res.tags = (res.tags or []) + tags_to_add + if isinstance(res.extra, dict): + res.extra['tags'] = res.tags + elif isinstance(res, dict): + res['tags'] = list(set((res.get('tags') or []) + tags_to_add)) + except Exception as exc: + log(f"[add_tag] Warning: Failed to write sidecar for {file_path}: {exc}", file=sys.stderr) + else: + log(f"[add_tag] Warning: Storage backend '{storage_source}' not found in config", file=sys.stderr) + else: + # For other storage types or unknown sources, avoid writing sidecars to reduce clutter + # (local/hydrus are handled above). + ctx.emit(res) + continue + # If title changed, refresh the cached result table so the display reflects the new name + final_title = self._extract_title_tag(final_tags) + if final_title and (not original_title or final_title.lower() != original_title.lower()): + self._refresh_result_table_title(final_title, hydrus_hash or file_hash, file_hash, file_path) + # If tags changed, refresh tag view via get-tag (prefer current subject; fall back to hash refresh) + if new_tags_added or removed_tags: + self._refresh_tags_view(res, hydrus_hash, file_hash, file_path, config) + # Emit the modified result + ctx.emit(res) + log(f"[add_tag] Added {total_new_tags} new tag(s) across {len(results)} item(s); modified {total_modified} item(s)", file=sys.stderr) + return 0 + + +CMDLET = Add_Tag() \ No newline at end of file diff --git a/cmdlets/add_tags.py b/cmdlets/add_tags.py index fd00da1..3cba2f9 100644 --- a/cmdlets/add_tags.py +++ b/cmdlets/add_tags.py @@ -1,20 +1,18 @@ from __future__ import annotations from typing import Any, Dict, List, Sequence, Optional -import json from pathlib import Path import sys from helper.logger import log -from . 
import register import models import pipeline as ctx from ._shared import normalize_result_input, filter_results_by_temp from helper import hydrus as hydrus_wrapper -from helper.local_library import read_sidecar, write_sidecar, find_sidecar, has_sidecar, LocalLibraryDB +from helper.folder_store import read_sidecar, write_sidecar, find_sidecar, has_sidecar, FolderDB from metadata import rename -from ._shared import Cmdlet, CmdletArg, normalize_hash, parse_tag_arguments, expand_tag_groups, parse_cmdlet_args, collapse_namespace_tags +from ._shared import Cmdlet, CmdletArg, SharedArgs, normalize_hash, parse_tag_arguments, expand_tag_groups, parse_cmdlet_args, collapse_namespace_tags, should_show_help, get_field from config import get_local_storage_path @@ -68,29 +66,16 @@ def _matches_target(item: Any, hydrus_hash: Optional[str], file_hash: Optional[s def norm(val: Any) -> Optional[str]: return str(val).lower() if val is not None else None + # Define field names to check for hashes and paths + hash_fields = ["hydrus_hash", "hash", "hash_hex", "file_hash"] + path_fields = ["path", "file_path", "target"] + if isinstance(item, dict): - hashes = [ - norm(item.get("hydrus_hash")), - norm(item.get("hash")), - norm(item.get("hash_hex")), - norm(item.get("file_hash")), - ] - paths = [ - norm(item.get("path")), - norm(item.get("file_path")), - norm(item.get("target")), - ] + hashes = [norm(item.get(field)) for field in hash_fields] + paths = [norm(item.get(field)) for field in path_fields] else: - hashes = [ - norm(getattr(item, "hydrus_hash", None)), - norm(getattr(item, "hash_hex", None)), - norm(getattr(item, "file_hash", None)), - ] - paths = [ - norm(getattr(item, "path", None)), - norm(getattr(item, "file_path", None)), - norm(getattr(item, "target", None)), - ] + hashes = [norm(get_field(item, field)) for field in hash_fields] + paths = [norm(get_field(item, field)) for field in path_fields] if hydrus_hash_l and hydrus_hash_l in hashes: return True @@ -147,20 +132,18 @@ def _refresh_result_table_title(new_title: str, hydrus_hash: Optional[str], file except Exception: pass updated_items.append(item) - if not match_found: return from result_table import ResultTable # Local import to avoid circular dependency - new_table = ResultTable(getattr(last_table, "title", ""), title_width=getattr(last_table, "title_width", 80), max_columns=getattr(last_table, "max_columns", None)) - if getattr(last_table, "source_command", None): - new_table.set_source_command(last_table.source_command, getattr(last_table, "source_args", [])) + new_table = last_table.copy_with_title(getattr(last_table, "title", "")) for item in updated_items: new_table.add_result(item) - ctx.set_last_result_table_preserve_history(new_table, updated_items) + # Keep the underlying history intact; update only the overlay so @.. can + # clear the overlay then continue back to prior tables (e.g., the search list). 
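The comment above describes an overlay-over-history design: a title refresh replaces only a transient overlay, so `@..` first clears the overlay and then walks the committed tables; the `ctx.set_last_result_table_overlay` call that follows applies it. A minimal, self-contained illustration of the idea (plain Python, not the project's `pipeline` module):

```python
class ResultHistory:
    """Toy model of committed result tables plus a transient overlay."""

    def __init__(self) -> None:
        self._stack: list = []   # committed tables, navigable with @..
        self._overlay = None     # transient view layered on top (e.g., a retitled copy)

    def push(self, table) -> None:
        self._stack.append(table)
        self._overlay = None

    def set_overlay(self, table) -> None:
        self._overlay = table    # does not touch the committed history

    def current(self):
        if self._overlay is not None:
            return self._overlay
        return self._stack[-1] if self._stack else None

    def back(self):
        # The first @.. clears the overlay; further steps pop committed tables.
        if self._overlay is not None:
            self._overlay = None
        elif self._stack:
            self._stack.pop()
        return self.current()
```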
ctx.set_last_result_table_overlay(new_table, updated_items) except Exception: pass @@ -194,347 +177,409 @@ def _refresh_tags_view(res: Any, hydrus_hash: Optional[str], file_hash: Optional +class Add_Tag(Cmdlet): + """Class-based add-tags cmdlet with Cmdlet metadata inheritance.""" -@register(["add-tag", "add-tags"]) -def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: - """Add tags to a file with smart filtering for pipeline results.""" - try: - if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args): - log(json.dumps(CMDLET, ensure_ascii=False, indent=2)) + def __init__(self) -> None: + super().__init__( + name="add-tags", + summary="Add tags to a Hydrus file or write them to a local .tags sidecar.", + usage="add-tags [-hash ] [-duplicate ] [-list [,...]] [--all] [,...]", + arg=[ + SharedArgs.HASH, + CmdletArg("-duplicate", type="string", description="Copy existing tag values to new namespaces. Formats: title:album,artist (explicit) or title,album,artist (inferred)"), + CmdletArg("-list", type="string", description="Load predefined tag lists from adjective.json. Comma-separated list names (e.g., -list philosophy,occult)."), + CmdletArg("--all", type="flag", description="Include temporary files in tagging (by default, only tags non-temporary files)."), + CmdletArg("tags", type="string", required=False, description="One or more tags to add. Comma- or space-separated. Can also use {list_name} syntax. If omitted, uses tags from pipeline payload.", variadic=True), + ], + detail=[ + "- By default, only tags non-temporary files (from pipelines). Use --all to tag everything.", + "- Without -hash and when the selection is a local file, tags are written to .tags.", + "- With a Hydrus hash, tags are sent to the 'my tags' service.", + "- Multiple tags can be comma-separated or space-separated.", + "- Use -list to include predefined tag lists from adjective.json: -list philosophy,occult", + "- Tags can also reference lists with curly braces: add-tag {philosophy} \"other:tag\"", + "- Use -duplicate to copy EXISTING tag values to new namespaces:", + " Explicit format: -duplicate title:album,artist (copies title: to album: and artist:)", + " Inferred format: -duplicate title,album,artist (first is source, rest are targets)", + "- The source namespace must already exist in the file being tagged.", + "- Target namespaces that already have a value are skipped (not overwritten).", + "- You can also pass the target hash as a tag token: hash:. 
This overrides -hash and is removed from the tag list.", + ], + exec=self.run, + ) + self.register() + + def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: + """Add tags to a file with smart filtering for pipeline results.""" + if should_show_help(args): + log(f"Cmdlet: {self.name}\nSummary: {self.summary}\nUsage: {self.usage}") return 0 - except Exception: - pass - - # Parse arguments - parsed = parse_cmdlet_args(args, CMDLET) - - # Check for --all flag - include_temp = parsed.get("all", False) - - # Normalize input to list - results = normalize_result_input(result) - - # Filter by temp status (unless --all is set) - if not include_temp: - results = filter_results_by_temp(results, include_temp=False) - - if not results: - log("No valid files to tag (all results were temporary; use --all to include temporary files)", file=sys.stderr) - return 1 - - # Get tags from arguments (or fallback to pipeline payload) - raw_tags = parsed.get("tags", []) - if isinstance(raw_tags, str): - raw_tags = [raw_tags] - - # Fallback: if no tags provided explicitly, try to pull from first result payload - if not raw_tags and results: - first = results[0] - payload_tags = None - if isinstance(first, models.PipeObject): - payload_tags = first.extra.get("tags") if isinstance(first.extra, dict) else None - elif isinstance(first, dict): - payload_tags = first.get("tags") - if not payload_tags: - payload_tags = first.get("extra", {}).get("tags") if isinstance(first.get("extra"), dict) else None - # If metadata payload stored tags under nested list, accept directly - if payload_tags is None: - payload_tags = getattr(first, "tags", None) - if payload_tags: - if isinstance(payload_tags, str): - raw_tags = [payload_tags] - elif isinstance(payload_tags, list): - raw_tags = payload_tags - - # Handle -list argument (convert to {list} syntax) - list_arg = parsed.get("list") - if list_arg: - for l in list_arg.split(','): - l = l.strip() - if l: - raw_tags.append(f"{{{l}}}") - - # Parse and expand tags - tags_to_add = parse_tag_arguments(raw_tags) - tags_to_add = expand_tag_groups(tags_to_add) - - if not tags_to_add: - log("No tags provided to add", file=sys.stderr) - return 1 - - # Get other flags - hash_override = normalize_hash(parsed.get("hash")) - duplicate_arg = parsed.get("duplicate") - - # If no tags provided (and no list), write sidecar files with embedded tags - # Note: Since 'tags' is required=True in CMDLET, this block might be unreachable via CLI - # unless called programmatically or if required check is bypassed. 
- if not tags_to_add and not duplicate_arg: - # Write sidecar files with the tags that are already in the result dicts + + # Parse arguments + parsed = parse_cmdlet_args(args, self) + + # Check for --all flag + include_temp = parsed.get("all", False) + + # Normalize input to list + results = normalize_result_input(result) + + # Filter by temp status (unless --all is set) + if not include_temp: + results = filter_results_by_temp(results, include_temp=False) + + if not results: + log("No valid files to tag (all results were temporary; use --all to include temporary files)", file=sys.stderr) + return 1 + + # Get tags from arguments (or fallback to pipeline payload) + raw_tags = parsed.get("tags", []) + if isinstance(raw_tags, str): + raw_tags = [raw_tags] + + # Fallback: if no tags provided explicitly, try to pull from first result payload + if not raw_tags and results: + first = results[0] + payload_tags = None + + # Try multiple tag lookup strategies in order + tag_lookups = [ + lambda x: x.extra.get("tags") if isinstance(x, models.PipeObject) and isinstance(x.extra, dict) else None, + lambda x: x.get("tags") if isinstance(x, dict) else None, + lambda x: x.get("extra", {}).get("tags") if isinstance(x, dict) and isinstance(x.get("extra"), dict) else None, + lambda x: getattr(x, "tags", None), + ] + + for lookup in tag_lookups: + try: + payload_tags = lookup(first) + if payload_tags: + break + except (AttributeError, TypeError, KeyError): + continue + + if payload_tags: + if isinstance(payload_tags, str): + raw_tags = [payload_tags] + elif isinstance(payload_tags, list): + raw_tags = payload_tags + + # Handle -list argument (convert to {list} syntax) + list_arg = parsed.get("list") + if list_arg: + for l in list_arg.split(','): + l = l.strip() + if l: + raw_tags.append(f"{{{l}}}") + + # Parse and expand tags + tags_to_add = parse_tag_arguments(raw_tags) + tags_to_add = expand_tag_groups(tags_to_add) + + # Allow hash override via namespaced token (e.g., "hash:abcdef...") + extracted_hash = None + filtered_tags: List[str] = [] + for tag in tags_to_add: + if isinstance(tag, str) and tag.lower().startswith("hash:"): + _, _, hash_val = tag.partition(":") + if hash_val: + extracted_hash = normalize_hash(hash_val.strip()) + continue + filtered_tags.append(tag) + tags_to_add = filtered_tags + + if not tags_to_add: + log("No tags provided to add", file=sys.stderr) + return 1 + + # Get other flags (hash override can come from -hash or hash: token) + hash_override = normalize_hash(parsed.get("hash")) or extracted_hash + duplicate_arg = parsed.get("duplicate") + + # If no tags provided (and no list), write sidecar files with embedded tags + # Note: Since 'tags' is required=False in the cmdlet arg, this block can be reached via CLI + # when no tag arguments are provided. 
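Before the no-tag passthrough handled below, the `hash:` token convention handled above can be read as a small filter over the tag list: any `hash:<sha256>` entry overrides the target and never reaches the tag service. A hedged sketch of that behaviour (`split_hash_token` is an illustrative name; real code would normalize the value with the cmdlet's `normalize_hash`):

```python
from typing import List, Optional, Tuple

def split_hash_token(tags: List[str]) -> Tuple[Optional[str], List[str]]:
    """Separate a "hash:..." target token from ordinary tags."""
    target_hash: Optional[str] = None
    remaining: List[str] = []
    for tag in tags:
        if tag.lower().startswith("hash:"):
            _, _, value = tag.partition(":")
            if value.strip():
                target_hash = value.strip().lower()
            continue  # the token is consumed, not stored as a tag
        remaining.append(tag)
    return target_hash, remaining

# Example:
print(split_hash_token(["artist:radiohead", "hash:abcdef1234567890", "title:Creep"]))
# -> ('abcdef1234567890', ['artist:radiohead', 'title:Creep'])
```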
+ if not tags_to_add and not duplicate_arg: + # Write sidecar files with the tags that are already in the result dicts + sidecar_count = 0 + for res in results: + # Handle both dict and PipeObject formats + file_path = None + tags = [] + file_hash = "" + + if isinstance(res, models.PipeObject): + file_path = res.file_path + tags = res.extra.get('tags', []) + file_hash = res.hash or "" + elif isinstance(res, dict): + file_path = res.get('file_path') + # Try multiple tag locations in order + tag_sources = [lambda: res.get('tags', []), lambda: res.get('extra', {}).get('tags', [])] + for source in tag_sources: + tags = source() + if tags: + break + file_hash = res.get('hash', "") + + if not file_path: + log(f"[add_tags] Warning: Result has no file_path, skipping", file=sys.stderr) + ctx.emit(res) + continue + + if tags: + # Write sidecar file for this file with its tags + try: + sidecar_path = write_sidecar(Path(file_path), tags, [], file_hash) + log(f"[add_tags] Wrote {len(tags)} tag(s) to sidecar: {sidecar_path}", file=sys.stderr) + sidecar_count += 1 + except Exception as e: + log(f"[add_tags] Warning: Failed to write sidecar for {file_path}: {e}", file=sys.stderr) + + ctx.emit(res) + + if sidecar_count > 0: + log(f"[add_tags] Wrote {sidecar_count} sidecar file(s) with embedded tags", file=sys.stderr) + else: + log(f"[add_tags] No tags to write - passed {len(results)} result(s) through unchanged", file=sys.stderr) + return 0 + + # Tags ARE provided - append them to each result and write sidecar files or add to Hydrus sidecar_count = 0 + total_new_tags = 0 + total_modified = 0 for res in results: # Handle both dict and PipeObject formats file_path = None - tags = [] + existing_tags = [] file_hash = "" - + storage_source = None + hydrus_hash = None + + # Define field name aliases to check + path_field_names = ['file_path', 'path'] + source_field_names = ['storage_source', 'source', 'origin'] + hash_field_names = ['hydrus_hash', 'hash', 'hash_hex'] + if isinstance(res, models.PipeObject): file_path = res.file_path - tags = res.extra.get('tags', []) + existing_tags = res.extra.get('tags', []) file_hash = res.file_hash or "" + for field in source_field_names: + storage_source = res.extra.get(field) + if storage_source: + break + hydrus_hash = res.extra.get('hydrus_hash') elif isinstance(res, dict): - file_path = res.get('file_path') - tags = res.get('tags', []) # Check both tags and extra['tags'] - if not tags and 'extra' in res: - tags = res['extra'].get('tags', []) + # Try path field names in order + for field in path_field_names: + file_path = res.get(field) + if file_path: + break + + # Try tag locations in order + tag_sources = [lambda: res.get('tags', []), lambda: res.get('extra', {}).get('tags', [])] + for source in tag_sources: + existing_tags = source() + if existing_tags: + break + file_hash = res.get('file_hash', "") - - if not file_path: - log(f"[add_tags] Warning: Result has no file_path, skipping", file=sys.stderr) + + # Try source field names in order (top-level then extra) + for field in source_field_names: + storage_source = res.get(field) + if storage_source: + break + if not storage_source and 'extra' in res: + for field in source_field_names: + storage_source = res.get('extra', {}).get(field) + if storage_source: + break + + # Try hash field names in order (top-level then extra) + for field in hash_field_names: + hydrus_hash = res.get(field) + if hydrus_hash: + break + if not hydrus_hash and 'extra' in res: + for field in hash_field_names: + hydrus_hash = res.get('extra', 
{}).get(field) + if hydrus_hash: + break + + if not hydrus_hash and file_hash: + hydrus_hash = file_hash + if not storage_source and hydrus_hash and not file_path: + storage_source = 'hydrus' + # If we have a file path but no storage source, assume local to avoid sidecar spam + if not storage_source and file_path: + storage_source = 'local' + else: ctx.emit(res) continue - - if tags: - # Write sidecar file for this file with its tags - try: - sidecar_path = write_sidecar(Path(file_path), tags, [], file_hash) - log(f"[add_tags] Wrote {len(tags)} tag(s) to sidecar: {sidecar_path}", file=sys.stderr) - sidecar_count += 1 - except Exception as e: - log(f"[add_tags] Warning: Failed to write sidecar for {file_path}: {e}", file=sys.stderr) - - ctx.emit(res) - - if sidecar_count > 0: - log(f"[add_tags] Wrote {sidecar_count} sidecar file(s) with embedded tags", file=sys.stderr) - else: - log(f"[add_tags] No tags to write - passed {len(results)} result(s) through unchanged", file=sys.stderr) - return 0 - - # Tags ARE provided - append them to each result and write sidecar files or add to Hydrus - sidecar_count = 0 - total_new_tags = 0 - total_modified = 0 - for res in results: - # Handle both dict and PipeObject formats - file_path = None - existing_tags = [] - file_hash = "" - storage_source = None - hydrus_hash = None - - if isinstance(res, models.PipeObject): - file_path = res.file_path - existing_tags = res.extra.get('tags', []) - file_hash = res.file_hash or "" - storage_source = res.extra.get('storage_source') or res.extra.get('source') - hydrus_hash = res.extra.get('hydrus_hash') - elif isinstance(res, dict): - file_path = res.get('file_path') or res.get('path') - existing_tags = res.get('tags', []) - if not existing_tags and 'extra' in res: - existing_tags = res['extra'].get('tags', []) - file_hash = res.get('file_hash', "") - storage_source = res.get('storage_source') or res.get('source') or res.get('origin') - if not storage_source and 'extra' in res: - storage_source = res['extra'].get('storage_source') or res['extra'].get('source') - # For Hydrus results from search-file, look for hash, hash_hex, or target (all contain the hash) - hydrus_hash = res.get('hydrus_hash') or res.get('hash') or res.get('hash_hex') - if not hydrus_hash and 'extra' in res: - hydrus_hash = res['extra'].get('hydrus_hash') or res['extra'].get('hash') or res['extra'].get('hash_hex') - if not hydrus_hash and file_hash: - hydrus_hash = file_hash - if not storage_source and hydrus_hash and not file_path: - storage_source = 'hydrus' - # If we have a file path but no storage source, assume local to avoid sidecar spam - if not storage_source and file_path: - storage_source = 'local' - else: - ctx.emit(res) - continue - original_tags_lower = {str(t).lower() for t in existing_tags if isinstance(t, str)} - original_tags_snapshot = list(existing_tags) - original_title = _extract_title_tag(original_tags_snapshot) - removed_tags: List[str] = [] - - # Apply hash override if provided - if hash_override: - hydrus_hash = hash_override - # If we have a hash override, we treat it as a Hydrus target - storage_source = "hydrus" - - if not file_path and not hydrus_hash: - log(f"[add_tags] Warning: Result has neither file_path nor hash available, skipping", file=sys.stderr) - ctx.emit(res) - continue - - # Handle -duplicate logic (copy existing tags to new namespaces) - if duplicate_arg: - # Parse duplicate format: source:target1,target2 or source,target1,target2 - parts = duplicate_arg.split(':') - source_ns = "" - targets = [] - - if 
len(parts) > 1: - # Explicit format: source:target1,target2 - source_ns = parts[0] - targets = parts[1].split(',') - else: - # Inferred format: source,target1,target2 - parts = duplicate_arg.split(',') + original_tags_lower = {str(t).lower() for t in existing_tags if isinstance(t, str)} + original_tags_snapshot = list(existing_tags) + original_title = _extract_title_tag(original_tags_snapshot) + removed_tags: List[str] = [] + + # Apply hash override if provided + if hash_override: + hydrus_hash = hash_override + # If we have a hash override, we treat it as a Hydrus target + storage_source = "hydrus" + + if not file_path and not hydrus_hash: + log(f"[add_tags] Warning: Result has neither file_path nor hash available, skipping", file=sys.stderr) + ctx.emit(res) + continue + + # Handle -duplicate logic (copy existing tags to new namespaces) + if duplicate_arg: + # Parse duplicate format: source:target1,target2 or source,target1,target2 + parts = duplicate_arg.split(':') + source_ns = "" + targets = [] + if len(parts) > 1: + # Explicit format: source:target1,target2 source_ns = parts[0] - targets = parts[1:] - - if source_ns and targets: - # Find tags in source namespace - source_tags = [t for t in existing_tags if t.startswith(source_ns + ':')] - for t in source_tags: - value = t.split(':', 1)[1] - for target_ns in targets: - new_tag = f"{target_ns}:{value}" - if new_tag not in existing_tags and new_tag not in tags_to_add: - tags_to_add.append(new_tag) - - # Merge new tags with existing tags, handling namespace overwrites - # When adding a tag like "namespace:value", remove any existing "namespace:*" tags - for new_tag in tags_to_add: - # Check if this is a namespaced tag (format: "namespace:value") - if ':' in new_tag: - namespace = new_tag.split(':', 1)[0] - # Track removals for Hydrus: delete old tags in same namespace (except identical) - to_remove = [t for t in existing_tags if t.startswith(namespace + ':') and t.lower() != new_tag.lower()] - removed_tags.extend(to_remove) - # Remove any existing tags with the same namespace - existing_tags = [t for t in existing_tags if not (t.startswith(namespace + ':'))] - - # Add the new tag if not already present - if new_tag not in existing_tags: - existing_tags.append(new_tag) - - # Ensure only one tag per namespace (e.g., single title:) with latest preferred - existing_tags = collapse_namespace_tags(existing_tags, "title", prefer="last") - - # Compute new tags relative to original - new_tags_added = [t for t in existing_tags if isinstance(t, str) and t.lower() not in original_tags_lower] - total_new_tags += len(new_tags_added) - - # Update the result's tags - if isinstance(res, models.PipeObject): - res.extra['tags'] = existing_tags - elif isinstance(res, dict): - res['tags'] = existing_tags - - # If a title: tag was added, update the in-memory title and columns so downstream display reflects it immediately - title_value = _extract_title_tag(existing_tags) - _apply_title_to_result(res, title_value) - - final_tags = existing_tags - - # Determine where to add tags: Hydrus, local DB, or sidecar - if storage_source and storage_source.lower() == 'hydrus': - # Add tags to Hydrus using the API - target_hash = hydrus_hash or file_hash - if target_hash: - try: - tags_to_send = [t for t in existing_tags if isinstance(t, str) and t.lower() not in original_tags_lower] - hydrus_client = hydrus_wrapper.get_client(config) - service_name = hydrus_wrapper.get_tag_service_name(config) - if tags_to_send: - log(f"[add_tags] Adding {len(tags_to_send)} new tag(s) to 
Hydrus file: {target_hash}", file=sys.stderr) - hydrus_client.add_tags(target_hash, tags_to_send, service_name) - else: - log(f"[add_tags] No new tags to add for Hydrus file: {target_hash}", file=sys.stderr) - # Delete old namespace tags we replaced (e.g., previous title:) - if removed_tags: - unique_removed = sorted(set(removed_tags)) - hydrus_client.delete_tags(target_hash, unique_removed, service_name) - if tags_to_send: - log(f"[add_tags] ✓ Tags added to Hydrus", file=sys.stderr) - elif removed_tags: - log(f"[add_tags] ✓ Removed {len(unique_removed)} tag(s) from Hydrus", file=sys.stderr) - sidecar_count += 1 - if tags_to_send or removed_tags: - total_modified += 1 - except Exception as e: - log(f"[add_tags] Warning: Failed to add tags to Hydrus: {e}", file=sys.stderr) - else: - log(f"[add_tags] Warning: No hash available for Hydrus file, skipping", file=sys.stderr) - elif storage_source and storage_source.lower() == 'local': - # For local storage, save directly to DB (no sidecar needed) - if file_path: - library_root = get_local_storage_path(config) - if library_root: - try: - path_obj = Path(file_path) - with LocalLibraryDB(library_root) as db: - db.save_tags(path_obj, existing_tags) - # Reload tags to reflect DB state (preserves auto-title logic) - refreshed_tags = db.get_tags(path_obj) or existing_tags - # Recompute title from refreshed tags for accurate display - refreshed_title = _extract_title_tag(refreshed_tags) - if refreshed_title: - _apply_title_to_result(res, refreshed_title) - res_tags = refreshed_tags or existing_tags - if isinstance(res, models.PipeObject): - res.extra['tags'] = res_tags - elif isinstance(res, dict): - res['tags'] = res_tags - log(f"[add_tags] Added {len(new_tags_added)} new tag(s); {len(res_tags)} total tag(s) stored locally", file=sys.stderr) - sidecar_count += 1 - if new_tags_added or removed_tags: - total_modified += 1 - final_tags = res_tags - except Exception as e: - log(f"[add_tags] Warning: Failed to save tags to local DB: {e}", file=sys.stderr) + targets = parts[1].split(',') else: - log(f"[add_tags] Warning: No library root configured for local storage, skipping", file=sys.stderr) + # Inferred format: source,target1,target2 + parts = duplicate_arg.split(',') + if len(parts) > 1: + source_ns = parts[0] + targets = parts[1:] + + if source_ns and targets: + # Find tags in source namespace + source_tags = [t for t in existing_tags if t.startswith(source_ns + ':')] + for t in source_tags: + value = t.split(':', 1)[1] + for target_ns in targets: + new_tag = f"{target_ns}:{value}" + if new_tag not in existing_tags and new_tag not in tags_to_add: + tags_to_add.append(new_tag) + + # Merge new tags with existing tags, handling namespace overwrites + # When adding a tag like "namespace:value", remove any existing "namespace:*" tags + for new_tag in tags_to_add: + # Check if this is a namespaced tag (format: "namespace:value") + if ':' in new_tag: + namespace = new_tag.split(':', 1)[0] + # Track removals for Hydrus: delete old tags in same namespace (except identical) + to_remove = [t for t in existing_tags if t.startswith(namespace + ':') and t.lower() != new_tag.lower()] + removed_tags.extend(to_remove) + # Remove any existing tags with the same namespace + existing_tags = [t for t in existing_tags if not (t.startswith(namespace + ':'))] + + # Add the new tag if not already present + if new_tag not in existing_tags: + existing_tags.append(new_tag) + + # Ensure only one tag per namespace (e.g., single title:) with latest preferred + existing_tags = 
collapse_namespace_tags(existing_tags, "title", prefer="last") + + # Compute new tags relative to original + new_tags_added = [t for t in existing_tags if isinstance(t, str) and t.lower() not in original_tags_lower] + total_new_tags += len(new_tags_added) + + # Update the result's tags + if isinstance(res, models.PipeObject): + res.extra['tags'] = existing_tags + elif isinstance(res, dict): + res['tags'] = existing_tags + + # If a title: tag was added, update the in-memory title and columns so downstream display reflects it immediately + title_value = _extract_title_tag(existing_tags) + _apply_title_to_result(res, title_value) + + final_tags = existing_tags + + # Determine where to add tags: Hydrus, local DB, or sidecar + if storage_source and storage_source.lower() == 'hydrus': + # Add tags to Hydrus using the API + target_hash = hydrus_hash or file_hash + if target_hash: + try: + tags_to_send = [t for t in existing_tags if isinstance(t, str) and t.lower() not in original_tags_lower] + hydrus_client = hydrus_wrapper.get_client(config) + service_name = hydrus_wrapper.get_tag_service_name(config) + if tags_to_send: + log(f"[add_tags] Adding {len(tags_to_send)} new tag(s) to Hydrus file: {target_hash}", file=sys.stderr) + hydrus_client.add_tags(target_hash, tags_to_send, service_name) + else: + log(f"[add_tags] No new tags to add for Hydrus file: {target_hash}", file=sys.stderr) + # Delete old namespace tags we replaced (e.g., previous title:) + if removed_tags: + unique_removed = sorted(set(removed_tags)) + hydrus_client.delete_tags(target_hash, unique_removed, service_name) + if tags_to_send: + log(f"[add_tags] ✓ Tags added to Hydrus", file=sys.stderr) + elif removed_tags: + log(f"[add_tags] ✓ Removed {len(unique_removed)} tag(s) from Hydrus", file=sys.stderr) + sidecar_count += 1 + if tags_to_send or removed_tags: + total_modified += 1 + except Exception as e: + log(f"[add_tags] Warning: Failed to add tags to Hydrus: {e}", file=sys.stderr) + else: + log(f"[add_tags] Warning: No hash available for Hydrus file, skipping", file=sys.stderr) + elif storage_source and storage_source.lower() == 'local': + # For local storage, save directly to DB (no sidecar needed) + if file_path: + library_root = get_local_storage_path(config) + if library_root: + try: + path_obj = Path(file_path) + with FolderDB(library_root) as db: + db.save_tags(path_obj, existing_tags) + # Reload tags to reflect DB state (preserves auto-title logic) + file_hash = db.get_file_hash(path_obj) + refreshed_tags = db.get_tags(file_hash) if file_hash else existing_tags + # Recompute title from refreshed tags for accurate display + refreshed_title = _extract_title_tag(refreshed_tags) + if refreshed_title: + _apply_title_to_result(res, refreshed_title) + res_tags = refreshed_tags or existing_tags + if isinstance(res, models.PipeObject): + res.extra['tags'] = res_tags + elif isinstance(res, dict): + res['tags'] = res_tags + log(f"[add_tags] Added {len(new_tags_added)} new tag(s); {len(res_tags)} total tag(s) stored locally", file=sys.stderr) + sidecar_count += 1 + if new_tags_added or removed_tags: + total_modified += 1 + final_tags = res_tags + except Exception as e: + log(f"[add_tags] Warning: Failed to save tags to local DB: {e}", file=sys.stderr) + else: + log(f"[add_tags] Warning: No library root configured for local storage, skipping", file=sys.stderr) + else: + log(f"[add_tags] Warning: No file path for local storage, skipping", file=sys.stderr) else: - log(f"[add_tags] Warning: No file path for local storage, skipping", 
file=sys.stderr) - else: - # For other storage types or unknown sources, avoid writing sidecars to reduce clutter - # (local/hydrus are handled above). + # For other storage types or unknown sources, avoid writing sidecars to reduce clutter + # (local/hydrus are handled above). + ctx.emit(res) + continue + + # If title changed, refresh the cached result table so the display reflects the new name + final_title = _extract_title_tag(final_tags) + if final_title and (not original_title or final_title.lower() != original_title.lower()): + _refresh_result_table_title(final_title, hydrus_hash or file_hash, file_hash, file_path) + + # If tags changed, refresh tag view via get-tag (prefer current subject; fall back to hash refresh) + if new_tags_added or removed_tags: + _refresh_tags_view(res, hydrus_hash, file_hash, file_path, config) + + # Emit the modified result ctx.emit(res) - continue - # If title changed, refresh the cached result table so the display reflects the new name - final_title = _extract_title_tag(final_tags) - if final_title and (not original_title or final_title.lower() != original_title.lower()): - _refresh_result_table_title(final_title, hydrus_hash or file_hash, file_hash, file_path) + log(f"[add_tags] Added {total_new_tags} new tag(s) across {len(results)} item(s); modified {total_modified} item(s)", file=sys.stderr) + return 0 - # If tags changed, refresh tag view via get-tag (prefer current subject; fall back to hash refresh) - if new_tags_added or removed_tags: - _refresh_tags_view(res, hydrus_hash, file_hash, file_path, config) - - # Emit the modified result - ctx.emit(res) - - log(f"[add_tags] Added {total_new_tags} new tag(s) across {len(results)} item(s); modified {total_modified} item(s)", file=sys.stderr) - return 0 -CMDLET = Cmdlet( - name="add-tags", - summary="Add tags to a Hydrus file or write them to a local .tags sidecar.", - usage="add-tags [-hash ] [-duplicate ] [-list [,...]] [--all] [,...]", - args=[ - CmdletArg("-hash", type="string", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."), - CmdletArg("-duplicate", type="string", description="Copy existing tag values to new namespaces. Formats: title:album,artist (explicit) or title,album,artist (inferred)"), - CmdletArg("-list", type="string", description="Load predefined tag lists from adjective.json. Comma-separated list names (e.g., -list philosophy,occult)."), - CmdletArg("--all", type="flag", description="Include temporary files in tagging (by default, only tags non-temporary files)."), - CmdletArg("tags", type="string", required=False, description="One or more tags to add. Comma- or space-separated. Can also use {list_name} syntax. If omitted, uses tags from pipeline payload.", variadic=True), - ], - details=[ - "- By default, only tags non-temporary files (from pipelines). 
Use --all to tag everything.", - "- Without -hash and when the selection is a local file, tags are written to .tags.", - "- With a Hydrus hash, tags are sent to the 'my tags' service.", - "- Multiple tags can be comma-separated or space-separated.", - "- Use -list to include predefined tag lists from adjective.json: -list philosophy,occult", - "- Tags can also reference lists with curly braces: add-tag {philosophy} \"other:tag\"", - "- Use -duplicate to copy EXISTING tag values to new namespaces:", - " Explicit format: -duplicate title:album,artist (copies title: to album: and artist:)", - " Inferred format: -duplicate title,album,artist (first is source, rest are targets)", - "- The source namespace must already exist in the file being tagged.", - "- Target namespaces that already have a value are skipped (not overwritten).", - ], -) \ No newline at end of file +CMDLET = Add_Tag() \ No newline at end of file diff --git a/cmdlets/add_url.py b/cmdlets/add_url.py index 4008ac5..a48a517 100644 --- a/cmdlets/add_url.py +++ b/cmdlets/add_url.py @@ -1,170 +1,85 @@ from __future__ import annotations from typing import Any, Dict, Sequence -import json import sys -from pathlib import Path from . import register -import models import pipeline as ctx -from helper import hydrus as hydrus_wrapper -from ._shared import Cmdlet, CmdletArg, normalize_hash +from ._shared import Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, get_field, normalize_hash from helper.logger import log -from config import get_local_storage_path -from helper.local_library import LocalLibraryDB -from helper.logger import debug - -CMDLET = Cmdlet( - name="add-url", - summary="Associate a URL with a file (Hydrus or Local).", - usage="add-url [-hash ] ", - args=[ - CmdletArg("-hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."), - CmdletArg("url", required=True, description="The URL to associate with the file."), - ], - details=[ - "- Adds the URL to the file's known URL list.", - ], -) +from helper.store import FileStorage -@register(["add-url", "ass-url", "associate-url", "add_url"]) # aliases -def add(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: - # Help - try: - if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args): - log(json.dumps(CMDLET, ensure_ascii=False, indent=2)) +class Add_Url(Cmdlet): + """Add URL associations to files via hash+store.""" + + NAME = "add-url" + SUMMARY = "Associate a URL with a file" + USAGE = "@1 | add-url " + ARGS = [ + SharedArgs.HASH, + SharedArgs.STORE, + CmdletArg("url", required=True, description="URL to associate"), + ] + DETAIL = [ + "- Associates URL with file identified by hash+store", + "- Multiple url can be comma-separated", + ] + + def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: + """Add URL to file via hash+store backend.""" + parsed = parse_cmdlet_args(args, self) + + # Extract hash and store from result or args + file_hash = parsed.get("hash") or get_field(result, "hash") + store_name = parsed.get("store") or get_field(result, "store") + url_arg = parsed.get("url") + + if not file_hash: + log("Error: No file hash provided") + return 1 + + if not store_name: + log("Error: No store name provided") + return 1 + + if not url_arg: + log("Error: No URL provided") + return 1 + + # Normalize hash + file_hash = normalize_hash(file_hash) + if not file_hash: + log("Error: Invalid hash format") + return 1 + + # Parse url (comma-separated) + url = [u.strip() for 
u in str(url_arg).split(',') if u.strip()] + if not url: + log("Error: No valid url provided") + return 1 + + # Get backend and add url + try: + storage = FileStorage(config) + backend = storage[store_name] + + for url in url: + backend.add_url(file_hash, url) + ctx.emit(f"Added URL: {url}") + return 0 - except Exception: - pass - - from ._shared import parse_cmdlet_args - parsed = parse_cmdlet_args(args, CMDLET) - override_hash = parsed.get("hash") - url_arg = parsed.get("url") - - if not url_arg: - log("Requires a URL argument") - return 1 - - url_arg = str(url_arg).strip() - if not url_arg: - log("Requires a non-empty URL") - return 1 - - # Split by comma to handle multiple URLs - urls_to_add = [u.strip() for u in url_arg.split(',') if u.strip()] - - # Handle @N selection which creates a list - extract the first item - if isinstance(result, list) and len(result) > 0: - result = result[0] - - # Helper to get field from both dict and object - def get_field(obj: Any, field: str, default: Any = None) -> Any: - if isinstance(obj, dict): - return obj.get(field, default) - else: - return getattr(obj, field, default) - - success = False - - # 1. Try Local Library - file_path = get_field(result, "file_path") or get_field(result, "path") - if file_path and not override_hash: - try: - path_obj = Path(file_path) - if path_obj.exists(): - storage_path = get_local_storage_path(config) - if storage_path: - with LocalLibraryDB(storage_path) as db: - metadata = db.get_metadata(path_obj) or {} - known_urls = metadata.get("known_urls") or [] - - local_changed = False - for url in urls_to_add: - if url not in known_urls: - known_urls.append(url) - local_changed = True - ctx.emit(f"Associated URL with local file {path_obj.name}: {url}") - else: - ctx.emit(f"URL already exists for local file {path_obj.name}: {url}") - - if local_changed: - metadata["known_urls"] = known_urls - # Ensure we have a hash if possible, but don't fail if not - if not metadata.get("hash"): - try: - from helper.utils import sha256_file - metadata["hash"] = sha256_file(path_obj) - except Exception: - pass - - db.save_metadata(path_obj, metadata) - - success = True - except Exception as e: - log(f"Error updating local library: {e}", file=sys.stderr) - - # 2. 
Try Hydrus - hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(get_field(result, "hash_hex", None)) - - if hash_hex: - try: - client = hydrus_wrapper.get_client(config) - if client: - for url in urls_to_add: - client.associate_url(hash_hex, url) - preview = hash_hex[:12] + ('…' if len(hash_hex) > 12 else '') - ctx.emit(f"Associated URL with Hydrus file {preview}: {url}") - success = True + + except KeyError: + log(f"Error: Storage backend '{store_name}' not configured") + return 1 except Exception as exc: - # Only log error if we didn't succeed locally either - if not success: - log(f"Hydrus add-url failed: {exc}", file=sys.stderr) - return 1 + log(f"Error adding URL: {exc}", file=sys.stderr) + return 1 - if success: - # If we just mutated the currently displayed item, refresh URLs via get-url - try: - from cmdlets import get_url as get_url_cmd # type: ignore - except Exception: - get_url_cmd = None - if get_url_cmd: - try: - subject = ctx.get_last_result_subject() - if subject is not None: - def norm(val: Any) -> str: - return str(val).lower() - target_hash = norm(hash_hex) if hash_hex else None - target_path = norm(file_path) if 'file_path' in locals() else None - subj_hashes = [] - subj_paths = [] - if isinstance(subject, dict): - subj_hashes = [norm(v) for v in [subject.get("hydrus_hash"), subject.get("hash"), subject.get("hash_hex"), subject.get("file_hash")] if v] - subj_paths = [norm(v) for v in [subject.get("file_path"), subject.get("path"), subject.get("target")] if v] - else: - subj_hashes = [norm(getattr(subject, f, None)) for f in ("hydrus_hash", "hash", "hash_hex", "file_hash") if getattr(subject, f, None)] - subj_paths = [norm(getattr(subject, f, None)) for f in ("file_path", "path", "target") if getattr(subject, f, None)] - is_match = False - if target_hash and target_hash in subj_hashes: - is_match = True - if target_path and target_path in subj_paths: - is_match = True - if is_match: - refresh_args: list[str] = [] - if hash_hex: - refresh_args.extend(["-hash", hash_hex]) - get_url_cmd._run(subject, refresh_args, config) - except Exception: - debug("URL refresh skipped (error)") - return 0 - - if not hash_hex and not file_path: - log("Selected result does not include a file path or Hydrus hash", file=sys.stderr) - return 1 - - return 1 + +# Register cmdlet +register(["add-url", "add_url"])(Add_Url) diff --git a/cmdlets/check_file_status.py b/cmdlets/check_file_status.py index 468feca..46c975f 100644 --- a/cmdlets/check_file_status.py +++ b/cmdlets/check_file_status.py @@ -8,19 +8,19 @@ from helper.logger import log from . import register from helper import hydrus as hydrus_wrapper -from ._shared import Cmdlet, CmdletArg, normalize_hash +from ._shared import Cmdlet, CmdletArg, SharedArgs, normalize_hash, should_show_help CMDLET = Cmdlet( name="check-file-status", summary="Check if a file is active, deleted, or corrupted in Hydrus.", usage="check-file-status [-hash ]", - args=[ - CmdletArg("-hash", description="File hash (SHA256) to check. If not provided, uses selected result."), + arg=[ + SharedArgs.HASH, ], - details=[ + detail=[ "- Shows whether file is active in Hydrus or marked as deleted", - "- Detects corrupted data (e.g., comma-separated URLs)", + "- Detects corrupted data (e.g., comma-separated url)", "- Displays file metadata and service locations", "- Note: Hydrus keeps deleted files for recovery. 
Use cleanup-corrupted for full removal.", ], @@ -30,12 +30,9 @@ CMDLET = Cmdlet( @register(["check-file-status", "check-status", "file-status", "status"]) def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: # Help - try: - if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args): - log(json.dumps(CMDLET, ensure_ascii=False, indent=2)) - return 0 - except Exception: - pass + if should_show_help(args): + log(json.dumps(CMDLET, ensure_ascii=False, indent=2)) + return 0 # Parse arguments override_hash: str | None = None @@ -109,11 +106,11 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: log(f" - {sname} ({stype}) - deleted at {time_deleted}", file=sys.stderr) # URL check - urls = file_info.get("known_urls", []) - log(f"\n🔗 URLs ({len(urls)}):", file=sys.stderr) + url = file_info.get("url", []) + log(f"\n🔗 url ({len(url)}):", file=sys.stderr) corrupted_count = 0 - for i, url in enumerate(urls, 1): + for i, url in enumerate(url, 1): if "," in url: corrupted_count += 1 log(f" [{i}] ⚠️ CORRUPTED (comma-separated): {url[:50]}...", file=sys.stderr) diff --git a/cmdlets/cleanup.py b/cmdlets/cleanup.py index 1288925..43ba924 100644 --- a/cmdlets/cleanup.py +++ b/cmdlets/cleanup.py @@ -9,11 +9,12 @@ from __future__ import annotations from typing import Any, Dict, Sequence from pathlib import Path import sys +import json from helper.logger import log from . import register -from ._shared import Cmdlet, CmdletArg, get_pipe_object_path, normalize_result_input, filter_results_by_temp +from ._shared import Cmdlet, CmdletArg, get_pipe_object_path, normalize_result_input, filter_results_by_temp, should_show_help import models import pipeline as pipeline_context @@ -36,13 +37,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: """ # Help - try: - if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args): - import json - log(json.dumps(CMDLET, ensure_ascii=False, indent=2)) - return 0 - except Exception: - pass + if should_show_help(args): + log(json.dumps(CMDLET, ensure_ascii=False, indent=2)) + return 0 # Normalize input to list results = normalize_result_input(result) @@ -97,8 +94,8 @@ CMDLET = Cmdlet( name="cleanup", summary="Remove temporary artifacts from pipeline (marked with is_temp=True).", usage="cleanup", - args=[], - details=[ + arg=[], + detail=[ "- Accepts pipeline results that may contain temporary files (screenshots, intermediate artifacts)", "- Deletes files marked with is_temp=True from disk", "- Also cleans up associated sidecar files (.tags, .metadata)", diff --git a/cmdlets/delete_file.py b/cmdlets/delete_file.py index 8eda253..012eae9 100644 --- a/cmdlets/delete_file.py +++ b/cmdlets/delete_file.py @@ -1,398 +1,249 @@ +"""Delete-file cmdlet: Delete files from local storage and/or Hydrus.""" from __future__ import annotations from typing import Any, Dict, Sequence -import json import sys - -from helper.logger import debug, log -import sqlite3 from pathlib import Path -import models -import pipeline as ctx +from helper.logger import debug, log +from helper.store import Folder +from ._shared import Cmdlet, CmdletArg, normalize_hash, looks_like_hash, get_origin, get_field, should_show_help from helper import hydrus as hydrus_wrapper -from ._shared import Cmdlet, CmdletArg, normalize_hash, looks_like_hash -from config import get_local_storage_path -from helper.local_library import LocalLibraryDB +import pipeline as ctx -def _refresh_last_search(config: 
Dict[str, Any]) -> None: - """Re-run the last search-file to refresh the table after deletes.""" - try: - source_cmd = ctx.get_last_result_table_source_command() if hasattr(ctx, "get_last_result_table_source_command") else None - if source_cmd not in {"search-file", "search_file", "search"}: - return +class Delete_File(Cmdlet): + """Class-based delete-file cmdlet with self-registration.""" - args = ctx.get_last_result_table_source_args() if hasattr(ctx, "get_last_result_table_source_args") else [] - try: - from cmdlets import search_file as search_file_cmd # type: ignore - except Exception: - return + def __init__(self) -> None: + super().__init__( + name="delete-file", + summary="Delete a file locally and/or from Hydrus, including database entries.", + usage="delete-file [-hash ] [-conserve ] [-lib-root ] [reason]", + alias=["del-file"], + arg=[ + CmdletArg("hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."), + CmdletArg("conserve", description="Choose which copy to keep: 'local' or 'hydrus'."), + CmdletArg("lib-root", description="Path to local library root for database cleanup."), + CmdletArg("reason", description="Optional reason for deletion (free text)."), + ], + detail=[ + "Default removes both the local file and Hydrus file.", + "Use -conserve local to keep the local file, or -conserve hydrus to keep it in Hydrus.", + "Database entries are automatically cleaned up for local files.", + "Any remaining arguments are treated as the Hydrus reason text.", + ], + exec=self.run, + ) + self.register() - # Re-run the prior search to refresh items/table without disturbing history - search_file_cmd._run(None, args, config) + def _process_single_item(self, item: Any, override_hash: str | None, conserve: str | None, + lib_root: str | None, reason: str, config: Dict[str, Any]) -> bool: + """Process deletion for a single item.""" + # Handle item as either dict or object + if isinstance(item, dict): + hash_hex_raw = item.get("hash_hex") or item.get("hash") + target = item.get("target") or item.get("file_path") or item.get("path") + else: + hash_hex_raw = get_field(item, "hash_hex") or get_field(item, "hash") + target = get_field(item, "target") or get_field(item, "file_path") or get_field(item, "path") + + origin = get_origin(item) + + # Also check the store field explicitly from PipeObject + store = None + if isinstance(item, dict): + store = item.get("store") + else: + store = get_field(item, "store") + + # For Hydrus files, the target IS the hash + if origin and origin.lower() == "hydrus" and not hash_hex_raw: + hash_hex_raw = target - # Set an overlay so action-command pipeline output displays the refreshed table - try: - new_table = ctx.get_last_result_table() - new_items = ctx.get_last_result_items() - subject = ctx.get_last_result_subject() if hasattr(ctx, "get_last_result_subject") else None - if hasattr(ctx, "set_last_result_table_overlay") and new_table and new_items is not None: - ctx.set_last_result_table_overlay(new_table, new_items, subject) - except Exception: - pass - except Exception as exc: - debug(f"[delete_file] search refresh failed: {exc}", file=sys.stderr) + hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(hash_hex_raw) - - - -def _cleanup_relationships(db_path: Path, file_hash: str) -> int: - """Remove references to file_hash from other files' relationships.""" - try: - conn = sqlite3.connect(db_path) - cursor = conn.cursor() + local_deleted = False + local_target = isinstance(target, str) and 
target.strip() and not str(target).lower().startswith(("http://", "https://")) - # Find all metadata entries that contain this hash in relationships - cursor.execute("SELECT file_id, relationships FROM metadata WHERE relationships LIKE ?", (f'%{file_hash}%',)) - rows = cursor.fetchall() - - rel_update_count = 0 - for row_fid, rel_json in rows: - try: - rels = json.loads(rel_json) - changed = False - if isinstance(rels, dict): - for r_type, hashes in rels.items(): - if isinstance(hashes, list) and file_hash in hashes: - hashes.remove(file_hash) - changed = True - - if changed: - cursor.execute("UPDATE metadata SET relationships = ? WHERE file_id = ?", (json.dumps(rels), row_fid)) - rel_update_count += 1 - except Exception: - pass - - conn.commit() - conn.close() - if rel_update_count > 0: - debug(f"Removed relationship references from {rel_update_count} other files", file=sys.stderr) - return rel_update_count - except Exception as e: - debug(f"Error cleaning up relationships: {e}", file=sys.stderr) - return 0 - - -def _delete_database_entry(db_path: Path, file_path: str) -> bool: - """Delete file and related entries from local library database. - - Args: - db_path: Path to the library.db file - file_path: Exact file path string as stored in database - - Returns: - True if successful, False otherwise - """ - try: - if not db_path.exists(): - debug(f"Database not found at {db_path}", file=sys.stderr) - return False - - conn = sqlite3.connect(db_path) - cursor = conn.cursor() - - debug(f"Searching database for file_path: {file_path}", file=sys.stderr) - - # Find the file_id using the exact file_path - cursor.execute('SELECT id FROM files WHERE file_path = ?', (file_path,)) - result = cursor.fetchone() - - if not result: - debug(f"File path not found in database: {file_path}", file=sys.stderr) - conn.close() - return False - - file_id = result[0] - - # Get file hash before deletion to clean up relationships - cursor.execute('SELECT file_hash FROM files WHERE id = ?', (file_id,)) - hash_result = cursor.fetchone() - file_hash = hash_result[0] if hash_result else None - - debug(f"Found file_id={file_id}, deleting all related records", file=sys.stderr) - - # Delete related records - cursor.execute('DELETE FROM metadata WHERE file_id = ?', (file_id,)) - meta_count = cursor.rowcount - - cursor.execute('DELETE FROM tags WHERE file_id = ?', (file_id,)) - tags_count = cursor.rowcount - - cursor.execute('DELETE FROM notes WHERE file_id = ?', (file_id,)) - notes_count = cursor.rowcount - - cursor.execute('DELETE FROM files WHERE id = ?', (file_id,)) - files_count = cursor.rowcount - - conn.commit() - conn.close() - - # Clean up relationships in other files - if file_hash: - _cleanup_relationships(db_path, file_hash) - - debug(f"Deleted: metadata={meta_count}, tags={tags_count}, notes={notes_count}, files={files_count}", file=sys.stderr) - return True - - except Exception as exc: - log(f"Database cleanup failed: {exc}", file=sys.stderr) - import traceback - traceback.print_exc(file=sys.stderr) - return False - - -def _process_single_item(item: Any, override_hash: str | None, conserve: str | None, - lib_root: str | None, reason: str, config: Dict[str, Any]) -> bool: - """Process deletion for a single item.""" - # Handle item as either dict or object - if isinstance(item, dict): - hash_hex_raw = item.get("hash_hex") or item.get("hash") - target = item.get("target") - origin = item.get("origin") - else: - hash_hex_raw = getattr(item, "hash_hex", None) or getattr(item, "hash", None) - target = getattr(item, 
"target", None) - origin = getattr(item, "origin", None) - - # For Hydrus files, the target IS the hash - if origin and origin.lower() == "hydrus" and not hash_hex_raw: - hash_hex_raw = target - - hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(hash_hex_raw) - - local_deleted = False - local_target = isinstance(target, str) and target.strip() and not str(target).lower().startswith(("http://", "https://")) - - # Try to resolve local path if target looks like a hash and we have a library root - if local_target and looks_like_hash(str(target)) and lib_root: - try: - db_path = Path(lib_root) / ".downlow_library.db" - if db_path.exists(): - # We can't use LocalLibraryDB context manager easily here without importing it, - # but we can use a quick sqlite connection or just use the class if imported. - # We imported LocalLibraryDB, so let's use it. - with LocalLibraryDB(Path(lib_root)) as db: - resolved = db.search_by_hash(str(target)) - if resolved: - target = str(resolved) - # Also ensure we have the hash set for Hydrus deletion if needed - if not hash_hex: - hash_hex = normalize_hash(str(target)) - except Exception as e: - debug(f"Failed to resolve hash to local path: {e}", file=sys.stderr) - - if conserve != "local" and local_target: - path = Path(str(target)) - file_path_str = str(target) # Keep the original string for DB matching - try: - if path.exists() and path.is_file(): - path.unlink() - local_deleted = True - if ctx._PIPE_ACTIVE: - ctx.emit(f"Removed local file: {path}") - log(f"Deleted: {path.name}", file=sys.stderr) - except Exception as exc: - log(f"Local delete failed: {exc}", file=sys.stderr) - - # Remove common sidecars regardless of file removal success - for sidecar in (path.with_suffix(".tags"), path.with_suffix(".tags.txt"), - path.with_suffix(".metadata"), path.with_suffix(".notes")): - try: - if sidecar.exists() and sidecar.is_file(): - sidecar.unlink() - except Exception: - pass - - # Clean up database entry if library root provided - do this regardless of file deletion success - if lib_root: - lib_root_path = Path(lib_root) - db_path = lib_root_path / ".downlow_library.db" + if conserve != "local" and local_target: + path = Path(str(target)) - # If file_path_str is a hash (because file was already deleted or target was hash), - # we need to find the path by hash in the DB first - if looks_like_hash(file_path_str): + # If lib_root is provided and this is from a folder store, use the Folder class + if lib_root: try: - with LocalLibraryDB(lib_root_path) as db: - resolved = db.search_by_hash(file_path_str) - if resolved: - file_path_str = str(resolved) + folder = Folder(Path(lib_root), name=origin or "local") + if folder.delete_file(str(path)): + local_deleted = True + ctx.emit(f"Removed file: {path.name}") + log(f"Deleted: {path.name}", file=sys.stderr) + except Exception as exc: + debug(f"Folder.delete_file failed: {exc}", file=sys.stderr) + # Fallback to manual deletion + try: + if path.exists() and path.is_file(): + path.unlink() + local_deleted = True + ctx.emit(f"Removed local file: {path}") + log(f"Deleted: {path.name}", file=sys.stderr) + except Exception as exc: + log(f"Local delete failed: {exc}", file=sys.stderr) + else: + # No lib_root, just delete the file + try: + if path.exists() and path.is_file(): + path.unlink() + local_deleted = True + ctx.emit(f"Removed local file: {path}") + log(f"Deleted: {path.name}", file=sys.stderr) + except Exception as exc: + log(f"Local delete failed: {exc}", file=sys.stderr) + + # Remove common 
sidecars regardless of file removal success + for sidecar in (path.with_suffix(".tags"), path.with_suffix(".tags.txt"), + path.with_suffix(".metadata"), path.with_suffix(".notes")): + try: + if sidecar.exists() and sidecar.is_file(): + sidecar.unlink() except Exception: pass - db_success = _delete_database_entry(db_path, file_path_str) - - if not db_success: - # If deletion failed (e.g. not found), but we have a hash, try to clean up relationships anyway - effective_hash = None - if looks_like_hash(file_path_str): - effective_hash = file_path_str - elif hash_hex: - effective_hash = hash_hex - - if effective_hash: - debug(f"Entry not found, but attempting to clean up relationships for hash: {effective_hash}", file=sys.stderr) - if _cleanup_relationships(db_path, effective_hash) > 0: - db_success = True - - if db_success: - if ctx._PIPE_ACTIVE: - ctx.emit(f"Removed database entry: {path.name}") - debug(f"Database entry cleaned up", file=sys.stderr) - local_deleted = True - else: - debug(f"Database entry not found or cleanup failed for {file_path_str}", file=sys.stderr) - else: - debug(f"No lib_root provided, skipping database cleanup", file=sys.stderr) - - hydrus_deleted = False - # Only attempt Hydrus deletion if origin is explicitly Hydrus or if we failed to delete locally - # and we suspect it might be in Hydrus. - # If origin is local, we should default to NOT deleting from Hydrus unless requested? - # Or maybe we should check if it exists in Hydrus first? - # The user complaint is "its still trying to delete hydrus, this is a local file". - - should_try_hydrus = True - if origin and origin.lower() == "local": - should_try_hydrus = False - - # If conserve is set to hydrus, definitely don't delete - if conserve == "hydrus": + hydrus_deleted = False + # Only attempt Hydrus deletion if store is explicitly Hydrus-related + # Check both origin and store fields to determine if this is a Hydrus file + should_try_hydrus = False - if should_try_hydrus and hash_hex: - try: - client = hydrus_wrapper.get_client(config) - except Exception as exc: - if not local_deleted: - log(f"Hydrus client unavailable: {exc}", file=sys.stderr) - return False - else: - if client is None: + # Check if store indicates this is a Hydrus backend + if store and ("hydrus" in store.lower() or store.lower() == "home" or store.lower() == "work"): + should_try_hydrus = True + # Fallback to origin check if store not available + elif origin and origin.lower() == "hydrus": + should_try_hydrus = True + + # If conserve is set to hydrus, definitely don't delete + if conserve == "hydrus": + should_try_hydrus = False + + if should_try_hydrus and hash_hex: + try: + client = hydrus_wrapper.get_client(config) + except Exception as exc: if not local_deleted: - # If we deleted locally, we don't care if Hydrus is unavailable - pass - else: - log("Hydrus client unavailable", file=sys.stderr) + log(f"Hydrus client unavailable: {exc}", file=sys.stderr) return False else: - payload: Dict[str, Any] = {"hashes": [hash_hex]} - if reason: - payload["reason"] = reason - try: - client._post("/add_files/delete_files", data=payload) # type: ignore[attr-defined] - hydrus_deleted = True - preview = hash_hex[:12] + ('…' if len(hash_hex) > 12 else '') - debug(f"Deleted from Hydrus: {preview}…", file=sys.stderr) - except Exception as exc: - # If it's not in Hydrus (e.g. 
404 or similar), that's fine - # log(f"Hydrus delete failed: {exc}", file=sys.stderr) + if client is None: if not local_deleted: + log("Hydrus client unavailable", file=sys.stderr) return False + else: + payload: Dict[str, Any] = {"hashes": [hash_hex]} + if reason: + payload["reason"] = reason + try: + client._post("/add_files/delete_files", data=payload) # type: ignore[attr-defined] + hydrus_deleted = True + preview = hash_hex[:12] + ('…' if len(hash_hex) > 12 else '') + debug(f"Deleted from Hydrus: {preview}…", file=sys.stderr) + except Exception as exc: + # If it's not in Hydrus (e.g. 404 or similar), that's fine + if not local_deleted: + return False - if hydrus_deleted and hash_hex: - preview = hash_hex[:12] + ('…' if len(hash_hex) > 12 else '') - if ctx._PIPE_ACTIVE: + if hydrus_deleted and hash_hex: + preview = hash_hex[:12] + ('…' if len(hash_hex) > 12 else '') if reason: ctx.emit(f"Deleted {preview} (reason: {reason}).") else: ctx.emit(f"Deleted {preview}.") - if hydrus_deleted or local_deleted: - return True + if hydrus_deleted or local_deleted: + return True - log("Selected result has neither Hydrus hash nor local file target") - return False + log("Selected result has neither Hydrus hash nor local file target") + return False - -def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: - # Help - try: - if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args): - log(json.dumps(CMDLET, ensure_ascii=False, indent=2)) + def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: + """Execute delete-file command.""" + if should_show_help(args): + log(f"Cmdlet: {self.name}\nSummary: {self.summary}\nUsage: {self.usage}") return 0 - except Exception: - pass - override_hash: str | None = None - conserve: str | None = None - lib_root: str | None = None - reason_tokens: list[str] = [] - i = 0 - while i < len(args): - token = args[i] - low = str(token).lower() - if low in {"-hash", "--hash", "hash"} and i + 1 < len(args): - override_hash = str(args[i + 1]).strip() - i += 2 - continue - if low in {"-conserve", "--conserve"} and i + 1 < len(args): - value = str(args[i + 1]).strip().lower() - if value in {"local", "hydrus"}: - conserve = value + # Parse arguments + override_hash: str | None = None + conserve: str | None = None + lib_root: str | None = None + reason_tokens: list[str] = [] + i = 0 + + while i < len(args): + token = args[i] + low = str(token).lower() + if low in {"-hash", "--hash", "hash"} and i + 1 < len(args): + override_hash = str(args[i + 1]).strip() i += 2 continue - if low in {"-lib-root", "--lib-root", "lib-root"} and i + 1 < len(args): - lib_root = str(args[i + 1]).strip() - i += 2 - continue - reason_tokens.append(token) - i += 1 + if low in {"-conserve", "--conserve"} and i + 1 < len(args): + value = str(args[i + 1]).strip().lower() + if value in {"local", "hydrus"}: + conserve = value + i += 2 + continue + if low in {"-lib-root", "--lib-root", "lib-root"} and i + 1 < len(args): + lib_root = str(args[i + 1]).strip() + i += 2 + continue + reason_tokens.append(token) + i += 1 - if not lib_root: - # Try to get from config - p = get_local_storage_path(config) - if p: - lib_root = str(p) + # If no lib_root provided, try to get the first folder store from config + if not lib_root: + try: + storage_config = config.get("storage", {}) + folder_config = storage_config.get("folder", {}) + if folder_config: + # Get first folder store path + for store_name, store_config in folder_config.items(): + if 
isinstance(store_config, dict): + path = store_config.get("path") + if path: + lib_root = path + break + except Exception: + pass - reason = " ".join(token for token in reason_tokens if str(token).strip()).strip() + reason = " ".join(token for token in reason_tokens if str(token).strip()).strip() - items = [] - if isinstance(result, list): - items = result - elif result: - items = [result] - - if not items: - log("No items to delete", file=sys.stderr) - return 1 + items = [] + if isinstance(result, list): + items = result + elif result: + items = [result] + + if not items: + log("No items to delete", file=sys.stderr) + return 1 - success_count = 0 - for item in items: - if _process_single_item(item, override_hash, conserve, lib_root, reason, config): - success_count += 1 + success_count = 0 + for item in items: + if self._process_single_item(item, override_hash, conserve, lib_root, reason, config): + success_count += 1 - if success_count > 0: - _refresh_last_search(config) + if success_count > 0: + # Clear cached tables/items so deleted entries are not redisplayed + try: + ctx.set_last_result_table_overlay(None, None, None) + ctx.set_last_result_table(None, []) + ctx.set_last_result_items_only([]) + ctx.set_current_stage_table(None) + except Exception: + pass - return 0 if success_count > 0 else 1 + return 0 if success_count > 0 else 1 + + +# Instantiate and register the cmdlet +Delete_File() -CMDLET = Cmdlet( - name="delete-file", - summary="Delete a file locally and/or from Hydrus, including database entries.", - usage="delete-file [-hash ] [-conserve ] [-lib-root ] [reason]", - aliases=["del-file"], - args=[ - CmdletArg("hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."), - CmdletArg("conserve", description="Choose which copy to keep: 'local' or 'hydrus'."), - CmdletArg("lib-root", description="Path to local library root for database cleanup."), - CmdletArg("reason", description="Optional reason for deletion (free text)."), - ], - details=[ - "Default removes both the local file and Hydrus file.", - "Use -conserve local to keep the local file, or -conserve hydrus to keep it in Hydrus.", - "Database entries are automatically cleaned up for local files.", - "Any remaining arguments are treated as the Hydrus reason text.", - ], -) diff --git a/cmdlets/delete_note.py b/cmdlets/delete_note.py index d43f036..f7346cb 100644 --- a/cmdlets/delete_note.py +++ b/cmdlets/delete_note.py @@ -5,18 +5,18 @@ import json import pipeline as ctx from helper import hydrus as hydrus_wrapper -from ._shared import Cmdlet, CmdletArg, normalize_hash +from ._shared import Cmdlet, CmdletArg, normalize_hash, get_hash_for_operation, fetch_hydrus_metadata, should_show_help, get_field from helper.logger import log CMDLET = Cmdlet( name="delete-note", summary="Delete a named note from a Hydrus file.", usage="i | del-note [-hash ] ", - aliases=["del-note"], - args=[ + alias=["del-note"], + arg=[ ], - details=[ + detail=[ "- Removes the note with the given name from the Hydrus file.", ], ) @@ -24,12 +24,9 @@ CMDLET = Cmdlet( def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: # Help - try: - if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args): - log(json.dumps(CMDLET, ensure_ascii=False, indent=2)) - return 0 - except Exception: - pass + if should_show_help(args): + log(json.dumps(CMDLET, ensure_ascii=False, indent=2)) + return 0 if not args: log("Requires the note name/key to delete") return 1 @@ -57,7 +54,7 
@@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: if isinstance(result, list) and len(result) > 0: result = result[0] - hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(getattr(result, "hash_hex", None)) + hash_hex = get_hash_for_operation(override_hash, result) if not hash_hex: log("Selected result does not include a Hydrus hash") return 1 @@ -93,7 +90,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: if isinstance(subject, dict): subj_hashes = [norm(v) for v in [subject.get("hydrus_hash"), subject.get("hash"), subject.get("hash_hex"), subject.get("file_hash")] if v] else: - subj_hashes = [norm(getattr(subject, f, None)) for f in ("hydrus_hash", "hash", "hash_hex", "file_hash") if getattr(subject, f, None)] + subj_hashes = [norm(get_field(subject, f)) for f in ("hydrus_hash", "hash", "hash_hex", "file_hash") if get_field(subject, f)] if target_hash and target_hash in subj_hashes: get_note_cmd.get_notes(subject, ["-hash", hash_hex], config) return 0 diff --git a/cmdlets/delete_relationship.py b/cmdlets/delete_relationship.py index f9d62ea..3ff270b 100644 --- a/cmdlets/delete_relationship.py +++ b/cmdlets/delete_relationship.py @@ -10,8 +10,8 @@ import sys from helper.logger import log import pipeline as ctx -from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args, normalize_result_input -from helper.local_library import LocalLibrarySearchOptimizer +from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args, normalize_result_input, get_field +from helper.folder_store import LocalLibrarySearchOptimizer from config import get_local_storage_path @@ -35,12 +35,14 @@ def _refresh_relationship_view_if_current(target_hash: Optional[str], target_pat subj_hashes: list[str] = [] subj_paths: list[str] = [] - if isinstance(subject, dict): - subj_hashes = [norm(v) for v in [subject.get("hydrus_hash"), subject.get("hash"), subject.get("hash_hex"), subject.get("file_hash")] if v] - subj_paths = [norm(v) for v in [subject.get("file_path"), subject.get("path"), subject.get("target")] if v] - else: - subj_hashes = [norm(getattr(subject, f, None)) for f in ("hydrus_hash", "hash", "hash_hex", "file_hash") if getattr(subject, f, None)] - subj_paths = [norm(getattr(subject, f, None)) for f in ("file_path", "path", "target") if getattr(subject, f, None)] + for field in ("hydrus_hash", "hash", "hash_hex", "file_hash"): + val = get_field(subject, field) + if val: + subj_hashes.append(norm(val)) + for field in ("file_path", "path", "target"): + val = get_field(subject, field) + if val: + subj_paths.append(norm(val)) is_match = False if target_hashes and any(h in subj_hashes for h in target_hashes): @@ -93,21 +95,12 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: for single_result in results: try: # Get file path from result - file_path_from_result = None - - if isinstance(single_result, dict): - file_path_from_result = ( - single_result.get("file_path") or - single_result.get("path") or - single_result.get("target") - ) - else: - file_path_from_result = ( - getattr(single_result, "file_path", None) or - getattr(single_result, "path", None) or - getattr(single_result, "target", None) or - str(single_result) - ) + file_path_from_result = ( + get_field(single_result, "file_path") + or get_field(single_result, "path") + or get_field(single_result, "target") + or (str(single_result) if not isinstance(single_result, dict) else None) + ) if not file_path_from_result: log("Could not extract file path from 
result", file=sys.stderr) @@ -199,12 +192,12 @@ CMDLET = Cmdlet( name="delete-relationship", summary="Remove relationships from files.", usage="@1 | delete-relationship --all OR delete-relationship -path --all OR @1-3 | delete-relationship -type alt", - args=[ + arg=[ CmdletArg("path", type="string", description="Specify the local file path (if not piping a result)."), CmdletArg("all", type="flag", description="Delete all relationships for the file(s)."), CmdletArg("type", type="string", description="Delete specific relationship type ('alt', 'king', 'related'). Default: delete all types."), ], - details=[ + detail=[ "- Delete all relationships: pipe files | delete-relationship --all", "- Delete specific type: pipe files | delete-relationship -type alt", "- Delete all from file: delete-relationship -path --all", diff --git a/cmdlets/delete_tag.py b/cmdlets/delete_tag.py index db1add9..50c6c11 100644 --- a/cmdlets/delete_tag.py +++ b/cmdlets/delete_tag.py @@ -9,7 +9,7 @@ from . import register import models import pipeline as ctx from helper import hydrus as hydrus_wrapper -from ._shared import Cmdlet, CmdletArg, normalize_hash, parse_tag_arguments +from ._shared import Cmdlet, CmdletArg, SharedArgs, normalize_hash, parse_tag_arguments, fetch_hydrus_metadata, should_show_help, get_field from helper.logger import debug, log @@ -37,8 +37,8 @@ def _refresh_tag_view_if_current(hash_hex: str | None, file_path: str | None, co subj_hashes = [norm(v) for v in [subject.get("hydrus_hash"), subject.get("hash"), subject.get("hash_hex"), subject.get("file_hash")] if v] subj_paths = [norm(v) for v in [subject.get("file_path"), subject.get("path"), subject.get("target")] if v] else: - subj_hashes = [norm(getattr(subject, f, None)) for f in ("hydrus_hash", "hash", "hash_hex", "file_hash") if getattr(subject, f, None)] - subj_paths = [norm(getattr(subject, f, None)) for f in ("file_path", "path", "target") if getattr(subject, f, None)] + subj_hashes = [norm(get_field(subject, f)) for f in ("hydrus_hash", "hash", "hash_hex", "file_hash") if get_field(subject, f)] + subj_paths = [norm(get_field(subject, f)) for f in ("file_path", "path", "target") if get_field(subject, f)] is_match = False if target_hash and target_hash in subj_hashes: @@ -60,12 +60,12 @@ CMDLET = Cmdlet( name="delete-tags", summary="Remove tags from a Hydrus file.", usage="del-tags [-hash ] [,...]", - aliases=["del-tag", "del-tags", "delete-tag"], - args=[ - CmdletArg("-hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."), + alias=["del-tag", "del-tags", "delete-tag"], + arg=[ + SharedArgs.HASH, CmdletArg("[,...]", required=True, description="One or more tags to remove. 
Comma- or space-separated."), ], - details=[ + detail=[ "- Requires a Hydrus file (hash present) or explicit -hash override.", "- Multiple tags can be comma-separated or space-separated.", ], @@ -74,12 +74,9 @@ CMDLET = Cmdlet( @register(["del-tag", "del-tags", "delete-tag", "delete-tags"]) # Still needed for backward compatibility def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: # Help - try: - if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args): - log(json.dumps(CMDLET, ensure_ascii=False, indent=2)) - return 0 - except Exception: - pass + if should_show_help(args): + log(json.dumps(CMDLET, ensure_ascii=False, indent=2)) + return 0 # Check if we have a piped TagItem with no args (i.e., from @1 | delete-tag) has_piped_tag = (result and hasattr(result, '__class__') and @@ -139,15 +136,15 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: if idx - 1 < len(ctx._LAST_RESULT_ITEMS): item = ctx._LAST_RESULT_ITEMS[idx - 1] if hasattr(item, '__class__') and item.__class__.__name__ == 'TagItem': - tag_name = getattr(item, 'tag_name', None) + tag_name = get_field(item, 'tag_name') if tag_name: log(f"[delete_tag] Extracted tag from @{idx}: {tag_name}") tags_from_at_syntax.append(tag_name) # Also get hash from first item for consistency if not hash_from_at_syntax: - hash_from_at_syntax = getattr(item, 'hash_hex', None) + hash_from_at_syntax = get_field(item, 'hash_hex') if not file_path_from_at_syntax: - file_path_from_at_syntax = getattr(item, 'file_path', None) + file_path_from_at_syntax = get_field(item, 'file_path') if not tags_from_at_syntax: log(f"No tags found at indices: {indices}") @@ -219,13 +216,13 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: for item in items_to_process: tags_to_delete = [] - item_hash = normalize_hash(override_hash) if override_hash else normalize_hash(getattr(item, "hash_hex", None)) - item_path = getattr(item, "path", None) or getattr(item, "file_path", None) or getattr(item, "target", None) - # If result is a dict (e.g. from search-file), try getting path from keys - if not item_path and isinstance(item, dict): - item_path = item.get("path") or item.get("file_path") or item.get("target") - - item_source = getattr(item, "source", None) + item_hash = normalize_hash(override_hash) if override_hash else normalize_hash(get_field(item, "hash_hex")) + item_path = ( + get_field(item, "path") + or get_field(item, "file_path") + or get_field(item, "target") + ) + item_source = get_field(item, "source") if hasattr(item, '__class__') and item.__class__.__name__ == 'TagItem': # It's a TagItem @@ -238,7 +235,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: # Let's assume if args are present, we use args. If not, we use the tag name. 
tags_to_delete = tags_arg else: - tag_name = getattr(item, 'tag_name', None) + tag_name = get_field(item, 'tag_name') if tag_name: tags_to_delete = [tag_name] else: @@ -270,34 +267,31 @@ def _process_deletion(tags: list[str], hash_hex: str | None, file_path: str | No # Prefer local DB when we have a path and not explicitly hydrus if file_path and (source == "local" or (source != "hydrus" and not hash_hex)): try: - from helper.local_library import LocalLibraryDB + from helper.folder_store import FolderDB from config import get_local_storage_path path_obj = Path(file_path) local_root = get_local_storage_path(config) or path_obj.parent - with LocalLibraryDB(local_root) as db: - existing = db.get_tags(path_obj) or [] + with FolderDB(local_root) as db: + file_hash = db.get_file_hash(path_obj) + existing = db.get_tags(file_hash) if file_hash else [] except Exception: existing = [] elif hash_hex: - try: - client = hydrus_wrapper.get_client(config) - payload = client.fetch_file_metadata( - hashes=[hash_hex], - include_service_keys_to_tags=True, - include_file_urls=False, - ) - items = payload.get("metadata") if isinstance(payload, dict) else None - meta = items[0] if isinstance(items, list) and items else None - if isinstance(meta, dict): - tags_payload = meta.get("tags") - if isinstance(tags_payload, dict): - seen: set[str] = set() - for svc_data in tags_payload.values(): - if not isinstance(svc_data, dict): - continue - display = svc_data.get("display_tags") - if isinstance(display, list): - for t in display: + meta, _ = fetch_hydrus_metadata( + config, hash_hex, + include_service_keys_to_tags=True, + include_file_url=False, + ) + if isinstance(meta, dict): + tags_payload = meta.get("tags") + if isinstance(tags_payload, dict): + seen: set[str] = set() + for svc_data in tags_payload.values(): + if not isinstance(svc_data, dict): + continue + display = svc_data.get("display_tags") + if isinstance(display, list): + for t in display: if isinstance(t, (str, bytes)): val = str(t).strip() if val and val not in seen: @@ -313,8 +307,6 @@ def _process_deletion(tags: list[str], hash_hex: str | None, file_path: str | No if val and val not in seen: seen.add(val) existing.append(val) - except Exception: - existing = [] return existing # Safety: only block if this deletion would remove the final title tag @@ -335,7 +327,7 @@ def _process_deletion(tags: list[str], hash_hex: str | None, file_path: str | No # Handle local file tag deletion if file_path and (source == "local" or (not hash_hex and source != "hydrus")): try: - from helper.local_library import LocalLibraryDB + from helper.folder_store import FolderDB from pathlib import Path path_obj = Path(file_path) @@ -351,7 +343,7 @@ def _process_deletion(tags: list[str], hash_hex: str | None, file_path: str | No # Fallback: assume file is in a library root or use its parent local_root = path_obj.parent - with LocalLibraryDB(local_root) as db: + with FolderDB(local_root) as db: db.remove_tags(path_obj, tags) debug(f"Removed {len(tags)} tag(s) from {path_obj.name} (local)") _refresh_tag_view_if_current(hash_hex, file_path, config) diff --git a/cmdlets/delete_url.py b/cmdlets/delete_url.py index 398d018..751b233 100644 --- a/cmdlets/delete_url.py +++ b/cmdlets/delete_url.py @@ -1,194 +1,82 @@ from __future__ import annotations from typing import Any, Dict, Sequence -import json import sys -from pathlib import Path from . 
import register -from helper import hydrus as hydrus_wrapper -from ._shared import Cmdlet, CmdletArg, normalize_hash -from helper.logger import debug, log -from config import get_local_storage_path -from helper.local_library import LocalLibraryDB import pipeline as ctx - -CMDLET = Cmdlet( - name="delete-url", - summary="Remove a URL association from a file (Hydrus or Local).", - usage="delete-url [-hash <sha256>] <url>", - args=[ - CmdletArg("-hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."), - CmdletArg("url", required=True, description="The URL to remove from the file."), - ], - details=[ - "- Removes the URL from the file's known URL list.", - ], -) +from ._shared import Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, get_field, normalize_hash +from helper.logger import log +from helper.store import FileStorage -def _parse_hash_and_rest(args: Sequence[str]) -> tuple[str | None, list[str]]: - override_hash: str | None = None - rest: list[str] = [] - i = 0 - while i < len(args): - a = args[i] - low = str(a).lower() - if low in {"-hash", "--hash", "hash"} and i + 1 < len(args): - override_hash = str(args[i + 1]).strip() - i += 2 - continue - rest.append(a) - i += 1 - return override_hash, rest - - -@register(["del-url", "delete-url", "delete_url"]) # aliases -def delete(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: - # Help - try: - if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args): - log(json.dumps(CMDLET, ensure_ascii=False, indent=2)) - return 0 - except Exception: - pass +class Delete_Url(Cmdlet): + """Delete URL associations from files via hash+store.""" - override_hash, rest = _parse_hash_and_rest(args) + NAME = "delete-url" + SUMMARY = "Remove a URL association from a file" + USAGE = "@1 | delete-url <url>" + ARGS = [ + SharedArgs.HASH, + SharedArgs.STORE, + CmdletArg("url", required=True, description="URL to remove"), + ] + DETAIL = [ + "- Removes URL association from file identified by hash+store", + "- Multiple URLs can be comma-separated", + ] - url_arg = None - if rest: - url_arg = str(rest[0] or '').strip() - - # Normalize result to a list - items = result if isinstance(result, list) else [result] - if not items: - log("No input provided.") - return 1 - - success_count = 0 - - for item in items: - target_url = url_arg - target_file = item + def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: + """Delete URL from file via hash+store backend.""" + parsed = parse_cmdlet_args(args, self) - # Check for rich URL object from get-url - if isinstance(item, dict) and "url" in item and "source_file" in item: - if not target_url: - target_url = item["url"] - target_file = item["source_file"] + # Extract hash and store from result or args + file_hash = parsed.get("hash") or get_field(result, "hash") + store_name = parsed.get("store") or get_field(result, "store") + url_arg = parsed.get("url") - if not target_url: - continue - - if _delete_single(target_file, target_url, override_hash, config): - success_count += 1 - - if success_count == 0: + if not file_hash: + log("Error: No file hash provided") + return 1 + + if not store_name: + log("Error: No store name provided") + return 1 + if not url_arg: - log("Requires a URL argument or valid selection.") - else: - log("Failed to delete URL(s).") - return 1 + log("Error: No URL provided") + return 1 - return 0 - - -def _delete_single(result: Any, url: str, override_hash: str | None, config: Dict[str, Any]) -> bool: - # 
Helper to get field from both dict and object - def get_field(obj: Any, field: str, default: Any = None) -> Any: - if isinstance(obj, dict): - return obj.get(field, default) - else: - return getattr(obj, field, default) - - success = False - - # 1. Try Local Library - file_path = get_field(result, "file_path") or get_field(result, "path") - if file_path and not override_hash: + # Normalize hash + file_hash = normalize_hash(file_hash) + if not file_hash: + log("Error: Invalid hash format") + return 1 + + # Parse URLs (comma-separated) + urls = [u.strip() for u in str(url_arg).split(',') if u.strip()] + if not urls: + log("Error: No valid URL provided") + return 1 + + # Get backend and delete each URL try: - path_obj = Path(file_path) - if path_obj.exists(): - storage_path = get_local_storage_path(config) - if storage_path: - with LocalLibraryDB(storage_path) as db: - metadata = db.get_metadata(path_obj) or {} - known_urls = metadata.get("known_urls") or [] - - # Handle comma-separated URLs if passed as arg - # But first check if the exact url string exists (e.g. if it contains commas itself) - urls_to_process = [] - if url in known_urls: - urls_to_process = [url] - else: - urls_to_process = [u.strip() for u in url.split(',') if u.strip()] - - local_changed = False - for u in urls_to_process: - if u in known_urls: - known_urls.remove(u) - local_changed = True - ctx.emit(f"Deleted URL from local file {path_obj.name}: {u}") - - if local_changed: - metadata["known_urls"] = known_urls - db.save_metadata(path_obj, metadata) - success = True - except Exception as e: - log(f"Error updating local library: {e}", file=sys.stderr) - - # 2. Try Hydrus - hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(get_field(result, "hash_hex", None)) - - if hash_hex: - try: - client = hydrus_wrapper.get_client(config) - if client: - urls_to_delete = [u.strip() for u in url.split(',') if u.strip()] - for u in urls_to_delete: - client.delete_url(hash_hex, u) - preview = hash_hex[:12] + ('…' if len(hash_hex) > 12 else '') - ctx.emit(f"Deleted URL from Hydrus file {preview}: {u}") - success = True + storage = FileStorage(config) + backend = storage[store_name] + + for url in urls: + backend.delete_url(file_hash, url) + ctx.emit(f"Deleted URL: {url}") + + return 0 + + except KeyError: + log(f"Error: Storage backend '{store_name}' not configured") + return 1 except Exception as exc: - log(f"Hydrus del-url failed: {exc}", file=sys.stderr) + log(f"Error deleting URL: {exc}", file=sys.stderr) + return 1 - if success: - try: - from cmdlets import get_url as get_url_cmd # type: ignore - except Exception: - get_url_cmd = None - if get_url_cmd: - try: - subject = ctx.get_last_result_subject() - if subject is not None: - def norm(val: Any) -> str: - return str(val).lower() - target_hash = norm(hash_hex) if hash_hex else None - target_path = norm(file_path) if file_path else None - - subj_hashes = [] - subj_paths = [] - if isinstance(subject, dict): - subj_hashes = [norm(v) for v in [subject.get("hydrus_hash"), subject.get("hash"), subject.get("hash_hex"), subject.get("file_hash")] if v] - subj_paths = [norm(v) for v in [subject.get("file_path"), subject.get("path"), subject.get("target")] if v] - else: - subj_hashes = [norm(getattr(subject, f, None)) for f in ("hydrus_hash", "hash", "hash_hex", "file_hash") if getattr(subject, f, None)] - subj_paths = [norm(getattr(subject, f, None)) for f in ("file_path", "path", "target") if getattr(subject, f, None)] - - is_match = False - if target_hash and target_hash in 
subj_hashes: - is_match = True - if target_path and target_path in subj_paths: - is_match = True - - if is_match: - refresh_args: list[str] = [] - if hash_hex: - refresh_args.extend(["-hash", hash_hex]) - get_url_cmd._run(subject, refresh_args, config) - except Exception: - debug("URL refresh skipped (error)") - - return success +# Register cmdlet +register(["delete-url", "del-url", "delete_url"])(Delete_Url) diff --git a/cmdlets/download_data.py b/cmdlets/download_data.py deleted file mode 100644 index 49c4ab0..0000000 --- a/cmdlets/download_data.py +++ /dev/null @@ -1,3138 +0,0 @@ -"""Download data from URLs using yt-dlp with playlist, clipping, and format selection. - -This is a merged implementation combining: -- cmdlets/download_data.py (pipeline wrapper) -- funact/download_data.py (feature-rich implementation) -- helper/download.py (low-level machinery) - -Features: -- Direct file downloads and yt-dlp streaming sites -- Playlist detection with interactive track selection -- Clip extraction (time ranges like 34:03-35:08) -- Format selection and audio/video toggles -- Cookies file support -- Tag extraction and metadata integration -- Progress tracking and debug logging -- Pipeline integration with result emission -- Background torrent/magnet downloads via AllDebrid -""" - -from __future__ import annotations - -import hashlib -import re -import sys -import threading -import time -from pathlib import Path -from typing import Any, Dict, List, Optional, Sequence, Tuple -import uuid - -from helper.logger import log, debug -from helper.download import download_media, probe_url, is_url_supported_by_ytdlp -from helper.utils import sha256_file -from models import DownloadOptions - -from . import register -from ._shared import Cmdlet, CmdletArg, SharedArgs, create_pipe_object_result, normalize_result_input, parse_cmdlet_args -import models -import pipeline as pipeline_context -from config import resolve_output_dir -from metadata import ( - fetch_openlibrary_metadata_tags, - format_playlist_entry, - extract_ytdlp_tags, - build_book_tags, -) - -# ============================================================================ -# Try to import optional dependencies -# ============================================================================ - -try: - from yt_dlp.utils import sanitize_filename as ytdlp_sanitize_filename # type: ignore -except Exception: # pragma: no cover - optional dependency - ytdlp_sanitize_filename = None - - -# ============================================================================ -# Background Worker for AllDebrid Downloads -# ============================================================================ - -def _download_torrent_worker( - worker_id: str, - magnet_url: str, - output_dir: Path, - config: Dict[str, Any], - api_key: str, - playlist_items: Optional[str] = None, - audio_mode: bool = False, - wait_timeout: int = 600, - worker_manager: Optional[Any] = None, -) -> None: - """Background worker to download torrent/magnet via AllDebrid. - - Runs in a separate thread and updates worker_manager with progress. 
- - Args: - worker_id: Unique ID for this worker task - magnet_url: Magnet link or .torrent URL to download - output_dir: Directory to save downloaded files - config: Configuration dict - api_key: AllDebrid API key - playlist_items: Optional file selection (e.g., "1,3,5-8") - audio_mode: Whether to tag as audio or video - wait_timeout: Timeout in seconds for magnet processing - worker_manager: WorkerManager instance for progress updates - """ - worker = None - downloaded_files = [] - - try: - from helper.alldebrid import AllDebridClient - - # Get worker reference if manager provided - if worker_manager: - try: - workers = worker_manager.get_active_workers() - worker = next((w for w in workers if w.get('id') == worker_id), None) - except: - worker = None - - def log_progress(message: str) -> None: - """Log progress to both console and worker manager.""" - debug(message) - if worker_manager and worker_id: - try: - worker_manager.log_step(worker_id, message) - except: - pass - - log_progress(f"[Worker {worker_id}] Submitting magnet to AllDebrid...") - client = AllDebridClient(api_key) - - # Add magnet - magnet_info = client.magnet_add(magnet_url) - magnet_id = int(magnet_info.get('id', 0)) - - if magnet_id <= 0: - log_progress(f"[Worker {worker_id}] ✗ Failed to add magnet to AllDebrid") - if worker_manager: - try: - worker_manager.finish_worker(worker_id, "failed", f"Failed to add magnet") - except: - pass - return - - log_progress(f"[Worker {worker_id}] ✓ Magnet added (ID: {magnet_id})") - - # Poll for ready status - elapsed = 0 - last_status_reported = 0 - - while elapsed < wait_timeout: - try: - status_info = client.magnet_status(magnet_id) - except Exception as e: - log_progress(f"[Worker {worker_id}] ⚠ Failed to get status: {e}") - time.sleep(2) - elapsed += 2 - continue - - status_code = status_info.get('statusCode', -1) - status_text = status_info.get('status', 'Unknown') - - # Report progress every 5 seconds (avoid log spam) - if elapsed - last_status_reported >= 5 or elapsed < 2: - downloaded = status_info.get('downloaded', 0) - total_size = status_info.get('size', 0) - seeders = status_info.get('seeders', 0) - speed = status_info.get('downloadSpeed', 0) - - if total_size > 0: - percent = (downloaded / total_size) * 100 - speed_str = f" @ {speed / (1024**2):.1f} MB/s" if speed > 0 else "" - seeders_str = f" ({seeders} seeders)" if seeders > 0 else "" - progress_msg = f"[Worker {worker_id}] ⧗ {status_text}: {percent:.1f}% ({downloaded / (1024**3):.2f} / {total_size / (1024**3):.2f} GB){speed_str}{seeders_str}" - log_progress(progress_msg) - - # Update worker with progress - if worker_manager: - try: - worker_manager.update_worker( - worker_id, - status="running", - progress=f"{percent:.1f}%", - details=progress_msg - ) - except: - pass - else: - log_progress(f"[Worker {worker_id}] ⧗ {status_text}...") - - last_status_reported = elapsed - - if status_code == 4: # Ready - log_progress(f"[Worker {worker_id}] ✓ Files ready") - break - elif status_code >= 5: # Error - error_status = { - 5: "Upload failed", - 6: "Internal error during unpacking", - 7: "Not downloaded in 20 minutes", - 8: "File too big (>1TB)", - 9: "Internal error", - 10: "Download took >72 hours", - 11: "Deleted on hoster website", - 12: "Processing failed", - 13: "Processing failed", - 14: "Tracker error", - 15: "No peers available" - } - error_msg = error_status.get(status_code, f"Unknown error {status_code}") - log_progress(f"[Worker {worker_id}] ✗ Magnet failed: {error_msg}") - if worker_manager: - try: - 
worker_manager.finish_worker(worker_id, "failed", error_msg) - except: - pass - return - - time.sleep(2) - elapsed += 2 - - if elapsed >= wait_timeout: - log_progress(f"[Worker {worker_id}] ✗ Timeout waiting for magnet (>{wait_timeout}s)") - if worker_manager: - try: - worker_manager.finish_worker(worker_id, "failed", f"Timeout after {wait_timeout}s") - except: - pass - return - - # Get files - files_result = client.magnet_links([magnet_id]) - magnet_files = files_result.get(str(magnet_id), {}) - if not magnet_files and isinstance(magnet_id, int): - # Try integer key as fallback - for key in files_result: - if str(key) == str(magnet_id): - magnet_files = files_result[key] - break - files_array = magnet_files.get('files', []) - - if not files_array: - log_progress(f"[Worker {worker_id}] ✗ No files found in magnet") - if worker_manager: - try: - worker_manager.finish_worker(worker_id, "failed", "No files found in magnet") - except: - pass - return - - log_progress(f"[Worker {worker_id}] ✓ Found {len(files_array)} file(s)") - - # Extract download links - download_links = [] - - def extract_links(items, prefix=""): - if not isinstance(items, list): - return - for item in items: - if isinstance(item, dict): - name = item.get('n', '') - link = item.get('l', '') - size = item.get('s', 0) - entries = item.get('e', []) - - if link: - download_links.append({ - 'link': link, - 'name': name, - 'size': size, - 'path': f"{prefix}/{name}" if prefix else name - }) - - if entries: - extract_links(entries, f"{prefix}/{name}" if prefix else name) - - extract_links(files_array) - - if not download_links: - log_progress(f"[Worker {worker_id}] ✗ No downloadable files found") - if worker_manager: - try: - worker_manager.finish_worker(worker_id, "failed", "No downloadable files") - except: - pass - return - - # Filter by playlist_items if specified - if playlist_items and playlist_items != '*': - # Parse selection like "1,3,5-8" - selected_indices = [] - for part in playlist_items.split(','): - part = part.strip() - if '-' in part: - start, end = part.split('-') - selected_indices.extend(range(int(start)-1, int(end))) - else: - selected_indices.append(int(part)-1) - - download_links = [download_links[i] for i in selected_indices if i < len(download_links)] - log_progress(f"[Worker {worker_id}] Downloading {len(download_links)} selected file(s)") - - # Download each file - for idx, file_info in enumerate(download_links, 1): - link = file_info['link'] - name = file_info['name'] - - log_progress(f"[Worker {worker_id}] ({idx}/{len(download_links)}) Downloading: {name}") - - try: - # Unlock the link - try: - actual_link = client.unlock_link(link) - if actual_link and actual_link != link: - link = actual_link - except: - pass - - # Download via HTTP - from helper.http_client import HTTPClient - - output_dir.mkdir(parents=True, exist_ok=True) - file_path = output_dir / name - file_path.parent.mkdir(parents=True, exist_ok=True) - - with HTTPClient() as http_client: - http_client.download(link, str(file_path)) - - log_progress(f"[Worker {worker_id}] ✓ Downloaded: {name}") - - # Compute hash and emit result - file_hash = _compute_file_hash(file_path) - - result_obj = { - 'file_path': str(file_path), - 'source_url': magnet_url, - 'file_hash': file_hash, - 'media_kind': 'audio' if audio_mode else 'video', - } - - pipeline_context.emit(result_obj) - downloaded_files.append(file_path) - - except Exception as e: - log_progress(f"[Worker {worker_id}] ⚠ Failed to download {name}: {e}") - - if downloaded_files: - msg = f"✓ Torrent 
download complete ({len(downloaded_files)} file(s))" - log_progress(f"[Worker {worker_id}] {msg}") - if worker_manager: - try: - worker_manager.finish_worker(worker_id, "success", msg) - except: - pass - else: - if worker_manager: - try: - worker_manager.finish_worker(worker_id, "failed", "No files downloaded") - except: - pass - - except ImportError: - log_progress(f"[Worker {worker_id}] ✗ AllDebrid client not available") - if worker_manager: - try: - worker_manager.finish_worker(worker_id, "failed", "AllDebrid client not available") - except: - pass - except Exception as e: - import traceback - log_progress(f"[Worker {worker_id}] ✗ Torrent download failed: {e}") - if worker_manager: - try: - worker_manager.finish_worker(worker_id, "failed", str(e)) - except: - pass - traceback.print_exc(file=sys.stderr) - - -# ============================================================================ -# CMDLET Metadata Declaration -# ============================================================================ - - - - -# ============================================================================ -# Torrent File Parsing -# ============================================================================ - -def _parse_torrent_file(file_path: str) -> Optional[str]: - """Parse a .torrent file and extract magnet link. - - Args: - file_path: Path to .torrent file - - Returns: - Magnet link string or None if parsing fails - """ - try: - import bencode3 - except ImportError: - log("⚠ bencode3 module not found. Install: pip install bencode3", file=sys.stderr) - return None - - try: - with open(file_path, 'rb') as f: - torrent_data = bencode3.bdecode(f.read()) - except Exception as e: - log(f"✗ Failed to parse torrent file: {e}", file=sys.stderr) - return None - - try: - # Get info dict - bencode3 returns string keys, not bytes - info = torrent_data.get('info') - if not info: - log("✗ No info dict in torrent file", file=sys.stderr) - return None - - # Calculate info hash (SHA1 of bencoded info dict) - import hashlib - info_hash = hashlib.sha1(bencode3.bencode(info)).hexdigest() - - # Get name - name = info.get('name', 'Unknown') - if isinstance(name, bytes): - name = name.decode('utf-8', errors='ignore') - - # Create magnet link - magnet = f"magnet:?xt=urn:btih:{info_hash}&dn={name}" - - # Add trackers if available - announce = torrent_data.get('announce') - if announce: - try: - tracker = announce if isinstance(announce, str) else announce.decode('utf-8', errors='ignore') - magnet += f"&tr={tracker}" - except: - pass - - announce_list = torrent_data.get('announce-list', []) - for tier in announce_list: - if isinstance(tier, list): - for tracker_item in tier: - try: - tracker = tracker_item if isinstance(tracker_item, str) else tracker_item.decode('utf-8', errors='ignore') - if tracker: - magnet += f"&tr={tracker}" - except: - pass - - debug(f"✓ Parsed torrent: {name} (hash: {info_hash})") - return magnet - - except Exception as e: - log(f"✗ Error parsing torrent metadata: {e}", file=sys.stderr) - return None - - -def _download_torrent_file(url: str, temp_dir: Optional[Path] = None) -> Optional[str]: - """Download a .torrent file from URL and parse it. 
- - Args: - url: URL to .torrent file - temp_dir: Optional temp directory for storing downloaded file - - Returns: - Magnet link string or None if download/parsing fails - """ - try: - from helper.http_client import HTTPClient - except ImportError: - log("⚠ HTTPClient not available", file=sys.stderr) - return None - - try: - # Download torrent file - debug(f"⇓ Downloading torrent file: {url}") - - with HTTPClient(timeout=30.0) as client: - response = client.get(url) - response.raise_for_status() - torrent_data = response.content - - # Create temp file - if temp_dir is None: - temp_dir = Path.home() / ".cache" / "downlow" - temp_dir.mkdir(parents=True, exist_ok=True) - - # Save to temp file - import hashlib - url_hash = hashlib.md5(url.encode()).hexdigest()[:8] - temp_file = temp_dir / f"torrent_{url_hash}.torrent" - temp_file.write_bytes(torrent_data) - - debug(f"✓ Downloaded torrent file: {temp_file}") - - # Parse it - magnet = _parse_torrent_file(str(temp_file)) - - # Clean up - try: - temp_file.unlink() - except: - pass - - return magnet - - except Exception as e: - log(f"✗ Failed to download/parse torrent: {e}", file=sys.stderr) - return None - - -def _is_torrent_file_or_url(arg: str) -> bool: - """Check if argument is a .torrent file path or URL. - - Args: - arg: Argument to check - - Returns: - True if it's a .torrent file or URL - """ - arg_lower = arg.lower() - - # Check if it's a .torrent file path - if arg_lower.endswith('.torrent'): - return Path(arg).exists() or arg_lower.startswith('http') - - # Check if it's a URL to .torrent file - if arg_lower.startswith('http://') or arg_lower.startswith('https://'): - return '.torrent' in arg_lower - - return False - - -def _process_torrent_input(arg: str) -> Optional[str]: - """Process torrent file or URL and convert to magnet link. - - Args: - arg: .torrent file path or URL - - Returns: - Magnet link or original argument if not processable - """ - try: - if arg.lower().startswith('http://') or arg.lower().startswith('https://'): - # It's a URL - return _download_torrent_file(arg) or arg - else: - # It's a file path - if Path(arg).exists(): - return _parse_torrent_file(arg) or arg - else: - return arg - except Exception as e: - log(f"⚠ Error processing torrent: {e}", file=sys.stderr) - return arg - - -# ============================================================================ -# Helper Functions -# ============================================================================ - - - - -def _show_playlist_table(url: str, probe_info: Dict[str, Any]) -> Optional[Dict[str, Any]]: - """Show playlist result table and get user selection. 
- - Args: - url: Original URL - probe_info: Info dict from probe_url() - - Returns: - Modified probe_info with selected_entries, or None if user cancelled - """ - entries = probe_info.get("entries", []) - if not entries: - return probe_info - - extractor = probe_info.get("extractor", "") - title = probe_info.get("title", "Playlist") - - debug(f"📋 Detected playlist: {title} ({len(entries)} items) - {extractor}") - - # Skip full metadata enrichment for speed - extract_flat usually provides enough info - # debug("📋 Fetching metadata for each item...") - # entries = enrich_playlist_entries(entries, extractor) - - # Emit each playlist item as a separate result row - for i, entry in enumerate(entries, 1): - formatted = format_playlist_entry(entry, i, extractor) - - # Build tags from available metadata - tags = [] - artist = formatted.get("artist") or formatted.get("uploader", "") - if artist: - tags.append(artist) - - album = formatted.get("album", "") - if album and album != title: # Don't repeat playlist title - tags.append(album) - - # Extract individual fields for separate columns - duration = formatted.get("duration", 0) - duration_str = "" - if duration: - minutes = int(duration // 60) - seconds = int(duration % 60) - duration_str = f"{minutes}m{seconds}s" - tags.append(duration_str) - - # Normalize extractor for comparison (remove special chars and case) - ext_lower = extractor.lower().replace(":", "").replace(" ", "") - - track_number = None - # Add site-specific tags and fields - if "youtube" in ext_lower and formatted.get("channel"): - tags.append(f"channel:{formatted.get('channel')}") - elif "bandcamp" in ext_lower: - track_number = formatted.get("track_number", i) - tags.append(f"track:{track_number}") - - # Create result row with separate columns for important metadata - # Build columns dynamically based on available data - columns = [ - ("#", i), - ("Title", formatted["title"]), - ] - - # Add Artist column if available - if artist: - columns.append(("Artist", artist)) - - # Add Duration column if available - if duration_str: - columns.append(("Duration", duration_str)) - - # Add Track number column for music platforms - if track_number is not None: - columns.append(("Track", str(track_number))) - - # Add Tags column for remaining tags (if any) - remaining_tags = [t for t in tags if t not in [artist, duration_str]] - if remaining_tags: - columns.append(("Tags", ", ".join(remaining_tags))) - - # Create result row with compact columns display - # Using "columns" field tells ResultTable which columns to show - result_row = { - "title": formatted["title"], - "tags": tags, - "index": i, - # Store all metadata but don't display in table (use columns field) - "__source": "playlist-probe", - "__id": f"{i}", - "__file_path": url, - "__action": f"playlist-item:{i}", - "__artist": formatted.get("artist", ""), - "__duration": formatted.get("duration", 0), - "__extractor": extractor, - # Define which columns should be shown in the result table - "columns": columns - } - - # Add site-specific metadata for pipeline use - if "youtube" in ext_lower: - result_row["__video_id"] = formatted.get("video_id", "") - result_row["__channel"] = formatted.get("channel", "") - elif "bandcamp" in ext_lower: - result_row["__track_number"] = formatted.get("track_number", i) - result_row["__album"] = formatted.get("album") or title - elif "spotify" in ext_lower: - result_row["__artists"] = formatted.get("artists", "") - result_row["__album"] = formatted.get("album", "") - - pipeline_context.emit(result_row) - - 
debug(f"ℹ️ Playlist items displayed. Use result table references (@1, @2, etc.) to select tracks.") - - # Return modified probe info - return probe_info - - -def _parse_time_range(clip_spec: str) -> Optional[Tuple[int, int]]: - """Parse time range from MM:SS-MM:SS or seconds format. - - Args: - clip_spec: Time range string like "34:03-35:08" or "2043-2108" - - Returns: - Tuple of (start_seconds, end_seconds) or None if invalid - """ - try: - if '-' not in clip_spec: - return None - - parts = clip_spec.split('-') - if len(parts) != 2: - return None - - start_str, end_str = parts - - # Try MM:SS format first - if ':' in start_str: - start_parts = start_str.split(':') - if len(start_parts) == 2: - start_sec = int(start_parts[0]) * 60 + int(start_parts[1]) - else: - return None - else: - start_sec = int(start_str) - - if ':' in end_str: - end_parts = end_str.split(':') - if len(end_parts) == 2: - end_sec = int(end_parts[0]) * 60 + int(end_parts[1]) - else: - return None - else: - end_sec = int(end_str) - - if start_sec >= end_sec: - return None - - return (start_sec, end_sec) - - except (ValueError, AttributeError): - return None - - -def _parse_section_ranges(section_spec: str) -> Optional[List[Tuple[int, int]]]: - """Parse section ranges from comma-separated time ranges. - - Args: - section_spec: Section ranges like "1:30-1:35,0:05-0:15" or "90-95,5-15" - May include quotes from CLI which will be stripped - - Returns: - List of (start_seconds, end_seconds) tuples or None if invalid - """ - try: - # Strip quotes if present (from CLI parsing) - section_spec = section_spec.strip('"\'') - - if not section_spec or ',' not in section_spec and '-' not in section_spec: - return None - - ranges = [] - # Handle both comma-separated ranges and single range - if ',' in section_spec: - section_parts = section_spec.split(',') - else: - section_parts = [section_spec] - - for part in section_parts: - part = part.strip() - if not part: - continue - - # Parse each range using the same logic as _parse_time_range - # Handle format like "1:30-1:35" or "90-95" - if '-' not in part: - return None - - # Split carefully to handle cases like "1:30-1:35" - # We need to find the dash that separates start and end - # Look for pattern: something-something where first something may have colons - dash_pos = -1 - colon_count = 0 - for i, char in enumerate(part): - if char == ':': - colon_count += 1 - elif char == '-': - # If we've seen a colon and this is a dash, check if it's the separator - # Could be "1:30-1:35" or just "90-95" - # The separator dash should come after the first number/time - if i > 0 and i < len(part) - 1: - dash_pos = i - break - - if dash_pos == -1: - return None - - start_str = part[:dash_pos] - end_str = part[dash_pos+1:] - - # Parse start time - if ':' in start_str: - start_parts = start_str.split(':') - if len(start_parts) == 2: - start_sec = int(start_parts[0]) * 60 + int(start_parts[1]) - elif len(start_parts) == 3: - start_sec = int(start_parts[0]) * 3600 + int(start_parts[1]) * 60 + int(start_parts[2]) - else: - return None - else: - start_sec = int(start_str) - - # Parse end time - if ':' in end_str: - end_parts = end_str.split(':') - if len(end_parts) == 2: - end_sec = int(end_parts[0]) * 60 + int(end_parts[1]) - elif len(end_parts) == 3: - end_sec = int(end_parts[0]) * 3600 + int(end_parts[1]) * 60 + int(end_parts[2]) - else: - return None - else: - end_sec = int(end_str) - - if start_sec >= end_sec: - return None - - ranges.append((start_sec, end_sec)) - - return ranges if ranges else None 
- - except (ValueError, AttributeError, IndexError): - return None - - -MEDIA_EXTENSIONS = {'.mp3', '.m4a', '.mp4', '.mkv', '.webm', '.flac', '.wav', '.aac'} - - -def _parse_playlist_selection_indices(selection: Optional[str], total_items: int) -> list[int]: - """Convert playlist selection string to 0-based indices.""" - if total_items <= 0: - return [] - if not selection or selection.strip() in {"*", ""}: - return list(range(total_items)) - indices: list[int] = [] - for part in selection.split(','): - part = part.strip() - if not part: - continue - if '-' in part: - bounds = part.split('-', 1) - try: - start = int(bounds[0]) - end = int(bounds[1]) - except ValueError: - continue - if start <= 0 or end <= 0: - continue - if start > end: - start, end = end, start - for idx in range(start - 1, end): - if 0 <= idx < total_items: - indices.append(idx) - else: - try: - idx = int(part) - 1 - except ValueError: - continue - if 0 <= idx < total_items: - indices.append(idx) - seen: set[int] = set() - ordered: list[int] = [] - for idx in indices: - if idx not in seen: - ordered.append(idx) - seen.add(idx) - return ordered - - -def _select_playlist_entries(entries: Any, selection: Optional[str]) -> list[Dict[str, Any]]: - """Pick playlist entries according to a selection string.""" - if not isinstance(entries, list): - return [] - indices = _parse_playlist_selection_indices(selection, len(entries)) - if not indices: - return [] - selected: list[Dict[str, Any]] = [] - for idx in indices: - entry = entries[idx] - if isinstance(entry, dict): - selected.append(entry) - return selected - - -def _sanitize_title_for_filename(title: Optional[str]) -> str: - """Match yt-dlp's restricted filename sanitization for comparisons.""" - if not title: - return "" - if ytdlp_sanitize_filename: - try: - return ytdlp_sanitize_filename(title, restricted=True) - except Exception: - pass - sanitized = re.sub(r"[^0-9A-Za-z._-]+", "_", title) - return sanitized.strip() or "" - - -def _find_playlist_files_from_entries( - entries: Sequence[Dict[str, Any]], - output_dir: Path, -) -> list[Path]: - """Resolve expected playlist files based on entry titles/exts.""" - matched: list[Path] = [] - seen: set[str] = set() - for entry in entries: - title = entry.get('title') if isinstance(entry, dict) else None - sanitized = _sanitize_title_for_filename(title) - if not sanitized: - continue - preferred_exts: list[str] = [] - for key in ('ext', 'audio_ext', 'video_ext'): - value = entry.get(key) if isinstance(entry, dict) else None - if isinstance(value, str) and value: - preferred_exts.append(value.lower()) - if not preferred_exts: - preferred_exts = [ext.strip('.') for ext in MEDIA_EXTENSIONS] - candidate: Optional[Path] = None - for ext in preferred_exts: - ext = ext.lstrip('.').lower() - path = output_dir / f"{sanitized}.{ext}" - if path.exists(): - candidate = path - break - if candidate is None: - try: - # Bandcamp/yt-dlp often prefixes uploader info, so fall back to a substring match. 
- for f in output_dir.glob(f"*{sanitized}*"): - if f.suffix.lower() in MEDIA_EXTENSIONS and f.is_file(): - candidate = f - break - except OSError: - candidate = None - if candidate and str(candidate) not in seen: - matched.append(candidate) - seen.add(str(candidate)) - return matched - - -def _snapshot_playlist_paths( - entries: Sequence[Dict[str, Any]], - output_dir: Path, -) -> tuple[list[Path], set[str]]: - """Capture current playlist file paths for a given selection.""" - matches = _find_playlist_files_from_entries(entries, output_dir) - resolved: set[str] = set() - for path in matches: - try: - resolved.add(str(path.resolve())) - except OSError: - resolved.add(str(path)) - return matches, resolved - - -def _expand_playlist_selection(selection: str, num_items: int) -> str: - """Expand playlist selection string, handling wildcards. - - Args: - selection: Selection string like '1,3,5-8' or '*' - num_items: Total number of items in playlist - - Returns: - Expanded selection string like '1,3,5,6,7,8' or '1-18' for '*' - """ - if selection.strip() == "*": - # Wildcard: select all items - return f"1-{num_items}" - - # Return as-is if not wildcard (yt-dlp will handle ranges and lists) - return selection - - -def _parse_selection_string(selection: str) -> List[int]: - """Parse selection string into list of integers. - - Handles formats like: - - "2" -> [2] - - "1,3,5" -> [1, 3, 5] - - "1-3" -> [1, 2, 3] - - "1,3-5,7" -> [1, 3, 4, 5, 7] - - Args: - selection: Selection string - - Returns: - List of integer indices - """ - result = [] - for part in selection.split(','): - part = part.strip() - if '-' in part: - # Range like "3-5" - try: - start, end = part.split('-') - start_num = int(start.strip()) - end_num = int(end.strip()) - result.extend(range(start_num, end_num + 1)) - except (ValueError, AttributeError): - continue - else: - # Single number - try: - result.append(int(part)) - except ValueError: - continue - return result - - -def _filter_and_sort_formats(formats: List[Dict[str, Any]]) -> List[Dict[str, Any]]: - """Filter and sort formats for user selection. - - Filters out: - - Storyboards (webp, svg formats) - - Low quality audio (below ~128 kbps, typically 48kHz audio) - - Video below 360p - - Sorts to prioritize: - - @1: Best combined audio+video (highest resolution, highest bitrate) - - @2: Best audio-only (highest bitrate audio) - - Then rest by quality - - Args: - formats: List of format dicts from yt-dlp - - Returns: - Filtered and sorted format list - """ - filtered = [] - - for fmt in formats: - format_id = fmt.get("format_id", "") - ext = fmt.get("ext", "") - vcodec = fmt.get("vcodec", "") - acodec = fmt.get("acodec", "") - height = fmt.get("height") - tbr = fmt.get("tbr") # Total bitrate - - # Skip storyboards (webp images, svg, etc.) 
- if ext in {"webp", "svg", "mhtml"}: - continue - - # Skip video-only formats below 360p - if vcodec != "none" and acodec == "none": - if height and height < 360: - continue - - # Skip low-bitrate audio (typically 48kHz, very low quality) - # Keep audio with tbr >= 64 kbps (reasonable quality threshold) - if acodec != "none" and vcodec == "none": - if tbr and tbr < 64: - continue - - filtered.append(fmt) - - # Sort formats: best combined first, then best audio-only, then video-only - def format_sort_key(fmt: Dict[str, Any]) -> tuple: - vcodec = fmt.get("vcodec", "") - acodec = fmt.get("acodec", "") - height = fmt.get("height", 0) or 0 - tbr = fmt.get("tbr", 0) or 0 - - # Category 0: has both audio and video (sort first) - # Category 1: audio only (sort second) - # Category 2: video only (sort last, by height desc) - if vcodec != "none" and acodec != "none": - category = 0 - return (category, -height, -tbr) - elif acodec != "none" and vcodec == "none": - category = 1 - return (category, -tbr) # Sort by bitrate descending - else: # Video only - category = 2 - return (category, -height, -tbr) # Sort by height descending, then bitrate - - return sorted(filtered, key=format_sort_key) - - -def _compute_file_hash(file_path: Path) -> Optional[str]: - """Compute SHA256 hash of file.""" - try: - return sha256_file(file_path) - except Exception: - return None - - - - - -# ============================================================================ -# Main Cmdlet Function -# ============================================================================ - -def _run(result: Any, args: Sequence[str], config: Dict[str, Any], emit_results: bool = True) -> int: - """Download data from URLs with advanced options. - - Accepts: - - Single URL as string - - Result object with 'url' or 'file_path' field - - List of results - - File containing URLs (one per line) - - Returns: - Exit code (0 for success, 1 for failure) - """ - - debug("Starting download-data") - - collected_results: List[Dict[str, Any]] = [] - - def _emit(obj: Any) -> None: - """Internal helper to collect and optionally emit results.""" - collected_results.append(obj) - if emit_results: - pipeline_context.emit(obj) - - # Track pipeline mode once so playlist handling can respect current run scope - stage_ctx = pipeline_context.get_stage_context() - in_pipeline = stage_ctx is not None and getattr(stage_ctx, 'total_stages', 1) > 1 - - # ======================================================================== - # ARGUMENT PARSING - # ======================================================================== - - # Parse arguments using shared parser - parsed = parse_cmdlet_args(args, CMDLET) - - audio_mode = parsed.get("audio", False) - format_selector = parsed.get("format") - list_formats_mode = parsed.get("list-formats", False) - - clip_spec = parsed.get("clip") - clip_range = None - if clip_spec: - clip_range = _parse_time_range(clip_spec) - if clip_range: - debug(f"Clip range: {clip_spec} ({clip_range[0]}-{clip_range[1]} seconds)") - else: - log(f"Invalid clip format: {clip_spec}", file=sys.stderr) - return 1 - - # Section download (yt-dlp only) - section_spec = parsed.get("section") - section_ranges = None - if section_spec: - # Parse section spec like "1:30-1:35,0:05-0:15" into list of (start, end) tuples - section_ranges = _parse_section_ranges(section_spec) - if section_ranges: - debug(f"Section ranges: {section_spec} ({len(section_ranges)} sections)") - # When downloading sections, auto-select best format if not specified - # Since we're only 
getting portions, quality matters less than completeness - if not format_selector: - format_selector = "bestvideo+bestaudio/best" - debug(f"Auto-selecting format for sections: {format_selector}") - else: - log(f"Invalid section format: {section_spec}", file=sys.stderr) - return 1 - - cookies_path = parsed.get("cookies") - storage_location = parsed.get("storage") - - torrent_mode = parsed.get("torrent", False) - wait_timeout = float(parsed.get("wait", 1800)) - - # Collect URLs from positional args and -url flag - # Both map to "url" in parsed result - urls_to_download = [] - raw_urls = parsed.get("url", []) - if isinstance(raw_urls, str): - raw_urls = [raw_urls] - - for arg in raw_urls: - if arg.lower().startswith(('http://', 'https://')): - # Check if it's a .torrent URL or file first - if '.torrent' in arg.lower(): - debug(f"Processing torrent URL: {arg}") - magnet = _process_torrent_input(arg) - if magnet and magnet.lower().startswith('magnet:'): - urls_to_download.append(magnet) - debug(f"✓ Converted to magnet: {magnet[:70]}...") - elif magnet: - urls_to_download.append(magnet) - else: - log(f"✗ Failed to process torrent: {arg}", file=sys.stderr) - else: - urls_to_download.append(arg) - elif torrent_mode and (arg.lower().startswith('magnet:') or len(arg) == 40 or len(arg) == 64): - # In torrent mode, accept magnet links or torrent hashes (40-char SHA1 or 64-char SHA256) - urls_to_download.append(arg) - debug(f"Torrent/magnet added: {arg[:50]}...") - elif _is_torrent_file_or_url(arg): - # Handle .torrent files and URLs - debug(f"Processing torrent file/URL: {arg}") - magnet = _process_torrent_input(arg) - if magnet and magnet.lower().startswith('magnet:'): - urls_to_download.append(magnet) - debug(f"✓ Converted to magnet: {magnet[:70]}...") - elif magnet: - urls_to_download.append(magnet) - else: - log(f"✗ Failed to process torrent: {arg}", file=sys.stderr) - else: - # Treat as URL if it looks like one - if arg.lower().startswith(('magnet:', 'ftp://')): - urls_to_download.append(arg) - else: - # Check if it's a file containing URLs - path = Path(arg) - if path.exists() and path.is_file(): - try: - with open(arg, 'r') as f: - for line in f: - line = line.strip() - if line and line.lower().startswith(('http://', 'https://')): - urls_to_download.append(line) - debug(f"Loaded URLs from file: {arg}") - except Exception as e: - log(f"Error reading file {arg}: {e}", file=sys.stderr) - else: - debug(f"Ignored argument: {arg}") - - # Item selection (for playlists/formats) - # Note: -item flag is deprecated in favor of @N pipeline selection, but kept for compatibility - playlist_items = parsed.get("item") - if playlist_items: - debug(f"Item selection: {playlist_items}") - - - - - def _is_openlibrary_downloadable(ebook_access_val: Any, status_val: Any) -> bool: - access = str(ebook_access_val or "").strip().lower() - status = str(status_val or "").strip().lower() - if status == "download": - return True - if access in {"borrowable", "public", "full", "open"} or access.startswith("full "): - return True - if "✓" in str(status_val or ""): - return True - return False - - # ======================================================================== - # INPUT PROCESSING - Extract URLs from pipeline or arguments - # ======================================================================== - - # Initialize worker tracking for downloads - import uuid - from helper.local_library import LocalLibraryDB - from config import get_local_storage_path - - # Define LazyDB proxy to avoid keeping DB connection open for 
long duration - class LazyDB: - def __init__(self, root): - self.root = root - - def _op(self, func_name, *args, **kwargs): - try: - with LocalLibraryDB(self.root) as db: - func = getattr(db, func_name) - return func(*args, **kwargs) - except Exception as e: - # Log error but don't crash - pass - - def insert_worker(self, *args, **kwargs): self._op('insert_worker', *args, **kwargs) - def update_worker_status(self, *args, **kwargs): self._op('update_worker_status', *args, **kwargs) - def append_worker_stdout(self, *args, **kwargs): self._op('append_worker_stdout', *args, **kwargs) - def close(self): pass - - worker_id = str(uuid.uuid4()) - library_root = get_local_storage_path(config or {}) - db = None - if library_root: - try: - db = LazyDB(library_root) - db.insert_worker( - worker_id, - "download", - title="Download Data", - description="Downloading files from search results", - pipe=pipeline_context.get_current_command_text() - ) - except Exception as e: - log(f"⚠ Worker tracking unavailable: {e}", file=sys.stderr) - - piped_results = normalize_result_input(result) - - # Track files downloaded directly (e.g. Soulseek) to avoid "No URLs" error - files_downloaded_directly = 0 - - # Only process piped results if no URLs were provided in arguments - # This prevents picking up residue from previous commands when running standalone - if piped_results and not urls_to_download: - for item in piped_results: - url = None - origin = None - - # ====== CHECK FOR PLAYLIST ITEM MARKER FROM add-file ====== - # When add-file detects a playlist item and wants to download it - if isinstance(item, dict) and item.get('__playlist_url'): - playlist_url = item.get('__playlist_url') - item_num = item.get('__playlist_item', 1) - debug(f"📍 Playlist item from add-file: #{item_num}") - # Add to download list with marker - urls_to_download.append({ - '__playlist_url': playlist_url, - '__playlist_item': int(item_num) - }) - continue - - # ====== CHECK FOR PLAYLIST ITEM SELECTION FIRST ====== - # When user selects @12 from a playlist, item is emitted dict with __action: "playlist-item:12" - if isinstance(item, dict) and '__action' in item and item['__action'].startswith('playlist-item:'): - playlist_url = item.get('__file_path') - playlist_action = item['__action'] # e.g., "playlist-item:12" - item_num = playlist_action.split(':')[1] # Extract item number (1-based) - - if playlist_url: - # Playlist item selected - need to download this specific track - debug(f"📍 Playlist item selected: #{item_num} - {item.get('title', 'Unknown')}") - # Add to download list - the playlist will be probed and item extracted - # Store with special marker so we know which item to select - urls_to_download.append({ - '__playlist_url': playlist_url, - '__playlist_item': int(item_num) - }) - continue - - # ====== CHECK FOR FORMAT SELECTION RESULT ====== - if isinstance(item, dict) and item.get('format_id') is not None and item.get('source_url'): - debug(f"🎬 Format selected from pipe: {item.get('format_id')}") - debug(f" Source URL: {item.get('source_url')}") - # Store as dict so we can extract format_id + source_url during download - urls_to_download.append(item) - continue - elif hasattr(item, 'format_id') and hasattr(item, 'source_url') and item.format_id is not None: - debug(f"🎬 Format selected from pipe: {item.format_id}") - debug(f" Source URL: {item.source_url}") - urls_to_download.append({ - 'format_id': item.format_id, - 'source_url': item.source_url, - }) - continue - - if isinstance(item, dict): - # Check for search provider results 
first - origin = item.get('origin') - if origin in {'openlibrary', 'libgen', 'soulseek', 'debrid'}: - # Handle search provider results - title = item.get('title', 'Item') - if origin == 'openlibrary': - # OpenLibrary: First check if lendable/downloadable via Archive.org - # Only route to LibGen if NOT available on Archive.org - metadata = item.get('full_metadata', {}) if isinstance(item.get('full_metadata'), dict) else {} - isbn = metadata.get('isbn') or item.get('isbn') - olid = metadata.get('olid') or item.get('olid') - - debug(f"[search-result] OpenLibrary: '{title}'") - if isbn: - debug(f" ISBN: {isbn}") - - # Check if book is borrowable from ebook_access field or status - ebook_access = metadata.get('ebook_access') or item.get('ebook_access', '') - status_text = metadata.get('status') or item.get('status', '') - archive_id = metadata.get('archive_id') or item.get('archive_id') - - # Determine if borrowable based on new status vocabulary - is_borrowable = _is_openlibrary_downloadable(ebook_access, status_text) - - if is_borrowable: - debug(f" ✓ Available for borrowing on Archive.org") - debug(f" → Queued for auto-borrowing...") - # Queue borrow request as special dict object - # We need OCAID (Archive.org ID), not just numeric OLID - ocaid = archive_id - - if not ocaid and isbn: - # If no OCAID in metadata, fetch it from OpenLibrary ISBN lookup - try: - import requests - ol_url = f'https://openlibrary.org/isbn/{isbn}.json' - r = requests.get(ol_url, timeout=5) - if r.status_code == 200: - ol_data = r.json() - ocaid = ol_data.get('ocaid') - except Exception as e: - debug(f" ⚠ Could not fetch OCAID from OpenLibrary: {e}") - - if ocaid: - urls_to_download.append({ - '__borrow_request__': True, - 'book_id': ocaid, - 'isbn': isbn, - 'title': title, - 'olid': olid - }) - else: - # OCAID not found - book claims borrowable but not on Archive.org - # Fall back to LibGen search instead - debug(f" ⚠ Book marked borrowable but not found on Archive.org") - if isbn: - try: - from helper.search_provider import get_provider - libgen_provider = get_provider("libgen", config) - if libgen_provider: - libgen_results = libgen_provider.search(f"isbn:{isbn}", limit=1) - if libgen_results: - libgen_result = libgen_results[0] - url = libgen_result.get('target') if isinstance(libgen_result, dict) else getattr(libgen_result, 'target', None) - if url: - urls_to_download.append(url) - debug(f" ✓ Found on LibGen instead") - else: - debug(f" ⚠ Not found on LibGen") - else: - debug(f" ⚠ Not found on LibGen") - else: - debug(f" ⚠ LibGen provider not available") - except Exception as e: - debug(f" ✗ Error searching LibGen: {e}") - else: - # Book is NOT borrowable - route to LibGen - if isbn: - debug(f" ⚠ Not available on Archive.org - attempting LibGen...") - try: - from helper.search_provider import get_provider - libgen_provider = get_provider("libgen", config) - if libgen_provider: - libgen_results = libgen_provider.search(f"isbn:{isbn}", limit=1) - if libgen_results: - libgen_result = libgen_results[0] - url = libgen_result.get('target') if isinstance(libgen_result, dict) else getattr(libgen_result, 'target', None) - if url: - urls_to_download.append(url) - debug(f" ✓ Found on LibGen") - else: - debug(f" ⚠ Not found on LibGen") - else: - debug(f" ⚠ Not found on LibGen") - debug(f" ▶ To search LibGen: search-file -provider libgen 'isbn:{isbn}' | @1 | download-data") - else: - debug(f" ▶ To search LibGen: search-file -provider libgen 'isbn:{isbn}' | @1 | download-data") - except Exception as e: - debug(f" ⚠ Could not 
search LibGen: {e}") - debug(f" ▶ To search LibGen: search-file -provider libgen 'isbn:{isbn}' | @1 | download-data") - else: - debug(f" ⚠ ISBN not available") - debug(f" ▶ Visit: {item.get('target', 'https://openlibrary.org')}") - debug(f" ▶ Or find ISBN and use: search-file -provider libgen 'isbn:\"\"'") - elif origin == 'soulseek': - # Handle Soulseek downloads using the provider - metadata = item.get('full_metadata', {}) if isinstance(item.get('full_metadata'), dict) else {} - username = metadata.get('username') - filename = metadata.get('filename') - size = item.get('size_bytes') or 0 - - if username and filename: - try: - import asyncio - from helper.search_provider import SoulSeekProvider - provider = SoulSeekProvider(config) - log(f"[search-result] Soulseek: '{title}'", flush=True) - log(f" ▶ Downloading from {username}...", flush=True) - - if db: - db.append_worker_stdout(worker_id, f"Downloading from Soulseek: {title} (from {username})") - - # Get temp directory from config - temp_dir = config.get('temp') - if temp_dir: - temp_dir = str(Path(temp_dir).expanduser()) - - # Call async download_file with asyncio.run() - success = asyncio.run(provider.download_file( - username=username, - filename=filename, - file_size=size, - target_dir=temp_dir - )) - - if success: - downloaded_file = Path(provider.DOWNLOAD_DIR) / Path(filename).name - if downloaded_file.exists(): - log(f" ✓ Downloaded: {downloaded_file.name}", flush=True) - files_downloaded_directly += 1 - if db: - db.append_worker_stdout(worker_id, f"✓ Downloaded: {downloaded_file.name}") - if pipeline_context._PIPE_ACTIVE: - # Create proper PipeObject result - result_dict = create_pipe_object_result( - source='soulseek', - identifier=filename, - file_path=str(downloaded_file), - cmdlet_name='download-data', - title=title, - target=str(downloaded_file), # Explicit target for add-file - extra={ - "metadata": metadata, - "origin": "soulseek" - } - ) - pipeline_context.emit(result_dict) - else: - debug(f" ✗ Download failed (peer may be offline)") - if db: - db.append_worker_stdout(worker_id, f"✗ Download failed for {title}") - debug(f" ▶ Try another result: search-file -provider soulseek \"...\" | @2 | download-data") - except Exception as e: - debug(f" ✗ Download error: {e}") - if db: - db.append_worker_stdout(worker_id, f"✗ Error: {e}") - debug(f" ▶ Alternative: search-soulseek -download \"{title}\" -storage ") - else: - debug(f"[search-result] Soulseek: '{title}'") - debug(f" ⚠ Missing download info (username/filename)") - if db: - db.append_worker_stdout(worker_id, f"⚠ Missing download info for {title}") - elif origin == 'libgen': - # LibGen results can use the direct URL - # Also extract mirrors dict for fallback if primary fails - url = item.get('target') - # Extract mirrors and book_id from full_metadata - metadata = item.get('full_metadata', {}) if isinstance(item.get('full_metadata'), dict) else {} - mirrors = metadata.get('mirrors', {}) - book_id = metadata.get('book_id', '') - author = metadata.get('author') - isbn_val = metadata.get('isbn') - year_val = metadata.get('year') - - if url: - url_entry = { - 'url': str(url), - 'mirrors': mirrors, # Alternative mirrors for fallback - 'book_id': book_id, - 'title': title, - 'author': author, - 'isbn': isbn_val, - 'year': year_val, - } - urls_to_download.append(url_entry) - debug(f"[search-result] LibGen: '{title}'") - debug(f" ✓ Queued for download") - if mirrors: - debug(f" Mirrors available: {len(mirrors)}") - elif origin == 'debrid': - # Debrid results can use download-data - 
url = item.get('target') - if url: - urls_to_download.append(str(url)) - debug(f"[search-result] Debrid: '{title}'") - debug(f" ✓ Queued for download") - else: - # Regular fields for non-search results - url = item.get('url') or item.get('link') or item.get('href') or item.get('target') - else: - # Object attributes - origin = getattr(item, 'origin', None) - title = getattr(item, 'title', 'Item') - if origin in {'openlibrary', 'libgen', 'soulseek', 'debrid'}: - # Handle search provider results - if origin == 'openlibrary': - # OpenLibrary: First check if lendable/downloadable via Archive.org - # Only route to LibGen if NOT available on Archive.org - metadata = getattr(item, 'full_metadata', {}) if isinstance(getattr(item, 'full_metadata', None), dict) else {} - isbn = metadata.get('isbn') or getattr(item, 'isbn', None) - olid = metadata.get('olid') or getattr(item, 'olid', None) - - debug(f"[search-result] OpenLibrary: '{title}'") - if isbn: - debug(f" ISBN: {isbn}") - - # Check if book is borrowable from ebook_access field or status - ebook_access = metadata.get('ebook_access') or getattr(item, 'ebook_access', '') - status_text = metadata.get('status') or getattr(item, 'status', '') - archive_id = metadata.get('archive_id') or getattr(item, 'archive_id', '') - - # Determine if borrowable using unified helper - is_borrowable = _is_openlibrary_downloadable(ebook_access, status_text) - - if is_borrowable: - # Book IS borrowable on Archive.org - debug(f" ✓ Available for borrowing on Archive.org") - debug(f" → Queued for auto-borrowing...") - # Queue borrow request as special dict object - ocaid = archive_id - if not ocaid and isbn: - try: - import requests - ol_url = f'https://openlibrary.org/isbn/{isbn}.json' - r = requests.get(ol_url, timeout=5) - if r.status_code == 200: - ol_data = r.json() - ocaid = ol_data.get('ocaid') - except Exception as e: - debug(f" ⚠ Could not fetch OCAID from OpenLibrary: {e}") - if ocaid: - urls_to_download.append({ - '__borrow_request__': True, - 'book_id': ocaid, - 'isbn': isbn, - 'title': title, - 'olid': olid or getattr(item, 'openlibrary_id', '') - }) - else: - # OCAID not found - book claims borrowable but not on Archive.org - # Fall back to LibGen search instead - debug(f" ⚠ No Archive.org ID found - attempting LibGen instead...") - if isbn: - try: - from helper.search_provider import get_provider - libgen_provider = get_provider("libgen", config) - if libgen_provider: - libgen_results = libgen_provider.search(f"isbn:{isbn}", limit=1) - if libgen_results: - libgen_result = libgen_results[0] - url = libgen_result.get('target') if isinstance(libgen_result, dict) else getattr(libgen_result, 'target', None) - if url: - urls_to_download.append(url) - debug(f" ✓ Found on LibGen instead") - else: - debug(f" ⚠ Not found on LibGen") - else: - debug(f" ⚠ Not found on LibGen") - else: - debug(f" ⚠ LibGen provider not available") - except Exception as e: - debug(f" ✗ Error searching LibGen: {e}") - else: - debug(f" ⚠ ISBN not available for LibGen fallback") - else: - # Book is NOT borrowable - route to LibGen - if isbn: - debug(f" ⚠ Not available on Archive.org - attempting LibGen...") - try: - from helper.search_provider import get_provider - libgen_provider = get_provider("libgen", config) - if libgen_provider: - libgen_results = libgen_provider.search(f"isbn:{isbn}", limit=1) - if libgen_results: - libgen_result = libgen_results[0] - url = libgen_result.get('target') if isinstance(libgen_result, dict) else getattr(libgen_result, 'target', None) - if url: - 
urls_to_download.append(url) - debug(f" ✓ Found on LibGen") - else: - debug(f" ⚠ Not found on LibGen") - else: - debug(f" ⚠ Not found on LibGen") - debug(f" ▶ To search LibGen: search-file -provider libgen 'isbn:{isbn}' | @1 | download-data") - else: - debug(f" ▶ To search LibGen: search-file -provider libgen 'isbn:{isbn}' | @1 | download-data") - except Exception as e: - debug(f" ⚠ Could not search LibGen: {e}") - debug(f" ▶ To search LibGen: search-file -provider libgen 'isbn:{isbn}' | @1 | download-data") - else: - debug(f" ⚠ ISBN not available") - debug(f" ▶ Visit: {getattr(item, 'target', 'https://openlibrary.org')}") - debug(f" ▶ Or find ISBN and use: search-file -provider libgen 'isbn:\"\"'") - elif origin == 'soulseek': - # Handle Soulseek downloads using the provider - metadata = getattr(item, 'full_metadata', {}) if isinstance(getattr(item, 'full_metadata', None), dict) else {} - username = metadata.get('username') - filename = metadata.get('filename') - size = getattr(item, 'size_bytes', 0) or 0 - - if username and filename: - try: - import asyncio - from helper.search_provider import SoulSeekProvider - provider = SoulSeekProvider(config) - debug(f"[search-result] Soulseek: '{title}'") - debug(f" ▶ Downloading from {username}...") - - if db: - db.append_worker_stdout(worker_id, f"Downloading from Soulseek: {title} (from {username})") - - # Get temp directory from config - temp_dir = config.get('temp') - if temp_dir: - temp_dir = str(Path(temp_dir).expanduser()) - - # Call async download_file with asyncio.run() - success = asyncio.run(provider.download_file( - username=username, - filename=filename, - file_size=size, - target_dir=temp_dir - )) - - if success: - downloaded_file = Path(provider.DOWNLOAD_DIR) / Path(filename).name - if downloaded_file.exists(): - debug(f" ✓ Downloaded: {downloaded_file.name}") - files_downloaded_directly += 1 - if db: - db.append_worker_stdout(worker_id, f"✓ Downloaded: {downloaded_file.name}") - if pipeline_context._PIPE_ACTIVE: - # Create proper PipeObject result - result_dict = create_pipe_object_result( - source='soulseek', - identifier=filename, - file_path=str(downloaded_file), - cmdlet_name='download-data', - title=title, - target=str(downloaded_file), # Explicit target for add-file - extra={ - "metadata": metadata, - "origin": "soulseek" - } - ) - pipeline_context.emit(result_dict) - else: - debug(f" ✗ Download failed (peer may be offline)") - if db: - db.append_worker_stdout(worker_id, f"✗ Download failed for {title}") - debug(f" ▶ Try another result: search-file -provider soulseek \"...\" | @2 | download-data") - except Exception as e: - debug(f" ✗ Download error: {e}") - if db: - db.append_worker_stdout(worker_id, f"✗ Error: {e}") - debug(f" ▶ Alternative: search-soulseek -download \"{title}\" -storage ") - else: - debug(f"[search-result] Soulseek: '{title}'") - debug(f" ⚠ Missing download info (username/filename)") - if db: - db.append_worker_stdout(worker_id, f"⚠ Missing download info for {title}") - elif origin == 'libgen': - # LibGen results with mirrors dict for fallback - url = getattr(item, 'target', None) - # Extract mirrors and book_id from full_metadata - metadata = getattr(item, 'full_metadata', {}) if isinstance(getattr(item, 'full_metadata', None), dict) else {} - mirrors = metadata.get('mirrors', {}) - book_id = metadata.get('book_id', '') - author = metadata.get('author') - isbn_val = metadata.get('isbn') - year_val = metadata.get('year') - - if url: - url_entry = { - 'url': str(url), - 'mirrors': mirrors, # Alternative 
mirrors for fallback - 'book_id': book_id, - 'title': title, - 'author': author, - 'isbn': isbn_val, - 'year': year_val, - } - urls_to_download.append(url_entry) - else: - urls_to_download.append(url) if url else None - elif origin == 'debrid': - url = getattr(item, 'target', None) - else: - url = getattr(item, 'url', None) or getattr(item, 'link', None) or getattr(item, 'href', None) or getattr(item, 'target', None) - - if url: - urls_to_download.append(str(url)) - - if not urls_to_download and files_downloaded_directly == 0: - debug(f"No downloadable URLs found") - return 1 - - # Deduplicate URLs while preserving order - unique_urls = [] - seen_keys = set() - - for u in urls_to_download: - key = None - if isinstance(u, dict): - key = u.get('url') or u.get('link') or u.get('target') or u.get('source_url') - if not key: - key = str(u) - else: - key = str(u) - - if key and key not in seen_keys: - seen_keys.add(key) - unique_urls.append(u) - - urls_to_download = unique_urls - - debug(f"Processing {len(urls_to_download)} URL(s)") - for i, u in enumerate(urls_to_download, 1): - if isinstance(u, dict): - debug(f" [{i}] Format: {u.get('format_id', '?')} from {u.get('source_url', '?')[:60]}...") - else: - debug(f" [{i}] URL: {str(u)[:60]}...") - - # ======================================================================== - # RESOLVE OUTPUT DIRECTORY - # ======================================================================== - - final_output_dir = None - - # Priority 1: --storage flag - if storage_location: - try: - # For 'local' storage, check config first before using default - if storage_location.lower() == 'local': - from config import get_local_storage_path - try: - configured_path = get_local_storage_path(config) - if configured_path: - final_output_dir = configured_path - debug(f"Using configured local storage path: {final_output_dir}") - else: - final_output_dir = SharedArgs.resolve_storage(storage_location) - debug(f"Using default storage location: {storage_location} → {final_output_dir}") - except Exception as exc: - log(f"⚠️ Error reading local storage config: {exc}", file=sys.stderr) - final_output_dir = SharedArgs.resolve_storage(storage_location) - debug(f"Falling back to default storage location: {storage_location} → {final_output_dir}") - else: - final_output_dir = SharedArgs.resolve_storage(storage_location) - debug(f"Using storage location: {storage_location} → {final_output_dir}") - except ValueError as e: - log(str(e), file=sys.stderr) - return 1 - - # Priority 2: Config resolver - if final_output_dir is None and resolve_output_dir is not None: - try: - final_output_dir = resolve_output_dir(config) - debug(f"Using config resolver: {final_output_dir}") - except Exception: - pass - - # Priority 4: Config outfile - if final_output_dir is None and config and config.get("outfile"): - try: - final_output_dir = Path(config["outfile"]).expanduser() - debug(f"Using config outfile: {final_output_dir}") - except Exception: - pass - - # Priority 5: Default (home/Videos) - if final_output_dir is None: - final_output_dir = Path.home() / "Videos" - debug(f"Using default directory: {final_output_dir}") - - # Ensure directory exists - try: - final_output_dir.mkdir(parents=True, exist_ok=True) - except Exception as e: - log(f"Cannot create output directory {final_output_dir}: {e}", file=sys.stderr) - return 1 - - # ======================================================================== - # DOWNLOAD EACH URL - # ======================================================================== - - 
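# ---------------------------------------------------------------------
# Editor's sketch (illustrative, not part of the original diff): the loop
# below dispatches on the *shape* of each queued entry. A minimal,
# standalone version of that routing, with hypothetical label names and
# simplified ordering, looks like this; the real handlers are inlined in
# the loop body, and the -torrent flag forces the torrent path as well.
def _classify_entry(entry) -> str:
    """Illustrative only: mirrors the isinstance/key checks used below."""
    if isinstance(entry, dict):
        if entry.get('format_id') and entry.get('source_url'):
            return 'ytdlp-format-selected'   # piped format choice
        if entry.get('__borrow_request__'):
            return 'archive-org-borrow'      # Archive.org loan + page download
        if 'mirrors' in entry:
            return 'libgen-mirror-fallback'  # primary URL, then alternate mirrors
        if entry.get('__playlist_url'):
            return 'playlist-item'           # re-run with playlist_items set
        return 'invalid'
    if isinstance(entry, str) and entry.lower().startswith('magnet:'):
        return 'torrent-worker'              # AllDebrid background worker
    return 'generic-url'                     # probe, then yt-dlp / direct download
# ---------------------------------------------------------------------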
downloaded_files = [] - playlists_displayed = 0 - formats_displayed = False # NEW: Track if we showed formats - exit_code = 0 - - for url in urls_to_download: - try: - selected_playlist_entries: list[Dict[str, Any]] = [] - playlist_existing_paths: set[str] = set() - - # ====== HANDLE FORMAT SELECTION FROM PIPED RESULT ====== - # If url is a dict with format_id and source_url, extract them and override format_selector - current_format_selector = format_selector - actual_url = url - if isinstance(url, dict) and url.get('format_id') and url.get('source_url'): - debug(f"🎬 Format selected: {url.get('format_id')}") - format_id = url.get('format_id') - current_format_selector = format_id - - # If it's a video-only format (has vcodec but no acodec), add bestaudio - # BUT: Skip this for -section downloads because combining formats causes re-encoding - # For -section, use formats that already have audio (muxed) to avoid FFmpeg re-encoding - vcodec = url.get('vcodec', '') - acodec = url.get('acodec', '') - if vcodec and vcodec != "none" and (not acodec or acodec == "none"): - if not clip_range and not section_ranges: - # Only add bestaudio if NOT doing -section or -clip - # For section downloads, we need muxed formats to avoid re-encoding - current_format_selector = f"{format_id}+bestaudio" - debug(f" ℹ️ Video-only format detected, automatically adding bestaudio") - else: - debug(f" ℹ️ Section/clip download: using video-only format as-is (no bestaudio to avoid re-encoding)") - - actual_url = url.get('source_url') - url = actual_url # Use the actual URL for further processing - - # ====== AUTO-BORROW MODE - INTERCEPT SPECIAL BORROW REQUEST DICTS ====== - if isinstance(url, dict) and url.get('__borrow_request__'): - try: - from helper.archive_client import credential_openlibrary, loan, get_book_infos, download - import tempfile - import shutil - - book_id = url.get('book_id') - if not book_id: - debug(f" ✗ Missing book ID for borrowing") - exit_code = 1 - continue - - title_val = url.get('title', 'Unknown Book') - book_id_str = str(book_id) - - debug(f"[auto-borrow] Starting borrow for: {title_val}") - debug(f" Book ID: {book_id_str}") - - # Get Archive.org credentials - email, password = credential_openlibrary(config) - if not email or not password: - log(f" ✗ Archive.org credentials not configured", file=sys.stderr) - log(f" ▶ Set ARCHIVE_EMAIL and ARCHIVE_PASSWORD environment variables", file=sys.stderr) - exit_code = 1 - continue - - # Attempt to borrow and download - try: - debug(f" → Logging into Archive.org...") - from helper.archive_client import login - import requests - try: - session = login(email, password) - except requests.exceptions.Timeout: - debug(f" ✗ Timeout logging into Archive.org (server not responding)") - exit_code = 1 - continue - except requests.exceptions.RequestException as e: - debug(f" ✗ Error connecting to Archive.org: {e}") - exit_code = 1 - continue - - debug(f" → Borrowing book...") - try: - session = loan(session, book_id_str, verbose=True) - except requests.exceptions.Timeout: - debug(f" ✗ Timeout while borrowing (server not responding)") - exit_code = 1 - continue - except requests.exceptions.RequestException as e: - debug(f" ✗ Error while borrowing: {e}") - exit_code = 1 - continue - except Exception as e: - # Check for BookNotAvailableError (imported dynamically or by name) - if type(e).__name__ == 'BookNotAvailableError': - debug(f" ⚠ Book is waitlisted/unavailable on Archive.org") - - # Fallback to LibGen if ISBN is available - isbn = url.get('isbn') - if isbn: 
- debug(f" ▶ Falling back to LibGen search for ISBN: {isbn}") - from helper.search_provider import LibGenProvider - - provider = LibGenProvider(config) - # Search specifically by ISBN - results = provider.search(f"isbn:{isbn}", limit=1) - - if results: - debug(f" ✓ Found {len(results)} result(s) on LibGen") - # Use the first result - libgen_result = results[0] - - # Construct a new URL entry for the main loop to process - # We can't easily inject into the loop, so we'll process it here - # LibGen results from provider have 'target' as mirror URL or libgen:ID - - target = libgen_result.target - debug(f" → Downloading from LibGen: {libgen_result.title}") - - # We need to use the LibGen download logic. - # The easiest way is to call the UnifiedBookDownloader directly or - # delegate to the 'libgen' origin handler if we can. - # But we are inside the loop. - - # Let's use UnifiedBookDownloader directly to download to final_output_dir - from helper.unified_book_downloader import UnifiedBookDownloader - downloader = UnifiedBookDownloader(config) - - # The target might be a mirror URL or libgen:ID - # UnifiedBookDownloader.download_book expects a book dict or similar? - # Actually, let's look at how 'libgen' origin is handled in the main loop. - # It uses urls_to_download.append(url_entry). - - # We can just process this result right here. - # The provider result has full_metadata which is the book dict. - book_data = libgen_result.full_metadata - - # Download the book - # We need to find a working mirror - mirrors = book_data.get('mirrors', {}) - download_url = book_data.get('mirror_url') - - if not download_url and mirrors: - # Pick first mirror - download_url = next(iter(mirrors.values())) - - if download_url: - debug(f" → Mirror: {download_url}") - # Use helper.download.download_media or similar? - # UnifiedBookDownloader has download_book(book, output_dir) - - # Reconstruct book dict for downloader - # It expects: title, author, year, extension, mirrors, etc. - # book_data should have most of it. - - filepath = downloader.download_book(book_data, final_output_dir) - if filepath: - debug(f" ✓ Successfully downloaded from LibGen: {filepath}") - downloaded_files.append(str(filepath)) - - # Emit result - file_hash = _compute_file_hash(filepath) - emit_tags = ['book', 'libgen'] - if isbn: emit_tags.append(f'isbn:{isbn}') - - pipe_obj = create_pipe_object_result( - source='libgen', - identifier=book_data.get('md5', 'unknown'), - file_path=str(filepath), - cmdlet_name='download-data', - title=libgen_result.title, - file_hash=file_hash, - tags=emit_tags, - source_url=download_url - ) - pipeline_context.emit(pipe_obj) - exit_code = 0 - continue # Success! 
- else: - debug(f" ✗ Failed to download from LibGen") - else: - debug(f" ✗ No download URL found in LibGen result") - else: - debug(f" ✗ No results found on LibGen for ISBN: {isbn}") - else: - debug(f" ⚠ No ISBN available for LibGen fallback") - - # If fallback failed or wasn't possible, abort - debug(f" ✗ Unable to borrow from Archive.org and LibGen fallback failed.") - exit_code = 1 - continue - else: - # Re-raise other exceptions - raise e - - debug(f" → Extracting page information...") - # Try both URL formats - book_urls = [ - f"https://archive.org/borrow/{book_id_str}", - f"https://archive.org/details/{book_id_str}" - ] - - title = None - links = None - metadata = None - last_error = None - for book_url in book_urls: - try: - title, links, metadata = get_book_infos(session, book_url) - if title and links: - debug(f" → Found {len(links)} pages") - break - except requests.exceptions.Timeout: - last_error = "Timeout while extracting pages" - debug(f" ⚠ Timeout while extracting from {book_url}") - continue - except Exception as e: - last_error = str(e) - debug(f" ⚠ Failed to extract from {book_url}: {e}") - continue - - if not links: - debug(f" ✗ Could not extract book pages (Last error: {last_error})") - exit_code = 1 - continue - - # Download pages - debug(f" → Downloading {len(links)} pages...") - with tempfile.TemporaryDirectory() as temp_dir: - # download(session, n_threads, directory, links, scale, book_id) - images = download( - session, - n_threads=4, - directory=temp_dir, - links=links, - scale=2, - book_id=str(book_id) - ) - - if not images: - debug(f" ✗ No pages downloaded") - exit_code = 1 - continue - - debug(f" ✓ Downloaded {len(images)} pages") - - # Try to merge into PDF - try: - import img2pdf - debug(f" → Merging pages into PDF...") - - # Use title from result item if available, otherwise fallback to extracted title - filename_title = title_val if title_val and title_val != 'Unknown Book' else (title if title else f"book_{book_id_str}") - # Allow underscores and spaces - filename = "".join(c for c in filename_title if c.isalnum() or c in (' ', '.', '-', '_'))[:100] - output_path = Path(final_output_dir) / f"{filename}.pdf" - - # Make unique filename if needed - i = 1 - while output_path.exists(): - output_path = Path(final_output_dir) / f"{filename}({i}).pdf" - i += 1 - - pdf_content = img2pdf.convert(images) - if pdf_content: - with open(output_path, 'wb') as f: - f.write(pdf_content) - - debug(f" ✓ Successfully borrowed and saved to: {output_path}") - downloaded_files.append(str(output_path)) - - # Emit result for downstream cmdlets - file_hash = _compute_file_hash(output_path) - # Build tags including ISBN if available - emit_tags = ['book', 'borrowed', 'pdf'] - if title_val and title_val != 'Unknown Book': - emit_tags.append(f'title:{title_val}') - isbn_tag = url.get('isbn') - if isbn_tag: - emit_tags.append(f'isbn:{isbn_tag}') - olid_tag = url.get('olid') - if olid_tag: - emit_tags.append(f'olid:{olid_tag}') - - # Fetch OpenLibrary metadata tags - ol_tags = fetch_openlibrary_metadata_tags(isbn=isbn_tag, olid=olid_tag) - emit_tags.extend(ol_tags) - - pipe_obj = create_pipe_object_result( - source='archive.org', - identifier=book_id_str, - file_path=str(output_path), - cmdlet_name='download-data', - title=title_val, - file_hash=file_hash, - tags=emit_tags, - source_url=url.get('source_url', f'archive.org/borrow/{book_id_str}') - ) - pipeline_context.emit(pipe_obj) - exit_code = 0 - except ImportError: - debug(f" ⚠ img2pdf not available - saving pages as collection") 
- # Just copy images to output dir - filename = title if title else f"book_{book_id_str}" - filename = "".join(c for c in filename if c.isalnum() or c in (' ', '.', '-'))[:100] - output_dir = Path(final_output_dir) / filename - i = 1 - while output_dir.exists(): - output_dir = Path(final_output_dir) / f"{filename}({i})" - i += 1 - - shutil.copytree(temp_dir, str(output_dir)) - debug(f" ✓ Successfully borrowed and saved to: {output_dir}") - downloaded_files.append(str(output_dir)) - - # Emit result for downstream cmdlets - # Build tags including ISBN if available - emit_tags = ['book', 'borrowed', 'pages'] - isbn_tag = url.get('isbn') - if isbn_tag: - emit_tags.append(f'isbn:{isbn_tag}') - olid_tag = url.get('olid') - if olid_tag: - emit_tags.append(f'olid:{olid_tag}') - - # Fetch OpenLibrary metadata tags - ol_tags = fetch_openlibrary_metadata_tags(isbn=isbn_tag, olid=olid_tag) - emit_tags.extend(ol_tags) - - pipe_obj = create_pipe_object_result( - source='archive.org', - identifier=book_id_str, - file_path=str(output_dir), - cmdlet_name='download-data', - title=title_val, - tags=emit_tags, - source_url=url.get('source_url', f'archive.org/borrow/{book_id_str}') - ) - pipeline_context.emit(pipe_obj) - exit_code = 0 - - except Exception as e: - debug(f" ✗ Borrow/download failed: {e}") - import traceback - traceback.print_exc() - exit_code = 1 - - continue # Skip normal URL handling - - except ImportError as e: - debug(f" ✗ Archive.org tools not available: {e}") - exit_code = 1 - continue - except Exception as e: - debug(f" ✗ Auto-borrow error: {e}") - import traceback - traceback.print_exc() - exit_code = 1 - continue - - - # ====== LIBGEN MIRROR FALLBACK MODE ====== - # Handle libgen results with mirrors dict for fallback on failure - if isinstance(url, dict) and 'mirrors' in url: - try: - primary_url = url.get('url') - mirrors_dict = url.get('mirrors', {}) - book_id = url.get('book_id', '') - title_val = url.get('title') - author_val = url.get('author') - isbn_val = url.get('isbn') - year_val = url.get('year') - - if not primary_url: - debug(f"Skipping libgen entry: no primary URL") - exit_code = 1 - continue - - # Build list of mirrors to try: primary first, then alternatives - mirrors_to_try = [primary_url] - mirrors_to_try.extend(mirrors_dict.values()) - - # Remove duplicates while preserving order - mirrors_to_try = list(dict.fromkeys(mirrors_to_try)) - - debug(f"🔄 LibGen download with mirror fallback (book_id: {book_id})") - debug(f" Primary: {primary_url[:80]}...") - - if len(mirrors_to_try) > 1: - debug(f" {len(mirrors_to_try) - 1} alternative mirror(s) available") - - # Resolve cookies path - final_cookies_path_libgen = None - if cookies_path: - if resolve_cookies_path: - try: - final_cookies_path_libgen = resolve_cookies_path(config, Path(cookies_path)) - except Exception: - final_cookies_path_libgen = Path(cookies_path).expanduser() if cookies_path else None - else: - final_cookies_path_libgen = Path(cookies_path).expanduser() - - download_succeeded = False - last_error = None - successful_mirror = None - - # Try each mirror in sequence using libgen_service's native download - for mirror_idx, mirror_url in enumerate(mirrors_to_try, 1): - try: - if mirror_idx > 1: - debug(f" → Trying mirror #{mirror_idx}: {mirror_url[:80]}...") - - # Use libgen_service's download_from_mirror for proper libgen handling - from helper.libgen_service import download_from_mirror - - # Generate filename from book_id and title - safe_title = "".join(c for c in str(title or "book") if c.isalnum() or c in 
(' ', '.', '-'))[:100] - file_path = final_output_dir / f"{safe_title}_{book_id}.pdf" - - progress_bar = models.ProgressBar() - progress_start = time.time() - last_update = [progress_start] - progress_bytes = [0] - progress_total = [0] - - def _libgen_progress(downloaded: int, total: int) -> None: - progress_bytes[0] = downloaded - progress_total[0] = total - now = time.time() - if total > 0 and now - last_update[0] >= 0.5: - percent = (downloaded / total) * 100 - elapsed = max(now - progress_start, 1e-6) - speed = downloaded / elapsed if elapsed > 0 else 0 - remaining = max(total - downloaded, 0) - eta = remaining / speed if speed > 0 else 0 - minutes, seconds = divmod(int(eta), 60) - hours, minutes = divmod(minutes, 60) - eta_str = f"{hours:02d}:{minutes:02d}:{seconds:02d}" - speed_str = f"{progress_bar.format_bytes(speed)}/s" - progress_line = progress_bar.format_progress( - percent_str=f"{percent:.1f}%", - downloaded=downloaded, - total=total, - speed_str=speed_str, - eta_str=eta_str, - ) - debug(f" {progress_line}") - last_update[0] = now - - # Attempt download using libgen's native function - success, downloaded_path = download_from_mirror( - mirror_url=mirror_url, - output_path=file_path, - log_info=lambda msg: debug(f" {msg}"), - log_error=lambda msg: debug(f" ⚠ {msg}"), - progress_callback=_libgen_progress, - ) - - final_path = Path(downloaded_path) if downloaded_path else file_path - if success and final_path.exists(): - downloaded = progress_bytes[0] or final_path.stat().st_size - elapsed = time.time() - progress_start - avg_speed = downloaded / elapsed if elapsed > 0 else 0 - debug(f" ✓ Downloaded in {elapsed:.1f}s at {progress_bar.format_bytes(avg_speed)}/s") - debug(f" ✓ Downloaded successfully from mirror #{mirror_idx}") - successful_mirror = mirror_url - download_succeeded = True - - # Emit result for downstream cmdlets - file_hash = _compute_file_hash(final_path) - emit_tags = build_book_tags( - title=title_val or title, - author=author_val, - isbn=isbn_val, - year=year_val, - source='libgen', - extra=[f"libgen_id:{book_id}"] if book_id else None, - ) - - pipe_obj = create_pipe_object_result( - source='libgen', - identifier=book_id, - file_path=str(final_path), - cmdlet_name='download-data', - file_hash=file_hash, - tags=emit_tags, - source_url=successful_mirror - ) - pipeline_context.emit(pipe_obj) - downloaded_files.append(str(final_path)) - exit_code = 0 - break # Success, stop trying mirrors - - except Exception as e: - last_error = str(e) - if mirror_idx == 1: - debug(f" ⚠ Primary mirror failed: {e}") - else: - debug(f" ⚠ Mirror #{mirror_idx} failed: {e}") - - if not download_succeeded: - log(f" ✗ All mirrors failed. 
Last error: {last_error}", file=sys.stderr) - if "getaddrinfo failed" in str(last_error) or "NameResolutionError" in str(last_error) or "Failed to resolve" in str(last_error): - log(f" ⚠ Network issue detected: Cannot resolve LibGen mirror hostnames", file=sys.stderr) - log(f" ▶ Check your network connection or try with a VPN/proxy", file=sys.stderr) - exit_code = 1 - - continue # Skip to next URL - - except Exception as e: - debug(f" ✗ LibGen mirror fallback error: {e}") - import traceback - traceback.print_exc(file=sys.stderr) - exit_code = 1 - continue - - # Ensure URL is a string for normal handling - if not isinstance(url, str): - # Check if it's a playlist item marker - if isinstance(url, dict) and url.get('__playlist_url'): - playlist_url = url.get('__playlist_url') - item_num = url.get('__playlist_item', 1) - debug(f"📍 Handling selected playlist item #{item_num}") - # Convert to actual URL and set playlist_items to download only this item - url = playlist_url - playlist_items = str(item_num) - # Fall through to normal handling below - else: - debug(f"Skipping invalid URL entry: {url}") - continue - - debug(f"Probing URL: {url}") - - # ====== TORRENT MODE - INTERCEPT BEFORE NORMAL DOWNLOAD ====== - if torrent_mode or url.lower().startswith('magnet:'): - debug(f"🧲 Torrent/magnet mode - spawning background worker...") - - try: - # Get API key from config - from config import get_debrid_api_key - api_key = get_debrid_api_key(config) - - if not api_key: - log(f"✗ AllDebrid API key not found in config", file=sys.stderr) - exit_code = 1 - continue - - # Create a unique worker ID - worker_id = f"torrent_{uuid.uuid4().hex[:8]}" - - # Get worker manager if available from config - worker_manager = config.get('_worker_manager') - - # Create worker in manager if available - if worker_manager: - try: - worker_manager.track_worker( - worker_id, - worker_type="download_torrent", - title=f"Download: {url[:60]}...", - description=f"Torrent/magnet download via AllDebrid", - pipe=pipeline_context.get_current_command_text() - ) - debug(f"✓ Worker created (ID: {worker_id})") - except Exception as e: - debug(f"⚠ Failed to create worker: {e}") - worker_manager = None - - # Spawn background thread to handle the download - worker_thread = threading.Thread( - target=_download_torrent_worker, - args=( - worker_id, - url, - final_output_dir, - config, - api_key, - playlist_items, - audio_mode, - wait_timeout, - worker_manager, - ), - daemon=False, - name=f"TorrentWorker_{worker_id}" - ) - - worker_thread.start() - debug(f"✓ Background worker started (ID: {worker_id})") - - # Emit worker info so user can track it - worker_info = { - 'worker_id': worker_id, - 'worker_type': 'download_torrent', - 'source_url': url, - 'status': 'running', - 'message': 'Downloading in background...' 
- } - pipeline_context.emit(worker_info) - - continue - - except ImportError: - log(f"✗ AllDebrid client not available", file=sys.stderr) - exit_code = 1 - except Exception as e: - # Catches AllDebridError and other exceptions - log(f"✗ Failed to spawn torrent worker: {e}", file=sys.stderr) - import traceback - traceback.print_exc(file=sys.stderr) - exit_code = 1 - - continue # Skip to next URL - - # ====== NORMAL DOWNLOAD MODE (HTTP/HTTPS) ====== - - # First, probe the URL to detect playlists and get info - # For YouTube URLs, ignore playlists and only probe the single video - is_youtube_url = isinstance(url, str) and ('youtube.com' in url or 'youtu.be' in url) - probe_info = probe_url(url, no_playlist=is_youtube_url) - is_actual_playlist = False # Track if we have a real multi-item playlist - - if probe_info: - debug(f"✓ Probed: {probe_info.get('title', url)} ({probe_info.get('extractor', 'unknown')})") - - # If it's a playlist, show the result table and skip download for now - entries = probe_info.get("entries", []) - if entries and not playlist_items: - is_actual_playlist = True # We have a real playlist with multiple items - # Playlist detected but NO selection provided - # Always show table for user to select items - debug(f"📋 Found playlist with {len(entries)} items") - _show_playlist_table(url, probe_info) - debug(f"ℹ️ Playlist displayed. To select items, use @* or @1,3,5-8 syntax after piping results") - playlists_displayed += 1 - continue # Skip to next URL - don't download playlist without selection - elif entries and playlist_items: - is_actual_playlist = True # We have a real playlist with item selection - # Playlist detected WITH selection - will download below - # Expand wildcard if present - expanded_items = _expand_playlist_selection(playlist_items, len(entries)) - playlist_items = expanded_items - selected_playlist_entries = _select_playlist_entries(entries, playlist_items) - debug(f"📋 Found playlist with {len(entries)} items - downloading selected: {playlist_items}") - else: - debug(f"Single item: {probe_info.get('title', 'Unknown')}") - - # ====== FORMAT LISTING MODE ====== - if list_formats_mode and isinstance(url, str) and url.startswith(('http://', 'https://')): - debug(f"Fetching formats for: {url}") - from helper.download import list_formats - from result_table import ResultTable - - all_formats = list_formats(url, no_playlist=is_youtube_url, playlist_items=playlist_items) - if all_formats: - # Filter and sort formats for better user experience - formats = _filter_and_sort_formats(all_formats) - - # Create result table for format display - table = ResultTable(title=f"Available Formats - {probe_info.get('title', 'Unknown')}") - - for idx, fmt in enumerate(formats, start=1): - row = table.add_row() - row.add_column("Format ID", fmt.get("format_id", "")) - - # Build resolution/bitrate string - vcodec = fmt.get("vcodec", "") - acodec = fmt.get("acodec", "") - height = fmt.get("height") - tbr = fmt.get("tbr") - - if vcodec != "none" and acodec != "none": - # Video + audio - res_str = fmt.get("resolution", "") - elif acodec != "none" and vcodec == "none": - # Audio only - show bitrate - res_str = f"{tbr:.0f} kbps" if tbr else "audio" - else: - # Video only - res_str = fmt.get("resolution", "") - - row.add_column("Resolution", res_str) - - # Build codec string (merged vcodec/acodec) - codec_parts = [] - if vcodec and vcodec != "none": - codec_parts.append(f"v:{vcodec}") - if acodec and acodec != "none": - codec_parts.append(f"a:{acodec}") - codec_str = " | 
".join(codec_parts) if codec_parts else "unknown" - row.add_column("Codec", codec_str) - - if fmt.get("filesize"): - size_mb = fmt["filesize"] / (1024 * 1024) - row.add_column("Size", f"{size_mb:.1f} MB") - - # Enable @N expansion to rerun download-data with -item idx - row.set_selection_args(["-item", str(idx)]) - - # Set source command for @N expansion - table.set_source_command("download-data", [url]) - - # Display table - log(str(table), flush=True) - formats_displayed = True - - # Store table for @N expansion so CLI can reconstruct commands - pipeline_context.set_current_stage_table(table) - pipeline_context.set_last_result_table_overlay(table, formats) - debug("Use @N to pick a format; pipeline paused until selection") - else: - log(f"✗ No formats available for this URL", file=sys.stderr) - - # Stop pipeline here; selection via @N will re-run download-data with -item - return 0 - - # ====== AUTO-DETECT MULTIPLE FORMATS ====== - # Check if multiple formats exist and handle based on -item flag - if (not current_format_selector and not list_formats_mode and - isinstance(url, str) and url.startswith(('http://', 'https://'))): - # Check if this is a yt-dlp supported URL (YouTube, Vimeo, etc.) - from helper.download import list_formats - from result_table import ResultTable - - if is_url_supported_by_ytdlp(url): - debug(f"Checking available formats for: {url}") - all_formats = list_formats(url, no_playlist=is_youtube_url, playlist_items=playlist_items) - - if all_formats: - # Filter and sort formats for better user experience - formats = _filter_and_sort_formats(all_formats) - - # Handle -item selection for formats (single video) - if playlist_items and playlist_items.isdigit() and not is_actual_playlist: - idx = int(playlist_items) - if 0 < idx <= len(formats): - fmt = formats[idx-1] - current_format_selector = fmt.get("format_id") - - # If video-only format is selected, append +bestaudio to merge with best audio - # BUT: Skip this for -section downloads because combining formats causes re-encoding - vcodec = fmt.get("vcodec") - acodec = fmt.get("acodec") - if vcodec and vcodec != "none" and (not acodec or acodec == "none"): - if not clip_range and not section_ranges: - # Only add bestaudio if NOT doing -section or -clip - current_format_selector = f"{current_format_selector}+bestaudio" - debug(f"Video-only format selected, appending bestaudio: {current_format_selector}") - else: - debug(f"Section/clip download: using video-only format as-is (no bestaudio to avoid re-encoding)") - - debug(f"Selected format #{idx}: {current_format_selector}") - playlist_items = None # Clear so it doesn't affect download options - else: - log(f"Invalid format index: {idx}", file=sys.stderr) - - elif len(formats) > 1: - # Multiple formats available - debug(f"📊 Found {len(formats)} available formats for: {probe_info.get('title', 'Unknown')}") - - # Always show table for format selection via @N syntax - # Show table and wait for @N selection - table = ResultTable(title=f"Available Formats - {probe_info.get('title', 'Unknown')}") - - for fmt in formats: - row = table.add_row() - row.add_column("Format ID", fmt.get("format_id", "")) - - # Build resolution/bitrate string - vcodec = fmt.get("vcodec", "") - acodec = fmt.get("acodec", "") - height = fmt.get("height") - tbr = fmt.get("tbr") - - if vcodec != "none" and acodec != "none": - # Video + audio - res_str = fmt.get("resolution", "") - elif acodec != "none" and vcodec == "none": - # Audio only - show bitrate - res_str = f"{tbr:.0f} kbps" if tbr else "audio" - 
else: - # Video only - res_str = fmt.get("resolution", "") - - row.add_column("Resolution", res_str) - - # Build codec string (merged vcodec/acodec) - codec_parts = [] - if vcodec and vcodec != "none": - codec_parts.append(f"v:{vcodec}") - if acodec and acodec != "none": - codec_parts.append(f"a:{acodec}") - codec_str = " | ".join(codec_parts) if codec_parts else "unknown" - row.add_column("Codec", codec_str) - - if fmt.get("filesize"): - size_mb = fmt["filesize"] / (1024 * 1024) - row.add_column("Size", f"{size_mb:.1f} MB") - - # Set source command for @N expansion - table.set_source_command("download-data", [url]) - - # Set row selection args so @N expands to "download-data URL -item N" - for i in range(len(formats)): - table.set_row_selection_args(i, ["-item", str(i + 1)]) - - # Display table - log(str(table), flush=True) - debug(f"💡 Use @N syntax to select a format and download (e.g., @1)") - - # Store table for @N expansion so CLI can reconstruct commands - pipeline_context.set_current_stage_table(table) - pipeline_context.set_last_result_table_overlay(table, formats) - - formats_displayed = True # Mark that we displayed formats - return 0 # Pause pipeline; user must select format via @N - - debug(f"Downloading: {url}") - - # Special handling for LibGen URLs - if "libgen" in url or "library.lol" in url: - debug(f"🔄 Detected LibGen URL, using specialized downloader: {url}") - try: - from helper.libgen_service import download_from_mirror, search_libgen - - # If it's a search/details page, try to find the download link - # e.g. https://libgen.li/series.php?id=577851 - # We can try to extract the ID and search for it, or just try to download if it's a mirror - - # Extract ID if possible, BUT skip for series/edition pages which are handled by download_from_mirror - libgen_id = "" - results = [] - - if "series.php" not in url and "edition.php" not in url: - match = re.search(r"id=(\d+)", url) - if match: - libgen_id = match.group(1) - debug(f" Extracted LibGen ID: {libgen_id}") - - # Search by ID to get fresh mirror links - results = search_libgen(libgen_id, limit=1) - if results: - # Use the mirror URL from the result - mirror_url = results[0].get("mirror_url") - if mirror_url: - debug(f" Resolved to mirror URL: {mirror_url}") - url = mirror_url - - # Attempt download with specialized function - # We need a filename. LibGen doesn't always give one easily in the URL. - # download_from_mirror expects a full path. - # We'll try to guess a filename or use a temp one and rename later? - # Actually download_from_mirror writes to output_path. 
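# Editor's sketch (illustrative, not part of the original code): the
# filename construction just below and in the borrow branch above follow
# the same pattern - strip unsafe characters, cap the length, uniquify.
# A small standalone helper capturing that pattern:
from pathlib import Path  # already imported at module top in the real file

def _safe_output_path(directory: Path, title: str, ext: str = "pdf") -> Path:
    """Build a sanitized, collision-free output path (sketch)."""
    safe = "".join(c for c in title if c.isalnum() or c in (' ', '.', '-', '_')).strip()[:100]
    candidate = directory / f"{safe or 'download'}.{ext}"
    counter = 1
    while candidate.exists():
        candidate = directory / f"{safe or 'download'}({counter}).{ext}"
        counter += 1
    return candidate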
- - # Let's try to get metadata to make a good filename - filename = "libgen_download.bin" - title_from_results = None - author_from_results = None - year_from_results = None - if libgen_id and results: - title_from_results = results[0].get("title") - author_from_results = results[0].get("author") - year_from_results = results[0].get("year") - ext = results[0].get("extension", "pdf") - # Sanitize filename - safe_title = "".join(c for c in (title_from_results or "book") if c.isalnum() or c in (' ', '-', '_')).strip() - filename = f"{safe_title}.{ext}" - elif "series.php" in url: - filename = f"series_{re.search(r'id=(\d+)', url).group(1) if re.search(r'id=(\d+)', url) else 'unknown'}.pdf" - - output_path = final_output_dir / filename - - success, downloaded_path = download_from_mirror( - url, - output_path, - log_info=debug, - log_error=log, - ) - final_file = Path(downloaded_path) if downloaded_path else output_path - if success and final_file.exists(): - debug(f"✓ LibGen download successful: {final_file}") - - # Create a result object - info = { - "id": libgen_id or "libgen", - "title": filename, - "webpage_url": url, - "ext": final_file.suffix.lstrip("."), - } - - emit_tags = build_book_tags( - title=title_from_results or filename, - author=author_from_results, - year=year_from_results, - source="libgen", - extra=[f"libgen_id:{libgen_id}"] if libgen_id else None, - ) - file_hash = _compute_file_hash(final_file) - - # Emit result - pipeline_context.emit(create_pipe_object_result( - source="libgen", - identifier=libgen_id or "libgen", - file_path=str(final_file), - cmdlet_name="download-data", - title=filename, - file_hash=file_hash, - tags=emit_tags, - extra=info - )) - downloaded_files.append(str(final_file)) - continue - else: - debug("⚠ LibGen specialized download failed, falling back to generic downloader...") - except Exception as e: - debug(f"⚠ LibGen specialized download error: {e}") - # Fall through to generic downloader - - # Resolve cookies path if specified - final_cookies_path = None - if cookies_path: - if resolve_cookies_path: - try: - final_cookies_path = resolve_cookies_path(config, Path(cookies_path)) - except Exception: - final_cookies_path = Path(cookies_path).expanduser() if cookies_path else None - else: - final_cookies_path = Path(cookies_path).expanduser() - - # Create download options - use correct parameter names - # Mode is "audio" or "video", required field - mode = "audio" if audio_mode else "video" - - # Detect YouTube URLs and set no_playlist to download only the single video - is_youtube_url = isinstance(url, str) and ('youtube.com' in url or 'youtu.be' in url) - - # Determine clip_sections to pass to yt-dlp - # Sections take precedence over clip if both are specified - # Sections are for yt-dlp download-sections (merge multiple clips at source) - # Clip is for post-download extraction - clip_sections_str = None - if section_ranges: - # Check if this is a yt-dlp URL - if is_url_supported_by_ytdlp(url): - # Convert section ranges to yt-dlp format: "start1-end1,start2-end2" - # Use * prefix to indicate download_sections (yt-dlp convention in some contexts) - # But here we just pass the string and let helper/download.py parse it - clip_sections_str = ",".join(f"{start}-{end}" for start, end in section_ranges) - debug(f"Using yt-dlp sections: {clip_sections_str}") - else: - log(f"Warning: -section only works with yt-dlp supported URLs. 
Use -clip for {url}", file=sys.stderr) - elif clip_range: - # For -clip, we use the same field but it's handled differently in helper/download.py - # Wait, helper/download.py treats clip_sections as download_sections for yt-dlp - # So -clip should also work as download_sections if it's a yt-dlp URL? - # Currently -clip is just one range. - clip_sections_str = f"{clip_range[0]}-{clip_range[1]}" - - download_opts = DownloadOptions( - url=url, - mode=mode, - output_dir=final_output_dir, - cookies_path=final_cookies_path, - ytdl_format=current_format_selector, # Use per-URL format override if available - clip_sections=clip_sections_str, - playlist_items=playlist_items, - no_playlist=is_youtube_url, # For YouTube, ignore playlist URLs and download single video - ) - - # For playlist downloads, capture existing files BEFORE download - if playlist_items and selected_playlist_entries: - _, playlist_existing_paths = _snapshot_playlist_paths(selected_playlist_entries, final_output_dir) - - # Call download_media from helper - no show_progress param - result_data = download_media(download_opts) - - if result_data and result_data.path: - file_path = result_data.path - - if file_path.exists(): - # Check if we have multiple section files to emit - if result_data.paths: - # Section download - emit each section file separately for merge-file - debug(f"📋 Section download: emitting {len(result_data.paths)} file(s) to merge-file") - for section_file in result_data.paths: - if section_file.exists(): - file_hash = _compute_file_hash(section_file) - tags = result_data.tags if result_data.tags else [] - - pipe_obj = create_pipe_object_result( - source='download', - identifier=section_file.stem, - file_path=str(section_file), - cmdlet_name='download-data', - title=section_file.name, - file_hash=file_hash, - is_temp=False, - extra={ - 'url': url, - 'tags': tags, - 'audio_mode': audio_mode, - 'format': format_selector, - 'from_sections': True, - } - ) - - downloaded_files.append(section_file) - pipeline_context.emit(pipe_obj) - # Check if this was a playlist download (is_actual_playlist tracks if we have a multi-item playlist) - elif is_actual_playlist: - if not selected_playlist_entries: - debug("⚠ Playlist metadata unavailable; cannot emit selected items for this stage.") - exit_code = 1 - continue - - matched_after, _ = _snapshot_playlist_paths(selected_playlist_entries, final_output_dir) - if not matched_after: - debug("⚠ No playlist files found for the selected items after download.") - exit_code = 1 - continue - - new_playlist_files: list[Path] = [] - for playlist_file in matched_after: - try: - path_key = str(playlist_file.resolve()) - except OSError: - path_key = str(playlist_file) - if path_key not in playlist_existing_paths: - new_playlist_files.append(playlist_file) - - emit_targets = new_playlist_files if new_playlist_files else matched_after - if new_playlist_files: - debug(f"📋 Playlist download completed: {len(new_playlist_files)} new file(s)") - else: - debug(f"📁 Reusing {len(emit_targets)} cached playlist file(s)") - - for playlist_file in emit_targets: - file_hash = _compute_file_hash(playlist_file) - - tags = [] - if extract_ytdlp_tags and result_data.tags: - tags = result_data.tags - - pipe_obj = create_pipe_object_result( - source='download', - identifier=playlist_file.stem, - file_path=str(playlist_file), - cmdlet_name='download-data', - title=playlist_file.name, - file_hash=file_hash, - is_temp=False, - extra={ - 'url': url, - 'tags': tags, - 'audio_mode': audio_mode, - 'format': 
format_selector, - 'from_playlist': True, - }, - ) - - downloaded_files.append(playlist_file) - pipeline_context.emit(pipe_obj) - else: - # Single file download - file_hash = result_data.hash_value or _compute_file_hash(file_path) - tags = result_data.tags if result_data.tags else [] - - pipe_obj = create_pipe_object_result( - source='download', - identifier=file_path.stem, - file_path=str(file_path), - cmdlet_name='download-data', - title=file_path.name, - file_hash=file_hash, - is_temp=False, - extra={ - 'url': url, - 'tags': tags, - 'audio_mode': audio_mode, - 'format': format_selector, - 'clipped': clip_range is not None, - } - ) - - downloaded_files.append(file_path) - pipeline_context.emit(pipe_obj) - - debug(f"✓ Downloaded: {file_path}") - else: - log(f"Download returned no result for {url}", file=sys.stderr) - exit_code = 1 - - except Exception as e: - log(f"Error downloading {url}: {e}", file=sys.stderr) - import traceback - traceback.print_exc(file=sys.stderr) - exit_code = 1 - - # Success if we downloaded files or displayed playlists/formats - if downloaded_files or files_downloaded_directly > 0: - total_files = len(downloaded_files) + files_downloaded_directly - debug(f"✓ Successfully downloaded {total_files} file(s)") - - stage_ctx = pipeline_context.get_stage_context() - should_display_results = stage_ctx is None or stage_ctx.is_last_stage - - if downloaded_files and should_display_results: - try: - from cmdlets import search_file as search_cmdlet - except Exception: - search_cmdlet = None - - if search_cmdlet: - seen_hashes: set[str] = set() - for file_entry in downloaded_files: - path_obj = Path(file_entry) if not isinstance(file_entry, Path) else file_entry - if not path_obj.is_file(): - continue - file_hash = _compute_file_hash(path_obj) - if file_hash and file_hash not in seen_hashes: - seen_hashes.add(file_hash) - search_cmdlet._run(None, [f"hash:{file_hash}"], config) - else: - debug("search-file not available; skipping post-download display") - elif downloaded_files: - debug("Skipping search-file display because downstream pipeline is present") - - if db: - db.update_worker_status(worker_id, 'completed') - return 0 - - if playlists_displayed: - debug(f"✓ Displayed {playlists_displayed} playlist(s) for selection") - if db: - db.update_worker_status(worker_id, 'completed') - db.close() - return 0 # Success - playlists shown - - if formats_displayed: - debug(f"✓ Format selection table displayed - use @N to select and download") - if db: - db.update_worker_status(worker_id, 'completed') - db.close() - return 0 # Success - formats shown - - log(f"No files were downloaded or playlists displayed", file=sys.stderr) - if db: - db.update_worker_status(worker_id, 'completed') - db.close() - return 1 - - - -CMDLET = Cmdlet( - name="download-data", - exec=_run, - summary="Download data from URLs with playlist/clip support using yt-dlp", - usage="download-data [options] or search-file | download-data [options]", - aliases=["download", "dl"], - args=[ - CmdletArg( - name="url", - type="string", - required=False, - description="URL to download (HTTP/HTTPS or file with URL list)", - variadic=True - ), - CmdletArg( - name="-url", - type="string", - description="URL to download (alias for positional argument)", - variadic=True - ), - CmdletArg( - name="list-formats", - type="flag", - description="List available formats without downloading" - ), - CmdletArg( - name="audio", - type="flag", - alias="a", - description="Download audio only (extract from video)" - ), - CmdletArg( - 
name="video", - type="flag", - alias="v", - description="Download video (default if not specified)" - ), - CmdletArg( - name="format", - type="string", - alias="fmt", - description="Explicit yt-dlp format selector (e.g., 'bestvideo+bestaudio')" - ), - CmdletArg( - name="clip", - type="string", - description="Extract time range: MM:SS-MM:SS (e.g., 34:03-35:08) or seconds" - ), - CmdletArg( - name="section", - type="string", - description="Download sections (yt-dlp only): TIME_RANGE[,TIME_RANGE...] (e.g., '1:30-1:35,0:05-0:15')" - ), - CmdletArg( - name="cookies", - type="string", - description="Path to cookies.txt file for authentication" - ), - CmdletArg( - name="torrent", - type="flag", - description="Download torrent/magnet via AllDebrid (requires API key in config)" - ), - CmdletArg( - name="wait", - type="float", - description="Wait time (seconds) for magnet processing timeout" - ), - CmdletArg( - name="item", - type="string", - alias="items", - description="Item selection for playlists/formats: use '-item N' to select format N, or '-item' to show table for @N selection in next command" - ), - SharedArgs.STORAGE, # Storage location: local, hydrus, 0x0, debrid, ftp - ], - details=[ - "Download media from URLs with advanced features.", - "", - "BASIC USAGE:", - " download-data https://youtube.com/watch?v=xyz", - " download-data https://example.com/file.pdf -storage local", - "", - "AUDIO/VIDEO OPTIONS:", - " -audio, -a Extract audio from video (M4A, MP3)", - " -video, -v Download as video (default)", - "", - "FORMAT SELECTION:", - " -format SELECTOR Specify yt-dlp format", - " Examples: 'best', 'bestvideo+bestaudio', '22'", - "", - "FORMAT/RESULT ITEM SELECTION:", - " -item Show available formats in table (see @N below)", - " -item N Auto-select and download format #N (e.g., -item 1)", - " Example: download-data URL -item 2 | add-file -storage local", - "", - "FORMAT SELECTION WITH @N SYNTAX:", - " 1. Show formats: download-data URL", - " 2. Select with @N: @1 | download-data | add-file", - " OR use -item N to skip manual selection", - "", - "CLIPPING:", - " -clip START-END Extract time range from media", - " Format: MM:SS-MM:SS (e.g., 34:03-35:08)", - " Also accepts: 2043-2108 (seconds)", - "", - "SECTION DOWNLOAD (yt-dlp only):", - " -section RANGES Download specific time sections and merge them", - " Format: HH:MM:SS-HH:MM:SS[,HH:MM:SS-HH:MM:SS...]", - " Example: -section '1:30-1:35,0:05-0:15'", - " Each section is downloaded separately then merged in order", - "", - "PLAYLIST MODE:", - " Automatically detects playlists", - " Shows numbered list of tracks", - " Download specific items: -item '1,3,5-8'", - " Download all items: -item '*'", - "", - "TORRENT MODE:", - " Download torrents/magnets via AllDebrid (if configured)", - " Usage: download-data -torrent magnet:?xt=urn:btih:... 
-item '1,3,5-8'", - " -wait SECONDS Maximum wait time for magnet processing (default: 1800)", - "", - "STORAGE LOCATIONS:", - " -storage local ~/Videos (default)", - " -storage hydrus ~/.hydrus/client_files", - " -storage 0x0 ~/Screenshots", - " -storage debrid ~/Debrid", - " -storage ftp ~/FTP", - "", - "EXAMPLES:", - " # Download YouTube video as audio", - " download-data https://youtube.com/watch?v=xyz -audio -storage local", - "", - " # Extract specific clip from video", - " download-data https://vimeo.com/123456 -clip 1:30-2:45 -format best", - "", - " # Download multiple sections and merge them", - " download-data https://youtube.com/watch?v=xyz -section '1:30-1:35,0:05-0:15' | merge-file | add-file -storage local", - "", - " # Download specific tracks from playlist", - " download-data https://youtube.com/playlist?list=xyz -item '1,3,5-8'", - "", - " # Download all items from playlist", - " download-data https://youtube.com/playlist?list=xyz -item '*'", - "", - " # Download with authentication", - " download-data https://example.com/content -cookies ~/cookies.txt", - "", - "TORRENT EXAMPLES:", - " # Download specific tracks from magnet link", - " download-data -torrent magnet:?xt=urn:btih:... -item '1,3,5-8' -storage local", - "", - " # Download all items from torrent and merge", - " download-data -torrent magnet:?xt=urn:btih:... -item '*' | merge-file | add-file", - "", - " # Download with custom wait time (5 minutes)", - " download-data -torrent magnet:?xt=urn:btih:... -wait 300 -item '1-5'", - ] -) diff --git a/cmdlets/download_file.py b/cmdlets/download_file.py new file mode 100644 index 0000000..663ebfd --- /dev/null +++ b/cmdlets/download_file.py @@ -0,0 +1,199 @@ +"""Download files directly via HTTP (non-yt-dlp url). + +Focused cmdlet for direct file downloads from: +- PDFs, images, documents +- url not supported by yt-dlp +- LibGen sources +- Direct file links + +No streaming site logic - pure HTTP download with retries. 
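Illustrative usage (editor's example; flags as declared in the cmdlet's args below):

    download-file https://example.com/paper.pdf
    download-file https://example.com/paper.pdf -output paper.pdf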
+""" + +from __future__ import annotations + +import sys +from pathlib import Path +from typing import Any, Dict, List, Optional, Sequence + +from helper.download import DownloadError, _download_direct_file +from helper.logger import log, debug +from models import DownloadOptions +import pipeline as pipeline_context + +from ._shared import Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, register_url_with_local_library, coerce_to_pipe_object + + +class Download_File(Cmdlet): + """Class-based download-file cmdlet - direct HTTP downloads.""" + + def __init__(self) -> None: + """Initialize download-file cmdlet.""" + super().__init__( + name="download-file", + summary="Download files directly via HTTP (PDFs, images, documents)", + usage="download-file [options] or search-file | download-file [options]", + alias=["dl-file", "download-http"], + arg=[ + CmdletArg(name="url", type="string", required=False, description="URL to download (direct file links)", variadic=True), + CmdletArg(name="-url", type="string", description="URL to download (alias for positional argument)", variadic=True), + CmdletArg(name="output", type="string", alias="o", description="Output filename (auto-detected if not specified)"), + SharedArgs.URL + ], + detail=["Download files directly via HTTP without yt-dlp processing.", "For streaming sites, use download-media."], + exec=self.run, + ) + self.register() + + def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: + """Main execution method.""" + stage_ctx = pipeline_context.get_stage_context() + in_pipeline = stage_ctx is not None and getattr(stage_ctx, "total_stages", 1) > 1 + if in_pipeline and isinstance(config, dict): + config["_quiet_background_output"] = True + return self._run_impl(result, args, config) + + def _run_impl(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: + """Main download implementation for direct HTTP files.""" + try: + debug("Starting download-file") + + # Parse arguments + parsed = parse_cmdlet_args(args, self) + + # Extract options + raw_url = parsed.get("url", []) + if isinstance(raw_url, str): + raw_url = [raw_url] + + if not raw_url: + log("No url to download", file=sys.stderr) + return 1 + + # Get output directory + final_output_dir = self._resolve_output_dir(parsed, config) + if not final_output_dir: + return 1 + + debug(f"Output directory: {final_output_dir}") + + # Download each URL + downloaded_count = 0 + quiet_mode = bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False + custom_output = parsed.get("output") + + for url in raw_url: + try: + debug(f"Processing: {url}") + + # Direct HTTP download + result_obj = _download_direct_file(url, final_output_dir, quiet=quiet_mode) + debug(f"Download completed, building pipe object...") + pipe_obj_dict = self._build_pipe_object(result_obj, url, final_output_dir) + debug(f"Emitting result to pipeline...") + pipeline_context.emit(pipe_obj_dict) + + # Automatically register url with local library + if pipe_obj_dict.get("url"): + pipe_obj = coerce_to_pipe_object(pipe_obj_dict) + register_url_with_local_library(pipe_obj, config) + + downloaded_count += 1 + debug("✓ Downloaded and emitted") + + except DownloadError as e: + log(f"Download failed for {url}: {e}", file=sys.stderr) + except Exception as e: + log(f"Error processing {url}: {e}", file=sys.stderr) + + if downloaded_count > 0: + debug(f"✓ Successfully processed {downloaded_count} file(s)") + return 0 + + log("No downloads completed", file=sys.stderr) + return 1 + + 
except Exception as e: + log(f"Error in download-file: {e}", file=sys.stderr) + return 1 + + def _resolve_output_dir(self, parsed: Dict[str, Any], config: Dict[str, Any]) -> Optional[Path]: + """Resolve the output directory from storage location or config.""" + storage_location = parsed.get("storage") + + # Priority 1: --storage flag + if storage_location: + try: + return SharedArgs.resolve_storage(storage_location) + except Exception as e: + log(f"Invalid storage location: {e}", file=sys.stderr) + return None + + # Priority 2: Config outfile + if config and config.get("outfile"): + try: + return Path(config["outfile"]).expanduser() + except Exception: + pass + + # Priority 3: Default (home/Downloads) + final_output_dir = Path.home() / "Downloads" + debug(f"Using default directory: {final_output_dir}") + + # Ensure directory exists + try: + final_output_dir.mkdir(parents=True, exist_ok=True) + except Exception as e: + log(f"Cannot create output directory {final_output_dir}: {e}", file=sys.stderr) + return None + + return final_output_dir + + def _build_pipe_object(self, download_result: Any, url: str, output_dir: Path) -> Dict[str, Any]: + """Create a PipeObject-compatible dict from a download result.""" + # Try to get file path from result + file_path = None + if hasattr(download_result, 'path'): + file_path = download_result.path + elif isinstance(download_result, dict) and 'path' in download_result: + file_path = download_result['path'] + + if not file_path: + # Fallback: assume result is the path itself + file_path = str(download_result) + + media_path = Path(file_path) + hash_value = self._compute_file_hash(media_path) + title = media_path.stem + + # Build tags with title for searchability + tags = [f"title:{title}"] + + # Prefer canonical fields while keeping legacy keys for compatibility + return { + "path": str(media_path), + "hash": hash_value, + "file_hash": hash_value, + "title": title, + "file_title": title, + "action": "cmdlet:download-file", + "download_mode": "file", + "url": url or (download_result.get('url') if isinstance(download_result, dict) else None), + "store": "local", + "storage_source": "downloads", + "media_kind": "file", + "tags": tags, + } + + def _compute_file_hash(self, filepath: Path) -> str: + """Compute SHA256 hash of a file.""" + import hashlib + sha256_hash = hashlib.sha256() + with open(filepath, "rb") as f: + for byte_block in iter(lambda: f.read(4096), b""): + sha256_hash.update(byte_block) + return sha256_hash.hexdigest() + + +# Module-level singleton registration +CMDLET = Download_File() diff --git a/cmdlets/download_media.py b/cmdlets/download_media.py new file mode 100644 index 0000000..c736650 --- /dev/null +++ b/cmdlets/download_media.py @@ -0,0 +1,1445 @@ +"""Download media from url using yt-dlp (streaming sites only). + +Focused cmdlet for video/audio downloads from yt-dlp-supported sites: +- YouTube, Twitch, Dailymotion, Vimeo, etc.
+- No direct file downloads (use download-file for that) +- Playlist detection with item selection +- Clip extraction (time ranges) +- Format selection and audio/video modes +- Tags extraction and metadata integration +""" + +from __future__ import annotations + +import sys +from pathlib import Path +from typing import Any, Dict, Iterator, List, Optional, Sequence +from urllib.parse import urljoin, urlparse # needed by the LibGen and GoFile handling below + +import glob # noqa: F401 +import hashlib +import json # noqa: F401 +import random +import re +import string +import subprocess +import time +import traceback + +import httpx + +from helper.logger import log, debug +from helper.utils import ensure_directory, sha256_file +from helper.http_client import HTTPClient +from models import DownloadError, DownloadOptions, DownloadMediaResult, DebugLogger, ProgressBar +import pipeline as pipeline_context +from result_table import ResultTable + +from ._shared import Cmdlet, CmdletArg, SharedArgs, create_pipe_object_result, parse_cmdlet_args, register_url_with_local_library, coerce_to_pipe_object + + +# Minimal inlined helpers from helper/download.py (is_url_supported_by_ytdlp, list_formats) +try: + import yt_dlp # type: ignore + from yt_dlp.extractor import gen_extractors # type: ignore +except Exception as exc: + yt_dlp = None # type: ignore + YTDLP_IMPORT_ERROR = exc +else: + YTDLP_IMPORT_ERROR = None + +try: + from metadata import extract_ytdlp_tags +except ImportError: + extract_ytdlp_tags = None + +_EXTRACTOR_CACHE: List[Any] | None = None + + +def _ensure_yt_dlp_ready() -> None: + if yt_dlp is not None: + return + detail = str(YTDLP_IMPORT_ERROR or "yt-dlp is not installed") + raise DownloadError(f"yt-dlp module not available: {detail}") + + +def is_url_supported_by_ytdlp(url: str) -> bool: + if yt_dlp is None: + return False + global _EXTRACTOR_CACHE + if _EXTRACTOR_CACHE is None: + try: + _EXTRACTOR_CACHE = [ie for ie in gen_extractors()] # type: ignore[arg-type] + except Exception: + _EXTRACTOR_CACHE = [] + for extractor in _EXTRACTOR_CACHE: + try: + if not extractor.suitable(url): + continue + except Exception: + continue + name = getattr(extractor, "IE_NAME", "") + if name.lower() == "generic": + continue + return True + return False + + +def list_formats(url: str, no_playlist: bool = False, playlist_items: Optional[str] = None) -> Optional[List[Dict[str, Any]]]: + _ensure_yt_dlp_ready() + try: + ydl_opts = {"quiet": True, "no_warnings": True, "socket_timeout": 30} + if no_playlist: + ydl_opts["noplaylist"] = True + if playlist_items: + ydl_opts["playlist_items"] = playlist_items + with yt_dlp.YoutubeDL(ydl_opts) as ydl: + debug(f"Fetching format list for: {url}") + info = ydl.extract_info(url, download=False) + formats = info.get("formats", []) + if not formats: + log("No formats available", file=sys.stderr) + return None + result_formats = [] + for fmt in formats: + result_formats.append({ + "format_id": fmt.get("format_id", ""), + "format": fmt.get("format", ""), + "ext": fmt.get("ext", ""), + "resolution": fmt.get("resolution", ""), + "width": fmt.get("width"), + "height": fmt.get("height"), + "fps": fmt.get("fps"), + "vcodec": fmt.get("vcodec", "none"), + "acodec": fmt.get("acodec", "none"), + "filesize": fmt.get("filesize"), + "tbr": fmt.get("tbr"), + }) + debug(f"Found {len(result_formats)} available formats") + return result_formats + except Exception as e: + log(f"✗ Error fetching formats: {e}", file=sys.stderr) + return None + + +def _download_with_sections_via_cli(url: str, ytdl_options: Dict[str, Any],
sections: List[str], quiet: bool = False) -> tuple[Optional[str], Dict[str, Any]]: + sections_list = ytdl_options.get("download_sections", []) + if not sections_list: + return "", {} + + session_id = hashlib.md5((url + str(time.time()) + ''.join(random.choices(string.ascii_letters, k=10))).encode()).hexdigest()[:12] + first_section_info = None + + for section_idx, section in enumerate(sections_list, 1): + base_outtmpl = ytdl_options.get("outtmpl", "%(title)s.%(ext)s") + output_dir_path = Path(base_outtmpl).parent + filename_tmpl = f"{session_id}_{section_idx}" + if base_outtmpl.endswith(".%(ext)s"): + filename_tmpl += ".%(ext)s" + section_outtmpl = str(output_dir_path / filename_tmpl) + + if section_idx == 1: + metadata_cmd = ["yt-dlp", "--dump-json", "--skip-download"] + if ytdl_options.get("cookiefile"): + cookies_path = ytdl_options["cookiefile"].replace("\\", "/") + metadata_cmd.extend(["--cookies", cookies_path]) + if ytdl_options.get("noplaylist"): + metadata_cmd.append("--no-playlist") + metadata_cmd.append(url) + try: + meta_result = subprocess.run(metadata_cmd, capture_output=True, text=True) + if meta_result.returncode == 0 and meta_result.stdout: + try: + info_dict = json.loads(meta_result.stdout.strip()) + first_section_info = info_dict + if not quiet: + debug(f"Extracted title from metadata: {info_dict.get('title')}") + except json.JSONDecodeError: + if not quiet: + debug("Could not parse JSON metadata") + except Exception as e: + if not quiet: + debug(f"Error extracting metadata: {e}") + + cmd = ["yt-dlp"] + if ytdl_options.get("format"): + cmd.extend(["-f", ytdl_options["format"]]) + if ytdl_options.get("force_keyframes_at_cuts"): + cmd.extend(["--force-keyframes-at-cuts"]) if ytdl_options.get("force_keyframes_at_cuts") else None + cmd.extend(["-o", section_outtmpl]) + if ytdl_options.get("cookiefile"): + cookies_path = ytdl_options["cookiefile"].replace("\\", "/") + cmd.extend(["--cookies", cookies_path]) + if ytdl_options.get("noplaylist"): + cmd.append("--no-playlist") + cmd.append(url) + if not quiet: + debug(f"Running yt-dlp for section: {section}") + try: + subprocess.run(cmd, check=True) + except Exception as exc: + if not quiet: + debug(f"yt-dlp error for section {section}: {exc}") + + return session_id, first_section_info or {} + + +def _build_ytdlp_options(opts: DownloadOptions) -> Dict[str, Any]: + ensure_directory(opts.output_dir) + outtmpl = str((opts.output_dir / "%(title)s.%(ext)s").resolve()) + base_options: Dict[str, Any] = { + "outtmpl": outtmpl, + "quiet": True, + "no_warnings": True, + "noprogress": True, + "socket_timeout": 30, + "retries": 10, + "fragment_retries": 10, + "http_chunk_size": 10_485_760, + "restrictfilenames": True, + "progress_hooks": [] if opts.quiet else [_progress_callback], + } + + if opts.cookies_path and opts.cookies_path.is_file(): + base_options["cookiefile"] = str(opts.cookies_path) + else: + from hydrus_health_check import get_cookies_file_path # local import + global_cookies = get_cookies_file_path() + if global_cookies: + base_options["cookiefile"] = global_cookies + + if opts.no_playlist: + base_options["noplaylist"] = True + + if opts.mode == "audio": + base_options["format"] = opts.ytdl_format or "251/140/bestaudio" + base_options["postprocessors"] = [{"key": "FFmpegExtractAudio"}] + else: + base_options["format"] = opts.ytdl_format or "bestvideo+bestaudio/best" + base_options["format_sort"] = ["res:4320", "res:2880", "res:2160", "res:1440", "res:1080", "res:720", "res"] + + if opts.clip_sections: + sections = [] + for 
section_range in opts.clip_sections.split(','): + try: + start_s, end_s = [int(x) for x in section_range.split('-')] + def _secs_to_hms(s: int) -> str: + minutes, seconds = divmod(s, 60) + hours, minutes = divmod(minutes, 60) + return f"{hours:02d}:{minutes:02d}:{seconds:02d}" + sections.append(f"*{_secs_to_hms(start_s)}-{_secs_to_hms(end_s)}") + except (ValueError, AttributeError): + pass + if sections: + base_options["download_sections"] = sections + debug(f"Download sections configured: {', '.join(sections)}") + + if opts.playlist_items: + base_options["playlist_items"] = opts.playlist_items + + if not opts.quiet: + debug(f"yt-dlp: mode={opts.mode}, format={base_options.get('format')}") + return base_options + + +def _iter_download_entries(info: Dict[str, Any]) -> Iterator[Dict[str, Any]]: + queue: List[Dict[str, Any]] = [info] + seen: set[int] = set() + while queue: + current = queue.pop(0) + obj_id = id(current) + if obj_id in seen: + continue + seen.add(obj_id) + entries = current.get("entries") + if isinstance(entries, list): + for entry in entries: + queue.append(entry) + if current.get("requested_downloads") or not entries: + yield current + + +def _candidate_paths(entry: Dict[str, Any], output_dir: Path) -> Iterator[Path]: + requested = entry.get("requested_downloads") + if isinstance(requested, list): + for item in requested: + if isinstance(item, dict): + fp = item.get("filepath") or item.get("_filename") + if fp: + yield Path(fp) + for key in ("filepath", "_filename", "filename"): + value = entry.get(key) + if value: + yield Path(value) + if entry.get("filename"): + yield output_dir / entry["filename"] + + +def _resolve_entry_and_path(info: Dict[str, Any], output_dir: Path) -> tuple[Dict[str, Any], Path]: + for entry in _iter_download_entries(info): + for candidate in _candidate_paths(entry, output_dir): + if candidate.is_file(): + return entry, candidate + if not candidate.is_absolute(): + maybe = output_dir / candidate + if maybe.is_file(): + return entry, maybe + raise FileNotFoundError("yt-dlp did not report a downloaded media file") + + +def _extract_sha256(info: Dict[str, Any]) -> Optional[str]: + for payload in [info] + info.get("entries", []): + if not isinstance(payload, dict): + continue + hashes = payload.get("hashes") + if isinstance(hashes, dict): + for key in ("sha256", "sha-256", "sha_256"): + if key in hashes and isinstance(hashes[key], str) and hashes[key].strip(): + return hashes[key].strip() + for key in ("sha256", "sha-256", "sha_256"): + value = payload.get(key) + if isinstance(value, str) and value.strip(): + return value.strip() + return None + + +def _get_libgen_download_url(libgen_url: str) -> Optional[str]: + try: + from urllib.parse import urlparse + import requests + parsed = urlparse(libgen_url) + if 'libgen' not in parsed.netloc.lower(): + return None + if '/file.php' not in parsed.path.lower(): + return None + session = requests.Session() + session.headers.update({'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}) + debug(f"Following LibGen redirect chain for: {libgen_url}") + try: + response = session.get(libgen_url, timeout=10, allow_redirects=True) + final_url = response.url + try: + from bs4 import BeautifulSoup + soup = BeautifulSoup(response.content, 'html.parser') + for link in soup.find_all('a'): + href = link.get('href') + if href and 'get.php' in href: + return urljoin(libgen_url, href) + except ImportError: + pass + if final_url != libgen_url: + debug(f"LibGen resolved to mirror: {final_url}") + return 
final_url + except requests.RequestException as e: + log(f"Error following LibGen redirects: {e}", file=sys.stderr) + try: + response = session.head(libgen_url, allow_redirects=True, timeout=10) + if response.url != libgen_url: + return response.url + except: + pass + return None + except Exception as e: + log(f"Error resolving LibGen URL: {e}", file=sys.stderr) + return None + + +def _progress_callback(status: Dict[str, Any]) -> None: + """Simple progress callback using logger.""" + event = status.get("status") + if event == "downloading": + percent = status.get("_percent_str", "?") + speed = status.get("_speed_str", "?") + eta = status.get("_eta_str", "?") + sys.stdout.write(f"\r[download] {percent} at {speed} ETA {eta} ") + sys.stdout.flush() + elif event == "finished": + sys.stdout.write("\r" + " " * 70 + "\r") + sys.stdout.flush() + debug(f"✓ Download finished: {status.get('filename')}") + elif event in ("postprocessing", "processing"): + debug(f"Post-processing: {status.get('postprocessor')}") + + +def _download_direct_file( + url: str, + output_dir: Path, + debug_logger: Optional[DebugLogger] = None, + quiet: bool = False, +) -> DownloadMediaResult: + """Download a direct file (PDF, image, document, etc.) without yt-dlp.""" + ensure_directory(output_dir) + + from urllib.parse import unquote, urlparse, parse_qs + import re + + # Extract filename from URL + parsed_url = urlparse(url) + url_path = parsed_url.path + + # Try to get filename from query parameters first (for LibGen and similar services) + # e.g., ?filename=Book+Title.pdf or &download=filename.pdf + filename = None + if parsed_url.query: + query_params = parse_qs(parsed_url.query) + for param_name in ('filename', 'download', 'file', 'name'): + if param_name in query_params and query_params[param_name]: + filename = query_params[param_name][0] + filename = unquote(filename) + break + + # If not found in query params, extract from URL path + if not filename or not filename.strip(): + filename = url_path.split("/")[-1] if url_path else "" + filename = unquote(filename) + + # Remove query strings from filename if any + if "?" in filename: + filename = filename.split("?")[0] + + # Try to get real filename from Content-Disposition header (HEAD request) + try: + with HTTPClient(timeout=10.0) as client: + response = client._request("HEAD", url, follow_redirects=True) + content_disposition = response.headers.get("content-disposition", "") + if content_disposition: + # Extract filename from Content-Disposition header + # Format: attachment; filename="filename.pdf" or filename=filename.pdf + match = re.search(r'filename\*?=(?:"([^"]*)"|([^;\s]*))', content_disposition) + if match: + extracted_name = match.group(1) or match.group(2) + if extracted_name: + filename = unquote(extracted_name) + if not quiet: + debug(f"Filename from Content-Disposition: {filename}") + except Exception as e: + if not quiet: + log(f"Could not get filename from headers: {e}", file=sys.stderr) + + # Fallback if we still don't have a good filename + if not filename or "." 
not in filename: + filename = "downloaded_file.bin" + + file_path = output_dir / filename + progress_bar = ProgressBar() + + if not quiet: + debug(f"Direct download: {filename}") + + try: + start_time = time.time() + downloaded_bytes = [0] + total_bytes = [0] + last_progress_time = [start_time] + + def progress_callback(bytes_downloaded: int, content_length: int) -> None: + downloaded_bytes[0] = bytes_downloaded + total_bytes[0] = content_length + + now = time.time() + if now - last_progress_time[0] >= 0.5 and total_bytes[0] > 0: + elapsed = now - start_time + percent = (bytes_downloaded / content_length) * 100 if content_length > 0 else 0 + speed = bytes_downloaded / elapsed if elapsed > 0 else 0 + eta_seconds = (content_length - bytes_downloaded) / speed if speed > 0 else 0 + + speed_str = progress_bar.format_bytes(speed) + "/s" + minutes, seconds = divmod(int(eta_seconds), 60) + hours, minutes = divmod(minutes, 60) + eta_str = f"{hours:02d}:{minutes:02d}:{seconds:02d}" + + progress_line = progress_bar.format_progress( + percent_str=f"{percent:.1f}%", + downloaded=bytes_downloaded, + total=content_length, + speed_str=speed_str, + eta_str=eta_str, + ) + if not quiet: + debug(progress_line) + last_progress_time[0] = now + + with HTTPClient(timeout=30.0) as client: + client.download(url, str(file_path), progress_callback=progress_callback) + + elapsed = time.time() - start_time + avg_speed_str = progress_bar.format_bytes(downloaded_bytes[0] / elapsed if elapsed > 0 else 0) + "/s" + if not quiet: + debug(f"✓ Downloaded in {elapsed:.1f}s at {avg_speed_str}") + + # For direct file downloads, create minimal info dict without filename as title + # This prevents creating duplicate title: tags when filename gets auto-generated + # We'll add title back later only if we couldn't extract meaningful tags + info = { + "id": filename.rsplit(".", 1)[0], + "ext": filename.rsplit(".", 1)[1] if "." 
in filename else "bin", + "webpage_url": url, + } + + hash_value = None + try: + hash_value = sha256_file(file_path) + except Exception: + pass + + tags = [] + if extract_ytdlp_tags: + try: + tags = extract_ytdlp_tags(info) + except Exception as e: + log(f"Error extracting tags: {e}", file=sys.stderr) + + # Only use filename as a title tag if we couldn't extract any meaningful tags + # This prevents duplicate title: tags when the filename could be mistaken for metadata + if not any(t.startswith('title:') for t in tags): + # Re-extract tags with filename as title only if needed + info['title'] = filename + tags = [] + if extract_ytdlp_tags: + try: + tags = extract_ytdlp_tags(info) + except Exception as e: + log(f"Error extracting tags with filename: {e}", file=sys.stderr) + + if debug_logger is not None: + debug_logger.write_record( + "direct-file-downloaded", + {"url": url, "path": str(file_path), "hash": hash_value}, + ) + + return DownloadMediaResult( + path=file_path, + info=info, + tags=tags, + source_url=url, + hash_value=hash_value, + ) + + except (httpx.HTTPError, httpx.RequestError) as exc: + log(f"Download error: {exc}", file=sys.stderr) + if debug_logger is not None: + debug_logger.write_record( + "exception", + {"phase": "direct-file", "url": url, "error": str(exc)}, + ) + raise DownloadError(f"Failed to download {url}: {exc}") from exc + except Exception as exc: + log(f"Error downloading file: {exc}", file=sys.stderr) + if debug_logger is not None: + debug_logger.write_record( + "exception", + { + "phase": "direct-file", + "url": url, + "error": str(exc), + "traceback": traceback.format_exc(), + }, + ) + raise DownloadError(f"Error downloading file: {exc}") from exc + + +def probe_url(url: str, no_playlist: bool = False, timeout_seconds: int = 15) -> Optional[Dict[str, Any]]: + """Probe URL to extract metadata WITHOUT downloading. + + Args: + url: URL to probe + no_playlist: If True, ignore playlists and probe only the single video + timeout_seconds: Max seconds to wait for probe (default 15s) + + Returns: + Dict with keys: extractor, title, entries (if playlist), duration, etc. + Returns None if not supported by yt-dlp or on timeout. 
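+
+    Illustrative example (keys mirror the dict assembled in _do_probe below):
+
+        info = probe_url("https://youtube.com/watch?v=xyz", no_playlist=True)
+        if info and info.get("entries"):
+            print(f"playlist with {len(info['entries'])} entries")
+        elif info:
+            print(info.get("title"))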
+ """ + if not is_url_supported_by_ytdlp(url): + return None + + # Wrap probe in timeout to prevent hanging on large playlists + import threading + from typing import cast + + result_container: List[Optional[Any]] = [None, None] # [result, error] + + def _do_probe() -> None: + try: + _ensure_yt_dlp_ready() + + assert yt_dlp is not None + # Extract info without downloading + # Use extract_flat='in_playlist' to get full metadata for playlist items + ydl_opts = { + "quiet": True, # Suppress all output + "no_warnings": True, + "socket_timeout": 10, + "retries": 2, # Reduce retries for faster timeout + "skip_download": True, # Don't actually download + "extract_flat": "in_playlist", # Get playlist with metadata for each entry + "noprogress": True, # No progress bars + } + + # Add cookies if available (lazy import to avoid circular dependency) + from hydrus_health_check import get_cookies_file_path # local import + + global_cookies = get_cookies_file_path() + if global_cookies: + ydl_opts["cookiefile"] = global_cookies + + # Add no_playlist option if specified + if no_playlist: + ydl_opts["noplaylist"] = True + + with yt_dlp.YoutubeDL(ydl_opts) as ydl: # type: ignore[arg-type] + info = ydl.extract_info(url, download=False) + + if not isinstance(info, dict): + result_container[0] = None + return + + # Extract relevant fields + result_container[0] = { + "extractor": info.get("extractor", ""), + "title": info.get("title", ""), + "entries": info.get("entries", []), # Will be populated if playlist + "duration": info.get("duration"), + "uploader": info.get("uploader"), + "description": info.get("description"), + "url": url, + } + except Exception as exc: + log(f"Probe error for {url}: {exc}") + result_container[1] = exc + + thread = threading.Thread(target=_do_probe, daemon=False) + thread.start() + thread.join(timeout=timeout_seconds) + + if thread.is_alive(): + # Probe timed out - return None to fall back to direct download + debug(f"Probe timeout for {url} (>={timeout_seconds}s), proceeding with download") + return None + + if result_container[1] is not None: + # Probe error - return None to proceed anyway + return None + + return cast(Optional[Dict[str, Any]], result_container[0]) + + +def download_media( + opts: DownloadOptions, + *, + debug_logger: Optional[DebugLogger] = None, +) -> DownloadMediaResult: + """Download media from URL using yt-dlp or direct HTTP download. + + Args: + opts: DownloadOptions with url, mode, output_dir, etc. 
+ debug_logger: Optional debug logger for troubleshooting + + Returns: + DownloadMediaResult with path, info, tags, hash + + Raises: + DownloadError: If download fails + """ + # Handle LibGen url specially + # file.php redirects to mirrors, get.php is direct from modern API + if 'libgen' in opts.url.lower(): + if '/get.php' in opts.url.lower(): + # Modern API get.php links are direct downloads from mirrors (not file redirects) + if not opts.quiet: + log(f"Detected LibGen get.php URL, downloading directly...") + if debug_logger is not None: + debug_logger.write_record("libgen-direct", {"url": opts.url}) + return _download_direct_file(opts.url, opts.output_dir, debug_logger, quiet=opts.quiet) + elif '/file.php' in opts.url.lower(): + # Old-style file.php redirects to mirrors, we need to resolve + if not opts.quiet: + log(f"Detected LibGen file.php URL, resolving to actual mirror...") + actual_url = _get_libgen_download_url(opts.url) + if actual_url and actual_url != opts.url: + if not opts.quiet: + log(f"Resolved LibGen URL to mirror: {actual_url}") + opts.url = actual_url + # After resolution, this will typically be an onion link or direct file + # Skip yt-dlp for this (it won't support onion/mirrors), go direct + if debug_logger is not None: + debug_logger.write_record("libgen-resolved", {"original": opts.url, "resolved": actual_url}) + return _download_direct_file(opts.url, opts.output_dir, debug_logger, quiet=opts.quiet) + else: + if not opts.quiet: + log(f"Could not resolve LibGen URL, trying direct download anyway", file=sys.stderr) + if debug_logger is not None: + debug_logger.write_record("libgen-resolve-failed", {"url": opts.url}) + return _download_direct_file(opts.url, opts.output_dir, debug_logger, quiet=opts.quiet) + + # Handle GoFile shares with a dedicated resolver before yt-dlp/direct fallbacks + try: + netloc = urlparse(opts.url).netloc.lower() + except Exception: + netloc = "" + if "gofile.io" in netloc: + msg = "GoFile links are currently unsupported" + if not opts.quiet: + debug(msg) + if debug_logger is not None: + debug_logger.write_record("gofile-unsupported", {"url": opts.url}) + raise DownloadError(msg) + + # Determine if yt-dlp should be used + ytdlp_supported = is_url_supported_by_ytdlp(opts.url) + if ytdlp_supported: + # Skip probe for playlists with item selection (probe can hang on large playlists) + # Just proceed straight to download which will handle item selection + if opts.playlist_items: + debug(f"Skipping probe for playlist (item selection: {opts.playlist_items}), proceeding with download") + probe_result = {"url": opts.url} # Minimal probe result + else: + probe_result = probe_url(opts.url, no_playlist=opts.no_playlist, timeout_seconds=15) + + if probe_result is None: + if not opts.quiet: + log(f"URL supported by yt-dlp but no media detected, falling back to direct download: {opts.url}") + if debug_logger is not None: + debug_logger.write_record("ytdlp-skip-no-media", {"url": opts.url}) + return _download_direct_file(opts.url, opts.output_dir, debug_logger, quiet=opts.quiet) + else: + if not opts.quiet: + log(f"URL not supported by yt-dlp, trying direct download: {opts.url}") + if debug_logger is not None: + debug_logger.write_record("direct-file-attempt", {"url": opts.url}) + return _download_direct_file(opts.url, opts.output_dir, debug_logger, quiet=opts.quiet) + + _ensure_yt_dlp_ready() + + ytdl_options = _build_ytdlp_options(opts) + if not opts.quiet: + debug(f"Starting yt-dlp download: {opts.url}") + if debug_logger is not None: + 
debug_logger.write_record("ytdlp-start", {"url": opts.url}) + + assert yt_dlp is not None + try: + # Debug: show what options we're using + if not opts.quiet: + if ytdl_options.get("download_sections"): + debug(f"[yt-dlp] download_sections: {ytdl_options['download_sections']}") + debug(f"[yt-dlp] force_keyframes_at_cuts: {ytdl_options.get('force_keyframes_at_cuts', False)}") + + # Use subprocess when download_sections are present (Python API doesn't support them properly) + session_id = None + first_section_info = {} + if ytdl_options.get("download_sections"): + session_id, first_section_info = _download_with_sections_via_cli(opts.url, ytdl_options, ytdl_options.get("download_sections", []), quiet=opts.quiet) + info = None + else: + with yt_dlp.YoutubeDL(ytdl_options) as ydl: # type: ignore[arg-type] + info = ydl.extract_info(opts.url, download=True) + except Exception as exc: + log(f"yt-dlp failed: {exc}", file=sys.stderr) + if debug_logger is not None: + debug_logger.write_record( + "exception", + { + "phase": "yt-dlp", + "error": str(exc), + "traceback": traceback.format_exc(), + }, + ) + raise DownloadError("yt-dlp download failed") from exc + + # If we used subprocess, we need to find the file manually + if info is None: + # Find files created/modified during this download (after we started) + # Look for files matching the expected output template pattern + try: + import glob + import time + import re + + # Get the expected filename pattern from outtmpl + # For sections: "C:\path\{session_id}.section_1_of_3.ext", etc. + # For non-sections: "C:\path\title.ext" + + # Wait a moment to ensure files are fully written + time.sleep(0.5) + + # List all files in output_dir, sorted by modification time + files = sorted(opts.output_dir.iterdir(), key=lambda p: p.stat().st_mtime, reverse=True) + if not files: + raise FileNotFoundError(f"No files found in {opts.output_dir}") + + # If we downloaded sections, look for files with the session_id pattern + if opts.clip_sections and session_id: + # Pattern: "{session_id}_1.ext", "{session_id}_2.ext", etc. 
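+                    # e.g. a session id of "ab12cd34ef56" with three sections yields
+                    # "ab12cd34ef56_1.mp4", "ab12cd34ef56_2.mp4", "ab12cd34ef56_3.mp4" (extension is illustrative);
+                    # the numeric suffix is captured by the regex below so the sections can be sorted back into order.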
+ section_pattern = re.compile(rf'^{re.escape(session_id)}_(\d+)\.') + matching_files = [f for f in files if section_pattern.search(f.name)] + + if matching_files: + # Sort by section number to ensure correct order + def extract_section_num(path: Path) -> int: + match = section_pattern.search(path.name) + return int(match.group(1)) if match else 999 + + matching_files.sort(key=extract_section_num) + debug(f"Found {len(matching_files)} section file(s) matching pattern") + + # Now rename section files to use hash-based names + # This ensures unique filenames for each section content + renamed_files = [] + + for idx, section_file in enumerate(matching_files, 1): + try: + # Calculate hash for the file + file_hash = sha256_file(section_file) + ext = section_file.suffix + new_name = f"{file_hash}{ext}" + new_path = opts.output_dir / new_name + + if new_path.exists() and new_path != section_file: + # If file with same hash exists, use it and delete the temp one + debug(f"File with hash {file_hash} already exists, using existing file.") + try: + section_file.unlink() + except OSError: + pass + renamed_files.append(new_path) + else: + section_file.rename(new_path) + debug(f"Renamed section file: {section_file.name} → {new_name}") + renamed_files.append(new_path) + except Exception as e: + debug(f"Failed to process section file {section_file.name}: {e}") + renamed_files.append(section_file) + + media_path = renamed_files[0] + media_paths = renamed_files + if not opts.quiet: + debug(f"✓ Downloaded {len(media_paths)} section file(s) (session: {session_id})") + else: + # Fallback to most recent file if pattern not found + media_path = files[0] + media_paths = None + if not opts.quiet: + debug(f"✓ Downloaded section file (pattern not found): {media_path.name}") + else: + # No sections, just take the most recent file + media_path = files[0] + media_paths = None + + if not opts.quiet: + debug(f"✓ Downloaded: {media_path.name}") + if debug_logger is not None: + debug_logger.write_record("ytdlp-file-found", {"path": str(media_path)}) + except Exception as exc: + log(f"Error finding downloaded file: {exc}", file=sys.stderr) + if debug_logger is not None: + debug_logger.write_record( + "exception", + {"phase": "find-file", "error": str(exc)}, + ) + raise DownloadError(str(exc)) from exc + + # Create result with minimal data extracted from filename + file_hash = sha256_file(media_path) + + # For section downloads, create tags with the title and build proper info dict + tags = [] + title = '' + if first_section_info: + title = first_section_info.get('title', '') + if title: + tags.append(f'title:{title}') + debug(f"Added title tag for section download: {title}") + + # Build info dict - always use extracted title if available, not hash + if first_section_info: + info_dict = first_section_info + else: + info_dict = { + "id": media_path.stem, + "title": title or media_path.stem, + "ext": media_path.suffix.lstrip(".") + } + + return DownloadMediaResult( + path=media_path, + info=info_dict, + tags=tags, + source_url=opts.url, + hash_value=file_hash, + paths=media_paths, # Include all section files if present + ) + + if not isinstance(info, dict): + log(f"Unexpected yt-dlp response: {type(info)}", file=sys.stderr) + raise DownloadError("Unexpected yt-dlp response type") + + info_dict: Dict[str, Any] = info + if debug_logger is not None: + debug_logger.write_record( + "ytdlp-info", + { + "keys": sorted(info_dict.keys()), + "is_playlist": bool(info_dict.get("entries")), + }, + ) + + try: + entry, media_path = 
_resolve_entry_and_path(info_dict, opts.output_dir) + except FileNotFoundError as exc: + log(f"Error: {exc}", file=sys.stderr) + if debug_logger is not None: + debug_logger.write_record( + "exception", + {"phase": "resolve-path", "error": str(exc)}, + ) + raise DownloadError(str(exc)) from exc + + if debug_logger is not None: + debug_logger.write_record( + "resolved-media", + {"path": str(media_path), "entry_keys": sorted(entry.keys())}, + ) + + # Extract hash from metadata or compute + hash_value = _extract_sha256(entry) or _extract_sha256(info_dict) + if not hash_value: + try: + hash_value = sha256_file(media_path) + except OSError as exc: + if debug_logger is not None: + debug_logger.write_record( + "hash-error", + {"path": str(media_path), "error": str(exc)}, + ) + + # Extract tags using metadata.py + tags = [] + if extract_ytdlp_tags: + try: + tags = extract_ytdlp_tags(entry) + except Exception as e: + log(f"Error extracting tags: {e}", file=sys.stderr) + + source_url = ( + entry.get("webpage_url") + or entry.get("original_url") + or entry.get("url") + ) + + if not opts.quiet: + debug(f"✓ Downloaded: {media_path.name} ({len(tags)} tags)") + if debug_logger is not None: + debug_logger.write_record( + "downloaded", + { + "path": str(media_path), + "tag_count": len(tags), + "source_url": source_url, + "sha256": hash_value, + }, + ) + + return DownloadMediaResult( + path=media_path, + info=entry, + tags=tags, + source_url=source_url, + hash_value=hash_value, + ) + + +# Timeout handler to prevent yt-dlp hangs +def _download_with_timeout(opts: DownloadOptions, timeout_seconds: int = 300) -> Any: + """Download with timeout protection. + + Args: + opts: DownloadOptions + timeout_seconds: Max seconds to wait (default 300s = 5 min) + + Returns: + DownloadMediaResult + + Raises: + DownloadError: If timeout exceeded + """ + import threading + from typing import cast + + result_container: List[Optional[Any]] = [None, None] # [result, error] + + def _do_download() -> None: + try: + result_container[0] = download_media(opts) + except Exception as e: + result_container[1] = e + + thread = threading.Thread(target=_do_download, daemon=False) + thread.start() + thread.join(timeout=timeout_seconds) + + if thread.is_alive(): + # Thread still running - timeout + raise DownloadError(f"Download timeout after {timeout_seconds} seconds for {opts.url}") + + if result_container[1] is not None: + raise cast(Exception, result_container[1]) + + if result_container[0] is None: + raise DownloadError(f"Download failed for {opts.url}") + + return cast(Any, result_container[0]) + + +class Download_Media(Cmdlet): + """Class-based download-media cmdlet - yt-dlp only, streaming sites.""" + + def __init__(self) -> None: + """Initialize download-media cmdlet.""" + super().__init__( + name="download-media", + summary="Download media from streaming sites (YouTube, Twitch, etc.)", + usage="download-media [options] or search-file | download-media [options]", + alias=["dl-media", "download-ytdlp"], + arg=[ + CmdletArg(name="url", type="string", required=False, description="URL to download (yt-dlp supported sites only)", variadic=True), + CmdletArg(name="-url", type="string", description="URL to download (alias for positional argument)", variadic=True), + CmdletArg(name="audio", type="flag", alias="a", description="Download audio only"), + CmdletArg(name="video", type="flag", alias="v", description="Download video (default)"), + CmdletArg(name="format", type="string", alias="fmt", description="Explicit yt-dlp format selector"), + 
CmdletArg(name="clip", type="string", description="Extract time range: MM:SS-MM:SS"), + CmdletArg(name="section", type="string", description="Download sections: TIME_RANGE[,TIME_RANGE...]"), + CmdletArg(name="item", type="string", description="Item selection for playlists/formats"), + ], + detail=["Download media from streaming sites using yt-dlp.", "For direct file downloads, use download-file."], + exec=self.run, + ) + self.register() + + def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: + """Main execution method.""" + stage_ctx = pipeline_context.get_stage_context() + in_pipeline = stage_ctx is not None and getattr(stage_ctx, "total_stages", 1) > 1 + if in_pipeline and isinstance(config, dict): + config["_quiet_background_output"] = True + return self._run_impl(result, args, config) + + def _run_impl(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: + """Main download implementation for yt-dlp-supported url.""" + try: + debug("Starting download-media") + + # Parse arguments + parsed = parse_cmdlet_args(args, self) + + # Extract options + raw_url = parsed.get("url", []) + if isinstance(raw_url, str): + raw_url = [raw_url] + + # If no url provided via args, try to extract from piped result + if not raw_url and result: + from ._shared import get_field + # Handle single result or list of results + results_to_check = result if isinstance(result, list) else [result] + for item in results_to_check: + # Try to get URL from various possible fields + url = get_field(item, "url") or get_field(item, "target") + if url: + raw_url.append(url) + + # Filter to yt-dlp supported url only + supported_url = [ + url for url in raw_url + if is_url_supported_by_ytdlp(url) + ] + + if not supported_url: + log("No yt-dlp-supported url to download", file=sys.stderr) + return 1 + + # Log unsupported url if any + unsupported = set(raw_url) - set(supported_url) + if unsupported: + debug(f"Skipping {len(unsupported)} unsupported url (use download-file for direct downloads)") + + # Get output directory + final_output_dir = self._resolve_output_dir(parsed, config) + if not final_output_dir: + return 1 + + debug(f"Output directory: {final_output_dir}") + + # Get other options + clip_spec = parsed.get("clip") + section_spec = parsed.get("section") + + # Parse clip/section ranges if specified + clip_range = None + if clip_spec: + clip_range = self._parse_time_range(clip_spec) + if not clip_range: + log(f"Invalid clip format: {clip_spec}", file=sys.stderr) + return 1 + + section_ranges = None + if section_spec: + section_ranges = self._parse_section_ranges(section_spec) + if not section_ranges: + log(f"Invalid section format: {section_spec}", file=sys.stderr) + return 1 + + # Check if we need to show format selection + playlist_items = str(parsed.get("item")) if parsed.get("item") else None + ytdl_format = parsed.get("format") + + # If no -item, no explicit -format specified, and single URL, check for multiple formats/playlist + if not playlist_items and not ytdl_format and len(supported_url) == 1: + url = supported_url[0] + formats = list_formats(url, no_playlist=False) + + if formats and len(formats) > 1: + # Filter formats: multiple videos (640x+, one per resolution tier) + 1 best audio + video_formats = [] + audio_formats = [] + + for fmt in formats: + width = fmt.get("width") or 0 + height = fmt.get("height") or 0 + vcodec = fmt.get("vcodec", "none") + acodec = fmt.get("acodec", "none") + + # Classify as video or audio + if vcodec != "none" and acodec == "none" and 
width >= 640: + video_formats.append(fmt) + elif acodec != "none" and vcodec == "none": + audio_formats.append(fmt) + + # Group videos by resolution and select best format per resolution + filtered_formats = [] + if video_formats: + # Group by height (resolution tier) + from collections import defaultdict + by_resolution = defaultdict(list) + for f in video_formats: + height = f.get("height") or 0 + by_resolution[height].append(f) + + # For each resolution, prefer AV1, then highest bitrate + for height in sorted(by_resolution.keys(), reverse=True): + candidates = by_resolution[height] + av1_formats = [f for f in candidates if "av01" in f.get("vcodec", "")] + if av1_formats: + best = max(av1_formats, key=lambda f: f.get("tbr") or 0) + else: + best = max(candidates, key=lambda f: f.get("tbr") or 0) + filtered_formats.append(best) + + # Select best audio: highest bitrate (any format) + if audio_formats: + best_audio = max(audio_formats, key=lambda f: f.get("tbr") or f.get("abr") or 0) + filtered_formats.append(best_audio) + + if not filtered_formats: + # Fallback to all formats if filtering resulted in nothing + filtered_formats = formats + + debug(f"Filtered to {len(filtered_formats)} formats from {len(formats)} total") + + # Show format selection table + log(f"Available formats for {url}:", file=sys.stderr) + log("", file=sys.stderr) + + # Build the base command that will be replayed with @N selection + # Include any additional args from the original command + base_cmd = f'download-media "{url}"' + # Preserve any additional pipeline stages if this is in a pipeline + remaining_args = [arg for arg in args if arg not in [url] and not arg.startswith('-')] + if remaining_args: + base_cmd += ' ' + ' '.join(remaining_args) + + # Create result table for display + table = ResultTable() + table.title = f"Available formats for {url}" + table.set_source_command("download-media", [url]) + + # Collect results for table + results_list = [] + + # Emit format results for selection + for idx, fmt in enumerate(filtered_formats, 1): + resolution = fmt.get("resolution", "") + ext = fmt.get("ext", "") + vcodec = fmt.get("vcodec", "none") + acodec = fmt.get("acodec", "none") + filesize = fmt.get("filesize") + format_id = fmt.get("format_id", "") + + # Format size + size_str = "" + if filesize: + size_mb = filesize / (1024 * 1024) + size_str = f"{size_mb:.1f}MB" + + # Build format description + desc_parts = [] + if resolution and resolution != "audio only": + desc_parts.append(resolution) + if ext: + desc_parts.append(ext.upper()) + if vcodec != "none": + desc_parts.append(f"v:{vcodec}") + if acodec != "none": + desc_parts.append(f"a:{acodec}") + if size_str: + desc_parts.append(size_str) + + format_desc = " | ".join(desc_parts) + + # Build format dict for emission and table + format_dict = { + "origin": "download-media", + "title": f"Format {format_id}", + "url": url, + "target": url, + "detail": format_desc, + "annotations": [ext, resolution] if resolution else [ext], + "media_kind": "format", + "cmd": base_cmd, + "columns": [ + ("#", str(idx)), + ("ID", format_id), + ("Resolution", resolution or "N/A"), + ("Ext", ext), + ("Video", vcodec), + ("Audio", acodec), + ("Size", size_str or "N/A"), + ], + "full_metadata": { + "format_id": format_id, + "url": url, + "item_selector": format_id, + }, + "_selection_args": ["-format", format_id] + } + + # Add to results list and table (don't emit - formats should wait for @N selection) + results_list.append(format_dict) + table.add_result(format_dict) + + # Render and 
display the table + # Table is displayed by pipeline runner via set_current_stage_table + + # Set the result table so it displays and is available for @N selection + pipeline_context.set_current_stage_table(table) + pipeline_context.set_last_result_table(table, results_list) + + log(f"", file=sys.stderr) + log(f"Use: @N | download-media to select and download format", file=sys.stderr) + return 0 + + # Download each URL + downloaded_count = 0 + clip_sections_spec = self._build_clip_sections_spec(clip_range, section_ranges) + quiet_mode = bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False + mode = "audio" if parsed.get("audio") else "video" + + for url in supported_url: + try: + debug(f"Processing: {url}") + + # If playlist_items is specified but looks like a format ID (e.g. from table selection), + # treat it as a format selector instead of playlist items. + # This handles the case where @N selection passes -item + actual_format = ytdl_format + actual_playlist_items = playlist_items + + if playlist_items and not ytdl_format: + # Heuristic: if it contains non-numeric chars (excluding ranges/commas) + # it is likely a format ID (e.g. '140-drc', 'best', '137+140') + import re + if re.search(r'[^0-9,-]', playlist_items): + actual_format = playlist_items + actual_playlist_items = None + + opts = DownloadOptions( + url=url, + mode=mode, + output_dir=final_output_dir, + ytdl_format=actual_format, + clip_sections=clip_sections_spec, + playlist_items=actual_playlist_items, + quiet=quiet_mode, + no_playlist=False, + ) + + # Use timeout wrapper to prevent hanging + debug(f"Starting download with 5-minute timeout...") + result_obj = _download_with_timeout(opts, timeout_seconds=300) + debug(f"Download completed, building pipe object...") + pipe_obj_dict = self._build_pipe_object(result_obj, url, opts) + debug(f"Emitting result to pipeline...") + pipeline_context.emit(pipe_obj_dict) + + # Automatically register url with local library + if pipe_obj_dict.get("url"): + pipe_obj = coerce_to_pipe_object(pipe_obj_dict) + register_url_with_local_library(pipe_obj, config) + + downloaded_count += 1 + debug("✓ Downloaded and emitted") + + except DownloadError as e: + log(f"Download failed for {url}: {e}", file=sys.stderr) + except Exception as e: + log(f"Error processing {url}: {e}", file=sys.stderr) + + if downloaded_count > 0: + debug(f"✓ Successfully processed {downloaded_count} URL(s)") + return 0 + + log("No downloads completed", file=sys.stderr) + return 1 + + except Exception as e: + log(f"Error in download-media: {e}", file=sys.stderr) + return 1 + + def _resolve_output_dir(self, parsed: Dict[str, Any], config: Dict[str, Any]) -> Optional[Path]: + """Resolve the output directory from storage location or config.""" + storage_location = parsed.get("storage") + + # Priority 1: --storage flag + if storage_location: + try: + return SharedArgs.resolve_storage(storage_location) + except Exception as e: + log(f"Invalid storage location: {e}", file=sys.stderr) + return None + + # Priority 2: Config outfile + if config and config.get("outfile"): + try: + return Path(config["outfile"]).expanduser() + except Exception: + pass + + # Priority 3: Default (home/Videos) + final_output_dir = Path.home() / "Videos" + debug(f"Using default directory: {final_output_dir}") + + # Ensure directory exists + try: + final_output_dir.mkdir(parents=True, exist_ok=True) + except Exception as e: + log(f"Cannot create output directory {final_output_dir}: {e}", file=sys.stderr) + return None + + return 
final_output_dir + + def _parse_time_range(self, spec: str) -> Optional[tuple]: + """Parse 'MM:SS-MM:SS' format into (start_seconds, end_seconds).""" + try: + parts = spec.split("-") + if len(parts) != 2: + return None + + def to_seconds(ts: str) -> int: + ts = ts.strip() + if ":" in ts: + mm, ss = ts.split(":") + return int(mm) * 60 + int(ss) + return int(ts) + + start = to_seconds(parts[0]) + end = to_seconds(parts[1]) + return (start, end) if start < end else None + except Exception: + return None + + def _parse_section_ranges(self, spec: str) -> Optional[List[tuple]]: + """Parse 'RANGE1,RANGE2,...' where each RANGE is 'MM:SS-MM:SS'.""" + try: + ranges = [] + for range_spec in spec.split(","): + r = self._parse_time_range(range_spec.strip()) + if r is None: + return None + ranges.append(r) + return ranges if ranges else None + except Exception: + return None + + def _build_clip_sections_spec( + self, + clip_range: Optional[tuple], + section_ranges: Optional[List[tuple]], + ) -> Optional[str]: + """Convert parsed clip/section ranges into downloader spec (seconds).""" + ranges: List[str] = [] + if clip_range: + ranges.append(f"{clip_range[0]}-{clip_range[1]}") + if section_ranges: + for start, end in section_ranges: + ranges.append(f"{start}-{end}") + return ",".join(ranges) if ranges else None + + def _build_pipe_object(self, download_result: Any, url: str, opts: DownloadOptions) -> Dict[str, Any]: + """Create a PipeObject-compatible dict from a DownloadMediaResult.""" + info: Dict[str, Any] = download_result.info if isinstance(download_result.info, dict) else {} + media_path = Path(download_result.path) + hash_value = download_result.hash_value or self._compute_file_hash(media_path) + title = info.get("title") or media_path.stem + tags = list(download_result.tags or []) + + # Add title tag for searchability + if title and f"title:{title}" not in tags: + tags.insert(0, f"title:{title}") + + # Build a single canonical URL field; prefer yt-dlp provided webpage_url or info.url, + # but fall back to the original requested URL. If multiple unique urls are available, + # join them into a comma-separated string. + urls_to_consider: List[str] = [] + try: + page_url = info.get("webpage_url") or info.get("url") + if page_url: + urls_to_consider.append(str(page_url)) + except Exception: + pass + if url: + urls_to_consider.append(str(url)) + + seen_urls: List[str] = [] + for u in urls_to_consider: + if u and u not in seen_urls: + seen_urls.append(u) + final_url = ",".join(seen_urls) if seen_urls else None + + # Construct canonical PipeObject dict: hash, store, path, url, title, tags + # Prefer explicit backend names (storage_name/storage_location). If none, default to PATH + # which indicates the file is available at a filesystem path and hasn't been added to a backend yet. 
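+        # Illustrative shape of the emitted dict (values are examples, not real output):
+        #   {"path": "C:/Users/x/Videos/clip.mp4", "hash": "6a3f...", "title": "clip",
+        #    "url": "https://youtube.com/watch?v=xyz", "tags": ["title:clip", ...],
+        #    "action": "cmdlet:download-media", "store": "PATH", "media_kind": "video"}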
+ return { + "path": str(media_path), + "hash": hash_value, + "title": title, + "url": final_url, + "tags": tags, + "action": "cmdlet:download-media", + # download_mode removed (deprecated), keep media_kind + "store": getattr(opts, "storage_name", None) or getattr(opts, "storage_location", None) or "PATH", + "media_kind": "video" if opts.mode == "video" else "audio", + } + + def _compute_file_hash(self, filepath: Path) -> str: + """Compute SHA256 hash of a file.""" + import hashlib + sha256_hash = hashlib.sha256() + with open(filepath, "rb") as f: + for byte_block in iter(lambda: f.read(4096), b""): + sha256_hash.update(byte_block) + return sha256_hash.hexdigest() + + +# Module-level singleton registration +CMDLET = Download_Media() diff --git a/cmdlets/download_torrent.py b/cmdlets/download_torrent.py new file mode 100644 index 0000000..2293d29 --- /dev/null +++ b/cmdlets/download_torrent.py @@ -0,0 +1,127 @@ +"""Download torrent/magnet links via AllDebrid in a dedicated cmdlet. + +Features: +- Accepts magnet links and .torrent files/url +- Uses AllDebrid API for background downloads +- Progress tracking and worker management +- Self-registering class-based cmdlet +""" + +from __future__ import annotations +import sys +import uuid +import threading +from pathlib import Path +from typing import Any, Dict, Optional, Sequence + +from helper.logger import log +from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args + +class Download_Torrent(Cmdlet): + """Class-based download-torrent cmdlet with self-registration.""" + + def __init__(self) -> None: + super().__init__( + name="download-torrent", + summary="Download torrent/magnet links via AllDebrid", + usage="download-torrent [options]", + alias=["torrent", "magnet"], + arg=[ + CmdletArg(name="magnet", type="string", required=False, description="Magnet link or .torrent file/URL", variadic=True), + CmdletArg(name="output", type="string", description="Output directory for downloaded files"), + CmdletArg(name="wait", type="float", description="Wait time (seconds) for magnet processing timeout"), + CmdletArg(name="background", type="flag", alias="bg", description="Start download in background"), + ], + detail=["Download torrents/magnets via AllDebrid API."], + exec=self.run, + ) + self.register() + + def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: + parsed = parse_cmdlet_args(args, self) + magnet_args = parsed.get("magnet", []) + output_dir = Path(parsed.get("output") or Path.home() / "Downloads") + wait_timeout = int(float(parsed.get("wait", 600))) + background_mode = parsed.get("background", False) + api_key = config.get("alldebrid_api_key") + if not api_key: + log("AllDebrid API key not configured", file=sys.stderr) + return 1 + for magnet_url in magnet_args: + if background_mode: + self._start_background_worker(magnet_url, output_dir, config, api_key, wait_timeout) + log(f"⧗ Torrent download queued in background: {magnet_url}") + else: + self._download_torrent_worker(str(uuid.uuid4()), magnet_url, output_dir, config, api_key, wait_timeout) + return 0 + + @staticmethod + def _download_torrent_worker( + worker_id: str, + magnet_url: str, + output_dir: Path, + config: Dict[str, Any], + api_key: str, + wait_timeout: int = 600, + worker_manager: Optional[Any] = None, + ) -> None: + try: + from helper.alldebrid import AllDebridClient + client = AllDebridClient(api_key) + log(f"[Worker {worker_id}] Submitting magnet to AllDebrid...") + magnet_info = client.magnet_add(magnet_url) + magnet_id = 
int(magnet_info.get('id', 0)) + if magnet_id <= 0: + log(f"[Worker {worker_id}] Magnet add failed", file=sys.stderr) + return + log(f"[Worker {worker_id}] ✓ Magnet added (ID: {magnet_id})") + # Poll for ready status (simplified) + import time + elapsed = 0 + while elapsed < wait_timeout: + status = client.magnet_status(magnet_id) + if status.get('ready'): + break + time.sleep(5) + elapsed += 5 + if elapsed >= wait_timeout: + log(f"[Worker {worker_id}] Timeout waiting for magnet", file=sys.stderr) + return + files_result = client.magnet_links([magnet_id]) + magnet_files = files_result.get(str(magnet_id), {}) + files_array = magnet_files.get('files', []) + if not files_array: + log(f"[Worker {worker_id}] No files found", file=sys.stderr) + return + for file_info in files_array: + file_url = file_info.get('link') + file_name = file_info.get('name') + if file_url: + Download_Torrent._download_file(file_url, output_dir / file_name) + log(f"[Worker {worker_id}] ✓ Downloaded {file_name}") + except Exception as e: + log(f"[Worker {worker_id}] Torrent download failed: {e}", file=sys.stderr) + + @staticmethod + def _download_file(url: str, dest: Path) -> None: + try: + import requests + resp = requests.get(url, stream=True) + with open(dest, 'wb') as f: + for chunk in resp.iter_content(chunk_size=8192): + if chunk: + f.write(chunk) + except Exception as e: + log(f"File download failed: {e}", file=sys.stderr) + + def _start_background_worker(self, magnet_url, output_dir, config, api_key, wait_timeout): + worker_id = f"torrent_{uuid.uuid4().hex[:6]}" + thread = threading.Thread( + target=self._download_torrent_worker, + args=(worker_id, magnet_url, output_dir, config, api_key, wait_timeout), + daemon=False, + name=f"TorrentWorker_{worker_id}", + ) + thread.start() + +CMDLET = Download_Torrent() diff --git a/cmdlets/get_file.py b/cmdlets/get_file.py index a475dd2..f36f01a 100644 --- a/cmdlets/get_file.py +++ b/cmdlets/get_file.py @@ -1,1730 +1,204 @@ from __future__ import annotations -from typing import Any, Callable, Dict, List, Optional, Sequence +from typing import Any, Dict, Sequence from pathlib import Path -import shutil as _shutil -import subprocess as _subprocess -import json import sys -import platform - -import threading - -from helper.logger import log, debug -import uuid as _uuid -import time as _time - -from helper.progress import print_progress, print_final_progress -from helper.http_client import HTTPClient -from helper.mpv_ipc import get_ipc_pipe_path, send_to_mpv, MPV_LUA_SCRIPT_PATH -import fnmatch as _fnmatch +import shutil from . 
import register -import models import pipeline as ctx -from helper import hydrus as hydrus_wrapper -from ._shared import Cmdlet, CmdletArg, normalize_hash, looks_like_hash, create_pipe_object_result -from config import resolve_output_dir, get_hydrus_url, get_hydrus_access_key -from helper.alldebrid import AllDebridClient - -DEFAULT_DEBRID_WAIT_TIMEOUT = 600 -DEBRID_WORKER_PREFIX = "debrid_" +from ._shared import Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, get_field, normalize_hash +from helper.logger import log, debug +from helper.store import FileStorage +from config import resolve_output_dir - - - -def _is_alldebrid_pipe_data(line: str) -> bool: - """Check if line is AllDebrid pipe format: ID|filename|size|...""" - parts = line.strip().split('|') - if len(parts) < 5: - return False - try: - # Check if first part is magnet ID (integer) - magnet_id = int(parts[0]) - # Check if 3rd part (size) is integer - size = int(parts[2]) - # Check if 4th part (status_code) is integer - status_code = int(parts[3]) - return magnet_id > 0 and size >= 0 and status_code in {0, 1, 2, 3, 4} - except (ValueError, IndexError): - return False - - -def _handle_alldebrid_pipe(config: Dict[str, Any], args: Sequence[str]) -> int: - """Handle AllDebrid magnet downloads from piped stdin.""" - # Parse arguments - out_path = None - file_filter = None - i = 0 - while i < len(args): - if args[i].lower() in {"-path", "--path", "path"} and i + 1 < len(args): - out_path = Path(args[i + 1]).expanduser() - i += 2 - elif args[i].lower() in {"-file", "--file", "file"} and i + 1 < len(args): - file_filter = args[i + 1] - i += 2 - else: - i += 1 +class Get_File(Cmdlet): + """Export files to local path via hash+store.""" - if not out_path: - log("✗ -path required for AllDebrid downloads", file=sys.stderr) - return 1 + def __init__(self) -> None: + """Initialize get-file cmdlet.""" + super().__init__( + name="get-file", + summary="Export file to local path", + usage="@1 | get-file -path C:\\Downloads", + arg=[ + SharedArgs.HASH, + SharedArgs.STORE, + CmdletArg("-path", description="Output directory path (default: from config)"), + CmdletArg("-name", description="Output filename (default: from metadata title)"), + ], + detail=[ + "- Exports file from storage backend to local path", + "- Uses hash+store to retrieve file", + "- Preserves file extension and metadata", + ], + exec=self.run, + ) + self.register() - # Read magnet IDs from stdin - magnets = [] - try: - for line in sys.stdin: - line = line.strip() - if line and _is_alldebrid_pipe_data(line): - parts = line.split('|') - magnet_id = int(parts[0]) - magnets.append(magnet_id) - except Exception as e: - log(f"✗ Error reading stdin: {e}", file=sys.stderr) - return 1 - - if not magnets: - log("✗ No valid magnet IDs in pipe", file=sys.stderr) - return 1 - - return _queue_alldebrid_worker( - config=config, - output_dir=out_path, - magnet_ids=magnets, - title=f"AllDebrid pipe ({len(magnets)} magnet{'s' if len(magnets) != 1 else ''})", - file_filter=file_filter, - ) - - -def _extract_files_from_magnet(magnet_info: Dict[str, Any], filter_pattern: Optional[str] = None) -> list: - """Extract files from magnet file tree, optionally filtering by pattern.""" - files = [] - - def traverse(items: Any, prefix: str = "") -> None: - if not isinstance(items, list): - return - for item in items: - if not isinstance(item, dict): - continue - name = item.get('n', '') - link = item.get('l', '') - size = item.get('s', 0) - entries = item.get('e', []) - - # File - if link: - full_path = 
f"{prefix}/{name}" if prefix else name - if filter_pattern is None or _fnmatch.fnmatch(name.lower(), filter_pattern.lower()): - files.append({'name': name, 'path': full_path, 'size': size, 'link': link}) - - # Folder - if entries: - full_path = f"{prefix}/{name}" if prefix else name - traverse(entries, full_path) - - items = magnet_info.get('files', []) - traverse(items) - return files - - -def _download_file_from_alldebrid(url: str, output_path: Path, filename: str, file_size: int) -> bool: - """Download a single file from AllDebrid with progress bar.""" - output_path.parent.mkdir(parents=True, exist_ok=True) - - try: - downloaded = 0 - chunk_size = 1024 * 1024 - start_time = _time.time() - last_update = start_time + def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: + """Export file via hash+store backend.""" + debug(f"[get-file] run() called with result type: {type(result)}") + parsed = parse_cmdlet_args(args, self) + debug(f"[get-file] parsed args: {parsed}") - with HTTPClient(timeout=30.0, headers={'User-Agent': 'downlow/1.0'}) as client: - response = client.get(url) - response.raise_for_status() - with open(output_path, 'wb', buffering=1024*1024) as f: - for chunk in response.iter_bytes(chunk_size): - if not chunk: - break - f.write(chunk) - downloaded += len(chunk) - - # Update progress every 0.5 seconds to avoid spam - now = _time.time() - if now - last_update >= 0.5 or downloaded == file_size: - elapsed = now - start_time - speed = downloaded / elapsed if elapsed > 0 else 0 - print_progress(filename, downloaded, file_size, speed) - last_update = now + # Extract hash and store from result or args + file_hash = parsed.get("hash") or get_field(result, "hash") + store_name = parsed.get("store") or get_field(result, "store") + output_path = parsed.get("path") + output_name = parsed.get("name") - # Print final progress line - elapsed = _time.time() - start_time - print_final_progress(filename, file_size, elapsed) - log(f"✓ {filename} downloaded", file=sys.stderr) + debug(f"[get-file] file_hash={file_hash[:12] if file_hash else None}... 
store_name={store_name}") - return True - except Exception as e: - log(f"\n[get-file] ✗ Download error: {e}", file=sys.stderr) - return False - - -def _queue_alldebrid_worker( - config: Dict[str, Any], - output_dir: Path, - magnet_ids: Sequence[int], - title: str, - file_filter: Optional[str] = None, - wait_timeout: int = DEFAULT_DEBRID_WAIT_TIMEOUT, -): - """Spawn a background worker to download AllDebrid magnets.""" - from config import get_debrid_api_key - - if not magnet_ids: - log("✗ No magnet IDs provided for AllDebrid download", file=sys.stderr) - return 1 - - api_key = get_debrid_api_key(config) - if not api_key: - log("✗ AllDebrid API key not configured", file=sys.stderr) - return 1 - - worker_id = f"{DEBRID_WORKER_PREFIX}{_uuid.uuid4().hex[:8]}" - worker_manager = config.get('_worker_manager') - if worker_manager: - try: - worker_manager.track_worker( - worker_id, - worker_type="download_debrid", - title=title, - description=f"AllDebrid download for {title}", - pipe=ctx.get_current_command_text(), - ) - except Exception as exc: - debug(f"⚠ Failed to register AllDebrid worker: {exc}") - worker_manager = None - - thread = threading.Thread( - target=_run_alldebrid_download_worker, - args=( - worker_id, - api_key, - output_dir, - list(magnet_ids), - file_filter, - title, - worker_manager, - wait_timeout, - ), - daemon=False, - name=f"AllDebridWorker_{worker_id}" - ) - thread.start() - - ctx.emit({ - 'worker_id': worker_id, - 'worker_type': 'download_debrid', - 'status': 'running', - 'message': f"{title} (queued)", - }) - - log(f"🌀 AllDebrid download queued (worker {worker_id})", file=sys.stderr) - return 0 - - -def _run_alldebrid_download_worker( - worker_id: str, - api_key: str, - output_dir: Path, - magnet_ids: List[int], - file_filter: Optional[str], - title: str, - worker_manager: Optional[Any], - wait_timeout: int, -): - """Worker entrypoint that polls AllDebrid and downloads magnet files.""" - def log_progress(message: str) -> None: - safe = f"[Worker {worker_id}] {message}" - debug(safe) - if worker_manager: - try: - worker_manager.log_step(worker_id, message) - except Exception: - pass - - try: - client = AllDebridClient(api_key) - except Exception as exc: - log_progress(f"✗ Failed to initialize AllDebrid client: {exc}") - if worker_manager: - try: - worker_manager.finish_worker(worker_id, "failed", str(exc)) - except Exception: - pass - return - - output_dir.mkdir(parents=True, exist_ok=True) - total_downloaded = 0 - total_failed = 0 - - for magnet_id in magnet_ids: - log_progress(f"⧗ Processing magnet {magnet_id}") - try: - status_info = client.magnet_status(magnet_id) - except Exception as exc: - log_progress(f"✗ Failed to query magnet {magnet_id}: {exc}") - total_failed += 1 - continue - - try: - ready_status = _wait_for_magnet_ready(client, magnet_id, log_progress, wait_timeout) - except Exception as exc: - log_progress(f"✗ Magnet {magnet_id} did not become ready: {exc}") - total_failed += 1 - continue - - try: - magnet_info = client.magnet_status(magnet_id, include_files=True) - except Exception as exc: - log_progress(f"✗ Failed to list files for magnet {magnet_id}: {exc}") - total_failed += 1 - continue - - files_list = _extract_files_from_magnet(magnet_info, file_filter) - if not files_list: - log_progress(f"⊘ Magnet {magnet_id} has no files") - total_failed += 1 - continue - - for file_info in files_list: - name = file_info.get('name', 'unknown') - log_progress(f"⇓ Downloading {name}") - link = file_info.get('link') - if not link: - log_progress(f"✗ Missing link for 
{name}") - total_failed += 1 - continue - - try: - direct_url = client.unlock_link(link) - except Exception as exc: - log_progress(f"✗ Failed to unlock {name}: {exc}") - total_failed += 1 - continue - - output_file = output_dir / name - if _download_file_from_alldebrid(direct_url, output_file, name, file_info.get('size', 0)): - total_downloaded += 1 - else: - total_failed += 1 - - if total_downloaded or total_failed: - summary = f"{total_downloaded} file(s) downloaded, {total_failed} failed" - else: - summary = "No files were processed" - - log(f"✓ AllDebrid worker {worker_id}: {summary}", file=sys.stderr) - if worker_manager: - status = "success" if total_downloaded > 0 else "failed" - try: - worker_manager.finish_worker(worker_id, status, summary if status == "failed" else "") - except Exception: - pass - - -def _wait_for_magnet_ready( - client: AllDebridClient, - magnet_id: int, - log_progress: Callable[[str], None], - wait_timeout: int, -) -> Dict[str, Any]: - elapsed = 0 - last_report = -5 - while elapsed < wait_timeout: - try: - status = client.magnet_status(magnet_id) - except Exception as exc: - log_progress(f"⚠ Live status check failed: {exc}") - _time.sleep(2) - elapsed += 2 - continue - - status_code = int(status.get('statusCode', -1)) - if status_code == 4: - return status - if status_code >= 5: - raise RuntimeError(status.get('status', f"Failed code {status_code}")) - if elapsed - last_report >= 5: - downloaded = status.get('downloaded', 0) - size = status.get('size', 0) - percent = (downloaded / size * 100) if size else 0 - log_progress(f"⧗ {status.get('status', 'processing')} — {percent:.1f}%") - last_report = elapsed - _time.sleep(2) - elapsed += 2 - raise TimeoutError(f"Magnet {magnet_id} not ready after {wait_timeout}s") - - -def _is_playable_in_mpv(file_path_or_ext: str, mime_type: Optional[str] = None) -> bool: - """Check if file can be played in MPV based on extension or mime type.""" - from helper.utils_constant import mime_maps - - # Check mime type first if provided - if mime_type: - mime_lower = mime_type.lower() - # Simple prefix check for common media types - if any(mime_lower.startswith(prefix) for prefix in ['video/', 'audio/', 'image/']): - return True - - # Extract extension - if file_path_or_ext.startswith('.'): - ext = file_path_or_ext.lower() - else: - ext = Path(file_path_or_ext).suffix.lower() - - if not ext: - return False - - # Check if extension is in playable categories - playable_categories = ['video', 'audio', 'image', 'image_sequence'] - - for category in playable_categories: - if category in mime_maps: - for key, info in mime_maps[category].items(): - if info.get('ext', '').lower() == ext: - return True - return False - - -def _play_in_mpv(file_url: str, file_title: str, is_stream: bool = False, headers: Optional[Dict[str, str]] = None) -> bool: - """Play file in MPV using centralized IPC pipe, creating new instance if needed. - - Returns True on success, False on error. 
- """ - try: - # First try to send to existing MPV instance - if send_to_mpv(file_url, file_title, headers): - debug(f"Added to MPV: {file_title}") - return True - - # No existing MPV or pipe unavailable - start new instance - ipc_pipe = get_ipc_pipe_path() - debug(f"[get-file] Starting new MPV instance (pipe: {ipc_pipe})", file=sys.stderr) - - # Build command - start MPV without a file initially, just with IPC server and our Lua helper - cmd = ['mpv', f'--input-ipc-server={ipc_pipe}'] - try: - if MPV_LUA_SCRIPT_PATH and Path(MPV_LUA_SCRIPT_PATH).exists(): - cmd.append(f"--scripts-append={MPV_LUA_SCRIPT_PATH}") - except Exception: - pass - - if headers: - # Format headers for command line - # --http-header-fields="Header1: Val1,Header2: Val2" - header_str = ",".join([f"{k}: {v}" for k, v in headers.items()]) - cmd.append(f'--http-header-fields={header_str}') - - # Add --idle flag so MPV stays running and waits for playlist commands - cmd.append('--idle') - - # Detach process to prevent freezing parent CLI - kwargs = {} - if platform.system() == 'Windows': - kwargs['creationflags'] = 0x00000008 # DETACHED_PROCESS - - _subprocess.Popen(cmd, stdin=_subprocess.DEVNULL, stdout=_subprocess.DEVNULL, stderr=_subprocess.DEVNULL, **kwargs) - - debug(f"[get-file] Started MPV instance (IPC: {ipc_pipe})", file=sys.stderr) - - # Give MPV time to start and open IPC pipe - # Windows needs more time than Unix - wait_time = 1.0 if platform.system() == 'Windows' else 0.5 - debug(f"[get-file] Waiting {wait_time}s for MPV to initialize IPC...", file=sys.stderr) - _time.sleep(wait_time) - - # Try up to 3 times to send the file via IPC - for attempt in range(3): - debug(f"[get-file] Sending file via IPC (attempt {attempt + 1}/3)", file=sys.stderr) - if send_to_mpv(file_url, file_title, headers): - debug(f"{'Streaming' if is_stream else 'Playing'} in MPV: {file_title}") - debug(f"[get-file] Added to new MPV instance (IPC: {ipc_pipe})", file=sys.stderr) - return True - - if attempt < 2: - # Wait before retrying - _time.sleep(0.3) - - # IPC send failed after all retries - log("Error: Could not send file to MPV via IPC after startup", file=sys.stderr) - return False - - except FileNotFoundError: - log("Error: MPV not found. Install mpv to play media files", file=sys.stderr) - return False - except Exception as e: - log(f"Error launching MPV: {e}", file=sys.stderr) - return False - - -# Backward-compatible alias for modules expecting the old IPC helper name. 
-def _get_fixed_ipc_pipe() -> str: - """Return the shared MPV IPC pipe path (compat shim).""" - return get_ipc_pipe_path() - - -def _handle_search_result(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: - """Handle a file from search-file results using FileStorage backend.""" - try: - from helper.file_storage import FileStorage - - # Helper to get field from both dict and object - def get_field(obj: Any, field: str, default: Any = None) -> Any: - if isinstance(obj, dict): - return obj.get(field, default) - else: - return getattr(obj, field, default) - - # Extract file information from ResultItem - storage_name = get_field(result, 'origin', None) - # Also check for 'source' field (from add-file and other cmdlets) - if not storage_name: - storage_name = get_field(result, 'source', None) - file_hash = get_field(result, 'hash_hex', None) - # Also check for file_hash field (from add-file and other cmdlets) if not file_hash: - file_hash = get_field(result, 'file_hash', None) - file_title = get_field(result, 'title', 'file') - mime_type = get_field(result, 'mime', None) - file_path = get_field(result, 'target', None) - # Also check for 'file_path' field (from add-file and other cmdlets) - if not file_path: - file_path = get_field(result, 'file_path', None) - # Also check for 'path' field (from search-file and other cmdlets) - if not file_path: - file_path = get_field(result, 'path', None) - - full_metadata = get_field(result, 'full_metadata', {}) - magnet_id = full_metadata.get('magnet_id') if isinstance(full_metadata, dict) else None - - if not storage_name: - log("Error: No storage backend specified in result", file=sys.stderr) + log("Error: No file hash provided") return 1 - - debug(f"[get-file] Retrieving file from storage: {storage_name}", file=sys.stderr) - - # Handle different storage backends - if storage_name.lower() == 'hydrus': - return _handle_hydrus_file(file_hash, file_title, config, args, mime_type=mime_type) - elif storage_name.lower() == 'local': - return _handle_local_file(file_path, file_title, config, args, file_hash=file_hash) - elif storage_name.lower() == 'download': - # Downloads are local files - return _handle_local_file(file_path, file_title, config, args, file_hash=file_hash) - elif storage_name.lower() == 'debrid': - # Extract magnet_id from result (search-file stores it in full_metadata or as custom attribute) - if not magnet_id: - magnet_id = get_field(result, 'magnet_id', None) - if not magnet_id: - log("Error: No magnet ID in debrid result", file=sys.stderr) - return 1 - return _handle_debrid_file(magnet_id, file_title, config, args) - elif storage_name.lower() in {'bandcamp', 'youtube'}: - # Handle Bandcamp/YouTube via yt-dlp - url = get_field(result, 'target', None) - if not url: - # Try to find URL in other fields - url = get_field(result, 'url', None) - if not url: - log(f"Error: No URL found for {storage_name} result", file=sys.stderr) - return 1 - - return _handle_ytdlp_download(url, file_title, config, args) - else: - log(f"Unknown storage backend: {storage_name}", file=sys.stderr) - return 1 - - except Exception as e: - log(f"Error processing search result: {e}", file=sys.stderr) - import traceback - traceback.print_exc(file=sys.stderr) - return 1 - - -def _handle_hydrus_file(file_hash: Optional[str], file_title: str, config: Dict[str, Any], args: Sequence[str], mime_type: Optional[str] = None) -> int: - """Handle file from Hydrus - auto-play in MPV if media file, otherwise open web URL.""" - if not file_hash: - log("Error: No file hash 
provided", file=sys.stderr) - return 1 - - try: - hydrus_url = get_hydrus_url(config) - access_key = get_hydrus_access_key(config) - - if not hydrus_url or not access_key: - log("Error: Hydrus not configured", file=sys.stderr) + if not store_name: + log("Error: No store name provided") return 1 - # Check if it's a playable media file based on filename or mime type - is_media = _is_playable_in_mpv(file_title) - if not is_media and mime_type: - # Check mime type if filename check failed - if any(m in mime_type.lower() for m in ['video/', 'audio/', 'image/']): - is_media = True - - force_mpv = any(str(a).lower() in {'-mpv', '--mpv', 'mpv'} for a in args) - force_browser = any(str(a).lower() in {'-web', '--web', 'web', '-browser', '--browser'} for a in args) + # Normalize hash + file_hash = normalize_hash(file_hash) + if not file_hash: + log("Error: Invalid hash format") + return 1 - # Check MPV availability - from hydrus_health_check import check_mpv_availability - mpv_available, _ = check_mpv_availability() + debug(f"[get-file] Getting storage backend: {store_name}") - # Construct URLs for streaming/viewing - # For streaming, we use headers for auth, so we don't put the key in the URL - stream_url = f"{hydrus_url}/get_files/file?hash={file_hash}" - # For browser, we still need the key in the URL - web_url = f"{hydrus_url}/get_files/file?hash={file_hash}&Hydrus-Client-API-Access-Key={access_key}" + # Get storage backend + storage = FileStorage(config) + backend = storage[store_name] + debug(f"[get-file] Backend retrieved: {type(backend).__name__}") - headers = { - "Hydrus-Client-API-Access-Key": access_key - } + # Get file metadata to determine name and extension + debug(f"[get-file] Getting metadata for hash...") + metadata = backend.get_metadata(file_hash) + if not metadata: + log(f"Error: File metadata not found for hash {file_hash[:12]}...") + return 1 + debug(f"[get-file] Metadata retrieved: title={metadata.get('title')}, ext={metadata.get('ext')}") - if force_browser: - # User explicitly wants browser - ipc_pipe = get_ipc_pipe_path() - result_dict = create_pipe_object_result( - source='hydrus', - identifier=file_hash, - file_path=web_url, - cmdlet_name='get-file', - title=file_title, - file_hash=file_hash, - extra={ - 'ipc': ipc_pipe, - 'action_type': 'browser', - 'web_url': web_url, - 'hydrus_url': hydrus_url, - 'access_key': access_key - } - ) - ctx.emit(result_dict) - try: - import webbrowser - webbrowser.open(web_url) - debug(f"[get-file] Opened in browser: {file_title}", file=sys.stderr) - except Exception: - pass - return 0 - elif force_mpv or (is_media and mpv_available): - # Auto-play in MPV for media files (if available), or user requested it - if _play_in_mpv(stream_url, file_title, is_stream=True, headers=headers): - # Show unified MPV playlist view (reuse cmdnats.pipe display) - try: - from cmdnats import pipe as mpv_pipe - mpv_pipe._run(None, [], config) - except Exception: - pass - return 0 - else: - # Fall back to browser - try: - import webbrowser - webbrowser.open(web_url) - debug(f"[get-file] Opened in browser instead", file=sys.stderr) - except Exception: - pass - return 0 + # Determine output filename + if output_name: + filename = output_name else: - # Not media, open in browser - ipc_pipe = get_ipc_pipe_path() - result_dict = create_pipe_object_result( - source='hydrus', - identifier=file_hash, - file_path=web_url, - cmdlet_name='get-file', - title=file_title, - file_hash=file_hash, - extra={ - 'ipc': ipc_pipe, - 'action_type': 'browser', - 'web_url': web_url, - 
'hydrus_url': hydrus_url, - 'access_key': access_key - } - ) - ctx.emit(result_dict) - try: - import webbrowser - webbrowser.open(web_url) - debug(f"[get-file] Opened in browser: {file_title}", file=sys.stderr) - except Exception: - pass - return 0 - - except Exception as e: - log(f"Error handling Hydrus file: {e}", file=sys.stderr) - import traceback - traceback.print_exc(file=sys.stderr) - return 1 - - -def _handle_local_file(file_path: Optional[str], file_title: str, config: Dict[str, Any], args: Sequence[str], file_hash: Optional[str] = None) -> int: - """Handle file from local storage - auto-play in MPV if media, otherwise open with default app.""" - if not file_path: - log("Error: No file path provided", file=sys.stderr) - return 1 - - try: - source = Path(file_path) - if not source.exists(): - # Try to resolve by hash if the path looks like a hash - resolved_local = False - if looks_like_hash(str(file_path)): - try: - from config import get_local_storage_path - from helper.local_library import LocalLibraryDB - storage_path = get_local_storage_path(config) - if storage_path: - with LocalLibraryDB(storage_path) as db: - resolved_path = db.search_by_hash(str(file_path)) - if resolved_path and resolved_path.exists(): - source = resolved_path - file_path = str(resolved_path) - resolved_local = True - # Also set file_hash since we know it - file_hash = str(file_path) - except Exception: - pass + # Use title from metadata, sanitize it + title = metadata.get("title", "export") + filename = self._sanitize_filename(title) + + # Add extension if metadata has it + ext = metadata.get("ext") + if ext and not filename.endswith(ext): + if not ext.startswith('.'): + ext = '.' + ext + filename += ext + + # Determine output directory + if output_path: + output_dir = Path(output_path).expanduser() + else: + output_dir = resolve_output_dir(config) + + debug(f"[get-file] Output dir: {output_dir}") + + # Create output directory + output_dir.mkdir(parents=True, exist_ok=True) + + debug(f"[get-file] Calling backend.get_file({file_hash[:12]}...)") + + # Get file from backend (may return Path or URL string depending on backend) + source_path = backend.get_file(file_hash) + + debug(f"[get-file] backend.get_file returned: {source_path}") + + # Check if backend returned a URL (HydrusNetwork case) + if isinstance(source_path, str) and (source_path.startswith("http://") or source_path.startswith("https://")): + log(f"File opened in browser: {source_path}", file=sys.stderr) + ctx.emit(f"Opened in browser: {source_path}") - if not resolved_local: - log(f"Error: File not found: {file_path}", file=sys.stderr) - return 1 - - # Check for explicit user flags - force_mpv = any(str(a).lower() in {'-mpv', '--mpv', 'mpv'} for a in args) - force_default = any(str(a).lower() in {'-open', '--open', 'open'} for a in args) - - # Check if it's a playable media file - is_media = _is_playable_in_mpv(str(source)) - - # Check MPV availability - from hydrus_health_check import check_mpv_availability - mpv_available, _ = check_mpv_availability() - - if force_default: - # User explicitly wants default application - import subprocess as sp - import platform - import os - try: - if platform.system() == 'Darwin': # macOS - sp.run(['open', file_path]) - elif platform.system() == 'Windows': - os.startfile(file_path) - else: # Linux - sp.run(['xdg-open', file_path]) - ctx.emit(f"Opened: {file_title}") - debug(f"[get-file] Opened {file_title} with default app", file=sys.stderr) - return 0 - except Exception as e: - log(f"Error opening file: 
{e}", file=sys.stderr) - return 1 - elif force_mpv or (is_media and mpv_available): - # Auto-play in MPV for media files (if available), or user requested it - if _play_in_mpv(file_path, file_title, is_stream=False): - # Show unified MPV playlist view (reuse cmdnats.pipe display) - try: - from cmdnats import pipe as mpv_pipe - mpv_pipe._run(None, [], config) - except Exception: - pass - return 0 - else: - # Fall back to default application - try: - import os - import platform - if platform.system() == 'Darwin': # macOS - _subprocess.run(['open', file_path]) - elif platform.system() == 'Windows': - os.startfile(file_path) - else: # Linux - _subprocess.run(['xdg-open', file_path]) - debug(f"[get-file] Opened with default app instead", file=sys.stderr) - except Exception: - pass - return 0 - else: - # Not media - open with default application - import subprocess as sp - import platform - import os - try: - if platform.system() == 'Darwin': # macOS - sp.run(['open', file_path]) - elif platform.system() == 'Windows': - # Use os.startfile for more reliable Windows handling - os.startfile(file_path) - else: # Linux - sp.run(['xdg-open', file_path]) - print(f"Opened: {file_title}") - debug(f"[get-file] Opened {file_title} with default app", file=sys.stderr) - - # Emit result for downstream processing - result_dict = create_pipe_object_result( - source='local', - identifier=str(Path(file_path).stem) if file_path else 'unknown', - file_path=file_path, - cmdlet_name='get-file', - title=file_title, - file_hash=file_hash, - extra={'action_type': 'opened'} - ) - ctx.emit(result_dict) - return 0 - except Exception as e: - log(f"Error opening file with default app: {e}", file=sys.stderr) - return 1 - - except Exception as e: - log(f"Error handling local file: {e}", file=sys.stderr) - return 1 - - -def _handle_debrid_file(magnet_id: int, magnet_title: str, config: Dict[str, Any], args: Sequence[str]) -> int: - """Handle magnet file from AllDebrid storage - download to local path.""" - # Parse output path argument - out_path = None - i = 0 - args_list = [str(a) for a in args] - while i < len(args_list): - if args_list[i].lower() in {"-path", "--path", "path"} and i + 1 < len(args_list): - out_path = Path(args_list[i + 1]).expanduser() - i += 2 - else: - i += 1 - - if not out_path: - log("✗ -Path required for debrid downloads", file=sys.stderr) - return 1 - - # Ensure output directory exists - try: - out_path.mkdir(parents=True, exist_ok=True) - except Exception as e: - log(f"✗ Error creating output directory: {e}", file=sys.stderr) - return 1 - - return _queue_alldebrid_worker( - config=config, - output_dir=out_path, - magnet_ids=[magnet_id], - title=magnet_title or f"magnet {magnet_id}", - ) - - -@register(["get-file"]) # primary name -def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: - # Help: if any help token is present, print CMDLET JSON and exit - try: - if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args): - log(json.dumps(CMDLET, ensure_ascii=False, indent=2)) + # Emit result for pipeline + ctx.emit({ + "hash": file_hash, + "store": store_name, + "url": source_path, + "title": filename, + }) return 0 - except Exception: - pass - - # Helper to get field from both dict and object - def get_field(obj: Any, field: str, default: Any = None) -> Any: - if isinstance(obj, dict): - return obj.get(field, default) + + # Otherwise treat as file path (local/folder backends) + if isinstance(source_path, str): + source_path = Path(source_path) + + # 
Determine output directory + if output_path: + output_dir = Path(output_path).expanduser() else: - return getattr(obj, field, default) - - # Check if result is a list (from @N selection) and extract the first item - actual_result = result - if isinstance(result, list) and len(result) > 0: - actual_result = result[0] - - # Check if this is a FileStorage search result (has origin field indicating a backend) - # This handles both dict and ResultItem objects - origin = get_field(actual_result, 'origin', None) - # Also check for 'source' field (from add-file and other cmdlets) - if not origin: - origin = get_field(actual_result, 'source', None) - if origin and origin.lower() in {'hydrus', 'local', 'debrid', 'alldebrid', 'bandcamp', 'youtube'}: - # This is a search result with explicit origin - handle it via _handle_search_result - return _handle_search_result(actual_result, args, config) - - # Handle ResultItem from search-file via @N selection - # The result can be either: - # 1. A single ResultItem (direct call) - # 2. A list of ResultItems (from @N selection in CLI) - result_item = None - if result and hasattr(result, '__class__'): - if result.__class__.__name__ == 'ResultItem': - result_item = result - elif isinstance(result, list) and len(result) > 0: - # @N selection creates a list, extract the first item if it's a ResultItem - if hasattr(result[0], '__class__') and result[0].__class__.__name__ == 'ResultItem': - result_item = result[0] - - if result_item: - return _handle_search_result(result_item, args, config) - - # Handle PipeObject results from previous get-file call (for chaining) - if result and isinstance(result, dict) and result.get('action', '').startswith('cmdlet:get-file'): - # This is from a previous get-file result - just pass it through - # Don't treat it as a new file to play, just emit for pipeline chaining - ctx.emit(result) + output_dir = resolve_output_dir(config) + + debug(f"[get-file] Output dir: {output_dir}") + + # Create output directory + output_dir.mkdir(parents=True, exist_ok=True) + + # Build full output path + dest_path = output_dir / filename + + # Make path unique if file exists + dest_path = self._unique_path(dest_path) + + if not source_path or not source_path.exists(): + log(f"Error: Backend could not retrieve file for hash {file_hash[:12]}...") + return 1 + + # Copy file to destination + debug(f"[get-file] Copying {source_path} -> {dest_path}", file=sys.stderr) + shutil.copy2(source_path, dest_path) + + ctx.emit(f"Exported to: {dest_path}") + log(f"Exported: {dest_path}", file=sys.stderr) + + # Emit result for pipeline + ctx.emit({ + "hash": file_hash, + "store": store_name, + "path": str(dest_path), + "title": filename, + }) + + debug(f"[get-file] Completed successfully") return 0 - # Check for AllDebrid pipe input (from search-debrid) - # Try to read first line from stdin to detect format - first_line = None - try: - # Try to read one line without blocking - if hasattr(sys.stdin, 'readable') and sys.stdin.readable(): - first_line = sys.stdin.readline().strip() - except Exception: - pass - - if first_line and _is_alldebrid_pipe_data(first_line): - # This is AllDebrid pipe data - handle it separately - # Put the line back by creating a chain with the rest of stdin - import io - try: - remaining_stdin = sys.stdin.read() - except: - remaining_stdin = "" - sys.stdin = io.StringIO(first_line + '\n' + remaining_stdin) - return _handle_alldebrid_pipe(config, args) - elif first_line: - # Not AllDebrid data, put it back for normal processing - import io - try: 
- remaining_stdin = sys.stdin.read() - except: - remaining_stdin = "" - sys.stdin = io.StringIO(first_line + '\n' + remaining_stdin) - - # Helpers - def _sanitize_name(text: str) -> str: - allowed = [] - for ch in text: - allowed.append(ch if (ch.isalnum() or ch in {"-", "_", " ", "."}) else " ") - return (" ".join("".join(allowed).split()) or "export").strip() - - def _ffprobe_duration_seconds(path: Path) -> Optional[float]: - ffprobe_path = _shutil.which('ffprobe') - if not ffprobe_path: - return None - try: - res = _subprocess.run( - [ffprobe_path, '-v', 'error', '-show_entries', 'format=duration', '-of', 'default=noprint_wrappers=1:nokey=1', str(path)], - stdout=_subprocess.PIPE, - stderr=_subprocess.PIPE, - check=True, - text=True, - ) - out = (res.stdout or '').strip() - if not out: - return None - value = float(out) - return value if value > 0 else None - except Exception: - return None - - def _parse_args(tokens: Sequence[str]) -> tuple[Optional[Path], Optional[str], Optional[str], Optional[str], bool]: - out_override: Optional[Path] = None - size_spec: Optional[str] = None - convert_spec: Optional[str] = None - hash_spec: Optional[str] = None - export_metadata: bool = False - i = 0 - while i < len(tokens): - t = tokens[i] - low = t.lower() - if low in {"-path", "--path", "path"} and i + 1 < len(tokens): - try: - out_override = Path(tokens[i + 1]).expanduser() - except Exception: - out_override = None - i += 2 - continue - if low in {"size", "-size", "--size"} and i + 1 < len(tokens): - size_spec = tokens[i + 1] - i += 2 - continue - if low in {"convert", "-convert", "--convert"} and i + 1 < len(tokens): - convert_spec = tokens[i + 1] - i += 2 - continue - if low in {"-hash", "--hash", "hash"} and i + 1 < len(tokens): - hash_spec = tokens[i + 1] - i += 2 - continue - if low in {"-metadata", "--metadata", "metadata"}: - export_metadata = True - i += 1 - continue - i += 1 - return out_override, size_spec, convert_spec, hash_spec, export_metadata - - def _compute_target_bytes(size_spec: Optional[str], source_bytes: int) -> Optional[int]: - if not size_spec: - return None - text = str(size_spec).strip().lower() - if not text: - return None - if text.endswith('%'): - try: - pct = float(text[:-1]) - except ValueError: - return None - pct = max(0.0, min(100.0, pct)) - target = int(round(source_bytes * (pct / 100.0))) - else: - val = text - if val.endswith('mb'): - val = val[:-2] - elif val.endswith('m'): - val = val[:-1] - try: - mb = float(val) - except ValueError: - return None - target = int(round(mb * 1024 * 1024)) - min_bytes = 1 * 1024 * 1024 - if target <= 0: - target = min_bytes - return min(target, source_bytes) - - def _guess_kind_from_suffix(path: Path) -> str: - sfx = path.suffix.lower() - if sfx in {'.mp4', '.mkv', '.webm', '.mov', '.avi', '.flv', '.mpg', '.mpeg', '.ts', '.m4v', '.wmv'}: - return 'video' - if sfx in {'.mp3', '.flac', '.wav', '.m4a', '.aac', '.ogg', '.opus', '.mka'}: - return 'audio' - return 'other' - - def _extract_metadata_from_tags(tags_payload: Dict[str, Any], file_hash: str, input_kind: str = '') -> Dict[str, str]: - """Extract common metadata fields from Hydrus tags. - - Returns a dict mapping FFmpeg metadata keys to values. - Supports: title, artist, album, track, date, genre, etc. 
- - For audio files, applies sensible defaults: - - If no album, uses title as album - - If no track, defaults to 1 - - album_artist is set to artist value - """ - metadata = {} - - # Map of common tag namespaces to FFmpeg metadata keys - tag_map = { - 'title': 'title', - 'artist': 'artist', - 'album': 'album', - 'track': 'track', - 'track_number': 'track', - 'date': 'date', - 'year': 'date', - 'genre': 'genre', - 'composer': 'composer', - 'comment': 'comment', - } - - if not tags_payload or 'metadata' not in tags_payload or not tags_payload['metadata']: - return metadata - - entry = tags_payload['metadata'][0] - if 'tags' not in entry or not isinstance(entry['tags'], dict): - return metadata - - tags_dict = entry['tags'] - - # Extract metadata from tags - for _service_key, service_data in tags_dict.items(): - if not isinstance(service_data, dict): - continue - - display_tags = service_data.get('display_tags', {}) - if not isinstance(display_tags, dict): - continue - - current_tags = display_tags.get('0', []) - if not isinstance(current_tags, list): - continue - - for tag in current_tags: - tag_str = str(tag).strip() - if ':' in tag_str: - namespace, value = tag_str.split(':', 1) - namespace = namespace.lower().strip() - value = value.strip() - if namespace in tag_map and value: - ffmpeg_key = tag_map[namespace] - # Use first occurrence - if ffmpeg_key not in metadata: - metadata[ffmpeg_key] = value - - # Apply sensible defaults for audio files - if input_kind == 'audio': - # If no album, use title as album - if 'album' not in metadata and 'title' in metadata: - metadata['album'] = metadata['title'] - # If no track, default to 1 - if 'track' not in metadata: - metadata['track'] = '1' - # If no album_artist, use artist - if 'artist' in metadata: - metadata['album_artist'] = metadata['artist'] - - return metadata - - out_override, size_spec, convert_spec, hash_spec, export_metadata = _parse_args(args) - default_dir = resolve_output_dir(config) - - media_kind = (get_field(result, 'media_kind', '') or '').lower() - - _chk = [] - if out_override: - _chk.append(f"Path={out_override}") - if size_spec: - _chk.append(f"Size={size_spec}") - if convert_spec: - _chk.append(f"Convert={convert_spec}") - # Prefer explicit -hash over result hash for logging - file_hash_for_log = None - if hash_spec and looks_like_hash(hash_spec): - file_hash_for_log = normalize_hash(hash_spec) - else: - hash_value = get_field(result, 'hash_hex', None) - file_hash_for_log = normalize_hash(hash_value) if hash_value else None - if _chk or file_hash_for_log: - msg = "get-file: " + ", ".join(_chk) if _chk else "get-file" - if file_hash_for_log: - msg = f"{msg} (Hash={file_hash_for_log})" - ctx.emit(msg) - - base_name = _sanitize_name(get_field(result, 'title', None) or '') - if not base_name: - target_attr = get_field(result, 'target', None) - if isinstance(target_attr, str) and target_attr and not target_attr.startswith(('http://', 'https://')): - base_name = _sanitize_name(Path(target_attr).stem) - else: - base_name = 'export' - - # Accept multiple path-ish fields so @ selection from MPV playlist rows or ad-hoc dicts still resolve. 
- local_target = ( - get_field(result, 'target', None) - or get_field(result, 'path', None) - or get_field(result, 'file_path', None) - or get_field(result, 'filename', None) - ) - is_url = isinstance(local_target, str) and local_target.startswith(('http://', 'https://')) - # Establish file hash (prefer -hash override when provided and valid) - if hash_spec and looks_like_hash(hash_spec): - file_hash = normalize_hash(hash_spec) - else: - file_hash = normalize_hash(get_field(result, 'hash_hex', None)) if get_field(result, 'hash_hex', None) else None - - source_path: Optional[Path] = None - source_size: Optional[int] = None - duration_sec: Optional[float] = None - tags_payload: Dict[str, Any] = {} - urls_payload: Dict[str, Any] = {} - cleanup_source: bool = False - - if isinstance(local_target, str) and not is_url and not (hash_spec and file_hash): - p = Path(local_target) - if not p.exists(): - # Check if it's a hash and try to resolve locally - resolved_local = False - if looks_like_hash(local_target): - try: - from config import get_local_storage_path - from helper.local_library import LocalLibraryDB - storage_path = get_local_storage_path(config) - if storage_path: - with LocalLibraryDB(storage_path) as db: - resolved_path = db.search_by_hash(local_target) - if resolved_path and resolved_path.exists(): - p = resolved_path - resolved_local = True - # Also set file_hash since we know it - file_hash = local_target - except Exception: - pass - - if not resolved_local: - log(f"File missing: {p}") - return 1 - - source_path = p - try: - source_size = p.stat().st_size - except OSError: - source_size = None - duration_sec = _ffprobe_duration_seconds(p) - if file_hash is None: - for sc in (p.with_suffix('.tags'), p.with_suffix('.tags.txt')): - try: - if sc.exists(): - text = sc.read_text(encoding='utf-8', errors='ignore') - for line in text.splitlines(): - ls = line.strip().lower() - if ls.startswith('hash:'): - candidate = line.split(':', 1)[1].strip() if ':' in line else '' - if looks_like_hash(candidate): - file_hash = candidate.lower() - break - except OSError: - pass - elif file_hash: - # Try local resolution first if origin is local or just in case - resolved_local = False - try: - from config import get_local_storage_path - from helper.local_library import LocalLibraryDB - storage_path = get_local_storage_path(config) - if storage_path: - with LocalLibraryDB(storage_path) as db: - resolved_path = db.search_by_hash(file_hash) - if resolved_path and resolved_path.exists(): - source_path = resolved_path - resolved_local = True - try: - source_size = source_path.stat().st_size - except OSError: - source_size = None - duration_sec = _ffprobe_duration_seconds(source_path) - except Exception: - pass - - if not resolved_local: - try: - client = hydrus_wrapper.get_client(config) - except Exception as exc: - log(f"Hydrus client unavailable: {exc}") - return 1 - - if client is None: - log("Hydrus client unavailable") - return 1 - - # Fetch metadata and tags (needed for both -metadata flag and audio tagging) - # Fetch tags - try: - tags_payload = client.fetch_file_metadata(hashes=[file_hash], include_service_keys_to_tags=True) - except Exception: - tags_payload = {} - - # Fetch URLs - try: - urls_payload = client.fetch_file_metadata(hashes=[file_hash], include_file_urls=True) - except Exception: - urls_payload = {} - - # Extract title from metadata if base_name is still 'export' - if base_name == 'export' and tags_payload: - try: - file_metadata = tags_payload.get('file_metadata', []) - if 
file_metadata and isinstance(file_metadata, list) and len(file_metadata) > 0: - meta = file_metadata[0] - if isinstance(meta, dict): - tags_dict = meta.get('tags', {}) - if isinstance(tags_dict, dict): - # Look for title in storage tags - for service in tags_dict.values(): - if isinstance(service, dict): - storage = service.get('storage_tags', {}) - if isinstance(storage, dict): - for tag_list in storage.values(): - if isinstance(tag_list, list): - for tag in tag_list: - if isinstance(tag, str) and tag.lower().startswith('title:'): - title_val = tag.split(':', 1)[1].strip() - if title_val: - base_name = _sanitize_name(title_val) - break - if base_name != 'export': - break - if base_name != 'export': - break - except Exception: - pass - - # Normal file export (happens regardless of -metadata flag) - try: - from helper.hydrus import hydrus_export as _hydrus_export - except Exception: - _hydrus_export = None # type: ignore - if _hydrus_export is None: - log("Hydrus export helper unavailable") - return 1 - download_dir = out_override if (out_override and out_override.is_dir()) else default_dir - try: - download_dir.mkdir(parents=True, exist_ok=True) - except Exception: - # If mkdir fails, fall back to default_dir - download_dir = default_dir - - # Verify the directory is writable; if not, fall back to default - try: - test_file = download_dir / f".downlow_write_test_{_uuid.uuid4().hex[:8]}" - test_file.touch() - test_file.unlink() - except (OSError, PermissionError): - # Directory is not writable, use default_dir instead - download_dir = default_dir - try: - download_dir.mkdir(parents=True, exist_ok=True) - except Exception: - pass - token = (_uuid.uuid4().hex[:8]) - provisional_stem = f"{base_name}.dlhx_{token}" - provisional = download_dir / f"{provisional_stem}.bin" - class _Args: - pass - args_obj = _Args() - setattr(args_obj, 'output', provisional) - setattr(args_obj, 'format', 'copy') - setattr(args_obj, 'tmp_dir', str(download_dir)) - setattr(args_obj, 'metadata_json', None) - setattr(args_obj, 'hydrus_url', get_hydrus_url(config, "home") or "http://localhost:45869") - setattr(args_obj, 'access_key', get_hydrus_access_key(config, "home") or "") - setattr(args_obj, 'timeout', float(config.get('HydrusNetwork_Request_Timeout') or 60.0)) - try: - file_url = client.file_url(file_hash) - except Exception: - file_url = None - setattr(args_obj, 'file_url', file_url) - setattr(args_obj, 'file_hash', file_hash) - import io as _io, contextlib as _contextlib - _buf = _io.StringIO() - status = 1 - with _contextlib.redirect_stdout(_buf): - status = _hydrus_export(args_obj, None) - if status != 0: - stderr_text = _buf.getvalue().strip() - if stderr_text: - log(stderr_text) - return status - json_text = _buf.getvalue().strip().splitlines()[-1] if _buf.getvalue() else '' - final_from_json: Optional[Path] = None - try: - payload = json.loads(json_text) if json_text else None - if isinstance(payload, dict): - outp = payload.get('output') - if isinstance(outp, str) and outp: - final_from_json = Path(outp) - except Exception: - final_from_json = None - if final_from_json and final_from_json.exists(): - source_path = final_from_json + def _sanitize_filename(self, name: str) -> str: + """Sanitize filename by removing invalid characters.""" + allowed_chars = [] + for ch in str(name): + if ch.isalnum() or ch in {'-', '_', ' ', '.'}: + allowed_chars.append(ch) else: - candidates = [p for p in provisional.parent.glob(provisional_stem + '*') if p.exists() and p.is_file()] - non_provisional = [p for p in candidates 
if p.suffix.lower() not in {'.bin', '.hydrus'}] - pick_from = non_provisional if non_provisional else candidates - if pick_from: - try: - source_path = max(pick_from, key=lambda p: p.stat().st_mtime) - except Exception: - source_path = pick_from[0] - else: - source_path = provisional - candidates = [p for p in provisional.parent.glob(provisional_stem + '*') if p.exists() and p.is_file()] - non_provisional = [p for p in candidates if p.suffix.lower() not in {'.bin', '.hydrus'}] - pick_from = non_provisional if non_provisional else candidates - if pick_from: - try: - source_path = max(pick_from, key=lambda p: p.stat().st_mtime) - except Exception: - source_path = pick_from[0] - else: - source_path = provisional - try: - source_size = source_size or (source_path.stat().st_size if source_path.exists() else None) - except OSError: - source_size = source_size - if duration_sec is None: - duration_sec = _ffprobe_duration_seconds(source_path) - cleanup_source = True - else: - log("Selected result is neither a local file nor a Hydrus record") - return 1 - - convert = (str(convert_spec or '').strip().lower()) - if convert not in {'', 'copy', 'mp4', 'webm', 'audio', 'mp3', 'opus'}: - log(f"Unsupported Convert value: {convert_spec}") - return 1 - if not convert: - convert = 'copy' - input_kind = media_kind or _guess_kind_from_suffix(source_path) - if input_kind == 'audio' and convert in {'mp4', 'webm'}: - log("Cannot convert audio to video") - return 1 - - def _ext_for_convert(conv: str, src: Path) -> str: - if conv == 'mp4': - return '.mp4' - if conv == 'webm': - return '.webm' - if conv in {'audio', 'mp3'}: - return '.mp3' - if conv == 'opus': - return '.opus' - return src.suffix or '' - - auto_named = True - if out_override is not None and out_override.exists() and out_override.is_dir(): - dest_dir = out_override - dest_ext = _ext_for_convert(convert, source_path) - dest_path = dest_dir / f"{base_name}{dest_ext}" - else: - dest_dir = default_dir - dest_ext = _ext_for_convert(convert, source_path) - if out_override and not out_override.exists() and not str(out_override).endswith(('/', '\\')): - dest_path = out_override - auto_named = False - else: - dest_path = (dest_dir / f"{base_name}{dest_ext}") - - if source_size is None: - try: - source_size = source_path.stat().st_size - except OSError: - source_size = None - if source_size is None: - log("Unable to determine source size for sizing logic; proceeding without Size targeting") - target_bytes = None - else: - target_bytes = _compute_target_bytes(size_spec, int(source_size)) - if target_bytes and (source_size or 0): - try: - from ..downlow import _fmt_bytes as _fmt_bytes_helper - except ImportError: - try: - from downlow import _fmt_bytes as _fmt_bytes_helper # type: ignore - except ImportError: - _fmt_bytes_helper = lambda x: f"{x} bytes" # type: ignore - except Exception: - _fmt_bytes_helper = lambda x: f"{x} bytes" # type: ignore - ctx.emit(f"Resizing target: {_fmt_bytes_helper(source_size)} -> {_fmt_bytes_helper(target_bytes)}") - - cleanup_source = locals().get('cleanup_source', False) - if convert == 'copy' and (not target_bytes or target_bytes >= (source_size or 0)): - # Simple copy without FFmpeg processing - # Only skip this if we need to write metadata (then FFmpeg handles it) - if not (export_metadata or (tags_payload and tags_payload.get('metadata'))): - try: - dest_path.parent.mkdir(parents=True, exist_ok=True) - final_dest = _unique_path(dest_path) - _shutil.copy2(source_path, final_dest) - ctx.emit(f"Exported to {final_dest}") - 
log(f"Exported: {final_dest}", file=sys.stderr) - if cleanup_source: - try: - if source_path.exists() and source_path != final_dest: - source_path.unlink() - except OSError: - pass - - return 0 - except Exception as exc: - log(f"Copy failed: {exc}") - return 1 - else: - # Metadata exists, so we need to go through FFmpeg to embed and write sidecar - # Fall through to FFmpeg section below - pass - - convert_effective = convert - if convert == 'copy' and target_bytes and (source_size or 0) > target_bytes: - if input_kind == 'video': - convert_effective = 'mp4' - elif input_kind == 'audio': - convert_effective = 'copy' - else: - convert_effective = convert - - ffmpeg_path = _shutil.which('ffmpeg') - if not ffmpeg_path: - log("ffmpeg executable not found in PATH") - return 1 - - # Extract metadata from tags to embed in file - file_metadata = _extract_metadata_from_tags(tags_payload, file_hash or '', input_kind) - if file_metadata: - metadata_msg = ', '.join(f'{k}={v}' for k, v in file_metadata.items()) - ctx.emit(f"[metadata] Embedding: {metadata_msg}") - ctx.print_if_visible(f"[get-file] Embedding metadata: {metadata_msg}", file=sys.stderr) - else: - ctx.print_if_visible(f"[get-file] No metadata tags found to embed", file=sys.stderr) - - cmd: list[str] = [ffmpeg_path, '-y', '-i', str(source_path)] - - # Add metadata flags to FFmpeg command - for key, value in file_metadata.items(): - cmd.extend(['-metadata', f'{key}={value}']) - - conv = convert_effective - if conv in {'mp4', 'webm', 'copy'}: - video_bitrate: Optional[int] = None - audio_bitrate: int = 128_000 - if target_bytes and duration_sec and duration_sec > 0: - total_bps = max(1, int((target_bytes * 8) / duration_sec)) - if total_bps <= audio_bitrate + 50_000: - if input_kind == 'video': - video_bitrate = max(50_000, total_bps - audio_bitrate) - else: - video_bitrate = None - else: - video_bitrate = total_bps - audio_bitrate - if conv == 'webm': - cmd += ['-c:v', 'libvpx-vp9'] - if video_bitrate: - cmd += ['-b:v', str(video_bitrate)] - else: - cmd += ['-b:v', '0', '-crf', '32'] - cmd += ['-c:a', 'libopus', '-b:a', '160k'] - elif conv == 'mp4' or (conv == 'copy' and input_kind == 'video'): - cmd += ['-c:v', 'libx265', '-preset', 'medium', '-tag:v', 'hvc1', '-pix_fmt', 'yuv420p'] - if video_bitrate: - cmd += ['-b:v', str(video_bitrate)] - else: - cmd += ['-crf', '26'] - cmd += ['-c:a', 'aac', '-b:a', '192k'] - if conv == 'mp4' or (conv == 'copy' and input_kind == 'video'): - cmd += ['-movflags', '+faststart'] - if convert_spec and conv != 'copy': - ctx.emit(f"Converting video -> {conv} (duration={duration_sec or 'unknown'}s)") - else: - if target_bytes and duration_sec and duration_sec > 0: - total_bps = max(1, int((target_bytes * 8) / duration_sec)) - abr = max(32_000, min(320_000, total_bps)) - else: - abr = 192_000 - if conv in {'audio', 'mp3'}: - cmd += ['-vn', '-c:a', 'libmp3lame', '-b:a', str(abr)] - elif conv == 'opus': - cmd += ['-vn', '-c:a', 'libopus', '-b:a', str(abr)] - else: - ext = (source_path.suffix.lower() if source_path else '') - if ext in {'.mp3'}: - cmd += ['-vn', '-c:a', 'libmp3lame', '-b:a', str(abr)] - elif ext in {'.opus', '.ogg'}: - cmd += ['-vn', '-c:a', 'libopus', '-b:a', str(abr)] - elif ext in {'.m4a', '.aac'}: - cmd += ['-vn', '-c:a', 'aac', '-b:a', str(abr)] - else: - cmd += ['-vn', '-c:a', 'libmp3lame', '-b:a', str(abr)] - if convert_spec and conv != 'copy': - ctx.emit(f"Converting audio -> {conv}") - - if conv in {'audio','mp3'}: - desired_ext = '.mp3' - elif conv == 'opus': - desired_ext = '.opus' - elif 
conv == 'webm': - desired_ext = '.webm' - elif conv == 'mp4': - desired_ext = '.mp4' - else: - desired_ext = source_path.suffix - if (not dest_path.suffix) or auto_named or (dest_path.suffix.lower() in {'.hydrus', '.bin'}): - dest_path = dest_path.with_suffix(desired_ext) - - suffix_parts: list[str] = [] - def _size_label(raw: Optional[str], tb: Optional[int]) -> Optional[str]: - if not raw: - return None - text = str(raw).strip() - if text.endswith('%'): - return text - if not tb: - return None - mb = int(round(tb / (1024*1024))) - return f"{mb}Mb" - label = _size_label(size_spec, locals().get('target_bytes')) - if label: - suffix_parts.append(label) - if convert_spec and convert.lower() != 'copy': - label_map = {'mp4':'MP4','webm':'WEBM','audio':'AUDIO','mp3':'MP3','opus':'OPUS'} - suffix_parts.append(label_map.get(convert.lower(), convert.upper())) - if suffix_parts and auto_named: - _aug = f"{base_name} (" + ",".join(suffix_parts) + ")" - dest_path = dest_path.with_name(_aug + dest_path.suffix) - - try: - dest_path.parent.mkdir(parents=True, exist_ok=True) - final_dest = _unique_path(dest_path) - cmd.append(str(final_dest)) - completed = _subprocess.run(cmd, stdout=_subprocess.PIPE, stderr=_subprocess.PIPE, text=True) - if completed.returncode != 0: - stderr = (completed.stderr or '').strip() - log(f"ffmpeg failed ({completed.returncode}): {stderr}") - return 1 - ctx.emit(f"Exported to {final_dest}") - log(f"Exported: {final_dest}", file=sys.stderr) + allowed_chars.append(' ') - # Always write the .tags sidecar with metadata (hash, tags, URLs) - # This ensures metadata is preserved even if FFmpeg embedding didn't work - try: - metadata_lines = [] - - # Add hash - if file_hash: - metadata_lines.append(f"hash:{file_hash}") - - # Extract tags from metadata payload using correct structure - tags_set = set() - if 'metadata' in tags_payload and tags_payload['metadata']: - entry = tags_payload['metadata'][0] - if 'tags' in entry and isinstance(entry['tags'], dict): - for _service_key, service_data in entry['tags'].items(): - if isinstance(service_data, dict): - display_tags = service_data.get('display_tags', {}) - if isinstance(display_tags, dict): - current_tags = display_tags.get('0', []) - if isinstance(current_tags, list): - tags_set.update(current_tags) - - # Add tags (sorted, no prefix) - for tag in sorted(tags_set): - metadata_lines.append(tag) - - # Extract and add URLs - if 'metadata' in urls_payload and urls_payload['metadata']: - entry = urls_payload['metadata'][0] - if 'known_urls' in entry and isinstance(entry['known_urls'], list): - for url in entry['known_urls']: - metadata_lines.append(f"known_url:{url}") - - # Write sidecar if we have any metadata - if metadata_lines: - sidecar_path = final_dest.parent / f"{final_dest.name}.tags" - sidecar_path.write_text('\n'.join(metadata_lines), encoding='utf-8') - ctx.emit(f"Sidecar: {sidecar_path.name}") - log(f"Tags file: {sidecar_path}", file=sys.stderr) - except Exception as exc: - log(f"Warning: Could not write metadata sidecar: {exc}", file=sys.stderr) + # Collapse multiple spaces + sanitized = ' '.join(''.join(allowed_chars).split()) + return sanitized or "export" + + def _unique_path(self, path: Path) -> Path: + """Generate unique path by adding (1), (2), etc. 
if file exists.""" + if not path.exists(): + return path - if cleanup_source: - try: - if source_path.exists() and source_path != final_dest: - source_path.unlink() - except OSError: - pass - return 0 - except Exception as exc: - log(f"Export failed: {exc}") - return 1 - - -def _unique_path(p: Path) -> Path: - if not p.exists(): - return p - stem = p.stem - suffix = p.suffix - parent = p.parent - for i in range(1, 1000): - candidate = parent / f"{stem} ({i}){suffix}" - if not candidate.exists(): - return candidate - return p - - -def _handle_ytdlp_download(url: str, title: str, config: Dict[str, Any], args: Sequence[str]) -> int: - """Handle download/streaming of URL using yt-dlp.""" - if not url: - log("Error: No URL provided", file=sys.stderr) - return 1 + stem = path.stem + suffix = path.suffix + parent = path.parent - # Check for -storage local - args_list = list(map(str, args)) - storage_mode = None - if '-storage' in args_list: - try: - idx = args_list.index('-storage') - if idx + 1 < len(args_list): - storage_mode = args_list[idx + 1].lower() - except ValueError: - pass - - force_local = (storage_mode == 'local') - - if not force_local: - # Default: Stream to MPV - if _play_in_mpv(url, title, is_stream=True): - try: - from cmdnats import pipe as mpv_pipe - mpv_pipe._run(None, [], config) - except Exception: - pass - return 0 - else: - # Fallback to browser - try: - import webbrowser - webbrowser.open(url) - debug(f"[get-file] Opened in browser: {title}", file=sys.stderr) - return 0 - except Exception: - pass - return 1 - - # Download mode - try: - import yt_dlp - except ImportError: - log("Error: yt-dlp not installed. Please install it to download.", file=sys.stderr) - return 1 - - log(f"Downloading {title}...", file=sys.stderr) - - # Determine output directory - download_dir = resolve_output_dir(config) - try: - download_dir.mkdir(parents=True, exist_ok=True) - except Exception: - pass - - # Configure yt-dlp - ydl_opts = { - 'outtmpl': str(download_dir / '%(title)s.%(ext)s'), - 'quiet': False, - 'no_warnings': True, - # Use best audio/video - 'format': 'best', - } - - try: - with yt_dlp.YoutubeDL(ydl_opts) as ydl: - ydl.download([url]) - log(f"Downloaded to: {download_dir}", file=sys.stderr) - return 0 - except Exception as e: - log(f"Error downloading: {e}", file=sys.stderr) - return 1 + counter = 1 + while True: + new_path = parent / f"{stem} ({counter}){suffix}" + if not new_path.exists(): + return new_path + counter += 1 -CMDLET = Cmdlet( - name="get-file", - summary="Export files: from Hydrus database OR from AllDebrid magnets via pipe. 
Auto-detects source and handles accordingly.", - usage="get-file [-Path ] [Size <50%|34MB>] [Convert ] [-metadata] [-file ]", - args=[ - CmdletArg("Path", description="Output directory for files."), - CmdletArg("Size", description="Target size (Hydrus only): 50% or 34MB."), - CmdletArg("Convert", description="Convert format (Hydrus only): mp4, webm, audio, mp3, opus."), - CmdletArg("metadata", type="flag", description="Export metadata to .tags file (Hydrus only)."), - CmdletArg("file", description="Filter files by pattern (AllDebrid only)."), - ], - details=[ - "Hydrus mode: exports media with optional size/format conversion", - "AllDebrid mode: downloads files from piped magnet IDs from search-debrid", - "Auto-detects pipe format and routes to correct handler", - "Magnet pipe format: ID|filename|size|statusCode|status|progress|...", - ], - -) \ No newline at end of file +# Instantiate and register cmdlet +Add_File_Instance = Get_File() diff --git a/cmdlets/get_file.py.backup b/cmdlets/get_file.py.backup new file mode 100644 index 0000000..f2c8cfe --- /dev/null +++ b/cmdlets/get_file.py.backup @@ -0,0 +1,1708 @@ +from __future__ import annotations + +from typing import Any, Callable, Dict, List, Optional, Sequence +from pathlib import Path +import shutil as _shutil +import subprocess as _subprocess +import json +import sys +import platform + +import threading + +from helper.logger import log, debug +import uuid as _uuid +import time as _time + +from helper.progress import print_progress, print_final_progress +from helper.http_client import HTTPClient +from helper.mpv_ipc import get_ipc_pipe_path, send_to_mpv, MPV_LUA_SCRIPT_PATH +import fnmatch as _fnmatch + +from . import register +import models +import pipeline as ctx +from helper import hydrus as hydrus_wrapper +from ._shared import Cmdlet, CmdletArg, normalize_hash, looks_like_hash, create_pipe_object_result, get_origin, get_field, should_show_help +from config import resolve_output_dir, get_hydrus_url, get_hydrus_access_key +from helper.alldebrid import AllDebridClient + +DEFAULT_DEBRID_WAIT_TIMEOUT = 600 +DEBRID_WORKER_PREFIX = "debrid_" + + + + + +def _is_alldebrid_pipe_data(line: str) -> bool: + """Check if line is AllDebrid pipe format: ID|filename|size|...""" + parts = line.strip().split('|') + if len(parts) < 5: + return False + try: + # Check if first part is magnet ID (integer) + magnet_id = int(parts[0]) + # Check if 3rd part (size) is integer + size = int(parts[2]) + # Check if 4th part (status_code) is integer + status_code = int(parts[3]) + return magnet_id > 0 and size >= 0 and status_code in {0, 1, 2, 3, 4} + except (ValueError, IndexError): + return False + + +def _handle_alldebrid_pipe(config: Dict[str, Any], args: Sequence[str]) -> int: + """Handle AllDebrid magnet downloads from piped stdin.""" + # Parse arguments + out_path = None + file_filter = None + i = 0 + while i < len(args): + if args[i].lower() in {"-path", "--path", "path"} and i + 1 < len(args): + out_path = Path(args[i + 1]).expanduser() + i += 2 + elif args[i].lower() in {"-file", "--file", "file"} and i + 1 < len(args): + file_filter = args[i + 1] + i += 2 + else: + i += 1 + + if not out_path: + log("✗ -path required for AllDebrid downloads", file=sys.stderr) + return 1 + + # Read magnet IDs from stdin + magnets = [] + try: + for line in sys.stdin: + line = line.strip() + if line and _is_alldebrid_pipe_data(line): + parts = line.split('|') + magnet_id = int(parts[0]) + magnets.append(magnet_id) + except Exception as e: + log(f"✗ Error reading stdin: 
{e}", file=sys.stderr) + return 1 + + if not magnets: + log("✗ No valid magnet IDs in pipe", file=sys.stderr) + return 1 + + return _queue_alldebrid_worker( + config=config, + output_dir=out_path, + magnet_ids=magnets, + title=f"AllDebrid pipe ({len(magnets)} magnet{'s' if len(magnets) != 1 else ''})", + file_filter=file_filter, + ) + + +def _extract_files_from_magnet(magnet_info: Dict[str, Any], filter_pattern: Optional[str] = None) -> list: + """Extract files from magnet file tree, optionally filtering by pattern.""" + files = [] + + def traverse(items: Any, prefix: str = "") -> None: + if not isinstance(items, list): + return + for item in items: + if not isinstance(item, dict): + continue + name = item.get('n', '') + link = item.get('l', '') + size = item.get('s', 0) + entries = item.get('e', []) + + # File + if link: + full_path = f"{prefix}/{name}" if prefix else name + if filter_pattern is None or _fnmatch.fnmatch(name.lower(), filter_pattern.lower()): + files.append({'name': name, 'path': full_path, 'size': size, 'link': link}) + + # Folder + if entries: + full_path = f"{prefix}/{name}" if prefix else name + traverse(entries, full_path) + + items = magnet_info.get('files', []) + traverse(items) + return files + + +def _download_file_from_alldebrid(url: str, output_path: Path, filename: str, file_size: int) -> bool: + """Download a single file from AllDebrid with progress bar.""" + output_path.parent.mkdir(parents=True, exist_ok=True) + + try: + downloaded = 0 + chunk_size = 1024 * 1024 + start_time = _time.time() + last_update = start_time + + with HTTPClient(timeout=30.0, headers={'User-Agent': 'downlow/1.0'}) as client: + response = client.get(url) + response.raise_for_status() + with open(output_path, 'wb', buffering=1024*1024) as f: + for chunk in response.iter_bytes(chunk_size): + if not chunk: + break + f.write(chunk) + downloaded += len(chunk) + + # Update progress every 0.5 seconds to avoid spam + now = _time.time() + if now - last_update >= 0.5 or downloaded == file_size: + elapsed = now - start_time + speed = downloaded / elapsed if elapsed > 0 else 0 + print_progress(filename, downloaded, file_size, speed) + last_update = now + + # Print final progress line + elapsed = _time.time() - start_time + print_final_progress(filename, file_size, elapsed) + log(f"✓ {filename} downloaded", file=sys.stderr) + + return True + except Exception as e: + log(f"\n[get-file] ✗ Download error: {e}", file=sys.stderr) + return False + + +def _queue_alldebrid_worker( + config: Dict[str, Any], + output_dir: Path, + magnet_ids: Sequence[int], + title: str, + file_filter: Optional[str] = None, + wait_timeout: int = DEFAULT_DEBRID_WAIT_TIMEOUT, +): + """Spawn a background worker to download AllDebrid magnets.""" + from config import get_debrid_api_key + + if not magnet_ids: + log("✗ No magnet IDs provided for AllDebrid download", file=sys.stderr) + return 1 + + api_key = get_debrid_api_key(config) + if not api_key: + log("✗ AllDebrid API key not configured", file=sys.stderr) + return 1 + + worker_id = f"{DEBRID_WORKER_PREFIX}{_uuid.uuid4().hex[:8]}" + worker_manager = config.get('_worker_manager') + if worker_manager: + try: + worker_manager.track_worker( + worker_id, + worker_type="download_debrid", + title=title, + description=f"AllDebrid download for {title}", + pipe=ctx.get_current_command_text(), + ) + except Exception as exc: + debug(f"⚠ Failed to register AllDebrid worker: {exc}") + worker_manager = None + + thread = threading.Thread( + target=_run_alldebrid_download_worker, + args=( + 
worker_id, + api_key, + output_dir, + list(magnet_ids), + file_filter, + title, + worker_manager, + wait_timeout, + ), + daemon=False, + name=f"AllDebridWorker_{worker_id}" + ) + thread.start() + + ctx.emit({ + 'worker_id': worker_id, + 'worker_type': 'download_debrid', + 'status': 'running', + 'message': f"{title} (queued)", + }) + + log(f"🌀 AllDebrid download queued (worker {worker_id})", file=sys.stderr) + return 0 + + +def _run_alldebrid_download_worker( + worker_id: str, + api_key: str, + output_dir: Path, + magnet_ids: List[int], + file_filter: Optional[str], + title: str, + worker_manager: Optional[Any], + wait_timeout: int, +): + """Worker entrypoint that polls AllDebrid and downloads magnet files.""" + def log_progress(message: str) -> None: + safe = f"[Worker {worker_id}] {message}" + debug(safe) + if worker_manager: + try: + worker_manager.log_step(worker_id, message) + except Exception: + pass + + try: + client = AllDebridClient(api_key) + except Exception as exc: + log_progress(f"✗ Failed to initialize AllDebrid client: {exc}") + if worker_manager: + try: + worker_manager.finish_worker(worker_id, "failed", str(exc)) + except Exception: + pass + return + + output_dir.mkdir(parents=True, exist_ok=True) + total_downloaded = 0 + total_failed = 0 + + for magnet_id in magnet_ids: + log_progress(f"⧗ Processing magnet {magnet_id}") + try: + status_info = client.magnet_status(magnet_id) + except Exception as exc: + log_progress(f"✗ Failed to query magnet {magnet_id}: {exc}") + total_failed += 1 + continue + + try: + ready_status = _wait_for_magnet_ready(client, magnet_id, log_progress, wait_timeout) + except Exception as exc: + log_progress(f"✗ Magnet {magnet_id} did not become ready: {exc}") + total_failed += 1 + continue + + try: + magnet_info = client.magnet_status(magnet_id, include_files=True) + except Exception as exc: + log_progress(f"✗ Failed to list files for magnet {magnet_id}: {exc}") + total_failed += 1 + continue + + files_list = _extract_files_from_magnet(magnet_info, file_filter) + if not files_list: + log_progress(f"⊘ Magnet {magnet_id} has no files") + total_failed += 1 + continue + + for file_info in files_list: + name = file_info.get('name', 'unknown') + log_progress(f"⇓ Downloading {name}") + link = file_info.get('link') + if not link: + log_progress(f"✗ Missing link for {name}") + total_failed += 1 + continue + + try: + direct_url = client.unlock_link(link) + except Exception as exc: + log_progress(f"✗ Failed to unlock {name}: {exc}") + total_failed += 1 + continue + + output_file = output_dir / name + if _download_file_from_alldebrid(direct_url, output_file, name, file_info.get('size', 0)): + total_downloaded += 1 + else: + total_failed += 1 + + if total_downloaded or total_failed: + summary = f"{total_downloaded} file(s) downloaded, {total_failed} failed" + else: + summary = "No files were processed" + + log(f"✓ AllDebrid worker {worker_id}: {summary}", file=sys.stderr) + if worker_manager: + status = "success" if total_downloaded > 0 else "failed" + try: + worker_manager.finish_worker(worker_id, status, summary if status == "failed" else "") + except Exception: + pass + + +def _wait_for_magnet_ready( + client: AllDebridClient, + magnet_id: int, + log_progress: Callable[[str], None], + wait_timeout: int, +) -> Dict[str, Any]: + elapsed = 0 + last_report = -5 + while elapsed < wait_timeout: + try: + status = client.magnet_status(magnet_id) + except Exception as exc: + log_progress(f"⚠ Live status check failed: {exc}") + _time.sleep(2) + elapsed += 2 + continue + + 
status_code = int(status.get('statusCode', -1)) + if status_code == 4: + return status + if status_code >= 5: + raise RuntimeError(status.get('status', f"Failed code {status_code}")) + if elapsed - last_report >= 5: + downloaded = status.get('downloaded', 0) + size = status.get('size', 0) + percent = (downloaded / size * 100) if size else 0 + log_progress(f"⧗ {status.get('status', 'processing')} — {percent:.1f}%") + last_report = elapsed + _time.sleep(2) + elapsed += 2 + raise TimeoutError(f"Magnet {magnet_id} not ready after {wait_timeout}s") + + +def _is_playable_in_mpv(file_path_or_ext: str, mime_type: Optional[str] = None) -> bool: + """Check if file can be played in MPV based on extension or mime type.""" + from helper.utils_constant import mime_maps + + # Check mime type first if provided + if mime_type: + mime_lower = mime_type.lower() + # Simple prefix check for common media types + if any(mime_lower.startswith(prefix) for prefix in ['video/', 'audio/', 'image/']): + return True + + # Extract extension + if file_path_or_ext.startswith('.'): + ext = file_path_or_ext.lower() + else: + ext = Path(file_path_or_ext).suffix.lower() + + if not ext: + return False + + # Check if extension is in playable categories + playable_categories = ['video', 'audio', 'image', 'image_sequence'] + + for category in playable_categories: + if category in mime_maps: + for key, info in mime_maps[category].items(): + if info.get('ext', '').lower() == ext: + return True + return False + + +def _play_in_mpv(file_url: str, file_title: str, is_stream: bool = False, headers: Optional[Dict[str, str]] = None) -> bool: + """Play file in MPV using centralized IPC pipe, creating new instance if needed. + + Returns True on success, False on error. + """ + try: + # First try to send to existing MPV instance + if send_to_mpv(file_url, file_title, headers): + debug(f"Added to MPV: {file_title}") + return True + + # No existing MPV or pipe unavailable - start new instance + ipc_pipe = get_ipc_pipe_path() + debug(f"[get-file] Starting new MPV instance (pipe: {ipc_pipe})", file=sys.stderr) + + # Build command - start MPV without a file initially, just with IPC server and our Lua helper + cmd = ['mpv', f'--input-ipc-server={ipc_pipe}'] + try: + if MPV_LUA_SCRIPT_PATH and Path(MPV_LUA_SCRIPT_PATH).exists(): + cmd.append(f"--scripts-append={MPV_LUA_SCRIPT_PATH}") + except Exception: + pass + + if headers: + # Format headers for command line + # --http-header-fields="Header1: Val1,Header2: Val2" + header_str = ",".join([f"{k}: {v}" for k, v in headers.items()]) + cmd.append(f'--http-header-fields={header_str}') + + # Add --idle flag so MPV stays running and waits for playlist commands + cmd.append('--idle') + + # Detach process to prevent freezing parent CLI + kwargs = {} + if platform.system() == 'Windows': + kwargs['creationflags'] = 0x00000008 # DETACHED_PROCESS + + _subprocess.Popen(cmd, stdin=_subprocess.DEVNULL, stdout=_subprocess.DEVNULL, stderr=_subprocess.DEVNULL, **kwargs) + + debug(f"[get-file] Started MPV instance (IPC: {ipc_pipe})", file=sys.stderr) + + # Give MPV time to start and open IPC pipe + # Windows needs more time than Unix + wait_time = 1.0 if platform.system() == 'Windows' else 0.5 + debug(f"[get-file] Waiting {wait_time}s for MPV to initialize IPC...", file=sys.stderr) + _time.sleep(wait_time) + + # Try up to 3 times to send the file via IPC + for attempt in range(3): + debug(f"[get-file] Sending file via IPC (attempt {attempt + 1}/3)", file=sys.stderr) + if send_to_mpv(file_url, file_title, headers): + 
debug(f"{'Streaming' if is_stream else 'Playing'} in MPV: {file_title}") + debug(f"[get-file] Added to new MPV instance (IPC: {ipc_pipe})", file=sys.stderr) + return True + + if attempt < 2: + # Wait before retrying + _time.sleep(0.3) + + # IPC send failed after all retries + log("Error: Could not send file to MPV via IPC after startup", file=sys.stderr) + return False + + except FileNotFoundError: + log("Error: MPV not found. Install mpv to play media files", file=sys.stderr) + return False + except Exception as e: + log(f"Error launching MPV: {e}", file=sys.stderr) + return False + + +# Backward-compatible alias for modules expecting the old IPC helper name. +def _get_fixed_ipc_pipe() -> str: + """Return the shared MPV IPC pipe path (compat shim).""" + return get_ipc_pipe_path() + + +def _handle_search_result(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: + """Handle a file from search-file results using FileStorage backend.""" + try: + from helper.store import FileStorage + + # Helper to get field from both dict and object + # Extract file information from ResultItem + storage_name = get_origin(result) + file_hash = get_field(result, 'hash_hex', None) + # Also check for file_hash field (from add-file and other cmdlets) + if not file_hash: + file_hash = get_field(result, 'file_hash', None) + file_title = get_field(result, 'title', 'file') + mime_type = get_field(result, 'mime', None) + file_path = get_field(result, 'target', None) + # Also check for 'file_path' field (from add-file and other cmdlets) + if not file_path: + file_path = get_field(result, 'file_path', None) + # Also check for 'path' field (from search-file and other cmdlets) + if not file_path: + file_path = get_field(result, 'path', None) + + full_metadata = get_field(result, 'full_metadata', {}) + magnet_id = full_metadata.get('magnet_id') if isinstance(full_metadata, dict) else None + + if not storage_name: + log("Error: No storage backend specified in result", file=sys.stderr) + return 1 + + debug(f"[get-file] Retrieving file from storage: {storage_name}", file=sys.stderr) + + # Handle different storage backends + if storage_name.lower() == 'hydrus': + return _handle_hydrus_file(file_hash, file_title, config, args, mime_type=mime_type) + elif storage_name.lower() == 'local': + return _handle_local_file(file_path, file_title, config, args, file_hash=file_hash) + elif storage_name.lower() == 'download': + # Downloads are local files + return _handle_local_file(file_path, file_title, config, args, file_hash=file_hash) + elif storage_name.lower() == 'debrid': + # Extract magnet_id from result (search-file stores it in full_metadata or as custom attribute) + if not magnet_id: + magnet_id = get_field(result, 'magnet_id', None) + if not magnet_id: + log("Error: No magnet ID in debrid result", file=sys.stderr) + return 1 + return _handle_debrid_file(magnet_id, file_title, config, args) + elif storage_name.lower() in {'bandcamp', 'youtube'}: + # Handle Bandcamp/YouTube via yt-dlp + url = get_field(result, 'target', None) + if not url: + # Try to find URL in other fields + url = get_field(result, 'url', None) + + if not url: + log(f"Error: No URL found for {storage_name} result", file=sys.stderr) + return 1 + + return _handle_ytdlp_download(url, file_title, config, args) + else: + log(f"Unknown storage backend: {storage_name}", file=sys.stderr) + return 1 + + except Exception as e: + log(f"Error processing search result: {e}", file=sys.stderr) + import traceback + traceback.print_exc(file=sys.stderr) + return 1 
+ + +def _handle_hydrus_file(file_hash: Optional[str], file_title: str, config: Dict[str, Any], args: Sequence[str], mime_type: Optional[str] = None) -> int: + """Handle file from Hydrus - auto-play in MPV if media file, otherwise open web URL.""" + if not file_hash: + log("Error: No file hash provided", file=sys.stderr) + return 1 + + try: + hydrus_url = get_hydrus_url(config) + access_key = get_hydrus_access_key(config) + + if not hydrus_url or not access_key: + log("Error: Hydrus not configured", file=sys.stderr) + return 1 + + # Check if it's a playable media file based on filename or mime type + is_media = _is_playable_in_mpv(file_title) + if not is_media and mime_type: + # Check mime type if filename check failed + if any(m in mime_type.lower() for m in ['video/', 'audio/', 'image/']): + is_media = True + + force_mpv = any(str(a).lower() in {'-mpv', '--mpv', 'mpv'} for a in args) + force_browser = any(str(a).lower() in {'-web', '--web', 'web', '-browser', '--browser'} for a in args) + + # Check MPV availability + from hydrus_health_check import check_mpv_availability + mpv_available, _ = check_mpv_availability() + + # Construct url for streaming/viewing + # For streaming, we use headers for auth, so we don't put the key in the URL + stream_url = f"{hydrus_url}/get_files/file?hash={file_hash}" + # For browser, we still need the key in the URL + web_url = f"{hydrus_url}/get_files/file?hash={file_hash}&Hydrus-Client-API-Access-Key={access_key}" + + headers = { + "Hydrus-Client-API-Access-Key": access_key + } + + if force_browser: + # User explicitly wants browser + ipc_pipe = get_ipc_pipe_path() + result_dict = create_pipe_object_result( + source='hydrus', + identifier=file_hash, + file_path=web_url, + cmdlet_name='get-file', + title=file_title, + file_hash=file_hash, + extra={ + 'ipc': ipc_pipe, + 'action_type': 'browser', + 'web_url': web_url, + 'hydrus_url': hydrus_url, + 'access_key': access_key + } + ) + ctx.emit(result_dict) + try: + import webbrowser + webbrowser.open(web_url) + debug(f"[get-file] Opened in browser: {file_title}", file=sys.stderr) + except Exception: + pass + return 0 + elif force_mpv or (is_media and mpv_available): + # Auto-play in MPV for media files (if available), or user requested it + if _play_in_mpv(stream_url, file_title, is_stream=True, headers=headers): + # Show unified MPV playlist view (reuse cmdnats.pipe display) + try: + from cmdnats import pipe as mpv_pipe + mpv_pipe._run(None, [], config) + except Exception: + pass + return 0 + else: + # Fall back to browser + try: + import webbrowser + webbrowser.open(web_url) + debug(f"[get-file] Opened in browser instead", file=sys.stderr) + except Exception: + pass + return 0 + else: + # Not media, open in browser + ipc_pipe = get_ipc_pipe_path() + result_dict = create_pipe_object_result( + source='hydrus', + identifier=file_hash, + file_path=web_url, + cmdlet_name='get-file', + title=file_title, + file_hash=file_hash, + extra={ + 'ipc': ipc_pipe, + 'action_type': 'browser', + 'web_url': web_url, + 'hydrus_url': hydrus_url, + 'access_key': access_key + } + ) + ctx.emit(result_dict) + try: + import webbrowser + webbrowser.open(web_url) + debug(f"[get-file] Opened in browser: {file_title}", file=sys.stderr) + except Exception: + pass + return 0 + + except Exception as e: + log(f"Error handling Hydrus file: {e}", file=sys.stderr) + import traceback + traceback.print_exc(file=sys.stderr) + return 1 + + +def _handle_local_file(file_path: Optional[str], file_title: str, config: Dict[str, Any], args: Sequence[str], 
file_hash: Optional[str] = None) -> int: + """Handle file from local storage - auto-play in MPV if media, otherwise open with default app.""" + if not file_path: + log("Error: No file path provided", file=sys.stderr) + return 1 + + try: + source = Path(file_path) + if not source.exists(): + # Try to resolve by hash if the path looks like a hash + resolved_local = False + if looks_like_hash(str(file_path)): + try: + from config import get_local_storage_path + from helper.folder_store import FolderDB + storage_path = get_local_storage_path(config) + if storage_path: + with FolderDB(storage_path) as db: + resolved_path = db.search_hash(str(file_path)) + if resolved_path and resolved_path.exists(): + source = resolved_path + file_path = str(resolved_path) + resolved_local = True + # Also set file_hash since we know it + file_hash = str(file_path) + except Exception: + pass + + if not resolved_local: + log(f"Error: File not found: {file_path}", file=sys.stderr) + return 1 + + # Check for explicit user flags + force_mpv = any(str(a).lower() in {'-mpv', '--mpv', 'mpv'} for a in args) + force_default = any(str(a).lower() in {'-open', '--open', 'open'} for a in args) + + # Check if it's a playable media file + is_media = _is_playable_in_mpv(str(source)) + + # Check MPV availability + from hydrus_health_check import check_mpv_availability + mpv_available, _ = check_mpv_availability() + + if force_default: + # User explicitly wants default application + import subprocess as sp + import platform + import os + try: + if platform.system() == 'Darwin': # macOS + sp.run(['open', file_path]) + elif platform.system() == 'Windows': + os.startfile(file_path) + else: # Linux + sp.run(['xdg-open', file_path]) + ctx.emit(f"Opened: {file_title}") + debug(f"[get-file] Opened {file_title} with default app", file=sys.stderr) + return 0 + except Exception as e: + log(f"Error opening file: {e}", file=sys.stderr) + return 1 + elif force_mpv or (is_media and mpv_available): + # Auto-play in MPV for media files (if available), or user requested it + if _play_in_mpv(file_path, file_title, is_stream=False): + # Show unified MPV playlist view (reuse cmdnats.pipe display) + try: + from cmdnats import pipe as mpv_pipe + mpv_pipe._run(None, [], config) + except Exception: + pass + return 0 + else: + # Fall back to default application + try: + import os + import platform + if platform.system() == 'Darwin': # macOS + _subprocess.run(['open', file_path]) + elif platform.system() == 'Windows': + os.startfile(file_path) + else: # Linux + _subprocess.run(['xdg-open', file_path]) + debug(f"[get-file] Opened with default app instead", file=sys.stderr) + except Exception: + pass + return 0 + else: + # Not media - open with default application + import subprocess as sp + import platform + import os + try: + if platform.system() == 'Darwin': # macOS + sp.run(['open', file_path]) + elif platform.system() == 'Windows': + # Use os.startfile for more reliable Windows handling + os.startfile(file_path) + else: # Linux + sp.run(['xdg-open', file_path]) + print(f"Opened: {file_title}") + debug(f"[get-file] Opened {file_title} with default app", file=sys.stderr) + + # Emit result for downstream processing + result_dict = create_pipe_object_result( + source='local', + identifier=str(Path(file_path).stem) if file_path else 'unknown', + file_path=file_path, + cmdlet_name='get-file', + title=file_title, + file_hash=file_hash, + extra={'action_type': 'opened'} + ) + ctx.emit(result_dict) + return 0 + except Exception as e: + log(f"Error opening file 
with default app: {e}", file=sys.stderr) + return 1 + + except Exception as e: + log(f"Error handling local file: {e}", file=sys.stderr) + return 1 + + +def _handle_debrid_file(magnet_id: int, magnet_title: str, config: Dict[str, Any], args: Sequence[str]) -> int: + """Handle magnet file from AllDebrid storage - download to local path.""" + # Parse output path argument + out_path = None + i = 0 + args_list = [str(a) for a in args] + while i < len(args_list): + if args_list[i].lower() in {"-path", "--path", "path"} and i + 1 < len(args_list): + out_path = Path(args_list[i + 1]).expanduser() + i += 2 + else: + i += 1 + + if not out_path: + log("✗ -Path required for debrid downloads", file=sys.stderr) + return 1 + + # Ensure output directory exists + try: + out_path.mkdir(parents=True, exist_ok=True) + except Exception as e: + log(f"✗ Error creating output directory: {e}", file=sys.stderr) + return 1 + + return _queue_alldebrid_worker( + config=config, + output_dir=out_path, + magnet_ids=[magnet_id], + title=magnet_title or f"magnet {magnet_id}", + ) + + +@register(["get-file"]) # primary name +def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: + # Help: if any help token is present, print CMDLET JSON and exit + if should_show_help(args): + log(json.dumps(CMDLET, ensure_ascii=False, indent=2)) + return 0 + + # Check if result is a list (from @N selection) and extract the first item + actual_result = result + if isinstance(result, list) and len(result) > 0: + actual_result = result[0] + + # Check if this is a FileStorage search result (has origin field indicating a backend) + # This handles both dict and ResultItem objects + origin = get_origin(actual_result) + if origin and origin.lower() in {'hydrus', 'local', 'debrid', 'alldebrid', 'bandcamp', 'youtube'}: + # This is a search result with explicit origin - handle it via _handle_search_result + return _handle_search_result(actual_result, args, config) + + # Handle ResultItem from search-file via @N selection + # The result can be either: + # 1. A single ResultItem (direct call) + # 2. 
A list of ResultItems (from @N selection in CLI) + result_item = None + if result and hasattr(result, '__class__'): + if result.__class__.__name__ == 'ResultItem': + result_item = result + elif isinstance(result, list) and len(result) > 0: + # @N selection creates a list, extract the first item if it's a ResultItem + if hasattr(result[0], '__class__') and result[0].__class__.__name__ == 'ResultItem': + result_item = result[0] + + if result_item: + return _handle_search_result(result_item, args, config) + + # Handle PipeObject results from previous get-file call (for chaining) + if result and isinstance(result, dict) and result.get('action', '').startswith('cmdlet:get-file'): + # This is from a previous get-file result - just pass it through + # Don't treat it as a new file to play, just emit for pipeline chaining + ctx.emit(result) + return 0 + + # Check for AllDebrid pipe input (from search-debrid) + # Try to read first line from stdin to detect format + first_line = None + try: + # Try to read one line without blocking + if hasattr(sys.stdin, 'readable') and sys.stdin.readable(): + first_line = sys.stdin.readline().strip() + except Exception: + pass + + if first_line and _is_alldebrid_pipe_data(first_line): + # This is AllDebrid pipe data - handle it separately + # Put the line back by creating a chain with the rest of stdin + import io + try: + remaining_stdin = sys.stdin.read() + except: + remaining_stdin = "" + sys.stdin = io.StringIO(first_line + '\n' + remaining_stdin) + return _handle_alldebrid_pipe(config, args) + elif first_line: + # Not AllDebrid data, put it back for normal processing + import io + try: + remaining_stdin = sys.stdin.read() + except: + remaining_stdin = "" + sys.stdin = io.StringIO(first_line + '\n' + remaining_stdin) + + # Helpers + def _sanitize_name(text: str) -> str: + allowed = [] + for ch in text: + allowed.append(ch if (ch.isalnum() or ch in {"-", "_", " ", "."}) else " ") + return (" ".join("".join(allowed).split()) or "export").strip() + + def _ffprobe_duration_seconds(path: Path) -> Optional[float]: + ffprobe_path = _shutil.which('ffprobe') + if not ffprobe_path: + return None + try: + res = _subprocess.run( + [ffprobe_path, '-v', 'error', '-show_entries', 'format=duration', '-of', 'default=noprint_wrappers=1:nokey=1', str(path)], + stdout=_subprocess.PIPE, + stderr=_subprocess.PIPE, + check=True, + text=True, + ) + out = (res.stdout or '').strip() + if not out: + return None + value = float(out) + return value if value > 0 else None + except Exception: + return None + + def _parse_args(tokens: Sequence[str]) -> tuple[Optional[Path], Optional[str], Optional[str], Optional[str], bool]: + out_override: Optional[Path] = None + size_spec: Optional[str] = None + convert_spec: Optional[str] = None + hash_spec: Optional[str] = None + export_metadata: bool = False + i = 0 + while i < len(tokens): + t = tokens[i] + low = t.lower() + if low in {"-path", "--path", "path"} and i + 1 < len(tokens): + try: + out_override = Path(tokens[i + 1]).expanduser() + except Exception: + out_override = None + i += 2 + continue + if low in {"size", "-size", "--size"} and i + 1 < len(tokens): + size_spec = tokens[i + 1] + i += 2 + continue + if low in {"convert", "-convert", "--convert"} and i + 1 < len(tokens): + convert_spec = tokens[i + 1] + i += 2 + continue + if low in {"-hash", "--hash", "hash"} and i + 1 < len(tokens): + hash_spec = tokens[i + 1] + i += 2 + continue + if low in {"-metadata", "--metadata", "metadata"}: + export_metadata = True + i += 1 + continue + i += 1 
+ return out_override, size_spec, convert_spec, hash_spec, export_metadata + + def _compute_target_bytes(size_spec: Optional[str], source_bytes: int) -> Optional[int]: + if not size_spec: + return None + text = str(size_spec).strip().lower() + if not text: + return None + if text.endswith('%'): + try: + pct = float(text[:-1]) + except ValueError: + return None + pct = max(0.0, min(100.0, pct)) + target = int(round(source_bytes * (pct / 100.0))) + else: + val = text + if val.endswith('mb'): + val = val[:-2] + elif val.endswith('m'): + val = val[:-1] + try: + mb = float(val) + except ValueError: + return None + target = int(round(mb * 1024 * 1024)) + min_bytes = 1 * 1024 * 1024 + if target <= 0: + target = min_bytes + return min(target, source_bytes) + + def _guess_kind_from_suffix(path: Path) -> str: + sfx = path.suffix.lower() + if sfx in {'.mp4', '.mkv', '.webm', '.mov', '.avi', '.flv', '.mpg', '.mpeg', '.ts', '.m4v', '.wmv'}: + return 'video' + if sfx in {'.mp3', '.flac', '.wav', '.m4a', '.aac', '.ogg', '.opus', '.mka'}: + return 'audio' + return 'other' + + def _extract_metadata_from_tags(tags_payload: Dict[str, Any], file_hash: str, input_kind: str = '') -> Dict[str, str]: + """Extract common metadata fields from Hydrus tags. + + Returns a dict mapping FFmpeg metadata keys to values. + Supports: title, artist, album, track, date, genre, etc. + + For audio files, applies sensible defaults: + - If no album, uses title as album + - If no track, defaults to 1 + - album_artist is set to artist value + """ + metadata = {} + + # Map of common tag namespaces to FFmpeg metadata keys + tag_map = { + 'title': 'title', + 'artist': 'artist', + 'album': 'album', + 'track': 'track', + 'track_number': 'track', + 'date': 'date', + 'year': 'date', + 'genre': 'genre', + 'composer': 'composer', + 'comment': 'comment', + } + + if not tags_payload or 'metadata' not in tags_payload or not tags_payload['metadata']: + return metadata + + entry = tags_payload['metadata'][0] + if 'tags' not in entry or not isinstance(entry['tags'], dict): + return metadata + + tags_dict = entry['tags'] + + # Extract metadata from tags + for _service_key, service_data in tags_dict.items(): + if not isinstance(service_data, dict): + continue + + display_tags = service_data.get('display_tags', {}) + if not isinstance(display_tags, dict): + continue + + current_tags = display_tags.get('0', []) + if not isinstance(current_tags, list): + continue + + for tag in current_tags: + tag_str = str(tag).strip() + if ':' in tag_str: + namespace, value = tag_str.split(':', 1) + namespace = namespace.lower().strip() + value = value.strip() + if namespace in tag_map and value: + ffmpeg_key = tag_map[namespace] + # Use first occurrence + if ffmpeg_key not in metadata: + metadata[ffmpeg_key] = value + + # Apply sensible defaults for audio files + if input_kind == 'audio': + # If no album, use title as album + if 'album' not in metadata and 'title' in metadata: + metadata['album'] = metadata['title'] + # If no track, default to 1 + if 'track' not in metadata: + metadata['track'] = '1' + # If no album_artist, use artist + if 'artist' in metadata: + metadata['album_artist'] = metadata['artist'] + + return metadata + + out_override, size_spec, convert_spec, hash_spec, export_metadata = _parse_args(args) + default_dir = resolve_output_dir(config) + + media_kind = (get_field(result, 'media_kind', '') or '').lower() + + _chk = [] + if out_override: + _chk.append(f"Path={out_override}") + if size_spec: + _chk.append(f"Size={size_spec}") + if convert_spec: 
+ _chk.append(f"Convert={convert_spec}") + # Prefer explicit -hash over result hash for logging + file_hash_for_log = None + if hash_spec and looks_like_hash(hash_spec): + file_hash_for_log = normalize_hash(hash_spec) + else: + hash_value = get_field(result, 'hash_hex', None) + file_hash_for_log = normalize_hash(hash_value) if hash_value else None + if _chk or file_hash_for_log: + msg = "get-file: " + ", ".join(_chk) if _chk else "get-file" + if file_hash_for_log: + msg = f"{msg} (Hash={file_hash_for_log})" + ctx.emit(msg) + + base_name = _sanitize_name(get_field(result, 'title', None) or '') + if not base_name: + target_attr = get_field(result, 'target', None) + if isinstance(target_attr, str) and target_attr and not target_attr.startswith(('http://', 'https://')): + base_name = _sanitize_name(Path(target_attr).stem) + else: + base_name = 'export' + + # Accept multiple path-ish fields so @ selection from MPV playlist rows or ad-hoc dicts still resolve. + local_target = ( + get_field(result, 'target', None) + or get_field(result, 'path', None) + or get_field(result, 'file_path', None) + or get_field(result, 'filename', None) + ) + is_url = isinstance(local_target, str) and local_target.startswith(('http://', 'https://')) + # Establish file hash (prefer -hash override when provided and valid) + if hash_spec and looks_like_hash(hash_spec): + file_hash = normalize_hash(hash_spec) + else: + file_hash = normalize_hash(get_field(result, 'hash_hex', None)) if get_field(result, 'hash_hex', None) else None + + source_path: Optional[Path] = None + source_size: Optional[int] = None + duration_sec: Optional[float] = None + tags_payload: Dict[str, Any] = {} + url_payload: Dict[str, Any] = {} + cleanup_source: bool = False + + if isinstance(local_target, str) and not is_url and not (hash_spec and file_hash): + p = Path(local_target) + if not p.exists(): + # Check if it's a hash and try to resolve locally + resolved_local = False + if looks_like_hash(local_target): + try: + from config import get_local_storage_path + from helper.folder_store import FolderDB + storage_path = get_local_storage_path(config) + if storage_path: + with FolderDB(storage_path) as db: + resolved_path = db.search_hash(local_target) + if resolved_path and resolved_path.exists(): + p = resolved_path + resolved_local = True + # Also set file_hash since we know it + file_hash = local_target + except Exception: + pass + + if not resolved_local: + log(f"File missing: {p}") + return 1 + + source_path = p + try: + source_size = p.stat().st_size + except OSError: + source_size = None + duration_sec = _ffprobe_duration_seconds(p) + if file_hash is None: + for sc in (p.with_suffix('.tags'), p.with_suffix('.tags.txt')): + try: + if sc.exists(): + text = sc.read_text(encoding='utf-8', errors='ignore') + for line in text.splitlines(): + ls = line.strip().lower() + if ls.startswith('hash:'): + candidate = line.split(':', 1)[1].strip() if ':' in line else '' + if looks_like_hash(candidate): + file_hash = candidate.lower() + break + except OSError: + pass + elif file_hash: + # Try local resolution first if origin is local or just in case + resolved_local = False + try: + from config import get_local_storage_path + from helper.folder_store import FolderDB + storage_path = get_local_storage_path(config) + if storage_path: + with FolderDB(storage_path) as db: + resolved_path = db.search_hash(file_hash) + if resolved_path and resolved_path.exists(): + source_path = resolved_path + resolved_local = True + try: + source_size = source_path.stat().st_size 
+ except OSError: + source_size = None + duration_sec = _ffprobe_duration_seconds(source_path) + except Exception: + pass + + if not resolved_local: + try: + client = hydrus_wrapper.get_client(config) + except Exception as exc: + log(f"Hydrus client unavailable: {exc}") + return 1 + + if client is None: + log("Hydrus client unavailable") + return 1 + + # Fetch metadata and tags (needed for both -metadata flag and audio tagging) + # Fetch tags + try: + tags_payload = client.fetch_file_metadata(hashes=[file_hash], include_service_keys_to_tags=True) + except Exception: + tags_payload = {} + + # Fetch url + try: + url_payload = client.fetch_file_metadata(hashes=[file_hash], include_file_url=True) + except Exception: + url_payload = {} + + # Extract title from metadata if base_name is still 'export' + if base_name == 'export' and tags_payload: + try: + file_metadata = tags_payload.get('file_metadata', []) + if file_metadata and isinstance(file_metadata, list) and len(file_metadata) > 0: + meta = file_metadata[0] + if isinstance(meta, dict): + tags_dict = meta.get('tags', {}) + if isinstance(tags_dict, dict): + # Look for title in storage tags + for service in tags_dict.values(): + if isinstance(service, dict): + storage = service.get('storage_tags', {}) + if isinstance(storage, dict): + for tag_list in storage.values(): + if isinstance(tag_list, list): + for tag in tag_list: + if isinstance(tag, str) and tag.lower().startswith('title:'): + title_val = tag.split(':', 1)[1].strip() + if title_val: + base_name = _sanitize_name(title_val) + break + if base_name != 'export': + break + if base_name != 'export': + break + except Exception: + pass + + # Normal file export (happens regardless of -metadata flag) + try: + from helper.hydrus import hydrus_export as _hydrus_export + except Exception: + _hydrus_export = None # type: ignore + if _hydrus_export is None: + log("Hydrus export helper unavailable") + return 1 + download_dir = out_override if (out_override and out_override.is_dir()) else default_dir + try: + download_dir.mkdir(parents=True, exist_ok=True) + except Exception: + # If mkdir fails, fall back to default_dir + download_dir = default_dir + + # Verify the directory is writable; if not, fall back to default + try: + test_file = download_dir / f".downlow_write_test_{_uuid.uuid4().hex[:8]}" + test_file.touch() + test_file.unlink() + except (OSError, PermissionError): + # Directory is not writable, use default_dir instead + download_dir = default_dir + try: + download_dir.mkdir(parents=True, exist_ok=True) + except Exception: + pass + token = (_uuid.uuid4().hex[:8]) + provisional_stem = f"{base_name}.dlhx_{token}" + provisional = download_dir / f"{provisional_stem}.bin" + class _Args: + pass + args_obj = _Args() + setattr(args_obj, 'output', provisional) + setattr(args_obj, 'format', 'copy') + setattr(args_obj, 'tmp_dir', str(download_dir)) + setattr(args_obj, 'metadata_json', None) + setattr(args_obj, 'hydrus_url', get_hydrus_url(config, "home") or "http://localhost:45869") + setattr(args_obj, 'access_key', get_hydrus_access_key(config, "home") or "") + setattr(args_obj, 'timeout', float(config.get('HydrusNetwork_Request_Timeout') or 60.0)) + try: + file_url = client.file_url(file_hash) + except Exception: + file_url = None + setattr(args_obj, 'file_url', file_url) + setattr(args_obj, 'file_hash', file_hash) + import io as _io, contextlib as _contextlib + _buf = _io.StringIO() + status = 1 + with _contextlib.redirect_stdout(_buf): + status = _hydrus_export(args_obj, None) + if status != 0: + 
stderr_text = _buf.getvalue().strip() + if stderr_text: + log(stderr_text) + return status + json_text = _buf.getvalue().strip().splitlines()[-1] if _buf.getvalue() else '' + final_from_json: Optional[Path] = None + try: + payload = json.loads(json_text) if json_text else None + if isinstance(payload, dict): + outp = payload.get('output') + if isinstance(outp, str) and outp: + final_from_json = Path(outp) + except Exception: + final_from_json = None + if final_from_json and final_from_json.exists(): + source_path = final_from_json + else: + candidates = [p for p in provisional.parent.glob(provisional_stem + '*') if p.exists() and p.is_file()] + non_provisional = [p for p in candidates if p.suffix.lower() not in {'.bin', '.hydrus'}] + pick_from = non_provisional if non_provisional else candidates + if pick_from: + try: + source_path = max(pick_from, key=lambda p: p.stat().st_mtime) + except Exception: + source_path = pick_from[0] + else: + source_path = provisional + candidates = [p for p in provisional.parent.glob(provisional_stem + '*') if p.exists() and p.is_file()] + non_provisional = [p for p in candidates if p.suffix.lower() not in {'.bin', '.hydrus'}] + pick_from = non_provisional if non_provisional else candidates + if pick_from: + try: + source_path = max(pick_from, key=lambda p: p.stat().st_mtime) + except Exception: + source_path = pick_from[0] + else: + source_path = provisional + try: + source_size = source_size or (source_path.stat().st_size if source_path.exists() else None) + except OSError: + source_size = source_size + if duration_sec is None: + duration_sec = _ffprobe_duration_seconds(source_path) + cleanup_source = True + else: + log("Selected result is neither a local file nor a Hydrus record") + return 1 + + convert = (str(convert_spec or '').strip().lower()) + if convert not in {'', 'copy', 'mp4', 'webm', 'audio', 'mp3', 'opus'}: + log(f"Unsupported Convert value: {convert_spec}") + return 1 + if not convert: + convert = 'copy' + input_kind = media_kind or _guess_kind_from_suffix(source_path) + if input_kind == 'audio' and convert in {'mp4', 'webm'}: + log("Cannot convert audio to video") + return 1 + + def _ext_for_convert(conv: str, src: Path) -> str: + if conv == 'mp4': + return '.mp4' + if conv == 'webm': + return '.webm' + if conv in {'audio', 'mp3'}: + return '.mp3' + if conv == 'opus': + return '.opus' + return src.suffix or '' + + auto_named = True + if out_override is not None and out_override.exists() and out_override.is_dir(): + dest_dir = out_override + dest_ext = _ext_for_convert(convert, source_path) + dest_path = dest_dir / f"{base_name}{dest_ext}" + else: + dest_dir = default_dir + dest_ext = _ext_for_convert(convert, source_path) + if out_override and not out_override.exists() and not str(out_override).endswith(('/', '\\')): + dest_path = out_override + auto_named = False + else: + dest_path = (dest_dir / f"{base_name}{dest_ext}") + + if source_size is None: + try: + source_size = source_path.stat().st_size + except OSError: + source_size = None + if source_size is None: + log("Unable to determine source size for sizing logic; proceeding without Size targeting") + target_bytes = None + else: + target_bytes = _compute_target_bytes(size_spec, int(source_size)) + if target_bytes and (source_size or 0): + try: + from ..downlow import _fmt_bytes as _fmt_bytes_helper + except ImportError: + try: + from downlow import _fmt_bytes as _fmt_bytes_helper # type: ignore + except ImportError: + _fmt_bytes_helper = lambda x: f"{x} bytes" # type: ignore + except 
Exception: + _fmt_bytes_helper = lambda x: f"{x} bytes" # type: ignore + ctx.emit(f"Resizing target: {_fmt_bytes_helper(source_size)} -> {_fmt_bytes_helper(target_bytes)}") + + cleanup_source = locals().get('cleanup_source', False) + if convert == 'copy' and (not target_bytes or target_bytes >= (source_size or 0)): + # Simple copy without FFmpeg processing + # Only skip this if we need to write metadata (then FFmpeg handles it) + if not (export_metadata or (tags_payload and tags_payload.get('metadata'))): + try: + dest_path.parent.mkdir(parents=True, exist_ok=True) + final_dest = _unique_path(dest_path) + _shutil.copy2(source_path, final_dest) + ctx.emit(f"Exported to {final_dest}") + log(f"Exported: {final_dest}", file=sys.stderr) + if cleanup_source: + try: + if source_path.exists() and source_path != final_dest: + source_path.unlink() + except OSError: + pass + + return 0 + except Exception as exc: + log(f"Copy failed: {exc}") + return 1 + else: + # Metadata exists, so we need to go through FFmpeg to embed and write sidecar + # Fall through to FFmpeg section below + pass + + convert_effective = convert + if convert == 'copy' and target_bytes and (source_size or 0) > target_bytes: + if input_kind == 'video': + convert_effective = 'mp4' + elif input_kind == 'audio': + convert_effective = 'copy' + else: + convert_effective = convert + + ffmpeg_path = _shutil.which('ffmpeg') + if not ffmpeg_path: + log("ffmpeg executable not found in PATH") + return 1 + + # Extract metadata from tags to embed in file + file_metadata = _extract_metadata_from_tags(tags_payload, file_hash or '', input_kind) + if file_metadata: + metadata_msg = ', '.join(f'{k}={v}' for k, v in file_metadata.items()) + ctx.emit(f"[metadata] Embedding: {metadata_msg}") + ctx.print_if_visible(f"[get-file] Embedding metadata: {metadata_msg}", file=sys.stderr) + else: + ctx.print_if_visible(f"[get-file] No metadata tags found to embed", file=sys.stderr) + + cmd: list[str] = [ffmpeg_path, '-y', '-i', str(source_path)] + + # Add metadata flags to FFmpeg command + for key, value in file_metadata.items(): + cmd.extend(['-metadata', f'{key}={value}']) + + conv = convert_effective + if conv in {'mp4', 'webm', 'copy'}: + video_bitrate: Optional[int] = None + audio_bitrate: int = 128_000 + if target_bytes and duration_sec and duration_sec > 0: + total_bps = max(1, int((target_bytes * 8) / duration_sec)) + if total_bps <= audio_bitrate + 50_000: + if input_kind == 'video': + video_bitrate = max(50_000, total_bps - audio_bitrate) + else: + video_bitrate = None + else: + video_bitrate = total_bps - audio_bitrate + if conv == 'webm': + cmd += ['-c:v', 'libvpx-vp9'] + if video_bitrate: + cmd += ['-b:v', str(video_bitrate)] + else: + cmd += ['-b:v', '0', '-crf', '32'] + cmd += ['-c:a', 'libopus', '-b:a', '160k'] + elif conv == 'mp4' or (conv == 'copy' and input_kind == 'video'): + cmd += ['-c:v', 'libx265', '-preset', 'medium', '-tag:v', 'hvc1', '-pix_fmt', 'yuv420p'] + if video_bitrate: + cmd += ['-b:v', str(video_bitrate)] + else: + cmd += ['-crf', '26'] + cmd += ['-c:a', 'aac', '-b:a', '192k'] + if conv == 'mp4' or (conv == 'copy' and input_kind == 'video'): + cmd += ['-movflags', '+faststart'] + if convert_spec and conv != 'copy': + ctx.emit(f"Converting video -> {conv} (duration={duration_sec or 'unknown'}s)") + else: + if target_bytes and duration_sec and duration_sec > 0: + total_bps = max(1, int((target_bytes * 8) / duration_sec)) + abr = max(32_000, min(320_000, total_bps)) + else: + abr = 192_000 + if conv in {'audio', 'mp3'}: + cmd += 
['-vn', '-c:a', 'libmp3lame', '-b:a', str(abr)] + elif conv == 'opus': + cmd += ['-vn', '-c:a', 'libopus', '-b:a', str(abr)] + else: + ext = (source_path.suffix.lower() if source_path else '') + if ext in {'.mp3'}: + cmd += ['-vn', '-c:a', 'libmp3lame', '-b:a', str(abr)] + elif ext in {'.opus', '.ogg'}: + cmd += ['-vn', '-c:a', 'libopus', '-b:a', str(abr)] + elif ext in {'.m4a', '.aac'}: + cmd += ['-vn', '-c:a', 'aac', '-b:a', str(abr)] + else: + cmd += ['-vn', '-c:a', 'libmp3lame', '-b:a', str(abr)] + if convert_spec and conv != 'copy': + ctx.emit(f"Converting audio -> {conv}") + + if conv in {'audio','mp3'}: + desired_ext = '.mp3' + elif conv == 'opus': + desired_ext = '.opus' + elif conv == 'webm': + desired_ext = '.webm' + elif conv == 'mp4': + desired_ext = '.mp4' + else: + desired_ext = source_path.suffix + if (not dest_path.suffix) or auto_named or (dest_path.suffix.lower() in {'.hydrus', '.bin'}): + dest_path = dest_path.with_suffix(desired_ext) + + suffix_parts: list[str] = [] + def _size_label(raw: Optional[str], tb: Optional[int]) -> Optional[str]: + if not raw: + return None + text = str(raw).strip() + if text.endswith('%'): + return text + if not tb: + return None + mb = int(round(tb / (1024*1024))) + return f"{mb}Mb" + label = _size_label(size_spec, locals().get('target_bytes')) + if label: + suffix_parts.append(label) + if convert_spec and convert.lower() != 'copy': + label_map = {'mp4':'MP4','webm':'WEBM','audio':'AUDIO','mp3':'MP3','opus':'OPUS'} + suffix_parts.append(label_map.get(convert.lower(), convert.upper())) + if suffix_parts and auto_named: + _aug = f"{base_name} (" + ",".join(suffix_parts) + ")" + dest_path = dest_path.with_name(_aug + dest_path.suffix) + + try: + dest_path.parent.mkdir(parents=True, exist_ok=True) + final_dest = _unique_path(dest_path) + cmd.append(str(final_dest)) + completed = _subprocess.run(cmd, stdout=_subprocess.PIPE, stderr=_subprocess.PIPE, text=True) + if completed.returncode != 0: + stderr = (completed.stderr or '').strip() + log(f"ffmpeg failed ({completed.returncode}): {stderr}") + return 1 + ctx.emit(f"Exported to {final_dest}") + log(f"Exported: {final_dest}", file=sys.stderr) + + # Always write the .tags sidecar with metadata (hash, tags, url) + # This ensures metadata is preserved even if FFmpeg embedding didn't work + try: + metadata_lines = [] + + # Add hash + if file_hash: + metadata_lines.append(f"hash:{file_hash}") + + # Extract tags from metadata payload using correct structure + tags_set = set() + if 'metadata' in tags_payload and tags_payload['metadata']: + entry = tags_payload['metadata'][0] + if 'tags' in entry and isinstance(entry['tags'], dict): + for _service_key, service_data in entry['tags'].items(): + if isinstance(service_data, dict): + display_tags = service_data.get('display_tags', {}) + if isinstance(display_tags, dict): + current_tags = display_tags.get('0', []) + if isinstance(current_tags, list): + tags_set.update(current_tags) + + # Add tags (sorted, no prefix) + for tag in sorted(tags_set): + metadata_lines.append(tag) + + # Extract and add url + if 'metadata' in url_payload and url_payload['metadata']: + entry = url_payload['metadata'][0] + if 'url' in entry and isinstance(entry['url'], list): + for url in entry['url']: + metadata_lines.append(f"url:{url}") + + # Write sidecar if we have any metadata + if metadata_lines: + sidecar_path = final_dest.parent / f"{final_dest.name}.tags" + sidecar_path.write_text('\n'.join(metadata_lines), encoding='utf-8') + ctx.emit(f"Sidecar: {sidecar_path.name}") + 
log(f"Tags file: {sidecar_path}", file=sys.stderr) + except Exception as exc: + log(f"Warning: Could not write metadata sidecar: {exc}", file=sys.stderr) + + if cleanup_source: + try: + if source_path.exists() and source_path != final_dest: + source_path.unlink() + except OSError: + pass + return 0 + except Exception as exc: + log(f"Export failed: {exc}") + return 1 + + +def _unique_path(p: Path) -> Path: + if not p.exists(): + return p + stem = p.stem + suffix = p.suffix + parent = p.parent + for i in range(1, 1000): + candidate = parent / f"{stem} ({i}){suffix}" + if not candidate.exists(): + return candidate + return p + + +def _handle_ytdlp_download(url: str, title: str, config: Dict[str, Any], args: Sequence[str]) -> int: + """Handle download/streaming of URL using yt-dlp.""" + if not url: + log("Error: No URL provided", file=sys.stderr) + return 1 + + # Check for -storage local + args_list = list(map(str, args)) + storage_mode = None + if '-storage' in args_list: + try: + idx = args_list.index('-storage') + if idx + 1 < len(args_list): + storage_mode = args_list[idx + 1].lower() + except ValueError: + pass + + force_local = (storage_mode == 'local') + + if not force_local: + # Default: Stream to MPV + if _play_in_mpv(url, title, is_stream=True): + try: + from cmdnats import pipe as mpv_pipe + mpv_pipe._run(None, [], config) + except Exception: + pass + return 0 + else: + # Fallback to browser + try: + import webbrowser + webbrowser.open(url) + debug(f"[get-file] Opened in browser: {title}", file=sys.stderr) + return 0 + except Exception: + pass + return 1 + + # Download mode + try: + import yt_dlp + except ImportError: + log("Error: yt-dlp not installed. Please install it to download.", file=sys.stderr) + return 1 + + log(f"Downloading {title}...", file=sys.stderr) + + # Determine output directory + download_dir = resolve_output_dir(config) + try: + download_dir.mkdir(parents=True, exist_ok=True) + except Exception: + pass + + # Configure yt-dlp + ydl_opts = { + 'outtmpl': str(download_dir / '%(title)s.%(ext)s'), + 'quiet': False, + 'no_warnings': True, + # Use best audio/video + 'format': 'best', + } + + try: + with yt_dlp.YoutubeDL(ydl_opts) as ydl: + ydl.download([url]) + log(f"Downloaded to: {download_dir}", file=sys.stderr) + return 0 + except Exception as e: + log(f"Error downloading: {e}", file=sys.stderr) + return 1 + + +CMDLET = Cmdlet( + name="get-file", + summary="Export files: from Hydrus database OR from AllDebrid magnets via pipe. 
Auto-detects source and handles accordingly.", + usage="get-file [-Path ] [Size <50%|34MB>] [Convert ] [-metadata] [-file ]", + arg=[ + CmdletArg("Path", description="Output directory for files."), + CmdletArg("Size", description="Target size (Hydrus only): 50% or 34MB."), + CmdletArg("Convert", description="Convert format (Hydrus only): mp4, webm, audio, mp3, opus."), + CmdletArg("metadata", type="flag", description="Export metadata to .tags file (Hydrus only)."), + CmdletArg("file", description="Filter files by pattern (AllDebrid only)."), + ], + detail=[ + "Hydrus mode: exports media with optional size/format conversion", + "AllDebrid mode: downloads files from piped magnet IDs from search-debrid", + "Auto-detects pipe format and routes to correct handler", + "Magnet pipe format: ID|filename|size|statusCode|status|progress|...", + ], + +) \ No newline at end of file diff --git a/cmdlets/get_metadata.py b/cmdlets/get_metadata.py index 1ea8c7a..603e3c7 100644 --- a/cmdlets/get_metadata.py +++ b/cmdlets/get_metadata.py @@ -6,337 +6,224 @@ import sys from helper.logger import log from pathlib import Path -import mimetypes -import os -from helper import hydrus as hydrus_wrapper -from helper.local_library import LocalLibraryDB -from ._shared import Cmdlet, CmdletArg, normalize_hash -from config import get_local_storage_path +from ._shared import Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, get_field import pipeline as ctx from result_table import ResultTable -def _extract_imported_ts(meta: Dict[str, Any]) -> Optional[int]: - """Extract an imported timestamp from Hydrus metadata if available.""" - if not isinstance(meta, dict): +class Get_Metadata(Cmdlet): + """Class-based get-metadata cmdlet with self-registration.""" + + def __init__(self) -> None: + """Initialize get-metadata cmdlet.""" + super().__init__( + name="get-metadata", + summary="Print metadata for files by hash and storage backend.", + usage="get-metadata [-hash ] [-store ]", + alias=["meta"], + arg=[ + SharedArgs.HASH, + SharedArgs.STORE, + ], + detail=[ + "- Retrieves metadata from storage backend using file hash as identifier.", + "- Shows hash, MIME type, size, duration/pages, known url, and import timestamp.", + "- Hash and store are taken from piped result or can be overridden with -hash/-store flags.", + "- All metadata is retrieved from the storage backend's database (single source of truth).", + ], + exec=self.run, + ) + self.register() + + @staticmethod + def _extract_imported_ts(meta: Dict[str, Any]) -> Optional[int]: + """Extract an imported timestamp from metadata if available.""" + if not isinstance(meta, dict): + return None + + # Prefer explicit time_imported if present + explicit = meta.get("time_imported") + if isinstance(explicit, (int, float)): + return int(explicit) + + # Try parsing string timestamps + if isinstance(explicit, str): + try: + import datetime as _dt + return int(_dt.datetime.fromisoformat(explicit).timestamp()) + except Exception: + pass + return None - # Prefer explicit time_imported if present - explicit = meta.get("time_imported") - if isinstance(explicit, (int, float)): - return int(explicit) - - file_services = meta.get("file_services") - if isinstance(file_services, dict): - current = file_services.get("current") - if isinstance(current, dict): - numeric = [int(v) for v in current.values() if isinstance(v, (int, float))] - if numeric: - return min(numeric) - return None - - -def _format_imported(ts: Optional[int]) -> str: - if not ts: - return "" - try: - import datetime as _dt - 
return _dt.datetime.utcfromtimestamp(ts).strftime("%Y-%m-%d %H:%M:%S") - except Exception: - return "" - - -def _build_table_row(title: str, origin: str, path: str, mime: str, size_bytes: Optional[int], dur_seconds: Optional[int], imported_ts: Optional[int], urls: list[str], hash_value: Optional[str], pages: Optional[int] = None) -> Dict[str, Any]: - size_mb = None - if isinstance(size_bytes, int): + @staticmethod + def _format_imported(ts: Optional[int]) -> str: + """Format timestamp as readable string.""" + if not ts: + return "" try: - size_mb = int(size_bytes / (1024 * 1024)) + import datetime as _dt + return _dt.datetime.utcfromtimestamp(ts).strftime("%Y-%m-%d %H:%M:%S") except Exception: - size_mb = None + return "" - dur_int = int(dur_seconds) if isinstance(dur_seconds, (int, float)) else None - pages_int = int(pages) if isinstance(pages, (int, float)) else None - imported_label = _format_imported(imported_ts) + @staticmethod + def _build_table_row(title: str, origin: str, path: str, mime: str, size_bytes: Optional[int], + dur_seconds: Optional[int], imported_ts: Optional[int], url: list[str], + hash_value: Optional[str], pages: Optional[int] = None) -> Dict[str, Any]: + """Build a table row dict with metadata fields.""" + size_mb = None + if isinstance(size_bytes, int): + try: + size_mb = int(size_bytes / (1024 * 1024)) + except Exception: + size_mb = None - duration_label = "Duration(s)" - duration_value = str(dur_int) if dur_int is not None else "" - if mime and mime.lower().startswith("application/pdf"): - duration_label = "Pages" - duration_value = str(pages_int) if pages_int is not None else "" + dur_int = int(dur_seconds) if isinstance(dur_seconds, (int, float)) else None + pages_int = int(pages) if isinstance(pages, (int, float)) else None + imported_label = Get_Metadata._format_imported(imported_ts) - columns = [ - ("Title", title or ""), - ("Hash", hash_value or ""), - ("MIME", mime or ""), - ("Size(MB)", str(size_mb) if size_mb is not None else ""), - (duration_label, duration_value), - ("Imported", imported_label), - ("Store", origin or ""), - ] + duration_label = "Duration(s)" + duration_value = str(dur_int) if dur_int is not None else "" + if mime and mime.lower().startswith("application/pdf"): + duration_label = "Pages" + duration_value = str(pages_int) if pages_int is not None else "" - return { - "title": title or path, - "path": path, - "origin": origin, - "mime": mime, - "size_bytes": size_bytes, - "duration_seconds": dur_int, - "pages": pages_int, - "imported_ts": imported_ts, - "imported": imported_label, - "hash": hash_value, - "known_urls": urls, - "columns": columns, - } + columns = [ + ("Title", title or ""), + ("Hash", hash_value or ""), + ("MIME", mime or ""), + ("Size(MB)", str(size_mb) if size_mb is not None else ""), + (duration_label, duration_value), + ("Imported", imported_label), + ("Store", origin or ""), + ] + return { + "title": title or path, + "path": path, + "origin": origin, + "mime": mime, + "size_bytes": size_bytes, + "duration_seconds": dur_int, + "pages": pages_int, + "imported_ts": imported_ts, + "imported": imported_label, + "hash": hash_value, + "url": url, + "columns": columns, + } -def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int: - # Help - try: - if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in _args): - log(json.dumps(CMDLET.to_dict(), ensure_ascii=False, indent=2)) - return 0 - except Exception: - pass - - # Helper to get field from both dict and object - def 
get_field(obj: Any, field: str, default: Any = None) -> Any: - if isinstance(obj, dict): - return obj.get(field, default) + @staticmethod + def _add_table_body_row(table: ResultTable, row: Dict[str, Any]) -> None: + """Add a single row to the ResultTable using the prepared columns.""" + columns = row.get("columns") if isinstance(row, dict) else None + lookup: Dict[str, Any] = {} + if isinstance(columns, list): + for col in columns: + if isinstance(col, tuple) and len(col) == 2: + label, value = col + lookup[str(label)] = value + + row_obj = table.add_row() + row_obj.add_column("Hash", lookup.get("Hash", "")) + row_obj.add_column("MIME", lookup.get("MIME", "")) + row_obj.add_column("Size(MB)", lookup.get("Size(MB)", "")) + if "Duration(s)" in lookup: + row_obj.add_column("Duration(s)", lookup.get("Duration(s)", "")) + elif "Pages" in lookup: + row_obj.add_column("Pages", lookup.get("Pages", "")) else: - return getattr(obj, field, default) - - # Parse -hash override - override_hash: str | None = None - args_list = list(_args) - i = 0 - while i < len(args_list): - a = args_list[i] - low = str(a).lower() - if low in {"-hash", "--hash", "hash"} and i + 1 < len(args_list): - override_hash = str(args_list[i + 1]).strip() - break - i += 1 - - # Try to determine if this is a local file or Hydrus file - local_path = get_field(result, "target", None) or get_field(result, "path", None) - is_local = False - if local_path and isinstance(local_path, str) and not local_path.startswith(("http://", "https://")): - is_local = True - - # LOCAL FILE PATH - if is_local and local_path: + row_obj.add_column("Duration(s)", "") + + def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: + """Main execution entry point.""" + # Parse arguments + parsed = parse_cmdlet_args(args, self) + + # Get hash and store from parsed args or result + file_hash = parsed.get("hash") or get_field(result, "hash") or get_field(result, "file_hash") or get_field(result, "hash_hex") + storage_source = parsed.get("store") or get_field(result, "store") or get_field(result, "storage") or get_field(result, "origin") + + if not file_hash: + log("No hash available - use -hash to specify", file=sys.stderr) + return 1 + + if not storage_source: + log("No storage backend specified - use -store to specify", file=sys.stderr) + return 1 + + # Use storage backend to get metadata try: - file_path = Path(str(local_path)) - if file_path.exists() and file_path.is_file(): - # Get the hash from result or compute it - hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(get_field(result, "hash_hex", None)) - - # If no hash, compute SHA256 of the file - if not hash_hex: - try: - import hashlib - with open(file_path, 'rb') as f: - hash_hex = hashlib.sha256(f.read()).hexdigest() - except Exception: - hash_hex = None - - # Get MIME type - mime_type, _ = mimetypes.guess_type(str(file_path)) - if not mime_type: - mime_type = "unknown" - - # Pull metadata from local DB if available (for imported timestamp, duration, etc.) 
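# A minimal sketch of the same SHA-256 computation done in fixed-size chunks, so large
# media files are not pulled into memory by a single f.read() as above (the helper name
# and chunk size are illustrative, not part of this codebase):
import hashlib
from pathlib import Path

def _sha256_of_file(path: Path, chunk_size: int = 1 << 20) -> str:
    """Return the hex SHA-256 digest of a file, reading it 1 MiB at a time."""
    digest = hashlib.sha256()
    with open(path, "rb") as fh:
        for chunk in iter(lambda: fh.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()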
- db_metadata = None - library_root = get_local_storage_path(config) - if library_root: - try: - with LocalLibraryDB(library_root) as db: - db_metadata = db.get_metadata(file_path) or None - except Exception: - db_metadata = None - - # Get file size (prefer DB size if present) - file_size = None - if isinstance(db_metadata, dict) and isinstance(db_metadata.get("size"), int): - file_size = db_metadata.get("size") - else: - try: - file_size = file_path.stat().st_size - except Exception: - file_size = None - - # Duration/pages - duration_seconds = None - pages = None - if isinstance(db_metadata, dict): - if isinstance(db_metadata.get("duration"), (int, float)): - duration_seconds = float(db_metadata.get("duration")) - if isinstance(db_metadata.get("pages"), (int, float)): - pages = int(db_metadata.get("pages")) - - if duration_seconds is None and mime_type and mime_type.startswith("video"): - try: - import subprocess - result_proc = subprocess.run( - ["ffprobe", "-v", "error", "-select_streams", "v:0", "-show_entries", "format=duration", "-of", "default=noprint_wrappers=1:nokey=1", str(file_path)], - capture_output=True, - text=True, - timeout=5 - ) - if result_proc.returncode == 0 and result_proc.stdout.strip(): - duration_seconds = float(result_proc.stdout.strip()) - except Exception: - pass - - # Known URLs from sidecar or result - urls = [] - sidecar_path = Path(str(file_path) + '.tags') - if sidecar_path.exists(): - try: - with open(sidecar_path, 'r', encoding='utf-8') as f: - for line in f: - line = line.strip() - if line.startswith('known_url:'): - url_value = line.replace('known_url:', '', 1).strip() - if url_value: - urls.append(url_value) - except Exception: - pass - - if not urls: - urls_from_result = get_field(result, "known_urls", None) or get_field(result, "urls", None) - if isinstance(urls_from_result, list): - urls.extend([str(u).strip() for u in urls_from_result if u]) - - imported_ts = None - if isinstance(db_metadata, dict): - ts = db_metadata.get("time_imported") or db_metadata.get("time_added") - if isinstance(ts, (int, float)): - imported_ts = int(ts) - elif isinstance(ts, str): - try: - import datetime as _dt - imported_ts = int(_dt.datetime.fromisoformat(ts).timestamp()) - except Exception: - imported_ts = None - - row = _build_table_row( - title=file_path.name, - origin="local", - path=str(file_path), - mime=mime_type or "", - size_bytes=int(file_size) if isinstance(file_size, int) else None, - dur_seconds=duration_seconds, - imported_ts=imported_ts, - urls=urls, - hash_value=hash_hex, - pages=pages, - ) - - table_title = file_path.name - table = ResultTable(table_title) - table.set_source_command("get-metadata", list(_args)) - table.add_result(row) - ctx.set_last_result_table_overlay(table, [row], row) - ctx.emit(row) - return 0 - except Exception: - # Fall through to Hydrus if local file handling fails - pass - - # HYDRUS PATH - hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(get_field(result, "hash_hex", None)) - if not hash_hex: - log("Selected result does not include a Hydrus hash or local path", file=sys.stderr) - return 1 - - try: - client = hydrus_wrapper.get_client(config) - except Exception as exc: - log(f"Hydrus client unavailable: {exc}", file=sys.stderr) - return 1 - - if client is None: - log("Hydrus client unavailable", file=sys.stderr) - return 1 - - try: - payload = client.fetch_file_metadata( - hashes=[hash_hex], - include_service_keys_to_tags=False, - include_file_urls=True, - include_duration=True, - include_size=True, 
- include_mime=True, - ) - except Exception as exc: - log(f"Hydrus metadata fetch failed: {exc}", file=sys.stderr) - return 1 - - items = payload.get("metadata") if isinstance(payload, dict) else None - if not isinstance(items, list) or not items: - log("No metadata found.") - return 0 - - meta = items[0] if isinstance(items[0], dict) else None - if not isinstance(meta, dict): - log("No metadata found.") - return 0 - - mime = meta.get("mime") - size = meta.get("size") or meta.get("file_size") - duration_value = meta.get("duration") - inner = meta.get("metadata") if isinstance(meta.get("metadata"), dict) else None - if duration_value is None and isinstance(inner, dict): - duration_value = inner.get("duration") - - imported_ts = _extract_imported_ts(meta) - - try: - from .search_file import _hydrus_duration_seconds as _dur_secs - except Exception: - _dur_secs = lambda x: x - - dur_seconds = _dur_secs(duration_value) - urls = meta.get("known_urls") or meta.get("urls") - urls = [str(u).strip() for u in urls] if isinstance(urls, list) else [] - - row = _build_table_row( - title=hash_hex, - origin="hydrus", - path=f"hydrus://file/{hash_hex}", - mime=mime or "", - size_bytes=int(size) if isinstance(size, int) else None, - dur_seconds=int(dur_seconds) if isinstance(dur_seconds, (int, float)) else None, - imported_ts=imported_ts, - urls=urls, - hash_value=hash_hex, - pages=None, - ) - - table = ResultTable(hash_hex or "Metadata") - table.set_source_command("get-metadata", list(_args)) - table.add_result(row) - ctx.set_last_result_table_overlay(table, [row], row) - ctx.emit(row) - - return 0 + from helper.store import FileStorage + storage = FileStorage(config) + backend = storage[storage_source] + + # Get metadata from backend + metadata = backend.get_metadata(file_hash) + + if not metadata: + log(f"No metadata found for hash {file_hash[:8]}... 
in {storage_source}", file=sys.stderr) + return 1 + + # Extract title from tags if available + title = get_field(result, "title") or file_hash[:16] + if not get_field(result, "title"): + # Try to get title from tags + try: + tags, _ = backend.get_tag(file_hash) + for tag in tags: + if tag.lower().startswith("title:"): + title = tag.split(":", 1)[1] + break + except Exception: + pass + + # Extract metadata fields + mime_type = metadata.get("mime") or metadata.get("ext", "") + file_size = metadata.get("size") + duration_seconds = metadata.get("duration") + pages = metadata.get("pages") + url = metadata.get("url") or [] + imported_ts = self._extract_imported_ts(metadata) + + # Normalize url + if isinstance(url, str): + try: + url = json.loads(url) + except (json.JSONDecodeError, TypeError): + url = [] + if not isinstance(url, list): + url = [] + + # Build display row + row = self._build_table_row( + title=title, + origin=storage_source, + path=metadata.get("file_path", ""), + mime=mime_type, + size_bytes=file_size, + dur_seconds=duration_seconds, + imported_ts=imported_ts, + url=url, + hash_value=file_hash, + pages=pages, + ) + + table_title = title + table = ResultTable(table_title).init_command("get-metadata", list(args)) + self._add_table_body_row(table, row) + ctx.set_last_result_table_overlay(table, [row], row) + ctx.emit(row) + return 0 + + except KeyError: + log(f"Storage backend '{storage_source}' not found", file=sys.stderr) + return 1 + except Exception as exc: + log(f"Failed to get metadata: {exc}", file=sys.stderr) + return 1 -CMDLET = Cmdlet( - name="get-metadata", - summary="Print metadata for local or Hydrus files (hash, mime, duration, size, URLs).", - usage="get-metadata [-hash ]", - aliases=["meta"], - args=[ - CmdletArg("hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."), - ], - details=[ - "- For local files: Shows path, hash (computed if needed), MIME type, size, duration, and known URLs from sidecar.", - "- For Hydrus files: Shows path (hydrus://), hash, MIME, duration, size, and known URLs.", - "- Automatically detects local vs Hydrus files.", - "- Local file hashes are computed via SHA256 if not already available.", - ], -) +CMDLET = Get_Metadata() diff --git a/cmdlets/get_note.py b/cmdlets/get_note.py index 6acc920..f0402ed 100644 --- a/cmdlets/get_note.py +++ b/cmdlets/get_note.py @@ -7,17 +7,17 @@ from . 
import register import models import pipeline as ctx from helper import hydrus as hydrus_wrapper -from ._shared import Cmdlet, CmdletArg, normalize_hash +from ._shared import Cmdlet, CmdletArg, SharedArgs, normalize_hash, get_hash_for_operation, fetch_hydrus_metadata, get_field, should_show_help from helper.logger import log CMDLET = Cmdlet( name="get-note", summary="List notes on a Hydrus file.", usage="get-note [-hash ]", - args=[ - CmdletArg("-hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."), + arg=[ + SharedArgs.HASH, ], - details=[ + detail=[ "- Prints notes by service and note name.", ], ) @@ -25,45 +25,24 @@ CMDLET = Cmdlet( @register(["get-note", "get-notes", "get_note"]) # aliases def get_notes(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: - # Helper to get field from both dict and object - def get_field(obj: Any, field: str, default: Any = None) -> Any: - if isinstance(obj, dict): - return obj.get(field, default) - else: - return getattr(obj, field, default) - # Help - try: - if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args): - log(json.dumps(CMDLET, ensure_ascii=False, indent=2)) - return 0 - except Exception: - pass + if should_show_help(args): + log(json.dumps(CMDLET, ensure_ascii=False, indent=2)) + return 0 - from ._shared import parse_cmdlet_args + from ._shared import parse_cmdlet_args, get_hash_for_operation, fetch_hydrus_metadata parsed = parse_cmdlet_args(args, CMDLET) override_hash = parsed.get("hash") - hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(get_field(result, "hash_hex", None)) + hash_hex = get_hash_for_operation(override_hash, result) if not hash_hex: log("Selected result does not include a Hydrus hash") return 1 - try: - client = hydrus_wrapper.get_client(config) - except Exception as exc: - log(f"Hydrus client unavailable: {exc}") - return 1 - if client is None: - log("Hydrus client unavailable") - return 1 - try: - payload = client.fetch_file_metadata(hashes=[hash_hex], include_service_keys_to_tags=False, include_notes=True) - except Exception as exc: - log(f"Hydrus metadata fetch failed: {exc}") - return 1 - items = payload.get("metadata") if isinstance(payload, dict) else None - meta = items[0] if (isinstance(items, list) and items and isinstance(items[0], dict)) else None + meta, error_code = fetch_hydrus_metadata(config, hash_hex, include_service_keys_to_tags=False, include_notes=True) + if error_code != 0: + return error_code + notes = {} if isinstance(meta, dict): # Hydrus returns service_keys_to_tags; for notes we expect 'service_names_to_notes' in modern API diff --git a/cmdlets/get_relationship.py b/cmdlets/get_relationship.py index da851d3..b0dcdb8 100644 --- a/cmdlets/get_relationship.py +++ b/cmdlets/get_relationship.py @@ -7,12 +7,11 @@ from pathlib import Path from helper.logger import log -from . 
import register import models import pipeline as ctx from helper import hydrus as hydrus_wrapper -from ._shared import Cmdlet, CmdletArg, normalize_hash, fmt_bytes -from helper.local_library import LocalLibraryDB +from ._shared import Cmdlet, CmdletArg, SharedArgs, normalize_hash, fmt_bytes, get_hash_for_operation, fetch_hydrus_metadata, should_show_help +from helper.folder_store import FolderDB from config import get_local_storage_path from result_table import ResultTable @@ -20,23 +19,22 @@ CMDLET = Cmdlet( name="get-relationship", summary="Print relationships for the selected file (Hydrus or Local).", usage="get-relationship [-hash ]", - args=[ - CmdletArg("-hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."), + alias=[ + "get-rel", ], - details=[ + arg=[ + SharedArgs.HASH, + ], + detail=[ "- Lists relationship data as returned by Hydrus or Local DB.", ], ) -@register(["get-rel", "get-relationship", "get-relationships", "get-file-relationships"]) # aliases def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int: # Help - try: - if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in _args): - log(json.dumps(CMDLET, ensure_ascii=False, indent=2)) - return 0 - except Exception: - pass + if should_show_help(_args): + log(json.dumps(CMDLET, ensure_ascii=False, indent=2)) + return 0 # Parse -hash override override_hash: str | None = None @@ -91,8 +89,9 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int: storage_path = get_local_storage_path(config) print(f"[DEBUG] Storage path: {storage_path}", file=sys.stderr) if storage_path: - with LocalLibraryDB(storage_path) as db: - metadata = db.get_metadata(path_obj) + with FolderDB(storage_path) as db: + file_hash = db.get_file_hash(path_obj) + metadata = db.get_metadata(file_hash) if file_hash else None print(f"[DEBUG] Metadata found: {metadata is not None}", file=sys.stderr) if metadata and metadata.get("relationships"): local_db_checked = True @@ -106,14 +105,14 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int: # h is now a file hash (not a path) print(f"[DEBUG] Processing relationship hash: h={h}", file=sys.stderr) # Resolve hash to file path - resolved_path = db.search_by_hash(h) + resolved_path = db.search_hash(h) title = h[:16] + "..." path = None if resolved_path and resolved_path.exists(): path = str(resolved_path) # Try to get title from tags try: - tags = db.get_tags(resolved_path) + tags = db.get_tags(h) found_title = False for t in tags: if t.lower().startswith('title:'): @@ -154,11 +153,13 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int: if not existing_parent: parent_title = parent_path_obj.stem try: - parent_tags = db.get_tags(parent_path_obj) - for t in parent_tags: - if t.lower().startswith('title:'): - parent_title = t[6:].strip() - break + parent_hash = db.get_file_hash(parent_path_obj) + if parent_hash: + parent_tags = db.get_tags(parent_hash) + for t in parent_tags: + if t.lower().startswith('title:'): + parent_title = t[6:].strip() + break except Exception: pass @@ -176,7 +177,8 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int: existing_parent['type'] = "king" # 1. 
Check forward relationships from parent (siblings) - parent_metadata = db.get_metadata(parent_path_obj) + parent_hash = db.get_file_hash(parent_path_obj) + parent_metadata = db.get_metadata(parent_hash) if parent_hash else None print(f"[DEBUG] 📖 Parent metadata: {parent_metadata is not None}", file=sys.stderr) if parent_metadata: print(f"[DEBUG] Parent metadata keys: {parent_metadata.keys()}", file=sys.stderr) @@ -189,7 +191,7 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int: if child_hashes: for child_h in child_hashes: # child_h is now a HASH, not a path - resolve it - child_path_obj = db.search_by_hash(child_h) + child_path_obj = db.search_hash(child_h) print(f"[DEBUG] Resolved hash {child_h[:16]}... to: {child_path_obj}", file=sys.stderr) if not child_path_obj: @@ -205,11 +207,13 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int: # Now child_path_obj is a Path, so we can get tags child_title = child_path_obj.stem try: - child_tags = db.get_tags(child_path_obj) - for t in child_tags: - if t.lower().startswith('title:'): - child_title = t[6:].strip() - break + child_hash = db.get_file_hash(child_path_obj) + if child_hash: + child_tags = db.get_tags(child_hash) + for t in child_tags: + if t.lower().startswith('title:'): + child_title = t[6:].strip() + break except Exception: pass @@ -241,11 +245,13 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int: child_path_obj = Path(child_path) child_title = child_path_obj.stem try: - child_tags = db.get_tags(child_path_obj) - for t in child_tags: - if t.lower().startswith('title:'): - child_title = t[6:].strip() - break + child_hash = db.get_file_hash(child_path_obj) + if child_hash: + child_tags = db.get_tags(child_hash) + for t in child_tags: + if t.lower().startswith('title:'): + child_title = t[6:].strip() + break except Exception: pass @@ -304,11 +310,7 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int: # But if the file is also in Hydrus, we might want those too. # Let's try Hydrus if we have a hash. 
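# Rough sketch of the fallback order that get_hash_for_operation appears to replace here,
# mirroring only the removed lines below (assumes normalize_hash from cmdlets._shared;
# the helper name _resolve_hash_like is illustrative):
def _resolve_hash_like(override_hash, result):
    h = normalize_hash(override_hash) if override_hash else normalize_hash(getattr(result, "hash_hex", None))
    if not h and isinstance(result, dict):
        h = normalize_hash(result.get("hash") or result.get("file_hash"))
    return h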
- hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(getattr(result, "hash_hex", None)) - if not hash_hex: - # Try to get hash from dict - if isinstance(result, dict): - hash_hex = normalize_hash(result.get("hash") or result.get("file_hash")) + hash_hex = get_hash_for_operation(override_hash, result) if hash_hex and not local_db_checked: try: @@ -362,7 +364,7 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int: return 0 # Display results - table = ResultTable(f"Relationships: {source_title}") + table = ResultTable(f"Relationships: {source_title}").init_command("get-relationship", []) # Sort by type then title # Custom sort order: King first, then Derivative, then others diff --git a/cmdlets/get_tag.py b/cmdlets/get_tag.py index 2ece642..3b42ad0 100644 --- a/cmdlets/get_tag.py +++ b/cmdlets/get_tag.py @@ -20,8 +20,8 @@ from typing import Any, Dict, List, Optional, Sequence, Tuple import pipeline as ctx from helper import hydrus -from helper.local_library import read_sidecar, write_sidecar, find_sidecar, LocalLibraryDB -from ._shared import normalize_hash, looks_like_hash, Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args +from helper.folder_store import read_sidecar, write_sidecar, find_sidecar, FolderDB +from ._shared import normalize_hash, looks_like_hash, Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, get_field from config import get_local_storage_path @@ -71,33 +71,6 @@ class TagItem: } -def _extract_my_tags_from_hydrus_meta(meta: Dict[str, Any], service_key: Optional[str], service_name: str) -> List[str]: - """Extract current tags from Hydrus metadata dict. - - Prefers display_tags (includes siblings/parents, excludes deleted). - Falls back to storage_tags status '0' (current). - """ - tags_payload = meta.get("tags") - if not isinstance(tags_payload, dict): - return [] - svc_data = None - if service_key: - svc_data = tags_payload.get(service_key) - if not isinstance(svc_data, dict): - return [] - # Prefer display_tags (Hydrus computes siblings/parents) - display = svc_data.get("display_tags") - if isinstance(display, list) and display: - return [str(t) for t in display if isinstance(t, (str, bytes)) and str(t).strip()] - # Fallback to storage_tags status '0' (current) - storage = svc_data.get("storage_tags") - if isinstance(storage, dict): - current_list = storage.get("0") or storage.get(0) - if isinstance(current_list, list): - return [str(t) for t in current_list if isinstance(t, (str, bytes)) and str(t).strip()] - return [] - - def _emit_tags_as_table( tags_list: List[str], hash_hex: Optional[str], @@ -316,12 +289,12 @@ def _read_sidecar_fallback(p: Path) -> tuple[Optional[str], List[str], List[str] Format: - Lines with "hash:" prefix: file hash - - Lines with "known_url:" or "url:" prefix: URLs + - Lines with "url:" or "url:" prefix: url - Lines with "relationship:" prefix: ignored (internal relationships) - Lines with "key:", "namespace:value" format: treated as namespace tags - Plain lines without colons: freeform tags - Excluded namespaces (treated as metadata, not tags): hash, known_url, url, relationship + Excluded namespaces (treated as metadata, not tags): hash, url, url, relationship """ try: raw = p.read_text(encoding="utf-8", errors="ignore") @@ -332,7 +305,7 @@ def _read_sidecar_fallback(p: Path) -> tuple[Optional[str], List[str], List[str] h: Optional[str] = None # Namespaces to exclude from tags - excluded_namespaces = {"hash", "known_url", "url", "relationship"} + excluded_namespaces = {"hash", "url", "url", 
"relationship"} for line in raw.splitlines(): s = line.strip() @@ -344,7 +317,7 @@ def _read_sidecar_fallback(p: Path) -> tuple[Optional[str], List[str], List[str] if low.startswith("hash:"): h = s.split(":", 1)[1].strip() if ":" in s else h # Check if this is a URL line - elif low.startswith("known_url:") or low.startswith("url:"): + elif low.startswith("url:") or low.startswith("url:"): val = s.split(":", 1)[1].strip() if ":" in s else "" if val: u.append(val) @@ -361,12 +334,12 @@ def _read_sidecar_fallback(p: Path) -> tuple[Optional[str], List[str], List[str] return h, t, u -def _write_sidecar(p: Path, media: Path, tag_list: List[str], known_urls: List[str], hash_in_sidecar: Optional[str]) -> Path: +def _write_sidecar(p: Path, media: Path, tag_list: List[str], url: List[str], hash_in_sidecar: Optional[str]) -> Path: """Write tags to sidecar file and handle title-based renaming. Returns the new media path if renamed, otherwise returns the original media path. """ - success = write_sidecar(media, tag_list, known_urls, hash_in_sidecar) + success = write_sidecar(media, tag_list, url, hash_in_sidecar) if success: _apply_result_updates_from_tags(None, tag_list) # Check if we should rename the file based on title tag @@ -381,8 +354,8 @@ def _write_sidecar(p: Path, media: Path, tag_list: List[str], known_urls: List[s if hash_in_sidecar: lines.append(f"hash:{hash_in_sidecar}") lines.extend(ordered) - for u in known_urls: - lines.append(f"known_url:{u}") + for u in url: + lines.append(f"url:{u}") try: p.write_text("\n".join(lines) + "\n", encoding="utf-8") # Check if we should rename the file based on title tag @@ -414,16 +387,16 @@ def _emit_tag_payload(source: str, tags_list: List[str], *, hash_value: Optional label = None if store_label: label = store_label - elif ctx._PIPE_ACTIVE: + elif ctx.get_stage_context() is not None: label = "tags" if label: ctx.store_value(label, payload) - if ctx._PIPE_ACTIVE and label.lower() != "tags": + if ctx.get_stage_context() is not None and label.lower() != "tags": ctx.store_value("tags", payload) # Emit individual TagItem objects so they can be selected by bare index # When in pipeline, emit individual TagItem objects - if ctx._PIPE_ACTIVE: + if ctx.get_stage_context() is not None: for idx, tag_name in enumerate(tags_list, start=1): tag_item = TagItem( tag_name=tag_name, @@ -1113,7 +1086,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: # Try local sidecar if no tags present on result if not identifier_tags: - file_path = get_field(result, "target", None) or get_field(result, "path", None) or get_field(result, "file_path", None) or get_field(result, "filename", None) + file_path = get_field(result, "target", None) or get_field(result, "path", None) or get_field(result, "filename", None) if isinstance(file_path, str) and file_path and not file_path.lower().startswith(("http://", "https://")): try: media_path = Path(str(file_path)) @@ -1226,103 +1199,35 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: emit_mode = emit_requested or bool(store_key) store_label = (store_key.strip() if store_key and store_key.strip() else None) - # Check Hydrus availability - hydrus_available, _ = hydrus.is_available(config) + # Get hash and store from result + file_hash = hash_hex + storage_source = get_field(result, "store") or get_field(result, "storage") or get_field(result, "origin") - # Try to find path in result object - local_path = get_field(result, "target", None) or get_field(result, "path", None) or 
get_field(result, "file_path", None) + if not file_hash: + log("No hash available in result", file=sys.stderr) + return 1 - # Determine if local file - is_local_file = False - media: Optional[Path] = None - if local_path and isinstance(local_path, str) and not local_path.startswith(("http://", "https://")): - is_local_file = True - try: - media = Path(str(local_path)) - except Exception: - media = None + if not storage_source: + log("No storage backend specified in result", file=sys.stderr) + return 1 - # Try Hydrus first (always prioritize if available and has hash) - use_hydrus = False - hydrus_meta = None # Cache the metadata from first fetch - client = None - if hash_hex and hydrus_available: - try: - client = hydrus.get_client(config) - payload = client.fetch_file_metadata(hashes=[str(hash_hex)], include_service_keys_to_tags=True, include_file_urls=False) - items = payload.get("metadata") if isinstance(payload, dict) else None - if isinstance(items, list) and items: - meta = items[0] if isinstance(items[0], dict) else None - # Only accept file if it has a valid file_id (not None) - if isinstance(meta, dict) and meta.get("file_id") is not None: - use_hydrus = True - hydrus_meta = meta # Cache for tag extraction - except Exception: - pass - - # Get tags - try Hydrus first, fallback to sidecar - current = [] - service_name = "" - service_key = None - source = "unknown" - - if use_hydrus and hash_hex and hydrus_meta: - try: - # Use cached metadata from above, don't fetch again - service_name = hydrus.get_tag_service_name(config) - if client is None: - client = hydrus.get_client(config) - service_key = hydrus.get_tag_service_key(client, service_name) - current = _extract_my_tags_from_hydrus_meta(hydrus_meta, service_key, service_name) - source = "hydrus" - except Exception as exc: - log(f"Warning: Failed to extract tags from Hydrus: {exc}", file=sys.stderr) - - # Fallback to local sidecar or local DB if no tags - if not current and is_local_file and media and media.exists(): - try: - # First try local library DB - library_root = get_local_storage_path(config) - if library_root: - try: - with LocalLibraryDB(library_root) as db: - db_tags = db.get_tags(media) - if db_tags: - current = db_tags - source = "local_db" - except Exception as exc: - log(f"[get_tag] DB lookup failed, trying sidecar: {exc}", file=sys.stderr) - - # Fall back to sidecar if DB didn't have tags - if not current: - sidecar_path = find_sidecar(media) - if sidecar_path and sidecar_path.exists(): - try: - _, current, _ = read_sidecar(sidecar_path) - except Exception: - _, current, _ = _read_sidecar_fallback(sidecar_path) - if current: - source = "sidecar" - except Exception as exc: - log(f"Warning: Failed to load tags from local storage: {exc}", file=sys.stderr) - - # Fallback to tags in the result object if Hydrus/local lookup returned nothing - if not current: - # Check if result has 'tags' attribute (PipeObject) - if hasattr(result, 'tags') and getattr(result, 'tags', None): - current = getattr(result, 'tags') - source = "pipeline_result" - # Check if result is a dict with 'tags' key - elif isinstance(result, dict) and 'tags' in result: - tags_val = result['tags'] - if isinstance(tags_val, list): - current = tags_val - source = "pipeline_result" - source = "pipeline_result" - - # Error if no tags found - if not current: - log("No tags found", file=sys.stderr) + # Get tags using storage backend + try: + from helper.store import FileStorage + storage = FileStorage(config) + backend = storage[storage_source] + current, 
source = backend.get_tag(file_hash, config=config) + + if not current: + log("No tags found", file=sys.stderr) + return 1 + + service_name = "" + except KeyError: + log(f"Storage backend '{storage_source}' not found", file=sys.stderr) + return 1 + except Exception as exc: + log(f"Failed to get tags: {exc}", file=sys.stderr) return 1 # Always output to ResultTable (pipeline mode only) @@ -1383,33 +1288,106 @@ except Exception: _SCRAPE_CHOICES = ["itunes", "openlibrary", "googlebooks", "google", "musicbrainz"] -CMDLET = Cmdlet( - name="get-tag", - summary="Get tags from Hydrus or local sidecar metadata", - usage="get-tag [-hash ] [--store ] [--emit] [-scrape ]", - aliases=["tags"], - args=[ - SharedArgs.HASH, - CmdletArg( - name="-store", - type="string", - description="Store result to this key for pipeline", - alias="store" - ), - CmdletArg( - name="-emit", - type="flag", - description="Emit result without interactive prompt (quiet mode)", - alias="emit-only" - ), - CmdletArg( - name="-scrape", - type="string", - description="Scrape metadata from URL or provider name (returns tags as JSON or table)", - required=False, - choices=_SCRAPE_CHOICES, - ) - ] -) +class Get_Tag(Cmdlet): + """Class-based get-tag cmdlet with self-registration.""" + + def __init__(self) -> None: + """Initialize get-tag cmdlet.""" + super().__init__( + name="get-tag", + summary="Get tags from Hydrus or local sidecar metadata", + usage="get-tag [-hash ] [--store ] [--emit] [-scrape ]", + alias=["tags"], + arg=[ + SharedArgs.HASH, + CmdletArg( + name="-store", + type="string", + description="Store result to this key for pipeline", + alias="store" + ), + CmdletArg( + name="-emit", + type="flag", + description="Emit result without interactive prompt (quiet mode)", + alias="emit-only" + ), + CmdletArg( + name="-scrape", + type="string", + description="Scrape metadata from URL or provider name (returns tags as JSON or table)", + required=False, + choices=_SCRAPE_CHOICES, + ) + ], + detail=[ + "- Retrieves tags for a file from:", + " Hydrus: Using file hash if available", + " Local: From sidecar files or local library database", + "- Options:", + " -hash: Override hash to look up in Hydrus", + " -store: Store result to key for downstream pipeline", + " -emit: Quiet mode (no interactive selection)", + " -scrape: Scrape metadata from URL or metadata provider", + ], + exec=self.run, + ) + self.register() + + def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: + """Execute get-tag cmdlet.""" + # Parse arguments + parsed = parse_cmdlet_args(args, self) + + # Get hash and store from parsed args or result + hash_override = parsed.get("hash") + file_hash = hash_override or get_field(result, "hash") or get_field(result, "file_hash") or get_field(result, "hash_hex") + storage_source = parsed.get("store") or get_field(result, "store") or get_field(result, "storage") or get_field(result, "origin") + + if not file_hash: + log("No hash available in result", file=sys.stderr) + return 1 + + if not storage_source: + log("No storage backend specified in result", file=sys.stderr) + return 1 + + # Get tags using storage backend + try: + from helper.store import FileStorage + storage_obj = FileStorage(config) + backend = storage_obj[storage_source] + current, source = backend.get_tag(file_hash, config=config) + + if not current: + log("No tags found", file=sys.stderr) + return 1 + + # Build table and emit + item_title = get_field(result, "title") or file_hash[:16] + _emit_tags_as_table( + tags_list=current, + 
hash_hex=file_hash, + source=source, + service_name="", + config=config, + item_title=item_title, + file_path=None, + subject=result, + ) + return 0 + + except KeyError: + log(f"Storage backend '{storage_source}' not found", file=sys.stderr) + return 1 + except Exception as exc: + log(f"Failed to get tags: {exc}", file=sys.stderr) + import traceback + traceback.print_exc(file=sys.stderr) + return 1 + + +# Create and register the cmdlet +CMDLET = Get_Tag() diff --git a/cmdlets/get_tag.py.orig b/cmdlets/get_tag.py.orig new file mode 100644 index 0000000..a49b6f5 --- /dev/null +++ b/cmdlets/get_tag.py.orig @@ -0,0 +1,1415 @@ +"""Get tags from Hydrus or local sidecar metadata. + +This cmdlet retrieves tags for a selected result, supporting both: +- Hydrus Network (for files with hash_hex) +- Local sidecar files (.tags) + +In interactive mode: navigate with numbers, add/delete tags +In pipeline mode: display tags as read-only table, emit as structured JSON +""" + +from __future__ import annotations + +import sys + +from helper.logger import log, debug +from helper.metadata_search import get_metadata_provider, list_metadata_providers +import subprocess +from pathlib import Path +from typing import Any, Dict, List, Optional, Sequence, Tuple + +import pipeline as ctx +from helper import hydrus +from helper.local_library import read_sidecar, write_sidecar, find_sidecar, LocalLibraryDB +from ._shared import normalize_hash, looks_like_hash, Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args +from config import get_local_storage_path + + +try: + from metadata import extract_title +except ImportError: + extract_title = None + + + + + +# Tag item for ResultTable display and piping +from dataclasses import dataclass + +@dataclass +class TagItem: + """Tag item for display in ResultTable and piping to other cmdlets. + + Allows tags to be selected and piped like: + - delete-tag @{3,4,9} (delete tags at indices 3, 4, 9) + - add-tag @"namespace:value" (add this tag) + """ + tag_name: str + tag_index: int # 1-based index for user reference + hash_hex: Optional[str] = None + source: str = "hydrus" + service_name: Optional[str] = None + file_path: Optional[str] = None + + def __post_init__(self): + # Make ResultTable happy by adding standard fields + # NOTE: Don't set 'title' - we want only the tag column in ResultTable + self.origin = self.source + self.detail = f"Tag #{self.tag_index}" + self.target = self.tag_name + self.media_kind = "tag" + + def to_dict(self) -> Dict[str, Any]: + """Convert to dict for JSON serialization.""" + return { + "tag_name": self.tag_name, + "tag_index": self.tag_index, + "hash_hex": self.hash_hex, + "source": self.source, + "service_name": self.service_name, + } + + +def _extract_my_tags_from_hydrus_meta(meta: Dict[str, Any], service_key: Optional[str], service_name: str) -> List[str]: + """Extract current tags from Hydrus metadata dict. + + Prefers display_tags (includes siblings/parents, excludes deleted). + Falls back to storage_tags status '0' (current). 
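    Illustrative shape of the payload this function reads (the service key and tag
    values are placeholders, not real Hydrus data):

        meta = {"tags": {"<service_key>": {
            "display_tags": ["title:example", "creator:someone"],
            "storage_tags": {"0": ["title:example", "creator:someone"]},
        }}}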
+ """ + tags_payload = meta.get("tags") + if not isinstance(tags_payload, dict): + return [] + svc_data = None + if service_key: + svc_data = tags_payload.get(service_key) + if not isinstance(svc_data, dict): + return [] + # Prefer display_tags (Hydrus computes siblings/parents) + display = svc_data.get("display_tags") + if isinstance(display, list) and display: + return [str(t) for t in display if isinstance(t, (str, bytes)) and str(t).strip()] + # Fallback to storage_tags status '0' (current) + storage = svc_data.get("storage_tags") + if isinstance(storage, dict): + current_list = storage.get("0") or storage.get(0) + if isinstance(current_list, list): + return [str(t) for t in current_list if isinstance(t, (str, bytes)) and str(t).strip()] + return [] + + +def _emit_tags_as_table( + tags_list: List[str], + hash_hex: Optional[str], + source: str = "hydrus", + service_name: Optional[str] = None, + config: Dict[str, Any] = None, + item_title: Optional[str] = None, + file_path: Optional[str] = None, + subject: Optional[Any] = None, +) -> None: + """Emit tags as TagItem objects and display via ResultTable. + + This replaces _print_tag_list to make tags pipe-able. + Stores the table in ctx._LAST_RESULT_TABLE for downstream @ selection. + """ + from result_table import ResultTable + + # Create ResultTable with just tag column (no title) + table_title = "Tags" + if item_title: + table_title = f"Tags: {item_title}" + if hash_hex: + table_title += f" [{hash_hex[:8]}]" + + table = ResultTable(table_title, max_columns=1) + table.set_source_command("get-tag", []) + + # Create TagItem for each tag + tag_items = [] + for idx, tag_name in enumerate(tags_list, start=1): + tag_item = TagItem( + tag_name=tag_name, + tag_index=idx, + hash_hex=hash_hex, + source=source, + service_name=service_name, + file_path=file_path, + ) + tag_items.append(tag_item) + table.add_result(tag_item) + # Also emit to pipeline for downstream processing + ctx.emit(tag_item) + + # Store the table and items in history so @.. works to go back + # Use overlay mode so it doesn't push the previous search to history stack + # This makes get-tag behave like a transient view + try: + ctx.set_last_result_table_overlay(table, tag_items, subject) + except AttributeError: + ctx.set_last_result_table(table, tag_items, subject) + # Note: CLI will handle displaying the table via ResultTable formatting +def _summarize_tags(tags_list: List[str], limit: int = 8) -> str: + """Create a summary of tags for display.""" + shown = [t for t in tags_list[:limit] if t] + summary = ", ".join(shown) + remaining = max(0, len(tags_list) - len(shown)) + if remaining > 0: + summary = f"{summary} (+{remaining} more)" if summary else f"(+{remaining} more)" + if len(summary) > 200: + summary = summary[:197] + "..." + return summary + + +def _extract_title_from(tags_list: List[str]) -> Optional[str]: + """Extract title from tags list.""" + if extract_title: + try: + return extract_title(tags_list) + except Exception: + pass + for t in tags_list: + if isinstance(t, str) and t.lower().startswith("title:"): + val = t.split(":", 1)[1].strip() + if val: + return val + return None + + +def _rename_file_if_title_tag(media: Optional[Path], tags_added: List[str]) -> bool: + """Rename a local file if title: tag was added. + + Returns True if file was renamed, False otherwise. 
+ """ + if not media or not tags_added: + return False + + # Check if any of the added tags is a title: tag + title_value = None + for tag in tags_added: + if isinstance(tag, str): + lower_tag = tag.lower() + if lower_tag.startswith("title:"): + title_value = tag.split(":", 1)[1].strip() + break + + if not title_value: + return False + + try: + # Get current file path + file_path = media + if not file_path.exists(): + return False + + # Parse file path + dir_path = file_path.parent + old_name = file_path.name + + # Get file extension + suffix = file_path.suffix or '' + + # Sanitize title for use as filename + import re + safe_title = re.sub(r'[<>:"/\\|?*]', '', title_value).strip() + if not safe_title: + return False + + new_name = safe_title + suffix + new_file_path = dir_path / new_name + + if new_file_path == file_path: + return False + + # Build sidecar paths BEFORE renaming the file + old_sidecar = Path(str(file_path) + '.tags') + new_sidecar = Path(str(new_file_path) + '.tags') + + # Rename file + try: + file_path.rename(new_file_path) + log(f"Renamed file: {old_name} → {new_name}") + + # Rename .tags sidecar if it exists + if old_sidecar.exists(): + try: + old_sidecar.rename(new_sidecar) + log(f"Renamed sidecar: {old_name}.tags → {new_name}.tags") + except Exception as e: + log(f"Failed to rename sidecar: {e}", file=sys.stderr) + + return True + except Exception as e: + log(f"Failed to rename file: {e}", file=sys.stderr) + return False + except Exception as e: + log(f"Error during file rename: {e}", file=sys.stderr) + return False + + +def _apply_result_updates_from_tags(result: Any, tag_list: List[str]) -> None: + """Update result object with title and tag summary from tags.""" + try: + new_title = _extract_title_from(tag_list) + if new_title: + setattr(result, "title", new_title) + setattr(result, "tag_summary", _summarize_tags(tag_list)) + except Exception: + pass + + +def _handle_title_rename(old_path: Path, tags_list: List[str]) -> Optional[Path]: + """If a title: tag is present, rename the file and its .tags sidecar to match. + + Returns the new path if renamed, otherwise returns None. 
+ """ + # Extract title from tags + new_title = None + for tag in tags_list: + if isinstance(tag, str) and tag.lower().startswith('title:'): + new_title = tag.split(':', 1)[1].strip() + break + + if not new_title or not old_path.exists(): + return None + + try: + # Build new filename with same extension + old_name = old_path.name + old_suffix = old_path.suffix + + # Create new filename: title + extension + new_name = f"{new_title}{old_suffix}" + new_path = old_path.parent / new_name + + # Don't rename if already the same name + if new_path == old_path: + return None + + # Rename the main file + if new_path.exists(): + log(f"Warning: Target filename already exists: {new_name}", file=sys.stderr) + return None + + old_path.rename(new_path) + log(f"Renamed file: {old_name} → {new_name}", file=sys.stderr) + + # Rename the .tags sidecar if it exists + old_tags_path = old_path.parent / (old_name + '.tags') + if old_tags_path.exists(): + new_tags_path = old_path.parent / (new_name + '.tags') + if new_tags_path.exists(): + log(f"Warning: Target sidecar already exists: {new_tags_path.name}", file=sys.stderr) + else: + old_tags_path.rename(new_tags_path) + log(f"Renamed sidecar: {old_tags_path.name} → {new_tags_path.name}", file=sys.stderr) + + return new_path + except Exception as exc: + log(f"Warning: Failed to rename file: {exc}", file=sys.stderr) + return None + + + +def _read_sidecar_fallback(p: Path) -> tuple[Optional[str], List[str], List[str]]: + """Fallback sidecar reader if metadata module unavailable. + + Format: + - Lines with "hash:" prefix: file hash + - Lines with "url:" or "url:" prefix: url + - Lines with "relationship:" prefix: ignored (internal relationships) + - Lines with "key:", "namespace:value" format: treated as namespace tags + - Plain lines without colons: freeform tags + + Excluded namespaces (treated as metadata, not tags): hash, url, url, relationship + """ + try: + raw = p.read_text(encoding="utf-8", errors="ignore") + except OSError: + return None, [], [] + t: List[str] = [] + u: List[str] = [] + h: Optional[str] = None + + # Namespaces to exclude from tags + excluded_namespaces = {"hash", "url", "url", "relationship"} + + for line in raw.splitlines(): + s = line.strip() + if not s: + continue + low = s.lower() + + # Check if this is a hash line + if low.startswith("hash:"): + h = s.split(":", 1)[1].strip() if ":" in s else h + # Check if this is a URL line + elif low.startswith("url:") or low.startswith("url:"): + val = s.split(":", 1)[1].strip() if ":" in s else "" + if val: + u.append(val) + # Check if this is an excluded namespace + elif ":" in s: + namespace = s.split(":", 1)[0].strip().lower() + if namespace not in excluded_namespaces: + # Include as namespace tag (e.g., "title: The Freemasons") + t.append(s) + else: + # Plain text without colon = freeform tag + t.append(s) + + return h, t, u + + +def _write_sidecar(p: Path, media: Path, tag_list: List[str], url: List[str], hash_in_sidecar: Optional[str]) -> Path: + """Write tags to sidecar file and handle title-based renaming. + + Returns the new media path if renamed, otherwise returns the original media path. 
+ """ + success = write_sidecar(media, tag_list, url, hash_in_sidecar) + if success: + _apply_result_updates_from_tags(None, tag_list) + # Check if we should rename the file based on title tag + new_media = _handle_title_rename(media, tag_list) + if new_media: + return new_media + return media + + # Fallback writer + ordered = [s for s in tag_list if s and s.strip()] + lines = [] + if hash_in_sidecar: + lines.append(f"hash:{hash_in_sidecar}") + lines.extend(ordered) + for u in url: + lines.append(f"url:{u}") + try: + p.write_text("\n".join(lines) + "\n", encoding="utf-8") + # Check if we should rename the file based on title tag + new_media = _handle_title_rename(media, tag_list) + if new_media: + return new_media + return media + except OSError as exc: + log(f"Failed to write sidecar: {exc}", file=sys.stderr) + return media + + +def _emit_tag_payload(source: str, tags_list: List[str], *, hash_value: Optional[str], extra: Optional[Dict[str, Any]] = None, store_label: Optional[str] = None) -> int: + """Emit tags as structured payload to pipeline. + + Also emits individual tag objects to _PIPELINE_LAST_ITEMS so they can be selected by index. + """ + payload: Dict[str, Any] = { + "source": source, + "tags": list(tags_list), + "count": len(tags_list), + } + if hash_value: + payload["hash"] = hash_value + if extra: + for key, value in extra.items(): + if value is not None: + payload[key] = value + label = None + if store_label: + label = store_label + elif ctx._PIPE_ACTIVE: + label = "tags" + if label: + ctx.store_value(label, payload) + if ctx._PIPE_ACTIVE and label.lower() != "tags": + ctx.store_value("tags", payload) + + # Emit individual TagItem objects so they can be selected by bare index + # When in pipeline, emit individual TagItem objects + if ctx._PIPE_ACTIVE: + for idx, tag_name in enumerate(tags_list, start=1): + tag_item = TagItem( + tag_name=tag_name, + tag_index=idx, + hash_hex=hash_value, + source=source, + service_name=None + ) + ctx.emit(tag_item) + else: + # When not in pipeline, just emit the payload + ctx.emit(payload) + + return 0 + + + +def _extract_scrapable_identifiers(tags_list: List[str]) -> Dict[str, str]: + """Extract scrapable identifiers from tags.""" + identifiers = {} + scrapable_prefixes = { + 'openlibrary', 'isbn', 'isbn_10', 'isbn_13', + 'musicbrainz', 'musicbrainzalbum', 'imdb', 'tmdb', 'tvdb' + } + + for tag in tags_list: + if not isinstance(tag, str) or ':' not in tag: + continue + + parts = tag.split(':', 1) + if len(parts) != 2: + continue + + key_raw = parts[0].strip().lower() + key = key_raw.replace('-', '_') + if key == 'isbn10': + key = 'isbn_10' + elif key == 'isbn13': + key = 'isbn_13' + value = parts[1].strip() + + # Normalize ISBN values by removing hyphens for API friendliness + if key.startswith('isbn'): + value = value.replace('-', '') + + if key in scrapable_prefixes and value: + identifiers[key] = value + + return identifiers + + +def _extract_tag_value(tags_list: List[str], namespace: str) -> Optional[str]: + """Get first tag value for a namespace (e.g., artist:, title:).""" + ns = namespace.lower() + for tag in tags_list: + if not isinstance(tag, str) or ':' not in tag: + continue + prefix, _, value = tag.partition(':') + if prefix.strip().lower() != ns: + continue + candidate = value.strip() + if candidate: + return candidate + return None + + +def _scrape_url_metadata(url: str) -> Tuple[Optional[str], List[str], List[Tuple[str, str]], List[Dict[str, Any]]]: + """Scrape metadata from a URL using yt-dlp. 
+ + Returns: + (title, tags, formats, playlist_items) tuple where: + - title: Video/content title + - tags: List of extracted tags (both namespaced and freeform) + - formats: List of (display_label, format_id) tuples + - playlist_items: List of playlist entry dicts (empty if not a playlist) + """ + try: + import json as json_module + + try: + from metadata import extract_ytdlp_tags + except ImportError: + extract_ytdlp_tags = None + + # Build yt-dlp command with playlist support + # IMPORTANT: Do NOT use --flat-playlist! It strips metadata like artist, album, uploader, genre + # Without it, yt-dlp gives us full metadata in an 'entries' array within a single JSON object + # This ensures we get album-level metadata from sources like BandCamp, YouTube Music, etc. + cmd = [ + "yt-dlp", + "-j", # Output JSON + "--no-warnings", + "--playlist-items", "1-10", # Get first 10 items if it's a playlist (provides entries) + "-f", "best", + url + ] + + result = subprocess.run(cmd, capture_output=True, text=True, timeout=30) + + if result.returncode != 0: + log(f"yt-dlp error: {result.stderr}", file=sys.stderr) + return None, [], [], [] + + # Parse JSON output - WITHOUT --flat-playlist, we get ONE JSON object with 'entries' array + # This gives us full metadata instead of flat format + lines = result.stdout.strip().split('\n') + if not lines or not lines[0]: + log("yt-dlp returned empty output", file=sys.stderr) + return None, [], [], [] + + # Parse the single JSON object + try: + data = json_module.loads(lines[0]) + except json_module.JSONDecodeError as e: + log(f"Failed to parse yt-dlp JSON: {e}", file=sys.stderr) + return None, [], [], [] + + # Extract title - use the main title + title = data.get('title', 'Unknown') + + # Determine if this is a playlist/album (has entries array) + # is_playlist = 'entries' in data and isinstance(data.get('entries'), list) + + # Extract tags and playlist items + tags = [] + playlist_items = [] + + # IMPORTANT: Extract album/playlist-level tags FIRST (before processing entries) + # This ensures we get metadata about the collection, not just individual tracks + if extract_ytdlp_tags: + album_tags = extract_ytdlp_tags(data) + tags.extend(album_tags) + + # Case 1: Entries are nested in the main object (standard playlist structure) + if 'entries' in data and isinstance(data.get('entries'), list): + entries = data['entries'] + # Build playlist items with title and duration + for idx, entry in enumerate(entries, 1): + if isinstance(entry, dict): + item_title = entry.get('title', entry.get('id', f'Track {idx}')) + item_duration = entry.get('duration', 0) + playlist_items.append({ + 'index': idx, + 'id': entry.get('id', f'track_{idx}'), + 'title': item_title, + 'duration': item_duration, + 'url': entry.get('url') or entry.get('webpage_url', ''), + }) + + # Extract tags from each entry and merge (but don't duplicate album-level tags) + # Only merge entry tags that are multi-value prefixes (not single-value like title:, artist:, etc.) 
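# For example (illustrative values): if the album-level pass above already yielded
# "artist:Example Artist", an entry's "artist:Example Artist" or "artist:Someone Else"
# is skipped, while an entry-only multi-value tag such as "genre:ambient" is still
# appended when it is not an exact duplicate.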
+ if extract_ytdlp_tags: + entry_tags = extract_ytdlp_tags(entry) + + # Single-value namespaces that should not be duplicated from entries + single_value_namespaces = {'title', 'artist', 'album', 'creator', 'channel', 'release_date', 'upload_date', 'license', 'location'} + + for tag in entry_tags: + # Extract the namespace (part before the colon) + tag_namespace = tag.split(':', 1)[0].lower() if ':' in tag else None + + # Skip if this namespace already exists in tags (from album level) + if tag_namespace and tag_namespace in single_value_namespaces: + # Check if any tag with this namespace already exists in tags + already_has_namespace = any( + t.split(':', 1)[0].lower() == tag_namespace + for t in tags if ':' in t + ) + if already_has_namespace: + continue # Skip this tag, keep the album-level one + + if tag not in tags: # Avoid exact duplicates + tags.append(tag) + + # Case 2: Playlist detected by playlist_count field (BandCamp albums, etc.) + # These need a separate call with --flat-playlist to get the actual entries + elif (data.get('playlist_count') or 0) > 0 and 'entries' not in data: + try: + # Make a second call with --flat-playlist to get the actual tracks + flat_cmd = [ + "yt-dlp", + "-j", + "--no-warnings", + "--flat-playlist", + "-f", "best", + url + ] + flat_result = subprocess.run(flat_cmd, capture_output=True, text=True, timeout=30) + if flat_result.returncode == 0: + flat_lines = flat_result.stdout.strip().split('\n') + # With --flat-playlist, each line is a separate track JSON object + # (not nested in a playlist container), so process ALL lines + for idx, line in enumerate(flat_lines, 1): + if line.strip().startswith('{'): + try: + entry = json_module.loads(line) + item_title = entry.get('title', entry.get('id', f'Track {idx}')) + item_duration = entry.get('duration', 0) + playlist_items.append({ + 'index': idx, + 'id': entry.get('id', f'track_{idx}'), + 'title': item_title, + 'duration': item_duration, + 'url': entry.get('url') or entry.get('webpage_url', ''), + }) + except json_module.JSONDecodeError: + pass + except Exception as e: + pass # Silently ignore if we can't get playlist entries + + + # Fallback: if still no tags detected, get from first item + if not tags and extract_ytdlp_tags: + tags = extract_ytdlp_tags(data) + + # Extract formats from the main data object + formats = [] + if 'formats' in data: + formats = _extract_url_formats(data.get('formats', [])) + + # Deduplicate tags by namespace to prevent duplicate title:, artist:, etc. + try: + from metadata import dedup_tags_by_namespace as _dedup + if _dedup: + tags = _dedup(tags, keep_first=True) + except Exception: + pass # If dedup fails, return tags as-is + + return title, tags, formats, playlist_items + + except subprocess.TimeoutExpired: + log("yt-dlp timeout (>30s)", file=sys.stderr) + return None, [], [], [] + except Exception as e: + log(f"URL scraping error: {e}", file=sys.stderr) + return None, [], [], [] + + +def _extract_url_formats(formats: list) -> List[Tuple[str, str]]: + """Extract best formats from yt-dlp formats list. + + Returns list of (display_label, format_id) tuples. 
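    Illustrative call (trimmed, hypothetical yt-dlp format dicts):

        >>> _extract_url_formats([
        ...     {"format_id": "248", "ext": "webm", "vcodec": "vp9", "acodec": "none", "height": 1080, "tbr": 2500},
        ...     {"format_id": "140", "ext": "m4a", "vcodec": "none", "acodec": "aac", "abr": 128},
        ... ])
        [('1080p (webm)', '248'), ('audio (m4a)', '140')]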
+ """ + try: + video_formats = {} # {resolution: format_data} + audio_formats = {} # {quality_label: format_data} + + for fmt in formats: + vcodec = fmt.get('vcodec', 'none') + acodec = fmt.get('acodec', 'none') + height = fmt.get('height') + ext = fmt.get('ext', 'unknown') + format_id = fmt.get('format_id', '') + tbr = fmt.get('tbr', 0) + abr = fmt.get('abr', 0) + + # Video format + if vcodec and vcodec != 'none' and height: + if height < 480: + continue + res_key = f"{height}p" + if res_key not in video_formats or tbr > video_formats[res_key].get('tbr', 0): + video_formats[res_key] = { + 'label': f"{height}p ({ext})", + 'format_id': format_id, + 'tbr': tbr, + } + + # Audio-only format + elif acodec and acodec != 'none' and (not vcodec or vcodec == 'none'): + audio_key = f"audio_{abr}" + if audio_key not in audio_formats or abr > audio_formats[audio_key].get('abr', 0): + audio_formats[audio_key] = { + 'label': f"audio ({ext})", + 'format_id': format_id, + 'abr': abr, + } + + result = [] + + # Add video formats in descending resolution order + for res in sorted(video_formats.keys(), key=lambda x: int(x.replace('p', '')), reverse=True): + fmt = video_formats[res] + result.append((fmt['label'], fmt['format_id'])) + + # Add best audio format + if audio_formats: + best_audio = max(audio_formats.values(), key=lambda x: x.get('abr', 0)) + result.append((best_audio['label'], best_audio['format_id'])) + + return result + + except Exception as e: + log(f"Error extracting formats: {e}", file=sys.stderr) + return [] + + +def _scrape_isbn_metadata(isbn: str) -> List[str]: + """Scrape metadata for an ISBN using Open Library API.""" + new_tags = [] + try: + from ..helper.http_client import HTTPClient + import json as json_module + + isbn_clean = isbn.replace('-', '').strip() + url = f"https://openlibrary.org/api/books?bibkeys=ISBN:{isbn_clean}&jscmd=data&format=json" + + try: + with HTTPClient() as client: + response = client.get(url) + response.raise_for_status() + data = json_module.loads(response.content.decode('utf-8')) + except Exception as e: + log(f"Failed to fetch ISBN metadata: {e}", file=sys.stderr) + return [] + + if not data: + log(f"No ISBN metadata found for: {isbn}") + return [] + + book_data = next(iter(data.values()), None) + if not book_data: + return [] + + if 'title' in book_data: + new_tags.append(f"title:{book_data['title']}") + + if 'authors' in book_data and isinstance(book_data['authors'], list): + for author in book_data['authors'][:3]: + if 'name' in author: + new_tags.append(f"author:{author['name']}") + + if 'publish_date' in book_data: + new_tags.append(f"publish_date:{book_data['publish_date']}") + + if 'publishers' in book_data and isinstance(book_data['publishers'], list): + for pub in book_data['publishers'][:1]: + if 'name' in pub: + new_tags.append(f"publisher:{pub['name']}") + + if 'description' in book_data: + desc = book_data['description'] + if isinstance(desc, dict) and 'value' in desc: + desc = desc['value'] + if desc: + desc_str = str(desc).strip() + # Include description if available (limit to 200 chars to keep it manageable) + if len(desc_str) > 0: + new_tags.append(f"description:{desc_str[:200]}") + + if 'number_of_pages' in book_data: + page_count = book_data['number_of_pages'] + if page_count and isinstance(page_count, int) and page_count > 0: + new_tags.append(f"pages:{page_count}") + + if 'identifiers' in book_data and isinstance(book_data['identifiers'], dict): + identifiers = book_data['identifiers'] + + if 'openlibrary' in identifiers: + ol_ids = 
identifiers['openlibrary'] + if isinstance(ol_ids, list) and ol_ids: + new_tags.append(f"openlibrary:{ol_ids[0]}") + elif isinstance(ol_ids, str): + new_tags.append(f"openlibrary:{ol_ids}") + + if 'lccn' in identifiers: + lccn_list = identifiers['lccn'] + if isinstance(lccn_list, list) and lccn_list: + new_tags.append(f"lccn:{lccn_list[0]}") + elif isinstance(lccn_list, str): + new_tags.append(f"lccn:{lccn_list}") + + if 'oclc' in identifiers: + oclc_list = identifiers['oclc'] + if isinstance(oclc_list, list) and oclc_list: + new_tags.append(f"oclc:{oclc_list[0]}") + elif isinstance(oclc_list, str): + new_tags.append(f"oclc:{oclc_list}") + + if 'goodreads' in identifiers: + goodreads_list = identifiers['goodreads'] + if isinstance(goodreads_list, list) and goodreads_list: + new_tags.append(f"goodreads:{goodreads_list[0]}") + elif isinstance(goodreads_list, str): + new_tags.append(f"goodreads:{goodreads_list}") + + if 'librarything' in identifiers: + lt_list = identifiers['librarything'] + if isinstance(lt_list, list) and lt_list: + new_tags.append(f"librarything:{lt_list[0]}") + elif isinstance(lt_list, str): + new_tags.append(f"librarything:{lt_list}") + + if 'doi' in identifiers: + doi_list = identifiers['doi'] + if isinstance(doi_list, list) and doi_list: + new_tags.append(f"doi:{doi_list[0]}") + elif isinstance(doi_list, str): + new_tags.append(f"doi:{doi_list}") + + if 'internet_archive' in identifiers: + ia_list = identifiers['internet_archive'] + if isinstance(ia_list, list) and ia_list: + new_tags.append(f"internet_archive:{ia_list[0]}") + elif isinstance(ia_list, str): + new_tags.append(f"internet_archive:{ia_list}") + + log(f"Found {len(new_tags)} tag(s) from ISBN lookup") + return new_tags + except Exception as e: + log(f"ISBN scraping error: {e}", file=sys.stderr) + return [] + + +def _scrape_openlibrary_metadata(olid: str) -> List[str]: + """Scrape metadata for an OpenLibrary ID using the .json API endpoint. + + Fetches from https://openlibrary.org/books/{OLID}.json and extracts: + - Title, authors, publish date, publishers + - Description + - Subjects as freeform tags (without namespace prefix) + - Identifiers (ISBN, LCCN, OCLC, etc.) 
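+
+    Illustrative return value (made-up book; tag shapes follow the code below):
+        ["title:Example Book", "author:Jane Doe", "publish_date:2001",
+         "publisher:Example Press", "Fiction", "isbn_13:9780000000000"]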
+ """ + new_tags = [] + try: + from ..helper.http_client import HTTPClient + import json as json_module + + # Format: OL9674499M or just 9674499M + olid_clean = olid.replace('OL', '').replace('M', '') + if not olid_clean.isdigit(): + olid_clean = olid + + # Ensure we have the full OLID format for the URL + if not olid.startswith('OL'): + url = f"https://openlibrary.org/books/OL{olid_clean}M.json" + else: + url = f"https://openlibrary.org/books/{olid}.json" + + try: + with HTTPClient() as client: + response = client.get(url) + response.raise_for_status() + data = json_module.loads(response.content.decode('utf-8')) + except Exception as e: + log(f"Failed to fetch OpenLibrary metadata: {e}", file=sys.stderr) + return [] + + if not data: + log(f"No OpenLibrary metadata found for: {olid}") + return [] + + # Add title + if 'title' in data: + new_tags.append(f"title:{data['title']}") + + # Add authors + if 'authors' in data and isinstance(data['authors'], list): + for author in data['authors'][:3]: + if isinstance(author, dict) and 'name' in author: + new_tags.append(f"author:{author['name']}") + elif isinstance(author, str): + new_tags.append(f"author:{author}") + + # Add publish date + if 'publish_date' in data: + new_tags.append(f"publish_date:{data['publish_date']}") + + # Add publishers + if 'publishers' in data and isinstance(data['publishers'], list): + for pub in data['publishers'][:1]: + if isinstance(pub, dict) and 'name' in pub: + new_tags.append(f"publisher:{pub['name']}") + elif isinstance(pub, str): + new_tags.append(f"publisher:{pub}") + + # Add description + if 'description' in data: + desc = data['description'] + if isinstance(desc, dict) and 'value' in desc: + desc = desc['value'] + if desc: + desc_str = str(desc).strip() + if len(desc_str) > 0: + new_tags.append(f"description:{desc_str[:200]}") + + # Add number of pages + if 'number_of_pages' in data: + page_count = data['number_of_pages'] + if page_count and isinstance(page_count, int) and page_count > 0: + new_tags.append(f"pages:{page_count}") + + # Add subjects as FREEFORM tags (no namespace prefix) + if 'subjects' in data and isinstance(data['subjects'], list): + for subject in data['subjects'][:10]: + if subject and isinstance(subject, str): + subject_clean = str(subject).strip() + if subject_clean and subject_clean not in new_tags: + new_tags.append(subject_clean) + + # Add identifiers + if 'identifiers' in data and isinstance(data['identifiers'], dict): + identifiers = data['identifiers'] + + if 'isbn_10' in identifiers: + isbn_10_list = identifiers['isbn_10'] + if isinstance(isbn_10_list, list) and isbn_10_list: + new_tags.append(f"isbn_10:{isbn_10_list[0]}") + elif isinstance(isbn_10_list, str): + new_tags.append(f"isbn_10:{isbn_10_list}") + + if 'isbn_13' in identifiers: + isbn_13_list = identifiers['isbn_13'] + if isinstance(isbn_13_list, list) and isbn_13_list: + new_tags.append(f"isbn_13:{isbn_13_list[0]}") + elif isinstance(isbn_13_list, str): + new_tags.append(f"isbn_13:{isbn_13_list}") + + if 'lccn' in identifiers: + lccn_list = identifiers['lccn'] + if isinstance(lccn_list, list) and lccn_list: + new_tags.append(f"lccn:{lccn_list[0]}") + elif isinstance(lccn_list, str): + new_tags.append(f"lccn:{lccn_list}") + + if 'oclc_numbers' in identifiers: + oclc_list = identifiers['oclc_numbers'] + if isinstance(oclc_list, list) and oclc_list: + new_tags.append(f"oclc:{oclc_list[0]}") + elif isinstance(oclc_list, str): + new_tags.append(f"oclc:{oclc_list}") + + if 'goodreads' in identifiers: + goodreads_list = 
identifiers['goodreads'] + if isinstance(goodreads_list, list) and goodreads_list: + new_tags.append(f"goodreads:{goodreads_list[0]}") + elif isinstance(goodreads_list, str): + new_tags.append(f"goodreads:{goodreads_list}") + + log(f"Found {len(new_tags)} tag(s) from OpenLibrary lookup") + return new_tags + except Exception as e: + log(f"OpenLibrary scraping error: {e}", file=sys.stderr) + return [] + + +def _perform_scraping(tags_list: List[str]) -> List[str]: + """Perform scraping based on identifiers in tags. + + Priority order: + 1. openlibrary: (preferred - more complete metadata) + 2. isbn_10 or isbn (fallback) + """ + identifiers = _extract_scrapable_identifiers(tags_list) + + if not identifiers: + log("No scrapable identifiers found (openlibrary, ISBN, musicbrainz, imdb)") + return [] + + log(f"Found scrapable identifiers: {', '.join(identifiers.keys())}") + + new_tags = [] + + # Prefer OpenLibrary over ISBN (more complete metadata) + if 'openlibrary' in identifiers: + olid = identifiers['openlibrary'] + if olid: + log(f"Scraping OpenLibrary: {olid}") + new_tags.extend(_scrape_openlibrary_metadata(olid)) + elif 'isbn_13' in identifiers or 'isbn_10' in identifiers or 'isbn' in identifiers: + isbn = identifiers.get('isbn_13') or identifiers.get('isbn_10') or identifiers.get('isbn') + if isbn: + log(f"Scraping ISBN: {isbn}") + new_tags.extend(_scrape_isbn_metadata(isbn)) + + existing_tags_lower = {tag.lower() for tag in tags_list} + scraped_unique = [] + seen = set() + for tag in new_tags: + tag_lower = tag.lower() + if tag_lower not in existing_tags_lower and tag_lower not in seen: + scraped_unique.append(tag) + seen.add(tag_lower) + + if scraped_unique: + log(f"Added {len(scraped_unique)} new tag(s) from scraping") + + return scraped_unique + + +def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: + """Get tags from Hydrus, local sidecar, or URL metadata. + + Usage: + get-tag [-hash ] [--store ] [--emit] + get-tag -scrape + + Options: + -hash : Override hash to use instead of result's hash_hex + --store : Store result to this key for pipeline + --emit: Emit result without interactive prompt (quiet mode) + -scrape : Scrape metadata from URL or provider name (itunes, openlibrary, googlebooks) + """ + args_list = [str(arg) for arg in (args or [])] + raw_args = list(args_list) + + # Support numeric selection tokens (e.g., "@1" leading to argument "1") without treating + # them as hash overrides. This lets users pick from the most recent table overlay/results. 
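+    # Illustrative flow (assumed REPL usage): after `search-file foo` has populated the results
+    # table, `@2 | get-tag` reaches this cmdlet with the single argument "2"; the token is then
+    # resolved against last_result_items[1] rather than being misread as a -hash override.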
+ if len(args_list) == 1: + token = args_list[0] + if not token.startswith("-") and token.isdigit(): + try: + idx = int(token) - 1 + items_pool = ctx.get_last_result_items() + if 0 <= idx < len(items_pool): + result = items_pool[idx] + args_list = [] + debug(f"[get_tag] Resolved numeric selection arg {token} -> last_result_items[{idx}]") + else: + debug(f"[get_tag] Numeric selection arg {token} out of range (items={len(items_pool)})") + except Exception as exc: + debug(f"[get_tag] Failed to resolve numeric selection arg {token}: {exc}") + # Helper to get field from both dict and object + def get_field(obj: Any, field: str, default: Any = None) -> Any: + if isinstance(obj, dict): + return obj.get(field, default) + else: + return getattr(obj, field, default) + + # Parse arguments using shared parser + parsed_args = parse_cmdlet_args(args_list, CMDLET) + + # Detect if -scrape flag was provided without a value (parse_cmdlet_args skips missing values) + scrape_flag_present = any(str(arg).lower() in {"-scrape", "--scrape"} for arg in args_list) + + # Extract values + hash_override_raw = parsed_args.get("hash") + hash_override = normalize_hash(hash_override_raw) + store_key = parsed_args.get("store") + emit_requested = parsed_args.get("emit", False) + scrape_url = parsed_args.get("scrape") + scrape_requested = scrape_flag_present or scrape_url is not None + + explicit_hash_flag = any(str(arg).lower() in {"-hash", "--hash"} for arg in raw_args) + if hash_override_raw is not None: + if not hash_override or not looks_like_hash(hash_override): + debug(f"[get_tag] Ignoring invalid hash override '{hash_override_raw}' (explicit_flag={explicit_hash_flag})") + if explicit_hash_flag: + log("Invalid hash format: expected 64 hex characters", file=sys.stderr) + return 1 + hash_override = None + + if scrape_requested and (not scrape_url or str(scrape_url).strip() == ""): + log("-scrape requires a URL or provider name", file=sys.stderr) + return 1 + + # Handle URL or provider scraping mode + if scrape_requested and scrape_url: + import json as json_module + + if scrape_url.startswith("http://") or scrape_url.startswith("https://"): + # URL scraping (existing behavior) + title, tags, formats, playlist_items = _scrape_url_metadata(scrape_url) + if not tags: + log("No tags extracted from URL", file=sys.stderr) + return 1 + output = { + "title": title, + "tags": tags, + "formats": [(label, fmt_id) for label, fmt_id in formats], + "playlist_items": playlist_items, + } + print(json_module.dumps(output, ensure_ascii=False)) + return 0 + + # Provider scraping (e.g., itunes) + provider = get_metadata_provider(scrape_url, config) + if provider is None: + log(f"Unknown metadata provider: {scrape_url}", file=sys.stderr) + return 1 + + # Prefer identifier tags (ISBN/OLID/etc.) 
when available; fallback to title/filename + identifier_tags: List[str] = [] + result_tags = get_field(result, "tags", None) + if isinstance(result_tags, list): + identifier_tags = [str(t) for t in result_tags if isinstance(t, (str, bytes))] + + # Try local sidecar if no tags present on result + if not identifier_tags: + file_path = get_field(result, "target", None) or get_field(result, "path", None) or get_field(result, "file_path", None) or get_field(result, "filename", None) + if isinstance(file_path, str) and file_path and not file_path.lower().startswith(("http://", "https://")): + try: + media_path = Path(str(file_path)) + if media_path.exists(): + tags_from_sidecar = read_sidecar(media_path) + if isinstance(tags_from_sidecar, list): + identifier_tags = [str(t) for t in tags_from_sidecar if isinstance(t, (str, bytes))] + except Exception: + pass + + title_from_tags = _extract_tag_value(identifier_tags, "title") + artist_from_tags = _extract_tag_value(identifier_tags, "artist") + + identifiers = _extract_scrapable_identifiers(identifier_tags) + identifier_query: Optional[str] = None + if identifiers: + if provider.name in {"openlibrary", "googlebooks", "google"}: + identifier_query = identifiers.get("isbn_13") or identifiers.get("isbn_10") or identifiers.get("isbn") or identifiers.get("openlibrary") + elif provider.name == "itunes": + identifier_query = identifiers.get("musicbrainz") or identifiers.get("musicbrainzalbum") + + # Determine query from identifier first, else title on the result or filename + title_hint = title_from_tags or get_field(result, "title", None) or get_field(result, "name", None) + if not title_hint: + file_path = get_field(result, "path", None) or get_field(result, "filename", None) + if file_path: + title_hint = Path(str(file_path)).stem + artist_hint = artist_from_tags or get_field(result, "artist", None) or get_field(result, "uploader", None) + if not artist_hint: + meta_field = get_field(result, "metadata", None) + if isinstance(meta_field, dict): + meta_artist = meta_field.get("artist") or meta_field.get("uploader") + if meta_artist: + artist_hint = str(meta_artist) + + combined_query: Optional[str] = None + if not identifier_query and title_hint and artist_hint and provider.name in {"itunes", "musicbrainz"}: + if provider.name == "musicbrainz": + combined_query = f'recording:"{title_hint}" AND artist:"{artist_hint}"' + else: + combined_query = f"{title_hint} {artist_hint}" + + query_hint = identifier_query or combined_query or title_hint + if not query_hint: + log("No title or identifier available to search for metadata", file=sys.stderr) + return 1 + + if identifier_query: + log(f"Using identifier for metadata search: {identifier_query}") + elif combined_query: + log(f"Using title+artist for metadata search: {title_hint} - {artist_hint}") + else: + log(f"Using title for metadata search: {query_hint}") + + items = provider.search(query_hint, limit=10) + if not items: + log("No metadata results found", file=sys.stderr) + return 1 + + from result_table import ResultTable + table = ResultTable(f"Metadata: {provider.name}") + table.set_source_command("get-tag", []) + selection_payload = [] + hash_for_payload = normalize_hash(hash_override) or normalize_hash(get_field(result, "hash_hex", None)) + for idx, item in enumerate(items): + tags = provider.to_tags(item) + row = table.add_row() + row.add_column("Title", item.get("title", "")) + row.add_column("Artist", item.get("artist", "")) + row.add_column("Album", item.get("album", "")) + row.add_column("Year", 
item.get("year", "")) + payload = { + "tags": tags, + "provider": provider.name, + "title": item.get("title"), + "artist": item.get("artist"), + "album": item.get("album"), + "year": item.get("year"), + "extra": { + "tags": tags, + "provider": provider.name, + "hydrus_hash": hash_for_payload, + "storage_source": get_field(result, "source", None) or get_field(result, "origin", None), + }, + "file_hash": hash_for_payload, + } + selection_payload.append(payload) + table.set_row_selection_args(idx, [str(idx + 1)]) + + ctx.set_last_result_table_overlay(table, selection_payload) + ctx.set_current_stage_table(table) + # Preserve items for @ selection and downstream pipes without emitting duplicates + ctx.set_last_result_items_only(selection_payload) + print(table) + return 0 + + # If -scrape was requested but no URL, that's an error + if scrape_requested and not scrape_url: + log("-scrape requires a URL argument", file=sys.stderr) + return 1 + + # Handle @N selection which creates a list - extract the first item + if isinstance(result, list) and len(result) > 0: + result = result[0] + + hash_from_result = normalize_hash(get_field(result, "hash_hex", None)) + hash_hex = hash_override or hash_from_result + # Only use emit mode if explicitly requested with --emit flag, not just because we're in a pipeline + # This allows interactive REPL to work even in pipelines + emit_mode = emit_requested or bool(store_key) + store_label = (store_key.strip() if store_key and store_key.strip() else None) + + # Check Hydrus availability + hydrus_available, _ = hydrus.is_available(config) + + # Try to find path in result object + local_path = get_field(result, "target", None) or get_field(result, "path", None) or get_field(result, "file_path", None) + + # Determine if local file + is_local_file = False + media: Optional[Path] = None + if local_path and isinstance(local_path, str) and not local_path.startswith(("http://", "https://")): + is_local_file = True + try: + media = Path(str(local_path)) + except Exception: + media = None + + # Try Hydrus first (always prioritize if available and has hash) + use_hydrus = False + hydrus_meta = None # Cache the metadata from first fetch + client = None + if hash_hex and hydrus_available: + try: + client = hydrus.get_client(config) + payload = client.fetch_file_metadata(hashes=[str(hash_hex)], include_service_keys_to_tags=True, include_file_url=False) + items = payload.get("metadata") if isinstance(payload, dict) else None + if isinstance(items, list) and items: + meta = items[0] if isinstance(items[0], dict) else None + # Only accept file if it has a valid file_id (not None) + if isinstance(meta, dict) and meta.get("file_id") is not None: + use_hydrus = True + hydrus_meta = meta # Cache for tag extraction + except Exception: + pass + + # Get tags - try Hydrus first, fallback to sidecar + current = [] + service_name = "" + service_key = None + source = "unknown" + + if use_hydrus and hash_hex and hydrus_meta: + try: + # Use cached metadata from above, don't fetch again + service_name = hydrus.get_tag_service_name(config) + if client is None: + client = hydrus.get_client(config) + service_key = hydrus.get_tag_service_key(client, service_name) + current = _extract_my_tags_from_hydrus_meta(hydrus_meta, service_key, service_name) + source = "hydrus" + except Exception as exc: + log(f"Warning: Failed to extract tags from Hydrus: {exc}", file=sys.stderr) + + # Fallback to local sidecar or local DB if no tags + if not current and is_local_file and media and media.exists(): + try: + # 
First try local library DB + library_root = get_local_storage_path(config) + if library_root: + try: + with LocalLibraryDB(library_root) as db: + db_tags = db.get_tags(media) + if db_tags: + current = db_tags + source = "local_db" + except Exception as exc: + log(f"[get_tag] DB lookup failed, trying sidecar: {exc}", file=sys.stderr) + + # Fall back to sidecar if DB didn't have tags + if not current: + sidecar_path = find_sidecar(media) + if sidecar_path and sidecar_path.exists(): + try: + _, current, _ = read_sidecar(sidecar_path) + except Exception: + _, current, _ = _read_sidecar_fallback(sidecar_path) + if current: + source = "sidecar" + except Exception as exc: + log(f"Warning: Failed to load tags from local storage: {exc}", file=sys.stderr) + + # Fallback to tags in the result object if Hydrus/local lookup returned nothing + if not current: + # Check if result has 'tags' attribute (PipeObject) + if hasattr(result, 'tags') and getattr(result, 'tags', None): + current = getattr(result, 'tags') + source = "pipeline_result" + # Check if result is a dict with 'tags' key + elif isinstance(result, dict) and 'tags' in result: + tags_val = result['tags'] + if isinstance(tags_val, list): + current = tags_val + source = "pipeline_result" + source = "pipeline_result" + + # Error if no tags found + if not current: + log("No tags found", file=sys.stderr) + return 1 + + # Always output to ResultTable (pipeline mode only) + # Extract title for table header + item_title = get_field(result, "title", None) or get_field(result, "name", None) or get_field(result, "filename", None) + + # Build a subject payload representing the file whose tags are being shown + subject_origin = get_field(result, "origin", None) or get_field(result, "source", None) or source + subject_payload: Dict[str, Any] = { + "tags": list(current), + "title": item_title, + "name": item_title, + "origin": subject_origin, + "source": subject_origin, + "storage_source": subject_origin, + "service_name": service_name, + "extra": { + "tags": list(current), + "storage_source": subject_origin, + "hydrus_hash": hash_hex, + }, + } + if hash_hex: + subject_payload.update({ + "hash": hash_hex, + "hash_hex": hash_hex, + "file_hash": hash_hex, + "hydrus_hash": hash_hex, + }) + if local_path: + try: + path_text = str(local_path) + subject_payload.update({ + "file_path": path_text, + "path": path_text, + "target": path_text, + }) + subject_payload["extra"]["file_path"] = path_text + except Exception: + pass + + if source == "hydrus": + _emit_tags_as_table(current, hash_hex=hash_hex, source="hydrus", service_name=service_name, config=config, item_title=item_title, subject=subject_payload) + else: + _emit_tags_as_table(current, hash_hex=hash_hex, source="local", service_name=None, config=config, item_title=item_title, file_path=str(local_path) if local_path else None, subject=subject_payload) + + # If emit requested or store key provided, emit payload + if emit_mode: + _emit_tag_payload(source, current, hash_value=hash_hex, store_label=store_label) + + return 0 + + +_SCRAPE_CHOICES = [] +try: + _SCRAPE_CHOICES = sorted(list_metadata_providers().keys()) +except Exception: + _SCRAPE_CHOICES = ["itunes", "openlibrary", "googlebooks", "google", "musicbrainz"] + + +CMDLET = Cmdlet( + name="get-tag", + summary="Get tags from Hydrus or local sidecar metadata", + usage="get-tag [-hash ] [--store ] [--emit] [-scrape ]", + aliases=["tags"], + args=[ + SharedArgs.HASH, + CmdletArg( + name="-store", + type="string", + description="Store result to this key for 
pipeline", + alias="store" + ), + CmdletArg( + name="-emit", + type="flag", + description="Emit result without interactive prompt (quiet mode)", + alias="emit-only" + ), + CmdletArg( + name="-scrape", + type="string", + description="Scrape metadata from URL or provider name (returns tags as JSON or table)", + required=False, + choices=_SCRAPE_CHOICES, + ) + ] +) + + diff --git a/cmdlets/get_url.py b/cmdlets/get_url.py index e087e5e..0e60b81 100644 --- a/cmdlets/get_url.py +++ b/cmdlets/get_url.py @@ -1,139 +1,80 @@ from __future__ import annotations from typing import Any, Dict, Sequence -import json import sys -from pathlib import Path from . import register -import models import pipeline as ctx -from helper import hydrus as hydrus_wrapper -from ._shared import Cmdlet, CmdletArg, normalize_hash +from ._shared import Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, get_field, normalize_hash from helper.logger import log -from config import get_local_storage_path -from helper.local_library import LocalLibraryDB - -CMDLET = Cmdlet( - name="get-url", - summary="List URLs associated with a file (Hydrus or Local).", - usage="get-url [-hash ]", - args=[ - CmdletArg("-hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."), - ], - details=[ - "- Prints the known URLs for the selected file.", - ], -) +from helper.store import FileStorage -def _parse_hash_and_rest(args: Sequence[str]) -> tuple[str | None, list[str]]: - override_hash: str | None = None - rest: list[str] = [] - i = 0 - while i < len(args): - a = args[i] - low = str(a).lower() - if low in {"-hash", "--hash", "hash"} and i + 1 < len(args): - override_hash = str(args[i + 1]).strip() - i += 2 - continue - rest.append(a) - i += 1 - return override_hash, rest - - -@register(["get-url", "get-urls", "get_url"]) # aliases -def get_urls(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: - # Helper to get field from both dict and object - def get_field(obj: Any, field: str, default: Any = None) -> Any: - if isinstance(obj, dict): - return obj.get(field, default) - else: - return getattr(obj, field, default) +class Get_Url(Cmdlet): + """Get url associated with files via hash+store.""" - # Help - try: - if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args): - log(json.dumps(CMDLET, ensure_ascii=False, indent=2)) - return 0 - except Exception: - pass + NAME = "get-url" + SUMMARY = "List url associated with a file" + USAGE = "@1 | get-url" + ARGS = [ + SharedArgs.HASH, + SharedArgs.STORE, + ] + DETAIL = [ + "- Lists all url associated with file identified by hash+store", + ] - override_hash, _ = _parse_hash_and_rest(args) - - # Handle @N selection which creates a list - extract the first item - if isinstance(result, list) and len(result) > 0: - result = result[0] - - found_urls = [] - - # 1. Try Local Library - file_path = get_field(result, "file_path") or get_field(result, "path") - if file_path and not override_hash: - try: - path_obj = Path(file_path) - if path_obj.exists(): - storage_path = get_local_storage_path(config) - if storage_path: - with LocalLibraryDB(storage_path) as db: - metadata = db.get_metadata(path_obj) - if metadata and metadata.get("known_urls"): - found_urls.extend(metadata["known_urls"]) - except Exception as e: - log(f"Error checking local library: {e}", file=sys.stderr) - - # 2. 
Try Hydrus - hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(get_field(result, "hash_hex", None)) - - # If we haven't found URLs yet, or if we want to merge them (maybe?), let's check Hydrus if we have a hash - # But usually if it's local, we might not want to check Hydrus unless requested. - # However, the user said "they can just work together". - - if hash_hex: - try: - client = hydrus_wrapper.get_client(config) - if client: - payload = client.fetch_file_metadata(hashes=[hash_hex], include_file_urls=True) - items = payload.get("metadata") if isinstance(payload, dict) else None - meta = items[0] if (isinstance(items, list) and items and isinstance(items[0], dict)) else None - hydrus_urls = (meta.get("known_urls") if isinstance(meta, dict) else None) or [] - for u in hydrus_urls: - if u not in found_urls: - found_urls.append(u) - except Exception as exc: - # Only log error if we didn't find local URLs either, or if it's a specific error - if not found_urls: - log(f"Hydrus lookup failed: {exc}", file=sys.stderr) - - if found_urls: - for u in found_urls: - text = str(u).strip() - if text: - # Emit a rich object that looks like a string but carries context - # We use a dict with 'title' which ResultTable uses for display - # and 'url' which is the actual data - # We also include the source file info so downstream cmdlets can use it - - # Create a result object that mimics the structure expected by delete-url - # delete-url expects a file object usually, but here we are emitting URLs. - # If we emit a dict with 'url' and 'source_file', delete-url can use it. - - rich_result = { - "title": text, # Display as just the URL - "url": text, - "source_file": result, # Pass the original file context - "file_path": get_field(result, "file_path") or get_field(result, "path"), - "hash_hex": hash_hex - } - ctx.emit(rich_result) - return 0 - - if not hash_hex and not file_path: - log("Selected result does not include a file path or Hydrus hash", file=sys.stderr) - return 1 + def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: + """Get url for file via hash+store backend.""" + parsed = parse_cmdlet_args(args, self) - ctx.emit("No URLs found.") - return 0 + # Extract hash and store from result or args + file_hash = parsed.get("hash") or get_field(result, "hash") + store_name = parsed.get("store") or get_field(result, "store") + + if not file_hash: + log("Error: No file hash provided") + return 1 + + if not store_name: + log("Error: No store name provided") + return 1 + + # Normalize hash + file_hash = normalize_hash(file_hash) + if not file_hash: + log("Error: Invalid hash format") + return 1 + + # Get backend and retrieve url + try: + storage = FileStorage(config) + backend = storage[store_name] + + url = backend.get_url(file_hash) + + if url: + for url in url: + # Emit rich object for pipeline compatibility + ctx.emit({ + "url": url, + "hash": file_hash, + "store": store_name, + }) + return 0 + else: + ctx.emit("No url found") + return 0 + + except KeyError: + log(f"Error: Storage backend '{store_name}' not configured") + return 1 + except Exception as exc: + log(f"Error retrieving url: {exc}", file=sys.stderr) + return 1 + + +# Register cmdlet +register(["get-url", "get_url"])(Get_Url) diff --git a/cmdlets/manage_config.py b/cmdlets/manage_config.py index ac7126f..11184ab 100644 --- a/cmdlets/manage_config.py +++ b/cmdlets/manage_config.py @@ -6,7 +6,7 @@ CMDLET = Cmdlet( name=".config", summary="Manage configuration settings", usage=".config 
[key] [value]", - args=[ + arg=[ CmdletArg( name="key", description="Configuration key to update (dot-separated)", diff --git a/cmdlets/merge_file.py b/cmdlets/merge_file.py index 2fa9047..02186b7 100644 --- a/cmdlets/merge_file.py +++ b/cmdlets/merge_file.py @@ -42,16 +42,14 @@ from ._shared import ( normalize_result_input, get_pipe_object_path, get_pipe_object_hash, + should_show_help, + get_field, ) import models import pipeline as ctx -def _get_item_value(item: Any, key: str, default: Any = None) -> Any: - """Helper to read either dict keys or attributes.""" - if isinstance(item, dict): - return item.get(key, default) - return getattr(item, key, default) + @@ -60,12 +58,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: """Merge multiple files into one.""" # Parse help - try: - if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args): - log(json.dumps(CMDLET, ensure_ascii=False, indent=2)) - return 0 - except Exception: - pass + if should_show_help(args): + log(json.dumps(CMDLET, ensure_ascii=False, indent=2)) + return 0 # Parse arguments parsed = parse_cmdlet_args(args, CMDLET) @@ -102,7 +97,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: source_files: List[Path] = [] source_tags_files: List[Path] = [] source_hashes: List[str] = [] - source_urls: List[str] = [] + source_url: List[str] = [] source_tags: List[str] = [] # NEW: collect tags from source files source_relationships: List[str] = [] # NEW: collect relationships from source files @@ -146,7 +141,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: if tags_file.exists(): source_tags_files.append(tags_file) - # Try to read hash, tags, urls, and relationships from .tags sidecar file + # Try to read hash, tags, url, and relationships from .tags sidecar file try: tags_content = tags_file.read_text(encoding='utf-8') for line in tags_content.split('\n'): @@ -157,18 +152,18 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: hash_value = line[5:].strip() if hash_value: source_hashes.append(hash_value) - elif line.startswith('known_url:') or line.startswith('url:'): - # Extract URLs from tags file + elif line.startswith('url:') or line.startswith('url:'): + # Extract url from tags file url_value = line.split(':', 1)[1].strip() if ':' in line else '' - if url_value and url_value not in source_urls: - source_urls.append(url_value) + if url_value and url_value not in source_url: + source_url.append(url_value) elif line.startswith('relationship:'): # Extract relationships from tags file rel_value = line.split(':', 1)[1].strip() if ':' in line else '' if rel_value and rel_value not in source_relationships: source_relationships.append(rel_value) else: - # Collect actual tags (not metadata like hash: or known_url:) + # Collect actual tags (not metadata like hash: or url:) source_tags.append(line) except Exception: pass @@ -178,14 +173,14 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: if hash_value and hash_value not in source_hashes: source_hashes.append(str(hash_value)) - # Extract known URLs if available - known_urls = _get_item_value(item, 'known_urls', []) - if isinstance(known_urls, str): - source_urls.append(known_urls) - elif isinstance(known_urls, list): - source_urls.extend(known_urls) + # Extract known url if available + url = get_field(item, 'url', []) + if isinstance(url, str): + source_url.append(url) + elif isinstance(url, list): + source_url.extend(url) 
else: - title = _get_item_value(item, 'title', 'unknown') or _get_item_value(item, 'id', 'unknown') + title = get_field(item, 'title', 'unknown') or get_field(item, 'id', 'unknown') log(f"Warning: Could not locate file for item: {title}", file=sys.stderr) if len(source_files) < 2: @@ -279,8 +274,8 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: if HAS_METADATA_API and write_tags_to_file: # Use unified API for file writing source_hashes_list = source_hashes if source_hashes else None - source_urls_list = source_urls if source_urls else None - write_tags_to_file(tags_path, merged_tags, source_hashes_list, source_urls_list) + source_url_list = source_url if source_url else None + write_tags_to_file(tags_path, merged_tags, source_hashes_list, source_url_list) else: # Fallback: manual file writing tags_lines = [] @@ -292,10 +287,10 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: # Add regular tags tags_lines.extend(merged_tags) - # Add known URLs - if source_urls: - for url in source_urls: - tags_lines.append(f"known_url:{url}") + # Add known url + if source_url: + for url in source_url: + tags_lines.append(f"url:{url}") # Add relationships (if available) if source_relationships: @@ -309,7 +304,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: # Also create .metadata file using centralized function try: - write_metadata(output_path, source_hashes[0] if source_hashes else None, source_urls, source_relationships) + write_metadata(output_path, source_hashes[0] if source_hashes else None, source_url, source_relationships) log(f"Created metadata: {output_path.name}.metadata", file=sys.stderr) except Exception as e: log(f"Warning: Could not create metadata file: {e}", file=sys.stderr) @@ -325,12 +320,12 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: except ImportError: # Fallback: create a simple object with the required attributes class SimpleItem: - def __init__(self, target, title, media_kind, tags=None, known_urls=None): + def __init__(self, target, title, media_kind, tags=None, url=None): self.target = target self.title = title self.media_kind = media_kind self.tags = tags or [] - self.known_urls = known_urls or [] + self.url = url or [] self.origin = "local" # Ensure origin is set for add-file PipelineItem = SimpleItem @@ -339,7 +334,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: title=output_path.stem, media_kind=file_kind, tags=merged_tags, # Include merged tags - known_urls=source_urls # Include known URLs + url=source_url # Include known url ) # Clear previous results to ensure only the merged file is passed down ctx.clear_last_result() @@ -904,12 +899,12 @@ CMDLET = Cmdlet( name="merge-file", summary="Merge multiple files into a single output file. Supports audio, video, PDF, and text merging with optional cleanup.", usage="merge-file [-delete] [-output ] [-format ]", - args=[ + arg=[ CmdletArg("-delete", type="flag", description="Delete source files after successful merge."), CmdletArg("-output", description="Override output file path."), CmdletArg("-format", description="Output format (auto/mp3/aac/opus/mp4/mkv/pdf/txt). 
Default: auto-detect from first file."), ], - details=[ + detail=[ "- Pipe multiple files: search-file query | [1,2,3] | merge-file", "- Audio files merge with minimal quality loss using specified codec.", "- Video files merge into MP4 or MKV containers.", diff --git a/cmdlets/screen_shot.py b/cmdlets/screen_shot.py index 6534fe2..4526bd7 100644 --- a/cmdlets/screen_shot.py +++ b/cmdlets/screen_shot.py @@ -1,4 +1,4 @@ -"""Screen-shot cmdlet for capturing screenshots of URLs in a pipeline. +"""Screen-shot cmdlet for capturing screenshots of url in a pipeline. This cmdlet processes files through the pipeline and creates screenshots using Playwright, marking them as temporary artifacts for cleanup. @@ -23,7 +23,7 @@ from helper.http_client import HTTPClient from helper.utils import ensure_directory, unique_path, unique_preserve_order from . import register -from ._shared import Cmdlet, CmdletArg, SharedArgs, create_pipe_object_result, normalize_result_input +from ._shared import Cmdlet, CmdletArg, SharedArgs, create_pipe_object_result, normalize_result_input, should_show_help, get_field import models import pipeline as pipeline_context @@ -113,8 +113,8 @@ class ScreenshotError(RuntimeError): class ScreenshotOptions: """Options controlling screenshot capture and post-processing.""" - url: str output_dir: Path + url: Sequence[str] = () output_path: Optional[Path] = None full_page: bool = True headless: bool = True @@ -124,7 +124,7 @@ class ScreenshotOptions: tags: Sequence[str] = () archive: bool = False archive_timeout: float = ARCHIVE_TIMEOUT - known_urls: Sequence[str] = () + url: Sequence[str] = () output_format: Optional[str] = None prefer_platform_target: bool = False target_selectors: Optional[Sequence[str]] = None @@ -136,10 +136,9 @@ class ScreenshotResult: """Details about the captured screenshot.""" path: Path - url: str tags_applied: List[str] - archive_urls: List[str] - known_urls: List[str] + archive_url: List[str] + url: List[str] warnings: List[str] = field(default_factory=list) @@ -471,24 +470,24 @@ def _capture_screenshot(options: ScreenshotOptions) -> ScreenshotResult: warnings: List[str] = [] _capture(options, destination, warnings) - known_urls = unique_preserve_order([options.url, *options.known_urls]) - archive_urls: List[str] = [] + # Build URL list from provided options.url (sequence) and deduplicate + url = unique_preserve_order(list(options.url)) + archive_url: List[str] = [] if options.archive: debug(f"[_capture_screenshot] Archiving enabled for {options.url}") archives, archive_warnings = _archive_url(options.url, options.archive_timeout) - archive_urls.extend(archives) + archive_url.extend(archives) warnings.extend(archive_warnings) if archives: - known_urls = unique_preserve_order([*known_urls, *archives]) + url = unique_preserve_order([*url, *archives]) applied_tags = unique_preserve_order(list(tag for tag in options.tags if tag.strip())) return ScreenshotResult( path=destination, - url=options.url, tags_applied=applied_tags, - archive_urls=archive_urls, - known_urls=known_urls, + archive_url=archive_url, + url=url, warnings=warnings, ) @@ -498,10 +497,10 @@ def _capture_screenshot(options: ScreenshotOptions) -> ScreenshotResult: # ============================================================================ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: - """Take screenshots of URLs in the pipeline. + """Take screenshots of url in the pipeline. 
Accepts: - - Single result object (dict or PipeObject) with 'file_path' field + - Single result object (dict or PipeObject) with 'path' field - List of result objects to screenshot each - Direct URL as string @@ -518,12 +517,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: debug(f"[_run] screen-shot invoked with args: {args}") # Help check - try: - if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args): - log(json.dumps(CMDLET, ensure_ascii=False, indent=2)) - return 0 - except Exception: - pass + if should_show_help(args): + log(json.dumps(CMDLET, ensure_ascii=False, indent=2)) + return 0 # ======================================================================== # ARGUMENT PARSING @@ -539,36 +535,36 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: # Positional URL argument (if provided) url_arg = parsed.get("url") - positional_urls = [str(url_arg)] if url_arg else [] + positional_url = [str(url_arg)] if url_arg else [] # ======================================================================== - # INPUT PROCESSING - Extract URLs from pipeline or command arguments + # INPUT PROCESSING - Extract url from pipeline or command arguments # ======================================================================== piped_results = normalize_result_input(result) - urls_to_process = [] + url_to_process = [] - # Extract URLs from piped results + # Extract url from piped results if piped_results: for item in piped_results: - url = None - if isinstance(item, dict): - url = item.get('file_path') or item.get('path') or item.get('url') or item.get('target') - else: - url = getattr(item, 'file_path', None) or getattr(item, 'path', None) or getattr(item, 'url', None) or getattr(item, 'target', None) - + url = ( + get_field(item, 'path') + or get_field(item, 'url') + or get_field(item, 'target') + ) + if url: - urls_to_process.append(str(url)) + url_to_process.append(str(url)) # Use positional arguments if no pipeline input - if not urls_to_process and positional_urls: - urls_to_process = positional_urls + if not url_to_process and positional_url: + url_to_process = positional_url - if not urls_to_process: - log(f"No URLs to process for screen-shot cmdlet", file=sys.stderr) + if not url_to_process: + log(f"No url to process for screen-shot cmdlet", file=sys.stderr) return 1 - debug(f"[_run] URLs to process: {urls_to_process}") + debug(f"[_run] url to process: {url_to_process}") # ======================================================================== # OUTPUT DIRECTORY RESOLUTION - Priority chain @@ -619,10 +615,10 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: all_emitted = [] exit_code = 0 # ======================================================================== - # PROCESS URLs AND CAPTURE SCREENSHOTS + # PROCESS url AND CAPTURE SCREENSHOTS # ======================================================================== - for url in urls_to_process: + for url in url_to_process: # Validate URL format if not url.lower().startswith(("http://", "https://", "file://")): log(f"[screen_shot] Skipping non-URL input: {url}", file=sys.stderr) @@ -631,7 +627,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: try: # Create screenshot with provided options options = ScreenshotOptions( - url=url, + url=[url], output_dir=screenshot_dir, output_format=format_name, archive=archive_enabled, @@ -645,8 +641,8 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) 
-> int: # Log results and warnings log(f"Screenshot captured to {screenshot_result.path}", flush=True) - if screenshot_result.archive_urls: - log(f"Archives: {', '.join(screenshot_result.archive_urls)}", flush=True) + if screenshot_result.archive_url: + log(f"Archives: {', '.join(screenshot_result.archive_url)}", flush=True) for warning in screenshot_result.warnings: log(f"Warning: {warning}", flush=True) @@ -670,8 +666,8 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: parent_hash=hashlib.sha256(url.encode()).hexdigest(), extra={ 'source_url': url, - 'archive_urls': screenshot_result.archive_urls, - 'known_urls': screenshot_result.known_urls, + 'archive_url': screenshot_result.archive_url, + 'url': screenshot_result.url, 'target': str(screenshot_result.path), # Explicit target for add-file } ) @@ -701,16 +697,16 @@ CMDLET = Cmdlet( name="screen-shot", summary="Capture a screenshot of a URL or file and mark as temporary artifact", usage="screen-shot [options] or download-data | screen-shot [options]", - aliases=["screenshot", "ss"], - args=[ + alias=["screenshot", "ss"], + arg=[ CmdletArg(name="url", type="string", required=False, description="URL to screenshot (or from pipeline)"), CmdletArg(name="format", type="string", description="Output format: png, jpeg, or pdf"), CmdletArg(name="selector", type="string", description="CSS selector for element capture"), SharedArgs.ARCHIVE, # Use shared archive argument - SharedArgs.STORAGE, # Use shared storage argument + SharedArgs.STORE, # Use shared storage argument ], - details=[ - "Take screenshots of URLs with optional archiving and element targeting.", + detail=[ + "Take screenshots of url with optional archiving and element targeting.", "Screenshots are marked as temporary artifacts for cleanup by the cleanup cmdlet.", "", "Arguments:", diff --git a/cmdlets/search_file.py b/cmdlets/search_file.py deleted file mode 100644 index e20fbcb..0000000 --- a/cmdlets/search_file.py +++ /dev/null @@ -1,531 +0,0 @@ -"""Search-file cmdlet: Search for files by query, tags, size, type, duration, etc.""" -from __future__ import annotations - -from typing import Any, Dict, Sequence, List, Optional, Tuple, Callable -from fnmatch import fnmatchcase -from pathlib import Path -from dataclasses import dataclass, field -from collections import OrderedDict -import re -import json -import os -import sys - -from helper.logger import log, debug -import shutil -import subprocess - -from helper.file_storage import FileStorage -from helper.search_provider import get_provider, list_providers, SearchResult -from metadata import import_pending_sidecars - -from . 
import register -from ._shared import Cmdlet, CmdletArg -import models -import pipeline as ctx - -# Optional dependencies -try: - import mutagen # type: ignore -except ImportError: # pragma: no cover - mutagen = None # type: ignore - -try: - from config import get_hydrus_url, resolve_output_dir -except Exception: # pragma: no cover - get_hydrus_url = None # type: ignore - resolve_output_dir = None # type: ignore - -try: - from helper.hydrus import HydrusClient, HydrusRequestError -except ImportError: # pragma: no cover - HydrusClient = None # type: ignore - HydrusRequestError = RuntimeError # type: ignore - -try: - from helper.utils import sha256_file -except ImportError: # pragma: no cover - sha256_file = None # type: ignore - -try: - from helper.utils_constant import mime_maps -except ImportError: # pragma: no cover - mime_maps = {} # type: ignore - - -# ============================================================================ -# Data Classes (from helper/search.py) -# ============================================================================ - -@dataclass(slots=True) -class SearchRecord: - path: str - size_bytes: int | None = None - duration_seconds: str | None = None - tags: str | None = None - hash_hex: str | None = None - - def as_dict(self) -> dict[str, str]: - payload: dict[str, str] = {"path": self.path} - if self.size_bytes is not None: - payload["size"] = str(self.size_bytes) - if self.duration_seconds: - payload["duration"] = self.duration_seconds - if self.tags: - payload["tags"] = self.tags - if self.hash_hex: - payload["hash"] = self.hash_hex - return payload - - -@dataclass -class ResultItem: - origin: str - title: str - detail: str - annotations: List[str] - target: str - media_kind: str = "other" - hash_hex: Optional[str] = None - columns: List[tuple[str, str]] = field(default_factory=list) - tag_summary: Optional[str] = None - duration_seconds: Optional[float] = None - size_bytes: Optional[int] = None - full_metadata: Optional[Dict[str, Any]] = None - tags: Optional[set[str]] = field(default_factory=set) - relationships: Optional[List[str]] = field(default_factory=list) - known_urls: Optional[List[str]] = field(default_factory=list) - - def to_dict(self) -> Dict[str, Any]: - payload: Dict[str, Any] = { - "title": self.title, - } - - # Always include these core fields for downstream cmdlets (get-file, download-data, etc) - payload["origin"] = self.origin - payload["target"] = self.target - payload["media_kind"] = self.media_kind - - # Always include full_metadata if present (needed by download-data, etc) - # This is NOT for display, but for downstream processing - if self.full_metadata: - payload["full_metadata"] = self.full_metadata - - # Include columns if defined (result renderer will use these for display) - if self.columns: - payload["columns"] = list(self.columns) - else: - # If no columns, include the detail for backwards compatibility - payload["detail"] = self.detail - payload["annotations"] = list(self.annotations) - - # Include optional fields - if self.hash_hex: - payload["hash"] = self.hash_hex - if self.tag_summary: - payload["tags"] = self.tag_summary - if self.tags: - payload["tags_set"] = list(self.tags) - if self.relationships: - payload["relationships"] = self.relationships - if self.known_urls: - payload["known_urls"] = self.known_urls - return payload - - -STORAGE_ORIGINS = {"local", "hydrus", "debrid"} - - -def _normalize_extension(ext_value: Any) -> str: - """Sanitize extension strings to alphanumerics and cap at 5 chars.""" - ext = 
str(ext_value or "").strip().lstrip(".") - - # Stop at common separators to avoid dragging status text into the extension - for sep in (" ", "|", "(", "[", "{", ",", ";"): - if sep in ext: - ext = ext.split(sep, 1)[0] - break - - # If there are multiple dots, take the last token as the extension - if "." in ext: - ext = ext.split(".")[-1] - - # Keep only alphanumeric characters and enforce max length - ext = "".join(ch for ch in ext if ch.isalnum()) - return ext[:5] - - -def _ensure_storage_columns(payload: Dict[str, Any]) -> Dict[str, Any]: - """Attach Title/Store columns for storage-origin results to keep CLI display compact.""" - origin_value = str(payload.get("origin") or payload.get("source") or "").lower() - if origin_value not in STORAGE_ORIGINS: - return payload - - title = payload.get("title") or payload.get("name") or payload.get("target") or payload.get("path") or "Result" - store_label = payload.get("origin") or payload.get("source") or origin_value - - # Handle extension - extension = _normalize_extension(payload.get("ext", "")) - if not extension and title: - path_obj = Path(str(title)) - if path_obj.suffix: - extension = _normalize_extension(path_obj.suffix.lstrip('.')) - title = path_obj.stem - - # Handle size as integer MB (header will include units) - size_val = payload.get("size") or payload.get("size_bytes") - size_str = "" - if size_val is not None: - try: - size_bytes = int(size_val) - size_mb = int(size_bytes / (1024 * 1024)) - size_str = str(size_mb) - except (ValueError, TypeError): - size_str = str(size_val) - - normalized = dict(payload) - normalized["columns"] = [ - ("Title", str(title)), - ("Ext", str(extension)), - ("Store", str(store_label)), - ("Size(Mb)", str(size_str)), - ] - return normalized - - -CMDLET = Cmdlet( - name="search-file", - summary="Unified search cmdlet for storage (Hydrus, Local) and providers (Debrid, LibGen, OpenLibrary, Soulseek).", - usage="search-file [query] [-tag TAG] [-size >100MB|<50MB] [-type audio|video|image] [-duration >10:00] [-storage BACKEND] [-provider PROVIDER]", - args=[ - CmdletArg("query", description="Search query string"), - CmdletArg("tag", description="Filter by tag (can be used multiple times)"), - CmdletArg("size", description="Filter by size: >100MB, <50MB, =10MB"), - CmdletArg("type", description="Filter by type: audio, video, image, document"), - CmdletArg("duration", description="Filter by duration: >10:00, <1:30:00"), - CmdletArg("limit", type="integer", description="Limit results (default: 45)"), - CmdletArg("storage", description="Search storage backend: hydrus, local (default: all searchable storages)"), - CmdletArg("provider", description="Search provider: libgen, openlibrary, soulseek, debrid, local (overrides -storage)"), - ], - details=[ - "Search across storage (Hydrus, Local) and providers (Debrid, LibGen, OpenLibrary, Soulseek)", - "Use -provider to search a specific source, or -storage to search file backends", - "Filter results by: tag, size, type, duration", - "Results can be piped to other commands", - "Examples:", - "search-file foo # Search all file backends", - "search-file -provider libgen 'python programming' # Search LibGen books", - "search-file -provider debrid 'movie' # Search AllDebrid magnets", - "search-file 'music' -provider soulseek # Search Soulseek P2P", - "search-file -provider openlibrary 'tolkien' # Search OpenLibrary", - "search-file song -storage hydrus -type audio # Search only Hydrus audio", - "search-file movie -tag action -provider debrid # Debrid with filters", - ], -) 
- - -@register(["search-file", "search"]) -def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: - """Search across multiple providers: Hydrus, Local, Debrid, LibGen, etc.""" - args_list = [str(arg) for arg in (args or [])] - - # Parse arguments - query = "" - tag_filters: List[str] = [] - size_filter: Optional[Tuple[str, int]] = None - duration_filter: Optional[Tuple[str, float]] = None - type_filter: Optional[str] = None - storage_backend: Optional[str] = None - provider_name: Optional[str] = None - limit = 45 - searched_backends: List[str] = [] - - # Simple argument parsing - i = 0 - while i < len(args_list): - arg = args_list[i] - low = arg.lower() - - if low in {"-provider", "--provider"} and i + 1 < len(args_list): - provider_name = args_list[i + 1].lower() - i += 2 - elif low in {"-storage", "--storage"} and i + 1 < len(args_list): - storage_backend = args_list[i + 1].lower() - i += 2 - elif low in {"-tag", "--tag"} and i + 1 < len(args_list): - tag_filters.append(args_list[i + 1]) - i += 2 - elif low in {"-limit", "--limit"} and i + 1 < len(args_list): - try: - limit = int(args_list[i + 1]) - except ValueError: - limit = 100 - i += 2 - elif low in {"-type", "--type"} and i + 1 < len(args_list): - type_filter = args_list[i + 1].lower() - i += 2 - elif not arg.startswith("-"): - if query: - query += " " + arg - else: - query = arg - i += 1 - else: - i += 1 - - # Extract store: filter tokens (works with commas or whitespace) and clean query for backends - store_filter: Optional[str] = None - if query: - match = re.search(r"\bstore:([^\s,]+)", query, flags=re.IGNORECASE) - if match: - store_filter = match.group(1).strip().lower() or None - # Remove any store: tokens so downstream backends see only the actual query - query = re.sub(r"\s*[,]?\s*store:[^\s,]+", " ", query, flags=re.IGNORECASE) - query = re.sub(r"\s{2,}", " ", query) - query = query.strip().strip(',') - - # Debrid is provider-only now - if storage_backend and storage_backend.lower() == "debrid": - log("Use -provider debrid instead of -storage debrid (debrid is provider-only)", file=sys.stderr) - return 1 - - # If store: was provided without explicit -storage/-provider, prefer that backend - if store_filter and not provider_name and not storage_backend: - if store_filter in {"hydrus", "local", "debrid"}: - storage_backend = store_filter - - # Handle piped input (e.g. 
from @N selection) if query is empty - if not query and result: - # If result is a list, take the first item - actual_result = result[0] if isinstance(result, list) and result else result - - # Helper to get field - def get_field(obj: Any, field: str) -> Any: - return getattr(obj, field, None) or (obj.get(field) if isinstance(obj, dict) else None) - - origin = get_field(actual_result, 'origin') - target = get_field(actual_result, 'target') - - # Special handling for Bandcamp artist/album drill-down - if origin == 'bandcamp' and target: - query = target - if not provider_name: - provider_name = 'bandcamp' - - # Generic URL handling - elif target and str(target).startswith(('http://', 'https://')): - query = target - # Try to infer provider from URL if not set - if not provider_name: - if 'bandcamp.com' in target: - provider_name = 'bandcamp' - elif 'youtube.com' in target or 'youtu.be' in target: - provider_name = 'youtube' - - if not query: - log("Provide a search query", file=sys.stderr) - return 1 - - # Initialize worker for this search command - from helper.local_library import LocalLibraryDB - from config import get_local_storage_path - import uuid - worker_id = str(uuid.uuid4()) - library_root = get_local_storage_path(config or {}) - if not library_root: - log("No library root configured", file=sys.stderr) - return 1 - - db = None - try: - db = LocalLibraryDB(library_root) - db.insert_worker( - worker_id, - "search", - title=f"Search: {query}", - description=f"Query: {query}", - pipe=ctx.get_current_command_text() - ) - - results_list = [] - import result_table - import importlib - importlib.reload(result_table) - from result_table import ResultTable - - # Create ResultTable for display - table_title = f"Search: {query}" - if provider_name: - table_title += f" [{provider_name}]" - elif storage_backend: - table_title += f" [{storage_backend}]" - - table = ResultTable(table_title) - table.set_source_command("search-file", args_list) - - # Try to search using provider (libgen, soulseek, debrid, openlibrary) - if provider_name: - debug(f"[search_file] Attempting provider search with: {provider_name}") - provider = get_provider(provider_name, config) - if not provider: - log(f"Provider '{provider_name}' not available", file=sys.stderr) - db.update_worker_status(worker_id, 'error') - return 1 - - debug(f"[search_file] Provider loaded, calling search with query: {query}") - search_result = provider.search(query, limit=limit) - debug(f"[search_file] Provider search returned {len(search_result)} results") - - for item in search_result: - # Add to table - table.add_result(item) - - # Emit to pipeline - item_dict = item.to_dict() - results_list.append(item_dict) - ctx.emit(item_dict) - - # Set the result table in context for TUI/CLI display - ctx.set_last_result_table(table, results_list) - - debug(f"[search_file] Emitted {len(results_list)} results") - - # Write results to worker stdout - db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2)) - db.update_worker_status(worker_id, 'completed') - return 0 - - # Otherwise search using storage backends (Hydrus, Local) - from helper.file_storage import FileStorage - storage = FileStorage(config=config or {}) - - backend_to_search = storage_backend or None - if backend_to_search: - # Check if requested backend is available - if backend_to_search == "hydrus": - from helper.hydrus import is_hydrus_available - if not is_hydrus_available(config or {}): - log(f"Backend 'hydrus' is not available (Hydrus service not running)", 
file=sys.stderr) - db.update_worker_status(worker_id, 'error') - return 1 - searched_backends.append(backend_to_search) - if not storage.supports_search(backend_to_search): - log(f"Backend '{backend_to_search}' does not support searching", file=sys.stderr) - db.update_worker_status(worker_id, 'error') - return 1 - results = storage[backend_to_search].search(query, limit=limit) - else: - # Search all searchable backends, but skip hydrus if unavailable - from helper.hydrus import is_hydrus_available - hydrus_available = is_hydrus_available(config or {}) - - all_results = [] - for backend_name in storage.list_searchable_backends(): - # Skip hydrus if not available - if backend_name == "hydrus" and not hydrus_available: - continue - searched_backends.append(backend_name) - try: - backend_results = storage[backend_name].search(query, limit=limit - len(all_results)) - if backend_results: - all_results.extend(backend_results) - if len(all_results) >= limit: - break - except Exception as exc: - log(f"Backend {backend_name} search failed: {exc}", file=sys.stderr) - results = all_results[:limit] - - # Also query Debrid provider by default (provider-only, but keep legacy coverage when no explicit provider given) - if not provider_name and not storage_backend: - try: - debrid_provider = get_provider("debrid", config) - if debrid_provider and debrid_provider.validate(): - remaining = max(0, limit - len(results)) if isinstance(results, list) else limit - if remaining > 0: - debrid_results = debrid_provider.search(query, limit=remaining) - if debrid_results: - if "debrid" not in searched_backends: - searched_backends.append("debrid") - if results is None: - results = [] - results.extend(debrid_results) - except Exception as exc: - log(f"Debrid provider search failed: {exc}", file=sys.stderr) - - def _format_storage_label(name: str) -> str: - clean = str(name or "").strip() - if not clean: - return "Unknown" - return clean.replace("_", " ").title() - - storage_counts: OrderedDict[str, int] = OrderedDict((name, 0) for name in searched_backends) - for item in results or []: - origin = getattr(item, 'origin', None) - if origin is None and isinstance(item, dict): - origin = item.get('origin') or item.get('source') - if not origin: - continue - key = str(origin).lower() - if key not in storage_counts: - storage_counts[key] = 0 - storage_counts[key] += 1 - - if storage_counts or query: - display_counts = OrderedDict((_format_storage_label(name), count) for name, count in storage_counts.items()) - summary_line = table.set_storage_summary(display_counts, query, inline=True) - if summary_line: - table.title = summary_line - - # Emit results and collect for workers table - if results: - for item in results: - def _as_dict(obj: Any) -> Dict[str, Any]: - if isinstance(obj, dict): - return dict(obj) - if hasattr(obj, "to_dict") and callable(getattr(obj, "to_dict")): - return obj.to_dict() # type: ignore[arg-type] - return {"title": str(obj)} - - item_dict = _as_dict(item) - if store_filter: - origin_val = str(item_dict.get("origin") or item_dict.get("source") or "").lower() - if store_filter != origin_val: - continue - normalized = _ensure_storage_columns(item_dict) - # Add to table using normalized columns to avoid extra fields (e.g., Tags/Name) - table.add_result(normalized) - - results_list.append(normalized) - ctx.emit(normalized) - - # Set the result table in context for TUI/CLI display - ctx.set_last_result_table(table, results_list) - - # Write results to worker stdout - db.append_worker_stdout(worker_id, 
json.dumps(results_list, indent=2)) - else: - log("No results found", file=sys.stderr) - db.append_worker_stdout(worker_id, json.dumps([], indent=2)) - - db.update_worker_status(worker_id, 'completed') - return 0 - - except Exception as exc: - log(f"Search failed: {exc}", file=sys.stderr) - import traceback - traceback.print_exc(file=sys.stderr) - if db: - try: - db.update_worker_status(worker_id, 'error') - except Exception: - pass - return 1 - - finally: - # Always close the database connection - if db: - try: - db.close() - except Exception: - pass - diff --git a/cmdlets/search_provider.py b/cmdlets/search_provider.py new file mode 100644 index 0000000..61dfc31 --- /dev/null +++ b/cmdlets/search_provider.py @@ -0,0 +1,117 @@ +"""search-provider cmdlet: Search external providers (bandcamp, libgen, soulseek, youtube).""" +from __future__ import annotations + +from typing import Any, Dict, List, Sequence +import sys + +from helper.logger import log, debug +from helper.provider import get_search_provider, list_search_providers + +from ._shared import Cmdlet, CmdletArg, should_show_help +import pipeline as ctx + + +class Search_Provider(Cmdlet): + """Search external content providers.""" + + def __init__(self): + super().__init__( + name="search-provider", + summary="Search external providers (bandcamp, libgen, soulseek, youtube)", + usage="search-provider [-limit N]", + arg=[ + CmdletArg("provider", type="string", required=True, description="Provider name: bandcamp, libgen, soulseek, youtube"), + CmdletArg("query", type="string", required=True, description="Search query (supports provider-specific syntax)"), + CmdletArg("limit", type="int", description="Maximum results to return (default: 50)"), + ], + detail=[ + "Search external content providers:", + "- bandcamp: Search for music albums/tracks", + " Example: search-provider bandcamp \"artist:altrusian grace\"", + "- libgen: Search Library Genesis for books", + " Example: search-provider libgen \"python programming\"", + "- soulseek: Search P2P network for music", + " Example: search-provider soulseek \"pink floyd\"", + "- youtube: Search YouTube for videos", + " Example: search-provider youtube \"tutorial\"", + "", + "Query syntax:", + "- bandcamp: Use 'artist:Name' to search by artist", + "- libgen: Supports isbn:, author:, title: prefixes", + "- soulseek: Plain text search", + "- youtube: Plain text search", + "", + "Results can be piped to other cmdlets:", + " search-provider bandcamp \"artist:grace\" | @1 | download-data", + ], + exec=self.run + ) + self.register() + + def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: + """Execute search-provider cmdlet.""" + if should_show_help(args): + ctx.emit(self.__dict__) + return 0 + + # Parse arguments + if len(args) < 2: + log("Error: search-provider requires and arguments", file=sys.stderr) + log(f"Usage: {self.usage}", file=sys.stderr) + log("Available providers:", file=sys.stderr) + providers = list_search_providers(config) + for name, available in sorted(providers.items()): + status = "✓" if available else "✗" + log(f" {status} {name}", file=sys.stderr) + return 1 + + provider_name = args[0] + query = args[1] + + # Parse optional limit + limit = 50 + if len(args) >= 4 and args[2] in ("-limit", "--limit"): + try: + limit = int(args[3]) + except ValueError: + log(f"Warning: Invalid limit value '{args[3]}', using default 50", file=sys.stderr) + + debug(f"[search-provider] provider={provider_name}, query={query}, limit={limit}") + + # Get provider + provider = 
get_search_provider(provider_name, config) + if not provider: + log(f"Error: Provider '{provider_name}' is not available", file=sys.stderr) + log("Available providers:", file=sys.stderr) + providers = list_search_providers(config) + for name, available in sorted(providers.items()): + if available: + log(f" - {name}", file=sys.stderr) + return 1 + + # Execute search + try: + debug(f"[search-provider] Calling {provider_name}.search()") + results = provider.search(query, limit=limit) + debug(f"[search-provider] Got {len(results)} results") + + if not results: + log(f"No results found for query: {query}", file=sys.stderr) + return 0 + + # Emit results for pipeline + for search_result in results: + ctx.emit(search_result.to_dict()) + + log(f"Found {len(results)} result(s) from {provider_name}", file=sys.stderr) + return 0 + + except Exception as e: + log(f"Error searching {provider_name}: {e}", file=sys.stderr) + import traceback + debug(traceback.format_exc()) + return 1 + + +# Register cmdlet instance +Search_Provider_Instance = Search_Provider() diff --git a/cmdlets/search_store.py b/cmdlets/search_store.py new file mode 100644 index 0000000..8c7efbc --- /dev/null +++ b/cmdlets/search_store.py @@ -0,0 +1,341 @@ +"""Search-store cmdlet: Search for files in storage backends (Folder, Hydrus).""" +from __future__ import annotations + +from typing import Any, Dict, Sequence, List, Optional, Tuple +from pathlib import Path +from dataclasses import dataclass, field +from collections import OrderedDict +import re +import json +import sys + +from helper.logger import log, debug + +from ._shared import Cmdlet, CmdletArg, get_origin, get_field, should_show_help +import pipeline as ctx + +# Optional dependencies +try: + import mutagen # type: ignore +except ImportError: # pragma: no cover + mutagen = None # type: ignore + +try: + from config import get_hydrus_url, resolve_output_dir +except Exception: # pragma: no cover + get_hydrus_url = None # type: ignore + resolve_output_dir = None # type: ignore + +try: + from helper.hydrus import HydrusClient, HydrusRequestError +except ImportError: # pragma: no cover + HydrusClient = None # type: ignore + HydrusRequestError = RuntimeError # type: ignore + +try: + from helper.utils import sha256_file +except ImportError: # pragma: no cover + sha256_file = None # type: ignore + +try: + from helper.utils_constant import mime_maps +except ImportError: # pragma: no cover + mime_maps = {} # type: ignore + +@dataclass(slots=True) +class SearchRecord: + path: str + size_bytes: int | None = None + duration_seconds: str | None = None + tags: str | None = None + hash_hex: str | None = None + + def as_dict(self) -> dict[str, str]: + payload: dict[str, str] = {"path": self.path} + if self.size_bytes is not None: + payload["size"] = str(self.size_bytes) + if self.duration_seconds: + payload["duration"] = self.duration_seconds + if self.tags: + payload["tags"] = self.tags + if self.hash_hex: + payload["hash"] = self.hash_hex + return payload + + +STORAGE_ORIGINS = {"local", "hydrus", "folder"} + + +class Search_Store(Cmdlet): + """Class-based search-store cmdlet for searching storage backends.""" + + def __init__(self) -> None: + super().__init__( + name="search-store", + summary="Search storage backends (Folder, Hydrus) for files.", + usage="search-store [query] [-tag TAG] [-size >100MB|<50MB] [-type audio|video|image] [-duration >10:00] [-store BACKEND]", + arg=[ + CmdletArg("query", description="Search query string"), + CmdletArg("tag", description="Filter by tag (can be 
used multiple times)"), + CmdletArg("size", description="Filter by size: >100MB, <50MB, =10MB"), + CmdletArg("type", description="Filter by type: audio, video, image, document"), + CmdletArg("duration", description="Filter by duration: >10:00, <1:30:00"), + CmdletArg("limit", type="integer", description="Limit results (default: 100)"), + CmdletArg("store", description="Search specific storage backend (e.g., 'home', 'test', or 'default')"), + ], + detail=[ + "Search across storage backends: Folder stores and Hydrus instances", + "Use -store to search a specific backend by name", + "Filter results by: tag, size, type, duration", + "Results include hash for downstream commands (get-file, add-tag, etc.)", + "Examples:", + "search-store foo # Search all storage backends", + "search-store -store home '*' # Search 'home' Hydrus instance", + "search-store -store test 'video' # Search 'test' folder store", + "search-store song -type audio # Search for audio files", + "search-store movie -tag action # Search with tag filter", + ], + exec=self.run, + ) + self.register() + + # --- Helper methods ------------------------------------------------- + @staticmethod + def _normalize_extension(ext_value: Any) -> str: + """Sanitize extension strings to alphanumerics and cap at 5 chars.""" + ext = str(ext_value or "").strip().lstrip(".") + for sep in (" ", "|", "(", "[", "{", ",", ";"): + if sep in ext: + ext = ext.split(sep, 1)[0] + break + if "." in ext: + ext = ext.split(".")[-1] + ext = "".join(ch for ch in ext if ch.isalnum()) + return ext[:5] + + def _ensure_storage_columns(self, payload: Dict[str, Any]) -> Dict[str, Any]: + """Ensure storage results have the necessary fields for result_table display.""" + store_value = str(get_origin(payload, "") or "").lower() + if store_value not in STORAGE_ORIGINS: + return payload + + # Ensure we have title field + if "title" not in payload: + payload["title"] = payload.get("name") or payload.get("target") or payload.get("path") or "Result" + + # Ensure we have ext field + if "ext" not in payload: + title = str(payload.get("title", "")) + path_obj = Path(title) + if path_obj.suffix: + payload["ext"] = self._normalize_extension(path_obj.suffix.lstrip('.')) + else: + payload["ext"] = payload.get("ext", "") + + # Ensure size_bytes is present for display (already set by search_file()) + # result_table will handle formatting it + + # Don't create manual columns - let result_table handle display + # This allows the table to respect max_columns and apply consistent formatting + return payload + + # --- Execution ------------------------------------------------------ + def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: + """Search storage backends for files.""" + if should_show_help(args): + log(f"Cmdlet: {self.name}\nSummary: {self.summary}\nUsage: {self.usage}") + return 0 + + args_list = [str(arg) for arg in (args or [])] + + # Parse arguments + query = "" + tag_filters: List[str] = [] + size_filter: Optional[Tuple[str, int]] = None + duration_filter: Optional[Tuple[str, float]] = None + type_filter: Optional[str] = None + storage_backend: Optional[str] = None + limit = 100 + searched_backends: List[str] = [] + + i = 0 + while i < len(args_list): + arg = args_list[i] + low = arg.lower() + if low in {"-store", "--store", "-storage", "--storage"} and i + 1 < len(args_list): + storage_backend = args_list[i + 1] + i += 2 + elif low in {"-tag", "--tag"} and i + 1 < len(args_list): + tag_filters.append(args_list[i + 1]) + i += 2 + elif low in 
{"-limit", "--limit"} and i + 1 < len(args_list): + try: + limit = int(args_list[i + 1]) + except ValueError: + limit = 100 + i += 2 + elif low in {"-type", "--type"} and i + 1 < len(args_list): + type_filter = args_list[i + 1].lower() + i += 2 + elif not arg.startswith("-"): + query = f"{query} {arg}".strip() if query else arg + i += 1 + else: + i += 1 + + store_filter: Optional[str] = None + if query: + match = re.search(r"\bstore:([^\s,]+)", query, flags=re.IGNORECASE) + if match: + store_filter = match.group(1).strip() or None + query = re.sub(r"\s*[,]?\s*store:[^\s,]+", " ", query, flags=re.IGNORECASE) + query = re.sub(r"\s{2,}", " ", query) + query = query.strip().strip(',') + + if store_filter and not storage_backend: + storage_backend = store_filter + + if not query: + log("Provide a search query", file=sys.stderr) + return 1 + + from helper.folder_store import FolderDB + from config import get_local_storage_path + import uuid + worker_id = str(uuid.uuid4()) + library_root = get_local_storage_path(config or {}) + if not library_root: + log("No library root configured", file=sys.stderr) + return 1 + + # Use context manager to ensure database is always closed + with FolderDB(library_root) as db: + try: + db.insert_worker( + worker_id, + "search-store", + title=f"Search: {query}", + description=f"Query: {query}", + pipe=ctx.get_current_command_text() + ) + + results_list = [] + import result_table + import importlib + importlib.reload(result_table) + from result_table import ResultTable + + table_title = f"Search: {query}" + if storage_backend: + table_title += f" [{storage_backend}]" + + table = ResultTable(table_title) + + from helper.store import FileStorage + storage = FileStorage(config=config or {}) + + backend_to_search = storage_backend or None + if backend_to_search: + searched_backends.append(backend_to_search) + target_backend = storage[backend_to_search] + if not callable(getattr(target_backend, 'search_file', None)): + log(f"Backend '{backend_to_search}' does not support searching", file=sys.stderr) + db.update_worker_status(worker_id, 'error') + return 1 + results = target_backend.search_file(query, limit=limit) + else: + from helper.hydrus import is_hydrus_available + hydrus_available = is_hydrus_available(config or {}) + + all_results = [] + for backend_name in storage.list_searchable_backends(): + if backend_name.startswith("hydrus") and not hydrus_available: + continue + searched_backends.append(backend_name) + try: + backend_results = storage[backend_name].search_file(query, limit=limit - len(all_results)) + if backend_results: + all_results.extend(backend_results) + if len(all_results) >= limit: + break + except Exception as exc: + log(f"Backend {backend_name} search failed: {exc}", file=sys.stderr) + results = all_results[:limit] + + def _format_storage_label(name: str) -> str: + clean = str(name or "").strip() + if not clean: + return "Unknown" + return clean.replace("_", " ").title() + + storage_counts: OrderedDict[str, int] = OrderedDict((name, 0) for name in searched_backends) + for item in results or []: + origin = get_origin(item) + if not origin: + continue + key = str(origin).lower() + if key not in storage_counts: + storage_counts[key] = 0 + storage_counts[key] += 1 + + if storage_counts or query: + display_counts = OrderedDict((_format_storage_label(name), count) for name, count in storage_counts.items()) + summary_line = table.set_storage_summary(display_counts, query, inline=True) + if summary_line: + table.title = summary_line + + if results: + for 
item in results: + def _as_dict(obj: Any) -> Dict[str, Any]: + if isinstance(obj, dict): + return dict(obj) + if hasattr(obj, "to_dict") and callable(getattr(obj, "to_dict")): + return obj.to_dict() # type: ignore[arg-type] + return {"title": str(obj)} + + item_dict = _as_dict(item) + if store_filter: + origin_val = str(get_origin(item_dict) or "").lower() + if store_filter != origin_val: + continue + normalized = self._ensure_storage_columns(item_dict) + + # Make hash/store available for downstream cmdlets without rerunning search + hash_val = normalized.get("hash") + store_val = normalized.get("store") or get_origin(item_dict) + if hash_val and not normalized.get("hash"): + normalized["hash"] = hash_val + if store_val and not normalized.get("store"): + normalized["store"] = store_val + + table.add_result(normalized) + + results_list.append(normalized) + ctx.emit(normalized) + + # Debug: Verify table rows match items list + debug(f"[search-store] Added {len(table.rows)} rows to table, {len(results_list)} items to results_list") + if len(table.rows) != len(results_list): + debug(f"[search-store] WARNING: Table/items mismatch! rows={len(table.rows)} items={len(results_list)}", file=sys.stderr) + + ctx.set_last_result_table(table, results_list) + db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2)) + else: + log("No results found", file=sys.stderr) + db.append_worker_stdout(worker_id, json.dumps([], indent=2)) + + db.update_worker_status(worker_id, 'completed') + return 0 + + except Exception as exc: + log(f"Search failed: {exc}", file=sys.stderr) + import traceback + traceback.print_exc(file=sys.stderr) + try: + db.update_worker_status(worker_id, 'error') + except Exception: + pass + return 1 + + +CMDLET = Search_Store() diff --git a/cmdlets/trim_file.py b/cmdlets/trim_file.py index 912b406..569004d 100644 --- a/cmdlets/trim_file.py +++ b/cmdlets/trim_file.py @@ -26,12 +26,12 @@ CMDLET = Cmdlet( name="trim-file", summary="Trim a media file using ffmpeg.", usage="trim-file [-path ] -range [-delete]", - args=[ + arg=[ CmdletArg("-path", description="Path to the file (optional if piped)."), CmdletArg("-range", required=True, description="Time range to trim (e.g. '3:45-3:55' or '00:03:45-00:03:55')."), CmdletArg("-delete", type="flag", description="Delete the original file after trimming."), ], - details=[ + detail=[ "Creates a new file with 'clip_' prefix in the filename/title.", "Inherits tags from the source file.", "Adds a relationship to the source file (if hash is available).", @@ -133,7 +133,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: # If path arg provided, add it to inputs if path_arg: - inputs.append({"file_path": path_arg}) + inputs.append({"path": path_arg}) if not inputs: log("No input files provided.", file=sys.stderr) @@ -145,9 +145,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: # Resolve file path file_path = None if isinstance(item, dict): - file_path = item.get("file_path") or item.get("path") or item.get("target") - elif hasattr(item, "file_path"): - file_path = item.file_path + file_path = item.get("path") or item.get("target") + elif hasattr(item, "path"): + file_path = item.path elif isinstance(item, str): file_path = item @@ -175,9 +175,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: # 1. 
Get source hash for relationship source_hash = None if isinstance(item, dict): - source_hash = item.get("hash") or item.get("file_hash") - elif hasattr(item, "file_hash"): - source_hash = item.file_hash + source_hash = item.get("hash") + elif hasattr(item, "hash"): + source_hash = item.hash if not source_hash: try: @@ -219,18 +219,18 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: # Update original file in local DB if possible try: from config import get_local_storage_path - from helper.local_library import LocalLibraryDB + from helper.folder_store import FolderDB storage_path = get_local_storage_path(config) if storage_path: - with LocalLibraryDB(storage_path) as db: + with FolderDB(storage_path) as db: # Get original file metadata # We need to find the original file by hash or path # Try path first orig_meta = db.get_metadata(path_obj) if not orig_meta and source_hash: # Try by hash - orig_path_resolved = db.search_by_hash(source_hash) + orig_path_resolved = db.search_hash(source_hash) if orig_path_resolved: orig_meta = db.get_metadata(orig_path_resolved) @@ -256,7 +256,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: orig_meta["hash"] = source_hash # We need the path to save - save_path = Path(orig_meta.get("file_path") or path_obj) + save_path = Path(orig_meta.get("path") or path_obj) db.save_metadata(save_path, orig_meta) log(f"Updated relationship for original file: {save_path.name}", file=sys.stderr) except Exception as e: @@ -264,7 +264,6 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: # 5. Construct result result_dict = { - "file_path": str(output_path), "path": str(output_path), "title": new_title, "tags": new_tags, diff --git a/cmdnats/adjective.py b/cmdnats/adjective.py index 177e3f9..53a5edf 100644 --- a/cmdnats/adjective.py +++ b/cmdnats/adjective.py @@ -135,10 +135,10 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: CMDLET = Cmdlet( name=".adjective", - aliases=["adj"], + alias=["adj"], summary="Manage adjective categories and tags", usage=".adjective [category] [-add tag] [-delete tag]", - args=[ + arg=[ CmdletArg(name="category", type="string", description="Category name", required=False), CmdletArg(name="tag", type="string", description="Tag name", required=False), CmdletArg(name="add", type="flag", description="Add tag"), diff --git a/cmdnats/help.py b/cmdnats/help.py new file mode 100644 index 0000000..106ec29 --- /dev/null +++ b/cmdnats/help.py @@ -0,0 +1,183 @@ +from __future__ import annotations + +from typing import Any, Dict, Sequence, List, Optional +import shlex +import sys + +from cmdlets._shared import Cmdlet, CmdletArg, parse_cmdlet_args +from helper.logger import log +from result_table import ResultTable +import pipeline as ctx + + +def _normalize_choice_list(arg_names: Optional[List[str]]) -> List[str]: + return sorted(set(arg_names or [])) + + +def _examples_for_cmd(name: str) -> List[str]: + """Return example invocations for a given command (best-effort).""" + lookup = { + ".adjective": [ + '.adjective -add "example"', + '.adjective -delete "example"', + ], + } + + key = name.replace("_", "-").lower() + return lookup.get(key, []) + + +def _find_cmd_metadata(name: str, metadata: Dict[str, Dict[str, Any]]) -> Optional[Dict[str, Any]]: + target = name.replace("_", "-").lower() + for cmd_name, meta in metadata.items(): + if target == cmd_name: + return meta + aliases = meta.get("aliases", []) or [] + if target in aliases: + return meta + 
return None + + +def _render_list(metadata: Dict[str, Dict[str, Any]], filter_text: Optional[str], args: Sequence[str]) -> None: + table = ResultTable("Help") + table.set_source_command(".help", list(args)) + + items: List[Dict[str, Any]] = [] + needle = (filter_text or "").lower().strip() + + for name in sorted(metadata.keys()): + meta = metadata[name] + summary = meta.get("summary", "") or "" + if needle and needle not in name.lower() and needle not in summary.lower(): + continue + + row = table.add_row() + row.add_column("Cmd", name) + aliases = ", ".join(meta.get("aliases", []) or []) + row.add_column("Aliases", aliases) + arg_names = [a.get("name") for a in meta.get("args", []) if a.get("name")] + row.add_column("Args", ", ".join(f"-{a}" for a in arg_names)) + table.set_row_selection_args(len(table.rows) - 1, ["-cmd", name]) + items.append(meta) + + ctx.set_last_result_table(table, items) + ctx.set_current_stage_table(table) + print(table) + + +def _render_detail(meta: Dict[str, Any], args: Sequence[str]) -> None: + title = f"Help: {meta.get('name', '') or 'cmd'}" + table = ResultTable(title) + table.set_source_command(".help", list(args)) + + header_lines: List[str] = [] + summary = meta.get("summary", "") + usage = meta.get("usage", "") + aliases = meta.get("aliases", []) or [] + examples = _examples_for_cmd(meta.get("name", "")) + first_example_tokens: List[str] = [] + first_example_cmd: Optional[str] = None + if examples: + try: + split_tokens = shlex.split(examples[0]) + if split_tokens: + first_example_cmd = split_tokens[0] + first_example_tokens = split_tokens[1:] + except Exception: + pass + + if summary: + header_lines.append(summary) + if usage: + header_lines.append(f"Usage: {usage}") + if aliases: + header_lines.append("Aliases: " + ", ".join(aliases)) + if examples: + header_lines.append("Examples: " + " | ".join(examples)) + if header_lines: + table.set_header_lines(header_lines) + + args_meta = meta.get("args", []) or [] + example_text = " | ".join(examples) + # If we have an example, use it as the source command so @N runs that example + if first_example_cmd: + table.set_source_command(first_example_cmd, []) + if not args_meta: + row = table.add_row() + row.add_column("Arg", "(none)") + row.add_column("Type", "") + row.add_column("Req", "") + row.add_column("Description", "") + row.add_column("Example", example_text) + if first_example_tokens: + table.set_row_selection_args(len(table.rows) - 1, first_example_tokens) + else: + for arg in args_meta: + row = table.add_row() + name = arg.get("name") or "" + row.add_column("Arg", f"-{name}" if name else "") + row.add_column("Type", arg.get("type", "")) + row.add_column("Req", "yes" if arg.get("required") else "") + desc = arg.get("description", "") or "" + choices = arg.get("choices", []) or [] + if choices: + choice_text = f"choices: {', '.join(choices)}" + desc = f"{desc} ({choice_text})" if desc else choice_text + row.add_column("Description", desc) + row.add_column("Example", example_text) + if first_example_tokens: + table.set_row_selection_args(len(table.rows) - 1, first_example_tokens) + + ctx.set_last_result_table_overlay(table, [meta]) + ctx.set_current_stage_table(table) + print(table) + + +def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: + try: + from helper import cmdlet_catalog as _catalog + + CMDLET.arg[0].choices = _normalize_choice_list(_catalog.list_cmdlet_names()) + metadata = _catalog.list_cmdlet_metadata() + except Exception: + CMDLET.arg[0].choices = [] + metadata = {} + + 
parsed = parse_cmdlet_args(args, CMDLET) + + filter_text = parsed.get("filter") + cmd_arg = parsed.get("cmd") + + if cmd_arg: + target_meta = _find_cmd_metadata(str(cmd_arg), metadata) + if not target_meta: + log(f"Unknown command: {cmd_arg}", file=sys.stderr) + return 1 + _render_detail(target_meta, args) + return 0 + + _render_list(metadata, filter_text, args) + return 0 + + +CMDLET = Cmdlet( + name=".help", + alias=["help", "?"], + summary="Show cmdlets or detailed help", + usage=".help [cmd] [-filter text]", + arg=[ + CmdletArg( + name="cmd", + type="string", + description="Cmdlet name to show detailed help", + required=False, + choices=[], + ), + CmdletArg( + name="-filter", + type="string", + description="Filter cmdlets by substring", + required=False, + ), + ], +) diff --git a/cmdnats/matrix.py b/cmdnats/matrix.py index 3f3e3a7..4701d76 100644 --- a/cmdnats/matrix.py +++ b/cmdnats/matrix.py @@ -3,95 +3,22 @@ import sys from cmdlets._shared import Cmdlet, CmdletArg, parse_cmdlet_args from helper.logger import log, debug from result_table import ResultTable -from helper.file_storage import MatrixStorageBackend +# REFACTOR: Commenting out Matrix import until provider refactor is complete +# from helper.store import MatrixStorageBackend from config import save_config, load_config import pipeline as ctx def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: - parsed = parse_cmdlet_args(args, CMDLET) - - # Initialize backend - backend = MatrixStorageBackend() - - # Get current default room - matrix_conf = config.get('storage', {}).get('matrix', {}) - current_room_id = matrix_conf.get('room_id') - - # Fetch rooms - debug("Fetching joined rooms from Matrix...") - rooms = backend.list_rooms(config) - - if not rooms: - debug("No joined rooms found or Matrix not configured.") - return 1 - - # Handle selection if provided - selection = parsed.get("selection") - if selection: - new_room_id = None - selected_room_name = None - - # Try as index (1-based) - try: - idx = int(selection) - 1 - if 0 <= idx < len(rooms): - selected_room = rooms[idx] - new_room_id = selected_room['id'] - selected_room_name = selected_room['name'] - except ValueError: - # Try as Room ID - for room in rooms: - if room['id'] == selection: - new_room_id = selection - selected_room_name = room['name'] - break - - if new_room_id: - # Update config - # Load fresh config from disk to avoid saving runtime objects (like WorkerManager) - disk_config = load_config() - - if 'storage' not in disk_config: disk_config['storage'] = {} - if 'matrix' not in disk_config['storage']: disk_config['storage']['matrix'] = {} - - disk_config['storage']['matrix']['room_id'] = new_room_id - save_config(disk_config) - - debug(f"Default Matrix room set to: {selected_room_name} ({new_room_id})") - current_room_id = new_room_id - else: - debug(f"Invalid selection: {selection}") - return 1 - - # Display table - table = ResultTable("Matrix Rooms") - for i, room in enumerate(rooms): - is_default = (room['id'] == current_room_id) - - row = table.add_row() - row.add_column("Default", "*" if is_default else "") - row.add_column("Name", room['name']) - row.add_column("ID", room['id']) - - # Set selection args so user can type @N to select - # This will run .matrix N - table.set_row_selection_args(i, [str(i + 1)]) - - table.set_source_command(".matrix") - - # Register results - ctx.set_last_result_table_overlay(table, rooms) - ctx.set_current_stage_table(table) - - print(table) - return 0 + # REFACTOR: Matrix cmdlet temporarily disabled 
during storage provider refactor + log("⚠️ Matrix cmdlet is temporarily disabled during refactor", file=sys.stderr) + return 1 CMDLET = Cmdlet( name=".matrix", - aliases=["matrix", "rooms"], + alias=["matrix", "rooms"], summary="List and select default Matrix room", usage=".matrix [selection]", - args=[ + arg=[ CmdletArg( name="selection", type="string", diff --git a/cmdnats/pipe.py b/cmdnats/pipe.py index b9a6cfc..4ac57a6 100644 --- a/cmdnats/pipe.py +++ b/cmdnats/pipe.py @@ -14,7 +14,7 @@ from helper.mpv_ipc import get_ipc_pipe_path, MPVIPCClient import pipeline as ctx from helper.download import is_url_supported_by_ytdlp -from helper.local_library import LocalLibrarySearchOptimizer +from helper.folder_store import LocalLibrarySearchOptimizer from config import get_local_storage_path, get_hydrus_access_key, get_hydrus_url from hydrus_health_check import get_cookies_file_path @@ -35,6 +35,20 @@ def _send_ipc_command(command: Dict[str, Any], silent: bool = False) -> Optional debug(f"IPC Error: {e}", file=sys.stderr) return None + +def _is_mpv_running() -> bool: + """Check if MPV is currently running and accessible via IPC.""" + try: + ipc_pipe = get_ipc_pipe_path() + client = MPVIPCClient(socket_path=ipc_pipe) + if client.connect(): + client.disconnect() + return True + return False + except Exception: + return False + + def _get_playlist(silent: bool = False) -> Optional[List[Dict[str, Any]]]: """Get the current playlist from MPV. Returns None if MPV is not running.""" cmd = {"command": ["get_property", "playlist"], "request_id": 100} @@ -87,8 +101,75 @@ def _extract_target_from_memory_uri(text: str) -> Optional[str]: return None -def _normalize_playlist_target(text: Optional[str]) -> Optional[str]: - """Normalize playlist entry targets for dedupe comparisons.""" +def _find_hydrus_instance_for_hash(hash_str: str, file_storage: Any) -> Optional[str]: + """Find which Hydrus instance serves a specific file hash. + + Args: + hash_str: SHA256 hash (64 hex chars) + file_storage: FileStorage instance with Hydrus backends + + Returns: + Instance name (e.g., 'home') or None if not found + """ + # Query each Hydrus backend to see if it has this file + for backend_name in file_storage.list_backends(): + backend = file_storage[backend_name] + # Check if this is a Hydrus backend by checking class name + backend_class = type(backend).__name__ + if backend_class != "HydrusNetwork": + continue + + try: + # Query metadata to see if this instance has the file + metadata = backend.get_metadata(hash_str) + if metadata: + return backend_name + except Exception: + # This instance doesn't have the file or had an error + continue + + return None + + +def _find_hydrus_instance_by_url(url: str, file_storage: Any) -> Optional[str]: + """Find which Hydrus instance matches a given URL. + + Args: + url: Full URL (e.g., http://localhost:45869/get_files/file?hash=...) 
+ file_storage: FileStorage instance with Hydrus backends + + Returns: + Instance name (e.g., 'home') or None if not found + """ + from urllib.parse import urlparse + + parsed_target = urlparse(url) + target_netloc = parsed_target.netloc.lower() + + # Check each Hydrus backend's URL + for backend_name in file_storage.list_backends(): + backend = file_storage[backend_name] + backend_class = type(backend).__name__ + if backend_class != "HydrusNetwork": + continue + + # Get the backend's base URL from its client + try: + backend_url = backend._client.base_url + parsed_backend = urlparse(backend_url) + backend_netloc = parsed_backend.netloc.lower() + + # Match by netloc (host:port) + if target_netloc == backend_netloc: + return backend_name + except Exception: + continue + + return None + + +def _normalize_playlist_path(text: Optional[str]) -> Optional[str]: + """Normalize playlist entry paths for dedupe comparisons.""" if not text: return None real = _extract_target_from_memory_uri(text) or text @@ -118,8 +199,16 @@ def _normalize_playlist_target(text: Optional[str]) -> Optional[str]: return real.lower() -def _infer_store_from_playlist_item(item: Dict[str, Any]) -> str: - """Infer a friendly store label from an MPV playlist entry.""" +def _infer_store_from_playlist_item(item: Dict[str, Any], file_storage: Optional[Any] = None) -> str: + """Infer a friendly store label from an MPV playlist entry. + + Args: + item: MPV playlist item dict + file_storage: Optional FileStorage instance for querying specific backend instances + + Returns: + Store label (e.g., 'home', 'work', 'local', 'youtube', etc.) + """ name = item.get("filename") if isinstance(item, dict) else None target = str(name or "") @@ -130,19 +219,33 @@ def _infer_store_from_playlist_item(item: Dict[str, Any]) -> str: # Hydrus hashes: bare 64-hex entries if re.fullmatch(r"[0-9a-f]{64}", target.lower()): + # If we have file_storage, query each Hydrus instance to find which one has this hash + if file_storage: + hash_str = target.lower() + hydrus_instance = _find_hydrus_instance_for_hash(hash_str, file_storage) + if hydrus_instance: + return hydrus_instance return "hydrus" lower = target.lower() if lower.startswith("magnet:"): return "magnet" if lower.startswith("hydrus://"): + # Extract hash from hydrus:// URL if possible + if file_storage: + hash_match = re.search(r"[0-9a-f]{64}", target.lower()) + if hash_match: + hash_str = hash_match.group(0) + hydrus_instance = _find_hydrus_instance_for_hash(hash_str, file_storage) + if hydrus_instance: + return hydrus_instance return "hydrus" # Windows / UNC paths if re.match(r"^[a-z]:[\\/]", target, flags=re.IGNORECASE) or target.startswith("\\\\"): return "local" - # file:// URLs + # file:// url if lower.startswith("file://"): return "local" @@ -162,9 +265,33 @@ def _infer_store_from_playlist_item(item: Dict[str, Any]) -> str: return "soundcloud" if "bandcamp" in host_stripped: return "bandcamp" - if "get_files" in path or host_stripped in {"127.0.0.1", "localhost"}: + if "get_files" in path or "file?hash=" in path or host_stripped in {"127.0.0.1", "localhost"}: + # Hydrus API URL - try to extract hash and find instance + if file_storage: + # Try to extract hash from URL parameters + hash_match = re.search(r"hash=([0-9a-f]{64})", target.lower()) + if hash_match: + hash_str = hash_match.group(1) + hydrus_instance = _find_hydrus_instance_for_hash(hash_str, file_storage) + if hydrus_instance: + return hydrus_instance + # If no hash in URL, try matching the base URL to configured instances + 
hydrus_instance = _find_hydrus_instance_by_url(target, file_storage) + if hydrus_instance: + return hydrus_instance return "hydrus" if re.match(r"^\d+\.\d+\.\d+\.\d+$", host_stripped) and "get_files" in path: + # IP-based Hydrus URL + if file_storage: + hash_match = re.search(r"hash=([0-9a-f]{64})", target.lower()) + if hash_match: + hash_str = hash_match.group(1) + hydrus_instance = _find_hydrus_instance_for_hash(hash_str, file_storage) + if hydrus_instance: + return hydrus_instance + hydrus_instance = _find_hydrus_instance_by_url(target, file_storage) + if hydrus_instance: + return hydrus_instance return "hydrus" parts = host_stripped.split('.') @@ -231,15 +358,15 @@ def _build_ytdl_options(config: Optional[Dict[str, Any]], hydrus_header: Optiona return ",".join(opts) if opts else None -def _is_hydrus_target(target: str, hydrus_url: Optional[str]) -> bool: - if not target: +def _is_hydrus_path(path: str, hydrus_url: Optional[str]) -> bool: + if not path: return False - lower = target.lower() + lower = path.lower() if "hydrus://" in lower: return True - parsed = urlparse(target) + parsed = urlparse(path) host = (parsed.netloc or "").lower() - path = parsed.path or "" + path_part = parsed.path or "" if hydrus_url: try: hydrus_host = urlparse(hydrus_url).netloc.lower() @@ -247,9 +374,9 @@ def _is_hydrus_target(target: str, hydrus_url: Optional[str]) -> bool: return True except Exception: pass - if "get_files" in path or "file?hash=" in path: + if "get_files" in path_part or "file?hash=" in path_part: return True - if re.match(r"^\d+\.\d+\.\d+\.\d+$", host) and "get_files" in path: + if re.match(r"^\d+\.\d+\.\d+\.\d+$", host) and "get_files" in path_part: return True return False @@ -313,6 +440,113 @@ def _monitor_mpv_logs(duration: float = 3.0) -> None: client.disconnect() except Exception: pass +def _get_playable_path(item: Any, file_storage: Optional[Any], config: Optional[Dict[str, Any]]) -> Optional[tuple[str, Optional[str]]]: + """Extract a playable path/URL from an item, handling different store types. 
+ + Args: + item: Item to extract path from (dict, PipeObject, or string) + file_storage: FileStorage instance for querying backends + config: Config dict for Hydrus URL + + Returns: + Tuple of (path, title) or None if no valid path found + """ + path = None + title = None + store = None + file_hash = None + + # Extract fields from item - prefer a disk path ('path'), but accept 'url' as fallback for providers + if isinstance(item, dict): + # Support both canonical 'path' and legacy 'file_path' keys, and provider 'url' keys + path = item.get("path") or item.get("file_path") + # Fallbacks for provider-style entries where URL is stored in 'url' or 'source_url' or 'target' + if not path: + path = item.get("url") or item.get("source_url") or item.get("target") + if not path: + known = item.get("url") or item.get("url") or [] + if known and isinstance(known, list): + path = known[0] + title = item.get("title") or item.get("file_title") + store = item.get("store") or item.get("storage") or item.get("storage_source") or item.get("origin") + file_hash = item.get("hash") or item.get("file_hash") or item.get("hash_hex") + elif hasattr(item, "path") or hasattr(item, "url") or hasattr(item, "source_url") or hasattr(item, "store") or hasattr(item, "hash"): + # Handle PipeObject / dataclass objects - prefer path, but fall back to url/source_url attributes + path = getattr(item, "path", None) or getattr(item, "file_path", None) + if not path: + path = getattr(item, "url", None) or getattr(item, "source_url", None) or getattr(item, "target", None) + if not path: + known = getattr(item, "url", None) or (getattr(item, "extra", None) or {}).get("url") + if known and isinstance(known, list): + path = known[0] + title = getattr(item, "title", None) or getattr(item, "file_title", None) + store = getattr(item, "store", None) or getattr(item, "origin", None) + file_hash = getattr(item, "hash", None) + elif isinstance(item, str): + path = item + + # Debug: show incoming values + try: + debug(f"_get_playable_path: store={store}, path={path}, hash={file_hash}") + except Exception: + pass + + if not path: + return None + + # If we have a store and hash, use store's .pipe() method if available + # Skip this for URL-based providers (YouTube, SoundCloud, etc.) which have hash="unknown" + # Also skip if path is already a URL (http/https) + if store and file_hash and file_hash != "unknown" and file_storage: + # Check if this is actually a URL - if so, just return it + if path.startswith(("http://", "https://")): + return (path, title) + + try: + backend = file_storage[store] + # Check if backend has a .pipe() method + if hasattr(backend, 'pipe') and callable(backend.pipe): + pipe_path = backend.pipe(file_hash, config) + if pipe_path: + path = pipe_path + debug(f"Got pipe path from {store} backend: {path}") + except KeyError: + # Store not found in file_storage - it could be a search provider (youtube, bandcamp, etc.) 
+ from helper.provider import get_search_provider + try: + provider = get_search_provider(store, config or {}) + if provider and hasattr(provider, 'pipe') and callable(provider.pipe): + try: + debug(f"Calling provider.pipe for '{store}' with path: {path}") + provider_path = provider.pipe(path, config or {}) + debug(f"provider.pipe returned: {provider_path}") + if provider_path: + path = provider_path + debug(f"Got pipe path from provider '{store}': {path}") + except Exception as e: + debug(f"Error in provider.pipe for '{store}': {e}", file=sys.stderr) + except Exception as e: + debug(f"Error calling provider.pipe for '{store}': {e}", file=sys.stderr) + except Exception as e: + debug(f"Error calling .pipe() on store '{store}': {e}", file=sys.stderr) + + # As a fallback, if a provider exists for this store (e.g., youtube) and + # this store is not part of FileStorage backends, call provider.pipe() + if store and (not file_storage or store not in (file_storage.list_backends() if file_storage else [])): + try: + from helper.provider import get_search_provider + provider = get_search_provider(store, config or {}) + if provider and hasattr(provider, 'pipe') and callable(provider.pipe): + provider_path = provider.pipe(path, config or {}) + if provider_path: + path = provider_path + debug(f"Got pipe path from provider '{store}' (fallback): {path}") + except Exception as e: + debug(f"Error calling provider.pipe (fallback) for '{store}': {e}", file=sys.stderr) + + return (path, title) + + def _queue_items(items: List[Any], clear_first: bool = False, config: Optional[Dict[str, Any]] = None) -> bool: """Queue items to MPV, starting it if necessary. @@ -323,6 +557,12 @@ def _queue_items(items: List[Any], clear_first: bool = False, config: Optional[D Returns: True if MPV was started, False if items were queued via IPC. 
""" + # Debug: print incoming items + try: + debug(f"_queue_items: count={len(items)} types={[type(i).__name__ for i in items]}") + except Exception: + pass + # Just verify cookies are configured, don't try to set via IPC _ensure_ytdl_cookies() @@ -333,6 +573,14 @@ def _queue_items(items: List[Any], clear_first: bool = False, config: Optional[D hydrus_url = get_hydrus_url(config) if config is not None else None except Exception: hydrus_url = None + + # Initialize FileStorage for path resolution + file_storage = None + try: + from helper.store import FileStorage + file_storage = FileStorage(config or {}) + except Exception as e: + debug(f"Warning: Could not initialize FileStorage: {e}", file=sys.stderr) # Dedupe existing playlist before adding more (unless we're replacing it) existing_targets: set[str] = set() @@ -342,7 +590,7 @@ def _queue_items(items: List[Any], clear_first: bool = False, config: Optional[D for idx, pl_item in enumerate(playlist): fname = pl_item.get("filename") if isinstance(pl_item, dict) else str(pl_item) alt = pl_item.get("playlist-path") if isinstance(pl_item, dict) else None - norm = _normalize_playlist_target(fname) or _normalize_playlist_target(alt) + norm = _normalize_playlist_path(fname) or _normalize_playlist_path(alt) if not norm: continue if norm in existing_targets: @@ -360,25 +608,25 @@ def _queue_items(items: List[Any], clear_first: bool = False, config: Optional[D new_targets: set[str] = set() for i, item in enumerate(items): - # Extract URL/Path - target = None - title = None + # Debug: show the item being processed + try: + debug(f"_queue_items: processing idx={i} type={type(item)} repr={repr(item)[:200]}") + except Exception: + pass + # Extract URL/Path using store-aware logic + result = _get_playable_path(item, file_storage, config) + if not result: + debug(f"_queue_items: item idx={i} produced no playable path") + continue - if isinstance(item, dict): - target = item.get("target") or item.get("url") or item.get("path") or item.get("filename") - title = item.get("title") or item.get("name") - elif hasattr(item, "target"): - target = item.target - title = getattr(item, "title", None) - elif isinstance(item, str): - target = item + target, title = result if target: # If we just have a hydrus hash, build a direct file URL for MPV if re.fullmatch(r"[0-9a-f]{64}", str(target).strip().lower()) and hydrus_url: target = f"{hydrus_url.rstrip('/')}/get_files/file?hash={str(target).strip()}" - norm_key = _normalize_playlist_target(target) or str(target).strip().lower() + norm_key = _normalize_playlist_path(target) or str(target).strip().lower() if norm_key in existing_targets or norm_key in new_targets: debug(f"Skipping duplicate playlist entry: {title or target}") continue @@ -386,11 +634,16 @@ def _queue_items(items: List[Any], clear_first: bool = False, config: Optional[D # Check if it's a yt-dlp supported URL is_ytdlp = False - if target.startswith("http") and is_url_supported_by_ytdlp(target): - is_ytdlp = True + # Treat any http(s) target as yt-dlp candidate. If the Python yt-dlp + # module is available we also check more deeply, but default to True + # so MPV can use its ytdl hooks for remote streaming sites. 
+ try: + is_ytdlp = target.startswith("http") or is_url_supported_by_ytdlp(target) + except Exception: + is_ytdlp = target.startswith("http") # Use memory:// M3U hack to pass title to MPV - # Skip for yt-dlp URLs to ensure proper handling + # Skip for yt-dlp url to ensure proper handling if title and not is_ytdlp: # Sanitize title for M3U (remove newlines) safe_title = title.replace('\n', ' ').replace('\r', '') @@ -403,8 +656,8 @@ def _queue_items(items: List[Any], clear_first: bool = False, config: Optional[D if clear_first and i == 0: mode = "replace" - # If this is a Hydrus target, set header property and yt-dlp headers before loading - if hydrus_header and _is_hydrus_target(target_to_send, hydrus_url): + # If this is a Hydrus path, set header property and yt-dlp headers before loading + if hydrus_header and _is_hydrus_path(target_to_send, hydrus_url): header_cmd = {"command": ["set_property", "http-header-fields", hydrus_header], "request_id": 199} _send_ipc_command(header_cmd, silent=True) if ytdl_opts: @@ -412,11 +665,18 @@ def _queue_items(items: List[Any], clear_first: bool = False, config: Optional[D _send_ipc_command(ytdl_cmd, silent=True) cmd = {"command": ["loadfile", target_to_send, mode], "request_id": 200} - resp = _send_ipc_command(cmd) + try: + debug(f"Sending MPV loadfile: {target_to_send} mode={mode}") + resp = _send_ipc_command(cmd) + debug(f"MPV loadfile response: {resp}") + except Exception as e: + debug(f"Exception sending loadfile to MPV: {e}", file=sys.stderr) + resp = None if resp is None: # MPV not running (or died) # Start MPV with remaining items + debug(f"MPV not running/died while queuing, starting MPV with remaining items: {items[i:]}") _start_mpv(items[i:], config=config) return True elif resp.get("error") == "success": @@ -435,6 +695,14 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: parsed = parse_cmdlet_args(args, CMDLET) + # Initialize FileStorage for detecting Hydrus instance names + file_storage = None + try: + from helper.store import FileStorage + file_storage = FileStorage(config) + except Exception as e: + debug(f"Warning: Could not initialize FileStorage: {e}", file=sys.stderr) + # Initialize mpv_started flag mpv_started = False @@ -485,7 +753,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: # Emit the current item to pipeline result_obj = { - 'file_path': filename, + 'path': filename, 'title': title, 'cmdlet_name': '.pipe', 'source': 'pipe', @@ -683,10 +951,20 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: items_to_add = result elif isinstance(result, dict): items_to_add = [result] - - if _queue_items(items_to_add, config=config): + else: + # Handle PipeObject or any other object type + items_to_add = [result] + + # Debug: inspect incoming result and attributes + try: + debug(f"pipe._run: received result type={type(result)} repr={repr(result)[:200]}") + debug(f"pipe._run: attrs path={getattr(result, 'path', None)} url={getattr(result, 'url', None)} store={getattr(result, 'store', None)} hash={getattr(result, 'hash', None)}") + except Exception: + pass + + if items_to_add and _queue_items(items_to_add, config=config): mpv_started = True - + if items_to_add: # If we added items, we might want to play the first one if nothing is playing? 
# For now, just list the playlist @@ -760,7 +1038,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: return 1 else: # Play item - if hydrus_header and _is_hydrus_target(filename, hydrus_url): + if hydrus_header and _is_hydrus_path(filename, hydrus_url): header_cmd = {"command": ["set_property", "http-header-fields", hydrus_header], "request_id": 198} _send_ipc_command(header_cmd, silent=True) cmd = {"command": ["playlist-play-index", idx], "request_id": 102} @@ -799,28 +1077,84 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: except NameError: table_title = "MPV Playlist" - table = ResultTable(table_title) + table = ResultTable(table_title, preserve_order=True) + # Convert MPV items to PipeObjects with proper hash and store + pipe_objects = [] for i, item in enumerate(items): is_current = item.get("current", False) title = _extract_title_from_item(item) - store = _infer_store_from_playlist_item(item) - - # Truncate if too long - if len(title) > 80: - title = title[:77] + "..." + filename = item.get("filename", "") + + # Extract the real path/URL from memory:// wrapper if present + real_path = _extract_target_from_memory_uri(filename) or filename + + # Try to extract hash from the path/URL + file_hash = None + store_name = None + + # Check if it's a Hydrus URL + if "get_files/file" in real_path or "hash=" in real_path: + # Extract hash from Hydrus URL + hash_match = re.search(r"hash=([0-9a-f]{64})", real_path.lower()) + if hash_match: + file_hash = hash_match.group(1) + # Try to find which Hydrus instance has this file + if file_storage: + store_name = _find_hydrus_instance_for_hash(file_hash, file_storage) + if not store_name: + store_name = "hydrus" + # Check if it's a hash-based local file + elif real_path: + # Try to extract hash from filename (e.g., C:\path\1e8c46...a1b2.mp4) + path_obj = Path(real_path) + stem = path_obj.stem # filename without extension + if len(stem) == 64 and all(c in '0123456789abcdef' for c in stem.lower()): + file_hash = stem.lower() + # Find which folder store has this file + if file_storage: + for backend_name in file_storage.list_backends(): + backend = file_storage[backend_name] + if type(backend).__name__ == "Folder": + # Check if this backend has the file + try: + result_path = backend.get_file(file_hash) + if result_path and result_path.exists(): + store_name = backend_name + break + except Exception: + pass + + # Fallback to inferred store if we couldn't find it + if not store_name: + store_name = _infer_store_from_playlist_item(item, file_storage=file_storage) + + # Build PipeObject with proper metadata + from models import PipeObject + pipe_obj = PipeObject( + hash=file_hash or "unknown", + store=store_name or "unknown", + title=title, + path=real_path + ) + pipe_objects.append(pipe_obj) + + # Truncate title for display + display_title = title + if len(display_title) > 80: + display_title = display_title[:77] + "..." 
row = table.add_row() row.add_column("Current", "*" if is_current else "") - row.add_column("Store", store) - row.add_column("Title", title) + row.add_column("Store", store_name or "unknown") + row.add_column("Title", display_title) table.set_row_selection_args(i, [str(i + 1)]) table.set_source_command(".pipe") - # Register results with pipeline context so @N selection works - ctx.set_last_result_table_overlay(table, items) + # Register PipeObjects (not raw MPV items) with pipeline context + ctx.set_last_result_table_overlay(table, pipe_objects) ctx.set_current_stage_table(table) print(table) @@ -889,16 +1223,30 @@ def _start_mpv(items: List[Any], config: Optional[Dict[str, Any]] = None) -> Non if items: _queue_items(items, config=config) + # Auto-play the first item + import time + time.sleep(0.3) # Give MPV a moment to process the queued items + + # Play the first item (index 0) and unpause + play_cmd = {"command": ["playlist-play-index", 0], "request_id": 102} + play_resp = _send_ipc_command(play_cmd, silent=True) + + if play_resp and play_resp.get("error") == "success": + # Ensure playback starts (unpause) + unpause_cmd = {"command": ["set_property", "pause", False], "request_id": 103} + _send_ipc_command(unpause_cmd, silent=True) + debug("Auto-playing first item") + except Exception as e: debug(f"Error starting MPV: {e}", file=sys.stderr) CMDLET = Cmdlet( name=".pipe", - aliases=["pipe", "playlist", "queue", "ls-pipe"], + alias=["pipe", "playlist", "queue", "ls-pipe"], summary="Manage and play items in the MPV playlist via IPC", usage=".pipe [index|url] [-current] [-clear] [-list] [-url URL]", - args=[ + arg=[ CmdletArg( name="index", type="string", # Changed to string to allow URL detection diff --git a/cmdnats/worker.py b/cmdnats/worker.py index 98d88cc..7b91c8f 100644 --- a/cmdnats/worker.py +++ b/cmdnats/worker.py @@ -21,14 +21,14 @@ CMDLET = Cmdlet( name=".worker", summary="Display workers table in result table format.", usage=".worker [status] [-limit N] [@N]", - args=[ + arg=[ CmdletArg("status", description="Filter by status: running, completed, error (default: all)"), CmdletArg("limit", type="integer", description="Limit results (default: 100)"), CmdletArg("@N", description="Select worker by index (1-based) and display full logs"), CmdletArg("-id", description="Show full logs for a specific worker"), CmdletArg("-clear", type="flag", description="Remove completed workers from the database"), ], - details=[ + detail=[ "- Shows all background worker tasks and their output", "- Can filter by status: running, completed, error", "- Search result stdout is captured from each worker", @@ -74,9 +74,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: return 1 try: - from helper.local_library import LocalLibraryDB + from helper.folder_store import FolderDB - with LocalLibraryDB(library_root) as db: + with FolderDB(library_root) as db: if options.clear: count = db.clear_finished_workers() log(f"Cleared {count} finished workers.") diff --git a/config.py b/config.py index 27f649c..098916c 100644 --- a/config.py +++ b/config.py @@ -25,18 +25,28 @@ def _make_cache_key(config_dir: Optional[Path], filename: str, actual_path: Opti def get_hydrus_instance(config: Dict[str, Any], instance_name: str = "home") -> Optional[Dict[str, Any]]: """Get a specific Hydrus instance config by name. 
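The `_send_ipc_command` helper itself is not shown in this diff; for readers unfamiliar with mpv's IPC, each command is one JSON object terminated by a newline, written to the socket mpv was started with (`--input-ipc-server=...`). A rough sketch assuming a Unix socket path (the project presumably uses a named pipe on Windows, which this does not cover):

```python
import json
import socket
from typing import Any, Optional


def send_mpv_command(sock_path: str, command: list, request_id: int = 1) -> Optional[dict]:
    """Send one JSON IPC command to mpv and return the first parsed reply line."""
    payload = json.dumps({"command": command, "request_id": request_id}) + "\n"
    with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as sock:
        sock.connect(sock_path)
        sock.sendall(payload.encode("utf-8"))
        data = sock.recv(65536)
    for line in data.decode("utf-8", "replace").splitlines():
        try:
            return json.loads(line)
        except json.JSONDecodeError:
            continue
    return None


# Example: send_mpv_command("/tmp/mpv.sock", ["playlist-play-index", 0], 102)
```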
- Supports both formats: - - New: config["storage"]["hydrus"][instance_name] = {"key": "...", "url": "..."} - - Old: config["HydrusNetwork"][instance_name] = {"key": "...", "url": "..."} + Supports multiple formats: + - Current: config["store"]["hydrusnetwork"][instance_name] + - Legacy: config["storage"]["hydrus"][instance_name] + - Old: config["HydrusNetwork"][instance_name] Args: config: Configuration dict instance_name: Name of the Hydrus instance (default: "home") Returns: - Dict with "key" and "url" keys, or None if not found + Dict with access key and URL, or None if not found """ - # Try new format first + # Try current format first: config["store"]["hydrusnetwork"]["home"] + store = config.get("store", {}) + if isinstance(store, dict): + hydrusnetwork = store.get("hydrusnetwork", {}) + if isinstance(hydrusnetwork, dict): + instance = hydrusnetwork.get(instance_name) + if isinstance(instance, dict): + return instance + + # Try legacy format: config["storage"]["hydrus"] storage = config.get("storage", {}) if isinstance(storage, dict): hydrus_config = storage.get("hydrus", {}) @@ -45,7 +55,7 @@ def get_hydrus_instance(config: Dict[str, Any], instance_name: str = "home") -> if isinstance(instance, dict): return instance - # Fall back to old format + # Fall back to old format: config["HydrusNetwork"] hydrus_network = config.get("HydrusNetwork") if not isinstance(hydrus_network, dict): return None @@ -60,9 +70,10 @@ def get_hydrus_instance(config: Dict[str, Any], instance_name: str = "home") -> def get_hydrus_access_key(config: Dict[str, Any], instance_name: str = "home") -> Optional[str]: """Get Hydrus access key for an instance. - Supports both old flat format and new nested format: + Supports multiple formats: + - Current: config["store"]["hydrusnetwork"][name]["Hydrus-Client-API-Access-Key"] + - Legacy: config["storage"]["hydrus"][name]["key"] - Old: config["HydrusNetwork_Access_Key"] - - New: config["HydrusNetwork"][instance_name]["key"] Args: config: Configuration dict @@ -72,7 +83,18 @@ def get_hydrus_access_key(config: Dict[str, Any], instance_name: str = "home") - Access key string, or None if not found """ instance = get_hydrus_instance(config, instance_name) - key = instance.get("key") if instance else config.get("HydrusNetwork_Access_Key") + if instance: + # Try current format key name + key = instance.get("Hydrus-Client-API-Access-Key") + if key: + return str(key).strip() + # Try legacy key name + key = instance.get("key") + if key: + return str(key).strip() + + # Fall back to old flat format + key = config.get("HydrusNetwork_Access_Key") return str(key).strip() if key else None @@ -140,8 +162,9 @@ def resolve_output_dir(config: Dict[str, Any]) -> Path: def get_local_storage_path(config: Dict[str, Any]) -> Optional[Path]: """Get local storage path from config. 
- Supports both formats: - - New: config["storage"]["local"]["path"] + Supports multiple formats: + - New: config["store"]["folder"]["default"]["path"] + - Old: config["storage"]["local"]["path"] - Old: config["Local"]["path"] Args: @@ -150,7 +173,18 @@ def get_local_storage_path(config: Dict[str, Any]) -> Optional[Path]: Returns: Path object if found, None otherwise """ - # Try new format first + # Try new format first: store.folder.default.path + store = config.get("store", {}) + if isinstance(store, dict): + folder_config = store.get("folder", {}) + if isinstance(folder_config, dict): + default_config = folder_config.get("default", {}) + if isinstance(default_config, dict): + path_str = default_config.get("path") + if path_str: + return Path(str(path_str)).expanduser() + + # Fall back to storage.local.path format storage = config.get("storage", {}) if isinstance(storage, dict): local_config = storage.get("local", {}) @@ -159,7 +193,7 @@ def get_local_storage_path(config: Dict[str, Any]) -> Optional[Path]: if path_str: return Path(str(path_str)).expanduser() - # Fall back to old format + # Fall back to old Local format local_config = config.get("Local", {}) if isinstance(local_config, dict): path_str = local_config.get("path") diff --git a/helper/__init__.py b/helper/__init__.py index ffe9ab4..4d1878d 100644 --- a/helper/__init__.py +++ b/helper/__init__.py @@ -50,7 +50,6 @@ UrlPolicy = _utils.UrlPolicy DownloadOptions = _download.DownloadOptions DownloadError = _download.DownloadError DownloadMediaResult = _download.DownloadMediaResult -download_media = _download.download_media is_url_supported_by_ytdlp = _download.is_url_supported_by_ytdlp probe_url = _download.probe_url # Hydrus utilities diff --git a/helper/alldebrid.py b/helper/alldebrid.py index 653e0ed..7a80885 100644 --- a/helper/alldebrid.py +++ b/helper/alldebrid.py @@ -35,7 +35,7 @@ class AllDebridClient: """Client for AllDebrid API.""" # Try both v4 and v3 APIs - BASE_URLS = [ + BASE_url = [ "https://api.alldebrid.com/v4", "https://api.alldebrid.com/v3", ] @@ -49,7 +49,7 @@ class AllDebridClient: self.api_key = api_key.strip() if not self.api_key: raise AllDebridError("AllDebrid API key is empty") - self.base_url = self.BASE_URLS[0] # Start with v4 + self.base_url = self.BASE_url[0] # Start with v4 def _request(self, endpoint: str, params: Optional[Dict[str, str]] = None) -> Dict[str, Any]: """Make a request to AllDebrid API. @@ -738,7 +738,7 @@ def parse_magnet_or_hash(uri: str) -> Optional[str]: def unlock_link_cmdlet(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: """Unlock a restricted link using AllDebrid. - Converts free hosters and restricted links to direct download URLs. + Converts free hosters and restricted links to direct download url. Usage: unlock-link diff --git a/helper/archive_client.py b/helper/archive_client.py index 80e9dab..d67e415 100644 --- a/helper/archive_client.py +++ b/helper/archive_client.py @@ -378,7 +378,7 @@ def download( session: Authenticated requests.Session n_threads: Number of download threads directory: Directory to save images to - links: List of image URLs + links: List of image url scale: Image resolution (0=highest, 10=lowest) book_id: Archive.org book ID (for re-borrowing) diff --git a/helper/background_notifier.py b/helper/background_notifier.py new file mode 100644 index 0000000..1eb90dd --- /dev/null +++ b/helper/background_notifier.py @@ -0,0 +1,195 @@ +"""Lightweight console notifier for background WorkerManager tasks. 
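To make the lookup order in `get_hydrus_instance`, `get_hydrus_access_key`, and `get_local_storage_path` concrete, here is a hedged example of the config shapes they accept (values are placeholders; the URL field name in the current layout is assumed; the `store.*` layout is checked first, then the legacy forms):

```python
# Current layout (checked first)
config_current = {
    "store": {
        "hydrusnetwork": {
            "home": {
                "Hydrus-Client-API-Access-Key": "<access key>",
                "url": "http://127.0.0.1:45869",  # field name assumed
            }
        },
        "folder": {
            "default": {"path": "~/media-library"},
        },
    }
}

# Legacy layouts still honoured as fallbacks
config_legacy = {
    "storage": {
        "hydrus": {"home": {"key": "<access key>", "url": "http://127.0.0.1:45869"}},
        "local": {"path": "~/media-library"},
    },
    "HydrusNetwork_Access_Key": "<access key>",  # old flat access-key field
    "Local": {"path": "~/media-library"},        # old local-path field
}
```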
+ +Registers a refresh callback on WorkerManager and prints concise updates when +workers start, progress, or finish. Intended for CLI background workflows. + +Filters to show only workers related to the current pipeline session to avoid +cluttering the terminal with workers from previous sessions. +""" +from __future__ import annotations + +from typing import Any, Callable, Dict, Optional, Set + +from helper.logger import log, debug + + +class BackgroundNotifier: + """Simple notifier that prints worker status changes for a session.""" + + def __init__( + self, + manager: Any, + output: Callable[[str], None] = log, + session_worker_ids: Optional[Set[str]] = None, + only_terminal_updates: bool = False, + overlay_mode: bool = False, + ) -> None: + self.manager = manager + self.output = output + self.session_worker_ids = session_worker_ids if session_worker_ids is not None else set() + self.only_terminal_updates = only_terminal_updates + self.overlay_mode = overlay_mode + self._filter_enabled = session_worker_ids is not None + self._last_state: Dict[str, str] = {} + + try: + self.manager.add_refresh_callback(self._on_refresh) + self.manager.start_auto_refresh() + except Exception as exc: # pragma: no cover - best effort + debug(f"[notifier] Could not attach refresh callback: {exc}") + + def _render_line(self, worker: Dict[str, Any]) -> Optional[str]: + # Use worker_id (the actual worker ID we set) for filtering and display + worker_id = str(worker.get("worker_id") or "").strip() + if not worker_id: + # Fallback to database id if worker_id is not set + worker_id = str(worker.get("id") or "").strip() + if not worker_id: + return None + + status = str(worker.get("status") or "running") + progress_val = worker.get("progress") or worker.get("progress_percent") + progress = "" + if isinstance(progress_val, (int, float)): + progress = f" {progress_val:.1f}%" + elif progress_val: + progress = f" {progress_val}" + + step = str(worker.get("current_step") or worker.get("description") or "").strip() + parts = [f"[worker:{worker_id}] {status}{progress}"] + if step: + parts.append(step) + return " - ".join(parts) + + def _on_refresh(self, workers: list[Dict[str, Any]]) -> None: + overlay_active_workers = 0 + + for worker in workers: + # Use worker_id (the actual worker ID we set) for filtering + worker_id = str(worker.get("worker_id") or "").strip() + if not worker_id: + # Fallback to database id if worker_id is not set + worker_id = str(worker.get("id") or "").strip() + if not worker_id: + continue + + # If filtering is enabled, skip workers not in this session + if self._filter_enabled and worker_id not in self.session_worker_ids: + continue + + status = str(worker.get("status") or "running") + + # Overlay mode: only emit on completion; suppress start/progress spam + if self.overlay_mode: + if status in ("completed", "finished", "error"): + progress_val = worker.get("progress") or worker.get("progress_percent") or "" + step = str(worker.get("current_step") or worker.get("description") or "").strip() + signature = f"{status}|{progress_val}|{step}" + + if self._last_state.get(worker_id) == signature: + continue + + self._last_state[worker_id] = signature + line = self._render_line(worker) + if line: + try: + self.output(line) + except Exception: + pass + + self._last_state.pop(worker_id, None) + self.session_worker_ids.discard(worker_id) + continue + + # For terminal-only mode, emit once when the worker finishes and skip intermediate updates + if self.only_terminal_updates: + if status in ("completed", 
"finished", "error"): + if self._last_state.get(worker_id) == status: + continue + self._last_state[worker_id] = status + line = self._render_line(worker) + if line: + try: + self.output(line) + except Exception: + pass + # Stop tracking this worker after terminal notification + self.session_worker_ids.discard(worker_id) + continue + + # Skip finished workers after showing them once (standard verbose mode) + if status in ("completed", "finished", "error"): + if worker_id in self._last_state: + # Already shown, remove from tracking + self._last_state.pop(worker_id, None) + self.session_worker_ids.discard(worker_id) + continue + + progress_val = worker.get("progress") or worker.get("progress_percent") or "" + step = str(worker.get("current_step") or worker.get("description") or "").strip() + signature = f"{status}|{progress_val}|{step}" + + if self._last_state.get(worker_id) == signature: + continue + + self._last_state[worker_id] = signature + line = self._render_line(worker) + if line: + try: + self.output(line) + except Exception: + pass + + if self.overlay_mode: + try: + # If nothing active for this session, clear the overlay text + if overlay_active_workers == 0: + self.output("") + except Exception: + pass + + +def ensure_background_notifier( + manager: Any, + output: Callable[[str], None] = log, + session_worker_ids: Optional[Set[str]] = None, + only_terminal_updates: bool = False, + overlay_mode: bool = False, +) -> Optional[BackgroundNotifier]: + """Attach a BackgroundNotifier to a WorkerManager if not already present. + + Args: + manager: WorkerManager instance + output: Function to call for printing updates + session_worker_ids: Set of worker IDs belonging to this pipeline session. + If None, show all workers. If a set (even empty), only show workers in that set. 
+ """ + if manager is None: + return None + + existing = getattr(manager, "_background_notifier", None) + if isinstance(existing, BackgroundNotifier): + # Update session IDs if provided + if session_worker_ids is not None: + existing._filter_enabled = True + existing.session_worker_ids.update(session_worker_ids) + # Respect the most restrictive setting for terminal-only updates + if only_terminal_updates: + existing.only_terminal_updates = True + # Enable overlay mode if requested later + if overlay_mode: + existing.overlay_mode = True + return existing + + notifier = BackgroundNotifier( + manager, + output, + session_worker_ids=session_worker_ids, + only_terminal_updates=only_terminal_updates, + overlay_mode=overlay_mode, + ) + try: + manager._background_notifier = notifier # type: ignore[attr-defined] + except Exception: + pass + return notifier diff --git a/helper/cmdlet_catalog.py b/helper/cmdlet_catalog.py new file mode 100644 index 0000000..c45e759 --- /dev/null +++ b/helper/cmdlet_catalog.py @@ -0,0 +1,223 @@ +from __future__ import annotations + +from importlib import import_module +from typing import Any, Dict, List, Optional + +try: + from cmdlets import REGISTRY +except Exception: + REGISTRY = {} # type: ignore + +try: + from cmdnats import register_native_commands as _register_native_commands +except Exception: + _register_native_commands = None + + +def ensure_registry_loaded() -> None: + """Ensure native commands are registered into REGISTRY (idempotent).""" + if _register_native_commands and REGISTRY is not None: + try: + _register_native_commands(REGISTRY) + except Exception: + pass + + +def _normalize_mod_name(mod_name: str) -> str: + """Normalize a command/module name for import resolution.""" + normalized = (mod_name or "").strip() + if normalized.startswith('.'): + normalized = normalized.lstrip('.') + normalized = normalized.replace('-', '_') + return normalized + + +def import_cmd_module(mod_name: str): + """Import a cmdlet/native module from cmdnats or cmdlets packages.""" + normalized = _normalize_mod_name(mod_name) + if not normalized: + return None + for package in ("cmdnats", "cmdlets", None): + try: + qualified = f"{package}.{normalized}" if package else normalized + return import_module(qualified) + except ModuleNotFoundError: + continue + except Exception: + continue + return None + + +def _normalize_arg(arg: Any) -> Dict[str, Any]: + """Convert a CmdletArg/dict into a plain metadata dict.""" + if isinstance(arg, dict): + name = arg.get("name", "") + return { + "name": str(name).lstrip("-"), + "type": arg.get("type", "string"), + "required": bool(arg.get("required", False)), + "description": arg.get("description", ""), + "choices": arg.get("choices", []) or [], + "alias": arg.get("alias", ""), + "variadic": arg.get("variadic", False), + } + + name = getattr(arg, "name", "") or "" + return { + "name": str(name).lstrip("-"), + "type": getattr(arg, "type", "string"), + "required": bool(getattr(arg, "required", False)), + "description": getattr(arg, "description", ""), + "choices": getattr(arg, "choices", []) or [], + "alias": getattr(arg, "alias", ""), + "variadic": getattr(arg, "variadic", False), + } + + +def get_cmdlet_metadata(cmd_name: str) -> Optional[Dict[str, Any]]: + """Return normalized metadata for a cmdlet, if available (aliases supported).""" + ensure_registry_loaded() + normalized = cmd_name.replace("-", "_") + mod = import_cmd_module(normalized) + data = getattr(mod, "CMDLET", None) if mod else None + + # Fallback: resolve via registered function's 
module (covers aliases) + if data is None: + try: + reg_fn = (REGISTRY or {}).get(cmd_name.replace('_', '-').lower()) + if reg_fn: + owner_mod = getattr(reg_fn, "__module__", "") + if owner_mod: + owner = import_module(owner_mod) + data = getattr(owner, "CMDLET", None) + except Exception: + data = None + + if not data: + return None + + if hasattr(data, "to_dict"): + base = data.to_dict() + elif isinstance(data, dict): + base = data + else: + base = {} + + name = getattr(data, "name", base.get("name", cmd_name)) or cmd_name + aliases = getattr(data, "aliases", base.get("aliases", [])) or [] + usage = getattr(data, "usage", base.get("usage", "")) + summary = getattr(data, "summary", base.get("summary", "")) + details = getattr(data, "details", base.get("details", [])) or [] + args_list = getattr(data, "args", base.get("args", [])) or [] + args = [_normalize_arg(arg) for arg in args_list] + + return { + "name": str(name).replace("_", "-").lower(), + "aliases": [str(a).replace("_", "-").lower() for a in aliases if a], + "usage": usage, + "summary": summary, + "details": details, + "args": args, + "raw": data, + } + + +def list_cmdlet_metadata() -> Dict[str, Dict[str, Any]]: + """Collect metadata for all registered cmdlets keyed by canonical name.""" + ensure_registry_loaded() + entries: Dict[str, Dict[str, Any]] = {} + for reg_name in (REGISTRY or {}).keys(): + meta = get_cmdlet_metadata(reg_name) + canonical = str(reg_name).replace("_", "-").lower() + + if meta: + canonical = meta.get("name", canonical) + aliases = meta.get("aliases", []) + base = entries.get( + canonical, + { + "name": canonical, + "aliases": [], + "usage": "", + "summary": "", + "details": [], + "args": [], + "raw": meta.get("raw"), + }, + ) + merged_aliases = set(base.get("aliases", [])) | set(aliases) + if canonical != reg_name: + merged_aliases.add(reg_name) + base["aliases"] = sorted(a for a in merged_aliases if a and a != canonical) + if not base.get("usage") and meta.get("usage"): + base["usage"] = meta["usage"] + if not base.get("summary") and meta.get("summary"): + base["summary"] = meta["summary"] + if not base.get("details") and meta.get("details"): + base["details"] = meta["details"] + if not base.get("args") and meta.get("args"): + base["args"] = meta["args"] + if not base.get("raw"): + base["raw"] = meta.get("raw") + entries[canonical] = base + else: + entries.setdefault( + canonical, + {"name": canonical, "aliases": [], "usage": "", "summary": "", "details": [], "args": [], "raw": None}, + ) + return entries + + +def list_cmdlet_names(include_aliases: bool = True) -> List[str]: + """Return sorted cmdlet names (optionally including aliases).""" + ensure_registry_loaded() + entries = list_cmdlet_metadata() + names = set() + for meta in entries.values(): + names.add(meta.get("name", "")) + if include_aliases: + for alias in meta.get("aliases", []): + names.add(alias) + return sorted(n for n in names if n) + + +def get_cmdlet_arg_flags(cmd_name: str) -> List[str]: + """Return flag variants for cmdlet arguments (e.g., -name/--name).""" + meta = get_cmdlet_metadata(cmd_name) + if not meta: + return [] + + raw = meta.get("raw") + if raw and hasattr(raw, "build_flag_registry"): + try: + registry = raw.build_flag_registry() + flags: List[str] = [] + for flag_set in registry.values(): + flags.extend(flag_set) + return sorted(set(flags)) + except Exception: + pass + + flags: List[str] = [] + for arg in meta.get("args", []): + name = arg.get("name") + if not name: + continue + flags.append(f"-{name}") + 
flags.append(f"--{name}") + alias = arg.get("alias") + if alias: + flags.append(f"-{alias}") + return flags + + +def get_cmdlet_arg_choices(cmd_name: str, arg_name: str) -> List[str]: + """Return declared choices for a cmdlet argument.""" + meta = get_cmdlet_metadata(cmd_name) + if not meta: + return [] + target = arg_name.lstrip("-") + for arg in meta.get("args", []): + if arg.get("name") == target: + return list(arg.get("choices", []) or []) + return [] diff --git a/helper/download.py b/helper/download.py index 29c05e5..4d19ed5 100644 --- a/helper/download.py +++ b/helper/download.py @@ -28,7 +28,6 @@ from helper.logger import log, debug from .utils import ensure_directory, sha256_file from .http_client import HTTPClient from models import DownloadError, DownloadOptions, DownloadMediaResult, DebugLogger, ProgressBar -from hydrus_health_check import get_cookies_file_path try: import yt_dlp # type: ignore @@ -145,7 +144,7 @@ def list_formats(url: str, no_playlist: bool = False, playlist_items: Optional[s return None -def _download_with_sections_via_cli(url: str, ytdl_options: Dict[str, Any], sections: List[str]) -> tuple[Optional[str], Dict[str, Any]]: +def _download_with_sections_via_cli(url: str, ytdl_options: Dict[str, Any], sections: List[str], quiet: bool = False) -> tuple[Optional[str], Dict[str, Any]]: """Download each section separately so merge-file can combine them. yt-dlp with multiple --download-sections args merges them into one file. @@ -204,11 +203,14 @@ def _download_with_sections_via_cli(url: str, ytdl_options: Dict[str, Any], sect info_dict = json.loads(meta_result.stdout.strip()) first_section_info = info_dict title_from_first = info_dict.get('title') - debug(f"Extracted title from metadata: {title_from_first}") + if not quiet: + debug(f"Extracted title from metadata: {title_from_first}") except json.JSONDecodeError: - debug("Could not parse JSON metadata") + if not quiet: + debug("Could not parse JSON metadata") except Exception as e: - debug(f"Error extracting metadata: {e}") + if not quiet: + debug(f"Error extracting metadata: {e}") # Build yt-dlp command for downloading this section cmd = ["yt-dlp"] @@ -240,8 +242,9 @@ def _download_with_sections_via_cli(url: str, ytdl_options: Dict[str, Any], sect # Add the URL cmd.append(url) - debug(f"Running yt-dlp for section {section_idx}/{len(sections_list)}: {section}") - debug(f"Command: {' '.join(cmd)}") + if not quiet: + debug(f"Running yt-dlp for section {section_idx}/{len(sections_list)}: {section}") + debug(f"Command: {' '.join(cmd)}") # Run the subprocess - don't capture output so progress is shown try: @@ -273,13 +276,15 @@ def _build_ytdlp_options(opts: DownloadOptions) -> Dict[str, Any]: "fragment_retries": 10, "http_chunk_size": 10_485_760, "restrictfilenames": True, - "progress_hooks": [_progress_callback], + "progress_hooks": [] if opts.quiet else [_progress_callback], } if opts.cookies_path and opts.cookies_path.is_file(): base_options["cookiefile"] = str(opts.cookies_path) else: - # Check global cookies file + # Check global cookies file lazily to avoid import cycles + from hydrus_health_check import get_cookies_file_path # local import + global_cookies = get_cookies_file_path() if global_cookies: base_options["cookiefile"] = global_cookies @@ -287,7 +292,7 @@ def _build_ytdlp_options(opts: DownloadOptions) -> Dict[str, Any]: # Fallback to browser cookies base_options["cookiesfrombrowser"] = ("chrome",) - # Add no-playlist option if specified (for single video from playlist URLs) + # Add no-playlist option if 
specified (for single video from playlist url) if opts.no_playlist: base_options["noplaylist"] = True @@ -336,7 +341,8 @@ def _build_ytdlp_options(opts: DownloadOptions) -> Dict[str, Any]: if opts.playlist_items: base_options["playlist_items"] = opts.playlist_items - debug(f"yt-dlp: mode={opts.mode}, format={base_options.get('format')}") + if not opts.quiet: + debug(f"yt-dlp: mode={opts.mode}, format={base_options.get('format')}") return base_options @@ -411,8 +417,8 @@ def _extract_sha256(info: Dict[str, Any]) -> Optional[str]: def _get_libgen_download_url(libgen_url: str) -> Optional[str]: """Extract the actual download link from LibGen redirect URL. - LibGen URLs like https://libgen.gl/file.php?id=123456 redirect to - actual mirror URLs. This follows the redirect chain to get the real file. + LibGen url like https://libgen.gl/file.php?id=123456 redirect to + actual mirror url. This follows the redirect chain to get the real file. Args: libgen_url: LibGen file.php URL @@ -491,6 +497,7 @@ def _download_direct_file( url: str, output_dir: Path, debug_logger: Optional[DebugLogger] = None, + quiet: bool = False, ) -> DownloadMediaResult: """Download a direct file (PDF, image, document, etc.) without yt-dlp.""" ensure_directory(output_dir) @@ -535,9 +542,11 @@ def _download_direct_file( extracted_name = match.group(1) or match.group(2) if extracted_name: filename = unquote(extracted_name) - debug(f"Filename from Content-Disposition: {filename}") + if not quiet: + debug(f"Filename from Content-Disposition: {filename}") except Exception as e: - log(f"Could not get filename from headers: {e}", file=sys.stderr) + if not quiet: + log(f"Could not get filename from headers: {e}", file=sys.stderr) # Fallback if we still don't have a good filename if not filename or "." not in filename: @@ -546,7 +555,8 @@ def _download_direct_file( file_path = output_dir / filename progress_bar = ProgressBar() - debug(f"Direct download: {filename}") + if not quiet: + debug(f"Direct download: {filename}") try: start_time = time.time() @@ -577,7 +587,8 @@ def _download_direct_file( speed_str=speed_str, eta_str=eta_str, ) - debug(progress_line) + if not quiet: + debug(progress_line) last_progress_time[0] = now with HTTPClient(timeout=30.0) as client: @@ -585,7 +596,8 @@ def _download_direct_file( elapsed = time.time() - start_time avg_speed_str = progress_bar.format_bytes(downloaded_bytes[0] / elapsed if elapsed > 0 else 0) + "/s" - debug(f"✓ Downloaded in {elapsed:.1f}s at {avg_speed_str}") + if not quiet: + debug(f"✓ Downloaded in {elapsed:.1f}s at {avg_speed_str}") # For direct file downloads, create minimal info dict without filename as title # This prevents creating duplicate title: tags when filename gets auto-generated @@ -658,375 +670,98 @@ def _download_direct_file( raise DownloadError(f"Error downloading file: {exc}") from exc -def probe_url(url: str, no_playlist: bool = False) -> Optional[Dict[str, Any]]: +def probe_url(url: str, no_playlist: bool = False, timeout_seconds: int = 15) -> Optional[Dict[str, Any]]: """Probe URL to extract metadata WITHOUT downloading. Args: url: URL to probe no_playlist: If True, ignore playlists and probe only the single video + timeout_seconds: Max seconds to wait for probe (default 15s) Returns: Dict with keys: extractor, title, entries (if playlist), duration, etc. - Returns None if not supported by yt-dlp. + Returns None if not supported by yt-dlp or on timeout. 
""" if not is_url_supported_by_ytdlp(url): return None - _ensure_yt_dlp_ready() + # Wrap probe in timeout to prevent hanging on large playlists + import threading + from typing import cast - assert yt_dlp is not None - try: - # Extract info without downloading - # Use extract_flat='in_playlist' to get full metadata for playlist items - ydl_opts = { - "quiet": True, # Suppress all output - "no_warnings": True, - "socket_timeout": 10, - "retries": 3, - "skip_download": True, # Don't actually download - "extract_flat": "in_playlist", # Get playlist with metadata for each entry - "noprogress": True, # No progress bars - } - - # Add cookies if available - global_cookies = get_cookies_file_path() - if global_cookies: - ydl_opts["cookiefile"] = global_cookies - - # Add no_playlist option if specified - if no_playlist: - ydl_opts["noplaylist"] = True - - with yt_dlp.YoutubeDL(ydl_opts) as ydl: # type: ignore[arg-type] - info = ydl.extract_info(url, download=False) - - if not isinstance(info, dict): - return None - - # Extract relevant fields - return { - "extractor": info.get("extractor", ""), - "title": info.get("title", ""), - "entries": info.get("entries", []), # Will be populated if playlist - "duration": info.get("duration"), - "uploader": info.get("uploader"), - "description": info.get("description"), - "url": url, - } - except Exception as exc: - log(f"Probe failed for {url}: {exc}") - return None - - -def download_media( - opts: DownloadOptions, - *, - debug_logger: Optional[DebugLogger] = None, -) -> DownloadMediaResult: - """Download media from URL using yt-dlp or direct HTTP download. + result_container: List[Optional[Any]] = [None, None] # [result, error] - Args: - opts: DownloadOptions with url, mode, output_dir, etc. - debug_logger: Optional debug logger for troubleshooting - - Returns: - DownloadMediaResult with path, info, tags, hash - - Raises: - DownloadError: If download fails - """ - # Handle LibGen URLs specially - # file.php redirects to mirrors, get.php is direct from modern API - if 'libgen' in opts.url.lower(): - if '/get.php' in opts.url.lower(): - # Modern API get.php links are direct downloads from mirrors (not file redirects) - log(f"Detected LibGen get.php URL, downloading directly...") - if debug_logger is not None: - debug_logger.write_record("libgen-direct", {"url": opts.url}) - return _download_direct_file(opts.url, opts.output_dir, debug_logger) - elif '/file.php' in opts.url.lower(): - # Old-style file.php redirects to mirrors, we need to resolve - log(f"Detected LibGen file.php URL, resolving to actual mirror...") - actual_url = _get_libgen_download_url(opts.url) - if actual_url and actual_url != opts.url: - log(f"Resolved LibGen URL to mirror: {actual_url}") - opts.url = actual_url - # After resolution, this will typically be an onion link or direct file - # Skip yt-dlp for this (it won't support onion/mirrors), go direct - if debug_logger is not None: - debug_logger.write_record("libgen-resolved", {"original": opts.url, "resolved": actual_url}) - return _download_direct_file(opts.url, opts.output_dir, debug_logger) - else: - log(f"Could not resolve LibGen URL, trying direct download anyway", file=sys.stderr) - if debug_logger is not None: - debug_logger.write_record("libgen-resolve-failed", {"url": opts.url}) - return _download_direct_file(opts.url, opts.output_dir, debug_logger) - - # Handle GoFile shares with a dedicated resolver before yt-dlp/direct fallbacks - try: - netloc = urlparse(opts.url).netloc.lower() - except Exception: - netloc = "" - if 
"gofile.io" in netloc: - msg = "GoFile links are currently unsupported" - debug(msg) - if debug_logger is not None: - debug_logger.write_record("gofile-unsupported", {"url": opts.url}) - raise DownloadError(msg) - - # Determine if yt-dlp should be used - ytdlp_supported = is_url_supported_by_ytdlp(opts.url) - if ytdlp_supported: - probe_result = probe_url(opts.url, no_playlist=opts.no_playlist) - if probe_result is None: - log(f"URL supported by yt-dlp but no media detected, falling back to direct download: {opts.url}") - if debug_logger is not None: - debug_logger.write_record("ytdlp-skip-no-media", {"url": opts.url}) - return _download_direct_file(opts.url, opts.output_dir, debug_logger) - else: - log(f"URL not supported by yt-dlp, trying direct download: {opts.url}") - if debug_logger is not None: - debug_logger.write_record("direct-file-attempt", {"url": opts.url}) - return _download_direct_file(opts.url, opts.output_dir, debug_logger) - - _ensure_yt_dlp_ready() - - ytdl_options = _build_ytdlp_options(opts) - debug(f"Starting yt-dlp download: {opts.url}") - if debug_logger is not None: - debug_logger.write_record("ytdlp-start", {"url": opts.url}) - - assert yt_dlp is not None - try: - # Debug: show what options we're using - if ytdl_options.get("download_sections"): - debug(f"[yt-dlp] download_sections: {ytdl_options['download_sections']}") - debug(f"[yt-dlp] force_keyframes_at_cuts: {ytdl_options.get('force_keyframes_at_cuts', False)}") - - # Use subprocess when download_sections are present (Python API doesn't support them properly) - session_id = None - first_section_info = {} - if ytdl_options.get("download_sections"): - session_id, first_section_info = _download_with_sections_via_cli(opts.url, ytdl_options, ytdl_options.get("download_sections", [])) - info = None - else: - with yt_dlp.YoutubeDL(ytdl_options) as ydl: # type: ignore[arg-type] - info = ydl.extract_info(opts.url, download=True) - except Exception as exc: - log(f"yt-dlp failed: {exc}", file=sys.stderr) - if debug_logger is not None: - debug_logger.write_record( - "exception", - { - "phase": "yt-dlp", - "error": str(exc), - "traceback": traceback.format_exc(), - }, - ) - raise DownloadError("yt-dlp download failed") from exc - - # If we used subprocess, we need to find the file manually - if info is None: - # Find files created/modified during this download (after we started) - # Look for files matching the expected output template pattern + def _do_probe() -> None: try: - import glob - import time - import re + _ensure_yt_dlp_ready() - # Get the expected filename pattern from outtmpl - # For sections: "C:\path\{session_id}.section_1_of_3.ext", etc. - # For non-sections: "C:\path\title.ext" - - # Wait a moment to ensure files are fully written - time.sleep(0.5) - - # List all files in output_dir, sorted by modification time - files = sorted(opts.output_dir.iterdir(), key=lambda p: p.stat().st_mtime, reverse=True) - if not files: - raise FileNotFoundError(f"No files found in {opts.output_dir}") - - # If we downloaded sections, look for files with the session_id pattern - if opts.clip_sections and session_id: - # Pattern: "{session_id}_1.ext", "{session_id}_2.ext", etc. 
- section_pattern = re.compile(rf'^{re.escape(session_id)}_(\d+)\.') - matching_files = [f for f in files if section_pattern.search(f.name)] - - if matching_files: - # Sort by section number to ensure correct order - def extract_section_num(path: Path) -> int: - match = section_pattern.search(path.name) - return int(match.group(1)) if match else 999 - - matching_files.sort(key=extract_section_num) - debug(f"Found {len(matching_files)} section file(s) matching pattern") - - # Now rename section files to use hash-based names - # This ensures unique filenames for each section content - renamed_files = [] - - for idx, section_file in enumerate(matching_files, 1): - try: - # Calculate hash for the file - file_hash = sha256_file(section_file) - ext = section_file.suffix - new_name = f"{file_hash}{ext}" - new_path = opts.output_dir / new_name - - if new_path.exists() and new_path != section_file: - # If file with same hash exists, use it and delete the temp one - debug(f"File with hash {file_hash} already exists, using existing file.") - try: - section_file.unlink() - except OSError: - pass - renamed_files.append(new_path) - else: - section_file.rename(new_path) - debug(f"Renamed section file: {section_file.name} → {new_name}") - renamed_files.append(new_path) - except Exception as e: - debug(f"Failed to process section file {section_file.name}: {e}") - renamed_files.append(section_file) - - media_path = renamed_files[0] - media_paths = renamed_files - debug(f"✓ Downloaded {len(media_paths)} section file(s) (session: {session_id})") - else: - # Fallback to most recent file if pattern not found - media_path = files[0] - media_paths = None - debug(f"✓ Downloaded section file (pattern not found): {media_path.name}") - else: - # No sections, just take the most recent file - media_path = files[0] - media_paths = None - - debug(f"✓ Downloaded: {media_path.name}") - if debug_logger is not None: - debug_logger.write_record("ytdlp-file-found", {"path": str(media_path)}) - except Exception as exc: - log(f"Error finding downloaded file: {exc}", file=sys.stderr) - if debug_logger is not None: - debug_logger.write_record( - "exception", - {"phase": "find-file", "error": str(exc)}, - ) - raise DownloadError(str(exc)) from exc - - # Create result with minimal data extracted from filename - file_hash = sha256_file(media_path) - - # For section downloads, create tags with the title and build proper info dict - tags = [] - title = '' - if first_section_info: - title = first_section_info.get('title', '') - if title: - tags.append(f'title:{title}') - debug(f"Added title tag for section download: {title}") - - # Build info dict - always use extracted title if available, not hash - if first_section_info: - info_dict = first_section_info - else: - info_dict = { - "id": media_path.stem, - "title": title or media_path.stem, - "ext": media_path.suffix.lstrip(".") + assert yt_dlp is not None + # Extract info without downloading + # Use extract_flat='in_playlist' to get full metadata for playlist items + ydl_opts = { + "quiet": True, # Suppress all output + "no_warnings": True, + "socket_timeout": 10, + "retries": 2, # Reduce retries for faster timeout + "skip_download": True, # Don't actually download + "extract_flat": "in_playlist", # Get playlist with metadata for each entry + "noprogress": True, # No progress bars } - - return DownloadMediaResult( - path=media_path, - info=info_dict, - tags=tags, - source_url=opts.url, - hash_value=file_hash, - paths=media_paths, # Include all section files if present - ) + + # Add 
cookies if available (lazy import to avoid circular dependency) + from hydrus_health_check import get_cookies_file_path # local import - if not isinstance(info, dict): - log(f"Unexpected yt-dlp response: {type(info)}", file=sys.stderr) - raise DownloadError("Unexpected yt-dlp response type") - - info_dict: Dict[str, Any] = info - if debug_logger is not None: - debug_logger.write_record( - "ytdlp-info", - { - "keys": sorted(info_dict.keys()), - "is_playlist": bool(info_dict.get("entries")), - }, - ) - - try: - entry, media_path = _resolve_entry_and_path(info_dict, opts.output_dir) - except FileNotFoundError as exc: - log(f"Error: {exc}", file=sys.stderr) - if debug_logger is not None: - debug_logger.write_record( - "exception", - {"phase": "resolve-path", "error": str(exc)}, - ) - raise DownloadError(str(exc)) from exc - - if debug_logger is not None: - debug_logger.write_record( - "resolved-media", - {"path": str(media_path), "entry_keys": sorted(entry.keys())}, - ) - - # Extract hash from metadata or compute - hash_value = _extract_sha256(entry) or _extract_sha256(info_dict) - if not hash_value: - try: - hash_value = sha256_file(media_path) - except OSError as exc: - if debug_logger is not None: - debug_logger.write_record( - "hash-error", - {"path": str(media_path), "error": str(exc)}, - ) - - # Extract tags using metadata.py - tags = [] - if extract_ytdlp_tags: - try: - tags = extract_ytdlp_tags(entry) - except Exception as e: - log(f"Error extracting tags: {e}", file=sys.stderr) - - source_url = ( - entry.get("webpage_url") - or entry.get("original_url") - or entry.get("url") - ) - - debug(f"✓ Downloaded: {media_path.name} ({len(tags)} tags)") - if debug_logger is not None: - debug_logger.write_record( - "downloaded", - { - "path": str(media_path), - "tag_count": len(tags), - "source_url": source_url, - "sha256": hash_value, - }, - ) - - return DownloadMediaResult( - path=media_path, - info=entry, - tags=tags, - source_url=source_url, - hash_value=hash_value, - ) + global_cookies = get_cookies_file_path() + if global_cookies: + ydl_opts["cookiefile"] = global_cookies + + # Add no_playlist option if specified + if no_playlist: + ydl_opts["noplaylist"] = True + + with yt_dlp.YoutubeDL(ydl_opts) as ydl: # type: ignore[arg-type] + info = ydl.extract_info(url, download=False) + + if not isinstance(info, dict): + result_container[0] = None + return + + # Extract relevant fields + result_container[0] = { + "extractor": info.get("extractor", ""), + "title": info.get("title", ""), + "entries": info.get("entries", []), # Will be populated if playlist + "duration": info.get("duration"), + "uploader": info.get("uploader"), + "description": info.get("description"), + "url": url, + } + except Exception as exc: + log(f"Probe error for {url}: {exc}") + result_container[1] = exc + + thread = threading.Thread(target=_do_probe, daemon=False) + thread.start() + thread.join(timeout=timeout_seconds) + + if thread.is_alive(): + # Probe timed out - return None to fall back to direct download + debug(f"Probe timeout for {url} (>={timeout_seconds}s), proceeding with download") + return None + + if result_container[1] is not None: + # Probe error - return None to proceed anyway + return None + + return cast(Optional[Dict[str, Any]], result_container[0]) __all__ = [ - "download_media", "is_url_supported_by_ytdlp", + "list_formats", + "probe_url", "DownloadError", "DownloadOptions", "DownloadMediaResult", ] + diff --git a/helper/file_storage.py b/helper/file_storage.py deleted file mode 100644 index 
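The reworked `probe_url` above runs the yt-dlp metadata probe in a worker thread and gives up after `timeout_seconds` instead of hanging on huge playlists. The pattern in isolation (a sketch; the cmdlet keeps its probe thread non-daemon, while this version uses a daemon thread so an overrunning call cannot block interpreter exit):

```python
import threading
from typing import Any, Callable, Optional


def call_with_timeout(fn: Callable[[], Any], timeout_seconds: float) -> Optional[Any]:
    """Run fn() in a worker thread; return None on timeout or error."""
    box: list = [None, None]  # [result, error]

    def _target() -> None:
        try:
            box[0] = fn()
        except Exception as exc:  # collected, not raised
            box[1] = exc

    worker = threading.Thread(target=_target, daemon=True)
    worker.start()
    worker.join(timeout_seconds)
    if worker.is_alive() or box[1] is not None:
        return None  # timed out or the probe raised; caller falls back
    return box[0]


# Example: call_with_timeout(lambda: probe_url("https://example.com/watch?v=x"), 15)
```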
758156e..0000000 --- a/helper/file_storage.py +++ /dev/null @@ -1,1535 +0,0 @@ -"""File storage abstraction layer for uploading files to different services. - -Supports multiple backend storage services (0x0.st, local directories, Hydrus, etc.) -with a unified interface. - -Example: - storage = FileStorage() - - # Upload to 0x0.st - url = storage["0x0"].upload(Path("file.mp3")) - - # Copy to local directory - path = storage["local"].upload(Path("file.mp3"), location="/home/user/files") - - # Upload to Hydrus - hash_result = storage["hydrus"].upload(file_path, config=config) -""" - -from __future__ import annotations - -from abc import ABC, abstractmethod -from pathlib import Path -from typing import Any, Dict, Optional -import sys -import shutil -import requests -import re - -from helper.logger import log, debug -from helper.utils_constant import mime_maps -from helper.utils import sha256_file - - -HEX_DIGITS = set("0123456789abcdef") - - -def _normalize_hex_hash(value: Optional[str]) -> Optional[str]: - """Return a normalized 64-character lowercase hash or None.""" - if value is None: - return None - - try: - cleaned = ''.join(ch for ch in str(value).strip().lower() if ch in HEX_DIGITS) - except Exception: - return None - - if len(cleaned) == 64: - return cleaned - return None - - -def _resolve_file_hash(candidate: Optional[str], path: Path) -> Optional[str]: - """Return the given hash if valid, otherwise compute sha256 from disk.""" - normalized = _normalize_hex_hash(candidate) - if normalized is not None: - return normalized - - if not path.exists(): - return None - - try: - return sha256_file(path) - except Exception as exc: - debug(f"Failed to compute hash for {path}: {exc}") - return None - - -class StorageBackend(ABC): - """Abstract base class for file storage backends. - - Backends can optionally support searching by implementing the search() method. - """ - - @abstractmethod - def upload(self, file_path: Path, **kwargs: Any) -> str: - """Upload a file and return a result identifier (URL, hash, path, etc.). - - Args: - file_path: Path to the file to upload - **kwargs: Backend-specific options - - Returns: - Result identifier (e.g., URL for 0x0.st, hash for Hydrus, path for local) - - Raises: - Exception: If upload fails - """ - - @abstractmethod - def get_name(self) -> str: - """Get the unique name of this backend.""" - - def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]: - """Search for files in backends that support it. - - This method is optional and only implemented by searchable backends - (e.g., Hydrus, Debrid, Soulseek). - - Args: - query: Search query string - **kwargs: Backend-specific search options - - Returns: - List of search results, each as a dict with backend-specific fields. - Common fields: 'name', 'size', 'hash', 'url', 'id', etc. - - Raises: - NotImplementedError: If backend doesn't support searching - Exception: If search fails - - Example: - results = storage["hydrus"].search("music artist:john") - for result in results: - print(result['name'], result['hash']) - """ - raise NotImplementedError(f"{self.get_name()} backend does not support searching") - - def supports_search(self) -> bool: - """Check if this backend supports searching. 
- - Returns: - True if search() is implemented, False otherwise - """ - return self.search.__func__ is not StorageBackend.search - - -class LocalStorageBackend(StorageBackend): - """File storage backend for local file system copy.""" - - def __init__(self, location: Optional[str] = None) -> None: - """Initialize local storage backend. - - Args: - location: Default directory path for storage operations - """ - self._location = location - - def get_name(self) -> str: - return "local" - - def upload(self, file_path: Path, **kwargs: Any) -> str: - """Copy or move file to a local directory. - - Args: - file_path: Path to the file to upload - location: Destination directory path (uses default if not provided) - move: When True, move the file instead of copying (default: False) - - Returns: - Absolute path to the copied/moved file - - Raises: - ValueError: If location not provided and no default configured - Exception: If copy fails or duplicate detected - """ - from helper.utils import unique_path as utils_unique_path - from helper.utils import sha256_file - from helper.local_library import LocalLibraryDB - - location = kwargs.get("location") or self._location - move_file = bool(kwargs.get("move")) - if not location: - raise ValueError("'location' parameter required for local storage (not configured)") - - try: - # Compute file hash - file_hash = sha256_file(file_path) - debug(f"File hash: {file_hash}", file=sys.stderr) - - dest_dir = Path(location).expanduser() - dest_dir.mkdir(parents=True, exist_ok=True) - - # Check for duplicate files using LocalLibraryDB (fast - uses index) - try: - with LocalLibraryDB(dest_dir) as db: - existing_path = db.search_by_hash(file_hash) - if existing_path and existing_path.exists(): - log( - f"✓ File already in local storage: {existing_path}", - file=sys.stderr, - ) - return str(existing_path) - except Exception as exc: - log(f"⚠️ Could not check for duplicates in DB: {exc}", file=sys.stderr) - - dest_file = dest_dir / file_path.name - dest_file = utils_unique_path(dest_file) - - if move_file: - shutil.move(str(file_path), dest_file) - debug(f"Local move: {dest_file}", file=sys.stderr) - else: - shutil.copy2(file_path, dest_file) - debug(f"Local copy: {dest_file}", file=sys.stderr) - return str(dest_file) - except Exception as exc: - debug(f"Local copy failed: {exc}", file=sys.stderr) - raise - - def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]: - """Search local database for files by title tag or filename. 
- - Args: - query: Search string supporting: - - Title tag search: "title:document" or just searches DB for matching title tags - - Tag namespace search: "creator:Mac*" matches tags in database - - Filename fallback: if query not in DB, searches filesystem - - "*" means "match all files" - location: Directory to search in (uses default if not provided) - recursive: Search subdirectories (default: True) - - Returns: - List of dicts with 'name', 'path', 'size' fields - """ - from fnmatch import fnmatch - from helper.local_library import LocalLibraryDB - - location = kwargs.get("location") or self._location - if not location: - raise ValueError("'location' parameter required for local search (not configured)") - - limit = kwargs.get("limit") - try: - limit = int(limit) if limit is not None else None - except (TypeError, ValueError): - limit = None - if isinstance(limit, int) and limit <= 0: - limit = None - - query_lower = query.lower() - match_all = query_lower == "*" - results = [] - search_dir = Path(location).expanduser() - debug(f"Searching local storage at: {search_dir}") - - # Support comma-separated AND queries (token1,token2,...). Each token must match. - tokens = [t.strip() for t in query.split(',') if t.strip()] - - # Require explicit namespace for hash lookups to avoid accidental filename matches - if not match_all and len(tokens) == 1 and _normalize_hex_hash(query_lower): - debug("Hash queries require 'hash:' prefix for local search") - return results - - # Require explicit namespace for hash lookups to avoid accidental filename matches - if not match_all and _normalize_hex_hash(query_lower): - debug("Hash queries require 'hash:' prefix for local search") - return results - - def _create_entry(file_path: Path, tags: list[str], size_bytes: int | None, db_hash: Optional[str]) -> dict[str, Any]: - path_str = str(file_path) - entry = { - "name": file_path.stem, - "title": next((t.split(':', 1)[1] for t in tags if t.lower().startswith('title:')), file_path.stem), - "ext": file_path.suffix.lstrip('.'), - "path": path_str, - "target": path_str, - "origin": "local", - "size": size_bytes, - "size_bytes": size_bytes, - "tags": tags, - } - hash_value = _resolve_file_hash(db_hash, file_path) - if hash_value: - entry["hash"] = hash_value - entry["hash_hex"] = hash_value - entry["file_hash"] = hash_value - return entry - - try: - if not search_dir.exists(): - debug(f"Search directory does not exist: {search_dir}") - return results - - # Try database search first (much faster than filesystem scan) - try: - with LocalLibraryDB(search_dir) as db: - cursor = db.connection.cursor() - - # Check if query is a tag namespace search (format: "namespace:pattern") - if tokens and len(tokens) > 1: - # AND mode across comma-separated tokens - def _like_pattern(term: str) -> str: - return term.replace('*', '%').replace('?', '_') - - def _ids_for_token(token: str, cursor) -> set[int]: - token = token.strip() - if not token: - return set() - - # Namespaced token - if ':' in token and not token.startswith(':'): - namespace, pattern = token.split(':', 1) - namespace = namespace.strip().lower() - pattern = pattern.strip().lower() - - if namespace == 'hash': - normalized_hash = _normalize_hex_hash(pattern) - if not normalized_hash: - return set() - cursor.execute( - """ - SELECT id FROM files - WHERE LOWER(file_hash) = ? 
- """, - (normalized_hash,) - ) - return {row[0] for row in cursor.fetchall()} - - if namespace == 'store': - # Local backend only serves local store - if pattern not in {'local', 'file', 'filesystem'}: - return set() - cursor.execute("SELECT id FROM files") - return {row[0] for row in cursor.fetchall()} - - # Generic namespace match on tags - query_pattern = f"{namespace}:%" - cursor.execute( - """ - SELECT DISTINCT f.id, t.tag - FROM files f - JOIN tags t ON f.id = t.file_id - WHERE LOWER(t.tag) LIKE ? - """, - (query_pattern,) - ) - matched: set[int] = set() - for file_id, tag_val in cursor.fetchall(): - if not tag_val: - continue - tag_lower = str(tag_val).lower() - if not tag_lower.startswith(f"{namespace}:"): - continue - value = tag_lower[len(namespace)+1:] - if fnmatch(value, pattern): - matched.add(int(file_id)) - return matched - - # Bare token: match filename OR any tag (including title) - term = token.lower() - like_pattern = f"%{_like_pattern(term)}%" - - ids: set[int] = set() - # Filename match - cursor.execute( - """ - SELECT DISTINCT id FROM files - WHERE LOWER(file_path) LIKE ? - """, - (like_pattern,) - ) - ids.update(int(row[0]) for row in cursor.fetchall()) - - # Tag match (any namespace, including title) - cursor.execute( - """ - SELECT DISTINCT f.id - FROM files f - JOIN tags t ON f.id = t.file_id - WHERE LOWER(t.tag) LIKE ? - """, - (like_pattern,) - ) - ids.update(int(row[0]) for row in cursor.fetchall()) - return ids - - try: - with LocalLibraryDB(search_dir) as db: - cursor = db.connection.cursor() - matching_ids: set[int] | None = None - for token in tokens: - ids = _ids_for_token(token, cursor) - matching_ids = ids if matching_ids is None else matching_ids & ids - if not matching_ids: - return results - - if not matching_ids: - return results - - # Fetch rows for matching IDs - placeholders = ",".join(["?"] * len(matching_ids)) - fetch_sql = f""" - SELECT id, file_path, file_size, file_hash - FROM files - WHERE id IN ({placeholders}) - ORDER BY file_path - LIMIT ? - """ - cursor.execute(fetch_sql, (*matching_ids, limit or len(matching_ids))) - rows = cursor.fetchall() - for file_id, file_path_str, size_bytes, file_hash in rows: - if not file_path_str: - continue - file_path = Path(file_path_str) - if not file_path.exists(): - continue - if size_bytes is None: - try: - size_bytes = file_path.stat().st_size - except OSError: - size_bytes = None - cursor.execute( - """ - SELECT tag FROM tags WHERE file_id = ? - """, - (file_id,), - ) - tags = [row[0] for row in cursor.fetchall()] - entry = _create_entry(file_path, tags, size_bytes, file_hash) - results.append(entry) - if limit is not None and len(results) >= limit: - return results - return results - except Exception as exc: - log(f"⚠️ AND search failed: {exc}", file=sys.stderr) - debug(f"AND search exception details: {exc}") - return [] - - if ":" in query and not query.startswith(":"): - namespace, pattern = query.split(":", 1) - namespace = namespace.strip().lower() - pattern = pattern.strip().lower() - debug(f"Performing namespace search: {namespace}:{pattern}") - - # Special-case hash: lookups against file_hash column - if namespace == "hash": - normalized_hash = _normalize_hex_hash(pattern) - if not normalized_hash: - return results - cursor.execute( - """ - SELECT id, file_path, file_size, file_hash - FROM files - WHERE LOWER(file_hash) = ? - ORDER BY file_path - LIMIT ? 
- """, - (normalized_hash, limit or 1000), - ) - - for file_id, file_path_str, size_bytes, file_hash in cursor.fetchall(): - if not file_path_str: - continue - file_path = Path(file_path_str) - if not file_path.exists(): - continue - if size_bytes is None: - try: - size_bytes = file_path.stat().st_size - except OSError: - size_bytes = None - cursor.execute( - """ - SELECT tag FROM tags WHERE file_id = ? - """, - (file_id,), - ) - all_tags = [row[0] for row in cursor.fetchall()] - entry = _create_entry(file_path, all_tags, size_bytes, file_hash) - results.append(entry) - if limit is not None and len(results) >= limit: - return results - return results - - # Search for tags matching the namespace and pattern - query_pattern = f"{namespace}:%" - - cursor.execute(""" - SELECT DISTINCT f.id, f.file_path, f.file_size, f.file_hash - FROM files f - JOIN tags t ON f.id = t.file_id - WHERE LOWER(t.tag) LIKE ? - ORDER BY f.file_path - LIMIT ? - """, (query_pattern, limit or 1000)) - - rows = cursor.fetchall() - debug(f"Found {len(rows)} potential matches in DB") - - # Filter results by pattern match - for file_id, file_path_str, size_bytes, file_hash in rows: - if not file_path_str: - continue - - # Get the file's tags and check if any match the pattern - cursor.execute(""" - SELECT DISTINCT tag FROM tags - WHERE file_id = ? - AND LOWER(tag) LIKE ? - """, (file_id, query_pattern)) - - tags = [row[0] for row in cursor.fetchall()] - - # Check if any tag matches the pattern (case-insensitive wildcard) - for tag in tags: - tag_lower = tag.lower() - # Extract the value part after "namespace:" - if tag_lower.startswith(f"{namespace}:"): - value = tag_lower[len(namespace)+1:] - # Use fnmatch for wildcard matching - if fnmatch(value, pattern): - file_path = Path(file_path_str) - if file_path.exists(): - if size_bytes is None: - size_bytes = file_path.stat().st_size - cursor.execute(""" - SELECT tag FROM tags WHERE file_id = ? - """, (file_id,)) - all_tags = [row[0] for row in cursor.fetchall()] - entry = _create_entry(file_path, all_tags, size_bytes, file_hash) - results.append(entry) - else: - debug(f"File missing on disk: {file_path}") - break # Don't add same file multiple times - - if limit is not None and len(results) >= limit: - return results - - elif not match_all: - # Search by filename or simple tags (namespace-agnostic for plain text) - # For plain text search, match: - # 1. Filenames containing the query - # 2. Simple tags (without namespace) containing the query - # NOTE: Does NOT match namespaced tags (e.g., "joe" won't match "channel:Joe Mullan") - # Use explicit namespace search for that (e.g., "channel:joe*") - - # Split query into terms for AND logic - terms = [t.strip() for t in query_lower.replace(',', ' ').split() if t.strip()] - if not terms: - terms = [query_lower] - - debug(f"Performing filename/tag search for terms: {terms}") - - # Fetch more results than requested to allow for filtering - fetch_limit = (limit or 45) * 50 - - # 1. Filename search (AND logic) - conditions = ["LOWER(f.file_path) LIKE ?" for _ in terms] - params = [f"%{t}%" for t in terms] - where_clause = " AND ".join(conditions) - - cursor.execute(f""" - SELECT DISTINCT f.id, f.file_path, f.file_size, f.file_hash - FROM files f - WHERE {where_clause} - ORDER BY f.file_path - LIMIT ? 
- """, (*params, fetch_limit)) - - rows = cursor.fetchall() - debug(f"Found {len(rows)} filename matches in DB (before whole-word filter)") - - # Compile regex for whole word matching (only if single term, otherwise skip) - word_regex = None - if len(terms) == 1: - term = terms[0] - # Check if term contains wildcard characters - has_wildcard = '*' in term or '?' in term - - if has_wildcard: - # Use fnmatch for wildcard patterns (e.g., "sie*" matches "SiebeliebenWohl...") - try: - from fnmatch import translate - word_regex = re.compile(translate(term), re.IGNORECASE) - except Exception: - word_regex = None - else: - # Use custom boundary that treats underscores as separators - # \b treats _ as a word character, so "foo_bar" wouldn't match "bar" with \b - try: - # Match if not preceded or followed by alphanumeric chars - pattern = r'(?= limit: - return results - - # Title-tag search: treat freeform terms as title namespace queries (AND across terms) - if terms: - title_hits: dict[int, dict[str, Any]] = {} - for term in terms: - cursor.execute( - """ - SELECT DISTINCT f.id, f.file_path, f.file_size, f.file_hash - FROM files f - JOIN tags t ON f.id = t.file_id - WHERE LOWER(t.tag) LIKE ? - ORDER BY f.file_path - LIMIT ? - """, - (f"title:%{term}%", fetch_limit), - ) - for file_id, file_path_str, size_bytes, file_hash in cursor.fetchall(): - if not file_path_str: - continue - entry = title_hits.get(file_id) - if entry: - entry["count"] += 1 - if size_bytes is not None: - entry["size"] = size_bytes - else: - title_hits[file_id] = { - "path": file_path_str, - "size": size_bytes, - "hash": file_hash, - "count": 1, - } - - if title_hits: - required = len(terms) - for file_id, info in title_hits.items(): - if info.get("count") != required: - continue - file_path_str = info.get("path") - if not file_path_str or file_path_str in seen_files: - continue - file_path = Path(file_path_str) - if not file_path.exists(): - continue - seen_files.add(file_path_str) - - size_bytes = info.get("size") - if size_bytes is None: - try: - size_bytes = file_path.stat().st_size - except OSError: - size_bytes = None - - cursor.execute( - """ - SELECT tag FROM tags WHERE file_id = ? - """, - (file_id,), - ) - tags = [row[0] for row in cursor.fetchall()] - entry = _create_entry(file_path, tags, size_bytes, info.get("hash")) - results.append(entry) - if limit is not None and len(results) >= limit: - return results - - # Also search for simple tags (without namespace) containing the query - # Only perform tag search if single term, or if we want to support multi-term tag search - # For now, fallback to single pattern search for tags if multiple terms - # (searching for a tag that contains "term1 term2" or "term1,term2") - # This is less useful for AND logic across multiple tags, but consistent with previous behavior - query_pattern = f"%{query_lower}%" - - cursor.execute(""" - SELECT DISTINCT f.id, f.file_path, f.file_size, f.file_hash - FROM files f - JOIN tags t ON f.id = t.file_id - WHERE LOWER(t.tag) LIKE ? AND LOWER(t.tag) NOT LIKE '%:%' - ORDER BY f.file_path - LIMIT ? 
- """, (query_pattern, limit or 1000)) - - tag_rows = cursor.fetchall() - for file_id, file_path_str, size_bytes, file_hash in tag_rows: - if not file_path_str or file_path_str in seen_files: - continue - seen_files.add(file_path_str) - - file_path = Path(file_path_str) - if file_path.exists(): - path_str = str(file_path) - if size_bytes is None: - size_bytes = file_path.stat().st_size - - # Fetch tags for this file - cursor.execute(""" - SELECT tag FROM tags WHERE file_id = ? - """, (file_id,)) - tags = [row[0] for row in cursor.fetchall()] - entry = _create_entry(file_path, tags, size_bytes, file_hash) - results.append(entry) - - if limit is not None and len(results) >= limit: - return results - - else: - # Match all - get all files from database - cursor.execute(""" - SELECT id, file_path, file_size, file_hash - FROM files - ORDER BY file_path - LIMIT ? - """, (limit or 1000,)) - - rows = cursor.fetchall() - for file_id, file_path_str, size_bytes, file_hash in rows: - if file_path_str: - file_path = Path(file_path_str) - if file_path.exists(): - path_str = str(file_path) - if size_bytes is None: - size_bytes = file_path.stat().st_size - - # Fetch tags for this file - cursor.execute(""" - SELECT tag FROM tags WHERE file_id = ? - """, (file_id,)) - tags = [row[0] for row in cursor.fetchall()] - entry = _create_entry(file_path, tags, size_bytes, file_hash) - results.append(entry) - - if results: - debug(f"Returning {len(results)} results from DB") - else: - debug("No results found in DB") - return results - - except Exception as e: - log(f"⚠️ Database search failed: {e}", file=sys.stderr) - debug(f"DB search exception details: {e}") - return [] - - except Exception as exc: - log(f"❌ Local search failed: {exc}", file=sys.stderr) - raise - - -class HydrusStorageBackend(StorageBackend): - """File storage backend for Hydrus client.""" - - def __init__(self, config: Optional[Dict[str, Any]] = None) -> None: - """Initialize Hydrus storage backend. - - Args: - config: Configuration dict with Hydrus settings (HydrusNetwork section) - """ - self._config = config or {} - - def get_name(self) -> str: - return "hydrus" - - def upload(self, file_path: Path, **kwargs: Any) -> str: - """Upload file to Hydrus. 
- - Args: - file_path: Path to the file to upload - tags: Optional list of tags to add (uses default config if not provided) - config: Optional override for config (uses default if not provided) - - Returns: - File hash from Hydrus - - Raises: - Exception: If upload fails - """ - from helper import hydrus as hydrus_wrapper - from helper.utils import sha256_file - - config = kwargs.get("config") or self._config - if not config: - raise ValueError("'config' parameter required for Hydrus storage (not configured)") - - tags = kwargs.get("tags", []) - - try: - # Compute file hash - file_hash = sha256_file(file_path) - debug(f"File hash: {file_hash}") - - # Build Hydrus client - client = hydrus_wrapper.get_client(config) - if client is None: - raise Exception("Hydrus client unavailable") - - # Check if file already exists in Hydrus - try: - metadata = client.fetch_file_metadata(hashes=[file_hash]) - if metadata and isinstance(metadata, dict): - files = metadata.get("file_metadata", []) - if files: - log( - f"ℹ️ Duplicate detected - file already in Hydrus with hash: {file_hash}", - file=sys.stderr, - ) - # Even if duplicate, we should add tags if provided - if tags: - try: - service_name = hydrus_wrapper.get_tag_service_name(config) - except Exception: - service_name = "my tags" - - try: - debug(f"Adding {len(tags)} tag(s) to existing file in Hydrus: {tags}") - client.add_tags(file_hash, tags, service_name) - log(f"✅ Tags added to existing file via '{service_name}'", file=sys.stderr) - except Exception as exc: - log(f"⚠️ Failed to add tags to existing file: {exc}", file=sys.stderr) - - return file_hash - except Exception: - pass - - # Upload file to Hydrus - log(f"Uploading to Hydrus: {file_path.name}", file=sys.stderr) - response = client.add_file(file_path) - - # Extract hash from response - hydrus_hash: Optional[str] = None - if isinstance(response, dict): - hydrus_hash = response.get("hash") or response.get("file_hash") - if not hydrus_hash: - hashes = response.get("hashes") - if isinstance(hashes, list) and hashes: - hydrus_hash = hashes[0] - - if not hydrus_hash: - raise Exception(f"Hydrus response missing file hash: {response}") - - file_hash = hydrus_hash - log(f"Hydrus: {file_hash}", file=sys.stderr) - - # Add tags if provided - if tags: - try: - service_name = hydrus_wrapper.get_tag_service_name(config) - except Exception: - service_name = "my tags" - - try: - debug(f"Adding {len(tags)} tag(s) to Hydrus: {tags}") - client.add_tags(file_hash, tags, service_name) - log(f"✅ Tags added via '{service_name}'", file=sys.stderr) - except Exception as exc: - log(f"⚠️ Failed to add tags: {exc}", file=sys.stderr) - - return file_hash - - except Exception as exc: - log(f"❌ Hydrus upload failed: {exc}", file=sys.stderr) - raise - - def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]: - """Search Hydrus database for files matching query. - - Args: - query: Search query (tags, filenames, hashes, etc.) 
- limit: Maximum number of results to return (default: 100) - config: Optional override for config (uses default if not provided) - - Returns: - List of dicts with 'name', 'hash', 'size', 'tags' fields - - Example: - results = storage["hydrus"].search("artist:john_doe music") - results = storage["hydrus"].search("Simple Man") - """ - from helper import hydrus as hydrus_wrapper - - config = kwargs.get("config") or self._config - if not config: - raise ValueError("'config' parameter required for Hydrus search (not configured)") - - limit = kwargs.get("limit", 100) - - try: - client = hydrus_wrapper.get_client(config) - if client is None: - raise Exception("Hydrus client unavailable") - - debug(f"Searching Hydrus for: {query}") - - # Parse the query into tags - # Handle both simple tags and complex queries - # "*" means "match all" - use system:everything tag in Hydrus - if query.strip() == "*": - # Use system:everything to match all files in Hydrus - tags = ["system:everything"] - else: - query_lower = query.lower().strip() - # If query doesn't have a namespace (no ':'), search all files and filter by title/tags - # If query has explicit namespace, use it as a tag search - if ':' not in query_lower: - # No namespace provided: search all files, then filter by title/tags containing the query - tags = ["system:everything"] - else: - # User provided explicit namespace (e.g., "creator:john" or "system:has_audio") - # Use it as a tag search - tags = [query_lower] - - if not tags: - debug(f"Found 0 result(s)") - return [] - - # Search files with the tags - search_result = client.search_files( - tags=tags, - return_hashes=True, - return_file_ids=True - ) - - # Extract file IDs from search result - file_ids = search_result.get("file_ids", []) - hashes = search_result.get("hashes", []) - - if not file_ids and not hashes: - debug(f"Found 0 result(s)") - return [] - - # Fetch metadata for the found files - results = [] - query_lower = query.lower().strip() - # Split by comma or space for AND logic - search_terms = set(query_lower.replace(',', ' ').split()) # For substring matching - - if file_ids: - metadata = client.fetch_file_metadata(file_ids=file_ids) - metadata_list = metadata.get("metadata", []) - - for meta in metadata_list: - if len(results) >= limit: - break - - file_id = meta.get("file_id") - hash_hex = meta.get("hash") - size = meta.get("size", 0) - - # Get tags for this file and extract title - tags_set = meta.get("tags", {}) - all_tags = [] - title = f"Hydrus File {file_id}" # Default fallback - all_tags_str = "" # For substring matching - - # debug(f"[HydrusBackend.search] Processing file_id={file_id}, tags type={type(tags_set)}") - - if isinstance(tags_set, dict): - # Collect both storage_tags and display_tags to capture siblings/parents and ensure title: is seen - def _collect(tag_list: Any) -> None: - nonlocal title, all_tags_str - if not isinstance(tag_list, list): - return - for tag in tag_list: - tag_text = str(tag) if tag else "" - if not tag_text: - continue - all_tags.append(tag_text) - all_tags_str += " " + tag_text.lower() - if tag_text.lower().startswith("title:") and title == f"Hydrus File {file_id}": - title = tag_text.split(":", 1)[1].strip() - - for service_name, service_tags in tags_set.items(): - if not isinstance(service_tags, dict): - continue - - storage_tags = service_tags.get("storage_tags", {}) - if isinstance(storage_tags, dict): - for tag_list in storage_tags.values(): - _collect(tag_list) - - display_tags = service_tags.get("display_tags", []) - 
_collect(display_tags) - - # Also consider top-level flattened tags payload if provided (Hydrus API sometimes includes it) - top_level_tags = meta.get("tags_flat", []) or meta.get("tags", []) - _collect(top_level_tags) - - # Resolve extension from MIME type - mime_type = meta.get("mime") - ext = "" - if mime_type: - for category in mime_maps.values(): - for ext_key, info in category.items(): - if mime_type in info.get("mimes", []): - ext = info.get("ext", "").lstrip('.') - break - if ext: - break - - # Filter results based on query type - # If user provided explicit namespace (has ':'), don't do substring filtering - # Just include what the tag search returned - has_namespace = ':' in query_lower - - if has_namespace: - # Explicit namespace search - already filtered by Hydrus tag search - # Include this result as-is - results.append({ - "hash": hash_hex, - "hash_hex": hash_hex, - "target": hash_hex, - "name": title, - "title": title, - "size": size, - "size_bytes": size, - "origin": "hydrus", - "tags": all_tags, - "file_id": file_id, - "mime": mime_type, - "ext": ext, - }) - else: - # Free-form search: check if search terms match the title or tags - # Match if ALL search terms are found in title or tags (AND logic) - # AND use whole word matching - - # Combine title and tags for searching - searchable_text = (title + " " + all_tags_str).lower() - - match = True - if query_lower != "*": - for term in search_terms: - # Regex for whole word: \bterm\b - # Escape term to handle special chars - pattern = r'\b' + re.escape(term) + r'\b' - if not re.search(pattern, searchable_text): - match = False - break - - if match: - results.append({ - "hash": hash_hex, - "hash_hex": hash_hex, - "target": hash_hex, - "name": title, - "title": title, - "size": size, - "size_bytes": size, - "origin": "hydrus", - "tags": all_tags, - "file_id": file_id, - "mime": mime_type, - "ext": ext, - }) - - debug(f"Found {len(results)} result(s)") - return results[:limit] - - except Exception as exc: - log(f"❌ Hydrus search failed: {exc}", file=sys.stderr) - import traceback - traceback.print_exc(file=sys.stderr) - raise -class MatrixStorageBackend(StorageBackend): - """File storage backend for Matrix (Element) chat rooms.""" - - def get_name(self) -> str: - return "matrix" - - def list_rooms(self, config: Dict[str, Any]) -> List[Dict[str, Any]]: - """List joined rooms with their names.""" - matrix_conf = config.get('storage', {}).get('matrix', {}) - homeserver = matrix_conf.get('homeserver') - access_token = matrix_conf.get('access_token') - - if not homeserver or not access_token: - return [] - - if not homeserver.startswith('http'): - homeserver = f"https://{homeserver}" - - headers = {"Authorization": f"Bearer {access_token}"} - - try: - # Get joined rooms - resp = requests.get(f"{homeserver}/_matrix/client/v3/joined_rooms", headers=headers, timeout=10) - if resp.status_code != 200: - return [] - - room_ids = resp.json().get('joined_rooms', []) - rooms = [] - - for rid in room_ids: - # Try to get room name - name = "Unknown Room" - try: - # Get state event for name - name_resp = requests.get( - f"{homeserver}/_matrix/client/v3/rooms/{rid}/state/m.room.name", - headers=headers, - timeout=2 - ) - if name_resp.status_code == 200: - name = name_resp.json().get('name', name) - else: - # Try canonical alias - alias_resp = requests.get( - f"{homeserver}/_matrix/client/v3/rooms/{rid}/state/m.room.canonical_alias", - headers=headers, - timeout=2 - ) - if alias_resp.status_code == 200: - name = alias_resp.json().get('alias', 
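Extension resolution above walks a nested MIME table rather than trusting a filename. A small illustrative subset of that table and the reverse lookup; the real mapping lives in the project's constants module and is larger, so the entries below are assumptions:

```python
# Illustrative subset of the mime_maps structure implied by the loop above.
mime_maps = {
    "audio": {
        "mp3": {"ext": ".mp3", "mimes": ["audio/mpeg", "audio/mp3"]},
        "flac": {"ext": ".flac", "mimes": ["audio/flac", "audio/x-flac"]},
    },
    "video": {
        "mp4": {"ext": ".mp4", "mimes": ["video/mp4"]},
    },
}

def ext_for_mime(mime_type: str) -> str:
    """Reverse-lookup a file extension for a MIME type, '' when unknown."""
    for category in mime_maps.values():
        for info in category.values():
            if mime_type in info.get("mimes", []):
                return info.get("ext", "").lstrip(".")
    return ""

assert ext_for_mime("audio/mpeg") == "mp3"
assert ext_for_mime("application/x-unknown") == ""
```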
name) - except Exception: - pass - - rooms.append({'id': rid, 'name': name}) - - return rooms - except Exception as e: - log(f"Error listing Matrix rooms: {e}", file=sys.stderr) - return [] - - def upload(self, file_path: Path, **kwargs: Any) -> str: - """Upload file to Matrix room. - - Requires 'config' in kwargs with 'storage.matrix' settings: - - homeserver: URL of homeserver (e.g. https://matrix.org) - - user_id: User ID (e.g. @user:matrix.org) - - access_token: Access token (preferred) OR password - - room_id: Room ID to upload to (e.g. !roomid:matrix.org) - """ - config = kwargs.get('config', {}) - if not config: - raise ValueError("Config required for Matrix upload") - - matrix_conf = config.get('storage', {}).get('matrix', {}) - if not matrix_conf: - raise ValueError("Matrix storage not configured in config.json") - - homeserver = matrix_conf.get('homeserver') - # user_id = matrix_conf.get('user_id') # Not strictly needed if we have token - access_token = matrix_conf.get('access_token') - room_id = matrix_conf.get('room_id') - - if not homeserver: - raise ValueError("Matrix homeserver required") - - # Ensure homeserver has protocol - if not homeserver.startswith('http'): - homeserver = f"https://{homeserver}" - - # Login if no access token (optional implementation, for now assume token) - if not access_token: - raise ValueError("Matrix access_token required (login not yet implemented)") - - # Handle room selection if not provided - if not room_id: - log("No room_id configured. Fetching joined rooms...", file=sys.stderr) - rooms = self.list_rooms(config) - - if not rooms: - raise ValueError("No joined rooms found or failed to fetch rooms.") - - from result_table import ResultTable - table = ResultTable("Matrix Rooms") - for i, room in enumerate(rooms): - row = table.add_row() - row.add_column("#", str(i + 1)) - row.add_column("Name", room['name']) - row.add_column("ID", room['id']) - - print(table) - - # Simple interactive selection - try: - selection = input("Select room # to upload to: ") - idx = int(selection) - 1 - if 0 <= idx < len(rooms): - room_id = rooms[idx]['id'] - log(f"Selected room: {rooms[idx]['name']} ({room_id})", file=sys.stderr) - else: - raise ValueError("Invalid selection") - except Exception: - raise ValueError("Invalid room selection") - - if not room_id: - raise ValueError("Matrix room_id required") - - # 1. Upload Media - upload_url = f"{homeserver}/_matrix/media/r3/upload" - headers = { - "Authorization": f"Bearer {access_token}", - "Content-Type": "application/octet-stream" # Or guess mime type - } - - import mimetypes - mime_type, _ = mimetypes.guess_type(file_path) - if mime_type: - headers["Content-Type"] = mime_type - - filename = file_path.name - - try: - with open(file_path, 'rb') as f: - resp = requests.post(upload_url, headers=headers, data=f, params={"filename": filename}) - - if resp.status_code != 200: - raise Exception(f"Matrix upload failed: {resp.text}") - - content_uri = resp.json().get('content_uri') - if not content_uri: - raise Exception("No content_uri returned from Matrix upload") - - # 2. 
Send Message - send_url = f"{homeserver}/_matrix/client/r0/rooms/{room_id}/send/m.room.message" - - # Determine msgtype - msgtype = "m.file" - if mime_type: - if mime_type.startswith("image/"): msgtype = "m.image" - elif mime_type.startswith("video/"): msgtype = "m.video" - elif mime_type.startswith("audio/"): msgtype = "m.audio" - - payload = { - "msgtype": msgtype, - "body": filename, - "url": content_uri, - "info": { - "mimetype": mime_type, - "size": file_path.stat().st_size - } - } - - resp = requests.post(send_url, headers=headers, json=payload) - if resp.status_code != 200: - raise Exception(f"Matrix send message failed: {resp.text}") - - event_id = resp.json().get('event_id') - return f"matrix://{room_id}/{event_id}" - - except Exception as e: - log(f"❌ Matrix upload error: {e}", file=sys.stderr) - raise - - -class RemoteStorageBackend(StorageBackend): - """File storage backend for remote Android/network storage servers. - - Connects to a remote storage server (e.g., running on Android phone) - via REST API. All operations are proxied to the remote server. - """ - - def __init__(self, server_url: str, timeout: int = 30, api_key: str = None) -> None: - """Initialize remote storage backend. - - Args: - server_url: Base URL of remote storage server (e.g., http://192.168.1.100:5000) - timeout: Request timeout in seconds - api_key: Optional API key for authentication - """ - try: - import requests - except ImportError: - raise ImportError("requests library required for RemoteStorageBackend. Install with: pip install requests") - - self.server_url = server_url.rstrip('/') - self.timeout = timeout - self.api_key = api_key - self._session = requests.Session() - - # Add API key to default headers if provided - if self.api_key: - self._session.headers.update({'X-API-Key': self.api_key}) - - def get_name(self) -> str: - return "remote" - - def _request(self, method: str, endpoint: str, **kwargs) -> Dict[str, Any]: - """Make HTTP request to remote server.""" - import requests - from urllib.parse import urljoin - - url = urljoin(self.server_url, endpoint) - - try: - response = self._session.request( - method, - url, - timeout=self.timeout, - **kwargs - ) - - if response.status_code == 404: - raise Exception(f"Remote resource not found: {endpoint}") - - if response.status_code >= 400: - try: - error_data = response.json() - error_msg = error_data.get('error', response.text) - except: - error_msg = response.text - raise Exception(f"Remote server error {response.status_code}: {error_msg}") - - return response.json() - - except requests.exceptions.RequestException as e: - raise Exception(f"Connection to {self.server_url} failed: {e}") - - def upload(self, file_path: Path, **kwargs: Any) -> str: - """Upload file to remote storage. 
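Matrix delivery in this hunk is a two-step exchange: POST the bytes to the media repository to obtain an `mxc://` content URI, then send an `m.room.message` event whose `url` field references that URI. A trimmed sketch using the v3 endpoints and a PUT with a transaction ID (the hunk itself uses the older r0-style paths and a plain POST):

```python
import mimetypes
import uuid
from pathlib import Path

import requests

def upload_to_matrix(homeserver: str, token: str, room_id: str, file_path: Path) -> str:
    headers = {"Authorization": f"Bearer {token}"}
    mime_type = mimetypes.guess_type(file_path.name)[0] or "application/octet-stream"

    # Step 1: upload the raw bytes to the media repo -> mxc:// content URI.
    with open(file_path, "rb") as fh:
        resp = requests.post(
            f"{homeserver}/_matrix/media/v3/upload",
            headers={**headers, "Content-Type": mime_type},
            params={"filename": file_path.name},
            data=fh,
            timeout=60,
        )
    resp.raise_for_status()
    content_uri = resp.json()["content_uri"]

    # Step 2: send an m.room.message event that references the uploaded media.
    payload = {
        "msgtype": "m.file",
        "body": file_path.name,
        "url": content_uri,
        "info": {"mimetype": mime_type, "size": file_path.stat().st_size},
    }
    resp = requests.put(
        f"{homeserver}/_matrix/client/v3/rooms/{room_id}/send/m.room.message/{uuid.uuid4()}",
        headers=headers,
        json=payload,
        timeout=30,
    )
    resp.raise_for_status()
    return f"matrix://{room_id}/{resp.json()['event_id']}"
```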
- - Args: - file_path: Path to the file to upload - tags: Optional list of tags to add - urls: Optional list of known URLs - - Returns: - Remote file hash - """ - from helper.utils import sha256_file - - if not file_path.exists(): - raise ValueError(f"File not found: {file_path}") - - try: - # Index the file on remote server - data = {"path": str(file_path)} - - tags = kwargs.get("tags", []) - if tags: - data["tags"] = tags - - urls = kwargs.get("urls", []) - if urls: - data["urls"] = urls - - result = self._request('POST', '/files/index', json=data) - file_hash = result.get('hash') - - if file_hash: - log(f"✓ File indexed on remote storage: {file_hash}", file=sys.stderr) - return file_hash - else: - raise Exception("Remote server did not return file hash") - - except Exception as exc: - debug(f"Remote upload failed: {exc}", file=sys.stderr) - raise - - def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]: - """Search files on remote storage. - - Args: - query: Search query - limit: Maximum results - - Returns: - List of search results - """ - limit = kwargs.get("limit") - try: - limit = int(limit) if limit is not None else 100 - except (TypeError, ValueError): - limit = 100 - - if limit <= 0: - limit = 100 - - try: - response = self._request('GET', '/files/search', params={ - 'q': query, - 'limit': limit - }) - - files = response.get('files', []) - - # Transform remote format to standard result format - results = [] - for f in files: - results.append({ - "name": f.get('name', '').split('/')[-1], # Get filename from path - "title": f.get('name', f.get('path', '')).split('/')[-1], - "ext": f.get('ext', ''), - "path": f.get('path', ''), - "target": f.get('path', ''), - "hash": f.get('hash', ''), - "origin": "remote", - "size": f.get('size', 0), - "size_bytes": f.get('size', 0), - "tags": f.get('tags', []), - }) - - debug(f"Remote search found {len(results)} results", file=sys.stderr) - return results - - except Exception as exc: - log(f"❌ Remote search failed: {exc}", file=sys.stderr) - raise - - -class FileStorage: - """Unified file storage interface supporting multiple backend services. - - Example: - storage = FileStorage(config) - - # Upload to different backends (uses configured locations) - url = storage["0x0"].upload(Path("file.mp3")) - local_path = storage["local"].upload(Path("file.mp3")) # Uses config["Local"]["path"] - hydrus_hash = storage["hydrus"].upload(Path("file.mp3"), tags=["music"]) - - # Search with searchable backends (uses configured locations) - results = storage["hydrus"].search("music") - results = storage["local"].search("song") # Uses config["Local"]["path"] - """ - - def __init__(self, config: Optional[Dict[str, Any]] = None) -> None: - """Initialize the file storage system with available backends. - - Args: - config: Configuration dict with backend settings (Local.path, HydrusNetwork, Debrid, etc.) 
- """ - config = config or {} - - # Extract backend-specific settings from config - from config import get_local_storage_path - - local_path = get_local_storage_path(config) - local_path_str = str(local_path) if local_path else None - - self._backends: Dict[str, StorageBackend] = {} - - # Always include local backend (even if no default path configured) - # The location can be specified at upload time if not configured globally - self._backends["local"] = LocalStorageBackend(location=local_path_str) - - # Include Hydrus backend (configuration optional) - self._backends["hydrus"] = HydrusStorageBackend(config=config) - - # Include Matrix backend - self._backends["matrix"] = MatrixStorageBackend() - - # Include remote storage backends from config (for Android/network servers) - remote_storages = config.get("remote_storages", []) - if isinstance(remote_storages, list): - for remote_config in remote_storages: - if isinstance(remote_config, dict): - name = remote_config.get("name", "remote") - url = remote_config.get("url") - timeout = remote_config.get("timeout", 30) - api_key = remote_config.get("api_key") - - if url: - try: - backend = RemoteStorageBackend(url, timeout=timeout, api_key=api_key) - self._backends[name] = backend - auth_status = " (with auth)" if api_key else " (no auth)" - log(f"Registered remote storage backend: {name} -> {url}{auth_status}", file=sys.stderr) - except Exception as e: - log(f"Failed to register remote storage '{name}': {e}", file=sys.stderr) - - def list_backends(self) -> list[str]: - """Return available backend keys for autocomplete and validation.""" - return sorted(self._backends.keys()) - - def __getitem__(self, backend_name: str) -> StorageBackend: - """Get a storage backend by name. - - Args: - backend_name: Name of the backend ('0x0', 'local', 'hydrus') - - Returns: - StorageBackend instance - - Raises: - KeyError: If backend not found - """ - if backend_name not in self._backends: - raise KeyError( - f"Unknown storage backend: {backend_name}. " - f"Available: {list(self._backends.keys())}" - ) - return self._backends[backend_name] - - def register(self, backend: StorageBackend) -> None: - """Register a custom storage backend. - - Args: - backend: StorageBackend instance to register - """ - name = backend.get_name() - self._backends[name] = backend - log(f"Registered storage backend: {name}", file=sys.stderr) - - def list_backends(self) -> list[str]: - """Get list of available backend names. - - Returns: - List of backend names - """ - return list(self._backends.keys()) - - def is_available(self, backend_name: str) -> bool: - """Check if a backend is available. - - Args: - backend_name: Name of the backend - - Returns: - True if backend is registered - """ - return backend_name in self._backends - - def list_searchable_backends(self) -> list[str]: - """Get list of backends that support searching. - - Returns: - List of searchable backend names - """ - return [ - name for name, backend in self._backends.items() - if backend.supports_search() - ] - - def supports_search(self, backend_name: str) -> bool: - """Check if a backend supports searching. 
- - Args: - backend_name: Name of the backend - - Returns: - True if backend supports search(), False otherwise - """ - if backend_name not in self._backends: - return False - return self._backends[backend_name].supports_search() diff --git a/helper/local_library.py b/helper/folder_store.py similarity index 66% rename from helper/local_library.py rename to helper/folder_store.py index 68def1c..787eddc 100644 --- a/helper/local_library.py +++ b/helper/folder_store.py @@ -71,7 +71,7 @@ def read_sidecar(sidecar_path: Path) -> Tuple[Optional[str], List[str], List[str sidecar_path: Path to .tags sidecar file Returns: - Tuple of (hash_value, tags_list, urls_list) + Tuple of (hash_value, tags_list, url_list) Returns (None, [], []) if file doesn't exist or can't be read """ if _read_sidecar_metadata is None: @@ -83,7 +83,7 @@ def read_sidecar(sidecar_path: Path) -> Tuple[Optional[str], List[str], List[str return None, [], [] -def write_sidecar(media_path: Path, tags: List[str], known_urls: List[str], +def write_sidecar(media_path: Path, tags: List[str], url: List[str], hash_value: Optional[str] = None) -> bool: """Write metadata to a sidecar file. @@ -92,7 +92,7 @@ def write_sidecar(media_path: Path, tags: List[str], known_urls: List[str], Args: media_path: Path to the media file (sidecar created as media_path.tags) tags: List of tag strings - known_urls: List of known URL strings + url: List of known URL strings hash_value: Optional SHA256 hash to include Returns: @@ -105,7 +105,7 @@ def write_sidecar(media_path: Path, tags: List[str], known_urls: List[str], return False try: - write_tags(media_path, tags, known_urls, hash_value) + write_tags(media_path, tags, url, hash_value) return True except Exception: return False @@ -143,11 +143,11 @@ def has_sidecar(media_path: Path) -> bool: """Check if a media file has a sidecar.""" return find_sidecar(media_path) is not None -class LocalLibraryDB: +class FolderDB: """SQLite database for caching local library metadata.""" - DB_NAME = ".downlow_library.db" - SCHEMA_VERSION = 2 + DB_NAME = "medios-macina.db" + SCHEMA_VERSION = 4 def __init__(self, library_root: Path): """Initialize the database at the library root. 
@@ -193,10 +193,8 @@ class LocalLibraryDB: cursor.execute(""" CREATE TABLE IF NOT EXISTS files ( - id INTEGER PRIMARY KEY AUTOINCREMENT, + hash TEXT PRIMARY KEY NOT NULL, file_path TEXT UNIQUE NOT NULL, - file_hash TEXT, - file_size INTEGER, file_modified REAL, indexed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP @@ -205,44 +203,39 @@ class LocalLibraryDB: cursor.execute(""" CREATE TABLE IF NOT EXISTS metadata ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - file_id INTEGER UNIQUE NOT NULL, - hash TEXT, - known_urls TEXT, + hash TEXT PRIMARY KEY NOT NULL, + url TEXT, relationships TEXT, duration REAL, size INTEGER, ext TEXT, - media_type TEXT, - media_kind TEXT, + type TEXT, time_imported TIMESTAMP DEFAULT CURRENT_TIMESTAMP, time_modified TIMESTAMP DEFAULT CURRENT_TIMESTAMP, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE + FOREIGN KEY (hash) REFERENCES files(hash) ON DELETE CASCADE ) """) cursor.execute(""" CREATE TABLE IF NOT EXISTS tags ( id INTEGER PRIMARY KEY AUTOINCREMENT, - file_id INTEGER NOT NULL, + hash TEXT NOT NULL, tag TEXT NOT NULL, - tag_type TEXT DEFAULT 'user', created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE, - UNIQUE(file_id, tag) + FOREIGN KEY (hash) REFERENCES files(hash) ON DELETE CASCADE, + UNIQUE(hash, tag) ) """) cursor.execute(""" CREATE TABLE IF NOT EXISTS notes ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - file_id INTEGER UNIQUE NOT NULL, + hash TEXT PRIMARY KEY NOT NULL, note TEXT, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE + FOREIGN KEY (hash) REFERENCES files(hash) ON DELETE CASCADE ) """) @@ -261,10 +254,8 @@ class LocalLibraryDB: # Create indices for performance cursor.execute("CREATE INDEX IF NOT EXISTS idx_files_path ON files(file_path)") - cursor.execute("CREATE INDEX IF NOT EXISTS idx_files_hash ON files(file_hash)") - cursor.execute("CREATE INDEX IF NOT EXISTS idx_tags_file_id ON tags(file_id)") + cursor.execute("CREATE INDEX IF NOT EXISTS idx_tags_hash ON tags(hash)") cursor.execute("CREATE INDEX IF NOT EXISTS idx_tags_tag ON tags(tag)") - cursor.execute("CREATE INDEX IF NOT EXISTS idx_metadata_file_id ON metadata(file_id)") cursor.execute("CREATE INDEX IF NOT EXISTS idx_worker_id ON worker(worker_id)") cursor.execute("CREATE INDEX IF NOT EXISTS idx_worker_status ON worker(status)") cursor.execute("CREATE INDEX IF NOT EXISTS idx_worker_type ON worker(worker_type)") @@ -409,12 +400,28 @@ class LocalLibraryDB: logger.error(f"Error clearing worker log for {worker_id}: {exc}", exc_info=True) def _migrate_metadata_schema(self, cursor) -> None: - """Add missing columns to metadata table if they don't exist.""" + """Import legacy metadata from old schema if present. Existing hash-based schema is ready to use.""" try: + # Check if this is a fresh new database (hash-based schema) cursor.execute('PRAGMA table_info(metadata)') existing_columns = {row[1] for row in cursor.fetchall()} + # If hash column exists, we're already on the new schema + if 'hash' in existing_columns: + logger.info("Database is already using hash-based schema - no migration needed") + return + + # Legacy migration: If old schema exists, try to import data + # Old schema would have had: id (INTEGER PRIMARY KEY), file_hash (TEXT), etc. 
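The new schema drops the integer `file_id` entirely: `files.hash` is the primary key and metadata, tags, and notes reference it with ON DELETE CASCADE, so deleting a file row removes everything attached to it. A standalone sketch of that relationship with the columns trimmed to the essentials:

```python
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("PRAGMA foreign_keys = ON")
conn.executescript("""
    CREATE TABLE files (
        hash TEXT PRIMARY KEY NOT NULL,
        file_path TEXT UNIQUE NOT NULL
    );
    CREATE TABLE tags (
        id   INTEGER PRIMARY KEY AUTOINCREMENT,
        hash TEXT NOT NULL,
        tag  TEXT NOT NULL,
        FOREIGN KEY (hash) REFERENCES files(hash) ON DELETE CASCADE,
        UNIQUE (hash, tag)
    );
""")

file_hash = "a" * 64                     # placeholder SHA-256
conn.execute("INSERT INTO files VALUES (?, ?)", (file_hash, "/music/song.m4a"))
conn.execute("INSERT INTO tags (hash, tag) VALUES (?, ?)", (file_hash, "title:song"))

conn.execute("DELETE FROM files WHERE hash = ?", (file_hash,))
assert conn.execute("SELECT COUNT(*) FROM tags").fetchone()[0] == 0   # cascade fired
```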
+ if 'id' in existing_columns and 'file_hash' in existing_columns: + logger.info("Detected legacy metadata schema - importing to new hash-based schema") + # This would be complex legacy migration - for now just note it + logger.info("Legacy metadata table detected but import not yet implemented") + return + + # Add any missing columns to the new schema for col_name, col_def in [('size', 'INTEGER'), ('ext', 'TEXT'), + ('type', 'TEXT'), ('time_imported', 'TIMESTAMP DEFAULT CURRENT_TIMESTAMP'), ('time_modified', 'TIMESTAMP DEFAULT CURRENT_TIMESTAMP')]: if col_name not in existing_columns: @@ -422,45 +429,74 @@ class LocalLibraryDB: cursor.execute(f"ALTER TABLE metadata ADD COLUMN {col_name} {col_def}") logger.info(f"Added '{col_name}' column to metadata table") except Exception as e: - logger.warning(f"Could not add '{col_name}' column: {e}") + logger.debug(f"Column '{col_name}' may already exist: {e}") + + # Populate type column from ext if not already populated + if 'type' in existing_columns and 'ext' in existing_columns: + try: + from helper.utils_constant import get_type_from_ext + cursor.execute("SELECT hash, ext FROM metadata WHERE type IS NULL OR type = ''") + rows = cursor.fetchall() + for file_hash, ext in rows: + file_type = get_type_from_ext(ext or '') + cursor.execute("UPDATE metadata SET type = ? WHERE hash = ?", (file_type, file_hash)) + if rows: + logger.info(f"Populated type column for {len(rows)} metadata entries") + except Exception as e: + logger.debug(f"Could not populate type column: {e}") + + self.connection.commit() except Exception as e: - logger.error(f"Error during metadata schema migration: {e}") + logger.debug(f"Note: Schema import/migration completed with status: {e}") - def _update_metadata_modified_time(self, file_id: int) -> None: + def _update_metadata_modified_time(self, file_hash: str) -> None: """Update the time_modified timestamp for a file's metadata.""" try: cursor = self.connection.cursor() cursor.execute(""" - UPDATE metadata SET time_modified = CURRENT_TIMESTAMP WHERE file_id = ? - """, (file_id,)) + UPDATE metadata SET time_modified = CURRENT_TIMESTAMP WHERE hash = ? + """, (file_hash,)) self.connection.commit() except Exception as e: - logger.debug(f"Could not update metadata modified time for file_id {file_id}: {e}") + logger.debug(f"Could not update metadata modified time for hash {file_hash}: {e}") - def get_or_create_file_entry(self, file_path: Path) -> int: - """Get or create a file entry in the database.""" + def get_or_create_file_entry(self, file_path: Path, file_hash: Optional[str] = None) -> str: + """Get or create a file entry in the database and return the hash. 
+ + Args: + file_path: Path to the file + file_hash: Optional hash (will be computed if not provided) + + Returns: + The file hash (primary key) + """ try: str_path = str(file_path.resolve()) logger.debug(f"[get_or_create_file_entry] Looking up: {str_path}") + # If hash not provided, compute it + if not file_hash: + file_hash = sha256_file(file_path) + logger.debug(f"[get_or_create_file_entry] Computed hash: {file_hash}") + cursor = self.connection.cursor() - cursor.execute("SELECT id FROM files WHERE file_path = ?", (str_path,)) + # Check if file entry exists + cursor.execute("SELECT hash FROM files WHERE hash = ?", (file_hash,)) row = cursor.fetchone() if row: - logger.debug(f"[get_or_create_file_entry] Found existing file_id: {row[0]}") - return row[0] + logger.debug(f"[get_or_create_file_entry] Found existing file hash: {file_hash}") + return file_hash logger.debug(f"[get_or_create_file_entry] File entry not found, creating new one") stat = file_path.stat() cursor.execute(""" - INSERT INTO files (file_path, file_size, file_modified) + INSERT INTO files (hash, file_path, file_modified) VALUES (?, ?, ?) - """, (str_path, stat.st_size, stat.st_mtime)) + """, (file_hash, str_path, stat.st_mtime)) - file_id = cursor.lastrowid - logger.debug(f"[get_or_create_file_entry] Created new file_id: {file_id}") + logger.debug(f"[get_or_create_file_entry] Created new file entry for hash: {file_hash}") # Auto-create title tag filename_without_ext = file_path.stem @@ -469,41 +505,39 @@ class LocalLibraryDB: title_value = filename_without_ext.replace("_", " ").strip() title_tag = f"title:{title_value}" cursor.execute(""" - INSERT OR IGNORE INTO tags (file_id, tag, tag_type) - VALUES (?, ?, 'user') - """, (file_id, title_tag)) - logger.debug(f"[get_or_create_file_entry] Auto-created title tag for file_id {file_id}") + INSERT OR IGNORE INTO tags (hash, tag) + VALUES (?, ?) + """, (file_hash, title_tag)) + logger.debug(f"[get_or_create_file_entry] Auto-created title tag for hash {file_hash}") self.connection.commit() - logger.debug(f"[get_or_create_file_entry] Committed file entry {file_id}") - return file_id + logger.debug(f"[get_or_create_file_entry] Committed file entry {file_hash}") + return file_hash except Exception as e: logger.error(f"[get_or_create_file_entry] ❌ Error getting/creating file entry for {file_path}: {e}", exc_info=True) raise - def get_file_id(self, file_path: Path) -> Optional[int]: - """Get the file ID for a file path, or None if not found.""" + def get_file_hash(self, file_path: Path) -> Optional[str]: + """Get the file hash for a file path, or None if not found.""" try: str_path = str(file_path.resolve()) cursor = self.connection.cursor() - cursor.execute("SELECT id FROM files WHERE file_path = ?", (str_path,)) + cursor.execute("SELECT hash FROM files WHERE file_path = ?", (str_path,)) row = cursor.fetchone() return row[0] if row else None except Exception as e: - logger.error(f"Error getting file ID for {file_path}: {e}", exc_info=True) + logger.error(f"Error getting file hash for {file_path}: {e}", exc_info=True) return None - def get_metadata(self, file_path: Path) -> Optional[Dict[str, Any]]: - """Get metadata for a file.""" + def get_metadata(self, file_hash: str) -> Optional[Dict[str, Any]]: + """Get metadata for a file by hash.""" try: - str_path = str(file_path.resolve()) cursor = self.connection.cursor() cursor.execute(""" SELECT m.* FROM metadata m - JOIN files f ON m.file_id = f.id - WHERE f.file_path = ? - """, (str_path,)) + WHERE m.hash = ? 
+ """, (file_hash,)) row = cursor.fetchone() if not row: @@ -512,16 +546,16 @@ class LocalLibraryDB: metadata = dict(row) # Parse JSON fields - for field in ['known_urls', 'relationships']: + for field in ['url', 'relationships']: if metadata.get(field): try: metadata[field] = json.loads(metadata[field]) except (json.JSONDecodeError, TypeError): - metadata[field] = [] if field == 'known_urls' else [] + metadata[field] = [] if field == 'url' else [] return metadata except Exception as e: - logger.error(f"Error getting metadata for {file_path}: {e}", exc_info=True) + logger.error(f"Error getting metadata for hash {file_hash}: {e}", exc_info=True) return None def save_metadata(self, file_path: Path, metadata: Dict[str, Any]) -> None: @@ -530,49 +564,50 @@ class LocalLibraryDB: str_path = str(file_path.resolve()) logger.debug(f"[save_metadata] Starting save for: {str_path}") - file_id = self.get_or_create_file_entry(file_path) - logger.debug(f"[save_metadata] Got/created file_id: {file_id}") + file_hash = self.get_or_create_file_entry(file_path, metadata.get('hash')) + logger.debug(f"[save_metadata] Got/created file_hash: {file_hash}") cursor = self.connection.cursor() - # Update file hash in files table if present - if metadata.get('hash'): - cursor.execute("UPDATE files SET file_hash = ? WHERE id = ?", (metadata['hash'], file_id)) - - known_urls = metadata.get('known_urls', []) - if not isinstance(known_urls, str): - known_urls = json.dumps(known_urls) + url = metadata.get('url', []) + if not isinstance(url, str): + url = json.dumps(url) relationships = metadata.get('relationships', []) if not isinstance(relationships, str): relationships = json.dumps(relationships) + # Determine type from ext if not provided + file_type = metadata.get('type') + ext = metadata.get('ext') + if not file_type and ext: + from helper.utils_constant import get_type_from_ext + file_type = get_type_from_ext(str(ext)) + cursor.execute(""" INSERT INTO metadata ( - file_id, hash, known_urls, relationships, - duration, size, ext, media_type, media_kind, + hash, url, relationships, + duration, size, ext, type, time_imported, time_modified ) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP) - ON CONFLICT(file_id) DO UPDATE SET - hash = excluded.hash, - known_urls = excluded.known_urls, + VALUES (?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP) + ON CONFLICT(hash) DO UPDATE SET + url = excluded.url, relationships = excluded.relationships, duration = excluded.duration, size = excluded.size, ext = excluded.ext, - media_type = excluded.media_type, - media_kind = excluded.media_kind, + type = excluded.type, time_modified = CURRENT_TIMESTAMP, updated_at = CURRENT_TIMESTAMP """, ( - file_id, metadata.get('hash'), known_urls, relationships, - metadata.get('duration'), metadata.get('size'), metadata.get('ext'), - metadata.get('media_type'), metadata.get('media_kind') + file_hash, url, relationships, + metadata.get('duration'), metadata.get('size'), ext, + file_type )) self.connection.commit() - logger.debug(f"[save_metadata] ✅ Committed metadata for file_id {file_id}") + logger.debug(f"[save_metadata] Committed metadata for hash {file_hash}") except Exception as e: logger.error(f"[save_metadata] ❌ Error saving metadata for {file_path}: {e}", exc_info=True) raise @@ -583,82 +618,81 @@ class LocalLibraryDB: str_path = str(file_path.resolve()) logger.debug(f"[save_file_info] Starting save for: {str_path}") - file_id = self.get_or_create_file_entry(file_path) + file_hash = 
self.get_or_create_file_entry(file_path, metadata.get('hash')) cursor = self.connection.cursor() - # Update file hash in files table if present - if metadata.get('hash'): - cursor.execute("UPDATE files SET file_hash = ? WHERE id = ?", (metadata['hash'], file_id)) - # 1. Save Metadata - known_urls = metadata.get('known_urls', []) - if not isinstance(known_urls, str): - known_urls = json.dumps(known_urls) + url = metadata.get('url', []) + if not isinstance(url, str): + url = json.dumps(url) relationships = metadata.get('relationships', []) if not isinstance(relationships, str): relationships = json.dumps(relationships) + # Determine type from ext if not provided + file_type = metadata.get('type') + ext = metadata.get('ext') + if not file_type and ext: + from helper.utils_constant import get_type_from_ext + file_type = get_type_from_ext(str(ext)) + cursor.execute(""" INSERT INTO metadata ( - file_id, hash, known_urls, relationships, - duration, size, ext, media_type, media_kind, + hash, url, relationships, + duration, size, ext, type, time_imported, time_modified ) - VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP) - ON CONFLICT(file_id) DO UPDATE SET - hash = excluded.hash, - known_urls = excluded.known_urls, + VALUES (?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP) + ON CONFLICT(hash) DO UPDATE SET + url = excluded.url, relationships = excluded.relationships, duration = excluded.duration, size = excluded.size, ext = excluded.ext, - media_type = excluded.media_type, - media_kind = excluded.media_kind, + type = excluded.type, time_modified = CURRENT_TIMESTAMP, updated_at = CURRENT_TIMESTAMP """, ( - file_id, metadata.get('hash'), known_urls, relationships, - metadata.get('duration'), metadata.get('size'), metadata.get('ext'), - metadata.get('media_type'), metadata.get('media_kind') + file_hash, url, relationships, + metadata.get('duration'), metadata.get('size'), ext, + file_type )) # 2. Save Tags # We assume tags list is complete and includes title if needed - cursor.execute("DELETE FROM tags WHERE file_id = ?", (file_id,)) + cursor.execute("DELETE FROM tags WHERE hash = ?", (file_hash,)) for tag in tags: tag = tag.strip() if tag: cursor.execute(""" - INSERT OR IGNORE INTO tags (file_id, tag, tag_type) - VALUES (?, ?, 'user') - """, (file_id, tag)) + INSERT OR IGNORE INTO tags (hash, tag) + VALUES (?, ?) + """, (file_hash, tag)) self.connection.commit() - logger.debug(f"[save_file_info] ✅ Committed metadata and tags for file_id {file_id}") + logger.debug(f"[save_file_info] Committed metadata and tags for hash {file_hash}") except Exception as e: logger.error(f"[save_file_info] ❌ Error saving file info for {file_path}: {e}", exc_info=True) raise - def get_tags(self, file_path: Path) -> List[str]: - """Get all tags for a file.""" + def get_tags(self, file_hash: str) -> List[str]: + """Get all tags for a file by hash.""" try: - str_path = str(file_path.resolve()) cursor = self.connection.cursor() cursor.execute(""" SELECT t.tag FROM tags t - JOIN files f ON t.file_id = f.id - WHERE f.file_path = ? + WHERE t.hash = ? 
ORDER BY t.tag - """, (str_path,)) + """, (file_hash,)) return [row[0] for row in cursor.fetchall()] except Exception as e: - logger.error(f"Error getting tags for {file_path}: {e}", exc_info=True) + logger.error(f"Error getting tags for hash {file_hash}: {e}", exc_info=True) return [] def save_tags(self, file_path: Path, tags: List[str]) -> None: @@ -667,26 +701,26 @@ class LocalLibraryDB: str_path = str(file_path.resolve()) logger.debug(f"[save_tags] Starting save for: {str_path}") - file_id = self.get_or_create_file_entry(file_path) - logger.debug(f"[save_tags] Got/created file_id: {file_id}") + file_hash = self.get_or_create_file_entry(file_path) + logger.debug(f"[save_tags] Got/created file_hash: {file_hash}") cursor = self.connection.cursor() cursor.execute(""" - SELECT tag FROM tags WHERE file_id = ? AND tag LIKE 'title:%' - """, (file_id,)) + SELECT tag FROM tags WHERE hash = ? AND tag LIKE 'title:%' + """, (file_hash,)) existing_title = cursor.fetchone() - cursor.execute("DELETE FROM tags WHERE file_id = ?", (file_id,)) - logger.debug(f"[save_tags] Deleted existing tags for file_id {file_id}") + cursor.execute("DELETE FROM tags WHERE hash = ?", (file_hash,)) + logger.debug(f"[save_tags] Deleted existing tags for hash {file_hash}") # Check if new tags provide a title new_title_provided = any(str(t).strip().lower().startswith("title:") for t in tags) if existing_title and not new_title_provided: cursor.execute(""" - INSERT INTO tags (file_id, tag, tag_type) VALUES (?, ?, 'user') - """, (file_id, existing_title[0])) + INSERT INTO tags (hash, tag) VALUES (?, ?) + """, (file_hash, existing_title[0])) logger.debug(f"[save_tags] Preserved existing title tag") elif not existing_title and not new_title_provided: filename_without_ext = file_path.stem @@ -695,27 +729,27 @@ class LocalLibraryDB: title_value = filename_without_ext.replace("_", " ").strip() title_tag = f"title:{title_value}" cursor.execute(""" - INSERT INTO tags (file_id, tag, tag_type) VALUES (?, ?, 'user') - """, (file_id, title_tag)) + INSERT INTO tags (hash, tag) VALUES (?, ?) + """, (file_hash, title_tag)) logger.debug(f"[save_tags] Created auto-title tag: {title_tag}") for tag in tags: tag = tag.strip() if tag: cursor.execute(""" - INSERT OR IGNORE INTO tags (file_id, tag, tag_type) - VALUES (?, ?, 'user') - """, (file_id, tag)) + INSERT OR IGNORE INTO tags (hash, tag) + VALUES (?, ?) 
+ """, (file_hash, tag)) self.connection.commit() - logger.debug(f"[save_tags] ✅ Committed {len(tags)} tags for file_id {file_id}") + logger.debug(f"[save_tags] Committed {len(tags)} tags for hash {file_hash}") # Verify they were actually saved - cursor.execute("SELECT COUNT(*) FROM tags WHERE file_id = ?", (file_id,)) + cursor.execute("SELECT COUNT(*) FROM tags WHERE hash = ?", (file_hash,)) saved_count = cursor.fetchone()[0] - logger.debug(f"[save_tags] Verified: {saved_count} tags in database for file_id {file_id}") + logger.debug(f"[save_tags] Verified: {saved_count} tags in database for hash {file_hash}") - self._update_metadata_modified_time(file_id) + self._update_metadata_modified_time(file_hash) except Exception as e: logger.error(f"[save_tags] ❌ Error saving tags for {file_path}: {e}", exc_info=True) raise @@ -723,7 +757,7 @@ class LocalLibraryDB: def add_tags(self, file_path: Path, tags: List[str]) -> None: """Add tags to a file.""" try: - file_id = self.get_or_create_file_entry(file_path) + file_hash = self.get_or_create_file_entry(file_path) cursor = self.connection.cursor() user_title_tag = next((tag.strip() for tag in tags @@ -731,12 +765,12 @@ class LocalLibraryDB: if user_title_tag: cursor.execute(""" - DELETE FROM tags WHERE file_id = ? AND tag LIKE 'title:%' - """, (file_id,)) + DELETE FROM tags WHERE hash = ? AND tag LIKE 'title:%' + """, (file_hash,)) else: cursor.execute(""" - SELECT COUNT(*) FROM tags WHERE file_id = ? AND tag LIKE 'title:%' - """, (file_id,)) + SELECT COUNT(*) FROM tags WHERE hash = ? AND tag LIKE 'title:%' + """, (file_hash,)) has_title = cursor.fetchone()[0] > 0 if not has_title: @@ -746,20 +780,20 @@ class LocalLibraryDB: title_value = filename_without_ext.replace("_", " ").strip() title_tag = f"title:{title_value}" cursor.execute(""" - INSERT OR IGNORE INTO tags (file_id, tag, tag_type) - VALUES (?, ?, 'user') - """, (file_id, title_tag)) + INSERT OR IGNORE INTO tags (hash, tag) + VALUES (?, ?) + """, (file_hash, title_tag)) for tag in tags: tag = tag.strip() if tag: cursor.execute(""" - INSERT OR IGNORE INTO tags (file_id, tag, tag_type) - VALUES (?, ?, 'user') - """, (file_id, tag)) + INSERT OR IGNORE INTO tags (hash, tag) + VALUES (?, ?) + """, (file_hash, tag)) self.connection.commit() - self._update_metadata_modified_time(file_id) + self._update_metadata_modified_time(file_hash) logger.debug(f"Added {len(tags)} tags for {file_path}") except Exception as e: logger.error(f"Error adding tags for {file_path}: {e}", exc_info=True) @@ -768,7 +802,7 @@ class LocalLibraryDB: def remove_tags(self, file_path: Path, tags: List[str]) -> None: """Remove specific tags from a file.""" try: - str_path = str(file_path.resolve()) + file_hash = self.get_or_create_file_entry(file_path) cursor = self.connection.cursor() for tag in tags: @@ -776,15 +810,91 @@ class LocalLibraryDB: if tag: cursor.execute(""" DELETE FROM tags - WHERE file_id = (SELECT id FROM files WHERE file_path = ?) + WHERE hash = ? AND tag = ? 
- """, (str_path, tag)) + """, (file_hash, tag)) self.connection.commit() logger.debug(f"Removed {len(tags)} tags for {file_path}") except Exception as e: logger.error(f"Error removing tags for {file_path}: {e}", exc_info=True) raise + + def add_tags_to_hash(self, file_hash: str, tags: List[str]) -> None: + """Add tags to a file by hash.""" + try: + cursor = self.connection.cursor() + + user_title_tag = next((tag.strip() for tag in tags + if tag.strip().lower().startswith('title:')), None) + + if user_title_tag: + cursor.execute(""" + DELETE FROM tags WHERE hash = ? AND tag LIKE 'title:%' + """, (file_hash,)) + + for tag in tags: + tag = tag.strip() + if tag: + cursor.execute(""" + INSERT OR IGNORE INTO tags (hash, tag) + VALUES (?, ?) + """, (file_hash, tag)) + + self.connection.commit() + self._update_metadata_modified_time(file_hash) + logger.debug(f"Added {len(tags)} tags for hash {file_hash}") + except Exception as e: + logger.error(f"Error adding tags for hash {file_hash}: {e}", exc_info=True) + raise + + def remove_tags_from_hash(self, file_hash: str, tags: List[str]) -> None: + """Remove specific tags from a file by hash.""" + try: + cursor = self.connection.cursor() + + for tag in tags: + tag = tag.strip() + if tag: + cursor.execute(""" + DELETE FROM tags + WHERE hash = ? + AND tag = ? + """, (file_hash, tag)) + + self.connection.commit() + logger.debug(f"Removed {len(tags)} tags for hash {file_hash}") + except Exception as e: + logger.error(f"Error removing tags for hash {file_hash}: {e}", exc_info=True) + raise + + def update_metadata_by_hash(self, file_hash: str, metadata_updates: Dict[str, Any]) -> None: + """Update metadata for a file by hash.""" + try: + cursor = self.connection.cursor() + + fields = [] + values = [] + + for key, value in metadata_updates.items(): + if key in ['url', 'relationships']: + if not isinstance(value, str): + value = json.dumps(value) + fields.append(f"{key} = ?") + values.append(value) + + if not fields: + return + + values.append(file_hash) + + sql = f"UPDATE metadata SET {', '.join(fields)}, time_modified = CURRENT_TIMESTAMP, updated_at = CURRENT_TIMESTAMP WHERE hash = ?" + cursor.execute(sql, values) + self.connection.commit() + logger.debug(f"Updated metadata for hash {file_hash}") + except Exception as e: + logger.error(f"Error updating metadata for hash {file_hash}: {e}", exc_info=True) + raise def set_relationship(self, file_path: Path, related_file_path: Path, rel_type: str = "alt") -> None: """Set a relationship between two local files. @@ -798,31 +908,15 @@ class LocalLibraryDB: str_path = str(file_path.resolve()) str_related_path = str(related_file_path.resolve()) - file_id = self.get_or_create_file_entry(file_path) - related_file_id = self.get_or_create_file_entry(related_file_path) + file_hash = self.get_or_create_file_entry(file_path) + related_file_hash = self.get_or_create_file_entry(related_file_path) cursor = self.connection.cursor() - # Get hashes for both files - file_hash = sha256_file(file_path) - related_file_hash = sha256_file(related_file_path) - - if not file_hash or not related_file_hash: - logger.warning(f"Cannot set relationship: missing hash for {file_path} or {related_file_path}") - return - - # Store the hashes in the files table for future lookups + # Get current relationships for the main file cursor.execute(""" - UPDATE files SET file_hash = ? WHERE id = ? - """, (file_hash, file_id)) - cursor.execute(""" - UPDATE files SET file_hash = ? WHERE id = ? 
- """, (related_file_hash, related_file_id)) - - # Get current relationships - cursor.execute(""" - SELECT relationships FROM metadata WHERE file_id = ? - """, (file_id,)) + SELECT relationships FROM metadata WHERE hash = ? + """, (file_hash,)) row = cursor.fetchone() # Use index access to be safe regardless of row_factory @@ -850,12 +944,12 @@ class LocalLibraryDB: # Save the updated relationships for the main file cursor.execute(""" - INSERT INTO metadata (file_id, relationships) + INSERT INTO metadata (hash, relationships) VALUES (?, ?) - ON CONFLICT(file_id) DO UPDATE SET + ON CONFLICT(hash) DO UPDATE SET relationships = excluded.relationships, time_modified = CURRENT_TIMESTAMP - """, (file_id, json.dumps(relationships))) + """, (file_hash, json.dumps(relationships))) logger.debug(f"Set {rel_type} relationship: {str_path} ({file_hash}) -> {str_related_path} ({related_file_hash})") @@ -867,8 +961,8 @@ class LocalLibraryDB: # Update the related file cursor.execute(""" - SELECT relationships FROM metadata WHERE file_id = ? - """, (related_file_id,)) + SELECT relationships FROM metadata WHERE hash = ? + """, (related_file_hash,)) row = cursor.fetchone() relationships_str = row[0] if row else None @@ -892,12 +986,12 @@ class LocalLibraryDB: # Save the updated reverse relationships cursor.execute(""" - INSERT INTO metadata (file_id, relationships) + INSERT INTO metadata (hash, relationships) VALUES (?, ?) - ON CONFLICT(file_id) DO UPDATE SET + ON CONFLICT(hash) DO UPDATE SET relationships = excluded.relationships, time_modified = CURRENT_TIMESTAMP - """, (related_file_id, json.dumps(reverse_relationships))) + """, (related_file_hash, json.dumps(reverse_relationships))) self.connection.commit() @@ -928,7 +1022,7 @@ class LocalLibraryDB: cursor.execute(""" SELECT f.file_path, m.relationships FROM metadata m - JOIN files f ON m.file_id = f.id + JOIN files f ON m.hash = f.hash WHERE m.relationships LIKE ? """, (f"%{target_hash}%",)) @@ -957,37 +1051,35 @@ class LocalLibraryDB: logger.error(f"Error finding files pointing to {target_path}: {e}", exc_info=True) return [] - def get_note(self, file_path: Path) -> Optional[str]: - """Get note for a file.""" + def get_note(self, file_hash: str) -> Optional[str]: + """Get note for a file by hash.""" try: - str_path = str(file_path.resolve()) cursor = self.connection.cursor() cursor.execute(""" SELECT n.note FROM notes n - JOIN files f ON n.file_id = f.id - WHERE f.file_path = ? - """, (str_path,)) + WHERE n.hash = ? + """, (file_hash,)) row = cursor.fetchone() return row[0] if row else None except Exception as e: - logger.error(f"Error getting note for {file_path}: {e}", exc_info=True) + logger.error(f"Error getting note for hash {file_hash}: {e}", exc_info=True) return None def save_note(self, file_path: Path, note: str) -> None: """Save note for a file.""" try: - file_id = self.get_or_create_file_entry(file_path) + file_hash = self.get_or_create_file_entry(file_path) cursor = self.connection.cursor() cursor.execute(""" - INSERT INTO notes (file_id, note) + INSERT INTO notes (hash, note) VALUES (?, ?) 
- ON CONFLICT(file_id) DO UPDATE SET + ON CONFLICT(hash) DO UPDATE SET note = excluded.note, updated_at = CURRENT_TIMESTAMP - """, (file_id, note)) + """, (file_hash, note)) self.connection.commit() logger.debug(f"Saved note for {file_path}") @@ -995,30 +1087,30 @@ class LocalLibraryDB: logger.error(f"Error saving note for {file_path}: {e}", exc_info=True) raise - def search_by_tag(self, tag: str, limit: int = 100) -> List[Path]: - """Search for files with a specific tag.""" + def search_by_tag(self, tag: str, limit: int = 100) -> List[tuple]: + """Search for files with a specific tag. Returns list of (hash, file_path) tuples.""" try: cursor = self.connection.cursor() cursor.execute(""" - SELECT DISTINCT f.file_path FROM files f - JOIN tags t ON f.id = t.file_id + SELECT DISTINCT f.hash, f.file_path FROM files f + JOIN tags t ON f.hash = t.hash WHERE t.tag = ? LIMIT ? """, (tag, limit)) - return [Path(row[0]) for row in cursor.fetchall()] + return cursor.fetchall() except Exception as e: logger.error(f"Error searching by tag '{tag}': {e}", exc_info=True) return [] - def search_by_hash(self, file_hash: str) -> Optional[Path]: + def search_hash(self, file_hash: str) -> Optional[Path]: """Search for a file by hash.""" try: cursor = self.connection.cursor() cursor.execute(""" - SELECT file_path FROM files WHERE file_hash = ? + SELECT file_path FROM files WHERE hash = ? """, (file_hash,)) row = cursor.fetchone() @@ -1028,21 +1120,13 @@ class LocalLibraryDB: return None def update_file_hash(self, file_path: Path, file_hash: str) -> None: - """Update the file hash.""" - try: - str_path = str(file_path.resolve()) - cursor = self.connection.cursor() - - cursor.execute(""" - UPDATE files SET file_hash = ?, updated_at = CURRENT_TIMESTAMP - WHERE file_path = ? - """, (file_hash, str_path)) - - self.connection.commit() - logger.debug(f"Updated hash for {file_path}") - except Exception as e: - logger.error(f"Error updating file hash for {file_path}: {e}", exc_info=True) - raise + """Deprecated: Hash is managed as primary key. This method is no-op. + + In the new hash-based schema, the file hash is the primary key (immutable). + Use get_or_create_file_entry() to ensure the hash is properly registered. + """ + # This is now a no-op since hash is the immutable primary key + pass def rename_file(self, old_path: Path, new_path: Path) -> None: """Rename a file in the database, preserving all metadata.""" @@ -1066,12 +1150,12 @@ class LocalLibraryDB: """Remove entries for files that no longer exist.""" try: cursor = self.connection.cursor() - cursor.execute("SELECT id, file_path FROM files") + cursor.execute("SELECT hash, file_path FROM files") removed_count = 0 - for row_id, file_path in cursor.fetchall(): + for file_hash, file_path in cursor.fetchall(): if not Path(file_path).exists(): - cursor.execute("DELETE FROM files WHERE id = ?", (row_id,)) + cursor.execute("DELETE FROM files WHERE hash = ?", (file_hash,)) removed_count += 1 self.connection.commit() @@ -1081,6 +1165,31 @@ class LocalLibraryDB: logger.error(f"Error cleaning up missing files: {e}", exc_info=True) return 0 + def delete_file(self, file_path: Path) -> bool: + """Delete a file from the database by path. 
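With the hash-based schema, search_by_tag returns (hash, file_path) rows instead of bare paths, and exact lookups go through search_hash. A small usage sketch against a hypothetical open `db`:

```python
from pathlib import Path

# `db` stands for an open FolderDB instance.
for file_hash, file_path in db.search_by_tag("audio", limit=10):
    print(file_hash[:12], Path(file_path).name)

# Exact-hash lookup returns a Path or None.
path = db.search_hash("0" * 64)  # placeholder hash
if path is not None and path.exists():
    print("found:", path)
```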
Cascades to metadata, tags, notes, etc.""" + try: + str_path = str(file_path.resolve()) + cursor = self.connection.cursor() + + # Get the hash first (for logging) + cursor.execute("SELECT hash FROM files WHERE file_path = ?", (str_path,)) + row = cursor.fetchone() + if not row: + logger.debug(f"File not found in database: {str_path}") + return False + + file_hash = row[0] + + # Delete the file entry (cascades to metadata, tags, notes, etc via foreign keys) + cursor.execute("DELETE FROM files WHERE file_path = ?", (str_path,)) + self.connection.commit() + + logger.debug(f"Deleted file from database: {str_path} (hash: {file_hash})") + return cursor.rowcount > 0 + except Exception as e: + logger.error(f"Error deleting file {file_path}: {e}", exc_info=True) + return False + # ======================================================================== # WORKER MANAGEMENT # ======================================================================== @@ -1417,6 +1526,207 @@ class LocalLibraryDB: self.close() +# ============================================================================ +# DATABASE QUERY API +# ============================================================================ + +class DatabaseAPI: + """Query API wrapper for LocalLibraryDB providing specialized search methods.""" + + def __init__(self, search_dir: Path): + self.search_dir = search_dir + self.db = FolderDB(search_dir) + + def __enter__(self): + self.db.__enter__() + return self + + def __exit__(self, *args): + return self.db.__exit__(*args) + + def get_cursor(self): + return self.db.connection.cursor() + + def get_file_hash_by_hash(self, file_hash: str) -> Optional[str]: + """Get file hash from the database, or None if not found.""" + cursor = self.get_cursor() + cursor.execute( + "SELECT hash FROM files WHERE LOWER(hash) = ?", + (file_hash.lower(),) + ) + row = cursor.fetchone() + return row[0] if row else None + + def get_all_file_hashes(self) -> Set[str]: + """Get all file hashes in the database.""" + cursor = self.get_cursor() + cursor.execute("SELECT hash FROM files") + return {row[0] for row in cursor.fetchall()} + + def get_file_hashes_by_tag_pattern(self, query_pattern: str) -> List[tuple]: + """Get (hash, tag) tuples matching a tag pattern.""" + cursor = self.get_cursor() + cursor.execute( + """ + SELECT DISTINCT f.hash, t.tag + FROM files f + JOIN tags t ON f.hash = t.hash + WHERE LOWER(t.tag) LIKE ? + """, + (query_pattern,) + ) + return cursor.fetchall() + + def get_file_hashes_by_path_pattern(self, like_pattern: str) -> Set[str]: + """Get hashes of files matching a path pattern.""" + cursor = self.get_cursor() + cursor.execute( + "SELECT DISTINCT hash FROM files WHERE LOWER(file_path) LIKE ?", + (like_pattern,) + ) + return {row[0] for row in cursor.fetchall()} + + def get_file_hashes_by_tag_substring(self, like_pattern: str) -> Set[str]: + """Get hashes of files matching a tag substring.""" + cursor = self.get_cursor() + cursor.execute( + """ + SELECT DISTINCT f.hash + FROM files f + JOIN tags t ON f.hash = t.hash + WHERE LOWER(t.tag) LIKE ? + """, + (like_pattern,) + ) + return {row[0] for row in cursor.fetchall()} + + def get_file_metadata(self, file_hashes: Set[str], limit: Optional[int] = None) -> List[tuple]: + """Get metadata for files given their hashes. 
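delete_file leans on the schema's foreign keys to cascade from files into metadata, tags and notes. One caveat worth keeping in mind: SQLite only enforces those cascades when foreign keys are switched on per connection, so something along the lines of the sketch below is assumed to happen at connection time (hypothetical, not the class's actual initializer):

```python
import sqlite3

# SQLite ships with foreign_keys OFF for each new connection; ON DELETE CASCADE
# in the child tables only fires once the pragma is enabled.
connection = sqlite3.connect("library.db")          # placeholder path
connection.execute("PRAGMA foreign_keys = ON")

# With the pragma set, removing the files row also removes dependent rows.
connection.execute("DELETE FROM files WHERE file_path = ?", ("/tmp/example.m4a",))
connection.commit()
```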
Returns (hash, file_path, size, extension) tuples.""" + if not file_hashes: + return [] + cursor = self.get_cursor() + placeholders = ",".join(["?"] * len(file_hashes)) + fetch_sql = f""" + SELECT hash, file_path, + COALESCE((SELECT size FROM metadata WHERE hash = files.hash), 0) as size, + COALESCE((SELECT ext FROM metadata WHERE hash = files.hash), '') as ext + FROM files + WHERE hash IN ({placeholders}) + ORDER BY file_path + LIMIT ? + """ + cursor.execute(fetch_sql, (*file_hashes, limit or len(file_hashes))) + return cursor.fetchall() + + def get_all_files(self, limit: Optional[int] = None) -> List[tuple]: + """Get all files in database. Returns (hash, file_path, size, ext) tuples.""" + cursor = self.get_cursor() + cursor.execute( + """ + SELECT f.hash, f.file_path, + COALESCE((SELECT size FROM metadata WHERE hash = f.hash), 0) as size, + COALESCE((SELECT ext FROM metadata WHERE hash = f.hash), '') as ext + FROM files f + ORDER BY file_path + LIMIT ? + """, + (limit or 1000,) + ) + return cursor.fetchall() + + def get_tags_for_file(self, file_hash: str) -> List[str]: + """Get all tags for a file given its hash.""" + cursor = self.get_cursor() + cursor.execute( + "SELECT tag FROM tags WHERE hash = ?", + (file_hash,) + ) + return [row[0] for row in cursor.fetchall()] + + def get_tags_by_namespace_and_file(self, file_hash: str, query_pattern: str) -> List[str]: + """Get tags for a file matching a pattern.""" + cursor = self.get_cursor() + cursor.execute( + """ + SELECT DISTINCT tag FROM tags + WHERE hash = ? + AND LOWER(tag) LIKE ? + """, + (file_hash, query_pattern) + ) + return [row[0] for row in cursor.fetchall()] + + def get_files_by_namespace_pattern(self, query_pattern: str, limit: Optional[int] = None) -> List[tuple]: + """Get files with tags matching a pattern. Returns (hash, file_path, size, ext) tuples.""" + cursor = self.get_cursor() + cursor.execute( + """ + SELECT DISTINCT f.hash, f.file_path, + COALESCE((SELECT size FROM metadata WHERE hash = f.hash), 0) as size, + COALESCE((SELECT ext FROM metadata WHERE hash = f.hash), '') as ext + FROM files f + JOIN tags t ON f.hash = t.hash + WHERE LOWER(t.tag) LIKE ? + ORDER BY f.file_path + LIMIT ? + """, + (query_pattern, limit or 1000) + ) + return cursor.fetchall() + + def get_files_by_simple_tag_pattern(self, query_pattern: str, limit: Optional[int] = None) -> List[tuple]: + """Get files with non-namespaced tags matching a pattern. Returns (hash, file_path, size, ext) tuples.""" + cursor = self.get_cursor() + cursor.execute( + """ + SELECT DISTINCT f.hash, f.file_path, + COALESCE((SELECT size FROM metadata WHERE hash = f.hash), 0) as size, + COALESCE((SELECT ext FROM metadata WHERE hash = f.hash), '') as ext + FROM files f + JOIN tags t ON f.hash = t.hash + WHERE LOWER(t.tag) LIKE ? AND LOWER(t.tag) NOT LIKE '%:%' + ORDER BY f.file_path + LIMIT ? + """, + (query_pattern, limit or 1000) + ) + return cursor.fetchall() + + def get_files_by_multiple_path_conditions(self, conditions: List[str], params: List[str], limit: Optional[int] = None) -> List[tuple]: + """Get files matching multiple path conditions. Returns (hash, file_path, size, ext) tuples.""" + cursor = self.get_cursor() + where_clause = " AND ".join(conditions) + sql = f""" + SELECT DISTINCT f.hash, f.file_path, + COALESCE((SELECT size FROM metadata WHERE hash = f.hash), 0) as size, + COALESCE((SELECT ext FROM metadata WHERE hash = f.hash), '') as ext + FROM files f + WHERE {where_clause} + ORDER BY f.file_path + LIMIT ? 
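get_file_metadata builds its IN clause with one `?` placeholder per hash, the usual way to parameterize a variable-length list in sqlite3. A stripped-down illustration of the same pattern:

```python
import sqlite3
from typing import List, Set, Tuple

def fetch_paths_for_hashes(connection: sqlite3.Connection, hashes: Set[str]) -> List[Tuple[str, str]]:
    """Minimal sketch of the placeholder-per-value IN query used above."""
    if not hashes:
        return []
    placeholders = ",".join("?" for _ in hashes)
    sql = f"SELECT hash, file_path FROM files WHERE hash IN ({placeholders})"
    return connection.execute(sql, tuple(hashes)).fetchall()
```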
+ """ + cursor.execute(sql, (*params, limit or 10000)) + return cursor.fetchall() + + def get_files_by_title_tag_pattern(self, title_pattern: str, limit: Optional[int] = None) -> List[tuple]: + """Get files with title tags matching a pattern. Returns (hash, file_path, size, ext) tuples.""" + cursor = self.get_cursor() + cursor.execute( + """ + SELECT DISTINCT f.hash, f.file_path, + COALESCE((SELECT size FROM metadata WHERE hash = f.hash), 0) as size, + COALESCE((SELECT ext FROM metadata WHERE hash = f.hash), '') as ext + FROM files f + JOIN tags t ON f.hash = t.hash + WHERE LOWER(t.tag) LIKE ? + ORDER BY f.file_path + LIMIT ? + """, + (title_pattern, limit or 10000) + ) + return cursor.fetchall() + + # ============================================================================ # LIBRARY INITIALIZATION & MIGRATION # ============================================================================ @@ -1427,7 +1737,7 @@ class LocalLibraryInitializer: def __init__(self, library_root: Path): """Initialize the database scanner.""" self.library_root = Path(library_root) - self.db = LocalLibraryDB(library_root) + self.db = FolderDB(library_root) self.stats = { 'files_scanned': 0, 'files_new': 0, 'files_existing': 0, 'sidecars_imported': 0, 'sidecars_deleted': 0, @@ -1475,16 +1785,16 @@ class LocalLibraryInitializer: return sorted(media_files) - def _get_database_files(self) -> Dict[str, int]: - """Get existing files from database by normalized path.""" + def _get_database_files(self) -> Dict[str, str]: + """Get existing files from database by normalized path, returns {normalized_path: hash}.""" try: cursor = self.db.connection.cursor() - cursor.execute("SELECT id, file_path FROM files") + cursor.execute("SELECT hash, file_path FROM files") result = {} - for file_id, file_path in cursor.fetchall(): + for file_hash, file_path in cursor.fetchall(): normalized = str(Path(file_path).resolve()).lower() - result[normalized] = file_id + result[normalized] = file_hash return result except Exception as e: @@ -1508,47 +1818,210 @@ class LocalLibraryInitializer: self.stats['errors'] += 1 def _import_sidecars_batch(self) -> None: - """Batch import all sidecar files.""" + """Batch import sidecars, hash files, and rename files to their hash.""" try: - for sidecar_path in self.library_root.rglob("*.tags"): + sidecar_map = self._collect_sidecars() + + for base_path, sidecars in sidecar_map.items(): try: - base_path = Path(str(sidecar_path)[:-len('.tags')]) if not base_path.exists(): continue - - hash_val, tags, urls = read_sidecar(sidecar_path) - - if hash_val or tags or urls: - if hash_val: - self.db.update_file_hash(base_path, hash_val) - if tags: - self.db.save_tags(base_path, tags) - if urls: - self.db.save_metadata(base_path, {'known_urls': urls}) - - self.stats['sidecars_imported'] += 1 + + tags = self._read_tag_sidecars(sidecars) + metadata_info = self._read_metadata_sidecar(sidecars) + note_text = self._read_notes_sidecar(sidecars) + + hashed_path, file_hash = self._ensure_hashed_filename(base_path, sidecars) + + # Always trust freshly computed hash + metadata_info['hash'] = file_hash + try: + stat_result = hashed_path.stat() + metadata_info.setdefault('size', stat_result.st_size) + metadata_info.setdefault('ext', hashed_path.suffix) + except OSError: + pass + + self.db.save_file_info(hashed_path, metadata_info, tags) + if note_text: + self.db.save_note(hashed_path, note_text) + + # Delete all sidecars after importing + self._delete_sidecars(sidecars) + + self.stats['sidecars_imported'] += 1 except Exception 
as e: - logger.warning(f"Error importing sidecar {sidecar_path}: {e}") + logger.warning(f"Error importing sidecar bundle for {base_path}: {e}") self.stats['errors'] += 1 except Exception as e: logger.error(f"Error batch importing sidecars: {e}", exc_info=True) + + def _collect_sidecars(self) -> Dict[Path, Dict[str, List[Path]]]: + """Collect sidecars grouped by their base media file.""" + sidecar_map: Dict[Path, Dict[str, List[Path]]] = {} + + patterns = [ + ("*.tag", "tags"), + ("*.tags", "tags"), + ("*.metadata", "metadata"), + ("*.notes", "notes"), + ] + + for pattern, key in patterns: + for sidecar in self.library_root.rglob(pattern): + try: + base = sidecar.with_suffix("") + except Exception: + continue + + if not base.exists(): + continue + + bucket = sidecar_map.setdefault(base, {"tags": [], "metadata": [], "notes": []}) + bucket[key].append(sidecar) + + return sidecar_map + + def _read_tag_sidecars(self, sidecars: Dict[str, List[Path]]) -> List[str]: + tags: List[str] = [] + for tag_path in sidecars.get("tags", []): + try: + content = tag_path.read_text(encoding="utf-8") + except OSError: + continue + + for raw_line in content.splitlines(): + line = raw_line.strip() + if line: + tags.append(line) + return tags + + def _read_metadata_sidecar(self, sidecars: Dict[str, List[Path]]) -> Dict[str, Any]: + metadata: Dict[str, Any] = {"url": [], "relationships": []} + + meta_path = sidecars.get("metadata", []) + if not meta_path: + return metadata + + for path in meta_path: + try: + content = path.read_text(encoding="utf-8") + except OSError: + continue + + for raw_line in content.splitlines(): + line = raw_line.strip() + if not line or line.startswith('#'): + continue + + lower = line.lower() + if lower.startswith("hash:"): + metadata["hash"] = line.split(":", 1)[1].strip() + elif lower.startswith("url:") or lower.startswith("url:"): + url_part = line.split(":", 1)[1].strip() + if url_part: + for url_segment in url_part.replace(",", " ").split(): + clean = url_segment.strip() + if clean and clean not in metadata["url"]: + metadata["url"].append(clean) + elif lower.startswith("relationship:"): + rel_value = line.split(":", 1)[1].strip() + if rel_value: + metadata["relationships"].append(rel_value) + + return metadata + + def _read_notes_sidecar(self, sidecars: Dict[str, List[Path]]) -> Optional[str]: + note_paths = sidecars.get("notes", []) + for note_path in note_paths: + try: + content = note_path.read_text(encoding="utf-8").strip() + except OSError: + continue + if content: + return content + return None + + def _ensure_hashed_filename(self, file_path: Path, sidecars: Dict[str, List[Path]]) -> Tuple[Path, str]: + """Compute hash, rename file to hash-based name, and move sidecars accordingly.""" + file_hash = sha256_file(file_path) + target_name = f"{file_hash}{file_path.suffix}" + target_path = file_path.with_name(target_name) + + # Nothing to do if already hashed + if target_path == file_path: + return file_path, file_hash + + try: + if target_path.exists(): + logger.warning(f"Hash target already exists, keeping original: {target_path}") + return file_path, file_hash + + file_path.rename(target_path) + self._rename_sidecars(file_path, target_path, sidecars) + try: + self.db.rename_file(file_path, target_path) + except Exception: + # Entry might not exist yet; it will be created during save. 
+ pass + return target_path, file_hash + except Exception as e: + logger.warning(f"Failed to rename {file_path} to hash {target_path}: {e}") + return file_path, file_hash + + def _rename_sidecars(self, old_base: Path, new_base: Path, sidecars: Dict[str, List[Path]]) -> None: + """Rename sidecars to follow the new hashed filename.""" + mappings = [ + (sidecars.get("tags", []), ".tag"), + (sidecars.get("metadata", []), ".metadata"), + (sidecars.get("notes", []), ".notes"), + ] + + for candidates, suffix in mappings: + for source in candidates: + try: + dest = new_base.with_name(new_base.name + suffix) + except Exception: + continue + + if dest == source: + continue + + try: + source.rename(dest) + except Exception as e: + logger.warning(f"Failed to rename sidecar {source} -> {dest}: {e}") + + def _delete_sidecars(self, sidecars: Dict[str, List[Path]]) -> None: + """Delete sidecar files after they've been imported.""" + for sidecar_list in sidecars.values(): + for sidecar_path in sidecar_list: + try: + if sidecar_path.exists(): + sidecar_path.unlink() + self.stats['sidecars_deleted'] += 1 + except Exception as e: + logger.warning(f"Could not delete sidecar {sidecar_path}: {e}") def _cleanup_orphaned_sidecars(self) -> None: """Remove sidecars for non-existent files.""" try: - for sidecar_path in self.library_root.rglob("*.tags"): - base_path = Path(str(sidecar_path)[:-len('.tags')]) - if not base_path.exists(): - try: - sidecar_path.unlink() - self.stats['sidecars_deleted'] += 1 - except Exception as e: - logger.warning(f"Could not delete orphaned sidecar {sidecar_path}: {e}") + patterns = ["*.tag", "*.tags", "*.metadata", "*.notes"] + + for pattern in patterns: + for sidecar_path in self.library_root.rglob(pattern): + base_path = sidecar_path.with_suffix("") + if not base_path.exists(): + try: + sidecar_path.unlink() + self.stats['sidecars_deleted'] += 1 + except Exception as e: + logger.warning(f"Could not delete orphaned sidecar {sidecar_path}: {e}") except Exception as e: logger.error(f"Error cleaning up orphaned sidecars: {e}", exc_info=True) -def migrate_tags_to_db(library_root: Path, db: LocalLibraryDB) -> int: +def migrate_tags_to_db(library_root: Path, db: FolderDB) -> int: """Migrate .tags files to the database.""" migrated_count = 0 @@ -1577,7 +2050,7 @@ def migrate_tags_to_db(library_root: Path, db: LocalLibraryDB) -> int: return migrated_count -def migrate_metadata_to_db(library_root: Path, db: LocalLibraryDB) -> int: +def migrate_metadata_to_db(library_root: Path, db: FolderDB) -> int: """Migrate .metadata files to the database.""" migrated_count = 0 @@ -1615,13 +2088,13 @@ def _parse_metadata_file(content: str) -> Dict[str, Any]: return {} -def migrate_all(library_root: Path, db: Optional[LocalLibraryDB] = None) -> Dict[str, int]: +def migrate_all(library_root: Path, db: Optional[FolderDB] = None) -> Dict[str, int]: """Migrate all sidecar files to database.""" should_close = db is None try: if db is None: - db = LocalLibraryDB(library_root) + db = FolderDB(library_root) return { 'tags': migrate_tags_to_db(library_root, db), @@ -1642,11 +2115,11 @@ class LocalLibrarySearchOptimizer: def __init__(self, library_root: Path): """Initialize the search optimizer.""" self.library_root = Path(library_root) - self.db: Optional[LocalLibraryDB] = None + self.db: Optional[FolderDB] = None def __enter__(self): """Context manager entry.""" - self.db = LocalLibraryDB(self.library_root) + self.db = FolderDB(self.library_root) return self def __exit__(self, exc_type, exc_val, exc_tb): @@ -1819,7 
+2292,7 @@ class LocalLibrarySearchOptimizer: """Fast hash-based search using database.""" if not self.db: return None - return self.db.search_by_hash(file_hash) + return self.db.search_hash(file_hash) def set_relationship(self, file_path: Path, related_file_path: Path, rel_type: str = "alt") -> None: """Set a relationship between two files in the database. diff --git a/helper/hydrus.py b/helper/hydrus.py index 9bdd435..016d6ad 100644 --- a/helper/hydrus.py +++ b/helper/hydrus.py @@ -73,7 +73,7 @@ class HydrusRequestSpec: class HydrusClient: """Thin wrapper around the Hydrus Client API.""" - base_url: str + url: str access_key: str = "" timeout: float = 60.0 @@ -84,10 +84,10 @@ class HydrusClient: _session_key: str = field(init=False, default="", repr=False) # Cached session key def __post_init__(self) -> None: - if not self.base_url: + if not self.url: raise ValueError("Hydrus base URL is required") - self.base_url = self.base_url.rstrip("/") - parsed = urlsplit(self.base_url) + self.url = self.url.rstrip("/") + parsed = urlsplit(self.url) if parsed.scheme not in {"http", "https"}: raise ValueError("Hydrus base URL must use http or https") self.scheme = parsed.scheme @@ -374,24 +374,24 @@ class HydrusClient: hashes = self._ensure_hashes(file_hashes) if len(hashes) == 1: body = {"hash": hashes[0], "url_to_add": url} - return self._post("/add_urls/associate_url", data=body) + return self._post("/add_url/associate_url", data=body) results: dict[str, Any] = {} for file_hash in hashes: body = {"hash": file_hash, "url_to_add": url} - results[file_hash] = self._post("/add_urls/associate_url", data=body) + results[file_hash] = self._post("/add_url/associate_url", data=body) return {"batched": results} def delete_url(self, file_hashes: Union[str, Iterable[str]], url: str) -> dict[str, Any]: hashes = self._ensure_hashes(file_hashes) if len(hashes) == 1: body = {"hash": hashes[0], "url_to_delete": url} - return self._post("/add_urls/associate_url", data=body) + return self._post("/add_url/associate_url", data=body) results: dict[str, Any] = {} for file_hash in hashes: body = {"hash": file_hash, "url_to_delete": url} - results[file_hash] = self._post("/add_urls/associate_url", data=body) + results[file_hash] = self._post("/add_url/associate_url", data=body) return {"batched": results} def set_notes(self, file_hashes: Union[str, Iterable[str]], notes: dict[str, str], service_name: str) -> dict[str, Any]: @@ -517,7 +517,7 @@ class HydrusClient: file_ids: Sequence[int] | None = None, hashes: Sequence[str] | None = None, include_service_keys_to_tags: bool = True, - include_file_urls: bool = False, + include_file_url: bool = False, include_duration: bool = True, include_size: bool = True, include_mime: bool = False, @@ -535,7 +535,7 @@ class HydrusClient: include_service_keys_to_tags, lambda v: "true" if v else None, ), - ("include_file_urls", include_file_urls, lambda v: "true" if v else None), + ("include_file_url", include_file_url, lambda v: "true" if v else None), ("include_duration", include_duration, lambda v: "true" if v else None), ("include_size", include_size, lambda v: "true" if v else None), ("include_mime", include_mime, lambda v: "true" if v else None), @@ -559,13 +559,13 @@ class HydrusClient: def file_url(self, file_hash: str) -> str: hash_param = quote(file_hash) # Don't append access_key parameter for file downloads - use header instead - url = f"{self.base_url}/get_files/file?hash={hash_param}" + url = f"{self.url}/get_files/file?hash={hash_param}" return url def thumbnail_url(self, 
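LocalLibrarySearchOptimizer now opens a FolderDB on entry and routes fast hash lookups through search_hash. A minimal usage sketch (library path and hash are placeholders):

```python
from pathlib import Path

with LocalLibrarySearchOptimizer(Path("~/media-library").expanduser()) as optimizer:
    if optimizer.db is not None:
        hit = optimizer.db.search_hash("0" * 64)   # placeholder hash
        if hit is not None:
            print("fast hash lookup:", hit)
```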
file_hash: str) -> str: hash_param = quote(file_hash) # Don't append access_key parameter for file downloads - use header instead - url = f"{self.base_url}/get_files/thumbnail?hash={hash_param}" + url = f"{self.url}/get_files/thumbnail?hash={hash_param}" return url @@ -612,7 +612,7 @@ def hydrus_request(args, parser) -> int: parsed = urlsplit(options.url) if parsed.scheme not in ('http', 'https'): - parser.error('Only http and https URLs are supported') + parser.error('Only http and https url are supported') if not parsed.hostname: parser.error('Invalid Hydrus URL') @@ -1064,7 +1064,7 @@ def hydrus_export(args, _parser) -> int: file_hash = getattr(args, 'file_hash', None) or _extract_hash(args.file_url) if hydrus_url and file_hash: try: - client = HydrusClient(base_url=hydrus_url, access_key=args.access_key, timeout=args.timeout) + client = HydrusClient(url=hydrus_url, access_key=args.access_key, timeout=args.timeout) meta_response = client.fetch_file_metadata(hashes=[file_hash], include_mime=True) entries = meta_response.get('metadata') if isinstance(meta_response, dict) else None if isinstance(entries, list) and entries: @@ -1301,8 +1301,7 @@ def is_available(config: dict[str, Any], use_cache: bool = True) -> tuple[bool, Performs a lightweight probe to verify: - Hydrus URL is configured - - Hydrus client library is available - - Can connect to Hydrus and retrieve services + - Can connect to Hydrus URL/port Results are cached per session unless use_cache=False. @@ -1330,50 +1329,43 @@ def is_available(config: dict[str, Any], use_cache: bool = True) -> tuple[bool, return False, reason access_key = get_hydrus_access_key(config, "home") or "" + if not access_key: + reason = "Hydrus access key not configured" + _HYDRUS_AVAILABLE = False + _HYDRUS_UNAVAILABLE_REASON = reason + return False, reason + timeout_raw = config.get("HydrusNetwork_Request_Timeout") try: - timeout = float(timeout_raw) if timeout_raw is not None else 10.0 + timeout = float(timeout_raw) if timeout_raw is not None else 5.0 except (TypeError, ValueError): - timeout = 10.0 + timeout = 5.0 try: - # Use HTTPClient directly to avoid session key logic and reduce retries - # This prevents log spam when Hydrus is offline (avoiding 3 retries x 2 requests) - from helper.http_client import HTTPClient + # Simple TCP connection test to URL/port + import socket + from urllib.parse import urlparse - probe_url = f"{url.rstrip('/')}/get_services" + parsed = urlparse(url) + hostname = parsed.hostname or 'localhost' + port = parsed.port or (443 if parsed.scheme == 'https' else 80) - headers = {} - if access_key: - headers["Hydrus-Client-API-Access-Key"] = access_key - - # Suppress HTTPClient logging during probe to avoid "Request failed" logs on startup - http_logger = logging.getLogger("helper.http_client") - original_level = http_logger.level - http_logger.setLevel(logging.CRITICAL) - + # Try to connect to the host/port + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + sock.settimeout(timeout) try: - # Use retries=1 (single attempt, no retry) to fail fast - with HTTPClient(timeout=timeout, retries=1, headers=headers, verify_ssl=False) as http: - try: - response = http.get(probe_url) - if response.status_code == 200: - _HYDRUS_AVAILABLE = True - _HYDRUS_UNAVAILABLE_REASON = None - return True, None - else: - # Even if we get a 4xx/5xx, the service is "reachable" but maybe auth failed - # But for "availability" we usually mean "usable". - # If auth fails (403), we can't use it, so return False. 
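file_url and thumbnail_url deliberately keep the access key out of the query string; callers are expected to send it as a header instead. A download sketch under that assumption (host, key and hash are placeholders; the header name matches the one this module uses for API requests):

```python
import requests

client = HydrusClient(url="http://127.0.0.1:45869", access_key="<access key>", timeout=30.0)
download_url = client.file_url("0" * 64)          # placeholder hash

response = requests.get(
    download_url,
    headers={"Hydrus-Client-API-Access-Key": client.access_key},
    timeout=client.timeout,
)
response.raise_for_status()
with open("download.bin", "wb") as handle:
    handle.write(response.content)
```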
- reason = f"HTTP {response.status_code}: {response.reason_phrase}" - _HYDRUS_AVAILABLE = False - _HYDRUS_UNAVAILABLE_REASON = reason - return False, reason - except Exception as e: - # This catches connection errors from HTTPClient - raise e + result = sock.connect_ex((hostname, port)) + if result == 0: + _HYDRUS_AVAILABLE = True + _HYDRUS_UNAVAILABLE_REASON = None + return True, None + else: + reason = f"Cannot connect to {hostname}:{port}" + _HYDRUS_AVAILABLE = False + _HYDRUS_UNAVAILABLE_REASON = reason + return False, reason finally: - http_logger.setLevel(original_level) + sock.close() except Exception as exc: reason = str(exc) diff --git a/helper/logger.py b/helper/logger.py index ec86f01..b41b423 100644 --- a/helper/logger.py +++ b/helper/logger.py @@ -2,15 +2,29 @@ import sys import inspect +import threading from pathlib import Path _DEBUG_ENABLED = False +_thread_local = threading.local() + +def set_thread_stream(stream): + """Set a custom output stream for the current thread.""" + _thread_local.stream = stream + +def get_thread_stream(): + """Get the custom output stream for the current thread, if any.""" + return getattr(_thread_local, 'stream', None) def set_debug(enabled: bool) -> None: """Enable or disable debug logging.""" global _DEBUG_ENABLED _DEBUG_ENABLED = enabled +def is_debug_enabled() -> bool: + """Check if debug logging is enabled.""" + return _DEBUG_ENABLED + def debug(*args, **kwargs) -> None: """Print debug message if debug logging is enabled. @@ -18,9 +32,22 @@ def debug(*args, **kwargs) -> None: """ if not _DEBUG_ENABLED: return + + # Check if stderr has been redirected to /dev/null (quiet mode) + # If so, skip output to avoid queuing in background worker's capture + try: + stderr_name = getattr(sys.stderr, 'name', '') + if 'nul' in str(stderr_name).lower() or '/dev/null' in str(stderr_name): + return + except Exception: + pass + # Check for thread-local stream first + stream = get_thread_stream() + if stream: + kwargs['file'] = stream # Set default to stderr for debug messages - if 'file' not in kwargs: + elif 'file' not in kwargs: kwargs['file'] = sys.stderr # Prepend DEBUG label @@ -59,8 +86,12 @@ def log(*args, **kwargs) -> None: # Get function name func_name = caller_frame.f_code.co_name + # Check for thread-local stream first + stream = get_thread_stream() + if stream: + kwargs['file'] = stream # Set default to stdout if not specified - if 'file' not in kwargs: + elif 'file' not in kwargs: kwargs['file'] = sys.stdout if add_prefix: diff --git a/helper/mpv_file.py b/helper/mpv_file.py index 6a014f3..26fdbb4 100644 --- a/helper/mpv_file.py +++ b/helper/mpv_file.py @@ -96,7 +96,7 @@ class MPVfile: relationship_metadata: Dict[str, Any] = field(default_factory=dict) tags: List[str] = field(default_factory=list) original_tags: Dict[str, str] = field(default_factory=dict) - known_urls: List[str] = field(default_factory=list) + url: List[str] = field(default_factory=list) title: Optional[str] = None source_url: Optional[str] = None clip_time: Optional[str] = None @@ -128,7 +128,7 @@ class MPVfile: "relationship_metadata": self.relationship_metadata, "tags": self.tags, "original_tags": self.original_tags, - "known_urls": self.known_urls, + "url": self.url, "title": self.title, "source_url": self.source_url, "clip_time": self.clip_time, @@ -293,10 +293,10 @@ class MPVFileBuilder: if s.tags: s.original_tags = {tag: tag for tag in s.tags} - # known URLs + last_url - s.known_urls = _normalise_string_list(p.get("known_urls")) - if self.last_url and self.last_url not 
in s.known_urls: - s.known_urls.append(self.last_url) + # known url + last_url + s.url = _normalise_string_list(p.get("url")) + if self.last_url and self.last_url not in s.url: + s.url.append(self.last_url) # source URL (explicit or fallback to last_url) explicit_source = p.get("source_url") @@ -500,8 +500,8 @@ class MPVFileBuilder: self._apply_hydrus_result(result) self.state.type = "hydrus" matched_url = result.get("matched_url") or result.get("url") - if matched_url and matched_url not in self.state.known_urls: - self.state.known_urls.append(matched_url) + if matched_url and matched_url not in self.state.url: + self.state.url.append(matched_url) # Enrich relationships once we know the hash if self.include_relationships and self.state.hash and self.hydrus_settings.base_url: self._enrich_relationships_from_api(self.state.hash) @@ -527,7 +527,7 @@ class MPVFileBuilder: metadata_payload["type"] = "other" self.state.metadata = metadata_payload # Do NOT overwrite MPVfile.type with metadata.type - self._merge_known_urls(metadata_payload.get("known_urls") or metadata_payload.get("known_urls_set")) + self._merge_url(metadata_payload.get("url") or metadata_payload.get("url_set")) source_url = metadata_payload.get("original_url") or metadata_payload.get("source_url") if source_url and not self.state.source_url: self.state.source_url = self._normalise_url(source_url) @@ -722,7 +722,7 @@ class MPVFileBuilder: include_service_keys_to_tags=True, include_duration=True, include_size=True, - include_file_urls=False, + include_file_url=False, include_mime=False, ) except HydrusRequestError as hre: # pragma: no cover @@ -801,11 +801,11 @@ class MPVFileBuilder: if tag not in self.state.original_tags: self.state.original_tags[tag] = tag - def _merge_known_urls(self, urls: Optional[Iterable[Any]]) -> None: - if not urls: + def _merge_url(self, url: Optional[Iterable[Any]]) -> None: - if not url: return - combined = list(self.state.known_urls or []) + _normalise_string_list(urls) - self.state.known_urls = unique_preserve_order(combined) + combined = list(self.state.url or []) + _normalise_string_list(url) + self.state.url = unique_preserve_order(combined) def _load_sidecar_tags(self, local_path: str) -> None: try: @@ -821,7 +821,7 @@ class MPVFileBuilder: if hash_value and not self.state.hash and _looks_like_hash(hash_value): self.state.hash = hash_value.lower() self._merge_tags(tags) - self._merge_known_urls(known) + self._merge_url(known) break def _read_sidecar(self, sidecar_path: Path) -> tuple[Optional[str], List[str], List[str]]: @@ -831,7 +831,7 @@ class MPVFileBuilder: return None, [], [] hash_value: Optional[str] = None tags: List[str] = [] - known_urls: List[str] = [] + url: List[str] = [] for line in raw.splitlines(): trimmed = line.strip() if not trimmed: @@ -841,13 +841,13 @@ class MPVFileBuilder: candidate = trimmed.split(":", 1)[1].strip() if ":" in trimmed else "" if candidate: hash_value = candidate - elif lowered.startswith("known_url:") or lowered.startswith("url:"): + elif lowered.startswith("url:"): candidate = trimmed.split(":", 1)[1].strip() if ":" in trimmed else "" if candidate: - known_urls.append(candidate) + url.append(candidate) else: tags.append(trimmed) - return hash_value, tags, known_urls + return hash_value, tags, url def _compute_local_hash(self, local_path: str) -> None: try: @@ -864,8 +864,8 @@ class MPVFileBuilder: def _finalise(self) -> None: if self.state.tags: self.state.tags = unique_preserve_order(self.state.tags) - if
self.state.known_urls: - self.state.known_urls = unique_preserve_order(self.state.known_urls) + if self.state.url: + self.state.url = unique_preserve_order(self.state.url) # Ensure metadata.type is always present for Lua, but do NOT overwrite MPVfile.type if not self.state.title: if self.state.metadata.get("title"): diff --git a/helper/mpv_ipc.py b/helper/mpv_ipc.py index 7f18795..5c73a10 100644 --- a/helper/mpv_ipc.py +++ b/helper/mpv_ipc.py @@ -85,7 +85,7 @@ def _normalize_target(text: Optional[str]) -> Optional[str]: except Exception: pass - # Normalize paths/urls for comparison + # Normalize paths/url for comparison return lower.replace('\\', '\\') diff --git a/helper/provider.py b/helper/provider.py new file mode 100644 index 0000000..0ddb60f --- /dev/null +++ b/helper/provider.py @@ -0,0 +1,818 @@ +"""Provider interfaces for search and file upload functionality. + +This module defines two distinct provider types: +1. SearchProvider: For searching content (books, music, videos, games) +2. FileProvider: For uploading files to hosting services + +No legacy code or backwards compatibility - clean, single source of truth. +""" + +from __future__ import annotations + +from abc import ABC, abstractmethod +from typing import Any, Dict, List, Optional, Tuple +from dataclasses import dataclass, field +from pathlib import Path +import sys +import os +import json +import re +import time +import asyncio +import subprocess +import shutil +import mimetypes +import traceback +import requests + +from helper.logger import log, debug + +# Optional dependencies +try: + from playwright.sync_api import sync_playwright + PLAYWRIGHT_AVAILABLE = True +except ImportError: + PLAYWRIGHT_AVAILABLE = False + + +# ============================================================================ +# SEARCH PROVIDERS +# ============================================================================ + +@dataclass +class SearchResult: + """Unified search result format across all search providers.""" + + origin: str # Provider name: "libgen", "soulseek", "debrid", "bandcamp", etc. + title: str # Display title/filename + path: str # Download target (URL, path, magnet, identifier) + + detail: str = "" # Additional description + annotations: List[str] = field(default_factory=list) # Tags: ["120MB", "flac", "ready"] + media_kind: str = "other" # Type: "book", "audio", "video", "game", "magnet" + size_bytes: Optional[int] = None + tags: set[str] = field(default_factory=set) # Searchable tags + columns: List[Tuple[str, str]] = field(default_factory=list) # Display columns + full_metadata: Dict[str, Any] = field(default_factory=dict) # Extra metadata + + def to_dict(self) -> Dict[str, Any]: + """Convert to dictionary for pipeline processing.""" + return { + "origin": self.origin, + "title": self.title, + "path": self.path, + "detail": self.detail, + "annotations": self.annotations, + "media_kind": self.media_kind, + "size_bytes": self.size_bytes, + "tags": list(self.tags), + "columns": list(self.columns), + "full_metadata": self.full_metadata, + } + + +class SearchProvider(ABC): + """Base class for search providers.""" + + def __init__(self, config: Dict[str, Any] = None): + self.config = config or {} + self.name = self.__class__.__name__.lower() + + @abstractmethod + def search( + self, + query: str, + limit: int = 50, + filters: Optional[Dict[str, Any]] = None, + **kwargs + ) -> List[SearchResult]: + """Search for items matching the query. 
+ + Args: + query: Search query string + limit: Maximum results to return + filters: Optional filtering criteria + **kwargs: Provider-specific arguments + + Returns: + List of SearchResult objects + """ + pass + + def validate(self) -> bool: + """Check if provider is available and properly configured.""" + return True + + +class Libgen(SearchProvider): + """Search provider for Library Genesis books.""" + + def search( + self, + query: str, + limit: int = 50, + filters: Optional[Dict[str, Any]] = None, + **kwargs + ) -> List[SearchResult]: + filters = filters or {} + + try: + from helper.unified_book_downloader import UnifiedBookDownloader + from helper.query_parser import parse_query, get_field, get_free_text + + parsed = parse_query(query) + isbn = get_field(parsed, 'isbn') + author = get_field(parsed, 'author') + title = get_field(parsed, 'title') + free_text = get_free_text(parsed) + + search_query = isbn or title or author or free_text or query + + downloader = UnifiedBookDownloader(config=self.config) + books = downloader.search_libgen(search_query, limit=limit) + + results = [] + for idx, book in enumerate(books, 1): + title = book.get("title", "Unknown") + author = book.get("author", "Unknown") + year = book.get("year", "Unknown") + pages = book.get("pages") or book.get("pages_str") or "" + extension = book.get("extension", "") or book.get("ext", "") + filesize = book.get("filesize_str", "Unknown") + isbn = book.get("isbn", "") + mirror_url = book.get("mirror_url", "") + + columns = [ + ("Title", title), + ("Author", author), + ("Pages", str(pages)), + ("Ext", str(extension)), + ] + + detail = f"By: {author}" + if year and year != "Unknown": + detail += f" ({year})" + + annotations = [f"{filesize}"] + if isbn: + annotations.append(f"ISBN: {isbn}") + + results.append(SearchResult( + origin="libgen", + title=title, + path=mirror_url or f"libgen:{book.get('id', '')}", + detail=detail, + annotations=annotations, + media_kind="book", + columns=columns, + full_metadata={ + "number": idx, + "author": author, + "year": year, + "isbn": isbn, + "filesize": filesize, + "pages": pages, + "extension": extension, + "book_id": book.get("book_id", ""), + "md5": book.get("md5", ""), + }, + )) + + return results + + except Exception as e: + log(f"[libgen] Search error: {e}", file=sys.stderr) + return [] + + def validate(self) -> bool: + try: + from helper.unified_book_downloader import UnifiedBookDownloader + return True + except Exception: + return False + + +class Soulseek(SearchProvider): + """Search provider for Soulseek P2P network.""" + + MUSIC_EXTENSIONS = { + '.flac', '.mp3', '.m4a', '.aac', '.ogg', '.opus', + '.wav', '.alac', '.wma', '.ape', '.aiff', '.dsf', + '.dff', '.wv', '.tta', '.tak', '.ac3', '.dts' + } + + USERNAME = "asjhkjljhkjfdsd334" + PASSWORD = "khhhg" + DOWNLOAD_DIR = "./downloads" + MAX_WAIT_TRANSFER = 1200 + + async def perform_search( + self, + query: str, + timeout: float = 9.0, + limit: int = 50 + ) -> List[Dict[str, Any]]: + """Perform async Soulseek search.""" + import os + from aioslsk.client import SoulSeekClient + from aioslsk.settings import Settings, CredentialsSettings + + os.makedirs(self.DOWNLOAD_DIR, exist_ok=True) + + settings = Settings(credentials=CredentialsSettings(username=self.USERNAME, password=self.PASSWORD)) + client = SoulSeekClient(settings) + + try: + await client.start() + await client.login() + except Exception as e: + log(f"[soulseek] Login failed: {type(e).__name__}: {e}", file=sys.stderr) + return [] + + try: + search_request = await 
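A new backend only has to subclass SearchProvider and emit SearchResult objects; validate() is what the registry checks before handing the provider out. A toy provider to show the shape (the class and its data are invented for illustration):

```python
from typing import Any, Dict, List, Optional

from helper.provider import SearchProvider, SearchResult

class EchoProvider(SearchProvider):
    """Toy provider: echoes the query back as a single result."""

    def search(
        self,
        query: str,
        limit: int = 50,
        filters: Optional[Dict[str, Any]] = None,
        **kwargs,
    ) -> List[SearchResult]:
        return [
            SearchResult(
                origin="echo",
                title=query,
                path=f"echo:{query}",
                detail="debug provider",
                media_kind="other",
            )
        ]

    def validate(self) -> bool:
        return True
```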
client.searches.search(query) + await self._collect_results(client, search_request, timeout=timeout) + return self._flatten_results(search_request)[:limit] + except Exception as e: + log(f"[soulseek] Search error: {type(e).__name__}: {e}", file=sys.stderr) + return [] + finally: + try: + await client.stop() + except Exception: + pass + + def _flatten_results(self, search_request) -> List[dict]: + flat = [] + for result in search_request.results: + username = getattr(result, "username", "?") + + for file_data in getattr(result, "shared_items", []): + flat.append({ + "file": file_data, + "username": username, + "filename": getattr(file_data, "filename", "?"), + "size": getattr(file_data, "filesize", 0), + }) + + for file_data in getattr(result, "locked_results", []): + flat.append({ + "file": file_data, + "username": username, + "filename": getattr(file_data, "filename", "?"), + "size": getattr(file_data, "filesize", 0), + }) + + return flat + + async def _collect_results(self, client, search_request, timeout: float = 75.0) -> None: + end = time.time() + timeout + last_count = 0 + while time.time() < end: + current_count = len(search_request.results) + if current_count > last_count: + debug(f"[soulseek] Got {current_count} result(s)...") + last_count = current_count + await asyncio.sleep(0.5) + + def search( + self, + query: str, + limit: int = 50, + filters: Optional[Dict[str, Any]] = None, + **kwargs + ) -> List[SearchResult]: + filters = filters or {} + + try: + flat_results = asyncio.run(self.perform_search(query, timeout=9.0, limit=limit)) + + if not flat_results: + return [] + + # Filter to music files only + music_results = [] + for item in flat_results: + filename = item['filename'] + ext = '.' + filename.rsplit('.', 1)[-1].lower() if '.' in filename else '' + if ext in self.MUSIC_EXTENSIONS: + music_results.append(item) + + if not music_results: + return [] + + # Extract metadata + enriched_results = [] + for item in music_results: + filename = item['filename'] + ext = '.' + filename.rsplit('.', 1)[-1].lower() if '.' in filename else '' + + # Get display filename + display_name = filename.split('\\')[-1] if '\\' in filename else filename.split('/')[-1] if '/' in filename else filename + + # Extract path hierarchy + path_parts = filename.replace('\\', '/').split('/') + artist = path_parts[-3] if len(path_parts) >= 3 else '' + album = path_parts[-2] if len(path_parts) >= 3 else path_parts[-2] if len(path_parts) == 2 else '' + + # Extract track number and title + base_name = display_name.rsplit('.', 1)[0] if '.' 
in display_name else display_name + track_num = '' + title = base_name + filename_artist = '' + + match = re.match(r'^(\d{1,3})\s*[\.\-]?\s+(.+)$', base_name) + if match: + track_num = match.group(1) + rest = match.group(2) + if ' - ' in rest: + filename_artist, title = rest.split(' - ', 1) + else: + title = rest + + if filename_artist: + artist = filename_artist + + enriched_results.append({ + **item, + 'artist': artist, + 'album': album, + 'title': title, + 'track_num': track_num, + 'ext': ext + }) + + # Apply filters + if filters: + artist_filter = filters.get('artist', '').lower() if filters.get('artist') else '' + album_filter = filters.get('album', '').lower() if filters.get('album') else '' + track_filter = filters.get('track', '').lower() if filters.get('track') else '' + + if artist_filter or album_filter or track_filter: + filtered = [] + for item in enriched_results: + if artist_filter and artist_filter not in item['artist'].lower(): + continue + if album_filter and album_filter not in item['album'].lower(): + continue + if track_filter and track_filter not in item['title'].lower(): + continue + filtered.append(item) + enriched_results = filtered + + # Sort: .flac first, then by size + enriched_results.sort(key=lambda item: (item['ext'].lower() != '.flac', -item['size'])) + + # Convert to SearchResult + results = [] + for idx, item in enumerate(enriched_results, 1): + artist_display = item['artist'] if item['artist'] else "(no artist)" + album_display = item['album'] if item['album'] else "(no album)" + size_mb = int(item['size'] / 1024 / 1024) + + columns = [ + ("Track", item['track_num'] or "?"), + ("Title", item['title'][:40]), + ("Artist", artist_display[:32]), + ("Album", album_display[:32]), + ("Size", f"{size_mb} MB"), + ] + + results.append(SearchResult( + origin="soulseek", + title=item['title'], + path=item['filename'], + detail=f"{artist_display} - {album_display}", + annotations=[f"{size_mb} MB", item['ext'].lstrip('.').upper()], + media_kind="audio", + size_bytes=item['size'], + columns=columns, + full_metadata={ + "username": item['username'], + "filename": item['filename'], + "artist": item['artist'], + "album": item['album'], + "track_num": item['track_num'], + "ext": item['ext'], + }, + )) + + return results + + except Exception as e: + log(f"[soulseek] Search error: {e}", file=sys.stderr) + return [] + + def validate(self) -> bool: + try: + from aioslsk.client import SoulSeekClient + return True + except ImportError: + return False + + +class Bandcamp(SearchProvider): + """Search provider for Bandcamp.""" + + def search( + self, + query: str, + limit: int = 50, + filters: Optional[Dict[str, Any]] = None, + **kwargs + ) -> List[SearchResult]: + if not PLAYWRIGHT_AVAILABLE: + log("[bandcamp] Playwright not available. 
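The Soulseek result parsing above relies on one regex to peel a leading track number off the filename stem, followed by an optional 'Artist - Title' split. A quick standalone check of that regex on a made-up filename:

```python
import re

base_name = "07 - Some Artist - Some Track"      # invented filename stem
match = re.match(r'^(\d{1,3})\s*[\.\-]?\s+(.+)$', base_name)
if match:
    track_num, rest = match.group(1), match.group(2)
    artist, title = rest.split(' - ', 1) if ' - ' in rest else ('', rest)
    print(track_num, '|', artist, '|', title)    # 07 | Some Artist | Some Track
```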
Install with: pip install playwright", file=sys.stderr) + return [] + + results = [] + try: + with sync_playwright() as p: + browser = p.chromium.launch(headless=True) + page = browser.new_page() + + # Parse query for artist: prefix + if query.strip().lower().startswith("artist:"): + artist_name = query[7:].strip().strip('"') + search_url = f"https://bandcamp.com/search?q={artist_name}&item_type=b" + else: + search_url = f"https://bandcamp.com/search?q={query}&item_type=a" + + results = self._scrape_url(page, search_url, limit) + + browser.close() + except Exception as e: + log(f"[bandcamp] Search error: {e}", file=sys.stderr) + return [] + + return results + + def _scrape_url(self, page, url: str, limit: int) -> List[SearchResult]: + debug(f"[bandcamp] Scraping: {url}") + + page.goto(url) + page.wait_for_load_state("domcontentloaded") + + results = [] + + # Check for search results + search_results = page.query_selector_all(".searchresult") + if search_results: + for item in search_results[:limit]: + try: + heading = item.query_selector(".heading") + if not heading: + continue + + link = heading.query_selector("a") + if not link: + continue + + title = link.inner_text().strip() + target_url = link.get_attribute("href") + + subhead = item.query_selector(".subhead") + artist = subhead.inner_text().strip() if subhead else "Unknown" + + itemtype = item.query_selector(".itemtype") + media_type = itemtype.inner_text().strip() if itemtype else "album" + + results.append(SearchResult( + origin="bandcamp", + title=title, + path=target_url, + detail=f"By: {artist}", + annotations=[media_type], + media_kind="audio", + columns=[ + ("Name", title), + ("Artist", artist), + ("Type", media_type), + ], + full_metadata={ + "artist": artist, + "type": media_type, + }, + )) + except Exception as e: + debug(f"[bandcamp] Error parsing result: {e}") + continue + + return results + + def validate(self) -> bool: + return PLAYWRIGHT_AVAILABLE + + +class YouTube(SearchProvider): + """Search provider for YouTube using yt-dlp.""" + + def search( + self, + query: str, + limit: int = 10, + filters: Optional[Dict[str, Any]] = None, + **kwargs + ) -> List[SearchResult]: + ytdlp_path = shutil.which("yt-dlp") + if not ytdlp_path: + log("[youtube] yt-dlp not found in PATH", file=sys.stderr) + return [] + + search_query = f"ytsearch{limit}:{query}" + + cmd = [ + ytdlp_path, + "--dump-json", + "--flat-playlist", + "--no-warnings", + search_query + ] + + try: + process = subprocess.run( + cmd, + capture_output=True, + text=True, + encoding="utf-8", + errors="replace" + ) + + if process.returncode != 0: + log(f"[youtube] yt-dlp failed: {process.stderr}", file=sys.stderr) + return [] + + results = [] + for line in process.stdout.splitlines(): + if not line.strip(): + continue + try: + video_data = json.loads(line) + title = video_data.get("title", "Unknown") + video_id = video_data.get("id", "") + url = video_data.get("url") or f"https://youtube.com/watch?v={video_id}" + uploader = video_data.get("uploader", "Unknown") + duration = video_data.get("duration", 0) + view_count = video_data.get("view_count", 0) + + duration_str = f"{int(duration//60)}:{int(duration%60):02d}" if duration else "" + views_str = f"{view_count:,}" if view_count else "" + + results.append(SearchResult( + origin="youtube", + title=title, + path=url, + detail=f"By: {uploader}", + annotations=[duration_str, f"{views_str} views"], + media_kind="video", + columns=[ + ("Title", title), + ("Uploader", uploader), + ("Duration", duration_str), + ("Views", 
views_str), + ], + full_metadata={ + "video_id": video_id, + "uploader": uploader, + "duration": duration, + "view_count": view_count, + }, + )) + except json.JSONDecodeError: + continue + + return results + + except Exception as e: + log(f"[youtube] Error: {e}", file=sys.stderr) + return [] + + def validate(self) -> bool: + return shutil.which("yt-dlp") is not None + + def pipe(self, path: str, config: Optional[Dict[str, Any]] = None) -> Optional[str]: + """Return the playable URL for MPV (just the path for YouTube).""" + return path + + +# Search provider registry +_SEARCH_PROVIDERS = { + "libgen": Libgen, + "soulseek": Soulseek, + "bandcamp": Bandcamp, + "youtube": YouTube, +} + + +def get_search_provider(name: str, config: Optional[Dict[str, Any]] = None) -> Optional[SearchProvider]: + """Get a search provider by name.""" + provider_class = _SEARCH_PROVIDERS.get(name.lower()) + + if provider_class is None: + log(f"[provider] Unknown search provider: {name}", file=sys.stderr) + return None + + try: + provider = provider_class(config) + if not provider.validate(): + log(f"[provider] Provider '{name}' is not available", file=sys.stderr) + return None + return provider + except Exception as e: + log(f"[provider] Error initializing '{name}': {e}", file=sys.stderr) + return None + + +def list_search_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bool]: + """List all search providers and their availability.""" + availability = {} + for name, provider_class in _SEARCH_PROVIDERS.items(): + try: + provider = provider_class(config) + availability[name] = provider.validate() + except Exception: + availability[name] = False + return availability + + +# ============================================================================ +# FILE PROVIDERS +# ============================================================================ + +class FileProvider(ABC): + """Base class for file upload providers.""" + + def __init__(self, config: Optional[Dict[str, Any]] = None): + self.config = config or {} + self.name = self.__class__.__name__.lower() + + @abstractmethod + def upload(self, file_path: str, **kwargs: Any) -> str: + """Upload a file and return the URL.""" + pass + + def validate(self) -> bool: + """Check if provider is available/configured.""" + return True + + +class ZeroXZero(FileProvider): + """File provider for 0x0.st.""" + + def upload(self, file_path: str, **kwargs: Any) -> str: + from helper.http_client import HTTPClient + + if not os.path.exists(file_path): + raise FileNotFoundError(f"File not found: {file_path}") + + try: + headers = {"User-Agent": "Medeia-Macina/1.0"} + with HTTPClient(headers=headers) as client: + with open(file_path, 'rb') as f: + response = client.post( + "https://0x0.st", + files={"file": f} + ) + + if response.status_code == 200: + return response.text.strip() + else: + raise Exception(f"Upload failed: {response.status_code} - {response.text}") + + except Exception as e: + log(f"[0x0] Upload error: {e}", file=sys.stderr) + raise + + def validate(self) -> bool: + return True + + +class Matrix(FileProvider): + """File provider for Matrix (Element) chat rooms.""" + + def validate(self) -> bool: + if not self.config: + return False + matrix_conf = self.config.get('storage', {}).get('matrix', {}) + return bool( + matrix_conf.get('homeserver') and + matrix_conf.get('room_id') and + (matrix_conf.get('access_token') or matrix_conf.get('password')) + ) + + def upload(self, file_path: str, **kwargs: Any) -> str: + from pathlib import Path + + path = 
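The search-provider registry gives callers a single lookup point plus an availability map, so a missing optional dependency shows up as False instead of an import error. Usage sketch (query and config are placeholders):

```python
from helper.provider import get_search_provider, list_search_providers

config: dict = {}                                  # placeholder configuration

print(list_search_providers(config))               # e.g. {"libgen": True, "soulseek": False, ...}

provider = get_search_provider("youtube", config)
if provider is not None:
    for result in provider.search("field recording", limit=5):
        print(result.origin, result.title, result.path)
```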
Path(file_path) + if not path.exists(): + raise FileNotFoundError(f"File not found: {file_path}") + + matrix_conf = self.config.get('storage', {}).get('matrix', {}) + homeserver = matrix_conf.get('homeserver') + access_token = matrix_conf.get('access_token') + room_id = matrix_conf.get('room_id') + + if not homeserver.startswith('http'): + homeserver = f"https://{homeserver}" + + # Upload media + upload_url = f"{homeserver}/_matrix/media/v3/upload" + headers = { + "Authorization": f"Bearer {access_token}", + "Content-Type": "application/octet-stream" + } + + mime_type, _ = mimetypes.guess_type(path) + if mime_type: + headers["Content-Type"] = mime_type + + filename = path.name + + with open(path, 'rb') as f: + resp = requests.post(upload_url, headers=headers, data=f, params={"filename": filename}) + + if resp.status_code != 200: + raise Exception(f"Matrix upload failed: {resp.text}") + + content_uri = resp.json().get('content_uri') + if not content_uri: + raise Exception("No content_uri returned") + + # Send message + send_url = f"{homeserver}/_matrix/client/v3/rooms/{room_id}/send/m.room.message" + + # Determine message type + msgtype = "m.file" + ext = path.suffix.lower() + + AUDIO_EXTS = {'.mp3', '.flac', '.wav', '.m4a', '.aac', '.ogg', '.opus', '.wma', '.mka', '.alac'} + VIDEO_EXTS = {'.mp4', '.mkv', '.webm', '.mov', '.avi', '.flv', '.mpg', '.mpeg', '.ts', '.m4v', '.wmv'} + IMAGE_EXTS = {'.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.tiff'} + + if ext in AUDIO_EXTS: + msgtype = "m.audio" + elif ext in VIDEO_EXTS: + msgtype = "m.video" + elif ext in IMAGE_EXTS: + msgtype = "m.image" + + info = { + "mimetype": mime_type, + "size": path.stat().st_size + } + + payload = { + "msgtype": msgtype, + "body": filename, + "url": content_uri, + "info": info + } + + resp = requests.post(send_url, headers=headers, json=payload) + if resp.status_code != 200: + raise Exception(f"Matrix send message failed: {resp.text}") + + event_id = resp.json().get('event_id') + return f"https://matrix.to/#/{room_id}/{event_id}" + + +# File provider registry +_FILE_PROVIDERS = { + "0x0": ZeroXZero, + "matrix": Matrix, +} + + +def get_file_provider(name: str, config: Optional[Dict[str, Any]] = None) -> Optional[FileProvider]: + """Get a file provider by name.""" + provider_class = _FILE_PROVIDERS.get(name.lower()) + + if provider_class is None: + log(f"[provider] Unknown file provider: {name}", file=sys.stderr) + return None + + try: + provider = provider_class(config) + if not provider.validate(): + log(f"[provider] File provider '{name}' is not available", file=sys.stderr) + return None + return provider + except Exception as e: + log(f"[provider] Error initializing file provider '{name}': {e}", file=sys.stderr) + return None + + +def list_file_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bool]: + """List all file providers and their availability.""" + availability = {} + for name, provider_class in _FILE_PROVIDERS.items(): + try: + provider = provider_class(config) + availability[name] = provider.validate() + except Exception: + availability[name] = False + return availability + + + + diff --git a/helper/remote_storage_server.py b/helper/remote_storage_server.py index 5868fae..aa0c2e0 100644 --- a/helper/remote_storage_server.py +++ b/helper/remote_storage_server.py @@ -159,8 +159,8 @@ def create_app(): status["storage_path"] = str(STORAGE_PATH) status["storage_exists"] = STORAGE_PATH.exists() try: - from helper.local_library import LocalLibraryDB - with LocalLibraryDB(STORAGE_PATH) as db: 
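File providers resolve the same way through get_file_provider, with validate() deciding whether the backend is usable (for Matrix that means homeserver, room_id and a token or password in config). A sketch with placeholder values:

```python
from helper.provider import get_file_provider

config = {
    "storage": {
        "matrix": {                                # placeholder credentials
            "homeserver": "matrix.example.org",
            "room_id": "!room:example.org",
            "access_token": "<token>",
        }
    }
}

uploader = get_file_provider("0x0", config)        # or "matrix"
if uploader is not None:
    link = uploader.upload("/tmp/example.txt")     # placeholder file
    print("uploaded to:", link)
```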
+ from helper.folder_store import FolderDB + with FolderDB(STORAGE_PATH) as db: status["database_accessible"] = True except Exception as e: status["database_accessible"] = False @@ -177,7 +177,7 @@ def create_app(): @require_storage() def search_files(): """Search for files by name or tag.""" - from helper.local_library import LocalLibrarySearchOptimizer + from helper.folder_store import LocalLibrarySearchOptimizer query = request.args.get('q', '') limit = request.args.get('limit', 100, type=int) @@ -205,11 +205,11 @@ def create_app(): @require_storage() def get_file_metadata(file_hash: str): """Get metadata for a specific file by hash.""" - from helper.local_library import LocalLibraryDB + from helper.folder_store import FolderDB try: - with LocalLibraryDB(STORAGE_PATH) as db: - file_path = db.search_by_hash(file_hash) + with FolderDB(STORAGE_PATH) as db: + file_path = db.search_hash(file_hash) if not file_path or not file_path.exists(): return jsonify({"error": "File not found"}), 404 @@ -233,13 +233,13 @@ def create_app(): @require_storage() def index_file(): """Index a new file in the storage.""" - from helper.local_library import LocalLibraryDB + from helper.folder_store import FolderDB from helper.utils import sha256_file data = request.get_json() or {} file_path_str = data.get('path') tags = data.get('tags', []) - urls = data.get('urls', []) + url = data.get('url', []) if not file_path_str: return jsonify({"error": "File path required"}), 400 @@ -250,14 +250,14 @@ def create_app(): if not file_path.exists(): return jsonify({"error": "File does not exist"}), 404 - with LocalLibraryDB(STORAGE_PATH) as db: + with FolderDB(STORAGE_PATH) as db: db.get_or_create_file_entry(file_path) if tags: db.add_tags(file_path, tags) - if urls: - db.add_known_urls(file_path, urls) + if url: + db.add_url(file_path, url) file_hash = sha256_file(file_path) @@ -265,7 +265,7 @@ def create_app(): "hash": file_hash, "path": str(file_path), "tags_added": len(tags), - "urls_added": len(urls) + "url_added": len(url) }), 201 except Exception as e: logger.error(f"Index error: {e}", exc_info=True) @@ -280,11 +280,11 @@ def create_app(): @require_storage() def get_tags(file_hash: str): """Get tags for a file.""" - from helper.local_library import LocalLibraryDB + from helper.folder_store import FolderDB try: - with LocalLibraryDB(STORAGE_PATH) as db: - file_path = db.search_by_hash(file_hash) + with FolderDB(STORAGE_PATH) as db: + file_path = db.search_hash(file_hash) if not file_path: return jsonify({"error": "File not found"}), 404 @@ -299,7 +299,7 @@ def create_app(): @require_storage() def add_tags(file_hash: str): """Add tags to a file.""" - from helper.local_library import LocalLibraryDB + from helper.folder_store import FolderDB data = request.get_json() or {} tags = data.get('tags', []) @@ -309,8 +309,8 @@ def create_app(): return jsonify({"error": "Tags required"}), 400 try: - with LocalLibraryDB(STORAGE_PATH) as db: - file_path = db.search_by_hash(file_hash) + with FolderDB(STORAGE_PATH) as db: + file_path = db.search_hash(file_hash) if not file_path: return jsonify({"error": "File not found"}), 404 @@ -328,13 +328,13 @@ def create_app(): @require_storage() def remove_tags(file_hash: str): """Remove tags from a file.""" - from helper.local_library import LocalLibraryDB + from helper.folder_store import FolderDB tags_str = request.args.get('tags', '') try: - with LocalLibraryDB(STORAGE_PATH) as db: - file_path = db.search_by_hash(file_hash) + with FolderDB(STORAGE_PATH) as db: + file_path = 
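For the reworked `/index` handler above, a hedged client-side sketch of the new request body: the `urls` key becomes a singular `url` key that still carries a list. The server address, route, and auth header are placeholders, not taken from the diff.

```python
import requests

payload = {
    "path": "C:/storage/example.m4a",
    "tags": ["format:m4a"],
    "url": ["https://example.com/source-page"],   # renamed from "urls" in this change
}
resp = requests.post(
    "http://127.0.0.1:5000/index",                    # placeholder address and route
    json=payload,
    headers={"Authorization": "Bearer PLACEHOLDER"},  # auth scheme assumed
    timeout=30,
)
print(resp.status_code, resp.json())                  # expect 201 with a "url_added" count
```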
db.search_hash(file_hash) if not file_path: return jsonify({"error": "File not found"}), 404 @@ -358,11 +358,11 @@ def create_app(): @require_storage() def get_relationships(file_hash: str): """Get relationships for a file.""" - from helper.local_library import LocalLibraryDB + from helper.folder_store import FolderDB try: - with LocalLibraryDB(STORAGE_PATH) as db: - file_path = db.search_by_hash(file_hash) + with FolderDB(STORAGE_PATH) as db: + file_path = db.search_hash(file_hash) if not file_path: return jsonify({"error": "File not found"}), 404 @@ -378,7 +378,7 @@ def create_app(): @require_storage() def set_relationship(): """Set a relationship between two files.""" - from helper.local_library import LocalLibraryDB + from helper.folder_store import FolderDB data = request.get_json() or {} from_hash = data.get('from_hash') @@ -389,9 +389,9 @@ def create_app(): return jsonify({"error": "from_hash and to_hash required"}), 400 try: - with LocalLibraryDB(STORAGE_PATH) as db: - from_path = db.search_by_hash(from_hash) - to_path = db.search_by_hash(to_hash) + with FolderDB(STORAGE_PATH) as db: + from_path = db.search_hash(from_hash) + to_path = db.search_hash(to_hash) if not from_path or not to_path: return jsonify({"error": "File not found"}), 404 @@ -406,49 +406,49 @@ def create_app(): # URL OPERATIONS # ======================================================================== - @app.route('/urls/', methods=['GET']) + @app.route('/url/', methods=['GET']) @require_auth() @require_storage() - def get_urls(file_hash: str): - """Get known URLs for a file.""" - from helper.local_library import LocalLibraryDB + def get_url(file_hash: str): + """Get known url for a file.""" + from helper.folder_store import FolderDB try: - with LocalLibraryDB(STORAGE_PATH) as db: - file_path = db.search_by_hash(file_hash) + with FolderDB(STORAGE_PATH) as db: + file_path = db.search_hash(file_hash) if not file_path: return jsonify({"error": "File not found"}), 404 metadata = db.get_metadata(file_path) - urls = metadata.get('known_urls', []) if metadata else [] - return jsonify({"hash": file_hash, "urls": urls}), 200 + url = metadata.get('url', []) if metadata else [] + return jsonify({"hash": file_hash, "url": url}), 200 except Exception as e: - logger.error(f"Get URLs error: {e}", exc_info=True) + logger.error(f"Get url error: {e}", exc_info=True) return jsonify({"error": f"Failed: {str(e)}"}), 500 - @app.route('/urls/', methods=['POST']) + @app.route('/url/', methods=['POST']) @require_auth() @require_storage() - def add_urls(file_hash: str): - """Add URLs to a file.""" - from helper.local_library import LocalLibraryDB + def add_url(file_hash: str): + """Add url to a file.""" + from helper.folder_store import FolderDB data = request.get_json() or {} - urls = data.get('urls', []) + url = data.get('url', []) - if not urls: - return jsonify({"error": "URLs required"}), 400 + if not url: + return jsonify({"error": "url required"}), 400 try: - with LocalLibraryDB(STORAGE_PATH) as db: - file_path = db.search_by_hash(file_hash) + with FolderDB(STORAGE_PATH) as db: + file_path = db.search_hash(file_hash) if not file_path: return jsonify({"error": "File not found"}), 404 - db.add_known_urls(file_path, urls) - return jsonify({"hash": file_hash, "urls_added": len(urls)}), 200 + db.add_url(file_path, url) + return jsonify({"hash": file_hash, "url_added": len(url)}), 200 except Exception as e: - logger.error(f"Add URLs error: {e}", exc_info=True) + logger.error(f"Add url error: {e}", exc_info=True) return jsonify({"error": 
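The URL endpoints above are renamed from `/urls/...` to `/url/...` and their payloads switch to the singular `url` key; a hedged sketch of the new calls, with the base address, hash segment, and auth header as placeholders.

```python
import requests

BASE = "http://127.0.0.1:5000"       # placeholder
FILE_HASH = "PLACEHOLDER_SHA256"     # placeholder
HEADERS = {"Authorization": "Bearer PLACEHOLDER"}

# GET now returns {"hash": ..., "url": [...]} instead of a "urls" list
print(requests.get(f"{BASE}/url/{FILE_HASH}", headers=HEADERS).json())

# POST likewise accepts {"url": [...]} and reports "url_added"
print(requests.post(f"{BASE}/url/{FILE_HASH}",
                    json={"url": ["https://example.com/page"]},
                    headers=HEADERS).json())
```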
f"Failed: {str(e)}"}), 500 return app @@ -509,8 +509,8 @@ def main(): print(f"\n{'='*70}\n") try: - from helper.local_library import LocalLibraryDB - with LocalLibraryDB(STORAGE_PATH) as db: + from helper.folder_store import FolderDB + with FolderDB(STORAGE_PATH) as db: logger.info("Database initialized successfully") except Exception as e: logger.error(f"Failed to initialize database: {e}") diff --git a/helper/search_provider.py b/helper/search_provider.py deleted file mode 100644 index 668aa68..0000000 --- a/helper/search_provider.py +++ /dev/null @@ -1,2224 +0,0 @@ -""" -SearchProvider: Unified interface for different search backends. - -This module defines a base class and registry for search providers that can be -used by search-file and other search-related cmdlets to handle different sources: -- Local file storage (LocalStorageBackend) -- Hydrus database -- AllDebrid magnets (search-debrid) -- Library Genesis / OpenLibrary books (search-libgen) -- Soulseek P2P network (search-soulseek) -- IMDB movies (future) -- Other sources - -Usage: - from helper.search_provider import SearchProvider, get_provider - - provider = get_provider("libgen") - results = provider.search("python programming", limit=10) - - for result in results: - print(result["title"], result["target"], result["annotations"]) -""" - -from __future__ import annotations - -from abc import ABC, abstractmethod -from typing import Any, Dict, List, Optional, Sequence, Tuple -from dataclasses import dataclass -from pathlib import Path -import sys - -try: - from playwright.sync_api import sync_playwright - PLAYWRIGHT_AVAILABLE = True -except ImportError: - PLAYWRIGHT_AVAILABLE = False -import subprocess -import json -import shutil -from helper.logger import log, debug - - -from helper.logger import log, debug - - -@dataclass -class SearchResult: - """Unified search result format across all providers.""" - - # Required fields - origin: str # Provider name: "libgen", "soulseek", "debrid", "local", "hydrus", etc. - title: str # Display title/filename - target: str # Unique identifier or download target (URL, path, magnet hash, etc.) - - # Optional fields - detail: str = "" # Additional details (size, status, format, etc.) - annotations: List[str] = None # Tags/annotations: ["ready", "120MB", "mp3", etc.] - media_kind: str = "other" # Type: "book", "audio", "video", "file", "magnet", etc. - size_bytes: Optional[int] = None # File size in bytes - tags: Optional[set[str]] = None # Searchable tags - full_metadata: Optional[Dict[str, Any]] = None # Extra metadata (author, year, etc.) - columns: List[Tuple[str, str]] = None # Display columns: [("Header", "value"), ...] for result table - - def __post_init__(self): - """Ensure mutable defaults are properly initialized.""" - if self.annotations is None: - self.annotations = [] - if self.tags is None: - self.tags = set() - if self.full_metadata is None: - self.full_metadata = {} - if self.columns is None: - self.columns = [] - - def to_dict(self) -> Dict[str, Any]: - """Convert to dictionary for JSON serialization. - - Note: full_metadata is excluded from dict to keep response size small - until the result is actually selected/used. This speeds up initial - search result display and piping. 
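To make the serialization note above concrete, a standalone rendition of the `to_dict()` trade-off in the `SearchResult` class being deleted here: `full_metadata` stays on the object but is never included in the dictionary, keeping piped results small. Field names match the dataclass above; the class name is shortened for the sketch.

```python
from dataclasses import dataclass, field
from typing import Any, Dict, List

@dataclass
class _Result:                                   # abbreviated stand-in for SearchResult
    origin: str
    title: str
    target: str
    annotations: List[str] = field(default_factory=list)
    full_metadata: Dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> Dict[str, Any]:
        # full_metadata deliberately omitted, as in the original to_dict()
        return {"origin": self.origin, "title": self.title,
                "target": self.target, "annotations": self.annotations}

r = _Result("libgen", "Some Book", "http://mirror.example/book", ["1.2MB"], {"md5": "abc"})
print(r.to_dict())                               # no "full_metadata" key in the output
```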
- """ - data = { - "origin": self.origin, - "title": self.title, - "target": self.target, - "detail": self.detail, - "annotations": self.annotations, - "media_kind": self.media_kind, - "size_bytes": self.size_bytes, - "tags": list(self.tags) if self.tags else [], - } - if self.columns: - data["columns"] = list(self.columns) - # Note: full_metadata is NOT included in dict to keep payload small - return data - - -class SearchProvider(ABC): - """Abstract base class for search providers.""" - - # Provider-specific field definitions: list of (api_field_name, display_column_name, formatter_func) - # Override in subclasses to define which fields to request and how to display them - # Example: [("title", "Title", None), ("author_name", "Author(s)", lambda x: ", ".join(x) if isinstance(x, list) else x)] - RESULT_FIELDS: List[Tuple[str, str, Optional[Any]]] = [] - - def __init__(self, config: Dict[str, Any] = None): - """ - Initialize provider with optional configuration. - - Args: - config: Configuration dictionary (global config dict) - """ - self.config = config or {} - self.name = self.__class__.__name__.replace("Provider", "").lower() - - @abstractmethod - def search( - self, - query: str, - limit: int = 50, - filters: Optional[Dict[str, Any]] = None, - **kwargs - ) -> List[SearchResult]: - """ - Search for items matching the query. - - Args: - query: Search query string. Special value "*" means "match all" - limit: Maximum number of results to return - filters: Optional filtering criteria (type, size, status, etc.) - **kwargs: Provider-specific arguments - - Returns: - List of SearchResult objects - """ - pass - - @abstractmethod - def get_result_args(self) -> List[str]: - """ - Get command-line arguments from a search result to pass to downstream cmdlets. - - Example: For libgen, returns ["-url", result.target] - For soulseek, returns ["-id", result.target] - For local, returns ["-path", result.target] - - Returns: - List of arguments to append to cmdlet invocation - """ - pass - - def parse_args(self, args: Sequence[str]) -> Tuple[str, Dict[str, Any]]: - """ - Parse provider-specific command-line arguments. - - Args: - args: Sequence of command-line arguments - - Returns: - Tuple of (query, filters_dict) - """ - # Default implementation: first arg is query, rest are filters - query = args[0] if args else "" - filters = {} - return query, filters - - def validate(self) -> bool: - """ - Validate that provider is properly configured and ready to use. - - Returns: - True if provider is available, False otherwise - """ - return True - - def get_columns_format(self) -> List[str]: - """ - Define which columns this provider displays in result table. - - Returns: - List of column names to display. - Each provider can override to customize result table appearance. - Examples: ["Title", "Author", "Year"] for books - ["Title", "Duration", "Format"] for media - ["Title", "Size", "Status"] for files - - Default: Empty list (uses traditional detail/origin/media_kind/target) - """ - return [col_name for _, col_name, _ in self.RESULT_FIELDS] if self.RESULT_FIELDS else [] - - def get_api_fields_string(self) -> str: - """ - Generate comma-separated API fields string from RESULT_FIELDS. 
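A worked example of the `RESULT_FIELDS` plumbing described above: the same list of `(api_field, column_header, formatter)` triples drives both the comma-separated API field string and the rendered table columns. The logic mirrors `get_api_fields_string()` and `build_columns_from_doc()`; the sample document is invented.

```python
RESULT_FIELDS = [
    ("title", "Title", None),
    ("author_name", "Author(s)", lambda x: ", ".join(x) if isinstance(x, list) else x),
    ("first_publish_year", "Year", None),
]

# get_api_fields_string() equivalent
api_fields = ",".join(name for name, _, _ in RESULT_FIELDS)
print(api_fields)                     # "title,author_name,first_publish_year"

# build_columns_from_doc() equivalent for one API document
doc = {"title": "Example", "author_name": ["A. Writer", "B. Editor"], "first_publish_year": 1999}
columns = [("#", "1")]
for api_name, header, fmt in RESULT_FIELDS:
    value = doc.get(api_name, "")
    if fmt and value:
        value = fmt(value)
    columns.append((header, str(value) if value else "Unknown"))
print(columns)  # [('#', '1'), ('Title', 'Example'), ('Author(s)', 'A. Writer, B. Editor'), ('Year', '1999')]
```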
- - Returns: - Comma-separated string of API field names to request - Example: "title,author_name,first_publish_year,isbn,key" - """ - if not self.RESULT_FIELDS: - return "" - return ",".join(field_name for field_name, _, _ in self.RESULT_FIELDS) - - def build_columns_from_doc(self, doc: Dict[str, Any], idx: int = None) -> List[Tuple[str, str]]: - """ - Dynamically build columns from a result document using RESULT_FIELDS definition. - - Args: - doc: API response document (dict with field values) - idx: Optional index/number for the result (typically added as first column) - - Returns: - List of (header, value) tuples ready for SearchResult.columns - """ - columns = [] - - # Add index as first column if provided - if idx is not None: - columns.append(("#", str(idx))) - - # Process each field definition - for api_field_name, display_col_name, formatter_func in self.RESULT_FIELDS: - value = doc.get(api_field_name, "") - - # Apply formatter if defined - if formatter_func and value: - value = formatter_func(value) - - # Convert to string and add to columns - value_str = str(value) if value else "Unknown" - columns.append((display_col_name, value_str)) - - return columns - - -class LocalStorageProvider(SearchProvider): - """Search provider for local file system storage.""" - - def __init__(self, config: Dict[str, Any] = None): - super().__init__(config) - self.name = "local" - # Import here to avoid circular dependency - from helper.file_storage import FileStorage - self.storage = FileStorage(config) - - def search( - self, - query: str, - limit: int = 50, - filters: Optional[Dict[str, Any]] = None, - **kwargs - ) -> List[SearchResult]: - """Search local file storage.""" - filters = filters or {} - backend_name = filters.get("backend", "local") - - try: - # Use the backend from FileStorage - results = self.storage[backend_name].search(query, limit=limit) - - search_results = [] - for result_dict in results: - path = result_dict.get("path", "") - size = result_dict.get("size") - annotations = [] - - if size: - annotations.append(f"{size / 1e6:.1f}MB") - - search_results.append(SearchResult( - origin="local", - title=path.split("\\")[-1] if path else "Unknown", - target=path, - detail=f"Local: {path}", - annotations=annotations, - size_bytes=size, - )) - - return search_results - - except Exception as e: - log(f"[local] Search error: {e}", file=sys.stderr) - return [] - - def get_result_args(self) -> List[str]: - """Local storage uses -path argument.""" - return ["-path"] - - def validate(self) -> bool: - """Local storage is always available.""" - return True - - -class LibGenProvider(SearchProvider): - """Search provider for Library Genesis books.""" - - RESULT_FIELDS: List[Tuple[str, str, Optional[Any]]] = [] # columns built manually - - def __init__(self, config: Dict[str, Any] = None): - super().__init__(config) - self.name = "libgen" - - def search( - self, - query: str, - limit: int = 50, - filters: Optional[Dict[str, Any]] = None, - **kwargs - ) -> List[SearchResult]: - """Search Library Genesis for books. - - Supports dynamic query format: - - isbn:0557677203 - - author:"Albert Pike" - - title:"Book Title" - - Combination: isbn:0557677203 author:"Albert Pike" free text - - Priority: ISBN is the authoritative key for searching. 
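The priority rule in the docstring above is applied a few lines below when the libgen search string is built; a compact restatement under an invented helper name (`build_libgen_query` is not part of the module).

```python
def build_libgen_query(isbn=None, title=None, author=None, free_text=""):
    if isbn:
        return isbn          # ISBN is the authoritative key
    if title:
        return title
    if author:
        return author
    return free_text

print(build_libgen_query(isbn="0557677203", author="Albert Pike"))   # "0557677203"
print(build_libgen_query(author="Albert Pike", free_text="morals"))  # "Albert Pike"
```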
- """ - filters = filters or {} - - try: - from helper.unified_book_downloader import UnifiedBookDownloader - from helper.query_parser import parse_query, get_field, get_free_text - - debug(f"[libgen] Starting search for: {query}") - - # Parse the query to extract structured fields - parsed = parse_query(query) - isbn = get_field(parsed, 'isbn') - author = get_field(parsed, 'author') - title = get_field(parsed, 'title') - free_text = get_free_text(parsed) - - # Build the search query for libgen - # Priority: isbn (authoritative key) > title > author > free_text - if isbn: - search_query = isbn - elif title: - search_query = title - elif author: - search_query = author - else: - search_query = free_text or query - - debug(f"[libgen] Built search query: {search_query}") - - downloader = UnifiedBookDownloader(config=self.config) - search_fn = getattr(downloader, "search_libgen", None) - - if not callable(search_fn): - log("[libgen] Searcher unavailable", file=sys.stderr) - return [] - - debug(f"[libgen] Calling search_libgen with query: {search_query}") - books = search_fn(search_query, limit=limit) - debug(f"[libgen] Got {len(books) if books else 0} results from search_libgen") - - search_results = [] - for idx, book in enumerate(books, 1): - title = book.get("title", "Unknown") - author = book.get("author", "Unknown") - year = book.get("year", "Unknown") - pages = book.get("pages") or book.get("pages_str") or "" - extension = book.get("extension", "") or book.get("ext", "") - filesize = book.get("filesize_str", "Unknown") - isbn = book.get("isbn", "") - mirror_url = book.get("mirror_url", "") - - # Columns: Title, Author, Pages, Ext - columns = [ - ("Title", title), - ("Author", author), - ("Pages", str(pages)), - ("Ext", str(extension)), - ] - - # Build detail with author and year - detail = f"By: {author}" - if year and year != "Unknown": - detail += f" ({year})" - - annotations = [f"{filesize}"] - if isbn: - annotations.append(f"ISBN: {isbn}") - - # Store full book data without mirrors in metadata to avoid serialization overhead - search_results.append(SearchResult( - origin="libgen", - title=title, - target=mirror_url or f"libgen:{book.get('id', '')}", - detail=detail, - annotations=annotations, - media_kind="book", - columns=columns, - full_metadata={ - "number": idx, - "author": author, - "year": year, - "isbn": isbn, - "filesize": filesize, - # Exclude mirrors dict from metadata to reduce serialization overhead - # Mirrors can be re-fetched if the result is selected - "book_id": book.get("book_id", ""), - "md5": book.get("md5", ""), - }, - )) - - debug(f"[libgen] Returning {len(search_results)} formatted results") - return search_results - - except Exception as e: - log(f"[libgen] Search error: {e}", file=sys.stderr) - import traceback - log(traceback.format_exc(), file=sys.stderr) - return [] - - def get_result_args(self) -> List[str]: - """LibGen results use -url for download or -mirror for selection.""" - return ["-url"] - - def validate(self) -> bool: - """Check if LibGen downloader is available.""" - try: - from helper.unified_book_downloader import UnifiedBookDownloader - return True - except Exception: - return False - - -class SoulSeekProvider(SearchProvider): - """Search provider for Soulseek P2P network.""" - - # Allowed music file extensions - MUSIC_EXTENSIONS = { - '.flac', '.mp3', '.m4a', '.aac', '.ogg', '.opus', - '.wav', '.alac', '.wma', '.ape', '.aiff', '.dsf', - '.dff', '.wv', '.tta', '.tak', '.ac3', '.dts' - } - - # Display columns for search results - RESULT_FIELDS 
= [ - ("track_num", "Track", None), - ("title", "Title", None), - ("artist", "Artist", lambda x: (str(x)[:32] + '...') if x and len(str(x)) > 35 else x), - ("album", "Album", lambda x: (str(x)[:32] + '...') if x and len(str(x)) > 35 else x), - ("size", "Size", lambda x: f"{int(int(x)/1024/1024)} MB" if x else ""), - ] - - # Soulseek config - USERNAME = "asjhkjljhkjfdsd334" - PASSWORD = "khhhg" - DOWNLOAD_DIR = "./downloads" - MAX_WAIT_TRANSFER = 1200 - - def __init__(self, config: Dict[str, Any] = None): - super().__init__(config) - self.name = "soulseek" - - async def perform_search( - self, - query: str, - timeout: float = 9.0, - limit: int = 50 - ) -> List[Dict[str, Any]]: - """Perform async Soulseek search and return flattened results.""" - import asyncio - import os - import re - import time - from aioslsk.client import SoulSeekClient - from aioslsk.settings import Settings, CredentialsSettings - - os.makedirs(self.DOWNLOAD_DIR, exist_ok=True) - - settings = Settings(credentials=CredentialsSettings(username=self.USERNAME, password=self.PASSWORD)) - client = SoulSeekClient(settings) - - try: - await client.start() - await client.login() - except Exception as e: - log(f"[soulseek] Login failed: {type(e).__name__}: {e}", file=sys.stderr) - return [] - - try: - search_request = await client.searches.search(query) - await self._collect_search_results(client, search_request, timeout=timeout) - flat = self._flatten_search_results(search_request)[:limit] - return flat - except Exception as e: - log(f"[soulseek] Search error: {type(e).__name__}: {e}", file=sys.stderr) - return [] - finally: - try: - await client.stop() - except Exception: - pass - - def _flatten_search_results(self, search_request) -> List[dict]: - """Extract files from SearchRequest.results.""" - flat: List[dict] = [] - for result in search_request.results: - username = getattr(result, "username", "?") - - for file_data in getattr(result, "shared_items", []): - flat.append({ - "file": file_data, - "username": username, - "filename": getattr(file_data, "filename", "?"), - "size": getattr(file_data, "filesize", 0), - }) - - for file_data in getattr(result, "locked_results", []): - flat.append({ - "file": file_data, - "username": username, - "filename": getattr(file_data, "filename", "?"), - "size": getattr(file_data, "filesize", 0), - }) - - return flat - - async def _collect_search_results(self, client, search_request, timeout: float = 75.0) -> None: - """Collect search results by waiting.""" - import asyncio - import time - debug(f"[soulseek] Collecting results for {timeout}s...") - end = time.time() + timeout - last_count = 0 - while time.time() < end: - current_count = len(search_request.results) - if current_count > last_count: - debug(f"[soulseek] Got {current_count} result(s) so far...") - last_count = current_count - await asyncio.sleep(0.5) - - async def download_file( - self, - username: str, - filename: str, - file_size: int, - target_dir: Optional[str] = None - ) -> bool: - """Download a file from Soulseek to a specific directory.""" - import asyncio - import os - import time - from aioslsk.client import SoulSeekClient - from aioslsk.settings import Settings, CredentialsSettings - from aioslsk.events import TransferProgressEvent - from tqdm import tqdm - - download_dir = target_dir if target_dir else self.DOWNLOAD_DIR - os.makedirs(download_dir, exist_ok=True) - - settings = Settings(credentials=CredentialsSettings(username=self.USERNAME, password=self.PASSWORD)) - settings.shares.download = download_dir - client = 
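A hedged usage sketch of the async search helper above, mirroring how the synchronous `search()` wrapper further below drives it with `asyncio.run`. It assumes the `SoulSeekProvider` class from this module, the `aioslsk` dependency, and network access to the Soulseek server.

```python
import asyncio

provider = SoulSeekProvider(config={})   # class defined above in this module
flat = asyncio.run(provider.perform_search("artist album", timeout=9.0, limit=25))
for item in flat[:5]:
    # each entry is a dict with the username, full remote filename, and size in bytes
    print(item["username"], item["filename"], item["size"])
```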
SoulSeekClient(settings) - - try: - await client.start() - await client.login() - - debug(f"[soulseek] Starting: {filename} from {username}") - - transfer = await client.transfers.download(username, filename) - if transfer is None: - log("[soulseek] Failed: transfer object is None") - return False - - success = await self._wait_for_transfer(client, transfer, file_size=file_size, max_wait=self.MAX_WAIT_TRANSFER) - - return success - - except Exception as e: - log(f"[soulseek] Download error: {type(e).__name__}: {e}", file=sys.stderr) - return False - - finally: - try: - await client.stop() - except Exception: - pass - - async def _wait_for_transfer(self, client, transfer_obj: Any, file_size: Any = None, max_wait: float = 1200) -> bool: - """Wait for transfer finish using event listeners with TQDM progress bar. - - Returns: - True if transfer completed successfully, False if failed or timed out. - """ - import asyncio - import time - from aioslsk.events import TransferProgressEvent - from tqdm import tqdm - - if transfer_obj is None: - log("[soulseek] No transfer object returned") - return False - - transfer_finished = False - transfer_success = False - pbar = None - total_size = file_size - last_speed_time = time.time() - last_speed = 0 - - async def on_progress(event): - nonlocal last_speed_time, last_speed, transfer_finished, transfer_success, pbar, total_size - if not hasattr(event, 'updates') or not event.updates: - return - - for transfer, _, curr_snapshot in event.updates: - if (transfer.username == transfer_obj.username and transfer.remote_path == transfer_obj.remote_path): - bytes_xfer = getattr(curr_snapshot, 'bytes_transfered', 0) - state_name = curr_snapshot.state.name if hasattr(curr_snapshot, 'state') else "?" - speed = getattr(curr_snapshot, 'speed', 0) - - if total_size is None and hasattr(transfer, 'file_attributes'): - try: - size = getattr(transfer, 'file_size', None) or getattr(transfer, 'size', None) - if size: - total_size = size - except Exception: - pass - - if pbar is None: - total = total_size if total_size else 100 * 1024 * 1024 - pbar = tqdm(total=total, unit='B', unit_scale=True, desc='[transfer]') - - if pbar: - pbar.n = bytes_xfer - if speed > 0: - pbar.set_postfix({"speed": f"{speed/1024:.1f} KB/s", "state": state_name}) - pbar.refresh() - - if state_name in ('FINISHED', 'COMPLETE'): - if pbar: - pbar.close() - debug(f"[soulseek] Transfer {state_name.lower()}") - transfer_finished = True - transfer_success = True - return - elif state_name in ('ABORTED', 'FAILED', 'PAUSED'): - if pbar: - pbar.close() - debug(f"[soulseek] Transfer {state_name.lower()}") - transfer_finished = True - transfer_success = False - return - - if total_size and bytes_xfer >= total_size: - if pbar: - pbar.close() - debug(f"[soulseek] Transfer complete ({bytes_xfer / 1024 / 1024:.1f} MB)") - transfer_finished = True - transfer_success = True - return - - if speed == 0 and bytes_xfer > 0: - now = time.time() - if now - last_speed_time > 3: - if pbar: - pbar.close() - debug(f"[soulseek] Transfer complete ({bytes_xfer / 1024 / 1024:.1f} MB)") - transfer_finished = True - transfer_success = True - return - else: - last_speed_time = time.time() - - last_speed = speed - - client.events.register(TransferProgressEvent, on_progress) - end = time.time() + max_wait - - while time.time() < end: - if transfer_finished: - break - await asyncio.sleep(0.5) - - client.events.unregister(TransferProgressEvent, on_progress) - - if pbar: - pbar.close() - - if not transfer_finished: - log(f"[soulseek] Timed 
out after {max_wait}s; transfer may still be in progress") - return False - else: - return transfer_success - - def search( - self, - query: str, - limit: int = 50, - filters: Optional[Dict[str, Any]] = None, - **kwargs - ) -> List[SearchResult]: - """Search Soulseek P2P network (synchronous wrapper).""" - import asyncio - import re - - filters = filters or {} - - try: - # Run async search - flat_results = asyncio.run(self.perform_search(query, timeout=9.0, limit=limit)) - - if not flat_results: - return [] - - # Filter to music files only - music_results = [] - for item in flat_results: - filename = item['filename'] - if '.' in filename: - ext = '.' + filename.rsplit('.', 1)[-1].lower() - else: - ext = '' - - if ext in self.MUSIC_EXTENSIONS: - music_results.append(item) - - if not music_results: - return [] - - # Extract metadata for all results - enriched_results = [] - for item in music_results: - filename = item['filename'] - - # Extract extension - if '.' in filename: - _, ext = filename.rsplit('.', 1) - ext = '.' + ext.lower() - else: - ext = '' - - # Get display filename - if '\\' in filename: - display_name = filename.rsplit('\\', 1)[-1] - elif '/' in filename: - display_name = filename.rsplit('/', 1)[-1] - else: - display_name = filename - - # Extract path hierarchy for artist/album - path_parts = filename.replace('\\', '/').split('/') - artist = '' - album = '' - - if len(path_parts) >= 3: - artist = path_parts[-3] - album = path_parts[-2] - if ' - ' in album and re.match(r'^\d{4}', album): - album = album.split(' - ', 1)[1] - elif len(path_parts) == 2: - artist = path_parts[-2] - - # Extract track number and title - base_name = display_name.rsplit('.', 1)[0] if '.' in display_name else display_name - track_num = '' - title = base_name - filename_artist = '' - - # First, extract track number if present (e.g., "30 Stumfol - Prisoner" -> track=30, rest="Stumfol - Prisoner") - match = re.match(r'^(\d{1,3})\s*[\.\-]?\s+(.+)$', base_name) - if match: - track_num = match.group(1) - remainder = match.group(2) - - # Now parse "Artist - Title" from the remainder - # If there's a " - " separator, split on it - if ' - ' in remainder: - parts = remainder.split(' - ', 1) - filename_artist = parts[0].strip() - title = parts[1].strip() - else: - # No artist-title separator, use the whole remainder as title - title = remainder - else: - # No track number, check if there's "Artist - Title" format - if ' - ' in base_name: - parts = base_name.split(' - ', 1) - filename_artist = parts[0].strip() - title = parts[1].strip() - - # Use filename_artist if extracted, otherwise fall back to path artist - if filename_artist: - artist = filename_artist - - enriched_results.append({ - **item, - 'artist': artist, - 'album': album, - 'title': title, - 'track_num': track_num, - 'ext': ext - }) - - # Apply filters if specified - if filters: - artist_filter = filters.get('artist', '').lower() if filters.get('artist') else '' - album_filter = filters.get('album', '').lower() if filters.get('album') else '' - track_filter = filters.get('track', '').lower() if filters.get('track') else '' - - if artist_filter or album_filter or track_filter: - filtered_results = [] - for item in enriched_results: - if artist_filter and artist_filter not in (item['artist'] or '').lower(): - continue - if album_filter and album_filter not in (item['album'] or '').lower(): - continue - if track_filter and track_filter not in (item['title'] or '').lower(): - continue - filtered_results.append(item) - - enriched_results = filtered_results 
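A standalone rendition of the filename parsing above, mirroring the same regex and the `" - "` split: leading digits become the track number and an artist/title pair is taken from the remainder. The helper name is invented for the sketch.

```python
import re

def parse_track(display_name: str):
    base = display_name.rsplit(".", 1)[0] if "." in display_name else display_name
    track_num, artist, title = "", "", base
    m = re.match(r"^(\d{1,3})\s*[\.\-]?\s+(.+)$", base)   # same pattern as above
    rest = m.group(2) if m else base
    if m:
        track_num = m.group(1)
    if " - " in rest:
        artist, title = (p.strip() for p in rest.split(" - ", 1))
    else:
        title = rest
    return track_num, artist, title

print(parse_track("30 Stumfol - Prisoner.flac"))   # ('30', 'Stumfol', 'Prisoner')
print(parse_track("Prisoner.mp3"))                 # ('', '', 'Prisoner')
```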
- - # Sort: .flac first, then others - enriched_results.sort(key=lambda item: (item['ext'].lower() != '.flac', -item['size'])) - - # Convert to SearchResult format - search_results = [] - for idx, item in enumerate(enriched_results, 1): - artist_display = item['artist'] if item['artist'] else "(no artist)" - album_display = item['album'] if item['album'] else "(no album)" - size_mb = int(round(item['size'] / 1024 / 1024)) - - if item['track_num']: - track_title = f"[{item['track_num']}] {item['title']}" - else: - track_title = item['title'] or "(untitled)" - - # Build columns from enriched metadata - columns = self.build_columns_from_doc(item, idx=idx) - - search_results.append(SearchResult( - origin="soulseek", - title=track_title, - target=item['filename'], - detail=f"Artist: {artist_display} | Album: {album_display}", - annotations=[f"{size_mb} MB", item['ext']], - media_kind="audio", - size_bytes=item['size'], - columns=columns, - full_metadata={ - "artist": item['artist'], - "album": item['album'], - "track_num": item['track_num'], - "username": item['username'], - "filename": item['filename'], - "ext": item['ext'], - }, - )) - - return search_results - - except Exception as e: - log(f"Soulseek search error: {e}", file=sys.stderr) - return [] - - def get_result_args(self) -> List[str]: - """Soulseek results use filename/path for results.""" - return ["-path"] - - def validate(self) -> bool: - """Check if Soulseek client is available.""" - try: - import aioslsk # type: ignore - return True - except ImportError: - return False - - -class DebridProvider(SearchProvider): - """Search provider for AllDebrid magnets.""" - - # Status code mappings - STATUS_MAP = { - 0: "In Queue", - 1: "Downloading", - 2: "Compressing", - 3: "Uploading", - 4: "Ready", - 5: "Upload Failed", - 6: "Unpack Error", - 7: "Not Downloaded", - 8: "File Too Big", - 9: "Internal Error", - 10: "Download Timeout", - 11: "Deleted", - 12: "Processing Failed", - 13: "Processing Failed", - 14: "Tracker Error", - 15: "No Peers" - } - - def __init__(self, config: Dict[str, Any] = None): - super().__init__(config) - self.name = "debrid" - self._magnet_files_cache = {} - - def _format_size(self, bytes_val: float) -> str: - """Format bytes to human readable size.""" - for unit in ['B', 'KB', 'MB', 'GB', 'TB']: - if bytes_val < 1024: - return f"{bytes_val:.2f} {unit}" - bytes_val /= 1024 - return f"{bytes_val:.2f} PB" - - def _get_status_display(self, status_code: int) -> str: - """Get human-readable status for AllDebrid status codes.""" - return self.STATUS_MAP.get(status_code, f"Unknown ({status_code})") - - def _should_filter_magnet(self, status_code: int, status_text: str) -> bool: - """Check if magnet should be filtered out (expired/deleted).""" - # Filter expired/deleted entries - return status_code in (5, 6, 7, 8, 11, 12, 13, 14) - - def _fuzzy_match(self, text: str, pattern: str) -> bool: - """Check if pattern fuzzy-matches text (case-insensitive, substring matching).""" - return pattern.lower() in text.lower() - - def search( - self, - query: str, - limit: int = 50, - filters: Optional[Dict[str, Any]] = None, - **kwargs - ) -> List[SearchResult]: - """Search AllDebrid magnets with optional status and name filtering. 
- - Args: - query: Search query (magnet filename or '*' for all) - limit: Max results to return - filters: Optional dict with 'status' filter ('all', 'active', 'ready', 'error') - - Returns: - List of SearchResult objects - """ - filters = filters or {} - - try: - from helper.alldebrid import AllDebridClient - from config import get_debrid_api_key - - api_key = get_debrid_api_key(self.config) - - if not api_key: - log("[debrid] API key not configured", file=sys.stderr) - return [] - - client = AllDebridClient(api_key) - - # Parse status filter - status_filter_param = filters.get('status', 'all').lower() if filters.get('status') else 'all' - - # Get magnets with optional status filter - response = client._request("magnet/status", {}) - - if response.get("status") != "success": - log(f"[debrid] API error: {response.get('error', 'Unknown')}", file=sys.stderr) - return [] - - magnets = response.get("data", {}).get("magnets", []) - - # Handle both list and dict formats - if isinstance(magnets, dict): - magnets = list(magnets.values()) - - # Filter by status if specified - if status_filter_param == 'active': - magnets = [m for m in magnets if m.get('statusCode', -1) in (0, 1, 2, 3)] - elif status_filter_param == 'ready': - magnets = [m for m in magnets if m.get('statusCode', -1) == 4] - elif status_filter_param == 'error': - magnets = [m for m in magnets if m.get('statusCode', -1) in (5, 6, 8, 9, 10, 12, 13, 14, 15)] - # 'all' includes everything - - # Filter by query (fuzzy match on filename) - results = [] - count = 0 - for magnet in magnets: - if count >= limit: - break - - filename = magnet.get("filename", "") - status_code = magnet.get("statusCode", -1) - status_text = magnet.get("status", "Unknown") - - # Skip expired/deleted unless 'all' filter - if status_filter_param != 'all' and self._should_filter_magnet(status_code, status_text): - continue - - # Apply query filter (skip if doesn't match) - if query and query != "*" and not self._fuzzy_match(filename, query): - continue - - magnet_id = magnet.get("id") - size = magnet.get("size", 0) - downloaded = magnet.get("downloaded", 0) - progress = (downloaded / size * 100) if size > 0 else 0 - - # Get status emoji - if status_code == 4: - status_emoji = "✓" - elif status_code < 4: - status_emoji = "⧗" - else: - status_emoji = "✗" - - annotations = [self._get_status_display(status_code)] - if size > 0: - annotations.append(self._format_size(size)) - if progress > 0 and progress < 100: - annotations.append(f"{progress:.1f}%") - - results.append(SearchResult( - origin="debrid", - title=filename or "Unknown", - target=str(magnet_id), - detail=f"{status_emoji} {self._get_status_display(status_code)} | {self._format_size(size)}", - annotations=annotations, - media_kind="magnet", - size_bytes=size, - full_metadata={ - "magnet_id": magnet_id, - "status_code": status_code, - "status_text": status_text, - "progress": progress, - "downloaded": downloaded, - "seeders": magnet.get("seeders", 0), - "download_speed": magnet.get("downloadSpeed", 0), - }, - )) - - count += 1 - - # Cache metadata for ready magnets - if results: - self._cache_ready_magnet_metadata(client, [r for r in results if r.full_metadata.get('status_code') == 4]) - - return results - - except Exception as e: - log(f"Debrid search error: {e}", file=sys.stderr) - return [] - - def _cache_ready_magnet_metadata(self, client, results: List[SearchResult]) -> None: - """Cache file metadata for ready magnets.""" - if not results: - return - - try: - ready_ids = [r.full_metadata.get('magnet_id') 
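A compact restatement of the status-filter buckets used above for AllDebrid magnets; the numeric codes correspond to the provider's `STATUS_MAP`, and the helper name is invented.

```python
STATUS_BUCKETS = {
    "active": {0, 1, 2, 3},                        # queue / downloading / compressing / uploading
    "ready": {4},                                  # finished and downloadable
    "error": {5, 6, 8, 9, 10, 12, 13, 14, 15},     # failure and expiry codes
}

def keep_magnet(status_code: int, status_filter: str = "all") -> bool:
    if status_filter == "all":
        return True
    return status_code in STATUS_BUCKETS.get(status_filter, set())

print(keep_magnet(4, "ready"))    # True
print(keep_magnet(1, "ready"))    # False
print(keep_magnet(11, "error"))   # False, code 11 ("Deleted") is handled by the separate expiry filter
```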
for r in results if r.full_metadata.get('status_code') == 4] - if ready_ids: - self._magnet_files_cache = client.magnet_links(ready_ids) - log(f"[debrid] Cached metadata for {len(self._magnet_files_cache)} ready magnet(s)", file=sys.stderr) - except Exception as e: - log(f"[debrid] Warning: Could not cache magnet metadata: {e}", file=sys.stderr) - - def get_magnet_metadata(self, magnet_id: int) -> Optional[Dict[str, Any]]: - """Get cached metadata for a magnet.""" - return self._magnet_files_cache.get(str(magnet_id)) - - def get_result_args(self) -> List[str]: - """Debrid results use magnet ID for download.""" - return ["-id"] - - def validate(self) -> bool: - """Check if AllDebrid is configured.""" - from config import get_debrid_api_key - return bool(get_debrid_api_key(self.config)) - - -class OpenLibraryProvider(SearchProvider): - """Search provider for OpenLibrary.""" - - # Define fields to request from API and how to display them - RESULT_FIELDS: List[Tuple[str, str, Optional[Any]]] = [] # columns built manually - - def __init__(self, config: Dict[str, Any] = None): - super().__init__(config) - self.name = "openlibrary" - - def _derive_status(self, doc: Dict[str, Any]) -> tuple[str, Optional[str]]: - """Determine availability label and archive identifier.""" - ebook_access = str(doc.get("ebook_access", "") or "").strip().lower() - has_fulltext = bool(doc.get("has_fulltext")) - ia_entries = doc.get("ia") - archive_id = "" - if isinstance(ia_entries, list): - for entry in ia_entries: - if isinstance(entry, str) and entry.strip(): - archive_id = entry.strip() - break - elif isinstance(ia_entries, str) and ia_entries.strip(): - archive_id = ia_entries.strip() - elif isinstance(doc.get("ocaid"), str) and doc["ocaid"].strip(): - archive_id = doc["ocaid"].strip() - - available = False - if ebook_access in {"borrowable", "public", "full"}: - available = True - elif has_fulltext: - available = True - elif archive_id: - available = True - - status = "download" if available else "?Libgen" - return status, archive_id or None - - def search( - self, - query: str, - limit: int = 50, - filters: Optional[Dict[str, Any]] = None, - **kwargs - ) -> List[SearchResult]: - """Search OpenLibrary for books. - - Smart search that detects ISBN, OCLC, OpenLibrary ID, and falls back to title search. - """ - filters = filters or {} - - try: - import requests - - query_clean = query.strip() - search_url = "https://openlibrary.org/search.json" - - # Try to detect query type (ISBN, OCLC, OL ID, or title) - if query_clean.isdigit() and len(query_clean) in (10, 13): - # ISBN search - url = f"https://openlibrary.org/isbn/{query_clean}.json" - response = requests.get(url, timeout=9) - if response.status_code == 200: - book_data = response.json() - return [self._format_isbn_result(book_data, query_clean)] - elif response.status_code == 404: - return [] - - # Default to title/general search - requested_fields = [ - "title", - "author_name", - "first_publish_year", - "number_of_pages_median", - "isbn", - "oclc_numbers", - "lccn", - "language", - "key", - "edition_key", - "ebook_access", - "ia", - "has_fulltext", - ] - params = { - "q": query_clean, - "limit": limit, - "fields": ",".join(requested_fields), - } - - response = requests.get(search_url, params=params, timeout=9) - response.raise_for_status() - data = response.json() - - search_results = [] - for idx, doc in enumerate(data.get("docs", []), 1): - # Prefer edition_key (books/OLxxxM). Fallback to work key. 
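A hedged sketch of the query routing above: a purely numeric 10- or 13-character query is treated as an ISBN and resolved through the `/isbn/{isbn}.json` endpoint, anything else falls back to `search.json`. The helper name and the naive space encoding are simplifications; the real code passes the query via request params.

```python
def openlibrary_url(query: str) -> str:
    q = query.strip()
    if q.isdigit() and len(q) in (10, 13):               # same ISBN heuristic as above
        return f"https://openlibrary.org/isbn/{q}.json"
    return "https://openlibrary.org/search.json?q=" + q.replace(" ", "+")

print(openlibrary_url("0557677203"))        # ISBN route
print(openlibrary_url("morals and dogma"))  # general title search route
```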
- edition_keys = doc.get("edition_key") or [] - olid = "" - if isinstance(edition_keys, list) and edition_keys: - olid = str(edition_keys[0]).strip() - if not olid: - olid = doc.get("key", "").split("/")[-1] - - # Determine status/availability - status, archive_id = self._derive_status(doc) - doc["status"] = status - - # Extract additional metadata - title = doc.get("title", "Unknown") - authors = doc.get("author_name", ["Unknown"]) - year = doc.get("first_publish_year", "") - isbn_list = doc.get("isbn", []) - isbn = isbn_list[0] if isbn_list else "" - oclc_list = doc.get("oclc_numbers", []) - oclc = oclc_list[0] if oclc_list else "" - lccn_list = doc.get("lccn", []) - lccn = lccn_list[0] if lccn_list else "" - pages = doc.get("number_of_pages_median", "") - languages = doc.get("language", []) - language = languages[0] if languages else "" - - author_str = ", ".join(authors) if authors else "Unknown" - - # Columns: Title, Author, Pages - columns = [ - ("Title", title), - ("Author", author_str), - ("Pages", str(pages or "")), - ] - - # Build detail with author and year - detail = f"By: {author_str}" - if year: - detail += f" ({year})" - - # Build annotations with additional info - annotations = [] - if pages: - annotations.append(f"{pages} pages") - if isbn: - annotations.append(f"ISBN: {isbn}") - - search_results.append(SearchResult( - origin="openlibrary", - title=title, - target=f"https://openlibrary.org/books/{olid}", - detail=detail, - annotations=annotations, - media_kind="book", - columns=columns, - full_metadata={ - "number": idx, - "authors": authors, - "year": year, - "isbn": isbn, - "oclc": oclc, - "lccn": lccn, - "pages": pages, - "language": language, - "olid": olid, - "ebook_access": doc.get("ebook_access", ""), - "status": status, - "archive_id": archive_id, - }, - )) - - # Sort results: borrowable ones first, then not borrowable, then unknown - def sort_key(result): - status = (result.full_metadata.get("status") or "").strip().lower() - if status == "download": - return (0, result.title) - elif status.startswith("?libgen"): - return (1, result.title) - else: - return (2, result.title) - - search_results.sort(key=sort_key) - - # Rebuild number field after sorting - for new_idx, result in enumerate(search_results, 1): - result.full_metadata["number"] = new_idx - # Update the # column in columns - if result.columns and result.columns[0][0] == "#": - result.columns[0] = ("#", str(new_idx)) - - return search_results - - except Exception as e: - log(f"OpenLibrary search error: {e}", file=sys.stderr) - return [] - - def _format_isbn_result(self, book_data: Dict[str, Any], isbn: str) -> SearchResult: - """Format a book result from ISBN endpoint.""" - # Get title from book data - title = book_data.get("title", "Unknown") - - # Get authors - author_list = [] - for author_key in book_data.get("authors", []): - if isinstance(author_key, dict): - author_list.append(author_key.get("name", "")) - elif isinstance(author_key, str): - author_list.append(author_key) - - author_str = ", ".join(filter(None, author_list)) if author_list else "Unknown" - - # Extract other metadata - year = book_data.get("first_publish_year", "") - publishers = book_data.get("publishers", []) - publisher = publishers[0].get("name", "") if publishers and isinstance(publishers[0], dict) else "" - pages = book_data.get("number_of_pages", "") - languages = book_data.get("languages", []) - language = languages[0].get("key", "").replace("/languages/", "") if languages else "" - olid = book_data.get("key", 
"").split("/")[-1] if book_data.get("key") else "" - - # Build doc for column rendering - doc = { - "title": title, - "author_name": author_list, - "first_publish_year": year, - "ebook_access": book_data.get("ebook_access", ""), - "has_fulltext": bool(book_data.get("ocaid")), - "ia": [book_data.get("ocaid")] if book_data.get("ocaid") else [], - "ocaid": book_data.get("ocaid", ""), - } - status, archive_id = self._derive_status(doc) - doc["status"] = status - - # Build detail - detail = f"By: {author_str}" - if year: - detail += f" ({year})" - - # Build annotations - annotations = [] - if pages: - annotations.append(f"{pages} pages") - annotations.append(f"ISBN: {isbn}") - - # Build columns using shared helper for consistency - columns = self.build_columns_from_doc(doc, idx=1) - - return SearchResult( - origin="openlibrary", - title=title, - target=f"https://openlibrary.org/books/{olid}", - detail=detail, - annotations=annotations, - media_kind="book", - columns=columns, - full_metadata={ - "number": 1, - "authors": author_list, - "year": year, - "isbn": isbn, - "oclc": "", - "lccn": "", - "pages": pages, - "language": language, - "olid": olid, - "publisher": publisher, - "ebook_access": doc.get("ebook_access", ""), - "status": status, - "archive_id": archive_id, - }, - ) - - def get_result_args(self) -> List[str]: - """OpenLibrary results are info/links only.""" - return ["-info"] - - def validate(self) -> bool: - """OpenLibrary is always available (no auth needed).""" - return True - - -class GogGamesProvider(SearchProvider): - """Search provider for GOG Games.""" - - def __init__(self, config: Dict[str, Any] = None): - super().__init__(config) - self.name = "gog" - self.base_url = "https://gog-games.to" - self.headers = { - "Referer": "https://gog-games.to/", - "Origin": "https://gog-games.to", - "X-Requested-With": "XMLHttpRequest" - } - - def _request(self, client, endpoint: str, is_json: bool = True) -> Any: - """Helper for API requests.""" - url = f"{self.base_url}/api/web/{endpoint}" - try: - response = client.get(url, headers=self.headers) - if response.status_code == 200: - return response.json() if is_json else response.text - elif response.status_code == 404: - return None - else: - log(f"[gog] API request failed: {response.status_code} for {endpoint}", file=sys.stderr) - return None - except Exception as e: - log(f"[gog] Request error: {e}", file=sys.stderr) - return None - - def get_all_games(self, client) -> List[Dict[str, Any]]: - """Fetch all games from the API.""" - return self._request(client, "all-games") or [] - - def get_game_details(self, client, slug: str) -> Optional[Dict[str, Any]]: - """Fetch details for a specific game.""" - return self._request(client, f"query-game/{slug}") - - def get_game_md5(self, client, slug: str) -> Optional[str]: - """Fetch MD5 checksums for a game.""" - return self._request(client, f"download-md5/{slug}", is_json=False) - - def search( - self, - query: str, - limit: int = 50, - filters: Optional[Dict[str, Any]] = None, - **kwargs - ) -> List[SearchResult]: - """Search GOG Games.""" - from helper.http_client import HTTPClient - - results = [] - query_norm = query.strip().lower() - - with HTTPClient() as client: - # 1. Fetch all games to perform fuzzy search - all_games = self.get_all_games(client) - - matches = [] - if all_games: - for game in all_games: - if (query_norm in game.get("title", "").lower() or - query_norm in game.get("slug", "").lower()): - matches.append(game) - - # 2. 
Fallback: If no matches and query looks like a slug, try direct lookup - if not matches and "_" in query_norm: - details = self.get_game_details(client, query_norm) - if details and "game_info" in details: - matches.append(details["game_info"]) - - for game in matches[:limit]: - slug = game.get("slug") - title = game.get("title", slug) - infohash = game.get("infohash") - gog_url = game.get("gog_url", "") - - # Note: 'all-games' endpoint doesn't provide file size. - # We set size to 0 to avoid N+1 requests. - - if infohash: - magnet_link = f"magnet:?xt=urn:btih:{infohash}&dn={slug}" - results.append(SearchResult( - origin="gog", - title=title, - target=magnet_link, - media_kind="magnet", - detail="Magnet Link", - size_bytes=0, - annotations=["Magnet"], - full_metadata=game - )) - else: - results.append(SearchResult( - origin="gog", - title=title, - target=gog_url, - media_kind="game", - detail="No magnet available", - size_bytes=0, - annotations=["No Magnet"], - full_metadata=game - )) - - return results - - def get_result_args(self) -> List[str]: - """GOG results are URLs.""" - return ["-url"] - - def validate(self) -> bool: - """GOG Games is a public website.""" - return True - - -class YoutubeSearchProvider(SearchProvider): - """ - Search provider for YouTube using yt-dlp. - """ - - RESULT_FIELDS = [ - ("title", "Title", None), - ("uploader", "Uploader", None), - ("duration_string", "Duration", None), - ("view_count", "Views", lambda x: f"{x:,}" if x else ""), - ] - - def search(self, query: str, limit: int = 10, filters: Optional[Dict[str, Any]] = None, **kwargs) -> List[SearchResult]: - """ - Search YouTube using yt-dlp. - - Args: - query: Search query - limit: Maximum number of results - filters: Optional filtering criteria (ignored for now) - - Returns: - List of SearchResult objects - """ - # Check if yt-dlp is available - ytdlp_path = shutil.which("yt-dlp") - if not ytdlp_path: - log("yt-dlp not found in PATH", file=sys.stderr) - return [] - - # Construct command - # ytsearchN:query searches for N results - search_query = f"ytsearch{limit}:{query}" - - cmd = [ - ytdlp_path, - "--dump-json", - "--flat-playlist", # Don't resolve video details fully, faster - "--no-warnings", - search_query - ] - - try: - # Run yt-dlp - # We need to capture stdout. 
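A hedged rendition of the yt-dlp invocation assembled above: the `ytsearchN:` pseudo-URL with `--dump-json --flat-playlist` yields one JSON object per line on stdout, which the provider turns into results.

```python
import json, shutil, subprocess

ytdlp = shutil.which("yt-dlp")
if ytdlp:
    proc = subprocess.run(
        [ytdlp, "--dump-json", "--flat-playlist", "--no-warnings", "ytsearch5:lofi hip hop"],
        capture_output=True, text=True, encoding="utf-8", errors="replace",
    )
    for line in proc.stdout.splitlines():
        if line.strip():
            entry = json.loads(line)
            print(entry.get("title"), entry.get("id"))
```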
yt-dlp outputs one JSON object per line for search results - process = subprocess.run( - cmd, - capture_output=True, - text=True, - encoding="utf-8", - errors="replace" - ) - - if process.returncode != 0: - log(f"yt-dlp search failed: {process.stderr}", file=sys.stderr) - return [] - - results = [] - for line in process.stdout.splitlines(): - if not line.strip(): - continue - - try: - data = json.loads(line) - - # Extract fields - title = data.get("title", "Unknown Title") - url = data.get("url") - if not url: - # Sometimes flat-playlist gives 'id', construct URL - video_id = data.get("id") - if video_id: - url = f"https://www.youtube.com/watch?v={video_id}" - else: - continue - - uploader = data.get("uploader", "Unknown Uploader") - duration = data.get("duration") # seconds - view_count = data.get("view_count") - - # Format duration - duration_str = "" - if duration: - try: - m, s = divmod(int(duration), 60) - h, m = divmod(m, 60) - if h > 0: - duration_str = f"{h}:{m:02d}:{s:02d}" - else: - duration_str = f"{m}:{s:02d}" - except (ValueError, TypeError): - pass - - # Create annotations - annotations = [] - if duration_str: - annotations.append(duration_str) - if view_count: - # Simple format for views - try: - vc = int(view_count) - if vc >= 1000000: - views_str = f"{vc/1000000:.1f}M views" - elif vc >= 1000: - views_str = f"{vc/1000:.1f}K views" - else: - views_str = f"{vc} views" - annotations.append(views_str) - except (ValueError, TypeError): - pass - - annotations.append("youtube") - - # Create result - result = SearchResult( - origin="youtube", - title=title, - target=url, - detail=f"by {uploader}", - annotations=annotations, - media_kind="video", - full_metadata=data, - columns=[ - ("Title", title), - ("Uploader", uploader), - ("Duration", duration_str), - ("Views", str(view_count) if view_count else "") - ] - ) - results.append(result) - - except json.JSONDecodeError: - continue - - return results - - except Exception as e: - log(f"Error running yt-dlp: {e}", file=sys.stderr) - return [] - - def get_result_args(self) -> List[str]: - """YouTube results are URLs.""" - return ["-url"] - - def validate(self) -> bool: - """Check if yt-dlp is installed.""" - return shutil.which("yt-dlp") is not None - - -class BandcampProvider(SearchProvider): - """ - Search provider for Bandcamp using Playwright scraper. - """ - RESULT_FIELDS = [ - ("name", "Name", None), - ("artist", "Artist/Loc", None), - ("type", "Type", None) - ] - - def search( - self, - query: str, - limit: int = 50, - filters: Optional[Dict[str, Any]] = None, - **kwargs - ) -> List[SearchResult]: - if not PLAYWRIGHT_AVAILABLE: - print("Playwright library not available. 
Please install it (pip install playwright).") - return [] - - results = [] - try: - with sync_playwright() as p: - # Launch browser (headless) - browser = p.chromium.launch(headless=True) - page = browser.new_page() - - # Check if query is a URL (Artist/Album Scraping Mode) - if query.startswith("http://") or query.startswith("https://"): - return self._scrape_url(page, query, limit) - - # Search Mode - # Parse query for prefixes - search_type = "t" # Default to track - clean_query = query - - if "artist:" in query.lower(): - search_type = "b" - clean_query = query.lower().replace("artist:", "").strip() - elif "album:" in query.lower(): - search_type = "a" - clean_query = query.lower().replace("album:", "").strip() - elif "track:" in query.lower(): - search_type = "t" - clean_query = query.lower().replace("track:", "").strip() - elif "label:" in query.lower(): - search_type = "b" - clean_query = query.lower().replace("label:", "").strip() - - # Filters override prefix - if filters: - ftype = filters.get("type", "").lower() - if ftype in ["album", "albums"]: - search_type = "a" - elif ftype in ["artist", "artists", "label", "labels"]: - search_type = "b" - elif ftype in ["track", "tracks"]: - search_type = "t" - - # Construct URL with item_type - url = f"https://bandcamp.com/search?q={clean_query}&item_type={search_type}" - debug(f"[Bandcamp] Navigating to search URL: {url}") - page.goto(url) - page.wait_for_load_state("domcontentloaded") - - # Wait for results - try: - # Wait for the search results to appear in the DOM - page.wait_for_selector(".searchresult", timeout=10000) - except Exception as e: - # No results found or timeout - log(f"Bandcamp search timeout or no results: {e}") - browser.close() - return [] - - # Extract items - items = page.query_selector_all(".searchresult") - debug(f"[Bandcamp] Found {len(items)} results") - - for item in items: - if len(results) >= limit: - break - - try: - # Extract data - heading_el = item.query_selector(".heading a") - if not heading_el: - debug("[Bandcamp] Skipping item: No heading found") - continue - - name = heading_el.inner_text().strip() - item_url = heading_el.get_attribute("href") - # Clean URL (remove query params) - if item_url and "?" 
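A restatement of the prefix-to-`item_type` mapping used above when building the Bandcamp search URL (t = track, a = album, b = band, which covers artists and labels). The helper name is invented, and like the original it leaves the query unencoded.

```python
def bandcamp_search_url(query: str) -> str:
    q = query.lower()
    search_type, clean = "t", query               # default: track search
    for prefix, code in (("artist:", "b"), ("album:", "a"), ("track:", "t"), ("label:", "b")):
        if prefix in q:
            search_type = code
            clean = q.replace(prefix, "").strip()
            break
    return f"https://bandcamp.com/search?q={clean}&item_type={search_type}"

print(bandcamp_search_url("album:In Rainbows"))   # ...&item_type=a
print(bandcamp_search_url("some song name"))      # ...&item_type=t
```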
in item_url: - item_url = item_url.split("?")[0] - - item_type_el = item.query_selector(".itemtype") - item_type = item_type_el.inner_text().strip() if item_type_el else "Unknown" - - subhead_el = item.query_selector(".subhead") - subhead = subhead_el.inner_text().strip() if subhead_el else "" - - art_el = item.query_selector(".art img") - img = art_el.get_attribute("src") if art_el else None - - # Map to metadata - metadata = { - "name": name, - "type": item_type, - "url": item_url, - "img": img, - "subhead": subhead - } - - # Refine metadata based on type - artist_or_loc = subhead - if "ALBUM" in item_type.upper(): - artist_or_loc = subhead.replace("by ", "").strip() - metadata["artist"] = artist_or_loc - elif "ARTIST" in item_type.upper() or "LABEL" in item_type.upper(): - metadata["location"] = subhead - elif "TRACK" in item_type.upper(): - artist_or_loc = subhead.replace("by ", "").strip() - metadata["artist"] = artist_or_loc - - columns = [ - ("Name", name), - ("Artist/Loc", artist_or_loc), - ("Type", item_type) - ] - - results.append(SearchResult( - origin="bandcamp", - title=name, - target=item_url, - full_metadata=metadata, - columns=columns - )) - except Exception as e: - # Skip malformed items - debug(f"[Bandcamp] Error parsing item: {e}") - continue - - browser.close() - - except Exception as e: - log(f"Bandcamp search error: {e}") - return [] - - return results - - def _scrape_url(self, page, url: str, limit: int) -> List[SearchResult]: - """Scrape a Bandcamp artist or album page.""" - debug(f"[Bandcamp] Scraping URL: {url}") - - # If it's an artist page, try to go to /music to see all - if ".bandcamp.com" in url and "/music" not in url and "/album/" not in url and "/track/" not in url: - # Check if it's likely an artist root - url = url.rstrip("/") + "/music" - debug(f"[Bandcamp] Adjusted to music page: {url}") - - page.goto(url) - page.wait_for_load_state("domcontentloaded") - - results = [] - - # Check for grid items (Artist page /music) - grid_items = page.query_selector_all(".music-grid-item") - if grid_items: - debug(f"[Bandcamp] Found {len(grid_items)} grid items") - - # Try to get global artist name from page metadata/header as fallback - page_artist = "" - try: - og_site_name = page.query_selector('meta[property="og:site_name"]') - if og_site_name: - page_artist = og_site_name.get_attribute("content") or "" - - if not page_artist: - band_name = page.query_selector('#band-name-location .title') - if band_name: - page_artist = band_name.inner_text().strip() - except Exception: - pass - - for item in grid_items: - if len(results) >= limit: - break - try: - title_el = item.query_selector(".title") - # Sanitize title to remove newlines which break the table - title = title_el.inner_text().strip().replace("\n", " ").replace("\r", "") if title_el else "Unknown" - # Remove extra spaces - title = " ".join(title.split()) - - link_el = item.query_selector("a") - href = link_el.get_attribute("href") if link_el else "" - if href and not href.startswith("http"): - # Relative link, construct full URL - base = url.split("/music")[0] - href = base + href - - artist_el = item.query_selector(".artist") - artist = artist_el.inner_text().replace("by ", "").strip() if artist_el else "" - - # Use page artist if item artist is missing - if not artist and page_artist: - artist = page_artist - - # Sanitize artist - artist = artist.replace("\n", " ").replace("\r", "") - artist = " ".join(artist.split()) - - columns = [ - ("Name", title), - ("Artist", artist), - ("Type", "Album/Track") - ] - - 
results.append(SearchResult( - origin="bandcamp", - title=title, - target=href, - full_metadata={"artist": artist}, - columns=columns - )) - except Exception as e: - debug(f"[Bandcamp] Error parsing grid item: {e}") - continue - return results - - # Check for track list (Album page) - track_rows = page.query_selector_all(".track_row_view") - if track_rows: - debug(f"[Bandcamp] Found {len(track_rows)} track rows") - # Get Album Artist - artist_el = page.query_selector("#name-section h3 span a") - album_artist = artist_el.inner_text().strip() if artist_el else "Unknown" - - for row in track_rows: - if len(results) >= limit: - break - try: - title_el = row.query_selector(".track-title") - # Sanitize title - title = title_el.inner_text().strip().replace("\n", " ").replace("\r", "") if title_el else "Unknown" - title = " ".join(title.split()) - - # Track link - link_el = row.query_selector(".title a") - href = link_el.get_attribute("href") if link_el else "" - if href and not href.startswith("http"): - base = url.split(".com")[0] + ".com" - href = base + href - - duration_el = row.query_selector(".time") - duration = duration_el.inner_text().strip() if duration_el else "" - - columns = [ - ("Name", title), - ("Artist", album_artist), - ("Duration", duration) - ] - - results.append(SearchResult( - origin="bandcamp", - title=title, - target=href, - full_metadata={"artist": album_artist, "duration": duration}, - columns=columns - )) - except Exception as e: - debug(f"[Bandcamp] Error parsing track row: {e}") - continue - return results - - debug("[Bandcamp] No recognizable items found on page") - return [] - - def get_result_args(self) -> List[str]: - return ["-url"] - - -# Provider registry -_PROVIDERS = { - "bandcamp": BandcampProvider, - "local": LocalStorageProvider, - "libgen": LibGenProvider, - "soulseek": SoulSeekProvider, - "debrid": DebridProvider, - "openlibrary": OpenLibraryProvider, - "gog": GogGamesProvider, - "youtube": YoutubeSearchProvider, -} - - -def get_provider(name: str, config: Optional[Dict[str, Any]] = None) -> Optional[SearchProvider]: - """ - Get a search provider by name. - - Args: - name: Provider name (case-insensitive): "local", "libgen", "soulseek", "debrid", "openlibrary" - config: Optional configuration dictionary - - Returns: - SearchProvider instance or None if not found - """ - provider_class = _PROVIDERS.get(name.lower()) - - if provider_class is None: - log(f"Unknown search provider: {name}", file=sys.stderr) - return None - - try: - provider = provider_class(config) - if not provider.validate(): - log(f"Provider '{name}' is not properly configured or available", file=sys.stderr) - return None - return provider - - except Exception as e: - log(f"Error initializing provider '{name}': {e}", file=sys.stderr) - return None - - -def list_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bool]: - """ - List all available providers and whether they're available. - - Args: - config: Optional configuration dictionary - - Returns: - Dictionary mapping provider names to availability (True/False) - """ - availability = {} - for name, provider_class in _PROVIDERS.items(): - try: - provider = provider_class(config) - availability[name] = provider.validate() - except Exception: - availability[name] = False - return availability - - -def register_provider(name: str, provider_class: type) -> None: - """ - Register a new search provider. 
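The `_PROVIDERS` dictionary together with `get_provider`/`register_provider` is a plain name-to-class registry that instantiates on lookup and rejects providers whose `validate()` fails. Below is a minimal, self-contained sketch of the same pattern; `DemoProvider`, `EchoProvider`, and the helper names are illustrative stand-ins, not part of this codebase.

```python
from typing import Any, Dict, Optional, Type

# Illustrative stand-in for the real SearchProvider registry; all names here are hypothetical.
_REGISTRY: Dict[str, Type["DemoProvider"]] = {}

class DemoProvider:
    """Minimal provider shape: construct with config, validate, then use."""
    def __init__(self, config: Optional[Dict[str, Any]] = None) -> None:
        self.config = config or {}
    def validate(self) -> bool:
        return True

def register_demo_provider(name: str, cls: Type[DemoProvider]) -> None:
    _REGISTRY[name.lower()] = cls

def get_demo_provider(name: str, config: Optional[Dict[str, Any]] = None) -> Optional[DemoProvider]:
    cls = _REGISTRY.get(name.lower())
    if cls is None:
        return None                                        # unknown provider name
    provider = cls(config)
    return provider if provider.validate() else None       # reject misconfigured providers

class EchoProvider(DemoProvider):
    def search(self, query: str) -> list:
        return [{"title": query}]

register_demo_provider("echo", EchoProvider)
assert isinstance(get_demo_provider("ECHO"), EchoProvider)  # lookup is case-insensitive
```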
- - Args: - name: Provider name (lowercase) - provider_class: Class that inherits from SearchProvider - """ - _PROVIDERS[name.lower()] = provider_class - - -class FileProvider(ABC): - """Abstract base class for file hosting providers.""" - - def __init__(self, config: Optional[Dict[str, Any]] = None): - self.config = config or {} - self.name = self.__class__.__name__.replace("FileProvider", "").lower() - - @abstractmethod - def upload(self, file_path: str, **kwargs: Any) -> str: - """Upload a file and return the URL.""" - pass - - def validate(self) -> bool: - """Check if provider is available/configured.""" - return True - - -class ZeroXZeroFileProvider(FileProvider): - """File provider for 0x0.st.""" - - def __init__(self, config: Optional[Dict[str, Any]] = None): - super().__init__(config) - self.name = "0x0" - self.base_url = "https://0x0.st" - - def upload(self, file_path: str, **kwargs: Any) -> str: - """Upload file to 0x0.st.""" - from helper.http_client import HTTPClient - import os - - if not os.path.exists(file_path): - raise FileNotFoundError(f"File not found: {file_path}") - - try: - # 0x0.st expects 'file' field in multipart/form-data - # Use a custom User-Agent to avoid 403 Forbidden - headers = {"User-Agent": "Medeia-Macina/1.0"} - with HTTPClient(headers=headers) as client: - with open(file_path, 'rb') as f: - files = {'file': f} - response = client.post(self.base_url, files=files) - - if response.status_code == 200: - return response.text.strip() - else: - raise Exception(f"Upload failed: {response.status_code} - {response.text}") - - except Exception as e: - log(f"[0x0] Upload error: {e}", file=sys.stderr) - raise - - def validate(self) -> bool: - return True - - -class MatrixFileProvider(FileProvider): - """File provider for Matrix (Element) chat rooms.""" - - def __init__(self, config: Optional[Dict[str, Any]] = None): - super().__init__(config) - self.name = "matrix" - - def validate(self) -> bool: - """Check if Matrix is configured.""" - if not self.config: return False - matrix_conf = self.config.get('storage', {}).get('matrix', {}) - return bool(matrix_conf.get('homeserver') and matrix_conf.get('room_id') and (matrix_conf.get('access_token') or matrix_conf.get('password'))) - - def upload(self, file_path: str, **kwargs: Any) -> str: - """Upload file to Matrix room.""" - import requests - import mimetypes - from pathlib import Path - import json - - debug(f"[Matrix] Starting upload for: {file_path}") - debug(f"[Matrix] kwargs: {kwargs}") - - path = Path(file_path) - if not path.exists(): - raise FileNotFoundError(f"File not found: {file_path}") - - matrix_conf = self.config.get('storage', {}).get('matrix', {}) - homeserver = matrix_conf.get('homeserver') - access_token = matrix_conf.get('access_token') - room_id = matrix_conf.get('room_id') - - if not homeserver.startswith('http'): - homeserver = f"https://{homeserver}" - - # 1. 
Upload Media - # Use v3 API - upload_url = f"{homeserver}/_matrix/media/v3/upload" - headers = { - "Authorization": f"Bearer {access_token}", - "Content-Type": "application/octet-stream" - } - - mime_type, _ = mimetypes.guess_type(path) - if mime_type: - headers["Content-Type"] = mime_type - - filename = path.name - - debug(f"[Matrix] Uploading media to {upload_url} with mime_type: {mime_type}") - - with open(path, 'rb') as f: - resp = requests.post(upload_url, headers=headers, data=f, params={"filename": filename}) - - if resp.status_code != 200: - raise Exception(f"Matrix upload failed: {resp.text}") - - content_uri = resp.json().get('content_uri') - if not content_uri: - raise Exception("No content_uri returned from Matrix upload") - - debug(f"[Matrix] Media uploaded, content_uri: {content_uri}") - - # 2. Send Message - # Use v3 API - send_url = f"{homeserver}/_matrix/client/v3/rooms/{room_id}/send/m.room.message" - - # Determine msgtype with better fallback for audio - msgtype = "m.file" - ext = path.suffix.lower() - - # Explicit check for common audio extensions to force m.audio - # This prevents audio files being treated as generic files or video - AUDIO_EXTS = {'.mp3', '.flac', '.wav', '.m4a', '.aac', '.ogg', '.opus', '.wma', '.mka', '.alac'} - VIDEO_EXTS = {'.mp4', '.mkv', '.webm', '.mov', '.avi', '.flv', '.mpg', '.mpeg', '.ts', '.m4v', '.wmv'} - IMAGE_EXTS = {'.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.tiff'} - - if ext in AUDIO_EXTS: - msgtype = "m.audio" - elif ext in VIDEO_EXTS: - msgtype = "m.video" - elif ext in IMAGE_EXTS: - msgtype = "m.image" - elif mime_type: - if mime_type.startswith("audio/"): msgtype = "m.audio" - elif mime_type.startswith("video/"): msgtype = "m.video" - elif mime_type.startswith("image/"): msgtype = "m.image" - - debug(f"[Matrix] Determined msgtype: {msgtype} (ext: {ext}, mime: {mime_type})") - - info = { - "mimetype": mime_type, - "size": path.stat().st_size - } - - # Try to get duration for audio/video - if msgtype in ("m.audio", "m.video"): - try: - # Try mutagen first (lightweight) - # Use dynamic import to avoid top-level dependency if not installed - # Note: mutagen.File is available at package level at runtime but type checkers might miss it - import mutagen # type: ignore - m = mutagen.File(str(path)) # type: ignore - if m and m.info and hasattr(m.info, 'length'): - duration_ms = int(m.info.length * 1000) - info['duration'] = duration_ms - debug(f"[Matrix] Extracted duration: {duration_ms}ms") - except Exception as e: - debug(f"[Matrix] Failed to extract duration: {e}") - - payload = { - "msgtype": msgtype, - "body": filename, - "url": content_uri, - "info": info - } - - debug(f"[Matrix] Sending message payload: {json.dumps(payload, indent=2)}") - - resp = requests.post(send_url, headers=headers, json=payload) - if resp.status_code != 200: - raise Exception(f"Matrix send message failed: {resp.text}") - - event_id = resp.json().get('event_id') - return f"https://matrix.to/#/{room_id}/{event_id}" - - -# File provider registry -_FILE_PROVIDERS = { - "0x0": ZeroXZeroFileProvider, - "matrix": MatrixFileProvider, -} - - -def get_file_provider(name: str, config: Optional[Dict[str, Any]] = None) -> Optional[FileProvider]: - """ - Get a file hosting provider by name. 
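The Matrix upload above picks `msgtype` from a hard-coded extension set first and only falls back to the MIME prefix afterwards, so audio files are not misclassified as generic files. A standalone sketch of that decision, reusing the same extension sets (the function name is hypothetical):

```python
import mimetypes
from pathlib import Path

AUDIO_EXTS = {'.mp3', '.flac', '.wav', '.m4a', '.aac', '.ogg', '.opus', '.wma', '.mka', '.alac'}
VIDEO_EXTS = {'.mp4', '.mkv', '.webm', '.mov', '.avi', '.flv', '.mpg', '.mpeg', '.ts', '.m4v', '.wmv'}
IMAGE_EXTS = {'.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.tiff'}

def guess_msgtype(path: Path) -> str:
    """Pick a Matrix msgtype: known extensions win, the MIME prefix is the fallback."""
    ext = path.suffix.lower()
    if ext in AUDIO_EXTS:
        return "m.audio"
    if ext in VIDEO_EXTS:
        return "m.video"
    if ext in IMAGE_EXTS:
        return "m.image"
    mime, _ = mimetypes.guess_type(str(path))
    if mime:
        if mime.startswith("audio/"):
            return "m.audio"
        if mime.startswith("video/"):
            return "m.video"
        if mime.startswith("image/"):
            return "m.image"
    return "m.file"

assert guess_msgtype(Path("song.flac")) == "m.audio"
assert guess_msgtype(Path("notes.pdf")) == "m.file"
```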
- - Args: - name: Provider name (case-insensitive): "0x0" - config: Optional configuration dictionary - - Returns: - FileProvider instance or None if not found - """ - provider_class = _FILE_PROVIDERS.get(name.lower()) - - if provider_class is None: - log(f"Unknown file provider: {name}", file=sys.stderr) - return None - - try: - provider = provider_class(config) - if not provider.validate(): - log(f"File provider '{name}' is not properly configured or available", file=sys.stderr) - return None - return provider - - except Exception as e: - log(f"Error initializing file provider '{name}': {e}", file=sys.stderr) - return None - - -def list_file_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bool]: - """ - List all available file hosting providers and whether they're available. - - Args: - config: Optional configuration dictionary - - Returns: - Dictionary mapping provider names to availability (True/False) - """ - availability = {} - for name, provider_class in _FILE_PROVIDERS.items(): - try: - provider = provider_class(config) - availability[name] = provider.validate() - except Exception: - availability[name] = False - return availability - - -def register_file_provider(name: str, provider_class: type) -> None: - """ - Register a new file hosting provider. - - Args: - name: Provider name (lowercase) - provider_class: Class that inherits from FileProvider - """ - _FILE_PROVIDERS[name.lower()] = provider_class - - - - diff --git a/helper/store.py b/helper/store.py new file mode 100644 index 0000000..d2ff41c --- /dev/null +++ b/helper/store.py @@ -0,0 +1,2268 @@ +"""File storage abstraction layer for uploading files to different services. +""" + +from __future__ import annotations + +from abc import ABC, abstractmethod +from pathlib import Path +from typing import Any, Dict, Optional, Tuple, List +import sys +import shutil +import requests +import re + +from helper.logger import log, debug +from helper.utils_constant import mime_maps +from helper.utils import sha256_file +from helper.folder_store import FolderDB +from config import get_local_storage_path + + +HEX_DIGITS = set("0123456789abcdef") + + +def _normalize_hex_hash(value: Optional[str]) -> Optional[str]: + """Return a normalized 64-character lowercase hash or None.""" + if value is None: + return None + + try: + cleaned = ''.join(ch for ch in str(value).strip().lower() if ch in HEX_DIGITS) + except Exception: + return None + + if len(cleaned) == 64: + return cleaned + return None + + +def _resolve_file_hash(candidate: Optional[str], path: Path) -> Optional[str]: + """Return the given hash if valid, otherwise compute sha256 from disk.""" + normalized = _normalize_hex_hash(candidate) + if normalized is not None: + return normalized + + if not path.exists(): + return None + + try: + return sha256_file(path) + except Exception as exc: + debug(f"Failed to compute hash for {path}: {exc}") + return None + + +class store(ABC): + """""" + @abstractmethod + def add_file(self, file_path: Path, **kwargs: Any) -> str: + """""" + @abstractmethod + def name(self) -> str: + """""" + def search_file(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]: + """""" + raise NotImplementedError(f"{self.name()} backend does not support searching") + + @abstractmethod + def get_file(self, file_hash: str, **kwargs: Any) -> Optional[Path]: + """Retrieve file by hash, returning path to the file. 
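`_normalize_hex_hash` tolerates whitespace, mixed case, and stray separators by keeping only hex digits and then requiring exactly 64 of them; `_resolve_file_hash` falls back to hashing the file on disk when the candidate is not usable. A standalone sketch of the normalization step:

```python
HEX_DIGITS = set("0123456789abcdef")

def normalize_hex_hash(value):
    """Return a 64-character lowercase SHA-256 string, or None if the input does not contain one."""
    if value is None:
        return None
    cleaned = ''.join(ch for ch in str(value).strip().lower() if ch in HEX_DIGITS)
    return cleaned if len(cleaned) == 64 else None

assert normalize_hex_hash(" " + "A" * 64 + " ") == "a" * 64   # case and whitespace are tolerated
assert normalize_hex_hash("deadbeef") is None                  # too short to be a SHA-256
```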
+ + Args: + file_hash: SHA256 hash of the file (64-char hex string) + + Returns: + Path to the file or None if not found + """ + raise NotImplementedError(f"{self.name()} backend does not support get_file") + + @abstractmethod + def get_metadata(self, file_hash: str, **kwargs: Any) -> Optional[Dict[str, Any]]: + """Get metadata for a file by hash. + + Args: + file_hash: SHA256 hash of the file (64-char hex string) + + Returns: + Dict with metadata fields or None if not found + """ + raise NotImplementedError(f"{self.name()} backend does not support get_metadata") + + @abstractmethod + def get_tag(self, file_identifier: str, **kwargs: Any) -> Tuple[List[str], str]: + """""" + raise NotImplementedError(f"{self.name()} backend does not support get_tags") + @abstractmethod + def add_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool: + """""" + raise NotImplementedError(f"{self.name()} backend does not support add_tag") + @abstractmethod + def delete_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool: + """""" + raise NotImplementedError(f"{self.name()} backend does not support delete_tag") + @abstractmethod + def get_url(self, file_identifier: str, **kwargs: Any) -> List[str]: + """""" + raise NotImplementedError(f"{self.name()} backend does not support get_url") + @abstractmethod + def add_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool: + """""" + raise NotImplementedError(f"{self.name()} backend does not support add_url") + @abstractmethod + def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool: + """""" + raise NotImplementedError(f"{self.name()} backend does not support delete_url") +class Folder(store): + """""" + # Track which locations have already been migrated to avoid repeated migrations + _migrated_locations = set() + + def __init__(self, location: Optional[str] = None, name: Optional[str] = None) -> None: + self._location = location + self._name = name + + if self._location: + try: + from helper.folder_store import FolderDB + from pathlib import Path + location_path = Path(self._location).expanduser() + + # Use context manager to ensure connection is properly closed + with FolderDB(location_path) as db: + if db.connection: + db.connection.commit() + + # Call migration and discovery at startup + Folder.migrate_location(self._location) + except Exception as exc: + debug(f"Failed to initialize database for '{name}': {exc}") + + @classmethod + def migrate_location(cls, location: Optional[str]) -> None: + """Migrate a location to hash-based storage (one-time operation, call explicitly at startup).""" + if not location: + return + + from pathlib import Path + location_path = Path(location).expanduser() + location_str = str(location_path) + + # Only migrate once per location + if location_str in cls._migrated_locations: + return + + cls._migrated_locations.add(location_str) + + # Create a temporary instance just to call the migration + temp_instance = cls(location=location) + temp_instance._migrate_to_hash_storage(location_path) + + def _migrate_to_hash_storage(self, location_path: Path) -> None: + """Migrate existing files from filename-based to hash-based storage. + + Checks for sidecars (.metadata, .tag) and imports them before renaming. + Also ensures all files have a title: tag. 
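The migration pass described above renames legacy filename-based files to `<sha256><extension>` and then records them in the folder database. A minimal sketch of just the rename step, without the sidecar import or database writes handled by the real method (directory layout and function name are illustrative):

```python
import hashlib
from pathlib import Path

def sha256_of(path: Path) -> str:
    """Stream the file so large media does not need to fit in memory."""
    digest = hashlib.sha256()
    with open(path, "rb") as fh:
        for chunk in iter(lambda: fh.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest()

def rename_to_hash(directory: Path) -> None:
    """Rename plain files to '<sha256><original extension>', skipping already-migrated ones."""
    for path in sorted(p for p in directory.iterdir() if p.is_file()):
        stem = path.stem.lower()
        if len(stem) == 64 and all(c in "0123456789abcdef" for c in stem):
            continue                                   # already hash-named
        target = directory / (sha256_of(path) + path.suffix)
        if not target.exists():
            path.rename(target)
```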
+ """ + from helper.folder_store import read_sidecar, write_sidecar, find_sidecar + + try: + with FolderDB(location_path) as db: + cursor = db.connection.cursor() + + # First pass: migrate filename-based files and add title tags + # Scan all files in the storage directory + for file_path in sorted(location_path.iterdir()): + if not file_path.is_file(): + continue + + # Skip database files and sidecars + if file_path.suffix in ('.db', '.metadata', '.tag', '-shm', '-wal'): + continue + # Also skip if the file ends with -shm or -wal (SQLite journal files) + if file_path.name.endswith(('-shm', '-wal')): + continue + + # Check if filename is already a hash (without extension) + if len(file_path.stem) == 64 and all(c in '0123456789abcdef' for c in file_path.stem.lower()): + continue # Already migrated, will process in second pass + + try: + # Compute file hash + file_hash = sha256_file(file_path) + # Preserve extension in the hash-based filename + file_ext = file_path.suffix # e.g., '.mp4' + hash_filename = file_hash + file_ext if file_ext else file_hash + hash_path = location_path / hash_filename + + # Check for sidecars and import them + sidecar_path = find_sidecar(file_path) + tags_to_add = [] + url_to_add = [] + has_title_tag = False + + if sidecar_path and sidecar_path.exists(): + try: + _, tags, url = read_sidecar(sidecar_path) + if tags: + tags_to_add = list(tags) + # Check if title tag exists + has_title_tag = any(t.lower().startswith('title:') for t in tags_to_add) + if url: + url_to_add = list(url) + debug(f"Found sidecar for {file_path.name}: {len(tags_to_add)} tags, {len(url_to_add)} url", file=sys.stderr) + # Delete the sidecar after importing + sidecar_path.unlink() + except Exception as exc: + debug(f"Failed to read sidecar for {file_path.name}: {exc}", file=sys.stderr) + + # Ensure there's a title tag (use original filename if not present) + if not has_title_tag: + tags_to_add.append(f"title:{file_path.name}") + + # Rename file to hash if needed + if hash_path != file_path and not hash_path.exists(): + debug(f"Migrating: {file_path.name} -> {hash_filename}", file=sys.stderr) + file_path.rename(hash_path) + + # Create or update database entry using FolderDB methods + db.get_or_create_file_entry(hash_path) + + # Save extension metadata + ext_clean = file_ext.lstrip('.') if file_ext else '' + db.save_metadata(hash_path, { + 'hash': file_hash, + 'ext': ext_clean, + 'size': hash_path.stat().st_size + }) + + # Add all tags (including title tag) + if tags_to_add: + db.save_tags(hash_path, tags_to_add) + debug(f"Added {len(tags_to_add)} tags to {file_hash}", file=sys.stderr) + + # Note: url would need a separate table if you want to store them + # For now, we're just noting them in debug + if url_to_add: + debug(f"Imported {len(url_to_add)} url for {file_hash}: {url_to_add}", file=sys.stderr) + + except Exception as exc: + debug(f"Failed to migrate file {file_path.name}: {exc}", file=sys.stderr) + + # Second pass: ensure all files in database have a title: tag + db.connection.commit() + cursor.execute(''' + SELECT f.hash, f.file_path + FROM files f + WHERE NOT EXISTS ( + SELECT 1 FROM tags t WHERE t.hash = f.hash AND LOWER(t.tag) LIKE 'title:%' + ) + ''') + files_without_title = cursor.fetchall() + + for file_hash, file_path_str in files_without_title: + try: + file_path = Path(file_path_str) + if file_path.exists(): + # Use the filename as the title + title_tag = f"title:{file_path.name}" + db.save_tags(file_path, [title_tag]) + debug(f"Added title tag to {file_path.name}", 
file=sys.stderr) + except Exception as exc: + debug(f"Failed to add title tag to file {file_path_str}: {exc}", file=sys.stderr) + + db.connection.commit() + + # Third pass: discover files on disk that aren't in the database yet + # These are hash-named files that were added after initial indexing + cursor.execute('SELECT LOWER(hash) FROM files') + db_hashes = {row[0] for row in cursor.fetchall()} + + discovered = 0 + for file_path in sorted(location_path.rglob("*")): + if file_path.is_file(): + # Check if file name (without extension) is a 64-char hex hash + name_without_ext = file_path.stem + if len(name_without_ext) == 64 and all(c in '0123456789abcdef' for c in name_without_ext.lower()): + file_hash = name_without_ext.lower() + + # Skip if already in DB + if file_hash in db_hashes: + continue + + try: + # Add file to DB (creates entry and auto-adds title: tag) + db.get_or_create_file_entry(file_path) + + # Save extension metadata + file_ext = file_path.suffix + ext_clean = file_ext.lstrip('.') if file_ext else '' + db.save_metadata(file_path, { + 'hash': file_hash, + 'ext': ext_clean, + 'size': file_path.stat().st_size + }) + + discovered += 1 + except Exception as e: + debug(f"Failed to discover file {file_path.name}: {e}", file=sys.stderr) + + if discovered > 0: + debug(f"Discovered and indexed {discovered} undiscovered files in {location_path.name}", file=sys.stderr) + db.connection.commit() + except Exception as exc: + debug(f"Migration to hash storage failed: {exc}", file=sys.stderr) + + + def location(self) -> str: + return self._location + + def name(self) -> str: + return self._name + + def add_file(self, file_path: Path, **kwargs: Any) -> str: + """Add file to local folder storage with full metadata support. + + Args: + file_path: Path to the file to add + move: If True, move file instead of copy (default: False) + tags: Optional list of tags to add + url: Optional list of url to associate with the file + title: Optional title (will be added as 'title:value' tag) + + Returns: + File hash (SHA256 hex string) as identifier + """ + move_file = bool(kwargs.get("move")) + tags = kwargs.get("tags", []) + url = kwargs.get("url", []) + title = kwargs.get("title") + + # Extract title from tags if not explicitly provided + if not title: + for tag in tags: + if isinstance(tag, str) and tag.lower().startswith("title:"): + title = tag.split(":", 1)[1].strip() + break + + # Fallback to filename if no title + if not title: + title = file_path.name + + # Ensure title is in tags + title_tag = f"title:{title}" + if not any(str(tag).lower().startswith("title:") for tag in tags): + tags = [title_tag] + list(tags) + + try: + file_hash = sha256_file(file_path) + debug(f"File hash: {file_hash}", file=sys.stderr) + + # Preserve extension in the stored filename + file_ext = file_path.suffix # e.g., '.mp4' + save_filename = file_hash + file_ext if file_ext else file_hash + save_file = Path(self._location) / save_filename + + # Check if file already exists + with FolderDB(Path(self._location)) as db: + existing_path = db.search_hash(file_hash) + if existing_path and existing_path.exists(): + log( + f"✓ File already in local storage: {existing_path}", + file=sys.stderr, + ) + # Still add tags and url if provided + if tags: + self.add_tag(file_hash, tags) + if url: + self.add_url(file_hash, url) + return file_hash + + # Move or copy file + if move_file: + shutil.move(str(file_path), str(save_file)) + debug(f"Local move: {save_file}", file=sys.stderr) + else: + shutil.copy2(str(file_path), str(save_file)) + 
debug(f"Local copy: {save_file}", file=sys.stderr) + + # Save to database + with FolderDB(Path(self._location)) as db: + db.get_or_create_file_entry(save_file) + # Save metadata including extension + ext_clean = file_ext.lstrip('.') if file_ext else '' + db.save_metadata(save_file, { + 'hash': file_hash, + 'ext': ext_clean, + 'size': file_path.stat().st_size + }) + + # Add tags if provided + if tags: + self.add_tag(file_hash, tags) + + # Add url if provided + if url: + self.add_url(file_hash, url) + + log(f"✓ Added to local storage: {save_file.name}", file=sys.stderr) + return file_hash + + except Exception as exc: + log(f"❌ Local storage failed: {exc}", file=sys.stderr) + raise + + def search_file(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]: + """Search local database for files by title tag or filename.""" + from fnmatch import fnmatch + from helper.folder_store import DatabaseAPI + + limit = kwargs.get("limit") + try: + limit = int(limit) if limit is not None else None + except (TypeError, ValueError): + limit = None + if isinstance(limit, int) and limit <= 0: + limit = None + + query = query.lower() + query_lower = query # Ensure query_lower is defined for all code paths + match_all = query == "*" + results = [] + search_dir = Path(self._location).expanduser() + + tokens = [t.strip() for t in query.split(',') if t.strip()] + + if not match_all and len(tokens) == 1 and _normalize_hex_hash(query): + debug("Hash queries require 'hash:' prefix for local search") + return results + + if not match_all and _normalize_hex_hash(query): + debug("Hash queries require 'hash:' prefix for local search") + return results + + def _create_entry(file_path: Path, tags: list[str], size_bytes: int | None, db_hash: Optional[str]) -> dict[str, Any]: + path_str = str(file_path) + # Get title from tags if available, otherwise use hash as fallback + title = next((t.split(':', 1)[1] for t in tags if t.lower().startswith('title:')), None) + if not title: + # Fallback to hash if no title tag exists + hash_value = _resolve_file_hash(db_hash, file_path) + title = hash_value if hash_value else file_path.stem + + # Extract extension from file path + ext = file_path.suffix.lstrip('.') + if not ext: + # Fallback: try to extract from title (original filename might be in title) + title_path = Path(title) + ext = title_path.suffix.lstrip('.') + + # Build clean entry with only necessary fields + hash_value = _resolve_file_hash(db_hash, file_path) + entry = { + "title": title, + "ext": ext, + "path": path_str, + "target": path_str, + "store": self._name, + "size": size_bytes, + "hash": hash_value, + "tag": tags, + } + return entry + + try: + if not search_dir.exists(): + debug(f"Search directory does not exist: {search_dir}") + return results + + try: + with DatabaseAPI(search_dir) as api: + if tokens and len(tokens) > 1: + def _like_pattern(term: str) -> str: + return term.replace('*', '%').replace('?', '_') + + def _ids_for_token(token: str) -> set[int]: + token = token.strip() + if not token: + return set() + + if ':' in token and not token.startswith(':'): + namespace, pattern = token.split(':', 1) + namespace = namespace.strip().lower() + pattern = pattern.strip().lower() + + if namespace == 'hash': + normalized_hash = _normalize_hex_hash(pattern) + if not normalized_hash: + return set() + h = api.get_file_hash_by_hash(normalized_hash) + return {h} if h else set() + + if namespace == 'store': + if pattern not in {'local', 'file', 'filesystem'}: + return set() + return api.get_all_file_hashes() + + 
query_pattern = f"{namespace}:%" + tag_rows = api.get_file_hashes_by_tag_pattern(query_pattern) + matched: set[str] = set() + for file_hash, tag_val in tag_rows: + if not tag_val: + continue + tag_lower = str(tag_val).lower() + if not tag_lower.startswith(f"{namespace}:"): + continue + value = tag_lower[len(namespace)+1:] + if fnmatch(value, pattern): + matched.add(file_hash) + return matched + + term = token.lower() + like_pattern = f"%{_like_pattern(term)}%" + hashes = api.get_file_hashes_by_path_pattern(like_pattern) + hashes.update(api.get_file_hashes_by_tag_substring(like_pattern)) + return hashes + + try: + matching_hashes: set[str] | None = None + for token in tokens: + hashes = _ids_for_token(token) + matching_hashes = hashes if matching_hashes is None else matching_hashes & hashes + if not matching_hashes: + return results + + if not matching_hashes: + return results + + rows = api.get_file_metadata(matching_hashes, limit) + for file_hash, file_path_str, size_bytes, ext in rows: + if not file_path_str: + continue + file_path = Path(file_path_str) + if not file_path.exists(): + continue + if size_bytes is None: + try: + size_bytes = file_path.stat().st_size + except OSError: + size_bytes = None + tags = api.get_tags_for_file(file_hash) + entry = _create_entry(file_path, tags, size_bytes, file_hash) + results.append(entry) + if limit is not None and len(results) >= limit: + return results + return results + except Exception as exc: + log(f"⚠️ AND search failed: {exc}", file=sys.stderr) + debug(f"AND search exception details: {exc}") + return [] + + if ":" in query and not query.startswith(":"): + namespace, pattern = query.split(":", 1) + namespace = namespace.strip().lower() + pattern = pattern.strip().lower() + debug(f"Performing namespace search: {namespace}:{pattern}") + + if namespace == "hash": + normalized_hash = _normalize_hex_hash(pattern) + if not normalized_hash: + return results + h = api.get_file_hash_by_hash(normalized_hash) + hashes = {h} if h else set() + rows = api.get_file_metadata(hashes, limit) + for file_hash, file_path_str, size_bytes, ext in rows: + if not file_path_str: + continue + file_path = Path(file_path_str) + if not file_path.exists(): + continue + if size_bytes is None: + try: + size_bytes = file_path.stat().st_size + except OSError: + size_bytes = None + tags = api.get_tags_for_file(file_hash) + entry = _create_entry(file_path, tags, size_bytes, file_hash) + results.append(entry) + if limit is not None and len(results) >= limit: + return results + return results + + query_pattern = f"{namespace}:%" + rows = api.get_files_by_namespace_pattern(query_pattern, limit) + debug(f"Found {len(rows)} potential matches in DB") + + for file_hash, file_path_str, size_bytes, ext in rows: + if not file_path_str: + continue + + tags = api.get_tags_by_namespace_and_file(file_hash, query_pattern) + + for tag in tags: + tag_lower = tag.lower() + if tag_lower.startswith(f"{namespace}:"): + value = tag_lower[len(namespace)+1:] + if fnmatch(value, pattern): + file_path = Path(file_path_str) + if file_path.exists(): + if size_bytes is None: + size_bytes = file_path.stat().st_size + all_tags = api.get_tags_for_file(file_hash) + entry = _create_entry(file_path, all_tags, size_bytes, file_hash) + results.append(entry) + else: + debug(f"File missing on disk: {file_path}") + break + + if limit is not None and len(results) >= limit: + return results + elif not match_all: + terms = [t.strip() for t in query_lower.replace(',', ' ').split() if t.strip()] + if not terms: + terms = 
[query_lower] + + debug(f"Performing filename/tag search for terms: {terms}") + + fetch_limit = (limit or 45) * 50 + + conditions = ["LOWER(f.file_path) LIKE ?" for _ in terms] + params = [f"%{t}%" for t in terms] + + rows = api.get_files_by_multiple_path_conditions(conditions, params, fetch_limit) + debug(f"Found {len(rows)} filename matches in DB (before whole-word filter)") + + word_regex = None + if len(terms) == 1: + term = terms[0] + has_wildcard = '*' in term or '?' in term + + if has_wildcard: + try: + from fnmatch import translate + word_regex = re.compile(translate(term), re.IGNORECASE) + except Exception: + word_regex = None + else: + try: + pattern = r'(?= limit: + return results + + if terms: + title_hits: dict[str, dict[str, Any]] = {} + for term in terms: + title_pattern = f"title:%{term}%" + title_rows = api.get_files_by_title_tag_pattern(title_pattern, fetch_limit) + for file_hash, file_path_str, size_bytes, ext in title_rows: + if not file_path_str: + continue + entry = title_hits.get(file_hash) + if entry: + entry["count"] += 1 + if size_bytes is not None: + entry["size"] = size_bytes + else: + title_hits[file_hash] = { + "path": file_path_str, + "size": size_bytes, + "hash": file_hash, + "count": 1, + } + + if title_hits: + required = len(terms) + for file_hash, info in title_hits.items(): + if info.get("count") != required: + continue + file_path_str = info.get("path") + if not file_path_str or file_path_str in seen_files: + continue + file_path = Path(file_path_str) + if not file_path.exists(): + continue + seen_files.add(file_path_str) + + size_bytes = info.get("size") + if size_bytes is None: + try: + size_bytes = file_path.stat().st_size + except OSError: + size_bytes = None + + tags = api.get_tags_for_file(file_hash) + entry = _create_entry(file_path, tags, size_bytes, info.get("hash")) + results.append(entry) + if limit is not None and len(results) >= limit: + return results + + query_pattern = f"%{query_lower}%" + tag_rows = api.get_files_by_simple_tag_pattern(query_pattern, limit) + + for file_hash, file_path_str, size_bytes, ext in tag_rows: + if not file_path_str or file_path_str in seen_files: + continue + seen_files.add(file_path_str) + + file_path = Path(file_path_str) + if file_path.exists(): + if size_bytes is None: + size_bytes = file_path.stat().st_size + + tags = api.get_tags_for_file(file_hash) + entry = _create_entry(file_path, tags, size_bytes, file_hash) + results.append(entry) + + if limit is not None and len(results) >= limit: + return results + + else: + rows = api.get_all_files(limit) + for file_hash, file_path_str, size_bytes, ext in rows: + if file_path_str: + file_path = Path(file_path_str) + if file_path.exists(): + if size_bytes is None: + size_bytes = file_path.stat().st_size + + tags = api.get_tags_for_file(file_hash) + entry = _create_entry(file_path, tags, size_bytes, file_hash) + results.append(entry) + + if results: + debug(f"Returning {len(results)} results from DB") + else: + debug("No results found in DB") + return results + + except Exception as e: + log(f"⚠️ Database search failed: {e}", file=sys.stderr) + debug(f"DB search exception details: {e}") + return [] + + except Exception as exc: + log(f"❌ Local search failed: {exc}", file=sys.stderr) + raise + + def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]: + """Alias for search_file to match the interface expected by FileStorage.""" + return self.search_file(query, **kwargs) + + def _resolve_library_root(self, file_path: Path, config: Dict[str, Any]) -> 
Optional[Path]: + """Return the library root containing medios-macina.db. + + Prefer the store's configured location, then config override, then walk parents + of the file path to find a directory with medios-macina.db.""" + candidates: list[Path] = [] + if self._location: + candidates.append(Path(self._location).expanduser()) + cfg_root = get_local_storage_path(config) if config else None + if cfg_root: + candidates.append(Path(cfg_root).expanduser()) + + for root in candidates: + db_path = root / "medios-macina.db" + if db_path.exists(): + return root + + try: + for parent in [file_path] + list(file_path.parents): + db_path = parent / "medios-macina.db" + if db_path.exists(): + return parent + except Exception: + pass + return None + + def get_file(self, file_hash: str, **kwargs: Any) -> Optional[Path]: + """Retrieve file by hash, returning path to the file. + + Args: + file_hash: SHA256 hash of the file (64-char hex string) + + Returns: + Path to the file or None if not found + """ + try: + # Normalize the hash + normalized_hash = _normalize_hex_hash(file_hash) + if not normalized_hash: + return None + + search_dir = Path(self._location).expanduser() + from helper.folder_store import FolderDB + + with FolderDB(search_dir) as db: + # Search for file by hash + file_path = db.search_hash(normalized_hash) + + if file_path and file_path.exists(): + return file_path + + return None + + except Exception as exc: + debug(f"Failed to get file for hash {file_hash}: {exc}") + return None + + def pipe(self, file_hash: str, config: Optional[Dict[str, Any]] = None) -> Optional[str]: + """Get a playable path for a file in this folder store. + + For folder stores, this resolves the hash to the actual file path on disk. + + Args: + file_hash: SHA256 hash of the file + config: Optional config dict (unused for folder stores) + + Returns: + Absolute file path as string, or None if file not found + """ + file_path = self.get_file(file_hash) + if file_path: + return str(file_path.absolute()) + return None + + def get_metadata(self, file_hash: str) -> Optional[Dict[str, Any]]: + """Get metadata for a file from the database by hash. + + Args: + file_hash: SHA256 hash of the file (64-char hex string) + + Returns: + Dict with metadata fields (ext, size, hash, duration, etc.) or None if not found + """ + try: + # Normalize the hash + normalized_hash = _normalize_hex_hash(file_hash) + if not normalized_hash: + return None + + search_dir = Path(self._location).expanduser() + from helper.folder_store import DatabaseAPI + + with DatabaseAPI(search_dir) as api: + # Get file hash + file_hash_result = api.get_file_hash_by_hash(normalized_hash) + if not file_hash_result: + return None + + # Query metadata directly from database + cursor = api.get_cursor() + cursor.execute(""" + SELECT * FROM metadata WHERE hash = ? + """, (file_hash_result,)) + + row = cursor.fetchone() + if not row: + return None + + metadata = dict(row) + + # Parse JSON fields + for field in ['url', 'relationships']: + if metadata.get(field): + try: + import json + metadata[field] = json.loads(metadata[field]) + except (json.JSONDecodeError, TypeError): + metadata[field] = [] if field == 'url' else [] + + return metadata + except Exception as exc: + debug(f"Failed to get metadata for hash {file_hash}: {exc}") + return None + + def get_tag(self, file_identifier: str, **kwargs: Any) -> Tuple[List[str], str]: + """Get tags for a local file by hash. 
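`_resolve_library_root` prefers the store's configured location and otherwise walks the file's parent directories looking for `medios-macina.db`. A standalone sketch of that parent walk (the function name is hypothetical):

```python
from pathlib import Path
from typing import Optional

def find_library_root(file_path: Path, db_name: str = "medios-macina.db") -> Optional[Path]:
    """Walk the path and its parents until a directory containing the library DB is found."""
    for candidate in [file_path] + list(file_path.parents):
        if (candidate / db_name).exists():
            return candidate
    return None

# Example: find_library_root(Path("~/storage/music/track.flac").expanduser())
```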
+ + Returns: + Tuple of (tags_list, store_name) where store_name is the actual store name + """ + from helper.folder_store import FolderDB + try: + file_hash = file_identifier + if self._location: + try: + with FolderDB(Path(self._location)) as db: + db_tags = db.get_tags(file_hash) + if db_tags: + # Return actual store name instead of generic "local_db" + store_name = self._name if self._name else "local" + return list(db_tags), store_name + except Exception as exc: + debug(f"Local DB lookup failed: {exc}") + return [], "unknown" + except Exception as exc: + debug(f"get_tags failed for local file: {exc}") + return [], "unknown" + + def add_tag(self, hash: str, tag: List[str], **kwargs: Any) -> bool: + """Add tags to a local file by hash (via FolderDB). + + Handles namespace collapsing: when adding namespace:value, removes existing namespace:* tags. + Returns True if tags were successfully added. + """ + from helper.folder_store import FolderDB + try: + if not self._location: + return False + + try: + with FolderDB(Path(self._location)) as db: + # Get existing tags + existing_tags = list(db.get_tags(hash) or []) + original_tags_lower = {t.lower() for t in existing_tags} + + # Merge new tags, handling namespace overwrites + for new_tag in tag: + if ':' in new_tag: + namespace = new_tag.split(':', 1)[0] + # Remove existing tags in same namespace + existing_tags = [t for t in existing_tags if not t.startswith(namespace + ':')] + # Add new tag if not already present (case-insensitive check) + if new_tag.lower() not in original_tags_lower: + existing_tags.append(new_tag) + + # Save merged tags + db.add_tags_to_hash(hash, existing_tags) + return True + except Exception as exc: + debug(f"Local DB add_tags failed: {exc}") + return False + except Exception as exc: + debug(f"add_tag failed for local file: {exc}") + return False + + def delete_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool: + """Remove tags from a local file by hash.""" + from helper.folder_store import FolderDB + try: + file_hash = file_identifier + if self._location: + try: + with FolderDB(Path(self._location)) as db: + db.remove_tags_from_hash(file_hash, list(tags)) + return True + except Exception as exc: + debug(f"Local DB remove_tags failed: {exc}") + return False + except Exception as exc: + debug(f"delete_tag failed for local file: {exc}") + return False + + def get_url(self, file_identifier: str, **kwargs: Any) -> List[str]: + """Get known url for a local file by hash.""" + from helper.folder_store import FolderDB + try: + file_hash = file_identifier + if self._location: + try: + with FolderDB(Path(self._location)) as db: + meta = db.get_metadata(file_hash) or {} + return list(meta.get("url") or []) + except Exception as exc: + debug(f"Local DB get_metadata failed: {exc}") + return [] + except Exception as exc: + debug(f"get_url failed for local file: {exc}") + return [] + + def add_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool: + """Add known url to a local file by hash.""" + from helper.folder_store import FolderDB + try: + file_hash = file_identifier + if self._location: + try: + with FolderDB(Path(self._location)) as db: + meta = db.get_metadata(file_hash) or {} + url = list(meta.get("url") or []) + changed = False + for u in url: + if u not in url: + url.append(u) + changed = True + if changed: + db.update_metadata_by_hash(file_hash, {"url": url}) + return True + except Exception as exc: + debug(f"Local DB add_url failed: {exc}") + return False + except Exception as exc: 
+ debug(f"add_url failed for local file: {exc}") + return False + + def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool: + """Delete known url from a local file by hash.""" + from helper.folder_store import FolderDB + try: + file_hash = file_identifier + if self._location: + try: + with FolderDB(Path(self._location)) as db: + meta = db.get_metadata(file_hash) or {} + url = list(meta.get("url") or []) + changed = False + for u in url: + if u in url: + url.remove(u) + changed = True + if changed: + db.update_metadata_by_hash(file_hash, {"url": url}) + return True + except Exception as exc: + debug(f"Local DB delete_url failed: {exc}") + return False + except Exception as exc: + debug(f"delete_url failed for local file: {exc}") + return False + + def delete_file(self, file_identifier: str, **kwargs: Any) -> bool: + """Delete a file from the folder store. + + Args: + file_identifier: The file path (as string) or hash of the file to delete + **kwargs: Optional parameters + + Returns: + True if deletion succeeded, False otherwise + """ + from helper.folder_store import FolderDB + try: + file_path = Path(file_identifier) + + # Delete from database + with FolderDB(Path(self._location)) as db: + db.delete_file(file_path) + + # Delete the actual file from disk + if file_path.exists(): + file_path.unlink() + debug(f"Deleted file: {file_path}") + return True + else: + debug(f"File not found on disk: {file_path}") + return True # Already gone + except Exception as exc: + debug(f"delete_file failed: {exc}") + return False + + +class HydrusNetwork(store): + """File storage backend for Hydrus client. + + Each instance represents a specific Hydrus client connection. + Maintains its own HydrusClient with session key. + """ + + def __init__(self, instance_name: str, api_key: str, url: str) -> None: + """Initialize Hydrus storage backend. + + Args: + instance_name: Name of this Hydrus instance (e.g., 'home', 'work') + api_key: Hydrus Client API access key + url: Hydrus client URL (e.g., 'http://192.168.1.230:45869') + """ + from helper import hydrus as hydrus_wrapper + + self._instance_name = instance_name + self._api_key = api_key + self._url = url + # Create persistent client with session key for this instance + self._client = hydrus_wrapper.HydrusClient(url=url, access_key=api_key) + + def name(self) -> str: + return self._instance_name + + def get_name(self) -> str: + return self._instance_name + + def add_file(self, file_path: Path, **kwargs: Any) -> str: + """Upload file to Hydrus with full metadata support. 
+ + Args: + file_path: Path to the file to upload + tags: Optional list of tags to add + url: Optional list of url to associate with the file + title: Optional title (will be added as 'title:value' tag) + + Returns: + File hash from Hydrus + + Raises: + Exception: If upload fails + """ + from helper import hydrus as hydrus_wrapper + from helper.utils import sha256_file + + tags = kwargs.get("tags", []) + url = kwargs.get("url", []) + title = kwargs.get("title") + + # Add title to tags if provided and not already present + if title: + title_tag = f"title:{title}" + if not any(str(tag).lower().startswith("title:") for tag in tags): + tags = [title_tag] + list(tags) + + try: + # Compute file hash + file_hash = sha256_file(file_path) + debug(f"File hash: {file_hash}") + + # Use persistent client with session key + client = self._client + if client is None: + raise Exception("Hydrus client unavailable") + + # Check if file already exists in Hydrus + file_exists = False + try: + metadata = client.fetch_file_metadata(hashes=[file_hash]) + if metadata and isinstance(metadata, dict): + files = metadata.get("file_metadata", []) + if files: + file_exists = True + log( + f"ℹ️ Duplicate detected - file already in Hydrus with hash: {file_hash}", + file=sys.stderr, + ) + except Exception: + pass + + # Upload file if not already present + if not file_exists: + log(f"Uploading to Hydrus: {file_path.name}", file=sys.stderr) + response = client.add_file(file_path) + + # Extract hash from response + hydrus_hash: Optional[str] = None + if isinstance(response, dict): + hydrus_hash = response.get("hash") or response.get("file_hash") + if not hydrus_hash: + hashes = response.get("hashes") + if isinstance(hashes, list) and hashes: + hydrus_hash = hashes[0] + + if not hydrus_hash: + raise Exception(f"Hydrus response missing file hash: {response}") + + file_hash = hydrus_hash + log(f"Hydrus: {file_hash}", file=sys.stderr) + + # Add tags if provided (both for new and existing files) + if tags: + try: + # Use default tag service + service_name = "my tags" + except Exception: + service_name = "my tags" + + try: + debug(f"Adding {len(tags)} tag(s) to Hydrus: {tags}") + client.add_tags(file_hash, tags, service_name) + log(f"Tags added via '{service_name}'", file=sys.stderr) + except Exception as exc: + log(f"⚠️ Failed to add tags: {exc}", file=sys.stderr) + + # Associate url if provided (both for new and existing files) + if url: + log(f"Associating {len(url)} URL(s) with file", file=sys.stderr) + for url in url: + if url: + try: + client.associate_url(file_hash, str(url)) + debug(f"Associated URL: {url}") + except Exception as exc: + log(f"⚠️ Failed to associate URL {url}: {exc}", file=sys.stderr) + + return file_hash + + except Exception as exc: + log(f"❌ Hydrus upload failed: {exc}", file=sys.stderr) + raise + + def search_file(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]: + """Search Hydrus database for files matching query. + + Args: + query: Search query (tags, filenames, hashes, etc.) 
+ limit: Maximum number of results to return (default: 100) + + Returns: + List of dicts with 'name', 'hash', 'size', 'tags' fields + + Example: + results = storage["hydrus"].search("artist:john_doe music") + results = storage["hydrus"].search("Simple Man") + """ + limit = kwargs.get("limit", 100) + + try: + client = self._client + if client is None: + raise Exception("Hydrus client unavailable") + + debug(f"Searching Hydrus for: {query}") + + # Parse the query into tags + # Handle both simple tags and complex queries + # "*" means "match all" - use system:everything tag in Hydrus + if query.strip() == "*": + # Use system:everything to match all files in Hydrus + tags = ["system:everything"] + else: + query_lower = query.lower().strip() + # If query doesn't have a namespace (no ':'), search all files and filter by title/tags + # If query has explicit namespace, use it as a tag search + if ':' not in query_lower: + # No namespace provided: search all files, then filter by title/tags containing the query + tags = ["system:everything"] + else: + # User provided explicit namespace (e.g., "creator:john" or "system:has_audio") + # Use it as a tag search + tags = [query_lower] + + if not tags: + debug(f"Found 0 result(s)") + return [] + + # Search files with the tags + search_result = client.search_files( + tags=tags, + return_hashes=True, + return_file_ids=True + ) + + # Extract file IDs from search result + file_ids = search_result.get("file_ids", []) + hashes = search_result.get("hashes", []) + + if not file_ids and not hashes: + debug(f"Found 0 result(s)") + return [] + + # Fetch metadata for the found files + results = [] + query_lower = query.lower().strip() + # Split by comma or space for AND logic + search_terms = set(query_lower.replace(',', ' ').split()) # For substring matching + + if file_ids: + metadata = client.fetch_file_metadata(file_ids=file_ids) + metadata_list = metadata.get("metadata", []) + + for meta in metadata_list: + if len(results) >= limit: + break + + file_id = meta.get("file_id") + hash_hex = meta.get("hash") + size = meta.get("size", 0) + + # Get tags for this file and extract title + tags_set = meta.get("tags", {}) + all_tags = [] + title = f"Hydrus File {file_id}" # Default fallback + all_tags_str = "" # For substring matching + + # debug(f"[HydrusBackend.search] Processing file_id={file_id}, tags type={type(tags_set)}") + + if isinstance(tags_set, dict): + # Collect both storage_tags and display_tags to capture siblings/parents and ensure title: is seen + def _collect(tag_list: Any) -> None: + nonlocal title, all_tags_str + if not isinstance(tag_list, list): + return + for tag in tag_list: + tag_text = str(tag) if tag else "" + if not tag_text: + continue + all_tags.append(tag_text) + all_tags_str += " " + tag_text.lower() + if tag_text.lower().startswith("title:") and title == f"Hydrus File {file_id}": + title = tag_text.split(":", 1)[1].strip() + + for service_name, service_tags in tags_set.items(): + if not isinstance(service_tags, dict): + continue + + storage_tags = service_tags.get("storage_tags", {}) + if isinstance(storage_tags, dict): + for tag_list in storage_tags.values(): + _collect(tag_list) + + display_tags = service_tags.get("display_tags", []) + _collect(display_tags) + + # Also consider top-level flattened tags payload if provided (Hydrus API sometimes includes it) + top_level_tags = meta.get("tags_flat", []) or meta.get("tags", []) + _collect(top_level_tags) + + # Resolve extension from MIME type + mime_type = meta.get("mime") + ext = "" + if 
mime_type: + for category in mime_maps.values(): + for ext_key, info in category.items(): + if mime_type in info.get("mimes", []): + ext = info.get("ext", "").lstrip('.') + break + if ext: + break + + # Filter results based on query type + # If user provided explicit namespace (has ':'), don't do substring filtering + # Just include what the tag search returned + has_namespace = ':' in query_lower + + if has_namespace: + # Explicit namespace search - already filtered by Hydrus tag search + # Include this result as-is + results.append({ + "hash": hash_hex, + "hash_hex": hash_hex, + "target": hash_hex, + "name": title, + "title": title, + "size": size, + "size_bytes": size, + "origin": self._instance_name, + "tags": all_tags, + "file_id": file_id, + "mime": mime_type, + "ext": ext, + }) + else: + # Free-form search: check if search terms match the title or tags + # Match if ALL search terms are found in title or tags (AND logic) + # AND use whole word matching + + # Combine title and tags for searching + searchable_text = (title + " " + all_tags_str).lower() + + match = True + if query_lower != "*": + for term in search_terms: + # Regex for whole word: \bterm\b + # Escape term to handle special chars + pattern = r'\b' + re.escape(term) + r'\b' + if not re.search(pattern, searchable_text): + match = False + break + + if match: + results.append({ + "hash": hash_hex, + "hash_hex": hash_hex, + "target": hash_hex, + "name": title, + "title": title, + "size": size, + "size_bytes": size, + "origin": self._instance_name, + "tags": all_tags, + "file_id": file_id, + "mime": mime_type, + "ext": ext, + }) + + debug(f"Found {len(results)} result(s)") + return results[:limit] + + except Exception as exc: + log(f"❌ Hydrus search failed: {exc}", file=sys.stderr) + import traceback + traceback.print_exc(file=sys.stderr) + raise + + def get_file(self, file_hash: str, **kwargs: Any) -> Optional[Path]: + """Open file in browser via Hydrus client API URL.""" + import tempfile + import webbrowser + + debug(f"[HydrusNetwork.get_file] Starting for hash: {file_hash[:12]}...") + + # Build browser URL with access key + base_url = self._client.url.rstrip('/') + access_key = self._client.access_key + browser_url = f"{base_url}/get_files/file?hash={file_hash}&Hydrus-Client-API-Access-Key={access_key}" + debug(f"[HydrusNetwork.get_file] Opening URL: {browser_url}") + + # Open in default browser + webbrowser.open(browser_url) + debug(f"[HydrusNetwork.get_file] Browser opened successfully") + + # Return the URL string instead of downloading + debug(f"[HydrusNetwork.get_file] Returning URL: {browser_url}") + return browser_url + + def pipe(self, file_hash: str, config: Optional[Dict[str, Any]] = None) -> Optional[str]: + """Get a playable path for a file in this Hydrus instance. + + For Hydrus stores, this builds a file URL with authentication. 
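`get_file` and `pipe` both hand back a direct Hydrus Client API file URL with the access key embedded as a query parameter rather than downloading the bytes. A small sketch of that URL construction with placeholder values:

```python
from urllib.parse import urlencode

def hydrus_file_url(base_url: str, access_key: str, file_hash: str) -> str:
    """Build a /get_files/file URL that embeds the Client API access key."""
    query = urlencode({
        "hash": file_hash,
        "Hydrus-Client-API-Access-Key": access_key,
    })
    return f"{base_url.rstrip('/')}/get_files/file?{query}"

# Placeholder values for illustration only:
print(hydrus_file_url("http://127.0.0.1:45869", "0123abcd", "f" * 64))
```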
+ + Args: + file_hash: SHA256 hash of the file + config: Optional config dict (unused, URL and key are from instance) + + Returns: + Hydrus API file URL with embedded access key, or None if client unavailable + """ + try: + if not self._client: + return None + + base_url = self._client.url.rstrip('/') + access_key = self._client.access_key + + # Build Hydrus file URL with access key + url = f"{base_url}/get_files/file?hash={file_hash}&Hydrus-Client-API-Access-Key={access_key}" + return url + except Exception as e: + debug(f"Error building Hydrus URL for {file_hash}: {e}") + return None + + def get_metadata(self, file_hash: str, **kwargs: Any) -> Optional[Dict[str, Any]]: + """Get metadata for a file from Hydrus by hash. + + Args: + file_hash: SHA256 hash of the file (64-char hex string) + + Returns: + Dict with metadata fields or None if not found + """ + try: + client = self._client + if not client: + debug("get_metadata: Hydrus client unavailable") + return None + + # Fetch file metadata + payload = client.fetch_file_metadata(hashes=[file_hash], include_service_keys_to_tags=True) + + if not payload or not payload.get("metadata"): + return None + + meta = payload["metadata"][0] + + # Extract title from tags + title = f"Hydrus_{file_hash[:12]}" + tags_payload = meta.get("tags", {}) + if isinstance(tags_payload, dict): + for service_data in tags_payload.values(): + if isinstance(service_data, dict): + display_tags = service_data.get("display_tags", {}) + if isinstance(display_tags, dict): + current_tags = display_tags.get("0", []) + if isinstance(current_tags, list): + for tag in current_tags: + if str(tag).lower().startswith("title:"): + title = tag.split(":", 1)[1].strip() + break + if title != f"Hydrus_{file_hash[:12]}": + break + + # Determine extension from mime type + mime_type = meta.get("mime", "") + ext = "" + if mime_type: + from helper.utils_constant import mime_maps + for category, extensions in mime_maps.items(): + for extension, mime in extensions.items(): + if mime == mime_type: + ext = extension.lstrip(".") + break + if ext: + break + + return { + "hash": file_hash, + "title": title, + "ext": ext, + "size": meta.get("size", 0), + "mime": mime_type, + } + + except Exception as exc: + debug(f"Failed to get metadata from Hydrus: {exc}") + return None + + def get_tag(self, file_identifier: str, **kwargs: Any) -> Tuple[List[str], str]: + """Get tags for a file from Hydrus by hash. 
+ + Args: + file_identifier: File hash (SHA256 hex string) + **kwargs: Optional service_name parameter + + Returns: + Tuple of (tags_list, source_description) + where source is always "hydrus" + """ + try: + file_hash = str(file_identifier) + + # Get Hydrus client and service info + client = self._client + if not client: + debug("get_tags: Hydrus client unavailable") + return [], "unknown" + + # Fetch file metadata + payload = client.fetch_file_metadata( + hashes=[file_hash], + include_service_keys_to_tags=True, + include_file_url=False + ) + + items = payload.get("metadata") if isinstance(payload, dict) else None + if not isinstance(items, list) or not items: + debug(f"get_tags: No metadata returned for hash {file_hash}") + return [], "unknown" + + meta = items[0] if isinstance(items[0], dict) else None + if not isinstance(meta, dict) or meta.get("file_id") is None: + debug(f"get_tags: Invalid metadata for hash {file_hash}") + return [], "unknown" + + # Extract tags using service name + service_name = "my tags" + service_key = hydrus_wrapper.get_tag_service_key(client, service_name) + + # Extract tags from metadata + tags = self._extract_tags_from_hydrus_meta(meta, service_key, service_name) + + return tags, "hydrus" + + except Exception as exc: + debug(f"get_tags failed for Hydrus file: {exc}") + return [], "unknown" + + def add_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool: + """Add tags to a Hydrus file. + """ + try: + client = self._client + if client is None: + debug("add_tag: Hydrus client unavailable") + return False + service_name = kwargs.get("service_name") or "my tags" + # Ensure tags is a list + tag_list = list(tags) if isinstance(tags, (list, tuple)) else [str(tags)] + if not tag_list: + return False + client.add_tags(file_identifier, tag_list, service_name) + return True + except Exception as exc: + debug(f"Hydrus add_tag failed: {exc}") + return False + + def delete_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool: + """Delete tags from a Hydrus file. + """ + try: + client = self._client + if client is None: + debug("delete_tag: Hydrus client unavailable") + return False + service_name = kwargs.get("service_name") or "my tags" + tag_list = list(tags) if isinstance(tags, (list, tuple)) else [str(tags)] + if not tag_list: + return False + client.delete_tags(file_identifier, tag_list, service_name) + return True + except Exception as exc: + debug(f"Hydrus delete_tag failed: {exc}") + return False + + def get_url(self, file_identifier: str, **kwargs: Any) -> List[str]: + """Get known url for a Hydrus file. + """ + try: + client = self._client + if client is None: + debug("get_url: Hydrus client unavailable") + return [] + payload = client.fetch_file_metadata(hashes=[str(file_identifier)], include_file_url=True) + items = payload.get("metadata") if isinstance(payload, dict) else None + if not isinstance(items, list) or not items: + return [] + meta = items[0] + url = meta.get("url") or [] + return list(url) + except Exception as exc: + debug(f"Hydrus get_url failed: {exc}") + return [] + + def add_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool: + """Associate one or more url with a Hydrus file. 
+ """ + try: + client = self._client + if client is None: + debug("add_url: Hydrus client unavailable") + return False + for u in url: + client.associate_url(file_identifier, u) + return True + except Exception as exc: + debug(f"Hydrus add_url failed: {exc}") + return False + + def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool: + """Delete one or more url from a Hydrus file. + """ + try: + client = self._client + if client is None: + debug("delete_url: Hydrus client unavailable") + return False + for u in url: + client.delete_url(file_identifier, u) + return True + except Exception as exc: + debug(f"Hydrus delete_url failed: {exc}") + return False + + @staticmethod + def _extract_tags_from_hydrus_meta( + meta: Dict[str, Any], + service_key: Optional[str], + service_name: str + ) -> List[str]: + """Extract current tags from Hydrus metadata dict. + + Prefers display_tags (includes siblings/parents, excludes deleted). + Falls back to storage_tags status '0' (current). + """ + tags_payload = meta.get("tags") + if not isinstance(tags_payload, dict): + return [] + + svc_data = None + if service_key: + svc_data = tags_payload.get(service_key) + if not isinstance(svc_data, dict): + return [] + + # Prefer display_tags (Hydrus computes siblings/parents) + display = svc_data.get("display_tags") + if isinstance(display, list) and display: + return [str(t) for t in display if isinstance(t, (str, bytes)) and str(t).strip()] + + # Fallback to storage_tags status '0' (current) + storage = svc_data.get("storage_tags") + if isinstance(storage, dict): + current_list = storage.get("0") or storage.get(0) + if isinstance(current_list, list): + return [str(t) for t in current_list if isinstance(t, (str, bytes)) and str(t).strip()] + + return [] + + + +class MatrixStorageBackend(store): + """File storage backend for Matrix (Element) chat rooms.""" + + def get_name(self) -> str: + return "matrix" + + def list_rooms(self, config: Dict[str, Any]) -> List[Dict[str, Any]]: + """List joined rooms with their names.""" + matrix_conf = config.get('storage', {}).get('matrix', {}) + homeserver = matrix_conf.get('homeserver') + access_token = matrix_conf.get('access_token') + + if not homeserver or not access_token: + return [] + + if not homeserver.startswith('http'): + homeserver = f"https://{homeserver}" + + headers = {"Authorization": f"Bearer {access_token}"} + + try: + # Get joined rooms + resp = requests.get(f"{homeserver}/_matrix/client/v3/joined_rooms", headers=headers, timeout=10) + if resp.status_code != 200: + return [] + + room_ids = resp.json().get('joined_rooms', []) + rooms = [] + + for rid in room_ids: + # Try to get room name + name = "Unknown Room" + try: + # Get state event for name + name_resp = requests.get( + f"{homeserver}/_matrix/client/v3/rooms/{rid}/state/m.room.name", + headers=headers, + timeout=2 + ) + if name_resp.status_code == 200: + name = name_resp.json().get('name', name) + else: + # Try canonical alias + alias_resp = requests.get( + f"{homeserver}/_matrix/client/v3/rooms/{rid}/state/m.room.canonical_alias", + headers=headers, + timeout=2 + ) + if alias_resp.status_code == 200: + name = alias_resp.json().get('alias', name) + except Exception: + pass + + rooms.append({'id': rid, 'name': name}) + + return rooms + except Exception as e: + log(f"Error listing Matrix rooms: {e}", file=sys.stderr) + return [] + + def upload(self, file_path: Path, **kwargs: Any) -> str: + """Upload file to Matrix room. 
+ + Requires 'config' in kwargs with 'storage.matrix' settings: + - homeserver: URL of homeserver (e.g. https://matrix.org) + - user_id: User ID (e.g. @user:matrix.org) + - access_token: Access token (preferred) OR password + - room_id: Room ID to upload to (e.g. !roomid:matrix.org) + """ + config = kwargs.get('config', {}) + if not config: + raise ValueError("Config required for Matrix upload") + + matrix_conf = config.get('storage', {}).get('matrix', {}) + if not matrix_conf: + raise ValueError("Matrix storage not configured in config.json") + + homeserver = matrix_conf.get('homeserver') + # user_id = matrix_conf.get('user_id') # Not strictly needed if we have token + access_token = matrix_conf.get('access_token') + room_id = matrix_conf.get('room_id') + + if not homeserver: + raise ValueError("Matrix homeserver required") + + # Ensure homeserver has protocol + if not homeserver.startswith('http'): + homeserver = f"https://{homeserver}" + + # Login if no access token (optional implementation, for now assume token) + if not access_token: + raise ValueError("Matrix access_token required (login not yet implemented)") + + # Handle room selection if not provided + if not room_id: + log("No room_id configured. Fetching joined rooms...", file=sys.stderr) + rooms = self.list_rooms(config) + + if not rooms: + raise ValueError("No joined rooms found or failed to fetch rooms.") + + from result_table import ResultTable + table = ResultTable("Matrix Rooms") + for i, room in enumerate(rooms): + row = table.add_row() + row.add_column("#", str(i + 1)) + row.add_column("Name", room['name']) + row.add_column("ID", room['id']) + + print(table) + + # Simple interactive selection + try: + selection = input("Select room # to upload to: ") + idx = int(selection) - 1 + if 0 <= idx < len(rooms): + room_id = rooms[idx]['id'] + log(f"Selected room: {rooms[idx]['name']} ({room_id})", file=sys.stderr) + else: + raise ValueError("Invalid selection") + except Exception: + raise ValueError("Invalid room selection") + + if not room_id: + raise ValueError("Matrix room_id required") + + # 1. Upload Media + upload_url = f"{homeserver}/_matrix/media/r3/upload" + headers = { + "Authorization": f"Bearer {access_token}", + "Content-Type": "application/octet-stream" # Or guess mime type + } + + import mimetypes + mime_type, _ = mimetypes.guess_type(file_path) + if mime_type: + headers["Content-Type"] = mime_type + + filename = file_path.name + + try: + with open(file_path, 'rb') as f: + resp = requests.post(upload_url, headers=headers, data=f, params={"filename": filename}) + + if resp.status_code != 200: + raise Exception(f"Matrix upload failed: {resp.text}") + + content_uri = resp.json().get('content_uri') + if not content_uri: + raise Exception("No content_uri returned from Matrix upload") + + # 2. 
Send Message + send_url = f"{homeserver}/_matrix/client/r0/rooms/{room_id}/send/m.room.message" + + # Determine msgtype + msgtype = "m.file" + if mime_type: + if mime_type.startswith("image/"): msgtype = "m.image" + elif mime_type.startswith("video/"): msgtype = "m.video" + elif mime_type.startswith("audio/"): msgtype = "m.audio" + + payload = { + "msgtype": msgtype, + "body": filename, + "url": content_uri, + "info": { + "mimetype": mime_type, + "size": file_path.stat().st_size + } + } + + resp = requests.post(send_url, headers=headers, json=payload) + if resp.status_code != 200: + raise Exception(f"Matrix send message failed: {resp.text}") + + event_id = resp.json().get('event_id') + return f"matrix://{room_id}/{event_id}" + + except Exception as e: + log(f"❌ Matrix upload error: {e}", file=sys.stderr) + raise + + + # --- Not supported for Matrix: tagging & URL operations (return safe defaults) --- + def get_tag(self, file_identifier: str, **kwargs: Any) -> Tuple[List[str], str]: + return [], "matrix" + + def add_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool: + return False + + def delete_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool: + return False + + def get_url(self, file_identifier: str, **kwargs: Any) -> List[str]: + return [] + + def add_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool: + return False + + def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool: + return False + + + +class RemoteStorageBackend(store): + """File storage backend for remote Android/network storage servers. + + Connects to a remote storage server (e.g., running on Android phone) + via REST API. All operations are proxied to the remote server. + """ + + def __init__(self, server_url: str, timeout: int = 30, api_key: str = None) -> None: + """Initialize remote storage backend. + + Args: + server_url: Base URL of remote storage server (e.g., http://192.168.1.100:5000) + timeout: Request timeout in seconds + api_key: Optional API key for authentication + """ + try: + import requests + except ImportError: + raise ImportError("requests library required for RemoteStorageBackend. Install with: pip install requests") + + self.server_url = server_url.rstrip('/') + self.timeout = timeout + self.api_key = api_key + self._session = requests.Session() + + # Add API key to default headers if provided + if self.api_key: + self._session.headers.update({'X-API-Key': self.api_key}) + + def get_name(self) -> str: + return "remote" + + + + def _request(self, method: str, endpoint: str, **kwargs) -> Dict[str, Any]: + """Make HTTP request to remote server.""" + import requests + from urllib.parse import urljoin + + url = urljoin(self.server_url, endpoint) + + try: + response = self._session.request( + method, + url, + timeout=self.timeout, + **kwargs + ) + + if response.status_code == 404: + raise Exception(f"Remote resource not found: {endpoint}") + + if response.status_code >= 400: + try: + error_data = response.json() + error_msg = error_data.get('error', response.text) + except: + error_msg = response.text + raise Exception(f"Remote server error {response.status_code}: {error_msg}") + + return response.json() + + except requests.exceptions.RequestException as e: + raise Exception(f"Connection to {self.server_url} failed: {e}") + + def upload(self, file_path: Path, **kwargs: Any) -> str: + """Upload file to remote storage. 
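A sketch of what the `RemoteStorageBackend` session and `_request()` plumbing amounts to when flattened into plain `requests` calls; the server URL, API key, and the `/files/search` endpoint are the examples used in this module, not guaranteed server behaviour.

```python
import requests

session = requests.Session()
session.headers.update({"X-API-Key": "my-secret-key"})   # same header _request() relies on

resp = session.get(
    "http://192.168.1.100:5000/files/search",
    params={"q": "music", "limit": 10},
    timeout=30,
)
if resp.status_code >= 400:
    raise RuntimeError(f"Remote server error {resp.status_code}: {resp.text}")
files = resp.json().get("files", [])
```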
+ + Args: + file_path: Path to the file to upload + tags: Optional list of tags to add + url: Optional list of known url + + Returns: + Remote file hash + """ + from helper.utils import sha256_file + + if not file_path.exists(): + raise ValueError(f"File not found: {file_path}") + + try: + # Index the file on remote server + data = {"path": str(file_path)} + + tags = kwargs.get("tags", []) + if tags: + data["tags"] = tags + + url = kwargs.get("url", []) + if url: + data["url"] = url + + result = self._request('POST', '/files/index', json=data) + file_hash = result.get('hash') + + if file_hash: + log(f"✓ File indexed on remote storage: {file_hash}", file=sys.stderr) + return file_hash + else: + raise Exception("Remote server did not return file hash") + + except Exception as exc: + debug(f"Remote upload failed: {exc}", file=sys.stderr) + raise + + # Tag and URL operations - Remote server default: not supported + def get_tag(self, file_identifier: str, **kwargs: Any) -> Tuple[List[str], str]: + return [], "remote" + + def add_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool: + return False + + def delete_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool: + return False + + def get_url(self, file_identifier: str, **kwargs: Any) -> List[str]: + return [] + + def add_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool: + return False + + def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool: + return False + + def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]: + """Search files on remote storage. + + Args: + query: Search query + limit: Maximum results + + Returns: + List of search results + """ + limit = kwargs.get("limit") + try: + limit = int(limit) if limit is not None else 100 + except (TypeError, ValueError): + limit = 100 + + if limit <= 0: + limit = 100 + + try: + response = self._request('GET', '/files/search', params={ + 'q': query, + 'limit': limit + }) + + files = response.get('files', []) + + # Transform remote format to standard result format + results = [] + for f in files: + results.append({ + "name": f.get('name', '').split('/')[-1], # Get filename from path + "title": f.get('name', f.get('path', '')).split('/')[-1], + "ext": f.get('ext', ''), + "path": f.get('path', ''), + "target": f.get('path', ''), + "hash": f.get('hash', ''), + "origin": "remote", + "size": f.get('size', 0), + "size_bytes": f.get('size', 0), + "tags": f.get('tags', []), + }) + + debug(f"Remote search found {len(results)} results", file=sys.stderr) + return results + + except Exception as exc: + log(f"❌ Remote search failed: {exc}", file=sys.stderr) + raise + + +class FileStorage: + """Unified file storage interface supporting multiple backend instances. + + Each backend type (folder, hydrusnetwork) can have multiple named instances. + Access backends by their configured names. 
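A worked instance of the per-hit normalization performed by `search()` above, using a hypothetical raw entry from the remote server.

```python
# Hypothetical raw hit from the remote /files/search endpoint.
raw = {
    "name": "music/song.mp3",
    "path": "/storage/music/song.mp3",
    "hash": "00beb438e3c0...",
    "ext": "mp3",
    "size": 4200000,
    "tags": ["genre:rock"],
}

normalized = {
    "name": raw.get("name", "").split("/")[-1],      # "song.mp3"
    "title": raw.get("name", raw.get("path", "")).split("/")[-1],
    "ext": raw.get("ext", ""),
    "path": raw.get("path", ""),
    "target": raw.get("path", ""),
    "hash": raw.get("hash", ""),
    "origin": "remote",
    "size": raw.get("size", 0),
    "size_bytes": raw.get("size", 0),
    "tags": raw.get("tags", []),
}
```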
+ + Config structure: + { + "store": { + "folder": { + "default": {"path": "C:\\Media Machina"}, + "test": {"path": "C:\\Users\\Admin\\Downloads\\Video"} + }, + "hydrusnetwork": { + "home": { + "Hydrus-Client-API-Access-Key": "d4321f...", + "url": "http://192.168.1.230:45869" + }, + "work": { + "Hydrus-Client-API-Access-Key": "abc123...", + "url": "http://192.168.1.100:45869" + } + } + } + } + + Example: + storage = FileStorage(config) + + # Upload to different named instances + hash1 = storage["test"].add_file(Path("file.mp3"), tags=["music"]) + hash2 = storage["home"].add_file(Path("file.mp3"), tags=["music"]) + hash3 = storage["work"].add_file(Path("file.mp3"), tags=["music"]) + + # Search across different instances + results = storage["home"].search("music") + results = storage["test"].search("song") + """ + + def __init__(self, config: Optional[Dict[str, Any]] = None, suppress_debug: bool = False) -> None: + """Initialize the file storage system with available backends. + + Args: + config: Configuration dict with backend settings (Local.path, HydrusNetwork, Debrid, etc.) + suppress_debug: If True, suppress debug output during initialization (useful for autocomplete) + """ + self.suppress_debug = suppress_debug + config = config or {} + + # Extract backend-specific settings from config + from config import get_local_storage_path + + local_path = get_local_storage_path(config) + local_path_str = str(local_path) if local_path else None + + self._backends: Dict[str, store] = {} + + # Build folder stores from config (support both 'storage' and legacy 'store' top-level keys) + folder_sources = None + cfg_storage = config.get("storage") or config.get("store") or {} + if isinstance(cfg_storage, dict): + val = cfg_storage.get("folder") + if isinstance(val, dict): + folder_sources = val + + # If folder sources provided, create backends for each entry + if folder_sources: + # Normalize into name -> path mapping + folder_map: Dict[str, str] = {} + for key, value in folder_sources.items(): + if isinstance(value, dict): + path_val = value.get("path") + elif isinstance(value, (str, bytes)): + path_val = str(value) + else: + path_val = None + if path_val: + folder_map[str(key)] = str(Path(path_val).expanduser()) + + # Register all folder stores by their explicit names from config + for name, path in folder_map.items(): + self._backends[name] = Folder(location=path, name=name) + else: + # Fallback: use legacy single local path if present + if local_path_str: + self._backends["default"] = Folder(location=local_path_str, name="default") + + # Matrix (chat room) acts as a provider, not a persistent storage backend. + # We no longer register Matrix as a storage backend here; providers should be separate classes. 
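A minimal sketch of the folder-store wiring above, assuming the config shape shown in the class docstring (`store`/`storage` → `folder` → name → `{"path": ...}`); paths are the docstring's example values.

```python
from pathlib import Path

config = {
    "store": {
        "folder": {
            "default": {"path": "C:\\Media Machina"},
            "test": "C:\\Users\\Admin\\Downloads\\Video",   # bare string form is also accepted
        }
    }
}

cfg_storage = config.get("storage") or config.get("store") or {}
folder_sources = cfg_storage.get("folder", {})

folder_map = {}
for name, value in folder_sources.items():
    path_val = value.get("path") if isinstance(value, dict) else str(value)
    if path_val:
        folder_map[str(name)] = str(Path(path_val).expanduser())

# folder_map -> {"default": "C:\\Media Machina", "test": "C:\\Users\\Admin\\Downloads\\Video"}
# Each entry becomes Folder(location=path, name=name) in self._backends.
```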
+ + # Build HydrusNetwork backends from config['store']['hydrusnetwork'] + # Register all instances regardless of current connectivity - connection errors + # will be caught when actually trying to use the backend + hydrus_sources = cfg_storage.get("hydrusnetwork") + if isinstance(hydrus_sources, dict): + for instance_name, instance_config in hydrus_sources.items(): + if isinstance(instance_config, dict): + api_key = instance_config.get("Hydrus-Client-API-Access-Key") + url = instance_config.get("url") + + # Skip if missing credentials - don't register instances without full config + if not api_key or not url: + continue + + # Register the instance - connection will be tested when actually used + try: + self._backends[instance_name] = HydrusNetwork( + instance_name=instance_name, + api_key=api_key, + url=url + ) + if not self.suppress_debug: + debug(f"[FileStorage] Registered Hydrus instance '{instance_name}': {url}") + except Exception as e: + if not self.suppress_debug: + debug(f"[FileStorage] Failed to register Hydrus instance '{instance_name}': {e}") + continue + + # Include remote storage backends from config (for Android/network servers) + remote_storages = config.get("remote_storages", []) + if isinstance(remote_storages, list): + for remote_config in remote_storages: + if isinstance(remote_config, dict): + name = remote_config.get("name", "remote") + url = remote_config.get("url") + timeout = remote_config.get("timeout", 30) + api_key = remote_config.get("api_key") + + if url: + try: + backend = RemoteStorageBackend(url, timeout=timeout, api_key=api_key) + self._backends[name] = backend + auth_status = " (with auth)" if api_key else " (no auth)" + log(f"Registered remote storage backend: {name} -> {url}{auth_status}", file=sys.stderr) + except Exception as e: + log(f"Failed to register remote storage '{name}': {e}", file=sys.stderr) + + def list_backends(self) -> list[str]: + """Return available backend keys for autocomplete and validation.""" + return sorted(self._backends.keys()) + + def list_searchable_backends(self) -> list[str]: + """Return backend names that support searching.""" + searchable = [] + for name, backend in self._backends.items(): + if callable(getattr(backend, 'search', None)): + searchable.append(name) + return sorted(searchable) + + def __getitem__(self, backend_name: str) -> store: + """Get a storage backend by name. + + Args: + backend_name: Name of the backend ('0x0', 'local', 'hydrus') + + Returns: + StorageBackend instance + + Raises: + KeyError: If backend not found + """ + if backend_name not in self._backends: + raise KeyError( + f"Unknown storage backend: {backend_name}. " + f"Available: {list(self._backends.keys())}" + ) + return self._backends[backend_name] + + def register(self, backend: store) -> None: + """Register a custom storage backend. + + Args: + backend: StorageBackend instance to register + """ + name = backend.get_name() + self._backends[name] = backend + log(f"Registered storage backend: {name}", file=sys.stderr) + + def is_available(self, backend_name: str) -> bool: + """Check if a backend is available. + + Args: + backend_name: Name of the backend + + Returns: + True if backend is registered + """ + return backend_name in self._backends + + def list_searchable_backends(self) -> list[str]: + """Get list of backends that support searching. 
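A usage sketch for backend lookup by configured name; the names follow the configuration example in the class docstring, and the error handling mirrors `__getitem__`.

```python
# Illustrative lookup by configured name.
storage = FileStorage(config)

print(storage.list_backends())        # e.g. ['default', 'home', 'test']

try:
    backend = storage["home"]         # HydrusNetwork instance from store.hydrusnetwork.home
    results = backend.search("music")
except KeyError as exc:
    print(f"Backend not configured: {exc}")
```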
+ + Returns: + List of searchable backend names + """ + return [ + name for name, backend in self._backends.items() + if callable(getattr(backend, 'search', None)) or callable(getattr(backend, 'search_file', None)) + ] + + # --- remaining FileStorage methods --- + + diff --git a/helper/unified_book_downloader.py b/helper/unified_book_downloader.py index 41a1fb5..c4e5dad 100644 --- a/helper/unified_book_downloader.py +++ b/helper/unified_book_downloader.py @@ -555,7 +555,7 @@ class UnifiedBookDownloader: This follows the exact process from archive_client.py: 1. Login with credentials 2. Call loan() to create 14-day borrow - 3. Get book info (extract page URLs) + 3. Get book info (extract page url) 4. Download all pages as images 5. Merge images into searchable PDF @@ -576,10 +576,10 @@ class UnifiedBookDownloader: # If we get here, borrowing succeeded logger.info(f"[UnifiedBookDownloader] Successfully borrowed book: {book_id}") - # Now get the book info (page URLs and metadata) + # Now get the book info (page url and metadata) logger.info(f"[UnifiedBookDownloader] Extracting book page information...") # Try both URL formats: with /borrow and without - book_urls = [ + book_url = [ f"https://archive.org/borrow/{book_id}", # Try borrow page first (for borrowed books) f"https://archive.org/details/{book_id}" # Fallback to details page ] @@ -589,7 +589,7 @@ class UnifiedBookDownloader: metadata = None last_error = None - for book_url in book_urls: + for book_url in book_url: try: logger.debug(f"[UnifiedBookDownloader] Trying to get book info from: {book_url}") response = session.get(book_url, timeout=10) @@ -611,7 +611,7 @@ class UnifiedBookDownloader: continue if links is None: - logger.error(f"[UnifiedBookDownloader] Failed to get book info from all URLs: {last_error}") + logger.error(f"[UnifiedBookDownloader] Failed to get book info from all url: {last_error}") # Borrow extraction failed - return False return False, "Could not extract borrowed book pages" diff --git a/helper/utils.py b/helper/utils.py index 385fbac..f7689fd 100644 --- a/helper/utils.py +++ b/helper/utils.py @@ -308,7 +308,7 @@ def format_metadata_value(key: str, value) -> str: # ============================================================================ # Link Utilities - Consolidated from link_utils.py # ============================================================================ -"""Link utilities - Extract and process URLs from various sources.""" +"""Link utilities - Extract and process url from various sources.""" def extract_link_from_args(args: Iterable[str]) -> Any | None: diff --git a/helper/utils_constant.py b/helper/utils_constant.py index b6cc1c0..cd106a5 100644 --- a/helper/utils_constant.py +++ b/helper/utils_constant.py @@ -77,3 +77,26 @@ mime_maps = { "csv": { "ext": ".csv", "mimes": ["text/csv"] } } } + + +def get_type_from_ext(ext: str) -> str: + """Determine the type (e.g., 'image', 'video', 'audio') from file extension. 
+ + Args: + ext: File extension (with or without leading dot, e.g., 'jpg' or '.jpg') + + Returns: + Type string (e.g., 'image', 'video', 'audio') or 'other' if unknown + """ + if not ext: + return 'other' + + # Normalize: remove leading dot and convert to lowercase + ext_clean = ext.lstrip('.').lower() + + # Search through mime_maps to find matching type + for type_name, extensions_dict in mime_maps.items(): + if ext_clean in extensions_dict: + return type_name + + return 'other' diff --git a/helper/worker_manager.py b/helper/worker_manager.py index 18b987f..ab7f908 100644 --- a/helper/worker_manager.py +++ b/helper/worker_manager.py @@ -11,7 +11,7 @@ from datetime import datetime from threading import Thread, Lock import time -from .local_library import LocalLibraryDB +from .folder_store import FolderDB from helper.logger import log logger = logging.getLogger(__name__) @@ -140,7 +140,7 @@ class Worker: class WorkerLoggingHandler(logging.StreamHandler): """Custom logging handler that captures logs for a worker.""" - def __init__(self, worker_id: str, db: LocalLibraryDB, + def __init__(self, worker_id: str, db: FolderDB, manager: Optional['WorkerManager'] = None, buffer_size: int = 50): """Initialize the handler. @@ -235,7 +235,7 @@ class WorkerManager: auto_refresh_interval: Seconds between auto-refresh checks (0 = disabled) """ self.library_root = Path(library_root) - self.db = LocalLibraryDB(library_root) + self.db = FolderDB(library_root) self.auto_refresh_interval = auto_refresh_interval self.refresh_callbacks: List[Callable] = [] self.refresh_thread: Optional[Thread] = None @@ -244,6 +244,22 @@ class WorkerManager: self.worker_handlers: Dict[str, WorkerLoggingHandler] = {} # Track active handlers self._worker_last_step: Dict[str, str] = {} + def close(self) -> None: + """Close the database connection.""" + if self.db: + try: + self.db.close() + except Exception: + pass + + def __enter__(self): + """Context manager entry.""" + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + """Context manager exit - close database.""" + self.close() + def add_refresh_callback(self, callback: Callable[[List[Dict[str, Any]]], None]) -> None: """Register a callback to be called on worker updates. 
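A usage sketch for the newly added context-manager support on `WorkerManager`; the constructor arguments follow the docstring shown above, and the callback body is a placeholder.

```python
# Illustrative only: the FolderDB connection is closed automatically on exit.
from helper.worker_manager import WorkerManager

with WorkerManager("C:\\Media Machina", auto_refresh_interval=0) as manager:
    manager.add_refresh_callback(lambda workers: print(f"{len(workers)} workers updated"))
    # ... enqueue and monitor workers here ...
# __exit__ has called manager.close(), releasing the FolderDB handle.
```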
diff --git a/hydrus_health_check.py b/hydrus_health_check.py index 44c8202..0c1af67 100644 --- a/hydrus_health_check.py +++ b/hydrus_health_check.py @@ -12,26 +12,14 @@ from typing import Tuple, Optional, Dict, Any from pathlib import Path logger = logging.getLogger(__name__) - -# Global state for Hydrus availability -_HYDRUS_AVAILABLE: Optional[bool] = None -_HYDRUS_UNAVAILABLE_REASON: Optional[str] = None -_HYDRUS_CHECK_COMPLETE = False - -# Global state for Debrid availability -_DEBRID_AVAILABLE: Optional[bool] = None -_DEBRID_UNAVAILABLE_REASON: Optional[str] = None -_DEBRID_CHECK_COMPLETE = False - -# Global state for MPV availability -_MPV_AVAILABLE: Optional[bool] = None -_MPV_UNAVAILABLE_REASON: Optional[str] = None -_MPV_CHECK_COMPLETE = False - -# Global state for Matrix availability -_MATRIX_AVAILABLE: Optional[bool] = None -_MATRIX_UNAVAILABLE_REASON: Optional[str] = None -_MATRIX_CHECK_COMPLETE = False +# Global state for all service availability checks - consolidated from 12 separate globals +_SERVICE_STATE = { + "hydrus": {"available": None, "reason": None, "complete": False}, + "hydrusnetwork_stores": {}, # Track individual Hydrus instances + "debrid": {"available": None, "reason": None, "complete": False}, + "mpv": {"available": None, "reason": None, "complete": False}, + "matrix": {"available": None, "reason": None, "complete": False}, +} # Global state for Cookies availability _COOKIES_FILE_PATH: Optional[str] = None @@ -68,130 +56,73 @@ def check_hydrus_availability(config: Dict[str, Any]) -> Tuple[bool, Optional[st return False, error_msg -def initialize_hydrus_health_check(config: Dict[str, Any]) -> None: - """Initialize Hydrus health check at startup. - - This should be called once at application startup to determine if Hydrus - features should be enabled or disabled. 
- - Args: - config: Application configuration dictionary - """ - global _HYDRUS_AVAILABLE, _HYDRUS_UNAVAILABLE_REASON, _HYDRUS_CHECK_COMPLETE - +def initialize_hydrus_health_check(config: Dict[str, Any], emit_debug: bool = True) -> Tuple[bool, Optional[str]]: + """Initialize Hydrus health check at startup.""" + global _SERVICE_STATE logger.info("[Startup] Starting Hydrus health check...") + is_available, reason = check_hydrus_availability(config) + _SERVICE_STATE["hydrus"]["available"] = is_available + _SERVICE_STATE["hydrus"]["reason"] = reason + _SERVICE_STATE["hydrus"]["complete"] = True + # Track individual Hydrus instances try: - is_available, reason = check_hydrus_availability(config) - _HYDRUS_AVAILABLE = is_available - _HYDRUS_UNAVAILABLE_REASON = reason - _HYDRUS_CHECK_COMPLETE = True - - if is_available: - debug("Hydrus: ENABLED - All Hydrus features available", file=sys.stderr) - else: - debug(f"Hydrus: DISABLED - {reason or 'Connection failed'}", file=sys.stderr) - + store_config = config.get("store", {}) + hydrusnetwork = store_config.get("hydrusnetwork", {}) + for instance_name, instance_config in hydrusnetwork.items(): + if isinstance(instance_config, dict): + url = instance_config.get("url") + access_key = instance_config.get("Hydrus-Client-API-Access-Key") + if url and access_key: + _SERVICE_STATE["hydrusnetwork_stores"][instance_name] = { + "ok": is_available, + "url": url, + "detail": reason if not is_available else "Connected" + } + else: + _SERVICE_STATE["hydrusnetwork_stores"][instance_name] = { + "ok": False, + "url": url or "Not configured", + "detail": "Missing credentials" + } except Exception as e: - logger.error(f"[Startup] Failed to initialize Hydrus health check: {e}", exc_info=True) - _HYDRUS_AVAILABLE = False - _HYDRUS_UNAVAILABLE_REASON = str(e) - _HYDRUS_CHECK_COMPLETE = True - debug(f"Hydrus: DISABLED - Error during health check: {e}", file=sys.stderr) + logger.debug(f"Could not enumerate Hydrus instances: {e}") + + if emit_debug: + status = 'ENABLED' if is_available else f'DISABLED - {reason or "Connection failed"}' + debug(f"Hydrus: {status}", file=sys.stderr) + return is_available, reason def check_debrid_availability(config: Dict[str, Any]) -> Tuple[bool, Optional[str]]: - """Check if Debrid API is available. 
- - Args: - config: Application configuration dictionary - - Returns: - Tuple of (is_available: bool, reason: Optional[str]) - - (True, None) if Debrid API is available - - (False, reason) if Debrid API is unavailable with reason - """ + """Check if Debrid API is available.""" try: from helper.http_client import HTTPClient - - logger.info("[Debrid Health Check] Pinging Debrid API at https://api.alldebrid.com/v4/ping...") - - try: - # Use the public ping endpoint to check API availability - # This endpoint doesn't require authentication - with HTTPClient(timeout=10.0, verify_ssl=True) as client: - response = client.get('https://api.alldebrid.com/v4/ping') - logger.debug(f"[Debrid Health Check] Response status: {response.status_code}") - - # Read response text first (handles gzip decompression) - try: - response_text = response.text - logger.debug(f"[Debrid Health Check] Response text: {response_text}") - except Exception as e: - logger.error(f"[Debrid Health Check] ❌ Failed to read response text: {e}") - return False, f"Failed to read response: {e}" - - # Parse JSON - try: - result = response.json() - logger.debug(f"[Debrid Health Check] Response JSON: {result}") - except Exception as e: - logger.error(f"[Debrid Health Check] ❌ Failed to parse JSON: {e}") - logger.error(f"[Debrid Health Check] Response was: {response_text}") - return False, f"Failed to parse response: {e}" - - # Validate response format - if result.get('status') == 'success' and result.get('data', {}).get('ping') == 'pong': - logger.info("[Debrid Health Check] ✅ Debrid API is AVAILABLE") - return True, None - else: - logger.warning(f"[Debrid Health Check] ❌ Debrid API returned unexpected response: {result}") - return False, "Invalid API response" - except Exception as e: - error_msg = str(e) - logger.warning(f"[Debrid Health Check] ❌ Debrid API error: {error_msg}") - import traceback - logger.debug(f"[Debrid Health Check] Traceback: {traceback.format_exc()}") - return False, error_msg - + logger.info("[Debrid Health Check] Pinging Debrid API...") + with HTTPClient(timeout=10.0, verify_ssl=True) as client: + response = client.get('https://api.alldebrid.com/v4/ping') + result = response.json() + if result.get('status') == 'success' and result.get('data', {}).get('ping') == 'pong': + logger.info("[Debrid Health Check] Debrid API is AVAILABLE") + return True, None + return False, "Invalid API response" except Exception as e: - error_msg = str(e) - logger.error(f"[Debrid Health Check] ❌ Error checking Debrid availability: {error_msg}") - return False, error_msg + logger.warning(f"[Debrid Health Check] Debrid API error: {e}") + return False, str(e) -def initialize_debrid_health_check(config: Dict[str, Any]) -> None: - """Initialize Debrid health check at startup. - - This should be called once at application startup to determine if Debrid - features should be enabled or disabled. 
- - Args: - config: Application configuration dictionary - """ - global _DEBRID_AVAILABLE, _DEBRID_UNAVAILABLE_REASON, _DEBRID_CHECK_COMPLETE - +def initialize_debrid_health_check(config: Dict[str, Any], emit_debug: bool = True) -> Tuple[bool, Optional[str]]: + """Initialize Debrid health check at startup.""" + global _SERVICE_STATE logger.info("[Startup] Starting Debrid health check...") - - try: - is_available, reason = check_debrid_availability(config) - _DEBRID_AVAILABLE = is_available - _DEBRID_UNAVAILABLE_REASON = reason - _DEBRID_CHECK_COMPLETE = True - - if is_available: - debug("✅ Debrid: ENABLED - All Debrid features available", file=sys.stderr) - logger.info("[Startup] Debrid health check PASSED") - else: - debug(f"⚠️ Debrid: DISABLED - {reason or 'Connection failed'}", file=sys.stderr) - logger.warning(f"[Startup] Debrid health check FAILED: {reason}") - - except Exception as e: - logger.error(f"[Startup] Failed to initialize Debrid health check: {e}", exc_info=True) - _DEBRID_AVAILABLE = False - _DEBRID_UNAVAILABLE_REASON = str(e) - _DEBRID_CHECK_COMPLETE = True - debug(f"⚠️ Debrid: DISABLED - Error during health check: {e}", file=sys.stderr) + is_available, reason = check_debrid_availability(config) + _SERVICE_STATE["debrid"]["available"] = is_available + _SERVICE_STATE["debrid"]["reason"] = reason + _SERVICE_STATE["debrid"]["complete"] = True + if emit_debug: + status = 'ENABLED' if is_available else f'DISABLED - {reason or "Connection failed"}' + debug(f"Debrid: {status}", file=sys.stderr) + return is_available, reason def check_mpv_availability() -> Tuple[bool, Optional[str]]: @@ -200,10 +131,10 @@ def check_mpv_availability() -> Tuple[bool, Optional[str]]: Returns: Tuple of (is_available: bool, reason: Optional[str]) """ - global _MPV_AVAILABLE, _MPV_UNAVAILABLE_REASON, _MPV_CHECK_COMPLETE + global _SERVICE_STATE - if _MPV_CHECK_COMPLETE and _MPV_AVAILABLE is not None: - return _MPV_AVAILABLE, _MPV_UNAVAILABLE_REASON + if _SERVICE_STATE["mpv"]["complete"] and _SERVICE_STATE["mpv"]["available"] is not None: + return _SERVICE_STATE["mpv"]["available"], _SERVICE_STATE["mpv"]["reason"] import shutil import subprocess @@ -212,11 +143,8 @@ def check_mpv_availability() -> Tuple[bool, Optional[str]]: mpv_path = shutil.which("mpv") if not mpv_path: - _MPV_AVAILABLE = False - _MPV_UNAVAILABLE_REASON = "Executable 'mpv' not found in PATH" - _MPV_CHECK_COMPLETE = True - logger.warning(f"[MPV Health Check] ❌ MPV is UNAVAILABLE: {_MPV_UNAVAILABLE_REASON}") - return False, _MPV_UNAVAILABLE_REASON + logger.warning(f"[MPV Health Check] ❌ MPV is UNAVAILABLE: Executable 'mpv' not found in PATH") + return False, "Executable 'mpv' not found in PATH" # Try to get version to confirm it works try: @@ -228,55 +156,35 @@ def check_mpv_availability() -> Tuple[bool, Optional[str]]: ) if result.returncode == 0: version_line = result.stdout.split('\n')[0] - _MPV_AVAILABLE = True - _MPV_UNAVAILABLE_REASON = None - _MPV_CHECK_COMPLETE = True - logger.info(f"[MPV Health Check] ✅ MPV is AVAILABLE ({version_line})") + logger.info(f"[MPV Health Check] MPV is AVAILABLE ({version_line})") return True, None else: - _MPV_AVAILABLE = False - _MPV_UNAVAILABLE_REASON = f"MPV returned non-zero exit code: {result.returncode}" - _MPV_CHECK_COMPLETE = True - logger.warning(f"[MPV Health Check] ❌ MPV is UNAVAILABLE: {_MPV_UNAVAILABLE_REASON}") - return False, _MPV_UNAVAILABLE_REASON + reason = f"MPV returned non-zero exit code: {result.returncode}" + logger.warning(f"[MPV Health Check] ❌ MPV is UNAVAILABLE: {reason}") 
+ return False, reason except Exception as e: - _MPV_AVAILABLE = False - _MPV_UNAVAILABLE_REASON = f"Error running MPV: {e}" - _MPV_CHECK_COMPLETE = True - logger.warning(f"[MPV Health Check] ❌ MPV is UNAVAILABLE: {_MPV_UNAVAILABLE_REASON}") - return False, _MPV_UNAVAILABLE_REASON + reason = f"Error running MPV: {e}" + logger.warning(f"[MPV Health Check] ❌ MPV is UNAVAILABLE: {reason}") + return False, reason -def initialize_mpv_health_check() -> None: - """Initialize MPV health check at startup. - - This should be called once at application startup to determine if MPV - features should be enabled or disabled. - """ - global _MPV_AVAILABLE, _MPV_UNAVAILABLE_REASON, _MPV_CHECK_COMPLETE +def initialize_mpv_health_check(emit_debug: bool = True) -> Tuple[bool, Optional[str]]: + """Initialize MPV health check at startup and return (is_available, reason).""" + global _SERVICE_STATE logger.info("[Startup] Starting MPV health check...") + is_available, reason = check_mpv_availability() + _SERVICE_STATE["mpv"]["available"] = is_available + _SERVICE_STATE["mpv"]["reason"] = reason + _SERVICE_STATE["mpv"]["complete"] = True - try: - is_available, reason = check_mpv_availability() - _MPV_AVAILABLE = is_available - _MPV_UNAVAILABLE_REASON = reason - _MPV_CHECK_COMPLETE = True - + if emit_debug: if is_available: - debug("✅ MPV: ENABLED - All MPV features available", file=sys.stderr) - logger.info("[Startup] MPV health check PASSED") - else: - debug(f"⚠️ MPV: DISABLED - {reason or 'Connection failed'}", file=sys.stderr) - debug("→ Hydrus features still available", file=sys.stderr) - logger.warning(f"[Startup] MPV health check FAILED: {reason}") - - except Exception as e: - logger.error(f"[Startup] Failed to initialize MPV health check: {e}", exc_info=True) - _MPV_AVAILABLE = False - _MPV_UNAVAILABLE_REASON = str(e) - _MPV_CHECK_COMPLETE = True - debug(f"⚠️ MPV: DISABLED - Error during health check: {e}", file=sys.stderr) + debug("MPV: ENABLED - All MPV features available", file=sys.stderr) + elif reason != "Not configured": + debug(f"MPV: DISABLED - {reason or 'Connection failed'}", file=sys.stderr) + + return is_available, reason def check_matrix_availability(config: Dict[str, Any]) -> Tuple[bool, Optional[str]]: @@ -324,264 +232,262 @@ def check_matrix_availability(config: Dict[str, Any]) -> Tuple[bool, Optional[st return False, str(e) -def initialize_matrix_health_check(config: Dict[str, Any]) -> None: - """Initialize Matrix health check at startup.""" - global _MATRIX_AVAILABLE, _MATRIX_UNAVAILABLE_REASON, _MATRIX_CHECK_COMPLETE + +def initialize_matrix_health_check(config: Dict[str, Any], emit_debug: bool = True) -> Tuple[bool, Optional[str]]: + """Initialize Matrix health check at startup and return (is_available, reason).""" + global _SERVICE_STATE logger.info("[Startup] Starting Matrix health check...") + is_available, reason = check_matrix_availability(config) + _SERVICE_STATE["matrix"]["available"] = is_available + _SERVICE_STATE["matrix"]["reason"] = reason + _SERVICE_STATE["matrix"]["complete"] = True - try: - is_available, reason = check_matrix_availability(config) - _MATRIX_AVAILABLE = is_available - _MATRIX_UNAVAILABLE_REASON = reason - _MATRIX_CHECK_COMPLETE = True - + if emit_debug: if is_available: debug("Matrix: ENABLED - Homeserver reachable", file=sys.stderr) - else: - if reason != "Not configured": - debug(f"Matrix: DISABLED - {reason}", file=sys.stderr) - - except Exception as e: - logger.error(f"[Startup] Failed to initialize Matrix health check: {e}", exc_info=True) - 
_MATRIX_AVAILABLE = False - _MATRIX_UNAVAILABLE_REASON = str(e) - _MATRIX_CHECK_COMPLETE = True - - -def is_hydrus_available() -> bool: - """Check if Hydrus is available (from cached health check). + elif reason != "Not configured": + debug(f"Matrix: DISABLED - {reason}", file=sys.stderr) - Returns: - True if Hydrus API is available, False otherwise - """ - return _HYDRUS_AVAILABLE is True + return is_available, reason + + +# Unified getter functions for service availability - all use _SERVICE_STATE +def is_hydrus_available() -> bool: + """Check if Hydrus is available (from cached health check).""" + return _SERVICE_STATE["hydrus"]["available"] is True def get_hydrus_unavailable_reason() -> Optional[str]: - """Get the reason why Hydrus is unavailable. - - Returns: - String explaining why Hydrus is unavailable, or None if available - """ - return _HYDRUS_UNAVAILABLE_REASON if not is_hydrus_available() else None + """Get the reason why Hydrus is unavailable.""" + return _SERVICE_STATE["hydrus"]["reason"] if not is_hydrus_available() else None def is_hydrus_check_complete() -> bool: - """Check if the Hydrus health check has been completed. - - Returns: - True if health check has run, False if still pending - """ - return _HYDRUS_CHECK_COMPLETE + """Check if the Hydrus health check has been completed.""" + return _SERVICE_STATE["hydrus"]["complete"] def disable_hydrus_features() -> None: - """Manually disable all Hydrus features (for testing/fallback). - - This can be called if Hydrus connectivity is lost after startup. - """ - global _HYDRUS_AVAILABLE, _HYDRUS_UNAVAILABLE_REASON - _HYDRUS_AVAILABLE = False - _HYDRUS_UNAVAILABLE_REASON = "Manually disabled or lost connection" + """Manually disable all Hydrus features (for testing/fallback).""" + global _SERVICE_STATE + _SERVICE_STATE["hydrus"]["available"] = False + _SERVICE_STATE["hydrus"]["reason"] = "Manually disabled or lost connection" logger.warning("[Hydrus] Features manually disabled") def enable_hydrus_features() -> None: - """Manually enable Hydrus features (for testing/fallback). - - This can be called if Hydrus connectivity is restored after startup. - """ - global _HYDRUS_AVAILABLE, _HYDRUS_UNAVAILABLE_REASON - _HYDRUS_AVAILABLE = True - _HYDRUS_UNAVAILABLE_REASON = None + """Manually enable Hydrus features (for testing/fallback).""" + global _SERVICE_STATE + _SERVICE_STATE["hydrus"]["available"] = True + _SERVICE_STATE["hydrus"]["reason"] = None logger.info("[Hydrus] Features manually enabled") def is_debrid_available() -> bool: - """Check if Debrid is available (from cached health check). - - Returns: - True if Debrid API is available, False otherwise - """ - return _DEBRID_AVAILABLE is True + """Check if Debrid is available (from cached health check).""" + return _SERVICE_STATE["debrid"]["available"] is True def get_debrid_unavailable_reason() -> Optional[str]: - """Get the reason why Debrid is unavailable. - - Returns: - String explaining why Debrid is unavailable, or None if available - """ - return _DEBRID_UNAVAILABLE_REASON if not is_debrid_available() else None + """Get the reason why Debrid is unavailable.""" + return _SERVICE_STATE["debrid"]["reason"] if not is_debrid_available() else None def is_debrid_check_complete() -> bool: - """Check if the Debrid health check has been completed. 
- - Returns: - True if health check has run, False if still pending - """ - return _DEBRID_CHECK_COMPLETE + """Check if the Debrid health check has been completed.""" + return _SERVICE_STATE["debrid"]["complete"] def disable_debrid_features() -> None: - """Manually disable all Debrid features (for testing/fallback). - - This can be called if Debrid connectivity is lost after startup. - """ - global _DEBRID_AVAILABLE, _DEBRID_UNAVAILABLE_REASON - _DEBRID_AVAILABLE = False - _DEBRID_UNAVAILABLE_REASON = "Manually disabled or lost connection" + """Manually disable all Debrid features (for testing/fallback).""" + global _SERVICE_STATE + _SERVICE_STATE["debrid"]["available"] = False + _SERVICE_STATE["debrid"]["reason"] = "Manually disabled or lost connection" logger.warning("[Debrid] Features manually disabled") def enable_debrid_features() -> None: - """Manually enable Debrid features (for testing/fallback). - - This can be called if Debrid connectivity is restored after startup. - """ - global _DEBRID_AVAILABLE, _DEBRID_UNAVAILABLE_REASON - _DEBRID_AVAILABLE = True - _DEBRID_UNAVAILABLE_REASON = None + """Manually enable Debrid features (for testing/fallback).""" + global _SERVICE_STATE + _SERVICE_STATE["debrid"]["available"] = True + _SERVICE_STATE["debrid"]["reason"] = None logger.info("[Debrid] Features manually enabled") def is_mpv_available() -> bool: - """Check if MPV is available (from cached health check). - - Returns: - True if MPV is available, False otherwise - """ - return _MPV_AVAILABLE is True - + """Check if MPV is available (from cached health check).""" + return _SERVICE_STATE["mpv"]["available"] is True def get_mpv_unavailable_reason() -> Optional[str]: - """Get the reason why MPV is unavailable. - - Returns: - String explaining why MPV is unavailable, or None if available - """ - return _MPV_UNAVAILABLE_REASON if not is_mpv_available() else None + """Get the reason why MPV is unavailable.""" + return _SERVICE_STATE["mpv"]["reason"] if not is_mpv_available() else None def is_mpv_check_complete() -> bool: - """Check if the MPV health check has been completed. - - Returns: - True if health check has run, False if still pending - """ - return _MPV_CHECK_COMPLETE + """Check if the MPV health check has been completed.""" + return _SERVICE_STATE["mpv"]["complete"] def disable_mpv_features() -> None: - """Manually disable all MPV features (for testing/fallback). - - This can be called if MPV connectivity is lost after startup. - """ - global _MPV_AVAILABLE, _MPV_UNAVAILABLE_REASON - _MPV_AVAILABLE = False - _MPV_UNAVAILABLE_REASON = "Manually disabled or lost connection" + """Manually disable all MPV features (for testing/fallback).""" + global _SERVICE_STATE + _SERVICE_STATE["mpv"]["available"] = False + _SERVICE_STATE["mpv"]["reason"] = "Manually disabled or lost connection" logger.warning("[MPV] Features manually disabled") def enable_mpv_features() -> None: - """Manually enable MPV features (for testing/fallback). - - This can be called if MPV connectivity is restored after startup. - """ - global _MPV_AVAILABLE, _MPV_UNAVAILABLE_REASON - _MPV_AVAILABLE = True - _MPV_UNAVAILABLE_REASON = None + """Manually enable MPV features (for testing/fallback).""" + global _SERVICE_STATE + _SERVICE_STATE["mpv"]["available"] = True + _SERVICE_STATE["mpv"]["reason"] = None logger.info("[MPV] Features manually enabled") def is_matrix_available() -> bool: - """Check if Matrix is available (from cached health check). 
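A sketch of the consolidation pattern used above: one dict keyed by service name replaces a dozen module-level globals. The generic accessors here are illustrative only; the module keeps explicit per-service getters for backwards compatibility.

```python
from typing import Optional

_SERVICE_STATE = {
    "hydrus": {"available": None, "reason": None, "complete": False},
    "debrid": {"available": None, "reason": None, "complete": False},
    "mpv":    {"available": None, "reason": None, "complete": False},
    "matrix": {"available": None, "reason": None, "complete": False},
}

def service_available(name: str) -> bool:
    return _SERVICE_STATE.get(name, {}).get("available") is True

def service_unavailable_reason(name: str) -> Optional[str]:
    state = _SERVICE_STATE.get(name, {})
    return state.get("reason") if state.get("available") is not True else None
```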
- - Returns: - True if Matrix is available, False otherwise - """ - return _MATRIX_AVAILABLE is True + """Check if Matrix is available (from cached health check).""" + return _SERVICE_STATE["matrix"]["available"] is True def get_matrix_unavailable_reason() -> Optional[str]: - """Get the reason why Matrix is unavailable. - - Returns: - String explaining why Matrix is unavailable, or None if available - """ - return _MATRIX_UNAVAILABLE_REASON if not is_matrix_available() else None + """Get the reason why Matrix is unavailable.""" + return _SERVICE_STATE["matrix"]["reason"] if not is_matrix_available() else None def is_matrix_check_complete() -> bool: - """Check if the Matrix health check has been completed. - - Returns: - True if health check has run, False if still pending - """ - return _MATRIX_CHECK_COMPLETE + """Check if the Matrix health check has been completed.""" + return _SERVICE_STATE["matrix"]["complete"] def disable_matrix_features() -> None: - """Manually disable all Matrix features (for testing/fallback). - - This can be called if Matrix connectivity is lost after startup. - """ - global _MATRIX_AVAILABLE, _MATRIX_UNAVAILABLE_REASON - _MATRIX_AVAILABLE = False - _MATRIX_UNAVAILABLE_REASON = "Manually disabled or lost connection" + """Manually disable all Matrix features (for testing/fallback).""" + global _SERVICE_STATE + _SERVICE_STATE["matrix"]["available"] = False + _SERVICE_STATE["matrix"]["reason"] = "Manually disabled or lost connection" logger.warning("[Matrix] Features manually disabled") def enable_matrix_features() -> None: - """Manually enable Matrix features (for testing/fallback). - - This can be called if Matrix connectivity is restored after startup. - """ - global _MATRIX_AVAILABLE, _MATRIX_UNAVAILABLE_REASON - _MATRIX_AVAILABLE = True - _MATRIX_UNAVAILABLE_REASON = None + """Manually enable Matrix features (for testing/fallback).""" + global _SERVICE_STATE + _SERVICE_STATE["matrix"]["available"] = True + _SERVICE_STATE["matrix"]["reason"] = None logger.info("[Matrix] Features manually enabled") -def initialize_local_library_scan(config: Dict[str, Any]) -> None: - """Initialize and scan local library at startup. +def initialize_local_library_scan(config: Dict[str, Any], emit_debug: bool = True) -> Tuple[bool, str]: + """Initialize and scan all folder stores at startup. + + Returns a tuple of (success, detail_message). - This ensures that any new files in the local library folder are indexed + Note: Individual store results are stored in _SERVICE_STATE["folder_stores"] + for the CLI to display as separate table rows. + + This ensures that any new files in configured folder stores are indexed and their sidecar files are imported and cleaned up. 
""" - from config import get_local_storage_path - from helper.local_library import LocalLibraryInitializer + from helper.folder_store import LocalLibraryInitializer + from helper.store import Folder - logger.info("[Startup] Starting Local Library scan...") + logger.info("[Startup] Starting folder store scans...") try: - storage_path = get_local_storage_path(config) - if not storage_path: - debug("⚠️ Local Library: SKIPPED - No storage path configured", file=sys.stderr) - return + # Get all configured folder stores from config + folder_sources = config.get("store", {}).get("folder", {}) + if not isinstance(folder_sources, dict) or not folder_sources: + if emit_debug: + debug("⚠️ Folder stores: SKIPPED - No folder stores configured", file=sys.stderr) + return False, "No folder stores configured" + + results = [] + total_new_files = 0 + total_sidecars = 0 + failed_stores = [] + store_results = {} + + for store_name, store_config in folder_sources.items(): + if not isinstance(store_config, dict): + continue - debug(f"Scanning local library at: {storage_path}", file=sys.stderr) - initializer = LocalLibraryInitializer(storage_path) - stats = initializer.scan_and_index() + store_path = store_config.get("path") + if not store_path: + continue + + try: + from pathlib import Path + storage_path = Path(str(store_path)).expanduser() + + if emit_debug: + debug(f"Scanning folder store '{store_name}' at: {storage_path}", file=sys.stderr) + + # Migrate the folder store to hash-based naming (only runs once per location) + Folder.migrate_location(str(storage_path)) + + initializer = LocalLibraryInitializer(storage_path) + stats = initializer.scan_and_index() + + # Accumulate stats + new_files = stats.get('files_new', 0) + sidecars = stats.get('sidecars_imported', 0) + total_new_files += new_files + total_sidecars += sidecars + + # Record result for this store + if new_files > 0 or sidecars > 0: + result_detail = f"New: {new_files}, Sidecars: {sidecars}" + if emit_debug: + debug(f" {store_name}: {result_detail}", file=sys.stderr) + else: + result_detail = "Up to date" + if emit_debug: + debug(f" {store_name}: {result_detail}", file=sys.stderr) + + results.append(f"{store_name}: {result_detail}") + store_results[store_name] = { + "path": str(storage_path), + "detail": result_detail, + "ok": True + } + + except Exception as e: + logger.error(f"[Startup] Failed to scan folder store '{store_name}': {e}", exc_info=True) + if emit_debug: + debug(f" {store_name}: ERROR - {e}", file=sys.stderr) + failed_stores.append(store_name) + store_results[store_name] = { + "path": str(store_config.get("path", "?")), + "detail": f"ERROR - {e}", + "ok": False + } - # Log summary - new_files = stats.get('files_new', 0) - sidecars = stats.get('sidecars_imported', 0) + # Store individual results for CLI to display + _SERVICE_STATE["folder_stores"] = store_results - if new_files > 0 or sidecars > 0: - debug(f"✅ Local Library: Scanned - New files: {new_files}, Sidecars imported: {sidecars}", file=sys.stderr) + # Build detail message + if failed_stores: + detail = f"Scanned {len(results)} stores ({len(failed_stores)} failed); Total new: {total_new_files}, Sidecars: {total_sidecars}" + if emit_debug: + debug(f"Folder stores scan complete: {detail}", file=sys.stderr) + return len(failed_stores) < len(results), detail else: - debug("✅ Local Library: Up to date", file=sys.stderr) + detail = f"Scanned {len(results)} stores; Total new: {total_new_files}, Sidecars: {total_sidecars}" + if emit_debug: + debug(f"Folder stores scan complete: 
{detail}", file=sys.stderr) + return True, detail except Exception as e: - logger.error(f"[Startup] Failed to scan local library: {e}", exc_info=True) - debug(f"⚠️ Local Library: ERROR - Scan failed: {e}", file=sys.stderr) + logger.error(f"[Startup] Failed to scan folder stores: {e}", exc_info=True) + if emit_debug: + debug(f"⚠️ Folder stores: ERROR - Scan failed: {e}", file=sys.stderr) + return False, f"Scan failed: {e}" -def initialize_cookies_check() -> None: - """Check for cookies.txt in the application root directory.""" +def initialize_cookies_check(emit_debug: bool = True) -> Tuple[bool, str]: + """Check for cookies.txt in the application root directory. + + Returns a tuple of (found, detail_message). + """ global _COOKIES_FILE_PATH # Assume CLI.py is in the root @@ -590,10 +496,12 @@ def initialize_cookies_check() -> None: if cookies_path.exists(): _COOKIES_FILE_PATH = str(cookies_path) - debug(f"✅ Cookies: ENABLED - Found cookies.txt", file=sys.stderr) + if emit_debug: + debug(f"Cookies: ENABLED - Found cookies.txt", file=sys.stderr) + return True, str(cookies_path) else: _COOKIES_FILE_PATH = None - # debug("ℹ️ Cookies: Using browser cookies (fallback)", file=sys.stderr) + return False, "Not found" def get_cookies_file_path() -> Optional[str]: diff --git a/metadata.py b/metadata.py index f0ce446..56d94dc 100644 --- a/metadata.py +++ b/metadata.py @@ -28,6 +28,11 @@ except ImportError: # pragma: no cover load_config = None # type: ignore[assignment] resolve_output_dir = None # type: ignore[assignment] +try: + from helper.utils import sha256_file +except ImportError: # pragma: no cover + sha256_file = None # type: ignore[assignment] + try: from helpers.hydrus import HydrusClient, HydrusRequestError, HydrusRequestSpec # type: ignore except ImportError: # pragma: no cover @@ -45,6 +50,33 @@ else: # pragma: no cover _CURRENT_RELATIONSHIP_TRACKER = FileRelationshipTracker() +def field(obj: Any, name: str, value: Any = None) -> Any: + """Get or set a field on dict or object. + + Args: + obj: Dict or object to access + name: Field name + value: If None, gets the field; if not None, sets it and returns the value + + Returns: + The field value (when getting) or the value (when setting) + """ + if value is None: + # Get mode + if isinstance(obj, dict): + return obj.get(name) + else: + return getattr(obj, name, None) + else: + # Set mode + if isinstance(obj, dict): + obj[name] = value + else: + setattr(obj, name, value) + return value + + + def _generate_hydrus_url_variants(url: str) -> List[str]: seen: Set[str] = set() variants: List[str] = [] @@ -99,105 +131,136 @@ def value_normalize(value: str) -> str: def import_pending_sidecars(db_root: Path, db: Any) -> None: - """Import any .tags or .metadata sidecars that exist in the filesystem. - - Scans for sidecar files (.tags, .metadata, .notes) and imports their contents - into the database as tags and metadata for the associated files. 
- - Args: - db_root: Root directory to search for sidecar files - db: LocalLibraryDB instance to import metadata into - """ + """Import pending sidecars (.tag/.tags/.metadata/.notes) into the database.""" try: - sidecar_patterns = ['**/*.tags', '**/*.metadata', '**/*.notes'] - + sidecar_patterns = ['**/*.tag', '**/*.tags', '**/*.metadata', '**/*.notes'] + for pattern in sidecar_patterns: for sidecar_path in db_root.glob(pattern): if '.downlow' in sidecar_path.parts: continue - - if sidecar_path.suffix == '.tags': - orig_path = sidecar_path.parent / sidecar_path.name[:-5] - elif sidecar_path.suffix == '.metadata': - orig_path = sidecar_path.parent / sidecar_path.name[:-9] - elif sidecar_path.suffix == '.notes': - orig_path = sidecar_path.parent / sidecar_path.name[:-6] - else: + + try: + base_path = sidecar_path.with_suffix('') + except Exception: continue - - if not orig_path.exists(): + + if not base_path.exists(): continue - + + # Ensure file entry exists try: cursor = db.connection.cursor() if db.connection else None if cursor: - cursor.execute('SELECT id FROM files WHERE file_path = ?', (str(orig_path),)) + cursor.execute('SELECT id FROM files WHERE file_path = ?', (str(base_path),)) result = cursor.fetchone() file_id = result[0] if result else None except Exception: file_id = None - + if not file_id: try: cursor = db.connection.cursor() if db.connection else None if cursor: cursor.execute( 'INSERT INTO files (file_path, indexed_at, updated_at) VALUES (?, datetime("now"), datetime("now"))', - (str(orig_path),) + (str(base_path),) ) db.connection.commit() file_id = cursor.lastrowid except Exception: continue - - if sidecar_path.suffix == '.tags' and file_id: + + if not file_id: + continue + + if sidecar_path.suffix in {'.tag', '.tags'}: try: - with open(sidecar_path, 'r', encoding='utf-8') as f: - content = f.read().strip() - - if content: - if '\n' in content: - tags = [tag.strip() for tag in content.split('\n') if tag.strip()] - else: - tags = [tag.strip() for tag in content.split(',') if tag.strip()] - + content = sidecar_path.read_text(encoding='utf-8') + except Exception: + continue + + tags = [line.strip() for line in content.splitlines() if line.strip()] + if tags: + try: cursor = db.connection.cursor() if db.connection else None if cursor: for tag in tags: cursor.execute( - 'INSERT OR IGNORE INTO tags (file_id, tag, tag_type) VALUES (?, ?, ?)', - (file_id, tag, 'sidecar_import') + 'INSERT OR IGNORE INTO tags (hash, tag) VALUES (?, ?)', + (file_hash_value, tag) if hasattr(db, 'get_file_hash') else (None, tag) ) db.connection.commit() - - sidecar_path.unlink() - except Exception: - pass - - elif sidecar_path.suffix == '.metadata' and file_id: + except Exception: + pass + + elif sidecar_path.suffix == '.metadata': + url: List[str] = [] + relationships: List[str] = [] + hash_value: Optional[str] = None + + try: + content = sidecar_path.read_text(encoding='utf-8') + except Exception: + content = '' + + for raw_line in content.splitlines(): + line = raw_line.strip() + if not line or line.startswith('#'): + continue + lower = line.lower() + if lower.startswith('hash:'): + hash_value = line.split(':', 1)[1].strip() or None + elif lower.startswith('url:') or lower.startswith('url:'): + url_part = line.split(':', 1)[1].strip() + if url_part: + for url_segment in url_part.replace(',', ' ').split(): + clean = url_segment.strip() + if clean and clean not in url: + url.append(clean) + elif lower.startswith('relationship:'): + rel_value = line.split(':', 1)[1].strip() + if rel_value: + 
relationships.append(rel_value) + + if sha256_file and base_path.exists(): + try: + hash_value = sha256_file(base_path) + except Exception: + pass + try: - with open(sidecar_path, 'r', encoding='utf-8') as f: - metadata_dict = json.load(f) - cursor = db.connection.cursor() if db.connection else None - if cursor and metadata_dict: + if cursor: cursor.execute( - 'INSERT OR REPLACE INTO metadata (file_id, hash, size, ext, duration, media_type, time_imported, time_modified) VALUES (?, ?, ?, ?, ?, ?, datetime("now"), datetime("now"))', + 'INSERT OR REPLACE INTO metadata (file_id, hash, url, relationships, time_imported, time_modified) VALUES (?, ?, ?, ?, datetime("now"), datetime("now"))', ( file_id, - metadata_dict.get('hash'), - metadata_dict.get('size'), - metadata_dict.get('ext'), - metadata_dict.get('duration'), - metadata_dict.get('media_type'), + hash_value, + json.dumps(url), + json.dumps(relationships), ) ) db.connection.commit() - - sidecar_path.unlink() except Exception: pass - + + elif sidecar_path.suffix == '.notes': + try: + content = sidecar_path.read_text(encoding='utf-8').strip() + except Exception: + content = '' + if content: + try: + cursor = db.connection.cursor() if db.connection else None + if cursor: + cursor.execute( + 'INSERT INTO notes (file_id, note, created_at, updated_at) VALUES (?, ?, datetime("now"), datetime("now")) ON CONFLICT(file_id) DO UPDATE SET note = excluded.note, updated_at = datetime("now")', + (file_id, content) + ) + db.connection.commit() + except Exception: + pass except Exception: pass @@ -332,7 +395,7 @@ def imdb_tag(imdb_id: str) -> Dict[str, object]: break if cast_names: _extend_tags(tags, "cast", cast_names) - return PipeObject("imdb", canonical_id, tags=tags).to_dict() + return {"source": "imdb", "id": canonical_id, "tags": tags} def fetch_musicbrainz_tags(mbid: str, entity: str) -> Dict[str, object]: if not musicbrainzngs: raise RuntimeError("musicbrainzngs package is not available") @@ -388,7 +451,7 @@ def fetch_musicbrainz_tags(mbid: str, entity: str) -> Dict[str, object]: for genre in genre_list: if isinstance(genre, dict) and genre.get("name"): _add_tag(tags, "genre", genre["name"]) - return PipeObject("musicbrainz", mbid, tags=tags, extra={"entity": entity}).to_dict() + return {"source": "musicbrainz", "id": mbid, "tags": tags, "entity": entity} def fetch_openlibrary_tags(ol_id: str) -> Dict[str, object]: @@ -510,7 +573,7 @@ def fetch_openlibrary_tags(ol_id: str) -> Dict[str, object]: description = description.get("value") _add_tag(tags, "summary", description) - return PipeObject("openlibrary", ol_id, tags=tags).to_dict() + return {"source": "openlibrary", "id": ol_id, "tags": tags} def _append_unique(target: List[str], seen: Set[str], value: Optional[str]) -> None: @@ -655,19 +718,19 @@ def resolve_remote_metadata(payload: Dict[str, Any]) -> Dict[str, Any]: mpv_meta_candidate = payload.get('mpv_metadata') mpv_metadata = mpv_meta_candidate if isinstance(mpv_meta_candidate, dict) else None result_tags = bundle.get('tags') or existing_tags - result = PipeObject( - source='remote-metadata', - identifier=sanitized or 'unknown', - tags=result_tags, - title=bundle.get('title'), - source_url=bundle.get('source_url') or sanitized, - duration=bundle.get('duration'), - metadata=merged_metadata, - remote_metadata=remote_info, - warnings=warnings, - mpv_metadata=mpv_metadata, - ) - return result.to_serializable() + result = { + 'source': 'remote-metadata', + 'id': sanitized or 'unknown', + 'tags': result_tags, + 'title': bundle.get('title'), + 
'source_url': bundle.get('source_url') or sanitized, + 'duration': bundle.get('duration'), + 'metadata': merged_metadata, + 'remote_metadata': remote_info, + 'warnings': warnings, + 'mpv_metadata': mpv_metadata, + } + return result def _ensure_hydrus_client() -> None: @@ -890,7 +953,7 @@ def _build_hydrus_query( query['include_file_relationships'] = json.dumps(True) if not minimal: extras = ( - 'include_known_urls', + 'include_url', 'include_size', 'include_width', 'include_height', @@ -1140,7 +1203,7 @@ def fetch_hydrus_metadata_by_url(payload: Dict[str, Any]) -> Dict[str, Any]: assert HydrusRequestSpec is not None spec = HydrusRequestSpec( method='GET', - endpoint='/add_urls/get_url_files', + endpoint='/add_url/get_url_files', query={'url': candidate}, ) try: @@ -1265,16 +1328,27 @@ def _normalise_string_list(values: Optional[Iterable[Any]]) -> List[str]: def _derive_sidecar_path(media_path: Path) -> Path: + """Return preferred sidecar path (.tag), falling back to legacy .tags if it exists. + + Keeps backward compatibility by preferring existing .tags, but new writes use .tag. + """ try: - return media_path.parent / (media_path.name + '.tags') + preferred = media_path.parent / (media_path.name + '.tag') + legacy = media_path.parent / (media_path.name + '.tags') except ValueError: - return media_path.with_name(media_path.name + '.tags') + preferred = media_path.with_name(media_path.name + '.tag') + legacy = media_path.with_name(media_path.name + '.tags') + + # Prefer legacy if it already exists to avoid duplicate sidecars + if legacy.exists(): + return legacy + return preferred def _read_sidecar_metadata(sidecar_path: Path) -> tuple[Optional[str], List[str], List[str]]: - """Read hash, tags, and known_urls from .tags sidecar file. + """Read hash, tags, and url from .tags sidecar file. - Consolidated with read_tags_from_file - this extracts extra metadata (hash, urls). + Consolidated with read_tags_from_file - this extracts extra metadata (hash, url). 
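A small sketch of the sidecar-name resolution implemented by `_derive_sidecar_path` above, assuming a hypothetical `library/song.mp3`:

```python
from pathlib import Path
from metadata import _derive_sidecar_path  # helper shown above

media = Path("library/song.mp3")           # hypothetical media file
sidecar = _derive_sidecar_path(media)
# -> library/song.mp3.tags  if that legacy sidecar already exists on disk
# -> library/song.mp3.tag   otherwise (the preferred name for new writes)
```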
""" if not sidecar_path.exists(): return None, [], [] @@ -1285,7 +1359,7 @@ def _read_sidecar_metadata(sidecar_path: Path) -> tuple[Optional[str], List[str] hash_value: Optional[str] = None tags: List[str] = [] - known_urls: List[str] = [] + url: List[str] = [] for raw_line in raw.splitlines(): line = raw_line.strip() @@ -1295,20 +1369,20 @@ def _read_sidecar_metadata(sidecar_path: Path) -> tuple[Optional[str], List[str] lower = line.lower() if lower.startswith('hash:'): hash_value = line.split(':', 1)[1].strip() if ':' in line else '' - elif lower.startswith('known_url:') or lower.startswith('url:'): - # Parse URLs (handle legacy 'url:' format) - urls_part = line.split(':', 1)[1].strip() if ':' in line else '' - if urls_part: - for url_segment in urls_part.split(','): + elif lower.startswith('url:') or lower.startswith('url:'): + # Parse url (handle legacy 'url:' format) + url_part = line.split(':', 1)[1].strip() if ':' in line else '' + if url_part: + for url_segment in url_part.split(','): for url in url_segment.split(): url_clean = url.strip() - if url_clean and url_clean not in known_urls: - known_urls.append(url_clean) + if url_clean and url_clean not in url: + url.append(url_clean) else: # Everything else is a tag (including relationship: lines) tags.append(line) - return hash_value, tags, known_urls + return hash_value, tags, url @@ -1387,35 +1461,22 @@ def rename(file_path: Path, tags: Iterable[str]) -> Optional[Path]: return None -def write_tags(media_path: Path, tags: Iterable[str], known_urls: Iterable[str], hash_value: Optional[str] = None, db=None) -> None: - """Write tags and metadata to database or sidecar file. +def write_tags(media_path: Path, tags: Iterable[str], url: Iterable[str], hash_value: Optional[str] = None, db=None) -> None: + """Write tags to database or sidecar file (tags only). - If db is provided, inserts into LocalLibraryDB and skips sidecar file creation. - Otherwise, creates .tags sidecar file with name: media.ext.tags (e.g., song.mp3.tags) - - Args: - media_path: Path to the media file - tags: Iterable of tag strings - known_urls: Iterable of known URL strings - hash_value: Optional hash value for the file - db: Optional LocalLibraryDB instance. If provided, skips sidecar creation. + Hash/URL data is no longer written to the tag sidecar; it belongs in metadata. + If db is provided, inserts tags only into LocalLibraryDB. Otherwise, writes .tag sidecar. 
""" if media_path.exists() and media_path.is_dir(): raise ValueError(f"write_tags_sidecar: media_path is a directory: {media_path}") - # Prepare tags lines and convert to list if needed + # Prepare tags lines and convert to list if needed (tags only) tag_list = list(tags) if not isinstance(tags, list) else tags - url_list = list(known_urls) if not isinstance(known_urls, list) else known_urls # If database provided, insert directly and skip sidecar if db is not None: try: - # Build tag list with hash and known_urls - db_tags = [] - if hash_value: - db_tags.append(f"hash:{hash_value}") - db_tags.extend(str(tag).strip() for tag in tag_list if str(tag).strip()) - db_tags.extend(f"known_url:{str(url).strip()}" for url in url_list if str(url).strip()) + db_tags = [str(tag).strip() for tag in tag_list if str(tag).strip()] if db_tags: db.add_tags(media_path, db_tags) @@ -1427,25 +1488,22 @@ def write_tags(media_path: Path, tags: Iterable[str], known_urls: Iterable[str], # Create sidecar path try: - sidecar = media_path.parent / (media_path.name + '.tags') + sidecar = media_path.parent / (media_path.name + '.tag') except Exception: - sidecar = media_path.with_name(media_path.name + '.tags') + sidecar = media_path.with_name(media_path.name + '.tag') # Handle edge case: empty/invalid base name try: - if not sidecar.stem or sidecar.name in {'.tags', '-.tags', '_.tags'}: + if not sidecar.stem or sidecar.name in {'.tag', '-.tag', '_.tag'}: fallback_base = media_path.stem or _sanitize_title_for_filename(extract_title(tag_list) or '') or 'untitled' - sidecar = media_path.parent / f"{fallback_base}.tags" + sidecar = media_path.parent / f"{fallback_base}.tag" except Exception: pass # Write via consolidated function try: lines = [] - if hash_value: - lines.append(f"hash:{hash_value}") lines.extend(str(tag).strip() for tag in tag_list if str(tag).strip()) - lines.extend(f"known_url:{str(url).strip()}" for url in url_list if str(url).strip()) if lines: sidecar.write_text("\n".join(lines) + "\n", encoding="utf-8") @@ -1467,16 +1525,16 @@ def write_tags(media_path: Path, tags: Iterable[str], known_urls: Iterable[str], debug(f"Failed to write tag sidecar {sidecar}: {exc}", file=sys.stderr) -def write_metadata(media_path: Path, hash_value: Optional[str] = None, known_urls: Optional[Iterable[str]] = None, relationships: Optional[Iterable[str]] = None, db=None) -> None: +def write_metadata(media_path: Path, hash_value: Optional[str] = None, url: Optional[Iterable[str]] = None, relationships: Optional[Iterable[str]] = None, db=None) -> None: """Write metadata to database or sidecar file. If db is provided, inserts into LocalLibraryDB and skips sidecar file creation. - Otherwise, creates .metadata sidecar file with hash, URLs, and relationships. + Otherwise, creates .metadata sidecar file with hash, url, and relationships. Args: media_path: Path to the media file hash_value: Optional hash value for the file - known_urls: Optional iterable of known URL strings + url: Optional iterable of known URL strings relationships: Optional iterable of relationship strings db: Optional LocalLibraryDB instance. If provided, skips sidecar creation. 
""" @@ -1484,7 +1542,7 @@ def write_metadata(media_path: Path, hash_value: Optional[str] = None, known_url raise ValueError(f"write_metadata_sidecar: media_path is a directory: {media_path}") # Prepare metadata lines - url_list = list(known_urls) if known_urls else [] + url_list = list(url) if url else [] rel_list = list(relationships) if relationships else [] # If database provided, insert directly and skip sidecar @@ -1496,7 +1554,8 @@ def write_metadata(media_path: Path, hash_value: Optional[str] = None, known_url db_tags.append(f"hash:{hash_value}") for url in url_list: if str(url).strip(): - db_tags.append(f"known_url:{str(url).strip()}") + clean = str(url).strip() + db_tags.append(f"url:{clean}") for rel in rel_list: if str(rel).strip(): db_tags.append(f"relationship:{str(rel).strip()}") @@ -1522,10 +1581,11 @@ def write_metadata(media_path: Path, hash_value: Optional[str] = None, known_url if hash_value: lines.append(f"hash:{hash_value}") - # Add known URLs + # Add known url for url in url_list: if str(url).strip(): - lines.append(f"known_url:{str(url).strip()}") + clean = str(url).strip() + lines.append(f"url:{clean}") # Add relationships for rel in rel_list: @@ -1664,7 +1724,7 @@ def sync_sidecar(payload: Dict[str, Any]) -> Dict[str, Any]: if hash_input: hash_value = _normalize_hash(hash_input) tags = _normalise_string_list(payload.get('tags')) - known_urls = _normalise_string_list(payload.get('known_urls')) + url = _normalise_string_list(payload.get('url')) if media_path is not None: sidecar_path = _derive_sidecar_path(media_path) search_roots = _collect_search_roots(payload) @@ -1680,15 +1740,15 @@ def sync_sidecar(payload: Dict[str, Any]) -> Dict[str, Any]: 'error': 'not_found', 'hash': hash_value, 'tags': tags, - 'known_urls': known_urls, + 'url': url, } else: raise ValueError('path or hash is required to synchronise sidecar') existing_hash, existing_tags, existing_known = _read_sidecar_metadata(sidecar_path) if not tags: tags = existing_tags - if not known_urls: - known_urls = existing_known + if not url: + url = existing_known hash_line = hash_value or existing_hash title_value: Optional[str] = None for tag in tags: @@ -1702,7 +1762,7 @@ def sync_sidecar(payload: Dict[str, Any]) -> Dict[str, Any]: if hash_line: lines.append(f'hash:{hash_line}') lines.extend(tags) - lines.extend(f'known_url:{url}' for url in known_urls) + lines.extend(f'url:{url}' for url in url) sidecar_path.parent.mkdir(parents=True, exist_ok=True) if lines: sidecar_path.write_text('\n'.join(lines) + '\n', encoding='utf-8') @@ -1715,7 +1775,7 @@ def sync_sidecar(payload: Dict[str, Any]) -> Dict[str, Any]: 'path': str(sidecar_path), 'hash': hash_line, 'tags': [], - 'known_urls': [], + 'url': [], 'deleted': True, 'title': title_value, } @@ -1723,7 +1783,7 @@ def sync_sidecar(payload: Dict[str, Any]) -> Dict[str, Any]: 'path': str(sidecar_path), 'hash': hash_line, 'tags': tags, - 'known_urls': known_urls, + 'url': url, 'title': title_value, } @@ -2220,14 +2280,14 @@ def embed_metadata_in_file( 'genre': 'genre', 'composer': 'composer', 'comment': 'comment', - 'known_url': 'comment', # Embed known URLs in comment field + 'url': 'comment', # Embed known url in comment field 'creator': 'artist', # Map creator to artist 'channel': 'album_artist', # Map channel to album_artist } # Extract metadata from tags metadata = {} - comments = [] # Collect comments (including URLs) + comments = [] # Collect comments (including url) for tag in tags: tag_str = str(tag).strip() if ':' in tag_str: @@ -2236,8 +2296,8 @@ def 
embed_metadata_in_file( value = value.strip() if namespace in tag_map and value: ffmpeg_key = tag_map[namespace] - if namespace == 'known_url': - # Collect URLs as comments + if namespace == 'url': + # Collect url as comments comments.append(f"URL: {value}") elif ffmpeg_key == 'comment': # Collect other comment-type tags @@ -2294,7 +2354,7 @@ def embed_metadata_in_file( # Replace original with temp file file_path.unlink() temp_file.rename(file_path) - debug(f"✅ Embedded metadata in file: {file_path.name}", file=sys.stderr) + debug(f"Embedded metadata in file: {file_path.name}", file=sys.stderr) return True else: # Clean up temp file if it exists @@ -2323,7 +2383,7 @@ def write_tags_to_file( file_path: Path, tags: List[str], source_hashes: Optional[List[str]] = None, - known_urls: Optional[List[str]] = None, + url: Optional[List[str]] = None, append: bool = False ) -> bool: """Write tags to .tags sidecar file. @@ -2335,7 +2395,7 @@ def write_tags_to_file( file_path: Path to .tags file (will be created if doesn't exist) tags: List of tags to write source_hashes: Optional source file hashes (written as source:hash1,hash2) - known_urls: Optional known URLs (each written on separate line as known_url:url) + url: Optional known url (each written on separate line as url:url) append: If True, append to existing file; if False, overwrite (default) Returns: @@ -2359,10 +2419,10 @@ def write_tags_to_file( if source_hashes: content_lines.append(f"source:{','.join(source_hashes)}") - # Add known URLs if provided - each on separate line to prevent corruption - if known_urls: - for url in known_urls: - content_lines.append(f"known_url:{url}") + # Add known url if provided - each on separate line to prevent corruption + if url: + for url in url: + content_lines.append(f"url:{url}") # Add tags if tags: @@ -3231,3 +3291,608 @@ def format_playlist_entry(entry: Dict[str, Any], index: int, extractor: str) -> result["release_date"] = entry.get("release_date", "") return result + + +# ============================================================================ +# Metadata helper functions for tag processing and scraping +# ============================================================================ + +def extract_title_from_tags(tags_list: List[str]) -> Optional[str]: + """Extract title from tags list.""" + try: + extracted = extract_title(tags_list) + if extracted: + return extracted + except Exception: + pass + + for t in tags_list: + if isinstance(t, str) and t.lower().startswith("title:"): + val = t.split(":", 1)[1].strip() + if val: + return val + return None + + +def summarize_tags(tags_list: List[str], limit: int = 8) -> str: + """Create a summary of tags for display.""" + shown = [t for t in tags_list[:limit] if t] + summary = ", ".join(shown) + remaining = max(0, len(tags_list) - len(shown)) + if remaining > 0: + summary = f"{summary} (+{remaining} more)" if summary else f"(+{remaining} more)" + if len(summary) > 200: + summary = summary[:197] + "..." 
+ return summary + + +def extract_scrapable_identifiers(tags_list: List[str]) -> Dict[str, str]: + """Extract scrapable identifiers from tags.""" + identifiers = {} + scrapable_prefixes = { + 'openlibrary', 'isbn', 'isbn_10', 'isbn_13', + 'musicbrainz', 'musicbrainzalbum', 'imdb', 'tmdb', 'tvdb' + } + + for tag in tags_list: + if not isinstance(tag, str) or ':' not in tag: + continue + + parts = tag.split(':', 1) + if len(parts) != 2: + continue + + key_raw = parts[0].strip().lower() + key = key_raw.replace('-', '_') + if key == 'isbn10': + key = 'isbn_10' + elif key == 'isbn13': + key = 'isbn_13' + value = parts[1].strip() + + # Normalize ISBN values by removing hyphens for API friendliness + if key.startswith('isbn'): + value = value.replace('-', '') + + if key in scrapable_prefixes and value: + identifiers[key] = value + + return identifiers + + +def extract_tag_value(tags_list: List[str], namespace: str) -> Optional[str]: + """Get first tag value for a namespace (e.g., artist:, title:).""" + ns = namespace.lower() + for tag in tags_list: + if not isinstance(tag, str) or ':' not in tag: + continue + prefix, _, value = tag.partition(':') + if prefix.strip().lower() != ns: + continue + candidate = value.strip() + if candidate: + return candidate + return None + + +def scrape_url_metadata(url: str) -> Tuple[Optional[str], List[str], List[Tuple[str, str]], List[Dict[str, Any]]]: + """Scrape metadata from a URL using yt-dlp. + + Returns: + (title, tags, formats, playlist_items) tuple where: + - title: Video/content title + - tags: List of extracted tags (both namespaced and freeform) + - formats: List of (display_label, format_id) tuples + - playlist_items: List of playlist entry dicts (empty if not a playlist) + """ + try: + import json as json_module + + try: + from metadata import extract_ytdlp_tags + except ImportError: + extract_ytdlp_tags = None + + # Build yt-dlp command with playlist support + # IMPORTANT: Do NOT use --flat-playlist! It strips metadata like artist, album, uploader, genre + # Without it, yt-dlp gives us full metadata in an 'entries' array within a single JSON object + # This ensures we get album-level metadata from sources like BandCamp, YouTube Music, etc. 
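Stepping back to the tag helpers defined just above, a quick sketch of the normalisation done by `extract_scrapable_identifiers` and `extract_tag_value` (tag values are illustrative):

```python
from metadata import extract_scrapable_identifiers, extract_tag_value

tags = [
    "title:Example Book",
    "ISBN-13:978-0-14-303943-3",
    "openlibrary:OL9674499M",
]

extract_scrapable_identifiers(tags)
# -> {"isbn_13": "9780143039433", "openlibrary": "OL9674499M"}
#    (namespaces lowercased, '-' folded to '_', ISBN hyphens stripped)

extract_tag_value(tags, "title")
# -> "Example Book"
```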
+ cmd = [ + "yt-dlp", + "-j", # Output JSON + "--no-warnings", + "--playlist-items", "1-10", # Get first 10 items if it's a playlist (provides entries) + "-f", "best", + url + ] + + result = subprocess.run(cmd, capture_output=True, text=True, timeout=30) + + if result.returncode != 0: + log(f"yt-dlp error: {result.stderr}", file=sys.stderr) + return None, [], [], [] + + # Parse JSON output - WITHOUT --flat-playlist, we get ONE JSON object with 'entries' array + # This gives us full metadata instead of flat format + lines = result.stdout.strip().split('\n') + if not lines or not lines[0]: + log("yt-dlp returned empty output", file=sys.stderr) + return None, [], [], [] + + # Parse the single JSON object + try: + data = json_module.loads(lines[0]) + except json_module.JSONDecodeError as e: + log(f"Failed to parse yt-dlp JSON: {e}", file=sys.stderr) + return None, [], [], [] + + # Extract title - use the main title + title = data.get('title', 'Unknown') + + # Determine if this is a playlist/album (has entries array) + # is_playlist = 'entries' in data and isinstance(data.get('entries'), list) + + # Extract tags and playlist items + tags = [] + playlist_items = [] + + # IMPORTANT: Extract album/playlist-level tags FIRST (before processing entries) + # This ensures we get metadata about the collection, not just individual tracks + if extract_ytdlp_tags: + album_tags = extract_ytdlp_tags(data) + tags.extend(album_tags) + + # Case 1: Entries are nested in the main object (standard playlist structure) + if 'entries' in data and isinstance(data.get('entries'), list): + entries = data['entries'] + # Build playlist items with title and duration + for idx, entry in enumerate(entries, 1): + if isinstance(entry, dict): + item_title = entry.get('title', entry.get('id', f'Track {idx}')) + item_duration = entry.get('duration', 0) + playlist_items.append({ + 'index': idx, + 'id': entry.get('id', f'track_{idx}'), + 'title': item_title, + 'duration': item_duration, + 'url': entry.get('url') or entry.get('webpage_url', ''), + }) + + # Extract tags from each entry and merge (but don't duplicate album-level tags) + # Only merge entry tags that are multi-value prefixes (not single-value like title:, artist:, etc.) + if extract_ytdlp_tags: + entry_tags = extract_ytdlp_tags(entry) + + # Single-value namespaces that should not be duplicated from entries + single_value_namespaces = {'title', 'artist', 'album', 'creator', 'channel', 'release_date', 'upload_date', 'license', 'location'} + + for tag in entry_tags: + # Extract the namespace (part before the colon) + tag_namespace = tag.split(':', 1)[0].lower() if ':' in tag else None + + # Skip if this namespace already exists in tags (from album level) + if tag_namespace and tag_namespace in single_value_namespaces: + # Check if any tag with this namespace already exists in tags + already_has_namespace = any( + t.split(':', 1)[0].lower() == tag_namespace + for t in tags if ':' in t + ) + if already_has_namespace: + continue # Skip this tag, keep the album-level one + + if tag not in tags: # Avoid exact duplicates + tags.append(tag) + + # Case 2: Playlist detected by playlist_count field (BandCamp albums, etc.) 
+ # These need a separate call with --flat-playlist to get the actual entries + elif (data.get('playlist_count') or 0) > 0 and 'entries' not in data: + try: + # Make a second call with --flat-playlist to get the actual tracks + flat_cmd = [ + "yt-dlp", + "-j", + "--no-warnings", + "--flat-playlist", + "-f", "best", + url + ] + flat_result = subprocess.run(flat_cmd, capture_output=True, text=True, timeout=30) + if flat_result.returncode == 0: + flat_lines = flat_result.stdout.strip().split('\n') + # With --flat-playlist, each line is a separate track JSON object + # (not nested in a playlist container), so process ALL lines + for idx, line in enumerate(flat_lines, 1): + if line.strip().startswith('{'): + try: + entry = json_module.loads(line) + item_title = entry.get('title', entry.get('id', f'Track {idx}')) + item_duration = entry.get('duration', 0) + playlist_items.append({ + 'index': idx, + 'id': entry.get('id', f'track_{idx}'), + 'title': item_title, + 'duration': item_duration, + 'url': entry.get('url') or entry.get('webpage_url', ''), + }) + except json_module.JSONDecodeError: + pass + except Exception as e: + pass # Silently ignore if we can't get playlist entries + + + # Fallback: if still no tags detected, get from first item + if not tags and extract_ytdlp_tags: + tags = extract_ytdlp_tags(data) + + # Extract formats from the main data object + formats = [] + if 'formats' in data: + formats = extract_url_formats(data.get('formats', [])) + + # Deduplicate tags by namespace to prevent duplicate title:, artist:, etc. + try: + if dedup_tags_by_namespace: + tags = dedup_tags_by_namespace(tags, keep_first=True) + except Exception: + pass # If dedup fails, return tags as-is + + return title, tags, formats, playlist_items + + except subprocess.TimeoutExpired: + log("yt-dlp timeout (>30s)", file=sys.stderr) + return None, [], [], [] + except Exception as e: + log(f"URL scraping error: {e}", file=sys.stderr) + return None, [], [], [] + + +def extract_url_formats(formats: list) -> List[Tuple[str, str]]: + """Extract best formats from yt-dlp formats list. + + Returns list of (display_label, format_id) tuples. 
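A sketch of the return contract of `scrape_url_metadata` as completed above; the URL, labels, and format ids are illustrative only:

```python
from metadata import scrape_url_metadata

title, tags, formats, playlist_items = scrape_url_metadata(
    "https://example.bandcamp.com/album/example"   # hypothetical URL
)

# title          -> content title reported by yt-dlp, or None on failure
# tags           -> deduplicated namespaced tags, e.g. "artist:...", "album:..."
# formats        -> [("1080p (mp4)", "<format_id>"), ("audio (m4a)", "<format_id>")]
# playlist_items -> [{"index": 1, "id": "...", "title": "...", "duration": 123,
#                     "url": "..."}, ...] for albums/playlists, else []
```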
+ """ + try: + video_formats = {} # {resolution: format_data} + audio_formats = {} # {quality_label: format_data} + + for fmt in formats: + vcodec = fmt.get('vcodec', 'none') + acodec = fmt.get('acodec', 'none') + height = fmt.get('height') + ext = fmt.get('ext', 'unknown') + format_id = fmt.get('format_id', '') + tbr = fmt.get('tbr', 0) + abr = fmt.get('abr', 0) + + # Video format + if vcodec and vcodec != 'none' and height: + if height < 480: + continue + res_key = f"{height}p" + if res_key not in video_formats or tbr > video_formats[res_key].get('tbr', 0): + video_formats[res_key] = { + 'label': f"{height}p ({ext})", + 'format_id': format_id, + 'tbr': tbr, + } + + # Audio-only format + elif acodec and acodec != 'none' and (not vcodec or vcodec == 'none'): + audio_key = f"audio_{abr}" + if audio_key not in audio_formats or abr > audio_formats[audio_key].get('abr', 0): + audio_formats[audio_key] = { + 'label': f"audio ({ext})", + 'format_id': format_id, + 'abr': abr, + } + + result = [] + + # Add video formats in descending resolution order + for res in sorted(video_formats.keys(), key=lambda x: int(x.replace('p', '')), reverse=True): + fmt = video_formats[res] + result.append((fmt['label'], fmt['format_id'])) + + # Add best audio format + if audio_formats: + best_audio = max(audio_formats.values(), key=lambda x: x.get('abr', 0)) + result.append((best_audio['label'], best_audio['format_id'])) + + return result + + except Exception as e: + log(f"Error extracting formats: {e}", file=sys.stderr) + return [] + + +def scrape_isbn_metadata(isbn: str) -> List[str]: + """Scrape metadata for an ISBN using Open Library API.""" + new_tags = [] + try: + from helper.http_client import HTTPClient + import json as json_module + + isbn_clean = isbn.replace('-', '').strip() + url = f"https://openlibrary.org/api/books?bibkeys=ISBN:{isbn_clean}&jscmd=data&format=json" + + try: + with HTTPClient() as client: + response = client.get(url) + response.raise_for_status() + data = json_module.loads(response.content.decode('utf-8')) + except Exception as e: + log(f"Failed to fetch ISBN metadata: {e}", file=sys.stderr) + return [] + + if not data: + log(f"No ISBN metadata found for: {isbn}") + return [] + + book_data = next(iter(data.values()), None) + if not book_data: + return [] + + if 'title' in book_data: + new_tags.append(f"title:{book_data['title']}") + + if 'authors' in book_data and isinstance(book_data['authors'], list): + for author in book_data['authors'][:3]: + if 'name' in author: + new_tags.append(f"author:{author['name']}") + + if 'publish_date' in book_data: + new_tags.append(f"publish_date:{book_data['publish_date']}") + + if 'publishers' in book_data and isinstance(book_data['publishers'], list): + for pub in book_data['publishers'][:1]: + if 'name' in pub: + new_tags.append(f"publisher:{pub['name']}") + + if 'description' in book_data: + desc = book_data['description'] + if isinstance(desc, dict) and 'value' in desc: + desc = desc['value'] + if desc: + desc_str = str(desc).strip() + # Include description if available (limit to 200 chars to keep it manageable) + if len(desc_str) > 0: + new_tags.append(f"description:{desc_str[:200]}") + + if 'number_of_pages' in book_data: + page_count = book_data['number_of_pages'] + if page_count and isinstance(page_count, int) and page_count > 0: + new_tags.append(f"pages:{page_count}") + + if 'identifiers' in book_data and isinstance(book_data['identifiers'], dict): + identifiers = book_data['identifiers'] + + if 'openlibrary' in identifiers: + ol_ids = 
identifiers['openlibrary'] + if isinstance(ol_ids, list) and ol_ids: + new_tags.append(f"openlibrary:{ol_ids[0]}") + elif isinstance(ol_ids, str): + new_tags.append(f"openlibrary:{ol_ids}") + + if 'lccn' in identifiers: + lccn_list = identifiers['lccn'] + if isinstance(lccn_list, list) and lccn_list: + new_tags.append(f"lccn:{lccn_list[0]}") + elif isinstance(lccn_list, str): + new_tags.append(f"lccn:{lccn_list}") + + if 'oclc' in identifiers: + oclc_list = identifiers['oclc'] + if isinstance(oclc_list, list) and oclc_list: + new_tags.append(f"oclc:{oclc_list[0]}") + elif isinstance(oclc_list, str): + new_tags.append(f"oclc:{oclc_list}") + + if 'goodreads' in identifiers: + goodreads_list = identifiers['goodreads'] + if isinstance(goodreads_list, list) and goodreads_list: + new_tags.append(f"goodreads:{goodreads_list[0]}") + elif isinstance(goodreads_list, str): + new_tags.append(f"goodreads:{goodreads_list}") + + if 'librarything' in identifiers: + lt_list = identifiers['librarything'] + if isinstance(lt_list, list) and lt_list: + new_tags.append(f"librarything:{lt_list[0]}") + elif isinstance(lt_list, str): + new_tags.append(f"librarything:{lt_list}") + + if 'doi' in identifiers: + doi_list = identifiers['doi'] + if isinstance(doi_list, list) and doi_list: + new_tags.append(f"doi:{doi_list[0]}") + elif isinstance(doi_list, str): + new_tags.append(f"doi:{doi_list}") + + if 'internet_archive' in identifiers: + ia_list = identifiers['internet_archive'] + if isinstance(ia_list, list) and ia_list: + new_tags.append(f"internet_archive:{ia_list[0]}") + elif isinstance(ia_list, str): + new_tags.append(f"internet_archive:{ia_list}") + + log(f"Found {len(new_tags)} tag(s) from ISBN lookup") + return new_tags + except Exception as e: + log(f"ISBN scraping error: {e}", file=sys.stderr) + return [] + + +def scrape_openlibrary_metadata(olid: str) -> List[str]: + """Scrape metadata for an OpenLibrary ID using the .json API endpoint. + + Fetches from https://openlibrary.org/books/{OLID}.json and extracts: + - Title, authors, publish date, publishers + - Description + - Subjects as freeform tags (without namespace prefix) + - Identifiers (ISBN, LCCN, OCLC, etc.) 
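For reference, the shape of the Open Library `jscmd=data` payload that `scrape_isbn_metadata` above consumes, trimmed to the fields it reads, and the tags it would emit for this made-up book:

```python
# Trimmed response as scrape_isbn_metadata expects it; real payloads carry
# many more fields, and the values here are illustrative.
data = {
    "ISBN:9780143039433": {
        "title": "Example Book",
        "authors": [{"name": "Example Author"}],
        "publish_date": "2006",
        "publishers": [{"name": "Example Press"}],
        "number_of_pages": 416,
        "identifiers": {"openlibrary": ["OL9674499M"]},
    }
}

# Resulting tags, in emission order:
# ["title:Example Book", "author:Example Author", "publish_date:2006",
#  "publisher:Example Press", "pages:416", "openlibrary:OL9674499M"]
```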
+ """ + new_tags = [] + try: + from helper.http_client import HTTPClient + import json as json_module + + # Format: OL9674499M or just 9674499M + olid_clean = olid.replace('OL', '').replace('M', '') + if not olid_clean.isdigit(): + olid_clean = olid + + # Ensure we have the full OLID format for the URL + if not olid.startswith('OL'): + url = f"https://openlibrary.org/books/OL{olid_clean}M.json" + else: + url = f"https://openlibrary.org/books/{olid}.json" + + try: + with HTTPClient() as client: + response = client.get(url) + response.raise_for_status() + data = json_module.loads(response.content.decode('utf-8')) + except Exception as e: + log(f"Failed to fetch OpenLibrary metadata: {e}", file=sys.stderr) + return [] + + if not data: + log(f"No OpenLibrary metadata found for: {olid}") + return [] + + # Add title + if 'title' in data: + new_tags.append(f"title:{data['title']}") + + # Add authors + if 'authors' in data and isinstance(data['authors'], list): + for author in data['authors'][:3]: + if isinstance(author, dict) and 'name' in author: + new_tags.append(f"author:{author['name']}") + elif isinstance(author, str): + new_tags.append(f"author:{author}") + + # Add publish date + if 'publish_date' in data: + new_tags.append(f"publish_date:{data['publish_date']}") + + # Add publishers + if 'publishers' in data and isinstance(data['publishers'], list): + for pub in data['publishers'][:1]: + if isinstance(pub, dict) and 'name' in pub: + new_tags.append(f"publisher:{pub['name']}") + elif isinstance(pub, str): + new_tags.append(f"publisher:{pub}") + + # Add description + if 'description' in data: + desc = data['description'] + if isinstance(desc, dict) and 'value' in desc: + desc = desc['value'] + if desc: + desc_str = str(desc).strip() + if len(desc_str) > 0: + new_tags.append(f"description:{desc_str[:200]}") + + # Add number of pages + if 'number_of_pages' in data: + page_count = data['number_of_pages'] + if page_count and isinstance(page_count, int) and page_count > 0: + new_tags.append(f"pages:{page_count}") + + # Add subjects as FREEFORM tags (no namespace prefix) + if 'subjects' in data and isinstance(data['subjects'], list): + for subject in data['subjects'][:10]: + if subject and isinstance(subject, str): + subject_clean = str(subject).strip() + if subject_clean and subject_clean not in new_tags: + new_tags.append(subject_clean) + + # Add identifiers + if 'identifiers' in data and isinstance(data['identifiers'], dict): + identifiers = data['identifiers'] + + if 'isbn_10' in identifiers: + isbn_10_list = identifiers['isbn_10'] + if isinstance(isbn_10_list, list) and isbn_10_list: + new_tags.append(f"isbn_10:{isbn_10_list[0]}") + elif isinstance(isbn_10_list, str): + new_tags.append(f"isbn_10:{isbn_10_list}") + + if 'isbn_13' in identifiers: + isbn_13_list = identifiers['isbn_13'] + if isinstance(isbn_13_list, list) and isbn_13_list: + new_tags.append(f"isbn_13:{isbn_13_list[0]}") + elif isinstance(isbn_13_list, str): + new_tags.append(f"isbn_13:{isbn_13_list}") + + if 'lccn' in identifiers: + lccn_list = identifiers['lccn'] + if isinstance(lccn_list, list) and lccn_list: + new_tags.append(f"lccn:{lccn_list[0]}") + elif isinstance(lccn_list, str): + new_tags.append(f"lccn:{lccn_list}") + + if 'oclc_numbers' in identifiers: + oclc_list = identifiers['oclc_numbers'] + if isinstance(oclc_list, list) and oclc_list: + new_tags.append(f"oclc:{oclc_list[0]}") + elif isinstance(oclc_list, str): + new_tags.append(f"oclc:{oclc_list}") + + if 'goodreads' in identifiers: + goodreads_list = 
identifiers['goodreads'] + if isinstance(goodreads_list, list) and goodreads_list: + new_tags.append(f"goodreads:{goodreads_list[0]}") + elif isinstance(goodreads_list, str): + new_tags.append(f"goodreads:{goodreads_list}") + + log(f"Found {len(new_tags)} tag(s) from OpenLibrary lookup") + return new_tags + except Exception as e: + log(f"OpenLibrary scraping error: {e}", file=sys.stderr) + return [] + + +def perform_metadata_scraping(tags_list: List[str]) -> List[str]: + """Perform scraping based on identifiers in tags. + + Priority order: + 1. openlibrary: (preferred - more complete metadata) + 2. isbn_10 or isbn (fallback) + """ + identifiers = extract_scrapable_identifiers(tags_list) + + if not identifiers: + log("No scrapable identifiers found (openlibrary, ISBN, musicbrainz, imdb)") + return [] + + log(f"Found scrapable identifiers: {', '.join(identifiers.keys())}") + + new_tags = [] + + # Prefer OpenLibrary over ISBN (more complete metadata) + if 'openlibrary' in identifiers: + olid = identifiers['openlibrary'] + if olid: + log(f"Scraping OpenLibrary: {olid}") + new_tags.extend(scrape_openlibrary_metadata(olid)) + elif 'isbn_13' in identifiers or 'isbn_10' in identifiers or 'isbn' in identifiers: + isbn = identifiers.get('isbn_13') or identifiers.get('isbn_10') or identifiers.get('isbn') + if isbn: + log(f"Scraping ISBN: {isbn}") + new_tags.extend(scrape_isbn_metadata(isbn)) + + existing_tags_lower = {tag.lower() for tag in tags_list} + scraped_unique = [] + seen = set() + for tag in new_tags: + tag_lower = tag.lower() + if tag_lower not in existing_tags_lower and tag_lower not in seen: + scraped_unique.append(tag) + seen.add(tag_lower) + + if scraped_unique: + log(f"Added {len(scraped_unique)} new tag(s) from scraping") + + return scraped_unique diff --git a/models.py b/models.py index df0433b..1773004 100644 --- a/models.py +++ b/models.py @@ -16,134 +16,183 @@ from typing import Any, Callable, Dict, List, Optional, Protocol, TextIO, Tuple class PipeObject: """Unified pipeline object for tracking files, metadata, tags, and relationships through the pipeline. - This is the single source of truth for all result data in the pipeline. It can represent: - - Tag extraction results (IMDb, MusicBrainz, OpenLibrary lookups) - - Remote metadata fetches - - File operations with metadata/tags and relationship tracking - - Search results - - Files with version relationships (king/alt/related) + This is the single source of truth for all result data in the pipeline. Uses the hash+store + canonical pattern for file identification. Attributes: - source: Source of the object (e.g., 'imdb', 'musicbrainz', 'libgen', 'debrid', 'file', etc.) 
- identifier: Unique identifier from the source (e.g., IMDb ID, MBID, magnet hash, file hash) + hash: SHA-256 hash of the file (canonical identifier) + store: Storage backend name (e.g., 'default', 'hydrus', 'test', 'home') tags: List of extracted or assigned tags title: Human-readable title if applicable source_url: URL where the object came from duration: Duration in seconds if applicable metadata: Full metadata dictionary from source - remote_metadata: Additional remote metadata warnings: Any warnings or issues encountered - mpv_metadata: MPV-specific metadata if applicable - file_path: Path to the file if this object represents a file - file_hash: SHA-256 hash of the file for integrity and relationship tracking - king_hash: Hash of the primary/master version of this file (for alternates) - alt_hashes: List of hashes for alternate versions of this file - related_hashes: List of hashes for related files (e.g., screenshots, editions) + path: Path to the file if this object represents a file + relationships: Relationship data (king/alt/related hashes) is_temp: If True, this is a temporary/intermediate artifact that may be cleaned up - action: The cmdlet that created this object (format: 'cmdlet:cmdlet_name', e.g., 'cmdlet:get-file') - parent_id: Hash of the parent file in the pipeline chain (for tracking provenance/lineage) + action: The cmdlet that created this object (format: 'cmdlet:cmdlet_name') + parent_hash: Hash of the parent file in the pipeline chain (for tracking provenance/lineage) extra: Additional fields not covered above """ - source: str - identifier: str + hash: str + store: str tags: List[str] = field(default_factory=list) title: Optional[str] = None + url: Optional[str] = None source_url: Optional[str] = None duration: Optional[float] = None metadata: Dict[str, Any] = field(default_factory=dict) - remote_metadata: Optional[Dict[str, Any]] = None warnings: List[str] = field(default_factory=list) - mpv_metadata: Optional[Dict[str, Any]] = None - file_path: Optional[str] = None - file_hash: Optional[str] = None - king_hash: Optional[str] = None - alt_hashes: List[str] = field(default_factory=list) - related_hashes: List[str] = field(default_factory=list) + path: Optional[str] = None + relationships: Dict[str, Any] = field(default_factory=dict) is_temp: bool = False action: Optional[str] = None - parent_id: Optional[str] = None + parent_hash: Optional[str] = None extra: Dict[str, Any] = field(default_factory=dict) - def register_as_king(self, file_hash: str) -> None: - """Register this object as the king (primary) version of a file.""" - self.king_hash = file_hash - - def add_alternate(self, alt_hash: str) -> None: - """Add an alternate version hash for this file.""" - if alt_hash not in self.alt_hashes: - self.alt_hashes.append(alt_hash) - - def add_related(self, related_hash: str) -> None: - """Add a related file hash (e.g., screenshot, edition).""" - if related_hash not in self.related_hashes: - self.related_hashes.append(related_hash) + def add_relationship(self, rel_type: str, rel_hash: str) -> None: + """Add a relationship hash. 
+ + Args: + rel_type: Relationship type ('king', 'alt', 'related') + rel_hash: Hash to add to the relationship + """ + if rel_type not in self.relationships: + self.relationships[rel_type] = [] + + if isinstance(self.relationships[rel_type], list): + if rel_hash not in self.relationships[rel_type]: + self.relationships[rel_type].append(rel_hash) + else: + # Single value (e.g., king), convert to that value + self.relationships[rel_type] = rel_hash def get_relationships(self) -> Dict[str, Any]: """Get all relationships for this object.""" - rels = {} - if self.king_hash: - rels["king"] = self.king_hash - if self.alt_hashes: - rels["alt"] = self.alt_hashes - if self.related_hashes: - rels["related"] = self.related_hashes - return rels + return self.relationships.copy() if self.relationships else {} + + def debug_table(self) -> None: + """Print a formatted debug table showing PipeObject state. + + Only prints when debug logging is enabled. Useful for tracking + object state throughout the pipeline. + """ + try: + from helper.logger import is_debug_enabled, debug + + if not is_debug_enabled(): + return + except Exception: + return + + # Prepare display values + hash_display = self.hash or "N/A" + store_display = self.store or "N/A" + title_display = self.title or "N/A" + tags_display = ", ".join(self.tags[:3]) if self.tags else "[]" + if len(self.tags) > 3: + tags_display += f" (+{len(self.tags) - 3} more)" + file_path_display = self.path or "N/A" + if file_path_display != "N/A" and len(file_path_display) > 50: + file_path_display = "..." + file_path_display[-47:] + + url_display = self.url or "N/A" + if url_display != "N/A" and len(url_display) > 48: + url_display = url_display[:45] + "..." + + relationships_display = "N/A" + if self.relationships: + rel_parts = [] + for key, val in self.relationships.items(): + if isinstance(val, list): + rel_parts.append(f"{key}({len(val)})") + else: + rel_parts.append(key) + relationships_display = ", ".join(rel_parts) + + warnings_display = f"{len(self.warnings)} warning(s)" if self.warnings else "none" + + # Print table + debug("┌─────────────────────────────────────────────────────────────┐") + debug("│ PipeObject Debug Info │") + debug("├─────────────────────────────────────────────────────────────┤") + debug(f"│ Hash : {hash_display:<48}│") + debug(f"│ Store : {store_display:<48}│") + debug(f"│ Title : {title_display:<48}│") + debug(f"│ Tags : {tags_display:<48}│") + debug(f"│ URL : {url_display:<48}│") + debug(f"│ File Path : {file_path_display:<48}│") + debug(f"│ Relationships: {relationships_display:<47}│") + debug(f"│ Warnings : {warnings_display:<48}│") + + # Show extra keys as individual rows + if self.extra: + debug("├─────────────────────────────────────────────────────────────┤") + debug("│ Extra Fields: │") + for key, val in self.extra.items(): + # Format value for display + if isinstance(val, (list, set)): + val_display = f"{type(val).__name__}({len(val)})" + elif isinstance(val, dict): + val_display = f"dict({len(val)})" + elif isinstance(val, (int, float)): + val_display = str(val) + else: + val_str = str(val) + val_display = val_str if len(val_str) <= 40 else val_str[:37] + "..." + + # Truncate key if needed + key_display = key if len(key) <= 15 else key[:12] + "..." 
+ debug(f"│ {key_display:<15}: {val_display:<42}│") + + if self.action: + debug("├─────────────────────────────────────────────────────────────┤") + action_display = self.action[:48] + debug(f"│ Action : {action_display:<48}│") + if self.parent_hash: + if not self.action: + debug("├─────────────────────────────────────────────────────────────┤") + parent_display = self.parent_hash[:12] + "..." if len(self.parent_hash) > 12 else self.parent_hash + debug(f"│ Parent Hash : {parent_display:<48}│") + debug("└─────────────────────────────────────────────────────────────┘") def to_dict(self) -> Dict[str, Any]: """Serialize to dictionary, excluding None and empty values.""" data: Dict[str, Any] = { - "source": self.source, - "tags": self.tags, + "hash": self.hash, + "store": self.store, } - if self.identifier: - data["id"] = self.identifier + + if self.tags: + data["tags"] = self.tags if self.title: data["title"] = self.title + if self.url: + data["url"] = self.url if self.source_url: data["source_url"] = self.source_url if self.duration is not None: data["duration"] = self.duration if self.metadata: data["metadata"] = self.metadata - if self.remote_metadata is not None: - data["remote_metadata"] = self.remote_metadata - if self.mpv_metadata is not None: - data["mpv_metadata"] = self.mpv_metadata if self.warnings: data["warnings"] = self.warnings - if self.file_path: - data["file_path"] = self.file_path - if self.file_hash: - data["file_hash"] = self.file_hash - # Include pipeline chain tracking fields + if self.path: + data["path"] = self.path + if self.relationships: + data["relationships"] = self.relationships if self.is_temp: data["is_temp"] = self.is_temp if self.action: data["action"] = self.action - if self.parent_id: - data["parent_id"] = self.parent_id - # Include relationship data if present - rels = self.get_relationships() - if rels: - data["relationships"] = rels + if self.parent_hash: + data["parent_hash"] = self.parent_hash + + # Add extra fields data.update({k: v for k, v in self.extra.items() if v is not None}) return data - @property - def hash(self) -> str: - """Compute SHA-256 hash from source and identifier.""" - base = f"{self.source}:{self.identifier}" - return hashlib.sha256(base.encode('utf-8')).hexdigest() - - # Backwards compatibility aliases - def as_dict(self) -> Dict[str, Any]: - """Alias for to_dict() for backwards compatibility.""" - return self.to_dict() - - def to_serializable(self) -> Dict[str, Any]: - """Alias for to_dict() for backwards compatibility.""" - return self.to_dict() - class FileRelationshipTracker: """Track relationships between files for sidecar creation. 
@@ -235,6 +284,7 @@ class DownloadOptions: clip_sections: Optional[str] = None playlist_items: Optional[str] = None # yt-dlp --playlist-items format (e.g., "1-3,5,8") no_playlist: bool = False # If True, pass --no-playlist to yt-dlp + quiet: bool = False # If True, suppress all console output (progress, debug logs) class SendFunc(Protocol): @@ -546,18 +596,25 @@ class ProgressBar: class PipelineStageContext: """Context information for the current pipeline stage.""" - def __init__(self, stage_index: int, total_stages: int): + def __init__(self, stage_index: int, total_stages: int, worker_id: Optional[str] = None): self.stage_index = stage_index self.total_stages = total_stages self.is_last_stage = (stage_index == total_stages - 1) + self.worker_id = worker_id self.emits: List[Any] = [] def emit(self, obj: Any) -> None: """Emit an object to the next pipeline stage.""" self.emits.append(obj) + def get_current_command_text(self) -> str: + """Get the current command text (for backward compatibility).""" + # This is maintained for backward compatibility with old code + # In a real implementation, this would come from the stage context + return "" + def __repr__(self) -> str: - return f"PipelineStageContext(stage={self.stage_index}/{self.total_stages}, is_last={self.is_last_stage})" + return f"PipelineStageContext(stage={self.stage_index}/{self.total_stages}, is_last={self.is_last_stage}, worker_id={self.worker_id})" # ============================================================================ diff --git a/pipeline.py b/pipeline.py index 7638f49..31820f1 100644 --- a/pipeline.py +++ b/pipeline.py @@ -25,21 +25,18 @@ from models import PipelineStageContext from helper.logger import log +def _is_selectable_table(table: Any) -> bool: + """Return True when a table can be used for @ selection.""" + return bool(table) and not getattr(table, "no_choice", False) + + # ============================================================================ -# PIPELINE GLOBALS (maintained for backward compatibility) +# PIPELINE STATE # ============================================================================ -# Current pipeline context (thread-local in real world, global here for simplicity) +# Current pipeline context _CURRENT_CONTEXT: Optional[PipelineStageContext] = None -# Active execution state -_PIPE_EMITS: List[Any] = [] -_PIPE_ACTIVE: bool = False -_PIPE_IS_LAST: bool = False - -# Ephemeral handoff for direct pipelines (e.g., URL --screen-shot | ...) -_LAST_PIPELINE_CAPTURE: Optional[Any] = None - # Remember last search query to support refreshing results after pipeline actions _LAST_SEARCH_QUERY: Optional[str] = None @@ -52,25 +49,23 @@ _PIPELINE_LAST_ITEMS: List[Any] = [] # Store the last result table for @ selection syntax (e.g., @2, @2-5, @{1,3,5}) _LAST_RESULT_TABLE: Optional[Any] = None _LAST_RESULT_ITEMS: List[Any] = [] -# Subject for the current result table (e.g., the file whose tags/URLs are displayed) +# Subject for the current result table (e.g., the file whose tags/url are displayed) _LAST_RESULT_SUBJECT: Optional[Any] = None # History of result tables for @.. 
navigation (LIFO stack, max 20 tables) _RESULT_TABLE_HISTORY: List[tuple[Optional[Any], List[Any], Optional[Any]]] = [] _MAX_RESULT_TABLE_HISTORY = 20 +# Forward history for @,, navigation (LIFO stack for popped tables) +_RESULT_TABLE_FORWARD: List[tuple[Optional[Any], List[Any], Optional[Any]]] = [] + # Current stage table for @N expansion (separate from history) -# Used to track the ResultTable with source_command + row_selection_args from current pipeline stage -# This is set by cmdlets that display tabular results (e.g., download-data showing formats) -# and used by CLI to expand @N into full commands like "download-data URL -item 2" _CURRENT_STAGE_TABLE: Optional[Any] = None # Items displayed by non-selectable commands (get-tag, delete-tag, etc.) -# These are available for @N selection but NOT saved to history _DISPLAY_ITEMS: List[Any] = [] # Table for display-only commands (overlay) -# Used when a command wants to show a specific table formatting but not affect history _DISPLAY_TABLE: Optional[Any] = None # Subject for overlay/display-only tables (takes precedence over _LAST_RESULT_SUBJECT) _DISPLAY_SUBJECT: Optional[Any] = None @@ -98,7 +93,7 @@ _UI_LIBRARY_REFRESH_CALLBACK: Optional[Any] = None # ============================================================================ def set_stage_context(context: Optional[PipelineStageContext]) -> None: - """Internal: Set the current pipeline stage context.""" + """Set the current pipeline stage context.""" global _CURRENT_CONTEXT _CURRENT_CONTEXT = context @@ -126,26 +121,21 @@ def emit(obj: Any) -> None: return 0 ``` """ - # Try new context-based approach first if _CURRENT_CONTEXT is not None: - import logging - logger = logging.getLogger(__name__) - logger.debug(f"[EMIT] Context-based: appending to _CURRENT_CONTEXT.emits. obj={obj}") _CURRENT_CONTEXT.emit(obj) - return + + +def emit_list(objects: List[Any]) -> None: + """Emit a list of objects to the next pipeline stage. - # Fallback to legacy global approach (for backward compatibility) - try: - import logging - logger = logging.getLogger(__name__) - logger.debug(f"[EMIT] Legacy: appending to _PIPE_EMITS. obj type={type(obj).__name__}, _PIPE_EMITS len before={len(_PIPE_EMITS)}") - _PIPE_EMITS.append(obj) - logger.debug(f"[EMIT] Legacy: _PIPE_EMITS len after={len(_PIPE_EMITS)}") - except Exception as e: - import logging - logger = logging.getLogger(__name__) - logger.error(f"[EMIT] Error appending to _PIPE_EMITS: {e}", exc_info=True) - pass + This allows cmdlets to emit multiple results that are tracked as a list, + enabling downstream cmdlets to process all of them or filter by metadata. + + Args: + objects: List of objects to emit + """ + if _CURRENT_CONTEXT is not None: + _CURRENT_CONTEXT.emit(objects) def print_if_visible(*args: Any, file=None, **kwargs: Any) -> None: @@ -171,7 +161,7 @@ def print_if_visible(*args: Any, file=None, **kwargs: Any) -> None: """ try: # Print if: not in a pipeline OR this is the last stage - should_print = (not _PIPE_ACTIVE) or _PIPE_IS_LAST + should_print = (_CURRENT_CONTEXT is None) or (_CURRENT_CONTEXT and _CURRENT_CONTEXT.is_last_stage) # Always print to stderr regardless if file is not None: @@ -304,17 +294,17 @@ def clear_pending_pipeline_tail() -> None: _PENDING_PIPELINE_SOURCE = None + + def reset() -> None: """Reset all pipeline state. 
Called between pipeline executions.""" - global _PIPE_EMITS, _PIPE_ACTIVE, _PIPE_IS_LAST, _PIPELINE_VALUES - global _LAST_PIPELINE_CAPTURE, _PIPELINE_REFRESHED, _PIPELINE_LAST_ITEMS - global _PIPELINE_COMMAND_TEXT, _LAST_RESULT_SUBJECT, _DISPLAY_SUBJECT - global _PENDING_PIPELINE_TAIL, _PENDING_PIPELINE_SOURCE + global _PIPELINE_VALUES, _LAST_SEARCH_QUERY, _PIPELINE_REFRESHED + global _PIPELINE_LAST_ITEMS, _PIPELINE_COMMAND_TEXT, _LAST_RESULT_SUBJECT + global _DISPLAY_SUBJECT, _PENDING_PIPELINE_TAIL, _PENDING_PIPELINE_SOURCE + global _CURRENT_CONTEXT - _PIPE_EMITS = [] - _PIPE_ACTIVE = False - _PIPE_IS_LAST = False - _LAST_PIPELINE_CAPTURE = None + _CURRENT_CONTEXT = None + _LAST_SEARCH_QUERY = None _PIPELINE_REFRESHED = False _PIPELINE_LAST_ITEMS = [] _PIPELINE_VALUES = {} @@ -327,13 +317,15 @@ def reset() -> None: def get_emitted_items() -> List[Any]: """Get a copy of all items emitted by the current pipeline stage.""" - return list(_PIPE_EMITS) + if _CURRENT_CONTEXT is not None: + return list(_CURRENT_CONTEXT.emits) + return [] def clear_emits() -> None: """Clear the emitted items list (called between stages).""" - global _PIPE_EMITS - _PIPE_EMITS = [] + if _CURRENT_CONTEXT is not None: + _CURRENT_CONTEXT.emits.clear() def set_last_selection(indices: Sequence[int]) -> None: @@ -375,20 +367,8 @@ def clear_current_command_text() -> None: _PIPELINE_COMMAND_TEXT = "" -def set_active(active: bool) -> None: - """Internal: Set whether we're in a pipeline context.""" - global _PIPE_ACTIVE - _PIPE_ACTIVE = active - - -def set_last_stage(is_last: bool) -> None: - """Internal: Set whether this is the last stage of the pipeline.""" - global _PIPE_IS_LAST - _PIPE_IS_LAST = is_last - - def set_search_query(query: Optional[str]) -> None: - """Internal: Set the last search query for refresh purposes.""" + """Set the last search query for refresh purposes.""" global _LAST_SEARCH_QUERY _LAST_SEARCH_QUERY = query @@ -399,7 +379,7 @@ def get_search_query() -> Optional[str]: def set_pipeline_refreshed(refreshed: bool) -> None: - """Internal: Track whether the pipeline already refreshed results.""" + """Track whether the pipeline already refreshed results.""" global _PIPELINE_REFRESHED _PIPELINE_REFRESHED = refreshed @@ -410,7 +390,7 @@ def was_pipeline_refreshed() -> bool: def set_last_items(items: list) -> None: - """Internal: Cache the last pipeline outputs.""" + """Cache the last pipeline outputs.""" global _PIPELINE_LAST_ITEMS _PIPELINE_LAST_ITEMS = list(items) if items else [] @@ -420,17 +400,6 @@ def get_last_items() -> List[Any]: return list(_PIPELINE_LAST_ITEMS) -def set_last_capture(obj: Any) -> None: - """Internal: Store ephemeral handoff for direct pipelines.""" - global _LAST_PIPELINE_CAPTURE - _LAST_PIPELINE_CAPTURE = obj - - -def get_last_capture() -> Optional[Any]: - """Get ephemeral pipeline handoff (e.g., URL --screen-shot | ...).""" - return _LAST_PIPELINE_CAPTURE - - def set_ui_library_refresh_callback(callback: Any) -> None: """Set a callback to be called when library content is updated. 
@@ -501,6 +470,22 @@ def set_last_result_table(result_table: Optional[Any], items: Optional[List[Any] _LAST_RESULT_TABLE = result_table _LAST_RESULT_ITEMS = items or [] _LAST_RESULT_SUBJECT = subject + + # Sort table by Title/Name column alphabetically if available + if result_table is not None and hasattr(result_table, 'sort_by_title') and not getattr(result_table, 'preserve_order', False): + try: + result_table.sort_by_title() + # Re-order items list to match the sorted table + if _LAST_RESULT_ITEMS and hasattr(result_table, 'rows'): + sorted_items = [] + for row in result_table.rows: + src_idx = getattr(row, 'source_index', None) + if isinstance(src_idx, int) and 0 <= src_idx < len(_LAST_RESULT_ITEMS): + sorted_items.append(_LAST_RESULT_ITEMS[src_idx]) + if len(sorted_items) == len(result_table.rows): + _LAST_RESULT_ITEMS = sorted_items + except Exception: + pass def set_last_result_table_overlay(result_table: Optional[Any], items: Optional[List[Any]] = None, subject: Optional[Any] = None) -> None: @@ -518,6 +503,22 @@ def set_last_result_table_overlay(result_table: Optional[Any], items: Optional[L _DISPLAY_TABLE = result_table _DISPLAY_ITEMS = items or [] _DISPLAY_SUBJECT = subject + + # Sort table by Title/Name column alphabetically if available + if result_table is not None and hasattr(result_table, 'sort_by_title') and not getattr(result_table, 'preserve_order', False): + try: + result_table.sort_by_title() + # Re-order items list to match the sorted table + if _DISPLAY_ITEMS and hasattr(result_table, 'rows'): + sorted_items = [] + for row in result_table.rows: + src_idx = getattr(row, 'source_index', None) + if isinstance(src_idx, int) and 0 <= src_idx < len(_DISPLAY_ITEMS): + sorted_items.append(_DISPLAY_ITEMS[src_idx]) + if len(sorted_items) == len(result_table.rows): + _DISPLAY_ITEMS = sorted_items + except Exception: + pass def set_last_result_table_preserve_history(result_table: Optional[Any], items: Optional[List[Any]] = None, subject: Optional[Any] = None) -> None: @@ -567,7 +568,7 @@ def restore_previous_result_table() -> bool: True if a previous table was restored, False if history is empty """ global _LAST_RESULT_TABLE, _LAST_RESULT_ITEMS, _LAST_RESULT_SUBJECT - global _RESULT_TABLE_HISTORY, _DISPLAY_ITEMS, _DISPLAY_TABLE, _DISPLAY_SUBJECT + global _RESULT_TABLE_HISTORY, _RESULT_TABLE_FORWARD, _DISPLAY_ITEMS, _DISPLAY_TABLE, _DISPLAY_SUBJECT # If we have an active overlay (display items/table), clear it to "go back" to the underlying table if _DISPLAY_ITEMS or _DISPLAY_TABLE or _DISPLAY_SUBJECT is not None: @@ -579,6 +580,9 @@ def restore_previous_result_table() -> bool: if not _RESULT_TABLE_HISTORY: return False + # Save current state to forward stack before popping + _RESULT_TABLE_FORWARD.append((_LAST_RESULT_TABLE, _LAST_RESULT_ITEMS, _LAST_RESULT_SUBJECT)) + # Pop from history and restore prev = _RESULT_TABLE_HISTORY.pop() if isinstance(prev, tuple) and len(prev) >= 3: @@ -595,6 +599,44 @@ def restore_previous_result_table() -> bool: return True +def restore_next_result_table() -> bool: + """Restore the next result table from forward history (for @,, navigation). 
+ + Returns: + True if a next table was restored, False if forward history is empty + """ + global _LAST_RESULT_TABLE, _LAST_RESULT_ITEMS, _LAST_RESULT_SUBJECT + global _RESULT_TABLE_HISTORY, _RESULT_TABLE_FORWARD, _DISPLAY_ITEMS, _DISPLAY_TABLE, _DISPLAY_SUBJECT + + # If we have an active overlay (display items/table), clear it to "go forward" to the underlying table + if _DISPLAY_ITEMS or _DISPLAY_TABLE or _DISPLAY_SUBJECT is not None: + _DISPLAY_ITEMS = [] + _DISPLAY_TABLE = None + _DISPLAY_SUBJECT = None + return True + + if not _RESULT_TABLE_FORWARD: + return False + + # Save current state to history stack before popping forward + _RESULT_TABLE_HISTORY.append((_LAST_RESULT_TABLE, _LAST_RESULT_ITEMS, _LAST_RESULT_SUBJECT)) + + # Pop from forward stack and restore + next_state = _RESULT_TABLE_FORWARD.pop() + if isinstance(next_state, tuple) and len(next_state) >= 3: + _LAST_RESULT_TABLE, _LAST_RESULT_ITEMS, _LAST_RESULT_SUBJECT = next_state[0], next_state[1], next_state[2] + elif isinstance(next_state, tuple) and len(next_state) == 2: + _LAST_RESULT_TABLE, _LAST_RESULT_ITEMS = next_state + _LAST_RESULT_SUBJECT = None + else: + _LAST_RESULT_TABLE, _LAST_RESULT_ITEMS, _LAST_RESULT_SUBJECT = None, [], None + # Clear display items so get_last_result_items() falls back to restored items + _DISPLAY_ITEMS = [] + _DISPLAY_TABLE = None + _DISPLAY_SUBJECT = None + return True + + def get_display_table() -> Optional[Any]: """Get the current display overlay table. @@ -637,9 +679,15 @@ def get_last_result_items() -> List[Any]: # Prioritize items from display commands (get-tag, delete-tag, etc.) # These are available for immediate @N selection if _DISPLAY_ITEMS: + if _DISPLAY_TABLE is not None and not _is_selectable_table(_DISPLAY_TABLE): + return [] return _DISPLAY_ITEMS # Fall back to items from last search/selectable command - return _LAST_RESULT_ITEMS + if _LAST_RESULT_TABLE is None: + return _LAST_RESULT_ITEMS + if _is_selectable_table(_LAST_RESULT_TABLE): + return _LAST_RESULT_ITEMS + return [] def get_last_result_table_source_command() -> Optional[str]: @@ -648,7 +696,7 @@ def get_last_result_table_source_command() -> Optional[str]: Returns: Command name (e.g., 'download-data') or None if not set """ - if _LAST_RESULT_TABLE and hasattr(_LAST_RESULT_TABLE, 'source_command'): + if _is_selectable_table(_LAST_RESULT_TABLE) and hasattr(_LAST_RESULT_TABLE, 'source_command'): return _LAST_RESULT_TABLE.source_command return None @@ -659,7 +707,7 @@ def get_last_result_table_source_args() -> List[str]: Returns: List of arguments (e.g., ['https://example.com']) or empty list """ - if _LAST_RESULT_TABLE and hasattr(_LAST_RESULT_TABLE, 'source_args'): + if _is_selectable_table(_LAST_RESULT_TABLE) and hasattr(_LAST_RESULT_TABLE, 'source_args'): return _LAST_RESULT_TABLE.source_args or [] return [] @@ -673,7 +721,7 @@ def get_last_result_table_row_selection_args(row_index: int) -> Optional[List[st Returns: Selection arguments (e.g., ['-item', '3']) or None """ - if _LAST_RESULT_TABLE and hasattr(_LAST_RESULT_TABLE, 'rows'): + if _is_selectable_table(_LAST_RESULT_TABLE) and hasattr(_LAST_RESULT_TABLE, 'rows'): if 0 <= row_index < len(_LAST_RESULT_TABLE.rows): row = _LAST_RESULT_TABLE.rows[row_index] if hasattr(row, 'selection_args'): @@ -696,13 +744,18 @@ def set_current_stage_table(result_table: Optional[Any]) -> None: _CURRENT_STAGE_TABLE = result_table +def get_current_stage_table() -> Optional[Any]: + """Get the current pipeline stage table (if any).""" + return _CURRENT_STAGE_TABLE + + def 
get_current_stage_table_source_command() -> Optional[str]: """Get the source command from the current pipeline stage table. Returns: Command name (e.g., 'download-data') or None """ - if _CURRENT_STAGE_TABLE and hasattr(_CURRENT_STAGE_TABLE, 'source_command'): + if _is_selectable_table(_CURRENT_STAGE_TABLE) and hasattr(_CURRENT_STAGE_TABLE, 'source_command'): return _CURRENT_STAGE_TABLE.source_command return None @@ -713,7 +766,7 @@ def get_current_stage_table_source_args() -> List[str]: Returns: List of arguments or empty list """ - if _CURRENT_STAGE_TABLE and hasattr(_CURRENT_STAGE_TABLE, 'source_args'): + if _is_selectable_table(_CURRENT_STAGE_TABLE) and hasattr(_CURRENT_STAGE_TABLE, 'source_args'): return _CURRENT_STAGE_TABLE.source_args or [] return [] @@ -727,7 +780,7 @@ def get_current_stage_table_row_selection_args(row_index: int) -> Optional[List[ Returns: Selection arguments or None """ - if _CURRENT_STAGE_TABLE and hasattr(_CURRENT_STAGE_TABLE, 'rows'): + if _is_selectable_table(_CURRENT_STAGE_TABLE) and hasattr(_CURRENT_STAGE_TABLE, 'rows'): if 0 <= row_index < len(_CURRENT_STAGE_TABLE.rows): row = _CURRENT_STAGE_TABLE.rows[row_index] if hasattr(row, 'selection_args'): @@ -735,23 +788,21 @@ def get_current_stage_table_row_selection_args(row_index: int) -> Optional[List[ return None +def get_current_stage_table_row_source_index(row_index: int) -> Optional[int]: + """Get the original source index for a row in the current stage table. + + Useful when the table has been sorted for display but selections should map + back to the original item order (e.g., playlist or provider order). + """ + if _is_selectable_table(_CURRENT_STAGE_TABLE) and hasattr(_CURRENT_STAGE_TABLE, 'rows'): + if 0 <= row_index < len(_CURRENT_STAGE_TABLE.rows): + row = _CURRENT_STAGE_TABLE.rows[row_index] + return getattr(row, 'source_index', None) + return None + + def clear_last_result() -> None: """Clear the stored last result table and items.""" global _LAST_RESULT_TABLE, _LAST_RESULT_ITEMS _LAST_RESULT_TABLE = None _LAST_RESULT_ITEMS = [] - - -def emit_list(objects: List[Any]) -> None: - """Emit a list of PipeObjects to the next pipeline stage. - - This allows cmdlets to emit multiple results that are tracked as a list, - enabling downstream cmdlets to process all of them or filter by metadata. 
- - Args: - objects: List of PipeObject instances or dicts to emit - """ - if _CURRENT_CONTEXT is not None: - _CURRENT_CONTEXT.emit(objects) - else: - _PIPE_EMITS.append(objects) diff --git a/pyproject.toml b/pyproject.toml index 1e3bb53..4ebe258 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -106,7 +106,7 @@ dev = [ mm = "medeia_macina.cli_entry:main" medeia = "medeia_macina.cli_entry:main" -[project.urls] +[project.url] Homepage = "https://github.com/yourusername/medeia-macina" Documentation = "https://medeia-macina.readthedocs.io" Repository = "https://github.com/yourusername/medeia-macina.git" diff --git a/result_table.py b/result_table.py index f668bd1..a6496be 100644 --- a/result_table.py +++ b/result_table.py @@ -114,6 +114,8 @@ class ResultRow: columns: List[ResultColumn] = field(default_factory=list) selection_args: Optional[List[str]] = None """Arguments to use for this row when selected via @N syntax (e.g., ['-item', '3'])""" + source_index: Optional[int] = None + """Original insertion order index (used to map sorted views back to source items).""" def add_column(self, name: str, value: Any) -> None: """Add a column to this row.""" @@ -166,13 +168,14 @@ class ResultTable: >>> print(result_table) """ - def __init__(self, title: str = "", title_width: int = 80, max_columns: int = None): + def __init__(self, title: str = "", title_width: int = 80, max_columns: int = None, preserve_order: bool = False): """Initialize a result table. Args: title: Optional title for the table title_width: Width for formatting the title line max_columns: Maximum number of columns to display (None for unlimited, default: 5 for search results) + preserve_order: When True, skip automatic sorting so row order matches source """ self.title = title self.title_width = title_width @@ -187,10 +190,25 @@ class ResultTable: """Base arguments for the source command""" self.header_lines: List[str] = [] """Optional metadata lines rendered under the title""" + self.preserve_order: bool = preserve_order + """If True, skip automatic sorting so display order matches input order.""" + self.no_choice: bool = False + """When True, suppress row numbers/selection to make the table non-interactive.""" + + def set_no_choice(self, no_choice: bool = True) -> "ResultTable": + """Mark the table as non-interactive (no row numbers, no selection parsing).""" + self.no_choice = bool(no_choice) + return self + + def set_preserve_order(self, preserve: bool = True) -> "ResultTable": + """Configure whether this table should skip automatic sorting.""" + self.preserve_order = bool(preserve) + return self def add_row(self) -> ResultRow: """Add a new row to the table and return it for configuration.""" row = ResultRow() + row.source_index = len(self.rows) self.rows.append(row) return row @@ -210,6 +228,50 @@ class ResultTable: self.source_command = command self.source_args = args or [] return self + + def init_command(self, title: str, command: str, args: Optional[List[str]] = None, preserve_order: bool = False) -> "ResultTable": + """Initialize table with title, command, args, and preserve_order in one call. 
+ + Consolidates common initialization pattern: ResultTable(title) + set_source_command(cmd, args) + set_preserve_order(preserve_order) + + Args: + title: Table title + command: Source command name + args: Command arguments + preserve_order: Whether to preserve input row order + + Returns: + self for method chaining + """ + self.title = title + self.source_command = command + self.source_args = args or [] + self.preserve_order = preserve_order + return self + + def copy_with_title(self, new_title: str) -> "ResultTable": + """Create a new table copying settings from this one but with a new title. + + Consolidates pattern: new_table = ResultTable(title); new_table.set_source_command(...) + Useful for intermediate processing that needs to preserve source command but update display title. + + Args: + new_title: New title for the copied table + + Returns: + New ResultTable with copied settings and new title + """ + new_table = ResultTable( + title=new_title, + title_width=self.title_width, + max_columns=self.max_columns, + preserve_order=self.preserve_order + ) + new_table.source_command = self.source_command + new_table.source_args = list(self.source_args) if self.source_args else [] + new_table.input_options = dict(self.input_options) if self.input_options else {} + new_table.no_choice = self.no_choice + return new_table def set_row_selection_args(self, row_index: int, selection_args: List[str]) -> None: """Set the selection arguments for a specific row. @@ -252,6 +314,39 @@ class ResultTable: self.set_header_line(summary) return summary + def sort_by_title(self) -> "ResultTable": + """Sort rows alphabetically by Title or Name column. + + Looks for columns named 'Title', 'Name', or 'Tag' (in that order). + Case-insensitive sort. Returns self for chaining. + + IMPORTANT: Updates source_index to match new sorted positions so that + @N selections continue to work correctly after sorting. + """ + if getattr(self, "preserve_order", False): + return self + # Find the title column (try Title, Name, Tag in order) + title_col_idx = None + for row in self.rows: + if not row.columns: + continue + for idx, col in enumerate(row.columns): + col_lower = col.name.lower() + if col_lower in ("title", "name", "tag"): + title_col_idx = idx + break + if title_col_idx is not None: + break + + if title_col_idx is None: + # No title column found, return unchanged + return self + + # Sort rows by the title column value (case-insensitive) + self.rows.sort(key=lambda row: row.columns[title_col_idx].value.lower() if title_col_idx < len(row.columns) else "") + + return self + def add_result(self, result: Any) -> "ResultTable": """Add a result object (SearchResult, PipeObject, ResultItem, TagItem, or dict) as a row. @@ -338,8 +433,7 @@ class ResultTable: # Size (for files) if hasattr(result, 'size_bytes') and result.size_bytes: - size_mb = result.size_bytes / (1024 * 1024) - row.add_column("Size", f"{size_mb:.1f} MB") + row.add_column("Size (Mb)", _format_size(result.size_bytes, integer_only=True)) # Annotations if hasattr(result, 'annotations') and result.annotations: @@ -385,8 +479,7 @@ class ResultTable: # Size (for files) - integer MB only if hasattr(item, 'size_bytes') and item.size_bytes: - size_mb = int(item.size_bytes / (1024 * 1024)) - row.add_column("Size", f"{size_mb} MB") + row.add_column("Size (Mb)", _format_size(item.size_bytes, integer_only=True)) def _add_tag_item(self, row: ResultRow, item: Any) -> None: """Extract and add TagItem fields to row (compact tag display). 
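The new `source_index` field plus `sort_by_title()` exist so a table can be re-sorted for display while `@N` selections still resolve to the original items, which is exactly what `set_last_result_table` does after sorting. A minimal sketch of that mapping using plain tuples instead of the real `ResultTable`/`ResultRow` classes (names and data below are illustrative):

```python
from typing import Any, List, Tuple

def sort_rows_keep_mapping(titles: List[str], items: List[Any]) -> Tuple[List[str], List[Any]]:
    """Sort display rows by title (case-insensitive) and reorder items to match.

    Each row carries its original position (source_index), so the sorted view
    can be mapped back to the source items after sorting.
    """
    rows = [(title, idx) for idx, title in enumerate(titles)]  # (display value, source_index)
    rows.sort(key=lambda r: r[0].lower())
    sorted_titles = [title for title, _ in rows]
    sorted_items = [items[idx] for _, idx in rows]
    return sorted_titles, sorted_items

titles = ["beta.mp4", "Alpha.m4a", "gamma.txt"]
items = [{"hash": "b"}, {"hash": "a"}, {"hash": "g"}]
print(sort_rows_keep_mapping(titles, items))
# (['Alpha.m4a', 'beta.mp4', 'gamma.txt'], [{'hash': 'a'}, {'hash': 'b'}, {'hash': 'g'}])
```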
@@ -421,8 +514,8 @@ class ResultTable: row.add_column("Title", obj.title[:50] + ("..." if len(obj.title) > 50 else "")) # File info - if hasattr(obj, 'file_path') and obj.file_path: - file_str = str(obj.file_path) + if hasattr(obj, 'path') and obj.path: + file_str = str(obj.path) if len(file_str) > 60: file_str = "..." + file_str[-57:] row.add_column("Path", file_str) @@ -467,8 +560,8 @@ class ResultTable: def is_hidden_field(field_name: Any) -> bool: # Hide internal/metadata fields hidden_fields = { - '__', 'id', 'action', 'parent_id', 'is_temp', 'file_path', 'extra', - 'target', 'hash', 'hash_hex', 'file_hash' + '__', 'id', 'action', 'parent_id', 'is_temp', 'path', 'extra', + 'target', 'hash', 'hash_hex', 'file_hash', 'tags', 'tag_summary', 'name' } if isinstance(field_name, str): if field_name.startswith('__'): @@ -551,15 +644,12 @@ class ResultTable: # Only add priority groups if we haven't already filled columns from 'columns' field if column_count == 0: - # Priority field groups - uses first matching field in each group + # Explicitly set which columns to display in order priority_groups = [ - ('title | name | filename', ['title', 'name', 'filename']), + ('title', ['title']), ('ext', ['ext']), - ('origin | source | store', ['origin', 'source', 'store']), - ('size | size_bytes', ['size', 'size_bytes']), - ('type | media_kind | kind', ['type', 'media_kind', 'kind']), - ('tags | tag_summary', ['tags', 'tag_summary']), - ('detail | description', ['detail', 'description']), + ('size', ['size', 'size_bytes']), + ('store', ['store', 'origin', 'source']), ] # Add priority field groups first - use first match in each group @@ -568,14 +658,22 @@ class ResultTable: break for field in field_options: if field in visible_data and field not in added_fields: - value_str = format_value(visible_data[field]) + # Special handling for size fields - format as MB integer + if field in ['size', 'size_bytes']: + value_str = _format_size(visible_data[field], integer_only=True) + else: + value_str = format_value(visible_data[field]) + if len(value_str) > 60: value_str = value_str[:57] + "..." - # Special case for Origin/Source -> Store to match user preference - col_name = field.replace('_', ' ').title() - if field in ['origin', 'source']: + # Map field names to display column names + if field in ['store', 'origin', 'source']: col_name = "Store" + elif field in ['size', 'size_bytes']: + col_name = "Size (Mb)" + else: + col_name = field.replace('_', ' ').title() row.add_column(col_name, value_str) added_fields.add(field) @@ -583,17 +681,7 @@ class ResultTable: break # Use first match in this group, skip rest # Add remaining fields only if we haven't hit max_columns (and no explicit columns were set) - if column_count < self.max_columns: - for key, value in visible_data.items(): - if column_count >= self.max_columns: - break - if key not in added_fields: # Only add if not already added - value_str = format_value(value) - if len(value_str) > 40: - value_str = value_str[:37] + "..." 
- row.add_column(key.replace('_', ' ').title(), value_str) - added_fields.add(key) # Track in added_fields to prevent re-adding - column_count += 1 + # Don't add any remaining fields - only use priority_groups for dict results # Check for selection args if '_selection_args' in data: @@ -637,8 +725,8 @@ class ResultTable: value_width ) - # Calculate row number column width - num_width = len(str(len(self.rows))) + 1 # +1 for padding + # Calculate row number column width (skip if no-choice) + num_width = 0 if self.no_choice else len(str(len(self.rows))) + 1 # Preserve column order column_names = list(col_widths.keys()) @@ -647,7 +735,7 @@ class ResultTable: cap = 5 if name.lower() == "ext" else 90 return min(col_widths[name], cap) - widths = [num_width] + [capped_width(name) for name in column_names] + widths = ([] if self.no_choice else [num_width]) + [capped_width(name) for name in column_names] base_inner_width = sum(widths) + (len(widths) - 1) * 3 # account for " | " separators # Compute final table width (with side walls) to accommodate headers/titles @@ -668,7 +756,7 @@ class ResultTable: # Title block if self.title: lines.append("|" + "=" * (table_width - 2) + "|") - lines.append(wrap(self.title.center(table_width - 2))) + lines.append(wrap(self.title.ljust(table_width - 2))) lines.append("|" + "=" * (table_width - 2) + "|") # Optional header metadata lines @@ -676,8 +764,8 @@ class ResultTable: lines.append(wrap(meta)) # Add header with # column - header_parts = ["#".ljust(num_width)] - separator_parts = ["-" * num_width] + header_parts = [] if self.no_choice else ["#".ljust(num_width)] + separator_parts = [] if self.no_choice else ["-" * num_width] for col_name in column_names: width = capped_width(col_name) header_parts.append(col_name.ljust(width)) @@ -688,7 +776,7 @@ class ResultTable: # Add rows with row numbers for row_num, row in enumerate(self.rows, 1): - row_parts = [str(row_num).ljust(num_width)] + row_parts = [] if self.no_choice else [str(row_num).ljust(num_width)] for col_name in column_names: width = capped_width(col_name) col_value = row.get_column(col_name) or "" @@ -785,6 +873,11 @@ class ResultTable: If accept_args=False: List of 0-based indices, or None if cancelled If accept_args=True: Dict with "indices" and "args" keys, or None if cancelled """ + if self.no_choice: + print(f"\n{self}") + print("Selection is disabled for this table.") + return None + # Display the table print(f"\n{self}") @@ -832,6 +925,9 @@ class ResultTable: Returns: List of 0-based indices, or None if invalid """ + if self.no_choice: + return None + indices = set() # Split by comma for multiple selections @@ -1206,14 +1302,15 @@ def _format_duration(duration: Any) -> str: return "" -def _format_size(size: Any) -> str: +def _format_size(size: Any, integer_only: bool = False) -> str: """Format file size as human-readable string. 
Args: size: Size in bytes or already formatted string + integer_only: If True, show MB as integer only (e.g., "250 MB" not "250.5 MB") Returns: - Formatted size string (e.g., "1.5 MB", "250 KB") + Formatted size string (e.g., "250 MB", "1.5 MB" or "250 MB" if integer_only=True) """ if isinstance(size, str): return size if size else "" @@ -1223,11 +1320,22 @@ def _format_size(size: Any) -> str: if bytes_val < 0: return "" - for unit, divisor in [("GB", 1024**3), ("MB", 1024**2), ("KB", 1024)]: - if bytes_val >= divisor: - return f"{bytes_val / divisor:.1f} {unit}" - - return f"{bytes_val} B" + if integer_only: + # For table display: always show as integer MB if >= 1MB + mb_val = int(bytes_val / (1024 * 1024)) + if mb_val > 0: + return str(mb_val) + kb_val = int(bytes_val / 1024) + if kb_val > 0: + return str(kb_val) + return str(bytes_val) + else: + # For descriptions: show with one decimal place + for unit, divisor in [("GB", 1024**3), ("MB", 1024**2), ("KB", 1024)]: + if bytes_val >= divisor: + return f"{bytes_val / divisor:.1f} {unit}" + + return f"{bytes_val} B" except (ValueError, TypeError): return "" diff --git a/scripts/check_cmdlets_import.py b/scripts/check_cmdlets_import.py new file mode 100644 index 0000000..91c73de --- /dev/null +++ b/scripts/check_cmdlets_import.py @@ -0,0 +1,10 @@ +import importlib +import traceback +import sys + +try: + importlib.import_module('cmdlets') + print('cmdlets imported OK') +except Exception: + traceback.print_exc() + sys.exit(1) diff --git a/scripts/check_download_media.py b/scripts/check_download_media.py new file mode 100644 index 0000000..e6e08a6 --- /dev/null +++ b/scripts/check_download_media.py @@ -0,0 +1,8 @@ +import importlib, traceback, sys + +try: + importlib.import_module('cmdlets.download_media') + print('download_media imported OK') +except Exception: + traceback.print_exc() + sys.exit(1) diff --git a/scripts/inspect_shared_lines.py b/scripts/inspect_shared_lines.py new file mode 100644 index 0000000..e9a9b26 --- /dev/null +++ b/scripts/inspect_shared_lines.py @@ -0,0 +1,5 @@ +from pathlib import Path +p = Path('cmdlets/_shared.py') +for i, line in enumerate(p.read_text().splitlines(), start=1): + if 1708 <= i <= 1720: + print(f"{i:4}: {repr(line)}") diff --git a/scripts/normalize_shared_indent.py b/scripts/normalize_shared_indent.py new file mode 100644 index 0000000..8286e75 --- /dev/null +++ b/scripts/normalize_shared_indent.py @@ -0,0 +1,24 @@ +from pathlib import Path +import re + +p = Path('cmdlets/_shared.py') +src = p.read_text(encoding='utf-8') +lines = src.splitlines(True) +changed = False +new_lines = [] +for line in lines: + m = re.match(r'^(?P[ \t]*)', line) + ws = m.group('ws') if m else '' + if '\t' in ws: + new_ws = ws.replace('\t', ' ') + new_line = new_ws + line[len(ws):] + new_lines.append(new_line) + changed = True + else: + new_lines.append(line) + +if changed: + p.write_text(''.join(new_lines), encoding='utf-8') + print('Normalized leading tabs to spaces in', p) +else: + print('No leading tabs found; no changes made') diff --git a/scripts/refactor_download_careful.py b/scripts/refactor_download_careful.py new file mode 100644 index 0000000..bb415c9 --- /dev/null +++ b/scripts/refactor_download_careful.py @@ -0,0 +1,160 @@ +#!/usr/bin/env python3 +""" +Careful refactoring of download_data.py to class-based pattern. +Handles nested functions and inner definitions correctly. 
+""" + +import re +from pathlib import Path + +def refactor_download_data(): + backup_file = Path('cmdlets/download_data_backup.py') + output_file = Path('cmdlets/download_data.py') + + print(f"Reading: {backup_file}") + content = backup_file.read_text(encoding='utf-8') + lines = content.split('\n') + + output = [] + i = 0 + in_cmdlet_def = False + skip_old_run_wrapper = False + class_added = False + + while i < len(lines): + line = lines[i] + + # Skip old _run wrapper function + if line.strip().startswith('def _run(result: Any'): + while i < len(lines): + i += 1 + if lines[i] and not lines[i][0].isspace(): + break + continue + + # Skip old CMDLET definition + if line.strip().startswith('CMDLET = Cmdlet('): + while i < len(lines): + i += 1 + if lines[i].strip() == ')': + i += 1 + break + output.append('') + output.append('# Create and register the cmdlet') + output.append('CMDLET = Download_Data()') + output.append('') + continue + + # Insert class definition before first top-level helper + if not class_added and line.strip().startswith('def _download_torrent_worker('): + # Add class header with __init__ and run() + output.extend([ + '', + '', + 'class Download_Data(Cmdlet):', + ' """Class-based download-data cmdlet with self-registration."""', + '', + ' def __init__(self) -> None:', + ' """Initialize download-data cmdlet."""', + ' super().__init__(', + ' name="download-data",', + ' summary="Download data from url with playlist/clip support using yt-dlp",', + ' usage="download-data [options] or search-file | download-data [options]",', + ' alias=["download", "dl"],', + ' arg=[', + ' CmdletArg(name="url", type="string", required=False, description="URL to download (HTTP/HTTPS or file with URL list)", variadic=True),', + ' CmdletArg(name="-url", type="string", description="URL to download (alias for positional argument)", variadic=True),', + ' CmdletArg(name="list-formats", type="flag", description="List available formats without downloading"),', + ' CmdletArg(name="audio", type="flag", alias="a", description="Download audio only (extract from video)"),', + ' CmdletArg(name="video", type="flag", alias="v", description="Download video (default if not specified)"),', + ' CmdletArg(name="format", type="string", alias="fmt", description="Explicit yt-dlp format selector (e.g., bestvideo+bestaudio)"),', + ' CmdletArg(name="clip", type="string", description="Extract time range: MM:SS-MM:SS (e.g., 34:03-35:08) or seconds"),', + ' CmdletArg(name="section", type="string", description="Download sections (yt-dlp only): TIME_RANGE[,TIME_RANGE...] 
(e.g., 1:30-1:35,0:05-0:15)"),', + ' CmdletArg(name="cookies", type="string", description="Path to cookies.txt file for authentication"),', + ' CmdletArg(name="torrent", type="flag", description="Download torrent/magnet via AllDebrid (requires API key in config)"),', + ' CmdletArg(name="wait", type="float", description="Wait time (seconds) for magnet processing timeout"),', + ' CmdletArg(name="background", type="flag", alias="bg", description="Start download in background and return to prompt immediately"),', + ' CmdletArg(name="item", type="string", alias="items", description="Item selection for playlists/formats: use -item N to select format N, or -item to show table for @N selection in next command"),', + ' SharedArgs.STORAGE,', + ' ],', + ' detail=["Download media from url with advanced features.", "", "See help for full usage examples."],', + ' exec=self.run,', + ' )', + ' self.register()', + '', + ' def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:', + ' """Main execution method."""', + ' stage_ctx = pipeline_context.get_stage_context()', + ' in_pipeline = stage_ctx is not None and getattr(stage_ctx, "total_stages", 1) > 1', + ' if in_pipeline and isinstance(config, dict):', + ' config["_quiet_background_output"] = True', + ' return self._run_impl(result, args, config, emit_results=True)', + '', + ' # ' + '='*70, + ' # HELPER METHODS', + ' # ' + '='*70, + '', + ]) + class_added = True + + # Convert top-level helper functions to static methods + if class_added and line and not line[0].isspace() and line.strip().startswith('def _'): + output.append(' @staticmethod') + output.append(f' {line}') + i += 1 + # Copy function body with indentation + while i < len(lines): + next_line = lines[i] + # Stop at next top-level definition + if next_line and not next_line[0].isspace() and (next_line.strip().startswith(('def ', 'class ', 'CMDLET'))): + break + # Add indentation + if next_line.strip(): + output.append(f' {next_line}') + else: + output.append(next_line) + i += 1 + continue + + output.append(line) + i += 1 + + result_text = '\n'.join(output) + + # NOW: Update function calls carefully + # Only update calls in _run_impl, not in nested function definitions + # Pattern: match _func( but NOT when it's after "def " on the same line + helper_funcs = [ + '_download_torrent_worker', '_guess_libgen_title', '_is_libgen_entry', + '_download_libgen_entry', '_libgen_background_worker', + '_start_libgen_background_worker', '_run_pipeline_tail', + '_download_http_background_worker', '_start_http_background_download', + '_parse_torrent_file', '_download_torrent_file', '_is_torrent_file_or_url', + '_process_torrent_input', '_show_playlist_table', '_parse_time_range', + '_parse_section_ranges', '_parse_playlist_selection_indices', + '_select_playlist_entries', '_sanitize_title_for_filename', + '_find_playlist_files_from_entries', '_snapshot_playlist_paths', + '_is_openlibrary_downloadable', '_as_dict', '_is_youtube_url', + ] + + # Split into lines for careful replacement + result_lines = result_text.split('\n') + for idx, line in enumerate(result_lines): + # Skip lines that are function definitions + if 'def ' in line: + continue + # Replace helper function calls with self. 
+ for func in helper_funcs: + # Pattern: _func( with word boundary before + pattern = rf'\b({re.escape(func)})\(' + if re.search(pattern, line): + result_lines[idx] = re.sub(pattern, r'self.\1(', line) + + result_text = '\n'.join(result_lines) + + output_file.write_text(result_text, encoding='utf-8') + print(f"✓ Written: {output_file}") + print(f"✓ Class-based refactor complete") + +if __name__ == '__main__': + refactor_download_data() diff --git a/scripts/refactor_download_data.py b/scripts/refactor_download_data.py new file mode 100644 index 0000000..557d561 --- /dev/null +++ b/scripts/refactor_download_data.py @@ -0,0 +1,131 @@ +#!/usr/bin/env python3 +""" +Automated refactoring script for download_data.py +Converts module-level functions to class-based cmdlet pattern. +""" + +import re +from pathlib import Path + +def main(): + backup_file = Path('cmdlets/download_data_backup.py') + output_file = Path('cmdlets/download_data.py') + + print(f"Reading: {backup_file}") + content = backup_file.read_text(encoding='utf-8') + lines = content.split('\n') + + output = [] + i = 0 + in_cmdlet_def = False + skip_old_run_wrapper = False + class_section_added = False + + # Track where to insert class definition + last_import_line = 0 + + while i < len(lines): + line = lines[i] + + # Track imports + if line.strip().startswith(('import ', 'from ')): + last_import_line = len(output) + + # Skip old _run wrapper function + if 'def _run(result: Any' in line: + skip_old_run_wrapper = True + i += 1 + continue + + if skip_old_run_wrapper: + if line and not line[0].isspace(): + skip_old_run_wrapper = False + else: + i += 1 + continue + + # Skip old CMDLET definition + if line.strip().startswith('CMDLET = Cmdlet('): + in_cmdlet_def = True + i += 1 + continue + + if in_cmdlet_def: + if line.strip() == ')': + in_cmdlet_def = False + # Add class instantiation instead + output.append('') + output.append('# Create and register the cmdlet') + output.append('CMDLET = Download_Data()') + output.append('') + i += 1 + continue + + # Insert class definition before first helper function + if not class_section_added and line.strip().startswith('def _download_torrent_worker('): + output.append('') + output.append('') + output.append('class Download_Data(Cmdlet):') + output.append(' """Class-based download-data cmdlet with self-registration."""') + output.append('') + output.append(' # Full __init__ implementation to be added') + output.append(' # Full run() method to be added') + output.append('') + output.append(' # ' + '='*70) + output.append(' # HELPER METHODS') + output.append(' # ' + '='*70) + output.append('') + class_section_added = True + + # Convert top-level helper functions to static methods + if class_section_added and line.strip().startswith('def _') and not line.strip().startswith('def __'): + # Check if this is a top-level function (no indentation) + if not line.startswith((' ', '\t')): + output.append(' @staticmethod') + output.append(f' {line}') + i += 1 + # Copy function body with indentation + while i < len(lines): + next_line = lines[i] + # Stop at next top-level definition + if next_line and not next_line[0].isspace() and (next_line.strip().startswith('def ') or next_line.strip().startswith('class ') or next_line.strip().startswith('CMDLET')): + break + # Add indentation + if next_line.strip(): + output.append(f' {next_line}') + else: + output.append(next_line) + i += 1 + continue + + # Convert _run_impl to method (but keep as-is for now, will be updated later) + if class_section_added and 
line.strip().startswith('def _run_impl('): + output.append(' def _run_impl(self, result: Any, args: Sequence[str], config: Dict[str, Any], emit_results: bool = True) -> int:') + i += 1 + # Copy function body with indentation + while i < len(lines): + next_line = lines[i] + if next_line and not next_line[0].isspace() and next_line.strip(): + break + if next_line.strip(): + output.append(f' {next_line}') + else: + output.append(next_line) + i += 1 + continue + + output.append(line) + i += 1 + + # Write output + result_text = '\n'.join(output) + output_file.write_text(result_text, encoding='utf-8') + print(f"✓ Written: {output_file}") + print(f"✓ Converted {content.count('def _')} helper functions to static methods") + print("\nNext steps:") + print("1. Add full __init__ method with cmdlet args") + print("2. Add run() method that calls _run_impl") + print("3. Update function calls in _run_impl from _func() to self._func()") + +if __name__ == '__main__': + main() diff --git a/test/0e6509a4c01cd6e4584a4d5b335a4bce196d51c5a73a988cabdd152efa5e6a89/SaveTwitter.Net_lrO5QUBSiiHiGidl_(480p).mp4 b/test/0e6509a4c01cd6e4584a4d5b335a4bce196d51c5a73a988cabdd152efa5e6a89/SaveTwitter.Net_lrO5QUBSiiHiGidl_(480p).mp4 new file mode 100644 index 0000000..8c9eb5c Binary files /dev/null and b/test/0e6509a4c01cd6e4584a4d5b335a4bce196d51c5a73a988cabdd152efa5e6a89/SaveTwitter.Net_lrO5QUBSiiHiGidl_(480p).mp4 differ diff --git a/test/medios-macina.db b/test/medios-macina.db new file mode 100644 index 0000000..b6176ef Binary files /dev/null and b/test/medios-macina.db differ diff --git a/test/yapping.m4a b/test/yapping.m4a new file mode 100644 index 0000000..390f666 Binary files /dev/null and b/test/yapping.m4a differ diff --git a/test/yapping.m4a.metadata b/test/yapping.m4a.metadata new file mode 100644 index 0000000..044f709 --- /dev/null +++ b/test/yapping.m4a.metadata @@ -0,0 +1 @@ +hash:00beb438e3c02cdc0340526deb0c51f916ffd6330259be4f350009869c5448d9 diff --git a/test/yapping.m4a.tag b/test/yapping.m4a.tag new file mode 100644 index 0000000..99c9383 --- /dev/null +++ b/test/yapping.m4a.tag @@ -0,0 +1 @@ +title:yapping
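The call-rewriting step in `scripts/refactor_download_careful.py` relies on a word-boundary regex so that only call sites pick up the `self.` prefix, while any line containing a `def` is left untouched. A minimal sketch of that substitution, using one entry from the script's helper list and a hypothetical input line:

```python
import re

helper_funcs = ["_parse_time_range"]  # one entry from the script's helper_funcs list

def prefix_helper_calls(line: str) -> str:
    """Rewrite _helper( call sites to self._helper(, leaving definition lines untouched."""
    if "def " in line:  # same guard the script uses: never touch definitions
        return line
    for func in helper_funcs:
        line = re.sub(rf"\b({re.escape(func)})\(", r"self.\1(", line)
    return line

print(prefix_helper_calls("    start, end = _parse_time_range(clip_spec)"))
# -> "    start, end = self._parse_time_range(clip_spec)"
print(prefix_helper_calls("    def _parse_time_range(spec: str):"))
# -> unchanged, because definition lines are skipped
```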