"""Search-file cmdlet: Search for files by query, tags, size, type, duration, etc.""" from __future__ import annotations from typing import Any, Dict, Sequence, List, Optional, Tuple, Callable from fnmatch import fnmatchcase from pathlib import Path from dataclasses import dataclass, field import json import os import sys from helper.logger import log, debug import shutil import subprocess from helper.file_storage import FileStorage from helper.search_provider import get_provider, list_providers, SearchResult from metadata import import_pending_sidecars from . import register from ._shared import Cmdlet, CmdletArg import models import pipeline as ctx # Optional dependencies try: import mutagen # type: ignore except ImportError: # pragma: no cover mutagen = None # type: ignore try: from config import get_hydrus_url, resolve_output_dir except Exception: # pragma: no cover get_hydrus_url = None # type: ignore resolve_output_dir = None # type: ignore try: from helper.hydrus import HydrusClient, HydrusRequestError except ImportError: # pragma: no cover HydrusClient = None # type: ignore HydrusRequestError = RuntimeError # type: ignore try: from helper.utils import sha256_file except ImportError: # pragma: no cover sha256_file = None # type: ignore try: from helper.utils_constant import mime_maps except ImportError: # pragma: no cover mime_maps = {} # type: ignore # ============================================================================ # Data Classes (from helper/search.py) # ============================================================================ @dataclass(slots=True) class SearchRecord: path: str size_bytes: int | None = None duration_seconds: str | None = None tags: str | None = None hash_hex: str | None = None def as_dict(self) -> dict[str, str]: payload: dict[str, str] = {"path": self.path} if self.size_bytes is not None: payload["size"] = str(self.size_bytes) if self.duration_seconds: payload["duration"] = self.duration_seconds if self.tags: payload["tags"] = self.tags if self.hash_hex: payload["hash"] = self.hash_hex return payload @dataclass class ResultItem: origin: str title: str detail: str annotations: List[str] target: str media_kind: str = "other" hash_hex: Optional[str] = None columns: List[tuple[str, str]] = field(default_factory=list) tag_summary: Optional[str] = None duration_seconds: Optional[float] = None size_bytes: Optional[int] = None full_metadata: Optional[Dict[str, Any]] = None tags: Optional[set[str]] = field(default_factory=set) relationships: Optional[List[str]] = field(default_factory=list) known_urls: Optional[List[str]] = field(default_factory=list) def to_dict(self) -> Dict[str, Any]: payload: Dict[str, Any] = { "title": self.title, } # Always include these core fields for downstream cmdlets (get-file, download-data, etc) payload["origin"] = self.origin payload["target"] = self.target payload["media_kind"] = self.media_kind # Always include full_metadata if present (needed by download-data, etc) # This is NOT for display, but for downstream processing if self.full_metadata: payload["full_metadata"] = self.full_metadata # Include columns if defined (result renderer will use these for display) if self.columns: payload["columns"] = list(self.columns) else: # If no columns, include the detail for backwards compatibility payload["detail"] = self.detail payload["annotations"] = list(self.annotations) # Include optional fields if self.hash_hex: payload["hash"] = self.hash_hex if self.tag_summary: payload["tags"] = self.tag_summary if self.tags: payload["tags_set"] = list(self.tags) if self.relationships: payload["relationships"] = self.relationships if self.known_urls: payload["known_urls"] = self.known_urls return payload STORAGE_ORIGINS = {"local", "hydrus", "debrid"} def _ensure_storage_columns(payload: Dict[str, Any]) -> Dict[str, Any]: """Attach Title/Store columns for storage-origin results to keep CLI display compact.""" origin_value = str(payload.get("origin") or payload.get("source") or "").lower() if origin_value not in STORAGE_ORIGINS: return payload title = payload.get("title") or payload.get("name") or payload.get("target") or payload.get("path") or "Result" store_label = payload.get("origin") or payload.get("source") or origin_value normalized = dict(payload) normalized["columns"] = [("Title", str(title)), ("Store", str(store_label))] return normalized CMDLET = Cmdlet( name="search-file", summary="Unified search cmdlet for searchable backends (Hydrus, Local, Debrid, LibGen, OpenLibrary, Soulseek).", usage="search-file [query] [-tag TAG] [-size >100MB|<50MB] [-type audio|video|image] [-duration >10:00] [-storage BACKEND] [-provider PROVIDER]", args=[ CmdletArg("query", description="Search query string"), CmdletArg("tag", description="Filter by tag (can be used multiple times)"), CmdletArg("size", description="Filter by size: >100MB, <50MB, =10MB"), CmdletArg("type", description="Filter by type: audio, video, image, document"), CmdletArg("duration", description="Filter by duration: >10:00, <1:30:00"), CmdletArg("limit", type="integer", description="Limit results (default: 100)"), CmdletArg("storage", description="Search storage backend: hydrus, local, debrid (default: all searchable)"), CmdletArg("provider", description="Search provider: libgen, openlibrary, soulseek, debrid, local (overrides -storage)"), ], details=[ "Search across multiple providers: File storage (Hydrus, Local, Debrid), Books (LibGen, OpenLibrary), Music (Soulseek)", "Use -provider to search a specific source, or -storage to search file backends", "Filter results by: tag, size, type, duration", "Results can be piped to other commands", "Examples:", "search-file foo # Search all file backends", "search-file -provider libgen 'python programming' # Search LibGen books", "search-file -provider debrid 'movie' # Search AllDebrid magnets", "search-file 'music' -provider soulseek # Search Soulseek P2P", "search-file -provider openlibrary 'tolkien' # Search OpenLibrary", "search-file song -storage hydrus -type audio # Search only Hydrus audio", "search-file movie -tag action -provider debrid # Debrid with filters", ], ) @register(["search-file", "search"]) def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: """Search across multiple providers: Hydrus, Local, Debrid, LibGen, etc.""" args_list = [str(arg) for arg in (args or [])] # Parse arguments query = "" tag_filters: List[str] = [] size_filter: Optional[Tuple[str, int]] = None duration_filter: Optional[Tuple[str, float]] = None type_filter: Optional[str] = None storage_backend: Optional[str] = None provider_name: Optional[str] = None limit = 100 # Simple argument parsing i = 0 while i < len(args_list): arg = args_list[i] low = arg.lower() if low in {"-provider", "--provider"} and i + 1 < len(args_list): provider_name = args_list[i + 1].lower() i += 2 elif low in {"-storage", "--storage"} and i + 1 < len(args_list): storage_backend = args_list[i + 1].lower() i += 2 elif low in {"-tag", "--tag"} and i + 1 < len(args_list): tag_filters.append(args_list[i + 1]) i += 2 elif low in {"-limit", "--limit"} and i + 1 < len(args_list): try: limit = int(args_list[i + 1]) except ValueError: limit = 100 i += 2 elif low in {"-type", "--type"} and i + 1 < len(args_list): type_filter = args_list[i + 1].lower() i += 2 elif not arg.startswith("-"): if query: query += " " + arg else: query = arg i += 1 else: i += 1 if not query: log("Provide a search query", file=sys.stderr) return 1 # Initialize worker for this search command from helper.local_library import LocalLibraryDB from config import get_local_storage_path import uuid worker_id = str(uuid.uuid4()) library_root = get_local_storage_path(config or {}) if not library_root: log("No library root configured", file=sys.stderr) return 1 db = LocalLibraryDB(library_root) db.insert_worker( worker_id, "search", title=f"Search: {query}", description=f"Query: {query}", pipe=ctx.get_current_command_text() ) try: results_list = [] # Try to search using provider (libgen, soulseek, debrid, openlibrary) if provider_name: debug(f"[search_file] Attempting provider search with: {provider_name}") provider = get_provider(provider_name, config) if not provider: log(f"Provider '{provider_name}' not available", file=sys.stderr) db.update_worker_status(worker_id, 'error') return 1 debug(f"[search_file] Provider loaded, calling search with query: {query}") search_result = provider.search(query, limit=limit) debug(f"[search_file] Provider search returned {len(search_result)} results") for item in search_result: item_dict = item.to_dict() results_list.append(item_dict) ctx.emit(item_dict) debug(f"[search_file] Emitted {len(results_list)} results") # Write results to worker stdout db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2)) db.update_worker_status(worker_id, 'completed') return 0 # Otherwise search using FileStorage (Hydrus, Local, Debrid backends) from helper.file_storage import FileStorage storage = FileStorage(config=config or {}) backend_to_search = storage_backend or None if backend_to_search: # Check if requested backend is available if backend_to_search == "hydrus": from helper.hydrus import is_hydrus_available if not is_hydrus_available(config or {}): log(f"Backend 'hydrus' is not available (Hydrus service not running)", file=sys.stderr) db.update_worker_status(worker_id, 'error') return 1 if not storage.supports_search(backend_to_search): log(f"Backend '{backend_to_search}' does not support searching", file=sys.stderr) db.update_worker_status(worker_id, 'error') return 1 results = storage[backend_to_search].search(query, limit=limit) else: # Search all searchable backends, but skip hydrus if unavailable from helper.hydrus import is_hydrus_available hydrus_available = is_hydrus_available(config or {}) all_results = [] for backend_name in storage.list_searchable_backends(): # Skip hydrus if not available if backend_name == "hydrus" and not hydrus_available: continue try: backend_results = storage[backend_name].search(query, limit=limit - len(all_results)) if backend_results: all_results.extend(backend_results) if len(all_results) >= limit: break except Exception as exc: log(f"Backend {backend_name} search failed: {exc}", file=sys.stderr) results = all_results[:limit] # Emit results and collect for workers table if results: for item in results: if isinstance(item, dict): normalized = _ensure_storage_columns(item) results_list.append(normalized) ctx.emit(normalized) elif isinstance(item, ResultItem): item_dict = item.to_dict() results_list.append(item_dict) ctx.emit(item_dict) else: item_dict = {"title": str(item)} results_list.append(item_dict) ctx.emit(item_dict) # Write results to worker stdout db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2)) else: log("No results found", file=sys.stderr) db.append_worker_stdout(worker_id, json.dumps([], indent=2)) db.update_worker_status(worker_id, 'completed') return 0 except Exception as exc: log(f"Search failed: {exc}", file=sys.stderr) import traceback traceback.print_exc(file=sys.stderr) db.update_worker_status(worker_id, 'error') return 1 finally: # Always close the database connection try: db.close() except Exception: pass