"""search-file cmdlet: Search for files in storage backends (Folder, Hydrus).""" from __future__ import annotations from typing import Any, Dict, Sequence, List, Optional import importlib import uuid from pathlib import Path import re import json import sys from SYS.logger import log, debug from ProviderCore.registry import get_search_provider, list_search_providers from SYS.config import get_local_storage_path from SYS.rich_display import ( show_provider_config_panel, show_store_config_panel, show_available_providers_panel, ) from . import _shared as sh ( Cmdlet, CmdletArg, SharedArgs, get_field, should_show_help, normalize_hash, first_title_tag, parse_hash_query, ) = ( sh.Cmdlet, sh.CmdletArg, sh.SharedArgs, sh.get_field, sh.should_show_help, sh.normalize_hash, sh.first_title_tag, sh.parse_hash_query, ) from SYS import pipeline as ctx STORAGE_ORIGINS = {"local", "hydrus", "folder"} class search_file(Cmdlet): """Class-based search-file cmdlet for searching storage backends.""" def __init__(self) -> None: super().__init__( name="search-file", summary="Search storage backends (Folder, Hydrus) or external providers (via -provider).", usage="search-file [-query ] [-store BACKEND] [-limit N] [-provider NAME]", arg=[ CmdletArg( "limit", type="integer", description="Limit results (default: 100)" ), SharedArgs.STORE, SharedArgs.QUERY, CmdletArg( "provider", type="string", description= "External provider name: bandcamp, libgen, soulseek, youtube, alldebrid, loc, internetarchive, hifi", ), CmdletArg( "open", type="integer", description="(alldebrid) Open folder/magnet by ID and list its files", ), ], detail=[ "Search across storage backends: Folder stores and Hydrus instances", "Use -store to search a specific backend by name", "URL search: url:* (any URL) or url: (URL substring)", "Extension search: ext: (e.g., ext:png)", "Hydrus-style extension: system:filetype = png", "Results include hash for downstream commands (get-file, add-tag, etc.)", "Examples:", "search-file -query foo # Search all storage backends", "search-file -store home -query '*' # Search 'home' Hydrus instance", "search-file -store test -query 'video' # Search 'test' folder store", "search-file -query 'hash:deadbeef...' # Search by SHA256 hash", "search-file -query 'url:*' # Files that have any URL", "search-file -query 'url:youtube.com' # Files whose URL contains substring", "search-file -query 'ext:png' # Files whose metadata ext is png", "search-file -query 'system:filetype = png' # Hydrus: native; Folder: maps to metadata.ext", "", "Provider search (-provider):", "search-file -provider youtube 'tutorial' # Search YouTube provider", "search-file -provider alldebrid '*' # List AllDebrid magnets", "search-file -provider alldebrid -open 123 '*' # Show files for a magnet", ], exec=self.run, ) self.register() # --- Helper methods ------------------------------------------------- @staticmethod def _normalize_extension(ext_value: Any) -> str: """Sanitize extension strings to alphanumerics and cap at 5 chars.""" ext = str(ext_value or "").strip().lstrip(".") for sep in (" ", "|", "(", "[", "{", ",", ";"): if sep in ext: ext = ext.split(sep, 1)[0] break if "." in ext: ext = ext.split(".")[-1] ext = "".join(ch for ch in ext if ch.isalnum()) return ext[:5] @staticmethod def _get_hifi_view_from_query(query: str) -> str: text = str(query or "").strip() if not text: return "track" if re.search(r"\balbum\s*:", text, flags=re.IGNORECASE): return "album" if re.search(r"\bartist\s*:", text, flags=re.IGNORECASE): return "artist" return "track" def _ensure_storage_columns(self, payload: Dict[str, Any]) -> Dict[str, Any]: """Ensure storage results have the necessary fields for result_table display.""" store_value = str(payload.get("store") or "").lower() if store_value not in STORAGE_ORIGINS: return payload # Ensure we have title field if "title" not in payload: payload["title"] = ( payload.get("name") or payload.get("target") or payload.get("path") or "Result" ) # Ensure we have ext field if "ext" not in payload: title = str(payload.get("title", "")) path_obj = Path(title) if path_obj.suffix: payload["ext"] = self._normalize_extension(path_obj.suffix.lstrip(".")) else: payload["ext"] = payload.get("ext", "") # Ensure size_bytes is present for display (already set by search_file()) # result_table will handle formatting it # Don't create manual columns - let result_table handle display # This allows the table to respect max_columns and apply consistent formatting return payload def _run_provider_search( self, *, provider_name: str, query: str, limit: int, limit_set: bool, open_id: Optional[int], args_list: List[str], refresh_mode: bool, config: Dict[str, Any], ) -> int: """Execute external provider search.""" if not provider_name or not query: from SYS import pipeline as ctx_mod progress = None if hasattr(ctx_mod, "get_pipeline_state"): progress = ctx_mod.get_pipeline_state().live_progress if progress: try: progress.stop() except Exception: pass log("Error: search-file -provider requires both provider and query", file=sys.stderr) log(f"Usage: {self.usage}", file=sys.stderr) providers_map = list_search_providers(config) available = [n for n, a in providers_map.items() if a] unconfigured = [n for n, a in providers_map.items() if not a] if unconfigured: show_provider_config_panel(unconfigured) if available: show_available_providers_panel(available) return 1 # Align with provider default when user did not set -limit. if not limit_set: limit = 50 from SYS import pipeline as ctx_mod progress = None if hasattr(ctx_mod, "get_pipeline_state"): progress = ctx_mod.get_pipeline_state().live_progress provider = get_search_provider(provider_name, config) if not provider: if progress: try: progress.stop() except Exception: pass show_provider_config_panel([provider_name]) providers_map = list_search_providers(config) available = [n for n, a in providers_map.items() if a] if available: show_available_providers_panel(available) return 1 worker_id = str(uuid.uuid4()) library_root = get_local_storage_path(config or {}) if get_local_storage_path else None if not library_root: try: from Store import Store storage_registry = Store(config=config or {}) # Try the first folder backend for name in storage_registry.list_backends(): backend = storage_registry[name] if type(backend).__name__ == "Folder": library_root = expand_path(getattr(backend, "_location", None)) if library_root: break except Exception: pass db = None if library_root: try: from API.folder import API_folder_store db = API_folder_store(library_root) db.__enter__() db.insert_worker( worker_id, "search-file", title=f"Search: {query}", description=f"Provider: {provider_name}, Query: {query}", pipe=ctx.get_current_command_text(), ) except Exception: db = None try: results_list: List[Dict[str, Any]] = [] from SYS import result_table importlib.reload(result_table) from SYS.result_table import ResultTable provider_text = str(provider_name or "").strip() provider_lower = provider_text.lower() id_match = re.search(r"\bid\s*[=:]\s*(\d+)", query, flags=re.IGNORECASE) parsed_open_id = open_id if id_match and parsed_open_id is None: try: parsed_open_id = int(id_match.group(1)) except Exception: parsed_open_id = None query = re.sub(r"\bid\s*[=:]\s*\d+", "", query, flags=re.IGNORECASE).strip() if not query: query = "*" effective_open_id = parsed_open_id if parsed_open_id is not None else open_id if provider_lower == "youtube": provider_label = "Youtube" elif provider_lower == "openlibrary": provider_label = "OpenLibrary" elif provider_lower == "loc": provider_label = "LoC" else: provider_label = provider_text[:1].upper() + provider_text[1:] if provider_text else "Provider" normalized_query = str(query or "").strip() provider_filters: Dict[str, Any] = {} try: normalized_query, provider_filters = provider.extract_query_arguments(query) except Exception: provider_filters = {} normalized_query = (normalized_query or "").strip() query = normalized_query or "*" provider_filters = dict(provider_filters or {}) if provider_lower == "alldebrid" and effective_open_id is not None: table_title = f"{provider_label} Files: {effective_open_id}".strip().rstrip(":") else: table_title = f"{provider_label}: {query}".strip().rstrip(":") preserve_order = provider_lower in {"youtube", "openlibrary", "loc", "torrent"} table_type = provider_name table_meta: Dict[str, Any] = {"provider": provider_name} if provider_lower == "hifi": view = self._get_hifi_view_from_query(query) table_meta["view"] = view table_type = f"hifi.{view}" elif provider_lower == "internetarchive": # Internet Archive search results are effectively folders (items); selecting @N # should open a list of downloadable files for the chosen item. table_type = "internetarchive.folder" table = ResultTable(table_title).set_preserve_order(preserve_order) table.set_table(table_type) if provider_lower == "alldebrid": table_meta["view"] = "files" if effective_open_id is not None else "folders" if effective_open_id is not None: table_meta["magnet_id"] = effective_open_id try: table.set_table_metadata(table_meta) except Exception: pass if provider_lower == "vimm": # Keep auto-staged download-file from inheriting raw query tokens; # only propagate provider hint so @N expands to a clean downloader call. table.set_source_command("search-file", ["-provider", provider_name]) else: table.set_source_command("search-file", list(args_list)) search_filters = dict(provider_filters) debug(f"[search-file] Calling {provider_name}.search(filters={search_filters})") if provider_lower == "alldebrid": search_open_id = parsed_open_id if parsed_open_id is not None else open_id view_value = "files" if search_open_id is not None else "folders" search_filters["view"] = view_value if search_open_id is not None: search_filters["magnet_id"] = search_open_id results = provider.search(query, limit=limit, filters=search_filters or None) debug(f"[search-file] {provider_name} -> {len(results or [])} result(s)") # HIFI artist UX: if there is exactly one artist match, auto-expand # directly to albums without requiring an explicit @1 selection. if ( provider_lower == "hifi" and table_meta.get("view") == "artist" and isinstance(results, list) and len(results) == 1 ): try: artist_res = results[0] artist_name = str(getattr(artist_res, "title", "") or "").strip() artist_md = getattr(artist_res, "full_metadata", None) artist_id = None if isinstance(artist_md, dict): raw_id = artist_md.get("artistId") or artist_md.get("id") try: artist_id = int(raw_id) if raw_id is not None else None except Exception: artist_id = None album_results = [] if hasattr(provider, "_albums_for_artist") and callable(getattr(provider, "_albums_for_artist")): try: album_results = provider._albums_for_artist( # type: ignore[attr-defined] artist_id=artist_id, artist_name=artist_name, limit=max(int(limit or 0), 200), ) except Exception: album_results = [] if album_results: results = album_results table_type = "hifi.album" table.set_table(table_type) table_meta["view"] = "album" try: table.set_table_metadata(table_meta) except Exception: pass except Exception: pass if not results: log(f"No results found for query: {query}", file=sys.stderr) if db is not None: db.append_worker_stdout(worker_id, json.dumps([], indent=2)) db.update_worker_status(worker_id, "completed") return 0 for search_result in results: item_dict = ( search_result.to_dict() if hasattr(search_result, "to_dict") else dict(search_result) if isinstance(search_result, dict) else {"title": str(search_result)} ) if "table" not in item_dict: item_dict["table"] = table_type # Ensure provider source is present so downstream cmdlets (select) can resolve provider if "source" not in item_dict: item_dict["source"] = provider_name row_index = len(table.rows) table.add_result(search_result) results_list.append(item_dict) ctx.emit(item_dict) if refresh_mode: ctx.set_last_result_table_preserve_history(table, results_list) else: ctx.set_last_result_table(table, results_list) ctx.set_current_stage_table(table) if db is not None: db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2)) db.update_worker_status(worker_id, "completed") return 0 except Exception as exc: log(f"Error searching provider '{provider_name}': {exc}", file=sys.stderr) import traceback debug(traceback.format_exc()) if db is not None: try: db.update_worker_status(worker_id, "error") except Exception: pass return 1 finally: if db is not None: try: db.__exit__(None, None, None) except Exception: pass # --- Execution ------------------------------------------------------ def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: """Search storage backends for files.""" if should_show_help(args): log(f"Cmdlet: {self.name}\nSummary: {self.summary}\nUsage: {self.usage}") return 0 args_list = [str(arg) for arg in (args or [])] refresh_mode = any( str(a).strip().lower() in {"--refresh", "-refresh"} for a in args_list ) def _format_command_title(command: str, raw_args: List[str]) -> str: def _quote(value: str) -> str: text = str(value) if not text: return '""' needs_quotes = any(ch.isspace() for ch in text) or '"' in text if not needs_quotes: return text return '"' + text.replace('"', '\\"') + '"' cleaned = [ str(a) for a in (raw_args or []) if str(a).strip().lower() not in {"--refresh", "-refresh"} ] if not cleaned: return command return " ".join([command, *[_quote(a) for a in cleaned]]) raw_title = None try: raw_title = ( ctx.get_current_stage_text("") if hasattr(ctx, "get_current_stage_text") else None ) except Exception: raw_title = None command_title = (str(raw_title).strip() if raw_title else "") or _format_command_title("search-file", list(args_list)) # Build dynamic flag variants from cmdlet arg definitions. # This avoids hardcoding flag spellings in parsing loops. flag_registry = self.build_flag_registry() query_flags = { f.lower() for f in (flag_registry.get("query") or {"-query", "--query"}) } store_flags = { f.lower() for f in (flag_registry.get("store") or {"-store", "--store"}) } limit_flags = { f.lower() for f in (flag_registry.get("limit") or {"-limit", "--limit"}) } provider_flags = { f.lower() for f in (flag_registry.get("provider") or {"-provider", "--provider"}) } open_flags = { f.lower() for f in (flag_registry.get("open") or {"-open", "--open"}) } # Parse arguments query = "" storage_backend: Optional[str] = None provider_name: Optional[str] = None open_id: Optional[int] = None limit = 100 limit_set = False searched_backends: List[str] = [] i = 0 while i < len(args_list): arg = args_list[i] low = arg.lower() if low in query_flags and i + 1 < len(args_list): chunk = args_list[i + 1] query = f"{query} {chunk}".strip() if query else chunk i += 2 continue if low in provider_flags and i + 1 < len(args_list): provider_name = args_list[i + 1] i += 2 continue if low in open_flags and i + 1 < len(args_list): try: open_id = int(args_list[i + 1]) except ValueError: log( f"Warning: Invalid open value '{args_list[i + 1]}', ignoring", file=sys.stderr, ) open_id = None i += 2 continue if low in store_flags and i + 1 < len(args_list): storage_backend = args_list[i + 1] i += 2 elif low in limit_flags and i + 1 < len(args_list): limit_set = True try: limit = int(args_list[i + 1]) except ValueError: limit = 100 i += 2 elif not arg.startswith("-"): query = f"{query} {arg}".strip() if query else arg i += 1 else: i += 1 query = query.strip() if provider_name: return self._run_provider_search( provider_name=provider_name, query=query, limit=limit, limit_set=limit_set, open_id=open_id, args_list=args_list, refresh_mode=refresh_mode, config=config, ) store_filter: Optional[str] = None if query: match = re.search(r"\bstore:([^\s,]+)", query, flags=re.IGNORECASE) if match: store_filter = match.group(1).strip() or None query = re.sub(r"\s*[,]?\s*store:[^\s,]+", " ", query, flags=re.IGNORECASE) query = re.sub(r"\s{2,}", " ", query) query = query.strip().strip(",") if store_filter and not storage_backend: storage_backend = store_filter hash_query = parse_hash_query(query) if not query: log("Provide a search query", file=sys.stderr) return 1 from API.folder import API_folder_store worker_id = str(uuid.uuid4()) from Store import Store storage_registry = Store(config=config or {}) library_root = get_local_storage_path(config or {}) if not library_root: # Fallback for search-file: if no global folder path is found, # try to use the specific backend mentioned in -store or the first available folder backend. if storage_backend: try: backend = storage_registry[storage_backend] if backend and type(backend).__name__ == "Folder": library_root = expand_path(getattr(backend, "_location", None)) except Exception: pass else: # Try all backends until we find a Folder one for name in storage_registry.list_backends(): try: backend = storage_registry[name] if type(backend).__name__ == "Folder": library_root = expand_path(getattr(backend, "_location", None)) if library_root: break except Exception: continue if not library_root: from SYS import pipeline as ctx_mod progress = None if hasattr(ctx_mod, "get_pipeline_state"): progress = ctx_mod.get_pipeline_state().live_progress if progress: try: progress.stop() except Exception: pass show_store_config_panel(["Folder Store"]) return 1 # Use context manager to ensure database is always closed with API_folder_store(library_root) as db: try: db.insert_worker( worker_id, "search-file", title=f"Search: {query}", description=f"Query: {query}", pipe=ctx.get_current_command_text(), ) results_list = [] from SYS import result_table import importlib importlib.reload(result_table) from SYS.result_table import ResultTable table = ResultTable(command_title) try: table.set_source_command("search-file", list(args_list)) except Exception: pass if hash_query: try: table.set_preserve_order(True) except Exception: pass from Store import Store storage = Store(config=config or {}) from Store._base import Store as BaseStore backend_to_search = storage_backend or None if hash_query: # Explicit hash list search: build rows from backend metadata. backends_to_try: List[str] = [] if backend_to_search: backends_to_try = [backend_to_search] else: backends_to_try = list(storage.list_backends()) found_any = False for h in hash_query: resolved_backend_name: Optional[str] = None resolved_backend = None for backend_name in backends_to_try: try: backend = storage[backend_name] except Exception: continue try: # If get_metadata works, consider it a hit; get_file can be optional (e.g. remote URL). meta = backend.get_metadata(h) if meta is None: continue resolved_backend_name = backend_name resolved_backend = backend break except Exception: continue if resolved_backend_name is None or resolved_backend is None: continue found_any = True searched_backends.append(resolved_backend_name) # Resolve a path/URL string if possible path_str: Optional[str] = None # IMPORTANT: avoid calling get_file() for remote backends. # For Hydrus, get_file() returns a browser URL (and may include access keys), # which should not be pulled during search/refresh. try: if type(resolved_backend).__name__ == "Folder": maybe_path = resolved_backend.get_file(h) if isinstance(maybe_path, Path): path_str = str(maybe_path) elif isinstance(maybe_path, str) and maybe_path: path_str = maybe_path except Exception: path_str = None meta_obj: Dict[str, Any] = {} try: meta_obj = resolved_backend.get_metadata(h) or {} except Exception: meta_obj = {} tags_list: List[str] = [] try: tag_result = resolved_backend.get_tag(h) if isinstance(tag_result, tuple) and tag_result: maybe_tags = tag_result[0] else: maybe_tags = tag_result if isinstance(maybe_tags, list): tags_list = [ str(t).strip() for t in maybe_tags if isinstance(t, str) and str(t).strip() ] except Exception: tags_list = [] title_from_tag: Optional[str] = None try: title_tag = first_title_tag(tags_list) if title_tag and ":" in title_tag: title_from_tag = title_tag.split(":", 1)[1].strip() except Exception: title_from_tag = None title = title_from_tag or meta_obj.get("title") or meta_obj.get( "name" ) if not title and path_str: try: title = Path(path_str).stem except Exception: title = path_str ext_val = meta_obj.get("ext") or meta_obj.get("extension") if not ext_val and path_str: try: ext_val = Path(path_str).suffix except Exception: ext_val = None if not ext_val and title: try: ext_val = Path(str(title)).suffix except Exception: ext_val = None size_bytes = meta_obj.get("size") if size_bytes is None: size_bytes = meta_obj.get("size_bytes") try: size_bytes_int: Optional[int] = ( int(size_bytes) if size_bytes is not None else None ) except Exception: size_bytes_int = None payload: Dict[str, Any] = { "title": str(title or h), "hash": h, "store": resolved_backend_name, "path": path_str, "ext": self._normalize_extension(ext_val), "size_bytes": size_bytes_int, "tag": tags_list, } table.add_result(payload) results_list.append(payload) ctx.emit(payload) if found_any: table.title = command_title if refresh_mode: ctx.set_last_result_table_preserve_history( table, results_list ) else: ctx.set_last_result_table(table, results_list) db.append_worker_stdout( worker_id, json.dumps(results_list, indent=2) ) db.update_worker_status(worker_id, "completed") return 0 log("No results found", file=sys.stderr) if refresh_mode: try: table.title = command_title ctx.set_last_result_table_preserve_history(table, []) except Exception: pass db.append_worker_stdout(worker_id, json.dumps([], indent=2)) db.update_worker_status(worker_id, "completed") return 0 if backend_to_search: searched_backends.append(backend_to_search) target_backend = storage[backend_to_search] if type(target_backend).search is BaseStore.search: log( f"Backend '{backend_to_search}' does not support searching", file=sys.stderr, ) db.update_worker_status(worker_id, "error") return 1 debug(f"[search-file] Searching '{backend_to_search}'") results = target_backend.search(query, limit=limit) debug( f"[search-file] '{backend_to_search}' -> {len(results or [])} result(s)" ) else: all_results = [] for backend_name in storage.list_searchable_backends(): try: backend = storage[backend_name] searched_backends.append(backend_name) debug(f"[search-file] Searching '{backend_name}'") backend_results = backend.search( query, limit=limit - len(all_results) ) debug( f"[search-file] '{backend_name}' -> {len(backend_results or [])} result(s)" ) if backend_results: all_results.extend(backend_results) if len(all_results) >= limit: break except Exception as exc: log( f"Backend {backend_name} search failed: {exc}", file=sys.stderr ) results = all_results[:limit] if results: for item in results: def _as_dict(obj: Any) -> Dict[str, Any]: if isinstance(obj, dict): return dict(obj) if hasattr(obj, "to_dict") and callable(getattr(obj, "to_dict")): return obj.to_dict() # type: ignore[arg-type] return { "title": str(obj) } item_dict = _as_dict(item) if store_filter: store_val = str(item_dict.get("store") or "").lower() if store_filter != store_val: continue normalized = self._ensure_storage_columns(item_dict) # Make hash/store available for downstream cmdlet without rerunning search hash_val = normalized.get("hash") store_val = normalized.get("store") or item_dict.get("store") if hash_val and not normalized.get("hash"): normalized["hash"] = hash_val if store_val and not normalized.get("store"): normalized["store"] = store_val table.add_result(normalized) results_list.append(normalized) ctx.emit(normalized) table.title = command_title if refresh_mode: ctx.set_last_result_table_preserve_history(table, results_list) else: ctx.set_last_result_table(table, results_list) db.append_worker_stdout( worker_id, json.dumps(results_list, indent=2) ) else: log("No results found", file=sys.stderr) if refresh_mode: try: table.title = command_title ctx.set_last_result_table_preserve_history(table, []) except Exception: pass db.append_worker_stdout(worker_id, json.dumps([], indent=2)) db.update_worker_status(worker_id, "completed") return 0 except Exception as exc: log(f"Search failed: {exc}", file=sys.stderr) import traceback traceback.print_exc(file=sys.stderr) try: db.update_worker_status(worker_id, "error") except Exception: pass return 1 CMDLET = search_file()