"""Search-store cmdlet: Search for files in storage backends (Folder, Hydrus).""" from __future__ import annotations from typing import Any, Dict, Sequence, List, Optional, Tuple from pathlib import Path from dataclasses import dataclass, field from collections import OrderedDict import re import json import sys from helper.logger import log, debug from ._shared import Cmdlet, CmdletArg, get_origin, get_field, should_show_help import pipeline as ctx # Optional dependencies try: import mutagen # type: ignore except ImportError: # pragma: no cover mutagen = None # type: ignore try: from config import get_hydrus_url, resolve_output_dir except Exception: # pragma: no cover get_hydrus_url = None # type: ignore resolve_output_dir = None # type: ignore try: from helper.hydrus import HydrusClient, HydrusRequestError except ImportError: # pragma: no cover HydrusClient = None # type: ignore HydrusRequestError = RuntimeError # type: ignore try: from helper.utils import sha256_file except ImportError: # pragma: no cover sha256_file = None # type: ignore try: from helper.utils_constant import mime_maps except ImportError: # pragma: no cover mime_maps = {} # type: ignore @dataclass(slots=True) class SearchRecord: path: str size_bytes: int | None = None duration_seconds: str | None = None tags: str | None = None hash_hex: str | None = None def as_dict(self) -> dict[str, str]: payload: dict[str, str] = {"path": self.path} if self.size_bytes is not None: payload["size"] = str(self.size_bytes) if self.duration_seconds: payload["duration"] = self.duration_seconds if self.tags: payload["tags"] = self.tags if self.hash_hex: payload["hash"] = self.hash_hex return payload STORAGE_ORIGINS = {"local", "hydrus", "folder"} class Search_Store(Cmdlet): """Class-based search-store cmdlet for searching storage backends.""" def __init__(self) -> None: super().__init__( name="search-store", summary="Search storage backends (Folder, Hydrus) for files.", usage="search-store [query] [-tag TAG] [-size >100MB|<50MB] [-type audio|video|image] [-duration >10:00] [-store BACKEND]", arg=[ CmdletArg("query", description="Search query string"), CmdletArg("tag", description="Filter by tag (can be used multiple times)"), CmdletArg("size", description="Filter by size: >100MB, <50MB, =10MB"), CmdletArg("type", description="Filter by type: audio, video, image, document"), CmdletArg("duration", description="Filter by duration: >10:00, <1:30:00"), CmdletArg("limit", type="integer", description="Limit results (default: 100)"), CmdletArg("store", description="Search specific storage backend (e.g., 'home', 'test', or 'default')"), ], detail=[ "Search across storage backends: Folder stores and Hydrus instances", "Use -store to search a specific backend by name", "Filter results by: tag, size, type, duration", "Results include hash for downstream commands (get-file, add-tag, etc.)", "Examples:", "search-store foo # Search all storage backends", "search-store -store home '*' # Search 'home' Hydrus instance", "search-store -store test 'video' # Search 'test' folder store", "search-store song -type audio # Search for audio files", "search-store movie -tag action # Search with tag filter", ], exec=self.run, ) self.register() # --- Helper methods ------------------------------------------------- @staticmethod def _normalize_extension(ext_value: Any) -> str: """Sanitize extension strings to alphanumerics and cap at 5 chars.""" ext = str(ext_value or "").strip().lstrip(".") for sep in (" ", "|", "(", "[", "{", ",", ";"): if sep in ext: ext = 
    def _ensure_storage_columns(self, payload: Dict[str, Any]) -> Dict[str, Any]:
        """Ensure storage results have the fields result_table needs for display."""
        store_value = str(get_origin(payload, "") or "").lower()
        if store_value not in STORAGE_ORIGINS:
            return payload
        # Ensure we have a title field.
        if "title" not in payload:
            payload["title"] = payload.get("name") or payload.get("target") or payload.get("path") or "Result"
        # Ensure we have an ext field, derived from the title's suffix.
        if "ext" not in payload:
            path_obj = Path(str(payload.get("title", "")))
            payload["ext"] = self._normalize_extension(path_obj.suffix) if path_obj.suffix else ""
        # size_bytes is already set by search_file(); result_table handles
        # formatting. Don't create manual columns here - letting result_table
        # drive the display keeps max_columns and formatting consistent.
        return payload

    # --- Execution ------------------------------------------------------

    def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
        """Search storage backends for files."""
        if should_show_help(args):
            log(f"Cmdlet: {self.name}\nSummary: {self.summary}\nUsage: {self.usage}")
            return 0

        args_list = [str(arg) for arg in (args or [])]

        # Parse arguments.
        query = ""
        tag_filters: List[str] = []
        size_filter: Optional[Tuple[str, int]] = None
        duration_filter: Optional[Tuple[str, float]] = None
        type_filter: Optional[str] = None
        storage_backend: Optional[str] = None
        limit = 100
        searched_backends: List[str] = []

        i = 0
        while i < len(args_list):
            arg = args_list[i]
            low = arg.lower()
            if low in {"-store", "--store", "-storage", "--storage"} and i + 1 < len(args_list):
                storage_backend = args_list[i + 1]
                i += 2
            elif low in {"-tag", "--tag"} and i + 1 < len(args_list):
                tag_filters.append(args_list[i + 1])
                i += 2
            elif low in {"-limit", "--limit"} and i + 1 < len(args_list):
                try:
                    limit = int(args_list[i + 1])
                except ValueError:
                    limit = 100
                i += 2
            elif low in {"-type", "--type"} and i + 1 < len(args_list):
                type_filter = args_list[i + 1].lower()
                i += 2
            elif low in {"-size", "--size"} and i + 1 < len(args_list):
                # Previously unparsed: the flag fell through to the generic
                # skip and its value (e.g. ">100MB") leaked into the query.
                size_filter = self._parse_size_filter(args_list[i + 1])
                i += 2
            elif low in {"-duration", "--duration"} and i + 1 < len(args_list):
                duration_filter = self._parse_duration_filter(args_list[i + 1])
                i += 2
            elif not arg.startswith("-"):
                query = f"{query} {arg}".strip() if query else arg
                i += 1
            else:
                i += 1

        # A "store:NAME" token inside the query acts like -store.
        store_filter: Optional[str] = None
        if query:
            match = re.search(r"\bstore:([^\s,]+)", query, flags=re.IGNORECASE)
            if match:
                store_filter = match.group(1).strip() or None
                query = re.sub(r"\s*[,]?\s*store:[^\s,]+", " ", query, flags=re.IGNORECASE)
                query = re.sub(r"\s{2,}", " ", query)
                query = query.strip().strip(',')
        if store_filter and not storage_backend:
            storage_backend = store_filter

        if not query:
            log("Provide a search query", file=sys.stderr)
            return 1

        from helper.folder_store import FolderDB
        from config import get_local_storage_path
        import uuid

        worker_id = str(uuid.uuid4())
        library_root = get_local_storage_path(config or {})
        if not library_root:
            log("No library root configured", file=sys.stderr)
            return 1

        # Use a context manager so the database is always closed.
        with FolderDB(library_root) as db:
            try:
                db.insert_worker(
                    worker_id,
                    "search-store",
                    title=f"Search: {query}",
                    description=f"Query: {query}",
                    pipe=ctx.get_current_command_text(),
                )
                results_list = []

                # Reload result_table so table-layout tweaks are picked up
                # without restarting the host process.
                import result_table
                import importlib
                importlib.reload(result_table)
                from result_table import ResultTable

                table_title = f"Search: {query}"
                if storage_backend:
                    table_title += f" [{storage_backend}]"
                table = ResultTable(table_title)

                from helper.store import FileStorage
                storage = FileStorage(config=config or {})
                backend_to_search = storage_backend or None
                if backend_to_search:
                    # Search a single named backend.
                    searched_backends.append(backend_to_search)
                    target_backend = storage[backend_to_search]
                    if not callable(getattr(target_backend, 'search_file', None)):
                        log(f"Backend '{backend_to_search}' does not support searching", file=sys.stderr)
                        db.update_worker_status(worker_id, 'error')
                        return 1
                    results = target_backend.search_file(query, limit=limit)
                else:
                    # Fan out across all searchable backends until the limit
                    # is reached.
                    from helper.hydrus import is_hydrus_available
                    hydrus_available = is_hydrus_available(config or {})
                    all_results = []
                    for backend_name in storage.list_searchable_backends():
                        if backend_name.startswith("hydrus") and not hydrus_available:
                            continue
                        searched_backends.append(backend_name)
                        try:
                            backend_results = storage[backend_name].search_file(query, limit=limit - len(all_results))
                            if backend_results:
                                all_results.extend(backend_results)
                            if len(all_results) >= limit:
                                break
                        except Exception as exc:
                            log(f"Backend {backend_name} search failed: {exc}", file=sys.stderr)
                    results = all_results[:limit]
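                # Client-side application of -size/-duration. This is a hedged
                # sketch, not an established backend contract: it assumes hits
                # are dicts that may carry numeric "size_bytes" and
                # "duration_seconds" keys (mirroring SearchRecord), and items
                # missing the key pass the filter unchanged. Tag and type
                # filters could hook in at this same point.
                def _passes(item: Any, spec: Optional[Tuple[str, Any]], key: str) -> bool:
                    if not spec or not isinstance(item, dict) or item.get(key) is None:
                        return True
                    ops = {">": float.__gt__, "<": float.__lt__, "=": float.__eq__}
                    try:
                        return ops[spec[0]](float(item[key]), float(spec[1]))
                    except (TypeError, ValueError, KeyError):
                        return True

                if size_filter or duration_filter:
                    results = [
                        r for r in results or []
                        if _passes(r, size_filter, "size_bytes")
                        and _passes(r, duration_filter, "duration_seconds")
                    ]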
                def _format_storage_label(name: str) -> str:
                    clean = str(name or "").strip()
                    if not clean:
                        return "Unknown"
                    return clean.replace("_", " ").title()

                # Tally hits per origin so the table header can summarize
                # which backends produced results.
                storage_counts: OrderedDict[str, int] = OrderedDict((name, 0) for name in searched_backends)
                for item in results or []:
                    origin = get_origin(item)
                    if not origin:
                        continue
                    key = str(origin).lower()
                    if key not in storage_counts:
                        storage_counts[key] = 0
                    storage_counts[key] += 1
                if storage_counts or query:
                    display_counts = OrderedDict(
                        (_format_storage_label(name), count) for name, count in storage_counts.items()
                    )
                    summary_line = table.set_storage_summary(display_counts, query, inline=True)
                    if summary_line:
                        table.title = summary_line

                def _as_dict(obj: Any) -> Dict[str, Any]:
                    if isinstance(obj, dict):
                        return dict(obj)
                    if hasattr(obj, "to_dict") and callable(getattr(obj, "to_dict")):
                        return obj.to_dict()  # type: ignore[arg-type]
                    return {"title": str(obj)}

                if results:
                    for item in results:
                        item_dict = _as_dict(item)
                        if store_filter:
                            origin_val = str(get_origin(item_dict) or "").lower()
                            if store_filter.lower() != origin_val:
                                continue
                        normalized = self._ensure_storage_columns(item_dict)
                        # Make hash/store available for downstream cmdlets
                        # (get-file, add-tag, etc.) without rerunning the
                        # search. The original hash branch compared the field
                        # against itself and could never fire; fall back to
                        # the SearchRecord-style "hash_hex" key instead.
                        hash_val = normalized.get("hash") or normalized.get("hash_hex")
                        store_val = normalized.get("store") or get_origin(item_dict)
                        if hash_val and not normalized.get("hash"):
                            normalized["hash"] = hash_val
                        if store_val and not normalized.get("store"):
                            normalized["store"] = store_val
                        table.add_result(normalized)
                        results_list.append(normalized)
                        ctx.emit(normalized)

                    # Debug: verify the table rows match the emitted items.
                    debug(f"[search-store] Added {len(table.rows)} rows to table, {len(results_list)} items to results_list")
                    if len(table.rows) != len(results_list):
                        debug(f"[search-store] WARNING: Table/items mismatch! rows={len(table.rows)} items={len(results_list)}", file=sys.stderr)
                    ctx.set_last_result_table(table, results_list)
                    db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2))
                else:
                    log("No results found", file=sys.stderr)
                    db.append_worker_stdout(worker_id, json.dumps([], indent=2))

                db.update_worker_status(worker_id, 'completed')
                return 0
            except Exception as exc:
                log(f"Search failed: {exc}", file=sys.stderr)
                import traceback
                traceback.print_exc(file=sys.stderr)
                try:
                    db.update_worker_status(worker_id, 'error')
                except Exception:
                    pass
                return 1


CMDLET = Search_Store()
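
# Minimal manual smoke test; a hedged sketch, not part of the cmdlet contract.
# It assumes this module is run inside the repository package (so helper.*,
# config, and pipeline resolve) with a configured library root; with an empty
# config it simply logs "No library root configured" and exits non-zero.
if __name__ == "__main__":  # pragma: no cover
    raise SystemExit(CMDLET.run(None, sys.argv[1:], {}))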