Files
Medios-Macina/cmdlets/search_file.py
2025-11-25 20:09:33 -08:00

352 lines
12 KiB
Python

"""Search-file cmdlet: Search for files by query, tags, size, type, duration, etc."""
from __future__ import annotations
from typing import Any, Dict, Sequence, List, Optional, Tuple, Callable
from fnmatch import fnmatchcase
from pathlib import Path
from dataclasses import dataclass, field
import json
import os
import sys
from helper.logger import log, debug
import shutil
import subprocess
from helper.file_storage import FileStorage
from helper.search_provider import get_provider, list_providers, SearchResult
from metadata import import_pending_sidecars
from . import register
from ._shared import Cmdlet, CmdletArg
import models
import pipeline as ctx
# Optional dependencies
try:
import mutagen # type: ignore
except ImportError: # pragma: no cover
mutagen = None # type: ignore
try:
from config import get_hydrus_url, resolve_output_dir
except Exception: # pragma: no cover
get_hydrus_url = None # type: ignore
resolve_output_dir = None # type: ignore
try:
from helper.hydrus import HydrusClient, HydrusRequestError
except ImportError: # pragma: no cover
HydrusClient = None # type: ignore
HydrusRequestError = RuntimeError # type: ignore
try:
from helper.utils import sha256_file
except ImportError: # pragma: no cover
sha256_file = None # type: ignore
try:
from helper.utils_constant import mime_maps
except ImportError: # pragma: no cover
mime_maps = {} # type: ignore
# ============================================================================
# Data Classes (from helper/search.py)
# ============================================================================
@dataclass(slots=True)
class SearchRecord:
path: str
size_bytes: int | None = None
duration_seconds: str | None = None
tags: str | None = None
hash_hex: str | None = None
def as_dict(self) -> dict[str, str]:
payload: dict[str, str] = {"path": self.path}
if self.size_bytes is not None:
payload["size"] = str(self.size_bytes)
if self.duration_seconds:
payload["duration"] = self.duration_seconds
if self.tags:
payload["tags"] = self.tags
if self.hash_hex:
payload["hash"] = self.hash_hex
return payload
@dataclass
class ResultItem:
    """Rich search hit carrying display columns plus downstream metadata."""

    origin: str
    title: str
    detail: str
    annotations: List[str]
    target: str
    media_kind: str = "other"
    hash_hex: Optional[str] = None
    columns: List[tuple[str, str]] = field(default_factory=list)
    tag_summary: Optional[str] = None
    duration_seconds: Optional[float] = None
    size_bytes: Optional[int] = None
    full_metadata: Optional[Dict[str, Any]] = None
    tags: Optional[set[str]] = field(default_factory=set)
    relationships: Optional[List[str]] = field(default_factory=list)
    known_urls: Optional[List[str]] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        """Flatten into a plain dict for pipeline emission."""
        # Core routing fields are always present for downstream cmdlets
        # (get-file, download-data, etc).
        out: Dict[str, Any] = {
            "title": self.title,
            "origin": self.origin,
            "target": self.target,
            "media_kind": self.media_kind,
        }
        # full_metadata is carried for downstream processing, not display.
        if self.full_metadata:
            out["full_metadata"] = self.full_metadata
        # Renderers prefer explicit columns; fall back to the free-form
        # detail string for backwards compatibility when none were set.
        if self.columns:
            out["columns"] = list(self.columns)
        else:
            out["detail"] = self.detail
        out["annotations"] = list(self.annotations)
        # Remaining fields are included only when populated.
        if self.hash_hex:
            out["hash"] = self.hash_hex
        if self.tag_summary:
            out["tags"] = self.tag_summary
        if self.tags:
            out["tags_set"] = list(self.tags)
        if self.relationships:
            out["relationships"] = self.relationships
        if self.known_urls:
            out["known_urls"] = self.known_urls
        return out
STORAGE_ORIGINS = {"local", "hydrus", "debrid"}
def _ensure_storage_columns(payload: Dict[str, Any]) -> Dict[str, Any]:
"""Attach Title/Store columns for storage-origin results to keep CLI display compact."""
origin_value = str(payload.get("origin") or payload.get("source") or "").lower()
if origin_value not in STORAGE_ORIGINS:
return payload
title = payload.get("title") or payload.get("name") or payload.get("target") or payload.get("path") or "Result"
store_label = payload.get("origin") or payload.get("source") or origin_value
normalized = dict(payload)
normalized["columns"] = [("Title", str(title)), ("Store", str(store_label))]
return normalized
# Static registration metadata consumed by the cmdlet framework: command
# name, one-line summary, usage string, accepted arguments, and help text.
# NOTE(review): the -size and -duration args advertised here are not
# currently parsed by _run below -- confirm intended behavior.
CMDLET = Cmdlet(
    name="search-file",
    summary="Unified search cmdlet for searchable backends (Hydrus, Local, Debrid, LibGen, OpenLibrary, Soulseek).",
    usage="search-file [query] [-tag TAG] [-size >100MB|<50MB] [-type audio|video|image] [-duration >10:00] [-storage BACKEND] [-provider PROVIDER]",
    args=[
        CmdletArg("query", description="Search query string"),
        CmdletArg("tag", description="Filter by tag (can be used multiple times)"),
        CmdletArg("size", description="Filter by size: >100MB, <50MB, =10MB"),
        CmdletArg("type", description="Filter by type: audio, video, image, document"),
        CmdletArg("duration", description="Filter by duration: >10:00, <1:30:00"),
        CmdletArg("limit", type="integer", description="Limit results (default: 100)"),
        CmdletArg("storage", description="Search storage backend: hydrus, local, debrid (default: all searchable)"),
        CmdletArg("provider", description="Search provider: libgen, openlibrary, soulseek, debrid, local (overrides -storage)"),
    ],
    details=[
        "Search across multiple providers: File storage (Hydrus, Local, Debrid), Books (LibGen, OpenLibrary), Music (Soulseek)",
        "Use -provider to search a specific source, or -storage to search file backends",
        "Filter results by: tag, size, type, duration",
        "Results can be piped to other commands",
        "Examples:",
        "search-file foo # Search all file backends",
        "search-file -provider libgen 'python programming' # Search LibGen books",
        "search-file -provider debrid 'movie' # Search AllDebrid magnets",
        "search-file 'music' -provider soulseek # Search Soulseek P2P",
        "search-file -provider openlibrary 'tolkien' # Search OpenLibrary",
        "search-file song -storage hydrus -type audio # Search only Hydrus audio",
        "search-file movie -tag action -provider debrid # Debrid with filters",
    ],
)
@register(["search-file", "search"])
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
    """Search across multiple providers: Hydrus, Local, Debrid, LibGen, etc.

    Parses CLI-style flags out of ``args``, records the search as a worker
    row in the local library DB, then either queries one named provider
    (-provider) or the FileStorage backends (-storage, or every searchable
    backend by default). Each hit is emitted onto the pipeline via
    ``ctx.emit`` and the full result list is written to the worker's stdout
    log as JSON.

    Returns 0 on success (including "no results"), 1 on bad input or error.
    """
    args_list = [str(arg) for arg in (args or [])]
    # Parse arguments
    query = ""
    tag_filters: List[str] = []
    # NOTE(review): size_filter and duration_filter are advertised in CMDLET
    # but never assigned by the parser below, and tag_filters / type_filter
    # are collected but never applied to the results -- confirm whether
    # these filters should be enforced here or by the backends.
    size_filter: Optional[Tuple[str, int]] = None
    duration_filter: Optional[Tuple[str, float]] = None
    type_filter: Optional[str] = None
    storage_backend: Optional[str] = None
    provider_name: Optional[str] = None
    limit = 100
    # Simple argument parsing
    i = 0
    while i < len(args_list):
        arg = args_list[i]
        low = arg.lower()
        if low in {"-provider", "--provider"} and i + 1 < len(args_list):
            provider_name = args_list[i + 1].lower()
            i += 2
        elif low in {"-storage", "--storage"} and i + 1 < len(args_list):
            storage_backend = args_list[i + 1].lower()
            i += 2
        elif low in {"-tag", "--tag"} and i + 1 < len(args_list):
            tag_filters.append(args_list[i + 1])
            i += 2
        elif low in {"-limit", "--limit"} and i + 1 < len(args_list):
            try:
                limit = int(args_list[i + 1])
            except ValueError:
                # Non-numeric limit silently falls back to the default.
                limit = 100
            i += 2
        elif low in {"-type", "--type"} and i + 1 < len(args_list):
            type_filter = args_list[i + 1].lower()
            i += 2
        elif not query and not arg.startswith("-"):
            # First bare (non-flag) token becomes the query string.
            query = arg
            i += 1
        else:
            # Unrecognized flags and extra bare tokens are ignored.
            i += 1
    if not query:
        log("Provide a search query", file=sys.stderr)
        return 1
    # Initialize worker for this search command
    from helper.local_library import LocalLibraryDB
    from config import get_local_storage_path
    import uuid
    worker_id = str(uuid.uuid4())
    library_root = get_local_storage_path(config or {})
    if not library_root:
        log("No library root configured", file=sys.stderr)
        return 1
    db = LocalLibraryDB(library_root)
    # Record this search as a worker row so its progress/output can be
    # inspected later via the workers table.
    db.insert_worker(
        worker_id,
        "search",
        title=f"Search: {query}",
        description=f"Query: {query}",
        pipe=ctx.get_current_command_text()
    )
    try:
        results_list: List[Dict[str, Any]] = []
        # Try to search using provider (libgen, soulseek, debrid, openlibrary)
        if provider_name:
            debug(f"[search_file] Attempting provider search with: {provider_name}")
            provider = get_provider(provider_name, config)
            if not provider:
                log(f"Provider '{provider_name}' not available", file=sys.stderr)
                db.update_worker_status(worker_id, 'error')
                return 1
            debug(f"[search_file] Provider loaded, calling search with query: {query}")
            search_result = provider.search(query, limit=limit)
            debug(f"[search_file] Provider search returned {len(search_result)} results")
            for item in search_result:
                item_dict = item.to_dict()
                results_list.append(item_dict)
                ctx.emit(item_dict)
            debug(f"[search_file] Emitted {len(results_list)} results")
            # Write results to worker stdout
            db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2))
            db.update_worker_status(worker_id, 'completed')
            return 0
        # Otherwise search using FileStorage (Hydrus, Local, Debrid backends)
        # NOTE(review): FileStorage is already imported at module top; this
        # local re-import is redundant but harmless.
        from helper.file_storage import FileStorage
        storage = FileStorage(config=config or {})
        backend_to_search = storage_backend or None
        if backend_to_search:
            # Check if requested backend is available
            if backend_to_search == "hydrus":
                from helper.hydrus import is_hydrus_available
                if not is_hydrus_available(config or {}):
                    log(f"Backend 'hydrus' is not available (Hydrus service not running)", file=sys.stderr)
                    db.update_worker_status(worker_id, 'error')
                    return 1
            if not storage.supports_search(backend_to_search):
                log(f"Backend '{backend_to_search}' does not support searching", file=sys.stderr)
                db.update_worker_status(worker_id, 'error')
                return 1
            results = storage[backend_to_search].search(query, limit=limit)
        else:
            # Search all searchable backends, but skip hydrus if unavailable
            from helper.hydrus import is_hydrus_available
            hydrus_available = is_hydrus_available(config or {})
            all_results = []
            for backend_name in storage.list_searchable_backends():
                # Skip hydrus if not available
                if backend_name == "hydrus" and not hydrus_available:
                    continue
                try:
                    # Shrink the per-backend limit by what we already hold so
                    # the combined total never exceeds the requested limit.
                    backend_results = storage[backend_name].search(query, limit=limit - len(all_results))
                    if backend_results:
                        all_results.extend(backend_results)
                    if len(all_results) >= limit:
                        break
                except Exception as exc:
                    # A failing backend is reported but does not abort the
                    # overall search; remaining backends still run.
                    log(f"Backend {backend_name} search failed: {exc}", file=sys.stderr)
            results = all_results[:limit]
        # Emit results and collect for workers table
        if results:
            for item in results:
                if isinstance(item, dict):
                    normalized = _ensure_storage_columns(item)
                    results_list.append(normalized)
                    ctx.emit(normalized)
                elif isinstance(item, ResultItem):
                    item_dict = item.to_dict()
                    results_list.append(item_dict)
                    ctx.emit(item_dict)
                else:
                    # Fallback: coerce unknown result objects to a
                    # title-only dict so emission never fails.
                    item_dict = {"title": str(item)}
                    results_list.append(item_dict)
                    ctx.emit(item_dict)
            # Write results to worker stdout
            db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2))
        else:
            log("No results found", file=sys.stderr)
            db.append_worker_stdout(worker_id, json.dumps([], indent=2))
        db.update_worker_status(worker_id, 'completed')
        return 0
    except Exception as exc:
        log(f"Search failed: {exc}", file=sys.stderr)
        import traceback
        traceback.print_exc(file=sys.stderr)
        db.update_worker_status(worker_id, 'error')
        return 1
    finally:
        # Always close the database connection
        try:
            db.close()
        except Exception:
            pass