AST

cmdlets/search_file.py · 351 lines · new file

@@ -0,0 +1,351 @@
"""Search-file cmdlet: Search for files by query, tags, size, type, duration, etc."""
from __future__ import annotations

import json
import os
import shutil
import subprocess
import sys
from dataclasses import dataclass, field
from fnmatch import fnmatchcase
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple

from helper.file_storage import FileStorage
from helper.logger import log, debug
from helper.search_provider import get_provider, list_providers, SearchResult
from metadata import import_pending_sidecars

from . import register
from ._shared import Cmdlet, CmdletArg
import models
import pipeline as ctx

# Optional dependencies
try:
    import mutagen  # type: ignore
except ImportError:  # pragma: no cover
    mutagen = None  # type: ignore

try:
    from config import get_hydrus_url, resolve_output_dir
except Exception:  # pragma: no cover
    get_hydrus_url = None  # type: ignore
    resolve_output_dir = None  # type: ignore

try:
    from helper.hydrus import HydrusClient, HydrusRequestError
except ImportError:  # pragma: no cover
    HydrusClient = None  # type: ignore
    HydrusRequestError = RuntimeError  # type: ignore

try:
    from helper.utils import sha256_file
except ImportError:  # pragma: no cover
    sha256_file = None  # type: ignore

try:
    from helper.utils_constant import mime_maps
except ImportError:  # pragma: no cover
    mime_maps = {}  # type: ignore


# ============================================================================
# Data Classes (from helper/search.py)
# ============================================================================

@dataclass(slots=True)
class SearchRecord:
    path: str
    size_bytes: int | None = None
    duration_seconds: str | None = None
    tags: str | None = None
    hash_hex: str | None = None

    def as_dict(self) -> dict[str, str]:
        payload: dict[str, str] = {"path": self.path}
        if self.size_bytes is not None:
            payload["size"] = str(self.size_bytes)
        if self.duration_seconds:
            payload["duration"] = self.duration_seconds
        if self.tags:
            payload["tags"] = self.tags
        if self.hash_hex:
            payload["hash"] = self.hash_hex
        return payload


@dataclass
class ResultItem:
    origin: str
    title: str
    detail: str
    annotations: List[str]
    target: str
    media_kind: str = "other"
    hash_hex: Optional[str] = None
    columns: List[tuple[str, str]] = field(default_factory=list)
    tag_summary: Optional[str] = None
    duration_seconds: Optional[float] = None
    size_bytes: Optional[int] = None
    full_metadata: Optional[Dict[str, Any]] = None
    tags: Optional[set[str]] = field(default_factory=set)
    relationships: Optional[List[str]] = field(default_factory=list)
    known_urls: Optional[List[str]] = field(default_factory=list)

    def to_dict(self) -> Dict[str, Any]:
        payload: Dict[str, Any] = {"title": self.title}

        # Always include these core fields for downstream cmdlets (get-file, download-data, etc.)
        payload["origin"] = self.origin
        payload["target"] = self.target
        payload["media_kind"] = self.media_kind

        # Always include full_metadata if present (needed by download-data, etc.).
        # This is NOT for display, but for downstream processing.
        if self.full_metadata:
            payload["full_metadata"] = self.full_metadata

        # Include columns if defined (the result renderer uses these for display);
        # if no columns, include the detail for backwards compatibility.
        if self.columns:
            payload["columns"] = list(self.columns)
        else:
            payload["detail"] = self.detail
        payload["annotations"] = list(self.annotations)

        # Include optional fields
        if self.hash_hex:
            payload["hash"] = self.hash_hex
        if self.tag_summary:
            payload["tags"] = self.tag_summary
        if self.tags:
            payload["tags_set"] = list(self.tags)
        if self.relationships:
            payload["relationships"] = self.relationships
        if self.known_urls:
            payload["known_urls"] = self.known_urls
        return payload
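

# Illustrative ResultItem.to_dict() payload for a storage hit (values are
# hypothetical): {"title": "song.mp3", "origin": "hydrus", "target": "...",
# "media_kind": "audio", "columns": [("Title", "song.mp3"), ("Store", "hydrus")],
# "annotations": [], "hash": "...", "tags": "artist:foo"}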


STORAGE_ORIGINS = {"local", "hydrus", "debrid"}


def _ensure_storage_columns(payload: Dict[str, Any]) -> Dict[str, Any]:
    """Attach Title/Store columns for storage-origin results to keep CLI display compact."""
    origin_value = str(payload.get("origin") or payload.get("source") or "").lower()
    if origin_value not in STORAGE_ORIGINS:
        return payload
    title = payload.get("title") or payload.get("name") or payload.get("target") or payload.get("path") or "Result"
    store_label = payload.get("origin") or payload.get("source") or origin_value
    normalized = dict(payload)
    normalized["columns"] = [("Title", str(title)), ("Store", str(store_label))]
    return normalized
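

# For example (illustrative values):
#   _ensure_storage_columns({"origin": "local", "title": "clip.mp4"})
#   -> {"origin": "local", "title": "clip.mp4",
#       "columns": [("Title", "clip.mp4"), ("Store", "local")]}
# Results from non-storage origins (e.g. "libgen") pass through unchanged.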


CMDLET = Cmdlet(
    name="search-file",
    summary="Unified search cmdlet for searchable backends (Hydrus, Local, Debrid, LibGen, OpenLibrary, Soulseek).",
    usage="search-file [query] [-tag TAG] [-size >100MB|<50MB] [-type audio|video|image] [-duration >10:00] [-storage BACKEND] [-provider PROVIDER]",
    args=[
        CmdletArg("query", description="Search query string"),
        CmdletArg("tag", description="Filter by tag (can be used multiple times)"),
        CmdletArg("size", description="Filter by size: >100MB, <50MB, =10MB"),
        CmdletArg("type", description="Filter by type: audio, video, image, document"),
        CmdletArg("duration", description="Filter by duration: >10:00, <1:30:00"),
        CmdletArg("limit", type="integer", description="Limit results (default: 100)"),
        CmdletArg("storage", description="Search storage backend: hydrus, local, debrid (default: all searchable)"),
        CmdletArg("provider", description="Search provider: libgen, openlibrary, soulseek, debrid, local (overrides -storage)"),
    ],
    details=[
        "Search across multiple providers: File storage (Hydrus, Local, Debrid), Books (LibGen, OpenLibrary), Music (Soulseek)",
        "Use -provider to search a specific source, or -storage to search file backends",
        "Filter results by: tag, size, type, duration",
        "Results can be piped to other commands",
        "Examples:",
        "  search-file foo                                    # Search all file backends",
        "  search-file -provider libgen 'python programming'  # Search LibGen books",
        "  search-file -provider debrid 'movie'               # Search AllDebrid magnets",
        "  search-file 'music' -provider soulseek             # Search Soulseek P2P",
        "  search-file -provider openlibrary 'tolkien'        # Search OpenLibrary",
        "  search-file song -storage hydrus -type audio       # Search only Hydrus audio",
        "  search-file movie -tag action -provider debrid     # Debrid with filters",
    ],
)


@register(["search-file", "search"])
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
    """Search across multiple providers: Hydrus, Local, Debrid, LibGen, etc."""
    args_list = [str(arg) for arg in (args or [])]

    # Parse arguments
    query = ""
    tag_filters: List[str] = []
    size_filter: Optional[Tuple[str, int]] = None
    duration_filter: Optional[Tuple[str, float]] = None
    type_filter: Optional[str] = None
    storage_backend: Optional[str] = None
    provider_name: Optional[str] = None
    limit = 100

    # Simple argument parsing
    i = 0
    while i < len(args_list):
        arg = args_list[i]
        low = arg.lower()

        if low in {"-provider", "--provider"} and i + 1 < len(args_list):
            provider_name = args_list[i + 1].lower()
            i += 2
        elif low in {"-storage", "--storage"} and i + 1 < len(args_list):
            storage_backend = args_list[i + 1].lower()
            i += 2
        elif low in {"-tag", "--tag"} and i + 1 < len(args_list):
            tag_filters.append(args_list[i + 1])
            i += 2
        elif low in {"-limit", "--limit"} and i + 1 < len(args_list):
            try:
                limit = int(args_list[i + 1])
            except ValueError:
                limit = 100
            i += 2
        elif low in {"-type", "--type"} and i + 1 < len(args_list):
            type_filter = args_list[i + 1].lower()
            i += 2
        elif not query and not arg.startswith("-"):
            query = arg
            i += 1
        else:
            i += 1
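
    # NOTE: -size and -duration are advertised in CMDLET but not handled by this
    # parser, and the collected tag/type/size/duration filters are not yet
    # applied to the results below.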

    if not query:
        log("Provide a search query", file=sys.stderr)
        return 1

    # Initialize worker for this search command
    import uuid

    from config import get_local_storage_path
    from helper.local_library import LocalLibraryDB

    worker_id = str(uuid.uuid4())
    library_root = get_local_storage_path(config or {})
    if not library_root:
        log("No library root configured", file=sys.stderr)
        return 1
    db = LocalLibraryDB(library_root)
    db.insert_worker(
        worker_id,
        "search",
        title=f"Search: {query}",
        description=f"Query: {query}",
        pipe=ctx.get_current_command_text(),
    )
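
    # Worker lifecycle: the row inserted above is updated as the search runs
    # (results appended to its stdout, status set to 'completed' or 'error'),
    # and the DB handle is always closed in the finally block at the end.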

    try:
        results_list: List[Dict[str, Any]] = []

        # Try to search using a provider (libgen, soulseek, debrid, openlibrary)
        if provider_name:
            debug(f"[search_file] Attempting provider search with: {provider_name}")
            provider = get_provider(provider_name, config)
            if not provider:
                log(f"Provider '{provider_name}' not available", file=sys.stderr)
                db.update_worker_status(worker_id, 'error')
                return 1

            debug(f"[search_file] Provider loaded, calling search with query: {query}")
            search_result = provider.search(query, limit=limit)
            debug(f"[search_file] Provider search returned {len(search_result)} results")

            for item in search_result:
                item_dict = item.to_dict()
                results_list.append(item_dict)
                ctx.emit(item_dict)

            debug(f"[search_file] Emitted {len(results_list)} results")

            # Write results to worker stdout
            db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2))
            db.update_worker_status(worker_id, 'completed')
            return 0

        # Otherwise search using FileStorage (Hydrus, Local, Debrid backends)
        storage = FileStorage(config=config or {})

        backend_to_search = storage_backend or None
        if backend_to_search:
            # Check whether the requested backend is available
            if backend_to_search == "hydrus":
                from helper.hydrus import is_hydrus_available
                if not is_hydrus_available(config or {}):
                    log("Backend 'hydrus' is not available (Hydrus service not running)", file=sys.stderr)
                    db.update_worker_status(worker_id, 'error')
                    return 1
            if not storage.supports_search(backend_to_search):
                log(f"Backend '{backend_to_search}' does not support searching", file=sys.stderr)
                db.update_worker_status(worker_id, 'error')
                return 1
            results = storage[backend_to_search].search(query, limit=limit)
        else:
            # Search all searchable backends, but skip Hydrus if unavailable
            from helper.hydrus import is_hydrus_available
            hydrus_available = is_hydrus_available(config or {})

            all_results = []
            for backend_name in storage.list_searchable_backends():
                if backend_name == "hydrus" and not hydrus_available:
                    continue
                try:
                    backend_results = storage[backend_name].search(query, limit=limit - len(all_results))
                    if backend_results:
                        all_results.extend(backend_results)
                        if len(all_results) >= limit:
                            break
                except Exception as exc:
                    log(f"Backend {backend_name} search failed: {exc}", file=sys.stderr)
            results = all_results[:limit]
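
        # Backends may return plain dicts or ResultItem objects; normalize both
        # shapes so ctx.emit() always receives a dict.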
        # Emit results and collect them for the workers table
        if results:
            for item in results:
                if isinstance(item, dict):
                    normalized = _ensure_storage_columns(item)
                    results_list.append(normalized)
                    ctx.emit(normalized)
                elif isinstance(item, ResultItem):
                    item_dict = item.to_dict()
                    results_list.append(item_dict)
                    ctx.emit(item_dict)
                else:
                    item_dict = {"title": str(item)}
                    results_list.append(item_dict)
                    ctx.emit(item_dict)

            # Write results to worker stdout
            db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2))
        else:
            log("No results found", file=sys.stderr)
            db.append_worker_stdout(worker_id, json.dumps([], indent=2))

        db.update_worker_status(worker_id, 'completed')
        return 0

    except Exception as exc:
        log(f"Search failed: {exc}", file=sys.stderr)
        import traceback
        traceback.print_exc(file=sys.stderr)
        db.update_worker_status(worker_id, 'error')
        return 1

    finally:
        # Always close the database connection
        try:
            db.close()
        except Exception:
            pass