"""Search-store cmdlet: Search for files in storage backends (Folder, Hydrus)."""
from __future__ import annotations
from typing import Any, Dict, Sequence, List, Optional, Tuple
from pathlib import Path
from dataclasses import dataclass, field
from collections import OrderedDict
import re
import json
import sys
from SYS.logger import log, debug
from ._shared import Cmdlet, CmdletArg, get_field, should_show_help
import pipeline as ctx
# Optional dependencies (guarded so this module imports even when they are missing)
try:
    import mutagen  # type: ignore
except ImportError:  # pragma: no cover
    mutagen = None  # type: ignore

try:
    from config import get_hydrus_url, resolve_output_dir
except Exception:  # pragma: no cover
    get_hydrus_url = None  # type: ignore
    resolve_output_dir = None  # type: ignore

try:
    from API.HydrusNetwork import HydrusNetwork, HydrusRequestError
except ImportError:  # pragma: no cover
    HydrusNetwork = None  # type: ignore
    HydrusRequestError = RuntimeError  # type: ignore

try:
    from SYS.utils import sha256_file
except ImportError:  # pragma: no cover
    sha256_file = None  # type: ignore

try:
    from SYS.utils_constant import mime_maps
except ImportError:  # pragma: no cover
    mime_maps = {}  # type: ignore


@dataclass(slots=True)
class SearchRecord:
    path: str
    size_bytes: int | None = None
    duration_seconds: str | None = None
    tag: str | None = None
    hash: str | None = None

    def as_dict(self) -> dict[str, str]:
        payload: dict[str, str] = {"path": self.path}
        if self.size_bytes is not None:
            payload["size"] = str(self.size_bytes)
        if self.duration_seconds:
            payload["duration"] = self.duration_seconds
        if self.tag:
            payload["tag"] = self.tag
        if self.hash:
            payload["hash"] = self.hash
        return payload

STORAGE_ORIGINS = {"local", "hydrus", "folder"}


class Search_Store(Cmdlet):
    """Class-based search-store cmdlet for searching storage backends."""

    def __init__(self) -> None:
        super().__init__(
            name="search-store",
            summary="Search storage backends (Folder, Hydrus) for files.",
            usage="search-store [query] [-tag TAG] [-size >100MB|<50MB] [-type audio|video|image] [-duration >10:00] [-store BACKEND]",
            arg=[
                CmdletArg("query", description="Search query string"),
                CmdletArg("tag", description="Filter by tag (can be used multiple times)"),
                CmdletArg("size", description="Filter by size: >100MB, <50MB, =10MB"),
                CmdletArg("type", description="Filter by type: audio, video, image, document"),
                CmdletArg("duration", description="Filter by duration: >10:00, <1:30:00"),
                CmdletArg("limit", type="integer", description="Limit results (default: 100)"),
                CmdletArg("store", description="Search a specific storage backend (e.g. 'home', 'test', or 'default')"),
            ],
            detail=[
                "Search across storage backends: Folder stores and Hydrus instances",
                "Use -store to search a specific backend by name",
                "Filter results by: tag, size, type, duration",
                "Results include hash for downstream commands (get-file, add-tag, etc.)",
                "Examples:",
                "search-store foo                  # Search all storage backends",
                "search-store -store home '*'      # Search 'home' Hydrus instance",
                "search-store -store test 'video'  # Search 'test' folder store",
                "search-store song -type audio     # Search for audio files",
                "search-store movie -tag action    # Search with tag filter",
            ],
            exec=self.run,
        )
        self.register()
    # --- Helper methods -------------------------------------------------

    @staticmethod
    def _normalize_extension(ext_value: Any) -> str:
        """Sanitize extension strings to alphanumerics and cap at 5 chars."""
        ext = str(ext_value or "").strip().lstrip(".")
        for sep in (" ", "|", "(", "[", "{", ",", ";"):
            if sep in ext:
                ext = ext.split(sep, 1)[0]
                break
        if "." in ext:
            ext = ext.split(".")[-1]
        ext = "".join(ch for ch in ext if ch.isalnum())
        return ext[:5]
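
    # Illustrative behavior (hypothetical inputs, not from the codebase):
    #   _normalize_extension(".mp3 (320kbps)") -> "mp3"    (noise after a separator is dropped)
    #   _normalize_extension("tar.gz")         -> "gz"     (last dotted segment wins)
    #   _normalize_extension("mpeg-4")         -> "mpeg4"  (non-alphanumerics stripped, capped at 5 chars)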
    def _ensure_storage_columns(self, payload: Dict[str, Any]) -> Dict[str, Any]:
        """Ensure storage results have the necessary fields for result_table display."""
        store_value = str(payload.get("store") or "").lower()
        if store_value not in STORAGE_ORIGINS:
            return payload
        # Ensure we have a title field
        if "title" not in payload:
            payload["title"] = payload.get("name") or payload.get("target") or payload.get("path") or "Result"
        # Ensure we have an ext field, derived from the title's suffix when possible
        if "ext" not in payload:
            title = str(payload.get("title", ""))
            path_obj = Path(title)
            payload["ext"] = self._normalize_extension(path_obj.suffix) if path_obj.suffix else ""
        # size_bytes is already set by the backend search; result_table formats it.
        # Don't create manual columns - let result_table handle display so the
        # table respects max_columns and applies consistent formatting.
        return payload
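
    # Sketch of the normalization (hypothetical values):
    #   {"store": "hydrus", "path": "/media/song.flac"}
    #     -> {"store": "hydrus", "path": "/media/song.flac",
    #         "title": "/media/song.flac", "ext": "flac"}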
    # --- Execution ------------------------------------------------------

    def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
        """Search storage backends for files."""
        if should_show_help(args):
            log(f"Cmdlet: {self.name}\nSummary: {self.summary}\nUsage: {self.usage}")
            return 0
        args_list = [str(arg) for arg in (args or [])]
        # Parse arguments. Note: -tag and -type are collected below, and -size and
        # -duration are declared in the usage string, but none of these filters
        # are applied to the results yet.
        query = ""
        tag_filters: List[str] = []
        size_filter: Optional[Tuple[str, int]] = None
        duration_filter: Optional[Tuple[str, float]] = None
        type_filter: Optional[str] = None
        storage_backend: Optional[str] = None
        limit = 100
        searched_backends: List[str] = []
        i = 0
        while i < len(args_list):
            arg = args_list[i]
            low = arg.lower()
            if low in {"-store", "--store"} and i + 1 < len(args_list):
                storage_backend = args_list[i + 1]
                i += 2
            elif low in {"-tag", "--tag"} and i + 1 < len(args_list):
                tag_filters.append(args_list[i + 1])
                i += 2
            elif low in {"-limit", "--limit"} and i + 1 < len(args_list):
                try:
                    limit = int(args_list[i + 1])
                except ValueError:
                    limit = 100
                i += 2
            elif low in {"-type", "--type"} and i + 1 < len(args_list):
                type_filter = args_list[i + 1].lower()
                i += 2
            elif not arg.startswith("-"):
                query = f"{query} {arg}".strip() if query else arg
                i += 1
            else:
                i += 1
        # Allow a "store:NAME" token inside the query as an alternative to -store.
        store_filter: Optional[str] = None
        if query:
            match = re.search(r"\bstore:([^\s,]+)", query, flags=re.IGNORECASE)
            if match:
                store_filter = match.group(1).strip() or None
                query = re.sub(r"\s*[,]?\s*store:[^\s,]+", " ", query, flags=re.IGNORECASE)
                query = re.sub(r"\s{2,}", " ", query)
                query = query.strip().strip(',')
        if store_filter and not storage_backend:
            storage_backend = store_filter
        if not query:
            log("Provide a search query", file=sys.stderr)
            return 1
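
        # Example (hypothetical invocation): `search-store movie store:home -limit 5`
        # reaches this point with query="movie", storage_backend="home", limit=5.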
        from API.folder import API_folder_store
        from config import get_local_storage_path
        import uuid

        worker_id = str(uuid.uuid4())
        library_root = get_local_storage_path(config or {})
        if not library_root:
            log("No library root configured", file=sys.stderr)
            return 1
        # Use a context manager to ensure the database is always closed
        with API_folder_store(library_root) as db:
            try:
                db.insert_worker(
                    worker_id,
                    "search-store",
                    title=f"Search: {query}",
                    description=f"Query: {query}",
                    pipe=ctx.get_current_command_text(),
                )
                results_list = []
                import result_table
                import importlib
                importlib.reload(result_table)
                from result_table import ResultTable

                table_title = f"Search: {query}"
                if storage_backend:
                    table_title += f" [{storage_backend}]"
                table = ResultTable(table_title)
                from Store import Store
                storage = Store(config=config or {})
                from Store._base import Store as BaseStore

                backend_to_search = storage_backend or None
                if backend_to_search:
                    searched_backends.append(backend_to_search)
                    target_backend = storage[backend_to_search]
                    # A backend supports searching only if its class overrides
                    # BaseStore.search; comparing the unbound methods detects that.
                    if type(target_backend).search is BaseStore.search:
                        log(f"Backend '{backend_to_search}' does not support searching", file=sys.stderr)
                        db.update_worker_status(worker_id, 'error')
                        return 1
                    results = target_backend.search(query, limit=limit)
                else:
                    from API.HydrusNetwork import is_hydrus_available
                    hydrus_available = is_hydrus_available(config or {})
                    from Store.HydrusNetwork import HydrusNetwork

                    all_results = []
                    for backend_name in storage.list_searchable_backends():
                        try:
                            backend = storage[backend_name]
                            # Skip Hydrus backends when the service is unreachable.
                            if isinstance(backend, HydrusNetwork) and not hydrus_available:
                                continue
                            searched_backends.append(backend_name)
                            backend_results = backend.search(query, limit=limit - len(all_results))
                            if backend_results:
                                all_results.extend(backend_results)
                                if len(all_results) >= limit:
                                    break
                        except Exception as exc:
                            log(f"Backend {backend_name} search failed: {exc}", file=sys.stderr)
                    results = all_results[:limit]
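
                    # Each backend above is given only the remaining budget
                    # (limit - len(all_results)), so, assuming backends honor
                    # their limit argument, the combined list stays within
                    # `limit`; the final slice is defensive.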
                def _format_storage_label(name: str) -> str:
                    clean = str(name or "").strip()
                    if not clean:
                        return "Unknown"
                    return clean.replace("_", " ").title()
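
                # Illustrative (hypothetical name): _format_storage_label("hydrus_home") -> "Hydrus Home"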
                storage_counts: OrderedDict[str, int] = OrderedDict((name, 0) for name in searched_backends)
                for item in results or []:
                    store = get_field(item, "store")
                    if not store:
                        continue
                    key = str(store).lower()
                    if key not in storage_counts:
                        storage_counts[key] = 0
                    storage_counts[key] += 1

                if storage_counts or query:
                    display_counts = OrderedDict((_format_storage_label(name), count) for name, count in storage_counts.items())
                    summary_line = table.set_storage_summary(display_counts, query, inline=True)
                    if summary_line:
                        table.title = summary_line
                if results:
                    def _as_dict(obj: Any) -> Dict[str, Any]:
                        if isinstance(obj, dict):
                            return dict(obj)
                        if hasattr(obj, "to_dict") and callable(getattr(obj, "to_dict")):
                            return obj.to_dict()  # type: ignore[arg-type]
                        return {"title": str(obj)}

                    for item in results:
                        item_dict = _as_dict(item)
                        if store_filter:
                            store_val = str(item_dict.get("store") or "").lower()
                            if store_filter.lower() != store_val:
                                continue
                        normalized = self._ensure_storage_columns(item_dict)
                        # Make hash/store available for downstream cmdlets without
                        # rerunning the search, falling back to the original item
                        # for objects that _as_dict could not fully convert.
                        hash_val = normalized.get("hash") or get_field(item, "hash")
                        store_val = normalized.get("store") or get_field(item, "store")
                        if hash_val and not normalized.get("hash"):
                            normalized["hash"] = hash_val
                        if store_val and not normalized.get("store"):
                            normalized["store"] = store_val
                        table.add_result(normalized)
                        results_list.append(normalized)
                        ctx.emit(normalized)
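
                    # Each emitted record is a plain dict consumable downstream,
                    # e.g. (hypothetical): {"store": "hydrus", "title": "song.flac",
                    #                       "ext": "flac", "hash": "ab12..."}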
                    # Debug: verify table rows match the emitted items list
                    debug(f"[search-store] Added {len(table.rows)} rows to table, {len(results_list)} items to results_list")
                    if len(table.rows) != len(results_list):
                        debug(f"[search-store] WARNING: Table/items mismatch! rows={len(table.rows)} items={len(results_list)}", file=sys.stderr)
                    ctx.set_last_result_table(table, results_list)
                    db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2))
                else:
                    log("No results found", file=sys.stderr)
                    db.append_worker_stdout(worker_id, json.dumps([], indent=2))

                db.update_worker_status(worker_id, 'completed')
                return 0
            except Exception as exc:
                log(f"Search failed: {exc}", file=sys.stderr)
                import traceback
                traceback.print_exc(file=sys.stderr)
                try:
                    db.update_worker_status(worker_id, 'error')
                except Exception:
                    pass
                return 1


CMDLET = Search_Store()
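
# Minimal usage sketch (hypothetical import path; assumes a configured library
# root and pipeline context, which this module does not set up itself):
#
#     from cmdlet.search_store import CMDLET
#     code = CMDLET.run(None, ["-store", "home", "song", "-type", "audio"], config)
#
# run() emits normalized result dicts via the pipeline context and returns
# 0 on success, 1 on error.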