"""Search-store cmdlet: Search for files in storage backends (Folder, Hydrus)."""
from __future__ import annotations

from typing import Any, Dict, Sequence, List, Optional
from pathlib import Path
from collections import OrderedDict
import re
import json
import sys

from SYS.logger import log, debug

from ._shared import Cmdlet, CmdletArg, SharedArgs, get_field, should_show_help, normalize_hash, first_title_tag

import pipeline as ctx

STORAGE_ORIGINS = {"local", "hydrus", "folder"}


class Search_Store(Cmdlet):
    """Class-based search-store cmdlet for searching storage backends."""

    def __init__(self) -> None:
        super().__init__(
            name="search-store",
            summary="Search storage backends (Folder, Hydrus) for files.",
            usage="search-store [query] [-store BACKEND] [-limit N]",
            arg=[
                CmdletArg("query", description="Search query string"),
                CmdletArg("limit", type="integer", description="Limit results (default: 100)"),
                SharedArgs.STORE,
            ],
            detail=[
                "Search across storage backends: Folder stores and Hydrus instances",
                "Use -store to search a specific backend by name",
                "URL search: url:* (any URL) or url:<value> (URL substring)",
                "Results include hash for downstream commands (get-file, add-tag, etc.)",
                "Examples:",
                "search-store foo                  # Search all storage backends",
                "search-store -store home '*'      # Search 'home' Hydrus instance",
                "search-store -store test 'video'  # Search 'test' folder store",
                "search-store 'url:*'              # Files that have any URL",
                "search-store 'url:youtube.com'    # Files whose URL contains substring",
            ],
            exec=self.run,
        )
        self.register()

    # --- Helper methods -------------------------------------------------

    @staticmethod
    def _parse_hash_query(query: str) -> List[str]:
        """Parse a `hash:` query into a list of normalized 64-hex SHA256 hashes.

        Supported examples:
        - hash:<h1>,<h2>,<h3>
        - Hash: <h1> <h2> <h3>
        - hash:{<h1>, <h2>}
        """
        q = str(query or "").strip()
        if not q:
            return []
        m = re.match(r"^hash(?:es)?\s*:\s*(.+)$", q, flags=re.IGNORECASE)
        if not m:
            return []
        rest = (m.group(1) or "").strip()
        if rest.startswith("{") and rest.endswith("}"):
            rest = rest[1:-1].strip()
        if rest.startswith("[") and rest.endswith("]"):
            rest = rest[1:-1].strip()
        # Split on commas and whitespace.
        raw_parts = [p.strip() for p in re.split(r"[\s,]+", rest) if p.strip()]
        out: List[str] = []
        for part in raw_parts:
            h = normalize_hash(part)
            if not h:
                continue
            if h not in out:
                out.append(h)
        return out
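
    # Illustrative behaviour of _parse_hash_query (a sketch; assumes
    # normalize_hash() accepts 64-hex SHA256 strings and rejects the rest):
    #   "hash:" + "a" * 64       -> ["aaa...a"]  (one normalized hash)
    #   "Hashes: {h1, h2, h1}"   -> [h1, h2]     (deduplicated, order kept,
    #                                             where h1/h2 are 64-hex strings)
    #   "title:foo"              -> []           (not a hash query)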

    @staticmethod
    def _normalize_extension(ext_value: Any) -> str:
        """Sanitize extension strings to alphanumerics and cap at 5 chars."""
        ext = str(ext_value or "").strip().lstrip(".")
        for sep in (" ", "|", "(", "[", "{", ",", ";"):
            if sep in ext:
                ext = ext.split(sep, 1)[0]
                break
        if "." in ext:
            ext = ext.split(".")[-1]
        ext = "".join(ch for ch in ext if ch.isalnum())
        return ext[:5]
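
    # Illustrative behaviour of _normalize_extension (a sketch):
    #   ".MP4"          -> "MP4"    (leading dot stripped; case preserved)
    #   "tar.gz"        -> "gz"     (last dotted component wins)
    #   "jpeg (large)"  -> "jpeg"   (cut at the first separator)
    #   "matroska"      -> "matro"  (capped at 5 characters)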

    def _ensure_storage_columns(self, payload: Dict[str, Any]) -> Dict[str, Any]:
        """Ensure storage results have the necessary fields for result_table display."""
        store_value = str(payload.get("store") or "").lower()
        if store_value not in STORAGE_ORIGINS:
            return payload
        # Ensure we have a title field.
        if "title" not in payload:
            payload["title"] = payload.get("name") or payload.get("target") or payload.get("path") or "Result"
        # Ensure we have an ext field, derived from the title's suffix when possible.
        if "ext" not in payload:
            title = str(payload.get("title", ""))
            path_obj = Path(title)
            if path_obj.suffix:
                payload["ext"] = self._normalize_extension(path_obj.suffix.lstrip('.'))
            else:
                payload["ext"] = ""
        # size_bytes is already set by search_file(); result_table formats it.
        # Don't create manual columns here - letting result_table handle display
        # keeps max_columns respected and formatting consistent.
        return payload

    # --- Execution ------------------------------------------------------

    def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
        """Search storage backends for files."""
        if should_show_help(args):
            log(f"Cmdlet: {self.name}\nSummary: {self.summary}\nUsage: {self.usage}")
            return 0
        args_list = [str(arg) for arg in (args or [])]

        # Build dynamic flag variants from cmdlet arg definitions.
        # This avoids hardcoding flag spellings in parsing loops.
        flag_registry = self.build_flag_registry()
        store_flags = {f.lower() for f in (flag_registry.get("store") or {"-store", "--store"})}
        limit_flags = {f.lower() for f in (flag_registry.get("limit") or {"-limit", "--limit"})}
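
        # Assumed shape of the registry (a sketch; build_flag_registry comes
        # from the shared Cmdlet base, and the exact spellings derive from the
        # CmdletArg definitions above):
        #   {"store": {"-store", "--store"}, "limit": {"-limit", "--limit"}, ...}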

        # Parse arguments
        query = ""
        storage_backend: Optional[str] = None
        limit = 100
        searched_backends: List[str] = []
        i = 0
        while i < len(args_list):
            arg = args_list[i]
            low = arg.lower()
            if low in store_flags and i + 1 < len(args_list):
                storage_backend = args_list[i + 1]
                i += 2
            elif low in limit_flags and i + 1 < len(args_list):
                try:
                    limit = int(args_list[i + 1])
                except ValueError:
                    limit = 100
                i += 2
            elif not arg.startswith("-"):
                query = f"{query} {arg}".strip() if query else arg
                i += 1
            else:
                i += 1
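
        # The query may carry an inline store filter. A sketch of the intended
        # rewrite (illustrative values): "foo store:home bar" leaves the query
        # as "foo bar" and selects backend "home", unless -store was already
        # given explicitly on the command line.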
        store_filter: Optional[str] = None
        if query:
            match = re.search(r"\bstore:([^\s,]+)", query, flags=re.IGNORECASE)
            if match:
                store_filter = match.group(1).strip() or None
                query = re.sub(r"\s*[,]?\s*store:[^\s,]+", " ", query, flags=re.IGNORECASE)
                query = re.sub(r"\s{2,}", " ", query)
                query = query.strip().strip(',')
        if store_filter and not storage_backend:
            storage_backend = store_filter

        hash_query = self._parse_hash_query(query)

        if not query:
            log("Provide a search query", file=sys.stderr)
            return 1

        from API.folder import API_folder_store
        from config import get_local_storage_path
        import uuid

        worker_id = str(uuid.uuid4())
        library_root = get_local_storage_path(config or {})
        if not library_root:
            log("No library root configured", file=sys.stderr)
            return 1

        # Use a context manager to ensure the database is always closed.
        with API_folder_store(library_root) as db:
            try:
                db.insert_worker(
                    worker_id,
                    "search-store",
                    title=f"Search: {query}",
                    description=f"Query: {query}",
                    pipe=ctx.get_current_command_text(),
                )
                results_list = []
                import result_table
                import importlib
                importlib.reload(result_table)
                from result_table import ResultTable

                table_title = f"Search: {query}"
                if storage_backend:
                    table_title += f" [{storage_backend}]"
                table = ResultTable(table_title)
                if hash_query:
                    try:
                        table.set_preserve_order(True)
                    except Exception:
                        pass

                from Store import Store
                storage = Store(config=config or {})
                from Store._base import Store as BaseStore

                backend_to_search = storage_backend or None

                if hash_query:
                    # Explicit hash-list search: build rows from backend metadata.
                    backends_to_try: List[str] = []
                    if backend_to_search:
                        backends_to_try = [backend_to_search]
                    else:
                        backends_to_try = list(storage.list_backends())
                    found_any = False
                    for h in hash_query:
                        resolved_backend_name: Optional[str] = None
                        resolved_backend = None
                        for backend_name in backends_to_try:
                            try:
                                backend = storage[backend_name]
                            except Exception:
                                continue
                            try:
                                # If get_metadata works, consider it a hit; get_file
                                # can be optional (e.g. a remote URL).
                                meta = backend.get_metadata(h)
                                if meta is None:
                                    continue
                                resolved_backend_name = backend_name
                                resolved_backend = backend
                                break
                            except Exception:
                                continue
                        if resolved_backend_name is None or resolved_backend is None:
                            continue
                        found_any = True
                        searched_backends.append(resolved_backend_name)
                        # Resolve a path/URL string if possible.
                        path_str: Optional[str] = None
                        # IMPORTANT: avoid calling get_file() for remote backends.
                        # For Hydrus, get_file() returns a browser URL (and may include
                        # access keys), which should not be pulled during search/refresh.
                        try:
                            if type(resolved_backend).__name__ == "Folder":
                                maybe_path = resolved_backend.get_file(h)
                                if isinstance(maybe_path, Path):
                                    path_str = str(maybe_path)
                                elif isinstance(maybe_path, str) and maybe_path:
                                    path_str = maybe_path
                        except Exception:
                            path_str = None
                        meta_obj: Dict[str, Any] = {}
                        try:
                            meta_obj = resolved_backend.get_metadata(h) or {}
                        except Exception:
                            meta_obj = {}
                        tags_list: List[str] = []
                        try:
                            tag_result = resolved_backend.get_tag(h)
                            if isinstance(tag_result, tuple) and tag_result:
                                maybe_tags = tag_result[0]
                            else:
                                maybe_tags = tag_result
                            if isinstance(maybe_tags, list):
                                tags_list = [str(t).strip() for t in maybe_tags if isinstance(t, str) and str(t).strip()]
                        except Exception:
                            tags_list = []
                        title_from_tag: Optional[str] = None
                        try:
                            title_tag = first_title_tag(tags_list)
                            if title_tag and ":" in title_tag:
                                title_from_tag = title_tag.split(":", 1)[1].strip()
                        except Exception:
                            title_from_tag = None
                        title = title_from_tag or meta_obj.get("title") or meta_obj.get("name")
                        if not title and path_str:
                            try:
                                title = Path(path_str).stem
                            except Exception:
                                title = path_str
                        ext_val = meta_obj.get("ext") or meta_obj.get("extension")
                        if not ext_val and path_str:
                            try:
                                ext_val = Path(path_str).suffix
                            except Exception:
                                ext_val = None
                        size_bytes = meta_obj.get("size")
                        if size_bytes is None:
                            size_bytes = meta_obj.get("size_bytes")
                        try:
                            size_bytes_int: Optional[int] = int(size_bytes) if size_bytes is not None else None
                        except Exception:
                            size_bytes_int = None
                        payload: Dict[str, Any] = {
                            "title": str(title or h),
                            "hash": h,
                            "store": resolved_backend_name,
                            "path": path_str,
                            "ext": self._normalize_extension(ext_val),
                            "size_bytes": size_bytes_int,
                            "tag": tags_list,
                        }
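                        # Illustrative payload row (values are examples only):
                        #   {"title": "My Clip", "hash": "<64-hex>", "store": "home",
                        #    "path": "/library/ab/cd.mp4", "ext": "mp4",
                        #    "size_bytes": 1048576, "tag": ["title:My Clip"]}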
                        table.add_result(payload)
                        results_list.append(payload)
                        ctx.emit(payload)
                    if found_any:
                        ctx.set_last_result_table(table, results_list)
                        db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2))
                        db.update_worker_status(worker_id, 'completed')
                        return 0
                    log("No results found", file=sys.stderr)
                    db.append_worker_stdout(worker_id, json.dumps([], indent=2))
                    db.update_worker_status(worker_id, 'completed')
                    return 0

                if backend_to_search:
                    searched_backends.append(backend_to_search)
                    target_backend = storage[backend_to_search]
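                    # Why the identity check below works: if the concrete backend
                    # class never overrides search(), the class-attribute lookup
                    # resolves to the inherited BaseStore.search function object,
                    # so `is` detects "no search support" without calling anything.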
                    if type(target_backend).search is BaseStore.search:
                        log(f"Backend '{backend_to_search}' does not support searching", file=sys.stderr)
                        db.update_worker_status(worker_id, 'error')
                        return 1
                    debug(f"[search-store] Searching '{backend_to_search}'")
                    results = target_backend.search(query, limit=limit)
                    debug(f"[search-store] '{backend_to_search}' -> {len(results or [])} result(s)")
                else:
                    from API.HydrusNetwork import is_hydrus_available
                    hydrus_available = is_hydrus_available(config or {})
                    from Store.HydrusNetwork import HydrusNetwork

                    all_results = []
                    for backend_name in storage.list_searchable_backends():
                        try:
                            backend = storage[backend_name]
                            if isinstance(backend, HydrusNetwork) and not hydrus_available:
                                continue
                            searched_backends.append(backend_name)
                            debug(f"[search-store] Searching '{backend_name}'")
                            backend_results = backend.search(query, limit=limit - len(all_results))
                            debug(f"[search-store] '{backend_name}' -> {len(backend_results or [])} result(s)")
                            if backend_results:
                                all_results.extend(backend_results)
                            if len(all_results) >= limit:
                                break
                        except Exception as exc:
                            log(f"Backend {backend_name} search failed: {exc}", file=sys.stderr)
                    results = all_results[:limit]

                def _format_storage_label(name: str) -> str:
                    clean = str(name or "").strip()
                    if not clean:
                        return "Unknown"
                    return clean.replace("_", " ").title()
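                # For example, _format_storage_label("home_folder") yields
                # "Home Folder"; an empty or missing name renders as "Unknown".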

                storage_counts: OrderedDict[str, int] = OrderedDict((name, 0) for name in searched_backends)
                for item in results or []:
                    store = get_field(item, "store")
                    if not store:
                        continue
                    key = str(store).lower()
                    if key not in storage_counts:
                        storage_counts[key] = 0
                    storage_counts[key] += 1
                if storage_counts or query:
                    display_counts = OrderedDict((_format_storage_label(name), count) for name, count in storage_counts.items())
                    summary_line = table.set_storage_summary(display_counts, query, inline=True)
                    if summary_line:
                        table.title = summary_line

                if results:
                    def _as_dict(obj: Any) -> Dict[str, Any]:
                        if isinstance(obj, dict):
                            return dict(obj)
                        if hasattr(obj, "to_dict") and callable(getattr(obj, "to_dict")):
                            return obj.to_dict()  # type: ignore[arg-type]
                        return {"title": str(obj)}

                    for item in results:
                        item_dict = _as_dict(item)
                        if store_filter:
                            store_val = str(item_dict.get("store") or "").lower()
                            if store_filter != store_val:
                                continue
                        normalized = self._ensure_storage_columns(item_dict)
                        # Make hash/store available for downstream cmdlets without
                        # rerunning the search.
                        if not normalized.get("hash") and item_dict.get("hash"):
                            normalized["hash"] = item_dict["hash"]
                        if not normalized.get("store") and item_dict.get("store"):
                            normalized["store"] = item_dict["store"]
                        table.add_result(normalized)
                        results_list.append(normalized)
                        ctx.emit(normalized)
                    ctx.set_last_result_table(table, results_list)
                    db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2))
                else:
                    log("No results found", file=sys.stderr)
                    db.append_worker_stdout(worker_id, json.dumps([], indent=2))
                db.update_worker_status(worker_id, 'completed')
                return 0
            except Exception as exc:
                log(f"Search failed: {exc}", file=sys.stderr)
                import traceback
                traceback.print_exc(file=sys.stderr)
                try:
                    db.update_worker_status(worker_id, 'error')
                except Exception:
                    pass
                return 1
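

# Instantiating at import time registers the cmdlet with the pipeline
# (self.register() runs in __init__).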
CMDLET = Search_Store()