cmdlets/search_store.py (new file, 341 lines)
@@ -0,0 +1,341 @@
"""Search-store cmdlet: Search for files in storage backends (Folder, Hydrus)."""

from __future__ import annotations

from typing import Any, Dict, Sequence, List, Optional, Tuple
from pathlib import Path
from dataclasses import dataclass, field
from collections import OrderedDict
import re
import json
import sys

from helper.logger import log, debug

from ._shared import Cmdlet, CmdletArg, get_origin, get_field, should_show_help
import pipeline as ctx

# Optional dependencies
try:
    import mutagen  # type: ignore
except ImportError:  # pragma: no cover
    mutagen = None  # type: ignore

try:
    from config import get_hydrus_url, resolve_output_dir
except Exception:  # pragma: no cover
    get_hydrus_url = None  # type: ignore
    resolve_output_dir = None  # type: ignore

try:
    from helper.hydrus import HydrusClient, HydrusRequestError
except ImportError:  # pragma: no cover
    HydrusClient = None  # type: ignore
    HydrusRequestError = RuntimeError  # type: ignore

try:
    from helper.utils import sha256_file
except ImportError:  # pragma: no cover
    sha256_file = None  # type: ignore

try:
    from helper.utils_constant import mime_maps
except ImportError:  # pragma: no cover
    mime_maps = {}  # type: ignore


@dataclass(slots=True)
class SearchRecord:
    path: str
    size_bytes: int | None = None
    duration_seconds: str | None = None
    tags: str | None = None
    hash_hex: str | None = None

    def as_dict(self) -> dict[str, str]:
        payload: dict[str, str] = {"path": self.path}
        if self.size_bytes is not None:
            payload["size"] = str(self.size_bytes)
        if self.duration_seconds:
            payload["duration"] = self.duration_seconds
        if self.tags:
            payload["tags"] = self.tags
        if self.hash_hex:
            payload["hash"] = self.hash_hex
        return payload
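
# Illustrative: as_dict() emits only the populated fields, so serialized
# records stay compact, e.g.
#   SearchRecord(path="/tmp/a.mp3", size_bytes=2048, tags="genre:jazz").as_dict()
#   -> {"path": "/tmp/a.mp3", "size": "2048", "tags": "genre:jazz"}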

STORAGE_ORIGINS = {"local", "hydrus", "folder"}

class Search_Store(Cmdlet):
    """Class-based search-store cmdlet for searching storage backends."""

    def __init__(self) -> None:
        super().__init__(
            name="search-store",
            summary="Search storage backends (Folder, Hydrus) for files.",
            usage="search-store [query] [-tag TAG] [-size >100MB|<50MB] [-type audio|video|image] [-duration >10:00] [-store BACKEND]",
            arg=[
                CmdletArg("query", description="Search query string"),
                CmdletArg("tag", description="Filter by tag (can be used multiple times)"),
                CmdletArg("size", description="Filter by size: >100MB, <50MB, =10MB"),
                CmdletArg("type", description="Filter by type: audio, video, image, document"),
                CmdletArg("duration", description="Filter by duration: >10:00, <1:30:00"),
                CmdletArg("limit", type="integer", description="Limit results (default: 100)"),
                CmdletArg("store", description="Search specific storage backend (e.g., 'home', 'test', or 'default')"),
            ],
            detail=[
                "Search across storage backends: Folder stores and Hydrus instances",
                "Use -store to search a specific backend by name",
                "Filter results by: tag, size, type, duration",
                "Results include hash for downstream commands (get-file, add-tag, etc.)",
                "Examples:",
                "search-store foo                   # Search all storage backends",
                "search-store -store home '*'       # Search 'home' Hydrus instance",
                "search-store -store test 'video'   # Search 'test' folder store",
                "search-store song -type audio      # Search for audio files",
                "search-store movie -tag action     # Search with tag filter",
            ],
            exec=self.run,
        )
        self.register()

    # --- Helper methods -------------------------------------------------
    @staticmethod
    def _normalize_extension(ext_value: Any) -> str:
        """Sanitize extension strings to alphanumerics and cap at 5 chars."""
        ext = str(ext_value or "").strip().lstrip(".")
        for sep in (" ", "|", "(", "[", "{", ",", ";"):
            if sep in ext:
                ext = ext.split(sep, 1)[0]
                break
        if "." in ext:
            ext = ext.split(".")[-1]
        ext = "".join(ch for ch in ext if ch.isalnum())
        return ext[:5]
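
    # Illustrative behaviour:
    #   _normalize_extension(".mp3")        -> "mp3"
    #   _normalize_extension("tar.gz")      -> "gz"
    #   _normalize_extension("mp3 (audio)") -> "mp3"
    #   _normalize_extension("jpeg2000xyz") -> "jpeg2"   (capped at 5 chars)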

    def _ensure_storage_columns(self, payload: Dict[str, Any]) -> Dict[str, Any]:
        """Ensure storage results have the necessary fields for result_table display."""
        store_value = str(get_origin(payload, "") or "").lower()
        if store_value not in STORAGE_ORIGINS:
            return payload

        # Ensure we have a title field
        if "title" not in payload:
            payload["title"] = payload.get("name") or payload.get("target") or payload.get("path") or "Result"

        # Ensure we have an ext field
        if "ext" not in payload:
            title = str(payload.get("title", ""))
            path_obj = Path(title)
            if path_obj.suffix:
                payload["ext"] = self._normalize_extension(path_obj.suffix)
            else:
                payload["ext"] = ""

        # size_bytes is already set by search_file(); result_table formats it.
        # Don't create manual columns - let result_table handle display so it
        # respects max_columns and applies consistent formatting.
        return payload
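
    # Illustrative: a payload from a storage origin with target "/media/a.mkv"
    # and no explicit title gains title="/media/a.mkv" and ext="mkv"; payloads
    # from non-storage origins pass through untouched.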

    # --- Execution ------------------------------------------------------
    def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
        """Search storage backends for files."""
        if should_show_help(args):
            log(f"Cmdlet: {self.name}\nSummary: {self.summary}\nUsage: {self.usage}")
            return 0

        args_list = [str(arg) for arg in (args or [])]

        # Parse arguments
        query = ""
        tag_filters: List[str] = []
        size_filter: Optional[Tuple[str, int]] = None
        duration_filter: Optional[Tuple[str, float]] = None
        type_filter: Optional[str] = None
        storage_backend: Optional[str] = None
        limit = 100
        searched_backends: List[str] = []

        i = 0
        while i < len(args_list):
            arg = args_list[i]
            low = arg.lower()
            if low in {"-store", "--store", "-storage", "--storage"} and i + 1 < len(args_list):
                storage_backend = args_list[i + 1]
                i += 2
            elif low in {"-tag", "--tag"} and i + 1 < len(args_list):
                tag_filters.append(args_list[i + 1])
                i += 2
            elif low in {"-limit", "--limit"} and i + 1 < len(args_list):
                try:
                    limit = int(args_list[i + 1])
                except ValueError:
                    limit = 100
                i += 2
            elif low in {"-type", "--type"} and i + 1 < len(args_list):
                type_filter = args_list[i + 1].lower()
                i += 2
            elif not arg.startswith("-"):
                query = f"{query} {arg}".strip() if query else arg
                i += 1
            else:
                i += 1
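
        # Hypothetical sketch: `-size` and `-duration` are declared in usage but
        # not parsed in the loop above; a minimal `-size` parser under that
        # assumption might look like:
        #
        #   m = re.match(r"([<>=])(\d+)\s*([KMG]?B)", raw, re.IGNORECASE)
        #   if m:
        #       scale = {"B": 1, "KB": 1024, "MB": 1024**2, "GB": 1024**3}
        #       size_filter = (m.group(1), int(m.group(2)) * scale[m.group(3).upper()])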

        store_filter: Optional[str] = None
        if query:
            match = re.search(r"\bstore:([^\s,]+)", query, flags=re.IGNORECASE)
            if match:
                store_filter = match.group(1).strip() or None
                query = re.sub(r"\s*[,]?\s*store:[^\s,]+", " ", query, flags=re.IGNORECASE)
                query = re.sub(r"\s{2,}", " ", query)
                query = query.strip().strip(',')
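
        # Illustrative: the query "foo store:home bar" yields store_filter="home"
        # and the cleaned query "foo bar".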

        if store_filter and not storage_backend:
            storage_backend = store_filter

        if not query:
            log("Provide a search query", file=sys.stderr)
            return 1

        from helper.folder_store import FolderDB
        from config import get_local_storage_path
        import uuid

        worker_id = str(uuid.uuid4())
        library_root = get_local_storage_path(config or {})
        if not library_root:
            log("No library root configured", file=sys.stderr)
            return 1

        # Use context manager to ensure database is always closed
        with FolderDB(library_root) as db:
            try:
                db.insert_worker(
                    worker_id,
                    "search-store",
                    title=f"Search: {query}",
                    description=f"Query: {query}",
                    pipe=ctx.get_current_command_text(),
                )

                results_list = []
                import result_table
                import importlib
                importlib.reload(result_table)
                from result_table import ResultTable

                table_title = f"Search: {query}"
                if storage_backend:
                    table_title += f" [{storage_backend}]"

                table = ResultTable(table_title)

                from helper.store import FileStorage
                storage = FileStorage(config=config or {})

                backend_to_search = storage_backend or None
                if backend_to_search:
                    searched_backends.append(backend_to_search)
                    target_backend = storage[backend_to_search]
                    if not callable(getattr(target_backend, 'search_file', None)):
                        log(f"Backend '{backend_to_search}' does not support searching", file=sys.stderr)
                        db.update_worker_status(worker_id, 'error')
                        return 1
                    results = target_backend.search_file(query, limit=limit)
                else:
                    from helper.hydrus import is_hydrus_available
                    hydrus_available = is_hydrus_available(config or {})

                    all_results = []
                    for backend_name in storage.list_searchable_backends():
                        if backend_name.startswith("hydrus") and not hydrus_available:
                            continue
                        searched_backends.append(backend_name)
                        try:
                            backend_results = storage[backend_name].search_file(query, limit=limit - len(all_results))
                            if backend_results:
                                all_results.extend(backend_results)
                            if len(all_results) >= limit:
                                break
                        except Exception as exc:
                            log(f"Backend {backend_name} search failed: {exc}", file=sys.stderr)
                    results = all_results[:limit]
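                    # Illustrative: with limit=100, if the first backend returns
                    # 60 hits, the next is asked for at most 40 (limit - len(all_results)).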

                def _format_storage_label(name: str) -> str:
                    clean = str(name or "").strip()
                    if not clean:
                        return "Unknown"
                    return clean.replace("_", " ").title()
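
                # Illustrative: _format_storage_label("my_store") -> "My Store"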

                storage_counts: OrderedDict[str, int] = OrderedDict((name, 0) for name in searched_backends)
                for item in results or []:
                    origin = get_origin(item)
                    if not origin:
                        continue
                    key = str(origin).lower()
                    if key not in storage_counts:
                        storage_counts[key] = 0
                    storage_counts[key] += 1

                if storage_counts or query:
                    display_counts = OrderedDict((_format_storage_label(name), count) for name, count in storage_counts.items())
                    summary_line = table.set_storage_summary(display_counts, query, inline=True)
                    if summary_line:
                        table.title = summary_line

                if results:
                    def _as_dict(obj: Any) -> Dict[str, Any]:
                        if isinstance(obj, dict):
                            return dict(obj)
                        if hasattr(obj, "to_dict") and callable(getattr(obj, "to_dict")):
                            return obj.to_dict()  # type: ignore[arg-type]
                        return {"title": str(obj)}

                    for item in results:
                        item_dict = _as_dict(item)
                        if store_filter:
                            origin_val = str(get_origin(item_dict) or "").lower()
                            if store_filter.lower() != origin_val:
                                continue
                        normalized = self._ensure_storage_columns(item_dict)

                        # Make store available for downstream cmdlets without
                        # rerunning the search ("hash" is already populated by
                        # search_file() when the backend provides it).
                        if not normalized.get("store"):
                            store_val = get_origin(item_dict)
                            if store_val:
                                normalized["store"] = store_val

                        table.add_result(normalized)
                        results_list.append(normalized)
                        ctx.emit(normalized)

                    # Debug: verify table rows match the emitted items list
                    debug(f"[search-store] Added {len(table.rows)} rows to table, {len(results_list)} items to results_list")
                    if len(table.rows) != len(results_list):
                        debug(f"[search-store] WARNING: Table/items mismatch! rows={len(table.rows)} items={len(results_list)}", file=sys.stderr)

                    ctx.set_last_result_table(table, results_list)
                    db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2))
                else:
                    log("No results found", file=sys.stderr)
                    db.append_worker_stdout(worker_id, json.dumps([], indent=2))

                db.update_worker_status(worker_id, 'completed')
                return 0

            except Exception as exc:
                log(f"Search failed: {exc}", file=sys.stderr)
                import traceback
                traceback.print_exc(file=sys.stderr)
                try:
                    db.update_worker_status(worker_id, 'error')
                except Exception:
                    pass
                return 1


CMDLET = Search_Store()
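
# Minimal smoke-test sketch (hypothetical harness; assumes a configured library
# root and at least one searchable backend in `config`):
#
#   from cmdlets.search_store import CMDLET
#   exit_code = CMDLET.run(None, ["song", "-type", "audio", "-limit", "5"], config={})
#   assert exit_code in (0, 1)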