@@ -3,7 +3,6 @@ from __future__ import annotations

from typing import Any, Dict, Sequence, List, Optional, Tuple
from pathlib import Path
from dataclasses import dataclass, field
from collections import OrderedDict
import re
import json
@@ -11,57 +10,9 @@ import sys

from SYS.logger import log, debug

-from ._shared import Cmdlet, CmdletArg, get_field, should_show_help
+from ._shared import Cmdlet, CmdletArg, get_field, should_show_help, normalize_hash, first_title_tag
import pipeline as ctx

# Optional dependencies
try:
    import mutagen  # type: ignore
except ImportError:  # pragma: no cover
    mutagen = None  # type: ignore

try:
    from config import get_hydrus_url, resolve_output_dir
except Exception:  # pragma: no cover
    get_hydrus_url = None  # type: ignore
    resolve_output_dir = None  # type: ignore

try:
    from API.HydrusNetwork import HydrusNetwork, HydrusRequestError
except ImportError:  # pragma: no cover
    HydrusNetwork = None  # type: ignore
    HydrusRequestError = RuntimeError  # type: ignore

try:
    from SYS.utils import sha256_file
except ImportError:  # pragma: no cover
    sha256_file = None  # type: ignore

try:
    from SYS.utils_constant import mime_maps
except ImportError:  # pragma: no cover
    mime_maps = {}  # type: ignore

@dataclass(slots=True)
class SearchRecord:
    path: str
    size_bytes: int | None = None
    duration_seconds: str | None = None
    tag: str | None = None
    hash: str | None = None

    def as_dict(self) -> dict[str, str]:
        payload: dict[str, str] = {"path": self.path}
        if self.size_bytes is not None:
            payload["size"] = str(self.size_bytes)
        if self.duration_seconds:
            payload["duration"] = self.duration_seconds
        if self.tag:
            payload["tag"] = self.tag
        if self.hash:
            payload["hash"] = self.hash
        return payload


STORAGE_ORIGINS = {"local", "hydrus", "folder"}

@@ -86,12 +37,15 @@ class Search_Store(Cmdlet):
        detail=[
            "Search across storage backends: Folder stores and Hydrus instances",
            "Use -store to search a specific backend by name",
            "URL search: url:* (any URL) or url:<value> (URL substring)",
            "Filter results by: tag, size, type, duration",
            "Results include hash for downstream commands (get-file, add-tag, etc.)",
            "Examples:",
            "search-store foo                  # Search all storage backends",
            "search-store -store home '*'      # Search 'home' Hydrus instance",
            "search-store -store test 'video'  # Search 'test' folder store",
            "search-store 'url:*'              # Files that have any URL",
            "search-store 'url:youtube.com'    # Files whose URL contains substring",
            "search-store song -type audio     # Search for audio files",
            "search-store movie -tag action    # Search with tag filter",
        ],
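        # Note added for clarity (not in the original diff): this commit also
        # wires up explicit hash lookups via _parse_hash_query below, e.g.
        #   search-store "hash:<h1>,<h2>"     # rows built from backend metadata
        #   search-store "hash:{<h1> <h2>}"   # braces and whitespace also accepted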
@@ -100,6 +54,40 @@ class Search_Store(Cmdlet):
        self.register()

    # --- Helper methods -------------------------------------------------
    @staticmethod
    def _parse_hash_query(query: str) -> List[str]:
        """Parse a `hash:` query into a list of normalized 64-hex SHA256 hashes.

        Supported examples:
        - hash:<h1>,<h2>,<h3>
        - Hash: <h1> <h2> <h3>
        - hash:{<h1>, <h2>}
        """
        q = str(query or "").strip()
        if not q:
            return []

        m = re.match(r"^hash(?:es)?\s*:\s*(.+)$", q, flags=re.IGNORECASE)
        if not m:
            return []

        rest = (m.group(1) or "").strip()
        if rest.startswith("{") and rest.endswith("}"):
            rest = rest[1:-1].strip()
        if rest.startswith("[") and rest.endswith("]"):
            rest = rest[1:-1].strip()

        # Split on commas and whitespace.
        raw_parts = [p.strip() for p in re.split(r"[\s,]+", rest) if p.strip()]
        out: List[str] = []
        for part in raw_parts:
            h = normalize_hash(part)
            if not h:
                continue
            if h not in out:
                out.append(h)
        return out
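
    # Illustrative usage (added note; not part of the original commit).
    # Assuming normalize_hash() validates and lowercases 64-hex digests,
    # the parser tolerates braces/brackets and mixed separators:
    #
    #   Search_Store._parse_hash_query("hash:{<h1>, <h2>, <h1>}")
    #   # -> ["<h1>", "<h2>"]   (normalized, de-duplicated, order preserved)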

    @staticmethod
    def _normalize_extension(ext_value: Any) -> str:
        """Sanitize extension strings to alphanumerics and cap at 5 chars."""
@@ -150,10 +138,10 @@ class Search_Store(Cmdlet):

        # Parse arguments
        query = ""
-       tag_filters: List[str] = []
-       size_filter: Optional[Tuple[str, int]] = None
-       duration_filter: Optional[Tuple[str, float]] = None
-       type_filter: Optional[str] = None
+       _tag_filters: List[str] = []
+       _size_filter: Optional[Tuple[str, int]] = None
+       _duration_filter: Optional[Tuple[str, float]] = None
+       _type_filter: Optional[str] = None
        storage_backend: Optional[str] = None
        limit = 100
        searched_backends: List[str] = []
@@ -166,7 +154,7 @@ class Search_Store(Cmdlet):
                storage_backend = args_list[i + 1]
                i += 2
            elif low in {"-tag", "--tag"} and i + 1 < len(args_list):
-               tag_filters.append(args_list[i + 1])
+               _tag_filters.append(args_list[i + 1])
                i += 2
            elif low in {"-limit", "--limit"} and i + 1 < len(args_list):
                try:
@@ -175,7 +163,7 @@ class Search_Store(Cmdlet):
                    limit = 100
                i += 2
            elif low in {"-type", "--type"} and i + 1 < len(args_list):
-               type_filter = args_list[i + 1].lower()
+               _type_filter = args_list[i + 1].lower()
                i += 2
            elif not arg.startswith("-"):
                query = f"{query} {arg}".strip() if query else arg
@@ -195,6 +183,8 @@ class Search_Store(Cmdlet):
        if store_filter and not storage_backend:
            storage_backend = store_filter

+       hash_query = self._parse_hash_query(query)

        if not query:
            log("Provide a search query", file=sys.stderr)
            return 1
@@ -230,12 +220,136 @@ class Search_Store(Cmdlet):
            table_title += f" [{storage_backend}]"

        table = ResultTable(table_title)
        if hash_query:
            try:
                table.set_preserve_order(True)
            except Exception:
                pass

        from Store import Store
        storage = Store(config=config or {})
        from Store._base import Store as BaseStore

        backend_to_search = storage_backend or None
        if hash_query:
            # Explicit hash list search: build rows from backend metadata.
            backends_to_try: List[str] = []
            if backend_to_search:
                backends_to_try = [backend_to_search]
            else:
                backends_to_try = list(storage.list_backends())

            found_any = False
            for h in hash_query:
                resolved_backend_name: Optional[str] = None
                resolved_backend = None

                for backend_name in backends_to_try:
                    try:
                        backend = storage[backend_name]
                    except Exception:
                        continue
                    try:
                        # If get_metadata works, consider it a hit; get_file can be optional (e.g. remote URL).
                        meta = backend.get_metadata(h)
                        if meta is None:
                            continue
                        resolved_backend_name = backend_name
                        resolved_backend = backend
                        break
                    except Exception:
                        continue

                if resolved_backend_name is None or resolved_backend is None:
                    continue

                found_any = True
                searched_backends.append(resolved_backend_name)

                # Resolve a path/URL string if possible
                path_str: Optional[str] = None
                try:
                    maybe_path = resolved_backend.get_file(h)
                    if isinstance(maybe_path, Path):
                        path_str = str(maybe_path)
                    elif isinstance(maybe_path, str) and maybe_path:
                        path_str = maybe_path
                except Exception:
                    path_str = None

                meta_obj: Dict[str, Any] = {}
                try:
                    meta_obj = resolved_backend.get_metadata(h) or {}
                except Exception:
                    meta_obj = {}

                tags_list: List[str] = []
                try:
                    tag_result = resolved_backend.get_tag(h)
                    if isinstance(tag_result, tuple) and tag_result:
                        maybe_tags = tag_result[0]
                    else:
                        maybe_tags = tag_result
                    if isinstance(maybe_tags, list):
                        tags_list = [str(t).strip() for t in maybe_tags if isinstance(t, str) and str(t).strip()]
                except Exception:
                    tags_list = []

                title_from_tag: Optional[str] = None
                try:
                    title_tag = first_title_tag(tags_list)
                    if title_tag and ":" in title_tag:
                        title_from_tag = title_tag.split(":", 1)[1].strip()
                except Exception:
                    title_from_tag = None

                title = title_from_tag or meta_obj.get("title") or meta_obj.get("name")
                if not title and path_str:
                    try:
                        title = Path(path_str).stem
                    except Exception:
                        title = path_str

                ext_val = meta_obj.get("ext") or meta_obj.get("extension")
                if not ext_val and path_str:
                    try:
                        ext_val = Path(path_str).suffix
                    except Exception:
                        ext_val = None

                size_bytes = meta_obj.get("size")
                if size_bytes is None:
                    size_bytes = meta_obj.get("size_bytes")
                try:
                    size_bytes_int: Optional[int] = int(size_bytes) if size_bytes is not None else None
                except Exception:
                    size_bytes_int = None

                payload: Dict[str, Any] = {
                    "title": str(title or h),
                    "hash": h,
                    "store": resolved_backend_name,
                    "path": path_str,
                    "ext": self._normalize_extension(ext_val),
                    "size_bytes": size_bytes_int,
                    "tag": tags_list,
                }

                table.add_result(payload)
                results_list.append(payload)
                ctx.emit(payload)

            if found_any:
                ctx.set_last_result_table(table, results_list)
                db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2))
                db.update_worker_status(worker_id, 'completed')
                return 0

            log("No results found", file=sys.stderr)
            db.append_worker_stdout(worker_id, json.dumps([], indent=2))
            db.update_worker_status(worker_id, 'completed')
            return 0

        if backend_to_search:
            searched_backends.append(backend_to_search)
            target_backend = storage[backend_to_search]
@@ -243,7 +357,9 @@ class Search_Store(Cmdlet):
                log(f"Backend '{backend_to_search}' does not support searching", file=sys.stderr)
                db.update_worker_status(worker_id, 'error')
                return 1
+           debug(f"[search-store] Searching '{backend_to_search}'")
            results = target_backend.search(query, limit=limit)
+           debug(f"[search-store] '{backend_to_search}' -> {len(results or [])} result(s)")
        else:
            from API.HydrusNetwork import is_hydrus_available
            hydrus_available = is_hydrus_available(config or {})
@@ -257,7 +373,9 @@ class Search_Store(Cmdlet):
                    continue
                searched_backends.append(backend_name)

+               debug(f"[search-store] Searching '{backend_name}'")
                backend_results = backend.search(query, limit=limit - len(all_results))
+               debug(f"[search-store] '{backend_name}' -> {len(backend_results or [])} result(s)")
                if backend_results:
                    all_results.extend(backend_results)
                    if len(all_results) >= limit:
@@ -317,11 +435,6 @@ class Search_Store(Cmdlet):
                results_list.append(normalized)
                ctx.emit(normalized)

-           # Debug: Verify table rows match items list
-           debug(f"[search-store] Added {len(table.rows)} rows to table, {len(results_list)} items to results_list")
-           if len(table.rows) != len(results_list):
-               debug(f"[search-store] WARNING: Table/items mismatch! rows={len(table.rows)} items={len(results_list)}", file=sys.stderr)

            ctx.set_last_result_table(table, results_list)
            db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2))
        else: