"""search-file cmdlet: Search for files in storage backends (Folder, Hydrus)."""
from __future__ import annotations
from typing import Any, Dict, Sequence, List, Optional
import importlib
import uuid
from pathlib import Path
import re
import json
import sys
from SYS.logger import log, debug
from ProviderCore.registry import get_search_provider, list_search_providers
from SYS.config import get_local_storage_path
from ._shared import (
    Cmdlet,
    CmdletArg,
    SharedArgs,
    get_field,
    should_show_help,
    normalize_hash,
    first_title_tag,
    parse_hash_query,
)
from SYS import pipeline as ctx

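# Values of a result's "store" field that mark it as coming from a storage
# backend rather than an external provider (see _ensure_storage_columns below).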
STORAGE_ORIGINS = {"local", "hydrus", "folder"}


class search_file(Cmdlet):
    """Class-based search-file cmdlet for searching storage backends."""

    def __init__(self) -> None:
        super().__init__(
            name="search-file",
            summary="Search storage backends (Folder, Hydrus) or external providers (via -provider).",
            usage="search-file [-query <query>] [-store BACKEND] [-limit N] [-provider NAME]",
            arg=[
                CmdletArg(
                    "limit",
                    type="integer",
                    description="Limit results (default: 100)",
                ),
                SharedArgs.STORE,
                SharedArgs.QUERY,
                CmdletArg(
                    "provider",
                    type="string",
                    description="External provider name: bandcamp, libgen, soulseek, youtube, alldebrid, loc, internetarchive, hifi",
                ),
                CmdletArg(
                    "open",
                    type="integer",
                    description="(alldebrid) Open folder/magnet by ID and list its files",
                ),
            ],
            detail=[
                "Search across storage backends: Folder stores and Hydrus instances",
                "Use -store to search a specific backend by name",
                "URL search: url:* (any URL) or url:<value> (URL substring)",
                "Extension search: ext:<value> (e.g., ext:png)",
                "Hydrus-style extension: system:filetype = png",
                "Results include hash for downstream commands (get-file, add-tag, etc.)",
                "Examples:",
                "search-file -query foo                        # Search all storage backends",
                "search-file -store home -query '*'            # Search 'home' Hydrus instance",
                "search-file -store test -query 'video'        # Search 'test' folder store",
                "search-file -query 'hash:deadbeef...'         # Search by SHA256 hash",
                "search-file -query 'url:*'                    # Files that have any URL",
                "search-file -query 'url:youtube.com'          # Files whose URL contains substring",
                "search-file -query 'ext:png'                  # Files whose metadata ext is png",
                "search-file -query 'system:filetype = png'    # Hydrus: native; Folder: maps to metadata.ext",
                "",
                "Provider search (-provider):",
                "search-file -provider youtube 'tutorial'      # Search YouTube provider",
                "search-file -provider alldebrid '*'           # List AllDebrid magnets",
                "search-file -provider alldebrid -open 123 '*' # Show files for a magnet",
            ],
            exec=self.run,
        )
        self.register()

    # --- Helper methods -------------------------------------------------

    @staticmethod
    def _normalize_extension(ext_value: Any) -> str:
        """Sanitize extension strings to alphanumerics and cap at 5 chars."""
        ext = str(ext_value or "").strip().lstrip(".")
        for sep in (" ", "|", "(", "[", "{", ",", ";"):
            if sep in ext:
                ext = ext.split(sep, 1)[0]
                break
        if "." in ext:
            ext = ext.split(".")[-1]
        ext = "".join(ch for ch in ext if ch.isalnum())
        return ext[:5]
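
    # Illustrative behavior of _normalize_extension (derived from the logic above):
    #   ".tar.gz"      -> "gz"
    #   "JPEG (large)" -> "JPEG"
    #   "webm|vp9"     -> "webm"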

    @staticmethod
    def _get_hifi_view_from_query(query: str) -> str:
        text = str(query or "").strip()
        if not text:
            return "track"
        if re.search(r"\balbum\s*:", text, flags=re.IGNORECASE):
            return "album"
        if re.search(r"\bartist\s*:", text, flags=re.IGNORECASE):
            return "artist"
        return "track"

    def _ensure_storage_columns(self, payload: Dict[str, Any]) -> Dict[str, Any]:
        """Ensure storage results have the necessary fields for result_table display."""
        store_value = str(payload.get("store") or "").lower()
        if store_value not in STORAGE_ORIGINS:
            return payload
        # Ensure we have a title field
        if "title" not in payload:
            payload["title"] = (
                payload.get("name") or payload.get("target") or payload.get("path")
                or "Result"
            )
        # Ensure we have an ext field
        if "ext" not in payload:
            title = str(payload.get("title", ""))
            path_obj = Path(title)
            if path_obj.suffix:
                payload["ext"] = self._normalize_extension(path_obj.suffix.lstrip("."))
            else:
                payload["ext"] = payload.get("ext", "")
        # size_bytes is already set by search_file(); result_table formats it.
        # Don't create manual columns; let result_table handle display so the
        # table can respect max_columns and apply consistent formatting.
        return payload
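
    # Example: {"store": "folder", "path": "/tmp/clip.mp4"} gains
    # title="/tmp/clip.mp4" (path fallback) and ext="mp4" from the title suffix.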

    def _run_provider_search(
        self,
        *,
        provider_name: str,
        query: str,
        limit: int,
        limit_set: bool,
        open_id: Optional[int],
        args_list: List[str],
        refresh_mode: bool,
        config: Dict[str, Any],
    ) -> int:
        """Execute external provider search."""
        if not provider_name or not query:
            log("Error: search-file -provider requires both provider and query", file=sys.stderr)
            log(f"Usage: {self.usage}", file=sys.stderr)
            log("Available providers:", file=sys.stderr)
            providers = list_search_providers(config)
            for name, available in sorted(providers.items()):
                status = "\u2713" if available else "\u2717"
                log(f"  {status} {name}", file=sys.stderr)
            return 1
        # Align with the provider default when the user did not set -limit.
        if not limit_set:
            limit = 50
        debug(f"[search-file] provider={provider_name}, query={query}, limit={limit}, open_id={open_id}")
        provider = get_search_provider(provider_name, config)
        if not provider:
            log(f"Error: Provider '{provider_name}' is not available", file=sys.stderr)
            log("Available providers:", file=sys.stderr)
            providers = list_search_providers(config)
            for name, available in sorted(providers.items()):
                if available:
                    log(f"  - {name}", file=sys.stderr)
            return 1
        worker_id = str(uuid.uuid4())
        library_root = get_local_storage_path(config or {}) if get_local_storage_path else None
        db = None
        if library_root:
            try:
                from API.folder import API_folder_store

                db = API_folder_store(library_root)
                db.__enter__()
                db.insert_worker(
                    worker_id,
                    "search-file",
                    title=f"Search: {query}",
                    description=f"Provider: {provider_name}, Query: {query}",
                    pipe=ctx.get_current_command_text(),
                )
            except Exception:
                db = None
        try:
            results_list: List[Dict[str, Any]] = []
            from SYS import result_table

            importlib.reload(result_table)
            from SYS.result_table import ResultTable

            provider_text = str(provider_name or "").strip()
            provider_lower = provider_text.lower()

            # Allow an "id:<N>" / "id=<N>" token inside the query to act like -open.
            id_match = re.search(r"\bid\s*[=:]\s*(\d+)", query, flags=re.IGNORECASE)
            parsed_open_id = open_id
            if id_match and parsed_open_id is None:
                try:
                    parsed_open_id = int(id_match.group(1))
                except Exception:
                    parsed_open_id = None
            query = re.sub(r"\bid\s*[=:]\s*\d+", "", query, flags=re.IGNORECASE).strip()
            if not query:
                query = "*"
            effective_open_id = parsed_open_id if parsed_open_id is not None else open_id
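            # e.g. a query of "id:123" with no -open flag becomes query "*"
            # with effective_open_id=123.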

            if provider_lower == "youtube":
                provider_label = "Youtube"
            elif provider_lower == "openlibrary":
                provider_label = "OpenLibrary"
            elif provider_lower == "loc":
                provider_label = "LoC"
            else:
                provider_label = provider_text[:1].upper() + provider_text[1:] if provider_text else "Provider"
            if provider_lower == "alldebrid" and effective_open_id is not None:
                table_title = f"{provider_label} Files: {effective_open_id}".strip().rstrip(":")
            else:
                table_title = f"{provider_label}: {query}".strip().rstrip(":")
            preserve_order = provider_lower in {"youtube", "openlibrary", "loc", "torrent"}
            table_type = provider_name
            table_meta: Dict[str, Any] = {"provider": provider_name}
            if provider_lower == "hifi":
                view = self._get_hifi_view_from_query(query)
                table_meta["view"] = view
                table_type = f"hifi.{view}"
            elif provider_lower == "internetarchive":
                # Internet Archive search results are effectively folders (items); selecting @N
                # should open a list of downloadable files for the chosen item.
                table_type = "internetarchive.folder"
            table = ResultTable(table_title).set_preserve_order(preserve_order)
            table.set_table(table_type)
            if provider_lower == "alldebrid":
                table_meta["view"] = "files" if effective_open_id is not None else "folders"
                if effective_open_id is not None:
                    table_meta["magnet_id"] = effective_open_id
            try:
                table.set_table_metadata(table_meta)
            except Exception:
                pass
            table.set_source_command("search-file", list(args_list))
            debug(f"[search-file] Calling {provider_name}.search()")
            if provider_lower == "alldebrid":
                filters = {"view": "folders"}
                search_open_id = parsed_open_id if parsed_open_id is not None else open_id
                if search_open_id is not None:
                    filters = {"view": "files", "magnet_id": search_open_id}
                results = provider.search(query, limit=limit, filters=filters)
            else:
                results = provider.search(query, limit=limit)
            debug(f"[search-file] {provider_name} -> {len(results or [])} result(s)")

            # HIFI artist UX: if there is exactly one artist match, auto-expand
            # directly to albums without requiring an explicit @1 selection.
            if (
                provider_lower == "hifi"
                and table_meta.get("view") == "artist"
                and isinstance(results, list)
                and len(results) == 1
            ):
                try:
                    artist_res = results[0]
                    artist_name = str(getattr(artist_res, "title", "") or "").strip()
                    artist_md = getattr(artist_res, "full_metadata", None)
                    artist_id = None
                    if isinstance(artist_md, dict):
                        raw_id = artist_md.get("artistId") or artist_md.get("id")
                        try:
                            artist_id = int(raw_id) if raw_id is not None else None
                        except Exception:
                            artist_id = None
                    album_results = []
                    if callable(getattr(provider, "_albums_for_artist", None)):
                        try:
                            album_results = provider._albums_for_artist(  # type: ignore[attr-defined]
                                artist_id=artist_id,
                                artist_name=artist_name,
                                limit=max(int(limit or 0), 200),
                            )
                        except Exception:
                            album_results = []
                    if album_results:
                        results = album_results
                        table_type = "hifi.album"
                        table.set_table(table_type)
                        table_meta["view"] = "album"
                        try:
                            table.set_table_metadata(table_meta)
                        except Exception:
                            pass
                except Exception:
                    pass

            if not results:
                log(f"No results found for query: {query}", file=sys.stderr)
                if db is not None:
                    db.append_worker_stdout(worker_id, json.dumps([], indent=2))
                    db.update_worker_status(worker_id, "completed")
                return 0
            # If a ResultTable adapter is registered for this provider, use it to
            # build a column-aware table view; fall back to legacy behavior otherwise.
            try:
                from SYS.result_table_adapters import get_provider as _get_rprov

                try:
                    _rprov = _get_rprov(provider_lower)
                except Exception:
                    _rprov = None
            except Exception:
                _rprov = None
            if _rprov is not None:
                # Adapt provider-specific results into ResultModel instances.
                try:
                    adapted_rows = list(_rprov.adapter(results))
                except Exception:
                    adapted_rows = []
                cols = _rprov.get_columns(adapted_rows)
                # Build table rows from adapted models using ColumnSpec headers and extractors.
                for rm in adapted_rows:
                    # Collect the row's columns as (header, value) pairs.
                    columns_for_row = []
                    for c in cols:
                        try:
                            raw = c.extractor(rm)
                        except Exception:
                            raw = None
                        if c.format_fn:
                            try:
                                val = c.format_fn(raw)
                            except Exception:
                                val = raw
                        else:
                            val = raw
                        columns_for_row.append((c.header, val))
                    item_dict = {
                        "title": getattr(rm, "title", None) or "",
                        "path": getattr(rm, "path", None),
                        "ext": getattr(rm, "ext", None),
                        "size_bytes": getattr(rm, "size_bytes", None),
                        "metadata": getattr(rm, "metadata", None) or {},
                        "source": getattr(rm, "source", None) or provider_name,
                        "columns": columns_for_row,
                        "_selection_args": list(_rprov.selection_args(rm) or []),
                        "table": table_type,
                    }
                    table.add_result(item_dict)
                    results_list.append(item_dict)
                    ctx.emit(item_dict)
                # Render via the normal ResultTable pipeline (legacy rendering prints columns).
                if refresh_mode:
                    ctx.set_last_result_table_preserve_history(table, results_list)
                else:
                    ctx.set_last_result_table(table, results_list)
                ctx.set_current_stage_table(table)
                if db is not None:
                    db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2))
                    db.update_worker_status(worker_id, "completed")
                return 0

            # Fallback (legacy) behavior for providers without a ResultTable adapter.
            for search_result in results:
                item_dict = (
                    search_result.to_dict()
                    if hasattr(search_result, "to_dict")
                    else dict(search_result)
                    if isinstance(search_result, dict)
                    else {"title": str(search_result)}
                )
                if "table" not in item_dict:
                    item_dict["table"] = table_type
                table.add_result(search_result)
                results_list.append(item_dict)
                ctx.emit(item_dict)
            if refresh_mode:
                ctx.set_last_result_table_preserve_history(table, results_list)
            else:
                ctx.set_last_result_table(table, results_list)
            ctx.set_current_stage_table(table)
            if db is not None:
                db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2))
                db.update_worker_status(worker_id, "completed")
            return 0
        except Exception as exc:
            log(f"Error searching provider '{provider_name}': {exc}", file=sys.stderr)
            import traceback

            debug(traceback.format_exc())
            if db is not None:
                try:
                    db.update_worker_status(worker_id, "error")
                except Exception:
                    pass
            return 1
        finally:
            if db is not None:
                try:
                    db.__exit__(None, None, None)
                except Exception:
                    pass

    # --- Execution ------------------------------------------------------

    def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
        """Search storage backends for files."""
        if should_show_help(args):
            log(f"Cmdlet: {self.name}\nSummary: {self.summary}\nUsage: {self.usage}")
            return 0
        args_list = [str(arg) for arg in (args or [])]
        refresh_mode = any(
            str(a).strip().lower() in {"--refresh", "-refresh"} for a in args_list
        )

        def _format_command_title(command: str, raw_args: List[str]) -> str:
            def _quote(value: str) -> str:
                text = str(value)
                if not text:
                    return '""'
                needs_quotes = any(ch.isspace() for ch in text) or '"' in text
                if not needs_quotes:
                    return text
                return '"' + text.replace('"', '\\"') + '"'

            cleaned = [
                str(a) for a in (raw_args or [])
                if str(a).strip().lower() not in {"--refresh", "-refresh"}
            ]
            if not cleaned:
                return command
            return " ".join([command, *[_quote(a) for a in cleaned]])

        raw_title = None
        try:
            raw_title = (
                ctx.get_current_stage_text("")
                if hasattr(ctx, "get_current_stage_text")
                else None
            )
        except Exception:
            raw_title = None
        command_title = (str(raw_title).strip() if raw_title else "") or _format_command_title(
            "search-file", list(args_list)
        )

        # Build dynamic flag variants from cmdlet arg definitions.
        # This avoids hardcoding flag spellings in parsing loops.
        flag_registry = self.build_flag_registry()
        query_flags = {
            f.lower() for f in (flag_registry.get("query") or {"-query", "--query"})
        }
        store_flags = {
            f.lower() for f in (flag_registry.get("store") or {"-store", "--store"})
        }
        limit_flags = {
            f.lower() for f in (flag_registry.get("limit") or {"-limit", "--limit"})
        }
        provider_flags = {
            f.lower() for f in (flag_registry.get("provider") or {"-provider", "--provider"})
        }
        open_flags = {
            f.lower() for f in (flag_registry.get("open") or {"-open", "--open"})
        }
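        # build_flag_registry() presumably maps each arg name to its accepted
        # spellings, e.g. "limit" -> {"-limit", "--limit"}; the literal sets
        # above are only fallbacks for missing registry entries.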

        # Parse arguments
        query = ""
        storage_backend: Optional[str] = None
        provider_name: Optional[str] = None
        open_id: Optional[int] = None
        limit = 100
        limit_set = False
        searched_backends: List[str] = []
        i = 0
        while i < len(args_list):
            arg = args_list[i]
            low = arg.lower()
            if low in query_flags and i + 1 < len(args_list):
                chunk = args_list[i + 1]
                query = f"{query} {chunk}".strip() if query else chunk
                i += 2
                continue
            if low in provider_flags and i + 1 < len(args_list):
                provider_name = args_list[i + 1]
                i += 2
                continue
            if low in open_flags and i + 1 < len(args_list):
                try:
                    open_id = int(args_list[i + 1])
                except ValueError:
                    log(
                        f"Warning: Invalid open value '{args_list[i + 1]}', ignoring",
                        file=sys.stderr,
                    )
                    open_id = None
                i += 2
                continue
            if low in store_flags and i + 1 < len(args_list):
                storage_backend = args_list[i + 1]
                i += 2
            elif low in limit_flags and i + 1 < len(args_list):
                limit_set = True
                try:
                    limit = int(args_list[i + 1])
                except ValueError:
                    limit = 100
                i += 2
            elif not arg.startswith("-"):
                query = f"{query} {arg}".strip() if query else arg
                i += 1
            else:
                i += 1
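        # e.g. ["-provider", "alldebrid", "-open", "123", "*"] parses to
        # provider_name="alldebrid", open_id=123, query="*".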

        query = query.strip()
        if provider_name:
            return self._run_provider_search(
                provider_name=provider_name,
                query=query,
                limit=limit,
                limit_set=limit_set,
                open_id=open_id,
                args_list=args_list,
                refresh_mode=refresh_mode,
                config=config,
            )

        # Allow a "store:<name>" token inside the query to act like -store.
        store_filter: Optional[str] = None
        if query:
            match = re.search(r"\bstore:([^\s,]+)", query, flags=re.IGNORECASE)
            if match:
                store_filter = match.group(1).strip() or None
                query = re.sub(r"\s*[,]?\s*store:[^\s,]+", " ", query, flags=re.IGNORECASE)
                query = re.sub(r"\s{2,}", " ", query)
                query = query.strip().strip(",")
        if store_filter and not storage_backend:
            storage_backend = store_filter
        hash_query = parse_hash_query(query)
        if not query:
            log("Provide a search query", file=sys.stderr)
            return 1
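        # hash_query holds the digests extracted by parse_hash_query (presumably
        # normalized SHA256 hex from a "hash:..." query); empty otherwise.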

        from API.folder import API_folder_store

        worker_id = str(uuid.uuid4())
        library_root = get_local_storage_path(config or {})
        if not library_root:
            log("No library root configured", file=sys.stderr)
            return 1
        # Use a context manager so the worker database is always closed.
        with API_folder_store(library_root) as db:
            try:
                db.insert_worker(
                    worker_id,
                    "search-file",
                    title=f"Search: {query}",
                    description=f"Query: {query}",
                    pipe=ctx.get_current_command_text(),
                )
                results_list = []
                from SYS import result_table

                importlib.reload(result_table)
                from SYS.result_table import ResultTable

                table = ResultTable(command_title)
                try:
                    table.set_source_command("search-file", list(args_list))
                except Exception:
                    pass
                if hash_query:
                    try:
                        table.set_preserve_order(True)
                    except Exception:
                        pass

                from Store import Store

                storage = Store(config=config or {})
                from Store._base import Store as BaseStore

                backend_to_search = storage_backend or None
                if hash_query:
                    # Explicit hash list search: build rows from backend metadata.
                    backends_to_try: List[str] = []
                    if backend_to_search:
                        backends_to_try = [backend_to_search]
                    else:
                        backends_to_try = list(storage.list_backends())
                    found_any = False
                    for h in hash_query:
                        resolved_backend_name: Optional[str] = None
                        resolved_backend = None
                        for backend_name in backends_to_try:
                            try:
                                backend = storage[backend_name]
                            except Exception:
                                continue
                            try:
                                # If get_metadata works, consider it a hit; get_file can be optional (e.g. remote URL).
                                meta = backend.get_metadata(h)
                                if meta is None:
                                    continue
                                resolved_backend_name = backend_name
                                resolved_backend = backend
                                break
                            except Exception:
                                continue
                        if resolved_backend_name is None or resolved_backend is None:
                            continue
                        found_any = True
                        searched_backends.append(resolved_backend_name)
                        # Resolve a path/URL string if possible.
                        path_str: Optional[str] = None
                        # IMPORTANT: avoid calling get_file() for remote backends.
                        # For Hydrus, get_file() returns a browser URL (and may include access keys),
                        # which should not be pulled during search/refresh.
                        try:
                            if type(resolved_backend).__name__ == "Folder":
                                maybe_path = resolved_backend.get_file(h)
                                if isinstance(maybe_path, Path):
                                    path_str = str(maybe_path)
                                elif isinstance(maybe_path, str) and maybe_path:
                                    path_str = maybe_path
                        except Exception:
                            path_str = None
                        meta_obj: Dict[str, Any] = {}
                        try:
                            meta_obj = resolved_backend.get_metadata(h) or {}
                        except Exception:
                            meta_obj = {}
                        tags_list: List[str] = []
                        try:
                            tag_result = resolved_backend.get_tag(h)
                            if isinstance(tag_result, tuple) and tag_result:
                                maybe_tags = tag_result[0]
                            else:
                                maybe_tags = tag_result
                            if isinstance(maybe_tags, list):
                                tags_list = [
                                    str(t).strip() for t in maybe_tags
                                    if isinstance(t, str) and str(t).strip()
                                ]
                        except Exception:
                            tags_list = []
                        title_from_tag: Optional[str] = None
                        try:
                            title_tag = first_title_tag(tags_list)
                            if title_tag and ":" in title_tag:
                                title_from_tag = title_tag.split(":", 1)[1].strip()
                        except Exception:
                            title_from_tag = None
                        title = title_from_tag or meta_obj.get("title") or meta_obj.get("name")
                        if not title and path_str:
                            try:
                                title = Path(path_str).stem
                            except Exception:
                                title = path_str
                        ext_val = meta_obj.get("ext") or meta_obj.get("extension")
                        if not ext_val and path_str:
                            try:
                                ext_val = Path(path_str).suffix
                            except Exception:
                                ext_val = None
                        if not ext_val and title:
                            try:
                                ext_val = Path(str(title)).suffix
                            except Exception:
                                ext_val = None
                        size_bytes = meta_obj.get("size")
                        if size_bytes is None:
                            size_bytes = meta_obj.get("size_bytes")
                        try:
                            size_bytes_int: Optional[int] = (
                                int(size_bytes) if size_bytes is not None else None
                            )
                        except Exception:
                            size_bytes_int = None
                        payload: Dict[str, Any] = {
                            "title": str(title or h),
                            "hash": h,
                            "store": resolved_backend_name,
                            "path": path_str,
                            "ext": self._normalize_extension(ext_val),
                            "size_bytes": size_bytes_int,
                            "tag": tags_list,
                        }
                        table.add_result(payload)
                        results_list.append(payload)
                        ctx.emit(payload)
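                    # Each emitted payload carries hash/store so downstream cmdlets
                    # (get-file, add-tag, ...) can act on rows without re-searching.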
                    if found_any:
                        table.title = command_title
                        if refresh_mode:
                            ctx.set_last_result_table_preserve_history(table, results_list)
                        else:
                            ctx.set_last_result_table(table, results_list)
                        db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2))
                        db.update_worker_status(worker_id, "completed")
                        return 0
                    log("No results found", file=sys.stderr)
                    if refresh_mode:
                        try:
                            table.title = command_title
                            ctx.set_last_result_table_preserve_history(table, [])
                        except Exception:
                            pass
                    db.append_worker_stdout(worker_id, json.dumps([], indent=2))
                    db.update_worker_status(worker_id, "completed")
                    return 0
                if backend_to_search:
                    searched_backends.append(backend_to_search)
                    target_backend = storage[backend_to_search]
                    # A backend supports searching only if it overrides BaseStore.search.
                    if type(target_backend).search is BaseStore.search:
                        log(
                            f"Backend '{backend_to_search}' does not support searching",
                            file=sys.stderr,
                        )
                        db.update_worker_status(worker_id, "error")
                        return 1
                    debug(f"[search-file] Searching '{backend_to_search}'")
                    results = target_backend.search(query, limit=limit)
                    debug(
                        f"[search-file] '{backend_to_search}' -> {len(results or [])} result(s)"
                    )
                else:
                    all_results = []
                    for backend_name in storage.list_searchable_backends():
                        try:
                            backend = storage[backend_name]
                            searched_backends.append(backend_name)
                            debug(f"[search-file] Searching '{backend_name}'")
                            backend_results = backend.search(
                                query, limit=limit - len(all_results)
                            )
                            debug(
                                f"[search-file] '{backend_name}' -> {len(backend_results or [])} result(s)"
                            )
                            if backend_results:
                                all_results.extend(backend_results)
                            if len(all_results) >= limit:
                                break
                        except Exception as exc:
                            log(
                                f"Backend {backend_name} search failed: {exc}",
                                file=sys.stderr,
                            )
                results = all_results[:limit]

                def _as_dict(obj: Any) -> Dict[str, Any]:
                    if isinstance(obj, dict):
                        return dict(obj)
                    if callable(getattr(obj, "to_dict", None)):
                        return obj.to_dict()  # type: ignore[arg-type]
                    return {"title": str(obj)}

                if results:
                    for item in results:
                        item_dict = _as_dict(item)
                        if store_filter:
                            store_val = str(item_dict.get("store") or "").lower()
                            if store_filter.lower() != store_val:
                                continue
                        normalized = self._ensure_storage_columns(item_dict)
                        # Make hash/store available for downstream cmdlets without rerunning search.
                        hash_val = normalized.get("hash")
                        store_val = normalized.get("store") or item_dict.get("store")
                        if hash_val and not normalized.get("hash"):
                            normalized["hash"] = hash_val
                        if store_val and not normalized.get("store"):
                            normalized["store"] = store_val
                        table.add_result(normalized)
                        results_list.append(normalized)
                        ctx.emit(normalized)
                    table.title = command_title
                    if refresh_mode:
                        ctx.set_last_result_table_preserve_history(table, results_list)
                    else:
                        ctx.set_last_result_table(table, results_list)
                    db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2))
                else:
                    log("No results found", file=sys.stderr)
                    if refresh_mode:
                        try:
                            table.title = command_title
                            ctx.set_last_result_table_preserve_history(table, [])
                        except Exception:
                            pass
                    db.append_worker_stdout(worker_id, json.dumps([], indent=2))
                db.update_worker_status(worker_id, "completed")
                return 0
            except Exception as exc:
                log(f"Search failed: {exc}", file=sys.stderr)
                import traceback

                traceback.print_exc(file=sys.stderr)
                try:
                    db.update_worker_status(worker_id, "error")
                except Exception:
                    pass
                return 1


CMDLET = search_file()
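
# Minimal manual smoke test (illustrative sketch; assumes a configured library
# root and at least one searchable backend, which this module does not guarantee):
#
#     CMDLET.run(None, ["-query", "*", "-limit", "5"], {})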