2025-12-30 23:19:02 -08:00
parent a97657a757
commit 3bbaa28fb4
17 changed files with 1735 additions and 558 deletions

View File

@@ -38,6 +38,95 @@ from SYS.metadata import write_metadata
SUPPORTED_MEDIA_EXTENSIONS = ALL_SUPPORTED_EXTENSIONS
def _maybe_apply_florencevision_tags(
media_path: Path,
tags: List[str],
config: Dict[str, Any],
pipe_obj: Optional[models.PipeObject] = None,
) -> List[str]:
"""Optionally auto-tag images using the FlorenceVision tool.
Controlled via config:
[tool=florencevision]
enabled=true
strict=false
If strict=false (default), failures log a warning and return the original tags.
If strict=true, failures raise to abort the ingest.
"""
strict = False  # defined before the try so the except handler can reference it safely
try:
tool_block = (config or {}).get("tool")
fv_block = tool_block.get("florencevision") if isinstance(tool_block, dict) else None
enabled = False
strict = False
if isinstance(fv_block, dict):
enabled = bool(fv_block.get("enabled"))
strict = bool(fv_block.get("strict"))
if not enabled:
return tags
from tool.florencevision import FlorenceVisionTool
# Special-case: if this file was produced by the `screen-shot` cmdlet,
# OCR is more useful than caption/detection for tagging screenshots.
cfg_for_tool: Dict[str, Any] = config
try:
action = str(getattr(pipe_obj, "action", "") or "") if pipe_obj is not None else ""
cmdlet_name = ""
if action.lower().startswith("cmdlet:"):
cmdlet_name = action.split(":", 1)[1].strip().lower()
if cmdlet_name in {"screen-shot", "screen_shot", "screenshot"}:
tool_block2 = dict((config or {}).get("tool") or {})
fv_block2 = dict(tool_block2.get("florencevision") or {})
fv_block2["task"] = "ocr"
tool_block2["florencevision"] = fv_block2
cfg_for_tool = dict(config or {})
cfg_for_tool["tool"] = tool_block2
except Exception:
cfg_for_tool = config
fv = FlorenceVisionTool(cfg_for_tool)
if not fv.enabled() or not fv.applicable_path(media_path):
return tags
auto_tags = fv.tags_for_file(media_path)
# Capture caption (if any) into PipeObject notes for downstream persistence.
try:
caption_text = getattr(fv, "last_caption", None)
if caption_text and pipe_obj is not None:
if not isinstance(pipe_obj.extra, dict):
pipe_obj.extra = {}
notes = pipe_obj.extra.get("notes")
if not isinstance(notes, dict):
notes = {}
notes.setdefault("caption", caption_text)
pipe_obj.extra["notes"] = notes
except Exception:
pass
if not auto_tags:
return tags
merged = merge_sequences(tags or [], auto_tags, case_sensitive=False)
debug(f"[add-file] FlorenceVision added {len(auto_tags)} tag(s)")
return merged
except Exception as exc:
# Decide strictness from config if we couldn't read it above.
strict2 = False
try:
tool_block = (config or {}).get("tool")
fv_block = tool_block.get("florencevision") if isinstance(tool_block, dict) else None
strict2 = bool(fv_block.get("strict")) if isinstance(fv_block, dict) else False
except Exception:
strict2 = False
if strict or strict2:
raise
log(f"[add-file] Warning: FlorenceVision tagging failed: {exc}", file=sys.stderr)
return tags
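For reference, a minimal sketch of the config shape this helper reads (the key names mirror the docstring above; the literal values are illustrative, not taken from the project's real config loader):
# Hypothetical config dict, shaped the way _maybe_apply_florencevision_tags reads it.
config = {
    "tool": {
        "florencevision": {
            "enabled": True,   # opt in to auto-tagging
            "strict": False,   # False: warn and keep original tags on failure
        },
    },
}
# tags = _maybe_apply_florencevision_tags(Path("shot.png"), ["existing"], config)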
class Add_File(Cmdlet):
"""Add file into the DB"""
@@ -349,14 +438,14 @@ class Add_File(Cmdlet):
successes = 0
failures = 0
# When add-file -store is the last stage, always show a final search-store table.
# When add-file -store is the last stage, always show a final search-file table.
# This is especially important for multi-item ingests (e.g., multi-clip downloads)
# so the user always gets a selectable ResultTable.
want_final_search_store = (
want_final_search_file = (
bool(is_last_stage) and bool(is_storage_backend_location)
and bool(location)
)
auto_search_store_after_add = False
auto_search_file_after_add = False
# When ingesting multiple items into a backend store, defer URL association and
# apply it once at the end (bulk) to avoid per-item URL API calls.
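A sketch of the deferral pattern this comment describes, assuming a dict keyed by hash (the real container's shape is not visible in this hunk, and backend.set_url is an assumed method used only for illustration):
pending_url_associations: dict[str, list[str]] = {}

def queue_urls(file_hash: str, urls: list[str]) -> None:
    # Per-item: just record, no URL API call yet.
    pending_url_associations.setdefault(file_hash, []).extend(urls)

def flush_urls(backend) -> None:
    # Once, at the end of the ingest: one association call per file.
    for file_hash, urls in pending_url_associations.items():
        try:
            backend.set_url(file_hash, urls)  # assumed backend method
        except Exception:
            pass  # best-effort, matching the cmdlet's tolerance elsewhere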
@@ -879,9 +968,9 @@ class Add_File(Cmdlet):
pending_url_associations=
pending_url_associations,
suppress_last_stage_overlay=
want_final_search_store,
auto_search_store=
auto_search_store_after_add,
want_final_search_file,
auto_search_file=
auto_search_file_after_add,
)
else:
code = self._handle_local_export(
@@ -1005,8 +1094,8 @@ class Add_File(Cmdlet):
collect_relationship_pairs=pending_relationship_pairs,
defer_url_association=defer_url_association,
pending_url_associations=pending_url_associations,
suppress_last_stage_overlay=want_final_search_store,
auto_search_store=auto_search_store_after_add,
suppress_last_stage_overlay=want_final_search_file,
auto_search_file=auto_search_file_after_add,
)
else:
code = self._handle_local_export(
@@ -1053,7 +1142,7 @@ class Add_File(Cmdlet):
# Always end add-file -store (when last stage) by showing the canonical store table.
# This keeps output consistent and ensures @N selection works for multi-item ingests.
if want_final_search_store and collected_payloads:
if want_final_search_file and collected_payloads:
try:
hashes: List[str] = []
for payload in collected_payloads:
@@ -1064,7 +1153,7 @@ class Add_File(Cmdlet):
seen: set[str] = set()
hashes = [h for h in hashes if not (h in seen or seen.add(h))]
refreshed_items = Add_File._try_emit_search_store_by_hashes(
refreshed_items = Add_File._try_emit_search_file_by_hashes(
store=str(location),
hash_values=hashes,
config=config,
@@ -1102,29 +1191,29 @@ class Add_File(Cmdlet):
return 1
@staticmethod
def _try_emit_search_store_by_hashes(
def _try_emit_search_file_by_hashes(
*,
store: str,
hash_values: List[str],
config: Dict[str,
Any]
) -> Optional[List[Any]]:
"""Run search-store for a list of hashes and promote the table to a display overlay.
"""Run search-file for a list of hashes and promote the table to a display overlay.
Returns the emitted search-store payload items on success, else None.
Returns the emitted search-file payload items on success, else None.
"""
hashes = [h for h in (hash_values or []) if isinstance(h, str) and len(h) == 64]
if not store or not hashes:
return None
try:
from cmdlet.search_store import CMDLET as search_store_cmdlet
from cmdlet.search_file import CMDLET as search_file_cmdlet
query = "hash:" + ",".join(hashes)
args = ["-store", str(store), query]
debug(f'[add-file] Refresh: search-store -store {store} "{query}"')
debug(f'[add-file] Refresh: search-file -store {store} "{query}"')
# Run search-store under a temporary stage context so its ctx.emit() calls
# Run search-file under a temporary stage context so its ctx.emit() calls
# don't interfere with the outer add-file pipeline stage.
prev_ctx = ctx.get_stage_context()
temp_ctx = ctx.PipelineStageContext(
@@ -1137,7 +1226,7 @@ class Add_File(Cmdlet):
)
ctx.set_stage_context(temp_ctx)
try:
code = search_store_cmdlet.run(None, args, config)
code = search_file_cmdlet.run(None, args, config)
emitted_items = list(getattr(temp_ctx, "emits", []) or [])
finally:
ctx.set_stage_context(prev_ctx)
@@ -1145,7 +1234,7 @@ class Add_File(Cmdlet):
if code != 0:
return None
# Promote the search-store result to a display overlay so the CLI prints it
# Promote the search-file result to a display overlay so the CLI prints it
# for action commands like add-file.
stage_ctx = ctx.get_stage_context()
is_last = (stage_ctx
@@ -1171,7 +1260,7 @@ class Add_File(Cmdlet):
return emitted_items
except Exception as exc:
debug(
f"[add-file] Failed to run search-store after add-file: {type(exc).__name__}: {exc}"
f"[add-file] Failed to run search-file after add-file: {type(exc).__name__}: {exc}"
)
return None
@@ -2109,7 +2198,7 @@ class Add_File(Cmdlet):
"""Emit a storage-style result payload.
- Always emits the dict downstream (when in a pipeline).
- If this is the last stage (or not in a pipeline), prints a search-store-like table
- If this is the last stage (or not in a pipeline), prints a search-file-like table
and sets an overlay table/items for @N selection.
"""
# Emit for downstream commands (no-op if not in a pipeline)
@@ -2139,28 +2228,28 @@ class Add_File(Cmdlet):
pass
@staticmethod
def _try_emit_search_store_by_hash(
def _try_emit_search_file_by_hash(
*,
store: str,
hash_value: str,
config: Dict[str,
Any]
) -> Optional[List[Any]]:
"""Run search-store for a single hash so the final table/payload is consistent.
"""Run search-file for a single hash so the final table/payload is consistent.
Important: `add-file` is treated as an action command by the CLI, so the CLI only
prints tables for it when a display overlay exists. After running search-store,
prints tables for it when a display overlay exists. After running search-file,
this copies the resulting table into the display overlay (when this is the last
stage) so the canonical store table is what the user sees and can select from.
Returns the emitted search-store payload items on success, else None.
Returns the emitted search-file payload items on success, else None.
"""
try:
from cmdlet.search_store import CMDLET as search_store_cmdlet
from cmdlet.search_file import CMDLET as search_file_cmdlet
args = ["-store", str(store), f"hash:{str(hash_value)}"]
# Run search-store under a temporary stage context so its ctx.emit() calls
# Run search-file under a temporary stage context so its ctx.emit() calls
# don't interfere with the outer add-file pipeline stage.
prev_ctx = ctx.get_stage_context()
temp_ctx = ctx.PipelineStageContext(
@@ -2173,14 +2262,14 @@ class Add_File(Cmdlet):
)
ctx.set_stage_context(temp_ctx)
try:
code = search_store_cmdlet.run(None, args, config)
code = search_file_cmdlet.run(None, args, config)
emitted_items = list(getattr(temp_ctx, "emits", []) or [])
finally:
ctx.set_stage_context(prev_ctx)
if code != 0:
return None
# Promote the search-store result to a display overlay so the CLI prints it
# Promote the search-file result to a display overlay so the CLI prints it
# for action commands like add-file.
stage_ctx = ctx.get_stage_context()
is_last = (stage_ctx
@@ -2206,7 +2295,7 @@ class Add_File(Cmdlet):
return emitted_items
except Exception as exc:
debug(
f"[add-file] Failed to run search-store after add-file: {type(exc).__name__}: {exc}"
f"[add-file] Failed to run search-file after add-file: {type(exc).__name__}: {exc}"
)
return None
@@ -3097,7 +3186,7 @@ class Add_File(Cmdlet):
List[tuple[str,
List[str]]]]] = None,
suppress_last_stage_overlay: bool = False,
auto_search_store: bool = True,
auto_search_file: bool = True,
) -> int:
"""Handle uploading to a registered storage backend (e.g., 'test' folder store, 'hydrus', etc.)."""
##log(f"Adding file to storage backend '{backend_name}': {media_path.name}", file=sys.stderr)
@@ -3217,6 +3306,15 @@ class Add_File(Cmdlet):
)
]
# Auto-tag (best-effort) BEFORE uploading so tags land with the stored file.
try:
tags = _maybe_apply_florencevision_tags(media_path, list(tags or []), config, pipe_obj=pipe_obj)
pipe_obj.tag = list(tags or [])
except Exception as exc:
# strict mode raises from helper; treat here as a hard failure
log(f"[add-file] FlorenceVision tagging error: {exc}", file=sys.stderr)
return 1
# Call backend's add_file with full metadata
# Backend returns hash as identifier
file_identifier = backend.add_file(
@@ -3254,7 +3352,7 @@ class Add_File(Cmdlet):
},
)
# Emit a search-store-like payload for consistent tables and natural piping.
# Emit a search-file-like payload for consistent tables and natural piping.
# Keep hash/store for downstream commands (get-tag, get-file, etc.).
resolved_hash = (
file_identifier if len(file_identifier) == 64 else
@@ -3299,6 +3397,15 @@ class Add_File(Cmdlet):
except Exception:
pass
caption_note = Add_File._get_note_text(result, pipe_obj, "caption")
if caption_note:
try:
setter = getattr(backend, "set_note", None)
if callable(setter):
setter(resolved_hash, "caption", caption_note)
except Exception:
pass
meta: Dict[str,
Any] = {}
try:
@@ -3350,16 +3457,16 @@ class Add_File(Cmdlet):
pass
# Keep the add-file 1-row summary overlay (when last stage), then emit the
# canonical search-store payload/table for piping/selection consistency.
if auto_search_store and resolved_hash and resolved_hash != "unknown":
# Show the add-file summary (overlay only) but let search-store provide the downstream payload.
# canonical search-file payload/table for piping/selection consistency.
if auto_search_file and resolved_hash and resolved_hash != "unknown":
# Show the add-file summary (overlay only) but let search-file provide the downstream payload.
Add_File._emit_storage_result(
payload,
overlay=not suppress_last_stage_overlay,
emit=False
)
refreshed_items = Add_File._try_emit_search_store_by_hash(
refreshed_items = Add_File._try_emit_search_file_by_hash(
store=backend_name,
hash_value=resolved_hash,
config=config,

View File

@@ -1079,13 +1079,13 @@ class Download_File(Cmdlet):
f"[download-file] Not available on OpenLibrary; searching LibGen for: {title_text}",
file=sys.stderr,
)
from cmdlet.search_provider import CMDLET as _SEARCH_PROVIDER_CMDLET
from cmdlet.search_file import CMDLET as _SEARCH_FILE_CMDLET
fallback_query = title_text
exec_fn = getattr(_SEARCH_PROVIDER_CMDLET, "exec", None)
exec_fn = getattr(_SEARCH_FILE_CMDLET, "exec", None)
if not callable(exec_fn):
log(
"[download-file] search-provider cmdlet unavailable; cannot run LibGen fallback search",
"[download-file] search-file cmdlet unavailable; cannot run LibGen fallback search",
file=sys.stderr,
)
continue
@@ -1099,7 +1099,7 @@ class Download_File(Cmdlet):
config,
)
# Promote the search-provider table to a display overlay so it renders.
# Promote the search-file table to a display overlay so it renders.
try:
table_obj = pipeline_context.get_last_result_table()
items_obj = pipeline_context.get_last_result_items()

View File

@@ -1469,6 +1469,17 @@ class Download_Media(Cmdlet):
clip_values: List[str] = []
item_values: List[str] = []
def _uniq(values: Sequence[str]) -> List[str]:
seen: set[str] = set()
out: List[str] = []
for v in values:
key = str(v)
if key in seen:
continue
seen.add(key)
out.append(v)
return out
if clip_spec:
# Support keyed clip syntax:
# -query "clip:3m4s-3m14s,1h22m-1h33m,item:2-3"
@@ -1482,6 +1493,10 @@ class Download_Media(Cmdlet):
clip_values.extend(query_keyed.get("clip", []) or [])
item_values.extend(query_keyed.get("item", []) or [])
# QueryArg also hydrates clip via -query, so combine and deduplicate here
clip_values = _uniq(clip_values)
item_values = _uniq(item_values)
if item_values and not parsed.get("item"):
parsed["item"] = ",".join([v for v in item_values if v])

View File

@@ -27,6 +27,8 @@ class UrlItem:
hash: str
store: str
title: str = ""
size: int | None = None
ext: str = ""
class Get_Url(Cmdlet):
@@ -183,6 +185,58 @@ class Get_Url(Cmdlet):
return ""
@staticmethod
def _resolve_size_ext_for_hash(backend: Any, file_hash: str, hit: Any = None) -> tuple[int | None, str]:
"""Best-effort (size, ext) resolution for a found hash."""
# First: see if the hit already includes these fields.
try:
size_val = get_field(hit, "size")
if size_val is None:
size_val = get_field(hit, "file_size")
if size_val is None:
size_val = get_field(hit, "filesize")
if size_val is None:
size_val = get_field(hit, "size_bytes")
size_int = int(size_val) if isinstance(size_val, (int, float)) else None
except Exception:
size_int = None
try:
ext_val = get_field(hit, "ext")
if ext_val is None:
ext_val = get_field(hit, "extension")
ext = str(ext_val).strip().lstrip(".") if isinstance(ext_val, str) else ""
except Exception:
ext = ""
if size_int is not None or ext:
return size_int, ext
# Next: backend.get_metadata(hash) when available.
try:
if hasattr(backend, "get_metadata"):
meta = backend.get_metadata(file_hash)
if isinstance(meta, dict):
size_val2 = meta.get("size")
if size_val2 is None:
size_val2 = meta.get("file_size")
if size_val2 is None:
size_val2 = meta.get("filesize")
if size_val2 is None:
size_val2 = meta.get("size_bytes")
if isinstance(size_val2, (int, float)):
size_int = int(size_val2)
ext_val2 = meta.get("ext")
if ext_val2 is None:
ext_val2 = meta.get("extension")
if isinstance(ext_val2, str) and ext_val2.strip():
ext = ext_val2.strip().lstrip(".")
except Exception:
pass
return size_int, ext
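The repeated None-fallback chains above could be condensed with a small helper; a hypothetical sketch (not part of this commit):
from typing import Any, Callable, Iterable, Optional

def first_present(get: Callable[[str], Any], keys: Iterable[str]) -> Optional[Any]:
    # Return the first non-None value among several candidate field names.
    for key in keys:
        value = get(key)
        if value is not None:
            return value
    return None

# e.g. size_val = first_present(lambda k: get_field(hit, k),
#                               ("size", "file_size", "filesize", "size_bytes"))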
def _search_urls_across_stores(self,
pattern: str,
config: Dict[str,
@@ -210,6 +264,7 @@ class Get_Url(Cmdlet):
backend = storage[store_name]
title_cache: Dict[str, str] = {}
meta_cache: Dict[str, tuple[int | None, str]] = {}
# Search only URL-bearing records using the backend's URL search capability.
# This avoids the expensive/incorrect "search('*')" scan.
@@ -250,6 +305,11 @@ class Get_Url(Cmdlet):
title = self._resolve_title_for_hash(backend, file_hash, hit)
title_cache[file_hash] = title
size, ext = meta_cache.get(file_hash, (None, ""))
if size is None and not ext:
size, ext = self._resolve_size_ext_for_hash(backend, file_hash, hit)
meta_cache[file_hash] = (size, ext)
try:
urls = backend.get_url(file_hash)
except Exception:
@@ -264,6 +324,8 @@ class Get_Url(Cmdlet):
hash=str(file_hash),
store=str(store_name),
title=str(title or ""),
size=size,
ext=str(ext or ""),
)
)
found_stores.add(str(store_name))
@@ -308,22 +370,44 @@ class Get_Url(Cmdlet):
log(f"No urls matching pattern: {search_pattern}", file=sys.stderr)
return 1
# NOTE: The CLI can auto-render tables from emitted items. When emitting
# dataclass objects, the generic-object renderer will include `hash` as a
# visible column. To keep HASH available for chaining but hidden from the
# table, emit dicts (dict rendering hides `hash`) and provide an explicit
# `columns` list to force display order and size formatting.
display_items: List[Dict[str, Any]] = []
table = (
ResultTable(
"URL Search Results",
max_columns=3
).set_preserve_order(True).set_table("urls").set_value_case("preserve")
"url",
max_columns=5
).set_preserve_order(True).set_table("url").set_value_case("preserve")
)
table.set_source_command("get-url", ["-url", search_pattern])
for item in items:
row = table.add_row()
row.add_column("Title", item.title)
row.add_column("Url", item.url)
row.add_column("Store", item.store)
ctx.emit(item)
payload: Dict[str, Any] = {
# Keep fields for downstream cmdlets.
"hash": item.hash,
"store": item.store,
"url": item.url,
"title": item.title,
"size": item.size,
"ext": item.ext,
# Force the visible table columns + ordering.
"columns": [
("Title", item.title),
("Url", item.url),
("Size", item.size),
("Ext", item.ext),
("Store", item.store),
],
}
display_items.append(payload)
table.add_result(payload)
ctx.emit(payload)
ctx.set_last_result_table(table if items else None, items, subject=result)
ctx.set_last_result_table(table if display_items else None, display_items, subject=result)
log(
f"Found {len(items)} matching url(s) in {len(stores_searched)} store(s)"
)

View File

@@ -1,15 +1,18 @@
"""Search-store cmdlet: Search for files in storage backends (Folder, Hydrus)."""
"""search-file cmdlet: Search for files in storage backends (Folder, Hydrus)."""
from __future__ import annotations
from typing import Any, Dict, Sequence, List, Optional
import importlib
import uuid
from pathlib import Path
from collections import OrderedDict
import re
import json
import sys
from SYS.logger import log, debug
from ProviderCore.registry import get_search_provider, list_search_providers
from SYS.config import get_local_storage_path
from . import _shared as sh
@@ -39,14 +42,14 @@ STORAGE_ORIGINS = {"local",
"folder"}
class Search_Store(Cmdlet):
"""Class-based search-store cmdlet for searching storage backends."""
class search_file(Cmdlet):
"""Class-based search-file cmdlet for searching storage backends."""
def __init__(self) -> None:
super().__init__(
name="search-store",
summary="Search storage backends (Folder, Hydrus) for files.",
usage="search-store [-query <query>] [-store BACKEND] [-limit N]",
name="search-file",
summary="Search storage backends (Folder, Hydrus) or external providers (via -provider).",
usage="search-file [-query <query>] [-store BACKEND] [-limit N] [-provider NAME]",
arg=[
CmdletArg(
"limit",
@@ -55,6 +58,17 @@ class Search_Store(Cmdlet):
),
SharedArgs.STORE,
SharedArgs.QUERY,
CmdletArg(
"provider",
type="string",
description=
"External provider name: bandcamp, libgen, soulseek, youtube, alldebrid, loc, internetarchive",
),
CmdletArg(
"open",
type="integer",
description="(alldebrid) Open folder/magnet by ID and list its files",
),
],
detail=[
"Search across storage backends: Folder stores and Hydrus instances",
@@ -64,14 +78,19 @@ class Search_Store(Cmdlet):
"Hydrus-style extension: system:filetype = png",
"Results include hash for downstream commands (get-file, add-tag, etc.)",
"Examples:",
"search-store -query foo # Search all storage backends",
"search-store -store home -query '*' # Search 'home' Hydrus instance",
"search-store -store test -query 'video' # Search 'test' folder store",
"search-store -query 'hash:deadbeef...' # Search by SHA256 hash",
"search-store -query 'url:*' # Files that have any URL",
"search-store -query 'url:youtube.com' # Files whose URL contains substring",
"search-store -query 'ext:png' # Files whose metadata ext is png",
"search-store -query 'system:filetype = png' # Hydrus: native; Folder: maps to metadata.ext",
"search-file -query foo # Search all storage backends",
"search-file -store home -query '*' # Search 'home' Hydrus instance",
"search-file -store test -query 'video' # Search 'test' folder store",
"search-file -query 'hash:deadbeef...' # Search by SHA256 hash",
"search-file -query 'url:*' # Files that have any URL",
"search-file -query 'url:youtube.com' # Files whose URL contains substring",
"search-file -query 'ext:png' # Files whose metadata ext is png",
"search-file -query 'system:filetype = png' # Hydrus: native; Folder: maps to metadata.ext",
"",
"Provider search (-provider):",
"search-file -provider youtube 'tutorial' # Search YouTube provider",
"search-file -provider alldebrid '*' # List AllDebrid magnets",
"search-file -provider alldebrid -open 123 '*' # Show files for a magnet",
],
exec=self.run,
)
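For the Folder side, the ext:/system:filetype forms presumably reduce to a comparison against metadata.ext; a hypothetical normalization sketch (helper name and exact grammar are assumptions, not this cmdlet's parser):
import re

def normalize_ext_query(query: str) -> str | None:
    # 'ext:png' or 'system:filetype = png' -> 'png'; anything else -> None.
    q = query.strip()
    m = re.match(r"^ext:(\w+)$", q, flags=re.IGNORECASE)
    if not m:
        m = re.match(r"^system:filetype\s*=\s*(\w+)$", q, flags=re.IGNORECASE)
    return m.group(1).lower() if m else None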
@@ -120,6 +139,172 @@ class Search_Store(Cmdlet):
# This allows the table to respect max_columns and apply consistent formatting
return payload
def _run_provider_search(
self,
*,
provider_name: str,
query: str,
limit: int,
limit_set: bool,
open_id: Optional[int],
args_list: List[str],
refresh_mode: bool,
config: Dict[str, Any],
) -> int:
"""Execute external provider search."""
if not provider_name or not query:
log("Error: search-file -provider requires both provider and query", file=sys.stderr)
log(f"Usage: {self.usage}", file=sys.stderr)
log("Available providers:", file=sys.stderr)
providers = list_search_providers(config)
for name, available in sorted(providers.items()):
status = "\u2713" if available else "\u2717"
log(f" {status} {name}", file=sys.stderr)
return 1
# Align with provider default when user did not set -limit.
if not limit_set:
limit = 50
debug(f"[search-file] provider={provider_name}, query={query}, limit={limit}, open_id={open_id}")
provider = get_search_provider(provider_name, config)
if not provider:
log(f"Error: Provider '{provider_name}' is not available", file=sys.stderr)
log("Available providers:", file=sys.stderr)
providers = list_search_providers(config)
for name, available in sorted(providers.items()):
if available:
log(f" - {name}", file=sys.stderr)
return 1
worker_id = str(uuid.uuid4())
library_root = get_local_storage_path(config or {}) if get_local_storage_path else None
db = None
if library_root:
try:
from API.folder import API_folder_store
db = API_folder_store(library_root)
db.__enter__()
db.insert_worker(
worker_id,
"search-file",
title=f"Search: {query}",
description=f"Provider: {provider_name}, Query: {query}",
pipe=ctx.get_current_command_text(),
)
except Exception:
db = None
try:
results_list: List[Dict[str, Any]] = []
from SYS import result_table
importlib.reload(result_table)
from SYS.result_table import ResultTable
provider_text = str(provider_name or "").strip()
provider_lower = provider_text.lower()
if provider_lower == "youtube":
provider_label = "Youtube"
elif provider_lower == "openlibrary":
provider_label = "OpenLibrary"
elif provider_lower == "loc":
provider_label = "LoC"
else:
provider_label = provider_text[:1].upper() + provider_text[1:] if provider_text else "Provider"
if provider_lower == "alldebrid" and open_id is not None:
table_title = f"{provider_label} Files: {open_id}".strip().rstrip(":")
else:
table_title = f"{provider_label}: {query}".strip().rstrip(":")
preserve_order = provider_lower in {"youtube", "openlibrary", "loc"}
table = ResultTable(table_title).set_preserve_order(preserve_order)
table.set_table(provider_name)
table.set_source_command("search-file", list(args_list))
debug(f"[search-file] Calling {provider_name}.search()")
if provider_lower == "alldebrid":
if open_id is not None:
results = provider.search(query, limit=limit, filters={"view": "files", "magnet_id": open_id})
else:
results = provider.search(query, limit=limit, filters={"view": "folders"})
else:
results = provider.search(query, limit=limit)
debug(f"[search-file] {provider_name} -> {len(results or [])} result(s)")
if not results:
log(f"No results found for query: {query}", file=sys.stderr)
if db is not None:
db.append_worker_stdout(worker_id, json.dumps([], indent=2))
db.update_worker_status(worker_id, "completed")
return 0
for search_result in results:
item_dict = (
search_result.to_dict()
if hasattr(search_result, "to_dict")
else dict(search_result)
if isinstance(search_result, dict)
else {"title": str(search_result)}
)
if "table" not in item_dict:
item_dict["table"] = provider_name
row_index = len(table.rows)
table.add_result(search_result)
try:
if provider_lower == "alldebrid" and getattr(search_result, "media_kind", "") == "folder":
magnet_id = None
meta = getattr(search_result, "full_metadata", None)
if isinstance(meta, dict):
magnet_id = meta.get("magnet_id")
if magnet_id is not None:
table.set_row_selection_args(row_index, ["-open", str(magnet_id), "-query", "*"])
except Exception:
pass
results_list.append(item_dict)
ctx.emit(item_dict)
if refresh_mode:
ctx.set_last_result_table_preserve_history(table, results_list)
else:
ctx.set_last_result_table(table, results_list)
ctx.set_current_stage_table(table)
if db is not None:
db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2))
db.update_worker_status(worker_id, "completed")
return 0
except Exception as exc:
log(f"Error searching provider '{provider_name}': {exc}", file=sys.stderr)
import traceback
debug(traceback.format_exc())
if db is not None:
try:
db.update_worker_status(worker_id, "error")
except Exception:
pass
return 1
finally:
if db is not None:
try:
db.__exit__(None, None, None)
except Exception:
pass
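The worker bookkeeping in this method follows a fixed lifecycle: insert a record, append JSON stdout, then mark completed or error, with a missing DB degrading to a no-op. A contextlib sketch of that same shape (the wrapper is hypothetical; the API_folder_store method names are the ones used above):
from contextlib import contextmanager

@contextmanager
def worker_record(db, worker_id: str, name: str, **fields):
    # Yields an append-stdout callable; marks the worker errored if the body raises.
    if db is None:
        yield lambda text: None  # stateless one-off when no DB is available
        return
    db.insert_worker(worker_id, name, **fields)
    try:
        yield lambda text: db.append_worker_stdout(worker_id, text)
        db.update_worker_status(worker_id, "completed")
    except Exception:
        db.update_worker_status(worker_id, "error")
        raise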
# --- Execution ------------------------------------------------------
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Search storage backends for files."""
@@ -164,7 +349,7 @@ class Search_Store(Cmdlet):
raw_title = None
command_title = (str(raw_title).strip() if raw_title else
"") or _format_command_title("search-store",
"") or _format_command_title("search-file",
list(args_list))
# Build dynamic flag variants from cmdlet arg definitions.
@@ -182,11 +367,22 @@ class Search_Store(Cmdlet):
f.lower()
for f in (flag_registry.get("limit") or {"-limit", "--limit"})
}
provider_flags = {
f.lower()
for f in (flag_registry.get("provider") or {"-provider", "--provider"})
}
open_flags = {
f.lower()
for f in (flag_registry.get("open") or {"-open", "--open"})
}
# Parse arguments
query = ""
storage_backend: Optional[str] = None
provider_name: Optional[str] = None
open_id: Optional[int] = None
limit = 100
limit_set = False
searched_backends: List[str] = []
i = 0
@@ -198,10 +394,26 @@ class Search_Store(Cmdlet):
query = f"{query} {chunk}".strip() if query else chunk
i += 2
continue
if low in provider_flags and i + 1 < len(args_list):
provider_name = args_list[i + 1]
i += 2
continue
if low in open_flags and i + 1 < len(args_list):
try:
open_id = int(args_list[i + 1])
except ValueError:
log(
f"Warning: Invalid open value '{args_list[i + 1]}', ignoring",
file=sys.stderr,
)
open_id = None
i += 2
continue
if low in store_flags and i + 1 < len(args_list):
storage_backend = args_list[i + 1]
i += 2
elif low in limit_flags and i + 1 < len(args_list):
limit_set = True
try:
limit = int(args_list[i + 1])
except ValueError:
@@ -213,6 +425,20 @@ class Search_Store(Cmdlet):
else:
i += 1
query = query.strip()
if provider_name:
return self._run_provider_search(
provider_name=provider_name,
query=query,
limit=limit,
limit_set=limit_set,
open_id=open_id,
args_list=args_list,
refresh_mode=refresh_mode,
config=config,
)
store_filter: Optional[str] = None
if query:
match = re.search(r"\bstore:([^\s,]+)", query, flags=re.IGNORECASE)
@@ -232,8 +458,6 @@ class Search_Store(Cmdlet):
return 1
from API.folder import API_folder_store
from SYS.config import get_local_storage_path
import uuid
worker_id = str(uuid.uuid4())
library_root = get_local_storage_path(config or {})
@@ -246,7 +470,7 @@ class Search_Store(Cmdlet):
try:
db.insert_worker(
worker_id,
"search-store",
"search-file",
title=f"Search: {query}",
description=f"Query: {query}",
pipe=ctx.get_current_command_text(),
@@ -261,7 +485,7 @@ class Search_Store(Cmdlet):
table = ResultTable(command_title)
try:
table.set_source_command("search-store", list(args_list))
table.set_source_command("search-file", list(args_list))
except Exception:
pass
if hash_query:
@@ -441,10 +665,10 @@ class Search_Store(Cmdlet):
)
db.update_worker_status(worker_id, "error")
return 1
debug(f"[search-store] Searching '{backend_to_search}'")
debug(f"[search-file] Searching '{backend_to_search}'")
results = target_backend.search(query, limit=limit)
debug(
f"[search-store] '{backend_to_search}' -> {len(results or [])} result(s)"
f"[search-file] '{backend_to_search}' -> {len(results or [])} result(s)"
)
else:
all_results = []
@@ -453,13 +677,13 @@ class Search_Store(Cmdlet):
backend = storage[backend_name]
searched_backends.append(backend_name)
debug(f"[search-store] Searching '{backend_name}'")
debug(f"[search-file] Searching '{backend_name}'")
backend_results = backend.search(
query,
limit=limit - len(all_results)
)
debug(
f"[search-store] '{backend_name}' -> {len(backend_results or [])} result(s)"
f"[search-file] '{backend_name}' -> {len(backend_results or [])} result(s)"
)
if backend_results:
all_results.extend(backend_results)
@@ -542,4 +766,4 @@ class Search_Store(Cmdlet):
return 1
CMDLET = Search_Store()
CMDLET = search_file()

View File

@@ -1,357 +0,0 @@
"""search-provider cmdlet: Search external providers (bandcamp, libgen, soulseek, youtube, alldebrid)."""
from __future__ import annotations
from typing import Any, Dict, List, Sequence, Optional
import sys
import json
import uuid
import importlib
from SYS.logger import log, debug
from ProviderCore.registry import get_search_provider, list_search_providers
from . import _shared as sh
Cmdlet, CmdletArg, should_show_help = (
sh.Cmdlet,
sh.CmdletArg,
sh.should_show_help,
)
from SYS import pipeline as ctx
# Optional dependencies
try:
from SYS.config import get_local_storage_path
except Exception: # pragma: no cover
get_local_storage_path = None # type: ignore
class Search_Provider(Cmdlet):
"""Search external content providers."""
def __init__(self):
super().__init__(
name="search-provider",
summary=
"Search external providers (bandcamp, libgen, soulseek, youtube, alldebrid, loc, internetarchive)",
usage="search-provider -provider <provider> <query> [-limit N] [-open ID]",
arg=[
CmdletArg(
"provider",
type="string",
required=True,
description=
"Provider name: bandcamp, libgen, soulseek, youtube, alldebrid, loc, internetarchive",
),
CmdletArg(
"query",
type="string",
required=True,
description="Search query (supports provider-specific syntax)",
),
CmdletArg(
"limit",
type="int",
description="Maximum results to return (default: 50)"
),
CmdletArg(
"open",
type="int",
description=
"(alldebrid) Open folder/magnet by ID and list its files",
),
],
detail=[
"Search external content providers:",
"- alldebrid: List your AllDebrid account folders (magnets). Select @N to view files.",
' Example: search-provider -provider alldebrid "*"',
' Example: search-provider -provider alldebrid -open 123 "*"',
"- bandcamp: Search for music albums/tracks",
' Example: search-provider -provider bandcamp "artist:altrusian grace"',
"- libgen: Search Library Genesis for books",
' Example: search-provider -provider libgen "python programming"',
"- loc: Search Library of Congress (Chronicling America)",
' Example: search-provider -provider loc "lincoln"',
"- soulseek: Search P2P network for music",
' Example: search-provider -provider soulseek "pink floyd"',
"- youtube: Search YouTube for videos",
' Example: search-provider -provider youtube "tutorial"',
"- internetarchive: Search archive.org items (advancedsearch syntax)",
' Example: search-provider -provider internetarchive "title:(lincoln) AND mediatype:texts"',
"",
"Query syntax:",
"- bandcamp: Use 'artist:Name' to search by artist",
"- libgen: Supports isbn:, author:, title: prefixes",
"- soulseek: Plain text search",
"- youtube: Plain text search",
"- internetarchive: Archive.org advancedsearch query syntax",
"",
"Results can be piped to other cmdlet:",
' search-provider -provider bandcamp "artist:grace" | @1 | download-file',
],
exec=self.run,
)
self.register()
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Execute search-provider cmdlet."""
if should_show_help(args):
ctx.emit(self.__dict__)
return 0
args_list = [str(a) for a in (args or [])]
# Dynamic flag variants from cmdlet arg definitions.
flag_registry = self.build_flag_registry()
provider_flags = {
f.lower()
for f in (flag_registry.get("provider") or {"-provider", "--provider"})
}
query_flags = {
f.lower()
for f in (flag_registry.get("query") or {"-query", "--query"})
}
limit_flags = {
f.lower()
for f in (flag_registry.get("limit") or {"-limit", "--limit"})
}
open_flags = {
f.lower()
for f in (flag_registry.get("open") or {"-open", "--open"})
}
provider_name: Optional[str] = None
query: Optional[str] = None
limit = 50
open_id: Optional[int] = None
positionals: List[str] = []
i = 0
while i < len(args_list):
token = args_list[i]
low = token.lower()
if low in provider_flags and i + 1 < len(args_list):
provider_name = args_list[i + 1]
i += 2
elif low in query_flags and i + 1 < len(args_list):
query = args_list[i + 1]
i += 2
elif low in limit_flags and i + 1 < len(args_list):
try:
limit = int(args_list[i + 1])
except ValueError:
log(
f"Warning: Invalid limit value '{args_list[i + 1]}', using default 50",
file=sys.stderr,
)
limit = 50
i += 2
elif low in open_flags and i + 1 < len(args_list):
try:
open_id = int(args_list[i + 1])
except ValueError:
log(
f"Warning: Invalid open value '{args_list[i + 1]}', ignoring",
file=sys.stderr,
)
open_id = None
i += 2
elif not token.startswith("-"):
positionals.append(token)
i += 1
else:
i += 1
# Backwards-compatible positional form: search-provider <provider> <query>
if provider_name is None and positionals:
provider_name = positionals[0]
positionals = positionals[1:]
if query is None and positionals:
query = " ".join(positionals).strip() or None
if not provider_name or not query:
log("Error: search-provider requires a provider and query", file=sys.stderr)
log(f"Usage: {self.usage}", file=sys.stderr)
log("Available providers:", file=sys.stderr)
providers = list_search_providers(config)
for name, available in sorted(providers.items()):
status = "" if available else ""
log(f" {status} {name}", file=sys.stderr)
return 1
debug(
f"[search-provider] provider={provider_name}, query={query}, limit={limit}"
)
# Get provider
provider = get_search_provider(provider_name, config)
if not provider:
log(f"Error: Provider '{provider_name}' is not available", file=sys.stderr)
log("Available providers:", file=sys.stderr)
providers = list_search_providers(config)
for name, available in sorted(providers.items()):
if available:
log(f" - {name}", file=sys.stderr)
return 1
worker_id = str(uuid.uuid4())
library_root = get_local_storage_path(
config or {}
) if get_local_storage_path else None
db = None
if library_root:
try:
from API.folder import API_folder_store
db = API_folder_store(library_root)
except Exception:
db = None
try:
# Use the worker DB if available; otherwise, run as a stateless one-off.
if db is not None:
db.__enter__()
db.insert_worker(
worker_id,
"search-provider",
title=f"Search: {query}",
description=f"Provider: {provider_name}, Query: {query}",
pipe=ctx.get_current_command_text(),
)
results_list = []
from SYS import result_table
importlib.reload(result_table)
from SYS.result_table import ResultTable
provider_text = str(provider_name or "").strip()
provider_lower = provider_text.lower()
if provider_lower == "youtube":
provider_label = "Youtube"
elif provider_lower == "openlibrary":
provider_label = "OpenLibrary"
elif provider_lower == "loc":
provider_label = "LoC"
else:
provider_label = (
provider_text[:1].upper() +
provider_text[1:] if provider_text else "Provider"
)
if provider_lower == "alldebrid" and open_id is not None:
table_title = f"{provider_label} Files: {open_id}".strip().rstrip(":")
else:
table_title = f"{provider_label}: {query}".strip().rstrip(":")
preserve_order = provider_name.lower() in ("youtube", "openlibrary", "loc")
table = ResultTable(table_title).set_preserve_order(preserve_order)
table.set_table(provider_name)
table.set_source_command("search-provider", list(args))
debug(f"[search-provider] Calling {provider_name}.search()")
if provider_lower == "alldebrid":
if open_id is not None:
# Second-stage: show files for selected folder/magnet.
results = provider.search(
query,
limit=limit,
filters={
"view": "files",
"magnet_id": open_id
}
)
else:
# Default: show folders (magnets) so user can select @N.
results = provider.search(
query,
limit=limit,
filters={
"view": "folders"
}
)
else:
results = provider.search(query, limit=limit)
debug(f"[search-provider] Got {len(results)} results")
if not results:
log(f"No results found for query: {query}", file=sys.stderr)
if db is not None:
db.append_worker_stdout(worker_id, json.dumps([], indent=2))
db.update_worker_status(worker_id, "completed")
return 0
# Emit results for pipeline
for search_result in results:
item_dict = (
search_result.to_dict()
if hasattr(search_result,
"to_dict") else dict(search_result)
)
# Ensure table field is set (should be by provider, but just in case)
if "table" not in item_dict:
item_dict["table"] = provider_name
row_index = len(table.rows)
table.add_result(
search_result
) # ResultTable handles SearchResult objects
# For AllDebrid folder rows, allow @N to open and show files.
try:
if (provider_lower == "alldebrid" and getattr(search_result,
"media_kind",
"") == "folder"):
magnet_id = None
meta = getattr(search_result, "full_metadata", None)
if isinstance(meta, dict):
magnet_id = meta.get("magnet_id")
if magnet_id is not None:
table.set_row_selection_args(
row_index,
["-open",
str(magnet_id),
"-query",
"*"]
)
except Exception:
pass
results_list.append(item_dict)
ctx.emit(item_dict)
ctx.set_last_result_table(table, results_list)
# Ensure @N selection expands against this newly displayed table.
ctx.set_current_stage_table(table)
if db is not None:
db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2))
db.update_worker_status(worker_id, "completed")
return 0
except Exception as e:
log(f"Error searching {provider_name}: {e}", file=sys.stderr)
import traceback
debug(traceback.format_exc())
if db is not None:
try:
db.update_worker_status(worker_id, "error")
except Exception:
pass
return 1
finally:
if db is not None:
try:
db.__exit__(None, None, None)
except Exception:
pass
# Register cmdlet instance (catalog + REPL autocomplete expects module-level CMDLET)
CMDLET = Search_Provider()
# Backwards-compatible alias
Search_Provider_Instance = CMDLET