Files
Medios-Macina/cmdlet/search_provider.py

358 lines
14 KiB
Python
Raw Normal View History

2025-12-16 01:45:01 -08:00
"""search-provider cmdlet: Search external providers (bandcamp, libgen, soulseek, youtube, alldebrid, loc, internetarchive)."""
2025-12-29 17:05:03 -08:00
2025-12-11 12:47:30 -08:00
from __future__ import annotations
2025-12-11 19:04:02 -08:00
from typing import Any, Dict, List, Sequence, Optional
2025-12-11 12:47:30 -08:00
import sys
2025-12-11 19:04:02 -08:00
import json
import uuid
import importlib
2025-12-11 12:47:30 -08:00
2025-12-11 19:04:02 -08:00
from SYS.logger import log, debug
2025-12-12 21:55:38 -08:00
from ProviderCore.registry import get_search_provider, list_search_providers
2025-12-11 12:47:30 -08:00
2025-12-16 23:23:43 -08:00
from . import _shared as sh
# Short module-level names for the shared cmdlet helpers used below.
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
should_show_help = sh.should_show_help
from SYS import pipeline as ctx
2025-12-11 12:47:30 -08:00
2025-12-11 19:04:02 -08:00
# Optional dependency: the local-storage path helper is only needed for
# worker-DB bookkeeping. If importing SYS.config fails for any reason the
# name is bound to None, and callers must check it before calling.
try:
    from SYS.config import get_local_storage_path
except Exception:  # pragma: no cover
    get_local_storage_path = None  # type: ignore
2025-12-11 12:47:30 -08:00
2025-12-29 17:05:03 -08:00
2025-12-11 12:47:30 -08:00
class Search_Provider(Cmdlet):
    """Search external content providers.

    The cmdlet resolves a provider by name through the provider registry,
    runs its ``search()`` method, emits every hit into the pipeline and
    publishes a result table so that ``@N`` selections can be expanded
    against it. When a local storage path is configured, the run is also
    recorded as a worker in the folder store (best-effort).
    """

    def __init__(self):
        """Declare the cmdlet's name, arguments and help text, then register it."""
        super().__init__(
            name="search-provider",
            summary=(
                "Search external providers (bandcamp, libgen, soulseek, youtube, alldebrid, loc, internetarchive)"
            ),
            usage="search-provider -provider <provider> <query> [-limit N] [-open ID]",
            arg=[
                CmdletArg(
                    "provider",
                    type="string",
                    required=True,
                    description=(
                        "Provider name: bandcamp, libgen, soulseek, youtube, alldebrid, loc, internetarchive"
                    ),
                ),
                CmdletArg(
                    "query",
                    type="string",
                    required=True,
                    description="Search query (supports provider-specific syntax)",
                ),
                CmdletArg(
                    "limit",
                    type="int",
                    description="Maximum results to return (default: 50)",
                ),
                CmdletArg(
                    "open",
                    type="int",
                    description=(
                        "(alldebrid) Open folder/magnet by ID and list its files"
                    ),
                ),
            ],
            detail=[
                "Search external content providers:",
                "- alldebrid: List your AllDebrid account folders (magnets). Select @N to view files.",
                ' Example: search-provider -provider alldebrid "*"',
                ' Example: search-provider -provider alldebrid -open 123 "*"',
                "- bandcamp: Search for music albums/tracks",
                ' Example: search-provider -provider bandcamp "artist:altrusian grace"',
                "- libgen: Search Library Genesis for books",
                ' Example: search-provider -provider libgen "python programming"',
                "- loc: Search Library of Congress (Chronicling America)",
                ' Example: search-provider -provider loc "lincoln"',
                "- soulseek: Search P2P network for music",
                ' Example: search-provider -provider soulseek "pink floyd"',
                "- youtube: Search YouTube for videos",
                ' Example: search-provider -provider youtube "tutorial"',
                "- internetarchive: Search archive.org items (advancedsearch syntax)",
                ' Example: search-provider -provider internetarchive "title:(lincoln) AND mediatype:texts"',
                "",
                "Query syntax:",
                "- bandcamp: Use 'artist:Name' to search by artist",
                "- libgen: Supports isbn:, author:, title: prefixes",
                "- soulseek: Plain text search",
                "- youtube: Plain text search",
                "- internetarchive: Archive.org advancedsearch query syntax",
                "",
                "Results can be piped to other cmdlet:",
                ' search-provider -provider bandcamp "artist:grace" | @1 | download-file',
            ],
            exec=self.run,
        )
        self.register()

    def _parse_args(
        self, args_list: List[str]
    ) -> tuple[Optional[str], Optional[str], int, Optional[int]]:
        """Split raw tokens into ``(provider_name, query, limit, open_id)``.

        Flags are matched case-insensitively. A flag that is the last token
        (no value follows) is ignored, as is any unrecognised ``-flag``.
        Tokens that do not start with ``-`` are collected as positionals and
        used for the backwards-compatible form
        ``search-provider <provider> <query>``.

        Args:
            args_list: Command tokens, already converted to ``str``.

        Returns:
            ``provider_name`` and ``query`` are ``None`` when missing,
            ``limit`` falls back to 50 on a non-integer value, and
            ``open_id`` is ``None`` when absent or not an integer.
        """
        # Flag spellings come from the cmdlet arg definitions; the literal
        # sets are the fallback when the registry has no entry for an arg.
        flag_registry = self.build_flag_registry()

        def flag_variants(arg_name: str) -> set:
            variants = flag_registry.get(arg_name) or {
                f"-{arg_name}",
                f"--{arg_name}",
            }
            return {f.lower() for f in variants}

        provider_flags = flag_variants("provider")
        query_flags = flag_variants("query")
        limit_flags = flag_variants("limit")
        open_flags = flag_variants("open")

        provider_name: Optional[str] = None
        query: Optional[str] = None
        limit = 50
        open_id: Optional[int] = None
        positionals: List[str] = []

        total = len(args_list)
        i = 0
        while i < total:
            token = args_list[i]
            low = token.lower()
            has_value = i + 1 < total
            if has_value and low in provider_flags:
                provider_name = args_list[i + 1]
                i += 2
            elif has_value and low in query_flags:
                query = args_list[i + 1]
                i += 2
            elif has_value and low in limit_flags:
                raw_limit = args_list[i + 1]
                try:
                    limit = int(raw_limit)
                except ValueError:
                    log(
                        f"Warning: Invalid limit value '{raw_limit}', using default 50",
                        file=sys.stderr,
                    )
                    limit = 50
                i += 2
            elif has_value and low in open_flags:
                raw_open = args_list[i + 1]
                try:
                    open_id = int(raw_open)
                except ValueError:
                    log(
                        f"Warning: Invalid open value '{raw_open}', ignoring",
                        file=sys.stderr,
                    )
                    open_id = None
                i += 2
            elif not token.startswith("-"):
                positionals.append(token)
                i += 1
            else:
                i += 1

        # Backwards-compatible positional form: search-provider <provider> <query>
        if provider_name is None and positionals:
            provider_name = positionals[0]
            positionals = positionals[1:]
        if query is None and positionals:
            query = " ".join(positionals).strip() or None

        return provider_name, query, limit, open_id

    @staticmethod
    def _provider_label(provider_text: str) -> str:
        """Return the display label used in the table title for a provider.

        A few providers have a fixed spelling; any other name gets its first
        character upper-cased. An empty name yields ``"Provider"``.
        """
        fixed_labels = {
            "youtube": "Youtube",
            "openlibrary": "OpenLibrary",
            "loc": "LoC",
        }
        fixed = fixed_labels.get(provider_text.lower())
        if fixed is not None:
            return fixed
        if not provider_text:
            return "Provider"
        return provider_text[:1].upper() + provider_text[1:]

    def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
        """Execute search-provider cmdlet.

        Args:
            result: Piped-in value; not read by this cmdlet.
            args: Raw command tokens (flags and positionals).
            config: Application configuration, forwarded to the provider
                registry and to the local-storage lookup.

        Returns:
            0 when help was shown, results were emitted, or the search
            returned nothing; 1 when provider/query are missing, the provider
            is unavailable, or the search raised.
        """
        if should_show_help(args):
            ctx.emit(self.__dict__)
            return 0

        args_list = [str(a) for a in (args or [])]
        provider_name, query, limit, open_id = self._parse_args(args_list)

        if not provider_name or not query:
            log("Error: search-provider requires a provider and query", file=sys.stderr)
            log(f"Usage: {self.usage}", file=sys.stderr)
            log("Available providers:", file=sys.stderr)
            providers = list_search_providers(config)
            for name, available in sorted(providers.items()):
                # Mark each provider so unavailable ones are distinguishable.
                status = "✓" if available else "✗"
                log(f" {status} {name}", file=sys.stderr)
            return 1

        debug(
            f"[search-provider] provider={provider_name}, query={query}, limit={limit}"
        )

        # Get provider
        provider = get_search_provider(provider_name, config)
        if not provider:
            log(f"Error: Provider '{provider_name}' is not available", file=sys.stderr)
            log("Available providers:", file=sys.stderr)
            providers = list_search_providers(config)
            for name, available in sorted(providers.items()):
                if available:
                    log(f" - {name}", file=sys.stderr)
            return 1

        worker_id = str(uuid.uuid4())
        library_root = (
            get_local_storage_path(config or {}) if get_local_storage_path else None
        )

        # Worker tracking is optional: any failure to open the store means the
        # search runs as a stateless one-off.
        db = None
        if library_root:
            try:
                from API.folder import API_folder_store

                db = API_folder_store(library_root)
            except Exception as exc:
                debug(f"[search-provider] Worker DB unavailable: {exc}")
                db = None

        # Only call __exit__ (and report status) on a store that was
        # actually entered.
        db_entered = False
        try:
            if db is not None:
                db.__enter__()
                db_entered = True
                db.insert_worker(
                    worker_id,
                    "search-provider",
                    title=f"Search: {query}",
                    description=f"Provider: {provider_name}, Query: {query}",
                    pipe=ctx.get_current_command_text(),
                )

            results_list: List[Dict[str, Any]] = []

            # Reload of the legacy top-level ``result_table`` module is a
            # convenience only; ResultTable itself comes from SYS.result_table,
            # so a missing legacy module must not abort the search.
            try:
                import result_table

                importlib.reload(result_table)
            except ImportError as exc:
                debug(f"[search-provider] result_table reload skipped: {exc}")
            from SYS.result_table import ResultTable

            provider_text = str(provider_name or "").strip()
            provider_lower = provider_text.lower()
            provider_label = self._provider_label(provider_text)

            is_alldebrid = provider_lower == "alldebrid"
            if is_alldebrid and open_id is not None:
                table_title = f"{provider_label} Files: {open_id}".strip().rstrip(":")
            else:
                table_title = f"{provider_label}: {query}".strip().rstrip(":")

            preserve_order = provider_lower in ("youtube", "openlibrary", "loc")
            table = ResultTable(table_title).set_preserve_order(preserve_order)
            table.set_table(provider_name)
            table.set_source_command("search-provider", list(args or []))

            debug(f"[search-provider] Calling {provider_name}.search()")
            if is_alldebrid:
                if open_id is not None:
                    # Second-stage: show files for selected folder/magnet.
                    filters = {"view": "files", "magnet_id": open_id}
                else:
                    # Default: show folders (magnets) so user can select @N.
                    filters = {"view": "folders"}
                results = provider.search(query, limit=limit, filters=filters)
            else:
                results = provider.search(query, limit=limit)

            # A provider returning None means "nothing found", not a failure.
            if results is None:
                results = []
            debug(f"[search-provider] Got {len(results)} results")

            if not results:
                log(f"No results found for query: {query}", file=sys.stderr)
                if db is not None:
                    db.append_worker_stdout(worker_id, json.dumps([], indent=2))
                    db.update_worker_status(worker_id, "completed")
                return 0

            # Emit results for pipeline
            for search_result in results:
                if hasattr(search_result, "to_dict"):
                    item_dict = search_result.to_dict()
                else:
                    item_dict = dict(search_result)

                # Ensure table field is set (should be by provider, but just in case)
                if "table" not in item_dict:
                    item_dict["table"] = provider_name

                row_index = len(table.rows)
                table.add_result(search_result)  # ResultTable handles SearchResult objects

                # For AllDebrid folder rows, allow @N to open and show files.
                try:
                    if (
                        is_alldebrid
                        and getattr(search_result, "media_kind", "") == "folder"
                    ):
                        magnet_id = None
                        meta = getattr(search_result, "full_metadata", None)
                        if isinstance(meta, dict):
                            magnet_id = meta.get("magnet_id")
                        if magnet_id is not None:
                            table.set_row_selection_args(
                                row_index,
                                ["-open", str(magnet_id), "-query", "*"],
                            )
                except Exception as exc:
                    # Selection args are a convenience; never fail the search for them.
                    debug(f"[search-provider] Could not set selection args: {exc}")

                results_list.append(item_dict)
                ctx.emit(item_dict)

            ctx.set_last_result_table(table, results_list)
            # Ensure @N selection expands against this newly displayed table.
            ctx.set_current_stage_table(table)

            if db is not None:
                db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2))
                db.update_worker_status(worker_id, "completed")
            return 0
        except Exception as e:
            log(f"Error searching {provider_name}: {e}", file=sys.stderr)
            import traceback

            debug(traceback.format_exc())
            if db_entered:
                try:
                    db.update_worker_status(worker_id, "error")
                except Exception as exc:
                    debug(f"[search-provider] Could not record worker error: {exc}")
            return 1
        finally:
            if db_entered:
                try:
                    db.__exit__(None, None, None)
                except Exception as exc:
                    debug(f"[search-provider] Worker DB close failed: {exc}")
2025-12-11 12:47:30 -08:00
2025-12-16 01:45:01 -08:00
# Build the single cmdlet instance at import time; the constructor registers it.
# The catalog and REPL autocomplete expect to find it as module-level CMDLET.
CMDLET: Search_Provider = Search_Provider()

# Older name for the same instance, kept for backwards compatibility.
Search_Provider_Instance: Search_Provider = CMDLET