This commit is contained in:
nose
2025-12-12 21:55:38 -08:00
parent e2ffcab030
commit 85750247cc
78 changed files with 5726 additions and 6239 deletions

169
cmdlet/search_provider.py Normal file
View File

@@ -0,0 +1,169 @@
"""search-provider cmdlet: Search external providers (bandcamp, libgen, soulseek, youtube)."""
from __future__ import annotations
from typing import Any, Dict, List, Sequence, Optional
import sys
import json
import uuid
import importlib
from SYS.logger import log, debug
from ProviderCore.registry import get_search_provider, list_search_providers
from ._shared import Cmdlet, CmdletArg, should_show_help
import pipeline as ctx
# Optional dependencies
try:
from config import get_local_storage_path
except Exception: # pragma: no cover
get_local_storage_path = None # type: ignore
class Search_Provider(Cmdlet):
    """Cmdlet that queries one external search provider and emits its results.

    Usage: ``search-provider <provider> <query> [-limit N]``. Each result is
    emitted to the pipeline as a dict, collected into a ``ResultTable`` and
    recorded on a worker entry in the local folder store.
    """

    def __init__(self):
        super().__init__(
            name="search-provider",
            summary="Search external providers (bandcamp, libgen, soulseek, youtube)",
            usage="search-provider <provider> <query> [-limit N]",
            arg=[
                CmdletArg("provider", type="string", required=True, description="Provider name: bandcamp, libgen, soulseek, youtube"),
                CmdletArg("query", type="string", required=True, description="Search query (supports provider-specific syntax)"),
                CmdletArg("limit", type="int", description="Maximum results to return (default: 50)"),
            ],
            detail=[
                "Search external content providers:",
                "- bandcamp: Search for music albums/tracks",
                " Example: search-provider bandcamp \"artist:altrusian grace\"",
                "- libgen: Search Library Genesis for books",
                " Example: search-provider libgen \"python programming\"",
                "- soulseek: Search P2P network for music",
                " Example: search-provider soulseek \"pink floyd\"",
                "- youtube: Search YouTube for videos",
                " Example: search-provider youtube \"tutorial\"",
                "",
                "Query syntax:",
                "- bandcamp: Use 'artist:Name' to search by artist",
                "- libgen: Supports isbn:, author:, title: prefixes",
                "- soulseek: Plain text search",
                "- youtube: Plain text search",
                "",
                "Results can be piped to other cmdlet:",
                " search-provider bandcamp \"artist:grace\" | @1 | download-data",
            ],
            exec=self.run,
        )
        self.register()

    @staticmethod
    def _log_providers(config: Dict[str, Any], *, only_available: bool) -> None:
        """Log the provider list to stderr, optionally hiding unavailable ones."""
        log("Available providers:", file=sys.stderr)
        for name, available in sorted(list_search_providers(config).items()):
            if only_available:
                if available:
                    log(f" - {name}", file=sys.stderr)
            else:
                # ASCII markers: the previous code used the same (empty) string
                # for both states, so availability was not visible at all.
                marker = "[x]" if available else "[ ]"
                log(f" {marker} {name}", file=sys.stderr)

    @staticmethod
    def _parse_limit(args: Sequence[str], default: int = 50) -> int:
        """Return the positive integer following -limit/--limit, else *default*."""
        # args[0] and args[1] are the provider and the query; flags come after.
        for idx, token in enumerate(args[2:], start=2):
            if token not in ("-limit", "--limit"):
                continue
            if idx + 1 >= len(args):
                log(f"Warning: {token} requires a value, using default {default}", file=sys.stderr)
                return default
            raw = args[idx + 1]
            try:
                value: Optional[int] = int(raw)
            except (TypeError, ValueError):
                value = None
            if value is None or value < 1:
                log(f"Warning: Invalid limit value '{raw}', using default {default}", file=sys.stderr)
                return default
            return value
        return default

    def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
        """Execute search-provider cmdlet.

        Args:
            result: Piped-in value from the previous pipeline stage (unused).
            args: ``[provider, query, ...]`` optionally followed by
                ``-limit N`` / ``--limit N``.
            config: Application configuration passed to the provider registry
                and to the local-storage lookup.

        Returns:
            0 on success (including "no results" and help), 1 on usage errors,
            an unavailable provider, missing local storage, or a search failure.
        """
        if should_show_help(args):
            ctx.emit(self.__dict__)
            return 0

        if len(args) < 2:
            log("Error: search-provider requires <provider> and <query> arguments", file=sys.stderr)
            log(f"Usage: {self.usage}", file=sys.stderr)
            self._log_providers(config, only_available=False)
            return 1

        provider_name = args[0]
        query = args[1]
        limit = self._parse_limit(args)
        debug(f"[search-provider] provider={provider_name}, query={query}, limit={limit}")

        provider = get_search_provider(provider_name, config)
        if not provider:
            log(f"Error: Provider '{provider_name}' is not available", file=sys.stderr)
            self._log_providers(config, only_available=True)
            return 1

        # The config import at module level is optional and falls back to None;
        # calling it unguarded would raise TypeError instead of a clean error.
        if get_local_storage_path is None:
            log("Error: local storage support is unavailable (config module could not be imported)", file=sys.stderr)
            return 1

        from API.folder import API_folder_store

        worker_id = str(uuid.uuid4())
        library_root = get_local_storage_path(config or {})
        if not library_root:
            log("No library root configured", file=sys.stderr)
            return 1

        # Use context manager to ensure database is always closed
        with API_folder_store(library_root) as db:
            try:
                db.insert_worker(
                    worker_id,
                    "search-provider",
                    title=f"Search: {query}",
                    description=f"Provider: {provider_name}, Query: {query}",
                    pipe=ctx.get_current_command_text(),
                )

                # result_table is reloaded before use, as in the original code.
                import result_table
                importlib.reload(result_table)
                from result_table import ResultTable

                table_title = f"Search: {query} [{provider_name}]"
                preserve_order = provider_name.lower() in ('youtube', 'openlibrary')
                table = ResultTable(table_title).set_preserve_order(preserve_order)
                table.set_table(provider_name)

                debug(f"[search-provider] Calling {provider_name}.search()")
                # Normalise a None return to an empty list so it is reported as
                # "no results" rather than failing on len().
                results = provider.search(query, limit=limit) or []
                debug(f"[search-provider] Got {len(results)} results")

                if not results:
                    log(f"No results found for query: {query}", file=sys.stderr)
                    db.append_worker_stdout(worker_id, json.dumps([], indent=2))
                    db.update_worker_status(worker_id, 'completed')
                    return 0

                # Emit results for pipeline
                results_list: List[Dict[str, Any]] = []
                for search_result in results:
                    if hasattr(search_result, 'to_dict'):
                        item_dict = search_result.to_dict()
                    else:
                        item_dict = dict(search_result)
                    # Ensure table field is set (should be by provider, but just in case)
                    if 'table' not in item_dict:
                        item_dict['table'] = provider_name
                    table.add_result(search_result)  # ResultTable handles SearchResult objects
                    results_list.append(item_dict)
                    ctx.emit(item_dict)

                ctx.set_last_result_table(table, results_list)
                db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2))
                db.update_worker_status(worker_id, 'completed')
                log(f"Found {len(results)} result(s) from {provider_name}", file=sys.stderr)
                return 0
            except Exception as e:
                log(f"Error searching {provider_name}: {e}", file=sys.stderr)
                import traceback
                debug(traceback.format_exc())
                # Best effort: a failure to record the error status must not
                # mask the original search error, but leave a trace of it.
                try:
                    db.update_worker_status(worker_id, 'error')
                except Exception as status_error:
                    debug(f"[search-provider] Could not mark worker {worker_id} as error: {status_error}")
                return 1
# Register cmdlet instance: constructing it at import time runs __init__,
# which calls self.register().
Search_Provider_Instance = Search_Provider()