diff --git a/API/HTTP.py b/API/HTTP.py index 6c87892..7c7a423 100644 --- a/API/HTTP.py +++ b/API/HTTP.py @@ -34,6 +34,7 @@ logger = logging.getLogger(__name__) from API.ssl_certs import resolve_verify_value as _resolve_verify_value from API.ssl_certs import get_requests_verify_value +from API.httpx_shared import get_shared_httpx_client # Default configuration DEFAULT_TIMEOUT = 30.0 @@ -504,17 +505,18 @@ class HTTPClient: if ("certificate verify failed" in msg or "unable to get local issuer certificate" in msg): logger.info("Certificate verification failed; attempting to retry with a system-aware CA bundle") try: - import httpx as _httpx - # Use the client's precomputed verify argument (set at init) - verify_override = self._httpx_verify - with _httpx.Client(timeout=self.timeout, verify=verify_override, headers=self._get_headers()) as temp_client: - try: - response = temp_client.request(method, url, **kwargs) - if raise_for_status: - response.raise_for_status() - return response - except Exception as e2: - last_exception = e2 + temp_client = get_shared_httpx_client( + timeout=self.timeout, + verify_ssl=self._httpx_verify, + headers=self._get_headers(), + ) + try: + response = temp_client.request(method, url, **kwargs) + if raise_for_status: + response.raise_for_status() + return response + except Exception as e2: + last_exception = e2 except Exception: # certifi/pip-system-certs/httpx not available; fall back to existing retry behavior pass @@ -530,17 +532,18 @@ class HTTPClient: if ("certificate verify failed" in msg or "unable to get local issuer certificate" in msg): logger.info("Certificate verification failed; attempting to retry with a system-aware CA bundle") try: - import httpx as _httpx - # Use the client's precomputed verify argument (set at init) - verify_override = self._httpx_verify - with _httpx.Client(timeout=self.timeout, verify=verify_override, headers=self._get_headers()) as temp_client: - try: - response = temp_client.request(method, url, 
**kwargs) - if raise_for_status: - response.raise_for_status() - return response - except Exception as e2: - last_exception = e2 + temp_client = get_shared_httpx_client( + timeout=self.timeout, + verify_ssl=self._httpx_verify, + headers=self._get_headers(), + ) + try: + response = temp_client.request(method, url, **kwargs) + if raise_for_status: + response.raise_for_status() + return response + except Exception as e2: + last_exception = e2 except Exception: # certifi/pip-system-certs/httpx not available; fall back to existing retry behavior pass diff --git a/API/httpx_shared.py b/API/httpx_shared.py index aef1bc1..ae93150 100644 --- a/API/httpx_shared.py +++ b/API/httpx_shared.py @@ -1,14 +1,16 @@ """Shared `httpx.Client` helper. Creating short-lived httpx clients disables connection pooling and costs extra CPU. -This module provides a small singleton client for callers that just need basic +This module provides a small keyed client cache for callers that just need basic GETs without the full HTTPClient wrapper. 
""" from __future__ import annotations +import atexit +from collections import OrderedDict import threading -from typing import Dict, Optional +from typing import Any, Dict, Optional, Tuple import httpx @@ -20,39 +22,85 @@ _DEFAULT_USER_AGENT = ( ) _lock = threading.Lock() -_shared_client: Optional[httpx.Client] = None +_MAX_SHARED_CLIENTS = 8 +_shared_clients: "OrderedDict[Tuple[float, Tuple[str, str], Tuple[Tuple[str, str], ...]], httpx.Client]" = OrderedDict() + + +def _normalize_headers(headers: Optional[Dict[str, str]]) -> Dict[str, str]: + normalized: Dict[str, str] = {"User-Agent": _DEFAULT_USER_AGENT} + if headers: + normalized.update({str(k): str(v) for k, v in headers.items()}) + return normalized + + +def _verify_key(verify_value: Any) -> Tuple[str, str]: + if isinstance(verify_value, bool): + return ("bool", "1" if verify_value else "0") + if isinstance(verify_value, str): + return ("str", verify_value) + return ("obj", str(id(verify_value))) + + +def _client_key( + *, + timeout: float, + verify_value: Any, + merged_headers: Dict[str, str], +) -> Tuple[float, Tuple[str, str], Tuple[Tuple[str, str], ...]]: + header_items = tuple( + sorted((str(k).lower(), str(v)) for k, v in merged_headers.items()) + ) + return (float(timeout), _verify_key(verify_value), header_items) def get_shared_httpx_client( *, timeout: float = 30.0, - verify_ssl: bool = True, + verify_ssl: bool | str = True, headers: Optional[Dict[str, str]] = None, ) -> httpx.Client: - """Return a process-wide shared synchronous httpx.Client.""" + """Return a shared synchronous httpx.Client for a specific config key.""" - global _shared_client - if _shared_client is None: - with _lock: - if _shared_client is None: - base_headers = {"User-Agent": _DEFAULT_USER_AGENT} - if headers: - base_headers.update({str(k): str(v) for k, v in headers.items()}) - _shared_client = httpx.Client( - timeout=timeout, - verify=resolve_verify_value(verify_ssl), - headers=base_headers, - ) + verify_value = 
resolve_verify_value(verify_ssl) + merged_headers = _normalize_headers(headers) + key = _client_key( + timeout=timeout, + verify_value=verify_value, + merged_headers=merged_headers, + ) - return _shared_client + with _lock: + existing = _shared_clients.get(key) + if existing is not None: + _shared_clients.move_to_end(key) + return existing + + client = httpx.Client( + timeout=timeout, + verify=verify_value, + headers=merged_headers, + ) + _shared_clients[key] = client + + while len(_shared_clients) > _MAX_SHARED_CLIENTS: + _, old_client = _shared_clients.popitem(last=False) + try: + old_client.close() + except Exception: + pass + + return client def close_shared_httpx_client() -> None: - global _shared_client - client = _shared_client - _shared_client = None - if client is not None: + with _lock: + clients = list(_shared_clients.values()) + _shared_clients.clear() + for client in clients: try: client.close() except Exception: pass + + +atexit.register(close_shared_httpx_client) diff --git a/API/requests_client.py b/API/requests_client.py index 8eadcd1..15396d5 100644 --- a/API/requests_client.py +++ b/API/requests_client.py @@ -9,8 +9,11 @@ This module intentionally avoids importing the heavy httpx-based stack. 
from __future__ import annotations +import atexit +from collections import OrderedDict import threading -from typing import Any, Dict, Optional +from typing import Any, Dict, Optional, Tuple +from weakref import WeakSet import requests from requests.adapters import HTTPAdapter @@ -23,6 +26,40 @@ _DEFAULT_USER_AGENT = ( ) _local = threading.local() +_MAX_SESSIONS_PER_THREAD = 4 +_session_registry_lock = threading.Lock() +_all_sessions: "WeakSet[requests.Session]" = WeakSet() + + +def _session_key( + *, + user_agent: str, + verify_ssl: bool, + pool_connections: int, + pool_maxsize: int, +) -> Tuple[str, Any, int, int]: + return ( + str(user_agent or _DEFAULT_USER_AGENT), + resolve_verify_value(verify_ssl), + int(pool_connections), + int(pool_maxsize), + ) + + +def _get_thread_session_cache() -> "OrderedDict[Tuple[str, Any, int, int], requests.Session]": + cache = getattr(_local, "session_cache", None) + if cache is None: + cache = OrderedDict() + _local.session_cache = cache + return cache + + +def _register_session(session: requests.Session) -> None: + try: + with _session_registry_lock: + _all_sessions.add(session) + except Exception: + pass def get_requests_session( @@ -32,24 +69,44 @@ def get_requests_session( pool_connections: int = 16, pool_maxsize: int = 16, ) -> requests.Session: - """Return a thread-local shared Session configured for pooling.""" + """Return a thread-local pooled Session keyed by config values.""" - session: Optional[requests.Session] = getattr(_local, "session", None) - if session is not None: - return session + key = _session_key( + user_agent=user_agent, + verify_ssl=verify_ssl, + pool_connections=pool_connections, + pool_maxsize=pool_maxsize, + ) + cache = _get_thread_session_cache() + + existing = cache.get(key) + if existing is not None: + cache.move_to_end(key) + return existing session = requests.Session() - session.headers.update({"User-Agent": str(user_agent or _DEFAULT_USER_AGENT)}) + session.headers.update({"User-Agent": key[0]}) 
# Expand connection pool; keep max_retries=0 to avoid semantic changes. - adapter = HTTPAdapter(pool_connections=pool_connections, pool_maxsize=pool_maxsize, max_retries=0) + adapter = HTTPAdapter( + pool_connections=pool_connections, + pool_maxsize=pool_maxsize, + max_retries=0, + ) session.mount("http://", adapter) session.mount("https://", adapter) # Configure verify once. - session.verify = resolve_verify_value(verify_ssl) + session.verify = key[1] + _register_session(session) - _local.session = session + cache[key] = session + while len(cache) > _MAX_SESSIONS_PER_THREAD: + _, old_session = cache.popitem(last=False) + try: + old_session.close() + except Exception: + pass return session @@ -66,3 +123,36 @@ def request( sess = get_requests_session() return sess.request(method, url, params=params, headers=headers, timeout=timeout, **kwargs) + + +def close_requests_sessions() -> None: + """Close cached requests sessions for the current thread and global registry.""" + + cache = getattr(_local, "session_cache", None) + if cache: + try: + sessions = list(cache.values()) + cache.clear() + except Exception: + sessions = [] + for session in sessions: + try: + session.close() + except Exception: + pass + + try: + with _session_registry_lock: + all_sessions = list(_all_sessions) + _all_sessions.clear() + except Exception: + all_sessions = [] + + for session in all_sessions: + try: + session.close() + except Exception: + pass + + +atexit.register(close_requests_sessions) diff --git a/Provider/HIFI.py b/Provider/HIFI.py index 3d46675..1f075a6 100644 --- a/Provider/HIFI.py +++ b/Provider/HIFI.py @@ -1222,7 +1222,7 @@ class HIFI(Provider): from API.httpx_shared import get_shared_httpx_client timeout_val = float(getattr(self, "api_timeout", 10.0)) - client = get_shared_httpx_client(timeout=timeout_val) + client = get_shared_httpx_client() resp = client.get(resolved_text, timeout=timeout_val) resp.raise_for_status() content = resp.content diff --git a/Provider/Tidal.py 
b/Provider/Tidal.py index 058c8e5..4aa8d73 100644 --- a/Provider/Tidal.py +++ b/Provider/Tidal.py @@ -1404,7 +1404,7 @@ class Tidal(Provider): from API.httpx_shared import get_shared_httpx_client timeout_val = float(getattr(self, "api_timeout", 10.0)) - client = get_shared_httpx_client(timeout=timeout_val) + client = get_shared_httpx_client() resp = client.get(resolved_text, timeout=timeout_val) resp.raise_for_status() content = resp.content diff --git a/Provider/metadata_provider.py b/Provider/metadata_provider.py index e96a812..8ff46ca 100644 --- a/Provider/metadata_provider.py +++ b/Provider/metadata_provider.py @@ -74,6 +74,54 @@ class MetadataProvider(ABC): tags.append(f"source:{self.name}") return tags + def search_tags(self, query: str, limit: int = 1) -> List[str]: + """Return tags for the best match from `search(query)`. + + Providers can override this when tags should be extracted differently from + the default search->first-item->to_tags flow. + """ + + try: + items = self.search(query, limit=max(1, int(limit))) + except Exception: + return [] + if not items: + return [] + try: + return [str(t) for t in self.to_tags(items[0]) if t is not None] + except Exception: + return [] + + def identifier_query(self, identifiers: Dict[str, Any]) -> Optional[str]: + """Return provider-specific identifier query text from parsed identifiers.""" + + _ = identifiers + return None + + def combined_query( + self, + *, + title_hint: Optional[str], + artist_hint: Optional[str], + ) -> Optional[str]: + """Return provider-specific title+artist query text.""" + + _ = title_hint + _ = artist_hint + return None + + def extract_url_query(self, result: Any, get_field: Any) -> Optional[str]: + """Return provider-specific URL query derived from a piped result.""" + + _ = result + _ = get_field + return None + + def emits_direct_tags(self) -> bool: + """True when provider should skip selection table and emit tags directly.""" + + return False + class ITunesProvider(MetadataProvider): 
"""Metadata provider using the iTunes Search API.""" @@ -112,6 +160,21 @@ class ITunesProvider(MetadataProvider): debug(f"iTunes returned {len(items)} items for '{query}'") return items + def identifier_query(self, identifiers: Dict[str, Any]) -> Optional[str]: + return identifiers.get("musicbrainz") or identifiers.get("musicbrainzalbum") + + def combined_query( + self, + *, + title_hint: Optional[str], + artist_hint: Optional[str], + ) -> Optional[str]: + title_text = str(title_hint or "").strip() + artist_text = str(artist_hint or "").strip() + if not title_text or not artist_text: + return None + return f"{title_text} {artist_text}" + class OpenLibraryMetadataProvider(MetadataProvider): """Metadata provider for OpenLibrary book metadata.""" @@ -220,6 +283,14 @@ class OpenLibraryMetadataProvider(MetadataProvider): tags.append(f"source:{self.name}") return tags + def identifier_query(self, identifiers: Dict[str, Any]) -> Optional[str]: + return ( + identifiers.get("isbn_13") + or identifiers.get("isbn_10") + or identifiers.get("isbn") + or identifiers.get("openlibrary") + ) + class GoogleBooksMetadataProvider(MetadataProvider): """Metadata provider for Google Books volumes API.""" @@ -329,6 +400,14 @@ class GoogleBooksMetadataProvider(MetadataProvider): tags.append(f"source:{self.name}") return tags + def identifier_query(self, identifiers: Dict[str, Any]) -> Optional[str]: + return ( + identifiers.get("isbn_13") + or identifiers.get("isbn_10") + or identifiers.get("isbn") + or identifiers.get("openlibrary") + ) + class ISBNsearchMetadataProvider(MetadataProvider): """Metadata provider that scrapes isbnsearch.org by ISBN. 
@@ -624,6 +703,18 @@ class MusicBrainzMetadataProvider(MetadataProvider): tags.append(f"musicbrainz:{mbid}") return tags + def combined_query( + self, + *, + title_hint: Optional[str], + artist_hint: Optional[str], + ) -> Optional[str]: + title_text = str(title_hint or "").strip() + artist_text = str(artist_hint or "").strip() + if not title_text or not artist_text: + return None + return f'recording:"{title_text}" AND artist:"{artist_text}"' + class ImdbMetadataProvider(MetadataProvider): """Metadata provider for IMDb titles (movies/series/episodes).""" @@ -757,6 +848,9 @@ class ImdbMetadataProvider(MetadataProvider): deduped.append(s) return deduped + def identifier_query(self, identifiers: Dict[str, Any]) -> Optional[str]: + return identifiers.get("imdb") + class YtdlpMetadataProvider(MetadataProvider): """Metadata provider that extracts tags from a supported URL using yt-dlp. @@ -904,6 +998,23 @@ class YtdlpMetadataProvider(MetadataProvider): out.append(s) return out + def extract_url_query(self, result: Any, get_field: Any) -> Optional[str]: + raw_url = ( + get_field(result, "url", None) + or get_field(result, "source_url", None) + or get_field(result, "target", None) + ) + if isinstance(raw_url, list) and raw_url: + raw_url = raw_url[0] + if isinstance(raw_url, str): + text = raw_url.strip() + if text.startswith(("http://", "https://")): + return text + return None + + def emits_direct_tags(self) -> bool: + return True + def _coerce_archive_field_list(value: Any) -> List[str]: """Coerce an Archive.org metadata field to a list of strings.""" diff --git a/SYS/cmdlet_spec.py b/SYS/cmdlet_spec.py new file mode 100644 index 0000000..2eabef5 --- /dev/null +++ b/SYS/cmdlet_spec.py @@ -0,0 +1,449 @@ +from __future__ import annotations + +import sys +import tempfile +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Sequence, Set + +from SYS.logger import log + + +@dataclass +class CmdletArg: + 
"""Represents a single cmdlet argument with optional enum choices.""" + + name: str + type: str = "string" + required: bool = False + description: str = "" + choices: List[str] = field(default_factory=list) + alias: str = "" + handler: Optional[Any] = None + variadic: bool = False + usage: str = "" + requires_db: bool = False + query_key: Optional[str] = None + query_aliases: List[str] = field(default_factory=list) + query_only: bool = False + + def resolve(self, value: Any) -> Any: + if self.handler is not None and callable(self.handler): + return self.handler(value) + return value + + def to_flags(self) -> tuple[str, ...]: + flags = [f"--{self.name}", f"-{self.name}"] + if self.alias: + flags.append(f"-{self.alias}") + + if self.type == "flag": + flags.append(f"--no-{self.name}") + flags.append(f"-no{self.name}") + if self.alias: + flags.append(f"-n{self.alias}") + + return tuple(flags) + + +def QueryArg( + name: str, + *, + key: Optional[str] = None, + aliases: Optional[Sequence[str]] = None, + type: str = "string", + required: bool = False, + description: str = "", + choices: Optional[Sequence[str]] = None, + handler: Optional[Any] = None, + query_only: bool = True, +) -> CmdletArg: + """Create an argument that can be populated from `-query` fields.""" + + return CmdletArg( + name=str(name), + type=str(type or "string"), + required=bool(required), + description=str(description or ""), + choices=list(choices or []), + handler=handler, + query_key=str(key or name).strip().lower() if str(key or name).strip() else None, + query_aliases=[str(a).strip().lower() for a in (aliases or []) if str(a).strip()], + query_only=bool(query_only), + ) + + +class SharedArgs: + """Registry of shared CmdletArg definitions used across multiple cmdlet.""" + + STORE = CmdletArg( + name="store", + type="enum", + choices=[], + description="Selects store", + query_key="store", + ) + + URL = CmdletArg( + name="url", + type="string", + description="http parser", + ) + + PROVIDER = 
CmdletArg( + name="provider", + type="string", + description="selects provider", + ) + + @staticmethod + def get_store_choices(config: Optional[Dict[str, Any]] = None, force: bool = False) -> List[str]: + if not force and hasattr(SharedArgs, "_cached_available_stores"): + return SharedArgs._cached_available_stores or [] + + if not force: + SharedArgs._refresh_store_choices_cache(config, skip_instantiation=True) + else: + SharedArgs._refresh_store_choices_cache(config, skip_instantiation=False) + return SharedArgs._cached_available_stores or [] + + @staticmethod + def _refresh_store_choices_cache(config: Optional[Dict[str, Any]] = None, skip_instantiation: bool = False) -> None: + try: + if config is None: + try: + from SYS.config import load_config + + config = load_config() + except Exception: + SharedArgs._cached_available_stores = [] + return + + try: + from Store.registry import list_configured_backend_names + + SharedArgs._cached_available_stores = list_configured_backend_names(config) or [] + except Exception: + SharedArgs._cached_available_stores = [] + + if skip_instantiation: + return + + try: + from Store.registry import Store as StoreRegistry + + registry = StoreRegistry(config=config, suppress_debug=True) + available = registry.list_backends() + if available: + SharedArgs._cached_available_stores = available + except Exception: + pass + except Exception: + SharedArgs._cached_available_stores = [] + + LOCATION = CmdletArg( + "location", + type="enum", + choices=["hydrus", "0x0"], + required=True, + description="Destination location", + ) + + DELETE = CmdletArg( + "delete", + type="flag", + description="Delete the file after successful operation.", + ) + + ARTIST = CmdletArg( + "artist", + type="string", + description="Filter by artist name (case-insensitive, partial match).", + ) + + ALBUM = CmdletArg( + "album", + type="string", + description="Filter by album name (case-insensitive, partial match).", + ) + + TRACK = CmdletArg( + "track", + 
type="string", + description="Filter by track title (case-insensitive, partial match).", + ) + + LIBRARY = CmdletArg( + "library", + type="string", + choices=["hydrus", "local", "soulseek", "libgen", "ftp"], + description="Search library or source location.", + ) + + TIMEOUT = CmdletArg( + "timeout", + type="integer", + description="Search or operation timeout in seconds.", + ) + + LIMIT = CmdletArg( + "limit", + type="integer", + description="Maximum number of results to return.", + ) + + PATH = CmdletArg("path", type="string", description="File or directory path.") + + QUERY = CmdletArg( + "query", + type="string", + description="Unified query string (e.g., hash:, hash:{
<hash1>
,
<hash2>
}).", + ) + + REASON = CmdletArg( + "reason", + type="string", + description="Reason or explanation for the operation.", + ) + + ARCHIVE = CmdletArg( + "archive", + type="flag", + description="Archive the URL to Wayback Machine, Archive.today, and Archive.ph (requires URL argument in cmdlet).", + alias="arch", + ) + + @staticmethod + def resolve_storage( + storage_value: Optional[str], + default: Optional[Path] = None, + ) -> Path: + _ = storage_value + if default is not None: + return default + return Path(tempfile.gettempdir()) + + @classmethod + def get(cls, name: str) -> Optional[CmdletArg]: + try: + return getattr(cls, name.upper()) + except AttributeError: + return None + + +@dataclass +class Cmdlet: + """Represents a cmdlet with metadata and arguments.""" + + name: str + summary: str + usage: str + alias: List[str] = field(default_factory=list) + arg: List[CmdletArg] = field(default_factory=list) + detail: List[str] = field(default_factory=list) + examples: List[str] = field(default_factory=list) + exec: Optional[Callable[[Any, Sequence[str], Dict[str, Any]], int]] = field(default=None) + + def _collect_names(self) -> List[str]: + names: List[str] = [] + if self.name: + names.append(self.name) + for alias in self.alias or []: + if alias: + names.append(alias) + for alias in getattr(self, "aliases", None) or []: + if alias: + names.append(alias) + + seen: Set[str] = set() + deduped: List[str] = [] + for name in names: + key = name.replace("_", "-").lower() + if key in seen: + continue + seen.add(key) + deduped.append(name) + return deduped + + def register(self) -> "Cmdlet": + if not callable(self.exec): + return self + try: + from cmdlet import register_callable as _register_callable + except Exception: + return self + + names = self._collect_names() + if not names: + return self + + _register_callable(names, self.exec) + return self + + def get_flags(self, arg_name: str) -> set[str]: + return {f"-{arg_name}", f"--{arg_name}"} + + def 
build_flag_registry(self) -> Dict[str, set[str]]: + return {arg.name: self.get_flags(arg.name) for arg in self.arg} + + +def parse_cmdlet_args( + args: Sequence[str], + cmdlet_spec: Dict[str, Any] | Cmdlet, +) -> Dict[str, Any]: + """Parse command-line arguments based on cmdlet specification.""" + + result: Dict[str, Any] = {} + + arg_specs_raw = getattr(cmdlet_spec, "arg", None) + if arg_specs_raw is None or not isinstance(arg_specs_raw, (list, tuple)): + raise TypeError( + f"Expected cmdlet-like object with an 'arg' list, got {type(cmdlet_spec).__name__}" + ) + + arg_specs: List[Any] = list(arg_specs_raw) + positional_args: List[CmdletArg] = [] + query_mapped_args: List[CmdletArg] = [] + + arg_spec_map: Dict[str, str] = {} + arg_spec_by_canonical: Dict[str, Any] = {} + + for spec in arg_specs: + name = getattr(spec, "name", None) + if not name: + continue + + try: + if getattr(spec, "query_key", None): + query_mapped_args.append(spec) + except Exception: + pass + + name_str = str(name) + canonical_name = name_str.lstrip("-") + canonical_key = canonical_name.lower() + + try: + if bool(getattr(spec, "query_only", False)): + continue + except Exception: + pass + + arg_spec_by_canonical[canonical_key] = spec + + if "-" not in name_str: + positional_args.append(spec) + + arg_spec_map[canonical_key] = canonical_name + arg_spec_map[f"-{canonical_name}".lower()] = canonical_name + arg_spec_map[f"--{canonical_name}".lower()] = canonical_name + + i = 0 + positional_index = 0 + + while i < len(args): + token = str(args[i]) + token_lower = token.lower() + + if token_lower in {"-hash", "--hash"} and token_lower not in arg_spec_map: + try: + log( + 'Legacy flag -hash is no longer supported. 
Use: -query "hash:"', + file=sys.stderr, + ) + except Exception: + pass + i += 1 + continue + + if token_lower in arg_spec_map: + canonical_name = arg_spec_map[token_lower] + spec = arg_spec_by_canonical.get(canonical_name.lower()) + + is_flag = bool(spec and str(getattr(spec, "type", "")).lower() == "flag") + + if is_flag: + result[canonical_name] = True + i += 1 + else: + if i + 1 < len(args) and not str(args[i + 1]).startswith("-"): + value = args[i + 1] + + is_variadic = bool(spec and getattr(spec, "variadic", False)) + if is_variadic: + if canonical_name not in result: + result[canonical_name] = [] + elif not isinstance(result[canonical_name], list): + result[canonical_name] = [result[canonical_name]] + result[canonical_name].append(value) + else: + result[canonical_name] = value + i += 2 + else: + i += 1 + elif positional_index < len(positional_args): + positional_spec = positional_args[positional_index] + canonical_name = str(getattr(positional_spec, "name", "")).lstrip("-") + is_variadic = bool(getattr(positional_spec, "variadic", False)) + + if is_variadic: + if canonical_name not in result: + result[canonical_name] = [] + elif not isinstance(result[canonical_name], list): + result[canonical_name] = [result[canonical_name]] + + result[canonical_name].append(token) + i += 1 + else: + result[canonical_name] = token + positional_index += 1 + i += 1 + else: + i += 1 + + try: + raw_query = result.get("query") + except Exception: + raw_query = None + + if query_mapped_args and raw_query is not None: + try: + from SYS.cli_syntax import parse_query as _parse_query + + parsed_query = _parse_query(str(raw_query)) + fields = parsed_query.get("fields", {}) if isinstance(parsed_query, dict) else {} + norm_fields = ( + {str(k).strip().lower(): v for k, v in fields.items()} + if isinstance(fields, dict) + else {} + ) + except Exception: + norm_fields = {} + + for spec in query_mapped_args: + canonical_name = str(getattr(spec, "name", "") or "").lstrip("-") + if not 
canonical_name: + continue + if canonical_name in result and result.get(canonical_name) not in (None, ""): + continue + try: + key = str(getattr(spec, "query_key", "") or "").strip().lower() + aliases = getattr(spec, "query_aliases", None) + alias_list = [str(a).strip().lower() for a in (aliases or []) if str(a).strip()] + except Exception: + key = "" + alias_list = [] + candidates = [k for k in [key, canonical_name] + alias_list if k] + val = None + for k in candidates: + if k in norm_fields: + val = norm_fields.get(k) + break + if val is None: + continue + try: + result[canonical_name] = spec.resolve(val) + except Exception: + result[canonical_name] = val + + return result diff --git a/Store/HydrusNetwork.py b/Store/HydrusNetwork.py index eab8757..7876342 100644 --- a/Store/HydrusNetwork.py +++ b/Store/HydrusNetwork.py @@ -10,6 +10,7 @@ from typing import Any, Dict, List, Literal, Optional, Sequence, Tuple from urllib.parse import quote import httpx +from API.httpx_shared import get_shared_httpx_client from SYS.logger import debug, log from SYS.utils_constant import mime_maps @@ -198,29 +199,28 @@ class HydrusNetwork(Store): api_version_url = f"{self.URL}/api_version" verify_key_url = f"{self.URL}/verify_access_key" try: - with httpx.Client(timeout=5.0, - verify=False, - follow_redirects=True) as client: - version_resp = client.get(api_version_url) - version_resp.raise_for_status() - version_payload = version_resp.json() - if not isinstance(version_payload, dict): - raise RuntimeError( - "Hydrus /api_version returned an unexpected response" - ) - - verify_resp = client.get( - verify_key_url, - headers={ - "Hydrus-Client-API-Access-Key": self.API - }, + client = get_shared_httpx_client(timeout=5.0, verify_ssl=False) + version_resp = client.get(api_version_url, follow_redirects=True) + version_resp.raise_for_status() + version_payload = version_resp.json() + if not isinstance(version_payload, dict): + raise RuntimeError( + "Hydrus /api_version returned an 
unexpected response" + ) + + verify_resp = client.get( + verify_key_url, + headers={ + "Hydrus-Client-API-Access-Key": self.API + }, + follow_redirects=True, + ) + verify_resp.raise_for_status() + verify_payload = verify_resp.json() + if not isinstance(verify_payload, dict): + raise RuntimeError( + "Hydrus /verify_access_key returned an unexpected response" ) - verify_resp.raise_for_status() - verify_payload = verify_resp.json() - if not isinstance(verify_payload, dict): - raise RuntimeError( - "Hydrus /verify_access_key returned an unexpected response" - ) _HYDRUS_INIT_CHECK_CACHE[cache_key] = (True, None) except Exception as exc: @@ -294,12 +294,10 @@ class HydrusNetwork(Store): "Hydrus-Client-API-Access-Key": self.API, "Accept": "application/json", } - with httpx.Client(timeout=5.0, - verify=False, - follow_redirects=True) as client: - resp = client.get(url, params=params, headers=headers) - resp.raise_for_status() - payload = resp.json() + client = get_shared_httpx_client(timeout=5.0, verify_ssl=False) + resp = client.get(url, params=params, headers=headers, follow_redirects=True) + resp.raise_for_status() + payload = resp.json() count_val = None if isinstance(payload, dict): @@ -1587,13 +1585,13 @@ class HydrusNetwork(Store): file_url = f"{self.URL.rstrip('/')}/get_files/file?hash={quote(h)}" dest_path = base_tmp / fname - with httpx.stream( + stream_client = get_shared_httpx_client(timeout=60.0, verify_ssl=False) + with stream_client.stream( "GET", file_url, headers={"Hydrus-Client-API-Access-Key": self.API}, follow_redirects=True, timeout=60.0, - verify=False, ) as resp: resp.raise_for_status() with dest_path.open("wb") as fh: diff --git a/cmdlet/_shared.py b/cmdlet/_shared.py index 18f0060..f7645b4 100644 --- a/cmdlet/_shared.py +++ b/cmdlet/_shared.py @@ -585,6 +585,15 @@ def parse_cmdlet_args(args: Sequence[str], result = parse_cmdlet_args(["value1", "-count", "5"], cmdlet) # result = {"path": "value1", "count": "5"} """ + try: + from SYS.cmdlet_spec 
import parse_cmdlet_args as _parse_cmdlet_args_fast + + return _parse_cmdlet_args_fast(args, cmdlet_spec) + except Exception: + # Fall back to local implementation below to preserve behavior if the + # lightweight parser is unavailable. + pass + result: Dict[str, Any] = {} diff --git a/cmdlet/get_tag.py b/cmdlet/get_tag.py index ef2b24d..fbe1f84 100644 --- a/cmdlet/get_tag.py +++ b/cmdlet/get_tag.py @@ -27,6 +27,7 @@ from typing import Any, Dict, List, Optional, Sequence, Tuple from SYS import pipeline as ctx from SYS.pipeline_progress import PipelineProgress from . import _shared as sh +from SYS.field_access import get_field normalize_hash = sh.normalize_hash looks_like_hash = sh.looks_like_hash @@ -34,7 +35,6 @@ Cmdlet = sh.Cmdlet CmdletArg = sh.CmdletArg SharedArgs = sh.SharedArgs parse_cmdlet_args = sh.parse_cmdlet_args -get_field = sh.get_field try: from SYS.metadata import extract_title @@ -60,84 +60,6 @@ def _dedup_tags_preserve_order(tags: List[str]) -> List[str]: return out -def _extract_subtitle_tags(info: Dict[str, Any]) -> List[str]: - """Extract subtitle availability tags from a yt-dlp info dict. - - Produces multi-valued tags so languages can coexist: - - subs: - - subs_auto: - """ - - def _langs(value: Any) -> List[str]: - if not isinstance(value, dict): - return [] - langs: List[str] = [] - for k in value.keys(): - if not isinstance(k, str): - continue - lang = k.strip().lower() - if lang: - langs.append(lang) - return sorted(set(langs)) - - out: List[str] = [] - for lang in _langs(info.get("subtitles")): - out.append(f"subs:{lang}") - for lang in _langs(info.get("automatic_captions")): - out.append(f"subs_auto:{lang}") - return out - - -def _scrape_ytdlp_info(url: str) -> Optional[Dict[str, Any]]: - """Fetch a yt-dlp info dict without downloading media.""" - if not isinstance(url, str) or not url.strip(): - return None - url = url.strip() - - # Prefer the Python module when available (faster, avoids shell quoting issues). 
- try: - import yt_dlp # type: ignore - - opts: Any = { - "quiet": True, - "no_warnings": True, - "skip_download": True, - "noprogress": True, - "socket_timeout": 15, - "retries": 1, - "playlist_items": "1-10", - } - with yt_dlp.YoutubeDL(opts) as ydl: - info = ydl.extract_info(url, download=False) - return info if isinstance(info, dict) else None - except Exception: - pass - - # Fallback to yt-dlp CLI if the module isn't available. - try: - import json as json_module - - cmd = [ - "yt-dlp", - "-J", - "--no-warnings", - "--skip-download", - "--playlist-items", - "1-10", - url, - ] - result = subprocess.run(cmd, capture_output=True, text=True, timeout=30) - if result.returncode != 0: - return None - payload = (result.stdout or "").strip() - if not payload: - return None - data = json_module.loads(payload) - return data if isinstance(data, dict) else None - except Exception: - return None - - def _resolve_candidate_urls_for_item( result: Any, backend: Any, @@ -1224,45 +1146,19 @@ def _run_impl(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: ) return 1 - info = _scrape_ytdlp_info(scrape_target) - if not info: - log( - "yt-dlp could not extract metadata for this URL (unsupported or failed)", - file=sys.stderr, - ) + ytdlp_provider = get_metadata_provider("ytdlp", config) + if ytdlp_provider is None: + log("yt-dlp metadata provider is unavailable", file=sys.stderr) return 1 try: - from SYS.metadata import extract_ytdlp_tags + tags = [ + str(t) + for t in ytdlp_provider.search_tags(scrape_target, limit=1) + if t is not None + ] except Exception: - extract_ytdlp_tags = None # type: ignore[assignment] - - # Prefer the top-level metadata, but if this is a playlist container, use - # the first entry for per-item fields like subtitles. 
- info_for_subs = info - entries = info.get("entries") if isinstance(info, dict) else None - if isinstance(entries, list) and entries: - first = entries[0] - if isinstance(first, dict): - info_for_subs = first - - tags: List[str] = [] - if extract_ytdlp_tags: - try: - tags.extend(extract_ytdlp_tags(info)) - except Exception: - pass - - # Subtitle availability tags - try: - tags.extend( - _extract_subtitle_tags( - info_for_subs if isinstance(info_for_subs, - dict) else {} - ) - ) - except Exception: - pass + tags = [] # Ensure we actually have something to apply. tags = _dedup_tags_preserve_order(tags) @@ -1399,19 +1295,10 @@ def _run_impl(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: identifiers = _extract_scrapable_identifiers(identifier_tags) identifier_query: Optional[str] = None if identifiers: - if provider.name in {"openlibrary", - "googlebooks", - "google"}: - identifier_query = ( - identifiers.get("isbn_13") or identifiers.get("isbn_10") - or identifiers.get("isbn") or identifiers.get("openlibrary") - ) - elif provider.name == "imdb": - identifier_query = identifiers.get("imdb") - elif provider.name == "itunes": - identifier_query = identifiers.get("musicbrainz") or identifiers.get( - "musicbrainzalbum" - ) + try: + identifier_query = provider.identifier_query(identifiers) + except Exception: + identifier_query = None # Determine query from identifier first, else title on the result or filename title_hint = ( @@ -1444,32 +1331,21 @@ def _run_impl(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: artist_hint = str(meta_artist) combined_query: Optional[str] = None - if (not identifier_query and title_hint and artist_hint - and provider.name in {"itunes", - "musicbrainz"}): - if provider.name == "musicbrainz": - combined_query = f'recording:"{title_hint}" AND artist:"{artist_hint}"' - else: - combined_query = f"{title_hint} {artist_hint}" + if not identifier_query and title_hint and artist_hint: + try: + combined_query = 
provider.combined_query( + title_hint=str(title_hint), + artist_hint=str(artist_hint), + ) + except Exception: + combined_query = None # yt-dlp isn't a search provider; it requires a URL. url_hint: Optional[str] = None - if provider.name == "ytdlp": - raw_url = ( - get_field(result, - "url", - None) or get_field(result, - "source_url", - None) or get_field(result, - "target", - None) - ) - if isinstance(raw_url, list) and raw_url: - raw_url = raw_url[0] - if isinstance(raw_url, - str) and raw_url.strip().startswith(("http://", - "https://")): - url_hint = raw_url.strip() + try: + url_hint = provider.extract_url_query(result, get_field) + except Exception: + url_hint = None query_hint = url_hint or identifier_query or combined_query or title_hint if not query_hint: @@ -1492,7 +1368,12 @@ def _run_impl(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: return 1 # For yt-dlp, emit tags directly (there is no meaningful multi-result selection step). - if provider.name == "ytdlp": + emit_direct = False + try: + emit_direct = bool(provider.emits_direct_tags()) + except Exception: + emit_direct = False + if emit_direct: try: tags = [str(t) for t in provider.to_tags(items[0]) if t is not None] except Exception: diff --git a/cmdnat/adjective.py b/cmdnat/adjective.py index c9d5b49..fdb64f8 100644 --- a/cmdnat/adjective.py +++ b/cmdnat/adjective.py @@ -2,7 +2,7 @@ import json import os import sys from typing import List, Dict, Any, Sequence -from cmdlet._shared import Cmdlet, CmdletArg +from SYS.cmdlet_spec import Cmdlet, CmdletArg from SYS.logger import log from SYS.result_table import Table from SYS import pipeline as ctx diff --git a/cmdnat/config.py b/cmdnat/config.py index f0e8f35..5f18ccb 100644 --- a/cmdnat/config.py +++ b/cmdnat/config.py @@ -1,6 +1,6 @@ from typing import List, Dict, Any, Optional, Sequence -from cmdlet._shared import Cmdlet, CmdletArg +from SYS.cmdlet_spec import Cmdlet, CmdletArg from SYS.config import load_config, save_config, 
save_config_and_verify from SYS import pipeline as ctx from SYS.result_table import Table diff --git a/cmdnat/help.py b/cmdnat/help.py index 6d1be45..7734f76 100644 --- a/cmdnat/help.py +++ b/cmdnat/help.py @@ -4,7 +4,7 @@ from typing import Any, Dict, Sequence, List, Optional, Tuple import shlex import sys -from cmdlet._shared import Cmdlet, CmdletArg, parse_cmdlet_args +from SYS.cmdlet_spec import Cmdlet, CmdletArg, parse_cmdlet_args from cmdlet import REGISTRY as CMDLET_REGISTRY, ensure_cmdlet_modules_loaded from SYS.logger import log from SYS.result_table import Table @@ -16,6 +16,8 @@ def _normalize_choice_list(arg_names: Optional[List[str]]) -> List[str]: _HELP_EXAMPLE_SOURCE_COMMAND = ".help-example" +_METADATA_CACHE_KEY: Optional[Tuple[int, int]] = None +_METADATA_CACHE_VALUE: Optional[Tuple[Dict[str, Dict[str, Any]], Dict[str, str]]] = None def _example_for_cmd(name: str) -> List[str]: @@ -104,6 +106,13 @@ def _build_alias_map_from_metadata(metadata: Dict[str, Dict[str, Any]]) -> Dict[ def _gather_metadata_from_cmdlet_classes() -> Tuple[Dict[str, Dict[str, Any]], Dict[str, str]]: + global _METADATA_CACHE_KEY, _METADATA_CACHE_VALUE + + cache_key = (len(sys.modules), len(CMDLET_REGISTRY)) + if _METADATA_CACHE_KEY == cache_key and _METADATA_CACHE_VALUE is not None: + cached_metadata, cached_alias = _METADATA_CACHE_VALUE + return dict(cached_metadata), dict(cached_alias) + metadata: Dict[str, Dict[str, Any]] = {} alias_map: Dict[str, str] = {} try: @@ -116,7 +125,7 @@ def _gather_metadata_from_cmdlet_classes() -> Tuple[Dict[str, Dict[str, Any]], D if not (mod_name.startswith("cmdlet.") or mod_name == "cmdlet" or mod_name.startswith("cmdnat.")): continue cmdlet_obj = getattr(module, "CMDLET", None) - if not isinstance(cmdlet_obj, Cmdlet): + if cmdlet_obj is None or not hasattr(cmdlet_obj, "name") or not hasattr(cmdlet_obj, "arg"): continue canonical_key = _normalize_cmdlet_key(getattr(cmdlet_obj, "name", None) or "") if not canonical_key: @@ -166,6 +175,9 @@ 
def _gather_metadata_from_cmdlet_classes() -> Tuple[Dict[str, Dict[str, Any]], D }, ) + _METADATA_CACHE_KEY = cache_key + _METADATA_CACHE_VALUE = (dict(metadata), dict(alias_map)) + return metadata, alias_map diff --git a/cmdnat/matrix.py b/cmdnat/matrix.py index c72a2e9..d1fdd10 100644 --- a/cmdnat/matrix.py +++ b/cmdnat/matrix.py @@ -8,7 +8,7 @@ import re import uuid from urllib.parse import parse_qs, urlparse -from cmdlet._shared import Cmdlet, CmdletArg +from SYS.cmdlet_spec import Cmdlet, CmdletArg from SYS.config import load_config, save_config from SYS.logger import log, debug from SYS.result_table import Table diff --git a/cmdnat/out_table.py b/cmdnat/out_table.py index dd79973..58e2bf8 100644 --- a/cmdnat/out_table.py +++ b/cmdnat/out_table.py @@ -6,7 +6,7 @@ import sys from pathlib import Path from typing import Any, Dict, Sequence, Optional -from cmdlet._shared import Cmdlet, CmdletArg +from SYS.cmdlet_spec import Cmdlet, CmdletArg from SYS.logger import log from SYS import pipeline as ctx diff --git a/cmdnat/pipe.py b/cmdnat/pipe.py index 7a3a9f2..f5182d5 100644 --- a/cmdnat/pipe.py +++ b/cmdnat/pipe.py @@ -8,7 +8,7 @@ import re from datetime import datetime from urllib.parse import urlparse, parse_qs from pathlib import Path -from cmdlet._shared import Cmdlet, CmdletArg, parse_cmdlet_args +from SYS.cmdlet_spec import Cmdlet, CmdletArg, parse_cmdlet_args from Provider.tidal_manifest import resolve_tidal_manifest_path from SYS.logger import debug, get_thread_stream, is_debug_enabled, set_debug, set_thread_stream from SYS.result_table import Table diff --git a/cmdnat/status.py b/cmdnat/status.py index ec3b7aa..ed8e911 100644 --- a/cmdnat/status.py +++ b/cmdnat/status.py @@ -3,7 +3,7 @@ from __future__ import annotations import shutil from typing import Any, Dict, List -from cmdlet._shared import Cmdlet +from SYS.cmdlet_spec import Cmdlet from SYS import pipeline as ctx from SYS.result_table import Table from SYS.logger import set_debug, debug diff --git 
a/cmdnat/table.py b/cmdnat/table.py index 53720b3..f220ebb 100644 --- a/cmdnat/table.py +++ b/cmdnat/table.py @@ -1,6 +1,6 @@ from typing import Any, Dict, Sequence -from cmdlet._shared import Cmdlet, CmdletArg +from SYS.cmdlet_spec import Cmdlet, CmdletArg from SYS.logger import log diff --git a/cmdnat/telegram.py b/cmdnat/telegram.py index 063c130..9bd615e 100644 --- a/cmdnat/telegram.py +++ b/cmdnat/telegram.py @@ -4,7 +4,7 @@ import sys from pathlib import Path from typing import Any, Dict, List, Optional, Sequence -from cmdlet._shared import Cmdlet, CmdletArg +from SYS.cmdlet_spec import Cmdlet, CmdletArg from SYS.logger import log from SYS.result_table import Table from SYS import pipeline as ctx diff --git a/cmdnat/worker.py b/cmdnat/worker.py index 3e88645..d480977 100644 --- a/cmdnat/worker.py +++ b/cmdnat/worker.py @@ -9,7 +9,7 @@ from datetime import datetime, timezone from typing import Any, Dict, Sequence, List from cmdlet import register -from cmdlet._shared import Cmdlet, CmdletArg +from SYS.cmdlet_spec import Cmdlet, CmdletArg from SYS import pipeline as ctx from SYS.logger import log from SYS.database import db as _db, get_worker_stdout