This commit is contained in:
2026-02-11 19:06:38 -08:00
parent 1d0de1118b
commit ba623cb992
20 changed files with 848 additions and 247 deletions

View File

@@ -34,6 +34,7 @@ logger = logging.getLogger(__name__)
from API.ssl_certs import resolve_verify_value as _resolve_verify_value from API.ssl_certs import resolve_verify_value as _resolve_verify_value
from API.ssl_certs import get_requests_verify_value from API.ssl_certs import get_requests_verify_value
from API.httpx_shared import get_shared_httpx_client
# Default configuration # Default configuration
DEFAULT_TIMEOUT = 30.0 DEFAULT_TIMEOUT = 30.0
@@ -504,17 +505,18 @@ class HTTPClient:
if ("certificate verify failed" in msg or "unable to get local issuer certificate" in msg): if ("certificate verify failed" in msg or "unable to get local issuer certificate" in msg):
logger.info("Certificate verification failed; attempting to retry with a system-aware CA bundle") logger.info("Certificate verification failed; attempting to retry with a system-aware CA bundle")
try: try:
import httpx as _httpx temp_client = get_shared_httpx_client(
# Use the client's precomputed verify argument (set at init) timeout=self.timeout,
verify_override = self._httpx_verify verify_ssl=self._httpx_verify,
with _httpx.Client(timeout=self.timeout, verify=verify_override, headers=self._get_headers()) as temp_client: headers=self._get_headers(),
try: )
response = temp_client.request(method, url, **kwargs) try:
if raise_for_status: response = temp_client.request(method, url, **kwargs)
response.raise_for_status() if raise_for_status:
return response response.raise_for_status()
except Exception as e2: return response
last_exception = e2 except Exception as e2:
last_exception = e2
except Exception: except Exception:
# certifi/pip-system-certs/httpx not available; fall back to existing retry behavior # certifi/pip-system-certs/httpx not available; fall back to existing retry behavior
pass pass
@@ -530,17 +532,18 @@ class HTTPClient:
if ("certificate verify failed" in msg or "unable to get local issuer certificate" in msg): if ("certificate verify failed" in msg or "unable to get local issuer certificate" in msg):
logger.info("Certificate verification failed; attempting to retry with a system-aware CA bundle") logger.info("Certificate verification failed; attempting to retry with a system-aware CA bundle")
try: try:
import httpx as _httpx temp_client = get_shared_httpx_client(
# Use the client's precomputed verify argument (set at init) timeout=self.timeout,
verify_override = self._httpx_verify verify_ssl=self._httpx_verify,
with _httpx.Client(timeout=self.timeout, verify=verify_override, headers=self._get_headers()) as temp_client: headers=self._get_headers(),
try: )
response = temp_client.request(method, url, **kwargs) try:
if raise_for_status: response = temp_client.request(method, url, **kwargs)
response.raise_for_status() if raise_for_status:
return response response.raise_for_status()
except Exception as e2: return response
last_exception = e2 except Exception as e2:
last_exception = e2
except Exception: except Exception:
# certifi/pip-system-certs/httpx not available; fall back to existing retry behavior # certifi/pip-system-certs/httpx not available; fall back to existing retry behavior
pass pass

View File

@@ -1,14 +1,16 @@
"""Shared `httpx.Client` helper. """Shared `httpx.Client` helper.
Creating short-lived httpx clients disables connection pooling and costs extra CPU. Creating short-lived httpx clients disables connection pooling and costs extra CPU.
This module provides a small singleton client for callers that just need basic This module provides a small keyed client cache for callers that just need basic
GETs without the full HTTPClient wrapper. GETs without the full HTTPClient wrapper.
""" """
from __future__ import annotations from __future__ import annotations
import atexit
from collections import OrderedDict
import threading import threading
from typing import Dict, Optional from typing import Any, Dict, Optional, Tuple
import httpx import httpx
@@ -20,39 +22,85 @@ _DEFAULT_USER_AGENT = (
) )
_lock = threading.Lock() _lock = threading.Lock()
_shared_client: Optional[httpx.Client] = None _MAX_SHARED_CLIENTS = 8
_shared_clients: "OrderedDict[Tuple[float, Tuple[str, str], Tuple[Tuple[str, str], ...]], httpx.Client]" = OrderedDict()
def _normalize_headers(headers: Optional[Dict[str, str]]) -> Dict[str, str]:
    """Merge caller-supplied headers over the default User-Agent header."""
    merged: Dict[str, str] = {"User-Agent": _DEFAULT_USER_AGENT}
    if headers:
        # Coerce keys/values to str so the result is always hashable text.
        for key, value in headers.items():
            merged[str(key)] = str(value)
    return merged
def _verify_key(verify_value: Any) -> Tuple[str, str]:
if isinstance(verify_value, bool):
return ("bool", "1" if verify_value else "0")
if isinstance(verify_value, str):
return ("str", verify_value)
return ("obj", str(id(verify_value)))
def _client_key(
    *,
    timeout: float,
    verify_value: Any,
    merged_headers: Dict[str, str],
) -> Tuple[float, Tuple[str, str], Tuple[Tuple[str, str], ...]]:
    """Build the shared-client cache key from timeout, verify and headers."""
    # Header names are case-insensitive, so lower-case before sorting to get
    # a canonical, hashable ordering.
    pairs = [(str(name).lower(), str(value)) for name, value in merged_headers.items()]
    pairs.sort()
    return (float(timeout), _verify_key(verify_value), tuple(pairs))
def get_shared_httpx_client( def get_shared_httpx_client(
*, *,
timeout: float = 30.0, timeout: float = 30.0,
verify_ssl: bool = True, verify_ssl: bool | str = True,
headers: Optional[Dict[str, str]] = None, headers: Optional[Dict[str, str]] = None,
) -> httpx.Client: ) -> httpx.Client:
"""Return a process-wide shared synchronous httpx.Client.""" """Return a shared synchronous httpx.Client for a specific config key."""
global _shared_client verify_value = resolve_verify_value(verify_ssl)
if _shared_client is None: merged_headers = _normalize_headers(headers)
with _lock: key = _client_key(
if _shared_client is None: timeout=timeout,
base_headers = {"User-Agent": _DEFAULT_USER_AGENT} verify_value=verify_value,
if headers: merged_headers=merged_headers,
base_headers.update({str(k): str(v) for k, v in headers.items()}) )
_shared_client = httpx.Client(
timeout=timeout,
verify=resolve_verify_value(verify_ssl),
headers=base_headers,
)
return _shared_client with _lock:
existing = _shared_clients.get(key)
if existing is not None:
_shared_clients.move_to_end(key)
return existing
client = httpx.Client(
timeout=timeout,
verify=verify_value,
headers=merged_headers,
)
_shared_clients[key] = client
while len(_shared_clients) > _MAX_SHARED_CLIENTS:
_, old_client = _shared_clients.popitem(last=False)
try:
old_client.close()
except Exception:
pass
return client
def close_shared_httpx_client() -> None: def close_shared_httpx_client() -> None:
global _shared_client with _lock:
client = _shared_client clients = list(_shared_clients.values())
_shared_client = None _shared_clients.clear()
if client is not None: for client in clients:
try: try:
client.close() client.close()
except Exception: except Exception:
pass pass
atexit.register(close_shared_httpx_client)

View File

@@ -9,8 +9,11 @@ This module intentionally avoids importing the heavy httpx-based stack.
from __future__ import annotations from __future__ import annotations
import atexit
from collections import OrderedDict
import threading import threading
from typing import Any, Dict, Optional from typing import Any, Dict, Optional, Tuple
from weakref import WeakSet
import requests import requests
from requests.adapters import HTTPAdapter from requests.adapters import HTTPAdapter
@@ -23,6 +26,40 @@ _DEFAULT_USER_AGENT = (
) )
_local = threading.local() _local = threading.local()
_MAX_SESSIONS_PER_THREAD = 4
_session_registry_lock = threading.Lock()
_all_sessions: "WeakSet[requests.Session]" = WeakSet()
def _session_key(
    *,
    user_agent: str,
    verify_ssl: bool,
    pool_connections: int,
    pool_maxsize: int,
) -> Tuple[str, Any, int, int]:
    """Build the per-thread session cache key from the pooling config."""
    agent = str(user_agent or _DEFAULT_USER_AGENT)
    verify = resolve_verify_value(verify_ssl)
    return (agent, verify, int(pool_connections), int(pool_maxsize))
def _get_thread_session_cache() -> "OrderedDict[Tuple[str, Any, int, int], requests.Session]":
    """Return this thread's keyed session cache, creating it on first use."""
    existing = getattr(_local, "session_cache", None)
    if existing is not None:
        return existing
    fresh: "OrderedDict[Tuple[str, Any, int, int], requests.Session]" = OrderedDict()
    _local.session_cache = fresh
    return fresh
def _register_session(session: requests.Session) -> None:
    """Track *session* in the global weak registry (best effort, never raises)."""
    try:
        with _session_registry_lock:
            _all_sessions.add(session)
    except Exception:
        # Registration only exists for atexit cleanup; failure is harmless.
        pass
def get_requests_session( def get_requests_session(
@@ -32,24 +69,44 @@ def get_requests_session(
pool_connections: int = 16, pool_connections: int = 16,
pool_maxsize: int = 16, pool_maxsize: int = 16,
) -> requests.Session: ) -> requests.Session:
"""Return a thread-local shared Session configured for pooling.""" """Return a thread-local pooled Session keyed by config values."""
session: Optional[requests.Session] = getattr(_local, "session", None) key = _session_key(
if session is not None: user_agent=user_agent,
return session verify_ssl=verify_ssl,
pool_connections=pool_connections,
pool_maxsize=pool_maxsize,
)
cache = _get_thread_session_cache()
existing = cache.get(key)
if existing is not None:
cache.move_to_end(key)
return existing
session = requests.Session() session = requests.Session()
session.headers.update({"User-Agent": str(user_agent or _DEFAULT_USER_AGENT)}) session.headers.update({"User-Agent": key[0]})
# Expand connection pool; keep max_retries=0 to avoid semantic changes. # Expand connection pool; keep max_retries=0 to avoid semantic changes.
adapter = HTTPAdapter(pool_connections=pool_connections, pool_maxsize=pool_maxsize, max_retries=0) adapter = HTTPAdapter(
pool_connections=pool_connections,
pool_maxsize=pool_maxsize,
max_retries=0,
)
session.mount("http://", adapter) session.mount("http://", adapter)
session.mount("https://", adapter) session.mount("https://", adapter)
# Configure verify once. # Configure verify once.
session.verify = resolve_verify_value(verify_ssl) session.verify = key[1]
_register_session(session)
_local.session = session cache[key] = session
while len(cache) > _MAX_SESSIONS_PER_THREAD:
_, old_session = cache.popitem(last=False)
try:
old_session.close()
except Exception:
pass
return session return session
@@ -66,3 +123,36 @@ def request(
sess = get_requests_session() sess = get_requests_session()
return sess.request(method, url, params=params, headers=headers, timeout=timeout, **kwargs) return sess.request(method, url, params=params, headers=headers, timeout=timeout, **kwargs)
def close_requests_sessions() -> None:
    """Close cached requests sessions for the current thread and global registry."""
    # Drain the thread-local cache first so this thread stops handing out
    # soon-to-be-closed sessions.
    thread_cache = getattr(_local, "session_cache", None)
    local_sessions: list = []
    if thread_cache:
        try:
            local_sessions = list(thread_cache.values())
            thread_cache.clear()
        except Exception:
            local_sessions = []
    for sess in local_sessions:
        try:
            sess.close()
        except Exception:
            pass
    # Then close everything the weak registry still knows about
    # (sessions created by other threads).
    try:
        with _session_registry_lock:
            registered = list(_all_sessions)
            _all_sessions.clear()
    except Exception:
        registered = []
    for sess in registered:
        try:
            sess.close()
        except Exception:
            pass


atexit.register(close_requests_sessions)

View File

@@ -1222,7 +1222,7 @@ class HIFI(Provider):
from API.httpx_shared import get_shared_httpx_client from API.httpx_shared import get_shared_httpx_client
timeout_val = float(getattr(self, "api_timeout", 10.0)) timeout_val = float(getattr(self, "api_timeout", 10.0))
client = get_shared_httpx_client(timeout=timeout_val) client = get_shared_httpx_client()
resp = client.get(resolved_text, timeout=timeout_val) resp = client.get(resolved_text, timeout=timeout_val)
resp.raise_for_status() resp.raise_for_status()
content = resp.content content = resp.content

View File

@@ -1404,7 +1404,7 @@ class Tidal(Provider):
from API.httpx_shared import get_shared_httpx_client from API.httpx_shared import get_shared_httpx_client
timeout_val = float(getattr(self, "api_timeout", 10.0)) timeout_val = float(getattr(self, "api_timeout", 10.0))
client = get_shared_httpx_client(timeout=timeout_val) client = get_shared_httpx_client()
resp = client.get(resolved_text, timeout=timeout_val) resp = client.get(resolved_text, timeout=timeout_val)
resp.raise_for_status() resp.raise_for_status()
content = resp.content content = resp.content

View File

@@ -74,6 +74,54 @@ class MetadataProvider(ABC):
tags.append(f"source:{self.name}") tags.append(f"source:{self.name}")
return tags return tags
def search_tags(self, query: str, limit: int = 1) -> List[str]:
    """Return tags for the best match from `search(query)`.

    Providers can override this when tags should be extracted differently from
    the default search->first-item->to_tags flow.
    """
    try:
        results = self.search(query, limit=max(1, int(limit)))
    except Exception:
        # Best effort: a failed search yields no tags rather than an error.
        return []
    if not results:
        return []
    try:
        best = results[0]
        return [str(tag) for tag in self.to_tags(best) if tag is not None]
    except Exception:
        return []
def identifier_query(self, identifiers: Dict[str, Any]) -> Optional[str]:
    """Return provider-specific identifier query text from parsed identifiers.

    Base implementation: identifiers are not supported, so always None.
    """
    del identifiers  # unused in the base implementation
    return None
def combined_query(
    self,
    *,
    title_hint: Optional[str],
    artist_hint: Optional[str],
) -> Optional[str]:
    """Return provider-specific title+artist query text.

    Base implementation: combined queries are not supported, so always None.
    """
    del title_hint, artist_hint  # unused in the base implementation
    return None
def extract_url_query(self, result: Any, get_field: Any) -> Optional[str]:
    """Return provider-specific URL query derived from a piped result.

    Base implementation: URL extraction is not supported, so always None.
    """
    del result, get_field  # unused in the base implementation
    return None
def emits_direct_tags(self) -> bool:
    """True when provider should skip selection table and emit tags directly.

    Base providers go through the normal selection flow.
    """
    return False
class ITunesProvider(MetadataProvider): class ITunesProvider(MetadataProvider):
"""Metadata provider using the iTunes Search API.""" """Metadata provider using the iTunes Search API."""
@@ -112,6 +160,21 @@ class ITunesProvider(MetadataProvider):
debug(f"iTunes returned {len(items)} items for '{query}'") debug(f"iTunes returned {len(items)} items for '{query}'")
return items return items
def identifier_query(self, identifiers: Dict[str, Any]) -> Optional[str]:
    """Prefer a MusicBrainz recording id, falling back to the album id."""
    recording_id = identifiers.get("musicbrainz")
    if recording_id:
        return recording_id
    return identifiers.get("musicbrainzalbum")
def combined_query(
    self,
    *,
    title_hint: Optional[str],
    artist_hint: Optional[str],
) -> Optional[str]:
    """Build an iTunes free-text query "<title> <artist>"; None if either is missing."""
    title = str(title_hint or "").strip()
    artist = str(artist_hint or "").strip()
    if title and artist:
        return f"{title} {artist}"
    return None
class OpenLibraryMetadataProvider(MetadataProvider): class OpenLibraryMetadataProvider(MetadataProvider):
"""Metadata provider for OpenLibrary book metadata.""" """Metadata provider for OpenLibrary book metadata."""
@@ -220,6 +283,14 @@ class OpenLibraryMetadataProvider(MetadataProvider):
tags.append(f"source:{self.name}") tags.append(f"source:{self.name}")
return tags return tags
def identifier_query(self, identifiers: Dict[str, Any]) -> Optional[str]:
    """Return the first usable book identifier, preferring ISBN-13."""
    # Mirrors an `a or b or c or d` chain: stop at the first truthy value,
    # otherwise return whatever the last lookup produced.
    value = None
    for key in ("isbn_13", "isbn_10", "isbn", "openlibrary"):
        value = identifiers.get(key)
        if value:
            break
    return value
class GoogleBooksMetadataProvider(MetadataProvider): class GoogleBooksMetadataProvider(MetadataProvider):
"""Metadata provider for Google Books volumes API.""" """Metadata provider for Google Books volumes API."""
@@ -329,6 +400,14 @@ class GoogleBooksMetadataProvider(MetadataProvider):
tags.append(f"source:{self.name}") tags.append(f"source:{self.name}")
return tags return tags
def identifier_query(self, identifiers: Dict[str, Any]) -> Optional[str]:
    """Return the first usable book identifier, preferring ISBN-13."""
    # Equivalent to chained `or`: first truthy wins; otherwise the last
    # lookup result (possibly None) is returned.
    value = None
    for key in ("isbn_13", "isbn_10", "isbn", "openlibrary"):
        value = identifiers.get(key)
        if value:
            break
    return value
class ISBNsearchMetadataProvider(MetadataProvider): class ISBNsearchMetadataProvider(MetadataProvider):
"""Metadata provider that scrapes isbnsearch.org by ISBN. """Metadata provider that scrapes isbnsearch.org by ISBN.
@@ -624,6 +703,18 @@ class MusicBrainzMetadataProvider(MetadataProvider):
tags.append(f"musicbrainz:{mbid}") tags.append(f"musicbrainz:{mbid}")
return tags return tags
def combined_query(
    self,
    *,
    title_hint: Optional[str],
    artist_hint: Optional[str],
) -> Optional[str]:
    """Build a MusicBrainz fielded query over recording and artist."""
    title = str(title_hint or "").strip()
    artist = str(artist_hint or "").strip()
    if not (title and artist):
        return None
    return f'recording:"{title}" AND artist:"{artist}"'
class ImdbMetadataProvider(MetadataProvider): class ImdbMetadataProvider(MetadataProvider):
"""Metadata provider for IMDb titles (movies/series/episodes).""" """Metadata provider for IMDb titles (movies/series/episodes)."""
@@ -757,6 +848,9 @@ class ImdbMetadataProvider(MetadataProvider):
deduped.append(s) deduped.append(s)
return deduped return deduped
def identifier_query(self, identifiers: Dict[str, Any]) -> Optional[str]:
    """Use the parsed IMDb id (if any) as the lookup query."""
    imdb_id = identifiers.get("imdb")
    return imdb_id
class YtdlpMetadataProvider(MetadataProvider): class YtdlpMetadataProvider(MetadataProvider):
"""Metadata provider that extracts tags from a supported URL using yt-dlp. """Metadata provider that extracts tags from a supported URL using yt-dlp.
@@ -904,6 +998,23 @@ class YtdlpMetadataProvider(MetadataProvider):
out.append(s) out.append(s)
return out return out
def extract_url_query(self, result: Any, get_field: Any) -> Optional[str]:
    """Pull an http(s) URL out of a piped result via the get_field accessor."""
    candidate = (
        get_field(result, "url", None)
        or get_field(result, "source_url", None)
        or get_field(result, "target", None)
    )
    # Some pipelines store URLs as a list; take the first entry.
    if isinstance(candidate, list) and candidate:
        candidate = candidate[0]
    if not isinstance(candidate, str):
        return None
    url = candidate.strip()
    return url if url.startswith(("http://", "https://")) else None
def emits_direct_tags(self) -> bool:
    """yt-dlp results bypass the selection table; tags are emitted directly."""
    return True
def _coerce_archive_field_list(value: Any) -> List[str]: def _coerce_archive_field_list(value: Any) -> List[str]:
"""Coerce an Archive.org metadata field to a list of strings.""" """Coerce an Archive.org metadata field to a list of strings."""

449
SYS/cmdlet_spec.py Normal file
View File

@@ -0,0 +1,449 @@
from __future__ import annotations
import sys
import tempfile
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Sequence, Set
from SYS.logger import log
@dataclass
class CmdletArg:
    """Represents a single cmdlet argument with optional enum choices."""

    name: str                     # canonical argument name (no leading dashes)
    type: str = "string"          # e.g. "string", "integer", "enum", "flag"
    required: bool = False
    description: str = ""
    choices: List[str] = field(default_factory=list)  # valid values for enum types
    alias: str = ""               # optional short flag alias
    handler: Optional[Any] = None  # optional callable applied by resolve()
    variadic: bool = False        # accepts repeated values
    usage: str = ""
    requires_db: bool = False
    query_key: Optional[str] = None  # -query field name that can populate this arg
    query_aliases: List[str] = field(default_factory=list)
    query_only: bool = False      # populated only from -query, never from flags

    def resolve(self, value: Any) -> Any:
        """Run the optional handler over a parsed value, else pass it through."""
        handler = self.handler
        if handler is not None and callable(handler):
            return handler(value)
        return value

    def to_flags(self) -> tuple[str, ...]:
        """Return every flag spelling this argument accepts on the command line."""
        spellings = [f"--{self.name}", f"-{self.name}"]
        if self.alias:
            spellings.append(f"-{self.alias}")
        # Flag-typed arguments also accept explicit negations.
        if self.type == "flag":
            spellings.extend((f"--no-{self.name}", f"-no{self.name}"))
            if self.alias:
                spellings.append(f"-n{self.alias}")
        return tuple(spellings)
def QueryArg(
    name: str,
    *,
    key: Optional[str] = None,
    aliases: Optional[Sequence[str]] = None,
    type: str = "string",
    required: bool = False,
    description: str = "",
    choices: Optional[Sequence[str]] = None,
    handler: Optional[Any] = None,
    query_only: bool = True,
) -> CmdletArg:
    """Create an argument that can be populated from `-query` fields."""
    # Normalise the query key: explicit key wins, else the argument name;
    # a blank key becomes None (not query-mapped).
    normalized_key = str(key or name).strip().lower()
    normalized_aliases = [
        text.strip().lower()
        for text in (str(a) for a in (aliases or []))
        if text.strip()
    ]
    return CmdletArg(
        name=str(name),
        type=str(type or "string"),
        required=bool(required),
        description=str(description or ""),
        choices=list(choices or []),
        handler=handler,
        query_key=normalized_key or None,
        query_aliases=normalized_aliases,
        query_only=bool(query_only),
    )
class SharedArgs:
    """Registry of shared CmdletArg definitions used across multiple cmdlet."""

    # Shared argument definitions (class attributes reused by many cmdlets).

    STORE = CmdletArg(
        name="store",
        type="enum",
        # NOTE(review): choices starts empty; presumably resolved dynamically
        # via get_store_choices() — confirm with callers.
        choices=[],
        description="Selects store",
        query_key="store",
    )

    URL = CmdletArg(
        name="url",
        type="string",
        description="http parser",
    )

    PROVIDER = CmdletArg(
        name="provider",
        type="string",
        description="selects provider",
    )

    @staticmethod
    def get_store_choices(config: Optional[Dict[str, Any]] = None, force: bool = False) -> List[str]:
        """Return the cached list of store backend names, refreshing on demand.

        With force=False a cheap refresh (no backend instantiation) is used
        when the cache is cold; force=True instantiates the registry to get
        the authoritative backend list.
        """
        if not force and hasattr(SharedArgs, "_cached_available_stores"):
            return SharedArgs._cached_available_stores or []
        if not force:
            SharedArgs._refresh_store_choices_cache(config, skip_instantiation=True)
        else:
            SharedArgs._refresh_store_choices_cache(config, skip_instantiation=False)
        return SharedArgs._cached_available_stores or []

    @staticmethod
    def _refresh_store_choices_cache(config: Optional[Dict[str, Any]] = None, skip_instantiation: bool = False) -> None:
        """Rebuild the cached store-name list (best effort; never raises)."""
        try:
            if config is None:
                try:
                    from SYS.config import load_config
                    config = load_config()
                except Exception:
                    # No config available: cache an empty list and stop.
                    SharedArgs._cached_available_stores = []
                    return
            try:
                from Store.registry import list_configured_backend_names
                SharedArgs._cached_available_stores = list_configured_backend_names(config) or []
            except Exception:
                SharedArgs._cached_available_stores = []
            if skip_instantiation:
                return
            try:
                # Instantiating the registry enumerates live backends; slower,
                # so only done when skip_instantiation is False.
                from Store.registry import Store as StoreRegistry
                registry = StoreRegistry(config=config, suppress_debug=True)
                available = registry.list_backends()
                if available:
                    SharedArgs._cached_available_stores = available
            except Exception:
                pass
        except Exception:
            SharedArgs._cached_available_stores = []

    LOCATION = CmdletArg(
        "location",
        type="enum",
        choices=["hydrus", "0x0"],
        required=True,
        description="Destination location",
    )

    DELETE = CmdletArg(
        "delete",
        type="flag",
        description="Delete the file after successful operation.",
    )

    ARTIST = CmdletArg(
        "artist",
        type="string",
        description="Filter by artist name (case-insensitive, partial match).",
    )

    ALBUM = CmdletArg(
        "album",
        type="string",
        description="Filter by album name (case-insensitive, partial match).",
    )

    TRACK = CmdletArg(
        "track",
        type="string",
        description="Filter by track title (case-insensitive, partial match).",
    )

    LIBRARY = CmdletArg(
        "library",
        type="string",
        choices=["hydrus", "local", "soulseek", "libgen", "ftp"],
        description="Search library or source location.",
    )

    TIMEOUT = CmdletArg(
        "timeout",
        type="integer",
        description="Search or operation timeout in seconds.",
    )

    LIMIT = CmdletArg(
        "limit",
        type="integer",
        description="Maximum number of results to return.",
    )

    PATH = CmdletArg("path", type="string", description="File or directory path.")

    QUERY = CmdletArg(
        "query",
        type="string",
        description="Unified query string (e.g., hash:<sha256>, hash:{<h1>,<h2>}).",
    )

    REASON = CmdletArg(
        "reason",
        type="string",
        description="Reason or explanation for the operation.",
    )

    ARCHIVE = CmdletArg(
        "archive",
        type="flag",
        description="Archive the URL to Wayback Machine, Archive.today, and Archive.ph (requires URL argument in cmdlet).",
        alias="arch",
    )

    @staticmethod
    def resolve_storage(
        storage_value: Optional[str],
        default: Optional[Path] = None,
    ) -> Path:
        """Return *default* when provided, else the system temp directory.

        NOTE(review): storage_value is currently ignored — confirm intended.
        """
        _ = storage_value
        if default is not None:
            return default
        return Path(tempfile.gettempdir())

    @classmethod
    def get(cls, name: str) -> Optional[CmdletArg]:
        """Look up a shared argument by case-insensitive attribute name."""
        try:
            return getattr(cls, name.upper())
        except AttributeError:
            return None
@dataclass
class Cmdlet:
    """Represents a cmdlet with metadata and arguments."""

    name: str
    summary: str
    usage: str
    alias: List[str] = field(default_factory=list)
    arg: List[CmdletArg] = field(default_factory=list)
    detail: List[str] = field(default_factory=list)
    examples: List[str] = field(default_factory=list)
    # Optional executable entry point: (pipeline, argv, config) -> exit code.
    exec: Optional[Callable[[Any, Sequence[str], Dict[str, Any]], int]] = field(default=None)

    def _collect_names(self) -> List[str]:
        """Return the primary name plus aliases, de-duplicated case/dash-insensitively."""
        candidates: List[str] = []
        if self.name:
            candidates.append(self.name)
        candidates.extend(a for a in (self.alias or []) if a)
        # Legacy "aliases" attribute spelling, kept for backward compatibility.
        candidates.extend(a for a in (getattr(self, "aliases", None) or []) if a)
        seen: Set[str] = set()
        unique: List[str] = []
        for candidate in candidates:
            normalized = candidate.replace("_", "-").lower()
            if normalized not in seen:
                seen.add(normalized)
                unique.append(candidate)
        return unique

    def register(self) -> "Cmdlet":
        """Register exec under all collected names with the cmdlet registry (best effort)."""
        if not callable(self.exec):
            return self
        try:
            from cmdlet import register_callable as _register_callable
        except Exception:
            # Registry unavailable (e.g. standalone import); skip silently.
            return self
        names = self._collect_names()
        if names:
            _register_callable(names, self.exec)
        return self

    def get_flags(self, arg_name: str) -> set[str]:
        """Return the accepted flag spellings for *arg_name*."""
        return {f"--{arg_name}", f"-{arg_name}"}

    def build_flag_registry(self) -> Dict[str, set[str]]:
        """Map each declared argument name to its flag spellings."""
        return {spec.name: self.get_flags(spec.name) for spec in self.arg}
def parse_cmdlet_args(
    args: Sequence[str],
    cmdlet_spec: Dict[str, Any] | Cmdlet,
) -> Dict[str, Any]:
    """Parse command-line arguments based on cmdlet specification.

    Tokens matching a declared flag spelling consume the following token as
    their value (flag-typed args take no value); remaining bare tokens fill
    the declared positional arguments in order. Afterwards, any args declared
    with a query_key that are still unset are back-filled from the parsed
    fields of the -query string.

    Raises TypeError when cmdlet_spec has no usable 'arg' list.
    """
    result: Dict[str, Any] = {}
    arg_specs_raw = getattr(cmdlet_spec, "arg", None)
    if arg_specs_raw is None or not isinstance(arg_specs_raw, (list, tuple)):
        raise TypeError(
            f"Expected cmdlet-like object with an 'arg' list, got {type(cmdlet_spec).__name__}"
        )
    arg_specs: List[Any] = list(arg_specs_raw)
    positional_args: List[CmdletArg] = []
    query_mapped_args: List[CmdletArg] = []
    # Maps every accepted spelling ("name", "-name", "--name", lowercased)
    # to the canonical argument name.
    arg_spec_map: Dict[str, str] = {}
    arg_spec_by_canonical: Dict[str, Any] = {}
    for spec in arg_specs:
        name = getattr(spec, "name", None)
        if not name:
            continue
        try:
            # Query-mapped args are collected even if they are query-only.
            if getattr(spec, "query_key", None):
                query_mapped_args.append(spec)
        except Exception:
            pass
        name_str = str(name)
        canonical_name = name_str.lstrip("-")
        canonical_key = canonical_name.lower()
        try:
            # query_only args are never matched as flags or positionals.
            if bool(getattr(spec, "query_only", False)):
                continue
        except Exception:
            pass
        arg_spec_by_canonical[canonical_key] = spec
        if "-" not in name_str:
            # NOTE(review): a bare token equal to an arg name is treated as a
            # flag introducer below, since the bare spelling is registered in
            # arg_spec_map — confirm this shadowing of positionals is intended.
            positional_args.append(spec)
        arg_spec_map[canonical_key] = canonical_name
        arg_spec_map[f"-{canonical_name}".lower()] = canonical_name
        arg_spec_map[f"--{canonical_name}".lower()] = canonical_name
    i = 0
    positional_index = 0
    while i < len(args):
        token = str(args[i])
        token_lower = token.lower()
        # Reject the retired -hash flag with a pointer to the replacement.
        if token_lower in {"-hash", "--hash"} and token_lower not in arg_spec_map:
            try:
                log(
                    'Legacy flag -hash is no longer supported. Use: -query "hash:<sha256>"',
                    file=sys.stderr,
                )
            except Exception:
                pass
            i += 1
            continue
        if token_lower in arg_spec_map:
            canonical_name = arg_spec_map[token_lower]
            spec = arg_spec_by_canonical.get(canonical_name.lower())
            is_flag = bool(spec and str(getattr(spec, "type", "")).lower() == "flag")
            if is_flag:
                # Flags are boolean presence markers; no value is consumed.
                result[canonical_name] = True
                i += 1
            else:
                # Consume the next token as the value unless it looks like
                # another flag (leading "-").
                if i + 1 < len(args) and not str(args[i + 1]).startswith("-"):
                    value = args[i + 1]
                    is_variadic = bool(spec and getattr(spec, "variadic", False))
                    if is_variadic:
                        # Variadic args accumulate into a list across repeats.
                        if canonical_name not in result:
                            result[canonical_name] = []
                        elif not isinstance(result[canonical_name], list):
                            result[canonical_name] = [result[canonical_name]]
                        result[canonical_name].append(value)
                    else:
                        result[canonical_name] = value
                    i += 2
                else:
                    # Flag given without a usable value: skip it.
                    i += 1
        elif positional_index < len(positional_args):
            positional_spec = positional_args[positional_index]
            canonical_name = str(getattr(positional_spec, "name", "")).lstrip("-")
            is_variadic = bool(getattr(positional_spec, "variadic", False))
            if is_variadic:
                # A variadic positional swallows all remaining bare tokens,
                # so positional_index is not advanced.
                if canonical_name not in result:
                    result[canonical_name] = []
                elif not isinstance(result[canonical_name], list):
                    result[canonical_name] = [result[canonical_name]]
                result[canonical_name].append(token)
                i += 1
            else:
                result[canonical_name] = token
                positional_index += 1
                i += 1
        else:
            # Unrecognized token with no positional slot left: ignore it.
            i += 1
    # Back-fill query-mapped args from the parsed -query fields.
    try:
        raw_query = result.get("query")
    except Exception:
        raw_query = None
    if query_mapped_args and raw_query is not None:
        try:
            from SYS.cli_syntax import parse_query as _parse_query
            parsed_query = _parse_query(str(raw_query))
            fields = parsed_query.get("fields", {}) if isinstance(parsed_query, dict) else {}
            norm_fields = (
                {str(k).strip().lower(): v for k, v in fields.items()}
                if isinstance(fields, dict)
                else {}
            )
        except Exception:
            norm_fields = {}
        for spec in query_mapped_args:
            canonical_name = str(getattr(spec, "name", "") or "").lstrip("-")
            if not canonical_name:
                continue
            # Explicit command-line values take precedence over query fields.
            if canonical_name in result and result.get(canonical_name) not in (None, ""):
                continue
            try:
                key = str(getattr(spec, "query_key", "") or "").strip().lower()
                aliases = getattr(spec, "query_aliases", None)
                alias_list = [str(a).strip().lower() for a in (aliases or []) if str(a).strip()]
            except Exception:
                key = ""
                alias_list = []
            # Lookup order: declared key, then the arg name, then aliases.
            candidates = [k for k in [key, canonical_name] + alias_list if k]
            val = None
            for k in candidates:
                if k in norm_fields:
                    val = norm_fields.get(k)
                    break
            if val is None:
                continue
            try:
                result[canonical_name] = spec.resolve(val)
            except Exception:
                result[canonical_name] = val
    return result

View File

@@ -10,6 +10,7 @@ from typing import Any, Dict, List, Literal, Optional, Sequence, Tuple
from urllib.parse import quote from urllib.parse import quote
import httpx import httpx
from API.httpx_shared import get_shared_httpx_client
from SYS.logger import debug, log from SYS.logger import debug, log
from SYS.utils_constant import mime_maps from SYS.utils_constant import mime_maps
@@ -198,29 +199,28 @@ class HydrusNetwork(Store):
api_version_url = f"{self.URL}/api_version" api_version_url = f"{self.URL}/api_version"
verify_key_url = f"{self.URL}/verify_access_key" verify_key_url = f"{self.URL}/verify_access_key"
try: try:
with httpx.Client(timeout=5.0, client = get_shared_httpx_client(timeout=5.0, verify_ssl=False)
verify=False, version_resp = client.get(api_version_url, follow_redirects=True)
follow_redirects=True) as client: version_resp.raise_for_status()
version_resp = client.get(api_version_url) version_payload = version_resp.json()
version_resp.raise_for_status() if not isinstance(version_payload, dict):
version_payload = version_resp.json() raise RuntimeError(
if not isinstance(version_payload, dict): "Hydrus /api_version returned an unexpected response"
raise RuntimeError( )
"Hydrus /api_version returned an unexpected response"
) verify_resp = client.get(
verify_key_url,
verify_resp = client.get( headers={
verify_key_url, "Hydrus-Client-API-Access-Key": self.API
headers={ },
"Hydrus-Client-API-Access-Key": self.API follow_redirects=True,
}, )
verify_resp.raise_for_status()
verify_payload = verify_resp.json()
if not isinstance(verify_payload, dict):
raise RuntimeError(
"Hydrus /verify_access_key returned an unexpected response"
) )
verify_resp.raise_for_status()
verify_payload = verify_resp.json()
if not isinstance(verify_payload, dict):
raise RuntimeError(
"Hydrus /verify_access_key returned an unexpected response"
)
_HYDRUS_INIT_CHECK_CACHE[cache_key] = (True, None) _HYDRUS_INIT_CHECK_CACHE[cache_key] = (True, None)
except Exception as exc: except Exception as exc:
@@ -294,12 +294,10 @@ class HydrusNetwork(Store):
"Hydrus-Client-API-Access-Key": self.API, "Hydrus-Client-API-Access-Key": self.API,
"Accept": "application/json", "Accept": "application/json",
} }
with httpx.Client(timeout=5.0, client = get_shared_httpx_client(timeout=5.0, verify_ssl=False)
verify=False, resp = client.get(url, params=params, headers=headers, follow_redirects=True)
follow_redirects=True) as client: resp.raise_for_status()
resp = client.get(url, params=params, headers=headers) payload = resp.json()
resp.raise_for_status()
payload = resp.json()
count_val = None count_val = None
if isinstance(payload, dict): if isinstance(payload, dict):
@@ -1587,13 +1585,13 @@ class HydrusNetwork(Store):
file_url = f"{self.URL.rstrip('/')}/get_files/file?hash={quote(h)}" file_url = f"{self.URL.rstrip('/')}/get_files/file?hash={quote(h)}"
dest_path = base_tmp / fname dest_path = base_tmp / fname
with httpx.stream( stream_client = get_shared_httpx_client(timeout=60.0, verify_ssl=False)
with stream_client.stream(
"GET", "GET",
file_url, file_url,
headers={"Hydrus-Client-API-Access-Key": self.API}, headers={"Hydrus-Client-API-Access-Key": self.API},
follow_redirects=True, follow_redirects=True,
timeout=60.0, timeout=60.0,
verify=False,
) as resp: ) as resp:
resp.raise_for_status() resp.raise_for_status()
with dest_path.open("wb") as fh: with dest_path.open("wb") as fh:

View File

@@ -585,6 +585,15 @@ def parse_cmdlet_args(args: Sequence[str],
result = parse_cmdlet_args(["value1", "-count", "5"], cmdlet) result = parse_cmdlet_args(["value1", "-count", "5"], cmdlet)
# result = {"path": "value1", "count": "5"} # result = {"path": "value1", "count": "5"}
""" """
try:
from SYS.cmdlet_spec import parse_cmdlet_args as _parse_cmdlet_args_fast
return _parse_cmdlet_args_fast(args, cmdlet_spec)
except Exception:
# Fall back to local implementation below to preserve behavior if the
# lightweight parser is unavailable.
pass
result: Dict[str, result: Dict[str,
Any] = {} Any] = {}

View File

@@ -27,6 +27,7 @@ from typing import Any, Dict, List, Optional, Sequence, Tuple
from SYS import pipeline as ctx from SYS import pipeline as ctx
from SYS.pipeline_progress import PipelineProgress from SYS.pipeline_progress import PipelineProgress
from . import _shared as sh from . import _shared as sh
from SYS.field_access import get_field
normalize_hash = sh.normalize_hash normalize_hash = sh.normalize_hash
looks_like_hash = sh.looks_like_hash looks_like_hash = sh.looks_like_hash
@@ -34,7 +35,6 @@ Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg CmdletArg = sh.CmdletArg
SharedArgs = sh.SharedArgs SharedArgs = sh.SharedArgs
parse_cmdlet_args = sh.parse_cmdlet_args parse_cmdlet_args = sh.parse_cmdlet_args
get_field = sh.get_field
try: try:
from SYS.metadata import extract_title from SYS.metadata import extract_title
@@ -60,84 +60,6 @@ def _dedup_tags_preserve_order(tags: List[str]) -> List[str]:
return out return out
def _extract_subtitle_tags(info: Dict[str, Any]) -> List[str]:
"""Extract subtitle availability tags from a yt-dlp info dict.
Produces multi-valued tags so languages can coexist:
- subs:<lang>
- subs_auto:<lang>
"""
def _langs(value: Any) -> List[str]:
if not isinstance(value, dict):
return []
langs: List[str] = []
for k in value.keys():
if not isinstance(k, str):
continue
lang = k.strip().lower()
if lang:
langs.append(lang)
return sorted(set(langs))
out: List[str] = []
for lang in _langs(info.get("subtitles")):
out.append(f"subs:{lang}")
for lang in _langs(info.get("automatic_captions")):
out.append(f"subs_auto:{lang}")
return out
def _scrape_ytdlp_info(url: str) -> Optional[Dict[str, Any]]:
"""Fetch a yt-dlp info dict without downloading media."""
if not isinstance(url, str) or not url.strip():
return None
url = url.strip()
# Prefer the Python module when available (faster, avoids shell quoting issues).
try:
import yt_dlp # type: ignore
opts: Any = {
"quiet": True,
"no_warnings": True,
"skip_download": True,
"noprogress": True,
"socket_timeout": 15,
"retries": 1,
"playlist_items": "1-10",
}
with yt_dlp.YoutubeDL(opts) as ydl:
info = ydl.extract_info(url, download=False)
return info if isinstance(info, dict) else None
except Exception:
pass
# Fallback to yt-dlp CLI if the module isn't available.
try:
import json as json_module
cmd = [
"yt-dlp",
"-J",
"--no-warnings",
"--skip-download",
"--playlist-items",
"1-10",
url,
]
result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
if result.returncode != 0:
return None
payload = (result.stdout or "").strip()
if not payload:
return None
data = json_module.loads(payload)
return data if isinstance(data, dict) else None
except Exception:
return None
def _resolve_candidate_urls_for_item( def _resolve_candidate_urls_for_item(
result: Any, result: Any,
backend: Any, backend: Any,
@@ -1224,45 +1146,19 @@ def _run_impl(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
) )
return 1 return 1
info = _scrape_ytdlp_info(scrape_target) ytdlp_provider = get_metadata_provider("ytdlp", config)
if not info: if ytdlp_provider is None:
log( log("yt-dlp metadata provider is unavailable", file=sys.stderr)
"yt-dlp could not extract metadata for this URL (unsupported or failed)",
file=sys.stderr,
)
return 1 return 1
try: try:
from SYS.metadata import extract_ytdlp_tags tags = [
str(t)
for t in ytdlp_provider.search_tags(scrape_target, limit=1)
if t is not None
]
except Exception: except Exception:
extract_ytdlp_tags = None # type: ignore[assignment] tags = []
# Prefer the top-level metadata, but if this is a playlist container, use
# the first entry for per-item fields like subtitles.
info_for_subs = info
entries = info.get("entries") if isinstance(info, dict) else None
if isinstance(entries, list) and entries:
first = entries[0]
if isinstance(first, dict):
info_for_subs = first
tags: List[str] = []
if extract_ytdlp_tags:
try:
tags.extend(extract_ytdlp_tags(info))
except Exception:
pass
# Subtitle availability tags
try:
tags.extend(
_extract_subtitle_tags(
info_for_subs if isinstance(info_for_subs,
dict) else {}
)
)
except Exception:
pass
# Ensure we actually have something to apply. # Ensure we actually have something to apply.
tags = _dedup_tags_preserve_order(tags) tags = _dedup_tags_preserve_order(tags)
@@ -1399,19 +1295,10 @@ def _run_impl(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
identifiers = _extract_scrapable_identifiers(identifier_tags) identifiers = _extract_scrapable_identifiers(identifier_tags)
identifier_query: Optional[str] = None identifier_query: Optional[str] = None
if identifiers: if identifiers:
if provider.name in {"openlibrary", try:
"googlebooks", identifier_query = provider.identifier_query(identifiers)
"google"}: except Exception:
identifier_query = ( identifier_query = None
identifiers.get("isbn_13") or identifiers.get("isbn_10")
or identifiers.get("isbn") or identifiers.get("openlibrary")
)
elif provider.name == "imdb":
identifier_query = identifiers.get("imdb")
elif provider.name == "itunes":
identifier_query = identifiers.get("musicbrainz") or identifiers.get(
"musicbrainzalbum"
)
# Determine query from identifier first, else title on the result or filename # Determine query from identifier first, else title on the result or filename
title_hint = ( title_hint = (
@@ -1444,32 +1331,21 @@ def _run_impl(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
artist_hint = str(meta_artist) artist_hint = str(meta_artist)
combined_query: Optional[str] = None combined_query: Optional[str] = None
if (not identifier_query and title_hint and artist_hint if not identifier_query and title_hint and artist_hint:
and provider.name in {"itunes", try:
"musicbrainz"}): combined_query = provider.combined_query(
if provider.name == "musicbrainz": title_hint=str(title_hint),
combined_query = f'recording:"{title_hint}" AND artist:"{artist_hint}"' artist_hint=str(artist_hint),
else: )
combined_query = f"{title_hint} {artist_hint}" except Exception:
combined_query = None
# yt-dlp isn't a search provider; it requires a URL. # yt-dlp isn't a search provider; it requires a URL.
url_hint: Optional[str] = None url_hint: Optional[str] = None
if provider.name == "ytdlp": try:
raw_url = ( url_hint = provider.extract_url_query(result, get_field)
get_field(result, except Exception:
"url", url_hint = None
None) or get_field(result,
"source_url",
None) or get_field(result,
"target",
None)
)
if isinstance(raw_url, list) and raw_url:
raw_url = raw_url[0]
if isinstance(raw_url,
str) and raw_url.strip().startswith(("http://",
"https://")):
url_hint = raw_url.strip()
query_hint = url_hint or identifier_query or combined_query or title_hint query_hint = url_hint or identifier_query or combined_query or title_hint
if not query_hint: if not query_hint:
@@ -1492,7 +1368,12 @@ def _run_impl(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
return 1 return 1
# For yt-dlp, emit tags directly (there is no meaningful multi-result selection step). # For yt-dlp, emit tags directly (there is no meaningful multi-result selection step).
if provider.name == "ytdlp": emit_direct = False
try:
emit_direct = bool(provider.emits_direct_tags())
except Exception:
emit_direct = False
if emit_direct:
try: try:
tags = [str(t) for t in provider.to_tags(items[0]) if t is not None] tags = [str(t) for t in provider.to_tags(items[0]) if t is not None]
except Exception: except Exception:

View File

@@ -2,7 +2,7 @@ import json
import os import os
import sys import sys
from typing import List, Dict, Any, Sequence from typing import List, Dict, Any, Sequence
from cmdlet._shared import Cmdlet, CmdletArg from SYS.cmdlet_spec import Cmdlet, CmdletArg
from SYS.logger import log from SYS.logger import log
from SYS.result_table import Table from SYS.result_table import Table
from SYS import pipeline as ctx from SYS import pipeline as ctx

View File

@@ -1,6 +1,6 @@
from typing import List, Dict, Any, Optional, Sequence from typing import List, Dict, Any, Optional, Sequence
from cmdlet._shared import Cmdlet, CmdletArg from SYS.cmdlet_spec import Cmdlet, CmdletArg
from SYS.config import load_config, save_config, save_config_and_verify from SYS.config import load_config, save_config, save_config_and_verify
from SYS import pipeline as ctx from SYS import pipeline as ctx
from SYS.result_table import Table from SYS.result_table import Table

View File

@@ -4,7 +4,7 @@ from typing import Any, Dict, Sequence, List, Optional, Tuple
import shlex import shlex
import sys import sys
from cmdlet._shared import Cmdlet, CmdletArg, parse_cmdlet_args from SYS.cmdlet_spec import Cmdlet, CmdletArg, parse_cmdlet_args
from cmdlet import REGISTRY as CMDLET_REGISTRY, ensure_cmdlet_modules_loaded from cmdlet import REGISTRY as CMDLET_REGISTRY, ensure_cmdlet_modules_loaded
from SYS.logger import log from SYS.logger import log
from SYS.result_table import Table from SYS.result_table import Table
@@ -16,6 +16,8 @@ def _normalize_choice_list(arg_names: Optional[List[str]]) -> List[str]:
_HELP_EXAMPLE_SOURCE_COMMAND = ".help-example" _HELP_EXAMPLE_SOURCE_COMMAND = ".help-example"
_METADATA_CACHE_KEY: Optional[Tuple[int, int]] = None
_METADATA_CACHE_VALUE: Optional[Tuple[Dict[str, Dict[str, Any]], Dict[str, str]]] = None
def _example_for_cmd(name: str) -> List[str]: def _example_for_cmd(name: str) -> List[str]:
@@ -104,6 +106,13 @@ def _build_alias_map_from_metadata(metadata: Dict[str, Dict[str, Any]]) -> Dict[
def _gather_metadata_from_cmdlet_classes() -> Tuple[Dict[str, Dict[str, Any]], Dict[str, str]]: def _gather_metadata_from_cmdlet_classes() -> Tuple[Dict[str, Dict[str, Any]], Dict[str, str]]:
global _METADATA_CACHE_KEY, _METADATA_CACHE_VALUE
cache_key = (len(sys.modules), len(CMDLET_REGISTRY))
if _METADATA_CACHE_KEY == cache_key and _METADATA_CACHE_VALUE is not None:
cached_metadata, cached_alias = _METADATA_CACHE_VALUE
return dict(cached_metadata), dict(cached_alias)
metadata: Dict[str, Dict[str, Any]] = {} metadata: Dict[str, Dict[str, Any]] = {}
alias_map: Dict[str, str] = {} alias_map: Dict[str, str] = {}
try: try:
@@ -116,7 +125,7 @@ def _gather_metadata_from_cmdlet_classes() -> Tuple[Dict[str, Dict[str, Any]], D
if not (mod_name.startswith("cmdlet.") or mod_name == "cmdlet" or mod_name.startswith("cmdnat.")): if not (mod_name.startswith("cmdlet.") or mod_name == "cmdlet" or mod_name.startswith("cmdnat.")):
continue continue
cmdlet_obj = getattr(module, "CMDLET", None) cmdlet_obj = getattr(module, "CMDLET", None)
if not isinstance(cmdlet_obj, Cmdlet): if cmdlet_obj is None or not hasattr(cmdlet_obj, "name") or not hasattr(cmdlet_obj, "arg"):
continue continue
canonical_key = _normalize_cmdlet_key(getattr(cmdlet_obj, "name", None) or "") canonical_key = _normalize_cmdlet_key(getattr(cmdlet_obj, "name", None) or "")
if not canonical_key: if not canonical_key:
@@ -166,6 +175,9 @@ def _gather_metadata_from_cmdlet_classes() -> Tuple[Dict[str, Dict[str, Any]], D
}, },
) )
_METADATA_CACHE_KEY = cache_key
_METADATA_CACHE_VALUE = (dict(metadata), dict(alias_map))
return metadata, alias_map return metadata, alias_map

View File

@@ -8,7 +8,7 @@ import re
import uuid import uuid
from urllib.parse import parse_qs, urlparse from urllib.parse import parse_qs, urlparse
from cmdlet._shared import Cmdlet, CmdletArg from SYS.cmdlet_spec import Cmdlet, CmdletArg
from SYS.config import load_config, save_config from SYS.config import load_config, save_config
from SYS.logger import log, debug from SYS.logger import log, debug
from SYS.result_table import Table from SYS.result_table import Table

View File

@@ -6,7 +6,7 @@ import sys
from pathlib import Path from pathlib import Path
from typing import Any, Dict, Sequence, Optional from typing import Any, Dict, Sequence, Optional
from cmdlet._shared import Cmdlet, CmdletArg from SYS.cmdlet_spec import Cmdlet, CmdletArg
from SYS.logger import log from SYS.logger import log
from SYS import pipeline as ctx from SYS import pipeline as ctx

View File

@@ -8,7 +8,7 @@ import re
from datetime import datetime from datetime import datetime
from urllib.parse import urlparse, parse_qs from urllib.parse import urlparse, parse_qs
from pathlib import Path from pathlib import Path
from cmdlet._shared import Cmdlet, CmdletArg, parse_cmdlet_args from SYS.cmdlet_spec import Cmdlet, CmdletArg, parse_cmdlet_args
from Provider.tidal_manifest import resolve_tidal_manifest_path from Provider.tidal_manifest import resolve_tidal_manifest_path
from SYS.logger import debug, get_thread_stream, is_debug_enabled, set_debug, set_thread_stream from SYS.logger import debug, get_thread_stream, is_debug_enabled, set_debug, set_thread_stream
from SYS.result_table import Table from SYS.result_table import Table

View File

@@ -3,7 +3,7 @@ from __future__ import annotations
import shutil import shutil
from typing import Any, Dict, List from typing import Any, Dict, List
from cmdlet._shared import Cmdlet from SYS.cmdlet_spec import Cmdlet
from SYS import pipeline as ctx from SYS import pipeline as ctx
from SYS.result_table import Table from SYS.result_table import Table
from SYS.logger import set_debug, debug from SYS.logger import set_debug, debug

View File

@@ -1,6 +1,6 @@
from typing import Any, Dict, Sequence from typing import Any, Dict, Sequence
from cmdlet._shared import Cmdlet, CmdletArg from SYS.cmdlet_spec import Cmdlet, CmdletArg
from SYS.logger import log from SYS.logger import log

View File

@@ -4,7 +4,7 @@ import sys
from pathlib import Path from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence from typing import Any, Dict, List, Optional, Sequence
from cmdlet._shared import Cmdlet, CmdletArg from SYS.cmdlet_spec import Cmdlet, CmdletArg
from SYS.logger import log from SYS.logger import log
from SYS.result_table import Table from SYS.result_table import Table
from SYS import pipeline as ctx from SYS import pipeline as ctx

View File

@@ -9,7 +9,7 @@ from datetime import datetime, timezone
from typing import Any, Dict, Sequence, List from typing import Any, Dict, Sequence, List
from cmdlet import register from cmdlet import register
from cmdlet._shared import Cmdlet, CmdletArg from SYS.cmdlet_spec import Cmdlet, CmdletArg
from SYS import pipeline as ctx from SYS import pipeline as ctx
from SYS.logger import log from SYS.logger import log
from SYS.database import db as _db, get_worker_stdout from SYS.database import db as _db, get_worker_stdout