2026-02-11 19:06:38 -08:00
parent 1d0de1118b
commit ba623cb992
20 changed files with 848 additions and 247 deletions

View File

@@ -34,6 +34,7 @@ logger = logging.getLogger(__name__)
from API.ssl_certs import resolve_verify_value as _resolve_verify_value
from API.ssl_certs import get_requests_verify_value
from API.httpx_shared import get_shared_httpx_client
# Default configuration
DEFAULT_TIMEOUT = 30.0
@@ -504,17 +505,18 @@ class HTTPClient:
if ("certificate verify failed" in msg or "unable to get local issuer certificate" in msg):
logger.info("Certificate verification failed; attempting to retry with a system-aware CA bundle")
try:
import httpx as _httpx
# Use the client's precomputed verify argument (set at init)
verify_override = self._httpx_verify
with _httpx.Client(timeout=self.timeout, verify=verify_override, headers=self._get_headers()) as temp_client:
try:
response = temp_client.request(method, url, **kwargs)
if raise_for_status:
response.raise_for_status()
return response
except Exception as e2:
last_exception = e2
temp_client = get_shared_httpx_client(
timeout=self.timeout,
verify_ssl=self._httpx_verify,
headers=self._get_headers(),
)
try:
response = temp_client.request(method, url, **kwargs)
if raise_for_status:
response.raise_for_status()
return response
except Exception as e2:
last_exception = e2
except Exception:
# certifi/pip-system-certs/httpx not available; fall back to existing retry behavior
pass
@@ -530,17 +532,18 @@ class HTTPClient:
if ("certificate verify failed" in msg or "unable to get local issuer certificate" in msg):
logger.info("Certificate verification failed; attempting to retry with a system-aware CA bundle")
try:
import httpx as _httpx
# Use the client's precomputed verify argument (set at init)
verify_override = self._httpx_verify
with _httpx.Client(timeout=self.timeout, verify=verify_override, headers=self._get_headers()) as temp_client:
try:
response = temp_client.request(method, url, **kwargs)
if raise_for_status:
response.raise_for_status()
return response
except Exception as e2:
last_exception = e2
temp_client = get_shared_httpx_client(
timeout=self.timeout,
verify_ssl=self._httpx_verify,
headers=self._get_headers(),
)
try:
response = temp_client.request(method, url, **kwargs)
if raise_for_status:
response.raise_for_status()
return response
except Exception as e2:
last_exception = e2
except Exception:
# certifi/pip-system-certs/httpx not available; fall back to existing retry behavior
pass
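With this change, the certificate-verification retry reuses a pooled client from get_shared_httpx_client instead of constructing a throwaway httpx.Client per retry. A minimal sketch of the call shape, assuming the HTTPClient attributes used in the hunk (self.timeout, self._httpx_verify, self._get_headers()); the helper name below is illustrative, not part of the commit:

from API.httpx_shared import get_shared_httpx_client

def _retry_with_ca_bundle(http_client, method, url, **kwargs):
    # Reuses one cached client per (timeout, verify, headers) key.
    client = get_shared_httpx_client(
        timeout=http_client.timeout,
        verify_ssl=http_client._httpx_verify,
        headers=http_client._get_headers(),
    )
    response = client.request(method, url, **kwargs)
    response.raise_for_status()
    return response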

View File

@@ -1,14 +1,16 @@
"""Shared `httpx.Client` helper.
Creating short-lived httpx clients disables connection pooling and costs extra CPU.
This module provides a small singleton client for callers that just need basic
This module provides a small keyed client cache for callers that just need basic
GETs without the full HTTPClient wrapper.
"""
from __future__ import annotations
import atexit
from collections import OrderedDict
import threading
from typing import Dict, Optional
from typing import Any, Dict, Optional, Tuple
import httpx
@@ -20,39 +22,85 @@ _DEFAULT_USER_AGENT = (
)
_lock = threading.Lock()
_shared_client: Optional[httpx.Client] = None
_MAX_SHARED_CLIENTS = 8
_shared_clients: "OrderedDict[Tuple[float, Tuple[str, str], Tuple[Tuple[str, str], ...]], httpx.Client]" = OrderedDict()
def _normalize_headers(headers: Optional[Dict[str, str]]) -> Dict[str, str]:
normalized: Dict[str, str] = {"User-Agent": _DEFAULT_USER_AGENT}
if headers:
normalized.update({str(k): str(v) for k, v in headers.items()})
return normalized
def _verify_key(verify_value: Any) -> Tuple[str, str]:
if isinstance(verify_value, bool):
return ("bool", "1" if verify_value else "0")
if isinstance(verify_value, str):
return ("str", verify_value)
return ("obj", str(id(verify_value)))
def _client_key(
*,
timeout: float,
verify_value: Any,
merged_headers: Dict[str, str],
) -> Tuple[float, Tuple[str, str], Tuple[Tuple[str, str], ...]]:
header_items = tuple(
sorted((str(k).lower(), str(v)) for k, v in merged_headers.items())
)
return (float(timeout), _verify_key(verify_value), header_items)
def get_shared_httpx_client(
*,
timeout: float = 30.0,
verify_ssl: bool = True,
verify_ssl: bool | str = True,
headers: Optional[Dict[str, str]] = None,
) -> httpx.Client:
"""Return a process-wide shared synchronous httpx.Client."""
"""Return a shared synchronous httpx.Client for a specific config key."""
global _shared_client
if _shared_client is None:
with _lock:
if _shared_client is None:
base_headers = {"User-Agent": _DEFAULT_USER_AGENT}
if headers:
base_headers.update({str(k): str(v) for k, v in headers.items()})
_shared_client = httpx.Client(
timeout=timeout,
verify=resolve_verify_value(verify_ssl),
headers=base_headers,
)
verify_value = resolve_verify_value(verify_ssl)
merged_headers = _normalize_headers(headers)
key = _client_key(
timeout=timeout,
verify_value=verify_value,
merged_headers=merged_headers,
)
return _shared_client
with _lock:
existing = _shared_clients.get(key)
if existing is not None:
_shared_clients.move_to_end(key)
return existing
client = httpx.Client(
timeout=timeout,
verify=verify_value,
headers=merged_headers,
)
_shared_clients[key] = client
while len(_shared_clients) > _MAX_SHARED_CLIENTS:
_, old_client = _shared_clients.popitem(last=False)
try:
old_client.close()
except Exception:
pass
return client
def close_shared_httpx_client() -> None:
global _shared_client
client = _shared_client
_shared_client = None
if client is not None:
with _lock:
clients = list(_shared_clients.values())
_shared_clients.clear()
for client in clients:
try:
client.close()
except Exception:
pass
atexit.register(close_shared_httpx_client)
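A quick usage sketch of the keyed cache above; the assertions only illustrate the caching contract implied by _client_key and the LRU eviction, and the URL is a placeholder:

from API.httpx_shared import get_shared_httpx_client, close_shared_httpx_client

a = get_shared_httpx_client(timeout=10.0)
b = get_shared_httpx_client(timeout=10.0)
assert a is b  # identical (timeout, verify, headers) key -> same pooled client

c = get_shared_httpx_client(timeout=10.0, headers={"Accept": "application/json"})
assert c is not a  # extra header changes the key -> separate cache entry

resp = a.get("https://example.com/")  # reuses the cached client's connection pool
resp.raise_for_status()
close_shared_httpx_client()  # also registered via atexit above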

View File

@@ -9,8 +9,11 @@ This module intentionally avoids importing the heavy httpx-based stack.
from __future__ import annotations
import atexit
from collections import OrderedDict
import threading
from typing import Any, Dict, Optional
from typing import Any, Dict, Optional, Tuple
from weakref import WeakSet
import requests
from requests.adapters import HTTPAdapter
@@ -23,6 +26,40 @@ _DEFAULT_USER_AGENT = (
)
_local = threading.local()
_MAX_SESSIONS_PER_THREAD = 4
_session_registry_lock = threading.Lock()
_all_sessions: "WeakSet[requests.Session]" = WeakSet()
def _session_key(
*,
user_agent: str,
verify_ssl: bool,
pool_connections: int,
pool_maxsize: int,
) -> Tuple[str, Any, int, int]:
return (
str(user_agent or _DEFAULT_USER_AGENT),
resolve_verify_value(verify_ssl),
int(pool_connections),
int(pool_maxsize),
)
def _get_thread_session_cache() -> "OrderedDict[Tuple[str, Any, int, int], requests.Session]":
cache = getattr(_local, "session_cache", None)
if cache is None:
cache = OrderedDict()
_local.session_cache = cache
return cache
def _register_session(session: requests.Session) -> None:
try:
with _session_registry_lock:
_all_sessions.add(session)
except Exception:
pass
def get_requests_session(
@@ -32,24 +69,44 @@ def get_requests_session(
pool_connections: int = 16,
pool_maxsize: int = 16,
) -> requests.Session:
"""Return a thread-local shared Session configured for pooling."""
"""Return a thread-local pooled Session keyed by config values."""
session: Optional[requests.Session] = getattr(_local, "session", None)
if session is not None:
return session
key = _session_key(
user_agent=user_agent,
verify_ssl=verify_ssl,
pool_connections=pool_connections,
pool_maxsize=pool_maxsize,
)
cache = _get_thread_session_cache()
existing = cache.get(key)
if existing is not None:
cache.move_to_end(key)
return existing
session = requests.Session()
session.headers.update({"User-Agent": str(user_agent or _DEFAULT_USER_AGENT)})
session.headers.update({"User-Agent": key[0]})
# Expand connection pool; keep max_retries=0 to avoid semantic changes.
adapter = HTTPAdapter(pool_connections=pool_connections, pool_maxsize=pool_maxsize, max_retries=0)
adapter = HTTPAdapter(
pool_connections=pool_connections,
pool_maxsize=pool_maxsize,
max_retries=0,
)
session.mount("http://", adapter)
session.mount("https://", adapter)
# Configure verify once.
session.verify = resolve_verify_value(verify_ssl)
session.verify = key[1]
_register_session(session)
_local.session = session
cache[key] = session
while len(cache) > _MAX_SESSIONS_PER_THREAD:
_, old_session = cache.popitem(last=False)
try:
old_session.close()
except Exception:
pass
return session
@@ -66,3 +123,36 @@ def request(
sess = get_requests_session()
return sess.request(method, url, params=params, headers=headers, timeout=timeout, **kwargs)
def close_requests_sessions() -> None:
"""Close cached requests sessions for the current thread and global registry."""
cache = getattr(_local, "session_cache", None)
if cache:
try:
sessions = list(cache.values())
cache.clear()
except Exception:
sessions = []
for session in sessions:
try:
session.close()
except Exception:
pass
try:
with _session_registry_lock:
all_sessions = list(_all_sessions)
_all_sessions.clear()
except Exception:
all_sessions = []
for session in all_sessions:
try:
session.close()
except Exception:
pass
atexit.register(close_requests_sessions)
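Usage sketch for the per-thread session cache; the module path in the import is an assumption (the hunk does not show the filename), and the URL is a placeholder:

from API.requests_shared import get_requests_session, close_requests_sessions  # hypothetical path

s1 = get_requests_session()
s2 = get_requests_session()
assert s1 is s2            # same key on the same thread -> same pooled Session

s3 = get_requests_session(pool_maxsize=32)
assert s3 is not s1        # different pool size -> separate cached Session

resp = s1.get("https://example.com/", timeout=10)
close_requests_sessions()  # closes this thread's cache plus the global registry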

View File

@@ -1222,7 +1222,7 @@ class HIFI(Provider):
from API.httpx_shared import get_shared_httpx_client
timeout_val = float(getattr(self, "api_timeout", 10.0))
client = get_shared_httpx_client(timeout=timeout_val)
client = get_shared_httpx_client()
resp = client.get(resolved_text, timeout=timeout_val)
resp.raise_for_status()
content = resp.content

View File

@@ -1404,7 +1404,7 @@ class Tidal(Provider):
from API.httpx_shared import get_shared_httpx_client
timeout_val = float(getattr(self, "api_timeout", 10.0))
client = get_shared_httpx_client(timeout=timeout_val)
client = get_shared_httpx_client()
resp = client.get(resolved_text, timeout=timeout_val)
resp.raise_for_status()
content = resp.content
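Dropping the timeout argument from get_shared_httpx_client here keeps behavior equivalent, because each GET still passes a per-request timeout (which httpx applies over the client default) and it avoids minting a separate cached client for every distinct timeout value. A minimal sketch of that pattern with a placeholder URL:

client = get_shared_httpx_client()  # cached client with its default timeout
resp = client.get("https://example.com/resource", timeout=10.0)  # per-call override
resp.raise_for_status()
content = resp.content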

View File

@@ -74,6 +74,54 @@ class MetadataProvider(ABC):
tags.append(f"source:{self.name}")
return tags
def search_tags(self, query: str, limit: int = 1) -> List[str]:
"""Return tags for the best match from `search(query)`.
Providers can override this when tags should be extracted differently from
the default search->first-item->to_tags flow.
"""
try:
items = self.search(query, limit=max(1, int(limit)))
except Exception:
return []
if not items:
return []
try:
return [str(t) for t in self.to_tags(items[0]) if t is not None]
except Exception:
return []
def identifier_query(self, identifiers: Dict[str, Any]) -> Optional[str]:
"""Return provider-specific identifier query text from parsed identifiers."""
_ = identifiers
return None
def combined_query(
self,
*,
title_hint: Optional[str],
artist_hint: Optional[str],
) -> Optional[str]:
"""Return provider-specific title+artist query text."""
_ = title_hint
_ = artist_hint
return None
def extract_url_query(self, result: Any, get_field: Any) -> Optional[str]:
"""Return provider-specific URL query derived from a piped result."""
_ = result
_ = get_field
return None
def emits_direct_tags(self) -> bool:
"""True when provider should skip selection table and emit tags directly."""
return False
class ITunesProvider(MetadataProvider):
"""Metadata provider using the iTunes Search API."""
@@ -112,6 +160,21 @@ class ITunesProvider(MetadataProvider):
debug(f"iTunes returned {len(items)} items for '{query}'")
return items
def identifier_query(self, identifiers: Dict[str, Any]) -> Optional[str]:
return identifiers.get("musicbrainz") or identifiers.get("musicbrainzalbum")
def combined_query(
self,
*,
title_hint: Optional[str],
artist_hint: Optional[str],
) -> Optional[str]:
title_text = str(title_hint or "").strip()
artist_text = str(artist_hint or "").strip()
if not title_text or not artist_text:
return None
return f"{title_text} {artist_text}"
class OpenLibraryMetadataProvider(MetadataProvider):
"""Metadata provider for OpenLibrary book metadata."""
@@ -220,6 +283,14 @@ class OpenLibraryMetadataProvider(MetadataProvider):
tags.append(f"source:{self.name}")
return tags
def identifier_query(self, identifiers: Dict[str, Any]) -> Optional[str]:
return (
identifiers.get("isbn_13")
or identifiers.get("isbn_10")
or identifiers.get("isbn")
or identifiers.get("openlibrary")
)
class GoogleBooksMetadataProvider(MetadataProvider):
"""Metadata provider for Google Books volumes API."""
@@ -329,6 +400,14 @@ class GoogleBooksMetadataProvider(MetadataProvider):
tags.append(f"source:{self.name}")
return tags
def identifier_query(self, identifiers: Dict[str, Any]) -> Optional[str]:
return (
identifiers.get("isbn_13")
or identifiers.get("isbn_10")
or identifiers.get("isbn")
or identifiers.get("openlibrary")
)
class ISBNsearchMetadataProvider(MetadataProvider):
"""Metadata provider that scrapes isbnsearch.org by ISBN.
@@ -624,6 +703,18 @@ class MusicBrainzMetadataProvider(MetadataProvider):
tags.append(f"musicbrainz:{mbid}")
return tags
def combined_query(
self,
*,
title_hint: Optional[str],
artist_hint: Optional[str],
) -> Optional[str]:
title_text = str(title_hint or "").strip()
artist_text = str(artist_hint or "").strip()
if not title_text or not artist_text:
return None
return f'recording:"{title_text}" AND artist:"{artist_text}"'
class ImdbMetadataProvider(MetadataProvider):
"""Metadata provider for IMDb titles (movies/series/episodes)."""
@@ -757,6 +848,9 @@ class ImdbMetadataProvider(MetadataProvider):
deduped.append(s)
return deduped
def identifier_query(self, identifiers: Dict[str, Any]) -> Optional[str]:
return identifiers.get("imdb")
class YtdlpMetadataProvider(MetadataProvider):
"""Metadata provider that extracts tags from a supported URL using yt-dlp.
@@ -904,6 +998,23 @@ class YtdlpMetadataProvider(MetadataProvider):
out.append(s)
return out
def extract_url_query(self, result: Any, get_field: Any) -> Optional[str]:
raw_url = (
get_field(result, "url", None)
or get_field(result, "source_url", None)
or get_field(result, "target", None)
)
if isinstance(raw_url, list) and raw_url:
raw_url = raw_url[0]
if isinstance(raw_url, str):
text = raw_url.strip()
if text.startswith(("http://", "https://")):
return text
return None
def emits_direct_tags(self) -> bool:
return True
def _coerce_archive_field_list(value: Any) -> List[str]:
"""Coerce an Archive.org metadata field to a list of strings."""

SYS/cmdlet_spec.py Normal file (449 lines)
View File

@@ -0,0 +1,449 @@
from __future__ import annotations
import sys
import tempfile
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Sequence, Set
from SYS.logger import log
@dataclass
class CmdletArg:
"""Represents a single cmdlet argument with optional enum choices."""
name: str
type: str = "string"
required: bool = False
description: str = ""
choices: List[str] = field(default_factory=list)
alias: str = ""
handler: Optional[Any] = None
variadic: bool = False
usage: str = ""
requires_db: bool = False
query_key: Optional[str] = None
query_aliases: List[str] = field(default_factory=list)
query_only: bool = False
def resolve(self, value: Any) -> Any:
if self.handler is not None and callable(self.handler):
return self.handler(value)
return value
def to_flags(self) -> tuple[str, ...]:
flags = [f"--{self.name}", f"-{self.name}"]
if self.alias:
flags.append(f"-{self.alias}")
if self.type == "flag":
flags.append(f"--no-{self.name}")
flags.append(f"-no{self.name}")
if self.alias:
flags.append(f"-n{self.alias}")
return tuple(flags)
def QueryArg(
name: str,
*,
key: Optional[str] = None,
aliases: Optional[Sequence[str]] = None,
type: str = "string",
required: bool = False,
description: str = "",
choices: Optional[Sequence[str]] = None,
handler: Optional[Any] = None,
query_only: bool = True,
) -> CmdletArg:
"""Create an argument that can be populated from `-query` fields."""
return CmdletArg(
name=str(name),
type=str(type or "string"),
required=bool(required),
description=str(description or ""),
choices=list(choices or []),
handler=handler,
query_key=str(key or name).strip().lower() if str(key or name).strip() else None,
query_aliases=[str(a).strip().lower() for a in (aliases or []) if str(a).strip()],
query_only=bool(query_only),
)
class SharedArgs:
"""Registry of shared CmdletArg definitions used across multiple cmdlet."""
STORE = CmdletArg(
name="store",
type="enum",
choices=[],
description="Selects store",
query_key="store",
)
URL = CmdletArg(
name="url",
type="string",
description="http parser",
)
PROVIDER = CmdletArg(
name="provider",
type="string",
description="selects provider",
)
@staticmethod
def get_store_choices(config: Optional[Dict[str, Any]] = None, force: bool = False) -> List[str]:
if not force and hasattr(SharedArgs, "_cached_available_stores"):
return SharedArgs._cached_available_stores or []
if not force:
SharedArgs._refresh_store_choices_cache(config, skip_instantiation=True)
else:
SharedArgs._refresh_store_choices_cache(config, skip_instantiation=False)
return SharedArgs._cached_available_stores or []
@staticmethod
def _refresh_store_choices_cache(config: Optional[Dict[str, Any]] = None, skip_instantiation: bool = False) -> None:
try:
if config is None:
try:
from SYS.config import load_config
config = load_config()
except Exception:
SharedArgs._cached_available_stores = []
return
try:
from Store.registry import list_configured_backend_names
SharedArgs._cached_available_stores = list_configured_backend_names(config) or []
except Exception:
SharedArgs._cached_available_stores = []
if skip_instantiation:
return
try:
from Store.registry import Store as StoreRegistry
registry = StoreRegistry(config=config, suppress_debug=True)
available = registry.list_backends()
if available:
SharedArgs._cached_available_stores = available
except Exception:
pass
except Exception:
SharedArgs._cached_available_stores = []
LOCATION = CmdletArg(
"location",
type="enum",
choices=["hydrus", "0x0"],
required=True,
description="Destination location",
)
DELETE = CmdletArg(
"delete",
type="flag",
description="Delete the file after successful operation.",
)
ARTIST = CmdletArg(
"artist",
type="string",
description="Filter by artist name (case-insensitive, partial match).",
)
ALBUM = CmdletArg(
"album",
type="string",
description="Filter by album name (case-insensitive, partial match).",
)
TRACK = CmdletArg(
"track",
type="string",
description="Filter by track title (case-insensitive, partial match).",
)
LIBRARY = CmdletArg(
"library",
type="string",
choices=["hydrus", "local", "soulseek", "libgen", "ftp"],
description="Search library or source location.",
)
TIMEOUT = CmdletArg(
"timeout",
type="integer",
description="Search or operation timeout in seconds.",
)
LIMIT = CmdletArg(
"limit",
type="integer",
description="Maximum number of results to return.",
)
PATH = CmdletArg("path", type="string", description="File or directory path.")
QUERY = CmdletArg(
"query",
type="string",
description="Unified query string (e.g., hash:<sha256>, hash:{<h1>,<h2>}).",
)
REASON = CmdletArg(
"reason",
type="string",
description="Reason or explanation for the operation.",
)
ARCHIVE = CmdletArg(
"archive",
type="flag",
description="Archive the URL to Wayback Machine, Archive.today, and Archive.ph (requires URL argument in cmdlet).",
alias="arch",
)
@staticmethod
def resolve_storage(
storage_value: Optional[str],
default: Optional[Path] = None,
) -> Path:
_ = storage_value
if default is not None:
return default
return Path(tempfile.gettempdir())
@classmethod
def get(cls, name: str) -> Optional[CmdletArg]:
try:
return getattr(cls, name.upper())
except AttributeError:
return None
@dataclass
class Cmdlet:
"""Represents a cmdlet with metadata and arguments."""
name: str
summary: str
usage: str
alias: List[str] = field(default_factory=list)
arg: List[CmdletArg] = field(default_factory=list)
detail: List[str] = field(default_factory=list)
examples: List[str] = field(default_factory=list)
exec: Optional[Callable[[Any, Sequence[str], Dict[str, Any]], int]] = field(default=None)
def _collect_names(self) -> List[str]:
names: List[str] = []
if self.name:
names.append(self.name)
for alias in self.alias or []:
if alias:
names.append(alias)
for alias in getattr(self, "aliases", None) or []:
if alias:
names.append(alias)
seen: Set[str] = set()
deduped: List[str] = []
for name in names:
key = name.replace("_", "-").lower()
if key in seen:
continue
seen.add(key)
deduped.append(name)
return deduped
def register(self) -> "Cmdlet":
if not callable(self.exec):
return self
try:
from cmdlet import register_callable as _register_callable
except Exception:
return self
names = self._collect_names()
if not names:
return self
_register_callable(names, self.exec)
return self
def get_flags(self, arg_name: str) -> set[str]:
return {f"-{arg_name}", f"--{arg_name}"}
def build_flag_registry(self) -> Dict[str, set[str]]:
return {arg.name: self.get_flags(arg.name) for arg in self.arg}
def parse_cmdlet_args(
args: Sequence[str],
cmdlet_spec: Dict[str, Any] | Cmdlet,
) -> Dict[str, Any]:
"""Parse command-line arguments based on cmdlet specification."""
result: Dict[str, Any] = {}
arg_specs_raw = getattr(cmdlet_spec, "arg", None)
if arg_specs_raw is None or not isinstance(arg_specs_raw, (list, tuple)):
raise TypeError(
f"Expected cmdlet-like object with an 'arg' list, got {type(cmdlet_spec).__name__}"
)
arg_specs: List[Any] = list(arg_specs_raw)
positional_args: List[CmdletArg] = []
query_mapped_args: List[CmdletArg] = []
arg_spec_map: Dict[str, str] = {}
arg_spec_by_canonical: Dict[str, Any] = {}
for spec in arg_specs:
name = getattr(spec, "name", None)
if not name:
continue
try:
if getattr(spec, "query_key", None):
query_mapped_args.append(spec)
except Exception:
pass
name_str = str(name)
canonical_name = name_str.lstrip("-")
canonical_key = canonical_name.lower()
try:
if bool(getattr(spec, "query_only", False)):
continue
except Exception:
pass
arg_spec_by_canonical[canonical_key] = spec
if "-" not in name_str:
positional_args.append(spec)
arg_spec_map[canonical_key] = canonical_name
arg_spec_map[f"-{canonical_name}".lower()] = canonical_name
arg_spec_map[f"--{canonical_name}".lower()] = canonical_name
i = 0
positional_index = 0
while i < len(args):
token = str(args[i])
token_lower = token.lower()
if token_lower in {"-hash", "--hash"} and token_lower not in arg_spec_map:
try:
log(
'Legacy flag -hash is no longer supported. Use: -query "hash:<sha256>"',
file=sys.stderr,
)
except Exception:
pass
i += 1
continue
if token_lower in arg_spec_map:
canonical_name = arg_spec_map[token_lower]
spec = arg_spec_by_canonical.get(canonical_name.lower())
is_flag = bool(spec and str(getattr(spec, "type", "")).lower() == "flag")
if is_flag:
result[canonical_name] = True
i += 1
else:
if i + 1 < len(args) and not str(args[i + 1]).startswith("-"):
value = args[i + 1]
is_variadic = bool(spec and getattr(spec, "variadic", False))
if is_variadic:
if canonical_name not in result:
result[canonical_name] = []
elif not isinstance(result[canonical_name], list):
result[canonical_name] = [result[canonical_name]]
result[canonical_name].append(value)
else:
result[canonical_name] = value
i += 2
else:
i += 1
elif positional_index < len(positional_args):
positional_spec = positional_args[positional_index]
canonical_name = str(getattr(positional_spec, "name", "")).lstrip("-")
is_variadic = bool(getattr(positional_spec, "variadic", False))
if is_variadic:
if canonical_name not in result:
result[canonical_name] = []
elif not isinstance(result[canonical_name], list):
result[canonical_name] = [result[canonical_name]]
result[canonical_name].append(token)
i += 1
else:
result[canonical_name] = token
positional_index += 1
i += 1
else:
i += 1
try:
raw_query = result.get("query")
except Exception:
raw_query = None
if query_mapped_args and raw_query is not None:
try:
from SYS.cli_syntax import parse_query as _parse_query
parsed_query = _parse_query(str(raw_query))
fields = parsed_query.get("fields", {}) if isinstance(parsed_query, dict) else {}
norm_fields = (
{str(k).strip().lower(): v for k, v in fields.items()}
if isinstance(fields, dict)
else {}
)
except Exception:
norm_fields = {}
for spec in query_mapped_args:
canonical_name = str(getattr(spec, "name", "") or "").lstrip("-")
if not canonical_name:
continue
if canonical_name in result and result.get(canonical_name) not in (None, ""):
continue
try:
key = str(getattr(spec, "query_key", "") or "").strip().lower()
aliases = getattr(spec, "query_aliases", None)
alias_list = [str(a).strip().lower() for a in (aliases or []) if str(a).strip()]
except Exception:
key = ""
alias_list = []
candidates = [k for k in [key, canonical_name] + alias_list if k]
val = None
for k in candidates:
if k in norm_fields:
val = norm_fields.get(k)
break
if val is None:
continue
try:
result[canonical_name] = spec.resolve(val)
except Exception:
result[canonical_name] = val
return result
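A short usage sketch of the spec module; the cmdlet name and argument values below are made up, and the parse results follow parse_cmdlet_args as defined above:

spec = Cmdlet(
    name="get-item",
    summary="Fetch an item by query.",
    usage='get-item -query "hash:<sha256>" [-limit N]',
    arg=[
        SharedArgs.QUERY,
        SharedArgs.LIMIT,
        QueryArg("store", key="store"),  # query_only: filled from -query fields, not a CLI flag
    ],
)

parsed = parse_cmdlet_args(["-query", "hash:abc123", "-limit", "5"], spec)
# parsed == {"query": "hash:abc123", "limit": "5"}; "store" is populated only when
# SYS.cli_syntax.parse_query exposes a matching "store" field in the query string.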

View File

@@ -10,6 +10,7 @@ from typing import Any, Dict, List, Literal, Optional, Sequence, Tuple
from urllib.parse import quote
import httpx
from API.httpx_shared import get_shared_httpx_client
from SYS.logger import debug, log
from SYS.utils_constant import mime_maps
@@ -198,29 +199,28 @@ class HydrusNetwork(Store):
api_version_url = f"{self.URL}/api_version"
verify_key_url = f"{self.URL}/verify_access_key"
try:
with httpx.Client(timeout=5.0,
verify=False,
follow_redirects=True) as client:
version_resp = client.get(api_version_url)
version_resp.raise_for_status()
version_payload = version_resp.json()
if not isinstance(version_payload, dict):
raise RuntimeError(
"Hydrus /api_version returned an unexpected response"
)
verify_resp = client.get(
verify_key_url,
headers={
"Hydrus-Client-API-Access-Key": self.API
},
client = get_shared_httpx_client(timeout=5.0, verify_ssl=False)
version_resp = client.get(api_version_url, follow_redirects=True)
version_resp.raise_for_status()
version_payload = version_resp.json()
if not isinstance(version_payload, dict):
raise RuntimeError(
"Hydrus /api_version returned an unexpected response"
)
verify_resp = client.get(
verify_key_url,
headers={
"Hydrus-Client-API-Access-Key": self.API
},
follow_redirects=True,
)
verify_resp.raise_for_status()
verify_payload = verify_resp.json()
if not isinstance(verify_payload, dict):
raise RuntimeError(
"Hydrus /verify_access_key returned an unexpected response"
)
verify_resp.raise_for_status()
verify_payload = verify_resp.json()
if not isinstance(verify_payload, dict):
raise RuntimeError(
"Hydrus /verify_access_key returned an unexpected response"
)
_HYDRUS_INIT_CHECK_CACHE[cache_key] = (True, None)
except Exception as exc:
@@ -294,12 +294,10 @@ class HydrusNetwork(Store):
"Hydrus-Client-API-Access-Key": self.API,
"Accept": "application/json",
}
with httpx.Client(timeout=5.0,
verify=False,
follow_redirects=True) as client:
resp = client.get(url, params=params, headers=headers)
resp.raise_for_status()
payload = resp.json()
client = get_shared_httpx_client(timeout=5.0, verify_ssl=False)
resp = client.get(url, params=params, headers=headers, follow_redirects=True)
resp.raise_for_status()
payload = resp.json()
count_val = None
if isinstance(payload, dict):
@@ -1587,13 +1585,13 @@ class HydrusNetwork(Store):
file_url = f"{self.URL.rstrip('/')}/get_files/file?hash={quote(h)}"
dest_path = base_tmp / fname
with httpx.stream(
stream_client = get_shared_httpx_client(timeout=60.0, verify_ssl=False)
with stream_client.stream(
"GET",
file_url,
headers={"Hydrus-Client-API-Access-Key": self.API},
follow_redirects=True,
timeout=60.0,
verify=False,
) as resp:
resp.raise_for_status()
with dest_path.open("wb") as fh:
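The streaming hunk is truncated above; a minimal sketch of the complete download loop it implies, assuming the surrounding method's file_url, dest_path, and self.API as shown elsewhere in this diff:

stream_client = get_shared_httpx_client(timeout=60.0, verify_ssl=False)
with stream_client.stream(
    "GET",
    file_url,
    headers={"Hydrus-Client-API-Access-Key": self.API},
    follow_redirects=True,
) as resp:
    resp.raise_for_status()
    with dest_path.open("wb") as fh:
        for chunk in resp.iter_bytes():
            if chunk:
                fh.write(chunk)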

View File

@@ -585,6 +585,15 @@ def parse_cmdlet_args(args: Sequence[str],
result = parse_cmdlet_args(["value1", "-count", "5"], cmdlet)
# result = {"path": "value1", "count": "5"}
"""
try:
from SYS.cmdlet_spec import parse_cmdlet_args as _parse_cmdlet_args_fast
return _parse_cmdlet_args_fast(args, cmdlet_spec)
except Exception:
# Fall back to local implementation below to preserve behavior if the
# lightweight parser is unavailable.
pass
result: Dict[str,
Any] = {}

View File

@@ -27,6 +27,7 @@ from typing import Any, Dict, List, Optional, Sequence, Tuple
from SYS import pipeline as ctx
from SYS.pipeline_progress import PipelineProgress
from . import _shared as sh
from SYS.field_access import get_field
normalize_hash = sh.normalize_hash
looks_like_hash = sh.looks_like_hash
@@ -34,7 +35,6 @@ Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
SharedArgs = sh.SharedArgs
parse_cmdlet_args = sh.parse_cmdlet_args
get_field = sh.get_field
try:
from SYS.metadata import extract_title
@@ -60,84 +60,6 @@ def _dedup_tags_preserve_order(tags: List[str]) -> List[str]:
return out
def _extract_subtitle_tags(info: Dict[str, Any]) -> List[str]:
"""Extract subtitle availability tags from a yt-dlp info dict.
Produces multi-valued tags so languages can coexist:
- subs:<lang>
- subs_auto:<lang>
"""
def _langs(value: Any) -> List[str]:
if not isinstance(value, dict):
return []
langs: List[str] = []
for k in value.keys():
if not isinstance(k, str):
continue
lang = k.strip().lower()
if lang:
langs.append(lang)
return sorted(set(langs))
out: List[str] = []
for lang in _langs(info.get("subtitles")):
out.append(f"subs:{lang}")
for lang in _langs(info.get("automatic_captions")):
out.append(f"subs_auto:{lang}")
return out
def _scrape_ytdlp_info(url: str) -> Optional[Dict[str, Any]]:
"""Fetch a yt-dlp info dict without downloading media."""
if not isinstance(url, str) or not url.strip():
return None
url = url.strip()
# Prefer the Python module when available (faster, avoids shell quoting issues).
try:
import yt_dlp # type: ignore
opts: Any = {
"quiet": True,
"no_warnings": True,
"skip_download": True,
"noprogress": True,
"socket_timeout": 15,
"retries": 1,
"playlist_items": "1-10",
}
with yt_dlp.YoutubeDL(opts) as ydl:
info = ydl.extract_info(url, download=False)
return info if isinstance(info, dict) else None
except Exception:
pass
# Fallback to yt-dlp CLI if the module isn't available.
try:
import json as json_module
cmd = [
"yt-dlp",
"-J",
"--no-warnings",
"--skip-download",
"--playlist-items",
"1-10",
url,
]
result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
if result.returncode != 0:
return None
payload = (result.stdout or "").strip()
if not payload:
return None
data = json_module.loads(payload)
return data if isinstance(data, dict) else None
except Exception:
return None
def _resolve_candidate_urls_for_item(
result: Any,
backend: Any,
@@ -1224,45 +1146,19 @@ def _run_impl(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
)
return 1
info = _scrape_ytdlp_info(scrape_target)
if not info:
log(
"yt-dlp could not extract metadata for this URL (unsupported or failed)",
file=sys.stderr,
)
ytdlp_provider = get_metadata_provider("ytdlp", config)
if ytdlp_provider is None:
log("yt-dlp metadata provider is unavailable", file=sys.stderr)
return 1
try:
from SYS.metadata import extract_ytdlp_tags
tags = [
str(t)
for t in ytdlp_provider.search_tags(scrape_target, limit=1)
if t is not None
]
except Exception:
extract_ytdlp_tags = None # type: ignore[assignment]
# Prefer the top-level metadata, but if this is a playlist container, use
# the first entry for per-item fields like subtitles.
info_for_subs = info
entries = info.get("entries") if isinstance(info, dict) else None
if isinstance(entries, list) and entries:
first = entries[0]
if isinstance(first, dict):
info_for_subs = first
tags: List[str] = []
if extract_ytdlp_tags:
try:
tags.extend(extract_ytdlp_tags(info))
except Exception:
pass
# Subtitle availability tags
try:
tags.extend(
_extract_subtitle_tags(
info_for_subs if isinstance(info_for_subs,
dict) else {}
)
)
except Exception:
pass
tags = []
# Ensure we actually have something to apply.
tags = _dedup_tags_preserve_order(tags)
@@ -1399,19 +1295,10 @@ def _run_impl(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
identifiers = _extract_scrapable_identifiers(identifier_tags)
identifier_query: Optional[str] = None
if identifiers:
if provider.name in {"openlibrary",
"googlebooks",
"google"}:
identifier_query = (
identifiers.get("isbn_13") or identifiers.get("isbn_10")
or identifiers.get("isbn") or identifiers.get("openlibrary")
)
elif provider.name == "imdb":
identifier_query = identifiers.get("imdb")
elif provider.name == "itunes":
identifier_query = identifiers.get("musicbrainz") or identifiers.get(
"musicbrainzalbum"
)
try:
identifier_query = provider.identifier_query(identifiers)
except Exception:
identifier_query = None
# Determine query from identifier first, else title on the result or filename
title_hint = (
@@ -1444,32 +1331,21 @@ def _run_impl(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
artist_hint = str(meta_artist)
combined_query: Optional[str] = None
if (not identifier_query and title_hint and artist_hint
and provider.name in {"itunes",
"musicbrainz"}):
if provider.name == "musicbrainz":
combined_query = f'recording:"{title_hint}" AND artist:"{artist_hint}"'
else:
combined_query = f"{title_hint} {artist_hint}"
if not identifier_query and title_hint and artist_hint:
try:
combined_query = provider.combined_query(
title_hint=str(title_hint),
artist_hint=str(artist_hint),
)
except Exception:
combined_query = None
# yt-dlp isn't a search provider; it requires a URL.
url_hint: Optional[str] = None
if provider.name == "ytdlp":
raw_url = (
get_field(result,
"url",
None) or get_field(result,
"source_url",
None) or get_field(result,
"target",
None)
)
if isinstance(raw_url, list) and raw_url:
raw_url = raw_url[0]
if isinstance(raw_url,
str) and raw_url.strip().startswith(("http://",
"https://")):
url_hint = raw_url.strip()
try:
url_hint = provider.extract_url_query(result, get_field)
except Exception:
url_hint = None
query_hint = url_hint or identifier_query or combined_query or title_hint
if not query_hint:
@@ -1492,7 +1368,12 @@ def _run_impl(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
return 1
# For yt-dlp, emit tags directly (there is no meaningful multi-result selection step).
if provider.name == "ytdlp":
emit_direct = False
try:
emit_direct = bool(provider.emits_direct_tags())
except Exception:
emit_direct = False
if emit_direct:
try:
tags = [str(t) for t in provider.to_tags(items[0]) if t is not None]
except Exception:

View File

@@ -2,7 +2,7 @@ import json
import os
import sys
from typing import List, Dict, Any, Sequence
from cmdlet._shared import Cmdlet, CmdletArg
from SYS.cmdlet_spec import Cmdlet, CmdletArg
from SYS.logger import log
from SYS.result_table import Table
from SYS import pipeline as ctx

View File

@@ -1,6 +1,6 @@
from typing import List, Dict, Any, Optional, Sequence
from cmdlet._shared import Cmdlet, CmdletArg
from SYS.cmdlet_spec import Cmdlet, CmdletArg
from SYS.config import load_config, save_config, save_config_and_verify
from SYS import pipeline as ctx
from SYS.result_table import Table

View File

@@ -4,7 +4,7 @@ from typing import Any, Dict, Sequence, List, Optional, Tuple
import shlex
import sys
from cmdlet._shared import Cmdlet, CmdletArg, parse_cmdlet_args
from SYS.cmdlet_spec import Cmdlet, CmdletArg, parse_cmdlet_args
from cmdlet import REGISTRY as CMDLET_REGISTRY, ensure_cmdlet_modules_loaded
from SYS.logger import log
from SYS.result_table import Table
@@ -16,6 +16,8 @@ def _normalize_choice_list(arg_names: Optional[List[str]]) -> List[str]:
_HELP_EXAMPLE_SOURCE_COMMAND = ".help-example"
_METADATA_CACHE_KEY: Optional[Tuple[int, int]] = None
_METADATA_CACHE_VALUE: Optional[Tuple[Dict[str, Dict[str, Any]], Dict[str, str]]] = None
def _example_for_cmd(name: str) -> List[str]:
@@ -104,6 +106,13 @@ def _build_alias_map_from_metadata(metadata: Dict[str, Dict[str, Any]]) -> Dict[
def _gather_metadata_from_cmdlet_classes() -> Tuple[Dict[str, Dict[str, Any]], Dict[str, str]]:
global _METADATA_CACHE_KEY, _METADATA_CACHE_VALUE
cache_key = (len(sys.modules), len(CMDLET_REGISTRY))
if _METADATA_CACHE_KEY == cache_key and _METADATA_CACHE_VALUE is not None:
cached_metadata, cached_alias = _METADATA_CACHE_VALUE
return dict(cached_metadata), dict(cached_alias)
metadata: Dict[str, Dict[str, Any]] = {}
alias_map: Dict[str, str] = {}
try:
@@ -116,7 +125,7 @@ def _gather_metadata_from_cmdlet_classes() -> Tuple[Dict[str, Dict[str, Any]], D
if not (mod_name.startswith("cmdlet.") or mod_name == "cmdlet" or mod_name.startswith("cmdnat.")):
continue
cmdlet_obj = getattr(module, "CMDLET", None)
if not isinstance(cmdlet_obj, Cmdlet):
if cmdlet_obj is None or not hasattr(cmdlet_obj, "name") or not hasattr(cmdlet_obj, "arg"):
continue
canonical_key = _normalize_cmdlet_key(getattr(cmdlet_obj, "name", None) or "")
if not canonical_key:
@@ -166,6 +175,9 @@ def _gather_metadata_from_cmdlet_classes() -> Tuple[Dict[str, Dict[str, Any]], D
},
)
_METADATA_CACHE_KEY = cache_key
_METADATA_CACHE_VALUE = (dict(metadata), dict(alias_map))
return metadata, alias_map

View File

@@ -8,7 +8,7 @@ import re
import uuid
from urllib.parse import parse_qs, urlparse
from cmdlet._shared import Cmdlet, CmdletArg
from SYS.cmdlet_spec import Cmdlet, CmdletArg
from SYS.config import load_config, save_config
from SYS.logger import log, debug
from SYS.result_table import Table

View File

@@ -6,7 +6,7 @@ import sys
from pathlib import Path
from typing import Any, Dict, Sequence, Optional
from cmdlet._shared import Cmdlet, CmdletArg
from SYS.cmdlet_spec import Cmdlet, CmdletArg
from SYS.logger import log
from SYS import pipeline as ctx

View File

@@ -8,7 +8,7 @@ import re
from datetime import datetime
from urllib.parse import urlparse, parse_qs
from pathlib import Path
from cmdlet._shared import Cmdlet, CmdletArg, parse_cmdlet_args
from SYS.cmdlet_spec import Cmdlet, CmdletArg, parse_cmdlet_args
from Provider.tidal_manifest import resolve_tidal_manifest_path
from SYS.logger import debug, get_thread_stream, is_debug_enabled, set_debug, set_thread_stream
from SYS.result_table import Table

View File

@@ -3,7 +3,7 @@ from __future__ import annotations
import shutil
from typing import Any, Dict, List
from cmdlet._shared import Cmdlet
from SYS.cmdlet_spec import Cmdlet
from SYS import pipeline as ctx
from SYS.result_table import Table
from SYS.logger import set_debug, debug

View File

@@ -1,6 +1,6 @@
from typing import Any, Dict, Sequence
from cmdlet._shared import Cmdlet, CmdletArg
from SYS.cmdlet_spec import Cmdlet, CmdletArg
from SYS.logger import log

View File

@@ -4,7 +4,7 @@ import sys
from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence
from cmdlet._shared import Cmdlet, CmdletArg
from SYS.cmdlet_spec import Cmdlet, CmdletArg
from SYS.logger import log
from SYS.result_table import Table
from SYS import pipeline as ctx

View File

@@ -9,7 +9,7 @@ from datetime import datetime, timezone
from typing import Any, Dict, Sequence, List
from cmdlet import register
from cmdlet._shared import Cmdlet, CmdletArg
from SYS.cmdlet_spec import Cmdlet, CmdletArg
from SYS import pipeline as ctx
from SYS.logger import log
from SYS.database import db as _db, get_worker_stdout