Files
Medios-Macina/ProviderCore/registry.py

600 lines
20 KiB
Python
Raw Normal View History

"""Provider registry.

Concrete provider implementations live in the ``Provider`` package. This module
is the single source of truth for discovery, metadata, and lifecycle helpers
for those plugins.
"""
from __future__ import annotations
2026-01-05 07:51:19 -08:00
import importlib
import pkgutil
2025-12-11 19:04:02 -08:00
import sys
2026-01-05 07:51:19 -08:00
from dataclasses import dataclass, field
from types import ModuleType
from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple, Type
2025-12-22 02:11:53 -08:00
from urllib.parse import urlparse
2025-12-11 19:04:02 -08:00
2026-01-11 04:54:27 -08:00
from SYS.logger import log, debug
2025-12-11 19:04:02 -08:00
2026-01-05 07:51:19 -08:00
from ProviderCore.base import FileProvider, Provider, SearchProvider, SearchResult
from Provider.soulseek import download_soulseek_file
@dataclass(frozen=True)
class ProviderInfo:
    """Immutable record describing one registered provider class.

    The ``supports_*`` properties report whether ``provider_class`` overrides
    the corresponding method of the ``Provider`` base class.
    """

    # Name the provider is stored under in the registry.
    canonical_name: str
    # The provider class this record describes.
    provider_class: Type[Provider]
    # Dotted module name associated with the provider class.
    module: str
    # Additional names recorded for this provider.
    alias_names: Tuple[str, ...] = field(default_factory=tuple)

    @property
    def supports_search(self) -> bool:
        """Return True when the provider class overrides ``Provider.search``."""
        own_impl = self.provider_class.search
        return own_impl is not Provider.search

    @property
    def supports_upload(self) -> bool:
        """Return True when the provider class overrides ``Provider.upload``."""
        own_impl = self.provider_class.upload
        return own_impl is not Provider.upload
class ProviderRegistry:
    """Handles discovery, registration, and lookup of provider classes.

    Two indexes are maintained:

    * ``_infos`` maps each canonical name to its ``ProviderInfo``.
    * ``_lookup`` maps canonical names *and* aliases to ``ProviderInfo``.

    All names are normalized (stripped, lower-cased) before being stored or
    looked up. Discovery of the configured package happens lazily on the first
    ``get``/``iter_providers`` call (or explicitly via ``discover``).
    """

    def __init__(self, package_name: str) -> None:
        """Create an empty registry bound to *package_name* (may be empty)."""
        self.package_name = (package_name or "").strip()
        self._infos: Dict[str, ProviderInfo] = {}
        self._lookup: Dict[str, ProviderInfo] = {}
        # Names of modules already scanned, so a module is never scanned twice.
        self._modules: set[str] = set()
        self._discovered = False

    def _normalize(self, value: Any) -> str:
        """Return *value* as a stripped, lower-cased string ("" for falsy input)."""
        return str(value or "").strip().lower()

    def _candidate_names(self,
                         provider_class: Type[Provider],
                         override_name: Optional[str]) -> List[str]:
        """Collect the de-duplicated names a provider class can be known by.

        The first entry is the canonical candidate: *override_name* when
        given, otherwise ``PROVIDER_NAME`` / ``NAME``. The class ``__name__``
        and any ``PROVIDER_ALIASES`` follow. Duplicates are detected
        case-insensitively; the original spelling of the first occurrence is
        kept.
        """
        names: List[str] = []
        seen: set[str] = set()

        def _add(value: Any) -> None:
            text = str(value or "").strip()
            normalized = text.lower()
            if not text or normalized in seen:
                return
            seen.add(normalized)
            names.append(text)

        if override_name:
            _add(override_name)
        else:
            _add(getattr(provider_class, "PROVIDER_NAME", None))
            _add(getattr(provider_class, "NAME", None))
        _add(getattr(provider_class, "__name__", None))
        for alias in getattr(provider_class, "PROVIDER_ALIASES", ()) or ():
            _add(alias)
        return names

    def register(
        self,
        provider_class: Type[Provider],
        *,
        override_name: Optional[str] = None,
        extra_aliases: Optional[Sequence[str]] = None,
        module_name: Optional[str] = None,
        replace: bool = False,
    ) -> ProviderInfo:
        """Register a provider class with canonical and alias names.

        Args:
            provider_class: The class to register.
            override_name: Canonical name to use instead of the class-declared one.
            extra_aliases: Additional aliases on top of the class-declared ones.
            module_name: Module to record; defaults to ``provider_class.__module__``.
            replace: When True, an existing entry with the same canonical name
                is replaced; otherwise the existing entry is returned untouched.

        Returns:
            The ``ProviderInfo`` now stored under the canonical name.

        Raises:
            ValueError: If no usable name can be derived for the class.
        """
        candidates = self._candidate_names(provider_class, override_name)
        if not candidates:
            raise ValueError("provider name candidates are required")
        canonical = self._normalize(candidates[0])
        if not canonical:
            raise ValueError("provider name must not be empty")

        alias_names: List[str] = []
        # Seeding with the canonical name keeps it out of the alias list.
        seen: set[str] = {canonical}
        for candidate in list(candidates[1:]) + list(extra_aliases or ()):
            normalized = self._normalize(candidate)
            if not normalized or normalized in seen:
                continue
            seen.add(normalized)
            alias_names.append(normalized)

        existing = self._infos.get(canonical)
        if existing is not None and not replace:
            return existing

        info = ProviderInfo(
            canonical_name=canonical,
            provider_class=provider_class,
            module=module_name or getattr(provider_class, "__module__", "") or "",
            alias_names=tuple(alias_names),
        )

        if existing is not None:
            # Replacing: drop every lookup key that still points at the old
            # entry, otherwise its stale aliases keep resolving to the old class.
            stale_keys = [key for key, value in self._lookup.items() if value is existing]
            for key in stale_keys:
                del self._lookup[key]

        self._infos[canonical] = info
        self._lookup[canonical] = info
        for alias in alias_names:
            # An alias must never shadow another provider's canonical name,
            # or ``_lookup`` and ``_infos`` would disagree about that name.
            if alias in self._infos:
                log(
                    f"[provider] Alias '{alias}' for '{canonical}' ignored: "
                    "it is the canonical name of another provider",
                    file=sys.stderr,
                )
                continue
            self._lookup[alias] = info
        return info

    def _register_module(self, module: ModuleType) -> None:
        """Register every provider class defined directly in *module*.

        Classes merely imported into the module (``__module__`` differs) and
        the abstract bases themselves are skipped. Registration failures are
        logged and do not abort the scan.
        """
        module_name = getattr(module, "__name__", "")
        if not module_name or module_name in self._modules:
            return
        self._modules.add(module_name)
        for attr in dir(module):
            candidate = getattr(module, attr)
            if not isinstance(candidate, type):
                continue
            if not issubclass(candidate, Provider):
                continue
            if candidate in {Provider, SearchProvider, FileProvider}:
                continue
            if getattr(candidate, "__module__", "") != module_name:
                continue
            try:
                self.register(candidate, module_name=module_name)
            except Exception as exc:
                log(f"[provider] Failed to register {module_name}.{candidate.__name__}: {exc}", file=sys.stderr)

    def discover(self) -> None:
        """Import and register providers from the package.

        Runs at most once per registry. Modules whose name starts with an
        underscore are skipped; import failures are logged and skipped.
        """
        if self._discovered or not self.package_name:
            return
        # Set the flag first so re-entrant calls during import return early.
        self._discovered = True
        try:
            package = importlib.import_module(self.package_name)
        except Exception as exc:
            log(f"[provider] Failed to import package {self.package_name}: {exc}", file=sys.stderr)
            return
        self._register_module(package)
        package_path = getattr(package, "__path__", None)
        if not package_path:
            return
        for _finder, module_name, _is_pkg in pkgutil.iter_modules(package_path):
            if module_name.startswith("_"):
                continue
            module_path = f"{self.package_name}.{module_name}"
            try:
                module = importlib.import_module(module_path)
            except Exception as exc:
                log(f"[provider] Failed to load {module_path}: {exc}", file=sys.stderr)
                continue
            self._register_module(module)

    def get(self, name: str) -> Optional[ProviderInfo]:
        """Return the entry registered under *name* (canonical or alias), or None."""
        self.discover()
        if not name:
            return None
        return self._lookup.get(self._normalize(name))

    def iter_providers(self) -> Iterable[ProviderInfo]:
        """Return a snapshot tuple of all canonical provider entries."""
        self.discover()
        return tuple(self._infos.values())

    def has_name(self, name: str) -> bool:
        """Return True when *name* resolves to a registered provider."""
        return self.get(name) is not None
# Module-wide registry bound to the ``Provider`` package.
REGISTRY = ProviderRegistry("Provider")
# Discovery runs eagerly here, i.e. at import time of this module, so the
# provider package and its submodules are imported as a side effect.
REGISTRY.discover()
def register_provider(
    provider_class: Type[Provider],
    *,
    name: Optional[str] = None,
    aliases: Optional[Sequence[str]] = None,
    module_name: Optional[str] = None,
    replace: bool = False,
) -> ProviderInfo:
    """Add *provider_class* to the module-wide registry.

    Intended for tests or third-party packages. ``name`` overrides the
    canonical name, ``aliases`` are extra lookup names, and ``replace``
    controls whether an existing entry with the same canonical name is
    overwritten. Returns the resulting registry entry.
    """
    registration = REGISTRY.register(
        provider_class,
        override_name=name,
        extra_aliases=aliases,
        module_name=module_name,
        replace=replace,
    )
    return registration
2025-12-11 19:04:02 -08:00
2026-01-03 03:37:48 -08:00
def get_provider_class(name: str) -> Optional[Type[Provider]]:
    """Return the provider class registered under *name*, or None if unknown."""
    entry = REGISTRY.get(name)
    return entry.provider_class if entry is not None else None
2026-01-03 03:37:48 -08:00
def selection_auto_stage_for_table(
    table_type: str,
    stage_args: Optional[Sequence[str]] = None,
) -> Optional[list[str]]:
    """Ask the provider owning *table_type* for its automatic selection stage.

    The provider is looked up by the part of the table type before the first
    dot, falling back to the full table type. Returns None when the table
    type is empty, no provider matches, or the provider hook raises.
    """
    table_key = str(table_type or "").strip().lower()
    if not table_key:
        return None

    # "provider.table" -> "provider"; a name without a dot is used whole.
    prefix = table_key.partition(".")[0]
    provider_class = get_provider_class(prefix) or get_provider_class(table_key)
    if provider_class is None:
        return None

    try:
        staged = provider_class.selection_auto_stage(table_key, stage_args)
    except Exception:
        return None
    return staged
2025-12-21 05:10:09 -08:00
def is_known_provider_name(name: str) -> bool:
    """Return True when *name* resolves to a provider in the module registry."""
    known = REGISTRY.has_name(name)
    return known
2025-12-21 05:10:09 -08:00
2025-12-19 02:29:42 -08:00
def _supports_search(provider: Provider) -> bool:
    """Return True when the provider's class overrides ``Provider.search``."""
    own_impl = provider.__class__.search
    return own_impl is not Provider.search
def _supports_upload(provider: Provider) -> bool:
    """Return True when the provider's class overrides ``Provider.upload``."""
    own_impl = provider.__class__.upload
    return own_impl is not Provider.upload
2025-12-11 19:04:02 -08:00
2025-12-19 02:29:42 -08:00
2026-01-01 20:37:27 -08:00
def _provider_url_patterns(provider_class: Type[Provider]) -> Sequence[str]:
    """Return the class's ``url_patterns()`` as a list, or [] if that fails."""
    try:
        declared = provider_class.url_patterns()
        return [*declared]
    except Exception:
        # Best effort: a missing or failing hook means "no patterns".
        return []
2026-01-05 07:51:19 -08:00
def get_provider(name: str, config: Optional[Dict[str, Any]] = None) -> Optional[Provider]:
    """Instantiate and validate the provider registered under *name*.

    Returns the provider instance, or None when the name is unknown, the
    provider's ``validate()`` is falsy, or construction/validation raises.
    Each failure reason is reported through ``debug``.
    """
    entry = REGISTRY.get(name)
    if entry is None:
        debug(f"[provider] Unknown provider: {name}")
        return None

    try:
        instance = entry.provider_class(config)
        if instance.validate():
            return instance
        debug(f"[provider] Provider '{name}' is not available")
        return None
    except Exception as exc:
        debug(f"[provider] Error initializing '{name}': {exc}")
        return None
2025-12-19 02:29:42 -08:00
def list_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bool]:
    """Report, per canonical provider name, whether the provider is available.

    Each registered provider class is instantiated with *config* and asked to
    ``validate()``. A provider whose construction or validation raises is
    reported as unavailable.
    """
    availability: Dict[str, bool] = {}
    for info in REGISTRY.iter_providers():
        try:
            provider = info.provider_class(config)
            # Coerce to bool so the result honours the declared return type,
            # matching list_search_providers / list_file_providers.
            availability[info.canonical_name] = bool(provider.validate())
        except Exception:
            availability[info.canonical_name] = False
    return availability
def get_search_provider(name: str,
                        config: Optional[Dict[str, Any]] = None) -> Optional[SearchProvider]:
    """Return the validated provider for *name* if it implements search.

    Returns None when the provider cannot be obtained via ``get_provider`` or
    when its class does not override ``Provider.search``.
    """
    instance = get_provider(name, config)
    if instance is not None and not _supports_search(instance):
        debug(f"[provider] Provider '{name}' does not support search")
        return None
    return instance  # type: ignore[return-value]
def list_search_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bool]:
    """Report, per canonical provider name, whether search is usable.

    A provider counts as usable when it validates and its class overrides
    ``Provider.search``; any exception marks it as unusable.
    """
    report: Dict[str, bool] = {}
    for entry in REGISTRY.iter_providers():
        key = entry.canonical_name
        try:
            instance = entry.provider_class(config)
            usable = instance.validate() and entry.supports_search
            report[key] = bool(usable)
        except Exception:
            report[key] = False
    return report
2025-12-11 19:04:02 -08:00
def get_file_provider(name: str,
                      config: Optional[Dict[str, Any]] = None) -> Optional[FileProvider]:
    """Return the validated provider for *name* if it implements upload.

    Returns None when the provider cannot be obtained via ``get_provider`` or
    when its class does not override ``Provider.upload``.
    """
    instance = get_provider(name, config)
    if instance is not None and not _supports_upload(instance):
        debug(f"[provider] Provider '{name}' does not support upload")
        return None
    return instance  # type: ignore[return-value]
2025-12-11 19:04:02 -08:00
def list_file_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bool]:
    """Report, per canonical provider name, whether upload is usable.

    A provider counts as usable when it validates and its class overrides
    ``Provider.upload``; any exception marks it as unusable.
    """
    report: Dict[str, bool] = {}
    for entry in REGISTRY.iter_providers():
        key = entry.canonical_name
        try:
            instance = entry.provider_class(config)
            usable = instance.validate() and entry.supports_upload
            report[key] = bool(usable)
        except Exception:
            report[key] = False
    return report
2025-12-22 02:11:53 -08:00
def match_provider_name_for_url(url: str) -> Optional[str]:
    """Return the canonical name of the provider that claims *url*, if any.

    Resolution order:

    1. ``openlibrary.org`` hosts map to ``openlibrary``.
    2. ``archive.org`` hosts map to ``openlibrary`` for borrow/stream/loan
       paths and to ``internetarchive`` otherwise.
    3. Otherwise each registered provider's ``url_patterns()`` is consulted:
       patterns starting with ``magnet:``, ``http://`` or ``https://`` are
       matched as case-insensitive URL prefixes, all others as host names
       (exact or parent domain, ignoring a leading ``www.``).

    For the two hard-coded hosts, None is returned when the target provider
    is not registered. None is also returned when nothing matches.
    """
    candidate = str(url or "").strip()
    candidate_lower = candidate.lower()

    try:
        pieces = urlparse(candidate)
        host = (pieces.hostname or "").strip().lower()
        path = (pieces.path or "").strip()
    except Exception:
        host, path = "", ""

    def _strip_www(hostname: str) -> str:
        cleaned = str(hostname or "").strip().lower()
        return cleaned[4:] if cleaned.startswith("www.") else cleaned

    def _is_host_or_subdomain(hostname: str, domain: str) -> bool:
        return hostname == domain or hostname.endswith("." + domain)

    target_host = _strip_www(host)

    if target_host:
        if _is_host_or_subdomain(target_host, "openlibrary.org"):
            return "openlibrary" if REGISTRY.has_name("openlibrary") else None

        if _is_host_or_subdomain(target_host, "archive.org"):
            lowered_path = str(path or "").lower()
            # The substring test also covers paths that start with
            # "/services/loans/".
            loan_like = (
                lowered_path.startswith(("/borrow/", "/stream/"))
                or "/services/loans/" in lowered_path
            )
            wanted = "openlibrary" if loan_like else "internetarchive"
            return wanted if REGISTRY.has_name(wanted) else None

    url_prefixes = ("magnet:", "http://", "https://")
    for entry in REGISTRY.iter_providers():
        for pattern in _provider_url_patterns(entry.provider_class):
            needle = str(pattern or "").strip().lower()
            if not needle:
                continue

            if needle.startswith(url_prefixes):
                # Scheme-qualified patterns are plain URL prefixes.
                if candidate_lower.startswith(needle):
                    return entry.canonical_name
                continue

            domain = _strip_www(needle)
            if domain and target_host and _is_host_or_subdomain(target_host, domain):
                return entry.canonical_name

    return None
def _normalize_choice_entry(entry: Any) -> Optional[Dict[str, Any]]:
    """Normalize one declared choice into ``{"value", "text", "aliases"}``.

    Dict entries may carry ``value``, ``text``/``label`` and
    ``alias``/``aliases``; any other non-None entry is used as both value and
    text. Returns None for None or for a dict without a usable value/text.
    """
    if entry is None:
        return None
    if isinstance(entry, dict):
        value = entry.get("value")
        text = entry.get("text") or entry.get("label") or value
        aliases = entry.get("alias") or entry.get("aliases") or []
        if isinstance(aliases, str):
            # A single alias given as a plain string must stay one alias
            # rather than being iterated character by character.
            aliases = [aliases]
        value_str = str(value) if value is not None else (str(text) if text is not None else None)
        text_str = str(text) if text is not None else value_str
        if not value_str or not text_str:
            return None
        alias_list = [str(a) for a in aliases if a is not None]
        return {"value": value_str, "text": text_str, "aliases": alias_list}
    # string/other primitives
    return {"value": str(entry), "text": str(entry), "aliases": []}


def _normalize_choice_table(table: Any) -> Dict[str, List[Dict[str, Any]]]:
    """Normalize a raw ``{field: choices}`` mapping; non-dicts yield ``{}``.

    Each value may be a list/tuple/set of entries, a callable returning one,
    or a dict wrapping the entries under ``"choices"`` or ``"values"``.
    Fields without any usable entry are omitted. Keys are stripped and
    lower-cased.
    """
    normalized_table: Dict[str, List[Dict[str, Any]]] = {}
    if not isinstance(table, dict):
        return normalized_table
    for key, raw in table.items():
        choices = raw
        if callable(choices):
            try:
                choices = choices()
            except Exception:
                # A failing factory leaves the raw callable in place, which
                # is then skipped by the sequence check below.
                choices = raw
        if isinstance(choices, dict):
            choices = choices.get("choices") or choices.get("values") or choices
        if not isinstance(choices, (list, tuple, set)):
            continue
        normalized = [item for item in map(_normalize_choice_entry, choices) if item]
        if normalized:
            normalized_table[str(key).strip().lower()] = normalized
    return normalized_table


def _collect_mapping(p: Any) -> Dict[str, List[Dict[str, Any]]]:
    """Collect a provider's inline query choices, keyed by field name.

    Reads ``QUERY_ARG_CHOICES`` (falling back to ``INLINE_QUERY_FIELD_CHOICES``
    when the former is not a dict) and then overlays whatever
    ``inline_query_field_choices()`` returns. A failure in that method is
    logged via ``debug`` and ignored.

    Defined at module scope because ``resolve_inline_filters`` calls it as a
    module global.
    """
    declared = getattr(p, "QUERY_ARG_CHOICES", None)
    if not isinstance(declared, dict):
        declared = getattr(p, "INLINE_QUERY_FIELD_CHOICES", None)
    mapping = _normalize_choice_table(declared)

    try:
        fn = getattr(p, "inline_query_field_choices", None)
        if callable(fn):
            mapping.update(_normalize_choice_table(fn()))
    except Exception as exc:
        # Best effort: dynamic choices are optional extras.
        debug(f"[provider] inline_query_field_choices() failed: {exc}")
    return mapping


def provider_inline_query_choices(
    provider_name: str,
    field_name: str,
    config: Optional[Dict[str, Any]] = None,
) -> List[str]:
    """Return provider-declared inline query choices for a field (e.g., system:GBA).

    Providers can expose a mapping via ``QUERY_ARG_CHOICES`` (preferred) or
    ``INLINE_QUERY_FIELD_CHOICES`` / ``inline_query_field_choices()``. This
    helper keeps completion logic simple and reusable.

    Returns the display text of each choice followed by its aliases, in
    declaration order and without duplicates. Returns an empty list when the
    provider is unavailable, the field is unknown, or collecting the choices
    fails.
    """
    pname = str(provider_name or "").strip().lower()
    field_key = str(field_name or "").strip().lower()
    if not pname or not field_key:
        return []

    # get_provider() yields the same instance a get_search_provider() attempt
    # followed by a get_provider() fallback would, but builds and validates
    # the provider only once.
    provider = get_provider(pname, config)
    if provider is None:
        return []

    try:
        entries = _collect_mapping(provider).get(field_key, [])
    except Exception:
        return []

    seen: set[str] = set()
    out: List[str] = []
    for entry in entries:
        text = entry.get("text") or entry.get("value")
        if not text:
            continue
        text_str = str(text)
        if text_str in seen:
            continue
        seen.add(text_str)
        out.append(text_str)
        for alias in entry.get("aliases", []):
            alias_str = str(alias)
            if alias_str and alias_str not in seen:
                seen.add(alias_str)
                out.append(alias_str)
    return out
def get_provider_for_url(url: str,
                         config: Optional[Dict[str, Any]] = None) -> Optional[Provider]:
    """Return a validated provider instance for *url*, or None if none matches."""
    matched = match_provider_name_for_url(url)
    return get_provider(matched, config) if matched else None
def resolve_inline_filters(
    provider: Provider,
    inline_args: Dict[str, Any],
    *,
    field_transforms: Optional[Dict[str, Any]] = None,
) -> Dict[str, str]:
    """Translate inline query arguments into provider filter values.

    For every non-empty argument the user's text is compared,
    case-insensitively, against the text, value and aliases of the provider's
    declared choices for that field. The first match contributes its value
    (or text); unmatched input is passed through unchanged. An optional
    callable in ``field_transforms`` (keyed by lower-cased field name) is then
    applied; a transform that raises is ignored. Empty results are dropped.

    NOTE(review): ``_collect_mapping`` is looked up as a module global here;
    confirm it is defined at module scope.
    """
    resolved_filters: Dict[str, str] = {}
    if not inline_args:
        return resolved_filters

    choice_table = _collect_mapping(provider)
    converters = field_transforms or {}

    for raw_key, raw_val in inline_args.items():
        if raw_val is None:
            continue
        key = str(raw_key or "").strip().lower()
        wanted = str(raw_val).strip()
        if not key or not wanted:
            continue

        needle = wanted.lower()
        chosen: Optional[str] = None
        for option in choice_table.get(key, []):
            label = str(option.get("text") or "").strip()
            stored = str(option.get("value") or "").strip()
            nicknames = {
                str(alias).strip().lower()
                for alias in option.get("aliases", [])
                if alias is not None
            }
            if needle in (label.lower(), stored.lower()) or needle in nicknames:
                chosen = stored or label or wanted
                break
        if chosen is None:
            chosen = wanted

        converter = converters.get(key)
        if callable(converter):
            try:
                chosen = converter(chosen)
            except Exception:
                # Keep the untransformed value when the transform fails.
                pass

        if chosen:
            resolved_filters[key] = str(chosen)

    return resolved_filters
2025-12-11 19:04:02 -08:00
# Public API of this module. ``download_soulseek_file`` is re-exported from
# ``Provider.soulseek``.
__all__ = [
    "ProviderInfo",
    "Provider",
    "SearchProvider",
    "FileProvider",
    "SearchResult",
    "register_provider",
    "get_provider",
    "list_providers",
    "get_search_provider",
    "list_search_providers",
    "get_file_provider",
    "list_file_providers",
    "match_provider_name_for_url",
    "get_provider_for_url",
    "get_provider_class",
    "is_known_provider_name",
    "selection_auto_stage_for_table",
    "download_soulseek_file",
    "provider_inline_query_choices",
    "resolve_inline_filters",
]