Files
Medios-Macina/ProviderCore/registry.py
2026-01-05 07:51:19 -08:00

427 lines
14 KiB
Python

"""Provider registry.
Concrete provider implementations live in the ``Provider`` package. This module
is the single source of truth for discovery, metadata, and lifecycle helpers
for those plugins.
"""
from __future__ import annotations
import importlib
import pkgutil
import sys
from dataclasses import dataclass, field
from types import ModuleType
from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple, Type
from urllib.parse import urlparse
from SYS.logger import log
from ProviderCore.base import FileProvider, Provider, SearchProvider, SearchResult
from Provider.soulseek import download_soulseek_file
@dataclass(frozen=True)
class ProviderInfo:
"""Metadata about a single provider entry."""
canonical_name: str
provider_class: Type[Provider]
module: str
alias_names: Tuple[str, ...] = field(default_factory=tuple)
@property
def supports_search(self) -> bool:
return self.provider_class.search is not Provider.search
@property
def supports_upload(self) -> bool:
return self.provider_class.upload is not Provider.upload
class ProviderRegistry:
"""Handles discovery, registration, and lookup of provider classes."""
def __init__(self, package_name: str) -> None:
self.package_name = (package_name or "").strip()
self._infos: Dict[str, ProviderInfo] = {}
self._lookup: Dict[str, ProviderInfo] = {}
self._modules: set[str] = set()
self._discovered = False
def _normalize(self, value: Any) -> str:
return str(value or "").strip().lower()
def _candidate_names(self,
provider_class: Type[Provider],
override_name: Optional[str]) -> List[str]:
names: List[str] = []
seen: set[str] = set()
def _add(value: Any) -> None:
text = str(value or "").strip()
normalized = text.lower()
if not text or normalized in seen:
return
seen.add(normalized)
names.append(text)
if override_name:
_add(override_name)
else:
_add(getattr(provider_class, "PROVIDER_NAME", None))
_add(getattr(provider_class, "NAME", None))
_add(getattr(provider_class, "__name__", None))
for alias in getattr(provider_class, "PROVIDER_ALIASES", ()) or ():
_add(alias)
return names
def register(
self,
provider_class: Type[Provider],
*,
override_name: Optional[str] = None,
extra_aliases: Optional[Sequence[str]] = None,
module_name: Optional[str] = None,
replace: bool = False,
) -> ProviderInfo:
"""Register a provider class with canonical and alias names."""
candidates = self._candidate_names(provider_class, override_name)
if not candidates:
raise ValueError("provider name candidates are required")
canonical = self._normalize(candidates[0])
if not canonical:
raise ValueError("provider name must not be empty")
alias_names: List[str] = []
alias_seen: set[str] = set()
for candidate in candidates[1:]:
normalized = self._normalize(candidate)
if not normalized or normalized == canonical or normalized in alias_seen:
continue
alias_seen.add(normalized)
alias_names.append(normalized)
for alias in extra_aliases or ():
normalized = self._normalize(alias)
if not normalized or normalized == canonical or normalized in alias_seen:
continue
alias_seen.add(normalized)
alias_names.append(normalized)
info = ProviderInfo(
canonical_name=canonical,
provider_class=provider_class,
module=module_name or getattr(provider_class, "__module__", "") or "",
alias_names=tuple(alias_names),
)
existing = self._infos.get(canonical)
if existing is not None and not replace:
return existing
self._infos[canonical] = info
for lookup in (canonical,) + tuple(alias_names):
self._lookup[lookup] = info
return info
def _register_module(self, module: ModuleType) -> None:
module_name = getattr(module, "__name__", "")
if not module_name or module_name in self._modules:
return
self._modules.add(module_name)
for attr in dir(module):
candidate = getattr(module, attr)
if not isinstance(candidate, type):
continue
if not issubclass(candidate, Provider):
continue
if candidate in {Provider, SearchProvider, FileProvider}:
continue
if getattr(candidate, "__module__", "") != module_name:
continue
try:
self.register(candidate, module_name=module_name)
except Exception as exc:
log(f"[provider] Failed to register {module_name}.{candidate.__name__}: {exc}", file=sys.stderr)
def discover(self) -> None:
"""Import and register providers from the package."""
if self._discovered or not self.package_name:
return
self._discovered = True
try:
package = importlib.import_module(self.package_name)
except Exception as exc:
log(f"[provider] Failed to import package {self.package_name}: {exc}", file=sys.stderr)
return
self._register_module(package)
package_path = getattr(package, "__path__", None)
if not package_path:
return
for finder, module_name, _ in pkgutil.iter_modules(package_path):
if module_name.startswith("_"):
continue
module_path = f"{self.package_name}.{module_name}"
try:
module = importlib.import_module(module_path)
except Exception as exc:
log(f"[provider] Failed to load {module_path}: {exc}", file=sys.stderr)
continue
self._register_module(module)
def get(self, name: str) -> Optional[ProviderInfo]:
self.discover()
if not name:
return None
return self._lookup.get(self._normalize(name))
def iter_providers(self) -> Iterable[ProviderInfo]:
self.discover()
return tuple(self._infos.values())
def has_name(self, name: str) -> bool:
return self.get(name) is not None
REGISTRY = ProviderRegistry("Provider")
REGISTRY.discover()
def register_provider(
provider_class: Type[Provider],
*,
name: Optional[str] = None,
aliases: Optional[Sequence[str]] = None,
module_name: Optional[str] = None,
replace: bool = False,
) -> ProviderInfo:
"""Register a provider class from tests or third-party packages."""
return REGISTRY.register(
provider_class,
override_name=name,
extra_aliases=aliases,
module_name=module_name,
replace=replace,
)
def get_provider_class(name: str) -> Optional[Type[Provider]]:
info = REGISTRY.get(name)
if info is None:
return None
return info.provider_class
def selection_auto_stage_for_table(
table_type: str,
stage_args: Optional[Sequence[str]] = None,
) -> Optional[list[str]]:
t = str(table_type or "").strip().lower()
if not t:
return None
provider_key = t.split(".", 1)[0] if "." in t else t
provider_class = get_provider_class(provider_key) or get_provider_class(t)
if provider_class is None:
return None
try:
return provider_class.selection_auto_stage(t, stage_args)
except Exception:
return None
def is_known_provider_name(name: str) -> bool:
return REGISTRY.has_name(name)
def _supports_search(provider: Provider) -> bool:
return provider.__class__.search is not Provider.search
def _supports_upload(provider: Provider) -> bool:
return provider.__class__.upload is not Provider.upload
def _provider_url_patterns(provider_class: Type[Provider]) -> Sequence[str]:
try:
return list(provider_class.url_patterns())
except Exception:
return []
def get_provider(name: str, config: Optional[Dict[str, Any]] = None) -> Optional[Provider]:
info = REGISTRY.get(name)
if info is None:
log(f"[provider] Unknown provider: {name}", file=sys.stderr)
return None
try:
provider = info.provider_class(config)
if not provider.validate():
log(f"[provider] Provider '{name}' is not available", file=sys.stderr)
return None
return provider
except Exception as exc:
log(f"[provider] Error initializing '{name}': {exc}", file=sys.stderr)
return None
def list_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bool]:
availability: Dict[str, bool] = {}
for info in REGISTRY.iter_providers():
try:
provider = info.provider_class(config)
availability[info.canonical_name] = provider.validate()
except Exception:
availability[info.canonical_name] = False
return availability
def get_search_provider(name: str,
config: Optional[Dict[str, Any]] = None) -> Optional[SearchProvider]:
provider = get_provider(name, config)
if provider is None:
return None
if not _supports_search(provider):
log(f"[provider] Provider '{name}' does not support search", file=sys.stderr)
return None
return provider # type: ignore[return-value]
def list_search_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bool]:
availability: Dict[str, bool] = {}
for info in REGISTRY.iter_providers():
try:
provider = info.provider_class(config)
availability[info.canonical_name] = bool(
provider.validate() and info.supports_search
)
except Exception:
availability[info.canonical_name] = False
return availability
def get_file_provider(name: str,
config: Optional[Dict[str, Any]] = None) -> Optional[FileProvider]:
provider = get_provider(name, config)
if provider is None:
return None
if not _supports_upload(provider):
log(f"[provider] Provider '{name}' does not support upload", file=sys.stderr)
return None
return provider # type: ignore[return-value]
def list_file_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bool]:
availability: Dict[str, bool] = {}
for info in REGISTRY.iter_providers():
try:
provider = info.provider_class(config)
availability[info.canonical_name] = bool(
provider.validate() and info.supports_upload
)
except Exception:
availability[info.canonical_name] = False
return availability
def match_provider_name_for_url(url: str) -> Optional[str]:
raw_url = str(url or "").strip()
raw_url_lower = raw_url.lower()
try:
parsed = urlparse(raw_url)
host = (parsed.hostname or "").strip().lower()
path = (parsed.path or "").strip()
except Exception:
host = ""
path = ""
def _norm_host(h: str) -> str:
h_norm = str(h or "").strip().lower()
if h_norm.startswith("www."):
h_norm = h_norm[4:]
return h_norm
host_norm = _norm_host(host)
if host_norm:
if host_norm == "openlibrary.org" or host_norm.endswith(".openlibrary.org"):
return "openlibrary" if REGISTRY.has_name("openlibrary") else None
if host_norm == "archive.org" or host_norm.endswith(".archive.org"):
low_path = str(path or "").lower()
is_borrowish = (
low_path.startswith("/borrow/")
or low_path.startswith("/stream/")
or low_path.startswith("/services/loans/")
or "/services/loans/" in low_path
)
if is_borrowish:
return "openlibrary" if REGISTRY.has_name("openlibrary") else None
return "internetarchive" if REGISTRY.has_name("internetarchive") else None
for info in REGISTRY.iter_providers():
domains = _provider_url_patterns(info.provider_class)
if not domains:
continue
for domain in domains:
dom_raw = str(domain or "").strip()
dom = dom_raw.lower()
if not dom:
continue
if dom.startswith("magnet:") or dom.startswith("http://") or dom.startswith("https://"):
if raw_url_lower.startswith(dom):
return info.canonical_name
continue
dom_norm = _norm_host(dom)
if not dom_norm or not host_norm:
continue
if host_norm == dom_norm or host_norm.endswith("." + dom_norm):
return info.canonical_name
return None
def get_provider_for_url(url: str,
config: Optional[Dict[str, Any]] = None) -> Optional[Provider]:
name = match_provider_name_for_url(url)
if not name:
return None
return get_provider(name, config)
__all__ = [
"ProviderInfo",
"Provider",
"SearchProvider",
"FileProvider",
"SearchResult",
"register_provider",
"get_provider",
"list_providers",
"get_search_provider",
"list_search_providers",
"get_file_provider",
"list_file_providers",
"match_provider_name_for_url",
"get_provider_for_url",
"get_provider_class",
"selection_auto_stage_for_table",
"download_soulseek_file",
]