Files
Medios-Macina/ProviderCore/registry.py

427 lines
14 KiB
Python
Raw Normal View History

2025-12-11 19:04:02 -08:00
"""Provider registry.

Concrete provider implementations live in the ``Provider`` package. This module
is the single source of truth for discovery, metadata, and lifecycle helpers
for those plugins.
"""
from __future__ import annotations
2026-01-05 07:51:19 -08:00
import importlib
import pkgutil
2025-12-11 19:04:02 -08:00
import sys
2026-01-05 07:51:19 -08:00
from dataclasses import dataclass, field
from types import ModuleType
from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple, Type
2025-12-22 02:11:53 -08:00
from urllib.parse import urlparse
2025-12-11 19:04:02 -08:00
from SYS.logger import log
2026-01-05 07:51:19 -08:00
from ProviderCore.base import FileProvider, Provider, SearchProvider, SearchResult
from Provider.soulseek import download_soulseek_file
@dataclass(frozen=True)
class ProviderInfo:
    """Immutable description of one registered provider.

    Attributes are documented inline. The two ``supports_*`` properties
    report whether the provider class overrides the corresponding method
    inherited from ``Provider``.
    """

    # Normalized primary name the provider is registered under.
    canonical_name: str
    # The provider class itself (not an instance).
    provider_class: Type[Provider]
    # Dotted name of the module the class was registered from.
    module: str
    # Additional normalized names that resolve to this provider.
    alias_names: Tuple[str, ...] = field(default_factory=tuple)

    @property
    def supports_search(self) -> bool:
        """Return True when the class supplies its own ``search``."""
        implementation = self.provider_class.search
        return implementation is not Provider.search

    @property
    def supports_upload(self) -> bool:
        """Return True when the class supplies its own ``upload``."""
        implementation = self.provider_class.upload
        return implementation is not Provider.upload
class ProviderRegistry:
    """Handles discovery, registration, and lookup of provider classes.

    Two indexes are maintained:

    * ``_infos`` maps each canonical name to its ``ProviderInfo``.
    * ``_lookup`` maps every accepted name (canonical or alias) to the same
      record.

    All keys are stored stripped and lower-cased, so lookups are
    case-insensitive.
    """

    def __init__(self, package_name: str) -> None:
        """Create an empty registry bound to *package_name*.

        Args:
            package_name: Importable package scanned by :meth:`discover`.
                A falsy or whitespace-only value disables discovery.
        """
        self.package_name = (package_name or "").strip()
        self._infos: Dict[str, ProviderInfo] = {}
        self._lookup: Dict[str, ProviderInfo] = {}
        # Names of modules already scanned by ``_register_module``.
        self._modules: set[str] = set()
        self._discovered = False

    def _normalize(self, value: Any) -> str:
        """Return *value* as a stripped, lower-cased key ("" when falsy)."""
        return str(value or "").strip().lower()

    def _candidate_names(
        self,
        provider_class: Type[Provider],
        override_name: Optional[str],
    ) -> List[str]:
        """Collect the names *provider_class* may be registered under.

        The first entry is used as the canonical name by :meth:`register`.
        ``override_name`` wins when given; otherwise ``PROVIDER_NAME`` and
        then ``NAME`` are consulted. The class ``__name__`` and every entry
        of ``PROVIDER_ALIASES`` always follow.

        Returns:
            Stripped names in priority order, de-duplicated
            case-insensitively while keeping the first spelling seen.
        """
        names: List[str] = []
        seen: set[str] = set()

        def _add(value: Any) -> None:
            text = str(value or "").strip()
            key = text.lower()
            if not text or key in seen:
                return
            seen.add(key)
            names.append(text)

        if override_name:
            _add(override_name)
        else:
            _add(getattr(provider_class, "PROVIDER_NAME", None))
            _add(getattr(provider_class, "NAME", None))
        _add(getattr(provider_class, "__name__", None))
        for alias in getattr(provider_class, "PROVIDER_ALIASES", ()) or ():
            _add(alias)
        return names

    def register(
        self,
        provider_class: Type[Provider],
        *,
        override_name: Optional[str] = None,
        extra_aliases: Optional[Sequence[str]] = None,
        module_name: Optional[str] = None,
        replace: bool = False,
    ) -> ProviderInfo:
        """Register a provider class with canonical and alias names.

        Args:
            provider_class: Class to register.
            override_name: Canonical name to use instead of the class's own
                ``PROVIDER_NAME`` / ``NAME``.
            extra_aliases: Additional alias names beyond those the class
                declares.
            module_name: Module recorded on the entry; defaults to the
                class's ``__module__``.
            replace: When False (default) an existing entry with the same
                canonical name is kept and returned unchanged. When True the
                existing entry is replaced.

        Returns:
            The ``ProviderInfo`` now registered under the canonical name.

        Raises:
            ValueError: If no usable name can be derived for the class.
        """
        candidates = self._candidate_names(provider_class, override_name)
        if not candidates:
            raise ValueError("provider name candidates are required")
        canonical = self._normalize(candidates[0])
        if not canonical:
            raise ValueError("provider name must not be empty")

        existing = self._infos.get(canonical)
        if existing is not None and not replace:
            return existing

        # Aliases come from the remaining candidates followed by the caller's
        # extras; seeding the seen-set with the canonical name drops any alias
        # that merely repeats it.
        alias_names: List[str] = []
        alias_seen: set[str] = {canonical}
        for raw_alias in (*candidates[1:], *(extra_aliases or ())):
            normalized = self._normalize(raw_alias)
            if not normalized or normalized in alias_seen:
                continue
            alias_seen.add(normalized)
            alias_names.append(normalized)

        info = ProviderInfo(
            canonical_name=canonical,
            provider_class=provider_class,
            module=module_name or getattr(provider_class, "__module__", "") or "",
            alias_names=tuple(alias_names),
        )

        if existing is not None:
            # Replacing: drop every lookup key that still points at the old
            # entry, otherwise its aliases would keep resolving to a class
            # that is no longer registered.
            stale_keys = [
                key for key, value in self._lookup.items() if value is existing
            ]
            for key in stale_keys:
                del self._lookup[key]

        self._infos[canonical] = info
        for key in (canonical, *alias_names):
            self._lookup[key] = info
        return info

    def _register_module(self, module: ModuleType) -> None:
        """Register every concrete provider class defined in *module*.

        A module is scanned at most once. Only classes that subclass
        ``Provider``, are not one of the base classes themselves, and whose
        ``__module__`` equals the module's name are registered, so classes
        merely imported into the module are skipped. Registration failures
        are logged to stderr and do not abort the scan.
        """
        module_name = getattr(module, "__name__", "")
        if not module_name or module_name in self._modules:
            return
        self._modules.add(module_name)

        base_classes = {Provider, SearchProvider, FileProvider}
        for attr in dir(module):
            candidate = getattr(module, attr)
            if not isinstance(candidate, type):
                continue
            if not issubclass(candidate, Provider):
                continue
            if candidate in base_classes:
                continue
            if getattr(candidate, "__module__", "") != module_name:
                continue
            try:
                self.register(candidate, module_name=module_name)
            except Exception as exc:
                log(f"[provider] Failed to register {module_name}.{candidate.__name__}: {exc}", file=sys.stderr)

    def discover(self) -> None:
        """Import and register providers from the package.

        Runs at most once per registry. The discovered flag is set before
        any import happens, so a failed package import is not retried and a
        nested call made while discovery is in progress returns immediately.
        Sub-modules whose name starts with ``_`` are skipped; import errors
        are logged to stderr and the remaining modules are still processed.
        """
        if self._discovered or not self.package_name:
            return
        self._discovered = True

        try:
            package = importlib.import_module(self.package_name)
        except Exception as exc:
            log(f"[provider] Failed to import package {self.package_name}: {exc}", file=sys.stderr)
            return

        self._register_module(package)
        package_path = getattr(package, "__path__", None)
        if not package_path:
            return

        for _finder, module_name, _is_pkg in pkgutil.iter_modules(package_path):
            if module_name.startswith("_"):
                continue
            module_path = f"{self.package_name}.{module_name}"
            try:
                module = importlib.import_module(module_path)
            except Exception as exc:
                log(f"[provider] Failed to load {module_path}: {exc}", file=sys.stderr)
                continue
            self._register_module(module)

    def get(self, name: str) -> Optional[ProviderInfo]:
        """Return the entry registered under *name* (canonical or alias).

        Triggers discovery first. Returns None for a falsy or unknown name.
        """
        self.discover()
        if not name:
            return None
        return self._lookup.get(self._normalize(name))

    def iter_providers(self) -> Iterable[ProviderInfo]:
        """Return a snapshot tuple of all entries, one per canonical name."""
        self.discover()
        return tuple(self._infos.values())

    def has_name(self, name: str) -> bool:
        """Return True when *name* resolves to a registered provider."""
        return self.get(name) is not None
# Module-wide registry bound to the ``Provider`` package.
REGISTRY = ProviderRegistry("Provider")
# Discovery runs eagerly at import time; later calls to discover() are no-ops.
REGISTRY.discover()
def register_provider(
    provider_class: Type[Provider],
    *,
    name: Optional[str] = None,
    aliases: Optional[Sequence[str]] = None,
    module_name: Optional[str] = None,
    replace: bool = False,
) -> ProviderInfo:
    """Register a provider class on the module-level registry.

    Intended for tests or third-party packages. ``name`` and ``aliases`` are
    forwarded as the registry's ``override_name`` and ``extra_aliases``;
    ``module_name`` and ``replace`` are passed through unchanged.

    Returns:
        The ``ProviderInfo`` returned by ``REGISTRY.register``.
    """
    registration_options = {
        "override_name": name,
        "extra_aliases": aliases,
        "module_name": module_name,
        "replace": replace,
    }
    return REGISTRY.register(provider_class, **registration_options)
2025-12-11 19:04:02 -08:00
2026-01-03 03:37:48 -08:00
def get_provider_class(name: str) -> Optional[Type[Provider]]:
    """Return the provider class registered under *name*, or None if unknown."""
    entry = REGISTRY.get(name)
    return None if entry is None else entry.provider_class
2026-01-03 03:37:48 -08:00
def selection_auto_stage_for_table(
    table_type: str,
    stage_args: Optional[Sequence[str]] = None,
) -> Optional[list[str]]:
    """Ask the provider owning *table_type* for its automatic selection stage.

    The provider is looked up by the text before the first ``.`` of the
    normalized table type, falling back to the whole normalized string.

    Returns:
        Whatever the provider class's ``selection_auto_stage`` returns, or
        None when the table type is empty, no provider matches, or the
        provider call raises.
    """
    table_key = str(table_type or "").strip().lower()
    if not table_key:
        return None

    # partition() yields the whole string when there is no dot.
    provider_key = table_key.partition(".")[0]
    provider_class = get_provider_class(provider_key) or get_provider_class(table_key)
    if provider_class is None:
        return None

    try:
        stage = provider_class.selection_auto_stage(table_key, stage_args)
    except Exception:
        # Best-effort: a failing provider hook is treated as "no auto stage".
        return None
    return stage
2025-12-21 05:10:09 -08:00
def is_known_provider_name(name: str) -> bool:
    """Return True when *name* is a registered canonical name or alias."""
    known = REGISTRY.has_name(name)
    return known
2025-12-21 05:10:09 -08:00
2025-12-19 02:29:42 -08:00
def _supports_search(provider: Provider) -> bool:
    """Return True when *provider*'s class overrides ``Provider.search``."""
    implementation = provider.__class__.search
    return implementation is not Provider.search
def _supports_upload(provider: Provider) -> bool:
    """Return True when *provider*'s class overrides ``Provider.upload``."""
    implementation = provider.__class__.upload
    return implementation is not Provider.upload
2025-12-11 19:04:02 -08:00
2025-12-19 02:29:42 -08:00
2026-01-01 20:37:27 -08:00
def _provider_url_patterns(provider_class: Type[Provider]) -> Sequence[str]:
try:
return list(provider_class.url_patterns())
except Exception:
return []
2026-01-05 07:51:19 -08:00
def get_provider(name: str, config: Optional[Dict[str, Any]] = None) -> Optional[Provider]:
    """Instantiate and validate the provider registered under *name*.

    Args:
        name: Canonical name or alias of the provider.
        config: Passed as the sole argument to the provider constructor.

    Returns:
        The provider instance, or None when the name is unknown, the
        instance's ``validate()`` is falsy, or construction/validation
        raises. Each failure is logged to stderr.
    """
    info = REGISTRY.get(name)
    if info is None:
        log(f"[provider] Unknown provider: {name}", file=sys.stderr)
        return None

    try:
        instance = info.provider_class(config)
        if instance.validate():
            return instance
        log(f"[provider] Provider '{name}' is not available", file=sys.stderr)
    except Exception as exc:
        log(f"[provider] Error initializing '{name}': {exc}", file=sys.stderr)
    return None
2025-12-19 02:29:42 -08:00
def list_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bool]:
    """Report which registered providers are currently usable.

    Every registered provider class is instantiated with *config* and its
    ``validate()`` result is recorded under the canonical name.

    Returns:
        Mapping of canonical provider name to availability. A provider whose
        construction or validation raises is reported as False.
    """
    availability: Dict[str, bool] = {}
    for info in REGISTRY.iter_providers():
        try:
            provider = info.provider_class(config)
            # Coerce to a real bool so the declared return type holds and the
            # result matches list_search_providers / list_file_providers.
            availability[info.canonical_name] = bool(provider.validate())
        except Exception:
            availability[info.canonical_name] = False
    return availability
def get_search_provider(
    name: str,
    config: Optional[Dict[str, Any]] = None,
) -> Optional[SearchProvider]:
    """Return a validated provider for *name* only if it supports search.

    Returns None when ``get_provider`` yields nothing or when the provider's
    class does not override ``Provider.search`` (logged to stderr).
    """
    candidate = get_provider(name, config)
    if candidate is not None and not _supports_search(candidate):
        log(f"[provider] Provider '{name}' does not support search", file=sys.stderr)
        return None
    return candidate  # type: ignore[return-value]
def list_search_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bool]:
    """Report, per canonical name, whether a provider is valid and searchable.

    Each registered class is instantiated with *config*; the entry is True
    only when ``validate()`` is truthy and the class overrides search. Any
    exception marks the provider as False.
    """
    availability: Dict[str, bool] = {}
    for info in REGISTRY.iter_providers():
        try:
            instance = info.provider_class(config)
            usable = instance.validate() and info.supports_search
            availability[info.canonical_name] = bool(usable)
        except Exception:
            availability[info.canonical_name] = False
    return availability
2025-12-11 19:04:02 -08:00
def get_file_provider(
    name: str,
    config: Optional[Dict[str, Any]] = None,
) -> Optional[FileProvider]:
    """Return a validated provider for *name* only if it supports upload.

    Returns None when ``get_provider`` yields nothing or when the provider's
    class does not override ``Provider.upload`` (logged to stderr).
    """
    candidate = get_provider(name, config)
    if candidate is not None and not _supports_upload(candidate):
        log(f"[provider] Provider '{name}' does not support upload", file=sys.stderr)
        return None
    return candidate  # type: ignore[return-value]
2025-12-11 19:04:02 -08:00
def list_file_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bool]:
    """Report, per canonical name, whether a provider is valid and can upload.

    Each registered class is instantiated with *config*; the entry is True
    only when ``validate()`` is truthy and the class overrides upload. Any
    exception marks the provider as False.
    """
    availability: Dict[str, bool] = {}
    for info in REGISTRY.iter_providers():
        try:
            instance = info.provider_class(config)
            usable = instance.validate() and info.supports_upload
            availability[info.canonical_name] = bool(usable)
        except Exception:
            availability[info.canonical_name] = False
    return availability
2025-12-22 02:11:53 -08:00
def match_provider_name_for_url(url: str) -> Optional[str]:
    """Return the canonical name of the provider that handles *url*.

    Resolution order:

    1. Hosts at or under ``openlibrary.org`` map to ``openlibrary``.
    2. Hosts at or under ``archive.org`` map to ``openlibrary`` for
       borrow/stream/loan paths and to ``internetarchive`` otherwise.
       In both special cases None is returned if that provider is not
       registered; other providers are not consulted.
    3. Otherwise each registered provider's URL patterns are tried in
       registry order. Patterns starting with ``magnet:``, ``http://`` or
       ``https://`` are matched as a case-insensitive prefix of the whole
       URL; all other patterns are treated as host names and match the host
       itself or any of its sub-domains (a leading ``www.`` is ignored on
       both sides).

    Returns:
        The matching canonical provider name, or None when nothing matches.
    """

    def _strip_www(value: str) -> str:
        cleaned = str(value or "").strip().lower()
        return cleaned[4:] if cleaned.startswith("www.") else cleaned

    def _if_registered(provider_name: str) -> Optional[str]:
        return provider_name if REGISTRY.has_name(provider_name) else None

    text = str(url or "").strip()
    text_lower = text.lower()

    try:
        parts = urlparse(text)
        host = (parts.hostname or "").strip().lower()
        path = (parts.path or "").strip()
    except Exception:
        # Unparseable URL: only full-URL prefix patterns can still match.
        host, path = "", ""

    host_norm = _strip_www(host)

    if host_norm:
        if host_norm == "openlibrary.org" or host_norm.endswith(".openlibrary.org"):
            return _if_registered("openlibrary")

        if host_norm == "archive.org" or host_norm.endswith(".archive.org"):
            path_lower = str(path or "").lower()
            borrow_like = (
                path_lower.startswith(("/borrow/", "/stream/"))
                or "/services/loans/" in path_lower
            )
            return _if_registered("openlibrary" if borrow_like else "internetarchive")

    for info in REGISTRY.iter_providers():
        for pattern in _provider_url_patterns(info.provider_class):
            pattern_lower = str(pattern or "").strip().lower()
            if not pattern_lower:
                continue

            if pattern_lower.startswith(("magnet:", "http://", "https://")):
                # Scheme-qualified patterns are compared against the raw URL.
                if text_lower.startswith(pattern_lower):
                    return info.canonical_name
                continue

            pattern_host = _strip_www(pattern_lower)
            if not pattern_host or not host_norm:
                continue
            if host_norm == pattern_host or host_norm.endswith("." + pattern_host):
                return info.canonical_name

    return None
def get_provider_for_url(
    url: str,
    config: Optional[Dict[str, Any]] = None,
) -> Optional[Provider]:
    """Return a validated provider instance able to handle *url*, or None.

    The provider name is resolved with ``match_provider_name_for_url`` and
    then instantiated through ``get_provider`` with *config*.
    """
    matched_name = match_provider_name_for_url(url)
    return get_provider(matched_name, config) if matched_name else None
2025-12-11 19:04:02 -08:00
# Public API of this module. ``is_known_provider_name`` is listed alongside
# the other public lookup helpers so star-imports expose it as well.
__all__ = [
    "ProviderInfo",
    "Provider",
    "SearchProvider",
    "FileProvider",
    "SearchResult",
    "register_provider",
    "get_provider",
    "list_providers",
    "get_search_provider",
    "list_search_providers",
    "get_file_provider",
    "list_file_providers",
    "match_provider_name_for_url",
    "get_provider_for_url",
    "get_provider_class",
    "is_known_provider_name",
    "selection_auto_stage_for_table",
    "download_soulseek_file",
]