Files
Medios-Macina/ProviderCore/registry.py

242 lines
8.0 KiB
Python
Raw Normal View History

2025-12-11 19:04:02 -08:00
"""Provider registry.
Concrete provider implementations live in the `Provider/` package.
This module is the single source of truth for provider discovery.
"""
from __future__ import annotations
2025-12-22 02:11:53 -08:00
from typing import Any, Dict, Optional, Sequence, Type
2025-12-11 19:04:02 -08:00
import sys
2025-12-22 02:11:53 -08:00
from urllib.parse import urlparse
2025-12-11 19:04:02 -08:00
from SYS.logger import log
2025-12-19 02:29:42 -08:00
from ProviderCore.base import Provider, SearchProvider, FileProvider, SearchResult
2025-12-16 01:45:01 -08:00
from Provider.alldebrid import AllDebrid
2025-12-11 19:04:02 -08:00
from Provider.bandcamp import Bandcamp
from Provider.libgen import Libgen
from Provider.matrix import Matrix
2025-12-12 21:55:38 -08:00
from Provider.openlibrary import OpenLibrary
2025-12-11 19:04:02 -08:00
from Provider.soulseek import Soulseek, download_soulseek_file
2025-12-19 02:29:42 -08:00
from Provider.telegram import Telegram
2025-12-11 19:04:02 -08:00
from Provider.youtube import YouTube
2025-12-19 15:20:08 -08:00
from Provider.fileio import FileIO
2025-12-11 19:04:02 -08:00
from Provider.zeroxzero import ZeroXZero
2025-12-25 16:02:46 -08:00
from Provider.loc import LOC
2025-12-26 21:04:09 -08:00
from Provider.internetarchive import InternetArchive
2025-12-30 04:47:13 -08:00
from Provider.podcastindex import PodcastIndex
2025-12-31 05:17:37 -08:00
from Provider.HIFI import HIFI
2025-12-11 19:04:02 -08:00
# Registry mapping a lower-case provider name to its implementing class.
# Insertion order is preserved by the dict and is therefore the order in which
# code iterating over this mapping visits the providers.
_PROVIDERS: Dict[str, Type[Provider]] = {
    # Search-capable providers
    "alldebrid": AllDebrid,
    "libgen": Libgen,
    "openlibrary": OpenLibrary,
    "internetarchive": InternetArchive,
    "hifi": HIFI,
    "soulseek": Soulseek,
    "bandcamp": Bandcamp,
    "youtube": YouTube,
    "telegram": Telegram,
    "loc": LOC,
    "podcastindex": PodcastIndex,

    # Upload-capable providers
    "0x0": ZeroXZero,
    "file.io": FileIO,
    "matrix": Matrix,
}
2025-12-11 19:04:02 -08:00
2025-12-21 05:10:09 -08:00
def is_known_provider_name(name: str) -> bool:
    """Report whether `name` is one of the registered provider keys.

    The name is stripped of surrounding whitespace and lower-cased before the
    lookup; a falsy value (``None``, ``""``) is treated as the empty string.

    Only a dictionary membership test is performed -- nothing is instantiated
    and nothing is logged -- so callers can probe UI strings (table names,
    store names, etc.) without triggering noisy 'Unknown provider' logs.
    """
    candidate = name if name else ""
    normalized = candidate.strip().lower()
    return normalized in _PROVIDERS
2025-12-19 02:29:42 -08:00
def _supports_search(provider: Provider) -> bool:
    """Return True when the provider's class overrides `Provider.search`.

    The check compares the `search` attribute found on the instance's class
    with the one on the `Provider` base class; an identical object means the
    base implementation was inherited unchanged.
    """
    implementation = provider.__class__.search
    return implementation is not Provider.search
def _supports_upload(provider: Provider) -> bool:
    """Return True when the provider's class overrides `Provider.upload`.

    The check compares the `upload` attribute found on the instance's class
    with the one on the `Provider` base class; an identical object means the
    base implementation was inherited unchanged.
    """
    implementation = provider.__class__.upload
    return implementation is not Provider.upload
2025-12-11 19:04:02 -08:00
2025-12-19 02:29:42 -08:00
def get_provider(
    name: str,
    config: Optional[Dict[str, Any]] = None,
) -> Optional[Provider]:
    """Get a provider by name (unified registry).

    The name is normalised exactly like `is_known_provider_name` does
    (surrounding whitespace removed, lower-cased), so any name that function
    reports as known is also resolvable here.

    Args:
        name: Registry key of the provider; case and surrounding whitespace
            are ignored. A falsy value is treated as the empty string.
        config: Optional configuration mapping handed to the provider's
            constructor as its only argument.

    Returns:
        A validated provider instance, or ``None`` when the name is not
        registered, the provider's ``validate()`` returns a falsy value, or
        construction/validation raises. Every ``None`` outcome is logged to
        stderr; no exception escapes from provider construction or validation.
    """
    # str() keeps a non-string (but truthy) name from raising AttributeError;
    # it simply fails the lookup and is reported as unknown below.
    key = str(name or "").strip().lower()
    provider_class = _PROVIDERS.get(key)

    if provider_class is None:
        log(f"[provider] Unknown provider: {name}", file=sys.stderr)
        return None

    try:
        provider = provider_class(config)
        if not provider.validate():
            log(f"[provider] Provider '{name}' is not available", file=sys.stderr)
            return None
        return provider
    except Exception as exc:
        # Deliberately broad: a misbehaving provider must not take the caller
        # down, it is just reported and treated as unavailable.
        log(f"[provider] Error initializing '{name}': {exc}", file=sys.stderr)
        return None
2025-12-19 02:29:42 -08:00
def list_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bool]:
    """List all providers and their availability.

    Every registered provider class is instantiated with `config` and asked
    to validate itself.

    Args:
        config: Optional configuration mapping handed to each provider's
            constructor as its only argument.

    Returns:
        A mapping of provider name to ``True``/``False``. The result of
        ``validate()`` is coerced to ``bool`` so the values always match the
        declared return type; a provider whose construction or validation
        raises is reported as ``False``.
    """
    availability: Dict[str, bool] = {}

    for name, provider_class in _PROVIDERS.items():
        try:
            provider = provider_class(config)
            availability[name] = bool(provider.validate())
        except Exception:
            # Best-effort probe: any failure just marks the provider as
            # unavailable instead of aborting the whole listing.
            availability[name] = False

    return availability
def get_search_provider(
    name: str,
    config: Optional[Dict[str, Any]] = None,
) -> Optional[SearchProvider]:
    """Get a search-capable provider by name (compat API).

    Resolves the provider through `get_provider` and returns it only when
    `_supports_search` accepts it. When the provider resolves but fails that
    check, a message is logged to stderr and ``None`` is returned; when it
    does not resolve at all, ``None`` is returned without a further message
    from this function.
    """
    candidate = get_provider(name, config)
    if candidate is not None:
        if _supports_search(candidate):
            return candidate  # type: ignore[return-value]
        log(f"[provider] Provider '{name}' does not support search", file=sys.stderr)
    return None
def list_search_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bool]:
    """List all search providers and their availability.

    Every registered provider appears in the result. Its value is ``True``
    only when the instance built from `config` both validates and passes
    `_supports_search`; any exception while probing yields ``False``.
    """
    result: Dict[str, bool] = {}

    for key, cls in _PROVIDERS.items():
        try:
            instance = cls(config)
            usable = bool(instance.validate() and _supports_search(instance))
        except Exception:
            usable = False
        result[key] = usable

    return result
2025-12-11 19:04:02 -08:00
def get_file_provider(
    name: str,
    config: Optional[Dict[str, Any]] = None,
) -> Optional[FileProvider]:
    """Get an upload-capable provider by name (compat API).

    Resolves the provider through `get_provider` and returns it only when
    `_supports_upload` accepts it. When the provider resolves but fails that
    check, a message is logged to stderr and ``None`` is returned; when it
    does not resolve at all, ``None`` is returned without a further message
    from this function.
    """
    candidate = get_provider(name, config)
    if candidate is not None:
        if _supports_upload(candidate):
            return candidate  # type: ignore[return-value]
        log(f"[provider] Provider '{name}' does not support upload", file=sys.stderr)
    return None
2025-12-11 19:04:02 -08:00
def list_file_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bool]:
    """List all file providers and their availability.

    Every registered provider appears in the result. Its value is ``True``
    only when the instance built from `config` both validates and passes
    `_supports_upload`; any exception while probing yields ``False``.
    """
    result: Dict[str, bool] = {}

    for key, cls in _PROVIDERS.items():
        try:
            instance = cls(config)
            usable = bool(instance.validate() and _supports_upload(instance))
        except Exception:
            usable = False
        result[key] = usable

    return result
2025-12-22 02:11:53 -08:00
def match_provider_name_for_url(url: str) -> Optional[str]:
    """Return a registered provider name that claims the URL's domain.

    Providers can declare domains via a class attribute `URL_DOMAINS` (a list
    or tuple of strings). A host matches a declared domain when it is equal
    to it or is a subdomain of it. Hosts are compared lower-cased and without
    the trailing dot of a fully-qualified name, so ``archive.org.`` is treated
    like ``archive.org``.

    This matcher is intentionally cheap (no provider instantiation, no network).

    Two host families are routed explicitly before `URL_DOMAINS` is consulted:

    * ``openlibrary.org`` (and subdomains) -> ``"openlibrary"``.
    * ``archive.org`` (and subdomains) -> ``"openlibrary"`` for borrow/loan
      style paths, otherwise ``"internetarchive"``.

    In both cases ``None`` is returned if the chosen provider is not
    registered; the generic `URL_DOMAINS` scan is not used as a fallback.

    Args:
        url: The URL to classify. It is converted with ``str()`` first.

    Returns:
        The registry key of the matching provider, or ``None`` when the URL
        cannot be parsed, has no host (e.g. it lacks a ``scheme://`` prefix),
        or no provider claims its host.
    """
    try:
        parsed = urlparse(str(url))
        # rstrip(".") normalises the fully-qualified form "example.com.".
        host = (parsed.hostname or "").strip().lower().rstrip(".")
        path = (parsed.path or "").strip()
    except Exception:
        # Best-effort: an unparsable URL simply matches nothing.
        host = ""
        path = ""

    if not host:
        return None

    if host == "openlibrary.org" or host.endswith(".openlibrary.org"):
        return "openlibrary" if "openlibrary" in _PROVIDERS else None

    # Prefer Internet Archive for archive.org links unless the URL clearly refers
    # to a borrow/loan flow (handled by OpenLibrary provider).
    #
    # This keeps direct downloads and item pages routed to `internetarchive`, while
    # preserving OpenLibrary's scripted borrow pipeline for loan/reader URLs.
    if host == "archive.org" or host.endswith(".archive.org"):
        low_path = path.lower()
        is_borrowish = (
            low_path.startswith(("/borrow/", "/stream/"))
            # Substring test also covers paths that start with this segment.
            or "/services/loans/" in low_path
        )
        if is_borrowish:
            return "openlibrary" if "openlibrary" in _PROVIDERS else None
        return "internetarchive" if "internetarchive" in _PROVIDERS else None

    # Generic matching: first registered provider whose URL_DOMAINS claims the host.
    for name, provider_class in _PROVIDERS.items():
        domains = getattr(provider_class, "URL_DOMAINS", None)
        if not isinstance(domains, (list, tuple)):
            continue
        for d in domains:
            dom = str(d or "").strip().lower()
            if not dom:
                continue
            if host == dom or host.endswith("." + dom):
                return name

    return None
def get_provider_for_url(
    url: str,
    config: Optional[Dict[str, Any]] = None,
) -> Optional[Provider]:
    """Instantiate and return the matching provider for a URL, if any.

    The provider name is determined by `match_provider_name_for_url`; when
    that yields nothing, ``None`` is returned without attempting a lookup.
    Otherwise the result of `get_provider` for that name and `config` is
    returned as-is.
    """
    matched = match_provider_name_for_url(url)
    return get_provider(matched, config) if matched else None
2025-12-11 19:04:02 -08:00
# Public names exported by this module.
__all__ = [
    # Types imported from ProviderCore.base
    "SearchResult",
    "Provider",
    "SearchProvider",
    "FileProvider",
    # Lookup and listing by provider name
    "get_provider",
    "list_providers",
    "get_search_provider",
    "list_search_providers",
    "get_file_provider",
    "list_file_providers",
    # Lookup by URL
    "match_provider_name_for_url",
    "get_provider_for_url",
    # Imported from Provider.soulseek
    "download_soulseek_file",
]