df
This commit is contained in:
@@ -26,8 +26,7 @@ class SearchResult:
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
"""Convert to dictionary for pipeline processing."""
|
||||
|
||||
return {
|
||||
out = {
|
||||
"table": self.table,
|
||||
"title": self.title,
|
||||
"path": self.path,
|
||||
@@ -40,6 +39,15 @@ class SearchResult:
|
||||
"full_metadata": self.full_metadata,
|
||||
}
|
||||
|
||||
try:
|
||||
url_value = getattr(self, "url", None)
|
||||
if url_value is not None:
|
||||
out["url"] = url_value
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return out
|
||||
|
||||
|
||||
class Provider(ABC):
|
||||
"""Unified provider base class.
|
||||
|
||||
@@ -1,75 +1,238 @@
|
||||
"""Provider registry.
|
||||
|
||||
Concrete provider implementations live in the `Provider/` package.
|
||||
This module is the single source of truth for provider discovery.
|
||||
Concrete provider implementations live in the ``Provider`` package. This module
|
||||
is the single source of truth for discovery, metadata, and lifecycle helpers
|
||||
for those plugins.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, Optional, Sequence, Type
|
||||
import importlib
|
||||
import pkgutil
|
||||
import sys
|
||||
from dataclasses import dataclass, field
|
||||
from types import ModuleType
|
||||
from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple, Type
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from SYS.logger import log
|
||||
|
||||
from ProviderCore.base import Provider, SearchProvider, FileProvider, SearchResult
|
||||
from Provider.alldebrid import AllDebrid
|
||||
from Provider.bandcamp import Bandcamp
|
||||
from Provider.libgen import Libgen
|
||||
from Provider.matrix import Matrix
|
||||
from Provider.openlibrary import OpenLibrary
|
||||
from Provider.soulseek import Soulseek, download_soulseek_file
|
||||
from Provider.telegram import Telegram
|
||||
from Provider.youtube import YouTube
|
||||
from Provider.fileio import FileIO
|
||||
from Provider.zeroxzero import ZeroXZero
|
||||
from Provider.loc import LOC
|
||||
from Provider.internetarchive import InternetArchive
|
||||
from Provider.podcastindex import PodcastIndex
|
||||
from Provider.HIFI import HIFI
|
||||
from ProviderCore.base import FileProvider, Provider, SearchProvider, SearchResult
|
||||
from Provider.soulseek import download_soulseek_file
|
||||
|
||||
# Static name -> provider class table. Keys are the lower-case names callers
# pass in; values are the concrete classes imported at the top of the module.
_PROVIDERS: Dict[str, Type[Provider]] = {
    # Providers that can be searched
    "alldebrid": AllDebrid,
    "libgen": Libgen,
    "openlibrary": OpenLibrary,
    "internetarchive": InternetArchive,
    "hifi": HIFI,
    "soulseek": Soulseek,
    "bandcamp": Bandcamp,
    "youtube": YouTube,
    "telegram": Telegram,
    "loc": LOC,
    "podcastindex": PodcastIndex,
    # Providers that accept uploads
    "0x0": ZeroXZero,
    "file.io": FileIO,
    "matrix": Matrix,
}
|
||||
|
||||
@dataclass(frozen=True)
class ProviderInfo:
    """Immutable record describing one registered provider class.

    The capability flags are derived from the class itself: a provider is
    considered search- or upload-capable when the corresponding attribute on
    ``provider_class`` is not the very same object as the one on the
    ``Provider`` base class, i.e. the method has been overridden.
    """

    # Primary name the provider is registered under.
    canonical_name: str
    # Provider class this entry describes.
    provider_class: Type[Provider]
    # Dotted name of the module recorded for the class.
    module: str
    # Additional names for the provider; empty when none were supplied.
    alias_names: Tuple[str, ...] = ()

    def _overrides(self, method_name: str) -> bool:
        """Return True when ``provider_class`` replaces ``Provider.<method_name>``."""
        current = getattr(self.provider_class, method_name)
        inherited = getattr(Provider, method_name)
        return current is not inherited

    @property
    def supports_search(self) -> bool:
        """True when the provider class overrides ``Provider.search``."""
        return self._overrides("search")

    @property
    def supports_upload(self) -> bool:
        """True when the provider class overrides ``Provider.upload``."""
        return self._overrides("upload")
|
||||
|
||||
|
||||
class ProviderRegistry:
    """Discover, register, and look up provider classes by name.

    Two tables are maintained:

    * ``_infos`` maps each canonical name to its ``ProviderInfo``.
    * ``_lookup`` maps every resolvable name (canonical names and aliases)
      to the same ``ProviderInfo`` objects.

    All keys are stored stripped and lower-cased, so lookups ignore case and
    surrounding whitespace.
    """

    def __init__(self, package_name: str) -> None:
        """Create an empty registry bound to ``package_name``.

        Args:
            package_name: Importable package scanned by :meth:`discover`.
                A blank or falsy value disables discovery entirely.
        """
        self.package_name = (package_name or "").strip()
        self._infos: Dict[str, ProviderInfo] = {}
        self._lookup: Dict[str, ProviderInfo] = {}
        # Module names already scanned, so a module is never processed twice.
        self._modules: set[str] = set()
        self._discovered = False

    def _normalize(self, value: Any) -> str:
        """Return ``value`` as a stripped, lower-cased key ("" when falsy)."""
        return str(value or "").strip().lower()

    def _candidate_names(
        self,
        provider_class: Type[Provider],
        override_name: Optional[str],
    ) -> List[str]:
        """Collect the names a provider may be known by, most preferred first.

        Order: ``override_name`` when given, otherwise the class attributes
        ``PROVIDER_NAME`` then ``NAME``; then the class ``__name__``; then
        every entry of ``PROVIDER_ALIASES``. Blank values and case-insensitive
        duplicates are dropped; the original spelling of each kept name is
        preserved.
        """
        names: List[str] = []
        seen: set[str] = set()

        def _add(value: Any) -> None:
            text = str(value or "").strip()
            key = text.lower()
            if not text or key in seen:
                return
            seen.add(key)
            names.append(text)

        if override_name:
            _add(override_name)
        else:
            _add(getattr(provider_class, "PROVIDER_NAME", None))
            _add(getattr(provider_class, "NAME", None))

        _add(getattr(provider_class, "__name__", None))

        for alias in getattr(provider_class, "PROVIDER_ALIASES", ()) or ():
            _add(alias)

        return names

    def register(
        self,
        provider_class: Type[Provider],
        *,
        override_name: Optional[str] = None,
        extra_aliases: Optional[Sequence[str]] = None,
        module_name: Optional[str] = None,
        replace: bool = False,
    ) -> ProviderInfo:
        """Register a provider class with canonical and alias names.

        Args:
            provider_class: Class to register.
            override_name: Canonical name to use instead of the class's own
                ``PROVIDER_NAME`` / ``NAME`` attributes.
            extra_aliases: Additional aliases appended after the ones derived
                from the class.
            module_name: Module recorded on the entry; defaults to the
                class's ``__module__``.
            replace: When False (default) and the canonical name is already
                registered, the existing entry is returned unchanged. When
                True the existing entry is replaced, and every name that
                resolved to it is removed first.

        Returns:
            The ``ProviderInfo`` now stored under the canonical name.

        Raises:
            ValueError: If no usable name can be derived for the class.
        """
        candidates = self._candidate_names(provider_class, override_name)
        if not candidates:
            raise ValueError("provider name candidates are required")

        canonical = self._normalize(candidates[0])
        if not canonical:
            raise ValueError("provider name must not be empty")

        # Remaining candidates plus caller-supplied aliases, normalized and
        # de-duplicated in declaration order; the canonical name is excluded.
        alias_names: List[str] = []
        taken = {canonical}
        for raw_alias in (*candidates[1:], *(extra_aliases or ())):
            normalized = self._normalize(raw_alias)
            if not normalized or normalized in taken:
                continue
            taken.add(normalized)
            alias_names.append(normalized)

        existing = self._infos.get(canonical)
        if existing is not None:
            if not replace:
                return existing
            # Purge every name still resolving to the replaced entry so that
            # aliases the new registration does not declare stop resolving.
            stale_keys = [key for key, value in self._lookup.items() if value is existing]
            for key in stale_keys:
                del self._lookup[key]

        info = ProviderInfo(
            canonical_name=canonical,
            provider_class=provider_class,
            module=module_name or getattr(provider_class, "__module__", "") or "",
            alias_names=tuple(alias_names),
        )

        self._infos[canonical] = info
        self._lookup[canonical] = info
        for alias in alias_names:
            # A canonical name always resolves to its own provider; an alias
            # must not hijack it regardless of registration order.
            if alias in self._infos:
                continue
            self._lookup[alias] = info
        return info

    def _register_module(self, module: ModuleType) -> None:
        """Register every provider class that is defined in ``module``.

        Classes merely imported into the module, and the ``Provider``,
        ``SearchProvider`` and ``FileProvider`` bases themselves, are ignored.
        A failing registration is logged and does not stop the scan.
        """
        module_name = getattr(module, "__name__", "")
        if not module_name or module_name in self._modules:
            return
        self._modules.add(module_name)

        for attr in dir(module):
            # ``None`` default keeps the scan alive if a listed name cannot
            # be resolved on the module.
            candidate = getattr(module, attr, None)
            if not isinstance(candidate, type):
                continue
            if not issubclass(candidate, Provider):
                continue
            if candidate in {Provider, SearchProvider, FileProvider}:
                continue
            # Only classes defined here, not re-exports from other modules.
            if getattr(candidate, "__module__", "") != module_name:
                continue
            try:
                self.register(candidate, module_name=module_name)
            except Exception as exc:
                log(f"[provider] Failed to register {module_name}.{candidate.__name__}: {exc}", file=sys.stderr)

    def discover(self) -> None:
        """Import and register providers from the package.

        Runs at most once per registry and is a no-op when no package name
        was configured. Sub-modules whose name starts with ``_`` are skipped;
        import failures are logged and the remaining modules are still
        processed.
        """
        if self._discovered or not self.package_name:
            return
        # Flag is set before importing, so later calls (including ones made
        # while provider modules are being imported) return immediately.
        self._discovered = True

        try:
            package = importlib.import_module(self.package_name)
        except Exception as exc:
            log(f"[provider] Failed to import package {self.package_name}: {exc}", file=sys.stderr)
            return

        self._register_module(package)
        package_path = getattr(package, "__path__", None)
        if not package_path:
            return

        for _finder, module_name, _is_pkg in pkgutil.iter_modules(package_path):
            if module_name.startswith("_"):
                continue
            module_path = f"{self.package_name}.{module_name}"
            try:
                module = importlib.import_module(module_path)
            except Exception as exc:
                log(f"[provider] Failed to load {module_path}: {exc}", file=sys.stderr)
                continue
            self._register_module(module)

    def get(self, name: str) -> Optional[ProviderInfo]:
        """Return the entry registered under ``name`` (canonical or alias).

        Triggers discovery first; returns ``None`` for a falsy or unknown name.
        """
        self.discover()
        if not name:
            return None
        return self._lookup.get(self._normalize(name))

    def iter_providers(self) -> Iterable[ProviderInfo]:
        """Return a snapshot tuple of all entries, one per canonical name."""
        self.discover()
        return tuple(self._infos.values())

    def has_name(self, name: str) -> bool:
        """Return True when ``name`` resolves to a registered provider."""
        return self.get(name) is not None
|
||||
|
||||
|
||||
# Module-wide registry bound to the ``Provider`` package. Discovery runs once,
# right here at import time; later ``discover()`` calls made by the lookup
# helpers return immediately.
REGISTRY = ProviderRegistry("Provider")
|
||||
REGISTRY.discover()
|
||||
|
||||
|
||||
def register_provider(
    provider_class: Type[Provider],
    *,
    name: Optional[str] = None,
    aliases: Optional[Sequence[str]] = None,
    module_name: Optional[str] = None,
    replace: bool = False,
) -> ProviderInfo:
    """Add a provider class to the module-level registry.

    Intended for tests and third-party packages. This is a thin wrapper that
    forwards to ``REGISTRY.register``, translating the public keyword names
    (``name`` / ``aliases``) to the registry's ``override_name`` /
    ``extra_aliases``.

    Returns:
        The ``ProviderInfo`` returned by ``REGISTRY.register``.
    """
    forwarded = {
        "override_name": name,
        "extra_aliases": aliases,
        "module_name": module_name,
        "replace": replace,
    }
    return REGISTRY.register(provider_class, **forwarded)
|
||||
|
||||
|
||||
def get_provider_class(name: str) -> Optional[Type[Provider]]:
    """Return the provider class for a registered provider name, if any.

    The lookup goes through ``REGISTRY.get``, so ``name`` is matched after
    stripping and lower-casing and may be either a canonical name or an alias.

    Returns:
        The registered class, or ``None`` when ``name`` is empty or unknown.
    """
    # Single lookup path: the former early return from the static table made
    # the registry lookup unreachable, so aliases were never resolved.
    info = REGISTRY.get(name)
    return None if info is None else info.provider_class
|
||||
|
||||
|
||||
def selection_auto_stage_for_table(
|
||||
table_type: str,
|
||||
stage_args: Optional[Sequence[str]] = None,
|
||||
) -> Optional[list[str]]:
|
||||
"""Return the provider-suggested stage to auto-run for a selected table.
|
||||
|
||||
This is used by the CLI to avoid hardcoding table names and behaviors.
|
||||
"""
|
||||
t = str(table_type or "").strip().lower()
|
||||
if not t:
|
||||
return None
|
||||
|
||||
# Provider tables are usually either:
|
||||
# - "youtube" (no dot)
|
||||
# - "hifi.tracks" (prefix = provider name)
|
||||
provider_key = t.split(".", 1)[0] if "." in t else t
|
||||
provider_class = get_provider_class(provider_key) or get_provider_class(t)
|
||||
if provider_class is None:
|
||||
@@ -82,14 +245,7 @@ def selection_auto_stage_for_table(
|
||||
|
||||
|
||||
def is_known_provider_name(name: str) -> bool:
    """Return True if ``name`` resolves to a registered provider.

    Delegates to ``REGISTRY.has_name``: the name is stripped and lower-cased
    and matched against canonical names and aliases. No provider is
    instantiated and nothing is logged for unknown names, so callers can
    probe UI strings (table names, store names, etc.) freely.
    """
    # The registry is the only source consulted; the previous membership test
    # against the static table returned first and ignored aliases.
    return REGISTRY.has_name(name)
|
||||
|
||||
|
||||
def _supports_search(provider: Provider) -> bool:
|
||||
@@ -107,18 +263,14 @@ def _provider_url_patterns(provider_class: Type[Provider]) -> Sequence[str]:
|
||||
return []
|
||||
|
||||
|
||||
def get_provider(name: str,
|
||||
config: Optional[Dict[str,
|
||||
Any]] = None) -> Optional[Provider]:
|
||||
"""Get a provider by name (unified registry)."""
|
||||
|
||||
provider_class = _PROVIDERS.get((name or "").lower())
|
||||
if provider_class is None:
|
||||
def get_provider(name: str, config: Optional[Dict[str, Any]] = None) -> Optional[Provider]:
|
||||
info = REGISTRY.get(name)
|
||||
if info is None:
|
||||
log(f"[provider] Unknown provider: {name}", file=sys.stderr)
|
||||
return None
|
||||
|
||||
try:
|
||||
provider = provider_class(config)
|
||||
provider = info.provider_class(config)
|
||||
if not provider.validate():
|
||||
log(f"[provider] Provider '{name}' is not available", file=sys.stderr)
|
||||
return None
|
||||
@@ -129,24 +281,18 @@ def get_provider(name: str,
|
||||
|
||||
|
||||
def list_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bool]:
    """List all providers and their availability.

    Each registered provider class is instantiated with ``config`` and asked
    to ``validate()``.

    Args:
        config: Configuration mapping handed to every provider constructor.

    Returns:
        Mapping of canonical provider name to the result of ``validate()``;
        ``False`` when construction or validation raised.
    """
    availability: Dict[str, bool] = {}
    for info in REGISTRY.iter_providers():
        try:
            provider = info.provider_class(config)
            availability[info.canonical_name] = provider.validate()
        except Exception:
            # Best-effort probe: a provider that cannot even be constructed
            # or validated is reported as unavailable instead of aborting.
            availability[info.canonical_name] = False
    return availability
|
||||
|
||||
|
||||
def get_search_provider(name: str,
|
||||
config: Optional[Dict[str,
|
||||
Any]] = None) -> Optional[SearchProvider]:
|
||||
"""Get a search-capable provider by name (compat API)."""
|
||||
|
||||
config: Optional[Dict[str, Any]] = None) -> Optional[SearchProvider]:
|
||||
provider = get_provider(name, config)
|
||||
if provider is None:
|
||||
return None
|
||||
@@ -157,26 +303,20 @@ def get_search_provider(name: str,
|
||||
|
||||
|
||||
def list_search_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bool]:
    """List all providers and whether each is usable for searching.

    A provider is reported ``True`` only when it validates with ``config``
    and its registry entry has ``supports_search`` set.

    Args:
        config: Configuration mapping handed to every provider constructor.

    Returns:
        Mapping of canonical provider name to a bool; ``False`` when
        construction or validation raised.
    """
    availability: Dict[str, bool] = {}
    for info in REGISTRY.iter_providers():
        try:
            provider = info.provider_class(config)
            availability[info.canonical_name] = bool(
                provider.validate() and info.supports_search
            )
        except Exception:
            # Best-effort probe: failures count as "not available".
            availability[info.canonical_name] = False
    return availability
|
||||
|
||||
|
||||
def get_file_provider(name: str,
|
||||
config: Optional[Dict[str,
|
||||
Any]] = None) -> Optional[FileProvider]:
|
||||
"""Get an upload-capable provider by name (compat API)."""
|
||||
|
||||
config: Optional[Dict[str, Any]] = None) -> Optional[FileProvider]:
|
||||
provider = get_provider(name, config)
|
||||
if provider is None:
|
||||
return None
|
||||
@@ -187,28 +327,19 @@ def get_file_provider(name: str,
|
||||
|
||||
|
||||
def list_file_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bool]:
    """List all providers and whether each is usable for uploading.

    A provider is reported ``True`` only when it validates with ``config``
    and its registry entry has ``supports_upload`` set.

    Args:
        config: Configuration mapping handed to every provider constructor.

    Returns:
        Mapping of canonical provider name to a bool; ``False`` when
        construction or validation raised.
    """
    availability: Dict[str, bool] = {}
    for info in REGISTRY.iter_providers():
        try:
            provider = info.provider_class(config)
            availability[info.canonical_name] = bool(
                provider.validate() and info.supports_upload
            )
        except Exception:
            # Best-effort probe: failures count as "not available".
            availability[info.canonical_name] = False
    return availability
|
||||
|
||||
|
||||
def match_provider_name_for_url(url: str) -> Optional[str]:
|
||||
"""Return a registered provider name that claims the URL's domain.
|
||||
|
||||
Providers can declare domains via class attribute `URL` (preferred) or `URL_DOMAINS`.
|
||||
This matcher is intentionally cheap (no provider instantiation, no network).
|
||||
"""
|
||||
|
||||
raw_url = str(url or "").strip()
|
||||
raw_url_lower = raw_url.lower()
|
||||
try:
|
||||
@@ -219,11 +350,6 @@ def match_provider_name_for_url(url: str) -> Optional[str]:
|
||||
host = ""
|
||||
path = ""
|
||||
|
||||
# Prefer Internet Archive for archive.org links unless the URL clearly refers
|
||||
# to a borrow/loan flow (handled by OpenLibrary provider).
|
||||
#
|
||||
# This keeps direct downloads and item pages routed to `internetarchive`, while
|
||||
# preserving OpenLibrary's scripted borrow pipeline for loan/reader URLs.
|
||||
def _norm_host(h: str) -> str:
|
||||
h_norm = str(h or "").strip().lower()
|
||||
if h_norm.startswith("www."):
|
||||
@@ -234,47 +360,45 @@ def match_provider_name_for_url(url: str) -> Optional[str]:
|
||||
|
||||
if host_norm:
|
||||
if host_norm == "openlibrary.org" or host_norm.endswith(".openlibrary.org"):
|
||||
return "openlibrary" if "openlibrary" in _PROVIDERS else None
|
||||
return "openlibrary" if REGISTRY.has_name("openlibrary") else None
|
||||
|
||||
if host_norm == "archive.org" or host_norm.endswith(".archive.org"):
|
||||
low_path = str(path or "").lower()
|
||||
is_borrowish = (
|
||||
low_path.startswith("/borrow/") or low_path.startswith("/stream/")
|
||||
or low_path.startswith("/services/loans/") or "/services/loans/" in low_path
|
||||
low_path.startswith("/borrow/")
|
||||
or low_path.startswith("/stream/")
|
||||
or low_path.startswith("/services/loans/")
|
||||
or "/services/loans/" in low_path
|
||||
)
|
||||
if is_borrowish:
|
||||
return "openlibrary" if "openlibrary" in _PROVIDERS else None
|
||||
return "internetarchive" if "internetarchive" in _PROVIDERS else None
|
||||
return "openlibrary" if REGISTRY.has_name("openlibrary") else None
|
||||
return "internetarchive" if REGISTRY.has_name("internetarchive") else None
|
||||
|
||||
for name, provider_class in _PROVIDERS.items():
|
||||
domains = _provider_url_patterns(provider_class)
|
||||
for info in REGISTRY.iter_providers():
|
||||
domains = _provider_url_patterns(info.provider_class)
|
||||
if not domains:
|
||||
continue
|
||||
for d in domains:
|
||||
dom_raw = str(d or "").strip()
|
||||
for domain in domains:
|
||||
dom_raw = str(domain or "").strip()
|
||||
dom = dom_raw.lower()
|
||||
if not dom:
|
||||
continue
|
||||
# Scheme-like patterns (magnet:, http://example) still use prefix match.
|
||||
if dom.startswith("magnet:") or dom.startswith("http://") or dom.startswith("https://"):
|
||||
if raw_url_lower.startswith(dom):
|
||||
return name
|
||||
return info.canonical_name
|
||||
continue
|
||||
|
||||
dom_norm = _norm_host(dom)
|
||||
if not dom_norm or not host_norm:
|
||||
continue
|
||||
if host_norm == dom_norm or host_norm.endswith("." + dom_norm):
|
||||
return name
|
||||
return info.canonical_name
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def get_provider_for_url(url: str,
|
||||
config: Optional[Dict[str,
|
||||
Any]] = None) -> Optional[Provider]:
|
||||
"""Instantiate and return the matching provider for a URL, if any."""
|
||||
|
||||
config: Optional[Dict[str, Any]] = None) -> Optional[Provider]:
|
||||
name = match_provider_name_for_url(url)
|
||||
if not name:
|
||||
return None
|
||||
@@ -282,10 +406,12 @@ def get_provider_for_url(url: str,
|
||||
|
||||
|
||||
__all__ = [
|
||||
"SearchResult",
|
||||
"ProviderInfo",
|
||||
"Provider",
|
||||
"SearchProvider",
|
||||
"FileProvider",
|
||||
"SearchResult",
|
||||
"register_provider",
|
||||
"get_provider",
|
||||
"list_providers",
|
||||
"get_search_provider",
|
||||
@@ -294,7 +420,7 @@ __all__ = [
|
||||
"list_file_providers",
|
||||
"match_provider_name_for_url",
|
||||
"get_provider_for_url",
|
||||
"download_soulseek_file",
|
||||
"get_provider_class",
|
||||
"selection_auto_stage_for_table",
|
||||
"download_soulseek_file",
|
||||
]
|
||||
|
||||
Reference in New Issue
Block a user