j
This commit is contained in:
@@ -68,6 +68,13 @@ def _supports_upload(provider: Provider) -> bool:
|
||||
return provider.__class__.upload is not Provider.upload
|
||||
|
||||
|
||||
def _provider_url_patterns(provider_class: Type[Provider]) -> Sequence[str]:
|
||||
try:
|
||||
return list(provider_class.url_patterns())
|
||||
except Exception:
|
||||
return []
|
||||
|
||||
|
||||
def get_provider(name: str,
|
||||
config: Optional[Dict[str,
|
||||
Any]] = None) -> Optional[Provider]:
|
||||
@@ -166,47 +173,53 @@ def list_file_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bo
|
||||
def match_provider_name_for_url(url: str) -> Optional[str]:
|
||||
"""Return a registered provider name that claims the URL's domain.
|
||||
|
||||
Providers can declare domains via a class attribute `URL_DOMAINS` (sequence of strings).
|
||||
Providers can declare domains via class attribute `URL` (preferred) or `URL_DOMAINS`.
|
||||
This matcher is intentionally cheap (no provider instantiation, no network).
|
||||
"""
|
||||
|
||||
raw_url = str(url or "").strip()
|
||||
raw_url_lower = raw_url.lower()
|
||||
try:
|
||||
parsed = urlparse(str(url))
|
||||
parsed = urlparse(raw_url)
|
||||
host = (parsed.hostname or "").strip().lower()
|
||||
path = (parsed.path or "").strip()
|
||||
except Exception:
|
||||
host = ""
|
||||
path = ""
|
||||
|
||||
if not host:
|
||||
return None
|
||||
|
||||
# Prefer Internet Archive for archive.org links unless the URL clearly refers
|
||||
# to a borrow/loan flow (handled by OpenLibrary provider).
|
||||
#
|
||||
# This keeps direct downloads and item pages routed to `internetarchive`, while
|
||||
# preserving OpenLibrary's scripted borrow pipeline for loan/reader URLs.
|
||||
if host == "openlibrary.org" or host.endswith(".openlibrary.org"):
|
||||
return "openlibrary" if "openlibrary" in _PROVIDERS else None
|
||||
|
||||
if host == "archive.org" or host.endswith(".archive.org"):
|
||||
low_path = str(path or "").lower()
|
||||
is_borrowish = (
|
||||
low_path.startswith("/borrow/") or low_path.startswith("/stream/")
|
||||
or low_path.startswith("/services/loans/") or "/services/loans/" in low_path
|
||||
)
|
||||
if is_borrowish:
|
||||
if host:
|
||||
if host == "openlibrary.org" or host.endswith(".openlibrary.org"):
|
||||
return "openlibrary" if "openlibrary" in _PROVIDERS else None
|
||||
return "internetarchive" if "internetarchive" in _PROVIDERS else None
|
||||
|
||||
if host == "archive.org" or host.endswith(".archive.org"):
|
||||
low_path = str(path or "").lower()
|
||||
is_borrowish = (
|
||||
low_path.startswith("/borrow/") or low_path.startswith("/stream/")
|
||||
or low_path.startswith("/services/loans/") or "/services/loans/" in low_path
|
||||
)
|
||||
if is_borrowish:
|
||||
return "openlibrary" if "openlibrary" in _PROVIDERS else None
|
||||
return "internetarchive" if "internetarchive" in _PROVIDERS else None
|
||||
|
||||
for name, provider_class in _PROVIDERS.items():
|
||||
domains = getattr(provider_class, "URL_DOMAINS", None)
|
||||
if not isinstance(domains, (list, tuple)):
|
||||
domains = _provider_url_patterns(provider_class)
|
||||
if not domains:
|
||||
continue
|
||||
for d in domains:
|
||||
dom = str(d or "").strip().lower()
|
||||
if not dom:
|
||||
continue
|
||||
if raw_url_lower.startswith(dom):
|
||||
return name
|
||||
for d in domains:
|
||||
dom = str(d or "").strip().lower()
|
||||
if not dom or not host:
|
||||
continue
|
||||
if host == dom or host.endswith("." + dom):
|
||||
return name
|
||||
|
||||
|
||||
Reference in New Issue
Block a user