This commit is contained in:
2026-01-04 02:23:50 -08:00
parent 3acf21a673
commit 8545367e28
6 changed files with 2925 additions and 94 deletions

View File

@@ -224,11 +224,19 @@ def match_provider_name_for_url(url: str) -> Optional[str]:
#
# This keeps direct downloads and item pages routed to `internetarchive`, while
# preserving OpenLibrary's scripted borrow pipeline for loan/reader URLs.
if host:
if host == "openlibrary.org" or host.endswith(".openlibrary.org"):
def _norm_host(h: str) -> str:
h_norm = str(h or "").strip().lower()
if h_norm.startswith("www."):
h_norm = h_norm[4:]
return h_norm
host_norm = _norm_host(host)
if host_norm:
if host_norm == "openlibrary.org" or host_norm.endswith(".openlibrary.org"):
return "openlibrary" if "openlibrary" in _PROVIDERS else None
if host == "archive.org" or host.endswith(".archive.org"):
if host_norm == "archive.org" or host_norm.endswith(".archive.org"):
low_path = str(path or "").lower()
is_borrowish = (
low_path.startswith("/borrow/") or low_path.startswith("/stream/")
@@ -243,16 +251,20 @@ def match_provider_name_for_url(url: str) -> Optional[str]:
if not domains:
continue
for d in domains:
dom = str(d or "").strip().lower()
dom_raw = str(d or "").strip()
dom = dom_raw.lower()
if not dom:
continue
if raw_url_lower.startswith(dom):
return name
for d in domains:
dom = str(d or "").strip().lower()
if not dom or not host:
# Scheme-like patterns (magnet:, http://example) still use prefix match.
if dom.startswith("magnet:") or dom.startswith("http://") or dom.startswith("https://"):
if raw_url_lower.startswith(dom):
return name
continue
if host == dom or host.endswith("." + dom):
dom_norm = _norm_host(dom)
if not dom_norm or not host_norm:
continue
if host_norm == dom_norm or host_norm.endswith("." + dom_norm):
return name
return None