This commit is contained in:
2026-01-03 03:37:48 -08:00
parent 6e9a0c28ff
commit 73f3005393
23 changed files with 1791 additions and 442 deletions

View File

@@ -214,10 +214,15 @@ def _archive_id_from_url(url: str) -> str:
# - /details/<id>/...
# - /borrow/<id>
# - /download/<id>/...
if len(parts) >= 2 and parts[0].lower() in {"details",
"borrow",
"download",
"stream"}:
# - /stream/<id>/...
# - /metadata/<id>
if len(parts) >= 2 and parts[0].lower() in {
"details",
"borrow",
"download",
"stream",
"metadata",
}:
return str(parts[1]).strip()
# Sometimes the identifier is the first segment.
@@ -225,37 +230,38 @@ def _archive_id_from_url(url: str) -> str:
first = str(parts[0]).strip()
if first and first.lower() not in {"account",
"services",
"metadata",
"search",
"advancedsearch.php"}:
return first
def edition_id_from_url(u: str) -> str:
    """Return the path segment that follows ``/books/`` in a URL.

    The URL is coerced with ``str`` and parsed with ``urlparse``. When the
    first non-empty path segment is ``books`` (case-insensitive) and a second
    segment exists, that second segment is returned with surrounding
    whitespace removed. In every other case, including a URL that fails to
    parse, the result is an empty string.
    """
    try:
        path = urlparse(str(u)).path or ""
    except Exception:
        return ""

    segments = [piece for piece in path.split("/") if piece]
    if len(segments) < 2:
        return ""
    if str(segments[0]).lower() != "books":
        return ""
    return str(segments[1]).strip()
def title_hint_from_url_slug(u: str) -> str:
    """Derive a human-friendly title hint from the last path segment of a URL.

    The final non-empty path segment is percent-decoded, its underscores are
    turned into spaces, and surrounding whitespace is trimmed.

    Args:
        u: URL whose path carries the slug; coerced with ``str``.

    Returns:
        The cleaned slug, or ``"OpenLibrary"`` when the URL cannot be parsed
        or no usable text remains after cleaning.
    """
    # Function-scope import so the block does not depend on the module's
    # import list beyond ``urlparse``.
    from urllib.parse import unquote

    try:
        path = urlparse(str(u)).path or ""
        segments = [piece for piece in path.split("/") if piece]
    except Exception:
        segments = []

    # Decode only the final segment, after splitting, so an escaped "%2F"
    # inside the slug is not mistaken for a path separator.
    slug = unquote(segments[-1]) if segments else ""

    # Trim *after* the underscore replacement: a slug such as "___" would
    # otherwise become a whitespace-only (but truthy) title and bypass the
    # fallback below.
    title = slug.replace("_", " ").strip()
    return title or "OpenLibrary"
return ""
def edition_id_from_url(u: str) -> str:
    """Return the path segment that follows ``/books/`` in a URL.

    The URL is coerced with ``str`` and parsed with ``urlparse``. When the
    first non-empty path segment is ``books`` (case-insensitive) and a second
    segment exists, that second segment is returned with surrounding
    whitespace removed. In every other case, including a URL that fails to
    parse, the result is an empty string.
    """
    try:
        path = urlparse(str(u)).path or ""
    except Exception:
        return ""

    segments = [piece for piece in path.split("/") if piece]
    if len(segments) < 2:
        return ""
    if str(segments[0]).lower() != "books":
        return ""
    return str(segments[1]).strip()
def title_hint_from_url_slug(u: str) -> str:
    """Derive a human-friendly title hint from the last path segment of a URL.

    The final non-empty path segment is percent-decoded, its underscores are
    turned into spaces, and surrounding whitespace is trimmed.

    Args:
        u: URL whose path carries the slug; coerced with ``str``.

    Returns:
        The cleaned slug, or ``"OpenLibrary"`` when the URL cannot be parsed
        or no usable text remains after cleaning.
    """
    # Function-scope import so the block does not depend on the module's
    # import list beyond ``urlparse``.
    from urllib.parse import unquote

    try:
        path = urlparse(str(u)).path or ""
        segments = [piece for piece in path.split("/") if piece]
    except Exception:
        segments = []

    # Decode only the final segment, after splitting, so an escaped "%2F"
    # inside the slug is not mistaken for a path separator.
    slug = unquote(segments[-1]) if segments else ""

    # Trim *after* the underscore replacement: a slug such as "___" would
    # otherwise become a whitespace-only (but truthy) title and bypass the
    # fallback below.
    title = slug.replace("_", " ").strip()
    return title or "OpenLibrary"
def _coerce_archive_field_list(value: Any) -> List[str]:
"""Coerce an Archive.org metadata field to a list of strings."""
if value is None:
@@ -433,6 +439,22 @@ def _fetch_archive_item_metadata(archive_id: str,
class OpenLibrary(Provider):
# Maps the "openlibrary" table name to the stage list ["download-file"].
# NOTE(review): presumably read by the pipeline to auto-run stages for rows
# of that table — confirm against the consumer of TABLE_AUTO_STAGES.
TABLE_AUTO_STAGES = {
"openlibrary": ["download-file"],
}
# Names of the config keys this provider declares as required.
# NOTE(review): these look like the Archive.org credentials needed by the
# borrow flow — confirm against _credential_archive.
REQUIRED_CONFIG_KEYS = (
"email",
"password",
)
# Fallback scale used by _archive_scale_from_config when the configured
# quality is missing or invalid.
DEFAULT_ARCHIVE_SCALE = 4
# Named quality levels accepted by _archive_scale_from_config and the scale
# each one resolves to (a smaller number is the higher quality level here).
QUALITY_TO_ARCHIVE_SCALE = {
"high": 2,
"medium": 5,
"low": 8,
}
# Domains that should be routed to this provider when the user supplies a URL.
# (Used by ProviderCore.registry.match_provider_name_for_url)
URL_DOMAINS = (
@@ -449,6 +471,41 @@ class OpenLibrary(Provider):
class BookNotAvailableError(Exception):
    """Raised when a book is not available for borrowing (waitlisted/in use).

    Plain ``Exception`` subclass; it defines no extra attributes or behavior.
    """
def search_result_from_url(self, url: str) -> Optional[SearchResult]:
    """Create a bare-bones SearchResult for an OpenLibrary/Archive URL.

    The title comes from ``title_hint_from_url_slug`` and the path is the URL
    itself. ``full_metadata`` holds ``openlibrary_id`` only when
    ``edition_id_from_url`` finds a non-empty id; otherwise it is empty.
    """
    metadata = {}
    ol_id = edition_id_from_url(url)
    if ol_id:
        metadata["openlibrary_id"] = ol_id

    return SearchResult(
        table="openlibrary",
        title=title_hint_from_url_slug(url),
        path=str(url),
        media_kind="book",
        full_metadata=metadata,
    )
def download_url(
    self,
    url: str,
    output_dir: Path,
    progress_callback: Optional[Callable[[str, int, Optional[int], str], None]] = None,
) -> Optional[Dict[str, Any]]:
    """Fetch a book straight from an OpenLibrary/Archive URL.

    A SearchResult is built via ``self.search_result_from_url`` and handed to
    ``self.download`` together with ``output_dir`` and ``progress_callback``.

    Returns:
        ``{"path": Path, "search_result": SearchResult}`` on success, or
        ``None`` when no SearchResult could be built or the download
        returned a falsy value.
    """
    result = self.search_result_from_url(url)
    if result is not None:
        saved_to = self.download(result, output_dir, progress_callback)
        if saved_to:
            return {
                "path": Path(saved_to),
                "search_result": result,
            }
    return None
@staticmethod
def _credential_archive(config: Dict[str,
Any]) -> Tuple[Optional[str],
@@ -491,6 +548,57 @@ class OpenLibrary(Provider):
str(password) if password is not None else None
)
@classmethod
def _archive_scale_from_config(cls, config: Dict[str, Any]) -> int:
"""Resolve Archive.org book-reader scale from provider config.
Config:
[provider=OpenLibrary]
quality="medium" # High=2, Medium=5, Low=8
Default when missing/invalid: 4.
"""
default_scale = int(getattr(cls, "DEFAULT_ARCHIVE_SCALE", 4) or 4)
if not isinstance(config, dict):
return default_scale
provider_config = config.get("provider", {})
openlibrary_config = None
if isinstance(provider_config, dict):
openlibrary_config = provider_config.get("openlibrary")
if not isinstance(openlibrary_config, dict):
openlibrary_config = {}
raw_quality = openlibrary_config.get("quality")
if raw_quality is None:
return default_scale
if isinstance(raw_quality, (int, float)):
try:
val = int(raw_quality)
except Exception:
return default_scale
return val if val > 0 else default_scale
try:
q = str(raw_quality).strip().lower()
except Exception:
return default_scale
if not q:
return default_scale
mapped = cls.QUALITY_TO_ARCHIVE_SCALE.get(q)
if isinstance(mapped, int) and mapped > 0:
return mapped
# Allow numeric strings (e.g. quality="4").
try:
val = int(q)
except Exception:
return default_scale
return val if val > 0 else default_scale
@staticmethod
def _archive_error_body(response: requests.Response) -> str:
try:
@@ -1444,64 +1552,6 @@ class OpenLibrary(Provider):
log("[openlibrary] Direct download failed", file=sys.stderr)
return None
# --- Convenience helpers for URL-driven downloads (used by download-file) ---
def search_result_from_url(self, url: str) -> Optional[SearchResult]:
    """Create a bare-bones SearchResult for an OpenLibrary URL.

    The title comes from ``title_hint_from_url_slug`` and the path is the URL
    itself. ``full_metadata`` holds ``openlibrary_id`` only when
    ``edition_id_from_url`` finds a non-empty id; otherwise it is empty.
    """
    metadata = {}
    ol_id = edition_id_from_url(url)
    if ol_id:
        metadata["openlibrary_id"] = ol_id

    return SearchResult(
        table="openlibrary",
        title=title_hint_from_url_slug(url),
        path=str(url),
        media_kind="book",
        full_metadata=metadata,
    )
def download_url(
    self,
    url: str,
    output_dir: Path,
    progress_callback: Optional[Callable[[str, int, Optional[int], str], None]] = None,
) -> Optional[Dict[str, Any]]:
    """Fetch a book straight from an OpenLibrary URL.

    A SearchResult is built via ``self.search_result_from_url`` and handed to
    ``self.download`` together with ``output_dir`` and ``progress_callback``.

    Returns:
        ``{"path": Path, "search_result": SearchResult}`` on success, or
        ``None`` when no SearchResult could be built or the download
        returned a falsy value.
    """
    result = self.search_result_from_url(url)
    if result is not None:
        saved_to = self.download(result, output_dir, progress_callback)
        if saved_to:
            return {
                "path": Path(saved_to),
                "search_result": result,
            }
    return None
try:
if progress_callback is not None:
progress_callback("step", 0, None, "direct download")
except Exception:
pass
out_path = unique_path(output_dir / f"{safe_title}.pdf")
ok = download_file(
pdf_url,
out_path,
session=self._session,
progress_callback=(
(
lambda downloaded, total, label:
progress_callback("bytes", downloaded, total, label)
) if progress_callback is not None else None
),
)
if ok:
return out_path
log("[openlibrary] Direct download failed", file=sys.stderr)
return None
# 2) Borrow flow (credentials required).
try:
email, password = self._credential_archive(self.config or {})
@@ -1510,6 +1560,15 @@ class OpenLibrary(Provider):
"[openlibrary] Archive credentials missing; cannot borrow",
file=sys.stderr
)
try:
from SYS.rich_display import show_provider_config_panel
show_provider_config_panel(
"openlibrary",
keys=self.required_config_keys(),
)
except Exception:
pass
return None
lendable = True
@@ -1590,7 +1649,7 @@ class OpenLibrary(Provider):
n_threads=10,
directory=temp_dir,
links=links,
scale=3,
scale=self._archive_scale_from_config(self.config or {}),
book_id=archive_id,
progress_callback=(
(