j
This commit is contained in:
@@ -229,6 +229,30 @@ def _archive_id_from_url(url: str) -> str:
|
||||
"advancedsearch.php"}:
|
||||
return first
|
||||
|
||||
|
||||
def edition_id_from_url(u: str) -> str:
    """Extract an OpenLibrary edition id (OL...M) from a book URL.

    The id is taken to be the path segment that immediately follows the
    first ``books`` segment (compared case-insensitively) which has a
    successor. The segment is returned as-is apart from whitespace
    stripping; it is not checked against the ``OL...M`` shape.

    Args:
        u: Book URL. Scheme-less input such as
            ``openlibrary.org/books/OL1M/Title`` is accepted as well.

    Returns:
        The segment following ``books``, or ``""`` when the URL cannot be
        parsed or contains no such segment.
    """
    try:
        path = urlparse(str(u)).path or ""
    except Exception:
        # Deliberate best-effort: a malformed URL means "no id", never an error.
        return ""

    parts = [segment for segment in path.split("/") if segment]

    # Scan instead of only testing parts[0]: without a scheme, urlparse leaves
    # the host name inside the path, which pushes "books" to a later index.
    for index, segment in enumerate(parts[:-1]):
        if segment.lower() == "books":
            return parts[index + 1].strip()
    return ""
|
||||
|
||||
|
||||
def title_hint_from_url_slug(u: str) -> str:
    """Derive a human-friendly title hint from the URL slug.

    The slug is the last non-empty segment of the URL path. Underscores are
    turned into spaces and surrounding whitespace is removed.

    Args:
        u: URL to read the slug from. ``None`` is treated like an empty URL.

    Returns:
        The cleaned-up slug, or ``"OpenLibrary"`` when the URL cannot be
        parsed or yields no usable slug.
    """
    try:
        # Map None to "" so that str(None) does not surface as the title "None".
        path = urlparse("" if u is None else str(u)).path or ""
    except Exception:
        # Deliberate best-effort: fall back to the generic title on bad input.
        path = ""

    segments = [segment for segment in path.split("/") if segment]
    slug = segments[-1] if segments else ""

    # Replace underscores *before* stripping; otherwise leading or trailing
    # underscores turn into padding and "___" becomes a blank, truthy title.
    slug = slug.replace("_", " ").strip()
    return slug or "OpenLibrary"
|
||||
|
||||
return ""
|
||||
|
||||
|
||||
@@ -415,6 +439,7 @@ class OpenLibrary(Provider):
|
||||
"openlibrary.org",
|
||||
"archive.org",
|
||||
)
|
||||
URL = URL_DOMAINS
|
||||
"""Search provider for OpenLibrary books + Archive.org direct/borrow download."""
|
||||
|
||||
def __init__(self, config: Optional[Dict[str, Any]] = None):
|
||||
@@ -1419,6 +1444,64 @@ class OpenLibrary(Provider):
|
||||
log("[openlibrary] Direct download failed", file=sys.stderr)
|
||||
return None
|
||||
|
||||
# --- Convenience helpers for URL-driven downloads (used by download-file) ---
|
||||
|
||||
def search_result_from_url(self, url: str) -> Optional[SearchResult]:
    """Wrap a bare OpenLibrary URL in a minimal SearchResult.

    The result uses the "openlibrary" table and "book" media kind, the URL
    itself as path, and a title derived from the URL slug. The edition id,
    when one can be extracted from the URL, is stored under the
    ``openlibrary_id`` metadata key; otherwise the metadata is empty.
    """
    metadata: Dict[str, Any] = {}
    ol_id = edition_id_from_url(url)
    if ol_id:
        metadata["openlibrary_id"] = ol_id

    return SearchResult(
        table="openlibrary",
        title=title_hint_from_url_slug(url),
        path=str(url),
        media_kind="book",
        full_metadata=metadata,
    )
|
||||
|
||||
def download_url(
    self,
    url: str,
    output_dir: Path,
    progress_callback: Optional[Callable[[str, int, Optional[int], str], None]] = None,
) -> Optional[Dict[str, Any]]:
    """Download a book straight from an OpenLibrary URL.

    A SearchResult is built from the URL via ``search_result_from_url`` and
    handed to ``self.download`` together with ``output_dir`` and
    ``progress_callback``.

    Returns:
        ``{"path": Path, "search_result": SearchResult}`` when the download
        yields a truthy result; ``None`` when no SearchResult could be built
        or the download returned nothing.
    """
    result = self.search_result_from_url(url)
    if result is None:
        return None

    saved = self.download(result, output_dir, progress_callback)
    return {"path": Path(saved), "search_result": result} if saved else None
|
||||
try:
|
||||
if progress_callback is not None:
|
||||
progress_callback("step", 0, None, "direct download")
|
||||
except Exception:
|
||||
pass
|
||||
out_path = unique_path(output_dir / f"{safe_title}.pdf")
|
||||
ok = download_file(
|
||||
pdf_url,
|
||||
out_path,
|
||||
session=self._session,
|
||||
progress_callback=(
|
||||
(
|
||||
lambda downloaded, total, label:
|
||||
progress_callback("bytes", downloaded, total, label)
|
||||
) if progress_callback is not None else None
|
||||
),
|
||||
)
|
||||
if ok:
|
||||
return out_path
|
||||
log("[openlibrary] Direct download failed", file=sys.stderr)
|
||||
return None
|
||||
|
||||
# 2) Borrow flow (credentials required).
|
||||
try:
|
||||
email, password = self._credential_archive(self.config or {})
|
||||
|
||||
Reference in New Issue
Block a user