j
This commit is contained in:
@@ -214,10 +214,15 @@ def _archive_id_from_url(url: str) -> str:
|
||||
# - /details/<id>/...
|
||||
# - /borrow/<id>
|
||||
# - /download/<id>/...
|
||||
if len(parts) >= 2 and parts[0].lower() in {"details",
|
||||
"borrow",
|
||||
"download",
|
||||
"stream"}:
|
||||
# - /stream/<id>/...
|
||||
# - /metadata/<id>
|
||||
if len(parts) >= 2 and parts[0].lower() in {
|
||||
"details",
|
||||
"borrow",
|
||||
"download",
|
||||
"stream",
|
||||
"metadata",
|
||||
}:
|
||||
return str(parts[1]).strip()
|
||||
|
||||
# Sometimes the identifier is the first segment.
|
||||
@@ -225,37 +230,38 @@ def _archive_id_from_url(url: str) -> str:
|
||||
first = str(parts[0]).strip()
|
||||
if first and first.lower() not in {"account",
|
||||
"services",
|
||||
"metadata",
|
||||
"search",
|
||||
"advancedsearch.php"}:
|
||||
return first
|
||||
|
||||
|
||||
def edition_id_from_url(u: str) -> str:
    """Extract an OpenLibrary edition id (OL...M) from a book URL.

    The id is taken to be the path segment that directly follows a leading
    ``books`` segment (matched case-insensitively), e.g.
    ``/books/OL7353617M/Some_Title`` -> ``"OL7353617M"``. The segment is
    returned stripped but is not validated against the ``OL...M`` pattern.

    Args:
        u: The URL to inspect; it is coerced with ``str()`` before parsing.

    Returns:
        The edition id, or ``""`` when the URL cannot be parsed or its path
        does not start with ``books/<id>``.
    """
    try:
        path = urlparse(str(u)).path or ""
    except ValueError:
        # urlparse signals malformed input (e.g. unbalanced IPv6 brackets)
        # with ValueError; treat such URLs as "no edition id".
        return ""

    segments = [segment for segment in path.split("/") if segment]
    if len(segments) >= 2 and segments[0].lower() == "books":
        return segments[1].strip()
    return ""
|
||||
|
||||
|
||||
def title_hint_from_url_slug(u: str) -> str:
    """Derive a human-friendly title hint from the URL slug.

    The slug is the last non-empty path segment of the URL, with underscores
    shown as spaces and surrounding whitespace removed.

    Args:
        u: The URL to inspect; it is coerced with ``str()`` before parsing.

    Returns:
        The cleaned slug, or ``"OpenLibrary"`` when the URL cannot be parsed
        or yields no usable slug.
    """
    try:
        path = urlparse(str(u)).path or ""
    except ValueError:
        # urlparse signals malformed input with ValueError; fall through to
        # the generic fallback title.
        path = ""

    segments = [segment for segment in path.split("/") if segment]
    slug = segments[-1] if segments else ""

    # Convert underscores first and strip afterwards, so leading/trailing
    # underscores do not leave stray spaces and an all-underscore slug falls
    # back to the default instead of producing a whitespace-only title.
    return slug.replace("_", " ").strip() or "OpenLibrary"
|
||||
|
||||
return ""
|
||||
|
||||
|
||||
def edition_id_from_url(u: str) -> str:
    """Extract an OpenLibrary edition id (OL...M) from a book URL.

    The id is taken to be the path segment that directly follows a leading
    ``books`` segment (matched case-insensitively), e.g.
    ``/books/OL7353617M/Some_Title`` -> ``"OL7353617M"``. The segment is
    returned stripped but is not validated against the ``OL...M`` pattern.

    Args:
        u: The URL to inspect; it is coerced with ``str()`` before parsing.

    Returns:
        The edition id, or ``""`` when the URL cannot be parsed or its path
        does not start with ``books/<id>``.
    """
    try:
        path = urlparse(str(u)).path or ""
    except ValueError:
        # urlparse signals malformed input (e.g. unbalanced IPv6 brackets)
        # with ValueError; treat such URLs as "no edition id".
        return ""

    segments = [segment for segment in path.split("/") if segment]
    if len(segments) >= 2 and segments[0].lower() == "books":
        return segments[1].strip()
    return ""
|
||||
|
||||
|
||||
def title_hint_from_url_slug(u: str) -> str:
    """Derive a human-friendly title hint from the URL slug.

    The slug is the last non-empty path segment of the URL, with underscores
    shown as spaces and surrounding whitespace removed.

    Args:
        u: The URL to inspect; it is coerced with ``str()`` before parsing.

    Returns:
        The cleaned slug, or ``"OpenLibrary"`` when the URL cannot be parsed
        or yields no usable slug.
    """
    try:
        path = urlparse(str(u)).path or ""
    except ValueError:
        # urlparse signals malformed input with ValueError; fall through to
        # the generic fallback title.
        path = ""

    segments = [segment for segment in path.split("/") if segment]
    slug = segments[-1] if segments else ""

    # Convert underscores first and strip afterwards, so leading/trailing
    # underscores do not leave stray spaces and an all-underscore slug falls
    # back to the default instead of producing a whitespace-only title.
    return slug.replace("_", " ").strip() or "OpenLibrary"
|
||||
|
||||
|
||||
def _coerce_archive_field_list(value: Any) -> List[str]:
|
||||
"""Coerce an Archive.org metadata field to a list of strings."""
|
||||
if value is None:
|
||||
@@ -433,6 +439,22 @@ def _fetch_archive_item_metadata(archive_id: str,
|
||||
|
||||
|
||||
class OpenLibrary(Provider):
|
||||
|
||||
TABLE_AUTO_STAGES = {
|
||||
"openlibrary": ["download-file"],
|
||||
}
|
||||
|
||||
REQUIRED_CONFIG_KEYS = (
|
||||
"email",
|
||||
"password",
|
||||
)
|
||||
|
||||
DEFAULT_ARCHIVE_SCALE = 4
|
||||
QUALITY_TO_ARCHIVE_SCALE = {
|
||||
"high": 2,
|
||||
"medium": 5,
|
||||
"low": 8,
|
||||
}
|
||||
# Domains that should be routed to this provider when the user supplies a URL.
|
||||
# (Used by ProviderCore.registry.match_provider_name_for_url)
|
||||
URL_DOMAINS = (
|
||||
@@ -449,6 +471,41 @@ class OpenLibrary(Provider):
|
||||
class BookNotAvailableError(Exception):
|
||||
"""Raised when a book is not available for borrowing (waitlisted/in use)."""
|
||||
|
||||
def search_result_from_url(self, url: str) -> Optional[SearchResult]:
    """Build a minimal SearchResult from a bare OpenLibrary/Archive URL.

    The result targets the ``openlibrary`` table with media kind ``book``,
    uses the URL slug as its title and the URL itself as its path. When an
    edition id can be read from the URL it is stored in ``full_metadata``
    under ``openlibrary_id``; otherwise ``full_metadata`` is empty.
    """
    metadata = {}
    edition = edition_id_from_url(url)
    if edition:
        metadata["openlibrary_id"] = edition

    return SearchResult(
        table="openlibrary",
        title=title_hint_from_url_slug(url),
        path=str(url),
        media_kind="book",
        full_metadata=metadata,
    )
|
||||
|
||||
def download_url(
    self,
    url: str,
    output_dir: Path,
    progress_callback: Optional[Callable[[str, int, Optional[int], str], None]] = None,
) -> Optional[Dict[str, Any]]:
    """Download a book directly from an OpenLibrary/Archive URL.

    The URL is first turned into a SearchResult via
    ``search_result_from_url`` and then handed to ``download`` together with
    ``output_dir`` and ``progress_callback``.

    Returns:
        ``{"path": Path, "search_result": SearchResult}`` on success, or
        ``None`` when no SearchResult could be built or the download
        produced nothing.
    """
    result = self.search_result_from_url(url)
    saved_to = (
        self.download(result, output_dir, progress_callback)
        if result is not None
        else None
    )
    if not saved_to:
        return None
    return dict(path=Path(saved_to), search_result=result)
|
||||
|
||||
@staticmethod
|
||||
def _credential_archive(config: Dict[str,
|
||||
Any]) -> Tuple[Optional[str],
|
||||
@@ -491,6 +548,57 @@ class OpenLibrary(Provider):
|
||||
str(password) if password is not None else None
|
||||
)
|
||||
|
||||
@classmethod
def _archive_scale_from_config(cls, config: Dict[str, Any]) -> int:
    """Resolve Archive.org book-reader scale from provider config.

    Config:
        [provider=OpenLibrary]
        quality="medium"   # High=2, Medium=5, Low=8

    ``quality`` is read from ``config["provider"]["openlibrary"]`` and may
    be a named level (matched case-insensitively against
    ``QUALITY_TO_ARCHIVE_SCALE``), a positive number, or a numeric string
    such as ``"4"``.

    Returns:
        The resolved positive scale, or the class default
        (``DEFAULT_ARCHIVE_SCALE``, 4) when the setting is missing or
        invalid.
    """
    default_scale = int(getattr(cls, "DEFAULT_ARCHIVE_SCALE", 4) or 4)
    if not isinstance(config, dict):
        return default_scale

    provider_config = config.get("provider")
    openlibrary_config = (
        provider_config.get("openlibrary")
        if isinstance(provider_config, dict)
        else None
    )
    if not isinstance(openlibrary_config, dict):
        return default_scale

    raw_quality = openlibrary_config.get("quality")
    # bool is a subclass of int: without this guard ``quality=true`` would
    # silently be interpreted as scale 1 instead of an invalid setting.
    if raw_quality is None or isinstance(raw_quality, bool):
        return default_scale

    if isinstance(raw_quality, (int, float)):
        try:
            numeric = int(raw_quality)
        except (ValueError, OverflowError):
            # int() rejects NaN (ValueError) and infinities (OverflowError).
            return default_scale
        return numeric if numeric > 0 else default_scale

    try:
        label = str(raw_quality).strip().lower()
    except Exception:
        # Best effort: an arbitrary config object may have a failing
        # __str__; treat it as an invalid setting.
        return default_scale
    if not label:
        return default_scale

    mapped = cls.QUALITY_TO_ARCHIVE_SCALE.get(label)
    if isinstance(mapped, int) and mapped > 0:
        return mapped

    # Allow numeric strings (e.g. quality="4").
    try:
        numeric = int(label)
    except ValueError:
        return default_scale
    return numeric if numeric > 0 else default_scale
|
||||
|
||||
@staticmethod
|
||||
def _archive_error_body(response: requests.Response) -> str:
|
||||
try:
|
||||
@@ -1444,64 +1552,6 @@ class OpenLibrary(Provider):
|
||||
log("[openlibrary] Direct download failed", file=sys.stderr)
|
||||
return None
|
||||
|
||||
# --- Convenience helpers for URL-driven downloads (used by download-file) ---
|
||||
|
||||
def search_result_from_url(self, url: str) -> Optional[SearchResult]:
    """Build a minimal SearchResult from a bare OpenLibrary URL.

    The result targets the ``openlibrary`` table with media kind ``book``,
    uses the URL slug as its title and the URL itself as its path. When an
    edition id can be read from the URL it is stored in ``full_metadata``
    under ``openlibrary_id``; otherwise ``full_metadata`` is empty.
    """
    metadata = {}
    edition = edition_id_from_url(url)
    if edition:
        metadata["openlibrary_id"] = edition

    return SearchResult(
        table="openlibrary",
        title=title_hint_from_url_slug(url),
        path=str(url),
        media_kind="book",
        full_metadata=metadata,
    )
|
||||
|
||||
def download_url(
    self,
    url: str,
    output_dir: Path,
    progress_callback: Optional[Callable[[str, int, Optional[int], str], None]] = None,
) -> Optional[Dict[str, Any]]:
    """Download a book directly from an OpenLibrary URL.

    The URL is first turned into a SearchResult via
    ``search_result_from_url`` and then handed to ``download`` together with
    ``output_dir`` and ``progress_callback``.

    Returns:
        ``{"path": Path, "search_result": SearchResult}`` on success, or
        ``None`` when no SearchResult could be built or the download
        produced nothing.
    """
    result = self.search_result_from_url(url)
    saved_to = (
        self.download(result, output_dir, progress_callback)
        if result is not None
        else None
    )
    if not saved_to:
        return None
    return dict(path=Path(saved_to), search_result=result)
|
||||
try:
|
||||
if progress_callback is not None:
|
||||
progress_callback("step", 0, None, "direct download")
|
||||
except Exception:
|
||||
pass
|
||||
out_path = unique_path(output_dir / f"{safe_title}.pdf")
|
||||
ok = download_file(
|
||||
pdf_url,
|
||||
out_path,
|
||||
session=self._session,
|
||||
progress_callback=(
|
||||
(
|
||||
lambda downloaded, total, label:
|
||||
progress_callback("bytes", downloaded, total, label)
|
||||
) if progress_callback is not None else None
|
||||
),
|
||||
)
|
||||
if ok:
|
||||
return out_path
|
||||
log("[openlibrary] Direct download failed", file=sys.stderr)
|
||||
return None
|
||||
|
||||
# 2) Borrow flow (credentials required).
|
||||
try:
|
||||
email, password = self._credential_archive(self.config or {})
|
||||
@@ -1510,6 +1560,15 @@ class OpenLibrary(Provider):
|
||||
"[openlibrary] Archive credentials missing; cannot borrow",
|
||||
file=sys.stderr
|
||||
)
|
||||
try:
|
||||
from SYS.rich_display import show_provider_config_panel
|
||||
|
||||
show_provider_config_panel(
|
||||
"openlibrary",
|
||||
keys=self.required_config_keys(),
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
return None
|
||||
|
||||
lendable = True
|
||||
@@ -1590,7 +1649,7 @@ class OpenLibrary(Provider):
|
||||
n_threads=10,
|
||||
directory=temp_dir,
|
||||
links=links,
|
||||
scale=3,
|
||||
scale=self._archive_scale_from_config(self.config or {}),
|
||||
book_id=archive_id,
|
||||
progress_callback=(
|
||||
(
|
||||
|
||||
Reference in New Issue
Block a user