2025-12-12 21:55:38 -08:00
|
|
|
from __future__ import annotations
|
|
|
|
|
|
2025-12-14 00:53:52 -08:00
|
|
|
import base64
|
2025-12-16 23:23:43 -08:00
|
|
|
import io
|
2025-12-14 00:53:52 -08:00
|
|
|
from concurrent import futures
|
|
|
|
|
import hashlib
|
|
|
|
|
import json as json_module
|
|
|
|
|
import re
|
2025-12-12 21:55:38 -08:00
|
|
|
import shutil
|
|
|
|
|
import sys
|
|
|
|
|
import tempfile
|
2025-12-14 00:53:52 -08:00
|
|
|
import time
|
2025-12-12 21:55:38 -08:00
|
|
|
from pathlib import Path
|
2026-01-06 16:19:29 -08:00
|
|
|
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
|
2025-12-22 02:11:53 -08:00
|
|
|
from urllib.parse import urlparse
|
2025-12-12 21:55:38 -08:00
|
|
|
|
|
|
|
|
import requests
|
|
|
|
|
|
2026-01-06 16:19:29 -08:00
|
|
|
from API.HTTP import HTTPClient, get_requests_verify_value
|
2025-12-19 02:29:42 -08:00
|
|
|
from ProviderCore.base import Provider, SearchResult
|
2026-01-06 01:38:59 -08:00
|
|
|
from SYS.utils import sanitize_filename
|
2025-12-29 18:42:02 -08:00
|
|
|
from SYS.cli_syntax import get_field, get_free_text, parse_query
|
2025-12-17 03:16:41 -08:00
|
|
|
from SYS.logger import debug, log
|
2026-01-06 16:19:29 -08:00
|
|
|
from Provider.metadata_provider import (
|
|
|
|
|
archive_item_metadata_to_tags,
|
|
|
|
|
fetch_archive_item_metadata,
|
|
|
|
|
)
|
2025-12-12 21:55:38 -08:00
|
|
|
from SYS.utils import unique_path
|
|
|
|
|
|
2026-01-06 16:19:29 -08:00
|
|
|
_ARCHIVE_VERIFY_VALUE = get_requests_verify_value()
|
|
|
|
|
|
|
|
|
|
def _create_archive_session() -> requests.Session:
|
|
|
|
|
session = requests.Session()
|
|
|
|
|
session.verify = _ARCHIVE_VERIFY_VALUE
|
|
|
|
|
return session
|
|
|
|
|
|
2025-12-14 00:53:52 -08:00
|
|
|
try:
|
|
|
|
|
from Crypto.Cipher import AES # type: ignore
|
|
|
|
|
from Crypto.Util import Counter # type: ignore
|
|
|
|
|
except ImportError:
|
|
|
|
|
AES = None # type: ignore
|
|
|
|
|
Counter = None # type: ignore
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
from tqdm import tqdm # type: ignore
|
|
|
|
|
except ImportError:
|
|
|
|
|
tqdm = None # type: ignore
|
|
|
|
|
|
2025-12-12 21:55:38 -08:00
|
|
|
|
2025-12-16 23:23:43 -08:00
|
|
|
def _image_paths_to_pdf_bytes(images: List[str]) -> Optional[bytes]:
|
|
|
|
|
if not images:
|
|
|
|
|
return None
|
|
|
|
|
try:
|
|
|
|
|
from PIL import Image # type: ignore
|
|
|
|
|
except Exception:
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
pil_images: List[Any] = []
|
|
|
|
|
try:
|
|
|
|
|
for p in images:
|
|
|
|
|
img_path = Path(p)
|
|
|
|
|
if not img_path.is_file():
|
|
|
|
|
continue
|
|
|
|
|
with Image.open(img_path) as im: # type: ignore[attr-defined]
|
|
|
|
|
# Ensure PDF-compatible mode.
|
2025-12-29 18:42:02 -08:00
|
|
|
if im.mode in {"RGBA",
|
|
|
|
|
"LA",
|
|
|
|
|
"P"}:
|
2025-12-16 23:23:43 -08:00
|
|
|
im = im.convert("RGB")
|
|
|
|
|
else:
|
|
|
|
|
im = im.convert("RGB")
|
|
|
|
|
pil_images.append(im.copy())
|
|
|
|
|
except Exception:
|
|
|
|
|
for im in pil_images:
|
|
|
|
|
try:
|
|
|
|
|
im.close()
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
if not pil_images:
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
buf = io.BytesIO()
|
|
|
|
|
first, rest = pil_images[0], pil_images[1:]
|
|
|
|
|
try:
|
|
|
|
|
first.save(buf, format="PDF", save_all=True, append_images=rest)
|
|
|
|
|
return buf.getvalue()
|
|
|
|
|
except Exception:
|
|
|
|
|
return None
|
|
|
|
|
finally:
|
|
|
|
|
for im in pil_images:
|
|
|
|
|
try:
|
|
|
|
|
im.close()
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
|
2025-12-12 21:55:38 -08:00
|
|
|
def _looks_like_isbn(text: str) -> bool:
|
|
|
|
|
t = (text or "").replace("-", "").strip()
|
|
|
|
|
return t.isdigit() and len(t) in (10, 13)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _first_str(value: Any) -> Optional[str]:
|
|
|
|
|
if isinstance(value, str):
|
|
|
|
|
v = value.strip()
|
|
|
|
|
return v if v else None
|
|
|
|
|
if isinstance(value, list) and value:
|
|
|
|
|
first = value[0]
|
|
|
|
|
if isinstance(first, str):
|
|
|
|
|
v = first.strip()
|
|
|
|
|
return v if v else None
|
|
|
|
|
return str(first) if first is not None else None
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _resolve_edition_id(doc: Dict[str, Any]) -> str:
|
|
|
|
|
# OpenLibrary Search API typically provides edition_key: ["OL...M", ...]
|
|
|
|
|
edition_key = doc.get("edition_key")
|
|
|
|
|
if isinstance(edition_key, list) and edition_key:
|
|
|
|
|
return str(edition_key[0]).strip()
|
2025-12-14 00:53:52 -08:00
|
|
|
if isinstance(edition_key, str) and edition_key.strip():
|
|
|
|
|
return edition_key.strip()
|
|
|
|
|
|
|
|
|
|
# Often present even when edition_key is missing.
|
|
|
|
|
cover_edition_key = doc.get("cover_edition_key")
|
|
|
|
|
if isinstance(cover_edition_key, str) and cover_edition_key.strip():
|
|
|
|
|
return cover_edition_key.strip()
|
2025-12-12 21:55:38 -08:00
|
|
|
|
|
|
|
|
# Fallback: sometimes key can be /books/OL...M
|
|
|
|
|
key = doc.get("key")
|
|
|
|
|
if isinstance(key, str) and key.startswith("/books/"):
|
|
|
|
|
return key.split("/books/", 1)[1].strip("/")
|
|
|
|
|
|
|
|
|
|
return ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _check_lendable(session: requests.Session, edition_id: str) -> Tuple[bool, str]:
|
|
|
|
|
"""Return (lendable, status_text) using OpenLibrary volumes API."""
|
|
|
|
|
try:
|
2025-12-29 18:42:02 -08:00
|
|
|
if not edition_id or not edition_id.startswith("OL") or not edition_id.endswith(
|
|
|
|
|
"M"):
|
2025-12-12 21:55:38 -08:00
|
|
|
return False, "not-an-edition"
|
|
|
|
|
|
|
|
|
|
url = f"https://openlibrary.org/api/volumes/brief/json/OLID:{edition_id}"
|
2025-12-14 00:53:52 -08:00
|
|
|
resp = session.get(url, timeout=6)
|
2025-12-12 21:55:38 -08:00
|
|
|
resp.raise_for_status()
|
|
|
|
|
data = resp.json() or {}
|
|
|
|
|
wrapped = data.get(f"OLID:{edition_id}")
|
|
|
|
|
if not isinstance(wrapped, dict):
|
|
|
|
|
return False, "no-availability"
|
|
|
|
|
|
|
|
|
|
items = wrapped.get("items")
|
|
|
|
|
if not isinstance(items, list) or not items:
|
|
|
|
|
return False, "no-items"
|
|
|
|
|
|
|
|
|
|
first = items[0]
|
|
|
|
|
status_val = ""
|
|
|
|
|
if isinstance(first, dict):
|
|
|
|
|
status_val = str(first.get("status", ""))
|
|
|
|
|
else:
|
|
|
|
|
status_val = str(first)
|
|
|
|
|
|
|
|
|
|
return ("lendable" in status_val.lower()), status_val
|
|
|
|
|
except requests.exceptions.Timeout:
|
|
|
|
|
return False, "api-timeout"
|
|
|
|
|
except Exception:
|
|
|
|
|
return False, "api-error"
|
|
|
|
|
|
|
|
|
|
|
2025-12-29 17:05:03 -08:00
|
|
|
def _resolve_archive_id(
|
2025-12-29 18:42:02 -08:00
|
|
|
session: requests.Session,
|
|
|
|
|
edition_id: str,
|
|
|
|
|
ia_candidates: List[str]
|
2025-12-29 17:05:03 -08:00
|
|
|
) -> str:
|
2025-12-12 21:55:38 -08:00
|
|
|
# Prefer IA identifiers already present in search results.
|
|
|
|
|
if ia_candidates:
|
|
|
|
|
first = ia_candidates[0].strip()
|
|
|
|
|
if first:
|
|
|
|
|
return first
|
|
|
|
|
|
|
|
|
|
# Otherwise query the edition JSON.
|
|
|
|
|
try:
|
2025-12-29 18:42:02 -08:00
|
|
|
resp = session.get(
|
|
|
|
|
f"https://openlibrary.org/books/{edition_id}.json",
|
|
|
|
|
timeout=6
|
|
|
|
|
)
|
2025-12-12 21:55:38 -08:00
|
|
|
resp.raise_for_status()
|
|
|
|
|
data = resp.json() or {}
|
|
|
|
|
|
|
|
|
|
ocaid = data.get("ocaid")
|
|
|
|
|
if isinstance(ocaid, str) and ocaid.strip():
|
|
|
|
|
return ocaid.strip()
|
|
|
|
|
|
|
|
|
|
identifiers = data.get("identifiers")
|
|
|
|
|
if isinstance(identifiers, dict):
|
|
|
|
|
ia = identifiers.get("internet_archive")
|
|
|
|
|
ia_id = _first_str(ia)
|
|
|
|
|
if ia_id:
|
|
|
|
|
return ia_id
|
|
|
|
|
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
return ""
|
|
|
|
|
|
|
|
|
|
|
2025-12-22 02:11:53 -08:00
|
|
|
def _archive_id_from_url(url: str) -> str:
|
|
|
|
|
"""Best-effort extraction of an Archive.org item identifier from a URL."""
|
|
|
|
|
|
|
|
|
|
u = str(url or "").strip()
|
|
|
|
|
if not u:
|
|
|
|
|
return ""
|
2025-12-25 04:49:22 -08:00
|
|
|
|
2025-12-22 02:11:53 -08:00
|
|
|
try:
|
|
|
|
|
p = urlparse(u)
|
|
|
|
|
host = (p.hostname or "").lower().strip()
|
|
|
|
|
if not host.endswith("archive.org"):
|
|
|
|
|
return ""
|
|
|
|
|
parts = [x for x in (p.path or "").split("/") if x]
|
|
|
|
|
except Exception:
|
|
|
|
|
return ""
|
|
|
|
|
|
|
|
|
|
# Common patterns:
|
|
|
|
|
# - /details/<id>/...
|
|
|
|
|
# - /borrow/<id>
|
|
|
|
|
# - /download/<id>/...
|
2026-01-03 03:37:48 -08:00
|
|
|
# - /stream/<id>/...
|
|
|
|
|
# - /metadata/<id>
|
|
|
|
|
if len(parts) >= 2 and parts[0].lower() in {
|
|
|
|
|
"details",
|
|
|
|
|
"borrow",
|
|
|
|
|
"download",
|
|
|
|
|
"stream",
|
|
|
|
|
"metadata",
|
|
|
|
|
}:
|
2025-12-22 02:11:53 -08:00
|
|
|
return str(parts[1]).strip()
|
|
|
|
|
|
|
|
|
|
# Sometimes the identifier is the first segment.
|
|
|
|
|
if len(parts) >= 1:
|
|
|
|
|
first = str(parts[0]).strip()
|
2025-12-29 18:42:02 -08:00
|
|
|
if first and first.lower() not in {"account",
|
|
|
|
|
"services",
|
2026-01-03 03:37:48 -08:00
|
|
|
"metadata",
|
2025-12-29 18:42:02 -08:00
|
|
|
"search",
|
|
|
|
|
"advancedsearch.php"}:
|
2025-12-22 02:11:53 -08:00
|
|
|
return first
|
|
|
|
|
|
2026-01-03 03:37:48 -08:00
|
|
|
return ""
|
2026-01-01 20:37:27 -08:00
|
|
|
|
|
|
|
|
|
2026-01-03 03:37:48 -08:00
|
|
|
def edition_id_from_url(u: str) -> str:
|
|
|
|
|
"""Extract an OpenLibrary edition id (OL...M) from a book URL."""
|
|
|
|
|
try:
|
|
|
|
|
p = urlparse(str(u))
|
|
|
|
|
parts = [x for x in (p.path or "").split("/") if x]
|
|
|
|
|
except Exception:
|
|
|
|
|
parts = []
|
|
|
|
|
if len(parts) >= 2 and str(parts[0]).lower() == "books":
|
|
|
|
|
return str(parts[1]).strip()
|
|
|
|
|
return ""
|
2026-01-01 20:37:27 -08:00
|
|
|
|
|
|
|
|
|
2026-01-03 03:37:48 -08:00
|
|
|
def title_hint_from_url_slug(u: str) -> str:
|
|
|
|
|
"""Derive a human-friendly title hint from the URL slug."""
|
|
|
|
|
try:
|
|
|
|
|
p = urlparse(str(u))
|
|
|
|
|
parts = [x for x in (p.path or "").split("/") if x]
|
|
|
|
|
slug = parts[-1] if parts else ""
|
|
|
|
|
except Exception:
|
|
|
|
|
slug = ""
|
|
|
|
|
slug = (slug or "").strip().replace("_", " ")
|
|
|
|
|
return slug or "OpenLibrary"
|
2025-12-22 02:11:53 -08:00
|
|
|
|
|
|
|
|
|
2025-12-19 02:29:42 -08:00
|
|
|
class OpenLibrary(Provider):
|
2026-01-03 03:37:48 -08:00
|
|
|
|
|
|
|
|
TABLE_AUTO_STAGES = {
|
|
|
|
|
"openlibrary": ["download-file"],
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
REQUIRED_CONFIG_KEYS = (
|
|
|
|
|
"email",
|
|
|
|
|
"password",
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
DEFAULT_ARCHIVE_SCALE = 4
|
|
|
|
|
QUALITY_TO_ARCHIVE_SCALE = {
|
|
|
|
|
"high": 2,
|
|
|
|
|
"medium": 5,
|
|
|
|
|
"low": 8,
|
|
|
|
|
}
|
2025-12-22 02:11:53 -08:00
|
|
|
# Domains that should be routed to this provider when the user supplies a URL.
|
|
|
|
|
# (Used by ProviderCore.registry.match_provider_name_for_url)
|
|
|
|
|
URL_DOMAINS = (
|
|
|
|
|
"openlibrary.org",
|
|
|
|
|
"archive.org",
|
|
|
|
|
)
|
2026-01-01 20:37:27 -08:00
|
|
|
URL = URL_DOMAINS
|
2025-12-12 21:55:38 -08:00
|
|
|
"""Search provider for OpenLibrary books + Archive.org direct/borrow download."""
|
|
|
|
|
|
|
|
|
|
def __init__(self, config: Optional[Dict[str, Any]] = None):
|
|
|
|
|
super().__init__(config)
|
2026-01-06 16:19:29 -08:00
|
|
|
self._session = _create_archive_session()
|
2025-12-12 21:55:38 -08:00
|
|
|
|
2025-12-14 00:53:52 -08:00
|
|
|
class BookNotAvailableError(Exception):
|
|
|
|
|
"""Raised when a book is not available for borrowing (waitlisted/in use)."""
|
|
|
|
|
|
2026-01-03 03:37:48 -08:00
|
|
|
def search_result_from_url(self, url: str) -> Optional[SearchResult]:
|
|
|
|
|
"""Build a minimal SearchResult from a bare OpenLibrary/Archive URL."""
|
|
|
|
|
edition_id = edition_id_from_url(url)
|
|
|
|
|
title_hint = title_hint_from_url_slug(url)
|
|
|
|
|
return SearchResult(
|
|
|
|
|
table="openlibrary",
|
|
|
|
|
title=title_hint,
|
|
|
|
|
path=str(url),
|
|
|
|
|
media_kind="book",
|
|
|
|
|
full_metadata={"openlibrary_id": edition_id} if edition_id else {},
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
def download_url(
|
|
|
|
|
self,
|
|
|
|
|
url: str,
|
|
|
|
|
output_dir: Path,
|
|
|
|
|
progress_callback: Optional[Callable[[str, int, Optional[int], str], None]] = None,
|
|
|
|
|
) -> Optional[Dict[str, Any]]:
|
|
|
|
|
"""Download a book directly from an OpenLibrary/Archive URL.
|
|
|
|
|
|
|
|
|
|
Returns a dict with the downloaded path and SearchResult when successful.
|
|
|
|
|
"""
|
|
|
|
|
sr = self.search_result_from_url(url)
|
|
|
|
|
if sr is None:
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
downloaded = self.download(sr, output_dir, progress_callback)
|
|
|
|
|
if not downloaded:
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
return {
|
|
|
|
|
"path": Path(downloaded),
|
|
|
|
|
"search_result": sr,
|
|
|
|
|
}
|
|
|
|
|
|
2025-12-14 00:53:52 -08:00
|
|
|
@staticmethod
|
2025-12-29 18:42:02 -08:00
|
|
|
def _credential_archive(config: Dict[str,
|
|
|
|
|
Any]) -> Tuple[Optional[str],
|
|
|
|
|
Optional[str]]:
|
2025-12-14 00:53:52 -08:00
|
|
|
"""Get Archive.org email/password from config.
|
|
|
|
|
|
|
|
|
|
Supports:
|
|
|
|
|
- New: {"provider": {"openlibrary": {"email": "...", "password": "..."}}}
|
|
|
|
|
- Old: {"Archive": {"email": "...", "password": "..."}}
|
|
|
|
|
{"archive_org_email": "...", "archive_org_password": "..."}
|
|
|
|
|
"""
|
|
|
|
|
if not isinstance(config, dict):
|
|
|
|
|
return None, None
|
|
|
|
|
|
2025-12-29 18:42:02 -08:00
|
|
|
provider_config = config.get("provider",
|
|
|
|
|
{})
|
2025-12-14 00:53:52 -08:00
|
|
|
if isinstance(provider_config, dict):
|
2025-12-29 18:42:02 -08:00
|
|
|
openlibrary_config = provider_config.get("openlibrary",
|
|
|
|
|
{})
|
2025-12-14 00:53:52 -08:00
|
|
|
if isinstance(openlibrary_config, dict):
|
|
|
|
|
email = openlibrary_config.get("email")
|
|
|
|
|
password = openlibrary_config.get("password")
|
|
|
|
|
if email or password:
|
2025-12-29 17:05:03 -08:00
|
|
|
return str(email) if email is not None else None, (
|
|
|
|
|
str(password) if password is not None else None
|
|
|
|
|
)
|
2025-12-14 00:53:52 -08:00
|
|
|
|
|
|
|
|
archive_config = config.get("Archive")
|
|
|
|
|
if isinstance(archive_config, dict):
|
|
|
|
|
email = archive_config.get("email")
|
|
|
|
|
password = archive_config.get("password")
|
|
|
|
|
if email or password:
|
2025-12-29 17:05:03 -08:00
|
|
|
return str(email) if email is not None else None, (
|
|
|
|
|
str(password) if password is not None else None
|
|
|
|
|
)
|
2025-12-14 00:53:52 -08:00
|
|
|
|
|
|
|
|
email = config.get("archive_org_email")
|
|
|
|
|
password = config.get("archive_org_password")
|
2025-12-29 17:05:03 -08:00
|
|
|
return str(email) if email is not None else None, (
|
|
|
|
|
str(password) if password is not None else None
|
|
|
|
|
)
|
2025-12-14 00:53:52 -08:00
|
|
|
|
2026-01-03 03:37:48 -08:00
|
|
|
@classmethod
|
|
|
|
|
def _archive_scale_from_config(cls, config: Dict[str, Any]) -> int:
|
|
|
|
|
"""Resolve Archive.org book-reader scale from provider config.
|
|
|
|
|
|
|
|
|
|
Config:
|
|
|
|
|
[provider=OpenLibrary]
|
|
|
|
|
quality="medium" # High=2, Medium=5, Low=8
|
|
|
|
|
|
|
|
|
|
Default when missing/invalid: 4.
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
default_scale = int(getattr(cls, "DEFAULT_ARCHIVE_SCALE", 4) or 4)
|
|
|
|
|
if not isinstance(config, dict):
|
|
|
|
|
return default_scale
|
|
|
|
|
|
|
|
|
|
provider_config = config.get("provider", {})
|
|
|
|
|
openlibrary_config = None
|
|
|
|
|
if isinstance(provider_config, dict):
|
|
|
|
|
openlibrary_config = provider_config.get("openlibrary")
|
|
|
|
|
if not isinstance(openlibrary_config, dict):
|
|
|
|
|
openlibrary_config = {}
|
|
|
|
|
|
|
|
|
|
raw_quality = openlibrary_config.get("quality")
|
|
|
|
|
if raw_quality is None:
|
|
|
|
|
return default_scale
|
|
|
|
|
|
|
|
|
|
if isinstance(raw_quality, (int, float)):
|
|
|
|
|
try:
|
|
|
|
|
val = int(raw_quality)
|
|
|
|
|
except Exception:
|
|
|
|
|
return default_scale
|
|
|
|
|
return val if val > 0 else default_scale
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
q = str(raw_quality).strip().lower()
|
|
|
|
|
except Exception:
|
|
|
|
|
return default_scale
|
|
|
|
|
if not q:
|
|
|
|
|
return default_scale
|
|
|
|
|
|
|
|
|
|
mapped = cls.QUALITY_TO_ARCHIVE_SCALE.get(q)
|
|
|
|
|
if isinstance(mapped, int) and mapped > 0:
|
|
|
|
|
return mapped
|
|
|
|
|
|
|
|
|
|
# Allow numeric strings (e.g. quality="4").
|
|
|
|
|
try:
|
|
|
|
|
val = int(q)
|
|
|
|
|
except Exception:
|
|
|
|
|
return default_scale
|
|
|
|
|
return val if val > 0 else default_scale
|
|
|
|
|
|
2025-12-14 00:53:52 -08:00
|
|
|
@staticmethod
|
|
|
|
|
def _archive_error_body(response: requests.Response) -> str:
|
|
|
|
|
try:
|
|
|
|
|
body = response.text or ""
|
|
|
|
|
except Exception:
|
|
|
|
|
return ""
|
|
|
|
|
if len(body) > 2000:
|
|
|
|
|
return body[:1200] + "\n... (truncated) ...\n" + body[-400:]
|
|
|
|
|
return body
|
|
|
|
|
|
|
|
|
|
@classmethod
|
|
|
|
|
def _archive_login(cls, email: str, password: str) -> requests.Session:
|
|
|
|
|
"""Login to archive.org using the token-based services endpoint (matches test-login.py)."""
|
2026-01-06 16:19:29 -08:00
|
|
|
session = _create_archive_session()
|
2025-12-14 00:53:52 -08:00
|
|
|
|
2025-12-29 18:42:02 -08:00
|
|
|
token_resp = session.get(
|
|
|
|
|
"https://archive.org/services/account/login/",
|
|
|
|
|
timeout=30
|
|
|
|
|
)
|
2025-12-14 00:53:52 -08:00
|
|
|
try:
|
|
|
|
|
token_json = token_resp.json()
|
|
|
|
|
except Exception as exc:
|
2025-12-29 17:05:03 -08:00
|
|
|
raise RuntimeError(
|
|
|
|
|
f"Archive login token parse failed: {exc}\n{cls._archive_error_body(token_resp)}"
|
|
|
|
|
)
|
2025-12-14 00:53:52 -08:00
|
|
|
|
|
|
|
|
if not token_json.get("success"):
|
2025-12-29 17:05:03 -08:00
|
|
|
raise RuntimeError(
|
|
|
|
|
f"Archive login token fetch failed\n{cls._archive_error_body(token_resp)}"
|
|
|
|
|
)
|
2025-12-14 00:53:52 -08:00
|
|
|
|
|
|
|
|
token = (token_json.get("value") or {}).get("token")
|
|
|
|
|
if not token:
|
|
|
|
|
raise RuntimeError("Archive login token missing")
|
|
|
|
|
|
2025-12-29 18:42:02 -08:00
|
|
|
headers = {
|
|
|
|
|
"Content-Type": "application/x-www-form-urlencoded"
|
|
|
|
|
}
|
|
|
|
|
payload = {
|
|
|
|
|
"username": email,
|
|
|
|
|
"password": password,
|
|
|
|
|
"t": token
|
|
|
|
|
}
|
2025-12-14 00:53:52 -08:00
|
|
|
|
|
|
|
|
login_resp = session.post(
|
|
|
|
|
"https://archive.org/services/account/login/",
|
|
|
|
|
headers=headers,
|
|
|
|
|
data=json_module.dumps(payload),
|
|
|
|
|
timeout=30,
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
login_json = login_resp.json()
|
|
|
|
|
except Exception as exc:
|
2025-12-29 17:05:03 -08:00
|
|
|
raise RuntimeError(
|
|
|
|
|
f"Archive login parse failed: {exc}\n{cls._archive_error_body(login_resp)}"
|
|
|
|
|
)
|
2025-12-14 00:53:52 -08:00
|
|
|
|
|
|
|
|
if login_json.get("success") is False:
|
|
|
|
|
if login_json.get("value") == "bad_login":
|
|
|
|
|
raise RuntimeError("Invalid Archive.org credentials")
|
|
|
|
|
raise RuntimeError(f"Archive login failed: {login_json}")
|
|
|
|
|
|
|
|
|
|
return session
|
|
|
|
|
|
|
|
|
|
@classmethod
|
2025-12-29 17:05:03 -08:00
|
|
|
def _archive_loan(
|
2025-12-29 18:42:02 -08:00
|
|
|
cls,
|
|
|
|
|
session: requests.Session,
|
|
|
|
|
book_id: str,
|
|
|
|
|
*,
|
|
|
|
|
verbose: bool = True
|
2025-12-29 17:05:03 -08:00
|
|
|
) -> requests.Session:
|
2025-12-29 18:42:02 -08:00
|
|
|
data = {
|
|
|
|
|
"action": "grant_access",
|
|
|
|
|
"identifier": book_id
|
|
|
|
|
}
|
2025-12-29 17:05:03 -08:00
|
|
|
session.post(
|
2025-12-29 18:42:02 -08:00
|
|
|
"https://archive.org/services/loans/loan/searchInside.php",
|
|
|
|
|
data=data,
|
|
|
|
|
timeout=30
|
2025-12-29 17:05:03 -08:00
|
|
|
)
|
2025-12-14 00:53:52 -08:00
|
|
|
data["action"] = "browse_book"
|
2025-12-29 18:42:02 -08:00
|
|
|
response = session.post(
|
|
|
|
|
"https://archive.org/services/loans/loan/",
|
|
|
|
|
data=data,
|
|
|
|
|
timeout=30
|
|
|
|
|
)
|
2025-12-14 00:53:52 -08:00
|
|
|
|
|
|
|
|
if response.status_code == 400:
|
|
|
|
|
try:
|
|
|
|
|
err = (response.json() or {}).get("error")
|
2025-12-29 18:42:02 -08:00
|
|
|
if (err ==
|
|
|
|
|
"This book is not available to borrow at this time. Please try again later."
|
|
|
|
|
):
|
2025-12-14 00:53:52 -08:00
|
|
|
raise cls.BookNotAvailableError("Book is waitlisted or in use")
|
|
|
|
|
raise RuntimeError(f"Borrow failed: {err or response.text}")
|
|
|
|
|
except cls.BookNotAvailableError:
|
|
|
|
|
raise
|
|
|
|
|
except Exception:
|
|
|
|
|
raise RuntimeError("The book cannot be borrowed")
|
|
|
|
|
|
|
|
|
|
data["action"] = "create_token"
|
2025-12-29 18:42:02 -08:00
|
|
|
response = session.post(
|
|
|
|
|
"https://archive.org/services/loans/loan/",
|
|
|
|
|
data=data,
|
|
|
|
|
timeout=30
|
|
|
|
|
)
|
2025-12-14 00:53:52 -08:00
|
|
|
if "token" in (response.text or ""):
|
|
|
|
|
return session
|
|
|
|
|
raise RuntimeError("Something went wrong when trying to borrow the book")
|
|
|
|
|
|
|
|
|
|
@staticmethod
|
|
|
|
|
def _archive_return_loan(session: requests.Session, book_id: str) -> None:
|
2025-12-29 18:42:02 -08:00
|
|
|
data = {
|
|
|
|
|
"action": "return_loan",
|
|
|
|
|
"identifier": book_id
|
|
|
|
|
}
|
|
|
|
|
response = session.post(
|
|
|
|
|
"https://archive.org/services/loans/loan/",
|
|
|
|
|
data=data,
|
|
|
|
|
timeout=30
|
|
|
|
|
)
|
2025-12-14 00:53:52 -08:00
|
|
|
if response.status_code == 200:
|
|
|
|
|
try:
|
|
|
|
|
if (response.json() or {}).get("success"):
|
|
|
|
|
return
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
|
|
|
|
raise RuntimeError("Something went wrong when trying to return the book")
|
|
|
|
|
|
2025-12-22 02:11:53 -08:00
|
|
|
@staticmethod
|
|
|
|
|
def _archive_logout(session: requests.Session) -> None:
|
|
|
|
|
"""Best-effort logout from archive.org.
|
|
|
|
|
|
|
|
|
|
Archive sessions are cookie-based; returning the loan is the critical step.
|
|
|
|
|
Logout is attempted for cleanliness but failures should not abort the workflow.
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
if session is None:
|
|
|
|
|
return
|
|
|
|
|
for url in (
|
2025-12-29 18:42:02 -08:00
|
|
|
"https://archive.org/account/logout",
|
|
|
|
|
"https://archive.org/account/logout.php",
|
2025-12-22 02:11:53 -08:00
|
|
|
):
|
|
|
|
|
try:
|
|
|
|
|
resp = session.get(url, timeout=15, allow_redirects=True)
|
|
|
|
|
code = int(getattr(resp, "status_code", 0) or 0)
|
|
|
|
|
if code and code < 500:
|
|
|
|
|
return
|
|
|
|
|
except Exception:
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
@staticmethod
|
|
|
|
|
def _archive_is_lendable(book_id: str) -> tuple[bool, str]:
|
|
|
|
|
"""Heuristic lendable check using Archive.org item metadata.
|
|
|
|
|
|
|
|
|
|
Some lendable items do not map cleanly to an OpenLibrary edition id.
|
|
|
|
|
In practice, Archive metadata collections often include markers like:
|
|
|
|
|
- inlibrary
|
|
|
|
|
- printdisabled
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
ident = str(book_id or "").strip()
|
|
|
|
|
if not ident:
|
|
|
|
|
return False, "no-archive-id"
|
|
|
|
|
try:
|
2026-01-06 16:19:29 -08:00
|
|
|
resp = requests.get(
|
|
|
|
|
f"https://archive.org/metadata/{ident}",
|
|
|
|
|
timeout=8,
|
|
|
|
|
verify=_ARCHIVE_VERIFY_VALUE,
|
|
|
|
|
)
|
2025-12-22 02:11:53 -08:00
|
|
|
resp.raise_for_status()
|
|
|
|
|
data = resp.json() if resp is not None else {}
|
2025-12-29 18:42:02 -08:00
|
|
|
meta = data.get("metadata",
|
|
|
|
|
{}) if isinstance(data,
|
|
|
|
|
dict) else {}
|
2025-12-22 02:11:53 -08:00
|
|
|
collection = meta.get("collection") if isinstance(meta, dict) else None
|
|
|
|
|
|
|
|
|
|
values: List[str] = []
|
|
|
|
|
if isinstance(collection, list):
|
|
|
|
|
values = [str(x).strip().lower() for x in collection if str(x).strip()]
|
|
|
|
|
elif isinstance(collection, str):
|
|
|
|
|
values = [collection.strip().lower()]
|
|
|
|
|
|
2026-01-03 21:23:55 -08:00
|
|
|
# Treat borrowable as "inlibrary" (and keep "lendinglibrary" as a safe alias).
|
|
|
|
|
# IMPORTANT: do NOT treat "printdisabled" alone as borrowable.
|
|
|
|
|
if any(v in {"inlibrary", "lendinglibrary"} for v in values):
|
2025-12-22 02:11:53 -08:00
|
|
|
return True, "archive-collection"
|
|
|
|
|
return False, "archive-not-lendable"
|
|
|
|
|
except Exception:
|
|
|
|
|
return False, "archive-metadata-error"
|
|
|
|
|
|
2025-12-14 00:53:52 -08:00
|
|
|
@staticmethod
|
2025-12-29 18:42:02 -08:00
|
|
|
def _archive_get_book_infos(session: requests.Session,
|
|
|
|
|
url: str) -> Tuple[str,
|
|
|
|
|
List[str],
|
|
|
|
|
Dict[str,
|
|
|
|
|
Any]]:
|
2025-12-14 00:53:52 -08:00
|
|
|
"""Extract page links from Archive.org book reader."""
|
|
|
|
|
r = session.get(url, timeout=30).text
|
|
|
|
|
|
|
|
|
|
# Matches: "url":"//archive.org/..." (allow whitespace)
|
|
|
|
|
match = re.search(r'"url"\s*:\s*"([^"]+)"', r)
|
|
|
|
|
if not match:
|
|
|
|
|
raise RuntimeError("Failed to extract book info URL from response")
|
|
|
|
|
|
|
|
|
|
url_path = match.group(1)
|
|
|
|
|
infos_url = ("https:" + url_path) if url_path.startswith("//") else url_path
|
|
|
|
|
infos_url = infos_url.replace("\\u0026", "&")
|
|
|
|
|
|
|
|
|
|
response = session.get(infos_url, timeout=30)
|
|
|
|
|
payload = response.json()
|
|
|
|
|
data = payload["data"]
|
|
|
|
|
|
|
|
|
|
title = str(data["brOptions"]["bookTitle"]).strip().replace(" ", "_")
|
|
|
|
|
title = "".join(c for c in title if c not in '<>:"/\\|?*')
|
|
|
|
|
title = title[:150]
|
|
|
|
|
|
|
|
|
|
metadata = data.get("metadata") or {}
|
|
|
|
|
links: List[str] = []
|
2025-12-29 18:42:02 -08:00
|
|
|
br_data = (data.get("brOptions") or {}).get("data",
|
|
|
|
|
[])
|
2025-12-14 00:53:52 -08:00
|
|
|
if isinstance(br_data, list):
|
|
|
|
|
for item in br_data:
|
|
|
|
|
if isinstance(item, list):
|
|
|
|
|
for page in item:
|
|
|
|
|
if isinstance(page, dict) and "uri" in page:
|
|
|
|
|
links.append(page["uri"])
|
|
|
|
|
elif isinstance(item, dict) and "uri" in item:
|
|
|
|
|
links.append(item["uri"])
|
|
|
|
|
|
|
|
|
|
if not links:
|
|
|
|
|
raise RuntimeError("No pages found in book data")
|
|
|
|
|
return title, links, metadata if isinstance(metadata, dict) else {}
|
|
|
|
|
|
|
|
|
|
@staticmethod
|
|
|
|
|
def _archive_image_name(pages: int, page: int, directory: str) -> str:
|
|
|
|
|
return f"{directory}/{(len(str(pages)) - len(str(page))) * '0'}{page}.jpg"
|
|
|
|
|
|
|
|
|
|
@staticmethod
|
2025-12-29 18:42:02 -08:00
|
|
|
def _archive_deobfuscate_image(
|
|
|
|
|
image_data: bytes,
|
|
|
|
|
link: str,
|
|
|
|
|
obf_header: str
|
|
|
|
|
) -> bytes:
|
2025-12-14 00:53:52 -08:00
|
|
|
if not AES or not Counter:
|
|
|
|
|
raise RuntimeError("Crypto library not available")
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
version, counter_b64 = obf_header.split("|")
|
|
|
|
|
except Exception as exc:
|
|
|
|
|
raise ValueError("Invalid X-Obfuscate header format") from exc
|
|
|
|
|
|
|
|
|
|
if version != "1":
|
|
|
|
|
raise ValueError("Unsupported obfuscation version: " + version)
|
|
|
|
|
|
|
|
|
|
aes_key = re.sub(r"^https?:\/\/.*?\/", "/", link)
|
|
|
|
|
sha1_digest = hashlib.sha1(aes_key.encode("utf-8")).digest()
|
|
|
|
|
key = sha1_digest[:16]
|
|
|
|
|
|
|
|
|
|
counter_bytes = base64.b64decode(counter_b64)
|
|
|
|
|
if len(counter_bytes) != 16:
|
2025-12-29 18:42:02 -08:00
|
|
|
raise ValueError(
|
|
|
|
|
f"Expected counter to be 16 bytes, got {len(counter_bytes)}"
|
|
|
|
|
)
|
2025-12-14 00:53:52 -08:00
|
|
|
|
|
|
|
|
prefix = counter_bytes[:8]
|
|
|
|
|
initial_value = int.from_bytes(counter_bytes[8:], byteorder="big")
|
2025-12-29 18:42:02 -08:00
|
|
|
ctr = Counter.new(
|
|
|
|
|
64,
|
|
|
|
|
prefix=prefix,
|
|
|
|
|
initial_value=initial_value,
|
|
|
|
|
little_endian=False
|
|
|
|
|
) # type: ignore
|
2025-12-14 00:53:52 -08:00
|
|
|
cipher = AES.new(key, AES.MODE_CTR, counter=ctr) # type: ignore
|
|
|
|
|
|
|
|
|
|
decrypted_part = cipher.decrypt(image_data[:1024])
|
|
|
|
|
return decrypted_part + image_data[1024:]
|
|
|
|
|
|
|
|
|
|
@classmethod
|
|
|
|
|
def _archive_download_one_image(
|
|
|
|
|
cls,
|
|
|
|
|
session: requests.Session,
|
|
|
|
|
link: str,
|
|
|
|
|
i: int,
|
|
|
|
|
directory: str,
|
|
|
|
|
book_id: str,
|
|
|
|
|
pages: int,
|
|
|
|
|
) -> None:
|
|
|
|
|
headers = {
|
|
|
|
|
"Referer": "https://archive.org/",
|
|
|
|
|
"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
|
|
|
|
|
"Sec-Fetch-Site": "same-site",
|
|
|
|
|
"Sec-Fetch-Mode": "no-cors",
|
|
|
|
|
"Sec-Fetch-Dest": "image",
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
while True:
|
|
|
|
|
try:
|
|
|
|
|
response = session.get(link, headers=headers, timeout=30)
|
|
|
|
|
if response.status_code == 403:
|
|
|
|
|
cls._archive_loan(session, book_id, verbose=False)
|
|
|
|
|
raise RuntimeError("Borrow again")
|
|
|
|
|
if response.status_code == 200:
|
|
|
|
|
break
|
|
|
|
|
except Exception:
|
|
|
|
|
time.sleep(1)
|
|
|
|
|
|
|
|
|
|
image = cls._archive_image_name(pages, i, directory)
|
|
|
|
|
obf_header = response.headers.get("X-Obfuscate")
|
|
|
|
|
if obf_header:
|
2025-12-29 18:42:02 -08:00
|
|
|
image_content = cls._archive_deobfuscate_image(
|
|
|
|
|
response.content,
|
|
|
|
|
link,
|
|
|
|
|
obf_header
|
|
|
|
|
)
|
2025-12-14 00:53:52 -08:00
|
|
|
else:
|
|
|
|
|
image_content = response.content
|
|
|
|
|
|
|
|
|
|
with open(image, "wb") as f:
|
|
|
|
|
f.write(image_content)
|
|
|
|
|
|
|
|
|
|
@classmethod
|
|
|
|
|
def _archive_download(
|
|
|
|
|
cls,
|
|
|
|
|
session: requests.Session,
|
|
|
|
|
n_threads: int,
|
|
|
|
|
directory: str,
|
|
|
|
|
links: List[str],
|
|
|
|
|
scale: int,
|
|
|
|
|
book_id: str,
|
2025-12-29 18:42:02 -08:00
|
|
|
progress_callback: Optional[Callable[[int,
|
|
|
|
|
int],
|
|
|
|
|
None]] = None,
|
2025-12-14 00:53:52 -08:00
|
|
|
) -> List[str]:
|
|
|
|
|
links_scaled = [f"{link}&rotate=0&scale={scale}" for link in links]
|
|
|
|
|
pages = len(links_scaled)
|
|
|
|
|
|
|
|
|
|
tasks = []
|
|
|
|
|
with futures.ThreadPoolExecutor(max_workers=n_threads) as executor:
|
|
|
|
|
for i, link in enumerate(links_scaled):
|
|
|
|
|
tasks.append(
|
|
|
|
|
executor.submit(
|
|
|
|
|
cls._archive_download_one_image,
|
|
|
|
|
session=session,
|
|
|
|
|
link=link,
|
|
|
|
|
i=i,
|
|
|
|
|
directory=directory,
|
|
|
|
|
book_id=book_id,
|
|
|
|
|
pages=pages,
|
|
|
|
|
)
|
|
|
|
|
)
|
2025-12-22 02:11:53 -08:00
|
|
|
if progress_callback is not None:
|
|
|
|
|
done = 0
|
|
|
|
|
total = len(tasks)
|
|
|
|
|
for fut in futures.as_completed(tasks):
|
|
|
|
|
try:
|
|
|
|
|
_ = fut.result()
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
|
|
|
|
done += 1
|
|
|
|
|
try:
|
|
|
|
|
progress_callback(done, total)
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
|
|
|
|
elif tqdm:
|
2025-12-29 18:42:02 -08:00
|
|
|
for _ in tqdm(futures.as_completed(tasks),
|
|
|
|
|
total=len(tasks)): # type: ignore
|
2025-12-14 00:53:52 -08:00
|
|
|
pass
|
|
|
|
|
else:
|
|
|
|
|
for _ in futures.as_completed(tasks):
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
return [cls._archive_image_name(pages, i, directory) for i in range(pages)]
|
|
|
|
|
|
|
|
|
|
@staticmethod
|
|
|
|
|
def _archive_check_direct_download(book_id: str) -> Tuple[bool, str]:
|
|
|
|
|
"""Check for a directly downloadable original PDF in Archive.org metadata."""
|
|
|
|
|
try:
|
|
|
|
|
metadata_url = f"https://archive.org/metadata/{book_id}"
|
2026-01-06 16:19:29 -08:00
|
|
|
response = requests.get(
|
|
|
|
|
metadata_url,
|
|
|
|
|
timeout=6,
|
|
|
|
|
verify=_ARCHIVE_VERIFY_VALUE,
|
|
|
|
|
)
|
2025-12-14 00:53:52 -08:00
|
|
|
response.raise_for_status()
|
|
|
|
|
metadata = response.json()
|
|
|
|
|
files = metadata.get("files") if isinstance(metadata, dict) else None
|
|
|
|
|
if isinstance(files, list):
|
|
|
|
|
for file_info in files:
|
|
|
|
|
if not isinstance(file_info, dict):
|
|
|
|
|
continue
|
|
|
|
|
filename = str(file_info.get("name", ""))
|
2025-12-29 18:42:02 -08:00
|
|
|
if filename.endswith(".pdf") and file_info.get("source"
|
|
|
|
|
) == "original":
|
2025-12-29 17:05:03 -08:00
|
|
|
pdf_url = (
|
|
|
|
|
f"https://archive.org/download/{book_id}/{filename.replace(' ', '%20')}"
|
|
|
|
|
)
|
2025-12-29 18:42:02 -08:00
|
|
|
check_response = requests.head(
|
|
|
|
|
pdf_url,
|
|
|
|
|
timeout=4,
|
2026-01-06 16:19:29 -08:00
|
|
|
allow_redirects=True,
|
|
|
|
|
verify=_ARCHIVE_VERIFY_VALUE,
|
2025-12-29 18:42:02 -08:00
|
|
|
)
|
2025-12-14 00:53:52 -08:00
|
|
|
if check_response.status_code == 200:
|
|
|
|
|
return True, pdf_url
|
|
|
|
|
return False, ""
|
|
|
|
|
except Exception:
|
|
|
|
|
return False, ""
|
|
|
|
|
|
2025-12-12 21:55:38 -08:00
|
|
|
def search(
|
|
|
|
|
self,
|
|
|
|
|
query: str,
|
|
|
|
|
limit: int = 50,
|
2025-12-29 18:42:02 -08:00
|
|
|
filters: Optional[Dict[str,
|
|
|
|
|
Any]] = None,
|
2025-12-12 21:55:38 -08:00
|
|
|
**kwargs: Any,
|
|
|
|
|
) -> List[SearchResult]:
|
|
|
|
|
filters = filters or {}
|
|
|
|
|
|
|
|
|
|
parsed = parse_query(query)
|
|
|
|
|
isbn = get_field(parsed, "isbn")
|
|
|
|
|
author = get_field(parsed, "author")
|
|
|
|
|
title = get_field(parsed, "title")
|
|
|
|
|
free_text = get_free_text(parsed)
|
|
|
|
|
|
|
|
|
|
q = (isbn or title or author or free_text or query or "").strip()
|
|
|
|
|
if not q:
|
|
|
|
|
return []
|
|
|
|
|
|
|
|
|
|
if _looks_like_isbn(q):
|
|
|
|
|
q = f"isbn:{q.replace('-', '')}"
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
resp = self._session.get(
|
|
|
|
|
"https://openlibrary.org/search.json",
|
2025-12-29 18:42:02 -08:00
|
|
|
params={
|
|
|
|
|
"q": q,
|
|
|
|
|
"limit": int(limit)
|
|
|
|
|
},
|
2025-12-12 21:55:38 -08:00
|
|
|
timeout=10,
|
|
|
|
|
)
|
|
|
|
|
resp.raise_for_status()
|
|
|
|
|
data = resp.json() or {}
|
|
|
|
|
except Exception as exc:
|
|
|
|
|
log(f"[openlibrary] Search failed: {exc}", file=sys.stderr)
|
|
|
|
|
return []
|
|
|
|
|
|
|
|
|
|
results: List[SearchResult] = []
|
|
|
|
|
docs = data.get("docs") or []
|
|
|
|
|
if not isinstance(docs, list):
|
|
|
|
|
return []
|
|
|
|
|
|
2025-12-14 00:53:52 -08:00
|
|
|
# Availability enrichment can be slow if done sequentially (it may require multiple
|
|
|
|
|
# network calls per row). Do it concurrently to keep the pipeline responsive.
|
2025-12-29 18:42:02 -08:00
|
|
|
docs = docs[:int(limit)]
|
2025-12-14 00:53:52 -08:00
|
|
|
|
2025-12-29 18:42:02 -08:00
|
|
|
def _compute_availability(doc_dict: Dict[str,
|
|
|
|
|
Any]) -> Tuple[str,
|
|
|
|
|
str,
|
|
|
|
|
str,
|
|
|
|
|
str]:
|
2025-12-14 00:53:52 -08:00
|
|
|
edition_id_local = _resolve_edition_id(doc_dict)
|
|
|
|
|
if not edition_id_local:
|
|
|
|
|
return "no-olid", "", "", ""
|
|
|
|
|
|
|
|
|
|
ia_val_local = doc_dict.get("ia") or []
|
|
|
|
|
if isinstance(ia_val_local, str):
|
|
|
|
|
ia_val_local = [ia_val_local]
|
|
|
|
|
if not isinstance(ia_val_local, list):
|
|
|
|
|
ia_val_local = []
|
|
|
|
|
ia_ids_local = [str(x) for x in ia_val_local if x]
|
|
|
|
|
|
2026-01-06 16:19:29 -08:00
|
|
|
session_local = _create_archive_session()
|
2025-12-14 00:53:52 -08:00
|
|
|
|
|
|
|
|
try:
|
2025-12-29 17:05:03 -08:00
|
|
|
archive_id_local = _resolve_archive_id(
|
2025-12-29 18:42:02 -08:00
|
|
|
session_local,
|
|
|
|
|
edition_id_local,
|
|
|
|
|
ia_ids_local
|
2025-12-29 17:05:03 -08:00
|
|
|
)
|
2025-12-14 00:53:52 -08:00
|
|
|
except Exception:
|
|
|
|
|
archive_id_local = ""
|
|
|
|
|
|
|
|
|
|
if not archive_id_local:
|
|
|
|
|
return "no-archive", "", "", ""
|
|
|
|
|
|
|
|
|
|
# Prefer the fastest signal first: OpenLibrary lendable status.
|
|
|
|
|
lendable_local, reason_local = _check_lendable(session_local, edition_id_local)
|
|
|
|
|
if lendable_local:
|
|
|
|
|
return "borrow", reason_local, archive_id_local, ""
|
|
|
|
|
|
2026-01-03 21:23:55 -08:00
|
|
|
# OpenLibrary API can be a false-negative; fall back to Archive metadata.
|
2025-12-14 00:53:52 -08:00
|
|
|
try:
|
2026-01-03 21:23:55 -08:00
|
|
|
lendable2, reason2 = self._archive_is_lendable(archive_id_local)
|
|
|
|
|
if lendable2:
|
|
|
|
|
return "borrow", reason2 or reason_local, archive_id_local, ""
|
2025-12-14 00:53:52 -08:00
|
|
|
except Exception:
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
return "unavailable", reason_local, archive_id_local, ""
|
|
|
|
|
|
2025-12-29 18:42:02 -08:00
|
|
|
availability_rows: List[Tuple[str,
|
|
|
|
|
str,
|
|
|
|
|
str,
|
|
|
|
|
str]] = [
|
|
|
|
|
("unknown",
|
|
|
|
|
"",
|
|
|
|
|
"",
|
|
|
|
|
"") for _ in range(len(docs))
|
|
|
|
|
]
|
2025-12-14 00:53:52 -08:00
|
|
|
if docs:
|
|
|
|
|
max_workers = min(8, max(1, len(docs)))
|
|
|
|
|
done = 0
|
|
|
|
|
with futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
|
|
|
|
|
future_to_index = {
|
2025-12-29 18:42:02 -08:00
|
|
|
executor.submit(_compute_availability,
|
|
|
|
|
doc_dict): i
|
|
|
|
|
for i, doc_dict in enumerate(docs) if isinstance(doc_dict, dict)
|
2025-12-14 00:53:52 -08:00
|
|
|
}
|
|
|
|
|
for fut in futures.as_completed(list(future_to_index.keys())):
|
|
|
|
|
i = future_to_index[fut]
|
|
|
|
|
try:
|
|
|
|
|
availability_rows[i] = fut.result()
|
|
|
|
|
except Exception:
|
|
|
|
|
availability_rows[i] = ("unknown", "", "", "")
|
|
|
|
|
done += 1
|
2025-12-17 03:16:41 -08:00
|
|
|
|
2025-12-14 00:53:52 -08:00
|
|
|
for idx, doc in enumerate(docs):
|
2025-12-12 21:55:38 -08:00
|
|
|
if not isinstance(doc, dict):
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
book_title = str(doc.get("title") or "").strip() or "Unknown"
|
|
|
|
|
|
|
|
|
|
authors = doc.get("author_name") or []
|
|
|
|
|
if isinstance(authors, str):
|
|
|
|
|
authors = [authors]
|
|
|
|
|
if not isinstance(authors, list):
|
|
|
|
|
authors = []
|
|
|
|
|
authors_list = [str(a) for a in authors if a]
|
|
|
|
|
|
|
|
|
|
year_val = doc.get("first_publish_year")
|
|
|
|
|
year = str(year_val) if year_val is not None else ""
|
|
|
|
|
|
|
|
|
|
edition_id = _resolve_edition_id(doc)
|
2025-12-14 00:53:52 -08:00
|
|
|
work_key = doc.get("key") if isinstance(doc.get("key"), str) else ""
|
2025-12-12 21:55:38 -08:00
|
|
|
|
|
|
|
|
ia_val = doc.get("ia") or []
|
|
|
|
|
if isinstance(ia_val, str):
|
|
|
|
|
ia_val = [ia_val]
|
|
|
|
|
if not isinstance(ia_val, list):
|
|
|
|
|
ia_val = []
|
|
|
|
|
ia_ids = [str(x) for x in ia_val if x]
|
|
|
|
|
|
|
|
|
|
isbn_list = doc.get("isbn") or []
|
|
|
|
|
if isinstance(isbn_list, str):
|
|
|
|
|
isbn_list = [isbn_list]
|
|
|
|
|
if not isinstance(isbn_list, list):
|
|
|
|
|
isbn_list = []
|
|
|
|
|
|
|
|
|
|
isbn_13 = next((str(i) for i in isbn_list if len(str(i)) == 13), "")
|
|
|
|
|
isbn_10 = next((str(i) for i in isbn_list if len(str(i)) == 10), "")
|
|
|
|
|
|
|
|
|
|
columns = [
|
2025-12-29 18:42:02 -08:00
|
|
|
("Title",
|
|
|
|
|
book_title),
|
|
|
|
|
("Author",
|
|
|
|
|
", ".join(authors_list)),
|
|
|
|
|
("Year",
|
|
|
|
|
year),
|
|
|
|
|
("Avail",
|
|
|
|
|
""),
|
|
|
|
|
("OLID",
|
|
|
|
|
edition_id),
|
2025-12-12 21:55:38 -08:00
|
|
|
]
|
|
|
|
|
|
2025-12-14 00:53:52 -08:00
|
|
|
# Determine availability using the concurrently computed enrichment.
|
|
|
|
|
availability, availability_reason, archive_id, direct_url = ("unknown", "", "", "")
|
|
|
|
|
if 0 <= idx < len(availability_rows):
|
|
|
|
|
availability, availability_reason, archive_id, direct_url = availability_rows[idx]
|
|
|
|
|
|
2026-01-03 21:23:55 -08:00
|
|
|
# UX requirement: OpenLibrary provider should ONLY show borrowable books.
|
|
|
|
|
# Ignore printdisabled-only and non-borrow items.
|
|
|
|
|
if availability != "borrow":
|
|
|
|
|
continue
|
|
|
|
|
|
2025-12-14 00:53:52 -08:00
|
|
|
# Patch the display column.
|
|
|
|
|
for idx, (name, _val) in enumerate(columns):
|
|
|
|
|
if name == "Avail":
|
|
|
|
|
columns[idx] = ("Avail", availability)
|
|
|
|
|
break
|
|
|
|
|
|
2025-12-12 21:55:38 -08:00
|
|
|
annotations: List[str] = []
|
|
|
|
|
if isbn_13:
|
|
|
|
|
annotations.append(f"isbn_13:{isbn_13}")
|
|
|
|
|
elif isbn_10:
|
|
|
|
|
annotations.append(f"isbn_10:{isbn_10}")
|
|
|
|
|
if ia_ids:
|
|
|
|
|
annotations.append("archive")
|
2025-12-29 18:42:02 -08:00
|
|
|
if availability in {"download",
|
|
|
|
|
"borrow"}:
|
2025-12-14 00:53:52 -08:00
|
|
|
annotations.append(availability)
|
2025-12-12 21:55:38 -08:00
|
|
|
|
2026-01-06 16:19:29 -08:00
|
|
|
book_path = (
|
|
|
|
|
f"https://openlibrary.org/books/{edition_id}" if edition_id else
|
|
|
|
|
(
|
|
|
|
|
f"https://openlibrary.org{work_key}"
|
|
|
|
|
if isinstance(work_key, str) and work_key.startswith("/") else
|
|
|
|
|
"https://openlibrary.org"
|
|
|
|
|
)
|
|
|
|
|
)
|
|
|
|
|
metadata = {
|
|
|
|
|
"openlibrary_id": edition_id,
|
|
|
|
|
"openlibrary_key": work_key,
|
|
|
|
|
"authors": authors_list,
|
|
|
|
|
"year": year,
|
|
|
|
|
"isbn_10": isbn_10,
|
|
|
|
|
"isbn_13": isbn_13,
|
|
|
|
|
"ia": ia_ids,
|
|
|
|
|
"availability": availability,
|
|
|
|
|
"availability_reason": availability_reason,
|
|
|
|
|
"archive_id": archive_id,
|
|
|
|
|
"direct_url": direct_url,
|
|
|
|
|
"raw": doc,
|
|
|
|
|
}
|
|
|
|
|
if book_path:
|
|
|
|
|
metadata["selection_url"] = book_path
|
|
|
|
|
metadata["_selection_args"] = ["-url", book_path]
|
|
|
|
|
metadata["_selection_action"] = ["download-file", "-url", book_path]
|
|
|
|
|
|
2025-12-12 21:55:38 -08:00
|
|
|
results.append(
|
|
|
|
|
SearchResult(
|
|
|
|
|
table="openlibrary",
|
|
|
|
|
title=book_title,
|
2026-01-06 16:19:29 -08:00
|
|
|
path=book_path,
|
2025-12-12 21:55:38 -08:00
|
|
|
detail=(
|
2025-12-29 18:42:02 -08:00
|
|
|
(f"By: {', '.join(authors_list)}" if authors_list else "") +
|
|
|
|
|
(f" ({year})" if year else "")
|
2025-12-12 21:55:38 -08:00
|
|
|
).strip(),
|
|
|
|
|
annotations=annotations,
|
|
|
|
|
media_kind="book",
|
|
|
|
|
columns=columns,
|
2026-01-06 16:19:29 -08:00
|
|
|
full_metadata=metadata,
|
2025-12-12 21:55:38 -08:00
|
|
|
)
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
return results
|
|
|
|
|
|
2025-12-22 02:11:53 -08:00
|
|
|
def download(
|
|
|
|
|
self,
|
|
|
|
|
result: SearchResult,
|
|
|
|
|
output_dir: Path,
|
2025-12-29 18:42:02 -08:00
|
|
|
progress_callback: Optional[Callable[[str,
|
|
|
|
|
int,
|
|
|
|
|
Optional[int],
|
|
|
|
|
str],
|
|
|
|
|
None]] = None,
|
2025-12-22 02:11:53 -08:00
|
|
|
) -> Optional[Path]:
|
2025-12-12 21:55:38 -08:00
|
|
|
output_dir = Path(output_dir)
|
|
|
|
|
output_dir.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
|
|
|
|
|
meta = result.full_metadata or {}
|
|
|
|
|
edition_id = str(meta.get("openlibrary_id") or "").strip()
|
2025-12-22 02:11:53 -08:00
|
|
|
|
|
|
|
|
# Accept direct Archive.org URLs too (details/borrow/download) even when no OL edition id is known.
|
|
|
|
|
archive_id = str(meta.get("archive_id") or "").strip()
|
2025-12-12 21:55:38 -08:00
|
|
|
|
|
|
|
|
ia_ids = meta.get("ia") or []
|
|
|
|
|
if isinstance(ia_ids, str):
|
|
|
|
|
ia_ids = [ia_ids]
|
|
|
|
|
if not isinstance(ia_ids, list):
|
|
|
|
|
ia_ids = []
|
|
|
|
|
ia_candidates = [str(x) for x in ia_ids if x]
|
|
|
|
|
|
2025-12-22 02:11:53 -08:00
|
|
|
if not archive_id:
|
|
|
|
|
archive_id = _first_str(ia_candidates) or ""
|
|
|
|
|
|
|
|
|
|
if not archive_id and edition_id:
|
|
|
|
|
archive_id = _resolve_archive_id(self._session, edition_id, ia_candidates)
|
|
|
|
|
|
|
|
|
|
if not archive_id:
|
|
|
|
|
# Try to extract identifier from the SearchResult path (URL).
|
|
|
|
|
archive_id = _archive_id_from_url(str(getattr(result, "path", "") or ""))
|
|
|
|
|
|
2025-12-12 21:55:38 -08:00
|
|
|
if not archive_id:
|
2025-12-29 18:42:02 -08:00
|
|
|
log(
|
|
|
|
|
"[openlibrary] No archive identifier available; cannot download",
|
|
|
|
|
file=sys.stderr
|
|
|
|
|
)
|
2025-12-12 21:55:38 -08:00
|
|
|
return None
|
|
|
|
|
|
2025-12-25 04:49:22 -08:00
|
|
|
# Best-effort metadata scrape to attach bibliographic tags for downstream cmdlets.
|
|
|
|
|
try:
|
2026-01-06 16:19:29 -08:00
|
|
|
archive_meta = fetch_archive_item_metadata(archive_id)
|
|
|
|
|
tags = archive_item_metadata_to_tags(archive_id, archive_meta)
|
2025-12-25 04:49:22 -08:00
|
|
|
if tags:
|
|
|
|
|
try:
|
|
|
|
|
result.tag.update(tags)
|
|
|
|
|
except Exception:
|
|
|
|
|
# Fallback for callers that pass plain dicts.
|
|
|
|
|
pass
|
|
|
|
|
if isinstance(meta, dict):
|
|
|
|
|
meta["archive_id"] = archive_id
|
|
|
|
|
if archive_meta:
|
|
|
|
|
meta["archive_metadata"] = archive_meta
|
|
|
|
|
result.full_metadata = meta
|
|
|
|
|
except Exception:
|
|
|
|
|
# Never block downloads on metadata fetch.
|
|
|
|
|
pass
|
|
|
|
|
|
2025-12-12 21:55:38 -08:00
|
|
|
safe_title = sanitize_filename(result.title)
|
2025-12-22 02:11:53 -08:00
|
|
|
if not safe_title or "http" in safe_title.lower():
|
|
|
|
|
safe_title = sanitize_filename(archive_id) or "archive"
|
2025-12-12 21:55:38 -08:00
|
|
|
|
|
|
|
|
# 1) Direct download if available.
|
|
|
|
|
try:
|
2025-12-14 00:53:52 -08:00
|
|
|
can_direct, pdf_url = self._archive_check_direct_download(archive_id)
|
2025-12-12 21:55:38 -08:00
|
|
|
except Exception:
|
|
|
|
|
can_direct, pdf_url = False, ""
|
|
|
|
|
|
|
|
|
|
if can_direct and pdf_url:
|
2025-12-22 02:11:53 -08:00
|
|
|
try:
|
|
|
|
|
if progress_callback is not None:
|
|
|
|
|
progress_callback("step", 0, None, "direct download")
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
2025-12-12 21:55:38 -08:00
|
|
|
out_path = unique_path(output_dir / f"{safe_title}.pdf")
|
2026-01-06 01:38:59 -08:00
|
|
|
try:
|
|
|
|
|
with HTTPClient(timeout=30.0) as client:
|
|
|
|
|
path = client.download(
|
|
|
|
|
pdf_url,
|
|
|
|
|
str(out_path),
|
|
|
|
|
chunk_size=1024 * 256,
|
|
|
|
|
progress_callback=(
|
|
|
|
|
(lambda downloaded, total: progress_callback("bytes", downloaded, total, safe_title))
|
|
|
|
|
if progress_callback is not None
|
|
|
|
|
else None
|
|
|
|
|
),
|
|
|
|
|
)
|
|
|
|
|
if path and path.exists():
|
|
|
|
|
return path
|
|
|
|
|
log("[openlibrary] Direct download failed", file=sys.stderr)
|
|
|
|
|
return None
|
|
|
|
|
except Exception:
|
|
|
|
|
log("[openlibrary] Direct download failed", file=sys.stderr)
|
|
|
|
|
return None
|
2026-01-01 20:37:27 -08:00
|
|
|
|
2025-12-12 21:55:38 -08:00
|
|
|
# 2) Borrow flow (credentials required).
|
|
|
|
|
try:
|
2025-12-14 00:53:52 -08:00
|
|
|
email, password = self._credential_archive(self.config or {})
|
2025-12-12 21:55:38 -08:00
|
|
|
if not email or not password:
|
2025-12-29 18:42:02 -08:00
|
|
|
log(
|
|
|
|
|
"[openlibrary] Archive credentials missing; cannot borrow",
|
|
|
|
|
file=sys.stderr
|
|
|
|
|
)
|
2026-01-03 03:37:48 -08:00
|
|
|
try:
|
|
|
|
|
from SYS.rich_display import show_provider_config_panel
|
|
|
|
|
|
|
|
|
|
show_provider_config_panel(
|
|
|
|
|
"openlibrary",
|
|
|
|
|
keys=self.required_config_keys(),
|
|
|
|
|
)
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
2025-12-12 21:55:38 -08:00
|
|
|
return None
|
|
|
|
|
|
2025-12-22 02:11:53 -08:00
|
|
|
lendable = True
|
|
|
|
|
reason = ""
|
|
|
|
|
if edition_id:
|
|
|
|
|
lendable, reason = _check_lendable(self._session, edition_id)
|
|
|
|
|
if not lendable:
|
|
|
|
|
# OpenLibrary API can be a false-negative; fall back to Archive metadata.
|
|
|
|
|
lendable2, reason2 = self._archive_is_lendable(archive_id)
|
|
|
|
|
if lendable2:
|
|
|
|
|
lendable, reason = True, reason2
|
|
|
|
|
else:
|
|
|
|
|
lendable, reason = self._archive_is_lendable(archive_id)
|
|
|
|
|
|
2025-12-12 21:55:38 -08:00
|
|
|
if not lendable:
|
|
|
|
|
log(f"[openlibrary] Not lendable: {reason}", file=sys.stderr)
|
|
|
|
|
return None
|
|
|
|
|
|
2025-12-14 00:53:52 -08:00
|
|
|
session = self._archive_login(email, password)
|
2025-12-22 02:11:53 -08:00
|
|
|
loaned = False
|
2025-12-12 21:55:38 -08:00
|
|
|
try:
|
|
|
|
|
try:
|
2025-12-22 02:11:53 -08:00
|
|
|
if progress_callback is not None:
|
|
|
|
|
progress_callback("step", 0, None, "login")
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
2025-12-12 21:55:38 -08:00
|
|
|
|
2025-12-22 02:11:53 -08:00
|
|
|
try:
|
|
|
|
|
session = self._archive_loan(session, archive_id, verbose=False)
|
|
|
|
|
loaned = True
|
|
|
|
|
except self.BookNotAvailableError:
|
|
|
|
|
log("[openlibrary] Book not available to borrow", file=sys.stderr)
|
|
|
|
|
return None
|
|
|
|
|
except Exception:
|
|
|
|
|
log("[openlibrary] Borrow failed", file=sys.stderr)
|
|
|
|
|
return None
|
2025-12-12 21:55:38 -08:00
|
|
|
|
2025-12-22 02:11:53 -08:00
|
|
|
try:
|
|
|
|
|
if progress_callback is not None:
|
|
|
|
|
progress_callback("step", 0, None, "borrow")
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
2025-12-12 21:55:38 -08:00
|
|
|
|
2025-12-29 17:05:03 -08:00
|
|
|
urls = [
|
|
|
|
|
f"https://archive.org/borrow/{archive_id}",
|
|
|
|
|
f"https://archive.org/details/{archive_id}",
|
|
|
|
|
]
|
2025-12-22 02:11:53 -08:00
|
|
|
title = safe_title
|
|
|
|
|
links: Optional[List[str]] = None
|
|
|
|
|
last_exc: Optional[Exception] = None
|
|
|
|
|
for u in urls:
|
|
|
|
|
try:
|
|
|
|
|
title_raw, links, _metadata = self._archive_get_book_infos(session, u)
|
|
|
|
|
if title_raw:
|
|
|
|
|
title = sanitize_filename(title_raw)
|
|
|
|
|
break
|
|
|
|
|
except Exception as exc:
|
|
|
|
|
last_exc = exc
|
|
|
|
|
continue
|
2025-12-12 21:55:38 -08:00
|
|
|
|
2025-12-22 02:11:53 -08:00
|
|
|
if not links:
|
2025-12-29 18:42:02 -08:00
|
|
|
log(
|
|
|
|
|
f"[openlibrary] Failed to extract pages: {last_exc}",
|
|
|
|
|
file=sys.stderr
|
|
|
|
|
)
|
2025-12-22 02:11:53 -08:00
|
|
|
return None
|
2025-12-12 21:55:38 -08:00
|
|
|
|
2025-12-16 23:23:43 -08:00
|
|
|
try:
|
2025-12-22 02:11:53 -08:00
|
|
|
if progress_callback is not None:
|
|
|
|
|
progress_callback("step", 0, None, "download pages")
|
2025-12-16 23:23:43 -08:00
|
|
|
except Exception:
|
|
|
|
|
pass
|
2025-12-12 21:55:38 -08:00
|
|
|
|
2025-12-22 02:11:53 -08:00
|
|
|
temp_dir = tempfile.mkdtemp(prefix=f"{title}_", dir=str(output_dir))
|
|
|
|
|
try:
|
|
|
|
|
images = self._archive_download(
|
|
|
|
|
session=session,
|
|
|
|
|
n_threads=10,
|
|
|
|
|
directory=temp_dir,
|
|
|
|
|
links=links,
|
2026-01-03 03:37:48 -08:00
|
|
|
scale=self._archive_scale_from_config(self.config or {}),
|
2025-12-22 02:11:53 -08:00
|
|
|
book_id=archive_id,
|
|
|
|
|
progress_callback=(
|
2025-12-29 18:42:02 -08:00
|
|
|
(
|
|
|
|
|
lambda done, total:
|
|
|
|
|
progress_callback("pages", done, total, "pages")
|
|
|
|
|
) if progress_callback is not None else None
|
2025-12-22 02:11:53 -08:00
|
|
|
),
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
pdf_bytes = _image_paths_to_pdf_bytes(images)
|
|
|
|
|
if not pdf_bytes:
|
|
|
|
|
# Keep images folder for manual conversion.
|
2025-12-29 17:05:03 -08:00
|
|
|
log(
|
|
|
|
|
"[openlibrary] PDF conversion failed; keeping images folder",
|
|
|
|
|
file=sys.stderr,
|
|
|
|
|
)
|
2025-12-22 02:11:53 -08:00
|
|
|
return Path(temp_dir)
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
if progress_callback is not None:
|
|
|
|
|
progress_callback("step", 0, None, "stitch pdf")
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
pdf_path = unique_path(output_dir / f"{title}.pdf")
|
|
|
|
|
with open(pdf_path, "wb") as f:
|
|
|
|
|
f.write(pdf_bytes)
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
shutil.rmtree(temp_dir)
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
|
|
|
|
return pdf_path
|
|
|
|
|
|
|
|
|
|
except Exception:
|
|
|
|
|
try:
|
|
|
|
|
shutil.rmtree(temp_dir)
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
|
|
|
|
raise
|
|
|
|
|
finally:
|
|
|
|
|
# Always return the loan after a successful borrow, even if download/stitch fails.
|
|
|
|
|
if loaned:
|
|
|
|
|
try:
|
|
|
|
|
if progress_callback is not None:
|
|
|
|
|
progress_callback("step", 0, None, "return book")
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
|
|
|
|
try:
|
|
|
|
|
self._archive_return_loan(session, archive_id)
|
|
|
|
|
except Exception as exc:
|
2025-12-29 18:42:02 -08:00
|
|
|
log(
|
|
|
|
|
f"[openlibrary] Warning: failed to return loan: {exc}",
|
|
|
|
|
file=sys.stderr
|
|
|
|
|
)
|
2025-12-12 21:55:38 -08:00
|
|
|
try:
|
2025-12-22 02:11:53 -08:00
|
|
|
self._archive_logout(session)
|
2025-12-12 21:55:38 -08:00
|
|
|
except Exception:
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
except Exception as exc:
|
|
|
|
|
log(f"[openlibrary] Borrow workflow error: {exc}", file=sys.stderr)
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
def validate(self) -> bool:
|
|
|
|
|
return True
|