nose
2025-12-14 00:53:52 -08:00
parent 52a79b0086
commit a03eb0d1be
24 changed files with 2785 additions and 1868 deletions


@@ -1,19 +1,38 @@
from __future__ import annotations
import base64
from concurrent import futures
import hashlib
import json as json_module
import re
import shutil
import sys
import tempfile
import time
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
import requests
from API.HTTP import HTTPClient
from ProviderCore.base import SearchProvider, SearchResult
from ProviderCore.download import download_file, sanitize_filename
from cli_syntax import get_field, get_free_text, parse_query
from SYS.logger import log
from SYS.utils import unique_path
try:
from Crypto.Cipher import AES # type: ignore
from Crypto.Util import Counter # type: ignore
except ImportError:
AES = None # type: ignore
Counter = None # type: ignore
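# AES/Counter are optional (typically provided by pycryptodome); page
# de-obfuscation below raises at runtime if they are absent.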
try:
from tqdm import tqdm # type: ignore
except ImportError:
tqdm = None # type: ignore
def _looks_like_isbn(text: str) -> bool:
t = (text or "").replace("-", "").strip()
@@ -38,6 +57,13 @@ def _resolve_edition_id(doc: Dict[str, Any]) -> str:
edition_key = doc.get("edition_key")
if isinstance(edition_key, list) and edition_key:
return str(edition_key[0]).strip()
if isinstance(edition_key, str) and edition_key.strip():
return edition_key.strip()
# Often present even when edition_key is missing.
cover_edition_key = doc.get("cover_edition_key")
if isinstance(cover_edition_key, str) and cover_edition_key.strip():
return cover_edition_key.strip()
# Fallback: sometimes key can be /books/OL...M
key = doc.get("key")
@@ -54,7 +80,7 @@ def _check_lendable(session: requests.Session, edition_id: str) -> Tuple[bool, s
return False, "not-an-edition"
url = f"https://openlibrary.org/api/volumes/brief/json/OLID:{edition_id}"
resp = session.get(url, timeout=10)
resp = session.get(url, timeout=6)
resp.raise_for_status()
data = resp.json() or {}
wrapped = data.get(f"OLID:{edition_id}")
@@ -88,7 +114,7 @@ def _resolve_archive_id(session: requests.Session, edition_id: str, ia_candidate
# Otherwise query the edition JSON.
try:
resp = session.get(f"https://openlibrary.org/books/{edition_id}.json", timeout=10)
resp = session.get(f"https://openlibrary.org/books/{edition_id}.json", timeout=6)
resp.raise_for_status()
data = resp.json() or {}
@@ -116,6 +142,522 @@ class OpenLibrary(SearchProvider):
super().__init__(config)
self._session = requests.Session()
class BookNotAvailableError(Exception):
"""Raised when a book is not available for borrowing (waitlisted/in use)."""
@staticmethod
def _credential_archive(config: Dict[str, Any]) -> Tuple[Optional[str], Optional[str]]:
"""Get Archive.org email/password from config.
Supports:
- New: {"provider": {"openlibrary": {"email": "...", "password": "..."}}}
- Old: {"Archive": {"email": "...", "password": "..."}}
{"archive_org_email": "...", "archive_org_password": "..."}
"""
if not isinstance(config, dict):
return None, None
provider_config = config.get("provider", {})
if isinstance(provider_config, dict):
openlibrary_config = provider_config.get("openlibrary", {})
if isinstance(openlibrary_config, dict):
email = openlibrary_config.get("email")
password = openlibrary_config.get("password")
if email or password:
return str(email) if email is not None else None, str(password) if password is not None else None
archive_config = config.get("Archive")
if isinstance(archive_config, dict):
email = archive_config.get("email")
password = archive_config.get("password")
if email or password:
return str(email) if email is not None else None, str(password) if password is not None else None
email = config.get("archive_org_email")
password = config.get("archive_org_password")
return str(email) if email is not None else None, str(password) if password is not None else None
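# Usage sketch (hypothetical config values, for illustration only):
#   cfg = {"provider": {"openlibrary": {"email": "user@example.com", "password": "secret"}}}
#   OpenLibrary._credential_archive(cfg)  # -> ("user@example.com", "secret")
#   OpenLibrary._credential_archive({})   # -> (None, None)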
@staticmethod
def _archive_error_body(response: requests.Response) -> str:
try:
body = response.text or ""
except Exception:
return ""
if len(body) > 2000:
return body[:1200] + "\n... (truncated) ...\n" + body[-400:]
return body
@classmethod
def _archive_login(cls, email: str, password: str) -> requests.Session:
"""Login to archive.org using the token-based services endpoint (matches test-login.py)."""
session = requests.Session()
token_resp = session.get("https://archive.org/services/account/login/", timeout=30)
try:
token_json = token_resp.json()
except Exception as exc:
raise RuntimeError(f"Archive login token parse failed: {exc}\n{cls._archive_error_body(token_resp)}")
if not token_json.get("success"):
raise RuntimeError(f"Archive login token fetch failed\n{cls._archive_error_body(token_resp)}")
token = (token_json.get("value") or {}).get("token")
if not token:
raise RuntimeError("Archive login token missing")
headers = {"Content-Type": "application/x-www-form-urlencoded"}
payload = {"username": email, "password": password, "t": token}
login_resp = session.post(
"https://archive.org/services/account/login/",
headers=headers,
data=json_module.dumps(payload),
timeout=30,
)
try:
login_json = login_resp.json()
except Exception as exc:
raise RuntimeError(f"Archive login parse failed: {exc}\n{cls._archive_error_body(login_resp)}")
if login_json.get("success") is False:
if login_json.get("value") == "bad_login":
raise RuntimeError("Invalid Archive.org credentials")
raise RuntimeError(f"Archive login failed: {login_json}")
return session
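# Usage sketch (assumes valid Archive.org credentials; the returned Session
# carries the logged-in cookies that the loan/download helpers below rely on):
#   session = OpenLibrary._archive_login("user@example.com", "secret")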
@classmethod
def _archive_loan(cls, session: requests.Session, book_id: str, *, verbose: bool = True) -> requests.Session:
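"""Borrow flow: POST grant_access (to searchInside.php), then browse_book
(takes the loan), then create_token (reader token used by page requests)."""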
data = {"action": "grant_access", "identifier": book_id}
session.post("https://archive.org/services/loans/loan/searchInside.php", data=data, timeout=30)
data["action"] = "browse_book"
response = session.post("https://archive.org/services/loans/loan/", data=data, timeout=30)
if response.status_code == 400:
try:
    err = (response.json() or {}).get("error")
except Exception:
    # Non-JSON 400 body: nothing more specific to report.
    raise RuntimeError("The book cannot be borrowed")
if err == "This book is not available to borrow at this time. Please try again later.":
    raise cls.BookNotAvailableError("Book is waitlisted or in use")
raise RuntimeError(f"Borrow failed: {err or response.text}")
data["action"] = "create_token"
response = session.post("https://archive.org/services/loans/loan/", data=data, timeout=30)
if "token" in (response.text or ""):
return session
raise RuntimeError("Something went wrong when trying to borrow the book")
@staticmethod
def _archive_return_loan(session: requests.Session, book_id: str) -> None:
data = {"action": "return_loan", "identifier": book_id}
response = session.post("https://archive.org/services/loans/loan/", data=data, timeout=30)
if response.status_code == 200:
try:
if (response.json() or {}).get("success"):
return
except Exception:
pass
raise RuntimeError("Something went wrong when trying to return the book")
@staticmethod
def _archive_get_book_infos(session: requests.Session, url: str) -> Tuple[str, List[str], Dict[str, Any]]:
"""Extract page links from Archive.org book reader."""
page_html = session.get(url, timeout=30).text
# Matches: "url":"//archive.org/..." (allow whitespace)
match = re.search(r'"url"\s*:\s*"([^"]+)"', page_html)
if not match:
raise RuntimeError("Failed to extract book info URL from response")
url_path = match.group(1)
infos_url = ("https:" + url_path) if url_path.startswith("//") else url_path
infos_url = infos_url.replace("\\u0026", "&")
response = session.get(infos_url, timeout=30)
payload = response.json()
data = payload["data"]
title = str(data["brOptions"]["bookTitle"]).strip().replace(" ", "_")
title = "".join(c for c in title if c not in '<>:"/\\|?*')
title = title[:150]
metadata = data.get("metadata") or {}
links: List[str] = []
br_data = (data.get("brOptions") or {}).get("data", [])
if isinstance(br_data, list):
for item in br_data:
if isinstance(item, list):
for page in item:
if isinstance(page, dict) and "uri" in page:
links.append(page["uri"])
elif isinstance(item, dict) and "uri" in item:
links.append(item["uri"])
if not links:
raise RuntimeError("No pages found in book data")
return title, links, metadata if isinstance(metadata, dict) else {}
@staticmethod
def _archive_image_name(pages: int, page: int, directory: str) -> str:
return f"{directory}/{(len(str(pages)) - len(str(page))) * '0'}{page}.jpg"
@staticmethod
def _archive_deobfuscate_image(image_data: bytes, link: str, obf_header: str) -> bytes:
if not AES or not Counter:
raise RuntimeError("Crypto library not available")
try:
version, counter_b64 = obf_header.split("|")
except Exception as exc:
raise ValueError("Invalid X-Obfuscate header format") from exc
if version != "1":
raise ValueError("Unsupported obfuscation version: " + version)
# The AES key is the first 16 bytes of SHA-1 over the URL path (scheme and host stripped).
aes_key = re.sub(r"^https?://.*?/", "/", link)
sha1_digest = hashlib.sha1(aes_key.encode("utf-8")).digest()
key = sha1_digest[:16]
counter_bytes = base64.b64decode(counter_b64)
if len(counter_bytes) != 16:
raise ValueError(f"Expected counter to be 16 bytes, got {len(counter_bytes)}")
prefix = counter_bytes[:8]
initial_value = int.from_bytes(counter_bytes[8:], byteorder="big")
ctr = Counter.new(64, prefix=prefix, initial_value=initial_value, little_endian=False) # type: ignore
cipher = AES.new(key, AES.MODE_CTR, counter=ctr) # type: ignore
decrypted_part = cipher.decrypt(image_data[:1024])
return decrypted_part + image_data[1024:]
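# Layout of the X-Obfuscate header handled above (counter value illustrative):
#   "1|<base64 of 16 bytes>"  ->  bytes[:8] = 64-bit CTR prefix,
#                                 bytes[8:] = big-endian initial counter value.
# Only the first 1024 bytes of the image are AES-CTR obfuscated; the remainder
# is plaintext and is passed through unchanged.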
@classmethod
def _archive_download_one_image(
cls,
session: requests.Session,
link: str,
i: int,
directory: str,
book_id: str,
pages: int,
) -> None:
headers = {
"Referer": "https://archive.org/",
"Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
"Sec-Fetch-Site": "same-site",
"Sec-Fetch-Mode": "no-cors",
"Sec-Fetch-Dest": "image",
}
while True:
    try:
        response = session.get(link, headers=headers, timeout=30)
        if response.status_code == 403:
            # Loan likely expired mid-download: re-borrow, then retry the page.
            cls._archive_loan(session, book_id, verbose=False)
        elif response.status_code == 200:
            break
    except Exception:
        pass
    # Back off briefly on errors and non-200 responses instead of spinning.
    time.sleep(1)
image = cls._archive_image_name(pages, i, directory)
obf_header = response.headers.get("X-Obfuscate")
if obf_header:
image_content = cls._archive_deobfuscate_image(response.content, link, obf_header)
else:
image_content = response.content
with open(image, "wb") as f:
f.write(image_content)
@classmethod
def _archive_download(
cls,
session: requests.Session,
n_threads: int,
directory: str,
links: List[str],
scale: int,
book_id: str,
) -> List[str]:
links_scaled = [f"{link}&rotate=0&scale={scale}" for link in links]
pages = len(links_scaled)
tasks = []
with futures.ThreadPoolExecutor(max_workers=n_threads) as executor:
for i, link in enumerate(links_scaled):
tasks.append(
executor.submit(
cls._archive_download_one_image,
session=session,
link=link,
i=i,
directory=directory,
book_id=book_id,
pages=pages,
)
)
if tqdm:
for _ in tqdm(futures.as_completed(tasks), total=len(tasks)): # type: ignore
pass
else:
for _ in futures.as_completed(tasks):
pass
return [cls._archive_image_name(pages, i, directory) for i in range(pages)]
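# Usage sketch (assumes `session` already holds an active loan for book_id;
# identifiers and paths are illustrative):
#   images = OpenLibrary._archive_download(session, n_threads=10, directory=tmp,
#                                          links=links, scale=3, book_id="exampleid")
#   # -> ["<tmp>/000.jpg", "<tmp>/001.jpg", ...] in page order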
@staticmethod
def _archive_check_direct_download(book_id: str) -> Tuple[bool, str]:
"""Check for a directly downloadable original PDF in Archive.org metadata."""
try:
metadata_url = f"https://archive.org/metadata/{book_id}"
response = requests.get(metadata_url, timeout=6)
response.raise_for_status()
metadata = response.json()
files = metadata.get("files") if isinstance(metadata, dict) else None
if isinstance(files, list):
for file_info in files:
if not isinstance(file_info, dict):
continue
filename = str(file_info.get("name", ""))
if filename.endswith(".pdf") and file_info.get("source") == "original":
pdf_url = f"https://archive.org/download/{book_id}/{filename.replace(' ', '%20')}"
check_response = requests.head(pdf_url, timeout=4, allow_redirects=True)
if check_response.status_code == 200:
return True, pdf_url
return False, ""
except Exception:
return False, ""
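# Metadata shape this check assumes (abridged; other fields omitted):
#   GET https://archive.org/metadata/<book_id> ->
#   {"files": [{"name": "Example Book.pdf", "source": "original", ...}, ...]}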
@staticmethod
def scrape_isbn_metadata(isbn: str) -> List[str]:
"""Scrape tags for an ISBN using Open Library API.
Returns tags such as:
- title:<...>, author:<...>, publish_date:<...>, publisher:<...>, description:<...>, pages:<...>
- identifiers: openlibrary:<...>, lccn:<...>, oclc:<...>, goodreads:<...>, librarything:<...>, doi:<...>, internet_archive:<...>
"""
new_tags: List[str] = []
isbn_clean = str(isbn or "").replace("isbn:", "").replace("-", "").strip()
if not isbn_clean:
return []
url = f"https://openlibrary.org/api/books?bibkeys=ISBN:{isbn_clean}&jscmd=data&format=json"
try:
with HTTPClient() as client:
response = client.get(url)
response.raise_for_status()
data = json_module.loads(response.content.decode("utf-8"))
except Exception as exc:
log(f"Failed to fetch ISBN metadata: {exc}", file=sys.stderr)
return []
if not data:
log(f"No ISBN metadata found for: {isbn}")
return []
book_data = next(iter(data.values()), None)
if not isinstance(book_data, dict):
return []
if "title" in book_data:
new_tags.append(f"title:{book_data['title']}")
authors = book_data.get("authors")
if isinstance(authors, list):
for author in authors[:3]:
if isinstance(author, dict) and author.get("name"):
new_tags.append(f"author:{author['name']}")
if book_data.get("publish_date"):
new_tags.append(f"publish_date:{book_data['publish_date']}")
publishers = book_data.get("publishers")
if isinstance(publishers, list) and publishers:
pub = publishers[0]
if isinstance(pub, dict) and pub.get("name"):
new_tags.append(f"publisher:{pub['name']}")
if "description" in book_data:
desc = book_data.get("description")
if isinstance(desc, dict) and "value" in desc:
desc = desc.get("value")
if desc:
desc_str = str(desc).strip()
if desc_str:
new_tags.append(f"description:{desc_str[:200]}")
page_count = book_data.get("number_of_pages")
if isinstance(page_count, int) and page_count > 0:
new_tags.append(f"pages:{page_count}")
identifiers = book_data.get("identifiers")
if isinstance(identifiers, dict):
def _first(value: Any) -> Any:
if isinstance(value, list) and value:
return value[0]
return value
for key, ns in (
("openlibrary", "openlibrary"),
("lccn", "lccn"),
("oclc", "oclc"),
("goodreads", "goodreads"),
("librarything", "librarything"),
("doi", "doi"),
("internet_archive", "internet_archive"),
):
val = _first(identifiers.get(key))
if val:
new_tags.append(f"{ns}:{val}")
log(f"Found {len(new_tags)} tag(s) from ISBN lookup")
return new_tags
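# Example result sketch (tag values illustrative, not a recorded API response):
#   OpenLibrary.scrape_isbn_metadata("978-0-14-044913-6")
#   -> ["title:...", "author:...", "publisher:...", "openlibrary:OL...M", ...]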
@staticmethod
def scrape_openlibrary_metadata(olid: str) -> List[str]:
"""Scrape tags for an OpenLibrary ID using the .json API endpoint."""
new_tags: List[str] = []
olid_text = str(olid or "").strip()
if not olid_text:
return []
# Normalize OLID to the common "OL<digits>M" form when possible.
olid_norm = olid_text
if not olid_norm.startswith("OL"):
    olid_norm = f"OL{olid_norm}"
if not olid_norm.endswith("M"):
    olid_norm = f"{olid_norm}M"
# Ensure we always include a scrapeable identifier tag.
new_tags.append(f"openlibrary:{olid_norm}")
# Accept OL9674499M, 9674499M, or just digits.
olid_clean = olid_text.replace("OL", "").replace("M", "")
if not olid_clean.isdigit():
olid_clean = olid_text
if not olid_text.startswith("OL"):
url = f"https://openlibrary.org/books/OL{olid_clean}M.json"
else:
url = f"https://openlibrary.org/books/{olid_text}.json"
try:
with HTTPClient() as client:
response = client.get(url)
response.raise_for_status()
data = json_module.loads(response.content.decode("utf-8"))
except Exception as exc:
log(f"Failed to fetch OpenLibrary metadata: {exc}", file=sys.stderr)
return []
if not isinstance(data, dict) or not data:
log(f"No OpenLibrary metadata found for: {olid_text}")
return []
if "title" in data:
new_tags.append(f"title:{data['title']}")
authors = data.get("authors")
if isinstance(authors, list):
for author in authors[:3]:
if isinstance(author, dict) and author.get("name"):
new_tags.append(f"author:{author['name']}")
continue
# Common OL shape: {"key": "/authors/OL...A"} or {"author": {"key": ...}}
author_key = None
if isinstance(author, dict):
if isinstance(author.get("author"), dict):
author_key = author.get("author", {}).get("key")
if not author_key:
author_key = author.get("key")
if isinstance(author_key, str) and author_key.startswith("/"):
try:
author_url = f"https://openlibrary.org{author_key}.json"
with HTTPClient(timeout=10) as client:
author_resp = client.get(author_url)
author_resp.raise_for_status()
author_data = json_module.loads(author_resp.content.decode("utf-8"))
if isinstance(author_data, dict) and author_data.get("name"):
new_tags.append(f"author:{author_data['name']}")
continue
except Exception:
pass
if isinstance(author, str) and author:
new_tags.append(f"author:{author}")
if data.get("publish_date"):
new_tags.append(f"publish_date:{data['publish_date']}")
publishers = data.get("publishers")
if isinstance(publishers, list) and publishers:
pub = publishers[0]
if isinstance(pub, dict) and pub.get("name"):
new_tags.append(f"publisher:{pub['name']}")
elif isinstance(pub, str) and pub:
new_tags.append(f"publisher:{pub}")
if "description" in data:
desc = data.get("description")
if isinstance(desc, dict) and "value" in desc:
desc = desc.get("value")
if desc:
desc_str = str(desc).strip()
if desc_str:
new_tags.append(f"description:{desc_str[:200]}")
page_count = data.get("number_of_pages")
if isinstance(page_count, int) and page_count > 0:
new_tags.append(f"pages:{page_count}")
subjects = data.get("subjects")
if isinstance(subjects, list):
for subject in subjects[:10]:
if isinstance(subject, str):
subject_clean = subject.strip()
if subject_clean and subject_clean not in new_tags:
new_tags.append(subject_clean)
identifiers = data.get("identifiers")
if isinstance(identifiers, dict):
def _first(value: Any) -> Any:
if isinstance(value, list) and value:
return value[0]
return value
for key, ns in (
("isbn_10", "isbn_10"),
("isbn_13", "isbn_13"),
("lccn", "lccn"),
("oclc_numbers", "oclc"),
("goodreads", "goodreads"),
("internet_archive", "internet_archive"),
):
val = _first(identifiers.get(key))
if val:
new_tags.append(f"{ns}:{val}")
# Some editions expose a direct Archive.org identifier as "ocaid".
ocaid = data.get("ocaid")
if isinstance(ocaid, str) and ocaid.strip():
new_tags.append(f"internet_archive:{ocaid.strip()}")
log(f"Found {len(new_tags)} tag(s) from OpenLibrary lookup")
return new_tags
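# Example result sketch (values illustrative):
#   OpenLibrary.scrape_openlibrary_metadata("OL9674499M")
#   -> ["openlibrary:OL9674499M", "title:...", "author:...", "internet_archive:..."]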
def search(
self,
query: str,
@@ -155,7 +697,70 @@ class OpenLibrary(SearchProvider):
if not isinstance(docs, list):
return []
for doc in docs[: int(limit)]:
# Availability enrichment can be slow if done sequentially (it may require multiple
# network calls per row). Do it concurrently to keep the pipeline responsive.
docs = docs[: int(limit)]
def _compute_availability(doc_dict: Dict[str, Any]) -> Tuple[str, str, str, str]:
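"""Classify one search doc; returns (availability, reason, archive_id, direct_url),
where availability is one of: no-olid, no-archive, borrow, download, unavailable."""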
edition_id_local = _resolve_edition_id(doc_dict)
if not edition_id_local:
return "no-olid", "", "", ""
ia_val_local = doc_dict.get("ia") or []
if isinstance(ia_val_local, str):
ia_val_local = [ia_val_local]
if not isinstance(ia_val_local, list):
ia_val_local = []
ia_ids_local = [str(x) for x in ia_val_local if x]
session_local = requests.Session()
try:
archive_id_local = _resolve_archive_id(session_local, edition_id_local, ia_ids_local)
except Exception:
archive_id_local = ""
if not archive_id_local:
return "no-archive", "", "", ""
# Prefer the fastest signal first: OpenLibrary lendable status.
lendable_local, reason_local = _check_lendable(session_local, edition_id_local)
if lendable_local:
return "borrow", reason_local, archive_id_local, ""
# Not lendable: check whether it's directly downloadable (public domain uploads, etc.).
try:
can_direct, pdf_url = self._archive_check_direct_download(archive_id_local)
if can_direct and pdf_url:
return "download", reason_local, archive_id_local, str(pdf_url)
except Exception:
pass
return "unavailable", reason_local, archive_id_local, ""
availability_rows: List[Tuple[str, str, str, str]] = [("unknown", "", "", "") for _ in range(len(docs))]
if docs:
log(f"[openlibrary] Enriching availability for {len(docs)} result(s)...")
max_workers = min(8, max(1, len(docs)))
done = 0
with futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
future_to_index = {
executor.submit(_compute_availability, doc_dict): i
for i, doc_dict in enumerate(docs)
if isinstance(doc_dict, dict)
}
for fut in futures.as_completed(list(future_to_index.keys())):
i = future_to_index[fut]
try:
availability_rows[i] = fut.result()
except Exception:
availability_rows[i] = ("unknown", "", "", "")
done += 1
if done in {1, len(future_to_index)} or (done % 10 == 0):
log(f"[openlibrary] Availability: {done}/{len(future_to_index)}")
log("[openlibrary] Availability enrichment complete")
for idx, doc in enumerate(docs):
if not isinstance(doc, dict):
continue
@@ -172,6 +777,7 @@ class OpenLibrary(SearchProvider):
year = str(year_val) if year_val is not None else ""
edition_id = _resolve_edition_id(doc)
work_key = doc.get("key") if isinstance(doc.get("key"), str) else ""
ia_val = doc.get("ia") or []
if isinstance(ia_val, str):
@@ -193,9 +799,21 @@ class OpenLibrary(SearchProvider):
("Title", book_title),
("Author", ", ".join(authors_list)),
("Year", year),
("Avail", ""),
("OLID", edition_id),
]
# Determine availability using the concurrently computed enrichment.
availability, availability_reason, archive_id, direct_url = ("unknown", "", "", "")
if 0 <= idx < len(availability_rows):
availability, availability_reason, archive_id, direct_url = availability_rows[idx]
# Patch the display column (use a distinct index so the outer `idx` is not shadowed).
for col_idx, (name, _val) in enumerate(columns):
    if name == "Avail":
        columns[col_idx] = ("Avail", availability)
        break
annotations: List[str] = []
if isbn_13:
annotations.append(f"isbn_13:{isbn_13}")
@@ -203,12 +821,18 @@ class OpenLibrary(SearchProvider):
annotations.append(f"isbn_10:{isbn_10}")
if ia_ids:
annotations.append("archive")
if availability in {"download", "borrow"}:
annotations.append(availability)
results.append(
SearchResult(
table="openlibrary",
title=book_title,
path=(f"https://openlibrary.org/books/{edition_id}" if edition_id else "https://openlibrary.org"),
path=(
f"https://openlibrary.org/books/{edition_id}" if edition_id else (
f"https://openlibrary.org{work_key}" if isinstance(work_key, str) and work_key.startswith("/") else "https://openlibrary.org"
)
),
detail=(
(f"By: {', '.join(authors_list)}" if authors_list else "")
+ (f" ({year})" if year else "")
@@ -218,11 +842,16 @@ class OpenLibrary(SearchProvider):
columns=columns,
full_metadata={
"openlibrary_id": edition_id,
"openlibrary_key": work_key,
"authors": authors_list,
"year": year,
"isbn_10": isbn_10,
"isbn_13": isbn_13,
"ia": ia_ids,
"availability": availability,
"availability_reason": availability_reason,
"archive_id": archive_id,
"direct_url": direct_url,
"raw": doc,
},
)
@@ -256,9 +885,7 @@ class OpenLibrary(SearchProvider):
# 1) Direct download if available.
try:
from API.archive_client import check_direct_download
can_direct, pdf_url = check_direct_download(archive_id)
can_direct, pdf_url = self._archive_check_direct_download(archive_id)
except Exception:
can_direct, pdf_url = False, ""
@@ -272,10 +899,7 @@ class OpenLibrary(SearchProvider):
# 2) Borrow flow (credentials required).
try:
from API.archive_client import BookNotAvailableError, credential_openlibrary, download as archive_download
from API.archive_client import get_book_infos, loan, login
email, password = credential_openlibrary(self.config or {})
email, password = self._credential_archive(self.config or {})
if not email or not password:
log("[openlibrary] Archive credentials missing; cannot borrow", file=sys.stderr)
return None
@@ -285,13 +909,13 @@ class OpenLibrary(SearchProvider):
log(f"[openlibrary] Not lendable: {reason}", file=sys.stderr)
return None
session = login(email, password)
session = self._archive_login(email, password)
try:
session = loan(session, archive_id, verbose=False)
except BookNotAvailableError:
session = self._archive_loan(session, archive_id, verbose=False)
except self.BookNotAvailableError:
log("[openlibrary] Book not available to borrow", file=sys.stderr)
return None
except SystemExit:
except Exception:
log("[openlibrary] Borrow failed", file=sys.stderr)
return None
@@ -301,7 +925,7 @@ class OpenLibrary(SearchProvider):
last_exc: Optional[Exception] = None
for u in urls:
try:
title_raw, links, _metadata = get_book_infos(session, u)
title_raw, links, _metadata = self._archive_get_book_infos(session, u)
if title_raw:
title = sanitize_filename(title_raw)
break
@@ -315,7 +939,7 @@ class OpenLibrary(SearchProvider):
temp_dir = tempfile.mkdtemp(prefix=f"{title}_", dir=str(output_dir))
try:
images = archive_download(session=session, n_threads=10, directory=temp_dir, links=links, scale=3, book_id=archive_id)
images = self._archive_download(session=session, n_threads=10, directory=temp_dir, links=links, scale=3, book_id=archive_id)
try:
import img2pdf # type: ignore