df
Some checks failed
smoke-mm / Install & smoke test mm --help (push) Has been cancelled

This commit is contained in:
2025-12-29 17:05:03 -08:00
parent 226de9316a
commit c019c00aed
104 changed files with 19669 additions and 12954 deletions

View File

@@ -154,7 +154,9 @@ def _check_lendable(session: requests.Session, edition_id: str) -> Tuple[bool, s
return False, "api-error"
def _resolve_archive_id(session: requests.Session, edition_id: str, ia_candidates: List[str]) -> str:
def _resolve_archive_id(
session: requests.Session, edition_id: str, ia_candidates: List[str]
) -> str:
# Prefer IA identifiers already present in search results.
if ia_candidates:
first = ia_candidates[0].strip()
@@ -420,18 +422,24 @@ class OpenLibrary(Provider):
email = openlibrary_config.get("email")
password = openlibrary_config.get("password")
if email or password:
return str(email) if email is not None else None, str(password) if password is not None else None
return str(email) if email is not None else None, (
str(password) if password is not None else None
)
archive_config = config.get("Archive")
if isinstance(archive_config, dict):
email = archive_config.get("email")
password = archive_config.get("password")
if email or password:
return str(email) if email is not None else None, str(password) if password is not None else None
return str(email) if email is not None else None, (
str(password) if password is not None else None
)
email = config.get("archive_org_email")
password = config.get("archive_org_password")
return str(email) if email is not None else None, str(password) if password is not None else None
return str(email) if email is not None else None, (
str(password) if password is not None else None
)
@staticmethod
def _archive_error_body(response: requests.Response) -> str:
@@ -452,10 +460,14 @@ class OpenLibrary(Provider):
try:
token_json = token_resp.json()
except Exception as exc:
raise RuntimeError(f"Archive login token parse failed: {exc}\n{cls._archive_error_body(token_resp)}")
raise RuntimeError(
f"Archive login token parse failed: {exc}\n{cls._archive_error_body(token_resp)}"
)
if not token_json.get("success"):
raise RuntimeError(f"Archive login token fetch failed\n{cls._archive_error_body(token_resp)}")
raise RuntimeError(
f"Archive login token fetch failed\n{cls._archive_error_body(token_resp)}"
)
token = (token_json.get("value") or {}).get("token")
if not token:
@@ -474,7 +486,9 @@ class OpenLibrary(Provider):
try:
login_json = login_resp.json()
except Exception as exc:
raise RuntimeError(f"Archive login parse failed: {exc}\n{cls._archive_error_body(login_resp)}")
raise RuntimeError(
f"Archive login parse failed: {exc}\n{cls._archive_error_body(login_resp)}"
)
if login_json.get("success") is False:
if login_json.get("value") == "bad_login":
@@ -484,16 +498,23 @@ class OpenLibrary(Provider):
return session
@classmethod
def _archive_loan(cls, session: requests.Session, book_id: str, *, verbose: bool = True) -> requests.Session:
def _archive_loan(
cls, session: requests.Session, book_id: str, *, verbose: bool = True
) -> requests.Session:
data = {"action": "grant_access", "identifier": book_id}
session.post("https://archive.org/services/loans/loan/searchInside.php", data=data, timeout=30)
session.post(
"https://archive.org/services/loans/loan/searchInside.php", data=data, timeout=30
)
data["action"] = "browse_book"
response = session.post("https://archive.org/services/loans/loan/", data=data, timeout=30)
if response.status_code == 400:
try:
err = (response.json() or {}).get("error")
if err == "This book is not available to borrow at this time. Please try again later.":
if (
err
== "This book is not available to borrow at this time. Please try again later."
):
raise cls.BookNotAvailableError("Book is waitlisted or in use")
raise RuntimeError(f"Borrow failed: {err or response.text}")
except cls.BookNotAvailableError:
@@ -574,7 +595,9 @@ class OpenLibrary(Provider):
return False, "archive-metadata-error"
@staticmethod
def _archive_get_book_infos(session: requests.Session, url: str) -> Tuple[str, List[str], Dict[str, Any]]:
def _archive_get_book_infos(
session: requests.Session, url: str
) -> Tuple[str, List[str], Dict[str, Any]]:
"""Extract page links from Archive.org book reader."""
r = session.get(url, timeout=30).text
@@ -748,7 +771,9 @@ class OpenLibrary(Provider):
continue
filename = str(file_info.get("name", ""))
if filename.endswith(".pdf") and file_info.get("source") == "original":
pdf_url = f"https://archive.org/download/{book_id}/{filename.replace(' ', '%20')}"
pdf_url = (
f"https://archive.org/download/{book_id}/{filename.replace(' ', '%20')}"
)
check_response = requests.head(pdf_url, timeout=4, allow_redirects=True)
if check_response.status_code == 200:
return True, pdf_url
@@ -1040,7 +1065,9 @@ class OpenLibrary(Provider):
session_local = requests.Session()
try:
archive_id_local = _resolve_archive_id(session_local, edition_id_local, ia_ids_local)
archive_id_local = _resolve_archive_id(
session_local, edition_id_local, ia_ids_local
)
except Exception:
archive_id_local = ""
@@ -1062,7 +1089,9 @@ class OpenLibrary(Provider):
return "unavailable", reason_local, archive_id_local, ""
availability_rows: List[Tuple[str, str, str, str]] = [("unknown", "", "", "") for _ in range(len(docs))]
availability_rows: List[Tuple[str, str, str, str]] = [
("unknown", "", "", "") for _ in range(len(docs))
]
if docs:
max_workers = min(8, max(1, len(docs)))
done = 0
@@ -1080,7 +1109,6 @@ class OpenLibrary(Provider):
availability_rows[i] = ("unknown", "", "", "")
done += 1
for idx, doc in enumerate(docs):
if not isinstance(doc, dict):
continue
@@ -1150,8 +1178,12 @@ class OpenLibrary(Provider):
table="openlibrary",
title=book_title,
path=(
f"https://openlibrary.org/books/{edition_id}" if edition_id else (
f"https://openlibrary.org{work_key}" if isinstance(work_key, str) and work_key.startswith("/") else "https://openlibrary.org"
f"https://openlibrary.org/books/{edition_id}"
if edition_id
else (
f"https://openlibrary.org{work_key}"
if isinstance(work_key, str) and work_key.startswith("/")
else "https://openlibrary.org"
)
),
detail=(
@@ -1257,7 +1289,11 @@ class OpenLibrary(Provider):
out_path,
session=self._session,
progress_callback=(
(lambda downloaded, total, label: progress_callback("bytes", downloaded, total, label))
(
lambda downloaded, total, label: progress_callback(
"bytes", downloaded, total, label
)
)
if progress_callback is not None
else None
),
@@ -1315,7 +1351,10 @@ class OpenLibrary(Provider):
except Exception:
pass
urls = [f"https://archive.org/borrow/{archive_id}", f"https://archive.org/details/{archive_id}"]
urls = [
f"https://archive.org/borrow/{archive_id}",
f"https://archive.org/details/{archive_id}",
]
title = safe_title
links: Optional[List[str]] = None
last_exc: Optional[Exception] = None
@@ -1358,7 +1397,10 @@ class OpenLibrary(Provider):
pdf_bytes = _image_paths_to_pdf_bytes(images)
if not pdf_bytes:
# Keep images folder for manual conversion.
log("[openlibrary] PDF conversion failed; keeping images folder", file=sys.stderr)
log(
"[openlibrary] PDF conversion failed; keeping images folder",
file=sys.stderr,
)
return Path(temp_dir)
try: