openlibrary: inline Archive.org login/borrow/download helpers and add concurrent availability enrichment
@@ -1,19 +1,38 @@
from __future__ import annotations

import base64
from concurrent import futures
import hashlib
import json as json_module
import re
import shutil
import sys
import tempfile
import time
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

import requests

from API.HTTP import HTTPClient
from ProviderCore.base import SearchProvider, SearchResult
from ProviderCore.download import download_file, sanitize_filename
from cli_syntax import get_field, get_free_text, parse_query
from SYS.logger import log
from SYS.utils import unique_path

try:
    from Crypto.Cipher import AES  # type: ignore
    from Crypto.Util import Counter  # type: ignore
except ImportError:
    AES = None  # type: ignore
    Counter = None  # type: ignore

try:
    from tqdm import tqdm  # type: ignore
except ImportError:
    tqdm = None  # type: ignore


def _looks_like_isbn(text: str) -> bool:
    t = (text or "").replace("-", "").strip()
@@ -38,6 +57,13 @@ def _resolve_edition_id(doc: Dict[str, Any]) -> str:
    edition_key = doc.get("edition_key")
    if isinstance(edition_key, list) and edition_key:
        return str(edition_key[0]).strip()
    if isinstance(edition_key, str) and edition_key.strip():
        return edition_key.strip()

    # Often present even when edition_key is missing.
    cover_edition_key = doc.get("cover_edition_key")
    if isinstance(cover_edition_key, str) and cover_edition_key.strip():
        return cover_edition_key.strip()

    # Fallback: sometimes key can be /books/OL...M
    key = doc.get("key")
@@ -54,7 +80,7 @@ def _check_lendable(session: requests.Session, edition_id: str) -> Tuple[bool, s
        return False, "not-an-edition"

    url = f"https://openlibrary.org/api/volumes/brief/json/OLID:{edition_id}"
-    resp = session.get(url, timeout=10)
    resp = session.get(url, timeout=6)
    resp.raise_for_status()
    data = resp.json() or {}
    wrapped = data.get(f"OLID:{edition_id}")
@@ -88,7 +114,7 @@ def _resolve_archive_id(session: requests.Session, edition_id: str, ia_candidate

    # Otherwise query the edition JSON.
    try:
-        resp = session.get(f"https://openlibrary.org/books/{edition_id}.json", timeout=10)
        resp = session.get(f"https://openlibrary.org/books/{edition_id}.json", timeout=6)
        resp.raise_for_status()
        data = resp.json() or {}
@@ -116,6 +142,522 @@ class OpenLibrary(SearchProvider):
        super().__init__(config)
        self._session = requests.Session()

    class BookNotAvailableError(Exception):
        """Raised when a book is not available for borrowing (waitlisted/in use)."""

    @staticmethod
    def _credential_archive(config: Dict[str, Any]) -> Tuple[Optional[str], Optional[str]]:
        """Get Archive.org email/password from config.

        Supports:
        - New: {"provider": {"openlibrary": {"email": "...", "password": "..."}}}
        - Old: {"Archive": {"email": "...", "password": "..."}}
        - Legacy flat keys: {"archive_org_email": "...", "archive_org_password": "..."}
        """
        if not isinstance(config, dict):
            return None, None

        provider_config = config.get("provider", {})
        if isinstance(provider_config, dict):
            openlibrary_config = provider_config.get("openlibrary", {})
            if isinstance(openlibrary_config, dict):
                email = openlibrary_config.get("email")
                password = openlibrary_config.get("password")
                if email or password:
                    return str(email) if email is not None else None, str(password) if password is not None else None

        archive_config = config.get("Archive")
        if isinstance(archive_config, dict):
            email = archive_config.get("email")
            password = archive_config.get("password")
            if email or password:
                return str(email) if email is not None else None, str(password) if password is not None else None

        email = config.get("archive_org_email")
        password = config.get("archive_org_password")
        return str(email) if email is not None else None, str(password) if password is not None else None
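
    # Illustrative resolution order (hypothetical config values): given
    #   {"provider": {"openlibrary": {"email": "a@example.com", "password": "x"}},
    #    "Archive": {"email": "b@example.com", "password": "y"}}
    # the provider.openlibrary block wins and ("a@example.com", "x") is returned;
    # the flat archive_org_* keys are only consulted last.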

    @staticmethod
    def _archive_error_body(response: requests.Response) -> str:
        try:
            body = response.text or ""
        except Exception:
            return ""
        if len(body) > 2000:
            return body[:1200] + "\n... (truncated) ...\n" + body[-400:]
        return body

    @classmethod
    def _archive_login(cls, email: str, password: str) -> requests.Session:
        """Login to archive.org using the token-based services endpoint (matches test-login.py)."""
        session = requests.Session()

        token_resp = session.get("https://archive.org/services/account/login/", timeout=30)
        try:
            token_json = token_resp.json()
        except Exception as exc:
            raise RuntimeError(f"Archive login token parse failed: {exc}\n{cls._archive_error_body(token_resp)}")

        if not token_json.get("success"):
            raise RuntimeError(f"Archive login token fetch failed\n{cls._archive_error_body(token_resp)}")

        token = (token_json.get("value") or {}).get("token")
        if not token:
            raise RuntimeError("Archive login token missing")

        headers = {"Content-Type": "application/x-www-form-urlencoded"}
        payload = {"username": email, "password": password, "t": token}

        # NOTE: the body is JSON-encoded even though the Content-Type says
        # form-encoded; this mirrors the working test-login.py flow.
        login_resp = session.post(
            "https://archive.org/services/account/login/",
            headers=headers,
            data=json_module.dumps(payload),
            timeout=30,
        )

        try:
            login_json = login_resp.json()
        except Exception as exc:
            raise RuntimeError(f"Archive login parse failed: {exc}\n{cls._archive_error_body(login_resp)}")

        if login_json.get("success") is False:
            if login_json.get("value") == "bad_login":
                raise RuntimeError("Invalid Archive.org credentials")
            raise RuntimeError(f"Archive login failed: {login_json}")

        return session

    @classmethod
    def _archive_loan(cls, session: requests.Session, book_id: str, *, verbose: bool = True) -> requests.Session:
        data = {"action": "grant_access", "identifier": book_id}
        session.post("https://archive.org/services/loans/loan/searchInside.php", data=data, timeout=30)
        data["action"] = "browse_book"
        response = session.post("https://archive.org/services/loans/loan/", data=data, timeout=30)

        if response.status_code == 400:
            try:
                err = (response.json() or {}).get("error")
            except Exception:
                # Unparseable error body: fall back to a generic message.
                raise RuntimeError("The book cannot be borrowed")
            if err == "This book is not available to borrow at this time. Please try again later.":
                raise cls.BookNotAvailableError("Book is waitlisted or in use")
            raise RuntimeError(f"Borrow failed: {err or response.text}")

        data["action"] = "create_token"
        response = session.post("https://archive.org/services/loans/loan/", data=data, timeout=30)
        if "token" in (response.text or ""):
            return session
        raise RuntimeError("Something went wrong when trying to borrow the book")

    @staticmethod
    def _archive_return_loan(session: requests.Session, book_id: str) -> None:
        data = {"action": "return_loan", "identifier": book_id}
        response = session.post("https://archive.org/services/loans/loan/", data=data, timeout=30)
        if response.status_code == 200:
            try:
                if (response.json() or {}).get("success"):
                    return
            except Exception:
                pass
        raise RuntimeError("Something went wrong when trying to return the book")

    @staticmethod
    def _archive_get_book_infos(session: requests.Session, url: str) -> Tuple[str, List[str], Dict[str, Any]]:
        """Extract page links from Archive.org book reader."""
        r = session.get(url, timeout=30).text

        # Matches: "url":"//archive.org/..." (allow whitespace)
        match = re.search(r'"url"\s*:\s*"([^"]+)"', r)
        if not match:
            raise RuntimeError("Failed to extract book info URL from response")

        url_path = match.group(1)
        infos_url = ("https:" + url_path) if url_path.startswith("//") else url_path
        infos_url = infos_url.replace("\\u0026", "&")

        response = session.get(infos_url, timeout=30)
        payload = response.json()
        data = payload["data"]

        title = str(data["brOptions"]["bookTitle"]).strip().replace(" ", "_")
        title = "".join(c for c in title if c not in '<>:"/\\|?*')
        title = title[:150]

        metadata = data.get("metadata") or {}
        links: List[str] = []
        br_data = (data.get("brOptions") or {}).get("data", [])
        if isinstance(br_data, list):
            for item in br_data:
                if isinstance(item, list):
                    for page in item:
                        if isinstance(page, dict) and "uri" in page:
                            links.append(page["uri"])
                elif isinstance(item, dict) and "uri" in item:
                    links.append(item["uri"])

        if not links:
            raise RuntimeError("No pages found in book data")
        return title, links, metadata if isinstance(metadata, dict) else {}

    @staticmethod
    def _archive_image_name(pages: int, page: int, directory: str) -> str:
        return f"{directory}/{(len(str(pages)) - len(str(page))) * '0'}{page}.jpg"

    @staticmethod
    def _archive_deobfuscate_image(image_data: bytes, link: str, obf_header: str) -> bytes:
        if not AES or not Counter:
            raise RuntimeError("Crypto library not available")

        try:
            version, counter_b64 = obf_header.split("|")
        except Exception as exc:
            raise ValueError("Invalid X-Obfuscate header format") from exc

        if version != "1":
            raise ValueError("Unsupported obfuscation version: " + version)

        # Key: first 16 bytes of SHA-1 over the URL path (scheme and host stripped).
        aes_key = re.sub(r"^https?://.*?/", "/", link)
        sha1_digest = hashlib.sha1(aes_key.encode("utf-8")).digest()
        key = sha1_digest[:16]

        counter_bytes = base64.b64decode(counter_b64)
        if len(counter_bytes) != 16:
            raise ValueError(f"Expected counter to be 16 bytes, got {len(counter_bytes)}")

        prefix = counter_bytes[:8]
        initial_value = int.from_bytes(counter_bytes[8:], byteorder="big")
        ctr = Counter.new(64, prefix=prefix, initial_value=initial_value, little_endian=False)  # type: ignore
        cipher = AES.new(key, AES.MODE_CTR, counter=ctr)  # type: ignore

        decrypted_part = cipher.decrypt(image_data[:1024])
        return decrypted_part + image_data[1024:]
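
    # In short (as implemented above): the AES-128-CTR key is the first 16
    # bytes of SHA-1 over the URL path, the 16-byte counter block arrives
    # base64-encoded in the X-Obfuscate header (8-byte prefix plus 8-byte
    # big-endian initial value), and only the first 1 KiB of each image is
    # obfuscated; the remainder is plain JPEG data.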

    @classmethod
    def _archive_download_one_image(
        cls,
        session: requests.Session,
        link: str,
        i: int,
        directory: str,
        book_id: str,
        pages: int,
    ) -> None:
        headers = {
            "Referer": "https://archive.org/",
            "Accept": "image/avif,image/webp,image/apng,image/*,*/*;q=0.8",
            "Sec-Fetch-Site": "same-site",
            "Sec-Fetch-Mode": "no-cors",
            "Sec-Fetch-Dest": "image",
        }

        while True:
            try:
                response = session.get(link, headers=headers, timeout=30)
                if response.status_code == 403:
                    # The loan likely lapsed; renew it, then retry the fetch.
                    cls._archive_loan(session, book_id, verbose=False)
                    raise RuntimeError("Borrow again")
                if response.status_code == 200:
                    break
                time.sleep(1)  # Unexpected status: back off briefly before retrying.
            except Exception:
                time.sleep(1)

        image = cls._archive_image_name(pages, i, directory)
        obf_header = response.headers.get("X-Obfuscate")
        if obf_header:
            image_content = cls._archive_deobfuscate_image(response.content, link, obf_header)
        else:
            image_content = response.content

        with open(image, "wb") as f:
            f.write(image_content)

    @classmethod
    def _archive_download(
        cls,
        session: requests.Session,
        n_threads: int,
        directory: str,
        links: List[str],
        scale: int,
        book_id: str,
    ) -> List[str]:
        links_scaled = [f"{link}&rotate=0&scale={scale}" for link in links]
        pages = len(links_scaled)

        tasks = []
        with futures.ThreadPoolExecutor(max_workers=n_threads) as executor:
            for i, link in enumerate(links_scaled):
                tasks.append(
                    executor.submit(
                        cls._archive_download_one_image,
                        session=session,
                        link=link,
                        i=i,
                        directory=directory,
                        book_id=book_id,
                        pages=pages,
                    )
                )
            if tqdm:
                for _ in tqdm(futures.as_completed(tasks), total=len(tasks)):  # type: ignore
                    pass
            else:
                for _ in futures.as_completed(tasks):
                    pass

        return [cls._archive_image_name(pages, i, directory) for i in range(pages)]

    @staticmethod
    def _archive_check_direct_download(book_id: str) -> Tuple[bool, str]:
        """Check for a directly downloadable original PDF in Archive.org metadata."""
        try:
            metadata_url = f"https://archive.org/metadata/{book_id}"
            response = requests.get(metadata_url, timeout=6)
            response.raise_for_status()
            metadata = response.json()
            files = metadata.get("files") if isinstance(metadata, dict) else None
            if isinstance(files, list):
                for file_info in files:
                    if not isinstance(file_info, dict):
                        continue
                    filename = str(file_info.get("name", ""))
                    if filename.endswith(".pdf") and file_info.get("source") == "original":
                        pdf_url = f"https://archive.org/download/{book_id}/{filename.replace(' ', '%20')}"
                        check_response = requests.head(pdf_url, timeout=4, allow_redirects=True)
                        if check_response.status_code == 200:
                            return True, pdf_url
            return False, ""
        except Exception:
            return False, ""

    @staticmethod
    def scrape_isbn_metadata(isbn: str) -> List[str]:
        """Scrape tags for an ISBN using the Open Library API.

        Returns tags such as:
        - title:<...>, author:<...>, publish_date:<...>, publisher:<...>, description:<...>, pages:<...>
        - identifiers: openlibrary:<...>, lccn:<...>, oclc:<...>, goodreads:<...>, librarything:<...>, doi:<...>, internet_archive:<...>
        """
        new_tags: List[str] = []

        isbn_clean = str(isbn or "").replace("isbn:", "").replace("-", "").strip()
        if not isbn_clean:
            return []

        url = f"https://openlibrary.org/api/books?bibkeys=ISBN:{isbn_clean}&jscmd=data&format=json"
        try:
            with HTTPClient() as client:
                response = client.get(url)
                response.raise_for_status()
                data = json_module.loads(response.content.decode("utf-8"))
        except Exception as exc:
            log(f"Failed to fetch ISBN metadata: {exc}", file=sys.stderr)
            return []

        if not data:
            log(f"No ISBN metadata found for: {isbn}")
            return []

        book_data = next(iter(data.values()), None)
        if not isinstance(book_data, dict):
            return []

        if "title" in book_data:
            new_tags.append(f"title:{book_data['title']}")

        authors = book_data.get("authors")
        if isinstance(authors, list):
            for author in authors[:3]:
                if isinstance(author, dict) and author.get("name"):
                    new_tags.append(f"author:{author['name']}")

        if book_data.get("publish_date"):
            new_tags.append(f"publish_date:{book_data['publish_date']}")

        publishers = book_data.get("publishers")
        if isinstance(publishers, list) and publishers:
            pub = publishers[0]
            if isinstance(pub, dict) and pub.get("name"):
                new_tags.append(f"publisher:{pub['name']}")

        if "description" in book_data:
            desc = book_data.get("description")
            if isinstance(desc, dict) and "value" in desc:
                desc = desc.get("value")
            if desc:
                desc_str = str(desc).strip()
                if desc_str:
                    new_tags.append(f"description:{desc_str[:200]}")

        page_count = book_data.get("number_of_pages")
        if isinstance(page_count, int) and page_count > 0:
            new_tags.append(f"pages:{page_count}")

        identifiers = book_data.get("identifiers")
        if isinstance(identifiers, dict):

            def _first(value: Any) -> Any:
                if isinstance(value, list) and value:
                    return value[0]
                return value

            for key, ns in (
                ("openlibrary", "openlibrary"),
                ("lccn", "lccn"),
                ("oclc", "oclc"),
                ("goodreads", "goodreads"),
                ("librarything", "librarything"),
                ("doi", "doi"),
                ("internet_archive", "internet_archive"),
            ):
                val = _first(identifiers.get(key))
                if val:
                    new_tags.append(f"{ns}:{val}")

        log(f"Found {len(new_tags)} tag(s) from ISBN lookup")
        return new_tags
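
    # Hypothetical output shape (illustrative only; actual values depend on
    # what Open Library returns for the ISBN):
    #   scrape_isbn_metadata("978-0-00-000000-2")
    #   -> ["title:Some Title", "author:Some Author", "publish_date:2001",
    #       "publisher:Some Press", "pages:320", "openlibrary:OL12345M"]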

    @staticmethod
    def scrape_openlibrary_metadata(olid: str) -> List[str]:
        """Scrape tags for an OpenLibrary ID using the .json API endpoint."""
        new_tags: List[str] = []

        olid_text = str(olid or "").strip()
        if not olid_text:
            return []

        # Normalize OLID to the common "OL<digits>M" form when possible.
        olid_norm = olid_text
        try:
            if not olid_norm.startswith("OL"):
                olid_norm = f"OL{olid_norm}"
            if not olid_norm.endswith("M"):
                olid_norm = f"{olid_norm}M"
        except Exception:
            olid_norm = olid_text

        # Ensure we always include a scrapeable identifier tag.
        new_tags.append(f"openlibrary:{olid_norm}")

        # Accept OL9674499M, 9674499M, or just digits.
        olid_clean = olid_text.replace("OL", "").replace("M", "")
        if not olid_clean.isdigit():
            olid_clean = olid_text

        if not olid_text.startswith("OL"):
            url = f"https://openlibrary.org/books/OL{olid_clean}M.json"
        else:
            url = f"https://openlibrary.org/books/{olid_text}.json"

        try:
            with HTTPClient() as client:
                response = client.get(url)
                response.raise_for_status()
                data = json_module.loads(response.content.decode("utf-8"))
        except Exception as exc:
            log(f"Failed to fetch OpenLibrary metadata: {exc}", file=sys.stderr)
            return []

        if not isinstance(data, dict) or not data:
            log(f"No OpenLibrary metadata found for: {olid_text}")
            return []

        if "title" in data:
            new_tags.append(f"title:{data['title']}")

        authors = data.get("authors")
        if isinstance(authors, list):
            for author in authors[:3]:
                if isinstance(author, dict) and author.get("name"):
                    new_tags.append(f"author:{author['name']}")
                    continue

                # Common OL shape: {"key": "/authors/OL...A"} or {"author": {"key": ...}}
                author_key = None
                if isinstance(author, dict):
                    if isinstance(author.get("author"), dict):
                        author_key = author.get("author", {}).get("key")
                    if not author_key:
                        author_key = author.get("key")

                if isinstance(author_key, str) and author_key.startswith("/"):
                    try:
                        author_url = f"https://openlibrary.org{author_key}.json"
                        with HTTPClient(timeout=10) as client:
                            author_resp = client.get(author_url)
                            author_resp.raise_for_status()
                            author_data = json_module.loads(author_resp.content.decode("utf-8"))
                        if isinstance(author_data, dict) and author_data.get("name"):
                            new_tags.append(f"author:{author_data['name']}")
                            continue
                    except Exception:
                        pass

                if isinstance(author, str) and author:
                    new_tags.append(f"author:{author}")

        if data.get("publish_date"):
            new_tags.append(f"publish_date:{data['publish_date']}")

        publishers = data.get("publishers")
        if isinstance(publishers, list) and publishers:
            pub = publishers[0]
            if isinstance(pub, dict) and pub.get("name"):
                new_tags.append(f"publisher:{pub['name']}")
            elif isinstance(pub, str) and pub:
                new_tags.append(f"publisher:{pub}")

        if "description" in data:
            desc = data.get("description")
            if isinstance(desc, dict) and "value" in desc:
                desc = desc.get("value")
            if desc:
                desc_str = str(desc).strip()
                if desc_str:
                    new_tags.append(f"description:{desc_str[:200]}")

        page_count = data.get("number_of_pages")
        if isinstance(page_count, int) and page_count > 0:
            new_tags.append(f"pages:{page_count}")

        subjects = data.get("subjects")
        if isinstance(subjects, list):
            for subject in subjects[:10]:
                if isinstance(subject, str):
                    subject_clean = subject.strip()
                    if subject_clean and subject_clean not in new_tags:
                        new_tags.append(subject_clean)

        identifiers = data.get("identifiers")
        if isinstance(identifiers, dict):

            def _first(value: Any) -> Any:
                if isinstance(value, list) and value:
                    return value[0]
                return value

            for key, ns in (
                ("isbn_10", "isbn_10"),
                ("isbn_13", "isbn_13"),
                ("lccn", "lccn"),
                ("oclc_numbers", "oclc"),
                ("goodreads", "goodreads"),
                ("internet_archive", "internet_archive"),
            ):
                val = _first(identifiers.get(key))
                if val:
                    new_tags.append(f"{ns}:{val}")

        # Some editions expose a direct Archive.org identifier as "ocaid".
        ocaid = data.get("ocaid")
        if isinstance(ocaid, str) and ocaid.strip():
            new_tags.append(f"internet_archive:{ocaid.strip()}")

        log(f"Found {len(new_tags)} tag(s) from OpenLibrary lookup")
        return new_tags

    def search(
        self,
        query: str,
@@ -155,7 +697,70 @@ class OpenLibrary(SearchProvider):
        if not isinstance(docs, list):
            return []

-        for doc in docs[: int(limit)]:
        # Availability enrichment can be slow if done sequentially (it may require multiple
        # network calls per row). Do it concurrently to keep the pipeline responsive.
        docs = docs[: int(limit)]

        def _compute_availability(doc_dict: Dict[str, Any]) -> Tuple[str, str, str, str]:
            edition_id_local = _resolve_edition_id(doc_dict)
            if not edition_id_local:
                return "no-olid", "", "", ""

            ia_val_local = doc_dict.get("ia") or []
            if isinstance(ia_val_local, str):
                ia_val_local = [ia_val_local]
            if not isinstance(ia_val_local, list):
                ia_val_local = []
            ia_ids_local = [str(x) for x in ia_val_local if x]

            session_local = requests.Session()

            try:
                archive_id_local = _resolve_archive_id(session_local, edition_id_local, ia_ids_local)
            except Exception:
                archive_id_local = ""

            if not archive_id_local:
                return "no-archive", "", "", ""

            # Prefer the fastest signal first: OpenLibrary lendable status.
            lendable_local, reason_local = _check_lendable(session_local, edition_id_local)
            if lendable_local:
                return "borrow", reason_local, archive_id_local, ""

            # Not lendable: check whether it's directly downloadable (public domain uploads, etc.).
            try:
                can_direct, pdf_url = self._archive_check_direct_download(archive_id_local)
                if can_direct and pdf_url:
                    return "download", reason_local, archive_id_local, str(pdf_url)
            except Exception:
                pass

            return "unavailable", reason_local, archive_id_local, ""

        availability_rows: List[Tuple[str, str, str, str]] = [("unknown", "", "", "") for _ in range(len(docs))]
        if docs:
            log(f"[openlibrary] Enriching availability for {len(docs)} result(s)...")
            max_workers = min(8, max(1, len(docs)))
            done = 0
            with futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
                future_to_index = {
                    executor.submit(_compute_availability, doc_dict): i
                    for i, doc_dict in enumerate(docs)
                    if isinstance(doc_dict, dict)
                }
                for fut in futures.as_completed(list(future_to_index.keys())):
                    i = future_to_index[fut]
                    try:
                        availability_rows[i] = fut.result()
                    except Exception:
                        availability_rows[i] = ("unknown", "", "", "")
                    done += 1
                    if done in {1, len(future_to_index)} or (done % 10 == 0):
                        log(f"[openlibrary] Availability: {done}/{len(future_to_index)}")
            log("[openlibrary] Availability enrichment complete")

        for idx, doc in enumerate(docs):
            if not isinstance(doc, dict):
                continue
@@ -172,6 +777,7 @@ class OpenLibrary(SearchProvider):
            year = str(year_val) if year_val is not None else ""

            edition_id = _resolve_edition_id(doc)
            work_key = doc.get("key") if isinstance(doc.get("key"), str) else ""

            ia_val = doc.get("ia") or []
            if isinstance(ia_val, str):
@@ -193,9 +799,21 @@ class OpenLibrary(SearchProvider):
                ("Title", book_title),
                ("Author", ", ".join(authors_list)),
                ("Year", year),
                ("Avail", ""),
                ("OLID", edition_id),
            ]

            # Determine availability using the concurrently computed enrichment.
            availability, availability_reason, archive_id, direct_url = ("unknown", "", "", "")
            if 0 <= idx < len(availability_rows):
                availability, availability_reason, archive_id, direct_url = availability_rows[idx]

            # Patch the display column. Use a distinct index name so the outer
            # per-doc `idx` is not shadowed.
            for col_idx, (name, _val) in enumerate(columns):
                if name == "Avail":
                    columns[col_idx] = ("Avail", availability)
                    break

            annotations: List[str] = []
            if isbn_13:
                annotations.append(f"isbn_13:{isbn_13}")
@@ -203,12 +821,18 @@ class OpenLibrary(SearchProvider):
                annotations.append(f"isbn_10:{isbn_10}")
            if ia_ids:
                annotations.append("archive")
            if availability in {"download", "borrow"}:
                annotations.append(availability)

            results.append(
                SearchResult(
                    table="openlibrary",
                    title=book_title,
-                    path=(f"https://openlibrary.org/books/{edition_id}" if edition_id else "https://openlibrary.org"),
                    path=(
                        f"https://openlibrary.org/books/{edition_id}" if edition_id else (
                            f"https://openlibrary.org{work_key}" if isinstance(work_key, str) and work_key.startswith("/") else "https://openlibrary.org"
                        )
                    ),
                    detail=(
                        (f"By: {', '.join(authors_list)}" if authors_list else "")
                        + (f" ({year})" if year else "")
@@ -218,11 +842,16 @@ class OpenLibrary(SearchProvider):
                    columns=columns,
                    full_metadata={
                        "openlibrary_id": edition_id,
                        "openlibrary_key": work_key,
                        "authors": authors_list,
                        "year": year,
                        "isbn_10": isbn_10,
                        "isbn_13": isbn_13,
                        "ia": ia_ids,
                        "availability": availability,
                        "availability_reason": availability_reason,
                        "archive_id": archive_id,
                        "direct_url": direct_url,
                        "raw": doc,
                    },
                )
@@ -256,9 +885,7 @@ class OpenLibrary(SearchProvider):

        # 1) Direct download if available.
        try:
-            from API.archive_client import check_direct_download
-
-            can_direct, pdf_url = check_direct_download(archive_id)
            can_direct, pdf_url = self._archive_check_direct_download(archive_id)
        except Exception:
            can_direct, pdf_url = False, ""

@@ -272,10 +899,7 @@ class OpenLibrary(SearchProvider):

        # 2) Borrow flow (credentials required).
        try:
-            from API.archive_client import BookNotAvailableError, credential_openlibrary, download as archive_download
-            from API.archive_client import get_book_infos, loan, login
-
-            email, password = credential_openlibrary(self.config or {})
            email, password = self._credential_archive(self.config or {})
            if not email or not password:
                log("[openlibrary] Archive credentials missing; cannot borrow", file=sys.stderr)
                return None
@@ -285,13 +909,13 @@ class OpenLibrary(SearchProvider):
                log(f"[openlibrary] Not lendable: {reason}", file=sys.stderr)
                return None

-            session = login(email, password)
            session = self._archive_login(email, password)
            try:
-                session = loan(session, archive_id, verbose=False)
-            except BookNotAvailableError:
                session = self._archive_loan(session, archive_id, verbose=False)
            except self.BookNotAvailableError:
                log("[openlibrary] Book not available to borrow", file=sys.stderr)
                return None
-            except SystemExit:
            except Exception:
                log("[openlibrary] Borrow failed", file=sys.stderr)
                return None

@@ -301,7 +925,7 @@ class OpenLibrary(SearchProvider):
        last_exc: Optional[Exception] = None
        for u in urls:
            try:
-                title_raw, links, _metadata = get_book_infos(session, u)
                title_raw, links, _metadata = self._archive_get_book_infos(session, u)
                if title_raw:
                    title = sanitize_filename(title_raw)
                    break
@@ -315,7 +939,7 @@ class OpenLibrary(SearchProvider):

        temp_dir = tempfile.mkdtemp(prefix=f"{title}_", dir=str(output_dir))
        try:
-            images = archive_download(session=session, n_threads=10, directory=temp_dir, links=links, scale=3, book_id=archive_id)
            images = self._archive_download(session=session, n_threads=10, directory=temp_dir, links=links, scale=3, book_id=archive_id)

            try:
                import img2pdf  # type: ignore
