Add YAPF style + ignore, and format tracked Python files

2025-12-29 18:42:02 -08:00
parent c019c00aed
commit 507946a3e4
108 changed files with 11664 additions and 6494 deletions
--- a/Provider/openlibrary.py
+++ b/Provider/openlibrary.py
@@ -19,7 +19,7 @@ import requests
 from API.HTTP import HTTPClient
 from ProviderCore.base import Provider, SearchResult
 from ProviderCore.download import download_file, sanitize_filename
-from cli_syntax import get_field, get_free_text, parse_query
+from SYS.cli_syntax import get_field, get_free_text, parse_query
 from SYS.logger import debug, log
 from SYS.utils import unique_path

@@ -52,7 +52,9 @@ def _image_paths_to_pdf_bytes(images: List[str]) -> Optional[bytes]:
                continue
            with Image.open(img_path) as im:  # type: ignore[attr-defined]
                # Ensure PDF-compatible mode.
-                if im.mode in {"RGBA", "LA", "P"}:
+                if im.mode in {"RGBA",
+                               "LA",
+                               "P"}:
                    im = im.convert("RGB")
                else:
                    im = im.convert("RGB")
@@ -125,7 +127,8 @@ def _resolve_edition_id(doc: Dict[str, Any]) -> str:
 def _check_lendable(session: requests.Session, edition_id: str) -> Tuple[bool, str]:
    """Return (lendable, status_text) using OpenLibrary volumes API."""
    try:
-        if not edition_id or not edition_id.startswith("OL") or not edition_id.endswith("M"):
+        if not edition_id or not edition_id.startswith("OL") or not edition_id.endswith(
+                "M"):
            return False, "not-an-edition"

        url = f"https://openlibrary.org/api/volumes/brief/json/OLID:{edition_id}"
@@ -155,7 +158,9 @@ def _check_lendable(session: requests.Session, edition_id: str) -> Tuple[bool, s


 def _resolve_archive_id(
-    session: requests.Session, edition_id: str, ia_candidates: List[str]
+    session: requests.Session,
+    edition_id: str,
+    ia_candidates: List[str]
 ) -> str:
    # Prefer IA identifiers already present in search results.
    if ia_candidates:
@@ -165,7 +170,10 @@ def _resolve_archive_id(

    # Otherwise query the edition JSON.
    try:
-        resp = session.get(f"https://openlibrary.org/books/{edition_id}.json", timeout=6)
+        resp = session.get(
+            f"https://openlibrary.org/books/{edition_id}.json",
+            timeout=6
+        )
        resp.raise_for_status()
        data = resp.json() or {}

@@ -206,13 +214,19 @@ def _archive_id_from_url(url: str) -> str:
    # - /details/<id>/...
    # - /borrow/<id>
    # - /download/<id>/...
-    if len(parts) >= 2 and parts[0].lower() in {"details", "borrow", "download", "stream"}:
+    if len(parts) >= 2 and parts[0].lower() in {"details",
+                                                "borrow",
+                                                "download",
+                                                "stream"}:
        return str(parts[1]).strip()

    # Sometimes the identifier is the first segment.
    if len(parts) >= 1:
        first = str(parts[0]).strip()
-        if first and first.lower() not in {"account", "services", "search", "advancedsearch.php"}:
+        if first and first.lower() not in {"account",
+                                           "services",
+                                           "search",
+                                           "advancedsearch.php"}:
            return first

    return ""
@@ -249,14 +263,17 @@ def _coerce_archive_field_list(value: Any) -> List[str]:
    return [s] if s else []


-def _archive_item_metadata_to_tags(archive_id: str, item_metadata: Dict[str, Any]) -> List[str]:
+def _archive_item_metadata_to_tags(archive_id: str,
+                                   item_metadata: Dict[str,
+                                                       Any]) -> List[str]:
    """Map Archive.org metadata JSON (the `metadata` object) to tag strings.

    This is intentionally best-effort and conservative: it focuses on stable,
    useful bibliographic fields (title/author/publisher/ISBN/identifier/topics).
    """
    archive_id_clean = str(archive_id or "").strip()
-    meta = item_metadata if isinstance(item_metadata, dict) else {}
+    meta = item_metadata if isinstance(item_metadata,
+                                       dict) else {}

    tags: List[str] = []
    seen: set[str] = set()
@@ -374,7 +391,10 @@ def _archive_item_metadata_to_tags(archive_id: str, item_metadata: Dict[str, Any
    return tags


-def _fetch_archive_item_metadata(archive_id: str, *, timeout: int = 8) -> Dict[str, Any]:
+def _fetch_archive_item_metadata(archive_id: str,
+                                 *,
+                                 timeout: int = 8) -> Dict[str,
+                                                           Any]:
    ident = str(archive_id or "").strip()
    if not ident:
        return {}
@@ -384,7 +404,8 @@ def _fetch_archive_item_metadata(archive_id: str, *, timeout: int = 8) -> Dict[s
    if not isinstance(data, dict):
        return {}
    meta = data.get("metadata")
-    return meta if isinstance(meta, dict) else {}
+    return meta if isinstance(meta,
+                              dict) else {}


 class OpenLibrary(Provider):
@@ -404,7 +425,9 @@ class OpenLibrary(Provider):
        """Raised when a book is not available for borrowing (waitlisted/in use)."""

    @staticmethod
-    def _credential_archive(config: Dict[str, Any]) -> Tuple[Optional[str], Optional[str]]:
+    def _credential_archive(config: Dict[str,
+                                         Any]) -> Tuple[Optional[str],
+                                                        Optional[str]]:
        """Get Archive.org email/password from config.

        Supports:
@@ -415,9 +438,11 @@ class OpenLibrary(Provider):
        if not isinstance(config, dict):
            return None, None

-        provider_config = config.get("provider", {})
+        provider_config = config.get("provider",
+                                     {})
        if isinstance(provider_config, dict):
-            openlibrary_config = provider_config.get("openlibrary", {})
+            openlibrary_config = provider_config.get("openlibrary",
+                                                     {})
            if isinstance(openlibrary_config, dict):
                email = openlibrary_config.get("email")
                password = openlibrary_config.get("password")
@@ -456,7 +481,10 @@ class OpenLibrary(Provider):
        """Login to archive.org using the token-based services endpoint (matches test-login.py)."""
        session = requests.Session()

-        token_resp = session.get("https://archive.org/services/account/login/", timeout=30)
+        token_resp = session.get(
+            "https://archive.org/services/account/login/",
+            timeout=30
+        )
        try:
            token_json = token_resp.json()
        except Exception as exc:
@@ -473,8 +501,14 @@ class OpenLibrary(Provider):
        if not token:
            raise RuntimeError("Archive login token missing")

-        headers = {"Content-Type": "application/x-www-form-urlencoded"}
-        payload = {"username": email, "password": password, "t": token}
+        headers = {
+            "Content-Type": "application/x-www-form-urlencoded"
+        }
+        payload = {
+            "username": email,
+            "password": password,
+            "t": token
+        }

        login_resp = session.post(
            "https://archive.org/services/account/login/",
@@ -499,22 +533,34 @@ class OpenLibrary(Provider):

    @classmethod
    def _archive_loan(
-        cls, session: requests.Session, book_id: str, *, verbose: bool = True
+        cls,
+        session: requests.Session,
+        book_id: str,
+        *,
+        verbose: bool = True
    ) -> requests.Session:
-        data = {"action": "grant_access", "identifier": book_id}
+        data = {
+            "action": "grant_access",
+            "identifier": book_id
+        }
        session.post(
-            "https://archive.org/services/loans/loan/searchInside.php", data=data, timeout=30
+            "https://archive.org/services/loans/loan/searchInside.php",
+            data=data,
+            timeout=30
        )
        data["action"] = "browse_book"
-        response = session.post("https://archive.org/services/loans/loan/", data=data, timeout=30)
+        response = session.post(
+            "https://archive.org/services/loans/loan/",
+            data=data,
+            timeout=30
+        )

        if response.status_code == 400:
            try:
                err = (response.json() or {}).get("error")
-                if (
-                    err
-                    == "This book is not available to borrow at this time. Please try again later."
-                ):
+                if (err ==
+                        "This book is not available to borrow at this time. Please try again later."
+                    ):
                    raise cls.BookNotAvailableError("Book is waitlisted or in use")
                raise RuntimeError(f"Borrow failed: {err or response.text}")
            except cls.BookNotAvailableError:
@@ -523,15 +569,26 @@ class OpenLibrary(Provider):
                raise RuntimeError("The book cannot be borrowed")

        data["action"] = "create_token"
-        response = session.post("https://archive.org/services/loans/loan/", data=data, timeout=30)
+        response = session.post(
+            "https://archive.org/services/loans/loan/",
+            data=data,
+            timeout=30
+        )
        if "token" in (response.text or ""):
            return session
        raise RuntimeError("Something went wrong when trying to borrow the book")

    @staticmethod
    def _archive_return_loan(session: requests.Session, book_id: str) -> None:
-        data = {"action": "return_loan", "identifier": book_id}
-        response = session.post("https://archive.org/services/loans/loan/", data=data, timeout=30)
+        data = {
+            "action": "return_loan",
+            "identifier": book_id
+        }
+        response = session.post(
+            "https://archive.org/services/loans/loan/",
+            data=data,
+            timeout=30
+        )
        if response.status_code == 200:
            try:
                if (response.json() or {}).get("success"):
@@ -551,8 +608,8 @@ class OpenLibrary(Provider):
        if session is None:
            return
        for url in (
-            "https://archive.org/account/logout",
-            "https://archive.org/account/logout.php",
+                "https://archive.org/account/logout",
+                "https://archive.org/account/logout.php",
        ):
            try:
                resp = session.get(url, timeout=15, allow_redirects=True)
@@ -579,7 +636,9 @@ class OpenLibrary(Provider):
            resp = requests.get(f"https://archive.org/metadata/{ident}", timeout=8)
            resp.raise_for_status()
            data = resp.json() if resp is not None else {}
-            meta = data.get("metadata", {}) if isinstance(data, dict) else {}
+            meta = data.get("metadata",
+                            {}) if isinstance(data,
+                                              dict) else {}
            collection = meta.get("collection") if isinstance(meta, dict) else None

            values: List[str] = []
@@ -588,16 +647,20 @@ class OpenLibrary(Provider):
            elif isinstance(collection, str):
                values = [collection.strip().lower()]

-            if any(v in {"inlibrary", "printdisabled", "lendinglibrary"} for v in values):
+            if any(v in {"inlibrary",
+                         "printdisabled",
+                         "lendinglibrary"} for v in values):
                return True, "archive-collection"
            return False, "archive-not-lendable"
        except Exception:
            return False, "archive-metadata-error"

    @staticmethod
-    def _archive_get_book_infos(
-        session: requests.Session, url: str
-    ) -> Tuple[str, List[str], Dict[str, Any]]:
+    def _archive_get_book_infos(session: requests.Session,
+                                url: str) -> Tuple[str,
+                                                   List[str],
+                                                   Dict[str,
+                                                        Any]]:
        """Extract page links from Archive.org book reader."""
        r = session.get(url, timeout=30).text

@@ -620,7 +683,8 @@ class OpenLibrary(Provider):

        metadata = data.get("metadata") or {}
        links: List[str] = []
-        br_data = (data.get("brOptions") or {}).get("data", [])
+        br_data = (data.get("brOptions") or {}).get("data",
+                                                    [])
        if isinstance(br_data, list):
            for item in br_data:
                if isinstance(item, list):
@@ -639,7 +703,11 @@ class OpenLibrary(Provider):
        return f"{directory}/{(len(str(pages)) - len(str(page))) * '0'}{page}.jpg"

    @staticmethod
-    def _archive_deobfuscate_image(image_data: bytes, link: str, obf_header: str) -> bytes:
+    def _archive_deobfuscate_image(
+        image_data: bytes,
+        link: str,
+        obf_header: str
+    ) -> bytes:
        if not AES or not Counter:
            raise RuntimeError("Crypto library not available")

@@ -657,11 +725,18 @@ class OpenLibrary(Provider):

        counter_bytes = base64.b64decode(counter_b64)
        if len(counter_bytes) != 16:
-            raise ValueError(f"Expected counter to be 16 bytes, got {len(counter_bytes)}")
+            raise ValueError(
+                f"Expected counter to be 16 bytes, got {len(counter_bytes)}"
+            )

        prefix = counter_bytes[:8]
        initial_value = int.from_bytes(counter_bytes[8:], byteorder="big")
-        ctr = Counter.new(64, prefix=prefix, initial_value=initial_value, little_endian=False)  # type: ignore
+        ctr = Counter.new(
+            64,
+            prefix=prefix,
+            initial_value=initial_value,
+            little_endian=False
+        )  # type: ignore
        cipher = AES.new(key, AES.MODE_CTR, counter=ctr)  # type: ignore

        decrypted_part = cipher.decrypt(image_data[:1024])
@@ -699,7 +774,11 @@ class OpenLibrary(Provider):
        image = cls._archive_image_name(pages, i, directory)
        obf_header = response.headers.get("X-Obfuscate")
        if obf_header:
-            image_content = cls._archive_deobfuscate_image(response.content, link, obf_header)
+            image_content = cls._archive_deobfuscate_image(
+                response.content,
+                link,
+                obf_header
+            )
        else:
            image_content = response.content

@@ -715,7 +794,9 @@ class OpenLibrary(Provider):
        links: List[str],
        scale: int,
        book_id: str,
-        progress_callback: Optional[Callable[[int, int], None]] = None,
+        progress_callback: Optional[Callable[[int,
+                                              int],
+                                             None]] = None,
    ) -> List[str]:
        links_scaled = [f"{link}&rotate=0&scale={scale}" for link in links]
        pages = len(links_scaled)
@@ -748,7 +829,8 @@ class OpenLibrary(Provider):
                    except Exception:
                        pass
            elif tqdm:
-                for _ in tqdm(futures.as_completed(tasks), total=len(tasks)):  # type: ignore
+                for _ in tqdm(futures.as_completed(tasks),
+                              total=len(tasks)):  # type: ignore
                    pass
            else:
                for _ in futures.as_completed(tasks):
@@ -770,11 +852,16 @@ class OpenLibrary(Provider):
                    if not isinstance(file_info, dict):
                        continue
                    filename = str(file_info.get("name", ""))
-                    if filename.endswith(".pdf") and file_info.get("source") == "original":
+                    if filename.endswith(".pdf") and file_info.get("source"
+                                                                   ) == "original":
                        pdf_url = (
                            f"https://archive.org/download/{book_id}/{filename.replace(' ', '%20')}"
                        )
-                        check_response = requests.head(pdf_url, timeout=4, allow_redirects=True)
+                        check_response = requests.head(
+                            pdf_url,
+                            timeout=4,
+                            allow_redirects=True
+                        )
                        if check_response.status_code == 200:
                            return True, pdf_url
            return False, ""
@@ -927,7 +1014,8 @@ class OpenLibrary(Provider):
                author_key = None
                if isinstance(author, dict):
                    if isinstance(author.get("author"), dict):
-                        author_key = author.get("author", {}).get("key")
+                        author_key = author.get("author",
+                                                {}).get("key")
                    if not author_key:
                        author_key = author.get("key")

@@ -937,7 +1025,9 @@ class OpenLibrary(Provider):
                        with HTTPClient(timeout=10) as client:
                            author_resp = client.get(author_url)
                            author_resp.raise_for_status()
-                            author_data = json_module.loads(author_resp.content.decode("utf-8"))
+                            author_data = json_module.loads(
+                                author_resp.content.decode("utf-8")
+                            )
                        if isinstance(author_data, dict) and author_data.get("name"):
                            new_tags.append(f"author:{author_data['name']}")
                            continue
@@ -1011,7 +1101,8 @@ class OpenLibrary(Provider):
        self,
        query: str,
        limit: int = 50,
-        filters: Optional[Dict[str, Any]] = None,
+        filters: Optional[Dict[str,
+                               Any]] = None,
        **kwargs: Any,
    ) -> List[SearchResult]:
        filters = filters or {}
@@ -1032,7 +1123,10 @@ class OpenLibrary(Provider):
        try:
            resp = self._session.get(
                "https://openlibrary.org/search.json",
-                params={"q": q, "limit": int(limit)},
+                params={
+                    "q": q,
+                    "limit": int(limit)
+                },
                timeout=10,
            )
            resp.raise_for_status()
@@ -1048,9 +1142,13 @@ class OpenLibrary(Provider):

        # Availability enrichment can be slow if done sequentially (it may require multiple
        # network calls per row). Do it concurrently to keep the pipeline responsive.
-        docs = docs[: int(limit)]
+        docs = docs[:int(limit)]

-        def _compute_availability(doc_dict: Dict[str, Any]) -> Tuple[str, str, str, str]:
+        def _compute_availability(doc_dict: Dict[str,
+                                                 Any]) -> Tuple[str,
+                                                                str,
+                                                                str,
+                                                                str]:
            edition_id_local = _resolve_edition_id(doc_dict)
            if not edition_id_local:
                return "no-olid", "", "", ""
@@ -1066,7 +1164,9 @@ class OpenLibrary(Provider):

            try:
                archive_id_local = _resolve_archive_id(
-                    session_local, edition_id_local, ia_ids_local
+                    session_local,
+                    edition_id_local,
+                    ia_ids_local
                )
            except Exception:
                archive_id_local = ""
@@ -1089,17 +1189,23 @@ class OpenLibrary(Provider):

            return "unavailable", reason_local, archive_id_local, ""

-        availability_rows: List[Tuple[str, str, str, str]] = [
-            ("unknown", "", "", "") for _ in range(len(docs))
-        ]
+        availability_rows: List[Tuple[str,
+                                      str,
+                                      str,
+                                      str]] = [
+                                          ("unknown",
+                                           "",
+                                           "",
+                                           "") for _ in range(len(docs))
+                                      ]
        if docs:
            max_workers = min(8, max(1, len(docs)))
            done = 0
            with futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
                future_to_index = {
-                    executor.submit(_compute_availability, doc_dict): i
-                    for i, doc_dict in enumerate(docs)
-                    if isinstance(doc_dict, dict)
+                    executor.submit(_compute_availability,
+                                    doc_dict): i
+                    for i, doc_dict in enumerate(docs) if isinstance(doc_dict, dict)
                }
                for fut in futures.as_completed(list(future_to_index.keys())):
                    i = future_to_index[fut]
@@ -1145,11 +1251,16 @@ class OpenLibrary(Provider):
            isbn_10 = next((str(i) for i in isbn_list if len(str(i)) == 10), "")

            columns = [
-                ("Title", book_title),
-                ("Author", ", ".join(authors_list)),
-                ("Year", year),
-                ("Avail", ""),
-                ("OLID", edition_id),
+                ("Title",
+                 book_title),
+                ("Author",
+                 ", ".join(authors_list)),
+                ("Year",
+                 year),
+                ("Avail",
+                 ""),
+                ("OLID",
+                 edition_id),
            ]

            # Determine availability using the concurrently computed enrichment.
@@ -1170,7 +1281,8 @@ class OpenLibrary(Provider):
                annotations.append(f"isbn_10:{isbn_10}")
            if ia_ids:
                annotations.append("archive")
-            if availability in {"download", "borrow"}:
+            if availability in {"download",
+                                "borrow"}:
                annotations.append(availability)

            results.append(
@@ -1178,17 +1290,17 @@ class OpenLibrary(Provider):
                    table="openlibrary",
                    title=book_title,
                    path=(
-                        f"https://openlibrary.org/books/{edition_id}"
-                        if edition_id
-                        else (
+                        f"https://openlibrary.org/books/{edition_id}" if edition_id else
+                        (
                            f"https://openlibrary.org{work_key}"
-                            if isinstance(work_key, str) and work_key.startswith("/")
-                            else "https://openlibrary.org"
+                            if isinstance(work_key,
+                                          str) and work_key.startswith("/") else
+                            "https://openlibrary.org"
                        )
                    ),
                    detail=(
-                        (f"By: {', '.join(authors_list)}" if authors_list else "")
-                        + (f" ({year})" if year else "")
+                        (f"By: {', '.join(authors_list)}" if authors_list else "") +
+                        (f" ({year})" if year else "")
                    ).strip(),
                    annotations=annotations,
                    media_kind="book",
@@ -1216,7 +1328,11 @@ class OpenLibrary(Provider):
        self,
        result: SearchResult,
        output_dir: Path,
-        progress_callback: Optional[Callable[[str, int, Optional[int], str], None]] = None,
+        progress_callback: Optional[Callable[[str,
+                                              int,
+                                              Optional[int],
+                                              str],
+                                             None]] = None,
    ) -> Optional[Path]:
        output_dir = Path(output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)
@@ -1245,7 +1361,10 @@ class OpenLibrary(Provider):
            archive_id = _archive_id_from_url(str(getattr(result, "path", "") or ""))

        if not archive_id:
-            log("[openlibrary] No archive identifier available; cannot download", file=sys.stderr)
+            log(
+                "[openlibrary] No archive identifier available; cannot download",
+                file=sys.stderr
+            )
            return None

        # Best-effort metadata scrape to attach bibliographic tags for downstream cmdlets.
@@ -1290,12 +1409,9 @@ class OpenLibrary(Provider):
                session=self._session,
                progress_callback=(
                    (
-                        lambda downloaded, total, label: progress_callback(
-                            "bytes", downloaded, total, label
-                        )
-                    )
-                    if progress_callback is not None
-                    else None
+                        lambda downloaded, total, label:
+                        progress_callback("bytes", downloaded, total, label)
+                    ) if progress_callback is not None else None
                ),
            )
            if ok:
@@ -1307,7 +1423,10 @@ class OpenLibrary(Provider):
        try:
            email, password = self._credential_archive(self.config or {})
            if not email or not password:
-                log("[openlibrary] Archive credentials missing; cannot borrow", file=sys.stderr)
+                log(
+                    "[openlibrary] Archive credentials missing; cannot borrow",
+                    file=sys.stderr
+                )
                return None

            lendable = True
@@ -1369,7 +1488,10 @@ class OpenLibrary(Provider):
                        continue

                if not links:
-                    log(f"[openlibrary] Failed to extract pages: {last_exc}", file=sys.stderr)
+                    log(
+                        f"[openlibrary] Failed to extract pages: {last_exc}",
+                        file=sys.stderr
+                    )
                    return None

                try:
@@ -1388,9 +1510,10 @@ class OpenLibrary(Provider):
                        scale=3,
                        book_id=archive_id,
                        progress_callback=(
-                            (lambda done, total: progress_callback("pages", done, total, "pages"))
-                            if progress_callback is not None
-                            else None
+                            (
+                                lambda done, total:
+                                progress_callback("pages", done, total, "pages")
+                            ) if progress_callback is not None else None
                        ),
                    )

@@ -1436,7 +1559,10 @@ class OpenLibrary(Provider):
                    try:
                        self._archive_return_loan(session, archive_id)
                    except Exception as exc:
-                        log(f"[openlibrary] Warning: failed to return loan: {exc}", file=sys.stderr)
+                        log(
+                            f"[openlibrary] Warning: failed to return loan: {exc}",
+                            file=sys.stderr
+                        )
                try:
                    self._archive_logout(session)
                except Exception: