2026-01-06 16:19:29 -08:00
parent 41c11d39fd
commit edc33f4528
10 changed files with 1192 additions and 881 deletions

View File

@@ -8,6 +8,9 @@ import requests
import sys
import json
import subprocess
from API.HTTP import HTTPClient
from ProviderCore.base import SearchResult
try: # Optional dependency for IMDb scraping
from imdbinfo.services import search_title # type: ignore
except ImportError: # pragma: no cover - optional
@@ -15,6 +18,7 @@ except ImportError: # pragma: no cover - optional
from SYS.logger import log, debug
from SYS.metadata import imdb_tag
from SYS.json_table import normalize_record
try: # Optional dependency
import musicbrainzngs # type: ignore
@@ -892,6 +896,524 @@ class YtdlpMetadataProvider(MetadataProvider):
return out
def _coerce_archive_field_list(value: Any) -> List[str]:
"""Coerce an Archive.org metadata field to a list of strings."""
if value is None:
return []
if isinstance(value, (list, tuple, set)):
out: List[str] = []
for v in value:
try:
s = str(v).strip()
except Exception:
continue
if s:
out.append(s)
return out
try:
s = str(value).strip()
except Exception:
return []
return [s] if s else []
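A minimal usage sketch of the coercion (illustrative only; the sample values are hypothetical, not from this commit):

# Hypothetical inputs -- exercises the scalar, list, and set branches.
assert _coerce_archive_field_list(None) == []
assert _coerce_archive_field_list("  Dune ") == ["Dune"]
assert _coerce_archive_field_list(["a", "", 7]) == ["a", "7"]
assert _coerce_archive_field_list({"solo"}) == ["solo"]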
def archive_item_metadata_to_tags(archive_id: str,
item_metadata: Dict[str, Any]) -> List[str]:
"""Coerce Archive.org metadata into a stable set of bibliographic tags."""
archive_id_clean = str(archive_id or "").strip()
meta = item_metadata if isinstance(item_metadata, dict) else {}
tags: List[str] = []
seen: set[str] = set()
def _add(tag: str) -> None:
try:
t = str(tag).strip()
except Exception:
return
if not t:
return
if t.lower() in seen:
return
seen.add(t.lower())
tags.append(t)
if archive_id_clean:
_add(f"internet_archive:{archive_id_clean}")
for title in _coerce_archive_field_list(meta.get("title"))[:1]:
_add(f"title:{title}")
creators: List[str] = []
creators.extend(_coerce_archive_field_list(meta.get("creator")))
creators.extend(_coerce_archive_field_list(meta.get("author")))
for creator in creators[:3]:
_add(f"author:{creator}")
for publisher in _coerce_archive_field_list(meta.get("publisher"))[:3]:
_add(f"publisher:{publisher}")
for date_val in _coerce_archive_field_list(meta.get("date"))[:1]:
_add(f"publish_date:{date_val}")
for year_val in _coerce_archive_field_list(meta.get("year"))[:1]:
_add(f"publish_date:{year_val}")
for lang in _coerce_archive_field_list(meta.get("language"))[:3]:
_add(f"language:{lang}")
for subj in _coerce_archive_field_list(meta.get("subject"))[:15]:
if len(subj) > 200:
subj = subj[:200]
_add(subj)
def _clean_isbn(raw: str) -> str:
return str(raw or "").replace("-", "").strip()
for isbn in _coerce_archive_field_list(meta.get("isbn"))[:10]:
isbn_clean = _clean_isbn(isbn)
if isbn_clean:
_add(f"isbn:{isbn_clean}")
identifiers: List[str] = []
identifiers.extend(_coerce_archive_field_list(meta.get("identifier")))
identifiers.extend(_coerce_archive_field_list(meta.get("external-identifier")))
added_other = 0
for ident in identifiers:
ident_s = str(ident or "").strip()
if not ident_s:
continue
low = ident_s.lower()
if low.startswith("urn:isbn:"):
val = _clean_isbn(ident_s.split(":", 2)[-1])
if val:
_add(f"isbn:{val}")
continue
if low.startswith("isbn:"):
val = _clean_isbn(ident_s.split(":", 1)[-1])
if val:
_add(f"isbn:{val}")
continue
if low.startswith("urn:oclc:"):
val = ident_s.split(":", 2)[-1].strip()
if val:
_add(f"oclc:{val}")
continue
if low.startswith("oclc:"):
val = ident_s.split(":", 1)[-1].strip()
if val:
_add(f"oclc:{val}")
continue
if low.startswith("urn:lccn:"):
val = ident_s.split(":", 2)[-1].strip()
if val:
_add(f"lccn:{val}")
continue
if low.startswith("lccn:"):
val = ident_s.split(":", 1)[-1].strip()
if val:
_add(f"lccn:{val}")
continue
if low.startswith("doi:"):
val = ident_s.split(":", 1)[-1].strip()
if val:
_add(f"doi:{val}")
continue
if archive_id_clean and low == archive_id_clean.lower():
continue
if added_other >= 5:
continue
if len(ident_s) > 200:
ident_s = ident_s[:200]
_add(f"identifier:{ident_s}")
added_other += 1
return tags
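A hedged example of the tag mapping (the metadata dict below is hypothetical):

sample_meta = {
    "title": "Example Book",
    "creator": ["A. Author"],
    "subject": ["fiction"],
    "isbn": ["978-0-00-000000-2"],
    "identifier": ["urn:oclc:12345"],
}
tags = archive_item_metadata_to_tags("exampleitem01", sample_meta)
# Yields, in field order:
# ["internet_archive:exampleitem01", "title:Example Book",
#  "author:A. Author", "fiction", "isbn:9780000000002", "oclc:12345"]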
def fetch_archive_item_metadata(archive_id: str,
*,
timeout: int = 8) -> Dict[str, Any]:
ident = str(archive_id or "").strip()
if not ident:
return {}
resp = requests.get(f"https://archive.org/metadata/{ident}", timeout=int(timeout))
resp.raise_for_status()
data = resp.json() if resp is not None else {}
if not isinstance(data, dict):
return {}
meta = data.get("metadata")
return meta if isinstance(meta, dict) else {}
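Note that fetch_archive_item_metadata propagates HTTP errors (raise_for_status), so callers should guard the lookup themselves. A hedged caller pattern, with a hypothetical identifier:

try:
    meta = fetch_archive_item_metadata("exampleitem01", timeout=8)
    item_tags = archive_item_metadata_to_tags("exampleitem01", meta)
except Exception as exc:
    item_tags = []
    log(f"Archive.org metadata lookup failed: {exc}", file=sys.stderr)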
def scrape_isbn_metadata(isbn: str) -> List[str]:
"""Scrape metadata tags for an ISBN using OpenLibrary's books API."""
new_tags: List[str] = []
isbn_clean = str(isbn or "").replace("isbn:", "").replace("-", "").strip()
if not isbn_clean:
return []
url = f"https://openlibrary.org/api/books?bibkeys=ISBN:{isbn_clean}&jscmd=data&format=json"
try:
with HTTPClient() as client:
response = client.get(url)
response.raise_for_status()
data = json.loads(response.content.decode("utf-8"))
except Exception as exc:
log(f"Failed to fetch ISBN metadata: {exc}", file=sys.stderr)
return []
if not data:
log(f"No ISBN metadata found for: {isbn}")
return []
book_data = next(iter(data.values()), None)
if not isinstance(book_data, dict):
return []
if "title" in book_data:
new_tags.append(f"title:{book_data['title']}")
authors = book_data.get("authors")
if isinstance(authors, list):
for author in authors[:3]:
if isinstance(author, dict) and author.get("name"):
new_tags.append(f"author:{author['name']}")
if book_data.get("publish_date"):
new_tags.append(f"publish_date:{book_data['publish_date']}")
publishers = book_data.get("publishers")
if isinstance(publishers, list) and publishers:
pub = publishers[0]
if isinstance(pub, dict) and pub.get("name"):
new_tags.append(f"publisher:{pub['name']}")
if "description" in book_data:
desc = book_data.get("description")
if isinstance(desc, dict) and "value" in desc:
desc = desc.get("value")
if desc:
desc_str = str(desc).strip()
if desc_str:
new_tags.append(f"description:{desc_str[:200]}")
page_count = book_data.get("number_of_pages")
if isinstance(page_count, int) and page_count > 0:
new_tags.append(f"pages:{page_count}")
identifiers = book_data.get("identifiers")
if isinstance(identifiers, dict):
def _first(value: Any) -> Any:
if isinstance(value, list) and value:
return value[0]
return value
for key, ns in (
("openlibrary", "openlibrary"),
("lccn", "lccn"),
("oclc", "oclc"),
("goodreads", "goodreads"),
("librarything", "librarything"),
("doi", "doi"),
("internet_archive", "internet_archive"),
):
val = _first(identifiers.get(key))
if val:
new_tags.append(f"{ns}:{val}")
debug(f"Found {len(new_tags)} tag(s) from ISBN lookup")
return new_tags
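A hedged call sketch (hypothetical ISBN). The function strips the "isbn:" prefix and hyphens itself; an ISBN unknown to OpenLibrary yields an empty response dict, so the function logs and returns []:

for tag in scrape_isbn_metadata("isbn:978-0-13-468599-1"):
    debug(f"isbn tag: {tag}")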
def scrape_openlibrary_metadata(olid: str) -> List[str]:
"""Scrape metadata tags for an OpenLibrary ID using the edition JSON endpoint."""
new_tags: List[str] = []
olid_text = str(olid or "").strip()
if not olid_text:
return []
olid_norm = olid_text
try:
if not olid_norm.startswith("OL"):
olid_norm = f"OL{olid_norm}"
if not olid_norm.endswith("M"):
olid_norm = f"{olid_norm}M"
except Exception:
olid_norm = olid_text
new_tags.append(f"openlibrary:{olid_norm}")
# Accept OL9674499M, 9674499M, or just digits; fetch via the normalized
# OLID so the "M" edition suffix is never dropped from the URL.
url = f"https://openlibrary.org/books/{olid_norm}.json"
try:
with HTTPClient() as client:
response = client.get(url)
response.raise_for_status()
data = json.loads(response.content.decode("utf-8"))
except Exception as exc:
log(f"Failed to fetch OpenLibrary metadata: {exc}", file=sys.stderr)
return []
if not isinstance(data, dict) or not data:
log(f"No OpenLibrary metadata found for: {olid_text}")
return []
if "title" in data:
new_tags.append(f"title:{data['title']}")
authors = data.get("authors")
if isinstance(authors, list):
for author in authors[:3]:
if isinstance(author, dict) and author.get("name"):
new_tags.append(f"author:{author['name']}")
continue
author_key = None
if isinstance(author, dict):
if isinstance(author.get("author"), dict):
author_key = author.get("author", {}).get("key")
if not author_key:
author_key = author.get("key")
if isinstance(author_key, str) and author_key.startswith("/"):
try:
author_url = f"https://openlibrary.org{author_key}.json"
with HTTPClient(timeout=10) as client:
author_resp = client.get(author_url)
author_resp.raise_for_status()
author_data = json.loads(author_resp.content.decode("utf-8"))
if isinstance(author_data, dict) and author_data.get("name"):
new_tags.append(f"author:{author_data['name']}")
continue
except Exception:
pass
if isinstance(author, str) and author:
new_tags.append(f"author:{author}")
if data.get("publish_date"):
new_tags.append(f"publish_date:{data['publish_date']}")
publishers = data.get("publishers")
if isinstance(publishers, list) and publishers:
pub = publishers[0]
if isinstance(pub, dict) and pub.get("name"):
new_tags.append(f"publisher:{pub['name']}")
elif isinstance(pub, str) and pub:
new_tags.append(f"publisher:{pub}")
if "description" in data:
desc = data.get("description")
if isinstance(desc, dict) and "value" in desc:
desc = desc.get("value")
if desc:
desc_str = str(desc).strip()
if desc_str:
new_tags.append(f"description:{desc_str[:200]}")
page_count = data.get("number_of_pages")
if isinstance(page_count, int) and page_count > 0:
new_tags.append(f"pages:{page_count}")
subjects = data.get("subjects")
if isinstance(subjects, list):
for subject in subjects[:10]:
if isinstance(subject, str):
subject_clean = subject.strip()
if subject_clean and subject_clean not in new_tags:
new_tags.append(subject_clean)
identifiers = data.get("identifiers")
if isinstance(identifiers, dict):
def _first(value: Any) -> Any:
if isinstance(value, list) and value:
return value[0]
return value
for key, ns in (
("isbn_10", "isbn_10"),
("isbn_13", "isbn_13"),
("lccn", "lccn"),
("oclc_numbers", "oclc"),
("goodreads", "goodreads"),
("internet_archive", "internet_archive"),
):
val = _first(identifiers.get(key))
if val:
new_tags.append(f"{ns}:{val}")
ocaid = data.get("ocaid")
if isinstance(ocaid, str) and ocaid.strip():
new_tags.append(f"internet_archive:{ocaid.strip()}")
debug(f"Found {len(new_tags)} tag(s) from OpenLibrary lookup")
return new_tags
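A hedged sketch of the OLID normalization (hypothetical OLID): every spelling resolves to the same edition endpoint.

for raw in ("OL9674499M", "OL9674499", "9674499M", "9674499"):
    # Each variant normalizes to OL9674499M and so fetches
    # https://openlibrary.org/books/OL9674499M.json
    tags = scrape_openlibrary_metadata(raw)
    debug(f"{raw}: {len(tags)} tag(s)")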
SAMPLE_ITEMS: List[Dict[str, Any]] = [
{
"title": "Sample OpenLibrary book",
"path": "https://openlibrary.org/books/OL123M",
"openlibrary_id": "OL123M",
"archive_id": "samplearchive123",
"availability": "borrow",
"availability_reason": "sample",
"direct_url": "https://archive.org/download/sample.pdf",
"author_name": ["OpenLibrary Demo"],
"first_publish_year": 2023,
"ia": ["samplearchive123"],
},
]
try:
from typing import Iterable
from SYS.result_table_api import ColumnSpec, ResultModel, metadata_column, title_column
from SYS.result_table_adapters import register_provider
def _ensure_search_result(item: Any) -> SearchResult:
if isinstance(item, SearchResult):
return item
if isinstance(item, dict):
data = dict(item)
title = str(data.get("title") or data.get("name") or "OpenLibrary")
path = str(data.get("path") or data.get("url") or "")
detail = str(data.get("detail") or "")
annotations = list(data.get("annotations") or [])
media_kind = str(data.get("media_kind") or "book")
return SearchResult(
table="openlibrary",
title=title,
path=path,
detail=detail,
annotations=annotations,
media_kind=media_kind,
columns=data.get("columns") or [],
full_metadata={**data, "raw": dict(item)},
)
return SearchResult(
table="openlibrary",
title=str(item or "OpenLibrary"),
path="",
detail="",
annotations=[],
media_kind="book",
full_metadata={"raw": {}},
)
def _adapter(items: Iterable[Any]) -> Iterable[ResultModel]:
for item in items:
sr = _ensure_search_result(item)
metadata = dict(getattr(sr, "full_metadata", {}) or {})
raw = metadata.get("raw")
if isinstance(raw, dict):
normalized = normalize_record(raw)
for key, val in normalized.items():
metadata.setdefault(key, val)
def _make_url() -> str:
candidate = (
metadata.get("selection_url") or
metadata.get("direct_url") or
metadata.get("url") or
metadata.get("path") or
sr.path or
""
)
return str(candidate or "").strip()
selection_url = _make_url()
if selection_url:
metadata["selection_url"] = selection_url
authors_value = metadata.get("authors_display") or metadata.get("authors") or metadata.get("author_name") or ""
if isinstance(authors_value, list):
authors_value = ", ".join(str(v) for v in authors_value if v)
authors_text = str(authors_value or "").strip()
if authors_text:
metadata["authors_display"] = authors_text
year_value = metadata.get("year") or metadata.get("first_publish_year")
if year_value and not isinstance(year_value, str):
year_value = str(year_value)
if year_value:
metadata["year"] = str(year_value)
metadata.setdefault("openlibrary_id", metadata.get("openlibrary_id") or metadata.get("olid"))
metadata.setdefault("source", metadata.get("source") or "openlibrary")
yield ResultModel(
title=str(sr.title or metadata.get("title") or selection_url or "OpenLibrary"),
path=selection_url or None,
metadata=metadata,
source="openlibrary",
)
def _columns_factory(rows: List[ResultModel]) -> List[ColumnSpec]:
cols: List[ColumnSpec] = [title_column()]
def _has(key: str) -> bool:
return any((row.metadata or {}).get(key) for row in rows)
if _has("authors_display"):
cols.append(
ColumnSpec(
"authors_display",
"Author",
lambda r: (r.metadata or {}).get("authors_display") or "",
)
)
if _has("year"):
cols.append(metadata_column("year", "Year"))
if _has("availability"):
cols.append(metadata_column("availability", "Avail"))
if _has("archive_id"):
cols.append(metadata_column("archive_id", "Archive ID"))
if _has("openlibrary_id"):
cols.append(metadata_column("openlibrary_id", "OLID"))
return cols
def _selection_fn(row: ResultModel) -> List[str]:
metadata = row.metadata or {}
url = str(metadata.get("selection_url") or row.path or "").strip()
if url:
return ["-url", url]
return ["-title", row.title or ""]
register_provider(
"openlibrary",
_adapter,
columns=_columns_factory,
selection_fn=_selection_fn,
metadata={"description": "OpenLibrary search provider (JSON result table template)"},
)
except Exception:
pass
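A hedged sketch of how the registered pieces compose, assuming the optional result-table imports above succeeded; driving the adapter directly like this is illustrative only:

rows = list(_adapter(SAMPLE_ITEMS))  # dicts -> ResultModel rows
cols = _columns_factory(rows)        # columns appear only when populated
for row in rows:
    debug(f"{row.title}: {_selection_fn(row)}")
# The sample item carries a direct_url, so selection resolves to ["-url", ...].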
# Registry ---------------------------------------------------------------
_METADATA_PROVIDERS: Dict[str,

View File

@@ -11,18 +11,29 @@ import sys
import tempfile
import time
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Tuple
from typing import Any, Callable, Dict, List, Optional, Tuple, Union
from urllib.parse import urlparse
import requests
from API.HTTP import HTTPClient
from API.HTTP import HTTPClient, get_requests_verify_value
from ProviderCore.base import Provider, SearchResult
from SYS.utils import sanitize_filename
from SYS.cli_syntax import get_field, get_free_text, parse_query
from SYS.logger import debug, log
from Provider.metadata_provider import (
archive_item_metadata_to_tags,
fetch_archive_item_metadata,
)
from SYS.utils import unique_path
_ARCHIVE_VERIFY_VALUE = get_requests_verify_value()
def _create_archive_session() -> requests.Session:
session = requests.Session()
session.verify = _ARCHIVE_VERIFY_VALUE
return session
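A hedged usage sketch (hypothetical item id): routing every Archive.org call through the factory, or passing verify=_ARCHIVE_VERIFY_VALUE explicitly, keeps TLS verification consistent with the API.HTTP configuration:

session = _create_archive_session()
resp = session.get("https://archive.org/metadata/exampleitem01", timeout=8)
resp.raise_for_status()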
try:
from Crypto.Cipher import AES # type: ignore
from Crypto.Util import Counter # type: ignore
@@ -262,182 +273,6 @@ def title_hint_from_url_slug(u: str) -> str:
return slug or "OpenLibrary"
def _coerce_archive_field_list(value: Any) -> List[str]:
"""Coerce an Archive.org metadata field to a list of strings."""
if value is None:
return []
if isinstance(value, list):
out: List[str] = []
for v in value:
try:
s = str(v).strip()
except Exception:
continue
if s:
out.append(s)
return out
if isinstance(value, (tuple, set)):
out = []
for v in value:
try:
s = str(v).strip()
except Exception:
continue
if s:
out.append(s)
return out
try:
s = str(value).strip()
except Exception:
return []
return [s] if s else []
def _archive_item_metadata_to_tags(archive_id: str, item_metadata: Dict[str, Any]) -> List[str]:
"""Map Archive.org metadata JSON (the `metadata` object) to tag strings.
This is intentionally best-effort and conservative: it focuses on stable,
useful bibliographic fields (title/author/publisher/ISBN/identifier/topics).
"""
archive_id_clean = str(archive_id or "").strip()
meta = item_metadata if isinstance(item_metadata, dict) else {}
tags: List[str] = []
seen: set[str] = set()
def _add(tag: str) -> None:
try:
t = str(tag).strip()
except Exception:
return
if not t:
return
if t.lower() in seen:
return
seen.add(t.lower())
tags.append(t)
if archive_id_clean:
_add(f"internet_archive:{archive_id_clean}")
# Title
for title in _coerce_archive_field_list(meta.get("title"))[:1]:
_add(f"title:{title}")
# Authors/creators
creators: List[str] = []
creators.extend(_coerce_archive_field_list(meta.get("creator")))
creators.extend(_coerce_archive_field_list(meta.get("author")))
for creator in creators[:3]:
_add(f"author:{creator}")
# Publisher
for publisher in _coerce_archive_field_list(meta.get("publisher"))[:3]:
_add(f"publisher:{publisher}")
# Publish date/year
for date_val in _coerce_archive_field_list(meta.get("date"))[:1]:
_add(f"publish_date:{date_val}")
for year_val in _coerce_archive_field_list(meta.get("year"))[:1]:
_add(f"publish_date:{year_val}")
# Language
for lang in _coerce_archive_field_list(meta.get("language"))[:3]:
_add(f"language:{lang}")
# Topics/subjects: follow existing OpenLibrary behavior (un-namespaced tags)
for subj in _coerce_archive_field_list(meta.get("subject"))[:15]:
if len(subj) > 200:
subj = subj[:200]
_add(subj)
# ISBNs and identifiers
def _clean_isbn(raw: str) -> str:
return str(raw or "").replace("-", "").strip()
for isbn in _coerce_archive_field_list(meta.get("isbn"))[:10]:
isbn_clean = _clean_isbn(isbn)
if isbn_clean:
_add(f"isbn:{isbn_clean}")
identifiers: List[str] = []
identifiers.extend(_coerce_archive_field_list(meta.get("identifier")))
identifiers.extend(_coerce_archive_field_list(meta.get("external-identifier")))
added_other = 0
for ident in identifiers:
ident_s = str(ident or "").strip()
if not ident_s:
continue
low = ident_s.lower()
if low.startswith("urn:isbn:"):
val = _clean_isbn(ident_s.split(":", 2)[-1])
if val:
_add(f"isbn:{val}")
continue
if low.startswith("isbn:"):
val = _clean_isbn(ident_s.split(":", 1)[-1])
if val:
_add(f"isbn:{val}")
continue
if low.startswith("urn:oclc:"):
val = ident_s.split(":", 2)[-1].strip()
if val:
_add(f"oclc:{val}")
continue
if low.startswith("oclc:"):
val = ident_s.split(":", 1)[-1].strip()
if val:
_add(f"oclc:{val}")
continue
if low.startswith("urn:lccn:"):
val = ident_s.split(":", 2)[-1].strip()
if val:
_add(f"lccn:{val}")
continue
if low.startswith("lccn:"):
val = ident_s.split(":", 1)[-1].strip()
if val:
_add(f"lccn:{val}")
continue
if low.startswith("doi:"):
val = ident_s.split(":", 1)[-1].strip()
if val:
_add(f"doi:{val}")
continue
if archive_id_clean and low == archive_id_clean.lower():
continue
if added_other >= 5:
continue
if len(ident_s) > 200:
ident_s = ident_s[:200]
_add(f"identifier:{ident_s}")
added_other += 1
return tags
def _fetch_archive_item_metadata(archive_id: str, *, timeout: int = 8) -> Dict[str, Any]:
ident = str(archive_id or "").strip()
if not ident:
return {}
resp = requests.get(f"https://archive.org/metadata/{ident}", timeout=int(timeout))
resp.raise_for_status()
data = resp.json() if resp is not None else {}
if not isinstance(data, dict):
return {}
meta = data.get("metadata")
return meta if isinstance(meta, dict) else {}
class OpenLibrary(Provider):
TABLE_AUTO_STAGES = {
@@ -466,7 +301,7 @@ class OpenLibrary(Provider):
def __init__(self, config: Optional[Dict[str, Any]] = None):
super().__init__(config)
self._session = requests.Session()
self._session = _create_archive_session()
class BookNotAvailableError(Exception):
"""Raised when a book is not available for borrowing (waitlisted/in use)."""
@@ -612,7 +447,7 @@ class OpenLibrary(Provider):
@classmethod
def _archive_login(cls, email: str, password: str) -> requests.Session:
"""Login to archive.org using the token-based services endpoint (matches test-login.py)."""
session = requests.Session()
session = _create_archive_session()
token_resp = session.get(
"https://archive.org/services/account/login/",
@@ -766,7 +601,11 @@ class OpenLibrary(Provider):
if not ident:
return False, "no-archive-id"
try:
resp = requests.get(f"https://archive.org/metadata/{ident}", timeout=8)
resp = requests.get(
f"https://archive.org/metadata/{ident}",
timeout=8,
verify=_ARCHIVE_VERIFY_VALUE,
)
resp.raise_for_status()
data = resp.json() if resp is not None else {}
meta = data.get("metadata",
@@ -976,7 +815,11 @@ class OpenLibrary(Provider):
"""Check for a directly downloadable original PDF in Archive.org metadata."""
try:
metadata_url = f"https://archive.org/metadata/{book_id}"
response = requests.get(metadata_url, timeout=6)
response = requests.get(
metadata_url,
timeout=6,
verify=_ARCHIVE_VERIFY_VALUE,
)
response.raise_for_status()
metadata = response.json()
files = metadata.get("files") if isinstance(metadata, dict) else None
@@ -993,7 +836,8 @@ class OpenLibrary(Provider):
check_response = requests.head(
pdf_url,
timeout=4,
allow_redirects=True
allow_redirects=True,
verify=_ARCHIVE_VERIFY_VALUE,
)
if check_response.status_code == 200:
return True, pdf_url
@@ -1001,235 +845,6 @@ class OpenLibrary(Provider):
except Exception:
return False, ""
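A hedged sketch of the probe above in isolation (hypothetical item and file names):

pdf_url = "https://archive.org/download/exampleitem01/exampleitem01.pdf"
head = requests.head(pdf_url, timeout=4, allow_redirects=True,
                     verify=_ARCHIVE_VERIFY_VALUE)
direct_pdf_available = (head.status_code == 200)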
@staticmethod
def scrape_isbn_metadata(isbn: str) -> List[str]:
"""Scrape tags for an ISBN using Open Library API.
Returns tags such as:
- title:<...>, author:<...>, publish_date:<...>, publisher:<...>, description:<...>, pages:<...>
- identifiers: openlibrary:<...>, lccn:<...>, oclc:<...>, goodreads:<...>, librarything:<...>, doi:<...>, internet_archive:<...>
"""
new_tags: List[str] = []
isbn_clean = str(isbn or "").replace("isbn:", "").replace("-", "").strip()
if not isbn_clean:
return []
url = f"https://openlibrary.org/api/books?bibkeys=ISBN:{isbn_clean}&jscmd=data&format=json"
try:
with HTTPClient() as client:
response = client.get(url)
response.raise_for_status()
data = json_module.loads(response.content.decode("utf-8"))
except Exception as exc:
log(f"Failed to fetch ISBN metadata: {exc}", file=sys.stderr)
return []
if not data:
log(f"No ISBN metadata found for: {isbn}")
return []
book_data = next(iter(data.values()), None)
if not isinstance(book_data, dict):
return []
if "title" in book_data:
new_tags.append(f"title:{book_data['title']}")
authors = book_data.get("authors")
if isinstance(authors, list):
for author in authors[:3]:
if isinstance(author, dict) and author.get("name"):
new_tags.append(f"author:{author['name']}")
if book_data.get("publish_date"):
new_tags.append(f"publish_date:{book_data['publish_date']}")
publishers = book_data.get("publishers")
if isinstance(publishers, list) and publishers:
pub = publishers[0]
if isinstance(pub, dict) and pub.get("name"):
new_tags.append(f"publisher:{pub['name']}")
if "description" in book_data:
desc = book_data.get("description")
if isinstance(desc, dict) and "value" in desc:
desc = desc.get("value")
if desc:
desc_str = str(desc).strip()
if desc_str:
new_tags.append(f"description:{desc_str[:200]}")
page_count = book_data.get("number_of_pages")
if isinstance(page_count, int) and page_count > 0:
new_tags.append(f"pages:{page_count}")
identifiers = book_data.get("identifiers")
if isinstance(identifiers, dict):
def _first(value: Any) -> Any:
if isinstance(value, list) and value:
return value[0]
return value
for key, ns in (
("openlibrary", "openlibrary"),
("lccn", "lccn"),
("oclc", "oclc"),
("goodreads", "goodreads"),
("librarything", "librarything"),
("doi", "doi"),
("internet_archive", "internet_archive"),
):
val = _first(identifiers.get(key))
if val:
new_tags.append(f"{ns}:{val}")
debug(f"Found {len(new_tags)} tag(s) from ISBN lookup")
return new_tags
@staticmethod
def scrape_openlibrary_metadata(olid: str) -> List[str]:
"""Scrape tags for an OpenLibrary ID using the .json API endpoint."""
new_tags: List[str] = []
olid_text = str(olid or "").strip()
if not olid_text:
return []
# Normalize OLID to the common "OL<digits>M" form when possible.
olid_norm = olid_text
try:
if not olid_norm.startswith("OL"):
olid_norm = f"OL{olid_norm}"
if not olid_norm.endswith("M"):
olid_norm = f"{olid_norm}M"
except Exception:
olid_norm = olid_text
# Ensure we always include a scrapeable identifier tag.
new_tags.append(f"openlibrary:{olid_norm}")
# Accept OL9674499M, 9674499M, or just digits.
olid_clean = olid_text.replace("OL", "").replace("M", "")
if not olid_clean.isdigit():
olid_clean = olid_text
if not olid_text.startswith("OL"):
url = f"https://openlibrary.org/books/OL{olid_clean}M.json"
else:
url = f"https://openlibrary.org/books/{olid_text}.json"
try:
with HTTPClient() as client:
response = client.get(url)
response.raise_for_status()
data = json_module.loads(response.content.decode("utf-8"))
except Exception as exc:
log(f"Failed to fetch OpenLibrary metadata: {exc}", file=sys.stderr)
return []
if not isinstance(data, dict) or not data:
log(f"No OpenLibrary metadata found for: {olid_text}")
return []
if "title" in data:
new_tags.append(f"title:{data['title']}")
authors = data.get("authors")
if isinstance(authors, list):
for author in authors[:3]:
if isinstance(author, dict) and author.get("name"):
new_tags.append(f"author:{author['name']}")
continue
# Common OL shape: {"key": "/authors/OL...A"} or {"author": {"key": ...}}
author_key = None
if isinstance(author, dict):
if isinstance(author.get("author"), dict):
author_key = author.get("author", {}).get("key")
if not author_key:
author_key = author.get("key")
if isinstance(author_key, str) and author_key.startswith("/"):
try:
author_url = f"https://openlibrary.org{author_key}.json"
with HTTPClient(timeout=10) as client:
author_resp = client.get(author_url)
author_resp.raise_for_status()
author_data = json_module.loads(author_resp.content.decode("utf-8"))
if isinstance(author_data, dict) and author_data.get("name"):
new_tags.append(f"author:{author_data['name']}")
continue
except Exception:
pass
if isinstance(author, str) and author:
new_tags.append(f"author:{author}")
if data.get("publish_date"):
new_tags.append(f"publish_date:{data['publish_date']}")
publishers = data.get("publishers")
if isinstance(publishers, list) and publishers:
pub = publishers[0]
if isinstance(pub, dict) and pub.get("name"):
new_tags.append(f"publisher:{pub['name']}")
elif isinstance(pub, str) and pub:
new_tags.append(f"publisher:{pub}")
if "description" in data:
desc = data.get("description")
if isinstance(desc, dict) and "value" in desc:
desc = desc.get("value")
if desc:
desc_str = str(desc).strip()
if desc_str:
new_tags.append(f"description:{desc_str[:200]}")
page_count = data.get("number_of_pages")
if isinstance(page_count, int) and page_count > 0:
new_tags.append(f"pages:{page_count}")
subjects = data.get("subjects")
if isinstance(subjects, list):
for subject in subjects[:10]:
if isinstance(subject, str):
subject_clean = subject.strip()
if subject_clean and subject_clean not in new_tags:
new_tags.append(subject_clean)
identifiers = data.get("identifiers")
if isinstance(identifiers, dict):
def _first(value: Any) -> Any:
if isinstance(value, list) and value:
return value[0]
return value
for key, ns in (
("isbn_10", "isbn_10"),
("isbn_13", "isbn_13"),
("lccn", "lccn"),
("oclc_numbers", "oclc"),
("goodreads", "goodreads"),
("internet_archive", "internet_archive"),
):
val = _first(identifiers.get(key))
if val:
new_tags.append(f"{ns}:{val}")
# Some editions expose a direct Archive.org identifier as "ocaid".
ocaid = data.get("ocaid")
if isinstance(ocaid, str) and ocaid.strip():
new_tags.append(f"internet_archive:{ocaid.strip()}")
debug(f"Found {len(new_tags)} tag(s) from OpenLibrary lookup")
return new_tags
def search(
self,
query: str,
@@ -1293,7 +908,7 @@ class OpenLibrary(Provider):
ia_val_local = []
ia_ids_local = [str(x) for x in ia_val_local if x]
session_local = requests.Session()
session_local = _create_archive_session()
try:
archive_id_local = _resolve_archive_id(
@@ -1423,19 +1038,38 @@ class OpenLibrary(Provider):
"borrow"}:
annotations.append(availability)
book_path = (
f"https://openlibrary.org/books/{edition_id}" if edition_id else
(
f"https://openlibrary.org{work_key}"
if isinstance(work_key, str) and work_key.startswith("/") else
"https://openlibrary.org"
)
)
metadata = {
"openlibrary_id": edition_id,
"openlibrary_key": work_key,
"authors": authors_list,
"year": year,
"isbn_10": isbn_10,
"isbn_13": isbn_13,
"ia": ia_ids,
"availability": availability,
"availability_reason": availability_reason,
"archive_id": archive_id,
"direct_url": direct_url,
"raw": doc,
}
if book_path:
metadata["selection_url"] = book_path
metadata["_selection_args"] = ["-url", book_path]
metadata["_selection_action"] = ["download-file", "-url", book_path]
results.append(
SearchResult(
table="openlibrary",
title=book_title,
path=(
f"https://openlibrary.org/books/{edition_id}" if edition_id else
(
f"https://openlibrary.org{work_key}"
if isinstance(work_key, str) and work_key.startswith("/") else
"https://openlibrary.org"
)
),
path=book_path,
detail=(
(f"By: {', '.join(authors_list)}" if authors_list else "") +
(f" ({year})" if year else "")
@@ -1443,20 +1077,7 @@ class OpenLibrary(Provider):
annotations=annotations,
media_kind="book",
columns=columns,
full_metadata={
"openlibrary_id": edition_id,
"openlibrary_key": work_key,
"authors": authors_list,
"year": year,
"isbn_10": isbn_10,
"isbn_13": isbn_13,
"ia": ia_ids,
"availability": availability,
"availability_reason": availability_reason,
"archive_id": archive_id,
"direct_url": direct_url,
"raw": doc,
},
full_metadata=metadata,
)
)
@@ -1507,8 +1128,8 @@ class OpenLibrary(Provider):
# Best-effort metadata scrape to attach bibliographic tags for downstream cmdlets.
try:
archive_meta = _fetch_archive_item_metadata(archive_id)
tags = _archive_item_metadata_to_tags(archive_id, archive_meta)
archive_meta = fetch_archive_item_metadata(archive_id)
tags = archive_item_metadata_to_tags(archive_id, archive_meta)
if tags:
try:
result.tag.update(tags)