j

2026-01-03 03:37:48 -08:00
parent 6e9a0c28ff
commit 73f3005393
23 changed files with 1791 additions and 442 deletions
--- a/Provider/openlibrary.py
+++ b/Provider/openlibrary.py
@@ -214,10 +214,15 @@ def _archive_id_from_url(url: str) -> str:
    # - /details/<id>/...
    # - /borrow/<id>
    # - /download/<id>/...
-    if len(parts) >= 2 and parts[0].lower() in {"details",
-                                                "borrow",
-                                                "download",
-                                                "stream"}:
+    # - /stream/<id>/...
+    # - /metadata/<id>
+    if len(parts) >= 2 and parts[0].lower() in {
+        "details",
+        "borrow",
+        "download",
+        "stream",
+        "metadata",
+    }:
        return str(parts[1]).strip()

    # Sometimes the identifier is the first segment.
@@ -225,37 +230,38 @@ def _archive_id_from_url(url: str) -> str:
        first = str(parts[0]).strip()
        if first and first.lower() not in {"account",
                                           "services",
+                                           "metadata",
                                           "search",
                                           "advancedsearch.php"}:
            return first

-
-        def edition_id_from_url(u: str) -> str:
-            """Extract an OpenLibrary edition id (OL...M) from a book URL."""
-            try:
-                p = urlparse(str(u))
-                parts = [x for x in (p.path or "").split("/") if x]
-            except Exception:
-                parts = []
-            if len(parts) >= 2 and str(parts[0]).lower() == "books":
-                return str(parts[1]).strip()
-            return ""
-
-
-        def title_hint_from_url_slug(u: str) -> str:
-            """Derive a human-friendly title hint from the URL slug."""
-            try:
-                p = urlparse(str(u))
-                parts = [x for x in (p.path or "").split("/") if x]
-                slug = parts[-1] if parts else ""
-            except Exception:
-                slug = ""
-            slug = (slug or "").strip().replace("_", " ")
-            return slug or "OpenLibrary"
-
    return ""


+def edition_id_from_url(u: str) -> str:
+    """Extract an OpenLibrary edition id (OL...M) from a book URL."""
+    try:
+        p = urlparse(str(u))
+        parts = [x for x in (p.path or "").split("/") if x]
+    except Exception:
+        parts = []
+    if len(parts) >= 2 and str(parts[0]).lower() == "books":
+        return str(parts[1]).strip()
+    return ""
+
+
+def title_hint_from_url_slug(u: str) -> str:
+    """Derive a human-friendly title hint from the URL slug."""
+    try:
+        p = urlparse(str(u))
+        parts = [x for x in (p.path or "").split("/") if x]
+        slug = parts[-1] if parts else ""
+    except Exception:
+        slug = ""
+    slug = (slug or "").strip().replace("_", " ")
+    return slug or "OpenLibrary"
+
+
 def _coerce_archive_field_list(value: Any) -> List[str]:
    """Coerce an Archive.org metadata field to a list of strings."""
    if value is None:
@@ -433,6 +439,22 @@ def _fetch_archive_item_metadata(archive_id: str,


 class OpenLibrary(Provider):
+
+    TABLE_AUTO_STAGES = {
+        "openlibrary": ["download-file"],
+    }
+
+    REQUIRED_CONFIG_KEYS = (
+        "email",
+        "password",
+    )
+
+    DEFAULT_ARCHIVE_SCALE = 4
+    QUALITY_TO_ARCHIVE_SCALE = {
+        "high": 2,
+        "medium": 5,
+        "low": 8,
+    }
    # Domains that should be routed to this provider when the user supplies a URL.
    # (Used by ProviderCore.registry.match_provider_name_for_url)
    URL_DOMAINS = (
@@ -449,6 +471,41 @@ class OpenLibrary(Provider):
    class BookNotAvailableError(Exception):
        """Raised when a book is not available for borrowing (waitlisted/in use)."""

+    def search_result_from_url(self, url: str) -> Optional[SearchResult]:
+        """Build a minimal SearchResult from a bare OpenLibrary/Archive URL."""
+        edition_id = edition_id_from_url(url)
+        title_hint = title_hint_from_url_slug(url)
+        return SearchResult(
+            table="openlibrary",
+            title=title_hint,
+            path=str(url),
+            media_kind="book",
+            full_metadata={"openlibrary_id": edition_id} if edition_id else {},
+        )
+
+    def download_url(
+        self,
+        url: str,
+        output_dir: Path,
+        progress_callback: Optional[Callable[[str, int, Optional[int], str], None]] = None,
+    ) -> Optional[Dict[str, Any]]:
+        """Download a book directly from an OpenLibrary/Archive URL.
+
+        Returns a dict with the downloaded path and SearchResult when successful.
+        """
+        sr = self.search_result_from_url(url)
+        if sr is None:
+            return None
+
+        downloaded = self.download(sr, output_dir, progress_callback)
+        if not downloaded:
+            return None
+
+        return {
+            "path": Path(downloaded),
+            "search_result": sr,
+        }
+
    @staticmethod
    def _credential_archive(config: Dict[str,
                                         Any]) -> Tuple[Optional[str],
@@ -491,6 +548,57 @@ class OpenLibrary(Provider):
            str(password) if password is not None else None
        )

+    @classmethod
+    def _archive_scale_from_config(cls, config: Dict[str, Any]) -> int:
+        """Resolve Archive.org book-reader scale from provider config.
+
+        Config:
+          [provider=OpenLibrary]
+          quality="medium"  # High=2, Medium=5, Low=8
+
+        Default when missing/invalid: 4.
+        """
+
+        default_scale = int(getattr(cls, "DEFAULT_ARCHIVE_SCALE", 4) or 4)
+        if not isinstance(config, dict):
+            return default_scale
+
+        provider_config = config.get("provider", {})
+        openlibrary_config = None
+        if isinstance(provider_config, dict):
+            openlibrary_config = provider_config.get("openlibrary")
+        if not isinstance(openlibrary_config, dict):
+            openlibrary_config = {}
+
+        raw_quality = openlibrary_config.get("quality")
+        if raw_quality is None:
+            return default_scale
+
+        if isinstance(raw_quality, (int, float)):
+            try:
+                val = int(raw_quality)
+            except Exception:
+                return default_scale
+            return val if val > 0 else default_scale
+
+        try:
+            q = str(raw_quality).strip().lower()
+        except Exception:
+            return default_scale
+        if not q:
+            return default_scale
+
+        mapped = cls.QUALITY_TO_ARCHIVE_SCALE.get(q)
+        if isinstance(mapped, int) and mapped > 0:
+            return mapped
+
+        # Allow numeric strings (e.g. quality="4").
+        try:
+            val = int(q)
+        except Exception:
+            return default_scale
+        return val if val > 0 else default_scale
+
    @staticmethod
    def _archive_error_body(response: requests.Response) -> str:
        try:
@@ -1444,64 +1552,6 @@ class OpenLibrary(Provider):
            log("[openlibrary] Direct download failed", file=sys.stderr)
            return None

-        # --- Convenience helpers for URL-driven downloads (used by download-file) ---
-
-        def search_result_from_url(self, url: str) -> Optional[SearchResult]:
-            """Build a minimal SearchResult from a bare OpenLibrary URL."""
-            edition_id = edition_id_from_url(url)
-            title_hint = title_hint_from_url_slug(url)
-            return SearchResult(
-                table="openlibrary",
-                title=title_hint,
-                path=str(url),
-                media_kind="book",
-                full_metadata={"openlibrary_id": edition_id} if edition_id else {},
-            )
-
-        def download_url(
-            self,
-            url: str,
-            output_dir: Path,
-            progress_callback: Optional[Callable[[str, int, Optional[int], str], None]] = None,
-        ) -> Optional[Dict[str, Any]]:
-            """Download a book directly from an OpenLibrary URL.
-
-            Returns a dict with the downloaded path and SearchResult when successful.
-            """
-            sr = self.search_result_from_url(url)
-            if sr is None:
-                return None
-
-            downloaded = self.download(sr, output_dir, progress_callback)
-            if not downloaded:
-                return None
-
-            return {
-                "path": Path(downloaded),
-                "search_result": sr,
-            }
-            try:
-                if progress_callback is not None:
-                    progress_callback("step", 0, None, "direct download")
-            except Exception:
-                pass
-            out_path = unique_path(output_dir / f"{safe_title}.pdf")
-            ok = download_file(
-                pdf_url,
-                out_path,
-                session=self._session,
-                progress_callback=(
-                    (
-                        lambda downloaded, total, label:
-                        progress_callback("bytes", downloaded, total, label)
-                    ) if progress_callback is not None else None
-                ),
-            )
-            if ok:
-                return out_path
-            log("[openlibrary] Direct download failed", file=sys.stderr)
-            return None
-
        # 2) Borrow flow (credentials required).
        try:
            email, password = self._credential_archive(self.config or {})
@@ -1510,6 +1560,15 @@ class OpenLibrary(Provider):
                    "[openlibrary] Archive credentials missing; cannot borrow",
                    file=sys.stderr
                )
+                try:
+                    from SYS.rich_display import show_provider_config_panel
+
+                    show_provider_config_panel(
+                        "openlibrary",
+                        keys=self.required_config_keys(),
+                    )
+                except Exception:
+                    pass
                return None

            lendable = True
@@ -1590,7 +1649,7 @@ class OpenLibrary(Provider):
                        n_threads=10,
                        directory=temp_dir,
                        links=links,
-                        scale=3,
+                        scale=self._archive_scale_from_config(self.config or {}),
                        book_id=archive_id,
                        progress_callback=(
                            (