j

2026-01-03 03:37:48 -08:00
parent 6e9a0c28ff
commit 73f3005393
23 changed files with 1791 additions and 442 deletions
--- a/cmdlet/_shared.py
+++ b/cmdlet/_shared.py
@@ -28,6 +28,7 @@ class CmdletArg:
    """Argument type: 'string', 'int', 'flag', 'enum', etc."""
    required: bool = False
    """Whether this argument is required"""
+
    description: str = ""
    """Human-readable description of the argument"""
    choices: List[str] = field(default_factory=list)
@@ -424,6 +425,8 @@ class Cmdlet:
    """List of arguments accepted by this cmdlet"""
    detail: List[str] = field(default_factory=list)
    """Detailed explanation lines (for help text)"""
+    examples: List[str] = field(default_factory=list)
+    """Example invocations shown in `.help`."""
    # Execution function: func(result, args, config) -> int
    exec: Optional[Callable[[Any,
                             Sequence[str],
--- a/cmdlet/add_file.py
+++ b/cmdlet/add_file.py
@@ -185,6 +185,9 @@ class Add_File(Cmdlet):
                "    file.io: Upload to file.io for temporary hosting",
                "    internetarchive: Upload to archive.org (optional tag: ia:<identifier> to upload into an existing item)",
            ],
+            examples=[
+                'download-file "https://themathesontrust.org/papers/christianity/alcock-alphabet1.pdf" | add-file -store tutorial',
+            ],
            exec=self.run,
        )
        self.register()
@@ -1950,6 +1953,18 @@ class Add_File(Cmdlet):
                except Exception as exc:
                    debug(f"[add-file] sub note write failed: {exc}")

+            lyric_note = Add_File._get_note_text(result, pipe_obj, "lyric")
+            if lyric_note:
+                try:
+                    setter = getattr(backend, "set_note", None)
+                    if callable(setter):
+                        debug(
+                            f"[add-file] Writing lyric note (len={len(str(lyric_note))}) to {backend_name}:{resolved_hash}"
+                        )
+                        setter(resolved_hash, "lyric", lyric_note)
+                except Exception as exc:
+                    debug(f"[add-file] lyric note write failed: {exc}")
+
            chapters_note = Add_File._get_note_text(result, pipe_obj, "chapters")
            if chapters_note:
                try:
--- a/cmdlet/download_file.py
+++ b/cmdlet/download_file.py
@@ -15,6 +15,8 @@ from typing import Any, Dict, List, Optional, Sequence
 from urllib.parse import urlparse
 from contextlib import AbstractContextManager, nullcontext

+import requests
+
 from API.alldebrid import is_magnet_link
 from Provider import internetarchive as ia_provider
 from Provider import alldebrid as ad_provider
@@ -142,6 +144,85 @@ class Download_File(Cmdlet):

        return expanded_urls

+    @staticmethod
+    def _rewrite_archive_org_urls(raw_urls: Sequence[str]) -> List[str]:
+        """Rewrite Archive.org URLs using metadata JSON to pick the right flow.
+
+        - /metadata/<id>:
+            - if lendable (collection contains inlibrary/printdisabled/lendinglibrary) -> /borrow/<id>
+            - else -> /details/<id>
+        - /details/<id>:
+            - if lendable -> /borrow/<id>
+
+        This makes `download-file` do the right thing for borrow-only items.
+
+        Args:
+            raw_urls: Candidate URLs. Blank/None entries are dropped.
+
+        Returns:
+            A new list in input order. Entries that are not archive.org
+            /metadata/ or /details/ links are passed through (whitespace
+            stripped) without any network access.
+
+        Note:
+            Issues one blocking HTTP GET (8 second timeout) per *distinct*
+            item identifier. A failed lookup is reported through `debug` and
+            the item is treated as not lendable.
+        """
+
+        lendable_collections = {"inlibrary", "printdisabled", "lendinglibrary"}
+        # Per-call cache: the same item may be listed several times (or as both
+        # /metadata/ and /details/); never pay for the lookup or its timeout twice.
+        lendable_by_id: Dict[str, bool] = {}
+
+        out: List[str] = []
+        for u in raw_urls or []:
+            s = str(u or "").strip()
+            if not s:
+                continue
+
+            try:
+                p = urlparse(s)
+                host = (p.hostname or "").strip().lower()
+                path = (p.path or "").strip()
+            except ValueError:
+                # Malformed URL (e.g. bad bracketed host): leave it for later stages.
+                out.append(s)
+                continue
+
+            if not host or (host != "archive.org" and not host.endswith(".archive.org")):
+                out.append(s)
+                continue
+
+            if not path.lower().startswith(("/metadata/", "/details/")):
+                out.append(s)
+                continue
+
+            # The prefix check above guarantees parts[0] is "metadata" or "details".
+            parts = [x for x in path.split("/") if x]
+            if len(parts) < 2:
+                out.append(s)
+                continue
+            head = parts[0].strip().lower()
+            archive_id = parts[1].strip()
+            if not archive_id:
+                out.append(s)
+                continue
+
+            lendable = lendable_by_id.get(archive_id)
+            if lendable is None:
+                lendable = False
+                try:
+                    meta_url = f"https://archive.org/metadata/{archive_id}"
+                    resp = requests.get(meta_url, timeout=8)
+                    resp.raise_for_status()
+                    data = resp.json()
+                    meta = data.get("metadata", {}) if isinstance(data, dict) else {}
+                    collection = meta.get("collection") if isinstance(meta, dict) else None
+
+                    # "collection" may be a single string or a list of strings.
+                    if isinstance(collection, str):
+                        collection = [collection]
+                    if isinstance(collection, list):
+                        lendable = any(
+                            str(x).strip().lower() in lendable_collections
+                            for x in collection
+                        )
+                except Exception as exc:
+                    # Best-effort probe: network/JSON problems must not abort the
+                    # download, but they should be visible when debugging.
+                    debug(
+                        f"[download-file] archive.org metadata lookup failed for '{archive_id}': {exc}"
+                    )
+                    lendable = False
+                lendable_by_id[archive_id] = lendable
+
+            if lendable:
+                debug(f"[download-file] archive.org item '{archive_id}' looks lendable; using borrow flow")
+                out.append(f"https://archive.org/borrow/{archive_id}")
+                continue
+
+            # Non-lendable: turn metadata URLs into details URLs so IA picker can show files.
+            if head == "metadata":
+                out.append(f"https://archive.org/details/{archive_id}")
+                continue
+
+            out.append(s)
+
+        return out
+
    @staticmethod
    def _collect_piped_items_if_no_urls(result: Any,
                                        raw_urls: Sequence[str]) -> List[Any]:
@@ -232,6 +313,14 @@ class Download_File(Cmdlet):
        title_val = (title_hint or downloaded_path.stem
                     or "Unknown").strip() or downloaded_path.stem
        hash_value = self._compute_file_hash(downloaded_path)
+        notes: Optional[Dict[str, str]] = None
+        try:
+            if isinstance(full_metadata, dict):
+                subtitles = full_metadata.get("_tidal_lyrics_subtitles")
+                if isinstance(subtitles, str) and subtitles.strip():
+                    notes = {"lyric": subtitles}
+        except Exception:
+            notes = None
        tag: List[str] = []
        if tags_hint:
            tag.extend([str(t) for t in tags_hint if t])
@@ -253,6 +342,8 @@ class Download_File(Cmdlet):
            payload["provider"] = str(provider_hint)
        if full_metadata:
            payload["full_metadata"] = full_metadata
+        if notes:
+            payload["notes"] = notes
        if source and str(source).startswith("http"):
            payload["url"] = source
        elif source:
@@ -890,52 +981,85 @@ class Download_File(Cmdlet):
                        msg += f" (availability={availability or ''} reason={reason or ''})"
                    log(msg, file=sys.stderr)

-                    # Fallback: run a LibGen title search so the user can pick an alternative source.
+                    # Fallback: show a LibGen selectable ResultTable (no emits) so the user can pick @N.
+                    # This intentionally mirrors `search-file -provider libgen` UX: results table + selection.
                    try:
                        title_text = str(title or "").strip()
                        if not title_text and isinstance(full_metadata, dict):
                            title_text = str(full_metadata.get("title") or "").strip()
-                        if title_text:
+                        if title_text and get_search_provider and SearchResult:
                            log(
                                f"[download-file] Not available on OpenLibrary; searching LibGen for: {title_text}",
                                file=sys.stderr,
                            )
-                            from cmdlet.search_file import CMDLET as _SEARCH_FILE_CMDLET
-
-                            fallback_query = title_text
-                            exec_fn = getattr(_SEARCH_FILE_CMDLET, "exec", None)
-                            if not callable(exec_fn):
+                            libgen_provider = get_search_provider("libgen", config)
+                            if libgen_provider is None:
                                log(
-                                    "[download-file] search-file cmdlet unavailable; cannot run LibGen fallback search",
+                                    "[download-file] LibGen provider unavailable; cannot run fallback search",
                                    file=sys.stderr,
                                )
                                continue

-                            ret = exec_fn(
-                                None,
-                                ["-provider",
-                                 "libgen",
-                                 "-query",
-                                 fallback_query],
-                                config,
-                            )
-
-                            # Promote the search-file table to a display overlay so it renders.
                            try:
-                                table_obj = pipeline_context.get_last_result_table()
-                                items_obj = pipeline_context.get_last_result_items()
-                                if table_obj is not None:
-                                    pipeline_context.set_last_result_table_overlay(
-                                        table_obj,
-                                        items_obj
-                                    )
+                                from SYS.result_table import ResultTable
+                            except Exception:
+                                ResultTable = None  # type: ignore[assignment]
+
+                            if ResultTable is None:
+                                log(
+                                    "[download-file] ResultTable unavailable; cannot render LibGen fallback search",
+                                    file=sys.stderr,
+                                )
+                                continue
+
+                            fallback_query = title_text
+                            # Keep parity with search-file provider default when user didn't specify a limit.
+                            results = libgen_provider.search(fallback_query, limit=50)
+                            if not results:
+                                log(
+                                    f"[download-file] LibGen: no results found for: {fallback_query}",
+                                    file=sys.stderr,
+                                )
+                                continue
+
+                            table_title = f"Libgen: {fallback_query}".strip().rstrip(":")
+                            table_obj = ResultTable(table_title).set_preserve_order(False)
+                            table_obj.set_table("libgen")
+                            try:
+                                table_obj.set_table_metadata({"provider": "libgen"})
                            except Exception:
                                pass

+                            # Mark as produced by download-file so the pipeline runner pauses and stores tail stages.
+                            table_obj.set_source_command("download-file", [])
+
+                            results_list: List[Dict[str, Any]] = []
+                            for search_result in results:
+                                item_dict = (
+                                    search_result.to_dict()
+                                    if hasattr(search_result, "to_dict")
+                                    else dict(search_result)
+                                    if isinstance(search_result, dict)
+                                    else {"title": str(search_result)}
+                                )
+                                if "table" not in item_dict:
+                                    item_dict["table"] = "libgen"
+                                table_obj.add_result(search_result)
+                                results_list.append(item_dict)
+
+                            # Seed selection state for @N and pause the pipeline.
                            try:
-                                return int(ret)  # type: ignore[arg-type]
+                                pipeline_context.set_last_result_table(table_obj, results_list)
                            except Exception:
-                                return 1
+                                pass
+                            try:
+                                pipeline_context.set_current_stage_table(table_obj)
+                            except Exception:
+                                pass
+
+                            # Returning 0 with a selectable stage table and no emits causes the CLI to render
+                            # the table and pause, preserving the downstream pipeline tail.
+                            return 0
                    except Exception:
                        pass

@@ -976,6 +1100,15 @@ class Download_File(Cmdlet):
                    )
                    continue

+                # Prefer provider-enriched metadata (providers may mutate sr.full_metadata).
+                if provider_sr is not None:
+                    try:
+                        sr_md = getattr(provider_sr, "full_metadata", None)
+                        if isinstance(sr_md, dict) and sr_md:
+                            full_metadata = sr_md
+                    except Exception:
+                        pass
+
                # Allow providers to add/enrich tags and metadata during download.
                if str(table or "").lower() == "libgen" and provider_sr is not None:
                    try:
@@ -3305,6 +3438,7 @@ class Download_File(Cmdlet):
            parsed = parse_cmdlet_args(args, self)

            raw_url = self._normalize_urls(parsed)
+            raw_url = self._rewrite_archive_org_urls(raw_url)
            piped_items = self._collect_piped_items_if_no_urls(result, raw_url)

            had_piped_input = False
@@ -3346,6 +3480,26 @@ class Download_File(Cmdlet):
                log("No url or piped items to download", file=sys.stderr)
                return 1

+            # Internet Archive details URLs should present a downloadable file picker
+            # before we try any streaming/ytdlp probing.
+            try:
+                quiet_mode = (
+                    bool(config.get("_quiet_background_output"))
+                    if isinstance(config, dict) else False
+                )
+            except Exception:
+                quiet_mode = False
+            ia_picker_exit = ia_provider.maybe_show_formats_table(
+                raw_urls=raw_url,
+                piped_items=piped_items,
+                parsed=parsed,
+                config=config,
+                quiet_mode=quiet_mode,
+                get_field=get_field,
+            )
+            if ia_picker_exit is not None:
+                return int(ia_picker_exit)
+
            streaming_candidates = self._append_urls_from_piped_result(list(raw_url), result)
            supported_streaming, unsupported_streaming = self._filter_supported_urls(streaming_candidates)

@@ -3360,13 +3514,13 @@ class Download_File(Cmdlet):
                )
                if streaming_exit_code == 0:
                    streaming_downloaded += 1
+                    # Only remove URLs from further processing when streaming succeeded.
+                    raw_url = [u for u in raw_url if u not in supported_streaming]
+                    if not raw_url and not unsupported_streaming:
+                        piped_items = []

-                raw_url = [u for u in raw_url if u not in supported_streaming]
-                if not raw_url and not unsupported_streaming:
-                    piped_items = []
-
-                if not raw_url and not piped_items:
-                    return int(streaming_exit_code or 0)
+                    if not raw_url and not piped_items:
+                        return int(streaming_exit_code or 0)

            quiet_mode = (
                bool(config.get("_quiet_background_output"))
--- a/cmdlet/search_file.py
+++ b/cmdlet/search_file.py
@@ -110,6 +110,17 @@ class search_file(Cmdlet):
        ext = "".join(ch for ch in ext if ch.isalnum())
        return ext[:5]

+    @staticmethod
+    def _get_hifi_view_from_query(query: str) -> str:
+        """Pick the HiFi result view implied by a search query.
+
+        Returns "album" when the query contains an `album:` field marker,
+        otherwise "artist" when it contains an `artist:` marker. Any other
+        query, including an empty or None one, maps to "track". Matching is
+        case-insensitive and `album:` wins when both markers are present.
+        """
+        normalized = str(query or "").strip()
+        # Ordered by precedence: the first marker found decides the view.
+        markers = (
+            (r"\balbum\s*:", "album"),
+            (r"\bartist\s*:", "artist"),
+        )
+        for pattern, view in markers:
+            if re.search(pattern, normalized, flags=re.IGNORECASE):
+                return view
+        return "track"
+
    def _ensure_storage_columns(self, payload: Dict[str, Any]) -> Dict[str, Any]:
        """Ensure storage results have the necessary fields for result_table display."""
        store_value = str(payload.get("store") or "").lower()
@@ -236,9 +247,18 @@ class search_file(Cmdlet):
                table_title = f"{provider_label}: {query}".strip().rstrip(":")

            preserve_order = provider_lower in {"youtube", "openlibrary", "loc"}
-            table = ResultTable(table_title).set_preserve_order(preserve_order)
-            table.set_table(provider_name)
+            table_type = provider_name
            table_meta: Dict[str, Any] = {"provider": provider_name}
+            if provider_lower == "hifi":
+                view = self._get_hifi_view_from_query(query)
+                table_meta["view"] = view
+                table_type = f"hifi.{view}"
+            elif provider_lower == "internetarchive":
+                # Internet Archive search results are effectively folders (items); selecting @N
+                # should open a list of downloadable files for the chosen item.
+                table_type = "internetarchive.folder"
+            table = ResultTable(table_title).set_preserve_order(preserve_order)
+            table.set_table(table_type)
            if provider_lower == "alldebrid":
                table_meta["view"] = "files" if effective_open_id is not None else "folders"
                if effective_open_id is not None:
@@ -277,7 +297,7 @@ class search_file(Cmdlet):
                )

                if "table" not in item_dict:
-                    item_dict["table"] = provider_name
+                    item_dict["table"] = table_type

                row_index = len(table.rows)
                table.add_result(search_result)