dfd
Some checks failed
smoke-mm / Install & smoke test mm --help (push) Has been cancelled

This commit is contained in:
2025-12-27 21:24:27 -08:00
parent fcdd507d00
commit 8288ea8c66
16 changed files with 530 additions and 339 deletions

View File

@@ -47,7 +47,11 @@ class Download_File(Cmdlet):
CmdletArg(name="-output", type="string", alias="o", description="(deprecated) Output directory (use -path instead)"),
],
detail=["Download files directly via HTTP without yt-dlp processing.", "For streaming sites, use download-media."],
detail=[
"Download files directly via HTTP without yt-dlp processing.",
"For streaming sites, use download-media.",
"For Internet Archive item pages (archive.org/details/...), shows a selectable file list; pick with @N to download.",
],
exec=self.run,
)
self.register()
@@ -121,6 +125,7 @@ class Download_File(Cmdlet):
"match_provider_name_for_url": _match_provider_name_for_url,
"SearchResult": _SearchResult,
}
except Exception:
return {
"get_search_provider": None,
@@ -129,6 +134,154 @@ class Download_File(Cmdlet):
"SearchResult": None,
}
@staticmethod
def _maybe_show_internetarchive_formats(
    *,
    raw_urls: Sequence[str],
    piped_items: Sequence[Any],
    parsed: Dict[str, Any],
    config: Dict[str, Any],  # not read by this method; kept for call-site symmetry — NOTE(review): confirm it stays unused
    quiet_mode: bool,
) -> Optional[int]:
    """If the input is an IA item page, show a selectable formats table.

    Returns an exit code when handled (0 = picker shown, 1 = lookup/table
    failure); otherwise ``None``, meaning the caller should continue with
    the normal download path.

    The picker is only presented when ALL of the following hold:
      * not running in quiet/background mode,
      * exactly one input (raw URL or piped item) was supplied,
      * an archive.org identifier can be resolved from the input, and
      * the target is an item *details* page (direct file URLs are left
        to download immediately).
    """
    # Background/quiet runs must never block on an interactive table.
    if quiet_mode:
        return None
    # Count combined inputs defensively; any oddity disables the picker.
    try:
        total_inputs = int(len(raw_urls or []) + len(piped_items or []))
    except Exception:
        total_inputs = 0
    # A multi-input invocation is a batch download, not a browse session.
    if total_inputs != 1:
        return None
    item = piped_items[0] if piped_items else None
    # Resolve the target URL: prefer the piped item's path/url fields,
    # then fall back to the raw command-line URL.
    target = ""
    if item is not None:
        try:
            target = str(get_field(item, "path") or get_field(item, "url") or "").strip()
        except Exception:
            target = ""
    if not target and raw_urls:
        target = str(raw_urls[0]).strip()
    if not target:
        return None
    # Lazy import: if the IA provider is unavailable, silently fall
    # through to the generic download path.
    try:
        from Provider import internetarchive as ia
    except Exception:
        return None
    # Resolve the IA identifier: piped metadata first, then URL parsing.
    identifier = ""
    try:
        md = get_field(item, "full_metadata") if item is not None else None
        if isinstance(md, dict):
            identifier = str(md.get("identifier") or "").strip()
    except Exception:
        identifier = ""
    if not identifier:
        try:
            identifier = str(ia.extract_identifier(target) or "").strip()
        except Exception:
            identifier = ""
    if not identifier:
        return None
    # Only show picker for item pages (details); direct download URLs should download immediately.
    try:
        if not ia.is_details_url(target):
            return None
    except Exception:
        return None
    # From here on, failures are reported to the user (exit 1) rather
    # than silently falling back — the user explicitly gave an IA page.
    try:
        files = ia.list_download_files(identifier)
    except Exception as exc:
        log(f"download-file: Internet Archive lookup failed: {exc}", file=sys.stderr)
        return 1
    if not files:
        log("download-file: Internet Archive item has no downloadable files", file=sys.stderr)
        return 1
    # Prefer the item's human-readable title for the table heading;
    # fall back to the bare identifier.
    title = ""
    try:
        title = str(get_field(item, "title") or "").strip() if item is not None else ""
    except Exception:
        title = ""
    table_title = f"Internet Archive: {title}".strip().rstrip(":") if title else f"Internet Archive: {identifier}"
    try:
        from result_table import ResultTable
    except Exception as exc:
        log(f"download-file: ResultTable unavailable: {exc}", file=sys.stderr)
        return 1
    # Carry the user's output-dir choice into the @N re-invocation so the
    # selected file lands where the original command asked.
    base_args: List[str] = []
    out_arg = parsed.get("path") or parsed.get("output")
    if out_arg:
        base_args.extend(["-path", str(out_arg)])
    table = ResultTable(table_title).set_preserve_order(True)
    table.set_table("internetarchive.formats")
    table.set_source_command("download-file", base_args)
    rows: List[Dict[str, Any]] = []
    for f in files:
        # Skip entries without a name or a resolvable direct URL —
        # neither can be selected for download.
        name = str(f.get("name") or "").strip()
        if not name:
            continue
        fmt = str(f.get("format") or "").strip()
        src = str(f.get("source") or "").strip()
        direct_url = str(f.get("direct_url") or "").strip()
        if not direct_url:
            continue
        # Normalize size to int for display; blank when missing/unparsable.
        size_val: Any = f.get("size")
        try:
            size_val = int(size_val) if size_val not in (None, "") else ""
        except Exception:
            pass
        row_item: Dict[str, Any] = {
            "table": "internetarchive",
            "title": fmt or name,
            "path": direct_url,
            "url": direct_url,
            "columns": [
                ("Format", fmt),
                ("Name", name),
                ("Size", size_val),
                ("Source", src),
            ],
            # @N selection re-runs download-file with the direct URL,
            # which bypasses this picker (it is not a details URL).
            "_selection_args": [direct_url],
            "full_metadata": {
                "identifier": identifier,
                "name": name,
                "format": fmt,
                "source": src,
                "size": f.get("size"),  # raw size preserved alongside the display value
            },
        }
        rows.append(row_item)
        table.add_result(row_item)
    # files was non-empty, but every entry may have been filtered out above.
    if not rows:
        log("download-file: no downloadable files found for this item", file=sys.stderr)
        return 1
    # Publishing the table is best-effort: even if the pipeline context
    # rejects it, the user still sees the rendered list.
    try:
        pipeline_context.set_last_result_table(table, rows, subject=item)
        pipeline_context.set_current_stage_table(table)
    except Exception:
        pass
    log("Internet Archive item detected: select a file with @N to download", file=sys.stderr)
    return 0
@staticmethod
def _openlibrary_edition_id_from_url(u: str) -> str:
try:
@@ -284,11 +437,11 @@ class Download_File(Cmdlet):
post = None
title_hint = None
tags_hint: List[str] = []
tg_tags: List[str] = []
if channel:
tags_hint.append(f"channel:{channel}")
tg_tags.append(f"channel:{channel}")
if post is not None:
tags_hint.append(f"post:{post}")
tg_tags.append(f"post:{post}")
if channel and post is not None:
title_hint = f"{channel} {post}"
elif post is not None:
@@ -300,7 +453,7 @@ class Download_File(Cmdlet):
downloaded_path=downloaded_path,
source=str(url),
title_hint=title_hint,
tags_hint=tags_hint,
tags_hint=tg_tags,
media_kind_hint="file",
full_metadata=telegram_info,
provider_hint="telegram",
@@ -481,14 +634,15 @@ class Download_File(Cmdlet):
# Otherwise, try provider.download(SearchResult) with the URL as the target.
if provider is not None:
sr_obj = None
try:
sr = SearchResult(
sr_obj = SearchResult(
table=str(provider_name),
title=str(url),
path=str(url),
full_metadata={},
)
downloaded_path = provider.download(sr, final_output_dir) # type: ignore[call-arg]
downloaded_path = provider.download(sr_obj, final_output_dir) # type: ignore[call-arg]
except Exception:
downloaded_path = None
@@ -498,24 +652,25 @@ class Download_File(Cmdlet):
raise DownloadError("LibGen URL did not resolve to a downloadable file")
if downloaded_path:
tags_hint: Optional[List[str]] = None
emit_tags: Optional[List[str]] = None
full_md: Optional[Dict[str, Any]] = None
title_hint = Path(str(downloaded_path)).stem
media_kind_hint = "file"
if str(provider_name).lower() == "libgen":
if str(provider_name).lower() == "libgen" and sr_obj is not None:
media_kind_hint = "book"
try:
sr_tags = getattr(sr, "tag", None)
sr_tags = getattr(sr_obj, "tag", None)
if isinstance(sr_tags, set) and sr_tags:
tags_hint = sorted([str(t) for t in sr_tags if t])
emit_tags = sorted([str(t) for t in sr_tags if t])
except Exception:
tags_hint = None
emit_tags = None
try:
if isinstance(getattr(sr, "full_metadata", None), dict):
full_md = sr.full_metadata
t = str(full_md.get("title") or "").strip()
sr_full_md = getattr(sr_obj, "full_metadata", None)
if isinstance(sr_full_md, dict):
full_md = sr_full_md
t = str(sr_full_md.get("title") or "").strip()
if t:
title_hint = t
except Exception:
@@ -525,7 +680,7 @@ class Download_File(Cmdlet):
downloaded_path=Path(downloaded_path),
source=str(url),
title_hint=title_hint,
tags_hint=tags_hint,
tags_hint=emit_tags,
media_kind_hint=media_kind_hint,
full_metadata=full_md,
provider_hint=str(provider_name),
@@ -802,6 +957,17 @@ class Download_File(Cmdlet):
log("No url or piped items to download", file=sys.stderr)
return 1
quiet_mode = bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False
ia_picker_exit = self._maybe_show_internetarchive_formats(
raw_urls=raw_url,
piped_items=piped_items,
parsed=parsed,
config=config,
quiet_mode=quiet_mode,
)
if ia_picker_exit is not None:
return int(ia_picker_exit)
# Get output directory
final_output_dir = self._resolve_output_dir(parsed, config)
if not final_output_dir:
@@ -817,7 +983,6 @@ class Download_File(Cmdlet):
progress.ensure_local_ui(label="download-file", total_items=total_items, items_preview=preview)
quiet_mode = bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False
registry = self._load_provider_registry()
downloaded_count = 0