lkjlkj

2025-12-11 23:21:45 -08:00
parent 16d8a763cd
commit e2ffcab030
44 changed files with 3558 additions and 1793 deletions
--- a/cmdlets/download_file.py
+++ b/cmdlets/download_file.py
@@ -1,12 +1,10 @@
-"""Download files directly via HTTP (non-yt-dlp url).
+"""Generic file downloader.

-Focused cmdlet for direct file downloads from:
- PDFs, images, documents
- url not supported by yt-dlp
- LibGen sources
- Direct file links
+Supports:
+- Direct HTTP file URLs (PDFs, images, documents; non-yt-dlp)
+- Piped provider items (uses provider.download when available)

-No streaming site logic - pure HTTP download with retries.
+No streaming site logic; use download-media for yt-dlp/streaming.
 """

 from __future__ import annotations
@@ -17,10 +15,17 @@ from typing import Any, Dict, List, Optional, Sequence

 from SYS.download import DownloadError, _download_direct_file
 from SYS.logger import log, debug
-from models import DownloadOptions
 import pipeline as pipeline_context

-from ._shared import Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, register_url_with_local_library, coerce_to_pipe_object
+from ._shared import (
+    Cmdlet,
+    CmdletArg,
+    SharedArgs,
+    parse_cmdlet_args,
+    register_url_with_local_library,
+    coerce_to_pipe_object,
+    get_field,
+)


 class Download_File(Cmdlet):
@@ -30,14 +35,13 @@ class Download_File(Cmdlet):
        """Initialize download-file cmdlet."""
        super().__init__(
            name="download-file",
-            summary="Download files directly via HTTP (PDFs, images, documents)",
-            usage="download-file <url> [options] or search-file | download-file [options]",
+            summary="Download files via HTTP or provider handlers",
+            usage="download-file <url> [options] OR @N | download-file [options]",
            alias=["dl-file", "download-http"],
            arg=[
-                CmdletArg(name="url", type="string", required=False, description="URL to download (direct file links)", variadic=True),
-                CmdletArg(name="-url", type="string", description="URL to download (alias for positional argument)", variadic=True),
-                CmdletArg(name="output", type="string", alias="o", description="Output filename (auto-detected if not specified)"),
-                SharedArgs.URL
+                CmdletArg(name="output", type="string", alias="o", description="Output directory (overrides defaults)"),
+                SharedArgs.URL,
+                
            ],
            detail=["Download files directly via HTTP without yt-dlp processing.", "For streaming sites, use download-media."],
            exec=self.run,
@@ -60,13 +64,21 @@ class Download_File(Cmdlet):
            # Parse arguments
            parsed = parse_cmdlet_args(args, self)

-            # Extract options
+            # Extract explicit URL args (if any)
            raw_url = parsed.get("url", [])
            if isinstance(raw_url, str):
                raw_url = [raw_url]

+            # If no URL args were provided, fall back to piped results (provider items)
+            piped_items: List[Any] = []
            if not raw_url:
-                log("No url to download", file=sys.stderr)
+                if isinstance(result, list):
+                    piped_items = result
+                elif result:
+                    piped_items = [result]
+
+            if not raw_url and not piped_items:
+                log("No url or piped items to download", file=sys.stderr)
                return 1

            # Get output directory
@@ -76,27 +88,78 @@ class Download_File(Cmdlet):

            debug(f"Output directory: {final_output_dir}")

-            # Download each URL
+            # Download each URL and/or provider item
            downloaded_count = 0
            quiet_mode = bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False
-            custom_output = parsed.get("output")

+            # Provider lookup is optional; keep import local to avoid overhead if unused
+            get_search_provider = None
+            SearchResult = None
+            try:
+                from Provider.registry import get_search_provider as _get_search_provider, SearchResult as _SearchResult
+
+                get_search_provider = _get_search_provider
+                SearchResult = _SearchResult
+            except Exception:
+                get_search_provider = None
+                SearchResult = None
+
+            def _emit_local_file(downloaded_path: Path, source: Optional[str], title_hint: Optional[str], tags_hint: Optional[List[str]], media_kind_hint: Optional[str], full_metadata: Optional[Dict[str, Any]]) -> None:
+                title_val = (title_hint or downloaded_path.stem or "Unknown").strip() or downloaded_path.stem
+                hash_value = self._compute_file_hash(downloaded_path)
+                tag: List[str] = []
+                if tags_hint:
+                    tag.extend([str(t) for t in tags_hint if t])
+                if not any(str(t).lower().startswith("title:") for t in tag):
+                    tag.insert(0, f"title:{title_val}")
+
+                payload: Dict[str, Any] = {
+                    "path": str(downloaded_path),
+                    "hash": hash_value,
+                    "title": title_val,
+                    "action": "cmdlet:download-file",
+                    "download_mode": "file",
+                    "store": "local",
+                    "media_kind": media_kind_hint or "file",
+                    "tag": tag,
+                }
+                if full_metadata:
+                    payload["full_metadata"] = full_metadata
+                if source and str(source).startswith("http"):
+                    payload["url"] = source
+                elif source:
+                    payload["source_url"] = source
+
+                pipeline_context.emit(payload)
+
+                # Register the source URL with the local library (HTTP sources only)
+                if payload.get("url"):
+                    pipe_obj = coerce_to_pipe_object(payload)
+                    register_url_with_local_library(pipe_obj, config)
+
+            # 1) Explicit URL downloads
            for url in raw_url:
                try:
-                    debug(f"Processing: {url}")
+                    debug(f"Processing URL: {url}")

-                    # Direct HTTP download
                    result_obj = _download_direct_file(url, final_output_dir, quiet=quiet_mode)
-                    debug(f"Download completed, building pipe object...")
-                    pipe_obj_dict = self._build_pipe_object(result_obj, url, final_output_dir)
-                    debug(f"Emitting result to pipeline...")
-                    pipeline_context.emit(pipe_obj_dict)
-                    
-                    # Automatically register url with local library
-                    if pipe_obj_dict.get("url"):
-                        pipe_obj = coerce_to_pipe_object(pipe_obj_dict)
-                        register_url_with_local_library(pipe_obj, config)
-                    
+                    file_path = None
+                    if hasattr(result_obj, "path"):
+                        file_path = getattr(result_obj, "path")
+                    elif isinstance(result_obj, dict):
+                        file_path = result_obj.get("path")
+                    if not file_path:
+                        file_path = str(result_obj)
+                    downloaded_path = Path(str(file_path))
+
+                    _emit_local_file(
+                        downloaded_path=downloaded_path,
+                        source=url,
+                        title_hint=downloaded_path.stem,
+                        tags_hint=[f"title:{downloaded_path.stem}"],
+                        media_kind_hint="file",
+                        full_metadata=None,
+                    )
                    downloaded_count += 1
                    debug("✓ Downloaded and emitted")

@@ -105,6 +168,72 @@ class Download_File(Cmdlet):
                except Exception as e:
                    log(f"Error processing {url}: {e}", file=sys.stderr)

+            # 2) Provider item downloads (piped results)
+            for item in piped_items:
+                try:
+                    table = get_field(item, "table")
+                    title = get_field(item, "title")
+                    target = get_field(item, "path") or get_field(item, "url")
+                    media_kind = get_field(item, "media_kind")
+                    tags_val = get_field(item, "tag")
+                    tags_list: Optional[List[str]]
+                    if isinstance(tags_val, list):
+                        tags_list = [str(t) for t in tags_val if t]
+                    else:
+                        tags_list = None
+
+                    full_metadata = get_field(item, "full_metadata")
+                    if (not full_metadata) and isinstance(item, dict) and isinstance(item.get("extra"), dict):
+                        extra_md = item["extra"].get("full_metadata")
+                        if isinstance(extra_md, dict):
+                            full_metadata = extra_md
+
+                    # If this looks like a provider item and providers are available, prefer provider.download()
+                    downloaded_path: Optional[Path] = None
+                    if table and get_search_provider and SearchResult:
+                        provider = get_search_provider(str(table), config)
+                        if provider is not None:
+                            sr = SearchResult(
+                                table=str(table),
+                                title=str(title or "Unknown"),
+                                path=str(target or ""),
+                                full_metadata=full_metadata if isinstance(full_metadata, dict) else {},
+                            )
+                            debug(f"[download-file] Downloading provider item via {table}: {sr.title}")
+                            downloaded_path = provider.download(sr, final_output_dir)
+
+                    # Fallback: if we have a direct HTTP URL, download it directly
+                    if downloaded_path is None and isinstance(target, str) and target.startswith("http"):
+                        debug(f"[download-file] Provider item looks like direct URL, downloading: {target}")
+                        result_obj = _download_direct_file(target, final_output_dir, quiet=quiet_mode)
+                        file_path = None
+                        if hasattr(result_obj, "path"):
+                            file_path = getattr(result_obj, "path")
+                        elif isinstance(result_obj, dict):
+                            file_path = result_obj.get("path")
+                        if not file_path:
+                            file_path = str(result_obj)
+                        downloaded_path = Path(str(file_path))
+
+                    if downloaded_path is None:
+                        log(f"Cannot download item (no provider handler / unsupported target): {title or target}", file=sys.stderr)
+                        continue
+
+                    _emit_local_file(
+                        downloaded_path=downloaded_path,
+                        source=str(target) if target else None,
+                        title_hint=str(title) if title else downloaded_path.stem,
+                        tags_hint=tags_list,
+                        media_kind_hint=str(media_kind) if media_kind else None,
+                        full_metadata=full_metadata if isinstance(full_metadata, dict) else None,
+                    )
+                    downloaded_count += 1
+
+                except DownloadError as e:
+                    log(f"Download failed: {e}", file=sys.stderr)
+                except Exception as e:
+                    log(f"Error downloading item: {e}", file=sys.stderr)
+
            if downloaded_count > 0:
                debug(f"✓ Successfully processed {downloaded_count} file(s)")
                return 0
@@ -118,6 +247,16 @@ class Download_File(Cmdlet):

    def _resolve_output_dir(self, parsed: Dict[str, Any], config: Dict[str, Any]) -> Optional[Path]:
        """Resolve the output directory from storage location or config."""
+        output_dir_arg = parsed.get("output")
+        if output_dir_arg:
+            try:
+                out_path = Path(str(output_dir_arg)).expanduser()
+                out_path.mkdir(parents=True, exist_ok=True)
+                return out_path
+            except Exception as e:
+                log(f"Cannot use output directory {output_dir_arg}: {e}", file=sys.stderr)
+                return None
+
        storage_location = parsed.get("storage")

        # Priority 1: --storage flag
@@ -148,40 +287,6 @@ class Download_File(Cmdlet):

        return final_output_dir

-    def _build_pipe_object(self, download_result: Any, url: str, output_dir: Path) -> Dict[str, Any]:
-        """Create a PipeObject-compatible dict from a download result."""
-        # Try to get file path from result
-        file_path = None
-        if hasattr(download_result, 'path'):
-            file_path = download_result.path
-        elif isinstance(download_result, dict) and 'path' in download_result:
-            file_path = download_result['path']
-        
-        if not file_path:
-            # Fallback: assume result is the path itself
-            file_path = str(download_result)
-
-        media_path = Path(file_path)
-        hash_value = self._compute_file_hash(media_path)
-        title = media_path.stem
-        
-        # Build tags with title for searchability
-        tags = [f"title:{title}"]
-
-        # Canonical pipeline payload (no legacy aliases)
-        return {
-            "path": str(media_path),
-            "hash": hash_value,
-            "title": title,
-            "file_title": title,
-            "action": "cmdlet:download-file",
-            "download_mode": "file",
-            "url": url or (download_result.get('url') if isinstance(download_result, dict) else None),
-            "store": "local",
-            "media_kind": "file",
-            "tags": tags,
-        }
-
    def _compute_file_hash(self, filepath: Path) -> str:
        """Compute SHA256 hash of a file."""
        import hashlib