"""Download files directly via HTTP (non-yt-dlp url). Focused cmdlet for direct file downloads from: - PDFs, images, documents - url not supported by yt-dlp - LibGen sources - Direct file links No streaming site logic - pure HTTP download with retries. """ from __future__ import annotations import sys from pathlib import Path from typing import Any, Dict, List, Optional, Sequence from SYS.download import DownloadError, _download_direct_file from SYS.logger import log, debug from models import DownloadOptions import pipeline as pipeline_context from ._shared import Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, register_url_with_local_library, coerce_to_pipe_object class Download_File(Cmdlet): """Class-based download-file cmdlet - direct HTTP downloads.""" def __init__(self) -> None: """Initialize download-file cmdlet.""" super().__init__( name="download-file", summary="Download files directly via HTTP (PDFs, images, documents)", usage="download-file [options] or search-file | download-file [options]", alias=["dl-file", "download-http"], arg=[ CmdletArg(name="url", type="string", required=False, description="URL to download (direct file links)", variadic=True), CmdletArg(name="-url", type="string", description="URL to download (alias for positional argument)", variadic=True), CmdletArg(name="output", type="string", alias="o", description="Output filename (auto-detected if not specified)"), SharedArgs.URL ], detail=["Download files directly via HTTP without yt-dlp processing.", "For streaming sites, use download-media."], exec=self.run, ) self.register() def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: """Main execution method.""" stage_ctx = pipeline_context.get_stage_context() in_pipeline = stage_ctx is not None and getattr(stage_ctx, "total_stages", 1) > 1 if in_pipeline and isinstance(config, dict): config["_quiet_background_output"] = True return self._run_impl(result, args, config) def _run_impl(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: """Main download implementation for direct HTTP files.""" try: debug("Starting download-file") # Parse arguments parsed = parse_cmdlet_args(args, self) # Extract options raw_url = parsed.get("url", []) if isinstance(raw_url, str): raw_url = [raw_url] if not raw_url: log("No url to download", file=sys.stderr) return 1 # Get output directory final_output_dir = self._resolve_output_dir(parsed, config) if not final_output_dir: return 1 debug(f"Output directory: {final_output_dir}") # Download each URL downloaded_count = 0 quiet_mode = bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False custom_output = parsed.get("output") for url in raw_url: try: debug(f"Processing: {url}") # Direct HTTP download result_obj = _download_direct_file(url, final_output_dir, quiet=quiet_mode) debug(f"Download completed, building pipe object...") pipe_obj_dict = self._build_pipe_object(result_obj, url, final_output_dir) debug(f"Emitting result to pipeline...") pipeline_context.emit(pipe_obj_dict) # Automatically register url with local library if pipe_obj_dict.get("url"): pipe_obj = coerce_to_pipe_object(pipe_obj_dict) register_url_with_local_library(pipe_obj, config) downloaded_count += 1 debug("✓ Downloaded and emitted") except DownloadError as e: log(f"Download failed for {url}: {e}", file=sys.stderr) except Exception as e: log(f"Error processing {url}: {e}", file=sys.stderr) if downloaded_count > 0: debug(f"✓ Successfully processed {downloaded_count} file(s)") return 0 log("No downloads completed", file=sys.stderr) return 1 except Exception as e: log(f"Error in download-file: {e}", file=sys.stderr) return 1 def _resolve_output_dir(self, parsed: Dict[str, Any], config: Dict[str, Any]) -> Optional[Path]: """Resolve the output directory from storage location or config.""" storage_location = parsed.get("storage") # Priority 1: --storage flag if storage_location: try: return SharedArgs.resolve_storage(storage_location) except Exception as e: log(f"Invalid storage location: {e}", file=sys.stderr) return None # Priority 2: Config outfile if config and config.get("outfile"): try: return Path(config["outfile"]).expanduser() except Exception: pass # Priority 3: Default (home/Downloads) final_output_dir = Path.home() / "Downloads" debug(f"Using default directory: {final_output_dir}") # Ensure directory exists try: final_output_dir.mkdir(parents=True, exist_ok=True) except Exception as e: log(f"Cannot create output directory {final_output_dir}: {e}", file=sys.stderr) return None return final_output_dir def _build_pipe_object(self, download_result: Any, url: str, output_dir: Path) -> Dict[str, Any]: """Create a PipeObject-compatible dict from a download result.""" # Try to get file path from result file_path = None if hasattr(download_result, 'path'): file_path = download_result.path elif isinstance(download_result, dict) and 'path' in download_result: file_path = download_result['path'] if not file_path: # Fallback: assume result is the path itself file_path = str(download_result) media_path = Path(file_path) hash_value = self._compute_file_hash(media_path) title = media_path.stem # Build tags with title for searchability tags = [f"title:{title}"] # Canonical pipeline payload (no legacy aliases) return { "path": str(media_path), "hash": hash_value, "title": title, "file_title": title, "action": "cmdlet:download-file", "download_mode": "file", "url": url or (download_result.get('url') if isinstance(download_result, dict) else None), "store": "local", "media_kind": "file", "tags": tags, } def _compute_file_hash(self, filepath: Path) -> str: """Compute SHA256 hash of a file.""" import hashlib sha256_hash = hashlib.sha256() with open(filepath, "rb") as f: for byte_block in iter(lambda: f.read(4096), b""): sha256_hash.update(byte_block) return sha256_hash.hexdigest() # Module-level singleton registration CMDLET = Download_File()