2025-12-11 12:47:30 -08:00
|
|
|
"""Download files directly via HTTP (non-yt-dlp url).
|
|
|
|
|
|
|
|
|
|
Focused cmdlet for direct file downloads from:
|
|
|
|
|
- PDFs, images, documents
|
|
|
|
|
- URLs not supported by yt-dlp
|
|
|
|
|
- LibGen sources
|
|
|
|
|
- Direct file links
|
|
|
|
|
|
|
|
|
|
No streaming site logic - pure HTTP download with retries.
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
from __future__ import annotations
|
|
|
|
|
|
|
|
|
|
import sys
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
from typing import Any, Dict, List, Optional, Sequence
|
|
|
|
|
|
2025-12-11 19:04:02 -08:00
|
|
|
from SYS.download import DownloadError, _download_direct_file
|
|
|
|
|
from SYS.logger import log, debug
|
2025-12-11 12:47:30 -08:00
|
|
|
from models import DownloadOptions
|
|
|
|
|
import pipeline as pipeline_context
|
|
|
|
|
|
|
|
|
|
from ._shared import Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, register_url_with_local_library, coerce_to_pipe_object
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class Download_File(Cmdlet):
    """Class-based download-file cmdlet for direct HTTP downloads.

    For every URL argument the cmdlet calls ``_download_direct_file``, builds
    a pipeline payload dict describing the downloaded file (path, SHA-256
    hash, title, tags, ...), emits that dict through ``pipeline_context.emit``
    and then tries to register the URL with the local library.
    """

    def __init__(self) -> None:
        """Declare the cmdlet's metadata and arguments, then register it."""
        super().__init__(
            name="download-file",
            summary="Download files directly via HTTP (PDFs, images, documents)",
            usage="download-file <url> [options] or search-file | download-file [options]",
            alias=["dl-file", "download-http"],
            arg=[
                CmdletArg(
                    name="url",
                    type="string",
                    required=False,
                    description="URL to download (direct file links)",
                    variadic=True,
                ),
                CmdletArg(
                    name="-url",
                    type="string",
                    description="URL to download (alias for positional argument)",
                    variadic=True,
                ),
                CmdletArg(
                    name="output",
                    type="string",
                    alias="o",
                    description="Output filename (auto-detected if not specified)",
                ),
                SharedArgs.URL,
            ],
            detail=[
                "Download files directly via HTTP without yt-dlp processing.",
                "For streaming sites, use download-media.",
            ],
            exec=self.run,
        )
        self.register()

    def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
        """Entry point passed to the base class as ``exec``.

        When the current stage context reports more than one stage, the
        ``_quiet_background_output`` flag is set on ``config`` (the caller's
        dict is mutated in place) before delegating to ``_run_impl``.

        Args:
            result: Piped input from a previous stage (forwarded untouched).
            args: Raw command-line style arguments for this cmdlet.
            config: Runtime configuration mapping.

        Returns:
            Exit code from ``_run_impl``: 0 on success, 1 on failure.
        """
        stage_ctx = pipeline_context.get_stage_context()
        in_pipeline = stage_ctx is not None and getattr(stage_ctx, "total_stages", 1) > 1
        if in_pipeline and isinstance(config, dict):
            config["_quiet_background_output"] = True
        return self._run_impl(result, args, config)

    def _run_impl(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
        """Download every requested URL and emit one payload per file.

        Args:
            result: Piped input; not consulted by this implementation.
            args: Raw arguments, parsed with ``parse_cmdlet_args``.
            config: Runtime configuration mapping.

        Returns:
            0 if at least one URL was downloaded and emitted, otherwise 1.
            Per-URL failures are logged to stderr and do not abort the loop.
        """
        try:
            debug("Starting download-file")

            parsed = parse_cmdlet_args(args, self)

            # A single URL may come back as a bare string; normalise to a list.
            raw_url = parsed.get("url", [])
            if isinstance(raw_url, str):
                raw_url = [raw_url]

            if not raw_url:
                log("No url to download", file=sys.stderr)
                return 1

            final_output_dir = self._resolve_output_dir(parsed, config)
            if not final_output_dir:
                # _resolve_output_dir has already reported the reason.
                return 1

            debug(f"Output directory: {final_output_dir}")

            quiet_mode = bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False

            # NOTE(review): the declared `output` argument is parsed but never
            # applied to the download in this method - confirm whether
            # _download_direct_file should receive it.

            downloaded_count = 0
            for url in raw_url:
                try:
                    self._download_one(url, final_output_dir, quiet_mode, config)
                except DownloadError as e:
                    log(f"Download failed for {url}: {e}", file=sys.stderr)
                except Exception as e:
                    log(f"Error processing {url}: {e}", file=sys.stderr)
                else:
                    downloaded_count += 1
                    debug("✓ Downloaded and emitted")

            if downloaded_count > 0:
                debug(f"✓ Successfully processed {downloaded_count} file(s)")
                return 0

            log("No downloads completed", file=sys.stderr)
            return 1

        except Exception as e:
            log(f"Error in download-file: {e}", file=sys.stderr)
            return 1

    def _download_one(self, url: str, output_dir: Path, quiet: bool, config: Dict[str, Any]) -> None:
        """Download a single URL, emit its payload and register the URL.

        Download, payload construction and emission errors propagate to the
        caller. Library registration is best-effort: the file has already been
        downloaded and emitted by then, so a registration failure is logged
        and must not turn the download into a failure.
        """
        debug(f"Processing: {url}")

        result_obj = _download_direct_file(url, output_dir, quiet=quiet)
        debug("Download completed, building pipe object...")
        pipe_obj_dict = self._build_pipe_object(result_obj, url, output_dir)
        debug("Emitting result to pipeline...")
        pipeline_context.emit(pipe_obj_dict)

        if pipe_obj_dict.get("url"):
            try:
                pipe_obj = coerce_to_pipe_object(pipe_obj_dict)
                register_url_with_local_library(pipe_obj, config)
            except Exception as e:
                log(f"Could not register {url} with local library: {e}", file=sys.stderr)

    def _resolve_output_dir(self, parsed: Dict[str, Any], config: Dict[str, Any]) -> Optional[Path]:
        """Resolve the output directory from storage location or config.

        Priority:
            1. ``parsed["storage"]`` resolved via ``SharedArgs.resolve_storage``
               (its result is returned unchanged).
            2. ``config["outfile"]`` expanded with ``Path.expanduser``.
            3. ``~/Downloads``.

        For options 2 and 3 the directory is created if it does not exist.

        Returns:
            The resolved directory, or ``None`` (after logging to stderr) when
            the storage location is invalid or the directory cannot be created.
        """
        storage_location = parsed.get("storage")

        # Priority 1: --storage flag
        if storage_location:
            try:
                return SharedArgs.resolve_storage(storage_location)
            except Exception as e:
                log(f"Invalid storage location: {e}", file=sys.stderr)
                return None

        # Priority 2: Config outfile
        final_output_dir: Optional[Path] = None
        configured = config.get("outfile") if config else None
        if configured:
            try:
                final_output_dir = Path(configured).expanduser()
            except Exception as e:
                # Unusable config value: report it and fall back to the default.
                debug(f"Ignoring configured outfile {configured!r}: {e}")

        # Priority 3: Default (home/Downloads)
        if final_output_dir is None:
            final_output_dir = Path.home() / "Downloads"
            debug(f"Using default directory: {final_output_dir}")

        # Ensure directory exists
        try:
            final_output_dir.mkdir(parents=True, exist_ok=True)
        except Exception as e:
            log(f"Cannot create output directory {final_output_dir}: {e}", file=sys.stderr)
            return None

        return final_output_dir

    def _build_pipe_object(self, download_result: Any, url: str, output_dir: Path) -> Dict[str, Any]:
        """Create a PipeObject-compatible dict from a download result.

        Args:
            download_result: Object with a ``path`` attribute, a dict with a
                ``"path"`` key, or anything whose ``str()`` is the file path.
            url: Source URL; when empty, a dict result's ``"url"`` is used.
            output_dir: Accepted for interface compatibility; not used here.

        Returns:
            Payload dict with the file path, its SHA-256 hash, a title derived
            from the file stem, and fixed metadata fields.

        Raises:
            OSError: If the downloaded file cannot be read for hashing.
        """
        file_path = None
        if hasattr(download_result, "path"):
            file_path = download_result.path
        elif isinstance(download_result, dict) and "path" in download_result:
            file_path = download_result["path"]

        if not file_path:
            # Fallback: assume result is the path itself
            file_path = str(download_result)

        media_path = Path(file_path)
        hash_value = self._compute_file_hash(media_path)
        title = media_path.stem

        # Build tags with title for searchability
        tags = [f"title:{title}"]

        # Canonical pipeline payload (no legacy aliases)
        return {
            "path": str(media_path),
            "hash": hash_value,
            "title": title,
            "file_title": title,
            "action": "cmdlet:download-file",
            "download_mode": "file",
            "url": url or (download_result.get("url") if isinstance(download_result, dict) else None),
            "store": "local",
            "media_kind": "file",
            "tags": tags,
        }

    def _compute_file_hash(self, filepath: Path) -> str:
        """Return the hexadecimal SHA-256 digest of the file at ``filepath``.

        The file is read in 1 MiB chunks so large downloads are hashed without
        loading them fully into memory.

        Raises:
            OSError: If the file cannot be opened or read.
        """
        import hashlib

        chunk_size = 1024 * 1024
        sha256_hash = hashlib.sha256()
        with open(filepath, "rb") as f:
            for byte_block in iter(lambda: f.read(chunk_size), b""):
                sha256_hash.update(byte_block)
        return sha256_hash.hexdigest()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Module-level singleton registration
|
|
|
|
|
# Instantiating the cmdlet at import time runs Download_File.__init__, which
# calls self.register(); importing this module is therefore what registers it.
CMDLET = Download_File()
|