lkjlkj
This commit is contained in:
@@ -1,12 +1,10 @@
|
||||
"""Download files directly via HTTP (non-yt-dlp url).
|
||||
"""Generic file downloader.
|
||||
|
||||
Focused cmdlet for direct file downloads from:
|
||||
- PDFs, images, documents
|
||||
- url not supported by yt-dlp
|
||||
- LibGen sources
|
||||
- Direct file links
|
||||
Supports:
|
||||
- Direct HTTP file URLs (PDFs, images, documents; non-yt-dlp)
|
||||
- Piped provider items (uses provider.download when available)
|
||||
|
||||
No streaming site logic - pure HTTP download with retries.
|
||||
No streaming site logic; use download-media for yt-dlp/streaming.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
@@ -17,10 +15,17 @@ from typing import Any, Dict, List, Optional, Sequence
|
||||
|
||||
from SYS.download import DownloadError, _download_direct_file
|
||||
from SYS.logger import log, debug
|
||||
from models import DownloadOptions
|
||||
import pipeline as pipeline_context
|
||||
|
||||
from ._shared import Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, register_url_with_local_library, coerce_to_pipe_object
|
||||
from ._shared import (
|
||||
Cmdlet,
|
||||
CmdletArg,
|
||||
SharedArgs,
|
||||
parse_cmdlet_args,
|
||||
register_url_with_local_library,
|
||||
coerce_to_pipe_object,
|
||||
get_field,
|
||||
)
|
||||
|
||||
|
||||
class Download_File(Cmdlet):
|
||||
@@ -30,14 +35,13 @@ class Download_File(Cmdlet):
|
||||
"""Initialize download-file cmdlet."""
|
||||
super().__init__(
|
||||
name="download-file",
|
||||
summary="Download files directly via HTTP (PDFs, images, documents)",
|
||||
usage="download-file <url> [options] or search-file | download-file [options]",
|
||||
summary="Download files via HTTP or provider handlers",
|
||||
usage="download-file <url> [options] OR @N | download-file [options]",
|
||||
alias=["dl-file", "download-http"],
|
||||
arg=[
|
||||
CmdletArg(name="url", type="string", required=False, description="URL to download (direct file links)", variadic=True),
|
||||
CmdletArg(name="-url", type="string", description="URL to download (alias for positional argument)", variadic=True),
|
||||
CmdletArg(name="output", type="string", alias="o", description="Output filename (auto-detected if not specified)"),
|
||||
SharedArgs.URL
|
||||
CmdletArg(name="output", type="string", alias="o", description="Output directory (overrides defaults)"),
|
||||
SharedArgs.URL,
|
||||
|
||||
],
|
||||
detail=["Download files directly via HTTP without yt-dlp processing.", "For streaming sites, use download-media."],
|
||||
exec=self.run,
|
||||
@@ -60,13 +64,21 @@ class Download_File(Cmdlet):
|
||||
# Parse arguments
|
||||
parsed = parse_cmdlet_args(args, self)
|
||||
|
||||
# Extract options
|
||||
# Extract explicit URL args (if any)
|
||||
raw_url = parsed.get("url", [])
|
||||
if isinstance(raw_url, str):
|
||||
raw_url = [raw_url]
|
||||
|
||||
# If no URL args were provided, fall back to piped results (provider items)
|
||||
piped_items: List[Any] = []
|
||||
if not raw_url:
|
||||
log("No url to download", file=sys.stderr)
|
||||
if isinstance(result, list):
|
||||
piped_items = result
|
||||
elif result:
|
||||
piped_items = [result]
|
||||
|
||||
if not raw_url and not piped_items:
|
||||
log("No url or piped items to download", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Get output directory
|
||||
@@ -76,27 +88,78 @@ class Download_File(Cmdlet):
|
||||
|
||||
debug(f"Output directory: {final_output_dir}")
|
||||
|
||||
# Download each URL
|
||||
# Download each URL and/or provider item
|
||||
downloaded_count = 0
|
||||
quiet_mode = bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False
|
||||
custom_output = parsed.get("output")
|
||||
|
||||
# Provider lookup is optional; keep import local to avoid overhead if unused
|
||||
get_search_provider = None
|
||||
SearchResult = None
|
||||
try:
|
||||
from Provider.registry import get_search_provider as _get_search_provider, SearchResult as _SearchResult
|
||||
|
||||
get_search_provider = _get_search_provider
|
||||
SearchResult = _SearchResult
|
||||
except Exception:
|
||||
get_search_provider = None
|
||||
SearchResult = None
|
||||
|
||||
def _emit_local_file(downloaded_path: Path, source: Optional[str], title_hint: Optional[str], tags_hint: Optional[List[str]], media_kind_hint: Optional[str], full_metadata: Optional[Dict[str, Any]]) -> None:
|
||||
title_val = (title_hint or downloaded_path.stem or "Unknown").strip() or downloaded_path.stem
|
||||
hash_value = self._compute_file_hash(downloaded_path)
|
||||
tag: List[str] = []
|
||||
if tags_hint:
|
||||
tag.extend([str(t) for t in tags_hint if t])
|
||||
if not any(str(t).lower().startswith("title:") for t in tag):
|
||||
tag.insert(0, f"title:{title_val}")
|
||||
|
||||
payload: Dict[str, Any] = {
|
||||
"path": str(downloaded_path),
|
||||
"hash": hash_value,
|
||||
"title": title_val,
|
||||
"action": "cmdlet:download-file",
|
||||
"download_mode": "file",
|
||||
"store": "local",
|
||||
"media_kind": media_kind_hint or "file",
|
||||
"tag": tag,
|
||||
}
|
||||
if full_metadata:
|
||||
payload["full_metadata"] = full_metadata
|
||||
if source and str(source).startswith("http"):
|
||||
payload["url"] = source
|
||||
elif source:
|
||||
payload["source_url"] = source
|
||||
|
||||
pipeline_context.emit(payload)
|
||||
|
||||
# Automatically register url with local library
|
||||
if payload.get("url"):
|
||||
pipe_obj = coerce_to_pipe_object(payload)
|
||||
register_url_with_local_library(pipe_obj, config)
|
||||
|
||||
# 1) Explicit URL downloads
|
||||
for url in raw_url:
|
||||
try:
|
||||
debug(f"Processing: {url}")
|
||||
debug(f"Processing URL: {url}")
|
||||
|
||||
# Direct HTTP download
|
||||
result_obj = _download_direct_file(url, final_output_dir, quiet=quiet_mode)
|
||||
debug(f"Download completed, building pipe object...")
|
||||
pipe_obj_dict = self._build_pipe_object(result_obj, url, final_output_dir)
|
||||
debug(f"Emitting result to pipeline...")
|
||||
pipeline_context.emit(pipe_obj_dict)
|
||||
|
||||
# Automatically register url with local library
|
||||
if pipe_obj_dict.get("url"):
|
||||
pipe_obj = coerce_to_pipe_object(pipe_obj_dict)
|
||||
register_url_with_local_library(pipe_obj, config)
|
||||
|
||||
file_path = None
|
||||
if hasattr(result_obj, "path"):
|
||||
file_path = getattr(result_obj, "path")
|
||||
elif isinstance(result_obj, dict):
|
||||
file_path = result_obj.get("path")
|
||||
if not file_path:
|
||||
file_path = str(result_obj)
|
||||
downloaded_path = Path(str(file_path))
|
||||
|
||||
_emit_local_file(
|
||||
downloaded_path=downloaded_path,
|
||||
source=url,
|
||||
title_hint=downloaded_path.stem,
|
||||
tags_hint=[f"title:{downloaded_path.stem}"],
|
||||
media_kind_hint="file",
|
||||
full_metadata=None,
|
||||
)
|
||||
downloaded_count += 1
|
||||
debug("✓ Downloaded and emitted")
|
||||
|
||||
@@ -105,6 +168,72 @@ class Download_File(Cmdlet):
|
||||
except Exception as e:
|
||||
log(f"Error processing {url}: {e}", file=sys.stderr)
|
||||
|
||||
# 2) Provider item downloads (piped results)
|
||||
for item in piped_items:
|
||||
try:
|
||||
table = get_field(item, "table")
|
||||
title = get_field(item, "title")
|
||||
target = get_field(item, "path") or get_field(item, "url")
|
||||
media_kind = get_field(item, "media_kind")
|
||||
tags_val = get_field(item, "tag")
|
||||
tags_list: Optional[List[str]]
|
||||
if isinstance(tags_val, list):
|
||||
tags_list = [str(t) for t in tags_val if t]
|
||||
else:
|
||||
tags_list = None
|
||||
|
||||
full_metadata = get_field(item, "full_metadata")
|
||||
if (not full_metadata) and isinstance(item, dict) and isinstance(item.get("extra"), dict):
|
||||
extra_md = item["extra"].get("full_metadata")
|
||||
if isinstance(extra_md, dict):
|
||||
full_metadata = extra_md
|
||||
|
||||
# If this looks like a provider item and providers are available, prefer provider.download()
|
||||
downloaded_path: Optional[Path] = None
|
||||
if table and get_search_provider and SearchResult:
|
||||
provider = get_search_provider(str(table), config)
|
||||
if provider is not None:
|
||||
sr = SearchResult(
|
||||
table=str(table),
|
||||
title=str(title or "Unknown"),
|
||||
path=str(target or ""),
|
||||
full_metadata=full_metadata if isinstance(full_metadata, dict) else {},
|
||||
)
|
||||
debug(f"[download-file] Downloading provider item via {table}: {sr.title}")
|
||||
downloaded_path = provider.download(sr, final_output_dir)
|
||||
|
||||
# Fallback: if we have a direct HTTP URL, download it directly
|
||||
if downloaded_path is None and isinstance(target, str) and target.startswith("http"):
|
||||
debug(f"[download-file] Provider item looks like direct URL, downloading: {target}")
|
||||
result_obj = _download_direct_file(target, final_output_dir, quiet=quiet_mode)
|
||||
file_path = None
|
||||
if hasattr(result_obj, "path"):
|
||||
file_path = getattr(result_obj, "path")
|
||||
elif isinstance(result_obj, dict):
|
||||
file_path = result_obj.get("path")
|
||||
if not file_path:
|
||||
file_path = str(result_obj)
|
||||
downloaded_path = Path(str(file_path))
|
||||
|
||||
if downloaded_path is None:
|
||||
log(f"Cannot download item (no provider handler / unsupported target): {title or target}", file=sys.stderr)
|
||||
continue
|
||||
|
||||
_emit_local_file(
|
||||
downloaded_path=downloaded_path,
|
||||
source=str(target) if target else None,
|
||||
title_hint=str(title) if title else downloaded_path.stem,
|
||||
tags_hint=tags_list,
|
||||
media_kind_hint=str(media_kind) if media_kind else None,
|
||||
full_metadata=full_metadata if isinstance(full_metadata, dict) else None,
|
||||
)
|
||||
downloaded_count += 1
|
||||
|
||||
except DownloadError as e:
|
||||
log(f"Download failed: {e}", file=sys.stderr)
|
||||
except Exception as e:
|
||||
log(f"Error downloading item: {e}", file=sys.stderr)
|
||||
|
||||
if downloaded_count > 0:
|
||||
debug(f"✓ Successfully processed {downloaded_count} file(s)")
|
||||
return 0
|
||||
@@ -118,6 +247,16 @@ class Download_File(Cmdlet):
|
||||
|
||||
def _resolve_output_dir(self, parsed: Dict[str, Any], config: Dict[str, Any]) -> Optional[Path]:
|
||||
"""Resolve the output directory from storage location or config."""
|
||||
output_dir_arg = parsed.get("output")
|
||||
if output_dir_arg:
|
||||
try:
|
||||
out_path = Path(str(output_dir_arg)).expanduser()
|
||||
out_path.mkdir(parents=True, exist_ok=True)
|
||||
return out_path
|
||||
except Exception as e:
|
||||
log(f"Cannot use output directory {output_dir_arg}: {e}", file=sys.stderr)
|
||||
return None
|
||||
|
||||
storage_location = parsed.get("storage")
|
||||
|
||||
# Priority 1: --storage flag
|
||||
@@ -148,40 +287,6 @@ class Download_File(Cmdlet):
|
||||
|
||||
return final_output_dir
|
||||
|
||||
def _build_pipe_object(self, download_result: Any, url: str, output_dir: Path) -> Dict[str, Any]:
|
||||
"""Create a PipeObject-compatible dict from a download result."""
|
||||
# Try to get file path from result
|
||||
file_path = None
|
||||
if hasattr(download_result, 'path'):
|
||||
file_path = download_result.path
|
||||
elif isinstance(download_result, dict) and 'path' in download_result:
|
||||
file_path = download_result['path']
|
||||
|
||||
if not file_path:
|
||||
# Fallback: assume result is the path itself
|
||||
file_path = str(download_result)
|
||||
|
||||
media_path = Path(file_path)
|
||||
hash_value = self._compute_file_hash(media_path)
|
||||
title = media_path.stem
|
||||
|
||||
# Build tags with title for searchability
|
||||
tags = [f"title:{title}"]
|
||||
|
||||
# Canonical pipeline payload (no legacy aliases)
|
||||
return {
|
||||
"path": str(media_path),
|
||||
"hash": hash_value,
|
||||
"title": title,
|
||||
"file_title": title,
|
||||
"action": "cmdlet:download-file",
|
||||
"download_mode": "file",
|
||||
"url": url or (download_result.get('url') if isinstance(download_result, dict) else None),
|
||||
"store": "local",
|
||||
"media_kind": "file",
|
||||
"tags": tags,
|
||||
}
|
||||
|
||||
def _compute_file_hash(self, filepath: Path) -> str:
|
||||
"""Compute SHA256 hash of a file."""
|
||||
import hashlib
|
||||
|
||||
Reference in New Issue
Block a user