"""Generic file downloader. Supports: - Direct HTTP file URLs (PDFs, images, documents; non-yt-dlp) - Piped provider items (uses provider.download when available) No streaming site logic; use download-media for yt-dlp/streaming. """ from __future__ import annotations import sys from pathlib import Path from typing import Any, Dict, List, Optional, Sequence from urllib.parse import urlparse from SYS.download import DownloadError, _download_direct_file from SYS.logger import log, debug from SYS.pipeline_progress import PipelineProgress import pipeline as pipeline_context from . import _shared as sh Cmdlet = sh.Cmdlet CmdletArg = sh.CmdletArg SharedArgs = sh.SharedArgs parse_cmdlet_args = sh.parse_cmdlet_args register_url_with_local_library = sh.register_url_with_local_library coerce_to_pipe_object = sh.coerce_to_pipe_object get_field = sh.get_field class Download_File(Cmdlet): """Class-based download-file cmdlet - direct HTTP downloads.""" def __init__(self) -> None: """Initialize download-file cmdlet.""" super().__init__( name="download-file", summary="Download files via HTTP or provider handlers", usage="download-file [-path DIR] [options] OR @N | download-file [-path DIR] [options]", alias=["dl-file", "download-http"], arg=[ SharedArgs.URL, SharedArgs.PATH, # Prefer -path for output directory to match other cmdlets; keep -output for backwards compatibility. CmdletArg(name="-output", type="string", alias="o", description="(deprecated) Output directory (use -path instead)"), ], detail=[ "Download files directly via HTTP without yt-dlp processing.", "For streaming sites, use download-media.", "For Internet Archive item pages (archive.org/details/...), shows a selectable file list; pick with @N to download.", ], exec=self.run, ) self.register() def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: """Main execution method.""" return self._run_impl(result, args, config) @staticmethod def _normalize_urls(parsed: Dict[str, Any]) -> List[str]: raw_url = parsed.get("url", []) if isinstance(raw_url, str): raw_url = [raw_url] expanded_urls: List[str] = [] for u in (raw_url or []): if u is None: continue s = str(u).strip() if not s: continue if "," in s: parts = [p.strip() for p in s.split(",")] expanded_urls.extend([p for p in parts if p]) else: expanded_urls.append(s) return expanded_urls @staticmethod def _collect_piped_items_if_no_urls(result: Any, raw_urls: Sequence[str]) -> List[Any]: if raw_urls: return [] if isinstance(result, list): return list(result) if result: return [result] return [] @staticmethod def _safe_total_items(raw_urls: Sequence[str], piped_items: Sequence[Any]) -> int: try: return int(len(raw_urls or []) + len(piped_items or [])) except Exception: return 1 @staticmethod def _build_preview(raw_urls: Sequence[str], piped_items: Sequence[Any], total_items: int) -> List[Any]: try: preview: List[Any] = [] preview.extend(list(raw_urls or [])[: max(0, total_items)]) if len(preview) < total_items: preview.extend(list(piped_items or [])[: max(0, total_items - len(preview))]) return preview except Exception: return [] @staticmethod def _load_provider_registry() -> Dict[str, Any]: try: from ProviderCore.registry import ( get_search_provider as _get_search_provider, get_provider as _get_provider, match_provider_name_for_url as _match_provider_name_for_url, SearchResult as _SearchResult, ) return { "get_search_provider": _get_search_provider, "get_provider": _get_provider, "match_provider_name_for_url": _match_provider_name_for_url, "SearchResult": _SearchResult, } except Exception: return { "get_search_provider": None, "get_provider": None, "match_provider_name_for_url": None, "SearchResult": None, } @staticmethod def _maybe_show_internetarchive_formats( *, raw_urls: Sequence[str], piped_items: Sequence[Any], parsed: Dict[str, Any], config: Dict[str, Any], quiet_mode: bool, ) -> Optional[int]: """If the input is an IA item page, show a selectable formats table. Returns an exit code when handled; otherwise None. """ if quiet_mode: return None try: total_inputs = int(len(raw_urls or []) + len(piped_items or [])) except Exception: total_inputs = 0 if total_inputs != 1: return None item = piped_items[0] if piped_items else None target = "" if item is not None: try: target = str(get_field(item, "path") or get_field(item, "url") or "").strip() except Exception: target = "" if not target and raw_urls: target = str(raw_urls[0]).strip() if not target: return None try: from Provider import internetarchive as ia except Exception: return None identifier = "" try: md = get_field(item, "full_metadata") if item is not None else None if isinstance(md, dict): identifier = str(md.get("identifier") or "").strip() except Exception: identifier = "" if not identifier: try: identifier = str(ia.extract_identifier(target) or "").strip() except Exception: identifier = "" if not identifier: return None # Only show picker for item pages (details); direct download URLs should download immediately. try: if not ia.is_details_url(target): return None except Exception: return None try: files = ia.list_download_files(identifier) except Exception as exc: log(f"download-file: Internet Archive lookup failed: {exc}", file=sys.stderr) return 1 if not files: log("download-file: Internet Archive item has no downloadable files", file=sys.stderr) return 1 title = "" try: title = str(get_field(item, "title") or "").strip() if item is not None else "" except Exception: title = "" table_title = f"Internet Archive: {title}".strip().rstrip(":") if title else f"Internet Archive: {identifier}" try: from result_table import ResultTable except Exception as exc: log(f"download-file: ResultTable unavailable: {exc}", file=sys.stderr) return 1 base_args: List[str] = [] out_arg = parsed.get("path") or parsed.get("output") if out_arg: base_args.extend(["-path", str(out_arg)]) table = ResultTable(table_title).set_preserve_order(True) table.set_table("internetarchive.formats") table.set_source_command("download-file", base_args) rows: List[Dict[str, Any]] = [] for f in files: name = str(f.get("name") or "").strip() if not name: continue fmt = str(f.get("format") or "").strip() src = str(f.get("source") or "").strip() direct_url = str(f.get("direct_url") or "").strip() if not direct_url: continue size_val: Any = f.get("size") try: size_val = int(size_val) if size_val not in (None, "") else "" except Exception: pass row_item: Dict[str, Any] = { "table": "internetarchive", "title": fmt or name, "path": direct_url, "url": direct_url, "columns": [ ("Format", fmt), ("Name", name), ("Size", size_val), ("Source", src), ], "_selection_args": [direct_url], "full_metadata": { "identifier": identifier, "name": name, "format": fmt, "source": src, "size": f.get("size"), }, } rows.append(row_item) table.add_result(row_item) if not rows: log("download-file: no downloadable files found for this item", file=sys.stderr) return 1 try: pipeline_context.set_last_result_table(table, rows, subject=item) pipeline_context.set_current_stage_table(table) except Exception: pass log("Internet Archive item detected: select a file with @N to download", file=sys.stderr) return 0 @staticmethod def _openlibrary_edition_id_from_url(u: str) -> str: try: p = urlparse(str(u)) parts = [x for x in (p.path or "").split("/") if x] except Exception: parts = [] # /books/OL35443598M/... if len(parts) >= 2 and str(parts[0]).lower() == "books": return str(parts[1]).strip() return "" @staticmethod def _title_hint_from_url_slug(u: str) -> str: try: p = urlparse(str(u)) parts = [x for x in (p.path or "").split("/") if x] slug = parts[-1] if parts else "" except Exception: slug = "" slug = (slug or "").strip().replace("_", " ") return slug or "OpenLibrary" @staticmethod def _path_from_download_result(result_obj: Any) -> Path: file_path = None if hasattr(result_obj, "path"): file_path = getattr(result_obj, "path") elif isinstance(result_obj, dict): file_path = result_obj.get("path") if not file_path: file_path = str(result_obj) return Path(str(file_path)) def _emit_local_file( self, *, downloaded_path: Path, source: Optional[str], title_hint: Optional[str], tags_hint: Optional[List[str]], media_kind_hint: Optional[str], full_metadata: Optional[Dict[str, Any]], progress: PipelineProgress, config: Dict[str, Any], provider_hint: Optional[str] = None, ) -> None: title_val = (title_hint or downloaded_path.stem or "Unknown").strip() or downloaded_path.stem hash_value = self._compute_file_hash(downloaded_path) tag: List[str] = [] if tags_hint: tag.extend([str(t) for t in tags_hint if t]) if not any(str(t).lower().startswith("title:") for t in tag): tag.insert(0, f"title:{title_val}") payload: Dict[str, Any] = { "path": str(downloaded_path), "hash": hash_value, "title": title_val, "action": "cmdlet:download-file", "download_mode": "file", "store": "local", "media_kind": media_kind_hint or "file", "tag": tag, } if provider_hint: payload["provider"] = str(provider_hint) if full_metadata: payload["full_metadata"] = full_metadata if source and str(source).startswith("http"): payload["url"] = source elif source: payload["source_url"] = source pipeline_context.emit(payload) # When running with a local progress UI (standalone cmdlet), ensure # the pipe advances on emit. progress.on_emit(payload) # Automatically register url with local library if payload.get("url"): pipe_obj = coerce_to_pipe_object(payload) register_url_with_local_library(pipe_obj, config) def _process_explicit_urls( self, *, raw_urls: Sequence[str], final_output_dir: Path, config: Dict[str, Any], quiet_mode: bool, registry: Dict[str, Any], progress: PipelineProgress, ) -> tuple[int, Optional[int]]: downloaded_count = 0 SearchResult = registry.get("SearchResult") get_provider = registry.get("get_provider") match_provider_name_for_url = registry.get("match_provider_name_for_url") for url in raw_urls: try: debug(f"Processing URL: {url}") # Telegram message URLs are not direct files; route through the provider. try: parsed_url = urlparse(str(url)) host = (parsed_url.hostname or "").lower().strip() except Exception: host = "" is_telegram = host in {"t.me", "telegram.me"} or host.endswith(".t.me") if is_telegram and SearchResult: try: from ProviderCore.registry import get_provider as _get_provider except Exception: _get_provider = None if _get_provider is None: raise DownloadError("Telegram provider registry not available") provider = _get_provider("telegram", config) if provider is None: raise DownloadError("Telegram provider not configured or not available (check telethon/app_id/api_hash)") sr = SearchResult(table="telegram", title=str(url), path=str(url), full_metadata={}) downloaded_path = None telegram_info: Optional[Dict[str, Any]] = None if hasattr(provider, "download_url"): try: downloaded_path, telegram_info = provider.download_url(str(url), final_output_dir) # type: ignore[attr-defined] except Exception as exc: raise DownloadError(str(exc)) else: downloaded_path = provider.download(sr, final_output_dir) if not downloaded_path: raise DownloadError("Telegram download returned no file") channel = "" post = None if isinstance(telegram_info, dict): try: chat_info_raw = telegram_info.get("chat") msg_info_raw = telegram_info.get("message") chat_info: Dict[str, Any] = chat_info_raw if isinstance(chat_info_raw, dict) else {} msg_info: Dict[str, Any] = msg_info_raw if isinstance(msg_info_raw, dict) else {} channel = str(chat_info.get("title") or chat_info.get("username") or "").strip() post = msg_info.get("id") except Exception: channel = "" post = None title_hint = None tg_tags: List[str] = [] if channel: tg_tags.append(f"channel:{channel}") if post is not None: tg_tags.append(f"post:{post}") if channel and post is not None: title_hint = f"{channel} {post}" elif post is not None: title_hint = f"post:{post}" else: title_hint = downloaded_path.stem self._emit_local_file( downloaded_path=downloaded_path, source=str(url), title_hint=title_hint, tags_hint=tg_tags, media_kind_hint="file", full_metadata=telegram_info, provider_hint="telegram", progress=progress, config=config, ) downloaded_count += 1 debug("✓ Downloaded via Telegram provider and emitted") continue # Provider URL routing (e.g. OpenLibrary book pages). provider_name = None if match_provider_name_for_url is not None: try: provider_name = match_provider_name_for_url(str(url)) except Exception: provider_name = None # Heuristic: LibGen often uses landing pages like edition.php/file.php. # These should never be treated as direct file URLs. if not provider_name: try: p = urlparse(str(url)) h = (p.hostname or "").strip().lower() path = (p.path or "").strip().lower() if "libgen" in h and any(x in path for x in ("/edition.php", "/file.php", "/ads.php", "/get.php", "/series.php")): provider_name = "libgen" except Exception: pass if provider_name and get_provider is not None and SearchResult is not None: # OpenLibrary URLs should be handled by the OpenLibrary provider. if provider_name == "openlibrary": provider = get_provider("openlibrary", config) if provider is None: raise DownloadError("OpenLibrary provider not configured or not available") edition_id = self._openlibrary_edition_id_from_url(str(url)) title_hint = self._title_hint_from_url_slug(str(url)) sr = SearchResult( table="openlibrary", title=title_hint, path=str(url), media_kind="book", full_metadata={ "openlibrary_id": edition_id, }, ) downloaded_path = None try: ui, _pipe_idx = progress.ui_and_pipe_index() progress_cb = None if ui is not None: # High-level steps for OpenLibrary borrow/download flow. progress.begin_steps(5) def _progress(kind: str, done: int, total: Optional[int], label: str) -> None: # kind: # - "step": advance step text # - "pages": update pipe percent/status # - "bytes": update transfer bar if kind == "step": progress.step(label) return if kind == "pages": t = int(total) if isinstance(total, int) else 0 d = int(done) if isinstance(done, int) else 0 if t > 0: pct = int(round((max(0, min(d, t)) / max(1, t)) * 100.0)) progress.set_percent(pct) progress.set_status(f"downloading pages {d}/{t}") else: progress.set_status(f"downloading pages {d}") return if kind == "bytes": try: lbl = str(label or "download") except Exception: lbl = "download" progress.begin_transfer(label=lbl, total=total) progress.update_transfer(label=lbl, completed=done, total=total) try: if isinstance(total, int) and total > 0 and int(done) >= int(total): progress.finish_transfer(label=lbl) except Exception: pass return progress_cb = _progress downloaded_path = provider.download(sr, final_output_dir, progress_callback=progress_cb) # type: ignore[call-arg] except Exception as exc: raise DownloadError(str(exc)) # Clear long-running status line after the download attempt. progress.clear_status() if downloaded_path: tags_hint: Optional[List[str]] = None try: sr_tags = getattr(sr, "tag", None) if isinstance(sr_tags, set) and sr_tags: tags_hint = sorted([str(t) for t in sr_tags if t]) except Exception: tags_hint = None self._emit_local_file( downloaded_path=Path(downloaded_path), source=str(url), title_hint=title_hint, tags_hint=tags_hint, media_kind_hint="book", full_metadata=sr.full_metadata, provider_hint="openlibrary", progress=progress, config=config, ) downloaded_count += 1 continue # If OpenLibrary can't provide it (not lendable, no creds, etc), auto-search LibGen. try: fallback_query = str(title_hint or "").strip() if fallback_query: log( f"[download-file] Not available on OpenLibrary; searching LibGen for: {fallback_query}", file=sys.stderr, ) from cmdlet.search_provider import CMDLET as _SEARCH_PROVIDER_CMDLET exec_fn = getattr(_SEARCH_PROVIDER_CMDLET, "exec", None) if callable(exec_fn): ret = exec_fn(None, ["-provider", "libgen", "-query", fallback_query], config) try: table = pipeline_context.get_last_result_table() items = pipeline_context.get_last_result_items() if table is not None: pipeline_context.set_last_result_table_overlay(table, items) except Exception: pass try: return downloaded_count, int(ret) # type: ignore[arg-type] except Exception: return downloaded_count, 1 except Exception: pass log("[download-file] OpenLibrary URL could not be downloaded", file=sys.stderr) continue # Generic provider URL handler (if a provider implements `download_url`). provider = get_provider(provider_name, config) if provider is not None and hasattr(provider, "download_url"): try: downloaded_path = provider.download_url(str(url), final_output_dir) # type: ignore[attr-defined] except Exception as exc: raise DownloadError(str(exc)) if downloaded_path: self._emit_local_file( downloaded_path=Path(downloaded_path), source=str(url), title_hint=Path(str(downloaded_path)).stem, tags_hint=None, media_kind_hint="file", full_metadata=None, provider_hint=str(provider_name), progress=progress, config=config, ) downloaded_count += 1 continue # Otherwise, try provider.download(SearchResult) with the URL as the target. if provider is not None: sr_obj = None try: sr_obj = SearchResult( table=str(provider_name), title=str(url), path=str(url), full_metadata={}, ) downloaded_path = provider.download(sr_obj, final_output_dir) # type: ignore[call-arg] except Exception: downloaded_path = None # Refuse to fall back to direct-download for LibGen landing pages. # This prevents saving HTML (e.g. edition.php) as a bogus file. if (not downloaded_path) and str(provider_name).lower() == "libgen": raise DownloadError("LibGen URL did not resolve to a downloadable file") if downloaded_path: emit_tags: Optional[List[str]] = None full_md: Optional[Dict[str, Any]] = None title_hint = Path(str(downloaded_path)).stem media_kind_hint = "file" if str(provider_name).lower() == "libgen" and sr_obj is not None: media_kind_hint = "book" try: sr_tags = getattr(sr_obj, "tag", None) if isinstance(sr_tags, set) and sr_tags: emit_tags = sorted([str(t) for t in sr_tags if t]) except Exception: emit_tags = None try: sr_full_md = getattr(sr_obj, "full_metadata", None) if isinstance(sr_full_md, dict): full_md = sr_full_md t = str(sr_full_md.get("title") or "").strip() if t: title_hint = t except Exception: full_md = None self._emit_local_file( downloaded_path=Path(downloaded_path), source=str(url), title_hint=title_hint, tags_hint=emit_tags, media_kind_hint=media_kind_hint, full_metadata=full_md, provider_hint=str(provider_name), progress=progress, config=config, ) downloaded_count += 1 continue result_obj = _download_direct_file(str(url), final_output_dir, quiet=quiet_mode) downloaded_path = self._path_from_download_result(result_obj) self._emit_local_file( downloaded_path=downloaded_path, source=str(url), title_hint=downloaded_path.stem, tags_hint=[f"title:{downloaded_path.stem}"], media_kind_hint="file", full_metadata=None, progress=progress, config=config, ) downloaded_count += 1 debug("✓ Downloaded and emitted") except DownloadError as e: log(f"Download failed for {url}: {e}", file=sys.stderr) except Exception as e: log(f"Error processing {url}: {e}", file=sys.stderr) return downloaded_count, None def _expand_provider_items( self, *, piped_items: Sequence[Any], registry: Dict[str, Any], config: Dict[str, Any], ) -> List[Any]: get_search_provider = registry.get("get_search_provider") expanded_items: List[Any] = [] for item in piped_items: try: table = get_field(item, "table") media_kind = get_field(item, "media_kind") full_metadata = get_field(item, "full_metadata") target = get_field(item, "path") or get_field(item, "url") if str(table or "").lower() == "alldebrid" and str(media_kind or "").lower() == "folder": magnet_id = None if isinstance(full_metadata, dict): magnet_id = full_metadata.get("magnet_id") if magnet_id is None and isinstance(target, str) and target.lower().startswith("alldebrid:magnet:"): try: magnet_id = int(target.split(":")[-1]) except Exception: magnet_id = None if magnet_id is not None and get_search_provider is not None: provider = get_search_provider("alldebrid", config) if provider is not None: try: files = provider.search("*", limit=10_000, filters={"view": "files", "magnet_id": int(magnet_id)}) except Exception: files = [] # If the magnet isn't ready, provider.search returns a single not-ready folder row. if files and len(files) == 1 and getattr(files[0], "media_kind", "") == "folder": detail = getattr(files[0], "detail", "") log( f"[download-file] AllDebrid magnet {magnet_id} not ready ({detail or 'unknown'})", file=sys.stderr, ) else: for sr in files: expanded_items.append(sr.to_dict() if hasattr(sr, "to_dict") else sr) continue expanded_items.append(item) except Exception: expanded_items.append(item) return expanded_items def _process_provider_items( self, *, piped_items: Sequence[Any], final_output_dir: Path, config: Dict[str, Any], quiet_mode: bool, registry: Dict[str, Any], progress: PipelineProgress, ) -> int: downloaded_count = 0 get_search_provider = registry.get("get_search_provider") SearchResult = registry.get("SearchResult") expanded_items = self._expand_provider_items(piped_items=piped_items, registry=registry, config=config) for item in expanded_items: try: table = get_field(item, "table") title = get_field(item, "title") target = get_field(item, "path") or get_field(item, "url") media_kind = get_field(item, "media_kind") tags_val = get_field(item, "tag") tags_list: Optional[List[str]] if isinstance(tags_val, list): tags_list = [str(t) for t in tags_val if t] else: tags_list = None full_metadata = get_field(item, "full_metadata") if (not full_metadata) and isinstance(item, dict) and isinstance(item.get("extra"), dict): extra_md = item["extra"].get("full_metadata") if isinstance(extra_md, dict): full_metadata = extra_md # If this looks like a provider item and providers are available, prefer provider.download() downloaded_path: Optional[Path] = None attempted_provider_download = False provider_sr = None if table and get_search_provider and SearchResult: provider = get_search_provider(str(table), config) if provider is not None: attempted_provider_download = True sr = SearchResult( table=str(table), title=str(title or "Unknown"), path=str(target or ""), full_metadata=full_metadata if isinstance(full_metadata, dict) else {}, ) debug(f"[download-file] Downloading provider item via {table}: {sr.title}") downloaded_path = provider.download(sr, final_output_dir) provider_sr = sr # OpenLibrary: if provider download failed, do NOT try to download the OpenLibrary page HTML. if downloaded_path is None and attempted_provider_download and str(table or "").lower() == "openlibrary": availability = None reason = None if isinstance(full_metadata, dict): availability = full_metadata.get("availability") reason = full_metadata.get("availability_reason") msg = "[download-file] OpenLibrary item not downloadable" if availability or reason: msg += f" (availability={availability or ''} reason={reason or ''})" log(msg, file=sys.stderr) # Fallback: run a LibGen title search so the user can pick an alternative source. try: title_text = str(title or "").strip() if not title_text and isinstance(full_metadata, dict): title_text = str(full_metadata.get("title") or "").strip() if title_text: log(f"[download-file] Not available on OpenLibrary; searching LibGen for: {title_text}", file=sys.stderr) from cmdlet.search_provider import CMDLET as _SEARCH_PROVIDER_CMDLET fallback_query = title_text exec_fn = getattr(_SEARCH_PROVIDER_CMDLET, "exec", None) if not callable(exec_fn): log( "[download-file] search-provider cmdlet unavailable; cannot run LibGen fallback search", file=sys.stderr, ) continue ret = exec_fn( None, ["-provider", "libgen", "-query", fallback_query], config, ) # Promote the search-provider table to a display overlay so it renders. try: table_obj = pipeline_context.get_last_result_table() items_obj = pipeline_context.get_last_result_items() if table_obj is not None: pipeline_context.set_last_result_table_overlay(table_obj, items_obj) except Exception: pass try: return int(ret) # type: ignore[arg-type] except Exception: return 1 except Exception: pass continue # Fallback: if we have a direct HTTP URL, download it directly if downloaded_path is None and isinstance(target, str) and target.startswith("http"): # Guard: provider landing pages (e.g. LibGen ads.php) are HTML, not files. # Never download these as "files". if str(table or "").lower() == "libgen": low = target.lower() if ("/ads.php" in low) or ("/file.php" in low) or ("/index.php" in low): log( "[download-file] Refusing to download LibGen landing page (expected provider to resolve file link)", file=sys.stderr, ) continue debug(f"[download-file] Provider item looks like direct URL, downloading: {target}") suggested_name = str(title).strip() if title is not None else None result_obj = _download_direct_file( target, final_output_dir, quiet=quiet_mode, suggested_filename=suggested_name, ) downloaded_path = self._path_from_download_result(result_obj) if downloaded_path is None: log(f"Cannot download item (no provider handler / unsupported target): {title or target}", file=sys.stderr) continue # Allow providers to add/enrich tags and metadata during download. if str(table or "").lower() == "libgen" and provider_sr is not None: try: sr_tags = getattr(provider_sr, "tag", None) if tags_list is None and isinstance(sr_tags, set) and sr_tags: tags_list = sorted([str(t) for t in sr_tags if t]) except Exception: pass try: sr_md = getattr(provider_sr, "full_metadata", None) if isinstance(sr_md, dict) and sr_md: full_metadata = sr_md except Exception: pass try: if isinstance(full_metadata, dict): t = str(full_metadata.get("title") or "").strip() if t: title = t except Exception: pass self._emit_local_file( downloaded_path=downloaded_path, source=str(target) if target else None, title_hint=str(title) if title else downloaded_path.stem, tags_hint=tags_list, media_kind_hint=str(media_kind) if media_kind else None, full_metadata=full_metadata if isinstance(full_metadata, dict) else None, progress=progress, config=config, ) downloaded_count += 1 except DownloadError as e: log(f"Download failed: {e}", file=sys.stderr) except Exception as e: log(f"Error downloading item: {e}", file=sys.stderr) return downloaded_count def _run_impl(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: """Main download implementation for direct HTTP files.""" progress = PipelineProgress(pipeline_context) try: debug("Starting download-file") # Parse arguments parsed = parse_cmdlet_args(args, self) raw_url = self._normalize_urls(parsed) piped_items = self._collect_piped_items_if_no_urls(result, raw_url) if not raw_url and not piped_items: log("No url or piped items to download", file=sys.stderr) return 1 quiet_mode = bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False ia_picker_exit = self._maybe_show_internetarchive_formats( raw_urls=raw_url, piped_items=piped_items, parsed=parsed, config=config, quiet_mode=quiet_mode, ) if ia_picker_exit is not None: return int(ia_picker_exit) # Get output directory final_output_dir = self._resolve_output_dir(parsed, config) if not final_output_dir: return 1 debug(f"Output directory: {final_output_dir}") # If the caller isn't running the shared pipeline Live progress UI (e.g. direct # cmdlet execution), start a minimal local pipeline progress panel so downloads # show consistent, Rich-formatted progress (like download-media). total_items = self._safe_total_items(raw_url, piped_items) preview = self._build_preview(raw_url, piped_items, total_items) progress.ensure_local_ui(label="download-file", total_items=total_items, items_preview=preview) registry = self._load_provider_registry() downloaded_count = 0 urls_downloaded, early_exit = self._process_explicit_urls( raw_urls=raw_url, final_output_dir=final_output_dir, config=config, quiet_mode=quiet_mode, registry=registry, progress=progress, ) downloaded_count += int(urls_downloaded) if early_exit is not None: return int(early_exit) downloaded_count += self._process_provider_items( piped_items=piped_items, final_output_dir=final_output_dir, config=config, quiet_mode=quiet_mode, registry=registry, progress=progress, ) if downloaded_count > 0: debug(f"✓ Successfully processed {downloaded_count} file(s)") return 0 log("No downloads completed", file=sys.stderr) return 1 except Exception as e: log(f"Error in download-file: {e}", file=sys.stderr) return 1 finally: progress.close_local_ui(force_complete=True) def _resolve_output_dir(self, parsed: Dict[str, Any], config: Dict[str, Any]) -> Optional[Path]: """Resolve the output directory from storage location or config.""" output_dir_arg = parsed.get("path") or parsed.get("output") if output_dir_arg: try: out_path = Path(str(output_dir_arg)).expanduser() out_path.mkdir(parents=True, exist_ok=True) return out_path except Exception as e: log(f"Cannot use output directory {output_dir_arg}: {e}", file=sys.stderr) return None storage_location = parsed.get("storage") # Priority 1: --storage flag if storage_location: try: return SharedArgs.resolve_storage(storage_location) except Exception as e: log(f"Invalid storage location: {e}", file=sys.stderr) return None # Priority 2: Config default output/temp directory try: from config import resolve_output_dir final_output_dir = resolve_output_dir(config) except Exception: final_output_dir = Path.home() / "Downloads" debug(f"Using default directory: {final_output_dir}") # Ensure directory exists try: final_output_dir.mkdir(parents=True, exist_ok=True) except Exception as e: log(f"Cannot create output directory {final_output_dir}: {e}", file=sys.stderr) return None return final_output_dir def _compute_file_hash(self, filepath: Path) -> str: """Compute SHA256 hash of a file.""" import hashlib sha256_hash = hashlib.sha256() with open(filepath, "rb") as f: for byte_block in iter(lambda: f.read(4096), b""): sha256_hash.update(byte_block) return sha256_hash.hexdigest() # Module-level singleton registration CMDLET = Download_File()