"""Generic file/stream downloader. Supports: - Direct HTTP file URLs (PDFs, images, documents; non-yt-dlp) - Piped plugin items (uses plugin.download when available) - Streaming sites via yt-dlp (YouTube, Bandcamp, etc.) """ from __future__ import annotations import sys import re from pathlib import Path from typing import Any, Dict, List, Optional, Sequence from urllib.parse import urlparse from contextlib import AbstractContextManager, nullcontext from API.HTTP import _download_direct_file from SYS.models import DownloadError, DownloadOptions, DownloadMediaResult from SYS.logger import log, debug_panel, is_debug_enabled from SYS.payload_builders import build_file_result_payload, build_table_result_payload from SYS.pipeline_progress import PipelineProgress from SYS.result_table import Table from SYS.rich_display import stderr_console as get_stderr_console from SYS import pipeline as pipeline_context from SYS.metadata import normalize_urls as normalize_url_list from SYS.selection_builder import ( extract_selection_fields, extract_urls_from_selection_args, selection_args_have_url, ) from SYS.utils import sha256_file from . import _shared as sh Cmdlet = sh.Cmdlet CmdletArg = sh.CmdletArg SharedArgs = sh.SharedArgs QueryArg = sh.QueryArg parse_cmdlet_args = sh.parse_cmdlet_args register_url_with_local_library = sh.register_url_with_local_library coerce_to_pipe_object = sh.coerce_to_pipe_object get_field = sh.get_field resolve_target_dir = sh.resolve_target_dir coerce_to_path = sh.coerce_to_path build_pipeline_preview = sh.build_pipeline_preview class Download_File(Cmdlet): """Class-based download-file cmdlet - direct HTTP downloads.""" def __init__(self) -> None: """Initialize download-file cmdlet.""" super().__init__( name="download-file", summary="Download files or streaming media", usage= "download-file [-path DIR] [options] OR @N | download-file [-path DIR|DIR] [options]", alias=["dl-file", "download-http"], arg=[ SharedArgs.URL, SharedArgs.PLUGIN, SharedArgs.PATH, SharedArgs.QUERY, QueryArg( "clip", key="clip", aliases=["range", "section", "sections"], type="string", required=False, description=( "Clip time ranges via -query keyed fields (e.g. clip:1m-2m or clip:00:01-00:10). " "Comma-separated values supported." ), query_only=True, ), CmdletArg( name="item", type="string", description="Item selection for playlists/formats", ), ], detail=[ "Download files directly via HTTP or streaming media via yt-dlp.", "For Internet Archive item pages (archive.org/details/...), shows a selectable file/format list; pick with @N to download.", ], exec=self.run, ) self.register() def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: """Main execution method.""" try: debug_panel( "download-file", [ ("args", list(args)), ("has_piped_input", bool(result)), ], border_style="cyan", ) except Exception: pass return self._run_impl(result, args, config) @staticmethod def _path_from_download_result(result_obj: Any) -> Path: """Normalize downloader return values to a concrete filesystem path.""" resolved = coerce_to_path(result_obj) if resolved is None: raise DownloadError("Could not determine downloaded file path") return resolved @staticmethod def _selection_run_label( run_args: Sequence[str], *, extra_url_prefixes: Sequence[str] = (), ) -> str: try: urls = extract_urls_from_selection_args( run_args, extra_url_prefixes=extra_url_prefixes, ) if urls: return str(urls[0]) except Exception: pass for arg in run_args: text = str(arg or "").strip() if text and not text.startswith("-"): return text return "item" @staticmethod def _batch_progress_state(config: Optional[Dict[str, Any]]) -> tuple[bool, int, int, str]: if not isinstance(config, dict): return False, 0, 0, "" suppress_nested = bool(config.get("_download_file_suppress_nested_pipe_progress")) if not suppress_nested: return False, 0, 0, "" try: total = max(0, int(config.get("_download_file_batch_total") or 0)) except Exception: total = 0 try: index = max(0, int(config.get("_download_file_batch_index") or 0)) except Exception: index = 0 try: label = str(config.get("_download_file_batch_label") or "").strip() except Exception: label = "" return True, total, index, label @staticmethod def _selection_url_prefixes(registry: Dict[str, Any]) -> List[str]: loader = registry.get("list_selection_url_prefixes") if not callable(loader): return [] try: values = loader() or [] except Exception: return [] return [str(value).strip().lower() for value in values if str(value or "").strip()] def _emit_plugin_items( self, *, items: Sequence[Any], config: Dict[str, Any], ) -> int: emitted = 0 for item in items: if not isinstance(item, dict): continue pipeline_context.emit(item) if item.get("url"): try: pipe_obj = coerce_to_pipe_object(item) register_url_with_local_library(pipe_obj, config) except Exception: pass emitted += 1 return emitted def _consume_plugin_download_result( self, *, result: Any, config: Dict[str, Any], ) -> tuple[int, Optional[int], bool]: if result is None: return 0, None, False if isinstance(result, list): if result and all(isinstance(item, dict) for item in result): return self._emit_plugin_items(items=result, config=config), 0, True return 0, None, False if not isinstance(result, dict): return 0, None, False action = str( result.get("action") or result.get("provider_action") or "" ).strip().lower() if action in {"emit_items", "emit_pipe_objects"}: items = result.get("items") or [] exit_code = result.get("exit_code") emitted = self._emit_plugin_items( items=items if isinstance(items, list) else [], config=config, ) try: normalized_exit = int(exit_code) if exit_code is not None else 0 except Exception: normalized_exit = 0 return emitted, normalized_exit, True if action == "handled": exit_code = result.get("exit_code") try: normalized_exit = int(exit_code) if exit_code is not None else 0 except Exception: normalized_exit = 0 try: downloaded = int(result.get("downloaded") or 0) except Exception: downloaded = 0 return downloaded, normalized_exit, True return 0, None, False def _process_explicit_urls( self, *, raw_urls: Sequence[str], final_output_dir: Path, config: Dict[str, Any], quiet_mode: bool, registry: Dict[str, Any], progress: PipelineProgress, parsed: Dict[str, Any], args: Sequence[str], context_items: Sequence[Any] = (), ) -> tuple[int, Optional[int]]: downloaded_count = 0 suppress_nested, batch_total, batch_index, batch_label = self._batch_progress_state(config) total_urls = len(raw_urls or []) try: if total_urls > 1 and not suppress_nested: progress.begin_pipe(total_items=total_urls, items_preview=list(raw_urls[:5])) except Exception: pass SearchResult = registry.get("SearchResult") get_plugin = registry.get("get_plugin") match_plugin_name_for_url = registry.get("match_plugin_name_for_url") for idx, url in enumerate(raw_urls, 1): try: try: display_total = batch_total if batch_total > 0 else total_urls display_index = batch_index if batch_total > 0 else idx display_label = batch_label or str(url) if display_total > 0: progress.set_status( f"downloading {display_index}/{display_total}: {display_label}" ) except Exception: pass # Check providers first provider_name = None if match_plugin_name_for_url: try: provider_name = match_plugin_name_for_url(str(url)) except Exception: pass provider = None if provider_name and get_plugin: provider = get_plugin(provider_name, config) if provider: try: # Try generic handle_url handled = False if hasattr(provider, "handle_url"): try: handled, path = provider.handle_url(str(url), output_dir=final_output_dir) if handled: extra_meta = None title_hint = None tags_hint: Optional[List[str]] = None media_kind_hint = None path_value: Optional[Any] = path if isinstance(path, dict): provider_action = str( path.get("action") or path.get("provider_action") or "" ).strip().lower() if provider_action == "download_items" or bool(path.get("download_items")): request_metadata = path.get("metadata") or path.get("full_metadata") or {} if not isinstance(request_metadata, dict): request_metadata = {} magnet_id = path.get("magnet_id") or request_metadata.get("magnet_id") if magnet_id is not None: request_metadata.setdefault("magnet_id", magnet_id) if SearchResult is None: continue sr = SearchResult( table=str(provider_name), title=str(path.get("title") or path.get("name") or f"{provider_name} item"), path=str(path.get("path") or path.get("url") or url), full_metadata=request_metadata, ) downloaded_extra = self._download_provider_items( provider=provider, provider_name=str(provider_name), search_result=sr, output_dir=final_output_dir, progress=progress, quiet_mode=quiet_mode, config=config, ) if downloaded_extra: downloaded_count += int(downloaded_extra) continue plugin_downloaded, plugin_exit, plugin_handled = self._consume_plugin_download_result( result=path, config=config, ) if plugin_handled: downloaded_count += plugin_downloaded if plugin_exit is not None and plugin_downloaded == 0: return downloaded_count, int(plugin_exit) if plugin_downloaded: continue path_value = path.get("path") or path.get("file_path") extra_meta = path.get("metadata") or path.get("full_metadata") title_hint = path.get("title") or path.get("name") media_kind_hint = path.get("media_kind") tags_val = path.get("tags") or path.get("tag") if isinstance(tags_val, (list, tuple, set)): tags_hint = [str(t) for t in tags_val if t] elif isinstance(tags_val, str) and tags_val.strip(): tags_hint = [str(tags_val).strip()] if path_value: p_val = Path(str(path_value)) if not title_hint and isinstance(extra_meta, dict): title_hint = extra_meta.get("title") or extra_meta.get("name") self._emit_local_file( downloaded_path=p_val, source=str(url), title_hint=str(title_hint) if title_hint else p_val.stem, tags_hint=tags_hint, media_kind_hint=str(media_kind_hint) if media_kind_hint else "file", full_metadata=extra_meta, progress=progress, config=config, provider_hint=provider_name ) downloaded_count += 1 continue except Exception as e: debug_panel( "download-file provider error", [ ("plugin", provider_name), ("url", url), ("operation", "handle_url"), ("error", e), ], border_style="yellow", ) # Try generic download_url if not already handled if not handled and hasattr(provider, "download_url"): try: res = provider.download_url( str(url), final_output_dir, parsed=parsed, args=list(args), progress=progress, quiet_mode=quiet_mode, context_items=list(context_items or []), ) except TypeError: res = provider.download_url(str(url), final_output_dir) plugin_downloaded, plugin_exit, plugin_handled = self._consume_plugin_download_result( result=res, config=config, ) if plugin_handled: downloaded_count += plugin_downloaded if plugin_exit is not None and plugin_downloaded == 0: return downloaded_count, int(plugin_exit) if plugin_downloaded: continue if res: # Standardize result: can be Path, tuple(Path, Info), or dict with "path" p_val = None extra_meta = None if isinstance(res, (str, Path)): p_val = Path(res) elif isinstance(res, tuple) and len(res) > 0: p_val = Path(res[0]) if len(res) > 1 and isinstance(res[1], dict): extra_meta = res[1] elif isinstance(res, dict): path_candidate = res.get("path") or res.get("file_path") if path_candidate: p_val = Path(path_candidate) extra_meta = res if p_val: self._emit_local_file( downloaded_path=p_val, source=str(url), title_hint=p_val.stem, tags_hint=None, media_kind_hint=extra_meta.get("media_kind") if extra_meta else "file", full_metadata=extra_meta, provider_hint=provider_name, progress=progress, config=config, ) downloaded_count += 1 continue except Exception as e: log(f"Provider {provider_name} error handling {url}: {e}", file=sys.stderr) pass if not handled: continue # Direct Download Fallback result_obj = _download_direct_file( str(url), final_output_dir, quiet=quiet_mode, pipeline_progress=progress, ) downloaded_path = self._path_from_download_result(result_obj) self._emit_local_file( downloaded_path=downloaded_path, source=str(url), title_hint=downloaded_path.stem, tags_hint=[f"title:{downloaded_path.stem}"], media_kind_hint="file", full_metadata=None, progress=progress, config=config, ) downloaded_count += 1 except DownloadError as e: log(f"Download failed for {url}: {e}", file=sys.stderr) except Exception as e: log(f"Error processing {url}: {e}", file=sys.stderr) return downloaded_count, None def _normalize_provider_key(self, value: Optional[Any]) -> Optional[str]: if value is None: return None try: normalized = str(value).strip() except Exception: return None if not normalized: return None if "." in normalized: normalized = normalized.split(".", 1)[0] return normalized.lower() def _provider_key_from_item(self, item: Any) -> Optional[str]: table_hint = get_field(item, "table") key = self._normalize_provider_key(table_hint) if key: return key provider_hint = get_field(item, "provider") key = self._normalize_provider_key(provider_hint) if key: return key return self._normalize_provider_key(get_field(item, "source")) def _expand_provider_items( self, *, piped_items: Sequence[Any], registry: Dict[str, Any], config: Dict[str, Any], ) -> List[Any]: get_search_plugin = registry.get("get_search_plugin") expanded_items: List[Any] = [] for item in piped_items: try: provider_key = self._provider_key_from_item(item) provider = get_search_plugin(provider_key, config) if provider_key and get_search_plugin else None # Generic hook: If provider has expand_item(item), use it. if provider and hasattr(provider, "expand_item") and callable(provider.expand_item): try: sub_items = provider.expand_item(item) if sub_items: expanded_items.extend(sub_items) continue except Exception as e: debug_panel( "download-file expand_item failed", [ ("plugin", provider_key), ("error", e), ], border_style="yellow", ) expanded_items.append(item) except Exception: expanded_items.append(item) return expanded_items def _process_provider_items(self, *, piped_items: Sequence[Any], final_output_dir: Path, config: Dict[str, Any], quiet_mode: bool, registry: Dict[str, Any], progress: PipelineProgress, ) -> tuple[int, int]: downloaded_count = 0 queued_magnet_submissions = 0 get_search_plugin = registry.get("get_search_plugin") SearchResult = registry.get("SearchResult") expanded_items = self._expand_provider_items( piped_items=piped_items, registry=registry, config=config ) total_items = len(expanded_items) processed_items = 0 try: if total_items: progress.set_percent(0) except Exception: pass for idx, item in enumerate(expanded_items, 1): try: label = "item" table = get_field(item, "table") title = get_field(item, "title") target = get_field(item, "path") or get_field(item, "url") media_kind = get_field(item, "media_kind") tags_val = get_field(item, "tag") tags_list: Optional[List[str]] if isinstance(tags_val, (list, set)): tags_list = sorted([str(t) for t in tags_val if t]) else: tags_list = None full_metadata = get_field(item, "full_metadata") if ((not full_metadata) and isinstance(item, dict) and isinstance(item.get("extra"), dict)): extra_md = item["extra"].get("full_metadata") if isinstance(extra_md, dict): full_metadata = extra_md try: label = title or target label = str(label or "item").strip() if total_items: pct = int(round((processed_items / max(1, total_items)) * 100)) progress.set_percent(pct) progress.set_status( f"downloading {processed_items + 1}/{total_items}: {label}" ) except Exception: pass transfer_label = label # If this looks like a provider item and providers are available, prefer provider.download() downloaded_path: Optional[Path] = None attempted_provider_download = False provider_sr = None provider_obj = None provider_key = self._provider_key_from_item(item) if provider_key and get_search_plugin and SearchResult: # Reuse helper to derive the provider key from table/provider/source hints. provider_obj = get_search_plugin(provider_key, config) if provider_obj is not None and getattr(provider_obj, "prefers_transfer_progress", False): try: progress.begin_transfer(label=transfer_label, total=None) except Exception: pass if provider_obj is not None: attempted_provider_download = True sr = SearchResult( table=str(table), title=str(title or "Unknown"), path=str(target or ""), tag=set(tags_list) if tags_list else set(), media_kind=str(media_kind or "file"), full_metadata=full_metadata if isinstance(full_metadata, dict) else {}, ) # Preserve plugin-managed output structure when a plugin encodes nested paths. output_dir = final_output_dir # Generic: allow provider to strict output_dir? # Using default output_dir for now. downloaded_path = provider_obj.download(sr, output_dir) provider_sr = sr if downloaded_path is None: try: downloaded_extra = self._download_provider_items( provider=provider_obj, provider_name=str(provider_key), search_result=sr, output_dir=output_dir, progress=progress, quiet_mode=quiet_mode, config=config, ) except Exception: downloaded_extra = 0 if downloaded_extra: downloaded_count += int(downloaded_extra) continue # Fallback: if we have a direct HTTP URL and no provider successfully handled it if (downloaded_path is None and not attempted_provider_download and isinstance(target, str) and target.startswith("http")): suggested_name = str(title).strip() if title is not None else None result_obj = _download_direct_file( target, final_output_dir, quiet=quiet_mode, suggested_filename=suggested_name, pipeline_progress=progress, ) downloaded_path = coerce_to_path(result_obj) if downloaded_path is None: log( f"Cannot download item (no provider handler / unsupported target): {title or target}", file=sys.stderr, ) continue # Allow providers to add/enrich tags and metadata during download. if provider_sr is not None: try: sr_md = getattr(provider_sr, "full_metadata", None) if isinstance(sr_md, dict) and sr_md: full_metadata = sr_md except Exception: pass try: if isinstance(full_metadata, dict): t = str(full_metadata.get("title") or "").strip() if t: title = t except Exception: pass # Prefer tags from the search result object if the provider mutated them during download. try: sr_tags = getattr(provider_sr, "tag", None) if isinstance(sr_tags, (set, list)) and sr_tags: # Re-sync tags_list with the potentially enriched provider_sr.tag tags_list = sorted([str(t) for t in sr_tags if t]) except Exception: pass self._emit_local_file( downloaded_path=downloaded_path, source=str(target) if target else None, title_hint=str(title) if title else downloaded_path.stem, tags_hint=tags_list, media_kind_hint=str(media_kind) if media_kind else None, full_metadata=full_metadata if isinstance(full_metadata, dict) else None, progress=progress, config=config, provider_hint=provider_key ) downloaded_count += 1 except DownloadError as e: log(f"Download failed: {e}", file=sys.stderr) except Exception as e: log(f"Error downloading item: {e}", file=sys.stderr) finally: if provider_obj is not None and getattr(provider_obj, "prefers_transfer_progress", False): try: progress.finish_transfer(label=transfer_label) except Exception: pass processed_items += 1 try: pct = int(round((processed_items / max(1, total_items)) * 100)) progress.set_percent(pct) if processed_items >= total_items: progress.clear_status() except Exception: pass return downloaded_count, queued_magnet_submissions def _download_provider_items( self, *, provider: Any, provider_name: str, search_result: Any, output_dir: Path, progress: PipelineProgress, quiet_mode: bool, config: Dict[str, Any], ) -> int: if provider is None or not hasattr(provider, "download_items"): return 0 def _on_emit(path: Path, file_url: str, relpath: str, metadata: Dict[str, Any]) -> None: title_hint = None try: title_hint = metadata.get("name") or relpath except Exception: title_hint = relpath title_hint = title_hint or (Path(path).name if path else "download") self._emit_local_file( downloaded_path=path, source=file_url, title_hint=title_hint, tags_hint=None, media_kind_hint="file", full_metadata=metadata if isinstance(metadata, dict) else None, progress=progress, config=config, provider_hint=provider_name, ) try: downloaded_count = provider.download_items( search_result, output_dir, emit=_on_emit, progress=progress, quiet_mode=quiet_mode, path_from_result=coerce_to_path, config=config, ) except TypeError: downloaded_count = provider.download_items( search_result, output_dir, emit=_on_emit, progress=progress, quiet_mode=quiet_mode, path_from_result=coerce_to_path, ) except Exception as exc: log(f"Provider {provider_name} download_items error: {exc}", file=sys.stderr) return 0 try: return int(downloaded_count or 0) except Exception: return 0 def _emit_local_file( self, *, downloaded_path: Path, source: Optional[str], title_hint: Optional[str], tags_hint: Optional[List[str]], media_kind_hint: Optional[str], full_metadata: Optional[Dict[str, Any]], progress: PipelineProgress, config: Dict[str, Any], provider_hint: Optional[str] = None, ) -> None: title_val = (title_hint or downloaded_path.stem or "Unknown").strip() or downloaded_path.stem hash_value = sha256_file(downloaded_path) notes: Optional[Dict[str, str]] = None try: if isinstance(full_metadata, dict): # Providers attach pre-built notes under the generic "_notes" key # (e.g. Tidal sets {"lyric": subtitles} during download enrichment). # This keeps provider-specific metadata handling inside the provider. _provider_notes = full_metadata.get("_notes") if isinstance(_provider_notes, dict) and _provider_notes: notes = {str(k): str(v) for k, v in _provider_notes.items() if k and v} except Exception: notes = None tag: List[str] = [] if tags_hint: tag.extend([str(t) for t in tags_hint if t]) if not any(str(t).lower().startswith("title:") for t in tag): tag.insert(0, f"title:{title_val}") payload: Dict[str, Any] = { "path": str(downloaded_path), "hash": hash_value, "title": title_val, "action": "cmdlet:download-file", "download_mode": "file", "store": "local", "media_kind": media_kind_hint or "file", "tag": tag, } if provider_hint: payload["plugin"] = str(provider_hint) payload["provider"] = str(provider_hint) if full_metadata: payload["metadata"] = full_metadata if notes: payload["notes"] = notes if source and str(source).startswith("http"): payload["url"] = source elif source: payload["source_url"] = source pipeline_context.emit(payload) def _maybe_render_download_details(self, *, config: Dict[str, Any]) -> None: try: stage_ctx = pipeline_context.get_stage_context() except Exception: stage_ctx = None is_last_stage = (stage_ctx is None) or bool(getattr(stage_ctx, "is_last_stage", False)) if not is_last_stage: return try: quiet_mode = bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False except Exception: quiet_mode = False if quiet_mode: return emitted_items: List[Any] = [] try: emitted_items = list(getattr(stage_ctx, "emits", None) or []) if stage_ctx is not None else [] except Exception: emitted_items = [] if not emitted_items: return # Stop the live pipeline progress UI before rendering the details panel. try: live_progress = pipeline_context.get_live_progress() except Exception: live_progress = None if live_progress is not None: try: pipe_idx = getattr(stage_ctx, "pipe_index", None) if stage_ctx is not None else None if isinstance(pipe_idx, int): live_progress.finish_pipe(int(pipe_idx), force_complete=True) except Exception: pass try: live_progress.stop() except Exception: pass try: if hasattr(pipeline_context, "set_live_progress"): pipeline_context.set_live_progress(None) except Exception: pass try: subject = emitted_items[0] if len(emitted_items) == 1 else list(emitted_items) # Use helper to display items and make them @-selectable from ._shared import display_and_persist_items display_and_persist_items(list(emitted_items), title="Result", subject=subject) except Exception: pass # Prevent CLI from printing a redundant table after the detail panels. try: if stage_ctx is not None: stage_ctx.emits = [] except Exception: pass @staticmethod def _load_provider_registry() -> Dict[str, Any]: """Lightweight accessor for plugin helpers without hard dependencies.""" try: from ProviderCore import registry as provider_registry # type: ignore from ProviderCore.base import SearchResult # type: ignore return { "get_plugin": getattr(provider_registry, "get_plugin", None), "get_search_plugin": getattr(provider_registry, "get_search_plugin", None), "match_plugin_name_for_url": getattr(provider_registry, "match_plugin_name_for_url", None), "list_selection_url_prefixes": getattr(provider_registry, "list_selection_url_prefixes", None), "SearchResult": SearchResult, } except Exception: return { "get_plugin": None, "get_search_plugin": None, "match_plugin_name_for_url": None, "list_selection_url_prefixes": None, "SearchResult": None, } @classmethod def _extract_hash_from_search_hit(cls, hit: Any) -> Optional[str]: if not isinstance(hit, dict): return None for key in ("hash", "hash_hex", "file_hash", "hydrus_hash"): v = hit.get(key) normalized = sh.normalize_hash(str(v) if v is not None else None) if normalized: return normalized return None @classmethod def _find_existing_hash_for_url( cls, storage: Any, canonical_url: str, *, hydrus_available: bool ) -> Optional[str]: if storage is None or not canonical_url: return None try: from Store.HydrusNetwork import HydrusNetwork except Exception: HydrusNetwork = None # type: ignore try: backend_names = list(storage.list_searchable_backends() or []) except Exception: backend_names = [] for backend_name in backend_names: try: backend = storage[backend_name] except Exception: continue try: if str(backend_name).strip().lower() == "temp": continue except Exception: pass try: if HydrusNetwork is not None and isinstance(backend, HydrusNetwork) and not hydrus_available: continue except Exception: pass try: if HydrusNetwork is not None and isinstance(backend, HydrusNetwork): hashes = backend.find_hashes_by_url(canonical_url) or [] for existing_hash in hashes: normalized = sh.normalize_hash(existing_hash) if normalized: return normalized continue hits = backend.search(f"url:{canonical_url}", limit=5) or [] except Exception: hits = [] for hit in hits: extracted = cls._extract_hash_from_search_hit(hit) if extracted: return extracted return None @staticmethod def _format_timecode(seconds: int, *, force_hours: bool) -> str: total = max(0, int(seconds)) minutes, secs = divmod(total, 60) hours, minutes = divmod(minutes, 60) if force_hours: return f"{hours:02d}:{minutes:02d}:{secs:02d}" return f"{minutes:02d}:{secs:02d}" @staticmethod def _rebase_subtitle_timestamp_text(text: str, offset_seconds: int) -> str: if not text: return text try: offset_value = float(offset_seconds) except Exception: return text if offset_value <= 0: return text timestamp_re = re.compile(r"(?(?:\d{2}:)?\d{2}:\d{2}(?:[\.,]\d{1,3})?)(?!\d)") def _shift(match: re.Match[str]) -> str: original = str(match.group("ts") or "") if not original: return original frac_sep = "." frac_digits = 0 base = original frac_seconds = 0.0 if "." in original: base, frac = original.split(".", 1) frac_sep = "." frac_digits = len(frac) try: frac_seconds = float(f"0.{frac}") if frac else 0.0 except Exception: frac_seconds = 0.0 elif "," in original: base, frac = original.split(",", 1) frac_sep = "," frac_digits = len(frac) try: frac_seconds = float(f"0.{frac}") if frac else 0.0 except Exception: frac_seconds = 0.0 parts = base.split(":") if len(parts) == 3: hours_s, minutes_s, seconds_s = parts include_hours = True elif len(parts) == 2: hours_s = "0" minutes_s, seconds_s = parts include_hours = False else: return original try: total = ( (int(hours_s) * 3600) + (int(minutes_s) * 60) + int(seconds_s) + frac_seconds + offset_value ) except Exception: return original total = max(0.0, total) whole_seconds = int(total) fraction = total - whole_seconds hours, remainder = divmod(whole_seconds, 3600) minutes, seconds = divmod(remainder, 60) if frac_digits > 0: scale = 10 ** frac_digits frac_value = int(round(fraction * scale)) if frac_value >= scale: frac_value = 0 seconds += 1 if seconds >= 60: seconds = 0 minutes += 1 if minutes >= 60: minutes = 0 hours += 1 frac_text = f"{frac_value:0{frac_digits}d}" if include_hours or hours > 0: return f"{hours:02d}:{minutes:02d}:{seconds:02d}{frac_sep}{frac_text}" return f"{minutes:02d}:{seconds:02d}{frac_sep}{frac_text}" if include_hours or hours > 0: return f"{hours:02d}:{minutes:02d}:{seconds:02d}" return f"{minutes:02d}:{seconds:02d}" try: return timestamp_re.sub(_shift, str(text)) except Exception: return text @classmethod def _format_clip_range(cls, start_s: int, end_s: int) -> str: force_hours = bool(start_s >= 3600 or end_s >= 3600) return f"{cls._format_timecode(start_s, force_hours=force_hours)}-{cls._format_timecode(end_s, force_hours=force_hours)}" @classmethod def _apply_clip_decorations( cls, pipe_objects: List[Dict[str, Any]], clip_ranges: List[tuple[int, int]], *, source_king_hash: Optional[str] ) -> None: if not pipe_objects or len(pipe_objects) != len(clip_ranges): return for po, (start_s, end_s) in zip(pipe_objects, clip_ranges): clip_range = cls._format_clip_range(start_s, end_s) clip_tag = f"clip:{clip_range}" po["title"] = clip_tag tags = po.get("tag") if not isinstance(tags, list): tags = [] tags = [t for t in tags if not str(t).strip().lower().startswith("title:")] tags = [t for t in tags if not str(t).strip().lower().startswith("relationship:")] tags.insert(0, f"title:{clip_tag}") if clip_tag not in tags: tags.append(clip_tag) po["tag"] = tags notes = po.get("notes") if isinstance(notes, dict): sub_text = notes.get("sub") if isinstance(sub_text, str) and sub_text.strip(): notes["sub"] = cls._rebase_subtitle_timestamp_text(sub_text, start_s) po["notes"] = notes if len(pipe_objects) < 2: return hashes: List[str] = [] for po in pipe_objects: h_val = sh.normalize_hash(str(po.get("hash") or "")) hashes.append(h_val or "") king_hash = sh.normalize_hash(source_king_hash) if source_king_hash else None if not king_hash: king_hash = hashes[0] if hashes and hashes[0] else None if not king_hash: return alt_hashes: List[str] = [h for h in hashes if h and h != king_hash] if not alt_hashes: return for po in pipe_objects: po["relationships"] = {"king": [king_hash], "alt": list(alt_hashes)} def _run_impl( self, result: Any, args: Sequence[str], config: Dict[str, Any] ) -> int: """Main download implementation for direct HTTP files.""" progress = PipelineProgress(pipeline_context) prev_progress = None had_progress_key = False try: # Allow providers to tap into the active PipelineProgress (optional). try: if isinstance(config, dict): had_progress_key = "_pipeline_progress" in config prev_progress = config.get("_pipeline_progress") config["_pipeline_progress"] = progress except Exception: pass # Parse arguments parsed = parse_cmdlet_args(args, self) registry = self._load_provider_registry() selection_url_prefixes = self._selection_url_prefixes(registry) # Resolve URLs from -url or positional arguments url_candidates = parsed.get("url") or [ a for a in parsed.get("args", []) if isinstance(a, str) and ( a.startswith("http") or "://" in a or ":" in a or "🧲" in a and not a.startswith("-") ) ] raw_url = normalize_url_list(url_candidates) quiet_mode = bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False # Fallback to piped items if no explicit URLs provided piped_items = [] if not raw_url: piped_items = sh.normalize_result_items( result, include_falsey_single=True, ) # Handle TABLE_AUTO_STAGES routing: if a piped item has _selection_args, # re-invoke download-file with those args instead of processing the PipeObject itself. if piped_items and not raw_url: selection_runs: List[List[str]] = [] residual_items: List[Any] = [] for item in piped_items: handled = False try: normalized_args, _normalized_action, item_url = extract_selection_fields( item, extra_url_prefixes=selection_url_prefixes, ) if normalized_args: if selection_args_have_url( normalized_args, extra_url_prefixes=selection_url_prefixes, ): selection_runs.append(list(normalized_args)) handled = True elif item_url: selection_runs.append([str(item_url)] + list(normalized_args)) handled = True except Exception as e: debug_panel( "download-file selection args failed", [("error", e)], border_style="yellow", ) handled = False if not handled: residual_items.append(item) if selection_runs: selection_urls: List[str] = [] for run_args in selection_runs: for u in extract_urls_from_selection_args( run_args, extra_url_prefixes=selection_url_prefixes, ): if u not in selection_urls: selection_urls.append(u) original_skip_preflight = None original_timeout = None original_skip_direct = None original_batch_total = None original_batch_index = None original_batch_label = None original_suppress_nested = None try: if isinstance(config, dict): original_skip_preflight = config.get("_skip_url_preflight") original_timeout = config.get("_pipeobject_timeout_seconds") original_skip_direct = config.get("_skip_direct_on_streaming_failure") original_batch_total = config.get("_download_file_batch_total") original_batch_index = config.get("_download_file_batch_index") original_batch_label = config.get("_download_file_batch_label") original_suppress_nested = config.get("_download_file_suppress_nested_pipe_progress") except Exception: original_skip_preflight = None original_timeout = None original_batch_total = None original_batch_index = None original_batch_label = None original_suppress_nested = None try: if selection_urls: # Skip Duplicate Preflight on selection re-entry: # User has already seen the table/status and explicitly selected an item. # Skipping this reduces DB load and latency. if isinstance(config, dict): config["_skip_url_preflight"] = True config["_skip_direct_on_streaming_failure"] = True if isinstance(config, dict) and config.get("_pipeobject_timeout_seconds") is None: # Use a generous default for individual items config["_pipeobject_timeout_seconds"] = 600 successes = 0 failures = 0 last_code = 0 total_selection = len(selection_runs) preview_items = list(selection_urls[:5]) or [ self._selection_run_label( run_args, extra_url_prefixes=selection_url_prefixes, ) for run_args in selection_runs[:5] ] try: progress.ensure_local_ui( label="download-file", total_items=total_selection, items_preview=preview_items, ) except Exception: pass try: progress.begin_pipe( total_items=total_selection, items_preview=preview_items, ) except Exception: pass for idx, run_args in enumerate(selection_runs, 1): run_label = self._selection_run_label( run_args, extra_url_prefixes=selection_url_prefixes, ) try: progress.set_status( f"downloading {idx}/{total_selection}: {run_label}" ) except Exception: pass try: if isinstance(config, dict): config["_download_file_batch_total"] = total_selection config["_download_file_batch_index"] = idx config["_download_file_batch_label"] = run_label config["_download_file_suppress_nested_pipe_progress"] = True except Exception: pass exit_code = self._run_impl(None, run_args, config) if exit_code == 0: successes += 1 else: failures += 1 last_code = exit_code piped_items = residual_items if not piped_items: if successes > 0: return 0 return last_code or 1 finally: try: if isinstance(config, dict): if original_skip_preflight is None: config.pop("_skip_url_preflight", None) else: config["_skip_url_preflight"] = original_skip_preflight if original_timeout is None: config.pop("_pipeobject_timeout_seconds", None) else: config["_pipeobject_timeout_seconds"] = original_timeout if original_skip_direct is None: config.pop("_skip_direct_on_streaming_failure", None) else: config["_skip_direct_on_streaming_failure"] = original_skip_direct if original_batch_total is None: config.pop("_download_file_batch_total", None) else: config["_download_file_batch_total"] = original_batch_total if original_batch_index is None: config.pop("_download_file_batch_index", None) else: config["_download_file_batch_index"] = original_batch_index if original_batch_label is None: config.pop("_download_file_batch_label", None) else: config["_download_file_batch_label"] = original_batch_label if original_suppress_nested is None: config.pop("_download_file_suppress_nested_pipe_progress", None) else: config["_download_file_suppress_nested_pipe_progress"] = original_suppress_nested except Exception: pass had_piped_input = False try: if isinstance(result, list): had_piped_input = bool(result) else: had_piped_input = bool(result) except Exception: had_piped_input = False # UX: In piped mode, allow a single positional arg to be the destination directory. # Example: @1-4 | download-file "C:\\Users\\Me\\Downloads\\yoyo" if (had_piped_input and raw_url and len(raw_url) == 1 and (not parsed.get("path"))): candidate = str(raw_url[0] or "").strip() low = candidate.lower() looks_like_url = low.startswith( ("http://", "https://", "ftp://", "magnet:", "torrent:") + tuple(selection_url_prefixes) ) looks_like_provider = ( ":" in candidate and not candidate.startswith( ("http:", "https:", "ftp:", "ftps:", "file:") + tuple(selection_url_prefixes) ) ) looks_like_windows_path = ( (len(candidate) >= 2 and candidate[1] == ":") or candidate.startswith("\\\\") or candidate.startswith("\\") or candidate.endswith(("\\", "/")) ) if (not looks_like_url) and ( not looks_like_provider) and looks_like_windows_path: parsed["path"] = candidate raw_url = [] piped_items = self._collect_piped_items_if_no_urls(result, raw_url) if not raw_url and not piped_items: log("No url or piped items to download", file=sys.stderr) return 1 # Provider-pre-check (e.g. Internet Archive format picker) picker_result = self._maybe_show_provider_picker( raw_urls=raw_url, piped_items=piped_items, parsed=parsed, config=config, registry=registry, ) if picker_result is not None: return int(picker_result) # Re-check picker if partial processing occurred picker_result = self._maybe_show_provider_picker( raw_urls=raw_url, piped_items=piped_items, parsed=parsed, config=config, registry=registry, ) if picker_result is not None: return int(picker_result) # Get output directory final_output_dir = resolve_target_dir(parsed, config) if not final_output_dir: return 1 try: debug_panel( "download-file plan", [ ("output_dir", final_output_dir), ("remaining_urls", len(raw_url)), ("piped_items", len(piped_items) if isinstance(piped_items, list) else int(bool(piped_items))), ], border_style="cyan", ) except Exception: pass # If the caller isn't running the shared pipeline Live progress UI (e.g. direct # cmdlet execution), start a minimal local pipeline progress panel so downloads # show consistent, Rich-formatted progress (like download-media). total_items = max(1, len(raw_url or []) + len(piped_items or [])) preview = build_pipeline_preview(raw_url, piped_items) progress.ensure_local_ui( label="download-file", total_items=total_items, items_preview=preview ) downloaded_count = 0 urls_downloaded, early_exit = self._process_explicit_urls( raw_urls=raw_url, final_output_dir=final_output_dir, config=config, quiet_mode=quiet_mode, registry=registry, progress=progress, parsed=parsed, args=args, context_items=(result if isinstance(result, list) else ([result] if result else [])), ) downloaded_count += int(urls_downloaded) if early_exit is not None: return int(early_exit) provider_downloaded, magnet_submissions = self._process_provider_items( piped_items=piped_items, final_output_dir=final_output_dir, config=config, quiet_mode=quiet_mode, registry=registry, progress=progress, ) downloaded_count += provider_downloaded if downloaded_count > 0 or magnet_submissions > 0: # Render detail panels for downloaded items when download-file is the last stage. self._maybe_render_download_details(config=config) return 0 log("No downloads completed", file=sys.stderr) return 1 except Exception as e: log(f"Error in download-file: {e}", file=sys.stderr) return 1 finally: try: if isinstance(config, dict): if had_progress_key: config["_pipeline_progress"] = prev_progress else: config.pop("_pipeline_progress", None) except Exception: pass progress.close_local_ui(force_complete=True) def _maybe_show_provider_picker( self, *, raw_urls: Sequence[str], piped_items: Sequence[Any], parsed: Dict[str, Any], config: Dict[str, Any], registry: Dict[str, Any], ) -> Optional[int]: """Generic hook for providers to show a selection table (e.g. Internet Archive format picker).""" total_inputs = len(raw_urls or []) + len(piped_items or []) if total_inputs != 1: return None target_url = None if raw_urls: target_url = str(raw_urls[0]) elif piped_items: target_url = str(get_field(piped_items[0], "path") or get_field(piped_items[0], "url") or "") if not target_url: return None match_provider_name_for_url = registry.get("match_plugin_name_for_url") get_provider = registry.get("get_plugin") provider_name = None if match_provider_name_for_url: try: provider_name = match_provider_name_for_url(target_url) except Exception: pass if provider_name and get_provider: provider = get_provider(provider_name, config) if provider and hasattr(provider, "maybe_show_picker"): try: quiet_mode = bool(config.get("_quiet_background_output")) res = provider.maybe_show_picker( url=target_url, item=piped_items[0] if piped_items else None, parsed=parsed, config=config, quiet_mode=quiet_mode, ) if res is not None: return int(res) except Exception as e: debug_panel( "download-file picker error", [ ("plugin", provider_name), ("url", target_url), ("error", e), ], border_style="yellow", ) return None # Module-level singleton registration CMDLET = Download_File()