"""Generic file/stream downloader. Supports: - Direct HTTP file URLs (PDFs, images, documents; non-yt-dlp) - Piped provider items (uses provider.download when available) - Streaming sites via yt-dlp (YouTube, Bandcamp, etc.) """ from __future__ import annotations import sys import re from pathlib import Path from typing import Any, Dict, List, Optional, Sequence from urllib.parse import urlparse from contextlib import AbstractContextManager, nullcontext from API.alldebrid import is_magnet_link from Provider import internetarchive as ia_provider from Provider import alldebrid as ad_provider from Provider import openlibrary as ol_provider from SYS.download import DownloadError, _download_direct_file from SYS.models import DownloadOptions, DownloadMediaResult from SYS.logger import log, debug from SYS.pipeline_progress import PipelineProgress from SYS.result_table import ResultTable from SYS.rich_display import stderr_console as get_stderr_console from SYS import pipeline as pipeline_context from SYS.utils import sha256_file from rich.prompt import Confirm from tool.ytdlp import ( YtDlpTool, _best_subtitle_sidecar, _download_with_timeout, _format_chapters_note, _read_text_file, is_url_supported_by_ytdlp, list_formats, probe_url, ) from . import _shared as sh Cmdlet = sh.Cmdlet CmdletArg = sh.CmdletArg SharedArgs = sh.SharedArgs QueryArg = sh.QueryArg parse_cmdlet_args = sh.parse_cmdlet_args register_url_with_local_library = sh.register_url_with_local_library coerce_to_pipe_object = sh.coerce_to_pipe_object get_field = sh.get_field class Download_File(Cmdlet): """Class-based download-file cmdlet - direct HTTP downloads.""" def __init__(self) -> None: """Initialize download-file cmdlet.""" super().__init__( name="download-file", summary="Download files or streaming media", usage= "download-file [-path DIR] [options] OR @N | download-file [-path DIR|DIR] [options]", alias=["dl-file", "download-http"], arg=[ SharedArgs.URL, SharedArgs.PATH, SharedArgs.QUERY, # Prefer -path for output directory to match other cmdlets; keep -output for backwards compatibility. CmdletArg( name="-output", type="string", alias="o", description="(deprecated) Output directory (use -path instead)", ), CmdletArg( name="audio", type="flag", alias="a", description="Download audio only (yt-dlp)", ), CmdletArg( name="format", type="string", alias="fmt", description="Explicit yt-dlp format selector", ), QueryArg( "clip", key="clip", aliases=["range", "section", "sections"], type="string", required=False, description=( "Clip time ranges via -query keyed fields (e.g. clip:1m-2m or clip:00:01-00:10). " "Comma-separated values supported." ), query_only=True, ), CmdletArg( name="item", type="string", description="Item selection for playlists/formats", ), ], detail=[ "Download files directly via HTTP or streaming media via yt-dlp.", "For Internet Archive item pages (archive.org/details/...), shows a selectable file/format list; pick with @N to download.", ], exec=self.run, ) self.register() def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: """Main execution method.""" return self._run_impl(result, args, config) @staticmethod def _normalize_urls(parsed: Dict[str, Any]) -> List[str]: raw_url = parsed.get("url", []) if isinstance(raw_url, str): raw_url = [raw_url] expanded_urls: List[str] = [] for u in raw_url or []: if u is None: continue s = str(u).strip() if not s: continue if "," in s: parts = [p.strip() for p in s.split(",")] expanded_urls.extend([p for p in parts if p]) else: expanded_urls.append(s) return expanded_urls @staticmethod def _collect_piped_items_if_no_urls(result: Any, raw_urls: Sequence[str]) -> List[Any]: if raw_urls: return [] if isinstance(result, list): return list(result) if result: return [result] return [] @staticmethod def _safe_total_items(raw_urls: Sequence[str], piped_items: Sequence[Any]) -> int: try: return int(len(raw_urls or []) + len(piped_items or [])) except Exception: return 1 @staticmethod def _build_preview( raw_urls: Sequence[str], piped_items: Sequence[Any], total_items: int ) -> List[Any]: try: preview: List[Any] = [] preview.extend(list(raw_urls or [])[:max(0, total_items)]) if len(preview) < total_items: preview.extend( list(piped_items or [])[:max(0, total_items - len(preview))] ) return preview except Exception: return [] @staticmethod def _load_provider_registry() -> Dict[str, Any]: try: from ProviderCore.registry import ( get_search_provider as _get_search_provider, get_provider as _get_provider, match_provider_name_for_url as _match_provider_name_for_url, SearchResult as _SearchResult, ) return { "get_search_provider": _get_search_provider, "get_provider": _get_provider, "match_provider_name_for_url": _match_provider_name_for_url, "SearchResult": _SearchResult, } except Exception: return { "get_search_provider": None, "get_provider": None, "match_provider_name_for_url": None, "SearchResult": None, } @staticmethod def _path_from_download_result(result_obj: Any) -> Path: file_path = None if hasattr(result_obj, "path"): file_path = getattr(result_obj, "path") elif isinstance(result_obj, dict): file_path = result_obj.get("path") if not file_path: file_path = str(result_obj) return Path(str(file_path)) def _emit_local_file( self, *, downloaded_path: Path, source: Optional[str], title_hint: Optional[str], tags_hint: Optional[List[str]], media_kind_hint: Optional[str], full_metadata: Optional[Dict[str, Any]], progress: PipelineProgress, config: Dict[str, Any], provider_hint: Optional[str] = None, ) -> None: title_val = (title_hint or downloaded_path.stem or "Unknown").strip() or downloaded_path.stem hash_value = self._compute_file_hash(downloaded_path) tag: List[str] = [] if tags_hint: tag.extend([str(t) for t in tags_hint if t]) if not any(str(t).lower().startswith("title:") for t in tag): tag.insert(0, f"title:{title_val}") payload: Dict[str, Any] = { "path": str(downloaded_path), "hash": hash_value, "title": title_val, "action": "cmdlet:download-file", "download_mode": "file", "store": "local", "media_kind": media_kind_hint or "file", "tag": tag, } if provider_hint: payload["provider"] = str(provider_hint) if full_metadata: payload["full_metadata"] = full_metadata if source and str(source).startswith("http"): payload["url"] = source elif source: payload["source_url"] = source pipeline_context.emit(payload) # When running with a local progress UI (standalone cmdlet), ensure # the pipe advances on emit. progress.on_emit(payload) # Automatically register url with local library if payload.get("url"): pipe_obj = coerce_to_pipe_object(payload) register_url_with_local_library(pipe_obj, config) def _process_explicit_urls( self, *, raw_urls: Sequence[str], final_output_dir: Path, config: Dict[str, Any], quiet_mode: bool, registry: Dict[str, Any], progress: PipelineProgress, ) -> tuple[int, Optional[int]]: downloaded_count = 0 SearchResult = registry.get("SearchResult") get_provider = registry.get("get_provider") match_provider_name_for_url = registry.get("match_provider_name_for_url") for url in raw_urls: try: debug(f"Processing URL: {url}") # Telegram message URLs are not direct files; route through the provider. try: parsed_url = urlparse(str(url)) host = (parsed_url.hostname or "").lower().strip() except Exception: host = "" is_telegram = host in {"t.me", "telegram.me"} or host.endswith(".t.me") if is_telegram and SearchResult: try: from ProviderCore.registry import get_provider as _get_provider except Exception: _get_provider = None if _get_provider is None: raise DownloadError("Telegram provider registry not available") provider = _get_provider("telegram", config) if provider is None: raise DownloadError( "Telegram provider not configured or not available (check telethon/app_id/api_hash)" ) sr = SearchResult( table="telegram", title=str(url), path=str(url), full_metadata={} ) downloaded_path = None telegram_info: Optional[Dict[str, Any]] = None if hasattr(provider, "download_url"): try: downloaded_path, telegram_info = provider.download_url(str(url), final_output_dir) # type: ignore[attr-defined] except Exception as exc: raise DownloadError(str(exc)) else: downloaded_path = provider.download(sr, final_output_dir) if not downloaded_path: raise DownloadError("Telegram download returned no file") channel = "" post = None if isinstance(telegram_info, dict): try: chat_info_raw = telegram_info.get("chat") msg_info_raw = telegram_info.get("message") chat_info: Dict[str, Any] = ( chat_info_raw if isinstance(chat_info_raw, dict) else {} ) msg_info: Dict[str, Any] = ( msg_info_raw if isinstance(msg_info_raw, dict) else {} ) channel = str( chat_info.get("title") or chat_info.get("username") or "" ).strip() post = msg_info.get("id") except Exception: channel = "" post = None title_hint = None tg_tags: List[str] = [] if channel: tg_tags.append(f"channel:{channel}") if post is not None: tg_tags.append(f"post:{post}") if channel and post is not None: title_hint = f"{channel} {post}" elif post is not None: title_hint = f"post:{post}" else: title_hint = downloaded_path.stem self._emit_local_file( downloaded_path=downloaded_path, source=str(url), title_hint=title_hint, tags_hint=tg_tags, media_kind_hint="file", full_metadata=telegram_info, provider_hint="telegram", progress=progress, config=config, ) downloaded_count += 1 debug("✓ Downloaded via Telegram provider and emitted") continue # Provider URL routing (e.g. OpenLibrary book pages). provider_name = None if match_provider_name_for_url is not None: try: provider_name = match_provider_name_for_url(str(url)) except Exception: provider_name = None # Heuristic: LibGen often uses landing pages like edition.php/file.php. # These should never be treated as direct file URLs. if not provider_name: try: p = urlparse(str(url)) h = (p.hostname or "").strip().lower() path = (p.path or "").strip().lower() if "libgen" in h and any(x in path for x in ( "/edition.php", "/file.php", "/ads.php", "/get.php", "/series.php", )): provider_name = "libgen" except Exception: pass if (provider_name and str(provider_name).lower() == "alldebrid" and is_magnet_link(str(url))): magnet_spec = ad_provider.resolve_magnet_spec(str(url)) if magnet_spec: _, magnet_id = ad_provider.prepare_magnet(magnet_spec, config) if magnet_id is not None: downloaded_count += 1 continue if provider_name and get_provider is not None and SearchResult is not None: # OpenLibrary URLs should be handled by the OpenLibrary provider. if provider_name == "openlibrary": provider = get_provider("openlibrary", config) if provider is None: raise DownloadError( "OpenLibrary provider not configured or not available" ) edition_id = ol_provider.edition_id_from_url(str(url)) title_hint = ol_provider.title_hint_from_url_slug(str(url)) download_payload: Optional[Dict[str, Any]] = None try: ui, _pipe_idx = progress.ui_and_pipe_index() progress_cb = None if ui is not None: # High-level steps for OpenLibrary borrow/download flow. progress.begin_steps(5) def _progress( kind: str, done: int, total: Optional[int], label: str ) -> None: # kind: # - "step": advance step text # - "pages": update pipe percent/status # - "bytes": update transfer bar if kind == "step": progress.step(label) return if kind == "pages": t = int(total) if isinstance(total, int) else 0 d = int(done) if isinstance(done, int) else 0 if t > 0: pct = int( round( (max(0, min(d, t)) / max(1, t)) * 100.0 ) ) progress.set_percent(pct) progress.set_status( f"downloading pages {d}/{t}" ) else: progress.set_status( f"downloading pages {d}" ) return if kind == "bytes": try: lbl = str(label or "download") except Exception: lbl = "download" progress.begin_transfer(label=lbl, total=total) progress.update_transfer( label=lbl, completed=done, total=total ) try: if (isinstance(total, int) and total > 0 and int(done) >= int(total)): progress.finish_transfer(label=lbl) except Exception: pass return progress_cb = _progress if hasattr(provider, "download_url"): download_payload = provider.download_url( # type: ignore[attr-defined] str(url), final_output_dir, progress_cb, ) if download_payload is None: sr = None if hasattr(provider, "search_result_from_url"): sr = provider.search_result_from_url(str(url)) # type: ignore[attr-defined] if sr is None: sr = SearchResult( table="openlibrary", title=title_hint, path=str(url), media_kind="book", full_metadata={ "openlibrary_id": edition_id, }, ) downloaded_path = provider.download( sr, final_output_dir, progress_callback=progress_cb ) # type: ignore[call-arg] if downloaded_path: download_payload = { "path": Path(downloaded_path), "search_result": sr, } except Exception as exc: raise DownloadError(str(exc)) # Clear long-running status line after the download attempt. progress.clear_status() if download_payload and download_payload.get("path"): downloaded_path = Path(download_payload["path"]) sr_obj = download_payload.get("search_result") tags_hint: Optional[List[str]] = None full_md: Optional[Dict[str, Any]] = None resolved_title = title_hint if sr_obj is not None: try: resolved_title = getattr(sr_obj, "title", None) or resolved_title except Exception: pass try: sr_tags = getattr(sr_obj, "tag", None) if isinstance(sr_tags, set) and sr_tags: tags_hint = sorted([str(t) for t in sr_tags if t]) except Exception: tags_hint = None try: full_md = getattr(sr_obj, "full_metadata", None) except Exception: full_md = None self._emit_local_file( downloaded_path=downloaded_path, source=str(url), title_hint=resolved_title, tags_hint=tags_hint, media_kind_hint="book", full_metadata=full_md, provider_hint="openlibrary", progress=progress, config=config, ) downloaded_count += 1 continue # If OpenLibrary can't provide it (not lendable, no creds, etc), auto-search LibGen. try: fallback_query = str(title_hint or "").strip() if fallback_query: log( f"[download-file] Not available on OpenLibrary; searching LibGen for: {fallback_query}", file=sys.stderr, ) from cmdlet.search_provider import CMDLET as _SEARCH_PROVIDER_CMDLET exec_fn = getattr(_SEARCH_PROVIDER_CMDLET, "exec", None) if callable(exec_fn): ret = exec_fn( None, [ "-provider", "libgen", "-query", fallback_query ], config, ) try: table = pipeline_context.get_last_result_table() items = pipeline_context.get_last_result_items() if table is not None: pipeline_context.set_last_result_table_overlay( table, items ) except Exception: pass try: return downloaded_count, int(ret) # type: ignore[arg-type] except Exception: return downloaded_count, 1 except Exception: pass log( "[download-file] OpenLibrary URL could not be downloaded", file=sys.stderr, ) continue # Generic provider URL handler (if a provider implements `download_url`). provider = get_provider(provider_name, config) if provider is not None and hasattr(provider, "download_url"): try: downloaded_path = provider.download_url( str(url), final_output_dir ) # type: ignore[attr-defined] except Exception as exc: raise DownloadError(str(exc)) if downloaded_path: self._emit_local_file( downloaded_path=Path(downloaded_path), source=str(url), title_hint=Path(str(downloaded_path)).stem, tags_hint=None, media_kind_hint="file", full_metadata=None, provider_hint=str(provider_name), progress=progress, config=config, ) downloaded_count += 1 continue # Otherwise, try provider.download(SearchResult) with the URL as the target. if provider is not None: sr_obj = None try: sr_obj = SearchResult( table=str(provider_name), title=str(url), path=str(url), full_metadata={}, ) downloaded_path = provider.download( sr_obj, final_output_dir ) # type: ignore[call-arg] except Exception: downloaded_path = None # Refuse to fall back to direct-download for LibGen landing pages. # This prevents saving HTML (e.g. edition.php) as a bogus file. if (not downloaded_path ) and str(provider_name).lower() == "libgen": raise DownloadError( "LibGen URL did not resolve to a downloadable file" ) if downloaded_path: emit_tags: Optional[List[str]] = None full_md: Optional[Dict[str, Any]] = None title_hint = Path(str(downloaded_path)).stem media_kind_hint = "file" if str(provider_name ).lower() == "libgen" and sr_obj is not None: media_kind_hint = "book" try: sr_tags = getattr(sr_obj, "tag", None) if isinstance(sr_tags, set) and sr_tags: emit_tags = sorted( [str(t) for t in sr_tags if t] ) except Exception: emit_tags = None try: sr_full_md = getattr(sr_obj, "full_metadata", None) if isinstance(sr_full_md, dict): full_md = sr_full_md t = str(sr_full_md.get("title") or "").strip() if t: title_hint = t except Exception: full_md = None self._emit_local_file( downloaded_path=Path(downloaded_path), source=str(url), title_hint=title_hint, tags_hint=emit_tags, media_kind_hint=media_kind_hint, full_metadata=full_md, provider_hint=str(provider_name), progress=progress, config=config, ) downloaded_count += 1 continue result_obj = _download_direct_file( str(url), final_output_dir, quiet=quiet_mode, pipeline_progress=progress, ) downloaded_path = self._path_from_download_result(result_obj) self._emit_local_file( downloaded_path=downloaded_path, source=str(url), title_hint=downloaded_path.stem, tags_hint=[f"title:{downloaded_path.stem}"], media_kind_hint="file", full_metadata=None, progress=progress, config=config, ) downloaded_count += 1 debug("✓ Downloaded and emitted") except DownloadError as e: log(f"Download failed for {url}: {e}", file=sys.stderr) except Exception as e: log(f"Error processing {url}: {e}", file=sys.stderr) return downloaded_count, None def _expand_provider_items( self, *, piped_items: Sequence[Any], registry: Dict[str, Any], config: Dict[str, Any], ) -> List[Any]: get_search_provider = registry.get("get_search_provider") expanded_items: List[Any] = [] for item in piped_items: try: table = get_field(item, "table") media_kind = get_field(item, "media_kind") full_metadata = get_field(item, "full_metadata") target = get_field(item, "path") or get_field(item, "url") if (str(table or "").lower() == "alldebrid" and str(media_kind or "").lower() == "folder"): magnet_id = None if isinstance(full_metadata, dict): magnet_id = full_metadata.get("magnet_id") if (magnet_id is None and isinstance(target, str) and target.lower().startswith("alldebrid:magnet:")): try: magnet_id = int(target.split(":")[-1]) except Exception: magnet_id = None expanded, detail = ad_provider.expand_folder_item( item, get_search_provider, config, ) if detail: log( f"[download-file] AllDebrid magnet {magnet_id or 'unknown'} not ready ({detail or 'unknown'})", file=sys.stderr, ) continue if expanded: expanded_items.extend(expanded) continue expanded_items.append(item) except Exception: expanded_items.append(item) return expanded_items def _process_provider_items( self, *, piped_items: Sequence[Any], final_output_dir: Path, config: Dict[str, Any], quiet_mode: bool, registry: Dict[str, Any], progress: PipelineProgress, ) -> int: downloaded_count = 0 get_search_provider = registry.get("get_search_provider") SearchResult = registry.get("SearchResult") expanded_items = self._expand_provider_items( piped_items=piped_items, registry=registry, config=config ) for item in expanded_items: try: table = get_field(item, "table") title = get_field(item, "title") target = get_field(item, "path") or get_field(item, "url") media_kind = get_field(item, "media_kind") tags_val = get_field(item, "tag") tags_list: Optional[List[str]] if isinstance(tags_val, list): tags_list = [str(t) for t in tags_val if t] else: tags_list = None full_metadata = get_field(item, "full_metadata") if ((not full_metadata) and isinstance(item, dict) and isinstance(item.get("extra"), dict)): extra_md = item["extra"].get("full_metadata") if isinstance(extra_md, dict): full_metadata = extra_md # If this looks like a provider item and providers are available, prefer provider.download() downloaded_path: Optional[Path] = None attempted_provider_download = False provider_sr = None if table and get_search_provider and SearchResult: provider = get_search_provider(str(table), config) if provider is not None: attempted_provider_download = True sr = SearchResult( table=str(table), title=str(title or "Unknown"), path=str(target or ""), full_metadata=full_metadata if isinstance(full_metadata, dict) else {}, ) debug( f"[download-file] Downloading provider item via {table}: {sr.title}" ) # Preserve provider structure when possible (AllDebrid folders -> subfolders). output_dir = final_output_dir try: if str(table).strip().lower() == "alldebrid": output_dir = ad_provider.adjust_output_dir_for_alldebrid( final_output_dir, full_metadata if isinstance(full_metadata, dict) else None, item, ) except Exception: output_dir = final_output_dir downloaded_path = provider.download(sr, output_dir) provider_sr = sr # OpenLibrary: if provider download failed, do NOT try to download the OpenLibrary page HTML. if (downloaded_path is None and attempted_provider_download and str(table or "").lower() == "openlibrary"): availability = None reason = None if isinstance(full_metadata, dict): availability = full_metadata.get("availability") reason = full_metadata.get("availability_reason") msg = "[download-file] OpenLibrary item not downloadable" if availability or reason: msg += f" (availability={availability or ''} reason={reason or ''})" log(msg, file=sys.stderr) # Fallback: run a LibGen title search so the user can pick an alternative source. try: title_text = str(title or "").strip() if not title_text and isinstance(full_metadata, dict): title_text = str(full_metadata.get("title") or "").strip() if title_text: log( f"[download-file] Not available on OpenLibrary; searching LibGen for: {title_text}", file=sys.stderr, ) from cmdlet.search_file import CMDLET as _SEARCH_FILE_CMDLET fallback_query = title_text exec_fn = getattr(_SEARCH_FILE_CMDLET, "exec", None) if not callable(exec_fn): log( "[download-file] search-file cmdlet unavailable; cannot run LibGen fallback search", file=sys.stderr, ) continue ret = exec_fn( None, ["-provider", "libgen", "-query", fallback_query], config, ) # Promote the search-file table to a display overlay so it renders. try: table_obj = pipeline_context.get_last_result_table() items_obj = pipeline_context.get_last_result_items() if table_obj is not None: pipeline_context.set_last_result_table_overlay( table_obj, items_obj ) except Exception: pass try: return int(ret) # type: ignore[arg-type] except Exception: return 1 except Exception: pass continue # Fallback: if we have a direct HTTP URL, download it directly if (downloaded_path is None and isinstance(target, str) and target.startswith("http")): # Guard: provider landing pages (e.g. LibGen ads.php) are HTML, not files. # Never download these as "files". if str(table or "").lower() == "libgen": low = target.lower() if ("/ads.php" in low) or ("/file.php" in low) or ("/index.php" in low): log( "[download-file] Refusing to download LibGen landing page (expected provider to resolve file link)", file=sys.stderr, ) continue debug( f"[download-file] Provider item looks like direct URL, downloading: {target}" ) suggested_name = str(title).strip() if title is not None else None result_obj = _download_direct_file( target, final_output_dir, quiet=quiet_mode, suggested_filename=suggested_name, pipeline_progress=progress, ) downloaded_path = self._path_from_download_result(result_obj) if downloaded_path is None: log( f"Cannot download item (no provider handler / unsupported target): {title or target}", file=sys.stderr, ) continue # Allow providers to add/enrich tags and metadata during download. if str(table or "").lower() == "libgen" and provider_sr is not None: try: sr_tags = getattr(provider_sr, "tag", None) if tags_list is None and isinstance(sr_tags, set) and sr_tags: tags_list = sorted([str(t) for t in sr_tags if t]) except Exception: pass try: sr_md = getattr(provider_sr, "full_metadata", None) if isinstance(sr_md, dict) and sr_md: full_metadata = sr_md except Exception: pass try: if isinstance(full_metadata, dict): t = str(full_metadata.get("title") or "").strip() if t: title = t except Exception: pass self._emit_local_file( downloaded_path=downloaded_path, source=str(target) if target else None, title_hint=str(title) if title else downloaded_path.stem, tags_hint=tags_list, media_kind_hint=str(media_kind) if media_kind else None, full_metadata=full_metadata if isinstance(full_metadata, dict) else None, progress=progress, config=config, ) downloaded_count += 1 except DownloadError as e: log(f"Download failed: {e}", file=sys.stderr) except Exception as e: log(f"Error downloading item: {e}", file=sys.stderr) return downloaded_count # === Streaming helpers (yt-dlp) === @staticmethod def _append_urls_from_piped_result(raw_urls: List[str], result: Any) -> List[str]: if raw_urls: return raw_urls if not result: return raw_urls results_to_check = result if isinstance(result, list) else [result] for item in results_to_check: try: url = get_field(item, "url") or get_field(item, "target") except Exception: url = None if url: raw_urls.append(url) return raw_urls @staticmethod def _filter_supported_urls(raw_urls: Sequence[str]) -> tuple[List[str], List[str]]: supported = [url for url in (raw_urls or []) if is_url_supported_by_ytdlp(url)] unsupported = list(set(raw_urls or []) - set(supported or [])) return supported, unsupported def _parse_query_keyed_spec(self, query_spec: Optional[str]) -> Dict[str, List[str]]: if not query_spec: return {} try: keyed = self._parse_keyed_csv_spec(str(query_spec), default_key="hash") if not keyed: return {} def _alias(src: str, dest: str) -> None: try: values = keyed.get(src) except Exception: values = None if not values: return try: keyed.setdefault(dest, []).extend(list(values)) except Exception: pass try: keyed.pop(src, None) except Exception: pass for src in ("range", "ranges", "section", "sections"): _alias(src, "clip") for src in ("fmt", "f"): _alias(src, "format") for src in ("aud", "a"): _alias(src, "audio") return keyed except Exception: return {} @staticmethod def _extract_hash_override(query_spec: Optional[str], query_keyed: Dict[str, List[str]]) -> Optional[str]: try: hash_values = query_keyed.get("hash", []) if isinstance(query_keyed, dict) else [] hash_candidate = hash_values[-1] if hash_values else None if hash_candidate: return sh.parse_single_hash_query(f"hash:{hash_candidate}") try: has_non_hash_keys = bool( query_keyed and isinstance(query_keyed, dict) and any(k for k in query_keyed.keys() if str(k).strip().lower() != "hash") ) except Exception: has_non_hash_keys = False if has_non_hash_keys: return None return sh.parse_single_hash_query(str(query_spec)) if query_spec else None except Exception: return None def _parse_clip_ranges_and_apply_items( self, *, clip_spec: Optional[str], query_keyed: Dict[str, List[str]], parsed: Dict[str, Any], query_spec: Optional[str], ) -> tuple[Optional[List[tuple[int, int]]], bool, List[str]]: clip_ranges: Optional[List[tuple[int, int]]] = None clip_values: List[str] = [] item_values: List[str] = [] def _uniq(values: Sequence[str]) -> List[str]: seen: set[str] = set() out: List[str] = [] for v in values: key = str(v) if key in seen: continue seen.add(key) out.append(v) return out if clip_spec: keyed = self._parse_keyed_csv_spec(str(clip_spec), default_key="clip") clip_values.extend(keyed.get("clip", []) or []) item_values.extend(keyed.get("item", []) or []) if query_keyed: clip_values.extend(query_keyed.get("clip", []) or []) item_values.extend(query_keyed.get("item", []) or []) clip_values = _uniq(clip_values) item_values = _uniq(item_values) if item_values and not parsed.get("item"): parsed["item"] = ",".join([v for v in item_values if v]) if clip_values: clip_ranges = self._parse_time_ranges(",".join([v for v in clip_values if v])) if not clip_ranges: bad_spec = clip_spec or query_spec log(f"Invalid clip format: {bad_spec}", file=sys.stderr) return None, True, clip_values return clip_ranges, False, clip_values @staticmethod def _init_storage(config: Dict[str, Any]) -> tuple[Optional[Any], bool]: storage = None hydrus_available = True try: from Store import Store from API.HydrusNetwork import is_hydrus_available storage = Store(config=config or {}, suppress_debug=True) hydrus_available = bool(is_hydrus_available(config or {})) except Exception: storage = None return storage, hydrus_available @staticmethod def _cookiefile_str(ytdlp_tool: YtDlpTool) -> Optional[str]: try: cookie_path = ytdlp_tool.resolve_cookiefile() if cookie_path is not None and cookie_path.is_file(): return str(cookie_path) except Exception: pass return None def _list_formats_cached( self, u: str, *, playlist_items_value: Optional[str], formats_cache: Dict[str, Optional[List[Dict[str, Any]]]], ytdlp_tool: YtDlpTool, ) -> Optional[List[Dict[str, Any]]]: key = f"{u}||{playlist_items_value or ''}" if key in formats_cache: return formats_cache[key] fmts = list_formats( u, no_playlist=False, playlist_items=playlist_items_value, cookiefile=self._cookiefile_str(ytdlp_tool), ) formats_cache[key] = fmts return fmts def _is_browseable_format(self, fmt: Any) -> bool: if not isinstance(fmt, dict): return False format_id = str(fmt.get("format_id") or "").strip() if not format_id: return False ext = str(fmt.get("ext") or "").strip().lower() if ext in {"mhtml", "json"}: return False note = str(fmt.get("format_note") or "").lower() if "storyboard" in note: return False if format_id.lower().startswith("sb"): return False vcodec = str(fmt.get("vcodec", "none")) acodec = str(fmt.get("acodec", "none")) return not (vcodec == "none" and acodec == "none") def _format_id_for_query_index( self, query_format: str, url: str, formats_cache: Dict[str, Optional[List[Dict[str, Any]]]], ytdlp_tool: YtDlpTool, ) -> Optional[str]: import re if not query_format or not re.match(r"^\s*#?\d+\s*$", str(query_format)): return None try: idx = int(str(query_format).lstrip("#").strip()) except Exception: raise ValueError(f"Invalid format index: {query_format}") fmts = self._list_formats_cached( url, playlist_items_value=None, formats_cache=formats_cache, ytdlp_tool=ytdlp_tool, ) if not fmts: raise ValueError("Unable to list formats for the URL; cannot resolve numeric format index") candidate_formats = [f for f in fmts if self._is_browseable_format(f)] filtered_formats = candidate_formats if candidate_formats else list(fmts) if not filtered_formats: raise ValueError("No formats available for selection") if idx <= 0 or idx > len(filtered_formats): raise ValueError(f"Format index {idx} out of range (1..{len(filtered_formats)})") chosen = filtered_formats[idx - 1] selection_format_id = str(chosen.get("format_id") or "").strip() if not selection_format_id: raise ValueError("Selected format has no format_id") try: vcodec = str(chosen.get("vcodec", "none")) acodec = str(chosen.get("acodec", "none")) if vcodec != "none" and acodec == "none": selection_format_id = f"{selection_format_id}+ba" except Exception: pass return selection_format_id @staticmethod def _format_selector_for_query_height(query_format: str) -> Optional[str]: import re if query_format is None: return None s = str(query_format).strip().lower() m = re.match(r"^(\d{2,5})p$", s) if not m: return None try: height = int(m.group(1)) except Exception: return None if height <= 0: raise ValueError(f"Invalid height selection: {query_format}") return f"bv*[height<={height}]+ba" @staticmethod def _canonicalize_url_for_storage(*, requested_url: str, ytdlp_tool: YtDlpTool, playlist_items: Optional[str]) -> str: if playlist_items: return str(requested_url) try: cf = None try: cookie_path = ytdlp_tool.resolve_cookiefile() if cookie_path is not None and cookie_path.is_file(): cf = str(cookie_path) except Exception: cf = None pr = probe_url(requested_url, no_playlist=False, timeout_seconds=15, cookiefile=cf) if isinstance(pr, dict): for key in ("webpage_url", "original_url", "url", "requested_url"): value = pr.get(key) if isinstance(value, str) and value.strip(): return value.strip() except Exception: pass return str(requested_url) def _preflight_url_duplicate( self, *, storage: Any, hydrus_available: bool, final_output_dir: Path, candidate_url: str, extra_urls: Optional[Sequence[str]] = None, ) -> bool: if storage is None: debug("Preflight URL check skipped: storage unavailable") return True debug(f"Preflight URL check: candidate={candidate_url}") try: from SYS.metadata import normalize_urls except Exception: normalize_urls = None # type: ignore[assignment] needles: List[str] = [] if normalize_urls is not None: for raw in [candidate_url, *(list(extra_urls) if extra_urls else [])]: try: needles.extend(normalize_urls(raw)) except Exception: continue if not needles: needles = [str(candidate_url)] seen_needles: List[str] = [] for needle in needles: if needle and needle not in seen_needles: seen_needles.append(needle) needles = seen_needles try: debug(f"Preflight URL needles: {needles}") except Exception: pass url_matches: List[Dict[str, Any]] = [] try: from Store.HydrusNetwork import HydrusNetwork backend_names_all = storage.list_searchable_backends() backend_names: List[str] = [] skipped: List[str] = [] for backend_name in backend_names_all: try: backend = storage[backend_name] except Exception: continue try: if str(backend_name).strip().lower() == "temp": skipped.append(backend_name) continue except Exception: pass try: backend_location = getattr(backend, "_location", None) if backend_location and final_output_dir: backend_path = Path(str(backend_location)).expanduser().resolve() temp_path = Path(str(final_output_dir)).expanduser().resolve() if backend_path == temp_path: skipped.append(backend_name) continue except Exception: pass backend_names.append(backend_name) try: if skipped: debug(f"Preflight backends: {backend_names} (skipped temp: {skipped})") else: debug(f"Preflight backends: {backend_names}") except Exception: pass for backend_name in backend_names: backend = storage[backend_name] if isinstance(backend, HydrusNetwork) and not hydrus_available: continue backend_hits: List[Dict[str, Any]] = [] for needle in needles: try: backend_hits = backend.search(f"url:{needle}", limit=25) or [] if backend_hits: break except Exception: continue if backend_hits: url_matches.extend( [ dict(x) if isinstance(x, dict) else {"title": str(x)} for x in backend_hits ] ) if len(url_matches) >= 25: url_matches = url_matches[:25] break except Exception: url_matches = [] if not url_matches: debug("Preflight URL check: no matches") return True try: current_cmd_text = pipeline_context.get_current_command_text("") except Exception: current_cmd_text = "" try: stage_ctx = pipeline_context.get_stage_context() except Exception: stage_ctx = None in_pipeline = bool(stage_ctx is not None or ("|" in str(current_cmd_text or ""))) if in_pipeline: try: cached_cmd = pipeline_context.load_value("preflight.url_duplicates.command", default="") cached_decision = pipeline_context.load_value("preflight.url_duplicates.continue", default=None) except Exception: cached_cmd = "" cached_decision = None if cached_decision is not None and str(cached_cmd or "") == str(current_cmd_text or ""): if bool(cached_decision): return True try: pipeline_context.request_pipeline_stop(reason="duplicate-url declined", exit_code=0) except Exception: pass return False table = ResultTable(f"URL already exists ({len(url_matches)} match(es))") results_list: List[Dict[str, Any]] = [] for item in url_matches: if "title" not in item: item["title"] = item.get("name") or item.get("target") or item.get("path") or "Result" try: from SYS.result_table import build_display_row except Exception: build_display_row = None # type: ignore if callable(build_display_row): display_row = build_display_row(item, keys=["title", "store", "hash", "ext", "size"]) else: display_row = { "title": item.get("title"), "store": item.get("store"), "hash": item.get("hash") or item.get("file_hash") or item.get("sha256"), "ext": str(item.get("ext") or ""), "size": item.get("size") or item.get("size_bytes"), } table.add_result(display_row) results_list.append(item) pipeline_context.set_current_stage_table(table) pipeline_context.set_last_result_table(table, results_list) suspend = getattr(pipeline_context, "suspend_live_progress", None) used_suspend = False cm: AbstractContextManager[Any] = nullcontext() if callable(suspend): try: maybe_cm = suspend() if maybe_cm is not None: cm = maybe_cm # type: ignore[assignment] used_suspend = True except Exception: cm = nullcontext() used_suspend = False with cm: get_stderr_console().print(table) setattr(table, "_rendered_by_cmdlet", True) answered_yes = bool(Confirm.ask("Continue anyway?", default=False, console=get_stderr_console())) if in_pipeline: try: existing = pipeline_context.load_value("preflight", default=None) except Exception: existing = None preflight_cache: Dict[str, Any] = existing if isinstance(existing, dict) else {} url_dup_cache = preflight_cache.get("url_duplicates") if not isinstance(url_dup_cache, dict): url_dup_cache = {} url_dup_cache["command"] = str(current_cmd_text or "") url_dup_cache["continue"] = bool(answered_yes) preflight_cache["url_duplicates"] = url_dup_cache try: pipeline_context.store_value("preflight", preflight_cache) except Exception: pass if not answered_yes: if in_pipeline and used_suspend: try: pipeline_context.request_pipeline_stop(reason="duplicate-url declined", exit_code=0) except Exception: pass return False return True def _preflight_url_duplicates_bulk( self, *, storage: Any, hydrus_available: bool, final_output_dir: Path, urls: Sequence[str], ) -> bool: if storage is None: debug("Bulk URL preflight skipped: storage unavailable") return True try: current_cmd_text = pipeline_context.get_current_command_text("") except Exception: current_cmd_text = "" try: stage_ctx = pipeline_context.get_stage_context() except Exception: stage_ctx = None in_pipeline = bool(stage_ctx is not None or ("|" in str(current_cmd_text or ""))) if in_pipeline: try: cached_cmd = pipeline_context.load_value("preflight.url_duplicates.command", default="") cached_decision = pipeline_context.load_value("preflight.url_duplicates.continue", default=None) except Exception: cached_cmd = "" cached_decision = None if cached_decision is not None and str(cached_cmd or "") == str(current_cmd_text or ""): if bool(cached_decision): return True try: pipeline_context.request_pipeline_stop(reason="duplicate-url declined", exit_code=0) except Exception: pass return False unique_urls: List[str] = [] for u in urls or []: s = str(u or "").strip() if s and s not in unique_urls: unique_urls.append(s) if len(unique_urls) <= 1: return True try: from SYS.metadata import normalize_urls except Exception: normalize_urls = None # type: ignore[assignment] def _httpish(value: str) -> bool: try: return bool(value) and (value.startswith("http://") or value.startswith("https://")) except Exception: return False url_needles: Dict[str, List[str]] = {} for u in unique_urls: needles: List[str] = [] if normalize_urls is not None: try: needles.extend([n for n in (normalize_urls(u) or []) if isinstance(n, str)]) except Exception: needles = [] if not needles: needles = [u] filtered: List[str] = [] for n in needles: n2 = str(n or "").strip() if not n2: continue if not _httpish(n2): continue if n2 not in filtered: filtered.append(n2) url_needles[u] = filtered if filtered else [u] backend_names: List[str] = [] try: backend_names_all = storage.list_searchable_backends() except Exception: backend_names_all = [] for backend_name in backend_names_all: try: backend = storage[backend_name] except Exception: continue try: if str(backend_name).strip().lower() == "temp": continue except Exception: pass try: backend_location = getattr(backend, "_location", None) if backend_location and final_output_dir: backend_path = Path(str(backend_location)).expanduser().resolve() temp_path = Path(str(final_output_dir)).expanduser().resolve() if backend_path == temp_path: continue except Exception: pass backend_names.append(backend_name) if not backend_names: debug("Bulk URL preflight skipped: no searchable backends") return True seen_pairs: set[tuple[str, str]] = set() matched_urls: set[str] = set() match_rows: List[Dict[str, Any]] = [] max_rows = 200 try: from Store.HydrusNetwork import HydrusNetwork except Exception: HydrusNetwork = None # type: ignore for backend_name in backend_names: if len(match_rows) >= max_rows: break try: backend = storage[backend_name] except Exception: continue if HydrusNetwork is not None and isinstance(backend, HydrusNetwork): if not hydrus_available: continue client = getattr(backend, "_client", None) if client is None: continue for original_url, needles in url_needles.items(): if len(match_rows) >= max_rows: break if (original_url, str(backend_name)) in seen_pairs: continue found_hash: Optional[str] = None found = False for needle in (needles or [])[:3]: if not _httpish(needle): continue try: from API.HydrusNetwork import HydrusRequestSpec spec = HydrusRequestSpec( method="GET", endpoint="/add_urls/get_url_files", query={"url": needle}, ) response = client._perform_request(spec) # type: ignore[attr-defined] raw_hashes = None if isinstance(response, dict): raw_hashes = response.get("hashes") or response.get("file_hashes") raw_ids = response.get("file_ids") has_ids = isinstance(raw_ids, list) and len(raw_ids) > 0 has_hashes = isinstance(raw_hashes, list) and len(raw_hashes) > 0 if has_hashes: try: found_hash = str(raw_hashes[0]).strip() # type: ignore[index] except Exception: found_hash = None if has_ids or has_hashes: found = True break except Exception: continue if not found: continue seen_pairs.add((original_url, str(backend_name))) matched_urls.add(original_url) display_row = { "title": "(exists)", "store": str(backend_name), "hash": found_hash or "", "url": original_url, "columns": [ ("Title", "(exists)"), ("Store", str(backend_name)), ("Hash", found_hash or ""), ("URL", original_url), ], } match_rows.append(display_row) continue for original_url, needles in url_needles.items(): if len(match_rows) >= max_rows: break if (original_url, str(backend_name)) in seen_pairs: continue backend_hits: List[Dict[str, Any]] = [] for needle in (needles or [])[:3]: try: backend_hits = backend.search(f"url:{needle}", limit=1) or [] if backend_hits: break except Exception: continue if not backend_hits: continue seen_pairs.add((original_url, str(backend_name))) matched_urls.add(original_url) hit = backend_hits[0] title = hit.get("title") or hit.get("name") or hit.get("target") or hit.get("path") or "(exists)" file_hash = hit.get("hash") or hit.get("file_hash") or hit.get("sha256") or "" try: from SYS.result_table import build_display_row except Exception: build_display_row = None # type: ignore extracted = { "title": str(title), "store": str(hit.get("store") or backend_name), "hash": str(file_hash or ""), "ext": "", "size": None, } if callable(build_display_row): try: extracted = build_display_row(hit, keys=["title", "store", "hash", "ext", "size"]) except Exception: pass extracted["title"] = str(title) extracted["store"] = str(hit.get("store") or backend_name) extracted["hash"] = str(file_hash or "") ext = extracted.get("ext") size_val = extracted.get("size") display_row = { "title": str(title), "store": str(hit.get("store") or backend_name), "hash": str(file_hash or ""), "ext": str(ext or ""), "size": size_val, "url": original_url, "columns": [ ("Title", str(title)), ("Store", str(hit.get("store") or backend_name)), ("Hash", str(file_hash or "")), ("Ext", str(ext or "")), ("Size", size_val), ("URL", original_url), ], } match_rows.append(display_row) if not match_rows: debug("Bulk URL preflight: no matches") return True table = ResultTable(f"URL already exists ({len(matched_urls)} url(s))", max_columns=10) table.set_no_choice(True) try: table.set_preserve_order(True) except Exception: pass for row in match_rows: table.add_result(row) try: pipeline_context.set_last_result_table_overlay(table, match_rows) except Exception: pass suspend = getattr(pipeline_context, "suspend_live_progress", None) cm: AbstractContextManager[Any] = nullcontext() if callable(suspend): try: maybe_cm = suspend() if maybe_cm is not None: cm = maybe_cm # type: ignore[assignment] except Exception: cm = nullcontext() with cm: get_stderr_console().print(table) setattr(table, "_rendered_by_cmdlet", True) answered_yes = bool(Confirm.ask("Continue anyway?", default=False, console=get_stderr_console())) if in_pipeline: try: existing = pipeline_context.load_value("preflight", default=None) except Exception: existing = None preflight_cache: Dict[str, Any] = existing if isinstance(existing, dict) else {} url_dup_cache = preflight_cache.get("url_duplicates") if not isinstance(url_dup_cache, dict): url_dup_cache = {} url_dup_cache["command"] = str(current_cmd_text or "") url_dup_cache["continue"] = bool(answered_yes) preflight_cache["url_duplicates"] = url_dup_cache try: pipeline_context.store_value("preflight", preflight_cache) except Exception: pass if not answered_yes: if in_pipeline: try: pipeline_context.request_pipeline_stop(reason="duplicate-url declined", exit_code=0) except Exception: pass return False return True def _maybe_show_playlist_table(self, *, url: str, ytdlp_tool: YtDlpTool) -> bool: try: cf = self._cookiefile_str(ytdlp_tool) pr = probe_url(url, no_playlist=False, timeout_seconds=15, cookiefile=cf) except Exception: pr = None if not isinstance(pr, dict): return False entries = pr.get("entries") if not isinstance(entries, list) or len(entries) <= 1: return False extractor_name = "" try: extractor_name = str(pr.get("extractor") or pr.get("extractor_key") or "").strip().lower() except Exception: extractor_name = "" table_type: Optional[str] = None if "bandcamp" in extractor_name: table_type = "bandcamp" elif "youtube" in extractor_name: table_type = "youtube" max_rows = 200 display_entries = entries[:max_rows] def _entry_to_url(entry: Any) -> Optional[str]: if not isinstance(entry, dict): return None for key in ("webpage_url", "original_url", "url"): v = entry.get(key) if isinstance(v, str) and v.strip(): s_val = v.strip() try: if urlparse(s_val).scheme in {"http", "https"}: return s_val except Exception: return s_val entry_id = entry.get("id") if isinstance(entry_id, str) and entry_id.strip(): extractor_name_inner = str(pr.get("extractor") or pr.get("extractor_key") or "").lower() if "youtube" in extractor_name_inner: return f"https://www.youtube.com/watch?v={entry_id.strip()}" return None table = ResultTable() safe_url = str(url or "").strip() table.title = f'download-file -url "{safe_url}"' if safe_url else "download-file" if table_type: try: table.set_table(table_type) except Exception: table.table = table_type table.set_source_command("download-file", []) try: table.set_preserve_order(True) except Exception: pass results_list: List[Dict[str, Any]] = [] for idx, entry in enumerate(display_entries, 1): title = None uploader = None duration = None entry_url = _entry_to_url(entry) try: if isinstance(entry, dict): title = entry.get("title") uploader = entry.get("uploader") or pr.get("uploader") duration = entry.get("duration") except Exception: pass row: Dict[str, Any] = { "table": "download-file", "title": str(title or f"Item {idx}"), "detail": str(uploader or ""), "media_kind": "playlist-item", "playlist_index": idx, "_selection_args": (["-url", str(entry_url)] if entry_url else ["-url", str(url), "-item", str(idx)]), "url": entry_url, "target": entry_url, "columns": [ ("#", str(idx)), ("Title", str(title or "")), ("Duration", str(duration or "")), ("Uploader", str(uploader or "")), ], } results_list.append(row) table.add_result(row) pipeline_context.set_current_stage_table(table) pipeline_context.set_last_result_table(table, results_list) try: suspend = getattr(pipeline_context, "suspend_live_progress", None) cm: AbstractContextManager[Any] = nullcontext() if callable(suspend): maybe_cm = suspend() if maybe_cm is not None: cm = maybe_cm # type: ignore[assignment] with cm: get_stderr_console().print(table) except Exception: pass setattr(table, "_rendered_by_cmdlet", True) return True def _maybe_show_format_table_for_single_url( self, *, mode: str, clip_spec: Any, clip_values: Sequence[str], playlist_items: Optional[str], ytdl_format: Any, supported_url: Sequence[str], playlist_selection_handled: bool, ytdlp_tool: YtDlpTool, formats_cache: Dict[str, Optional[List[Dict[str, Any]]]], storage: Any, hydrus_available: bool, final_output_dir: Path, args: Sequence[str], ) -> Optional[int]: if ( mode != "audio" and not clip_spec and not clip_values and not playlist_items and not ytdl_format and len(supported_url) == 1 and not playlist_selection_handled ): url = supported_url[0] canonical_url = self._canonicalize_url_for_storage( requested_url=url, ytdlp_tool=ytdlp_tool, playlist_items=playlist_items, ) if not self._preflight_url_duplicate( storage=storage, hydrus_available=hydrus_available, final_output_dir=final_output_dir, candidate_url=canonical_url, extra_urls=[url], ): log(f"Skipping download: {url}", file=sys.stderr) return 0 formats = self._list_formats_cached( url, playlist_items_value=None, formats_cache=formats_cache, ytdlp_tool=ytdlp_tool, ) if formats and len(formats) > 1: candidate_formats = [f for f in formats if self._is_browseable_format(f)] filtered_formats = candidate_formats if candidate_formats else list(formats) debug(f"Formatlist: showing {len(filtered_formats)} formats (raw={len(formats)})") base_cmd = f'download-file "{url}"' remaining_args = [arg for arg in args if arg not in [url] and not arg.startswith("-")] if remaining_args: base_cmd += " " + " ".join(remaining_args) table = ResultTable(title=f"Available formats for {url}", max_columns=10, preserve_order=True) table.set_table("ytdlp.formatlist") table.set_source_command("download-file", [url]) results_list: List[Dict[str, Any]] = [] for idx, fmt in enumerate(filtered_formats, 1): resolution = fmt.get("resolution", "") ext = fmt.get("ext", "") vcodec = fmt.get("vcodec", "none") acodec = fmt.get("acodec", "none") filesize = fmt.get("filesize") filesize_approx = fmt.get("filesize_approx") format_id = fmt.get("format_id", "") selection_format_id = format_id try: if vcodec != "none" and acodec == "none" and format_id: selection_format_id = f"{format_id}+ba" except Exception: selection_format_id = format_id size_str = "" size_prefix = "" size_bytes = filesize if not size_bytes: size_bytes = filesize_approx if size_bytes: size_prefix = "~" try: if isinstance(size_bytes, (int, float)) and size_bytes > 0: size_mb = float(size_bytes) / (1024 * 1024) size_str = f"{size_prefix}{size_mb:.1f}MB" except Exception: size_str = "" desc_parts: List[str] = [] if resolution and resolution != "audio only": desc_parts.append(resolution) if ext: desc_parts.append(str(ext).upper()) if vcodec != "none": desc_parts.append(f"v:{vcodec}") if acodec != "none": desc_parts.append(f"a:{acodec}") if size_str: desc_parts.append(size_str) format_desc = " | ".join(desc_parts) format_dict = { "table": "download-file", "title": f"Format {format_id}", "url": url, "target": url, "detail": format_desc, "annotations": [ext, resolution] if resolution else [ext], "media_kind": "format", "cmd": base_cmd, "columns": [ ("ID", format_id), ("Resolution", resolution or "N/A"), ("Ext", ext), ("Size", size_str or ""), ("Video", vcodec), ("Audio", acodec), ], "full_metadata": { "format_id": format_id, "url": url, "item_selector": selection_format_id, }, "_selection_args": None, } selection_args: List[str] = ["-format", selection_format_id] try: if (not clip_spec) and clip_values: selection_args.extend(["-query", f"clip:{','.join([v for v in clip_values if v])}"]) except Exception: pass format_dict["_selection_args"] = selection_args results_list.append(format_dict) table.add_result(format_dict) try: suspend = getattr(pipeline_context, "suspend_live_progress", None) cm: AbstractContextManager[Any] = nullcontext() if callable(suspend): maybe_cm = suspend() if maybe_cm is not None: cm = maybe_cm # type: ignore[assignment] with cm: get_stderr_console().print(table) except Exception: pass setattr(table, "_rendered_by_cmdlet", True) pipeline_context.set_current_stage_table(table) pipeline_context.set_last_result_table(table, results_list) log(f"", file=sys.stderr) return 0 return None def _download_supported_urls( self, *, supported_url: Sequence[str], ytdlp_tool: YtDlpTool, args: Sequence[str], config: Dict[str, Any], final_output_dir: Path, mode: str, clip_spec: Any, clip_ranges: Optional[List[tuple[int, int]]], query_hash_override: Optional[str], embed_chapters: bool, write_sub: bool, quiet_mode: bool, playlist_items: Optional[str], ytdl_format: Any, skip_per_url_preflight: bool, forced_single_format_id: Optional[str], forced_single_format_for_batch: bool, formats_cache: Dict[str, Optional[List[Dict[str, Any]]]], storage: Any, hydrus_available: bool, ) -> int: downloaded_count = 0 downloaded_pipe_objects: List[Dict[str, Any]] = [] pipe_seq = 0 clip_sections_spec = self._build_clip_sections_spec(clip_ranges) if clip_sections_spec: try: debug(f"Clip sections spec: {clip_sections_spec}") except Exception: pass for url in supported_url: try: debug(f"Processing: {url}") canonical_url = self._canonicalize_url_for_storage( requested_url=url, ytdlp_tool=ytdlp_tool, playlist_items=playlist_items, ) if not skip_per_url_preflight: if not self._preflight_url_duplicate( storage=storage, hydrus_available=hydrus_available, final_output_dir=final_output_dir, candidate_url=canonical_url, extra_urls=[url], ): log(f"Skipping download: {url}", file=sys.stderr) continue PipelineProgress(pipeline_context).begin_steps(2) actual_format = ytdl_format actual_playlist_items = playlist_items if playlist_items and not ytdl_format: import re if re.search(r"[^0-9,-]", playlist_items): actual_format = playlist_items actual_playlist_items = None if mode == "audio" and not actual_format: actual_format = "bestaudio" if mode == "video" and not actual_format: configured = (ytdlp_tool.default_format("video") or "").strip() if configured and configured != "bestvideo+bestaudio/best": actual_format = configured forced_single_applied = False if ( forced_single_format_for_batch and forced_single_format_id and not ytdl_format and not actual_playlist_items ): actual_format = forced_single_format_id forced_single_applied = True if ( actual_format and isinstance(actual_format, str) and mode != "audio" and "+" not in actual_format and "/" not in actual_format and "[" not in actual_format and actual_format not in {"best", "bv", "ba", "b"} and not forced_single_applied ): try: formats = self._list_formats_cached( url, playlist_items_value=actual_playlist_items, formats_cache=formats_cache, ytdlp_tool=ytdlp_tool, ) if formats: fmt_match = next((f for f in formats if str(f.get("format_id", "")) == actual_format), None) if fmt_match: vcodec = str(fmt_match.get("vcodec", "none")) acodec = str(fmt_match.get("acodec", "none")) if vcodec != "none" and acodec == "none": debug(f"Selected video-only format {actual_format}; using {actual_format}+ba for audio") actual_format = f"{actual_format}+ba" except Exception: pass attempted_single_format_fallback = False while True: try: opts = DownloadOptions( url=url, mode=mode, output_dir=final_output_dir, ytdl_format=actual_format, cookies_path=ytdlp_tool.resolve_cookiefile(), clip_sections=clip_sections_spec, playlist_items=actual_playlist_items, quiet=quiet_mode, no_playlist=False, embed_chapters=embed_chapters, write_sub=write_sub, ) PipelineProgress(pipeline_context).step("downloading") debug(f"Starting download with 5-minute timeout...") result_obj = _download_with_timeout(opts, timeout_seconds=300) debug(f"Download completed, building pipe object...") break except DownloadError as e: cause = getattr(e, "__cause__", None) detail = "" try: detail = str(cause or "") except Exception: detail = "" if ("requested format is not available" in (detail or "").lower()) and mode != "audio": if ( forced_single_format_for_batch and forced_single_format_id and not ytdl_format and not actual_playlist_items and not attempted_single_format_fallback ): attempted_single_format_fallback = True actual_format = forced_single_format_id debug(f"Only one format available (playlist preflight); retrying with: {actual_format}") continue formats = self._list_formats_cached( url, playlist_items_value=actual_playlist_items, formats_cache=formats_cache, ytdlp_tool=ytdlp_tool, ) if ( (not attempted_single_format_fallback) and isinstance(formats, list) and len(formats) == 1 and isinstance(formats[0], dict) ): only = formats[0] fallback_format = str(only.get("format_id") or "").strip() selection_format_id = fallback_format try: vcodec = str(only.get("vcodec", "none")) acodec = str(only.get("acodec", "none")) if vcodec != "none" and acodec == "none" and fallback_format: selection_format_id = f"{fallback_format}+ba" except Exception: selection_format_id = fallback_format if selection_format_id: attempted_single_format_fallback = True actual_format = selection_format_id debug(f"Only one format available; retrying with: {actual_format}") continue if formats: formats_to_show = formats table = ResultTable(title=f"Available formats for {url}", max_columns=10, preserve_order=True) table.set_table("ytdlp.formatlist") table.set_source_command("download-file", [url]) results_list: List[Dict[str, Any]] = [] for idx, fmt in enumerate(formats_to_show, 1): resolution = fmt.get("resolution", "") ext = fmt.get("ext", "") vcodec = fmt.get("vcodec", "none") acodec = fmt.get("acodec", "none") filesize = fmt.get("filesize") filesize_approx = fmt.get("filesize_approx") format_id = fmt.get("format_id", "") selection_format_id = format_id try: if vcodec != "none" and acodec == "none" and format_id: selection_format_id = f"{format_id}+ba" except Exception: selection_format_id = format_id size_str = "" size_prefix = "" size_bytes = filesize if not size_bytes: size_bytes = filesize_approx if size_bytes: size_prefix = "~" try: if isinstance(size_bytes, (int, float)) and size_bytes > 0: size_mb = float(size_bytes) / (1024 * 1024) size_str = f"{size_prefix}{size_mb:.1f}MB" except Exception: size_str = "" desc_parts: List[str] = [] if resolution and resolution != "audio only": desc_parts.append(str(resolution)) if ext: desc_parts.append(str(ext).upper()) if vcodec != "none": desc_parts.append(f"v:{vcodec}") if acodec != "none": desc_parts.append(f"a:{acodec}") if size_str: desc_parts.append(size_str) format_desc = " | ".join(desc_parts) format_dict: Dict[str, Any] = { "table": "download-file", "title": f"Format {format_id}", "url": url, "target": url, "detail": format_desc, "media_kind": "format", "columns": [ ("ID", format_id), ("Resolution", resolution or "N/A"), ("Ext", ext), ("Size", size_str or ""), ("Video", vcodec), ("Audio", acodec), ], "full_metadata": { "format_id": format_id, "url": url, "item_selector": selection_format_id, }, "_selection_args": ["-format", selection_format_id], } results_list.append(format_dict) table.add_result(format_dict) pipeline_context.set_current_stage_table(table) pipeline_context.set_last_result_table(table, results_list) try: suspend = getattr(pipeline_context, "suspend_live_progress", None) cm: AbstractContextManager[Any] = nullcontext() if callable(suspend): maybe_cm = suspend() if maybe_cm is not None: cm = maybe_cm # type: ignore[assignment] with cm: get_stderr_console().print(table) except Exception: pass PipelineProgress(pipeline_context).step("awaiting selection") log("Requested format is not available; select a working format with @N", file=sys.stderr) return 0 raise results_to_emit: List[Any] = [] if isinstance(result_obj, list): results_to_emit = list(result_obj) else: paths = getattr(result_obj, "paths", None) if isinstance(paths, list) and paths: for p in paths: try: p_path = Path(p) except Exception: continue try: if p_path.suffix.lower() in _best_subtitle_sidecar.__defaults__[0]: continue except Exception: pass if not p_path.exists() or p_path.is_dir(): continue try: hv = sha256_file(p_path) except Exception: hv = None results_to_emit.append( DownloadMediaResult( path=p_path, info=getattr(result_obj, "info", {}) or {}, tag=list(getattr(result_obj, "tag", []) or []), source_url=getattr(result_obj, "source_url", None) or opts.url, hash_value=hv, ) ) else: results_to_emit = [result_obj] pipe_objects: List[Dict[str, Any]] = [] for downloaded in results_to_emit: po = self._build_pipe_object(downloaded, url, opts) pipe_seq += 1 try: po.setdefault("pipe_index", pipe_seq) except Exception: pass try: info = downloaded.info if isinstance(getattr(downloaded, "info", None), dict) else {} except Exception: info = {} chapters_text = _format_chapters_note(info) if embed_chapters else None if chapters_text: notes = po.get("notes") if not isinstance(notes, dict): notes = {} notes.setdefault("chapters", chapters_text) po["notes"] = notes if write_sub: try: media_path = Path(str(po.get("path") or "")) except Exception: media_path = None if media_path is not None and media_path.exists() and media_path.is_file(): sub_path = _best_subtitle_sidecar(media_path) if sub_path is not None: sub_text = _read_text_file(sub_path) if sub_text: notes = po.get("notes") if not isinstance(notes, dict): notes = {} notes["sub"] = sub_text po["notes"] = notes try: sub_path.unlink() except Exception: pass pipe_objects.append(po) try: if clip_ranges and len(pipe_objects) == len(clip_ranges): source_hash = query_hash_override or self._find_existing_hash_for_url( storage, canonical_url, hydrus_available=hydrus_available, ) self._apply_clip_decorations(pipe_objects, clip_ranges, source_king_hash=source_hash) except Exception: pass debug(f"Emitting {len(pipe_objects)} result(s) to pipeline...") PipelineProgress(pipeline_context).step("finalized") stage_ctx = pipeline_context.get_stage_context() emit_enabled = bool(stage_ctx is not None) for pipe_obj_dict in pipe_objects: if emit_enabled: pipeline_context.emit(pipe_obj_dict) if pipe_obj_dict.get("url"): pipe_obj = coerce_to_pipe_object(pipe_obj_dict) register_url_with_local_library(pipe_obj, config) try: downloaded_pipe_objects.append(pipe_obj_dict) except Exception: pass downloaded_count += len(pipe_objects) debug("✓ Downloaded and emitted") except DownloadError as e: log(f"Download failed for {url}: {e}", file=sys.stderr) except Exception as e: log(f"Error processing {url}: {e}", file=sys.stderr) if downloaded_count > 0: debug(f"✓ Successfully processed {downloaded_count} URL(s)") return 0 log("No downloads completed", file=sys.stderr) return 1 def _run_streaming_urls( self, *, streaming_urls: List[str], args: Sequence[str], config: Dict[str, Any], parsed: Dict[str, Any], ) -> int: try: debug("Starting streaming download handler") ytdlp_tool = YtDlpTool(config) raw_url = list(streaming_urls) supported_url, unsupported_list = self._filter_supported_urls(raw_url) if not supported_url: log("No yt-dlp-supported url to download", file=sys.stderr) return 1 if unsupported_list: debug(f"Skipping {len(unsupported_list)} unsupported url (use direct HTTP mode)") final_output_dir = self._resolve_streaming_output_dir(parsed, config) if not final_output_dir: return 1 debug(f"Output directory: {final_output_dir}") clip_spec = parsed.get("clip") query_spec = parsed.get("query") query_keyed = self._parse_query_keyed_spec(str(query_spec) if query_spec is not None else None) query_hash_override = self._extract_hash_override(str(query_spec) if query_spec is not None else None, query_keyed) embed_chapters = True write_sub = True query_format: Optional[str] = None try: fmt_values = query_keyed.get("format", []) if isinstance(query_keyed, dict) else [] fmt_candidate = fmt_values[-1] if fmt_values else None if fmt_candidate is not None: query_format = str(fmt_candidate).strip() except Exception: query_format = None query_audio: Optional[bool] = None try: audio_values = query_keyed.get("audio", []) if isinstance(query_keyed, dict) else [] audio_candidate = audio_values[-1] if audio_values else None if audio_candidate is not None: s_val = str(audio_candidate).strip().lower() if s_val in {"1", "true", "t", "yes", "y", "on"}: query_audio = True elif s_val in {"0", "false", "f", "no", "n", "off"}: query_audio = False elif s_val: query_audio = True except Exception: query_audio = None query_wants_audio = False if query_format: try: query_wants_audio = str(query_format).strip().lower() == "audio" except Exception: query_wants_audio = False audio_flag = bool(parsed.get("audio") is True) wants_audio = audio_flag if query_audio is not None: wants_audio = wants_audio or bool(query_audio) else: wants_audio = wants_audio or bool(query_wants_audio) mode = "audio" if wants_audio else "video" clip_ranges, clip_invalid, clip_values = self._parse_clip_ranges_and_apply_items( clip_spec=str(clip_spec) if clip_spec is not None else None, query_keyed=query_keyed, parsed=parsed, query_spec=str(query_spec) if query_spec is not None else None, ) if clip_invalid: return 1 if clip_ranges: try: debug(f"Clip ranges: {clip_ranges}") except Exception: pass quiet_mode = bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False storage, hydrus_available = self._init_storage(config if isinstance(config, dict) else {}) formats_cache: Dict[str, Optional[List[Dict[str, Any]]]] = {} playlist_items = str(parsed.get("item")) if parsed.get("item") else None ytdl_format = parsed.get("format") if not ytdl_format and query_format and not query_wants_audio: try: height_selector = self._format_selector_for_query_height(query_format) except ValueError as e: log(f"Error parsing format selection: {e}", file=sys.stderr) return 1 if height_selector: ytdl_format = height_selector else: import re if not re.match(r"^\s*#?\d+\s*$", str(query_format)): ytdl_format = query_format playlist_selection_handled = False if len(supported_url) == 1 and not playlist_items and not ytdl_format: candidate_url = supported_url[0] if query_format and not query_wants_audio: try: idx_fmt = self._format_id_for_query_index(query_format, candidate_url, formats_cache, ytdlp_tool) except ValueError as e: log(f"Error parsing format selection: {e}", file=sys.stderr) return 1 if idx_fmt: debug(f"Resolved numeric format selection '{query_format}' -> {idx_fmt}") ytdl_format = idx_fmt if not ytdl_format: if self._maybe_show_playlist_table(url=candidate_url, ytdlp_tool=ytdlp_tool): playlist_selection_handled = True try: last_table = pipeline_context.get_last_result_table() if hasattr(pipeline_context, "get_last_result_table") else None if hasattr(last_table, "rows") and getattr(last_table, "rows", None): sample_index = 1 sample_fmt_id = None try: sample_row = last_table.rows[0] sample_fmt_id = sample_row._full_metadata.get("item_selector") if getattr(sample_row, "_full_metadata", None) else None except Exception: sample_fmt_id = None try: sample_pipeline = f'download-file "{candidate_url}"' hint = ( "To select non-interactively, re-run with an explicit format: " "e.g. mm \"{pipeline} -format {fmt} | add-file -store \" or " "mm \"{pipeline} -query 'format:{index}' | add-file -store \"" ).format( pipeline=sample_pipeline, fmt=sample_fmt_id or "", index=sample_index, ) log(hint, file=sys.stderr) except Exception: pass except Exception: pass return 0 skip_per_url_preflight = False if len(supported_url) > 1: if not self._preflight_url_duplicates_bulk( storage=storage, hydrus_available=hydrus_available, final_output_dir=final_output_dir, urls=list(supported_url), ): return 0 skip_per_url_preflight = True forced_single_format_id: Optional[str] = None forced_single_format_for_batch = False if len(supported_url) > 1 and not playlist_items and not ytdl_format: try: sample_url = str(supported_url[0]) fmts = self._list_formats_cached( sample_url, playlist_items_value=None, formats_cache=formats_cache, ytdlp_tool=ytdlp_tool, ) if isinstance(fmts, list) and len(fmts) == 1 and isinstance(fmts[0], dict): only_id = str(fmts[0].get("format_id") or "").strip() if only_id: forced_single_format_id = only_id forced_single_format_for_batch = True debug( f"Playlist format preflight: only one format available; using {forced_single_format_id} for all items" ) except Exception: forced_single_format_id = None forced_single_format_for_batch = False early_ret = self._maybe_show_format_table_for_single_url( mode=mode, clip_spec=clip_spec, clip_values=clip_values, playlist_items=playlist_items, ytdl_format=ytdl_format, supported_url=supported_url, playlist_selection_handled=playlist_selection_handled, ytdlp_tool=ytdlp_tool, formats_cache=formats_cache, storage=storage, hydrus_available=hydrus_available, final_output_dir=final_output_dir, args=args, ) if early_ret is not None: return int(early_ret) return self._download_supported_urls( supported_url=supported_url, ytdlp_tool=ytdlp_tool, args=args, config=config, final_output_dir=final_output_dir, mode=mode, clip_spec=clip_spec, clip_ranges=clip_ranges, query_hash_override=query_hash_override, embed_chapters=embed_chapters, write_sub=write_sub, quiet_mode=quiet_mode, playlist_items=playlist_items, ytdl_format=ytdl_format, skip_per_url_preflight=skip_per_url_preflight, forced_single_format_id=forced_single_format_id, forced_single_format_for_batch=forced_single_format_for_batch, formats_cache=formats_cache, storage=storage, hydrus_available=hydrus_available, ) except Exception as e: log(f"Error in streaming download handler: {e}", file=sys.stderr) return 1 def _resolve_streaming_output_dir(self, parsed: Dict[str, Any], config: Dict[str, Any]) -> Optional[Path]: path_override = parsed.get("path") if path_override: try: candidate = Path(str(path_override)).expanduser() if candidate.suffix: candidate = candidate.parent candidate.mkdir(parents=True, exist_ok=True) debug(f"Using output directory override: {candidate}") return candidate except Exception as e: log(f"Invalid -path output directory: {e}", file=sys.stderr) return None try: temp_value = (config or {}).get("temp") if isinstance(config, dict) else None except Exception: temp_value = None if temp_value: try: candidate = Path(str(temp_value)).expanduser() candidate.mkdir(parents=True, exist_ok=True) debug(f"Using config temp directory: {candidate}") return candidate except Exception as e: log(f"Cannot use configured temp directory '{temp_value}': {e}", file=sys.stderr) return None try: import tempfile candidate = Path(tempfile.gettempdir()) / "Medios-Macina" candidate.mkdir(parents=True, exist_ok=True) debug(f"Using OS temp directory: {candidate}") return candidate except Exception as e: log(f"Cannot create OS temp directory: {e}", file=sys.stderr) return None def _parse_time_ranges(self, spec: str) -> List[tuple[int, int]]: def _to_seconds(ts: str) -> Optional[int]: ts = str(ts).strip() if not ts: return None try: unit_match = re.fullmatch(r"(?i)\s*(?:(?P\d+)h)?\s*(?:(?P\d+)m)?\s*(?:(?P\d+(?:\.\d+)?)s)?\s*", ts) except Exception: unit_match = None if unit_match and unit_match.group(0).strip() and any(unit_match.group(g) for g in ("h", "m", "s")): try: hours = int(unit_match.group("h") or 0) minutes = int(unit_match.group("m") or 0) seconds = float(unit_match.group("s") or 0) total = (hours * 3600) + (minutes * 60) + seconds return int(total) except Exception: return None if ":" in ts: parts = [p.strip() for p in ts.split(":")] if len(parts) == 2: hh_s = "0" mm_s, ss_s = parts elif len(parts) == 3: hh_s, mm_s, ss_s = parts else: return None try: hours = int(hh_s) minutes = int(mm_s) seconds = float(ss_s) total = (hours * 3600) + (minutes * 60) + seconds return int(total) except Exception: return None try: return int(float(ts)) except Exception: return None ranges: List[tuple[int, int]] = [] if not spec: return ranges for piece in str(spec).split(","): piece = piece.strip() if not piece: continue if "-" not in piece: return [] start_s, end_s = [p.strip() for p in piece.split("-", 1)] start = _to_seconds(start_s) end = _to_seconds(end_s) if start is None or end is None or start >= end: return [] ranges.append((start, end)) return ranges @staticmethod def _parse_keyed_csv_spec(spec: str, *, default_key: str) -> Dict[str, List[str]]: out: Dict[str, List[str]] = {} if not isinstance(spec, str): spec = str(spec) text = spec.strip() if not text: return out active = (default_key or "").strip().lower() or "clip" key_pattern = re.compile(r"^([A-Za-z_][A-Za-z0-9_-]*)\s*:\s*(.*)$") for raw_piece in text.split(","): piece = raw_piece.strip() if not piece: continue m = key_pattern.match(piece) if m: active = (m.group(1) or "").strip().lower() or active value = (m.group(2) or "").strip() if value: out.setdefault(active, []).append(value) continue out.setdefault(active, []).append(piece) return out def _build_clip_sections_spec(self, clip_ranges: Optional[List[tuple[int, int]]]) -> Optional[str]: ranges: List[str] = [] if clip_ranges: for start_s, end_s in clip_ranges: ranges.append(f"{start_s}-{end_s}") return ",".join(ranges) if ranges else None def _build_pipe_object(self, download_result: Any, url: str, opts: DownloadOptions) -> Dict[str, Any]: info: Dict[str, Any] = download_result.info if isinstance(download_result.info, dict) else {} media_path = Path(download_result.path) hash_value = download_result.hash_value or self._compute_file_hash(media_path) title = info.get("title") or media_path.stem tag = list(download_result.tag or []) if title and f"title:{title}" not in tag: tag.insert(0, f"title:{title}") final_url = None try: page_url = info.get("webpage_url") or info.get("original_url") or info.get("url") if page_url: final_url = str(page_url) except Exception: final_url = None if not final_url and url: final_url = str(url) return { "path": str(media_path), "hash": hash_value, "title": title, "url": final_url, "tag": tag, "action": "cmdlet:download-file", "is_temp": True, "ytdl_format": getattr(opts, "ytdl_format", None), "store": getattr(opts, "storage_name", None) or getattr(opts, "storage_location", None) or "PATH", "media_kind": "video" if opts.mode == "video" else "audio", } @staticmethod def _normalise_hash_hex(value: Optional[str]) -> Optional[str]: if not value or not isinstance(value, str): return None candidate = value.strip().lower() if len(candidate) == 64 and all(c in "0123456789abcdef" for c in candidate): return candidate return None @classmethod def _extract_hash_from_search_hit(cls, hit: Any) -> Optional[str]: if not isinstance(hit, dict): return None for key in ("hash", "hash_hex", "file_hash", "hydrus_hash"): v = hit.get(key) normalized = cls._normalise_hash_hex(str(v) if v is not None else None) if normalized: return normalized return None @classmethod def _find_existing_hash_for_url( cls, storage: Any, canonical_url: str, *, hydrus_available: bool ) -> Optional[str]: if storage is None or not canonical_url: return None try: from Store.HydrusNetwork import HydrusNetwork except Exception: HydrusNetwork = None # type: ignore try: backend_names = list(storage.list_searchable_backends() or []) except Exception: backend_names = [] for backend_name in backend_names: try: backend = storage[backend_name] except Exception: continue try: if str(backend_name).strip().lower() == "temp": continue except Exception: pass try: if HydrusNetwork is not None and isinstance(backend, HydrusNetwork) and not hydrus_available: continue except Exception: pass try: hits = backend.search(f"url:{canonical_url}", limit=5) or [] except Exception: hits = [] for hit in hits: extracted = cls._extract_hash_from_search_hit(hit) if extracted: return extracted return None @staticmethod def _format_timecode(seconds: int, *, force_hours: bool) -> str: total = max(0, int(seconds)) minutes, secs = divmod(total, 60) hours, minutes = divmod(minutes, 60) if force_hours: return f"{hours:02d}:{minutes:02d}:{secs:02d}" return f"{minutes:02d}:{secs:02d}" @classmethod def _format_clip_range(cls, start_s: int, end_s: int) -> str: force_hours = bool(start_s >= 3600 or end_s >= 3600) return f"{cls._format_timecode(start_s, force_hours=force_hours)}-{cls._format_timecode(end_s, force_hours=force_hours)}" @classmethod def _apply_clip_decorations( cls, pipe_objects: List[Dict[str, Any]], clip_ranges: List[tuple[int, int]], *, source_king_hash: Optional[str] ) -> None: if not pipe_objects or len(pipe_objects) != len(clip_ranges): return for po, (start_s, end_s) in zip(pipe_objects, clip_ranges): clip_range = cls._format_clip_range(start_s, end_s) clip_tag = f"clip:{clip_range}" po["title"] = clip_tag tags = po.get("tag") if not isinstance(tags, list): tags = [] tags = [t for t in tags if not str(t).strip().lower().startswith("title:")] tags = [t for t in tags if not str(t).strip().lower().startswith("relationship:")] tags.insert(0, f"title:{clip_tag}") if clip_tag not in tags: tags.append(clip_tag) po["tag"] = tags if len(pipe_objects) < 2: return hashes: List[str] = [] for po in pipe_objects: h_val = cls._normalise_hash_hex(str(po.get("hash") or "")) hashes.append(h_val or "") king_hash = cls._normalise_hash_hex(source_king_hash) if source_king_hash else None if not king_hash: king_hash = hashes[0] if hashes and hashes[0] else None if not king_hash: return alt_hashes: List[str] = [h for h in hashes if h and h != king_hash] if not alt_hashes: return for po in pipe_objects: po["relationships"] = {"king": [king_hash], "alt": list(alt_hashes)} def _run_impl( self, result: Any, args: Sequence[str], config: Dict[str, Any] ) -> int: """Main download implementation for direct HTTP files.""" progress = PipelineProgress(pipeline_context) prev_progress = None had_progress_key = False try: debug("Starting download-file") # Allow providers to tap into the active PipelineProgress (optional). try: if isinstance(config, dict): had_progress_key = "_pipeline_progress" in config prev_progress = config.get("_pipeline_progress") config["_pipeline_progress"] = progress except Exception: pass # Parse arguments parsed = parse_cmdlet_args(args, self) raw_url = self._normalize_urls(parsed) piped_items = self._collect_piped_items_if_no_urls(result, raw_url) had_piped_input = False try: if isinstance(result, list): had_piped_input = bool(result) else: had_piped_input = bool(result) except Exception: had_piped_input = False # UX: In piped mode, allow a single positional arg to be the destination directory. # Example: @1-4 | download-file "C:\\Users\\Me\\Downloads\\yoyo" if (had_piped_input and raw_url and len(raw_url) == 1 and (not parsed.get("path")) and (not parsed.get("output"))): candidate = str(raw_url[0] or "").strip() low = candidate.lower() looks_like_url = low.startswith(("http://", "https://", "ftp://")) looks_like_provider = low.startswith( ("magnet:", "alldebrid:", "hydrus:", "ia:", "internetarchive:") ) looks_like_windows_path = ( (len(candidate) >= 2 and candidate[1] == ":") or candidate.startswith("\\\\") or candidate.startswith("\\") or candidate.endswith(("\\", "/")) ) if (not looks_like_url) and ( not looks_like_provider) and looks_like_windows_path: parsed["path"] = candidate raw_url = [] piped_items = self._collect_piped_items_if_no_urls(result, raw_url) if not raw_url and not piped_items: log("No url or piped items to download", file=sys.stderr) return 1 streaming_candidates = self._append_urls_from_piped_result(list(raw_url), result) supported_streaming, unsupported_streaming = self._filter_supported_urls(streaming_candidates) streaming_exit_code: Optional[int] = None streaming_downloaded = 0 if supported_streaming: streaming_exit_code = self._run_streaming_urls( streaming_urls=supported_streaming, args=args, config=config, parsed=parsed, ) if streaming_exit_code == 0: streaming_downloaded += 1 raw_url = [u for u in raw_url if u not in supported_streaming] if not raw_url and not unsupported_streaming: piped_items = [] if not raw_url and not piped_items: return int(streaming_exit_code or 0) quiet_mode = ( bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False ) ia_picker_exit = ia_provider.maybe_show_formats_table( raw_urls=raw_url, piped_items=piped_items, parsed=parsed, config=config, quiet_mode=quiet_mode, get_field=get_field, ) if ia_picker_exit is not None: return int(ia_picker_exit) # Get output directory final_output_dir = self._resolve_output_dir(parsed, config) if not final_output_dir: return 1 debug(f"Output directory: {final_output_dir}") # If the caller isn't running the shared pipeline Live progress UI (e.g. direct # cmdlet execution), start a minimal local pipeline progress panel so downloads # show consistent, Rich-formatted progress (like download-media). total_items = self._safe_total_items(raw_url, piped_items) preview = self._build_preview(raw_url, piped_items, total_items) progress.ensure_local_ui( label="download-file", total_items=total_items, items_preview=preview ) registry = self._load_provider_registry() downloaded_count = 0 urls_downloaded, early_exit = self._process_explicit_urls( raw_urls=raw_url, final_output_dir=final_output_dir, config=config, quiet_mode=quiet_mode, registry=registry, progress=progress, ) downloaded_count += int(urls_downloaded) if early_exit is not None: return int(early_exit) downloaded_count += self._process_provider_items( piped_items=piped_items, final_output_dir=final_output_dir, config=config, quiet_mode=quiet_mode, registry=registry, progress=progress, ) if downloaded_count > 0 or streaming_downloaded > 0: debug(f"✓ Successfully processed {downloaded_count} file(s)") return 0 if streaming_exit_code is not None: return int(streaming_exit_code) log("No downloads completed", file=sys.stderr) return 1 except Exception as e: log(f"Error in download-file: {e}", file=sys.stderr) return 1 finally: try: if isinstance(config, dict): if had_progress_key: config["_pipeline_progress"] = prev_progress else: config.pop("_pipeline_progress", None) except Exception: pass progress.close_local_ui(force_complete=True) def _resolve_output_dir(self, parsed: Dict[str, Any], config: Dict[str, Any]) -> Optional[Path]: """Resolve the output directory from storage location or config.""" output_dir_arg = parsed.get("path") or parsed.get("output") if output_dir_arg: try: out_path = Path(str(output_dir_arg)).expanduser() out_path.mkdir(parents=True, exist_ok=True) return out_path except Exception as e: log( f"Cannot use output directory {output_dir_arg}: {e}", file=sys.stderr ) return None storage_location = parsed.get("storage") # Priority 1: --storage flag if storage_location: try: return SharedArgs.resolve_storage(storage_location) except Exception as e: log(f"Invalid storage location: {e}", file=sys.stderr) return None # Priority 2: Config default output/temp directory try: from SYS.config import resolve_output_dir final_output_dir = resolve_output_dir(config) except Exception: final_output_dir = Path.home() / "Downloads" debug(f"Using default directory: {final_output_dir}") # Ensure directory exists try: final_output_dir.mkdir(parents=True, exist_ok=True) except Exception as e: log( f"Cannot create output directory {final_output_dir}: {e}", file=sys.stderr ) return None return final_output_dir def _compute_file_hash(self, filepath: Path) -> str: """Compute SHA256 hash of a file.""" import hashlib sha256_hash = hashlib.sha256() with open(filepath, "rb") as f: for byte_block in iter(lambda: f.read(4096), b""): sha256_hash.update(byte_block) return sha256_hash.hexdigest() # Module-level singleton registration CMDLET = Download_File()