"""Generic file/stream downloader.
Supports:
- Direct HTTP file URLs (PDFs, images, documents; non-yt-dlp)
- Piped provider items (uses provider.download when available)
- Streaming sites via yt-dlp (YouTube, Bandcamp, etc.)
"""
from __future__ import annotations
import sys
import re
from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence
from urllib.parse import urlparse
from contextlib import AbstractContextManager, nullcontext
from API.alldebrid import is_magnet_link
from Provider import internetarchive as ia_provider
from Provider import alldebrid as ad_provider
from Provider import openlibrary as ol_provider
from SYS.download import DownloadError, _download_direct_file
from SYS.models import DownloadOptions, DownloadMediaResult
from SYS.logger import log, debug
from SYS.pipeline_progress import PipelineProgress
from SYS.result_table import ResultTable
from SYS.rich_display import stderr_console as get_stderr_console
from SYS import pipeline as pipeline_context
from SYS.utils import sha256_file
from rich.prompt import Confirm
from tool.ytdlp import (
YtDlpTool,
_best_subtitle_sidecar,
_SUBTITLE_EXTS,
_download_with_timeout,
_format_chapters_note,
_read_text_file,
is_url_supported_by_ytdlp,
list_formats,
probe_url,
)
from . import _shared as sh
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
SharedArgs = sh.SharedArgs
QueryArg = sh.QueryArg
parse_cmdlet_args = sh.parse_cmdlet_args
register_url_with_local_library = sh.register_url_with_local_library
coerce_to_pipe_object = sh.coerce_to_pipe_object
get_field = sh.get_field
class Download_File(Cmdlet):
"""Class-based download-file cmdlet - direct HTTP downloads."""
def __init__(self) -> None:
"""Initialize download-file cmdlet."""
super().__init__(
name="download-file",
summary="Download files or streaming media",
usage="download-file <url> [-path DIR] [options] OR @N | download-file [-path DIR|DIR] [options]",
alias=["dl-file", "download-http"],
arg=[
SharedArgs.URL,
SharedArgs.PATH,
SharedArgs.QUERY,
# Prefer -path for output directory to match other cmdlets; keep -output for backwards compatibility.
CmdletArg(
name="-output",
type="string",
alias="o",
description="(deprecated) Output directory (use -path instead)",
),
CmdletArg(
name="audio",
type="flag",
alias="a",
description="Download audio only (yt-dlp)",
),
CmdletArg(
name="format",
type="string",
alias="fmt",
description="Explicit yt-dlp format selector",
),
QueryArg(
    "clip",
    key="clip",
    aliases=["range", "section", "sections"],
    type="string",
    required=False,
    description=(
        "Clip time ranges via -query keyed fields (e.g. clip:1m-2m or clip:00:01-00:10). "
        "Comma-separated values supported."
    ),
    query_only=True,
),
CmdletArg(
name="item",
type="string",
description="Item selection for playlists/formats",
),
],
detail=[
"Download files directly via HTTP or streaming media via yt-dlp.",
"For Internet Archive item pages (archive.org/details/...), shows a selectable file/format list; pick with @N to download.",
],
exec=self.run,
)
self.register()
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Main execution method."""
return self._run_impl(result, args, config)
@staticmethod
def _normalize_urls(parsed: Dict[str, Any]) -> List[str]:
raw_url = parsed.get("url", [])
if isinstance(raw_url, str):
raw_url = [raw_url]
expanded_urls: List[str] = []
for u in raw_url or []:
if u is None:
continue
s = str(u).strip()
if not s:
continue
if "," in s:
parts = [p.strip() for p in s.split(",")]
expanded_urls.extend([p for p in parts if p])
else:
expanded_urls.append(s)
return expanded_urls
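# Illustrative expansion of comma-separated -url values:
#   _normalize_urls({"url": "https://a.example, https://b.example,"})
#   -> ["https://a.example", "https://b.example"]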
@staticmethod
def _collect_piped_items_if_no_urls(result: Any, raw_urls: Sequence[str]) -> List[Any]:
if raw_urls:
return []
if isinstance(result, list):
return list(result)
if result:
return [result]
return []
@staticmethod
def _safe_total_items(raw_urls: Sequence[str], piped_items: Sequence[Any]) -> int:
try:
return int(len(raw_urls or []) + len(piped_items or []))
except Exception:
return 1
@staticmethod
def _build_preview(
raw_urls: Sequence[str],
piped_items: Sequence[Any],
total_items: int
) -> List[Any]:
try:
preview: List[Any] = []
preview.extend(list(raw_urls or [])[:max(0, total_items)])
if len(preview) < total_items:
    preview.extend(list(piped_items or [])[:max(0, total_items - len(preview))])
return preview
except Exception:
return []
@staticmethod
def _load_provider_registry() -> Dict[str, Any]:
try:
from ProviderCore.registry import (
get_search_provider as _get_search_provider,
get_provider as _get_provider,
match_provider_name_for_url as _match_provider_name_for_url,
SearchResult as _SearchResult,
)
return {
"get_search_provider": _get_search_provider,
"get_provider": _get_provider,
"match_provider_name_for_url": _match_provider_name_for_url,
"SearchResult": _SearchResult,
}
except Exception:
return {
"get_search_provider": None,
"get_provider": None,
"match_provider_name_for_url": None,
"SearchResult": None,
}
@staticmethod
def _path_from_download_result(result_obj: Any) -> Path:
file_path = None
if hasattr(result_obj, "path"):
file_path = getattr(result_obj, "path")
elif isinstance(result_obj, dict):
file_path = result_obj.get("path")
if not file_path:
file_path = str(result_obj)
return Path(str(file_path))
def _emit_local_file(
self,
*,
downloaded_path: Path,
source: Optional[str],
title_hint: Optional[str],
tags_hint: Optional[List[str]],
media_kind_hint: Optional[str],
full_metadata: Optional[Dict[str, Any]],
progress: PipelineProgress,
config: Dict[str, Any],
provider_hint: Optional[str] = None,
) -> None:
title_val = (title_hint or downloaded_path.stem or "Unknown").strip() or downloaded_path.stem
hash_value = self._compute_file_hash(downloaded_path)
tag: List[str] = []
if tags_hint:
tag.extend([str(t) for t in tags_hint if t])
if not any(str(t).lower().startswith("title:") for t in tag):
tag.insert(0, f"title:{title_val}")
payload: Dict[str, Any] = {
"path": str(downloaded_path),
"hash": hash_value,
"title": title_val,
"action": "cmdlet:download-file",
"download_mode": "file",
"store": "local",
"media_kind": media_kind_hint or "file",
"tag": tag,
}
if provider_hint:
payload["provider"] = str(provider_hint)
if full_metadata:
payload["full_metadata"] = full_metadata
if source and str(source).startswith("http"):
payload["url"] = source
elif source:
payload["source_url"] = source
pipeline_context.emit(payload)
# When running with a local progress UI (standalone cmdlet), ensure
# the pipe advances on emit.
progress.on_emit(payload)
# Automatically register url with local library
if payload.get("url"):
pipe_obj = coerce_to_pipe_object(payload)
register_url_with_local_library(pipe_obj, config)
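# Shape of the payload emitted above (illustrative values):
#   {"path": "/dl/file.pdf", "hash": "<sha256>", "title": "file",
#    "action": "cmdlet:download-file", "download_mode": "file",
#    "store": "local", "media_kind": "file", "tag": ["title:file"],
#    "url": "https://..."}   # http(s) sources land in "url",
#                            # anything else in "source_url"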
def _process_explicit_urls(
self,
*,
raw_urls: Sequence[str],
final_output_dir: Path,
config: Dict[str, Any],
quiet_mode: bool,
registry: Dict[str, Any],
progress: PipelineProgress,
) -> tuple[int, Optional[int]]:
downloaded_count = 0
SearchResult = registry.get("SearchResult")
get_provider = registry.get("get_provider")
match_provider_name_for_url = registry.get("match_provider_name_for_url")
for url in raw_urls:
try:
debug(f"Processing URL: {url}")
# Telegram message URLs are not direct files; route through the provider.
try:
parsed_url = urlparse(str(url))
host = (parsed_url.hostname or "").lower().strip()
except Exception:
host = ""
is_telegram = host in {"t.me",
"telegram.me"} or host.endswith(".t.me")
if is_telegram and SearchResult:
try:
from ProviderCore.registry import get_provider as _get_provider
except Exception:
_get_provider = None
if _get_provider is None:
raise DownloadError("Telegram provider registry not available")
provider = _get_provider("telegram", config)
if provider is None:
raise DownloadError(
"Telegram provider not configured or not available (check telethon/app_id/api_hash)"
)
sr = SearchResult(
table="telegram",
title=str(url),
path=str(url),
full_metadata={}
)
downloaded_path = None
telegram_info: Optional[Dict[str, Any]] = None
if hasattr(provider, "download_url"):
try:
downloaded_path, telegram_info = provider.download_url(str(url), final_output_dir) # type: ignore[attr-defined]
except Exception as exc:
raise DownloadError(str(exc)) from exc
else:
downloaded_path = provider.download(sr, final_output_dir)
if not downloaded_path:
raise DownloadError("Telegram download returned no file")
channel = ""
post = None
if isinstance(telegram_info, dict):
try:
chat_info_raw = telegram_info.get("chat")
msg_info_raw = telegram_info.get("message")
chat_info: Dict[str,
Any] = (
chat_info_raw
if isinstance(chat_info_raw,
dict) else {}
)
msg_info: Dict[str,
Any] = (
msg_info_raw
if isinstance(msg_info_raw,
dict) else {}
)
channel = str(
chat_info.get("title") or chat_info.get("username")
or ""
).strip()
post = msg_info.get("id")
except Exception:
channel = ""
post = None
title_hint = None
tg_tags: List[str] = []
if channel:
tg_tags.append(f"channel:{channel}")
if post is not None:
tg_tags.append(f"post:{post}")
if channel and post is not None:
title_hint = f"{channel} {post}"
elif post is not None:
title_hint = f"post:{post}"
else:
title_hint = downloaded_path.stem
self._emit_local_file(
downloaded_path=downloaded_path,
source=str(url),
title_hint=title_hint,
tags_hint=tg_tags,
media_kind_hint="file",
full_metadata=telegram_info,
provider_hint="telegram",
progress=progress,
config=config,
)
downloaded_count += 1
debug("✓ Downloaded via Telegram provider and emitted")
continue
# Provider URL routing (e.g. OpenLibrary book pages).
provider_name = None
if match_provider_name_for_url is not None:
try:
provider_name = match_provider_name_for_url(str(url))
except Exception:
provider_name = None
# Heuristic: LibGen often uses landing pages like edition.php/file.php.
# These should never be treated as direct file URLs.
if not provider_name:
try:
p = urlparse(str(url))
h = (p.hostname or "").strip().lower()
path = (p.path or "").strip().lower()
if "libgen" in h and any(x in path for x in (
"/edition.php",
"/file.php",
"/ads.php",
"/get.php",
"/series.php", )):
provider_name = "libgen"
except Exception:
pass
if provider_name and str(provider_name).lower() == "alldebrid" and is_magnet_link(str(url)):
magnet_spec = ad_provider.resolve_magnet_spec(str(url))
if magnet_spec:
_, magnet_id = ad_provider.prepare_magnet(magnet_spec, config)
if magnet_id is not None:
downloaded_count += 1
continue
if provider_name and get_provider is not None and SearchResult is not None:
# OpenLibrary URLs should be handled by the OpenLibrary provider.
if provider_name == "openlibrary":
provider = get_provider("openlibrary", config)
if provider is None:
raise DownloadError(
"OpenLibrary provider not configured or not available"
)
edition_id = ol_provider.edition_id_from_url(str(url))
title_hint = ol_provider.title_hint_from_url_slug(str(url))
download_payload: Optional[Dict[str, Any]] = None
try:
ui, _pipe_idx = progress.ui_and_pipe_index()
progress_cb = None
if ui is not None:
# High-level steps for OpenLibrary borrow/download flow.
progress.begin_steps(5)
def _progress(
kind: str,
done: int,
total: Optional[int],
label: str
) -> None:
# kind:
# - "step": advance step text
# - "pages": update pipe percent/status
# - "bytes": update transfer bar
if kind == "step":
progress.step(label)
return
if kind == "pages":
t = int(total) if isinstance(total, int) else 0
d = int(done) if isinstance(done, int) else 0
if t > 0:
pct = int(round((max(0, min(d, t)) / max(1, t)) * 100.0))
progress.set_percent(pct)
progress.set_status(f"downloading pages {d}/{t}")
else:
progress.set_status(f"downloading pages {d}")
return
if kind == "bytes":
try:
lbl = str(label or "download")
except Exception:
lbl = "download"
progress.begin_transfer(label=lbl, total=total)
progress.update_transfer(
label=lbl,
completed=done,
total=total
)
try:
if isinstance(total, int) and total > 0 and int(done) >= int(total):
progress.finish_transfer(label=lbl)
except Exception:
pass
return
progress_cb = _progress
if hasattr(provider, "download_url"):
download_payload = provider.download_url( # type: ignore[attr-defined]
str(url),
final_output_dir,
progress_cb,
)
if download_payload is None:
sr = None
if hasattr(provider, "search_result_from_url"):
sr = provider.search_result_from_url(str(url)) # type: ignore[attr-defined]
if sr is None:
sr = SearchResult(
table="openlibrary",
title=title_hint,
path=str(url),
media_kind="book",
full_metadata={
"openlibrary_id": edition_id,
},
)
downloaded_path = provider.download(
sr,
final_output_dir,
progress_callback=progress_cb
) # type: ignore[call-arg]
if downloaded_path:
download_payload = {
"path": Path(downloaded_path),
"search_result": sr,
}
except Exception as exc:
raise DownloadError(str(exc)) from exc
# Clear long-running status line after the download attempt.
progress.clear_status()
if download_payload and download_payload.get("path"):
downloaded_path = Path(download_payload["path"])
sr_obj = download_payload.get("search_result")
tags_hint: Optional[List[str]] = None
full_md: Optional[Dict[str, Any]] = None
resolved_title = title_hint
if sr_obj is not None:
try:
resolved_title = getattr(sr_obj, "title", None) or resolved_title
except Exception:
pass
try:
sr_tags = getattr(sr_obj, "tag", None)
if isinstance(sr_tags, set) and sr_tags:
tags_hint = sorted([str(t) for t in sr_tags if t])
except Exception:
tags_hint = None
try:
full_md = getattr(sr_obj, "full_metadata", None)
except Exception:
full_md = None
self._emit_local_file(
downloaded_path=downloaded_path,
source=str(url),
title_hint=resolved_title,
tags_hint=tags_hint,
media_kind_hint="book",
full_metadata=full_md,
provider_hint="openlibrary",
progress=progress,
config=config,
)
downloaded_count += 1
continue
# If OpenLibrary can't provide it (not lendable, no creds, etc), auto-search LibGen.
try:
fallback_query = str(title_hint or "").strip()
if fallback_query:
log(
f"[download-file] Not available on OpenLibrary; searching LibGen for: {fallback_query}",
file=sys.stderr,
)
from cmdlet.search_provider import CMDLET as _SEARCH_PROVIDER_CMDLET
exec_fn = getattr(_SEARCH_PROVIDER_CMDLET, "exec", None)
if callable(exec_fn):
ret = exec_fn(
None,
["-provider", "libgen", "-query", fallback_query],
config,
)
try:
table = pipeline_context.get_last_result_table()
items = pipeline_context.get_last_result_items()
if table is not None:
pipeline_context.set_last_result_table_overlay(
table,
items
)
except Exception:
pass
try:
return downloaded_count, int(ret) # type: ignore[arg-type]
except Exception:
return downloaded_count, 1
except Exception:
pass
log(
"[download-file] OpenLibrary URL could not be downloaded",
file=sys.stderr,
)
continue
# Generic provider URL handler (if a provider implements `download_url`).
provider = get_provider(provider_name, config)
if provider is not None and hasattr(provider, "download_url"):
try:
downloaded_path = provider.download_url(
str(url),
final_output_dir
) # type: ignore[attr-defined]
except Exception as exc:
raise DownloadError(str(exc)) from exc
if downloaded_path:
self._emit_local_file(
downloaded_path=Path(downloaded_path),
source=str(url),
title_hint=Path(str(downloaded_path)).stem,
tags_hint=None,
media_kind_hint="file",
full_metadata=None,
provider_hint=str(provider_name),
progress=progress,
config=config,
)
downloaded_count += 1
continue
# Otherwise, try provider.download(SearchResult) with the URL as the target.
if provider is not None:
sr_obj = None
try:
sr_obj = SearchResult(
table=str(provider_name),
title=str(url),
path=str(url),
full_metadata={},
)
downloaded_path = provider.download(
sr_obj,
final_output_dir
) # type: ignore[call-arg]
except Exception:
downloaded_path = None
# Refuse to fall back to direct-download for LibGen landing pages.
# This prevents saving HTML (e.g. edition.php) as a bogus file.
if (not downloaded_path) and str(provider_name).lower() == "libgen":
raise DownloadError(
"LibGen URL did not resolve to a downloadable file"
)
if downloaded_path:
emit_tags: Optional[List[str]] = None
full_md: Optional[Dict[str, Any]] = None
title_hint = Path(str(downloaded_path)).stem
media_kind_hint = "file"
if str(provider_name).lower() == "libgen" and sr_obj is not None:
media_kind_hint = "book"
try:
sr_tags = getattr(sr_obj, "tag", None)
if isinstance(sr_tags, set) and sr_tags:
emit_tags = sorted([str(t) for t in sr_tags if t])
except Exception:
emit_tags = None
try:
sr_full_md = getattr(sr_obj, "full_metadata", None)
if isinstance(sr_full_md, dict):
full_md = sr_full_md
t = str(sr_full_md.get("title") or "").strip()
if t:
title_hint = t
except Exception:
full_md = None
self._emit_local_file(
downloaded_path=Path(downloaded_path),
source=str(url),
title_hint=title_hint,
tags_hint=emit_tags,
media_kind_hint=media_kind_hint,
full_metadata=full_md,
provider_hint=str(provider_name),
progress=progress,
config=config,
)
downloaded_count += 1
continue
result_obj = _download_direct_file(
str(url),
final_output_dir,
quiet=quiet_mode,
pipeline_progress=progress,
)
downloaded_path = self._path_from_download_result(result_obj)
self._emit_local_file(
downloaded_path=downloaded_path,
source=str(url),
title_hint=downloaded_path.stem,
tags_hint=[f"title:{downloaded_path.stem}"],
media_kind_hint="file",
full_metadata=None,
progress=progress,
config=config,
)
downloaded_count += 1
debug("✓ Downloaded and emitted")
except DownloadError as e:
log(f"Download failed for {url}: {e}", file=sys.stderr)
except Exception as e:
log(f"Error processing {url}: {e}", file=sys.stderr)
return downloaded_count, None
def _expand_provider_items(
self,
*,
piped_items: Sequence[Any],
registry: Dict[str, Any],
config: Dict[str, Any],
) -> List[Any]:
get_search_provider = registry.get("get_search_provider")
expanded_items: List[Any] = []
for item in piped_items:
try:
table = get_field(item, "table")
media_kind = get_field(item, "media_kind")
full_metadata = get_field(item, "full_metadata")
target = get_field(item, "path") or get_field(item, "url")
if (str(table or "").lower() == "alldebrid"
and str(media_kind or "").lower() == "folder"):
magnet_id = None
if isinstance(full_metadata, dict):
magnet_id = full_metadata.get("magnet_id")
if magnet_id is None and isinstance(target, str) and target.lower().startswith("alldebrid:magnet:"):
try:
magnet_id = int(target.split(":")[-1])
except Exception:
magnet_id = None
expanded, detail = ad_provider.expand_folder_item(
item,
get_search_provider,
config,
)
if detail:
log(
f"[download-file] AllDebrid magnet {magnet_id or 'unknown'} not ready ({detail or 'unknown'})",
file=sys.stderr,
)
continue
if expanded:
expanded_items.extend(expanded)
continue
expanded_items.append(item)
except Exception:
expanded_items.append(item)
return expanded_items
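# Illustrative expansion: a piped AllDebrid folder row such as
#   {"table": "alldebrid", "media_kind": "folder",
#    "full_metadata": {"magnet_id": 123}}
# becomes one item per file in the magnet; a not-ready magnet logs a
# "not ready" detail and is skipped for this run.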
def _process_provider_items(
self,
*,
piped_items: Sequence[Any],
final_output_dir: Path,
config: Dict[str, Any],
quiet_mode: bool,
registry: Dict[str, Any],
progress: PipelineProgress,
) -> int:
downloaded_count = 0
get_search_provider = registry.get("get_search_provider")
SearchResult = registry.get("SearchResult")
expanded_items = self._expand_provider_items(
piped_items=piped_items,
registry=registry,
config=config
)
for item in expanded_items:
try:
table = get_field(item, "table")
title = get_field(item, "title")
target = get_field(item, "path") or get_field(item, "url")
media_kind = get_field(item, "media_kind")
tags_val = get_field(item, "tag")
tags_list: Optional[List[str]]
if isinstance(tags_val, list):
tags_list = [str(t) for t in tags_val if t]
else:
tags_list = None
full_metadata = get_field(item, "full_metadata")
if (not full_metadata) and isinstance(item, dict) and isinstance(item.get("extra"), dict):
extra_md = item["extra"].get("full_metadata")
if isinstance(extra_md, dict):
full_metadata = extra_md
# If this looks like a provider item and providers are available, prefer provider.download()
downloaded_path: Optional[Path] = None
attempted_provider_download = False
provider_sr = None
if table and get_search_provider and SearchResult:
provider = get_search_provider(str(table), config)
if provider is not None:
attempted_provider_download = True
sr = SearchResult(
table=str(table),
title=str(title or "Unknown"),
path=str(target or ""),
full_metadata=full_metadata if isinstance(full_metadata, dict) else {},
)
debug(
f"[download-file] Downloading provider item via {table}: {sr.title}"
)
# Preserve provider structure when possible (AllDebrid folders -> subfolders).
output_dir = final_output_dir
try:
if str(table).strip().lower() == "alldebrid":
output_dir = ad_provider.adjust_output_dir_for_alldebrid(
final_output_dir,
full_metadata if isinstance(full_metadata, dict) else None,
item,
)
except Exception:
output_dir = final_output_dir
downloaded_path = provider.download(sr, output_dir)
provider_sr = sr
# OpenLibrary: if provider download failed, do NOT try to download the OpenLibrary page HTML.
if (downloaded_path is None and attempted_provider_download
and str(table or "").lower() == "openlibrary"):
availability = None
reason = None
if isinstance(full_metadata, dict):
availability = full_metadata.get("availability")
reason = full_metadata.get("availability_reason")
msg = "[download-file] OpenLibrary item not downloadable"
if availability or reason:
msg += f" (availability={availability or ''} reason={reason or ''})"
log(msg, file=sys.stderr)
# Fallback: run a LibGen title search so the user can pick an alternative source.
try:
title_text = str(title or "").strip()
if not title_text and isinstance(full_metadata, dict):
title_text = str(full_metadata.get("title") or "").strip()
if title_text:
log(
f"[download-file] Not available on OpenLibrary; searching LibGen for: {title_text}",
file=sys.stderr,
)
from cmdlet.search_file import CMDLET as _SEARCH_FILE_CMDLET
fallback_query = title_text
exec_fn = getattr(_SEARCH_FILE_CMDLET, "exec", None)
if not callable(exec_fn):
log(
"[download-file] search-file cmdlet unavailable; cannot run LibGen fallback search",
file=sys.stderr,
)
continue
ret = exec_fn(
None,
["-provider",
"libgen",
"-query",
fallback_query],
config,
)
# Promote the search-file table to a display overlay so it renders.
try:
table_obj = pipeline_context.get_last_result_table()
items_obj = pipeline_context.get_last_result_items()
if table_obj is not None:
pipeline_context.set_last_result_table_overlay(
table_obj,
items_obj
)
except Exception:
pass
try:
return int(ret) # type: ignore[arg-type]
except Exception:
return 1
except Exception:
pass
continue
# Fallback: if we have a direct HTTP URL, download it directly
if downloaded_path is None and isinstance(target, str) and target.startswith("http"):
# Guard: provider landing pages (e.g. LibGen ads.php) are HTML, not files.
# Never download these as "files".
if str(table or "").lower() == "libgen":
low = target.lower()
if ("/ads.php" in low) or ("/file.php" in low) or ("/index.php"
in low):
log(
"[download-file] Refusing to download LibGen landing page (expected provider to resolve file link)",
file=sys.stderr,
)
continue
debug(
f"[download-file] Provider item looks like direct URL, downloading: {target}"
)
suggested_name = str(title).strip() if title is not None else None
result_obj = _download_direct_file(
target,
final_output_dir,
quiet=quiet_mode,
suggested_filename=suggested_name,
pipeline_progress=progress,
)
downloaded_path = self._path_from_download_result(result_obj)
if downloaded_path is None:
log(
f"Cannot download item (no provider handler / unsupported target): {title or target}",
file=sys.stderr,
)
continue
# Allow providers to add/enrich tags and metadata during download.
if str(table or "").lower() == "libgen" and provider_sr is not None:
try:
sr_tags = getattr(provider_sr, "tag", None)
if tags_list is None and isinstance(sr_tags, set) and sr_tags:
tags_list = sorted([str(t) for t in sr_tags if t])
except Exception:
pass
try:
sr_md = getattr(provider_sr, "full_metadata", None)
if isinstance(sr_md, dict) and sr_md:
full_metadata = sr_md
except Exception:
pass
try:
if isinstance(full_metadata, dict):
t = str(full_metadata.get("title") or "").strip()
if t:
title = t
except Exception:
pass
self._emit_local_file(
downloaded_path=downloaded_path,
source=str(target) if target else None,
title_hint=str(title) if title else downloaded_path.stem,
tags_hint=tags_list,
media_kind_hint=str(media_kind) if media_kind else None,
full_metadata=full_metadata if isinstance(full_metadata, dict) else None,
progress=progress,
config=config,
)
downloaded_count += 1
except DownloadError as e:
log(f"Download failed: {e}", file=sys.stderr)
except Exception as e:
log(f"Error downloading item: {e}", file=sys.stderr)
return downloaded_count
# === Streaming helpers (yt-dlp) ===
@staticmethod
def _append_urls_from_piped_result(raw_urls: List[str], result: Any) -> List[str]:
if raw_urls:
return raw_urls
if not result:
return raw_urls
results_to_check = result if isinstance(result, list) else [result]
for item in results_to_check:
try:
url = get_field(item, "url") or get_field(item, "target")
except Exception:
url = None
if url:
raw_urls.append(url)
return raw_urls
@staticmethod
def _filter_supported_urls(raw_urls: Sequence[str]) -> tuple[List[str], List[str]]:
supported = [url for url in (raw_urls or []) if is_url_supported_by_ytdlp(url)]
supported_set = set(supported)
unsupported = [url for url in (raw_urls or []) if url not in supported_set]
return supported, unsupported
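# Illustrative split (assuming yt-dlp recognizes the first URL):
#   _filter_supported_urls(["https://youtu.be/x", "https://e.com/a.pdf"])
#   -> (["https://youtu.be/x"], ["https://e.com/a.pdf"])
# Unsupported URLs fall through to the direct-HTTP download path.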
def _parse_query_keyed_spec(self, query_spec: Optional[str]) -> Dict[str, List[str]]:
if not query_spec:
return {}
try:
keyed = self._parse_keyed_csv_spec(str(query_spec), default_key="hash")
if not keyed:
return {}
def _alias(src: str, dest: str) -> None:
try:
values = keyed.get(src)
except Exception:
values = None
if not values:
return
try:
keyed.setdefault(dest, []).extend(list(values))
except Exception:
pass
try:
keyed.pop(src, None)
except Exception:
pass
for src in ("range", "ranges", "section", "sections"):
_alias(src, "clip")
for src in ("fmt", "f"):
_alias(src, "format")
for src in ("aud", "a"):
_alias(src, "audio")
return keyed
except Exception:
return {}
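# Illustrative normalization (bare values default to the "hash" key, alias
# keys fold into their canonical names):
#   "range:1m-2m,fmt:137" -> {"clip": ["1m-2m"], "format": ["137"]}
#   "deadbeef"            -> {"hash": ["deadbeef"]}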
@staticmethod
def _extract_hash_override(query_spec: Optional[str], query_keyed: Dict[str, List[str]]) -> Optional[str]:
try:
hash_values = query_keyed.get("hash", []) if isinstance(query_keyed, dict) else []
hash_candidate = hash_values[-1] if hash_values else None
if hash_candidate:
return sh.parse_single_hash_query(f"hash:{hash_candidate}")
try:
has_non_hash_keys = bool(
query_keyed
and isinstance(query_keyed, dict)
and any(k for k in query_keyed.keys() if str(k).strip().lower() != "hash")
)
except Exception:
has_non_hash_keys = False
if has_non_hash_keys:
return None
return sh.parse_single_hash_query(str(query_spec)) if query_spec else None
except Exception:
return None
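# Illustrative behavior: a keyed hash wins; any other keyed field without a
# hash means "no override":
#   ("hash:abc...", {"hash": ["abc..."]}) -> parsed "abc..." override
#   ("clip:1m-2m", {"clip": ["1m-2m"]})   -> None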
def _parse_clip_ranges_and_apply_items(
self,
*,
clip_spec: Optional[str],
query_keyed: Dict[str, List[str]],
parsed: Dict[str, Any],
query_spec: Optional[str],
) -> tuple[Optional[List[tuple[int, int]]], bool, List[str]]:
clip_ranges: Optional[List[tuple[int, int]]] = None
clip_values: List[str] = []
item_values: List[str] = []
def _uniq(values: Sequence[str]) -> List[str]:
seen: set[str] = set()
out: List[str] = []
for v in values:
key = str(v)
if key in seen:
continue
seen.add(key)
out.append(v)
return out
if clip_spec:
keyed = self._parse_keyed_csv_spec(str(clip_spec), default_key="clip")
clip_values.extend(keyed.get("clip", []) or [])
item_values.extend(keyed.get("item", []) or [])
if query_keyed:
clip_values.extend(query_keyed.get("clip", []) or [])
item_values.extend(query_keyed.get("item", []) or [])
clip_values = _uniq(clip_values)
item_values = _uniq(item_values)
if item_values and not parsed.get("item"):
parsed["item"] = ",".join([v for v in item_values if v])
if clip_values:
clip_ranges = self._parse_time_ranges(",".join([v for v in clip_values if v]))
if not clip_ranges:
bad_spec = clip_spec or query_spec
log(f"Invalid clip format: {bad_spec}", file=sys.stderr)
return None, True, clip_values
return clip_ranges, False, clip_values
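# Illustrative merge: clip values may come from the clip arg, the -query
# keyed spec, or both; duplicates are dropped before parsing, so
#   clip arg "clip:1m-2m" + query "clip:1m-2m,00:10-00:20"
# parses each distinct range once. A bad spec logs "Invalid clip format"
# and returns the error flag: (None, True, values).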
@staticmethod
def _init_storage(config: Dict[str, Any]) -> tuple[Optional[Any], bool]:
storage = None
hydrus_available = True
try:
from Store import Store
from API.HydrusNetwork import is_hydrus_available
storage = Store(config=config or {}, suppress_debug=True)
hydrus_available = bool(is_hydrus_available(config or {}))
except Exception:
storage = None
return storage, hydrus_available
@staticmethod
def _cookiefile_str(ytdlp_tool: YtDlpTool) -> Optional[str]:
try:
cookie_path = ytdlp_tool.resolve_cookiefile()
if cookie_path is not None and cookie_path.is_file():
return str(cookie_path)
except Exception:
pass
return None
def _list_formats_cached(
self,
u: str,
*,
playlist_items_value: Optional[str],
formats_cache: Dict[str, Optional[List[Dict[str, Any]]]],
ytdlp_tool: YtDlpTool,
) -> Optional[List[Dict[str, Any]]]:
key = f"{u}||{playlist_items_value or ''}"
if key in formats_cache:
return formats_cache[key]
fmts = list_formats(
u,
no_playlist=False,
playlist_items=playlist_items_value,
cookiefile=self._cookiefile_str(ytdlp_tool),
)
formats_cache[key] = fmts
return fmts
def _is_browseable_format(self, fmt: Any) -> bool:
if not isinstance(fmt, dict):
return False
format_id = str(fmt.get("format_id") or "").strip()
if not format_id:
return False
ext = str(fmt.get("ext") or "").strip().lower()
if ext in {"mhtml", "json"}:
return False
note = str(fmt.get("format_note") or "").lower()
if "storyboard" in note:
return False
if format_id.lower().startswith("sb"):
return False
vcodec = str(fmt.get("vcodec", "none"))
acodec = str(fmt.get("acodec", "none"))
return not (vcodec == "none" and acodec == "none")
def _format_id_for_query_index(
self,
query_format: str,
url: str,
formats_cache: Dict[str, Optional[List[Dict[str, Any]]]],
ytdlp_tool: YtDlpTool,
) -> Optional[str]:
if not query_format or not re.match(r"^\s*#?\d+\s*$", str(query_format)):
return None
try:
idx = int(str(query_format).lstrip("#").strip())
except Exception:
raise ValueError(f"Invalid format index: {query_format}")
fmts = self._list_formats_cached(
url,
playlist_items_value=None,
formats_cache=formats_cache,
ytdlp_tool=ytdlp_tool,
)
if not fmts:
raise ValueError("Unable to list formats for the URL; cannot resolve numeric format index")
candidate_formats = [f for f in fmts if self._is_browseable_format(f)]
filtered_formats = candidate_formats if candidate_formats else list(fmts)
if not filtered_formats:
raise ValueError("No formats available for selection")
if idx <= 0 or idx > len(filtered_formats):
raise ValueError(f"Format index {idx} out of range (1..{len(filtered_formats)})")
chosen = filtered_formats[idx - 1]
selection_format_id = str(chosen.get("format_id") or "").strip()
if not selection_format_id:
raise ValueError("Selected format has no format_id")
try:
vcodec = str(chosen.get("vcodec", "none"))
acodec = str(chosen.get("acodec", "none"))
if vcodec != "none" and acodec == "none":
selection_format_id = f"{selection_format_id}+ba"
except Exception:
pass
return selection_format_id
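# Illustrative: "-query format:#2" resolves to the second row of the
# browseable format table for the URL; a video-only pick such as "137" is
# widened to "137+ba" so audio gets merged in.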
@staticmethod
def _format_selector_for_query_height(query_format: str) -> Optional[str]:
if query_format is None:
return None
s = str(query_format).strip().lower()
m = re.match(r"^(\d{2,5})p$", s)
if not m:
return None
try:
height = int(m.group(1))
except Exception:
return None
if height <= 0:
raise ValueError(f"Invalid height selection: {query_format}")
return f"bv*[height<={height}]+ba"
@staticmethod
def _canonicalize_url_for_storage(*, requested_url: str, ytdlp_tool: YtDlpTool, playlist_items: Optional[str]) -> str:
if playlist_items:
return str(requested_url)
try:
cf = None
try:
cookie_path = ytdlp_tool.resolve_cookiefile()
if cookie_path is not None and cookie_path.is_file():
cf = str(cookie_path)
except Exception:
cf = None
pr = probe_url(requested_url, no_playlist=False, timeout_seconds=15, cookiefile=cf)
if isinstance(pr, dict):
for key in ("webpage_url", "original_url", "url", "requested_url"):
value = pr.get(key)
if isinstance(value, str) and value.strip():
return value.strip()
except Exception:
pass
return str(requested_url)
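# Illustrative: probing canonicalizes share/short links before the duplicate
# check, e.g. "https://youtu.be/ID" -> the webpage_url yt-dlp reports
# ("https://www.youtube.com/watch?v=ID"); on probe failure the requested URL
# is returned unchanged.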
def _preflight_url_duplicate(
self,
*,
storage: Any,
hydrus_available: bool,
final_output_dir: Path,
candidate_url: str,
extra_urls: Optional[Sequence[str]] = None,
) -> bool:
if storage is None:
debug("Preflight URL check skipped: storage unavailable")
return True
debug(f"Preflight URL check: candidate={candidate_url}")
try:
from SYS.metadata import normalize_urls
except Exception:
normalize_urls = None # type: ignore[assignment]
needles: List[str] = []
if normalize_urls is not None:
for raw in [candidate_url, *(list(extra_urls) if extra_urls else [])]:
try:
needles.extend(normalize_urls(raw))
except Exception:
continue
if not needles:
needles = [str(candidate_url)]
seen_needles: List[str] = []
for needle in needles:
if needle and needle not in seen_needles:
seen_needles.append(needle)
needles = seen_needles
try:
debug(f"Preflight URL needles: {needles}")
except Exception:
pass
url_matches: List[Dict[str, Any]] = []
try:
from Store.HydrusNetwork import HydrusNetwork
backend_names_all = storage.list_searchable_backends()
backend_names: List[str] = []
skipped: List[str] = []
for backend_name in backend_names_all:
try:
backend = storage[backend_name]
except Exception:
continue
try:
if str(backend_name).strip().lower() == "temp":
skipped.append(backend_name)
continue
except Exception:
pass
try:
backend_location = getattr(backend, "_location", None)
if backend_location and final_output_dir:
backend_path = Path(str(backend_location)).expanduser().resolve()
temp_path = Path(str(final_output_dir)).expanduser().resolve()
if backend_path == temp_path:
skipped.append(backend_name)
continue
except Exception:
pass
backend_names.append(backend_name)
try:
if skipped:
debug(f"Preflight backends: {backend_names} (skipped temp: {skipped})")
else:
debug(f"Preflight backends: {backend_names}")
except Exception:
pass
for backend_name in backend_names:
backend = storage[backend_name]
if isinstance(backend, HydrusNetwork) and not hydrus_available:
continue
backend_hits: List[Dict[str, Any]] = []
for needle in needles:
try:
backend_hits = backend.search(f"url:{needle}", limit=25) or []
if backend_hits:
break
except Exception:
continue
if backend_hits:
url_matches.extend(
[
dict(x) if isinstance(x, dict) else {"title": str(x)}
for x in backend_hits
]
)
if len(url_matches) >= 25:
url_matches = url_matches[:25]
break
except Exception:
url_matches = []
if not url_matches:
debug("Preflight URL check: no matches")
return True
try:
current_cmd_text = pipeline_context.get_current_command_text("")
except Exception:
current_cmd_text = ""
try:
stage_ctx = pipeline_context.get_stage_context()
except Exception:
stage_ctx = None
in_pipeline = bool(stage_ctx is not None or ("|" in str(current_cmd_text or "")))
if in_pipeline:
try:
cached_cmd = pipeline_context.load_value("preflight.url_duplicates.command", default="")
cached_decision = pipeline_context.load_value("preflight.url_duplicates.continue", default=None)
except Exception:
cached_cmd = ""
cached_decision = None
if cached_decision is not None and str(cached_cmd or "") == str(current_cmd_text or ""):
if bool(cached_decision):
return True
try:
pipeline_context.request_pipeline_stop(reason="duplicate-url declined", exit_code=0)
except Exception:
pass
return False
table = ResultTable(f"URL already exists ({len(url_matches)} match(es))")
results_list: List[Dict[str, Any]] = []
for item in url_matches:
if "title" not in item:
item["title"] = item.get("name") or item.get("target") or item.get("path") or "Result"
try:
from SYS.result_table import build_display_row
except Exception:
build_display_row = None # type: ignore
if callable(build_display_row):
display_row = build_display_row(item, keys=["title", "store", "hash", "ext", "size"])
else:
display_row = {
"title": item.get("title"),
"store": item.get("store"),
"hash": item.get("hash") or item.get("file_hash") or item.get("sha256"),
"ext": str(item.get("ext") or ""),
"size": item.get("size") or item.get("size_bytes"),
}
table.add_result(display_row)
results_list.append(item)
pipeline_context.set_current_stage_table(table)
pipeline_context.set_last_result_table(table, results_list)
suspend = getattr(pipeline_context, "suspend_live_progress", None)
used_suspend = False
cm: AbstractContextManager[Any] = nullcontext()
if callable(suspend):
try:
maybe_cm = suspend()
if maybe_cm is not None:
cm = maybe_cm # type: ignore[assignment]
used_suspend = True
except Exception:
cm = nullcontext()
used_suspend = False
with cm:
get_stderr_console().print(table)
setattr(table, "_rendered_by_cmdlet", True)
answered_yes = bool(Confirm.ask("Continue anyway?", default=False, console=get_stderr_console()))
if in_pipeline:
try:
existing = pipeline_context.load_value("preflight", default=None)
except Exception:
existing = None
preflight_cache: Dict[str, Any] = existing if isinstance(existing, dict) else {}
url_dup_cache = preflight_cache.get("url_duplicates")
if not isinstance(url_dup_cache, dict):
url_dup_cache = {}
url_dup_cache["command"] = str(current_cmd_text or "")
url_dup_cache["continue"] = bool(answered_yes)
preflight_cache["url_duplicates"] = url_dup_cache
try:
pipeline_context.store_value("preflight", preflight_cache)
except Exception:
pass
if not answered_yes:
if in_pipeline and used_suspend:
try:
pipeline_context.request_pipeline_stop(reason="duplicate-url declined", exit_code=0)
except Exception:
pass
return False
return True
def _preflight_url_duplicates_bulk(
self,
*,
storage: Any,
hydrus_available: bool,
final_output_dir: Path,
urls: Sequence[str],
) -> bool:
if storage is None:
debug("Bulk URL preflight skipped: storage unavailable")
return True
try:
current_cmd_text = pipeline_context.get_current_command_text("")
except Exception:
current_cmd_text = ""
try:
stage_ctx = pipeline_context.get_stage_context()
except Exception:
stage_ctx = None
in_pipeline = bool(stage_ctx is not None or ("|" in str(current_cmd_text or "")))
if in_pipeline:
try:
cached_cmd = pipeline_context.load_value("preflight.url_duplicates.command", default="")
cached_decision = pipeline_context.load_value("preflight.url_duplicates.continue", default=None)
except Exception:
cached_cmd = ""
cached_decision = None
if cached_decision is not None and str(cached_cmd or "") == str(current_cmd_text or ""):
if bool(cached_decision):
return True
try:
pipeline_context.request_pipeline_stop(reason="duplicate-url declined", exit_code=0)
except Exception:
pass
return False
unique_urls: List[str] = []
for u in urls or []:
s = str(u or "").strip()
if s and s not in unique_urls:
unique_urls.append(s)
if len(unique_urls) <= 1:
return True
try:
from SYS.metadata import normalize_urls
except Exception:
normalize_urls = None # type: ignore[assignment]
def _httpish(value: str) -> bool:
try:
return bool(value) and (value.startswith("http://") or value.startswith("https://"))
except Exception:
return False
url_needles: Dict[str, List[str]] = {}
for u in unique_urls:
needles: List[str] = []
if normalize_urls is not None:
try:
needles.extend([n for n in (normalize_urls(u) or []) if isinstance(n, str)])
except Exception:
needles = []
if not needles:
needles = [u]
filtered: List[str] = []
for n in needles:
n2 = str(n or "").strip()
if not n2:
continue
if not _httpish(n2):
continue
if n2 not in filtered:
filtered.append(n2)
url_needles[u] = filtered if filtered else [u]
backend_names: List[str] = []
try:
backend_names_all = storage.list_searchable_backends()
except Exception:
backend_names_all = []
for backend_name in backend_names_all:
try:
backend = storage[backend_name]
except Exception:
continue
try:
if str(backend_name).strip().lower() == "temp":
continue
except Exception:
pass
try:
backend_location = getattr(backend, "_location", None)
if backend_location and final_output_dir:
backend_path = Path(str(backend_location)).expanduser().resolve()
temp_path = Path(str(final_output_dir)).expanduser().resolve()
if backend_path == temp_path:
continue
except Exception:
pass
backend_names.append(backend_name)
if not backend_names:
debug("Bulk URL preflight skipped: no searchable backends")
return True
seen_pairs: set[tuple[str, str]] = set()
matched_urls: set[str] = set()
match_rows: List[Dict[str, Any]] = []
max_rows = 200
try:
from Store.HydrusNetwork import HydrusNetwork
except Exception:
HydrusNetwork = None # type: ignore
for backend_name in backend_names:
if len(match_rows) >= max_rows:
break
try:
backend = storage[backend_name]
except Exception:
continue
if HydrusNetwork is not None and isinstance(backend, HydrusNetwork):
if not hydrus_available:
continue
client = getattr(backend, "_client", None)
if client is None:
continue
for original_url, needles in url_needles.items():
if len(match_rows) >= max_rows:
break
if (original_url, str(backend_name)) in seen_pairs:
continue
found_hash: Optional[str] = None
found = False
for needle in (needles or [])[:3]:
if not _httpish(needle):
continue
try:
from API.HydrusNetwork import HydrusRequestSpec
spec = HydrusRequestSpec(
method="GET",
endpoint="/add_urls/get_url_files",
query={"url": needle},
)
response = client._perform_request(spec) # type: ignore[attr-defined]
raw_hashes = None
if isinstance(response, dict):
raw_hashes = response.get("hashes") or response.get("file_hashes")
raw_ids = response.get("file_ids")
has_ids = isinstance(raw_ids, list) and len(raw_ids) > 0
has_hashes = isinstance(raw_hashes, list) and len(raw_hashes) > 0
if has_hashes:
try:
found_hash = str(raw_hashes[0]).strip() # type: ignore[index]
except Exception:
found_hash = None
if has_ids or has_hashes:
found = True
break
except Exception:
continue
if not found:
continue
seen_pairs.add((original_url, str(backend_name)))
matched_urls.add(original_url)
display_row = {
"title": "(exists)",
"store": str(backend_name),
"hash": found_hash or "",
"url": original_url,
"columns": [
("Title", "(exists)"),
("Store", str(backend_name)),
("Hash", found_hash or ""),
("URL", original_url),
],
}
match_rows.append(display_row)
continue
for original_url, needles in url_needles.items():
if len(match_rows) >= max_rows:
break
if (original_url, str(backend_name)) in seen_pairs:
continue
backend_hits: List[Dict[str, Any]] = []
for needle in (needles or [])[:3]:
try:
backend_hits = backend.search(f"url:{needle}", limit=1) or []
if backend_hits:
break
except Exception:
continue
if not backend_hits:
continue
seen_pairs.add((original_url, str(backend_name)))
matched_urls.add(original_url)
hit = backend_hits[0]
title = hit.get("title") or hit.get("name") or hit.get("target") or hit.get("path") or "(exists)"
file_hash = hit.get("hash") or hit.get("file_hash") or hit.get("sha256") or ""
try:
from SYS.result_table import build_display_row
except Exception:
build_display_row = None # type: ignore
extracted = {
"title": str(title),
"store": str(hit.get("store") or backend_name),
"hash": str(file_hash or ""),
"ext": "",
"size": None,
}
if callable(build_display_row):
try:
extracted = build_display_row(hit, keys=["title", "store", "hash", "ext", "size"])
except Exception:
pass
extracted["title"] = str(title)
extracted["store"] = str(hit.get("store") or backend_name)
extracted["hash"] = str(file_hash or "")
ext = extracted.get("ext")
size_val = extracted.get("size")
display_row = {
"title": str(title),
"store": str(hit.get("store") or backend_name),
"hash": str(file_hash or ""),
"ext": str(ext or ""),
"size": size_val,
"url": original_url,
"columns": [
("Title", str(title)),
("Store", str(hit.get("store") or backend_name)),
("Hash", str(file_hash or "")),
("Ext", str(ext or "")),
("Size", size_val),
("URL", original_url),
],
}
match_rows.append(display_row)
if not match_rows:
debug("Bulk URL preflight: no matches")
return True
table = ResultTable(f"URL already exists ({len(matched_urls)} url(s))", max_columns=10)
table.set_no_choice(True)
try:
table.set_preserve_order(True)
except Exception:
pass
for row in match_rows:
table.add_result(row)
try:
pipeline_context.set_last_result_table_overlay(table, match_rows)
except Exception:
pass
suspend = getattr(pipeline_context, "suspend_live_progress", None)
cm: AbstractContextManager[Any] = nullcontext()
if callable(suspend):
try:
maybe_cm = suspend()
if maybe_cm is not None:
cm = maybe_cm # type: ignore[assignment]
except Exception:
cm = nullcontext()
with cm:
get_stderr_console().print(table)
setattr(table, "_rendered_by_cmdlet", True)
answered_yes = bool(Confirm.ask("Continue anyway?", default=False, console=get_stderr_console()))
if in_pipeline:
try:
existing = pipeline_context.load_value("preflight", default=None)
except Exception:
existing = None
preflight_cache: Dict[str, Any] = existing if isinstance(existing, dict) else {}
url_dup_cache = preflight_cache.get("url_duplicates")
if not isinstance(url_dup_cache, dict):
url_dup_cache = {}
url_dup_cache["command"] = str(current_cmd_text or "")
url_dup_cache["continue"] = bool(answered_yes)
preflight_cache["url_duplicates"] = url_dup_cache
try:
pipeline_context.store_value("preflight", preflight_cache)
except Exception:
pass
if not answered_yes:
if in_pipeline:
try:
pipeline_context.request_pipeline_stop(reason="duplicate-url declined", exit_code=0)
except Exception:
pass
return False
return True
def _maybe_show_playlist_table(self, *, url: str, ytdlp_tool: YtDlpTool) -> bool:
try:
cf = self._cookiefile_str(ytdlp_tool)
pr = probe_url(url, no_playlist=False, timeout_seconds=15, cookiefile=cf)
except Exception:
pr = None
if not isinstance(pr, dict):
return False
entries = pr.get("entries")
if not isinstance(entries, list) or len(entries) <= 1:
return False
extractor_name = ""
try:
extractor_name = str(pr.get("extractor") or pr.get("extractor_key") or "").strip().lower()
except Exception:
extractor_name = ""
table_type: Optional[str] = None
if "bandcamp" in extractor_name:
table_type = "bandcamp"
elif "youtube" in extractor_name:
table_type = "youtube"
max_rows = 200
display_entries = entries[:max_rows]
def _entry_to_url(entry: Any) -> Optional[str]:
if not isinstance(entry, dict):
return None
for key in ("webpage_url", "original_url", "url"):
v = entry.get(key)
if isinstance(v, str) and v.strip():
s_val = v.strip()
try:
if urlparse(s_val).scheme in {"http", "https"}:
return s_val
except Exception:
return s_val
entry_id = entry.get("id")
if isinstance(entry_id, str) and entry_id.strip():
extractor_name_inner = str(pr.get("extractor") or pr.get("extractor_key") or "").lower()
if "youtube" in extractor_name_inner:
return f"https://www.youtube.com/watch?v={entry_id.strip()}"
return None
table = ResultTable()
safe_url = str(url or "").strip()
table.title = f'download-file -url "{safe_url}"' if safe_url else "download-file"
if table_type:
try:
table.set_table(table_type)
except Exception:
table.table = table_type
table.set_source_command("download-file", [])
try:
table.set_preserve_order(True)
except Exception:
pass
results_list: List[Dict[str, Any]] = []
for idx, entry in enumerate(display_entries, 1):
title = None
uploader = None
duration = None
entry_url = _entry_to_url(entry)
try:
if isinstance(entry, dict):
title = entry.get("title")
uploader = entry.get("uploader") or pr.get("uploader")
duration = entry.get("duration")
except Exception:
pass
row: Dict[str, Any] = {
"table": "download-file",
"title": str(title or f"Item {idx}"),
"detail": str(uploader or ""),
"media_kind": "playlist-item",
"playlist_index": idx,
"_selection_args": (["-url", str(entry_url)] if entry_url else ["-url", str(url), "-item", str(idx)]),
"url": entry_url,
"target": entry_url,
"columns": [
("#", str(idx)),
("Title", str(title or "")),
("Duration", str(duration or "")),
("Uploader", str(uploader or "")),
],
}
results_list.append(row)
table.add_result(row)
pipeline_context.set_current_stage_table(table)
pipeline_context.set_last_result_table(table, results_list)
try:
suspend = getattr(pipeline_context, "suspend_live_progress", None)
cm: AbstractContextManager[Any] = nullcontext()
if callable(suspend):
maybe_cm = suspend()
if maybe_cm is not None:
cm = maybe_cm # type: ignore[assignment]
with cm:
get_stderr_console().print(table)
except Exception:
pass
setattr(table, "_rendered_by_cmdlet", True)
return True
def _maybe_show_format_table_for_single_url(
self,
*,
mode: str,
clip_spec: Any,
clip_values: Sequence[str],
playlist_items: Optional[str],
ytdl_format: Any,
supported_url: Sequence[str],
playlist_selection_handled: bool,
ytdlp_tool: YtDlpTool,
formats_cache: Dict[str, Optional[List[Dict[str, Any]]]],
storage: Any,
hydrus_available: bool,
final_output_dir: Path,
args: Sequence[str],
) -> Optional[int]:
if (
mode != "audio"
and not clip_spec
and not clip_values
and not playlist_items
and not ytdl_format
and len(supported_url) == 1
and not playlist_selection_handled
):
url = supported_url[0]
canonical_url = self._canonicalize_url_for_storage(
requested_url=url,
ytdlp_tool=ytdlp_tool,
playlist_items=playlist_items,
)
if not self._preflight_url_duplicate(
storage=storage,
hydrus_available=hydrus_available,
final_output_dir=final_output_dir,
candidate_url=canonical_url,
extra_urls=[url],
):
log(f"Skipping download: {url}", file=sys.stderr)
return 0
formats = self._list_formats_cached(
url,
playlist_items_value=None,
formats_cache=formats_cache,
ytdlp_tool=ytdlp_tool,
)
if formats and len(formats) > 1:
candidate_formats = [f for f in formats if self._is_browseable_format(f)]
filtered_formats = candidate_formats if candidate_formats else list(formats)
debug(f"Formatlist: showing {len(filtered_formats)} formats (raw={len(formats)})")
base_cmd = f'download-file "{url}"'
remaining_args = [arg for arg in args if arg != url and not arg.startswith("-")]
if remaining_args:
base_cmd += " " + " ".join(remaining_args)
table = ResultTable(title=f"Available formats for {url}", max_columns=10, preserve_order=True)
table.set_table("ytdlp.formatlist")
table.set_source_command("download-file", [url])
results_list: List[Dict[str, Any]] = []
for idx, fmt in enumerate(filtered_formats, 1):
resolution = fmt.get("resolution", "")
ext = fmt.get("ext", "")
vcodec = fmt.get("vcodec", "none")
acodec = fmt.get("acodec", "none")
filesize = fmt.get("filesize")
filesize_approx = fmt.get("filesize_approx")
format_id = fmt.get("format_id", "")
selection_format_id = format_id
try:
if vcodec != "none" and acodec == "none" and format_id:
selection_format_id = f"{format_id}+ba"
except Exception:
selection_format_id = format_id
size_str = ""
size_prefix = ""
size_bytes = filesize
if not size_bytes:
size_bytes = filesize_approx
if size_bytes:
size_prefix = "~"
try:
if isinstance(size_bytes, (int, float)) and size_bytes > 0:
size_mb = float(size_bytes) / (1024 * 1024)
size_str = f"{size_prefix}{size_mb:.1f}MB"
except Exception:
size_str = ""
desc_parts: List[str] = []
if resolution and resolution != "audio only":
desc_parts.append(resolution)
if ext:
desc_parts.append(str(ext).upper())
if vcodec != "none":
desc_parts.append(f"v:{vcodec}")
if acodec != "none":
desc_parts.append(f"a:{acodec}")
if size_str:
desc_parts.append(size_str)
format_desc = " | ".join(desc_parts)
format_dict = {
"table": "download-file",
"title": f"Format {format_id}",
"url": url,
"target": url,
"detail": format_desc,
"annotations": [ext, resolution] if resolution else [ext],
"media_kind": "format",
"cmd": base_cmd,
"columns": [
("ID", format_id),
("Resolution", resolution or "N/A"),
("Ext", ext),
("Size", size_str or ""),
("Video", vcodec),
("Audio", acodec),
],
"full_metadata": {
"format_id": format_id,
"url": url,
"item_selector": selection_format_id,
},
"_selection_args": None,
}
selection_args: List[str] = ["-format", selection_format_id]
try:
if (not clip_spec) and clip_values:
selection_args.extend(["-query", f"clip:{','.join([v for v in clip_values if v])}"])
except Exception:
pass
format_dict["_selection_args"] = selection_args
results_list.append(format_dict)
table.add_result(format_dict)
try:
suspend = getattr(pipeline_context, "suspend_live_progress", None)
cm: AbstractContextManager[Any] = nullcontext()
if callable(suspend):
maybe_cm = suspend()
if maybe_cm is not None:
cm = maybe_cm # type: ignore[assignment]
with cm:
get_stderr_console().print(table)
except Exception:
pass
setattr(table, "_rendered_by_cmdlet", True)
pipeline_context.set_current_stage_table(table)
pipeline_context.set_last_result_table(table, results_list)
log(f"", file=sys.stderr)
return 0
return None
def _download_supported_urls(
self,
*,
supported_url: Sequence[str],
ytdlp_tool: YtDlpTool,
args: Sequence[str],
config: Dict[str, Any],
final_output_dir: Path,
mode: str,
clip_spec: Any,
clip_ranges: Optional[List[tuple[int, int]]],
query_hash_override: Optional[str],
embed_chapters: bool,
write_sub: bool,
quiet_mode: bool,
playlist_items: Optional[str],
ytdl_format: Any,
skip_per_url_preflight: bool,
forced_single_format_id: Optional[str],
forced_single_format_for_batch: bool,
formats_cache: Dict[str, Optional[List[Dict[str, Any]]]],
storage: Any,
hydrus_available: bool,
) -> int:
downloaded_count = 0
downloaded_pipe_objects: List[Dict[str, Any]] = []
pipe_seq = 0
clip_sections_spec = self._build_clip_sections_spec(clip_ranges)
if clip_sections_spec:
try:
debug(f"Clip sections spec: {clip_sections_spec}")
except Exception:
pass
for url in supported_url:
try:
debug(f"Processing: {url}")
canonical_url = self._canonicalize_url_for_storage(
requested_url=url,
ytdlp_tool=ytdlp_tool,
playlist_items=playlist_items,
)
if not skip_per_url_preflight:
if not self._preflight_url_duplicate(
storage=storage,
hydrus_available=hydrus_available,
final_output_dir=final_output_dir,
candidate_url=canonical_url,
extra_urls=[url],
):
log(f"Skipping download: {url}", file=sys.stderr)
continue
PipelineProgress(pipeline_context).begin_steps(2)
actual_format = ytdl_format
actual_playlist_items = playlist_items
if playlist_items and not ytdl_format:
if re.search(r"[^0-9,-]", playlist_items):
actual_format = playlist_items
actual_playlist_items = None
if mode == "audio" and not actual_format:
actual_format = "bestaudio"
if mode == "video" and not actual_format:
configured = (ytdlp_tool.default_format("video") or "").strip()
if configured and configured != "bestvideo+bestaudio/best":
actual_format = configured
forced_single_applied = False
if (
forced_single_format_for_batch
and forced_single_format_id
and not ytdl_format
and not actual_playlist_items
):
actual_format = forced_single_format_id
forced_single_applied = True
if (
actual_format
and isinstance(actual_format, str)
and mode != "audio"
and "+" not in actual_format
and "/" not in actual_format
and "[" not in actual_format
and actual_format not in {"best", "bv", "ba", "b"}
and not forced_single_applied
):
try:
formats = self._list_formats_cached(
url,
playlist_items_value=actual_playlist_items,
formats_cache=formats_cache,
ytdlp_tool=ytdlp_tool,
)
if formats:
fmt_match = next((f for f in formats if str(f.get("format_id", "")) == actual_format), None)
if fmt_match:
vcodec = str(fmt_match.get("vcodec", "none"))
acodec = str(fmt_match.get("acodec", "none"))
if vcodec != "none" and acodec == "none":
debug(f"Selected video-only format {actual_format}; using {actual_format}+ba for audio")
actual_format = f"{actual_format}+ba"
except Exception:
pass
attempted_single_format_fallback = False
while True:
try:
opts = DownloadOptions(
url=url,
mode=mode,
output_dir=final_output_dir,
ytdl_format=actual_format,
cookies_path=ytdlp_tool.resolve_cookiefile(),
clip_sections=clip_sections_spec,
playlist_items=actual_playlist_items,
quiet=quiet_mode,
no_playlist=False,
embed_chapters=embed_chapters,
write_sub=write_sub,
)
PipelineProgress(pipeline_context).step("downloading")
debug(f"Starting download with 5-minute timeout...")
result_obj = _download_with_timeout(opts, timeout_seconds=300)
debug(f"Download completed, building pipe object...")
break
except DownloadError as e:
cause = getattr(e, "__cause__", None)
detail = ""
try:
detail = str(cause or "")
except Exception:
detail = ""
if ("requested format is not available" in (detail or "").lower()) and mode != "audio":
if (
forced_single_format_for_batch
and forced_single_format_id
and not ytdl_format
and not actual_playlist_items
and not attempted_single_format_fallback
):
attempted_single_format_fallback = True
actual_format = forced_single_format_id
debug(f"Only one format available (playlist preflight); retrying with: {actual_format}")
continue
formats = self._list_formats_cached(
url,
playlist_items_value=actual_playlist_items,
formats_cache=formats_cache,
ytdlp_tool=ytdlp_tool,
)
if (
(not attempted_single_format_fallback)
and isinstance(formats, list)
and len(formats) == 1
and isinstance(formats[0], dict)
):
only = formats[0]
fallback_format = str(only.get("format_id") or "").strip()
selection_format_id = fallback_format
try:
vcodec = str(only.get("vcodec", "none"))
acodec = str(only.get("acodec", "none"))
if vcodec != "none" and acodec == "none" and fallback_format:
selection_format_id = f"{fallback_format}+ba"
except Exception:
selection_format_id = fallback_format
if selection_format_id:
attempted_single_format_fallback = True
actual_format = selection_format_id
debug(f"Only one format available; retrying with: {actual_format}")
continue
if formats:
formats_to_show = formats
table = ResultTable(title=f"Available formats for {url}", max_columns=10, preserve_order=True)
table.set_table("ytdlp.formatlist")
table.set_source_command("download-file", [url])
results_list: List[Dict[str, Any]] = []
for idx, fmt in enumerate(formats_to_show, 1):
resolution = fmt.get("resolution", "")
ext = fmt.get("ext", "")
vcodec = fmt.get("vcodec", "none")
acodec = fmt.get("acodec", "none")
filesize = fmt.get("filesize")
filesize_approx = fmt.get("filesize_approx")
format_id = fmt.get("format_id", "")
selection_format_id = format_id
try:
if vcodec != "none" and acodec == "none" and format_id:
selection_format_id = f"{format_id}+ba"
except Exception:
selection_format_id = format_id
size_str = ""
size_prefix = ""
size_bytes = filesize
if not size_bytes:
size_bytes = filesize_approx
if size_bytes:
size_prefix = "~"
try:
if isinstance(size_bytes, (int, float)) and size_bytes > 0:
size_mb = float(size_bytes) / (1024 * 1024)
size_str = f"{size_prefix}{size_mb:.1f}MB"
except Exception:
size_str = ""
desc_parts: List[str] = []
if resolution and resolution != "audio only":
desc_parts.append(str(resolution))
if ext:
desc_parts.append(str(ext).upper())
if vcodec != "none":
desc_parts.append(f"v:{vcodec}")
if acodec != "none":
desc_parts.append(f"a:{acodec}")
if size_str:
desc_parts.append(size_str)
format_desc = " | ".join(desc_parts)
format_dict: Dict[str, Any] = {
"table": "download-file",
"title": f"Format {format_id}",
"url": url,
"target": url,
"detail": format_desc,
"media_kind": "format",
"columns": [
("ID", format_id),
("Resolution", resolution or "N/A"),
("Ext", ext),
("Size", size_str or ""),
("Video", vcodec),
("Audio", acodec),
],
"full_metadata": {
"format_id": format_id,
"url": url,
"item_selector": selection_format_id,
},
"_selection_args": ["-format", selection_format_id],
}
results_list.append(format_dict)
table.add_result(format_dict)
pipeline_context.set_current_stage_table(table)
pipeline_context.set_last_result_table(table, results_list)
try:
suspend = getattr(pipeline_context, "suspend_live_progress", None)
cm: AbstractContextManager[Any] = nullcontext()
if callable(suspend):
maybe_cm = suspend()
if maybe_cm is not None:
cm = maybe_cm # type: ignore[assignment]
with cm:
get_stderr_console().print(table)
except Exception:
pass
PipelineProgress(pipeline_context).step("awaiting selection")
log("Requested format is not available; select a working format with @N", file=sys.stderr)
return 0
raise
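                # Normalize the download result: clip downloads return a list, and a
                # single result may carry multiple paths (media plus sidecars); skip
                # subtitle sidecars and anything that is not a regular file.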
results_to_emit: List[Any] = []
if isinstance(result_obj, list):
results_to_emit = list(result_obj)
else:
paths = getattr(result_obj, "paths", None)
if isinstance(paths, list) and paths:
for p in paths:
try:
p_path = Path(p)
except Exception:
continue
try:
if p_path.suffix.lower() in _SUBTITLE_EXTS:
continue
except Exception:
pass
if not p_path.exists() or p_path.is_dir():
continue
try:
hv = sha256_file(p_path)
except Exception:
hv = None
results_to_emit.append(
DownloadMediaResult(
path=p_path,
info=getattr(result_obj, "info", {}) or {},
tag=list(getattr(result_obj, "tag", []) or []),
source_url=getattr(result_obj, "source_url", None) or opts.url,
hash_value=hv,
)
)
else:
results_to_emit = [result_obj]
pipe_objects: List[Dict[str, Any]] = []
for downloaded in results_to_emit:
po = self._build_pipe_object(downloaded, url, opts)
pipe_seq += 1
try:
po.setdefault("pipe_index", pipe_seq)
except Exception:
pass
try:
info = downloaded.info if isinstance(getattr(downloaded, "info", None), dict) else {}
except Exception:
info = {}
chapters_text = _format_chapters_note(info) if embed_chapters else None
if chapters_text:
notes = po.get("notes")
if not isinstance(notes, dict):
notes = {}
notes.setdefault("chapters", chapters_text)
po["notes"] = notes
if write_sub:
try:
media_path = Path(str(po.get("path") or ""))
except Exception:
media_path = None
if media_path is not None and media_path.exists() and media_path.is_file():
sub_path = _best_subtitle_sidecar(media_path)
if sub_path is not None:
sub_text = _read_text_file(sub_path)
if sub_text:
notes = po.get("notes")
if not isinstance(notes, dict):
notes = {}
notes["sub"] = sub_text
po["notes"] = notes
try:
sub_path.unlink()
except Exception:
pass
pipe_objects.append(po)
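                # With one pipe object per clip range, retitle each as clip:START-END
                # and link the clips to the source file's hash as king/alt relations.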
try:
if clip_ranges and len(pipe_objects) == len(clip_ranges):
source_hash = query_hash_override or self._find_existing_hash_for_url(
storage,
canonical_url,
hydrus_available=hydrus_available,
)
self._apply_clip_decorations(pipe_objects, clip_ranges, source_king_hash=source_hash)
except Exception:
pass
debug(f"Emitting {len(pipe_objects)} result(s) to pipeline...")
PipelineProgress(pipeline_context).step("finalized")
stage_ctx = pipeline_context.get_stage_context()
emit_enabled = bool(stage_ctx is not None)
for pipe_obj_dict in pipe_objects:
if emit_enabled:
pipeline_context.emit(pipe_obj_dict)
if pipe_obj_dict.get("url"):
pipe_obj = coerce_to_pipe_object(pipe_obj_dict)
register_url_with_local_library(pipe_obj, config)
try:
downloaded_pipe_objects.append(pipe_obj_dict)
except Exception:
pass
downloaded_count += len(pipe_objects)
debug("✓ Downloaded and emitted")
except DownloadError as e:
log(f"Download failed for {url}: {e}", file=sys.stderr)
except Exception as e:
log(f"Error processing {url}: {e}", file=sys.stderr)
if downloaded_count > 0:
debug(f"✓ Successfully processed {downloaded_count} URL(s)")
return 0
log("No downloads completed", file=sys.stderr)
return 1
def _run_streaming_urls(
self,
*,
streaming_urls: List[str],
args: Sequence[str],
config: Dict[str, Any],
parsed: Dict[str, Any],
) -> int:
try:
debug("Starting streaming download handler")
ytdlp_tool = YtDlpTool(config)
raw_url = list(streaming_urls)
supported_url, unsupported_list = self._filter_supported_urls(raw_url)
if not supported_url:
log("No yt-dlp-supported url to download", file=sys.stderr)
return 1
if unsupported_list:
debug(f"Skipping {len(unsupported_list)} unsupported url (use direct HTTP mode)")
final_output_dir = self._resolve_streaming_output_dir(parsed, config)
if not final_output_dir:
return 1
debug(f"Output directory: {final_output_dir}")
clip_spec = parsed.get("clip")
query_spec = parsed.get("query")
query_keyed = self._parse_query_keyed_spec(str(query_spec) if query_spec is not None else None)
query_hash_override = self._extract_hash_override(str(query_spec) if query_spec is not None else None, query_keyed)
embed_chapters = True
write_sub = True
query_format: Optional[str] = None
try:
fmt_values = query_keyed.get("format", []) if isinstance(query_keyed, dict) else []
fmt_candidate = fmt_values[-1] if fmt_values else None
if fmt_candidate is not None:
query_format = str(fmt_candidate).strip()
except Exception:
query_format = None
query_audio: Optional[bool] = None
try:
audio_values = query_keyed.get("audio", []) if isinstance(query_keyed, dict) else []
audio_candidate = audio_values[-1] if audio_values else None
if audio_candidate is not None:
s_val = str(audio_candidate).strip().lower()
if s_val in {"1", "true", "t", "yes", "y", "on"}:
query_audio = True
elif s_val in {"0", "false", "f", "no", "n", "off"}:
query_audio = False
elif s_val:
query_audio = True
except Exception:
query_audio = None
query_wants_audio = False
if query_format:
try:
query_wants_audio = str(query_format).strip().lower() == "audio"
except Exception:
query_wants_audio = False
audio_flag = bool(parsed.get("audio") is True)
wants_audio = audio_flag
if query_audio is not None:
wants_audio = wants_audio or bool(query_audio)
else:
wants_audio = wants_audio or bool(query_wants_audio)
mode = "audio" if wants_audio else "video"
clip_ranges, clip_invalid, clip_values = self._parse_clip_ranges_and_apply_items(
clip_spec=str(clip_spec) if clip_spec is not None else None,
query_keyed=query_keyed,
parsed=parsed,
query_spec=str(query_spec) if query_spec is not None else None,
)
if clip_invalid:
return 1
if clip_ranges:
try:
debug(f"Clip ranges: {clip_ranges}")
except Exception:
pass
quiet_mode = bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False
storage, hydrus_available = self._init_storage(config if isinstance(config, dict) else {})
formats_cache: Dict[str, Optional[List[Dict[str, Any]]]] = {}
playlist_items = str(parsed.get("item")) if parsed.get("item") else None
ytdl_format = parsed.get("format")
if not ytdl_format and query_format and not query_wants_audio:
try:
height_selector = self._format_selector_for_query_height(query_format)
except ValueError as e:
log(f"Error parsing format selection: {e}", file=sys.stderr)
return 1
if height_selector:
ytdl_format = height_selector
else:
if not re.match(r"^\s*#?\d+\s*$", str(query_format)):
ytdl_format = query_format
playlist_selection_handled = False
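            # Single URL with no explicit selection: a numeric -query format value
            # (e.g. "format:2") is resolved against the probed format table by row
            # index; otherwise a playlist table may be shown for @N selection.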
if len(supported_url) == 1 and not playlist_items and not ytdl_format:
candidate_url = supported_url[0]
if query_format and not query_wants_audio:
try:
idx_fmt = self._format_id_for_query_index(query_format, candidate_url, formats_cache, ytdlp_tool)
except ValueError as e:
log(f"Error parsing format selection: {e}", file=sys.stderr)
return 1
if idx_fmt:
debug(f"Resolved numeric format selection '{query_format}' -> {idx_fmt}")
ytdl_format = idx_fmt
if not ytdl_format:
if self._maybe_show_playlist_table(url=candidate_url, ytdlp_tool=ytdlp_tool):
playlist_selection_handled = True
try:
last_table = pipeline_context.get_last_result_table() if hasattr(pipeline_context, "get_last_result_table") else None
if hasattr(last_table, "rows") and getattr(last_table, "rows", None):
sample_index = 1
sample_fmt_id = None
try:
sample_row = last_table.rows[0]
sample_fmt_id = sample_row._full_metadata.get("item_selector") if getattr(sample_row, "_full_metadata", None) else None
except Exception:
sample_fmt_id = None
try:
sample_pipeline = f'download-file "{candidate_url}"'
hint = (
"To select non-interactively, re-run with an explicit format: "
"e.g. mm \"{pipeline} -format {fmt} | add-file -store <store>\" or "
"mm \"{pipeline} -query 'format:{index}' | add-file -store <store>\""
).format(
pipeline=sample_pipeline,
fmt=sample_fmt_id or "<format_id>",
index=sample_index,
)
log(hint, file=sys.stderr)
except Exception:
pass
except Exception:
pass
return 0
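            # For multi-URL batches, run one bulk duplicate check up front instead
            # of prompting per URL inside the download loop.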
skip_per_url_preflight = False
if len(supported_url) > 1:
if not self._preflight_url_duplicates_bulk(
storage=storage,
hydrus_available=hydrus_available,
final_output_dir=final_output_dir,
urls=list(supported_url),
):
return 0
skip_per_url_preflight = True
forced_single_format_id: Optional[str] = None
forced_single_format_for_batch = False
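            # Batch preflight: probe the first URL's formats; if only one format
            # exists, force it for the whole batch to avoid per-item failure/retry.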
if len(supported_url) > 1 and not playlist_items and not ytdl_format:
try:
sample_url = str(supported_url[0])
fmts = self._list_formats_cached(
sample_url,
playlist_items_value=None,
formats_cache=formats_cache,
ytdlp_tool=ytdlp_tool,
)
if isinstance(fmts, list) and len(fmts) == 1 and isinstance(fmts[0], dict):
only_id = str(fmts[0].get("format_id") or "").strip()
if only_id:
forced_single_format_id = only_id
forced_single_format_for_batch = True
debug(
f"Playlist format preflight: only one format available; using {forced_single_format_id} for all items"
)
except Exception:
forced_single_format_id = None
forced_single_format_for_batch = False
early_ret = self._maybe_show_format_table_for_single_url(
mode=mode,
clip_spec=clip_spec,
clip_values=clip_values,
playlist_items=playlist_items,
ytdl_format=ytdl_format,
supported_url=supported_url,
playlist_selection_handled=playlist_selection_handled,
ytdlp_tool=ytdlp_tool,
formats_cache=formats_cache,
storage=storage,
hydrus_available=hydrus_available,
final_output_dir=final_output_dir,
args=args,
)
if early_ret is not None:
return int(early_ret)
return self._download_supported_urls(
supported_url=supported_url,
ytdlp_tool=ytdlp_tool,
args=args,
config=config,
final_output_dir=final_output_dir,
mode=mode,
clip_spec=clip_spec,
clip_ranges=clip_ranges,
query_hash_override=query_hash_override,
embed_chapters=embed_chapters,
write_sub=write_sub,
quiet_mode=quiet_mode,
playlist_items=playlist_items,
ytdl_format=ytdl_format,
skip_per_url_preflight=skip_per_url_preflight,
forced_single_format_id=forced_single_format_id,
forced_single_format_for_batch=forced_single_format_for_batch,
formats_cache=formats_cache,
storage=storage,
hydrus_available=hydrus_available,
)
except Exception as e:
log(f"Error in streaming download handler: {e}", file=sys.stderr)
return 1
def _resolve_streaming_output_dir(self, parsed: Dict[str, Any], config: Dict[str, Any]) -> Optional[Path]:
path_override = parsed.get("path")
if path_override:
try:
candidate = Path(str(path_override)).expanduser()
if candidate.suffix:
candidate = candidate.parent
candidate.mkdir(parents=True, exist_ok=True)
debug(f"Using output directory override: {candidate}")
return candidate
except Exception as e:
log(f"Invalid -path output directory: {e}", file=sys.stderr)
return None
try:
temp_value = (config or {}).get("temp") if isinstance(config, dict) else None
except Exception:
temp_value = None
if temp_value:
try:
candidate = Path(str(temp_value)).expanduser()
candidate.mkdir(parents=True, exist_ok=True)
debug(f"Using config temp directory: {candidate}")
return candidate
except Exception as e:
log(f"Cannot use configured temp directory '{temp_value}': {e}", file=sys.stderr)
return None
try:
import tempfile
candidate = Path(tempfile.gettempdir()) / "Medios-Macina"
candidate.mkdir(parents=True, exist_ok=True)
debug(f"Using OS temp directory: {candidate}")
return candidate
except Exception as e:
log(f"Cannot create OS temp directory: {e}", file=sys.stderr)
return None
def _parse_time_ranges(self, spec: str) -> List[tuple[int, int]]:
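        # Accepted timestamp forms: plain seconds ("90"), unit style ("1h2m3s",
        # "1m30s"), or clock style ("MM:SS", "HH:MM:SS"). Ranges are "start-end",
        # comma-separated; any malformed piece invalidates the whole spec ([]).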
def _to_seconds(ts: str) -> Optional[int]:
ts = str(ts).strip()
if not ts:
return None
try:
unit_match = re.fullmatch(r"(?i)\s*(?:(?P<h>\d+)h)?\s*(?:(?P<m>\d+)m)?\s*(?:(?P<s>\d+(?:\.\d+)?)s)?\s*", ts)
except Exception:
unit_match = None
if unit_match and unit_match.group(0).strip() and any(unit_match.group(g) for g in ("h", "m", "s")):
try:
hours = int(unit_match.group("h") or 0)
minutes = int(unit_match.group("m") or 0)
seconds = float(unit_match.group("s") or 0)
total = (hours * 3600) + (minutes * 60) + seconds
return int(total)
except Exception:
return None
if ":" in ts:
parts = [p.strip() for p in ts.split(":")]
if len(parts) == 2:
hh_s = "0"
mm_s, ss_s = parts
elif len(parts) == 3:
hh_s, mm_s, ss_s = parts
else:
return None
try:
hours = int(hh_s)
minutes = int(mm_s)
seconds = float(ss_s)
total = (hours * 3600) + (minutes * 60) + seconds
return int(total)
except Exception:
return None
try:
return int(float(ts))
except Exception:
return None
ranges: List[tuple[int, int]] = []
if not spec:
return ranges
for piece in str(spec).split(","):
piece = piece.strip()
if not piece:
continue
if "-" not in piece:
return []
start_s, end_s = [p.strip() for p in piece.split("-", 1)]
start = _to_seconds(start_s)
end = _to_seconds(end_s)
if start is None or end is None or start >= end:
return []
ranges.append((start, end))
return ranges
@staticmethod
def _parse_keyed_csv_spec(spec: str, *, default_key: str) -> Dict[str, List[str]]:
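        # Parses comma-separated "key:value" pieces where bare values attach to the
        # most recent key, e.g. _parse_keyed_csv_spec("clip:1m-2m,3m-4m,format:137",
        # default_key="clip") -> {"clip": ["1m-2m", "3m-4m"], "format": ["137"]}.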
out: Dict[str, List[str]] = {}
if not isinstance(spec, str):
spec = str(spec)
text = spec.strip()
if not text:
return out
active = (default_key or "").strip().lower() or "clip"
key_pattern = re.compile(r"^([A-Za-z_][A-Za-z0-9_-]*)\s*:\s*(.*)$")
for raw_piece in text.split(","):
piece = raw_piece.strip()
if not piece:
continue
m = key_pattern.match(piece)
if m:
active = (m.group(1) or "").strip().lower() or active
value = (m.group(2) or "").strip()
if value:
out.setdefault(active, []).append(value)
continue
out.setdefault(active, []).append(piece)
return out
def _build_clip_sections_spec(self, clip_ranges: Optional[List[tuple[int, int]]]) -> Optional[str]:
ranges: List[str] = []
if clip_ranges:
for start_s, end_s in clip_ranges:
ranges.append(f"{start_s}-{end_s}")
return ",".join(ranges) if ranges else None
def _build_pipe_object(self, download_result: Any, url: str, opts: DownloadOptions) -> Dict[str, Any]:
info: Dict[str, Any] = download_result.info if isinstance(download_result.info, dict) else {}
media_path = Path(download_result.path)
hash_value = download_result.hash_value or self._compute_file_hash(media_path)
title = info.get("title") or media_path.stem
tag = list(download_result.tag or [])
if title and f"title:{title}" not in tag:
tag.insert(0, f"title:{title}")
final_url = None
try:
page_url = info.get("webpage_url") or info.get("original_url") or info.get("url")
if page_url:
final_url = str(page_url)
except Exception:
final_url = None
if not final_url and url:
final_url = str(url)
return {
"path": str(media_path),
"hash": hash_value,
"title": title,
"url": final_url,
"tag": tag,
"action": "cmdlet:download-file",
"is_temp": True,
"ytdl_format": getattr(opts, "ytdl_format", None),
"store": getattr(opts, "storage_name", None) or getattr(opts, "storage_location", None) or "PATH",
"media_kind": "video" if opts.mode == "video" else "audio",
}
@staticmethod
def download_streaming_url_as_pipe_objects(
url: str,
config: Dict[str, Any],
*,
mode_hint: Optional[str] = None,
ytdl_format_hint: Optional[str] = None,
) -> List[Dict[str, Any]]:
"""Download a yt-dlp-supported URL and return PipeObject-style dict(s).
This is a lightweight helper intended for cmdlets that need to expand streaming URLs
into local files without re-implementing yt-dlp glue.
"""
url_str = str(url or "").strip()
if not url_str:
return []
if not is_url_supported_by_ytdlp(url_str):
return []
try:
from SYS.config import resolve_output_dir
out_dir = resolve_output_dir(config)
if out_dir is None:
return []
except Exception:
return []
cookies_path = None
try:
cookie_candidate = YtDlpTool(config).resolve_cookiefile()
if cookie_candidate is not None and cookie_candidate.is_file():
cookies_path = cookie_candidate
except Exception:
cookies_path = None
quiet_download = False
try:
quiet_download = bool((config or {}).get("_quiet_background_output"))
except Exception:
quiet_download = False
mode = str(mode_hint or "").strip().lower() if mode_hint else ""
if mode not in {"audio", "video"}:
mode = "video"
try:
cf = (
str(cookies_path)
if cookies_path is not None and cookies_path.is_file() else None
)
fmts_probe = list_formats(
url_str,
no_playlist=False,
playlist_items=None,
cookiefile=cf,
)
if isinstance(fmts_probe, list) and fmts_probe:
has_video = False
for f in fmts_probe:
if not isinstance(f, dict):
continue
vcodec = str(f.get("vcodec", "none") or "none").strip().lower()
if vcodec and vcodec != "none":
has_video = True
break
mode = "video" if has_video else "audio"
        except Exception:
            # Probe failed; keep the hinted/default mode rather than forcing video.
            pass
fmt_hint = str(ytdl_format_hint).strip() if ytdl_format_hint else ""
chosen_format: Optional[str]
if fmt_hint:
chosen_format = fmt_hint
else:
chosen_format = None
if mode == "audio":
chosen_format = "bestaudio/best"
opts = DownloadOptions(
url=url_str,
mode=mode,
output_dir=Path(out_dir),
cookies_path=cookies_path,
ytdl_format=chosen_format,
quiet=quiet_download,
embed_chapters=True,
write_sub=True,
)
try:
result_obj = _download_with_timeout(opts, timeout_seconds=300)
except Exception as exc:
log(f"[download-file] Download failed for {url_str}: {exc}", file=sys.stderr)
return []
results: List[Any]
if isinstance(result_obj, list):
results = list(result_obj)
else:
paths = getattr(result_obj, "paths", None)
if isinstance(paths, list) and paths:
results = []
for p in paths:
try:
p_path = Path(p)
except Exception:
continue
if not p_path.exists() or p_path.is_dir():
continue
try:
hv = sha256_file(p_path)
except Exception:
hv = None
try:
results.append(
DownloadMediaResult(
path=p_path,
info=getattr(result_obj, "info", {}) or {},
tag=list(getattr(result_obj, "tag", []) or []),
source_url=getattr(result_obj, "source_url", None) or url_str,
hash_value=hv,
)
)
except Exception:
continue
else:
results = [result_obj]
out: List[Dict[str, Any]] = []
for downloaded in results:
try:
info = (
downloaded.info
if isinstance(getattr(downloaded, "info", None), dict) else {}
)
except Exception:
info = {}
try:
media_path = Path(str(getattr(downloaded, "path", "") or ""))
except Exception:
continue
if not media_path.exists() or media_path.is_dir():
continue
try:
hash_value = getattr(downloaded, "hash_value", None) or sha256_file(media_path)
except Exception:
hash_value = None
title = None
try:
title = info.get("title")
except Exception:
title = None
title = title or media_path.stem
tags = list(getattr(downloaded, "tag", []) or [])
if title and f"title:{title}" not in tags:
tags.insert(0, f"title:{title}")
final_url = None
try:
page_url = info.get("webpage_url") or info.get("original_url") or info.get("url")
if page_url:
final_url = str(page_url)
except Exception:
final_url = None
if not final_url:
final_url = url_str
po: Dict[str, Any] = {
"path": str(media_path),
"hash": hash_value,
"title": title,
"url": final_url,
"tag": tags,
"action": "cmdlet:download-file",
"is_temp": True,
"ytdl_format": getattr(opts, "ytdl_format", None),
"store": getattr(opts, "storage_name", None) or getattr(opts, "storage_location", None) or "PATH",
"media_kind": "video" if opts.mode == "video" else "audio",
}
try:
chapters_text = _format_chapters_note(info)
except Exception:
chapters_text = None
if chapters_text:
notes = po.get("notes")
if not isinstance(notes, dict):
notes = {}
notes.setdefault("chapters", chapters_text)
po["notes"] = notes
try:
sub_path = _best_subtitle_sidecar(media_path)
except Exception:
sub_path = None
if sub_path is not None:
sub_text = _read_text_file(sub_path)
if sub_text:
notes = po.get("notes")
if not isinstance(notes, dict):
notes = {}
notes["sub"] = sub_text
po["notes"] = notes
try:
sub_path.unlink()
except Exception:
pass
out.append(po)
return out
@staticmethod
def _normalise_hash_hex(value: Optional[str]) -> Optional[str]:
if not value or not isinstance(value, str):
return None
candidate = value.strip().lower()
if len(candidate) == 64 and all(c in "0123456789abcdef" for c in candidate):
return candidate
return None
@classmethod
def _extract_hash_from_search_hit(cls, hit: Any) -> Optional[str]:
if not isinstance(hit, dict):
return None
for key in ("hash", "hash_hex", "file_hash", "hydrus_hash"):
v = hit.get(key)
normalized = cls._normalise_hash_hex(str(v) if v is not None else None)
if normalized:
return normalized
return None
@classmethod
def _find_existing_hash_for_url(
cls, storage: Any, canonical_url: str, *, hydrus_available: bool
) -> Optional[str]:
if storage is None or not canonical_url:
return None
try:
from Store.HydrusNetwork import HydrusNetwork
except Exception:
HydrusNetwork = None # type: ignore
try:
backend_names = list(storage.list_searchable_backends() or [])
except Exception:
backend_names = []
for backend_name in backend_names:
try:
backend = storage[backend_name]
except Exception:
continue
try:
if str(backend_name).strip().lower() == "temp":
continue
except Exception:
pass
try:
if HydrusNetwork is not None and isinstance(backend, HydrusNetwork) and not hydrus_available:
continue
except Exception:
pass
try:
hits = backend.search(f"url:{canonical_url}", limit=5) or []
except Exception:
hits = []
for hit in hits:
extracted = cls._extract_hash_from_search_hit(hit)
if extracted:
return extracted
return None
@staticmethod
def _format_timecode(seconds: int, *, force_hours: bool) -> str:
total = max(0, int(seconds))
minutes, secs = divmod(total, 60)
hours, minutes = divmod(minutes, 60)
if force_hours:
return f"{hours:02d}:{minutes:02d}:{secs:02d}"
return f"{minutes:02d}:{secs:02d}"
@classmethod
def _format_clip_range(cls, start_s: int, end_s: int) -> str:
force_hours = bool(start_s >= 3600 or end_s >= 3600)
return f"{cls._format_timecode(start_s, force_hours=force_hours)}-{cls._format_timecode(end_s, force_hours=force_hours)}"
@classmethod
def _apply_clip_decorations(
cls, pipe_objects: List[Dict[str, Any]], clip_ranges: List[tuple[int, int]], *, source_king_hash: Optional[str]
) -> None:
if not pipe_objects or len(pipe_objects) != len(clip_ranges):
return
for po, (start_s, end_s) in zip(pipe_objects, clip_ranges):
clip_range = cls._format_clip_range(start_s, end_s)
clip_tag = f"clip:{clip_range}"
po["title"] = clip_tag
tags = po.get("tag")
if not isinstance(tags, list):
tags = []
tags = [t for t in tags if not str(t).strip().lower().startswith("title:")]
tags = [t for t in tags if not str(t).strip().lower().startswith("relationship:")]
tags.insert(0, f"title:{clip_tag}")
if clip_tag not in tags:
tags.append(clip_tag)
po["tag"] = tags
if len(pipe_objects) < 2:
return
hashes: List[str] = []
for po in pipe_objects:
h_val = cls._normalise_hash_hex(str(po.get("hash") or ""))
hashes.append(h_val or "")
king_hash = cls._normalise_hash_hex(source_king_hash) if source_king_hash else None
if not king_hash:
king_hash = hashes[0] if hashes and hashes[0] else None
if not king_hash:
return
alt_hashes: List[str] = [h for h in hashes if h and h != king_hash]
if not alt_hashes:
return
for po in pipe_objects:
po["relationships"] = {"king": [king_hash], "alt": list(alt_hashes)}
    def _run_impl(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Main download implementation for direct HTTP files."""
progress = PipelineProgress(pipeline_context)
prev_progress = None
had_progress_key = False
try:
debug("Starting download-file")
# Allow providers to tap into the active PipelineProgress (optional).
try:
if isinstance(config, dict):
had_progress_key = "_pipeline_progress" in config
prev_progress = config.get("_pipeline_progress")
config["_pipeline_progress"] = progress
except Exception:
pass
# Parse arguments
parsed = parse_cmdlet_args(args, self)
raw_url = self._normalize_urls(parsed)
piped_items = self._collect_piped_items_if_no_urls(result, raw_url)
had_piped_input = False
            try:
                had_piped_input = bool(result)
            except Exception:
                had_piped_input = False
# UX: In piped mode, allow a single positional arg to be the destination directory.
# Example: @1-4 | download-file "C:\\Users\\Me\\Downloads\\yoyo"
            if (had_piped_input and raw_url and len(raw_url) == 1
                    and not parsed.get("path") and not parsed.get("output")):
                candidate = str(raw_url[0] or "").strip()
                low = candidate.lower()
                looks_like_url = low.startswith(("http://", "https://", "ftp://"))
                looks_like_provider = low.startswith(
                    ("magnet:", "alldebrid:", "hydrus:", "ia:", "internetarchive:")
                )
                looks_like_windows_path = (
                    (len(candidate) >= 2 and candidate[1] == ":")
                    or candidate.startswith("\\")
                    or candidate.endswith(("\\", "/"))
                )
                if not looks_like_url and not looks_like_provider and looks_like_windows_path:
                    parsed["path"] = candidate
                    raw_url = []
                    piped_items = self._collect_piped_items_if_no_urls(result, raw_url)
if not raw_url and not piped_items:
log("No url or piped items to download", file=sys.stderr)
return 1
streaming_candidates = self._append_urls_from_piped_result(list(raw_url), result)
supported_streaming, unsupported_streaming = self._filter_supported_urls(streaming_candidates)
streaming_exit_code: Optional[int] = None
streaming_downloaded = 0
if supported_streaming:
streaming_exit_code = self._run_streaming_urls(
streaming_urls=supported_streaming,
args=args,
config=config,
parsed=parsed,
)
if streaming_exit_code == 0:
streaming_downloaded += 1
raw_url = [u for u in raw_url if u not in supported_streaming]
if not raw_url and not unsupported_streaming:
piped_items = []
if not raw_url and not piped_items:
return int(streaming_exit_code or 0)
            quiet_mode = bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False
ia_picker_exit = ia_provider.maybe_show_formats_table(
raw_urls=raw_url,
piped_items=piped_items,
parsed=parsed,
config=config,
quiet_mode=quiet_mode,
get_field=get_field,
)
if ia_picker_exit is not None:
return int(ia_picker_exit)
# Get output directory
final_output_dir = self._resolve_output_dir(parsed, config)
if not final_output_dir:
return 1
debug(f"Output directory: {final_output_dir}")
# If the caller isn't running the shared pipeline Live progress UI (e.g. direct
# cmdlet execution), start a minimal local pipeline progress panel so downloads
# show consistent, Rich-formatted progress (like download-media).
total_items = self._safe_total_items(raw_url, piped_items)
preview = self._build_preview(raw_url, piped_items, total_items)
progress.ensure_local_ui(
label="download-file",
total_items=total_items,
items_preview=preview
)
registry = self._load_provider_registry()
downloaded_count = 0
urls_downloaded, early_exit = self._process_explicit_urls(
raw_urls=raw_url,
final_output_dir=final_output_dir,
config=config,
quiet_mode=quiet_mode,
registry=registry,
progress=progress,
)
downloaded_count += int(urls_downloaded)
if early_exit is not None:
return int(early_exit)
downloaded_count += self._process_provider_items(
piped_items=piped_items,
final_output_dir=final_output_dir,
config=config,
quiet_mode=quiet_mode,
registry=registry,
progress=progress,
)
if downloaded_count > 0 or streaming_downloaded > 0:
debug(f"✓ Successfully processed {downloaded_count} file(s)")
return 0
if streaming_exit_code is not None:
return int(streaming_exit_code)
log("No downloads completed", file=sys.stderr)
return 1
except Exception as e:
log(f"Error in download-file: {e}", file=sys.stderr)
return 1
finally:
try:
if isinstance(config, dict):
if had_progress_key:
config["_pipeline_progress"] = prev_progress
else:
config.pop("_pipeline_progress", None)
except Exception:
pass
progress.close_local_ui(force_complete=True)
    def _resolve_output_dir(self, parsed: Dict[str, Any], config: Dict[str, Any]) -> Optional[Path]:
"""Resolve the output directory from storage location or config."""
output_dir_arg = parsed.get("path") or parsed.get("output")
if output_dir_arg:
try:
out_path = Path(str(output_dir_arg)).expanduser()
out_path.mkdir(parents=True, exist_ok=True)
return out_path
except Exception as e:
log(
f"Cannot use output directory {output_dir_arg}: {e}",
file=sys.stderr
)
return None
storage_location = parsed.get("storage")
# Priority 1: --storage flag
if storage_location:
try:
return SharedArgs.resolve_storage(storage_location)
except Exception as e:
log(f"Invalid storage location: {e}", file=sys.stderr)
return None
# Priority 2: Config default output/temp directory
try:
from SYS.config import resolve_output_dir
final_output_dir = resolve_output_dir(config)
except Exception:
final_output_dir = Path.home() / "Downloads"
debug(f"Using default directory: {final_output_dir}")
# Ensure directory exists
try:
final_output_dir.mkdir(parents=True, exist_ok=True)
except Exception as e:
log(
f"Cannot create output directory {final_output_dir}: {e}",
file=sys.stderr
)
return None
return final_output_dir
def _compute_file_hash(self, filepath: Path) -> str:
"""Compute SHA256 hash of a file."""
import hashlib
sha256_hash = hashlib.sha256()
with open(filepath, "rb") as f:
for byte_block in iter(lambda: f.read(4096), b""):
sha256_hash.update(byte_block)
return sha256_hash.hexdigest()
# Module-level singleton registration
CMDLET = Download_File()