"""Download media from a URL using yt-dlp (streaming sites only).

Focused cmdlet for video/audio downloads from yt-dlp-supported sites:

- YouTube, Twitch, Dailymotion, Vimeo, etc.
- No direct file downloads (use download-file for that)
- Playlist detection with item selection
- Clip extraction (time ranges)
- Format selection and audio/video modes
- Tag extraction and metadata integration
"""

from __future__ import annotations

import glob  # noqa: F401
import hashlib
import json  # noqa: F401
import random
import re
import string
import subprocess
import sys
import tempfile
import time
import traceback
from pathlib import Path
from typing import Any, Dict, Iterator, List, Optional, Sequence
from urllib.parse import urlparse

from SYS.logger import log, debug
from SYS.utils import sha256_file
from models import DownloadError, DownloadOptions, DownloadMediaResult, DebugLogger, ProgressBar
import pipeline as pipeline_context
from result_table import ResultTable
from rich.prompt import Confirm
from rich_display import stderr_console as get_stderr_console
from tool.ytdlp import YtDlpTool

from . import _shared as sh

Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
SharedArgs = sh.SharedArgs
create_pipe_object_result = sh.create_pipe_object_result
parse_cmdlet_args = sh.parse_cmdlet_args
register_url_with_local_library = sh.register_url_with_local_library
coerce_to_pipe_object = sh.coerce_to_pipe_object
get_field = sh.get_field

def _live_ui_and_pipe_index() -> tuple[Optional[Any], int]:
    ui = None
    try:
        ui = pipeline_context.get_live_progress() if hasattr(pipeline_context, "get_live_progress") else None
    except Exception:
        ui = None

    pipe_idx: int = 0
    try:
        stage_ctx = pipeline_context.get_stage_context() if hasattr(pipeline_context, "get_stage_context") else None
        maybe_idx = getattr(stage_ctx, "pipe_index", None) if stage_ctx is not None else None
        if isinstance(maybe_idx, int):
            pipe_idx = int(maybe_idx)
    except Exception:
        pipe_idx = 0

    return ui, pipe_idx


def _begin_live_steps(total_steps: int) -> None:
    """Declare the total number of steps for the current pipe."""
    ui, pipe_idx = _live_ui_and_pipe_index()
    if ui is None:
        return
    try:
        begin = getattr(ui, "begin_pipe_steps", None)
        if callable(begin):
            begin(int(pipe_idx), total_steps=int(total_steps))
    except Exception:
        return


def _step(text: str) -> None:
    """Emit a *new* step (increments i/N and advances percent automatically)."""
    ui, pipe_idx = _live_ui_and_pipe_index()
    if ui is None:
        return
    try:
        adv = getattr(ui, "advance_pipe_step", None)
        if callable(adv):
            adv(int(pipe_idx), str(text))
    except Exception:
        return


def _set_pipe_percent(percent: int) -> None:
    """Best-effort percent update without changing step text."""
    ui, pipe_idx = _live_ui_and_pipe_index()
    if ui is None:
        return
    try:
        set_pct = getattr(ui, "set_pipe_percent", None)
        if callable(set_pct):
            set_pct(int(pipe_idx), int(percent))
    except Exception:
        return

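# Example (sketch): a cmdlet stage reporting coarse progress through the pipeline
# Live UI. Purely illustrative; these helpers silently no-op when no Live UI is
# attached to the current pipe.
#
#     _begin_live_steps(3)
#     _step("probing url")
#     _step("downloading")
#     _set_pipe_percent(75)
#     _step("finalizing")
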
# Minimal inlined helpers from helper/download.py (is_url_supported_by_ytdlp, list_formats)
try:
    import yt_dlp  # type: ignore
    from yt_dlp.extractor import gen_extractors  # type: ignore
except Exception as exc:
    yt_dlp = None  # type: ignore
    YTDLP_IMPORT_ERROR = exc
else:
    YTDLP_IMPORT_ERROR = None

try:
    from metadata import extract_ytdlp_tags
except ImportError:
    extract_ytdlp_tags = None

_EXTRACTOR_CACHE: List[Any] | None = None

# Reused progress formatter for yt-dlp callbacks (stderr only).
_YTDLP_PROGRESS_BAR = ProgressBar()

_SUBTITLE_EXTS = (".vtt", ".srt", ".ass", ".ssa", ".lrc")

def _format_chapters_note(info: Dict[str, Any]) -> Optional[str]:
    """Format yt-dlp chapter metadata into a stable, note-friendly text.

    Output is one chapter per line, e.g.:
        00:00 Intro
        01:23-02:10 Topic name
    """
    try:
        chapters = info.get("chapters")
    except Exception:
        chapters = None

    if not isinstance(chapters, list) or not chapters:
        return None

    rows: List[tuple[int, Optional[int], str]] = []
    max_t = 0
    for ch in chapters:
        if not isinstance(ch, dict):
            continue
        start_raw = ch.get("start_time")
        end_raw = ch.get("end_time")
        title_raw = ch.get("title") or ch.get("name") or ch.get("chapter")

        try:
            start_s = int(float(start_raw))
        except Exception:
            continue

        end_s: Optional[int] = None
        try:
            if end_raw is not None:
                end_s = int(float(end_raw))
        except Exception:
            end_s = None

        title = str(title_raw).strip() if title_raw is not None else ""
        rows.append((start_s, end_s, title))
        try:
            max_t = max(max_t, start_s, end_s or 0)
        except Exception:
            max_t = max(max_t, start_s)

    if not rows:
        return None

    force_hours = bool(max_t >= 3600)

    def _tc(seconds: int) -> str:
        total = max(0, int(seconds))
        minutes, secs = divmod(total, 60)
        hours, minutes = divmod(minutes, 60)
        if force_hours:
            return f"{hours:02d}:{minutes:02d}:{secs:02d}"
        return f"{minutes:02d}:{secs:02d}"

    lines: List[str] = []
    for start_s, end_s, title in sorted(rows, key=lambda r: (r[0], r[1] if r[1] is not None else 10**9, r[2])):
        if end_s is not None and end_s > start_s:
            prefix = f"{_tc(start_s)}-{_tc(end_s)}"
        else:
            prefix = _tc(start_s)
        line = f"{prefix} {title}".strip()
        if line:
            lines.append(line)

    text = "\n".join(lines).strip()
    return text or None

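# Example (sketch): for yt-dlp info containing
#     {"chapters": [{"start_time": 0, "end_time": 83, "title": "Intro"},
#                   {"start_time": 83, "end_time": 130, "title": "Topic name"}]}
# _format_chapters_note(info) returns:
#     00:00-01:23 Intro
#     01:23-02:10 Topic name
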
def _best_subtitle_sidecar(media_path: Path) -> Optional[Path]:
    """Find the most likely subtitle sidecar file for a downloaded media file."""
    try:
        base_dir = media_path.parent
        stem = media_path.stem
        if not stem:
            return None

        candidates: List[Path] = []
        for p in base_dir.glob(stem + ".*"):
            try:
                if not p.is_file():
                    continue
            except Exception:
                continue
            if p.suffix.lower() in _SUBTITLE_EXTS:
                candidates.append(p)

        if not candidates:
            return None

        def _rank(path: Path) -> tuple[int, int, float, str]:
            name = path.name.lower()
            lang_rank = 0 if ".en." in name or name.endswith(".en" + path.suffix.lower()) else 1
            ext = path.suffix.lower()
            ext_rank_map = {".vtt": 0, ".srt": 1, ".ass": 2, ".ssa": 3, ".lrc": 4}
            ext_rank = ext_rank_map.get(ext, 9)
            try:
                mtime = float(path.stat().st_mtime)
            except Exception:
                mtime = 0.0
            return (lang_rank, ext_rank, -mtime, name)

        candidates.sort(key=_rank)
        return candidates[0]
    except Exception:
        return None

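# Example (sketch): for "video.mkv" with sidecars "video.en.vtt" and "video.srt",
# the language rank prefers the English .vtt, so _best_subtitle_sidecar(...)
# returns the "video.en.vtt" path.
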
def _read_text_file(path: Path, *, max_bytes: int = 1_500_000) -> Optional[str]:
    try:
        data = path.read_bytes()
    except Exception:
        return None
    if not data:
        return None
    if len(data) > max_bytes:
        data = data[:max_bytes]
    try:
        return data.decode("utf-8", errors="replace")
    except Exception:
        try:
            return data.decode(errors="replace")
        except Exception:
            return None

def _ensure_yt_dlp_ready() -> None:
    if yt_dlp is not None:
        return
    detail = str(YTDLP_IMPORT_ERROR or "yt-dlp is not installed")
    raise DownloadError(f"yt-dlp module not available: {detail}")

def is_url_supported_by_ytdlp(url: str) -> bool:
    if yt_dlp is None:
        return False
    global _EXTRACTOR_CACHE
    if _EXTRACTOR_CACHE is None:
        try:
            _EXTRACTOR_CACHE = [ie for ie in gen_extractors()]  # type: ignore[arg-type]
        except Exception:
            _EXTRACTOR_CACHE = []
    for extractor in _EXTRACTOR_CACHE:
        try:
            if not extractor.suitable(url):
                continue
        except Exception:
            continue
        name = getattr(extractor, "IE_NAME", "")
        if name.lower() == "generic":
            continue
        return True
    return False

def list_formats(
    url: str,
    no_playlist: bool = False,
    playlist_items: Optional[str] = None,
    cookiefile: Optional[str] = None,
) -> Optional[List[Dict[str, Any]]]:
    _ensure_yt_dlp_ready()
    try:
        assert yt_dlp is not None
        ydl_opts: Dict[str, Any] = {"quiet": True, "no_warnings": True, "socket_timeout": 30}
        if no_playlist:
            ydl_opts["noplaylist"] = True
        if playlist_items:
            ydl_opts["playlist_items"] = playlist_items
        if cookiefile:
            ydl_opts["cookiefile"] = cookiefile

        debug(f"Fetching format list for: {url}")
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:  # type: ignore[arg-type]
            info = ydl.extract_info(url, download=False)

        if not isinstance(info, dict):
            log("No formats available", file=sys.stderr)
            return None

        formats = info.get("formats") or []

        # Some URLs (notably playlist contexts) yield a playlist-shaped payload with
        # `entries` rather than a direct video payload. If so, try to pull formats
        # from the first concrete entry.
        if (not formats) and isinstance(info.get("entries"), list):
            try:
                for entry in info.get("entries") or []:
                    if not isinstance(entry, dict):
                        continue
                    entry_formats = entry.get("formats")
                    if isinstance(entry_formats, list) and entry_formats:
                        formats = entry_formats
                        break
            except Exception:
                pass

        if not isinstance(formats, list) or not formats:
            log("No formats available", file=sys.stderr)
            return None

        result_formats: List[Dict[str, Any]] = []
        for fmt in formats:
            if not isinstance(fmt, dict):
                continue
            result_formats.append(
                {
                    "format_id": fmt.get("format_id", ""),
                    "format": fmt.get("format", ""),
                    "ext": fmt.get("ext", ""),
                    "resolution": fmt.get("resolution", ""),
                    "width": fmt.get("width"),
                    "height": fmt.get("height"),
                    "fps": fmt.get("fps"),
                    "vcodec": fmt.get("vcodec", "none"),
                    "acodec": fmt.get("acodec", "none"),
                    "filesize": fmt.get("filesize"),
                    "abr": fmt.get("abr"),
                    "tbr": fmt.get("tbr"),
                }
            )

        debug(f"Found {len(result_formats)} available formats")
        return result_formats or None
    except Exception as e:
        log(f"✗ Error fetching formats: {e}", file=sys.stderr)
        return None

def _pick_best_audio_format_id(formats: List[Dict[str, Any]]) -> Optional[str]:
    audio_only: List[Dict[str, Any]] = []
    for fmt in formats:
        if not isinstance(fmt, dict):
            continue
        format_id = str(fmt.get("format_id") or "").strip()
        if not format_id:
            continue
        vcodec = str(fmt.get("vcodec") or "none").lower()
        acodec = str(fmt.get("acodec") or "none").lower()
        if vcodec != "none":
            continue
        if not acodec or acodec == "none":
            continue
        audio_only.append(fmt)

    if not audio_only:
        return None

    def score(f: Dict[str, Any]) -> tuple[float, float]:
        tbr = f.get("tbr")
        abr = f.get("abr")
        bitrate = 0.0
        for candidate in (tbr, abr):
            try:
                if candidate is not None:
                    bitrate = max(bitrate, float(candidate))
            except Exception:
                pass
        size = 0.0
        try:
            fs = f.get("filesize")
            if fs is not None:
                size = float(fs)
        except Exception:
            pass
        return (bitrate, size)

    best = max(audio_only, key=score)
    best_id = str(best.get("format_id") or "").strip()
    return best_id or None

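# Example (sketch): list formats for a URL and pick the highest-bitrate
# audio-only stream. Both calls return None on failure; the URL is illustrative.
#
#     formats = list_formats("https://www.youtube.com/watch?v=...", no_playlist=True)
#     if formats:
#         audio_format_id = _pick_best_audio_format_id(formats)
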
def _download_with_sections_via_cli(url: str, ytdl_options: Dict[str, Any], sections: List[str], quiet: bool = False) -> tuple[Optional[str], Dict[str, Any]]:
    sections_list = ytdl_options.get("download_sections", [])
    if not sections_list:
        return "", {}

    session_id = hashlib.md5((url + str(time.time()) + ''.join(random.choices(string.ascii_letters, k=10))).encode()).hexdigest()[:12]
    first_section_info = None

    total_sections = len(sections_list)
    for section_idx, section in enumerate(sections_list, 1):
        # While step 1/2 is "downloading", keep the pipe bar moving for multi-section clips.
        # Map sections onto 50..99 so step 2/2 can still jump to 100.
        try:
            if total_sections > 0:
                pct = 50 + int(((section_idx - 1) / max(1, total_sections)) * 49)
                _set_pipe_percent(pct)
        except Exception:
            pass

        base_outtmpl = ytdl_options.get("outtmpl", "%(title)s.%(ext)s")
        output_dir_path = Path(base_outtmpl).parent
        filename_tmpl = f"{session_id}_{section_idx}"
        if base_outtmpl.endswith(".%(ext)s"):
            filename_tmpl += ".%(ext)s"
        section_outtmpl = str(output_dir_path / filename_tmpl)

        if section_idx == 1:
            metadata_cmd = ["yt-dlp", "--dump-json", "--skip-download"]
            if ytdl_options.get("cookiefile"):
                cookies_path = ytdl_options["cookiefile"].replace("\\", "/")
                metadata_cmd.extend(["--cookies", cookies_path])
            if ytdl_options.get("noplaylist"):
                metadata_cmd.append("--no-playlist")
            metadata_cmd.append(url)
            try:
                meta_result = subprocess.run(metadata_cmd, capture_output=True, text=True)
                if meta_result.returncode == 0 and meta_result.stdout:
                    try:
                        info_dict = json.loads(meta_result.stdout.strip())
                        first_section_info = info_dict
                        if not quiet:
                            debug(f"Extracted title from metadata: {info_dict.get('title')}")
                    except json.JSONDecodeError:
                        if not quiet:
                            debug("Could not parse JSON metadata")
            except Exception as e:
                if not quiet:
                    debug(f"Error extracting metadata: {e}")

        cmd = ["yt-dlp"]
        if quiet:
            cmd.append("--quiet")
            cmd.append("--no-warnings")
            cmd.append("--no-progress")
            # Keep ffmpeg/merger output from taking over the terminal.
            cmd.extend(["--postprocessor-args", "ffmpeg:-hide_banner -loglevel error"])
        if ytdl_options.get("ffmpeg_location"):
            try:
                cmd.extend(["--ffmpeg-location", str(ytdl_options["ffmpeg_location"])])
            except Exception:
                pass
        if ytdl_options.get("format"):
            cmd.extend(["-f", ytdl_options["format"]])
        if ytdl_options.get("merge_output_format"):
            cmd.extend(["--merge-output-format", str(ytdl_options["merge_output_format"])])

        # For CLI downloads, infer chapter/metadata embedding from either legacy flags
        # or explicit FFmpegMetadata postprocessor entries.
        postprocessors = ytdl_options.get("postprocessors")
        want_add_metadata = bool(ytdl_options.get("addmetadata"))
        want_embed_chapters = bool(ytdl_options.get("embedchapters"))
        if isinstance(postprocessors, list):
            for pp in postprocessors:
                if not isinstance(pp, dict):
                    continue
                if str(pp.get("key") or "") == "FFmpegMetadata":
                    want_add_metadata = True
                    if bool(pp.get("add_chapters", True)):
                        want_embed_chapters = True

        if want_add_metadata:
            cmd.append("--add-metadata")
        if want_embed_chapters:
            cmd.append("--embed-chapters")
        if ytdl_options.get("writesubtitles"):
            cmd.append("--write-sub")
            cmd.append("--write-auto-sub")
            cmd.extend(["--sub-format", "vtt"])
        if ytdl_options.get("force_keyframes_at_cuts"):
            cmd.append("--force-keyframes-at-cuts")
        cmd.extend(["-o", section_outtmpl])
        if ytdl_options.get("cookiefile"):
            cookies_path = ytdl_options["cookiefile"].replace("\\", "/")
            cmd.extend(["--cookies", cookies_path])
        if ytdl_options.get("noplaylist"):
            cmd.append("--no-playlist")

        # Apply clip/section selection
        cmd.extend(["--download-sections", section])

        cmd.append(url)
        if not quiet:
            debug(f"Running yt-dlp for section: {section}")
        try:
            if quiet:
                subprocess.run(cmd, check=True, capture_output=True, text=True)
            else:
                subprocess.run(cmd, check=True)
        except subprocess.CalledProcessError as exc:
            stderr_text = (exc.stderr or "")
            tail = "\n".join(stderr_text.splitlines()[-12:]).strip()
            details = f"\n{tail}" if tail else ""
            raise DownloadError(f"yt-dlp failed for section {section} (exit {exc.returncode}){details}") from exc
        except Exception as exc:
            raise DownloadError(f"yt-dlp failed for section {section}: {exc}") from exc

    # Mark near-complete before returning so the runner can finalize cleanly.
    try:
        _set_pipe_percent(99)
    except Exception:
        pass

    return session_id, first_section_info or {}

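# Example (sketch): the "download_sections" values handed to this helper are the
# same strings yt-dlp accepts for --download-sections on the CLI, e.g.
# "*00:01:00-00:01:15" for a time range. The exact strings are produced by
# YtDlpTool.build_ytdlp_options, which is not shown in this module.
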
def _iter_download_entries(info: Dict[str, Any]) -> Iterator[Dict[str, Any]]:
    queue: List[Dict[str, Any]] = [info]
    seen: set[int] = set()
    while queue:
        current = queue.pop(0)
        obj_id = id(current)
        if obj_id in seen:
            continue
        seen.add(obj_id)
        entries = current.get("entries")
        if isinstance(entries, list):
            for entry in entries:
                queue.append(entry)
        if current.get("requested_downloads") or not entries:
            yield current


def _candidate_paths(entry: Dict[str, Any], output_dir: Path) -> Iterator[Path]:
    requested = entry.get("requested_downloads")
    if isinstance(requested, list):
        for item in requested:
            if isinstance(item, dict):
                fp = item.get("filepath") or item.get("_filename")
                if fp:
                    yield Path(fp)
    for key in ("filepath", "_filename", "filename"):
        value = entry.get(key)
        if value:
            yield Path(value)
    if entry.get("filename"):
        yield output_dir / entry["filename"]

def _resolve_entry_and_path(info: Dict[str, Any], output_dir: Path) -> tuple[Dict[str, Any], Path]:
    for entry in _iter_download_entries(info):
        for candidate in _candidate_paths(entry, output_dir):
            if candidate.is_file():
                return entry, candidate
            if not candidate.is_absolute():
                maybe = output_dir / candidate
                if maybe.is_file():
                    return entry, maybe
    raise FileNotFoundError("yt-dlp did not report a downloaded media file")


def _resolve_entries_and_paths(info: Dict[str, Any], output_dir: Path) -> List[tuple[Dict[str, Any], Path]]:
    resolved: List[tuple[Dict[str, Any], Path]] = []
    seen: set[str] = set()
    for entry in _iter_download_entries(info):
        chosen: Optional[Path] = None
        for candidate in _candidate_paths(entry, output_dir):
            if candidate.is_file():
                chosen = candidate
                break
            if not candidate.is_absolute():
                maybe = output_dir / candidate
                if maybe.is_file():
                    chosen = maybe
                    break
        if chosen is None:
            continue
        key = str(chosen.resolve())
        if key in seen:
            continue
        seen.add(key)
        resolved.append((entry, chosen))
    return resolved

def _extract_sha256(info: Dict[str, Any]) -> Optional[str]:
    for payload in [info] + list(info.get("entries") or []):
        if not isinstance(payload, dict):
            continue
        hashes = payload.get("hashes")
        if isinstance(hashes, dict):
            for key in ("sha256", "sha-256", "sha_256"):
                if key in hashes and isinstance(hashes[key], str) and hashes[key].strip():
                    return hashes[key].strip()
        for key in ("sha256", "sha-256", "sha_256"):
            value = payload.get(key)
            if isinstance(value, str) and value.strip():
                return value.strip()
    return None

def _progress_callback(status: Dict[str, Any]) -> None:
    """Simple progress callback using logger."""
    event = status.get("status")
    if event == "downloading":
        # Always print progress to stderr so piped stdout remains clean.
        percent = status.get("_percent_str")
        downloaded = status.get("downloaded_bytes")
        total = status.get("total_bytes") or status.get("total_bytes_estimate")
        speed = status.get("_speed_str")
        eta = status.get("_eta_str")

        _YTDLP_PROGRESS_BAR.update(
            downloaded=int(downloaded) if downloaded is not None else None,
            total=int(total) if total is not None else None,
            label="download",
            file=sys.stderr,
        )
    elif event == "finished":
        _YTDLP_PROGRESS_BAR.finish()
    elif event in ("postprocessing", "processing"):
        return

def probe_url(url: str, no_playlist: bool = False, timeout_seconds: int = 15, *, cookiefile: Optional[str] = None) -> Optional[Dict[str, Any]]:
    """Probe URL to extract metadata WITHOUT downloading.

    Args:
        url: URL to probe
        no_playlist: If True, ignore playlists and probe only the single video
        timeout_seconds: Max seconds to wait for probe (default 15s)

    Returns:
        Dict with keys: extractor, title, entries (if playlist), duration, etc.
        Returns None if not supported by yt-dlp or on timeout.
    """
    if not is_url_supported_by_ytdlp(url):
        return None

    # Wrap probe in timeout to prevent hanging on large playlists
    import threading
    from typing import cast

    result_container: List[Optional[Any]] = [None, None]  # [result, error]

    def _do_probe() -> None:
        try:
            _ensure_yt_dlp_ready()

            assert yt_dlp is not None
            # Extract info without downloading
            # Use extract_flat='in_playlist' to get full metadata for playlist items
            ydl_opts = {
                "quiet": True,  # Suppress all output
                "no_warnings": True,
                "socket_timeout": 10,
                "retries": 2,  # Reduce retries for faster timeout
                "skip_download": True,  # Don't actually download
                "extract_flat": "in_playlist",  # Get playlist with metadata for each entry
                "noprogress": True,  # No progress bars
            }

            if cookiefile:
                ydl_opts["cookiefile"] = str(cookiefile)

            # Add no_playlist option if specified
            if no_playlist:
                ydl_opts["noplaylist"] = True

            with yt_dlp.YoutubeDL(ydl_opts) as ydl:  # type: ignore[arg-type]
                info = ydl.extract_info(url, download=False)

            if not isinstance(info, dict):
                result_container[0] = None
                return

            # Extract relevant fields
            webpage_url = info.get("webpage_url") or info.get("original_url") or info.get("url")
            result_container[0] = {
                "extractor": info.get("extractor", ""),
                "title": info.get("title", ""),
                "entries": info.get("entries", []),  # Will be populated if playlist
                "duration": info.get("duration"),
                "uploader": info.get("uploader"),
                "description": info.get("description"),
                # Keep both the requested and canonical URL forms; callers should prefer webpage_url.
                "requested_url": url,
                "webpage_url": webpage_url,
            }
        except Exception as exc:
            log(f"Probe error for {url}: {exc}")
            result_container[1] = exc

    thread = threading.Thread(target=_do_probe, daemon=False)
    thread.start()
    thread.join(timeout=timeout_seconds)

    if thread.is_alive():
        # Probe timed out - return None so the caller can raise an error
        debug(f"Probe timeout for {url} (>={timeout_seconds}s)")
        return None

    if result_container[1] is not None:
        # Probe error - return None to proceed anyway
        return None

    return cast(Optional[Dict[str, Any]], result_container[0])

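# Example (sketch): quick pre-download probe; "entries" is non-empty for playlists.
#
#     probed = probe_url("https://www.youtube.com/watch?v=...", no_playlist=True)
#     if probed and probed.get("entries"):
#         ...  # playlist detected: offer item selection before downloading
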
def download_media(
    opts: DownloadOptions,
    *,
    debug_logger: Optional[DebugLogger] = None,
) -> Any:
    """Download streaming media exclusively via yt-dlp.

    Args:
        opts: DownloadOptions with url, mode, output_dir, etc.
        debug_logger: Optional debug logger for troubleshooting

    Returns:
        DownloadMediaResult with path, info, tags, hash

    Raises:
        DownloadError: If the URL is unsupported or yt-dlp detects no media
    """
    # Handle GoFile shares before yt-dlp (they remain unsupported)
    try:
        netloc = urlparse(opts.url).netloc.lower()
    except Exception:
        netloc = ""
    if "gofile.io" in netloc:
        msg = "GoFile links are currently unsupported"
        if not opts.quiet:
            debug(msg)
        if debug_logger is not None:
            debug_logger.write_record("gofile-unsupported", {"url": opts.url})
        raise DownloadError(msg)

    # Determine if yt-dlp should be used
    ytdlp_supported = is_url_supported_by_ytdlp(opts.url)
    if not ytdlp_supported:
        msg = "URL not supported by yt-dlp; try download-file for manual downloads"
        if not opts.quiet:
            log(msg)
        if debug_logger is not None:
            debug_logger.write_record("ytdlp-unsupported", {"url": opts.url})
        raise DownloadError(msg)

    # Skip probe for playlists with item selection (probe can hang on large playlists)
    # Just proceed straight to download which will handle item selection
    if opts.playlist_items:
        debug(f"Skipping probe for playlist (item selection: {opts.playlist_items}), proceeding with download")
        probe_result = {"url": opts.url}  # Minimal probe result
    else:
        probe_cookiefile = None
        try:
            if opts.cookies_path and opts.cookies_path.is_file():
                probe_cookiefile = str(opts.cookies_path)
        except Exception:
            probe_cookiefile = None

        probe_result = probe_url(opts.url, no_playlist=opts.no_playlist, timeout_seconds=15, cookiefile=probe_cookiefile)

        if probe_result is None:
            msg = "yt-dlp could not detect media for this URL; use download-file for direct downloads"
            if not opts.quiet:
                log(msg)
            if debug_logger is not None:
                debug_logger.write_record("ytdlp-skip-no-media", {"url": opts.url})
            raise DownloadError(msg)

    _ensure_yt_dlp_ready()

    ytdlp_tool = YtDlpTool()
    ytdl_options = ytdlp_tool.build_ytdlp_options(opts)
    hooks = ytdl_options.get("progress_hooks")
    if not isinstance(hooks, list):
        hooks = []
    ytdl_options["progress_hooks"] = hooks
    if _progress_callback not in hooks:
        hooks.append(_progress_callback)
    if not opts.quiet:
        debug(f"Starting yt-dlp download: {opts.url}")
    if debug_logger is not None:
        debug_logger.write_record("ytdlp-start", {"url": opts.url})

    assert yt_dlp is not None
    try:
        # Debug: show what options we're using
        if not opts.quiet:
            if ytdl_options.get("download_sections"):
                debug(f"[yt-dlp] download_sections: {ytdl_options['download_sections']}")
                debug(f"[yt-dlp] force_keyframes_at_cuts: {ytdl_options.get('force_keyframes_at_cuts', False)}")

        # Use subprocess when download_sections are present (Python API doesn't support them properly)
        session_id = None
        first_section_info = {}
        if ytdl_options.get("download_sections"):
            # For clip (download_sections), keep pipeline Live UI active and suppress
            # yt-dlp/ffmpeg CLI spam when running in quiet/pipeline mode.
            live_ui, _ = _live_ui_and_pipe_index()
            quiet_sections = bool(opts.quiet) or (live_ui is not None)
            session_id, first_section_info = _download_with_sections_via_cli(
                opts.url,
                ytdl_options,
                ytdl_options.get("download_sections", []),
                quiet=quiet_sections,
            )
            info = None
        else:
            with yt_dlp.YoutubeDL(ytdl_options) as ydl:  # type: ignore[arg-type]
                info = ydl.extract_info(opts.url, download=True)
    except Exception as exc:
        log(f"yt-dlp failed: {exc}", file=sys.stderr)
        if debug_logger is not None:
            debug_logger.write_record(
                "exception",
                {
                    "phase": "yt-dlp",
                    "error": str(exc),
                    "traceback": traceback.format_exc(),
                },
            )
        raise DownloadError("yt-dlp download failed") from exc

    # If we used subprocess, we need to find the file manually
    if info is None:
        # Find files created/modified during this download (after we started)
        # Look for files matching the expected output template pattern
        try:
            import glob
            import time
            import re

            # Get the expected filename pattern from outtmpl
            # For sections: "C:\path\{session_id}.section_1_of_3.ext", etc.
            # For non-sections: "C:\path\title.ext"

            # Wait a moment to ensure files are fully written
            time.sleep(0.5)

            # List all files in output_dir, sorted by modification time
            files = sorted(opts.output_dir.iterdir(), key=lambda p: p.stat().st_mtime, reverse=True)
            if not files:
                raise FileNotFoundError(f"No files found in {opts.output_dir}")

            # If we downloaded sections, look for files with the session_id pattern
            if opts.clip_sections and session_id:
                # Pattern: "{session_id}_1.ext", "{session_id}_2.ext", etc.
                # Also includes sidecars like "{session_id}_1.en.vtt".
                section_pattern = re.compile(rf'^{re.escape(session_id)}_(\d+)')
                matching_files = [f for f in files if section_pattern.search(f.name)]

                if matching_files:
                    # Sort by section number to ensure correct order
                    def extract_section_num(path: Path) -> int:
                        match = section_pattern.search(path.name)
                        return int(match.group(1)) if match else 999

                    matching_files.sort(key=extract_section_num)
                    debug(f"Found {len(matching_files)} section file(s) matching pattern")

                    # Now rename section *media* files to use hash-based names.
                    # Sidecars (subtitles) are renamed to match the media hash so they can be
                    # attached as notes later (and not emitted as separate pipeline items).
                    by_index: Dict[int, List[Path]] = {}
                    for f in matching_files:
                        m = section_pattern.search(f.name)
                        if not m:
                            continue
                        try:
                            n = int(m.group(1))
                        except Exception:
                            continue
                        by_index.setdefault(n, []).append(f)

                    renamed_media_files: List[Path] = []

                    for sec_num in sorted(by_index.keys()):
                        group = by_index.get(sec_num) or []
                        if not group:
                            continue

                        def _is_subtitle(p: Path) -> bool:
                            try:
                                return p.suffix.lower() in _SUBTITLE_EXTS
                            except Exception:
                                return False

                        media_candidates = [p for p in group if not _is_subtitle(p)]
                        subtitle_candidates = [p for p in group if _is_subtitle(p)]

                        # Pick the primary media file for this section.
                        # Prefer non-json, non-info sidecars.
                        media_file: Optional[Path] = None
                        for cand in media_candidates:
                            try:
                                if cand.suffix.lower() in {".json", ".info.json"}:
                                    continue
                            except Exception:
                                pass
                            media_file = cand
                            break
                        if media_file is None and media_candidates:
                            media_file = media_candidates[0]
                        if media_file is None:
                            # No media file found for this section; skip.
                            continue

                        try:
                            media_hash = sha256_file(media_file)
                        except Exception as e:
                            debug(f"Failed to hash section media file {media_file.name}: {e}")
                            renamed_media_files.append(media_file)
                            continue

                        # Preserve any suffix tail after the section index so language tags survive.
                        # Example: <session>_1.en.vtt -> <hash>.en.vtt
                        prefix = f"{session_id}_{sec_num}"

                        def _tail(name: str) -> str:
                            try:
                                if name.startswith(prefix):
                                    return name[len(prefix):]
                            except Exception:
                                pass
                            # Fallback: keep just the last suffix.
                            try:
                                return Path(name).suffix
                            except Exception:
                                return ""

                        # Rename media file to <hash><tail> (tail typically like .mkv).
                        try:
                            new_media_name = f"{media_hash}{_tail(media_file.name)}"
                            new_media_path = opts.output_dir / new_media_name
                            if new_media_path.exists() and new_media_path != media_file:
                                debug(f"File with hash {media_hash} already exists, using existing file.")
                                try:
                                    media_file.unlink()
                                except OSError:
                                    pass
                            else:
                                media_file.rename(new_media_path)
                                debug(f"Renamed section file: {media_file.name} -> {new_media_name}")
                            renamed_media_files.append(new_media_path)
                        except Exception as e:
                            debug(f"Failed to rename section media file {media_file.name}: {e}")
                            renamed_media_files.append(media_file)
                            new_media_path = media_file

                        # Rename subtitle sidecars to match media hash for later note attachment.
                        for sub_file in subtitle_candidates:
                            try:
                                new_sub_name = f"{media_hash}{_tail(sub_file.name)}"
                                new_sub_path = opts.output_dir / new_sub_name
                                if new_sub_path.exists() and new_sub_path != sub_file:
                                    try:
                                        sub_file.unlink()
                                    except OSError:
                                        pass
                                else:
                                    sub_file.rename(new_sub_path)
                                    debug(f"Renamed section file: {sub_file.name} -> {new_sub_name}")
                            except Exception as e:
                                debug(f"Failed to rename section subtitle file {sub_file.name}: {e}")

                    media_path = renamed_media_files[0] if renamed_media_files else matching_files[0]
                    media_paths = renamed_media_files if renamed_media_files else None
                    if not opts.quiet:
                        count = len(media_paths) if isinstance(media_paths, list) else 1
                        debug(f"✓ Downloaded {count} section media file(s) (session: {session_id})")
                else:
                    # Fallback to most recent file if pattern not found
                    media_path = files[0]
                    media_paths = None
                    if not opts.quiet:
                        debug(f"✓ Downloaded section file (pattern not found): {media_path.name}")
            else:
                # No sections, just take the most recent file
                media_path = files[0]
                media_paths = None

                if not opts.quiet:
                    debug(f"✓ Downloaded: {media_path.name}")
            if debug_logger is not None:
                debug_logger.write_record("ytdlp-file-found", {"path": str(media_path)})
        except Exception as exc:
            log(f"Error finding downloaded file: {exc}", file=sys.stderr)
            if debug_logger is not None:
                debug_logger.write_record(
                    "exception",
                    {"phase": "find-file", "error": str(exc)},
                )
            raise DownloadError(str(exc)) from exc

        # Create result with minimal data extracted from filename
        file_hash = sha256_file(media_path)

        # For section downloads, create tags with the title and build proper info dict
        tags = []
        title = ''
        if first_section_info:
            title = first_section_info.get('title', '')
            if title:
                tags.append(f'title:{title}')
                debug(f"Added title tag for section download: {title}")

        # Build info dict - always use extracted title if available, not hash
        if first_section_info:
            info_dict = first_section_info
        else:
            info_dict = {
                "id": media_path.stem,
                "title": title or media_path.stem,
                "ext": media_path.suffix.lstrip(".")
            }

        return DownloadMediaResult(
            path=media_path,
            info=info_dict,
            tag=tags,
            source_url=opts.url,
            hash_value=file_hash,
            paths=media_paths,  # Include all section files if present
        )

    if not isinstance(info, dict):
        log(f"Unexpected yt-dlp response: {type(info)}", file=sys.stderr)
        raise DownloadError("Unexpected yt-dlp response type")

    info_dict: Dict[str, Any] = info
    if debug_logger is not None:
        debug_logger.write_record(
            "ytdlp-info",
            {
                "keys": sorted(info_dict.keys()),
                "is_playlist": bool(info_dict.get("entries")),
            },
        )

    # Playlist/album handling: resolve ALL downloaded entries and return multiple results.
    # The cmdlet will emit one PipeObject per downloaded file.
    if info_dict.get("entries") and not opts.no_playlist:
        resolved = _resolve_entries_and_paths(info_dict, opts.output_dir)
        if resolved:
            results: List[DownloadMediaResult] = []
            for entry, media_path in resolved:
                hash_value = _extract_sha256(entry) or _extract_sha256(info_dict)
                if not hash_value:
                    try:
                        hash_value = sha256_file(media_path)
                    except OSError:
                        hash_value = None

                tags: List[str] = []
                if extract_ytdlp_tags:
                    try:
                        tags = extract_ytdlp_tags(entry)
                    except Exception as e:
                        log(f"Error extracting tags: {e}", file=sys.stderr)

                source_url = (
                    entry.get("webpage_url")
                    or entry.get("original_url")
                    or entry.get("url")
                    or opts.url
                )

                results.append(
                    DownloadMediaResult(
                        path=media_path,
                        info=entry,
                        tag=tags,
                        source_url=source_url,
                        hash_value=hash_value,
                    )
                )

            if not opts.quiet:
                debug(f"✓ Downloaded playlist items: {len(results)}")
            return results

    try:
        entry, media_path = _resolve_entry_and_path(info_dict, opts.output_dir)
    except FileNotFoundError as exc:
        log(f"Error: {exc}", file=sys.stderr)
        if debug_logger is not None:
            debug_logger.write_record(
                "exception",
                {"phase": "resolve-path", "error": str(exc)},
            )
        raise DownloadError(str(exc)) from exc

    if debug_logger is not None:
        debug_logger.write_record(
            "resolved-media",
            {"path": str(media_path), "entry_keys": sorted(entry.keys())},
        )

    # Extract hash from metadata or compute
    hash_value = _extract_sha256(entry) or _extract_sha256(info_dict)
    if not hash_value:
        try:
            hash_value = sha256_file(media_path)
        except OSError as exc:
            if debug_logger is not None:
                debug_logger.write_record(
                    "hash-error",
                    {"path": str(media_path), "error": str(exc)},
                )

    # Extract tags using metadata.py
    tags = []
    if extract_ytdlp_tags:
        try:
            tags = extract_ytdlp_tags(entry)
        except Exception as e:
            log(f"Error extracting tags: {e}", file=sys.stderr)

    source_url = (
        entry.get("webpage_url")
        or entry.get("original_url")
        or entry.get("url")
    )

    if not opts.quiet:
        debug(f"✓ Downloaded: {media_path.name} ({len(tags)} tags)")
    if debug_logger is not None:
        debug_logger.write_record(
            "downloaded",
            {
                "path": str(media_path),
                "tag_count": len(tags),
                "source_url": source_url,
                "sha256": hash_value,
            },
        )

    return DownloadMediaResult(
        path=media_path,
        info=entry,
        tag=tags,
        source_url=source_url,
        hash_value=hash_value,
    )

# Timeout handler to prevent yt-dlp hangs
def _download_with_timeout(opts: DownloadOptions, timeout_seconds: int = 300) -> Any:
    """Download with timeout protection.

    Args:
        opts: DownloadOptions
        timeout_seconds: Max seconds to wait (default 300s = 5 min)

    Returns:
        DownloadMediaResult or List[DownloadMediaResult]

    Raises:
        DownloadError: If timeout exceeded
    """
    import threading
    from typing import cast

    result_container: List[Optional[Any]] = [None, None]  # [result, error]

    def _do_download() -> None:
        try:
            result_container[0] = download_media(opts)
        except Exception as e:
            result_container[1] = e

    thread = threading.Thread(target=_do_download, daemon=False)
    thread.start()
    thread.join(timeout=timeout_seconds)

    if thread.is_alive():
        # Thread still running - timeout
        raise DownloadError(f"Download timeout after {timeout_seconds} seconds for {opts.url}")

    if result_container[1] is not None:
        raise cast(Exception, result_container[1])

    if result_container[0] is None:
        raise DownloadError(f"Download failed for {opts.url}")

    return cast(Any, result_container[0])

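# Example (sketch): guard a single download with the default 5-minute timeout,
# assuming `opts` is a fully populated DownloadOptions.
#
#     result = _download_with_timeout(opts, timeout_seconds=300)
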
class Download_Media(Cmdlet):
    """Class-based download-media cmdlet - yt-dlp only, streaming sites."""

    def __init__(self) -> None:
        """Initialize download-media cmdlet."""
        super().__init__(
            name="download-media",
            summary="Download media from streaming sites (YouTube, Twitch, etc.)",
            usage="download-media <url> [options] or search-file | download-media [options]",
            alias=[""],
            arg=[
                SharedArgs.URL,
                SharedArgs.QUERY,
                CmdletArg(name="audio", type="flag", alias="a", description="Download audio only"),
                CmdletArg(name="format", type="string", alias="fmt", description="Explicit yt-dlp format selector"),
                CmdletArg(
                    name="clip",
                    type="string",
                    description="Extract time range(s) or keyed spec (e.g., clip:3m4s-3m14s,item:2-3)",
                ),
                CmdletArg(name="item", type="string", description="Item selection for playlists/formats"),
                SharedArgs.PATH,
            ],
            detail=["Download media from streaming sites using yt-dlp.", "For direct file downloads, use download-file."],
            exec=self.run,
        )
        self.register()

    def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
        """Main execution method."""
        stage_ctx = pipeline_context.get_stage_context()
        in_pipeline = stage_ctx is not None and getattr(stage_ctx, "total_stages", 1) > 1
        if in_pipeline and isinstance(config, dict):
            config["_quiet_background_output"] = True
        return self._run_impl(result, args, config)

    def _run_impl(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
        """Main download implementation for yt-dlp-supported url."""
        try:
            debug("Starting download-media")

            ytdlp_tool = YtDlpTool(config)

            # Parse arguments
            parsed = parse_cmdlet_args(args, self)

            # Extract options
            raw_url = parsed.get("url", [])
            if isinstance(raw_url, str):
                raw_url = [raw_url]

            # Allow a single quoted argument containing multiple URLs separated by commas.
            # Example: download-media "https://a,https://b" -audio
            expanded_urls: List[str] = []
            for u in (raw_url or []):
                if u is None:
                    continue
                s = str(u).strip()
                if not s:
                    continue
                if "," in s:
                    parts = [p.strip() for p in s.split(",")]
                    expanded_urls.extend([p for p in parts if p])
                else:
                    expanded_urls.append(s)
            if expanded_urls:
                raw_url = expanded_urls

            # If no url provided via args, try to extract from piped result
            if not raw_url and result:
                # Handle single result or list of results
                results_to_check = result if isinstance(result, list) else [result]
                for item in results_to_check:
                    # Try to get URL from various possible fields
                    url = get_field(item, "url") or get_field(item, "target")
                    if url:
                        raw_url.append(url)

            # Filter to yt-dlp supported url only
            supported_url = [
                url for url in raw_url
                if is_url_supported_by_ytdlp(url)
            ]

            if not supported_url:
                log("No yt-dlp-supported url to download", file=sys.stderr)
                return 1

            # Log unsupported url if any
            unsupported = set(raw_url) - set(supported_url)
            if unsupported:
                debug(f"Skipping {len(unsupported)} unsupported url (use download-file for direct downloads)")

            # Get output directory
            final_output_dir = self._resolve_output_dir(parsed, config)
            if not final_output_dir:
                return 1

            debug(f"Output directory: {final_output_dir}")

            # Get other options
            clip_spec = parsed.get("clip")
            query_spec = parsed.get("query")

            # download-media supports a small keyed spec language inside -query.
            # Examples:
            #   -query "hash:<sha256>"
            #   -query "clip:1m-1m15s,2m1s-2m11s"
            #   -query "hash:<sha256>,clip:1m-1m15s,item:2-3"
            query_keyed: Dict[str, List[str]] = {}
            if query_spec:
                try:
                    query_keyed = self._parse_keyed_csv_spec(str(query_spec), default_key="hash")
                except Exception:
                    query_keyed = {}

            # Optional: allow an explicit hash via -query "hash:<sha256>".
            # This is used as the preferred king hash for multi-clip relationships.
            query_hash_override: Optional[str] = None
            try:
                hash_values = query_keyed.get("hash", []) if isinstance(query_keyed, dict) else []
                hash_candidate = (hash_values[-1] if hash_values else None)
                if hash_candidate:
                    # Re-wrap for the shared parser which expects the `hash:` prefix.
                    query_hash_override = sh.parse_single_hash_query(f"hash:{hash_candidate}")
                else:
                    # Backwards-compatible: treat a non-keyed query as a hash query.
                    query_hash_override = sh.parse_single_hash_query(str(query_spec)) if query_spec else None
            except Exception:
                query_hash_override = None

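            # Example (sketch, assuming _parse_keyed_csv_spec maps each "key:value"
            # pair to a list of values and attaches un-keyed items to default_key):
            #     "hash:abc,clip:1m-1m15s,item:2-3"
            #     -> {"hash": ["abc"], "clip": ["1m-1m15s"], "item": ["2-3"]}
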
            # Always enable chapters + subtitles so downstream pipes (e.g. mpv) can consume them.
            embed_chapters = True
            write_sub = True

            mode = "audio" if parsed.get("audio") else "video"

            # Parse clip range(s) if specified
            clip_ranges: Optional[List[tuple[int, int]]] = None
            clip_values: List[str] = []
            item_values: List[str] = []

            if clip_spec:
                # Support keyed clip syntax:
                #   -clip "clip:3m4s-3m14s,1h22m-1h33m,item:2-3"
                keyed = self._parse_keyed_csv_spec(str(clip_spec), default_key="clip")
                clip_values.extend(keyed.get("clip", []) or [])
                item_values.extend(keyed.get("item", []) or [])

            # Allow the same keyed spec language inside -query so users can do:
            #   download-media <url> -query "clip:1m-1m15s,2m1s-2m11s"
            if query_keyed:
                clip_values.extend(query_keyed.get("clip", []) or [])
                item_values.extend(query_keyed.get("item", []) or [])

            if item_values and not parsed.get("item"):
                parsed["item"] = ",".join([v for v in item_values if v])

            if clip_values:
                clip_ranges = self._parse_time_ranges(",".join([v for v in clip_values if v]))
                if not clip_ranges:
                    bad_spec = clip_spec or query_spec
                    log(f"Invalid clip format: {bad_spec}", file=sys.stderr)
                    return 1

            if clip_ranges:
                try:
                    debug(f"Clip ranges: {clip_ranges}")
                except Exception:
                    pass

            quiet_mode = bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False

            storage = None
            hydrus_available = True
            try:
                from Store import Store
                storage = Store(config=config or {}, suppress_debug=True)
                from API.HydrusNetwork import is_hydrus_available
                hydrus_available = bool(is_hydrus_available(config or {}))
            except Exception:
                storage = None

            def _preflight_url_duplicate(candidate_url: str, extra_urls: Optional[Sequence[str]] = None) -> bool:
                # NOTE: download-media sets _quiet_background_output=True when running in a pipeline to
                # reduce background noise. URL de-dup is interactive and must still run in pipelines.
                if storage is None:
                    debug("Preflight URL check skipped: storage unavailable")
                    return True

                debug(f"Preflight URL check: candidate={candidate_url}")

                try:
                    from metadata import normalize_urls
                except Exception:
                    normalize_urls = None  # type: ignore[assignment]

                needles: List[str] = []
                if normalize_urls is not None:
                    for raw in [candidate_url, *(list(extra_urls) if extra_urls else [])]:
                        try:
                            needles.extend(normalize_urls(raw))
                        except Exception:
                            continue
                # Fallback: always have at least one needle
                if not needles:
                    needles = [str(candidate_url)]

                # Deduplicate needles (preserve order)
                seen_needles: List[str] = []
                for needle in needles:
                    if needle and needle not in seen_needles:
                        seen_needles.append(needle)
                needles = seen_needles

                try:
                    debug(f"Preflight URL needles: {needles}")
                except Exception:
                    pass

                url_matches: List[Dict[str, Any]] = []
                try:
                    from Store.HydrusNetwork import HydrusNetwork

                    # Avoid searching the temp/download directory backend during dedup.
                    # We only want to warn about duplicates in real stores.
                    backend_names_all = storage.list_searchable_backends()
                    backend_names: List[str] = []
                    skipped: List[str] = []
                    for backend_name in backend_names_all:
                        try:
                            backend = storage[backend_name]
                        except Exception:
                            continue

                        try:
                            if str(backend_name).strip().lower() == "temp":
                                skipped.append(backend_name)
                                continue
                        except Exception:
                            pass

                        # Heuristic: if a Folder backend points at the configured temp output dir, skip it.
                        try:
                            backend_location = getattr(backend, "_location", None)
                            if backend_location and final_output_dir:
                                backend_path = Path(str(backend_location)).expanduser().resolve()
                                temp_path = Path(str(final_output_dir)).expanduser().resolve()
                                if backend_path == temp_path:
                                    skipped.append(backend_name)
                                    continue
                        except Exception:
                            pass

                        backend_names.append(backend_name)

                    try:
                        if skipped:
                            debug(f"Preflight backends: {backend_names} (skipped temp: {skipped})")
                        else:
                            debug(f"Preflight backends: {backend_names}")
                    except Exception:
                        pass

                    for backend_name in backend_names:
                        backend = storage[backend_name]
                        if isinstance(backend, HydrusNetwork) and not hydrus_available:
                            continue

                        backend_hits: List[Dict[str, Any]] = []
                        for needle in needles:
                            try:
                                backend_hits = backend.search(f"url:{needle}", limit=25) or []
                                if backend_hits:
                                    break
                            except Exception:
                                continue
                        if backend_hits:
                            url_matches.extend([dict(x) if isinstance(x, dict) else {"title": str(x)} for x in backend_hits])

                        if len(url_matches) >= 25:
                            url_matches = url_matches[:25]
                            break
                except Exception:
                    url_matches = []
|
|
|
|
|
|
|
|
|
|
if not url_matches:
|
|
|
|
|
debug("Preflight URL check: no matches")
|
|
|
|
|
return True
|
|
|
|
|
|
|
|
|
|
table = ResultTable(f"URL already exists ({len(url_matches)} match(es))")
|
|
|
|
|
results_list: List[Dict[str, Any]] = []
|
|
|
|
|
for item in url_matches:
|
|
|
|
|
if "title" not in item:
|
|
|
|
|
item["title"] = item.get("name") or item.get("target") or item.get("path") or "Result"
|
2025-12-20 23:57:44 -08:00
|
|
|
|
|
|
|
|
# Keep the full payload for history/inspection, but display a focused table.
|
2025-12-21 05:10:09 -08:00
|
|
|
# Use shared extractors so Ext/Size/Store/Hash remain consistent everywhere.
|
|
|
|
|
try:
|
|
|
|
|
from result_table import build_display_row
|
|
|
|
|
except Exception:
|
|
|
|
|
build_display_row = None # type: ignore
|
|
|
|
|
|
|
|
|
|
if callable(build_display_row):
|
|
|
|
|
display_row = build_display_row(item, keys=["title", "store", "hash", "ext", "size"])
|
|
|
|
|
else:
|
|
|
|
|
display_row = {
|
|
|
|
|
"title": item.get("title"),
|
|
|
|
|
"store": item.get("store"),
|
|
|
|
|
"hash": item.get("hash") or item.get("file_hash") or item.get("sha256"),
|
|
|
|
|
"ext": str(item.get("ext") or ""),
|
|
|
|
|
"size": item.get("size") or item.get("size_bytes"),
|
|
|
|
|
}
|
2025-12-20 23:57:44 -08:00
|
|
|
table.add_result(display_row)
|
2025-12-14 00:53:52 -08:00
|
|
|
results_list.append(item)
|
|
|
|
|
|
|
|
|
|
pipeline_context.set_current_stage_table(table)
|
|
|
|
|
pipeline_context.set_last_result_table(table, results_list)
|
|
|
|
|
|
2025-12-21 05:10:09 -08:00
|
|
|
try:
|
|
|
|
|
from contextlib import nullcontext
|
|
|
|
|
except Exception:
|
|
|
|
|
nullcontext = None # type: ignore
|
|
|
|
|
|
|
|
|
|
suspend = getattr(pipeline_context, "suspend_live_progress", None)
|
|
|
|
|
cm = suspend() if callable(suspend) else (nullcontext() if nullcontext else None)
|
|
|
|
|
if cm is None:
|
|
|
|
|
get_stderr_console().print(table)
|
|
|
|
|
setattr(table, "_rendered_by_cmdlet", True)
|
|
|
|
|
if not Confirm.ask("Continue anyway?", default=False, console=get_stderr_console()):
|
|
|
|
|
return False
|
|
|
|
|
else:
|
|
|
|
|
with cm:
|
|
|
|
|
get_stderr_console().print(table)
|
|
|
|
|
setattr(table, "_rendered_by_cmdlet", True)
|
|
|
|
|
if not Confirm.ask("Continue anyway?", default=False, console=get_stderr_console()):
|
|
|
|
|
try:
|
|
|
|
|
pipeline_context.request_pipeline_stop(reason="duplicate-url declined", exit_code=0)
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
|
|
|
|
return False
|
2025-12-20 23:57:44 -08:00
|
|
|
return True
|
|
|
|
|
|
|
|
|
|
def _preflight_url_duplicates_bulk(urls: Sequence[str]) -> bool:
|
|
|
|
|
"""Preflight URL de-dup for a batch of URLs.
|
|
|
|
|
|
|
|
|
|
Purpose:
|
|
|
|
|
- Avoid per-item interactive URL checks inside a playlist loop.
|
|
|
|
|
- Let the user see ALL duplicates up front, before any downloads start.
|
|
|
|
|
"""
|
|
|
|
|
if storage is None:
|
|
|
|
|
debug("Bulk URL preflight skipped: storage unavailable")
|
|
|
|
|
return True
|
|
|
|
|
|
|
|
|
|
unique_urls: List[str] = []
|
|
|
|
|
for u in urls or []:
|
|
|
|
|
s = str(u or "").strip()
|
|
|
|
|
if s and s not in unique_urls:
|
|
|
|
|
unique_urls.append(s)
|
|
|
|
|
if len(unique_urls) <= 1:
|
|
|
|
|
return True
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
from metadata import normalize_urls
|
|
|
|
|
except Exception:
|
|
|
|
|
normalize_urls = None # type: ignore[assignment]
|
|
|
|
|
|
|
|
|
|
def _httpish(value: str) -> bool:
|
|
|
|
|
try:
|
|
|
|
|
return bool(value) and (value.startswith("http://") or value.startswith("https://"))
|
|
|
|
|
except Exception:
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
url_needles: Dict[str, List[str]] = {}
|
|
|
|
|
for u in unique_urls:
|
|
|
|
|
needles: List[str] = []
|
|
|
|
|
if normalize_urls is not None:
|
|
|
|
|
try:
|
|
|
|
|
needles.extend([n for n in (normalize_urls(u) or []) if isinstance(n, str)])
|
|
|
|
|
except Exception:
|
|
|
|
|
needles = []
|
|
|
|
|
if not needles:
|
|
|
|
|
needles = [u]
|
|
|
|
|
# Prefer http(s) needles for store lookups.
|
|
|
|
|
filtered: List[str] = []
|
|
|
|
|
for n in needles:
|
|
|
|
|
n2 = str(n or "").strip()
|
|
|
|
|
if not n2:
|
|
|
|
|
continue
|
|
|
|
|
if not _httpish(n2):
|
|
|
|
|
continue
|
|
|
|
|
if n2 not in filtered:
|
|
|
|
|
filtered.append(n2)
|
|
|
|
|
url_needles[u] = filtered if filtered else [u]
|
|
|
|
|
|
|
|
|
|
# Determine backends once (same filtering as per-URL preflight).
|
|
|
|
|
backend_names: List[str] = []
|
|
|
|
|
try:
|
|
|
|
|
backend_names_all = storage.list_searchable_backends()
|
|
|
|
|
except Exception:
|
|
|
|
|
backend_names_all = []
|
|
|
|
|
|
|
|
|
|
for backend_name in backend_names_all:
|
|
|
|
|
try:
|
|
|
|
|
backend = storage[backend_name]
|
|
|
|
|
except Exception:
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
if str(backend_name).strip().lower() == "temp":
|
|
|
|
|
continue
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
backend_location = getattr(backend, "_location", None)
|
|
|
|
|
if backend_location and final_output_dir:
|
|
|
|
|
backend_path = Path(str(backend_location)).expanduser().resolve()
|
|
|
|
|
temp_path = Path(str(final_output_dir)).expanduser().resolve()
|
|
|
|
|
if backend_path == temp_path:
|
|
|
|
|
continue
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
backend_names.append(backend_name)
|
|
|
|
|
|
|
|
|
|
if not backend_names:
|
|
|
|
|
debug("Bulk URL preflight skipped: no searchable backends")
|
|
|
|
|
return True
|
|
|
|
|
|
|
|
|
|
# Collect matches as display rows (cap to keep output reasonable)
|
|
|
|
|
seen_pairs: set[tuple[str, str]] = set()
|
|
|
|
|
matched_urls: set[str] = set()
|
|
|
|
|
match_rows: List[Dict[str, Any]] = []
|
|
|
|
|
max_rows = 200
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
from Store.HydrusNetwork import HydrusNetwork
|
|
|
|
|
except Exception:
|
|
|
|
|
HydrusNetwork = None # type: ignore
|
|
|
|
|
|
|
|
|
|
for backend_name in backend_names:
|
|
|
|
|
if len(match_rows) >= max_rows:
|
|
|
|
|
break
|
|
|
|
|
try:
|
|
|
|
|
backend = storage[backend_name]
|
|
|
|
|
except Exception:
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
if HydrusNetwork is not None and isinstance(backend, HydrusNetwork):
|
|
|
|
|
if not hydrus_available:
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
client = getattr(backend, "_client", None)
|
|
|
|
|
if client is None:
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
for original_url, needles in url_needles.items():
|
|
|
|
|
if len(match_rows) >= max_rows:
|
|
|
|
|
break
|
|
|
|
|
if (original_url, str(backend_name)) in seen_pairs:
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
# Fast-path: ask Hydrus whether it already knows this URL.
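# Illustrative (assumed response shape): any non-empty "hashes"/"file_hashes" or
# "file_ids" list in the JSON response is treated as "URL already known"; the
# exact keys depend on the Hydrus Client API version in use.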
|
|
|
|
|
found_hash: Optional[str] = None
|
|
|
|
|
found = False
|
|
|
|
|
for needle in (needles or [])[:3]:
|
|
|
|
|
if not _httpish(needle):
|
|
|
|
|
continue
|
|
|
|
|
try:
|
|
|
|
|
from API.HydrusNetwork import HydrusRequestSpec
|
|
|
|
|
|
|
|
|
|
spec = HydrusRequestSpec(
|
|
|
|
|
method="GET",
|
|
|
|
|
endpoint="/add_urls/get_url_files",
|
|
|
|
|
query={"url": needle},
|
|
|
|
|
)
|
|
|
|
|
response = client._perform_request(spec) # type: ignore[attr-defined]
|
|
|
|
|
raw_hashes = None
|
|
|
|
|
if isinstance(response, dict):
|
|
|
|
|
raw_hashes = response.get("hashes") or response.get("file_hashes")
|
|
|
|
|
raw_ids = response.get("file_ids")
|
|
|
|
|
has_ids = isinstance(raw_ids, list) and len(raw_ids) > 0
|
|
|
|
|
has_hashes = isinstance(raw_hashes, list) and len(raw_hashes) > 0
|
|
|
|
|
if has_hashes:
|
|
|
|
|
try:
|
|
|
|
|
found_hash = str(raw_hashes[0]).strip() # type: ignore[index]
|
|
|
|
|
except Exception:
|
|
|
|
|
found_hash = None
|
|
|
|
|
if has_ids or has_hashes:
|
|
|
|
|
found = True
|
|
|
|
|
break
|
|
|
|
|
except Exception:
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
if not found:
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
seen_pairs.add((original_url, str(backend_name)))
|
|
|
|
|
matched_urls.add(original_url)
|
|
|
|
|
display_row = {
|
|
|
|
|
"title": "(exists)",
|
|
|
|
|
"store": str(backend_name),
|
|
|
|
|
"hash": found_hash or "",
|
|
|
|
|
"url": original_url,
|
|
|
|
|
"columns": [
|
|
|
|
|
("Title", "(exists)"),
|
|
|
|
|
("Store", str(backend_name)),
|
|
|
|
|
("Hash", found_hash or ""),
|
|
|
|
|
("URL", original_url),
|
|
|
|
|
],
|
|
|
|
|
}
|
|
|
|
|
match_rows.append(display_row)
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
# Generic backends: use the existing search() contract.
|
|
|
|
|
for original_url, needles in url_needles.items():
|
|
|
|
|
if len(match_rows) >= max_rows:
|
|
|
|
|
break
|
|
|
|
|
if (original_url, str(backend_name)) in seen_pairs:
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
backend_hits: List[Dict[str, Any]] = []
|
|
|
|
|
for needle in (needles or [])[:3]:
|
|
|
|
|
try:
|
|
|
|
|
backend_hits = backend.search(f"url:{needle}", limit=1) or []
|
|
|
|
|
if backend_hits:
|
|
|
|
|
break
|
|
|
|
|
except Exception:
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
if not backend_hits:
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
seen_pairs.add((original_url, str(backend_name)))
|
|
|
|
|
matched_urls.add(original_url)
|
|
|
|
|
hit = backend_hits[0]
|
|
|
|
|
title = hit.get("title") or hit.get("name") or hit.get("target") or hit.get("path") or "(exists)"
|
|
|
|
|
file_hash = hit.get("hash") or hit.get("file_hash") or hit.get("sha256") or ""
|
2025-12-21 05:10:09 -08:00
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
from result_table import build_display_row
|
|
|
|
|
except Exception:
|
|
|
|
|
build_display_row = None # type: ignore
|
|
|
|
|
|
|
|
|
|
extracted = {
|
|
|
|
|
"title": str(title),
|
|
|
|
|
"store": str(hit.get("store") or backend_name),
|
|
|
|
|
"hash": str(file_hash or ""),
|
|
|
|
|
"ext": "",
|
|
|
|
|
"size": None,
|
|
|
|
|
}
|
|
|
|
|
if callable(build_display_row):
|
|
|
|
|
try:
|
|
|
|
|
extracted = build_display_row(hit, keys=["title", "store", "hash", "ext", "size"])
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
|
|
|
|
# Ensure we still prefer the precomputed values for title/store/hash.
|
|
|
|
|
extracted["title"] = str(title)
|
|
|
|
|
extracted["store"] = str(hit.get("store") or backend_name)
|
|
|
|
|
extracted["hash"] = str(file_hash or "")
|
|
|
|
|
|
|
|
|
|
ext = extracted.get("ext")
|
|
|
|
|
size_val = extracted.get("size")
|
|
|
|
|
|
2025-12-20 23:57:44 -08:00
|
|
|
display_row = {
|
|
|
|
|
"title": str(title),
|
|
|
|
|
"store": str(hit.get("store") or backend_name),
|
|
|
|
|
"hash": str(file_hash or ""),
|
2025-12-21 05:10:09 -08:00
|
|
|
"ext": str(ext or ""),
|
|
|
|
|
"size": size_val,
|
2025-12-20 23:57:44 -08:00
|
|
|
"url": original_url,
|
|
|
|
|
"columns": [
|
|
|
|
|
("Title", str(title)),
|
|
|
|
|
("Store", str(hit.get("store") or backend_name)),
|
|
|
|
|
("Hash", str(file_hash or "")),
|
2025-12-21 05:10:09 -08:00
|
|
|
("Ext", str(ext or "")),
|
|
|
|
|
("Size", size_val),
|
2025-12-20 23:57:44 -08:00
|
|
|
("URL", original_url),
|
|
|
|
|
],
|
|
|
|
|
}
|
|
|
|
|
match_rows.append(display_row)
|
|
|
|
|
|
|
|
|
|
if not match_rows:
|
|
|
|
|
debug("Bulk URL preflight: no matches")
|
|
|
|
|
return True
|
|
|
|
|
|
2025-12-21 05:10:09 -08:00
|
|
|
# This table is non-interactive and intentionally wide (we want URL + ext/size).
|
|
|
|
|
table = ResultTable(f"URL already exists ({len(matched_urls)} url(s))", max_columns=10)
|
2025-12-20 23:57:44 -08:00
|
|
|
table.set_no_choice(True)
|
|
|
|
|
try:
|
|
|
|
|
table.set_preserve_order(True)
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
for row in match_rows:
|
|
|
|
|
table.add_result(row)
|
|
|
|
|
|
|
|
|
|
# Display as an overlay so we don't clobber the current selectable table/history.
|
|
|
|
|
try:
|
|
|
|
|
pipeline_context.set_last_result_table_overlay(table, match_rows)
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
get_stderr_console().print(table)
|
|
|
|
|
setattr(table, "_rendered_by_cmdlet", True)
|
|
|
|
|
|
|
|
|
|
if not Confirm.ask("Continue anyway?", default=False, console=get_stderr_console()):
|
2025-12-14 00:53:52 -08:00
|
|
|
return False
|
|
|
|
|
return True
|
|
|
|
|
|
|
|
|
|
def _canonicalize_url_for_storage(requested_url: str) -> str:
|
|
|
|
|
# Prefer yt-dlp's canonical webpage URL (e.g. strips timestamps/redirects).
|
|
|
|
|
# Fall back to the requested URL if probing fails.
|
|
|
|
|
# Important: when playlist item selection is used, avoid probing (can hang on large playlists).
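# Illustrative example (the actual value comes from yt-dlp's webpage_url): a share
# link such as https://youtu.be/VIDEOID?t=120 is typically canonicalised to
# https://www.youtube.com/watch?v=VIDEOID before dedup/storage.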
|
|
|
|
|
if playlist_items:
|
|
|
|
|
return str(requested_url)
|
|
|
|
|
try:
|
2025-12-16 23:23:43 -08:00
|
|
|
cf = None
|
|
|
|
|
try:
|
|
|
|
|
cookie_path = ytdlp_tool.resolve_cookiefile()
|
|
|
|
|
if cookie_path is not None and cookie_path.is_file():
|
|
|
|
|
cf = str(cookie_path)
|
|
|
|
|
except Exception:
|
|
|
|
|
cf = None
|
|
|
|
|
pr = probe_url(requested_url, no_playlist=False, timeout_seconds=15, cookiefile=cf)
|
2025-12-14 00:53:52 -08:00
|
|
|
if isinstance(pr, dict):
|
|
|
|
|
for key in ("webpage_url", "original_url", "url", "requested_url"):
|
|
|
|
|
value = pr.get(key)
|
|
|
|
|
if isinstance(value, str) and value.strip():
|
|
|
|
|
return value.strip()
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
|
|
|
|
return str(requested_url)
|
|
|
|
|
|
2025-12-11 12:47:30 -08:00
|
|
|
# Check if we need to show format selection
|
|
|
|
|
playlist_items = str(parsed.get("item")) if parsed.get("item") else None
|
|
|
|
|
ytdl_format = parsed.get("format")
|
2025-12-14 00:53:52 -08:00
|
|
|
playlist_selection_handled = False
|
|
|
|
|
|
|
|
|
|
def _parse_at_selection(choice: str, *, max_index: int) -> Optional[List[int]]:
|
|
|
|
|
"""Parse @ selection syntax (@2, @2-5, @{1,3,5}, @2,5,7) into 1-based indices."""
|
|
|
|
|
raw = str(choice or "").strip()
|
|
|
|
|
if not raw:
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
if raw.lower() in {"q", "quit", "cancel"}:
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
if raw == "@*" or raw == "*":
|
|
|
|
|
return list(range(1, max_index + 1))
|
|
|
|
|
|
|
|
|
|
if raw.startswith("@"):
|
|
|
|
|
raw = raw[1:].strip()
|
|
|
|
|
|
|
|
|
|
if raw.startswith("{") and raw.endswith("}"):
|
|
|
|
|
raw = raw[1:-1].strip()
|
|
|
|
|
|
|
|
|
|
if not raw:
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
indices: set[int] = set()
|
|
|
|
|
for part in raw.split(","):
|
|
|
|
|
part = part.strip()
|
|
|
|
|
if not part:
|
|
|
|
|
continue
|
|
|
|
|
if "-" in part:
|
|
|
|
|
left, right = [p.strip() for p in part.split("-", 1)]
|
|
|
|
|
if not left or not right:
|
|
|
|
|
return None
|
|
|
|
|
try:
|
|
|
|
|
start = int(left)
|
|
|
|
|
end = int(right)
|
|
|
|
|
except ValueError:
|
|
|
|
|
return None
|
|
|
|
|
if start < 1 or end < 1:
|
|
|
|
|
return None
|
|
|
|
|
if end < start:
|
|
|
|
|
start, end = end, start
|
|
|
|
|
for i in range(start, end + 1):
|
|
|
|
|
if 1 <= i <= max_index:
|
|
|
|
|
indices.add(i)
|
|
|
|
|
else:
|
|
|
|
|
try:
|
|
|
|
|
i = int(part)
|
|
|
|
|
except ValueError:
|
|
|
|
|
return None
|
|
|
|
|
if 1 <= i <= max_index:
|
|
|
|
|
indices.add(i)
|
|
|
|
|
if not indices:
|
|
|
|
|
return None
|
|
|
|
|
return sorted(indices)
|
|
|
|
|
|
2025-12-20 23:57:44 -08:00
|
|
|
def _maybe_show_playlist_table(url: str) -> bool:
|
|
|
|
|
"""If URL appears to be a playlist/channel/collection, show a normal selectable table.
|
|
|
|
|
|
|
|
|
|
This intentionally avoids a special input() prompt so the user can use
|
|
|
|
|
the regular REPL prompt with autocomplete and standard @ selection:
|
|
|
|
|
download-media -url "<playlist>" (shows table)
|
|
|
|
|
@* | download-media [options] | add-file ...
|
2025-12-14 00:53:52 -08:00
|
|
|
|
2025-12-20 23:57:44 -08:00
|
|
|
Returns True if a playlist table was shown.
|
2025-12-14 00:53:52 -08:00
|
|
|
"""
|
|
|
|
|
try:
|
2025-12-16 23:23:43 -08:00
|
|
|
cf = None
|
|
|
|
|
try:
|
|
|
|
|
cookie_path = ytdlp_tool.resolve_cookiefile()
|
|
|
|
|
if cookie_path is not None and cookie_path.is_file():
|
|
|
|
|
cf = str(cookie_path)
|
|
|
|
|
except Exception:
|
|
|
|
|
cf = None
|
|
|
|
|
pr = probe_url(url, no_playlist=False, timeout_seconds=15, cookiefile=cf)
|
2025-12-14 00:53:52 -08:00
|
|
|
except Exception:
|
|
|
|
|
pr = None
|
|
|
|
|
if not isinstance(pr, dict):
|
2025-12-20 23:57:44 -08:00
|
|
|
return False
|
2025-12-14 00:53:52 -08:00
|
|
|
entries = pr.get("entries")
|
|
|
|
|
if not isinstance(entries, list) or len(entries) <= 1:
|
2025-12-20 23:57:44 -08:00
|
|
|
return False
|
2025-12-14 00:53:52 -08:00
|
|
|
|
|
|
|
|
# Display table (limit rows to keep output reasonable)
|
|
|
|
|
max_rows = 200
|
|
|
|
|
display_entries = entries[:max_rows]
|
|
|
|
|
total = len(entries)
|
|
|
|
|
|
|
|
|
|
def _entry_to_url(entry: Any) -> Optional[str]:
|
|
|
|
|
if not isinstance(entry, dict):
|
|
|
|
|
return None
|
|
|
|
|
# Prefer explicit absolute URLs when present
|
|
|
|
|
for key in ("webpage_url", "original_url", "url"):
|
|
|
|
|
v = entry.get(key)
|
|
|
|
|
if isinstance(v, str) and v.strip():
|
|
|
|
|
s = v.strip()
|
|
|
|
|
try:
|
|
|
|
|
if urlparse(s).scheme in {"http", "https"}:
|
|
|
|
|
return s
|
|
|
|
|
except Exception:
|
|
|
|
|
return s
|
|
|
|
|
|
|
|
|
|
# Best-effort YouTube fallback from id
|
|
|
|
|
entry_id = entry.get("id")
|
|
|
|
|
if isinstance(entry_id, str) and entry_id.strip():
|
|
|
|
|
extractor_name = str(pr.get("extractor") or pr.get("extractor_key") or "").lower()
|
|
|
|
|
if "youtube" in extractor_name:
|
|
|
|
|
return f"https://www.youtube.com/watch?v={entry_id.strip()}"
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
table = ResultTable()
|
2025-12-20 23:57:44 -08:00
|
|
|
safe_url = str(url or "").strip()
|
|
|
|
|
table.title = f'download-media -url "{safe_url}"' if safe_url else "download-media"
|
2025-12-21 05:10:09 -08:00
|
|
|
# Selection tables should expand '@N' into a runnable command.
|
|
|
|
|
# For playlist-item rows we prefer the concrete per-item URL so the
|
|
|
|
|
# expanded command targets a single video (not the whole playlist).
|
|
|
|
|
table.set_source_command("download-media", [])
|
2025-12-14 00:53:52 -08:00
|
|
|
try:
|
|
|
|
|
table.set_preserve_order(True)
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
results_list: List[Dict[str, Any]] = []
|
|
|
|
|
for idx, entry in enumerate(display_entries, 1):
|
|
|
|
|
title = None
|
|
|
|
|
uploader = None
|
|
|
|
|
duration = None
|
2025-12-20 23:57:44 -08:00
|
|
|
entry_url = _entry_to_url(entry)
|
2025-12-14 00:53:52 -08:00
|
|
|
try:
|
|
|
|
|
if isinstance(entry, dict):
|
|
|
|
|
title = entry.get("title")
|
|
|
|
|
uploader = entry.get("uploader") or pr.get("uploader")
|
|
|
|
|
duration = entry.get("duration")
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
row: Dict[str, Any] = {
|
|
|
|
|
"table": "download-media",
|
|
|
|
|
"title": str(title or f"Item {idx}"),
|
|
|
|
|
"detail": str(uploader or ""),
|
|
|
|
|
"media_kind": "playlist-item",
|
|
|
|
|
"playlist_index": idx,
|
2025-12-21 05:10:09 -08:00
|
|
|
# Enable '@N' expansion into a concrete command.
|
|
|
|
|
# Prefer selecting the resolved per-item URL when available.
|
|
|
|
|
"_selection_args": (["-url", str(entry_url)] if entry_url else ["-url", str(url), "-item", str(idx)]),
|
2025-12-20 23:57:44 -08:00
|
|
|
# Critical for normal @ selection piping: downstream cmdlets
|
|
|
|
|
# (including download-media itself) look for url/target.
|
|
|
|
|
"url": entry_url,
|
|
|
|
|
"target": entry_url,
|
2025-12-14 00:53:52 -08:00
|
|
|
"columns": [
|
|
|
|
|
("#", str(idx)),
|
|
|
|
|
("Title", str(title or "")),
|
|
|
|
|
("Duration", str(duration or "")),
|
|
|
|
|
("Uploader", str(uploader or "")),
|
|
|
|
|
],
|
|
|
|
|
}
|
|
|
|
|
results_list.append(row)
|
|
|
|
|
table.add_result(row)
|
|
|
|
|
|
|
|
|
|
pipeline_context.set_current_stage_table(table)
|
|
|
|
|
pipeline_context.set_last_result_table(table, results_list)
|
|
|
|
|
|
2025-12-20 23:57:44 -08:00
|
|
|
get_stderr_console().print(table)
|
|
|
|
|
setattr(table, "_rendered_by_cmdlet", True)
|
|
|
|
|
return True
|
2025-12-14 00:53:52 -08:00
|
|
|
|
|
|
|
|
# Playlist/multi-entry detection: if the URL has multiple items and the user didn't
|
2025-12-20 23:57:44 -08:00
|
|
|
# specify -item or -format, show a normal selectable table and return.
|
2025-12-14 00:53:52 -08:00
|
|
|
if len(supported_url) == 1 and not playlist_items and not ytdl_format:
|
|
|
|
|
candidate_url = supported_url[0]
|
2025-12-20 23:57:44 -08:00
|
|
|
if _maybe_show_playlist_table(candidate_url):
|
2025-12-14 00:53:52 -08:00
|
|
|
playlist_selection_handled = True
|
2025-12-20 23:57:44 -08:00
|
|
|
# Let the user pick items using the normal REPL prompt:
|
|
|
|
|
# @* | download-media ...
|
|
|
|
|
return 0
|
|
|
|
|
|
|
|
|
|
# Bulk preflight for playlist selections (per-entry URLs): check all URLs once before downloading.
|
|
|
|
|
skip_per_url_preflight = False
|
|
|
|
|
if len(supported_url) > 1:
|
|
|
|
|
if not _preflight_url_duplicates_bulk(list(supported_url)):
|
|
|
|
|
return 0
|
|
|
|
|
skip_per_url_preflight = True
|
|
|
|
|
|
|
|
|
|
# Playlist-level format preflight: if the batch has only one available format,
|
|
|
|
|
# discover it once and force it for every item. This avoids per-item failures
|
|
|
|
|
# and per-item --list-formats calls (e.g. Bandcamp albums).
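# Illustrative scenario (the format id is hypothetical): a Bandcamp album whose
# tracks only expose "mp3-128" is probed once via the first track, then every
# item is downloaded with that single format instead of re-listing per track.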
|
|
|
|
|
formats_cache: Dict[str, Optional[List[Dict[str, Any]]]] = {}
|
|
|
|
|
|
|
|
|
|
def _cookiefile_str() -> Optional[str]:
|
|
|
|
|
try:
|
|
|
|
|
cookie_path = ytdlp_tool.resolve_cookiefile()
|
|
|
|
|
if cookie_path is not None and cookie_path.is_file():
|
|
|
|
|
return str(cookie_path)
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
def _list_formats_cached(u: str, *, playlist_items_value: Optional[str]) -> Optional[List[Dict[str, Any]]]:
|
|
|
|
|
key = f"{u}||{playlist_items_value or ''}"
|
|
|
|
|
if key in formats_cache:
|
|
|
|
|
return formats_cache[key]
|
|
|
|
|
fmts = list_formats(
|
|
|
|
|
u,
|
|
|
|
|
no_playlist=False,
|
|
|
|
|
playlist_items=playlist_items_value,
|
|
|
|
|
cookiefile=_cookiefile_str(),
|
|
|
|
|
)
|
|
|
|
|
formats_cache[key] = fmts
|
|
|
|
|
return fmts
|
|
|
|
|
|
|
|
|
|
forced_single_format_id: Optional[str] = None
|
|
|
|
|
forced_single_format_for_batch = False
|
|
|
|
|
if len(supported_url) > 1 and not playlist_items and not ytdl_format:
|
|
|
|
|
try:
|
|
|
|
|
sample_url = str(supported_url[0])
|
|
|
|
|
fmts = _list_formats_cached(sample_url, playlist_items_value=None)
|
|
|
|
|
if isinstance(fmts, list) and len(fmts) == 1 and isinstance(fmts[0], dict):
|
|
|
|
|
only_id = str(fmts[0].get("format_id") or "").strip()
|
|
|
|
|
if only_id:
|
|
|
|
|
forced_single_format_id = only_id
|
|
|
|
|
forced_single_format_for_batch = True
|
|
|
|
|
debug(
|
|
|
|
|
f"Playlist format preflight: only one format available; using {forced_single_format_id} for all items"
|
|
|
|
|
)
|
|
|
|
|
except Exception:
|
|
|
|
|
forced_single_format_id = None
|
|
|
|
|
forced_single_format_for_batch = False
|
2025-12-11 12:47:30 -08:00
|
|
|
|
2025-12-12 21:55:38 -08:00
|
|
|
# If no -item, no explicit -format specified, and single URL, show the format table.
|
|
|
|
|
# Do NOT stop to show formats when -audio is used (auto-pick) or when -clip is used.
|
|
|
|
|
if (
|
|
|
|
|
mode != "audio"
|
|
|
|
|
and not clip_spec
|
|
|
|
|
and not playlist_items
|
|
|
|
|
and not ytdl_format
|
|
|
|
|
and len(supported_url) == 1
|
2025-12-14 00:53:52 -08:00
|
|
|
and not playlist_selection_handled
|
2025-12-12 21:55:38 -08:00
|
|
|
):
|
2025-12-11 12:47:30 -08:00
|
|
|
url = supported_url[0]
|
2025-12-14 00:53:52 -08:00
|
|
|
|
|
|
|
|
canonical_url = _canonicalize_url_for_storage(url)
|
|
|
|
|
if not _preflight_url_duplicate(canonical_url, extra_urls=[url]):
|
|
|
|
|
log(f"Skipping download: {url}", file=sys.stderr)
|
|
|
|
|
return 0
|
|
|
|
|
|
2025-12-20 23:57:44 -08:00
|
|
|
formats = _list_formats_cached(url, playlist_items_value=None)
|
2025-12-11 12:47:30 -08:00
|
|
|
|
|
|
|
|
if formats and len(formats) > 1:
|
2025-12-20 02:12:45 -08:00
|
|
|
# Formatlist filtering
|
|
|
|
|
#
|
|
|
|
|
# Goal:
|
|
|
|
|
# - Keep the list useful (hide non-media entries like storyboards)
|
|
|
|
|
# - But NEVER filter down so far that the user can't browse/pick formats.
|
|
|
|
|
#
|
|
|
|
|
# The old filtering was too aggressive (e.g. width>=640, one per resolution),
|
|
|
|
|
# which often hid most YouTube formats.
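# Illustrative effect: YouTube storyboard entries (e.g. format_id "sb0", ext
# "mhtml") are hidden, while video-only or audio-only streams are kept so the
# user can still browse and pick them.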
|
|
|
|
|
def _is_browseable_format(fmt: Any) -> bool:
|
|
|
|
|
if not isinstance(fmt, dict):
|
|
|
|
|
return False
|
|
|
|
|
format_id = str(fmt.get("format_id") or "").strip()
|
|
|
|
|
if not format_id:
|
|
|
|
|
return False
|
|
|
|
|
ext = str(fmt.get("ext") or "").strip().lower()
|
|
|
|
|
if ext in {"mhtml", "json"}:
|
|
|
|
|
return False
|
|
|
|
|
note = str(fmt.get("format_note") or "").lower()
|
|
|
|
|
if "storyboard" in note:
|
|
|
|
|
return False
|
|
|
|
|
if format_id.lower().startswith("sb"):
|
|
|
|
|
return False
|
|
|
|
|
vcodec = str(fmt.get("vcodec", "none"))
|
|
|
|
|
acodec = str(fmt.get("acodec", "none"))
|
|
|
|
|
# Keep anything with at least one stream.
|
|
|
|
|
return not (vcodec == "none" and acodec == "none")
|
|
|
|
|
|
|
|
|
|
candidate_formats = [f for f in formats if _is_browseable_format(f)]
|
|
|
|
|
filtered_formats = candidate_formats if candidate_formats else list(formats)
|
|
|
|
|
|
|
|
|
|
debug(f"Formatlist: showing {len(filtered_formats)} formats (raw={len(formats)})")
|
2025-12-11 12:47:30 -08:00
|
|
|
|
|
|
|
|
# Build the base command that will be replayed with @N selection
|
|
|
|
|
# Include any additional args from the original command
|
|
|
|
|
base_cmd = f'download-media "{url}"'
|
|
|
|
|
# Preserve any additional pipeline stages if this is in a pipeline
|
|
|
|
|
remaining_args = [arg for arg in args if arg not in [url] and not arg.startswith('-')]
|
|
|
|
|
if remaining_args:
|
|
|
|
|
base_cmd += ' ' + ' '.join(remaining_args)
|
|
|
|
|
|
|
|
|
|
# Create result table for display
|
2025-12-20 02:12:45 -08:00
|
|
|
# NOTE: ResultTable defaults to max_columns=5; for formatlist we want more columns
|
|
|
|
|
# (including Size) so the user can compare formats.
|
|
|
|
|
table = ResultTable(title=f"Available formats for {url}", max_columns=10, preserve_order=True)
|
|
|
|
|
table.set_table("ytdlp.formatlist")
|
2025-12-11 12:47:30 -08:00
|
|
|
table.set_source_command("download-media", [url])
|
|
|
|
|
|
|
|
|
|
# Collect results for table
|
|
|
|
|
results_list = []
|
|
|
|
|
|
|
|
|
|
# Emit format results for selection
|
|
|
|
|
for idx, fmt in enumerate(filtered_formats, 1):
|
|
|
|
|
resolution = fmt.get("resolution", "")
|
|
|
|
|
ext = fmt.get("ext", "")
|
|
|
|
|
vcodec = fmt.get("vcodec", "none")
|
|
|
|
|
acodec = fmt.get("acodec", "none")
|
|
|
|
|
filesize = fmt.get("filesize")
|
2025-12-20 02:12:45 -08:00
|
|
|
filesize_approx = fmt.get("filesize_approx")
|
2025-12-11 12:47:30 -08:00
|
|
|
format_id = fmt.get("format_id", "")
|
2025-12-16 23:23:43 -08:00
|
|
|
|
|
|
|
|
# If the chosen format is video-only (no audio stream), automatically
|
|
|
|
|
# request best audio too so the resulting file has sound.
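# Illustrative: picking a video-only format such as YouTube's 137 (1080p, no
# audio stream) yields the selector "137+ba" so yt-dlp merges in the best audio.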
|
|
|
|
|
selection_format_id = format_id
|
|
|
|
|
try:
|
|
|
|
|
if vcodec != "none" and acodec == "none" and format_id:
|
|
|
|
|
selection_format_id = f"{format_id}+ba"
|
|
|
|
|
except Exception:
|
|
|
|
|
selection_format_id = format_id
|
2025-12-11 12:47:30 -08:00
|
|
|
|
2025-12-20 02:12:45 -08:00
|
|
|
# Format size (prefer exact filesize; fall back to filesize_approx)
|
2025-12-11 12:47:30 -08:00
|
|
|
size_str = ""
|
2025-12-20 02:12:45 -08:00
|
|
|
size_prefix = ""
|
|
|
|
|
size_bytes = filesize
|
|
|
|
|
if not size_bytes:
|
|
|
|
|
size_bytes = filesize_approx
|
|
|
|
|
if size_bytes and not filesize:  # only mark "~" when falling back to filesize_approx
|
|
|
|
|
size_prefix = "~"
|
|
|
|
|
try:
|
|
|
|
|
if isinstance(size_bytes, (int, float)) and size_bytes > 0:
|
|
|
|
|
size_mb = float(size_bytes) / (1024 * 1024)
|
|
|
|
|
size_str = f"{size_prefix}{size_mb:.1f}MB"
|
|
|
|
|
except Exception:
|
|
|
|
|
size_str = ""
|
2025-12-11 12:47:30 -08:00
|
|
|
|
|
|
|
|
# Build format description
|
|
|
|
|
desc_parts = []
|
|
|
|
|
if resolution and resolution != "audio only":
|
|
|
|
|
desc_parts.append(resolution)
|
|
|
|
|
if ext:
|
|
|
|
|
desc_parts.append(ext.upper())
|
|
|
|
|
if vcodec != "none":
|
|
|
|
|
desc_parts.append(f"v:{vcodec}")
|
|
|
|
|
if acodec != "none":
|
|
|
|
|
desc_parts.append(f"a:{acodec}")
|
|
|
|
|
if size_str:
|
|
|
|
|
desc_parts.append(size_str)
|
|
|
|
|
|
|
|
|
|
format_desc = " | ".join(desc_parts)
|
|
|
|
|
|
|
|
|
|
# Build format dict for emission and table
|
|
|
|
|
format_dict = {
|
2025-12-11 19:04:02 -08:00
|
|
|
"table": "download-media",
|
2025-12-11 12:47:30 -08:00
|
|
|
"title": f"Format {format_id}",
|
|
|
|
|
"url": url,
|
|
|
|
|
"target": url,
|
|
|
|
|
"detail": format_desc,
|
|
|
|
|
"annotations": [ext, resolution] if resolution else [ext],
|
|
|
|
|
"media_kind": "format",
|
|
|
|
|
"cmd": base_cmd,
|
2025-12-20 02:12:45 -08:00
|
|
|
# Put Size early so it's visible even with smaller column caps.
|
2025-12-11 12:47:30 -08:00
|
|
|
"columns": [
|
|
|
|
|
("ID", format_id),
|
|
|
|
|
("Resolution", resolution or "N/A"),
|
|
|
|
|
("Ext", ext),
|
2025-12-20 02:12:45 -08:00
|
|
|
("Size", size_str or ""),
|
2025-12-11 12:47:30 -08:00
|
|
|
("Video", vcodec),
|
|
|
|
|
("Audio", acodec),
|
|
|
|
|
],
|
|
|
|
|
"full_metadata": {
|
|
|
|
|
"format_id": format_id,
|
|
|
|
|
"url": url,
|
2025-12-16 23:23:43 -08:00
|
|
|
"item_selector": selection_format_id,
|
2025-12-11 12:47:30 -08:00
|
|
|
},
|
2025-12-20 02:12:45 -08:00
|
|
|
"_selection_args": None,
|
2025-12-11 12:47:30 -08:00
|
|
|
}
|
2025-12-20 02:12:45 -08:00
|
|
|
|
|
|
|
|
# Preserve clip settings across @N selection.
|
|
|
|
|
# Some runners only append row selection args; make sure clip intent
|
|
|
|
|
# survives even when it was provided via -query "clip:...".
|
|
|
|
|
selection_args: List[str] = ["-format", selection_format_id]
|
|
|
|
|
try:
|
|
|
|
|
if (not clip_spec) and clip_values:
|
|
|
|
|
selection_args.extend(["-clip", ",".join([v for v in clip_values if v])])
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
|
|
|
|
format_dict["_selection_args"] = selection_args
|
2025-12-11 12:47:30 -08:00
|
|
|
|
|
|
|
|
# Add to results list and table (don't emit - formats should wait for @N selection)
|
|
|
|
|
results_list.append(format_dict)
|
|
|
|
|
table.add_result(format_dict)
|
|
|
|
|
|
|
|
|
|
# Render and display the table
|
2025-12-20 02:12:45 -08:00
|
|
|
# Some runners (e.g. cmdnat) do not automatically render stage tables.
|
|
|
|
|
# Since this branch is explicitly interactive (user must pick @N), always
|
|
|
|
|
# print the table here and mark it as already rendered to avoid duplicates
|
|
|
|
|
# in runners that also print tables (e.g. CLI.py).
|
|
|
|
|
try:
|
2025-12-20 23:57:44 -08:00
|
|
|
get_stderr_console().print(table)
|
2025-12-20 02:12:45 -08:00
|
|
|
setattr(table, "_rendered_by_cmdlet", True)
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
2025-12-11 12:47:30 -08:00
|
|
|
|
|
|
|
|
# Set the result table so it displays and is available for @N selection
|
|
|
|
|
pipeline_context.set_current_stage_table(table)
|
|
|
|
|
pipeline_context.set_last_result_table(table, results_list)
|
|
|
|
|
|
|
|
|
|
log(f"", file=sys.stderr)
|
|
|
|
|
return 0
|
|
|
|
|
|
|
|
|
|
# Download each URL
|
|
|
|
|
downloaded_count = 0
|
2025-12-20 23:57:44 -08:00
|
|
|
downloaded_pipe_objects: List[Dict[str, Any]] = []
|
|
|
|
|
pipe_seq = 0
|
2025-12-16 01:45:01 -08:00
|
|
|
clip_sections_spec = self._build_clip_sections_spec(clip_ranges)
|
2025-12-11 12:47:30 -08:00
|
|
|
|
2025-12-20 02:12:45 -08:00
|
|
|
if clip_sections_spec:
|
|
|
|
|
try:
|
|
|
|
|
debug(f"Clip sections spec: {clip_sections_spec}")
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
|
|
|
|
|
2025-12-11 12:47:30 -08:00
|
|
|
for url in supported_url:
|
|
|
|
|
try:
|
|
|
|
|
debug(f"Processing: {url}")
|
|
|
|
|
|
2025-12-14 00:53:52 -08:00
|
|
|
canonical_url = _canonicalize_url_for_storage(url)
|
|
|
|
|
|
|
|
|
|
# Preflight: warn if URL already exists in storage backends.
|
2025-12-20 23:57:44 -08:00
|
|
|
# For playlist selections we already did a bulk preflight; skip per-item checks.
|
|
|
|
|
if not skip_per_url_preflight:
|
|
|
|
|
if not _preflight_url_duplicate(canonical_url, extra_urls=[url]):
|
|
|
|
|
log(f"Skipping download: {url}", file=sys.stderr)
|
|
|
|
|
continue
|
2025-12-14 00:53:52 -08:00
|
|
|
|
2025-12-21 16:59:37 -08:00
|
|
|
# Step progress is per-URL download.
|
|
|
|
|
# Keep steps meaningful: long-running download + finalize.
|
|
|
|
|
# (Fast internal bookkeeping should not be steps.)
|
|
|
|
|
_begin_live_steps(2)
|
|
|
|
|
|
2025-12-11 12:47:30 -08:00
|
|
|
# If playlist_items is specified but looks like a format ID (e.g. from table selection),
|
|
|
|
|
# treat it as a format selector instead of playlist items.
|
|
|
|
|
# This handles the case where @N selection passes -item <format_id>
|
|
|
|
|
actual_format = ytdl_format
|
|
|
|
|
actual_playlist_items = playlist_items
|
2025-12-20 23:57:44 -08:00
|
|
|
|
2025-12-11 12:47:30 -08:00
|
|
|
if playlist_items and not ytdl_format:
|
|
|
|
|
# Heuristic: if it contains non-numeric chars (excluding ranges/commas)
|
|
|
|
|
# it is likely a format ID (e.g. '140-drc', 'best', '137+140')
|
|
|
|
|
import re
|
2025-12-20 23:57:44 -08:00
|
|
|
if re.search(r"[^0-9,-]", playlist_items):
|
2025-12-11 12:47:30 -08:00
|
|
|
actual_format = playlist_items
|
|
|
|
|
actual_playlist_items = None
|
|
|
|
|
|
2025-12-16 23:23:43 -08:00
|
|
|
# For -audio, default to yt-dlp's built-in bestaudio selector.
|
|
|
|
|
# This should *not* require interactive format picking.
|
2025-12-12 21:55:38 -08:00
|
|
|
if mode == "audio" and not actual_format:
|
2025-12-16 23:23:43 -08:00
|
|
|
actual_format = "bestaudio"
|
|
|
|
|
|
|
|
|
|
# If no explicit format is provided for video mode, allow a config override.
|
|
|
|
|
if mode == "video" and not actual_format:
|
|
|
|
|
configured = (ytdlp_tool.default_format("video") or "").strip()
|
|
|
|
|
if configured and configured != "bestvideo+bestaudio/best":
|
|
|
|
|
actual_format = configured
|
|
|
|
|
|
2025-12-20 23:57:44 -08:00
|
|
|
# If the batch has exactly one available format, force it.
|
|
|
|
|
# This prevents the "Requested format is not available" error loop entirely.
|
|
|
|
|
forced_single_applied = False
|
|
|
|
|
if forced_single_format_for_batch and forced_single_format_id and not ytdl_format and not actual_playlist_items:
|
|
|
|
|
actual_format = forced_single_format_id
|
|
|
|
|
forced_single_applied = True
|
|
|
|
|
|
2025-12-16 23:23:43 -08:00
|
|
|
# If a single format id was chosen and it is video-only, auto-merge best audio.
|
|
|
|
|
if (
|
|
|
|
|
actual_format
|
|
|
|
|
and isinstance(actual_format, str)
|
|
|
|
|
and mode != "audio"
|
|
|
|
|
and "+" not in actual_format
|
|
|
|
|
and "/" not in actual_format
|
|
|
|
|
and "[" not in actual_format
|
|
|
|
|
and actual_format not in {"best", "bv", "ba", "b"}
|
2025-12-20 23:57:44 -08:00
|
|
|
and not forced_single_applied
|
2025-12-16 23:23:43 -08:00
|
|
|
):
|
|
|
|
|
try:
|
2025-12-20 23:57:44 -08:00
|
|
|
formats = _list_formats_cached(url, playlist_items_value=actual_playlist_items)
|
2025-12-16 23:23:43 -08:00
|
|
|
if formats:
|
|
|
|
|
fmt_match = next(
|
|
|
|
|
(f for f in formats if str(f.get("format_id", "")) == actual_format),
|
|
|
|
|
None,
|
|
|
|
|
)
|
|
|
|
|
if fmt_match:
|
|
|
|
|
vcodec = str(fmt_match.get("vcodec", "none"))
|
|
|
|
|
acodec = str(fmt_match.get("acodec", "none"))
|
|
|
|
|
if vcodec != "none" and acodec == "none":
|
|
|
|
|
debug(
|
|
|
|
|
f"Selected video-only format {actual_format}; using {actual_format}+ba for audio"
|
|
|
|
|
)
|
|
|
|
|
actual_format = f"{actual_format}+ba"
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
2025-12-12 21:55:38 -08:00
|
|
|
|
2025-12-20 23:57:44 -08:00
|
|
|
attempted_single_format_fallback = False
|
|
|
|
|
while True:
|
|
|
|
|
try:
|
|
|
|
|
opts = DownloadOptions(
|
|
|
|
|
url=url,
|
|
|
|
|
mode=mode,
|
|
|
|
|
output_dir=final_output_dir,
|
|
|
|
|
ytdl_format=actual_format,
|
|
|
|
|
cookies_path=ytdlp_tool.resolve_cookiefile(),
|
|
|
|
|
clip_sections=clip_sections_spec,
|
|
|
|
|
playlist_items=actual_playlist_items,
|
|
|
|
|
quiet=quiet_mode,
|
|
|
|
|
no_playlist=False,
|
|
|
|
|
embed_chapters=embed_chapters,
|
|
|
|
|
write_sub=write_sub,
|
|
|
|
|
)
|
|
|
|
|
|
2025-12-21 16:59:37 -08:00
|
|
|
_step("downloading")
|
2025-12-20 23:57:44 -08:00
|
|
|
# Use timeout wrapper to prevent hanging
|
|
|
|
|
debug(f"Starting download with 5-minute timeout...")
|
|
|
|
|
result_obj = _download_with_timeout(opts, timeout_seconds=300)
|
|
|
|
|
debug(f"Download completed, building pipe object...")
|
|
|
|
|
break
|
|
|
|
|
except DownloadError as e:
|
|
|
|
|
# If yt-dlp failed due to an unavailable default format and there is only
|
|
|
|
|
# one available format, auto-retry with that format instead of prompting.
|
|
|
|
|
cause = getattr(e, "__cause__", None)
|
|
|
|
|
detail = ""
|
|
|
|
|
try:
|
|
|
|
|
detail = str(cause or "")
|
|
|
|
|
except Exception:
|
|
|
|
|
detail = ""
|
|
|
|
|
|
|
|
|
|
if ("requested format is not available" in (detail or "").lower()) and mode != "audio":
|
|
|
|
|
# If we already discovered there's only one format for the batch,
|
|
|
|
|
# retry directly with it instead of re-listing formats per item.
|
|
|
|
|
if (
|
|
|
|
|
forced_single_format_for_batch
|
|
|
|
|
and forced_single_format_id
|
|
|
|
|
and not ytdl_format
|
|
|
|
|
and not actual_playlist_items
|
|
|
|
|
and not attempted_single_format_fallback
|
|
|
|
|
):
|
|
|
|
|
attempted_single_format_fallback = True
|
|
|
|
|
actual_format = forced_single_format_id
|
|
|
|
|
debug(f"Only one format available (playlist preflight); retrying with: {actual_format}")
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
formats = _list_formats_cached(url, playlist_items_value=actual_playlist_items)
|
|
|
|
|
if (
|
|
|
|
|
(not attempted_single_format_fallback)
|
|
|
|
|
and isinstance(formats, list)
|
|
|
|
|
and len(formats) == 1
|
|
|
|
|
and isinstance(formats[0], dict)
|
|
|
|
|
):
|
|
|
|
|
only = formats[0]
|
|
|
|
|
fallback_format = str(only.get("format_id") or "").strip()
|
|
|
|
|
selection_format_id = fallback_format
|
|
|
|
|
try:
|
|
|
|
|
vcodec = str(only.get("vcodec", "none"))
|
|
|
|
|
acodec = str(only.get("acodec", "none"))
|
|
|
|
|
if vcodec != "none" and acodec == "none" and fallback_format:
|
|
|
|
|
selection_format_id = f"{fallback_format}+ba"
|
|
|
|
|
except Exception:
|
|
|
|
|
selection_format_id = fallback_format
|
|
|
|
|
|
|
|
|
|
if selection_format_id:
|
|
|
|
|
attempted_single_format_fallback = True
|
|
|
|
|
actual_format = selection_format_id
|
|
|
|
|
debug(f"Only one format available; retrying with: {actual_format}")
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
# Fall back to interactive selection when there are multiple formats.
|
|
|
|
|
if formats:
|
|
|
|
|
formats_to_show = formats
|
|
|
|
|
|
|
|
|
|
table = ResultTable(title=f"Available formats for {url}", max_columns=10, preserve_order=True)
|
|
|
|
|
table.set_table("ytdlp.formatlist")
|
|
|
|
|
table.set_source_command("download-media", [url])
|
|
|
|
|
|
|
|
|
|
results_list: List[Dict[str, Any]] = []
|
|
|
|
|
for idx, fmt in enumerate(formats_to_show, 1):
|
|
|
|
|
resolution = fmt.get("resolution", "")
|
|
|
|
|
ext = fmt.get("ext", "")
|
|
|
|
|
vcodec = fmt.get("vcodec", "none")
|
|
|
|
|
acodec = fmt.get("acodec", "none")
|
|
|
|
|
filesize = fmt.get("filesize")
|
|
|
|
|
filesize_approx = fmt.get("filesize_approx")
|
|
|
|
|
format_id = fmt.get("format_id", "")
|
|
|
|
|
|
|
|
|
|
selection_format_id = format_id
|
|
|
|
|
try:
|
|
|
|
|
if vcodec != "none" and acodec == "none" and format_id:
|
|
|
|
|
selection_format_id = f"{format_id}+ba"
|
|
|
|
|
except Exception:
|
|
|
|
|
selection_format_id = format_id
|
|
|
|
|
|
|
|
|
|
size_str = ""
|
|
|
|
|
size_prefix = ""
|
|
|
|
|
size_bytes = filesize
|
|
|
|
|
if not size_bytes:
|
|
|
|
|
size_bytes = filesize_approx
|
|
|
|
|
if size_bytes and not filesize:  # only mark "~" when falling back to filesize_approx
|
|
|
|
|
size_prefix = "~"
|
|
|
|
|
try:
|
|
|
|
|
if isinstance(size_bytes, (int, float)) and size_bytes > 0:
|
|
|
|
|
size_mb = float(size_bytes) / (1024 * 1024)
|
|
|
|
|
size_str = f"{size_prefix}{size_mb:.1f}MB"
|
|
|
|
|
except Exception:
|
|
|
|
|
size_str = ""
|
|
|
|
|
|
|
|
|
|
desc_parts: List[str] = []
|
|
|
|
|
if resolution and resolution != "audio only":
|
|
|
|
|
desc_parts.append(str(resolution))
|
|
|
|
|
if ext:
|
|
|
|
|
desc_parts.append(str(ext).upper())
|
|
|
|
|
if vcodec != "none":
|
|
|
|
|
desc_parts.append(f"v:{vcodec}")
|
|
|
|
|
if acodec != "none":
|
|
|
|
|
desc_parts.append(f"a:{acodec}")
|
|
|
|
|
if size_str:
|
|
|
|
|
desc_parts.append(size_str)
|
|
|
|
|
format_desc = " | ".join(desc_parts)
|
|
|
|
|
|
|
|
|
|
format_dict: Dict[str, Any] = {
|
|
|
|
|
"table": "download-media",
|
|
|
|
|
"title": f"Format {format_id}",
|
|
|
|
|
"url": url,
|
|
|
|
|
"target": url,
|
|
|
|
|
"detail": format_desc,
|
|
|
|
|
"media_kind": "format",
|
|
|
|
|
"columns": [
|
|
|
|
|
("ID", format_id),
|
|
|
|
|
("Resolution", resolution or "N/A"),
|
|
|
|
|
("Ext", ext),
|
|
|
|
|
("Size", size_str or ""),
|
|
|
|
|
("Video", vcodec),
|
|
|
|
|
("Audio", acodec),
|
|
|
|
|
],
|
|
|
|
|
"full_metadata": {
|
|
|
|
|
"format_id": format_id,
|
|
|
|
|
"url": url,
|
|
|
|
|
"item_selector": selection_format_id,
|
|
|
|
|
},
|
|
|
|
|
"_selection_args": ["-format", selection_format_id],
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
results_list.append(format_dict)
|
|
|
|
|
table.add_result(format_dict)
|
|
|
|
|
|
|
|
|
|
pipeline_context.set_current_stage_table(table)
|
|
|
|
|
pipeline_context.set_last_result_table(table, results_list)
|
|
|
|
|
|
|
|
|
|
# Always print for interactive selection.
|
|
|
|
|
try:
|
|
|
|
|
get_stderr_console().print(table)
|
|
|
|
|
setattr(table, "_rendered_by_cmdlet", True)
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
|
|
|
|
|
2025-12-21 16:59:37 -08:00
|
|
|
# Complete the step sequence: we return here and the user must
|
|
|
|
|
# re-run with @N selection.
|
|
|
|
|
_step("awaiting selection")
|
|
|
|
|
|
2025-12-20 23:57:44 -08:00
|
|
|
log("Requested format is not available; select a working format with @N", file=sys.stderr)
|
|
|
|
|
return 0
|
2025-12-11 12:47:30 -08:00
|
|
|
|
2025-12-20 23:57:44 -08:00
|
|
|
# Non-format DownloadError: surface and skip this URL.
|
|
|
|
|
raise
|
2025-12-13 00:18:30 -08:00
|
|
|
|
2025-12-16 01:45:01 -08:00
|
|
|
# Expand result set:
|
|
|
|
|
# - playlists return a list
|
|
|
|
|
# - section clips return a single DownloadMediaResult with `paths` populated
|
|
|
|
|
results_to_emit: List[Any] = []
|
|
|
|
|
if isinstance(result_obj, list):
|
|
|
|
|
results_to_emit = list(result_obj)
|
|
|
|
|
else:
|
|
|
|
|
paths = getattr(result_obj, "paths", None)
|
|
|
|
|
if isinstance(paths, list) and paths:
|
|
|
|
|
# Create one DownloadMediaResult per section file
|
|
|
|
|
for p in paths:
|
|
|
|
|
try:
|
|
|
|
|
p_path = Path(p)
|
|
|
|
|
except Exception:
|
|
|
|
|
continue
|
2025-12-20 02:12:45 -08:00
|
|
|
# Sidecars (subtitles) should never be piped as standalone items.
|
|
|
|
|
# They are handled separately and attached to notes.
|
|
|
|
|
try:
|
|
|
|
|
if p_path.suffix.lower() in _SUBTITLE_EXTS:
|
|
|
|
|
continue
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
2025-12-16 01:45:01 -08:00
|
|
|
if not p_path.exists() or p_path.is_dir():
|
|
|
|
|
continue
|
|
|
|
|
try:
|
|
|
|
|
hv = sha256_file(p_path)
|
|
|
|
|
except Exception:
|
|
|
|
|
hv = None
|
|
|
|
|
results_to_emit.append(
|
|
|
|
|
DownloadMediaResult(
|
|
|
|
|
path=p_path,
|
|
|
|
|
info=getattr(result_obj, "info", {}) or {},
|
|
|
|
|
tag=list(getattr(result_obj, "tag", []) or []),
|
|
|
|
|
source_url=getattr(result_obj, "source_url", None) or opts.url,
|
|
|
|
|
hash_value=hv,
|
|
|
|
|
)
|
|
|
|
|
)
|
|
|
|
|
else:
|
|
|
|
|
results_to_emit = [result_obj]
|
2025-12-13 12:09:50 -08:00
|
|
|
|
2025-12-16 01:45:01 -08:00
|
|
|
# Build PipeObjects first so we can attach cross-clip relationships.
|
|
|
|
|
pipe_objects: List[Dict[str, Any]] = []
|
2025-12-13 00:18:30 -08:00
|
|
|
for downloaded in results_to_emit:
|
2025-12-16 23:23:43 -08:00
|
|
|
po = self._build_pipe_object(downloaded, url, opts)
|
2025-12-20 23:57:44 -08:00
|
|
|
pipe_seq += 1
|
|
|
|
|
try:
|
|
|
|
|
po.setdefault("pipe_index", pipe_seq)
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
2025-12-16 23:23:43 -08:00
|
|
|
|
|
|
|
|
# Attach chapter timestamps for downstream consumers (e.g., mpv scripts)
|
|
|
|
|
# even if container embedding fails.
|
|
|
|
|
try:
|
|
|
|
|
info = downloaded.info if isinstance(getattr(downloaded, "info", None), dict) else {}
|
|
|
|
|
except Exception:
|
|
|
|
|
info = {}
|
|
|
|
|
chapters_text = _format_chapters_note(info) if embed_chapters else None
|
|
|
|
|
if chapters_text:
|
|
|
|
|
notes = po.get("notes")
|
|
|
|
|
if not isinstance(notes, dict):
|
|
|
|
|
notes = {}
|
|
|
|
|
notes.setdefault("chapters", chapters_text)
|
|
|
|
|
po["notes"] = notes
|
|
|
|
|
|
|
|
|
|
if write_sub:
|
|
|
|
|
try:
|
|
|
|
|
media_path = Path(str(po.get("path") or ""))
|
|
|
|
|
except Exception:
|
|
|
|
|
media_path = None
|
|
|
|
|
|
|
|
|
|
if media_path is not None and media_path.exists() and media_path.is_file():
|
|
|
|
|
sub_path = _best_subtitle_sidecar(media_path)
|
|
|
|
|
if sub_path is not None:
|
|
|
|
|
sub_text = _read_text_file(sub_path)
|
|
|
|
|
if sub_text:
|
|
|
|
|
notes = po.get("notes")
|
|
|
|
|
if not isinstance(notes, dict):
|
|
|
|
|
notes = {}
|
|
|
|
|
notes["sub"] = sub_text
|
|
|
|
|
po["notes"] = notes
|
2025-12-20 02:12:45 -08:00
|
|
|
# We keep subtitles as notes; do not leave a sidecar that later stages
|
|
|
|
|
# might try to ingest as a file.
|
|
|
|
|
try:
|
|
|
|
|
sub_path.unlink()
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
2025-12-16 23:23:43 -08:00
|
|
|
|
|
|
|
|
pipe_objects.append(po)
|
2025-12-16 01:45:01 -08:00
|
|
|
|
|
|
|
|
# If this is a clip download, decorate titles/tags so the title: tag is clip-based.
|
|
|
|
|
# Relationship tags are only added when multiple clips exist.
|
|
|
|
|
try:
|
|
|
|
|
if clip_ranges and len(pipe_objects) == len(clip_ranges):
|
2025-12-20 02:12:45 -08:00
|
|
|
source_hash = query_hash_override or self._find_existing_hash_for_url(storage, canonical_url, hydrus_available=hydrus_available)
|
2025-12-16 01:45:01 -08:00
|
|
|
self._apply_clip_decorations(pipe_objects, clip_ranges, source_king_hash=source_hash)
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
2025-12-13 12:09:50 -08:00
|
|
|
|
2025-12-16 01:45:01 -08:00
|
|
|
debug(f"Emitting {len(pipe_objects)} result(s) to pipeline...")
|
|
|
|
|
|
2025-12-21 16:59:37 -08:00
|
|
|
# Mark complete *before* the first emit, because the pipeline clears the
|
|
|
|
|
# status line on emit().
|
|
|
|
|
_step("finalized")
|
|
|
|
|
|
2025-12-16 01:45:01 -08:00
|
|
|
stage_ctx = pipeline_context.get_stage_context()
|
|
|
|
|
emit_enabled = bool(stage_ctx is not None and not getattr(stage_ctx, "is_last_stage", False))
|
|
|
|
|
for pipe_obj_dict in pipe_objects:
|
2025-12-13 12:09:50 -08:00
|
|
|
# Only emit when there is a downstream stage.
|
|
|
|
|
# This keeps `download-media` from producing a result table when run standalone.
|
|
|
|
|
if emit_enabled:
|
|
|
|
|
pipeline_context.emit(pipe_obj_dict)
|
2025-12-13 00:18:30 -08:00
|
|
|
|
|
|
|
|
# Automatically register url with local library
|
|
|
|
|
if pipe_obj_dict.get("url"):
|
|
|
|
|
pipe_obj = coerce_to_pipe_object(pipe_obj_dict)
|
|
|
|
|
register_url_with_local_library(pipe_obj, config)
|
|
|
|
|
|
2025-12-20 23:57:44 -08:00
|
|
|
# Keep a copy so we can optionally run a tail pipeline entered at the playlist prompt.
|
|
|
|
|
try:
|
|
|
|
|
downloaded_pipe_objects.append(pipe_obj_dict)
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
|
|
|
|
|
2025-12-16 01:45:01 -08:00
|
|
|
downloaded_count += len(pipe_objects)
|
2025-12-11 12:47:30 -08:00
|
|
|
debug("✓ Downloaded and emitted")
|
|
|
|
|
|
|
|
|
|
except DownloadError as e:
|
|
|
|
|
log(f"Download failed for {url}: {e}", file=sys.stderr)
|
|
|
|
|
except Exception as e:
|
|
|
|
|
log(f"Error processing {url}: {e}", file=sys.stderr)
|
|
|
|
|
|
|
|
|
|
if downloaded_count > 0:
|
|
|
|
|
debug(f"✓ Successfully processed {downloaded_count} URL(s)")
|
2025-12-20 23:57:44 -08:00
|
|
|
|
2025-12-11 12:47:30 -08:00
|
|
|
return 0
|
|
|
|
|
|
|
|
|
|
log("No downloads completed", file=sys.stderr)
|
|
|
|
|
return 1
|
|
|
|
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
log(f"Error in download-media: {e}", file=sys.stderr)
|
|
|
|
|
return 1
|
|
|
|
|
|
|
|
|
|
def _resolve_output_dir(self, parsed: Dict[str, Any], config: Dict[str, Any]) -> Optional[Path]:
|
2025-12-16 01:45:01 -08:00
|
|
|
"""Resolve the output directory.
|
|
|
|
|
|
|
|
|
|
Rules:
|
|
|
|
|
- If user passes `-path`, use that directory (override).
|
|
|
|
|
- Otherwise default to a temp directory (config["temp"] if present, else OS temp).
|
|
|
|
|
"""
|
2025-12-11 12:47:30 -08:00
|
|
|
|
2025-12-16 01:45:01 -08:00
|
|
|
# Priority 1: explicit output directory override
|
|
|
|
|
path_override = parsed.get("path")
|
|
|
|
|
if path_override:
|
2025-12-11 12:47:30 -08:00
|
|
|
try:
|
2025-12-16 01:45:01 -08:00
|
|
|
candidate = Path(str(path_override)).expanduser()
|
|
|
|
|
# If user passed a file path, treat its parent as output dir.
|
|
|
|
|
if candidate.suffix:
|
|
|
|
|
candidate = candidate.parent
|
|
|
|
|
candidate.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
debug(f"Using output directory override: {candidate}")
|
|
|
|
|
return candidate
|
2025-12-11 12:47:30 -08:00
|
|
|
except Exception as e:
|
2025-12-16 01:45:01 -08:00
|
|
|
log(f"Invalid -path output directory: {e}", file=sys.stderr)
|
2025-12-11 12:47:30 -08:00
|
|
|
return None
|
|
|
|
|
|
2025-12-16 01:45:01 -08:00
|
|
|
# Priority 2: config-provided temp/output directory
|
2025-12-13 00:18:30 -08:00
|
|
|
try:
|
2025-12-16 01:45:01 -08:00
|
|
|
temp_value = (config or {}).get("temp") if isinstance(config, dict) else None
|
2025-12-13 00:18:30 -08:00
|
|
|
except Exception:
|
2025-12-16 01:45:01 -08:00
|
|
|
temp_value = None
|
|
|
|
|
if temp_value:
|
|
|
|
|
try:
|
|
|
|
|
candidate = Path(str(temp_value)).expanduser()
|
|
|
|
|
candidate.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
debug(f"Using config temp directory: {candidate}")
|
|
|
|
|
return candidate
|
|
|
|
|
except Exception as e:
|
|
|
|
|
log(f"Cannot use configured temp directory '{temp_value}': {e}", file=sys.stderr)
|
|
|
|
|
return None
|
2025-12-11 12:47:30 -08:00
|
|
|
|
2025-12-16 01:45:01 -08:00
|
|
|
# Priority 3: OS temp fallback
|
2025-12-11 12:47:30 -08:00
|
|
|
try:
|
2025-12-16 01:45:01 -08:00
|
|
|
candidate = Path(tempfile.gettempdir()) / "Medios-Macina"
|
|
|
|
|
candidate.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
debug(f"Using OS temp directory: {candidate}")
|
|
|
|
|
return candidate
|
2025-12-11 12:47:30 -08:00
|
|
|
except Exception as e:
|
2025-12-16 01:45:01 -08:00
|
|
|
log(f"Cannot create OS temp directory: {e}", file=sys.stderr)
|
2025-12-11 12:47:30 -08:00
|
|
|
return None
|
|
|
|
|
|
2025-12-16 01:45:01 -08:00
|
|
|
def _parse_time_ranges(self, spec: str) -> List[tuple[int, int]]:
|
|
|
|
|
"""Parse clip specs into a list of (start_seconds, end_seconds).
|
2025-12-11 12:47:30 -08:00
|
|
|
|
2025-12-16 01:45:01 -08:00
|
|
|
Supported inputs:
|
|
|
|
|
- "MM:SS-MM:SS"
|
|
|
|
|
- "HH:MM:SS-HH:MM:SS"
|
|
|
|
|
- seconds: "280-300"
|
|
|
|
|
- multiple ranges separated by commas: "4:40-5:00,5:15-5:25"
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
def _to_seconds(ts: str) -> Optional[int]:
|
|
|
|
|
ts = str(ts).strip()
|
|
|
|
|
if not ts:
|
2025-12-11 12:47:30 -08:00
|
|
|
return None
|
2025-12-16 01:45:01 -08:00
|
|
|
|
2025-12-20 02:12:45 -08:00
|
|
|
# Support compact units like 3m4s, 1h22m, 1h2m3s
|
|
|
|
|
# (case-insensitive; seconds may be fractional but are truncated to int)
|
|
|
|
|
try:
|
|
|
|
|
unit_match = re.fullmatch(
|
|
|
|
|
r"(?i)\s*(?:(?P<h>\d+)h)?\s*(?:(?P<m>\d+)m)?\s*(?:(?P<s>\d+(?:\.\d+)?)s)?\s*",
|
|
|
|
|
ts,
|
|
|
|
|
)
|
|
|
|
|
except Exception:
|
|
|
|
|
unit_match = None
|
|
|
|
|
if unit_match and unit_match.group(0).strip() and any(unit_match.group(g) for g in ("h", "m", "s")):
|
|
|
|
|
try:
|
|
|
|
|
hours = int(unit_match.group("h") or 0)
|
|
|
|
|
minutes = int(unit_match.group("m") or 0)
|
|
|
|
|
seconds = float(unit_match.group("s") or 0)
|
|
|
|
|
total = (hours * 3600) + (minutes * 60) + seconds
|
|
|
|
|
return int(total)
|
|
|
|
|
except Exception:
|
|
|
|
|
return None
|
|
|
|
|
|
2025-12-16 01:45:01 -08:00
|
|
|
if ":" in ts:
|
|
|
|
|
parts = [p.strip() for p in ts.split(":")]
|
|
|
|
|
if len(parts) == 2:
|
|
|
|
|
hh_s = "0"
|
|
|
|
|
mm_s, ss_s = parts
|
|
|
|
|
elif len(parts) == 3:
|
|
|
|
|
hh_s, mm_s, ss_s = parts
|
|
|
|
|
else:
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
hours = int(hh_s)
|
|
|
|
|
minutes = int(mm_s)
|
|
|
|
|
seconds = float(ss_s)
|
|
|
|
|
total = (hours * 3600) + (minutes * 60) + seconds
|
|
|
|
|
return int(total)
|
|
|
|
|
except Exception:
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
return int(float(ts))
|
|
|
|
|
except Exception:
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
ranges: List[tuple[int, int]] = []
|
|
|
|
|
if not spec:
|
|
|
|
|
return ranges
|
|
|
|
|
|
|
|
|
|
for piece in str(spec).split(","):
|
|
|
|
|
piece = piece.strip()
|
|
|
|
|
if not piece:
|
|
|
|
|
continue
|
|
|
|
|
if "-" not in piece:
|
|
|
|
|
return []
|
|
|
|
|
start_s, end_s = [p.strip() for p in piece.split("-", 1)]
|
|
|
|
|
start = _to_seconds(start_s)
|
|
|
|
|
end = _to_seconds(end_s)
|
|
|
|
|
if start is None or end is None or start >= end:
|
|
|
|
|
return []
|
|
|
|
|
ranges.append((start, end))
|
|
|
|
|
|
|
|
|
|
return ranges
|
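
    # Illustrative behaviour of _parse_time_ranges (derived from the parser above):
    #   "4:40-5:00,5:15-5:25"  -> [(280, 300), (315, 325)]
    #   "3m4s-3m14s"           -> [(184, 194)]
    #   "280-300"              -> [(280, 300)]
    #   "5:00" (no "-") or an inverted range rejects the whole spec -> []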

    @staticmethod
    def _parse_keyed_csv_spec(spec: str, *, default_key: str) -> Dict[str, List[str]]:
        """Parse comma-separated values with optional sticky `key:` prefixes.

        Example:
            clip:3m4s-3m14s,1h22m-1h33m,item:2-3

        Rules:
        - Items are split on commas.
        - If an item begins with `key:`, that key becomes active for subsequent items.
        - If an item has no `key:` prefix, it belongs to the last active key.
        - If no key has been set yet, values belong to default_key.
        """
        out: Dict[str, List[str]] = {}
        if not isinstance(spec, str):
            spec = str(spec)
        text = spec.strip()
        if not text:
            return out

        active = (default_key or "").strip().lower() or "clip"
        key_pattern = re.compile(r"^([A-Za-z_][A-Za-z0-9_-]*)\s*:\s*(.*)$")

        for raw_piece in text.split(","):
            piece = raw_piece.strip()
            if not piece:
                continue

            m = key_pattern.match(piece)
            if m:
                active = (m.group(1) or "").strip().lower() or active
                value = (m.group(2) or "").strip()
                if value:
                    out.setdefault(active, []).append(value)
                continue

            out.setdefault(active, []).append(piece)

        return out
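
    # Illustrative result (derived from the rules above):
    #   _parse_keyed_csv_spec("clip:3m4s-3m14s,1h22m-1h33m,item:2-3", default_key="clip")
    #   -> {"clip": ["3m4s-3m14s", "1h22m-1h33m"], "item": ["2-3"]}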

    def _build_clip_sections_spec(
        self,
        clip_ranges: Optional[List[tuple[int, int]]],
    ) -> Optional[str]:
        """Convert parsed clip ranges into a downloader section spec (seconds)."""
        ranges: List[str] = []
        if clip_ranges:
            for start_s, end_s in clip_ranges:
                ranges.append(f"{start_s}-{end_s}")
        return ",".join(ranges) if ranges else None
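
    # Illustrative: _build_clip_sections_spec([(280, 300), (315, 325)]) -> "280-300,315-325";
    # an empty or None input yields None.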

    def _build_pipe_object(self, download_result: Any, url: str, opts: DownloadOptions) -> Dict[str, Any]:
        """Create a PipeObject-compatible dict from a DownloadMediaResult."""
        info: Dict[str, Any] = download_result.info if isinstance(download_result.info, dict) else {}
        media_path = Path(download_result.path)
        hash_value = download_result.hash_value or self._compute_file_hash(media_path)
        title = info.get("title") or media_path.stem
        tag = list(download_result.tag or [])

        # Add title tag for searchability
        if title and f"title:{title}" not in tag:
            tag.insert(0, f"title:{title}")

        # Store the canonical URL for de-dup/search purposes.
        # Prefer yt-dlp's webpage_url, and do not mix in the raw requested URL (which may contain timestamps).
        final_url = None
        try:
            page_url = info.get("webpage_url") or info.get("original_url") or info.get("url")
            if page_url:
                final_url = str(page_url)
        except Exception:
            final_url = None

        if not final_url and url:
            final_url = str(url)

        # Construct canonical PipeObject dict: hash, store, path, url, title, tags.
        # Prefer explicit backend names (storage_name/storage_location). If none, default to PATH,
        # which indicates the file is available at a filesystem path and hasn't been added to a backend yet.
        return {
            "path": str(media_path),
            "hash": hash_value,
            "title": title,
            "url": final_url,
            "tag": tag,
            "action": "cmdlet:download-media",
            "is_temp": True,
            "ytdl_format": getattr(opts, "ytdl_format", None),
            # download_mode removed (deprecated); keep media_kind
            "store": getattr(opts, "storage_name", None) or getattr(opts, "storage_location", None) or "PATH",
            "media_kind": "video" if opts.mode == "video" else "audio",
        }
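
    # Illustrative shape of the returned PipeObject dict (field names come from the
    # code above; the values here are made up for demonstration):
    #   {"path": "/tmp/Medios-Macina/clip.mp4", "hash": "<sha256 hex>",
    #    "title": "Some upload", "url": "https://www.youtube.com/watch?v=...",
    #    "tag": ["title:Some upload"], "action": "cmdlet:download-media",
    #    "is_temp": True, "ytdl_format": None, "store": "PATH", "media_kind": "video"}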

    @staticmethod
    def _normalise_hash_hex(value: Optional[str]) -> Optional[str]:
        if not value or not isinstance(value, str):
            return None
        candidate = value.strip().lower()
        if len(candidate) == 64 and all(c in "0123456789abcdef" for c in candidate):
            return candidate
        return None
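
    # Illustrative: _normalise_hash_hex("AB" * 32) -> "ab" * 32 (64 hex chars, lowercased);
    # anything that is not exactly 64 hex characters after strip()/lower() -> None.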

    @classmethod
    def _extract_hash_from_search_hit(cls, hit: Any) -> Optional[str]:
        if not isinstance(hit, dict):
            return None
        for key in ("hash", "hash_hex", "file_hash", "hydrus_hash"):
            v = hit.get(key)
            normalized = cls._normalise_hash_hex(str(v) if v is not None else None)
            if normalized:
                return normalized
        return None
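
    # Illustrative: a search hit such as {"hash_hex": "<64-char sha256>", "title": "..."}
    # yields the normalised hash; non-dict hits or hits without a usable key yield None.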

    @classmethod
    def _find_existing_hash_for_url(
        cls,
        storage: Any,
        canonical_url: str,
        *,
        hydrus_available: bool,
    ) -> Optional[str]:
        """Best-effort lookup of an existing stored item hash by url:<canonical_url>.

        Used to make the stored source video the king for multi-clip relationships.
        """
        if storage is None or not canonical_url:
            return None
        try:
            from Store.HydrusNetwork import HydrusNetwork
        except Exception:
            HydrusNetwork = None  # type: ignore

        try:
            backend_names = list(storage.list_searchable_backends() or [])
        except Exception:
            backend_names = []

        for backend_name in backend_names:
            try:
                backend = storage[backend_name]
            except Exception:
                continue
            try:
                if str(backend_name).strip().lower() == "temp":
                    continue
            except Exception:
                pass
            try:
                if HydrusNetwork is not None and isinstance(backend, HydrusNetwork) and not hydrus_available:
                    continue
            except Exception:
                pass

            try:
                hits = backend.search(f"url:{canonical_url}", limit=5) or []
            except Exception:
                hits = []
            for hit in hits:
                extracted = cls._extract_hash_from_search_hit(hit)
                if extracted:
                    return extracted

        return None
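
    # Note (inferred from the loop above): the "temp" backend is always skipped,
    # Hydrus backends are skipped when hydrus_available is False, and the first hit
    # whose url:<canonical_url> search yields a valid 64-char hash wins.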

    @staticmethod
    def _format_timecode(seconds: int, *, force_hours: bool) -> str:
        total = max(0, int(seconds))
        minutes, secs = divmod(total, 60)
        hours, minutes = divmod(minutes, 60)
        if force_hours:
            return f"{hours:02d}:{minutes:02d}:{secs:02d}"
        return f"{minutes:02d}:{secs:02d}"

    @classmethod
    def _format_clip_range(cls, start_s: int, end_s: int) -> str:
        force_hours = bool(start_s >= 3600 or end_s >= 3600)
        return f"{cls._format_timecode(start_s, force_hours=force_hours)}-{cls._format_timecode(end_s, force_hours=force_hours)}"
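
    # Illustrative: _format_clip_range(280, 300) -> "04:40-05:00";
    # _format_clip_range(3600, 3700) -> "01:00:00-01:01:40" (hours are forced once
    # either endpoint reaches 3600 seconds).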

    @classmethod
    def _apply_clip_decorations(
        cls,
        pipe_objects: List[Dict[str, Any]],
        clip_ranges: List[tuple[int, int]],
        *,
        source_king_hash: Optional[str],
    ) -> None:
        """Apply clip:{range} tags/titles and relationship metadata for multi-clip downloads.

        - Sets the clip title (and title: tag) to exactly `clip:{range}`.
        - Adds a `clip:{range}` tag.
        - Sets `relationships` on each emitted item (king hash first, then alt hashes)
          so downstream can persist relationships into a DB/API without storing relationship tags.
        """
        if not pipe_objects or len(pipe_objects) != len(clip_ranges):
            return

        # Always apply clip titles/tags (even for a single clip).
        for po, (start_s, end_s) in zip(pipe_objects, clip_ranges):
            clip_range = cls._format_clip_range(start_s, end_s)
            clip_tag = f"clip:{clip_range}"

            # Title: make it generic/consistent for clips.
            po["title"] = clip_tag

            tags = po.get("tag")
            if not isinstance(tags, list):
                tags = []

            # Replace any existing title: tags with title:<clip_tag>.
            tags = [t for t in tags if not str(t).strip().lower().startswith("title:")]

            # Relationships must not be stored as tags.
            tags = [t for t in tags if not str(t).strip().lower().startswith("relationship:")]
            tags.insert(0, f"title:{clip_tag}")

            # Ensure the clip tag exists.
            if clip_tag not in tags:
                tags.append(clip_tag)

            po["tag"] = tags

        # Relationship metadata only makes sense when multiple clips exist.
        if len(pipe_objects) < 2:
            return

        hashes: List[str] = []
        for po in pipe_objects:
            h = cls._normalise_hash_hex(str(po.get("hash") or ""))
            hashes.append(h or "")

        # Determine king: prefer an existing source video hash if present; else the first clip becomes king.
        king_hash = cls._normalise_hash_hex(source_king_hash) if source_king_hash else None
        if not king_hash:
            king_hash = hashes[0] if hashes and hashes[0] else None
        if not king_hash:
            return

        alt_hashes: List[str] = [h for h in hashes if h and h != king_hash]
        if not alt_hashes:
            return

        # Carry relationship metadata through the pipeline without using tags.
        rel_payload = {"king": [king_hash], "alt": list(alt_hashes)}
        for po in pipe_objects:
            po["relationships"] = {"king": list(rel_payload["king"]), "alt": list(rel_payload["alt"])}
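
    # Illustrative outcome for two clips with hashes h1 and h2 and no stored source
    # (hypothetical values): each item is retitled/tagged in "clip:04:40-05:00" style,
    # and every item carries relationships == {"king": [h1], "alt": [h2]}.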

    def _compute_file_hash(self, filepath: Path) -> str:
        """Compute the SHA-256 hash of a file."""
        sha256_hash = hashlib.sha256()
        with open(filepath, "rb") as f:
            for byte_block in iter(lambda: f.read(4096), b""):
                sha256_hash.update(byte_block)
        return sha256_hash.hexdigest()


# Module-level singleton registration
CMDLET = Download_Media()