"""Download media from a URL using yt-dlp (streaming sites only).

Focused cmdlet for video/audio downloads from yt-dlp-supported sites:

- YouTube, Twitch, Dailymotion, Vimeo, etc.
- No direct file downloads (use download-file for that)
- Playlist detection with item selection
- Clip extraction (time ranges)
- Format selection and audio/video modes
- Tag extraction and metadata integration
"""

from __future__ import annotations

import glob  # noqa: F401
import hashlib
import json
import random
import re
import string
import subprocess
import sys
import time
import traceback
from pathlib import Path
from typing import Any, Dict, Iterator, List, Optional, Sequence
from urllib.parse import urljoin, urlparse

import httpx

from SYS.logger import log, debug
from SYS.utils import ensure_directory, sha256_file
from API.HTTP import HTTPClient
from models import DownloadError, DownloadOptions, DownloadMediaResult, DebugLogger, ProgressBar
import pipeline as pipeline_context
from result_table import ResultTable

from ._shared import (
    Cmdlet,
    CmdletArg,
    SharedArgs,
    coerce_to_pipe_object,
    create_pipe_object_result,
    parse_cmdlet_args,
    register_url_with_local_library,
)

# Minimal inlined helpers from helper/download.py (is_url_supported_by_ytdlp, list_formats)
try:
    import yt_dlp  # type: ignore
    from yt_dlp.extractor import gen_extractors  # type: ignore
except Exception as exc:
    yt_dlp = None  # type: ignore
    YTDLP_IMPORT_ERROR = exc
else:
    YTDLP_IMPORT_ERROR = None

try:
    from metadata import extract_ytdlp_tags
except ImportError:
    extract_ytdlp_tags = None

_EXTRACTOR_CACHE: List[Any] | None = None


def _ensure_yt_dlp_ready() -> None:
    """Raise DownloadError if the yt-dlp module failed to import."""
    if yt_dlp is not None:
        return
    detail = str(YTDLP_IMPORT_ERROR or "yt-dlp is not installed")
    raise DownloadError(f"yt-dlp module not available: {detail}")


def is_url_supported_by_ytdlp(url: str) -> bool:
    """Return True when a non-generic yt-dlp extractor claims this URL."""
    if yt_dlp is None:
        return False
    global _EXTRACTOR_CACHE
    if _EXTRACTOR_CACHE is None:
        try:
            _EXTRACTOR_CACHE = list(gen_extractors())  # type: ignore[arg-type]
        except Exception:
            _EXTRACTOR_CACHE = []
    for extractor in _EXTRACTOR_CACHE:
        try:
            if not extractor.suitable(url):
                continue
        except Exception:
            continue
        name = getattr(extractor, "IE_NAME", "")
        # The "generic" extractor matches nearly any URL; it does not count
        # as real streaming-site support.
        if name.lower() == "generic":
            continue
        return True
    return False
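
# Illustrative usage (assumes yt-dlp is installed; actual results depend on
# the installed extractor set):
#
#     is_url_supported_by_ytdlp("https://www.youtube.com/watch?v=abc")  # True
#     is_url_supported_by_ytdlp("https://example.com/report.pdf")       # False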


def list_formats(url: str, no_playlist: bool = False, playlist_items: Optional[str] = None) -> Optional[List[Dict[str, Any]]]:
    """Probe the available yt-dlp formats for a URL without downloading.

    Returns a list of format dicts, or None on failure or when yt-dlp
    reports no formats.
    """
    _ensure_yt_dlp_ready()
    try:
        ydl_opts = {"quiet": True, "no_warnings": True, "socket_timeout": 30}
        if no_playlist:
            ydl_opts["noplaylist"] = True
        if playlist_items:
            ydl_opts["playlist_items"] = playlist_items
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            debug(f"Fetching format list for: {url}")
            info = ydl.extract_info(url, download=False)
            formats = info.get("formats", [])
            if not formats:
                log("No formats available", file=sys.stderr)
                return None
            result_formats = []
            for fmt in formats:
                result_formats.append({
                    "format_id": fmt.get("format_id", ""),
                    "format": fmt.get("format", ""),
                    "ext": fmt.get("ext", ""),
                    "resolution": fmt.get("resolution", ""),
                    "width": fmt.get("width"),
                    "height": fmt.get("height"),
                    "fps": fmt.get("fps"),
                    "vcodec": fmt.get("vcodec", "none"),
                    "acodec": fmt.get("acodec", "none"),
                    "filesize": fmt.get("filesize"),
                    "tbr": fmt.get("tbr"),
                })
            debug(f"Found {len(result_formats)} available formats")
            return result_formats
    except Exception as e:
        log(f"✗ Error fetching formats: {e}", file=sys.stderr)
        return None
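
# A typical entry in the returned list looks roughly like this (values vary
# by site and format):
#
#     {"format_id": "137", "ext": "mp4", "resolution": "1920x1080",
#      "width": 1920, "height": 1080, "fps": 30,
#      "vcodec": "avc1.640028", "acodec": "none",
#      "filesize": 12345678, "tbr": 4400.0}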


def _download_with_sections_via_cli(url: str, ytdl_options: Dict[str, Any], sections: List[str], quiet: bool = False) -> tuple[Optional[str], Dict[str, Any]]:
    """Download clip sections by shelling out to the yt-dlp CLI.

    The Python API does not handle download_sections reliably, so each
    section is fetched in its own subprocess. Returns (session_id,
    first_section_info); section files are written as "{session_id}_{n}.ext".
    """
    sections_list = sections or ytdl_options.get("download_sections", [])
    if not sections_list:
        return None, {}

    session_id = hashlib.md5((url + str(time.time()) + ''.join(random.choices(string.ascii_letters, k=10))).encode()).hexdigest()[:12]
    first_section_info = None

    for section_idx, section in enumerate(sections_list, 1):
        base_outtmpl = ytdl_options.get("outtmpl", "%(title)s.%(ext)s")
        output_dir_path = Path(base_outtmpl).parent
        filename_tmpl = f"{session_id}_{section_idx}"
        if base_outtmpl.endswith(".%(ext)s"):
            filename_tmpl += ".%(ext)s"
        section_outtmpl = str(output_dir_path / filename_tmpl)

        # Fetch metadata once (for the first section) so the result can carry
        # the real title instead of a hash-based filename.
        if section_idx == 1:
            metadata_cmd = ["yt-dlp", "--dump-json", "--skip-download"]
            if ytdl_options.get("cookiefile"):
                cookies_path = ytdl_options["cookiefile"].replace("\\", "/")
                metadata_cmd.extend(["--cookies", cookies_path])
            if ytdl_options.get("noplaylist"):
                metadata_cmd.append("--no-playlist")
            metadata_cmd.append(url)
            try:
                meta_result = subprocess.run(metadata_cmd, capture_output=True, text=True)
                if meta_result.returncode == 0 and meta_result.stdout:
                    try:
                        info_dict = json.loads(meta_result.stdout.strip())
                        first_section_info = info_dict
                        if not quiet:
                            debug(f"Extracted title from metadata: {info_dict.get('title')}")
                    except json.JSONDecodeError:
                        if not quiet:
                            debug("Could not parse JSON metadata")
            except Exception as e:
                if not quiet:
                    debug(f"Error extracting metadata: {e}")

        cmd = ["yt-dlp"]
        if ytdl_options.get("format"):
            cmd.extend(["-f", ytdl_options["format"]])
        # Pass the section range through to the CLI; without this flag the
        # subprocess would fetch the whole video.
        cmd.extend(["--download-sections", section])
        if ytdl_options.get("force_keyframes_at_cuts"):
            cmd.append("--force-keyframes-at-cuts")
        cmd.extend(["-o", section_outtmpl])
        if ytdl_options.get("cookiefile"):
            cookies_path = ytdl_options["cookiefile"].replace("\\", "/")
            cmd.extend(["--cookies", cookies_path])
        if ytdl_options.get("noplaylist"):
            cmd.append("--no-playlist")
        cmd.append(url)

        if not quiet:
            debug(f"Running yt-dlp for section: {section}")
        try:
            subprocess.run(cmd, check=True)
        except Exception as exc:
            if not quiet:
                debug(f"yt-dlp error for section {section}: {exc}")

    return session_id, first_section_info or {}
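
# For example, two sections downloaded in one call produce files such as
# "a1b2c3d4e5f6_1.mp4" and "a1b2c3d4e5f6_2.mp4" in the output directory;
# the caller later renames them to hash-based names.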


def _build_ytdlp_options(opts: DownloadOptions) -> Dict[str, Any]:
    """Translate DownloadOptions into a yt-dlp options dict."""
    ensure_directory(opts.output_dir)
    outtmpl = str((opts.output_dir / "%(title)s.%(ext)s").resolve())
    base_options: Dict[str, Any] = {
        "outtmpl": outtmpl,
        "quiet": True,
        "no_warnings": True,
        "noprogress": True,
        "socket_timeout": 30,
        "retries": 10,
        "fragment_retries": 10,
        "http_chunk_size": 10_485_760,
        "restrictfilenames": True,
        "progress_hooks": [] if opts.quiet else [_progress_callback],
    }

    if opts.cookies_path and opts.cookies_path.is_file():
        base_options["cookiefile"] = str(opts.cookies_path)
    else:
        from hydrus_health_check import get_cookies_file_path  # local import
        global_cookies = get_cookies_file_path()
        if global_cookies:
            base_options["cookiefile"] = global_cookies

    if opts.no_playlist:
        base_options["noplaylist"] = True

    if opts.mode == "audio":
        base_options["format"] = opts.ytdl_format or "251/140/bestaudio"
        base_options["postprocessors"] = [{"key": "FFmpegExtractAudio"}]
    else:
        base_options["format"] = opts.ytdl_format or "bestvideo+bestaudio/best"
        base_options["format_sort"] = ["res:4320", "res:2880", "res:2160", "res:1440", "res:1080", "res:720", "res"]

    if opts.clip_sections:

        def _secs_to_hms(s: int) -> str:
            minutes, seconds = divmod(s, 60)
            hours, minutes = divmod(minutes, 60)
            return f"{hours:02d}:{minutes:02d}:{seconds:02d}"

        sections = []
        for section_range in opts.clip_sections.split(','):
            try:
                start_s, end_s = [int(x) for x in section_range.split('-')]
                sections.append(f"*{_secs_to_hms(start_s)}-{_secs_to_hms(end_s)}")
            except (ValueError, AttributeError):
                pass
        if sections:
            base_options["download_sections"] = sections
            debug(f"Download sections configured: {', '.join(sections)}")

    if opts.playlist_items:
        base_options["playlist_items"] = opts.playlist_items

    if not opts.quiet:
        debug(f"yt-dlp: mode={opts.mode}, format={base_options.get('format')}")
    return base_options
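
# Translation example: opts.clip_sections = "30-90,120-150" becomes
# base_options["download_sections"] = ["*00:00:30-00:01:30", "*00:02:00-00:02:30"],
# the "*START-END" syntax yt-dlp expects for section downloads.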


def _iter_download_entries(info: Dict[str, Any]) -> Iterator[Dict[str, Any]]:
    """Walk a yt-dlp info dict breadth-first and yield downloadable entries.

    Playlists nest their items under "entries"; leaf entries (or any entry
    carrying "requested_downloads") are yielded.
    """
    queue: List[Dict[str, Any]] = [info]
    seen: set[int] = set()
    while queue:
        current = queue.pop(0)
        obj_id = id(current)
        if obj_id in seen:
            continue
        seen.add(obj_id)
        entries = current.get("entries")
        if isinstance(entries, list):
            queue.extend(entries)
        if current.get("requested_downloads") or not entries:
            yield current
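
# For a playlist info dict such as {"entries": [{"id": "a"}, {"id": "b"}]}
# this yields the two child entries; for a single video it yields the
# top-level dict itself.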


def _candidate_paths(entry: Dict[str, Any], output_dir: Path) -> Iterator[Path]:
    """Yield every path the entry might have been written to, best first."""
    requested = entry.get("requested_downloads")
    if isinstance(requested, list):
        for item in requested:
            if isinstance(item, dict):
                fp = item.get("filepath") or item.get("_filename")
                if fp:
                    yield Path(fp)
    for key in ("filepath", "_filename", "filename"):
        value = entry.get(key)
        if value:
            yield Path(value)
    if entry.get("filename"):
        yield output_dir / entry["filename"]


def _resolve_entry_and_path(info: Dict[str, Any], output_dir: Path) -> tuple[Dict[str, Any], Path]:
    """Find the first entry whose candidate path actually exists on disk."""
    for entry in _iter_download_entries(info):
        for candidate in _candidate_paths(entry, output_dir):
            if candidate.is_file():
                return entry, candidate
            if not candidate.is_absolute():
                maybe = output_dir / candidate
                if maybe.is_file():
                    return entry, maybe
    raise FileNotFoundError("yt-dlp did not report a downloaded media file")


def _extract_sha256(info: Dict[str, Any]) -> Optional[str]:
    """Pull a SHA-256 value out of yt-dlp metadata, if the site provided one."""
    for payload in [info] + (info.get("entries") or []):
        if not isinstance(payload, dict):
            continue
        hashes = payload.get("hashes")
        if isinstance(hashes, dict):
            for key in ("sha256", "sha-256", "sha_256"):
                if key in hashes and isinstance(hashes[key], str) and hashes[key].strip():
                    return hashes[key].strip()
        for key in ("sha256", "sha-256", "sha_256"):
            value = payload.get(key)
            if isinstance(value, str) and value.strip():
                return value.strip()
    return None


def _get_libgen_download_url(libgen_url: str) -> Optional[str]:
    """Resolve a LibGen file.php link to a direct mirror download URL.

    Returns the resolved URL, or None when the URL is not a LibGen file.php
    link or resolution fails.
    """
    try:
        import requests  # local import; only needed for LibGen resolution

        parsed = urlparse(libgen_url)
        if 'libgen' not in parsed.netloc.lower():
            return None
        if '/file.php' not in parsed.path.lower():
            return None
        session = requests.Session()
        session.headers.update({'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'})
        debug(f"Following LibGen redirect chain for: {libgen_url}")
        try:
            response = session.get(libgen_url, timeout=10, allow_redirects=True)
            final_url = response.url
            # Prefer an explicit get.php link on the mirror page when
            # BeautifulSoup is available.
            try:
                from bs4 import BeautifulSoup
                soup = BeautifulSoup(response.content, 'html.parser')
                for link in soup.find_all('a'):
                    href = link.get('href')
                    if href and 'get.php' in href:
                        return urljoin(libgen_url, href)
            except ImportError:
                pass
            if final_url != libgen_url:
                debug(f"LibGen resolved to mirror: {final_url}")
                return final_url
        except requests.RequestException as e:
            log(f"Error following LibGen redirects: {e}", file=sys.stderr)
            try:
                response = session.head(libgen_url, allow_redirects=True, timeout=10)
                if response.url != libgen_url:
                    return response.url
            except Exception:
                pass
        return None
    except Exception as e:
        log(f"Error resolving LibGen URL: {e}", file=sys.stderr)
        return None


def _progress_callback(status: Dict[str, Any]) -> None:
    """Simple yt-dlp progress hook that writes an inline status line."""
    event = status.get("status")
    if event == "downloading":
        percent = status.get("_percent_str", "?")
        speed = status.get("_speed_str", "?")
        eta = status.get("_eta_str", "?")
        sys.stdout.write(f"\r[download] {percent} at {speed} ETA {eta}   ")
        sys.stdout.flush()
    elif event == "finished":
        sys.stdout.write("\r" + " " * 70 + "\r")
        sys.stdout.flush()
        debug(f"✓ Download finished: {status.get('filename')}")
    elif event in ("postprocessing", "processing"):
        debug(f"Post-processing: {status.get('postprocessor')}")


def _download_direct_file(
    url: str,
    output_dir: Path,
    debug_logger: Optional[DebugLogger] = None,
    quiet: bool = False,
) -> DownloadMediaResult:
    """Download a direct file (PDF, image, document, etc.) without yt-dlp."""
    ensure_directory(output_dir)

    from urllib.parse import parse_qs, unquote

    # Extract the filename from the URL
    parsed_url = urlparse(url)
    url_path = parsed_url.path

    # Try the query parameters first (LibGen and similar services),
    # e.g. ?filename=Book+Title.pdf or &download=filename.pdf
    filename = None
    if parsed_url.query:
        query_params = parse_qs(parsed_url.query)
        for param_name in ('filename', 'download', 'file', 'name'):
            if param_name in query_params and query_params[param_name]:
                filename = unquote(query_params[param_name][0])
                break

    # If not found in the query params, extract from the URL path
    if not filename or not filename.strip():
        filename = url_path.split("/")[-1] if url_path else ""
        filename = unquote(filename)

    # Remove any query string clinging to the filename
    if "?" in filename:
        filename = filename.split("?")[0]

    # Try to get the real filename from the Content-Disposition header (HEAD request)
    try:
        with HTTPClient(timeout=10.0) as client:
            response = client._request("HEAD", url, follow_redirects=True)
            content_disposition = response.headers.get("content-disposition", "")
            if content_disposition:
                # Format: attachment; filename="filename.pdf" or filename=filename.pdf
                match = re.search(r'filename\*?=(?:"([^"]*)"|([^;\s]*))', content_disposition)
                if match:
                    extracted_name = match.group(1) or match.group(2)
                    if extracted_name:
                        filename = unquote(extracted_name)
                        if not quiet:
                            debug(f"Filename from Content-Disposition: {filename}")
    except Exception as e:
        if not quiet:
            log(f"Could not get filename from headers: {e}", file=sys.stderr)

    # Fallback if we still don't have a usable filename
    if not filename or "." not in filename:
        filename = "downloaded_file.bin"

    file_path = output_dir / filename
    progress_bar = ProgressBar()

    if not quiet:
        debug(f"Direct download: {filename}")

    try:
        start_time = time.time()
        downloaded_bytes = [0]
        total_bytes = [0]
        last_progress_time = [start_time]

        def progress_callback(bytes_downloaded: int, content_length: int) -> None:
            downloaded_bytes[0] = bytes_downloaded
            total_bytes[0] = content_length

            now = time.time()
            if now - last_progress_time[0] >= 0.5 and total_bytes[0] > 0:
                elapsed = now - start_time
                percent = (bytes_downloaded / content_length) * 100 if content_length > 0 else 0
                speed = bytes_downloaded / elapsed if elapsed > 0 else 0
                eta_seconds = (content_length - bytes_downloaded) / speed if speed > 0 else 0

                speed_str = progress_bar.format_bytes(speed) + "/s"
                minutes, seconds = divmod(int(eta_seconds), 60)
                hours, minutes = divmod(minutes, 60)
                eta_str = f"{hours:02d}:{minutes:02d}:{seconds:02d}"

                progress_line = progress_bar.format_progress(
                    percent_str=f"{percent:.1f}%",
                    downloaded=bytes_downloaded,
                    total=content_length,
                    speed_str=speed_str,
                    eta_str=eta_str,
                )
                if not quiet:
                    debug(progress_line)
                last_progress_time[0] = now

        with HTTPClient(timeout=30.0) as client:
            client.download(url, str(file_path), progress_callback=progress_callback)

        elapsed = time.time() - start_time
        avg_speed_str = progress_bar.format_bytes(downloaded_bytes[0] / elapsed if elapsed > 0 else 0) + "/s"
        if not quiet:
            debug(f"✓ Downloaded in {elapsed:.1f}s at {avg_speed_str}")

        # For direct file downloads, create a minimal info dict without the
        # filename as title. This prevents duplicate title: tags when the
        # filename is auto-generated; the title is added back below only if
        # no meaningful tags could be extracted.
        info = {
            "id": filename.rsplit(".", 1)[0],
            "ext": filename.rsplit(".", 1)[1] if "." in filename else "bin",
            "webpage_url": url,
        }

        hash_value = None
        try:
            hash_value = sha256_file(file_path)
        except Exception:
            pass

        tags = []
        if extract_ytdlp_tags:
            try:
                tags = extract_ytdlp_tags(info)
            except Exception as e:
                log(f"Error extracting tags: {e}", file=sys.stderr)

        # Only use the filename as a title tag if no meaningful tags were
        # extracted; this avoids duplicate title: tags.
        if not any(t.startswith('title:') for t in tags):
            info['title'] = filename
            tags = []
            if extract_ytdlp_tags:
                try:
                    tags = extract_ytdlp_tags(info)
                except Exception as e:
                    log(f"Error extracting tags with filename: {e}", file=sys.stderr)

        if debug_logger is not None:
            debug_logger.write_record(
                "direct-file-downloaded",
                {"url": url, "path": str(file_path), "hash": hash_value},
            )

        return DownloadMediaResult(
            path=file_path,
            info=info,
            tags=tags,
            source_url=url,
            hash_value=hash_value,
        )

    except (httpx.HTTPError, httpx.RequestError) as exc:
        log(f"Download error: {exc}", file=sys.stderr)
        if debug_logger is not None:
            debug_logger.write_record(
                "exception",
                {"phase": "direct-file", "url": url, "error": str(exc)},
            )
        raise DownloadError(f"Failed to download {url}: {exc}") from exc
    except Exception as exc:
        log(f"Error downloading file: {exc}", file=sys.stderr)
        if debug_logger is not None:
            debug_logger.write_record(
                "exception",
                {
                    "phase": "direct-file",
                    "url": url,
                    "error": str(exc),
                    "traceback": traceback.format_exc(),
                },
            )
        raise DownloadError(f"Error downloading file: {exc}") from exc


def probe_url(url: str, no_playlist: bool = False, timeout_seconds: int = 15) -> Optional[Dict[str, Any]]:
    """Probe a URL to extract metadata WITHOUT downloading.

    Args:
        url: URL to probe
        no_playlist: If True, ignore playlists and probe only the single video
        timeout_seconds: Max seconds to wait for the probe (default 15)

    Returns:
        Dict with keys: extractor, title, entries (if playlist), duration, etc.
        Returns None if not supported by yt-dlp or on timeout.
    """
    if not is_url_supported_by_ytdlp(url):
        return None

    # Wrap the probe in a timeout to prevent hanging on large playlists
    import threading
    from typing import cast

    result_container: List[Optional[Any]] = [None, None]  # [result, error]

    def _do_probe() -> None:
        try:
            _ensure_yt_dlp_ready()

            assert yt_dlp is not None
            # Extract info without downloading; extract_flat='in_playlist'
            # returns per-entry metadata for playlist items.
            ydl_opts = {
                "quiet": True,  # Suppress all output
                "no_warnings": True,
                "socket_timeout": 10,
                "retries": 2,  # Fewer retries for a faster timeout
                "skip_download": True,  # Don't actually download
                "extract_flat": "in_playlist",  # Playlist with metadata for each entry
                "noprogress": True,  # No progress bars
            }

            # Add cookies if available (lazy import to avoid a circular dependency)
            from hydrus_health_check import get_cookies_file_path  # local import

            global_cookies = get_cookies_file_path()
            if global_cookies:
                ydl_opts["cookiefile"] = global_cookies

            if no_playlist:
                ydl_opts["noplaylist"] = True

            with yt_dlp.YoutubeDL(ydl_opts) as ydl:  # type: ignore[arg-type]
                info = ydl.extract_info(url, download=False)

            if not isinstance(info, dict):
                result_container[0] = None
                return

            # Extract the relevant fields
            result_container[0] = {
                "extractor": info.get("extractor", ""),
                "title": info.get("title", ""),
                "entries": info.get("entries", []),  # Populated if playlist
                "duration": info.get("duration"),
                "uploader": info.get("uploader"),
                "description": info.get("description"),
                "url": url,
            }
        except Exception as exc:
            log(f"Probe error for {url}: {exc}")
            result_container[1] = exc

    # daemon=True so a probe that hangs past the timeout cannot keep the
    # interpreter alive at exit.
    thread = threading.Thread(target=_do_probe, daemon=True)
    thread.start()
    thread.join(timeout=timeout_seconds)

    if thread.is_alive():
        # Probe timed out - return None to fall back to direct download
        debug(f"Probe timeout for {url} (>={timeout_seconds}s), proceeding with download")
        return None

    if result_container[1] is not None:
        # Probe error - return None to proceed anyway
        return None

    return cast(Optional[Dict[str, Any]], result_container[0])
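
# Illustrative probe of a playlist URL (shape only; field values depend on
# the site):
#
#     meta = probe_url("https://www.youtube.com/playlist?list=PL123")
#     if meta and meta["entries"]:
#         print(f"{meta['title']}: {len(meta['entries'])} items")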


def download_media(
    opts: DownloadOptions,
    *,
    debug_logger: Optional[DebugLogger] = None,
) -> DownloadMediaResult:
    """Download media from a URL using yt-dlp or direct HTTP download.

    Args:
        opts: DownloadOptions with url, mode, output_dir, etc.
        debug_logger: Optional debug logger for troubleshooting

    Returns:
        DownloadMediaResult with path, info, tags, hash

    Raises:
        DownloadError: If the download fails
    """
    # Handle LibGen URLs specially: file.php redirects to mirrors, while
    # get.php is a direct download from the modern API.
    if 'libgen' in opts.url.lower():
        if '/get.php' in opts.url.lower():
            # Modern API get.php links are direct downloads from mirrors
            if not opts.quiet:
                log("Detected LibGen get.php URL, downloading directly...")
            if debug_logger is not None:
                debug_logger.write_record("libgen-direct", {"url": opts.url})
            return _download_direct_file(opts.url, opts.output_dir, debug_logger, quiet=opts.quiet)
        elif '/file.php' in opts.url.lower():
            # Old-style file.php redirects to mirrors; resolve it first
            if not opts.quiet:
                log("Detected LibGen file.php URL, resolving to actual mirror...")
            actual_url = _get_libgen_download_url(opts.url)
            if actual_url and actual_url != opts.url:
                if not opts.quiet:
                    log(f"Resolved LibGen URL to mirror: {actual_url}")
                # Record the original URL before overwriting it
                if debug_logger is not None:
                    debug_logger.write_record("libgen-resolved", {"original": opts.url, "resolved": actual_url})
                opts.url = actual_url
                # The resolved URL is typically a mirror or direct file that
                # yt-dlp won't support, so go straight to a direct download.
                return _download_direct_file(opts.url, opts.output_dir, debug_logger, quiet=opts.quiet)
            else:
                if not opts.quiet:
                    log("Could not resolve LibGen URL, trying direct download anyway", file=sys.stderr)
                if debug_logger is not None:
                    debug_logger.write_record("libgen-resolve-failed", {"url": opts.url})
                return _download_direct_file(opts.url, opts.output_dir, debug_logger, quiet=opts.quiet)

    # Handle GoFile shares with a dedicated check before yt-dlp/direct fallbacks
    try:
        netloc = urlparse(opts.url).netloc.lower()
    except Exception:
        netloc = ""
    if "gofile.io" in netloc:
        msg = "GoFile links are currently unsupported"
        if not opts.quiet:
            debug(msg)
        if debug_logger is not None:
            debug_logger.write_record("gofile-unsupported", {"url": opts.url})
        raise DownloadError(msg)

    # Determine if yt-dlp should be used
    ytdlp_supported = is_url_supported_by_ytdlp(opts.url)
    if ytdlp_supported:
        # Skip the probe for playlists with item selection (probing can hang
        # on large playlists); the download itself handles item selection.
        if opts.playlist_items:
            debug(f"Skipping probe for playlist (item selection: {opts.playlist_items}), proceeding with download")
            probe_result = {"url": opts.url}  # Minimal probe result
        else:
            probe_result = probe_url(opts.url, no_playlist=opts.no_playlist, timeout_seconds=15)

        if probe_result is None:
            if not opts.quiet:
                log(f"URL supported by yt-dlp but no media detected, falling back to direct download: {opts.url}")
            if debug_logger is not None:
                debug_logger.write_record("ytdlp-skip-no-media", {"url": opts.url})
            return _download_direct_file(opts.url, opts.output_dir, debug_logger, quiet=opts.quiet)
    else:
        if not opts.quiet:
            log(f"URL not supported by yt-dlp, trying direct download: {opts.url}")
        if debug_logger is not None:
            debug_logger.write_record("direct-file-attempt", {"url": opts.url})
        return _download_direct_file(opts.url, opts.output_dir, debug_logger, quiet=opts.quiet)

    _ensure_yt_dlp_ready()

    ytdl_options = _build_ytdlp_options(opts)
    if not opts.quiet:
        debug(f"Starting yt-dlp download: {opts.url}")
    if debug_logger is not None:
        debug_logger.write_record("ytdlp-start", {"url": opts.url})

    assert yt_dlp is not None
    try:
        # Debug: show which options are in effect
        if not opts.quiet:
            if ytdl_options.get("download_sections"):
                debug(f"[yt-dlp] download_sections: {ytdl_options['download_sections']}")
            debug(f"[yt-dlp] force_keyframes_at_cuts: {ytdl_options.get('force_keyframes_at_cuts', False)}")

        # Use a subprocess when download_sections are present (the Python API
        # doesn't support them properly)
        session_id = None
        first_section_info = {}
        if ytdl_options.get("download_sections"):
            session_id, first_section_info = _download_with_sections_via_cli(opts.url, ytdl_options, ytdl_options.get("download_sections", []), quiet=opts.quiet)
            info = None
        else:
            with yt_dlp.YoutubeDL(ytdl_options) as ydl:  # type: ignore[arg-type]
                info = ydl.extract_info(opts.url, download=True)
    except Exception as exc:
        log(f"yt-dlp failed: {exc}", file=sys.stderr)
        if debug_logger is not None:
            debug_logger.write_record(
                "exception",
                {
                    "phase": "yt-dlp",
                    "error": str(exc),
                    "traceback": traceback.format_exc(),
                },
            )
        raise DownloadError("yt-dlp download failed") from exc

    # When a subprocess was used, locate the downloaded file(s) manually
    if info is None:
        try:
            # Wait a moment to ensure files are fully written
            time.sleep(0.5)

            # List all files in output_dir, newest first
            files = sorted(opts.output_dir.iterdir(), key=lambda p: p.stat().st_mtime, reverse=True)
            if not files:
                raise FileNotFoundError(f"No files found in {opts.output_dir}")

            # If sections were downloaded, look for files named with the session_id
            if opts.clip_sections and session_id:
                # Pattern: "{session_id}_1.ext", "{session_id}_2.ext", etc.
                section_pattern = re.compile(rf'^{re.escape(session_id)}_(\d+)\.')
                matching_files = [f for f in files if section_pattern.search(f.name)]

                if matching_files:
                    # Sort by section number to keep the original order
                    def extract_section_num(path: Path) -> int:
                        match = section_pattern.search(path.name)
                        return int(match.group(1)) if match else 999

                    matching_files.sort(key=extract_section_num)
                    debug(f"Found {len(matching_files)} section file(s) matching pattern")

                    # Rename section files to hash-based names so each
                    # section's content gets a unique, stable filename.
                    renamed_files = []

                    for idx, section_file in enumerate(matching_files, 1):
                        try:
                            file_hash = sha256_file(section_file)
                            ext = section_file.suffix
                            new_name = f"{file_hash}{ext}"
                            new_path = opts.output_dir / new_name

                            if new_path.exists() and new_path != section_file:
                                # A file with the same hash exists; keep it and drop the temp file
                                debug(f"File with hash {file_hash} already exists, using existing file.")
                                try:
                                    section_file.unlink()
                                except OSError:
                                    pass
                                renamed_files.append(new_path)
                            else:
                                section_file.rename(new_path)
                                debug(f"Renamed section file: {section_file.name} → {new_name}")
                                renamed_files.append(new_path)
                        except Exception as e:
                            debug(f"Failed to process section file {section_file.name}: {e}")
                            renamed_files.append(section_file)

                    media_path = renamed_files[0]
                    media_paths = renamed_files
                    if not opts.quiet:
                        debug(f"✓ Downloaded {len(media_paths)} section file(s) (session: {session_id})")
                else:
                    # Fall back to the most recent file if the pattern is not found
                    media_path = files[0]
                    media_paths = None
                    if not opts.quiet:
                        debug(f"✓ Downloaded section file (pattern not found): {media_path.name}")
            else:
                # No sections; just take the most recent file
                media_path = files[0]
                media_paths = None

            if not opts.quiet:
                debug(f"✓ Downloaded: {media_path.name}")
            if debug_logger is not None:
                debug_logger.write_record("ytdlp-file-found", {"path": str(media_path)})
        except Exception as exc:
            log(f"Error finding downloaded file: {exc}", file=sys.stderr)
            if debug_logger is not None:
                debug_logger.write_record(
                    "exception",
                    {"phase": "find-file", "error": str(exc)},
                )
            raise DownloadError(str(exc)) from exc

        # Build the result from what can be recovered without an info dict
        file_hash = sha256_file(media_path)

        # For section downloads, tag the title and build a proper info dict
        tags = []
        title = ''
        if first_section_info:
            title = first_section_info.get('title', '')
            if title:
                tags.append(f'title:{title}')
                debug(f"Added title tag for section download: {title}")

        # Always prefer the extracted title over the hash-based filename
        if first_section_info:
            info_dict = first_section_info
        else:
            info_dict = {
                "id": media_path.stem,
                "title": title or media_path.stem,
                "ext": media_path.suffix.lstrip("."),
            }

        return DownloadMediaResult(
            path=media_path,
            info=info_dict,
            tags=tags,
            source_url=opts.url,
            hash_value=file_hash,
            paths=media_paths,  # Include all section files if present
        )

    if not isinstance(info, dict):
        log(f"Unexpected yt-dlp response: {type(info)}", file=sys.stderr)
        raise DownloadError("Unexpected yt-dlp response type")

    info_dict: Dict[str, Any] = info
    if debug_logger is not None:
        debug_logger.write_record(
            "ytdlp-info",
            {
                "keys": sorted(info_dict.keys()),
                "is_playlist": bool(info_dict.get("entries")),
            },
        )

    try:
        entry, media_path = _resolve_entry_and_path(info_dict, opts.output_dir)
    except FileNotFoundError as exc:
        log(f"Error: {exc}", file=sys.stderr)
        if debug_logger is not None:
            debug_logger.write_record(
                "exception",
                {"phase": "resolve-path", "error": str(exc)},
            )
        raise DownloadError(str(exc)) from exc

    if debug_logger is not None:
        debug_logger.write_record(
            "resolved-media",
            {"path": str(media_path), "entry_keys": sorted(entry.keys())},
        )

    # Take the hash from metadata when provided, otherwise compute it
    hash_value = _extract_sha256(entry) or _extract_sha256(info_dict)
    if not hash_value:
        try:
            hash_value = sha256_file(media_path)
        except OSError as exc:
            if debug_logger is not None:
                debug_logger.write_record(
                    "hash-error",
                    {"path": str(media_path), "error": str(exc)},
                )

    # Extract tags using metadata.py
    tags = []
    if extract_ytdlp_tags:
        try:
            tags = extract_ytdlp_tags(entry)
        except Exception as e:
            log(f"Error extracting tags: {e}", file=sys.stderr)

    source_url = (
        entry.get("webpage_url")
        or entry.get("original_url")
        or entry.get("url")
    )

    if not opts.quiet:
        debug(f"✓ Downloaded: {media_path.name} ({len(tags)} tags)")
    if debug_logger is not None:
        debug_logger.write_record(
            "downloaded",
            {
                "path": str(media_path),
                "tag_count": len(tags),
                "source_url": source_url,
                "sha256": hash_value,
            },
        )

    return DownloadMediaResult(
        path=media_path,
        info=entry,
        tags=tags,
        source_url=source_url,
        hash_value=hash_value,
    )


# Timeout wrapper to prevent yt-dlp hangs
def _download_with_timeout(opts: DownloadOptions, timeout_seconds: int = 300) -> Any:
    """Download with timeout protection.

    Args:
        opts: DownloadOptions
        timeout_seconds: Max seconds to wait (default 300s = 5 min)

    Returns:
        DownloadMediaResult

    Raises:
        DownloadError: If the timeout is exceeded
    """
    import threading
    from typing import cast

    result_container: List[Optional[Any]] = [None, None]  # [result, error]

    def _do_download() -> None:
        try:
            result_container[0] = download_media(opts)
        except Exception as e:
            result_container[1] = e

    # Non-daemon thread: if the timeout fires mid-write, the worker keeps
    # running in the background rather than being killed abruptly.
    thread = threading.Thread(target=_do_download, daemon=False)
    thread.start()
    thread.join(timeout=timeout_seconds)

    if thread.is_alive():
        # Thread still running past the deadline - treat as a timeout
        raise DownloadError(f"Download timeout after {timeout_seconds} seconds for {opts.url}")

    if result_container[1] is not None:
        raise cast(Exception, result_container[1])

    if result_container[0] is None:
        raise DownloadError(f"Download failed for {opts.url}")

    return cast(Any, result_container[0])
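
# Minimal usage sketch (assumes DownloadOptions defaults cover the remaining
# fields; the field names follow the constructor call in _run_impl below):
#
#     opts = DownloadOptions(
#         url="https://www.youtube.com/watch?v=abc123",
#         mode="video",
#         output_dir=Path.home() / "Videos",
#     )
#     result = _download_with_timeout(opts, timeout_seconds=300)
#     print(result.path, result.hash_value)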


class Download_Media(Cmdlet):
    """Class-based download-media cmdlet - yt-dlp only, streaming sites."""

    def __init__(self) -> None:
        """Initialize the download-media cmdlet."""
        super().__init__(
            name="download-media",
            summary="Download media from streaming sites (YouTube, Twitch, etc.)",
            usage="download-media <url> [options] or search-file | download-media [options]",
            alias=["dl-media", "download-ytdlp"],
            arg=[
                CmdletArg(name="url", type="string", required=False, description="URL to download (yt-dlp supported sites only)", variadic=True),
                CmdletArg(name="-url", type="string", description="URL to download (alias for positional argument)", variadic=True),
                CmdletArg(name="audio", type="flag", alias="a", description="Download audio only"),
                CmdletArg(name="video", type="flag", alias="v", description="Download video (default)"),
                CmdletArg(name="format", type="string", alias="fmt", description="Explicit yt-dlp format selector"),
                CmdletArg(name="clip", type="string", description="Extract time range: MM:SS-MM:SS"),
                CmdletArg(name="section", type="string", description="Download sections: TIME_RANGE[,TIME_RANGE...]"),
                CmdletArg(name="item", type="string", description="Item selection for playlists/formats"),
            ],
            detail=["Download media from streaming sites using yt-dlp.", "For direct file downloads, use download-file."],
            exec=self.run,
        )
        self.register()

    def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
        """Main execution method."""
        stage_ctx = pipeline_context.get_stage_context()
        in_pipeline = stage_ctx is not None and getattr(stage_ctx, "total_stages", 1) > 1
        if in_pipeline and isinstance(config, dict):
            config["_quiet_background_output"] = True
        return self._run_impl(result, args, config)

    def _run_impl(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
        """Main download implementation for yt-dlp-supported URLs."""
        try:
            debug("Starting download-media")

            # Parse arguments
            parsed = parse_cmdlet_args(args, self)

            # Extract options
            raw_url = parsed.get("url", [])
            if isinstance(raw_url, str):
                raw_url = [raw_url]

            # If no URL was provided via args, try the piped result
            if not raw_url and result:
                from ._shared import get_field
                # Handle a single result or a list of results
                results_to_check = result if isinstance(result, list) else [result]
                for item in results_to_check:
                    # Try the common fields a URL might live in
                    url = get_field(item, "url") or get_field(item, "target")
                    if url:
                        raw_url.append(url)

            # Keep only yt-dlp-supported URLs
            supported_url = [
                url for url in raw_url
                if is_url_supported_by_ytdlp(url)
            ]

            if not supported_url:
                log("No yt-dlp-supported url to download", file=sys.stderr)
                return 1

            # Report any skipped URLs
            unsupported = set(raw_url) - set(supported_url)
            if unsupported:
                debug(f"Skipping {len(unsupported)} unsupported url (use download-file for direct downloads)")

            # Get the output directory
            final_output_dir = self._resolve_output_dir(parsed, config)
            if not final_output_dir:
                return 1

            debug(f"Output directory: {final_output_dir}")

            # Get the remaining options
            clip_spec = parsed.get("clip")
            section_spec = parsed.get("section")

            # Parse clip/section ranges if specified
            clip_range = None
            if clip_spec:
                clip_range = self._parse_time_range(clip_spec)
                if not clip_range:
                    log(f"Invalid clip format: {clip_spec}", file=sys.stderr)
                    return 1

            section_ranges = None
            if section_spec:
                section_ranges = self._parse_section_ranges(section_spec)
                if not section_ranges:
                    log(f"Invalid section format: {section_spec}", file=sys.stderr)
                    return 1

            # Decide whether to show the format-selection table
            playlist_items = str(parsed.get("item")) if parsed.get("item") else None
            ytdl_format = parsed.get("format")

            # With no -item, no explicit -format, and a single URL, check for
            # multiple formats or a playlist and let the user pick.
            if not playlist_items and not ytdl_format and len(supported_url) == 1:
                url = supported_url[0]
                formats = list_formats(url, no_playlist=False)

                if formats and len(formats) > 1:
                    # Filter formats: several video tiers (640px wide and up,
                    # one per resolution) plus the single best audio stream
                    video_formats = []
                    audio_formats = []

                    for fmt in formats:
                        width = fmt.get("width") or 0
                        height = fmt.get("height") or 0
                        vcodec = fmt.get("vcodec", "none")
                        acodec = fmt.get("acodec", "none")

                        # Classify as video-only or audio-only
                        if vcodec != "none" and acodec == "none" and width >= 640:
                            video_formats.append(fmt)
                        elif acodec != "none" and vcodec == "none":
                            audio_formats.append(fmt)

                    # Group videos by resolution and pick the best per tier
                    filtered_formats = []
                    if video_formats:
                        from collections import defaultdict
                        by_resolution = defaultdict(list)
                        for f in video_formats:
                            height = f.get("height") or 0
                            by_resolution[height].append(f)

                        # Within each resolution, prefer AV1, then the highest bitrate
                        for height in sorted(by_resolution.keys(), reverse=True):
                            candidates = by_resolution[height]
                            av1_formats = [f for f in candidates if "av01" in f.get("vcodec", "")]
                            if av1_formats:
                                best = max(av1_formats, key=lambda f: f.get("tbr") or 0)
                            else:
                                best = max(candidates, key=lambda f: f.get("tbr") or 0)
                            filtered_formats.append(best)

                    # Best audio: highest bitrate, any container
                    if audio_formats:
                        best_audio = max(audio_formats, key=lambda f: f.get("tbr") or f.get("abr") or 0)
                        filtered_formats.append(best_audio)

                    if not filtered_formats:
                        # Fall back to all formats if filtering removed everything
                        filtered_formats = formats

                    debug(f"Filtered to {len(filtered_formats)} formats from {len(formats)} total")

                    # Show the format-selection table
                    log(f"Available formats for {url}:", file=sys.stderr)
                    log("", file=sys.stderr)

                    # Build the base command that will be replayed with @N
                    # selection, carrying over any extra args
                    base_cmd = f'download-media "{url}"'
                    # Preserve any additional pipeline stages if in a pipeline
                    remaining_args = [arg for arg in args if arg not in [url] and not arg.startswith('-')]
                    if remaining_args:
                        base_cmd += ' ' + ' '.join(remaining_args)

                    # Create the result table for display
                    table = ResultTable()
                    table.title = f"Available formats for {url}"
                    table.set_source_command("download-media", [url])

                    # Collect the rows for the table
                    results_list = []

                    # Emit format rows for selection
                    for idx, fmt in enumerate(filtered_formats, 1):
                        resolution = fmt.get("resolution", "")
                        ext = fmt.get("ext", "")
                        vcodec = fmt.get("vcodec", "none")
                        acodec = fmt.get("acodec", "none")
                        filesize = fmt.get("filesize")
                        format_id = fmt.get("format_id", "")

                        # Human-readable size
                        size_str = ""
                        if filesize:
                            size_mb = filesize / (1024 * 1024)
                            size_str = f"{size_mb:.1f}MB"

                        # Build the format description
                        desc_parts = []
                        if resolution and resolution != "audio only":
                            desc_parts.append(resolution)
                        if ext:
                            desc_parts.append(ext.upper())
                        if vcodec != "none":
                            desc_parts.append(f"v:{vcodec}")
                        if acodec != "none":
                            desc_parts.append(f"a:{acodec}")
                        if size_str:
                            desc_parts.append(size_str)

                        format_desc = " | ".join(desc_parts)

                        # Build the format dict for emission and the table
                        format_dict = {
                            "table": "download-media",
                            "title": f"Format {format_id}",
                            "url": url,
                            "target": url,
                            "detail": format_desc,
                            "annotations": [ext, resolution] if resolution else [ext],
                            "media_kind": "format",
                            "cmd": base_cmd,
                            "columns": [
                                ("#", str(idx)),
                                ("ID", format_id),
                                ("Resolution", resolution or "N/A"),
                                ("Ext", ext),
                                ("Video", vcodec),
                                ("Audio", acodec),
                                ("Size", size_str or "N/A"),
                            ],
                            "full_metadata": {
                                "format_id": format_id,
                                "url": url,
                                "item_selector": format_id,
                            },
                            "_selection_args": ["-format", format_id]
                        }

                        # Add to the results list and table (don't emit -
                        # formats wait for an @N selection)
                        results_list.append(format_dict)
                        table.add_result(format_dict)

                    # The table is displayed by the pipeline runner; register
                    # it so it renders and is available for @N selection
                    pipeline_context.set_current_stage_table(table)
                    pipeline_context.set_last_result_table(table, results_list)

                    log("", file=sys.stderr)
                    log("Use: @N | download-media to select and download format", file=sys.stderr)
                    return 0

            # Download each URL
            downloaded_count = 0
            clip_sections_spec = self._build_clip_sections_spec(clip_range, section_ranges)
            quiet_mode = bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False
            mode = "audio" if parsed.get("audio") else "video"

            for url in supported_url:
                try:
                    debug(f"Processing: {url}")

                    # If playlist_items looks like a format ID (e.g. from a
                    # table selection), treat it as a format selector instead
                    # of playlist items. This handles @N selection passing
                    # -item <format_id>.
                    actual_format = ytdl_format
                    actual_playlist_items = playlist_items

                    if playlist_items and not ytdl_format:
                        # Heuristic: anything beyond digits, commas, and dashes
                        # is likely a format ID (e.g. '140-drc', 'best', '137+140')
                        if re.search(r'[^0-9,-]', playlist_items):
                            actual_format = playlist_items
                            actual_playlist_items = None

                    opts = DownloadOptions(
                        url=url,
                        mode=mode,
                        output_dir=final_output_dir,
                        ytdl_format=actual_format,
                        clip_sections=clip_sections_spec,
                        playlist_items=actual_playlist_items,
                        quiet=quiet_mode,
                        no_playlist=False,
                    )

                    # Use the timeout wrapper to prevent hangs
                    debug("Starting download with 5-minute timeout...")
                    result_obj = _download_with_timeout(opts, timeout_seconds=300)
                    debug("Download completed, building pipe object...")
                    pipe_obj_dict = self._build_pipe_object(result_obj, url, opts)
                    debug("Emitting result to pipeline...")
                    pipeline_context.emit(pipe_obj_dict)

                    # Automatically register the URL with the local library
                    if pipe_obj_dict.get("url"):
                        pipe_obj = coerce_to_pipe_object(pipe_obj_dict)
                        register_url_with_local_library(pipe_obj, config)

                    downloaded_count += 1
                    debug("✓ Downloaded and emitted")

                except DownloadError as e:
                    log(f"Download failed for {url}: {e}", file=sys.stderr)
                except Exception as e:
                    log(f"Error processing {url}: {e}", file=sys.stderr)

            if downloaded_count > 0:
                debug(f"✓ Successfully processed {downloaded_count} URL(s)")
                return 0

            log("No downloads completed", file=sys.stderr)
            return 1

        except Exception as e:
            log(f"Error in download-media: {e}", file=sys.stderr)
            return 1

    def _resolve_output_dir(self, parsed: Dict[str, Any], config: Dict[str, Any]) -> Optional[Path]:
        """Resolve the output directory from storage location or config."""
        storage_location = parsed.get("storage")

        # Priority 1: --storage flag
        if storage_location:
            try:
                return SharedArgs.resolve_storage(storage_location)
            except Exception as e:
                log(f"Invalid storage location: {e}", file=sys.stderr)
                return None

        # Priority 2: config outfile
        if config and config.get("outfile"):
            try:
                return Path(config["outfile"]).expanduser()
            except Exception:
                pass

        # Priority 3: default (home/Videos)
        final_output_dir = Path.home() / "Videos"
        debug(f"Using default directory: {final_output_dir}")

        # Ensure the directory exists
        try:
            final_output_dir.mkdir(parents=True, exist_ok=True)
        except Exception as e:
            log(f"Cannot create output directory {final_output_dir}: {e}", file=sys.stderr)
            return None

        return final_output_dir

    def _parse_time_range(self, spec: str) -> Optional[tuple]:
        """Parse 'MM:SS-MM:SS' (or plain seconds) into (start_seconds, end_seconds)."""
        try:
            parts = spec.split("-")
            if len(parts) != 2:
                return None

            def to_seconds(ts: str) -> int:
                ts = ts.strip()
                if ":" in ts:
                    mm, ss = ts.split(":")
                    return int(mm) * 60 + int(ss)
                return int(ts)

            start = to_seconds(parts[0])
            end = to_seconds(parts[1])
            return (start, end) if start < end else None
        except Exception:
            return None
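
    # Example conversions: "01:30-02:45" -> (90, 165); "90-165" -> (90, 165);
    # "02:00-01:00" -> None (start must precede end).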

    def _parse_section_ranges(self, spec: str) -> Optional[List[tuple]]:
        """Parse 'RANGE1,RANGE2,...' where each RANGE is 'MM:SS-MM:SS'."""
        try:
            ranges = []
            for range_spec in spec.split(","):
                r = self._parse_time_range(range_spec.strip())
                if r is None:
                    return None
                ranges.append(r)
            return ranges if ranges else None
        except Exception:
            return None

    def _build_clip_sections_spec(
        self,
        clip_range: Optional[tuple],
        section_ranges: Optional[List[tuple]],
    ) -> Optional[str]:
        """Convert parsed clip/section ranges into the downloader spec (seconds)."""
        ranges: List[str] = []
        if clip_range:
            ranges.append(f"{clip_range[0]}-{clip_range[1]}")
        if section_ranges:
            for start, end in section_ranges:
                ranges.append(f"{start}-{end}")
        return ",".join(ranges) if ranges else None

    def _build_pipe_object(self, download_result: Any, url: str, opts: DownloadOptions) -> Dict[str, Any]:
        """Create a PipeObject-compatible dict from a DownloadMediaResult."""
        info: Dict[str, Any] = download_result.info if isinstance(download_result.info, dict) else {}
        media_path = Path(download_result.path)
        hash_value = download_result.hash_value or self._compute_file_hash(media_path)
        title = info.get("title") or media_path.stem
        tag = list(download_result.tags or [])

        # Add a title tag for searchability
        if title and f"title:{title}" not in tag:
            tag.insert(0, f"title:{title}")

        # Build a single canonical URL field; prefer the yt-dlp-provided
        # webpage_url or info.url, falling back to the originally requested
        # URL. Multiple unique URLs are joined into a comma-separated string.
        urls_to_consider: List[str] = []
        try:
            page_url = info.get("webpage_url") or info.get("url")
            if page_url:
                urls_to_consider.append(str(page_url))
        except Exception:
            pass
        if url:
            urls_to_consider.append(str(url))

        seen_urls: List[str] = []
        for u in urls_to_consider:
            if u and u not in seen_urls:
                seen_urls.append(u)
        final_url = ",".join(seen_urls) if seen_urls else None

        # Construct the canonical PipeObject dict: hash, store, path, url,
        # title, tag. Prefer explicit backend names (storage_name /
        # storage_location); "PATH" means the file sits at a filesystem path
        # and has not been added to a backend yet.
        return {
            "path": str(media_path),
            "hash": hash_value,
            "title": title,
            "url": final_url,
            "tag": tag,
            "action": "cmdlet:download-media",
            # download_mode removed (deprecated); keep media_kind
            "store": getattr(opts, "storage_name", None) or getattr(opts, "storage_location", None) or "PATH",
            "media_kind": "video" if opts.mode == "video" else "audio",
        }

    def _compute_file_hash(self, filepath: Path) -> str:
        """Compute the SHA-256 hash of a file."""
        sha256_hash = hashlib.sha256()
        with open(filepath, "rb") as f:
            for byte_block in iter(lambda: f.read(4096), b""):
                sha256_hash.update(byte_block)
        return sha256_hash.hexdigest()


# Module-level singleton registration
CMDLET = Download_Media()
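
# Illustrative CLI invocations (flag names follow the CmdletArg definitions
# above; exact shell syntax depends on the pipeline runner):
#
#     download-media "https://www.youtube.com/watch?v=abc123"
#     download-media "https://www.youtube.com/watch?v=abc123" -audio
#     download-media "https://www.youtube.com/watch?v=abc123" -clip 01:00-02:00
#     @2 | download-media            # pick row 2 from the format table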