df
Some checks failed
smoke-mm / Install & smoke test mm --help (push) Has been cancelled
Some checks failed
smoke-mm / Install & smoke test mm --help (push) Has been cancelled
This commit is contained in:
270
SYS/download.py
270
SYS/download.py
@@ -6,6 +6,7 @@ Lean, focused downloader without event infrastructure overhead.
|
||||
- Tag extraction via metadata.extract_ytdlp_tags()
|
||||
- Logging via helper.logger.log()
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import glob # noqa: F401
|
||||
@@ -97,7 +98,9 @@ def is_url_supported_by_ytdlp(url: str) -> bool:
|
||||
return False
|
||||
|
||||
|
||||
def list_formats(url: str, no_playlist: bool = False, playlist_items: Optional[str] = None) -> Optional[List[Dict[str, Any]]]:
|
||||
def list_formats(
|
||||
url: str, no_playlist: bool = False, playlist_items: Optional[str] = None
|
||||
) -> Optional[List[Dict[str, Any]]]:
|
||||
"""Get list of available formats for a URL using yt-dlp."""
|
||||
_ensure_yt_dlp_ready()
|
||||
|
||||
@@ -125,19 +128,21 @@ def list_formats(url: str, no_playlist: bool = False, playlist_items: Optional[s
|
||||
|
||||
result_formats = []
|
||||
for fmt in formats:
|
||||
result_formats.append({
|
||||
"format_id": fmt.get("format_id", ""),
|
||||
"format": fmt.get("format", ""),
|
||||
"ext": fmt.get("ext", ""),
|
||||
"resolution": fmt.get("resolution", ""),
|
||||
"width": fmt.get("width"),
|
||||
"height": fmt.get("height"),
|
||||
"fps": fmt.get("fps"),
|
||||
"vcodec": fmt.get("vcodec", "none"),
|
||||
"acodec": fmt.get("acodec", "none"),
|
||||
"filesize": fmt.get("filesize"),
|
||||
"tbr": fmt.get("tbr"),
|
||||
})
|
||||
result_formats.append(
|
||||
{
|
||||
"format_id": fmt.get("format_id", ""),
|
||||
"format": fmt.get("format", ""),
|
||||
"ext": fmt.get("ext", ""),
|
||||
"resolution": fmt.get("resolution", ""),
|
||||
"width": fmt.get("width"),
|
||||
"height": fmt.get("height"),
|
||||
"fps": fmt.get("fps"),
|
||||
"vcodec": fmt.get("vcodec", "none"),
|
||||
"acodec": fmt.get("acodec", "none"),
|
||||
"filesize": fmt.get("filesize"),
|
||||
"tbr": fmt.get("tbr"),
|
||||
}
|
||||
)
|
||||
|
||||
debug(f"Found {len(result_formats)} available formats")
|
||||
return result_formats
|
||||
@@ -147,48 +152,50 @@ def list_formats(url: str, no_playlist: bool = False, playlist_items: Optional[s
|
||||
return None
|
||||
|
||||
|
||||
def _download_with_sections_via_cli(url: str, ytdl_options: Dict[str, Any], sections: List[str], quiet: bool = False) -> tuple[Optional[str], Dict[str, Any]]:
|
||||
def _download_with_sections_via_cli(
|
||||
url: str, ytdl_options: Dict[str, Any], sections: List[str], quiet: bool = False
|
||||
) -> tuple[Optional[str], Dict[str, Any]]:
|
||||
"""Download each section separately so merge-file can combine them.
|
||||
|
||||
|
||||
yt-dlp with multiple --download-sections args merges them into one file.
|
||||
We need separate files for merge-file, so download each section individually.
|
||||
|
||||
|
||||
Uses hash-based filenames for sections (not title-based) to prevent yt-dlp from
|
||||
thinking sections are already downloaded. The title is extracted and stored in tags.
|
||||
|
||||
|
||||
Returns:
|
||||
(session_id, first_section_info_dict) - session_id for finding files, info dict for metadata extraction
|
||||
"""
|
||||
|
||||
|
||||
sections_list = ytdl_options.get("download_sections", [])
|
||||
if not sections_list:
|
||||
return "", {}
|
||||
|
||||
|
||||
# Generate a unique hash-based ID for this download session
|
||||
# This ensures different videos/downloads don't have filename collisions
|
||||
session_id = hashlib.md5(
|
||||
(url + str(time.time()) + ''.join(random.choices(string.ascii_letters, k=10))).encode()
|
||||
(url + str(time.time()) + "".join(random.choices(string.ascii_letters, k=10))).encode()
|
||||
).hexdigest()[:12]
|
||||
|
||||
|
||||
first_section_info = None
|
||||
title_from_first = None
|
||||
|
||||
|
||||
# Download each section separately with unique output template using session ID
|
||||
for section_idx, section in enumerate(sections_list, 1):
|
||||
# Build unique output template for this section using session-based filename
|
||||
# e.g., "{session_id}_{section_idx}.ext" - simple and unique per section
|
||||
base_outtmpl = ytdl_options.get("outtmpl", "%(title)s.%(ext)s")
|
||||
output_dir_path = Path(base_outtmpl).parent
|
||||
|
||||
|
||||
# Use session_id + section index for temp filename
|
||||
# e.g., "/path/{session_id}_1.%(ext)s"
|
||||
filename_tmpl = f"{session_id}_{section_idx}"
|
||||
if base_outtmpl.endswith(".%(ext)s"):
|
||||
filename_tmpl += ".%(ext)s"
|
||||
|
||||
|
||||
# Use Path to handle separators correctly for the OS
|
||||
section_outtmpl = str(output_dir_path / filename_tmpl)
|
||||
|
||||
|
||||
# For the first section, extract metadata first (separate call)
|
||||
if section_idx == 1:
|
||||
metadata_cmd = ["yt-dlp", "--dump-json", "--skip-download"]
|
||||
@@ -198,14 +205,14 @@ def _download_with_sections_via_cli(url: str, ytdl_options: Dict[str, Any], sect
|
||||
if ytdl_options.get("noplaylist"):
|
||||
metadata_cmd.append("--no-playlist")
|
||||
metadata_cmd.append(url)
|
||||
|
||||
|
||||
try:
|
||||
meta_result = subprocess.run(metadata_cmd, capture_output=True, text=True)
|
||||
if meta_result.returncode == 0 and meta_result.stdout:
|
||||
try:
|
||||
info_dict = json.loads(meta_result.stdout.strip())
|
||||
first_section_info = info_dict
|
||||
title_from_first = info_dict.get('title')
|
||||
title_from_first = info_dict.get("title")
|
||||
if not quiet:
|
||||
debug(f"Extracted title from metadata: {title_from_first}")
|
||||
except json.JSONDecodeError:
|
||||
@@ -214,50 +221,54 @@ def _download_with_sections_via_cli(url: str, ytdl_options: Dict[str, Any], sect
|
||||
except Exception as e:
|
||||
if not quiet:
|
||||
debug(f"Error extracting metadata: {e}")
|
||||
|
||||
|
||||
# Build yt-dlp command for downloading this section
|
||||
cmd = ["yt-dlp"]
|
||||
|
||||
|
||||
# Add format
|
||||
if ytdl_options.get("format"):
|
||||
cmd.extend(["-f", ytdl_options["format"]])
|
||||
|
||||
|
||||
# Add ONLY this section (not all sections)
|
||||
cmd.extend(["--download-sections", section])
|
||||
|
||||
|
||||
# Add force-keyframes-at-cuts if specified
|
||||
if ytdl_options.get("force_keyframes_at_cuts"):
|
||||
cmd.append("--force-keyframes-at-cuts")
|
||||
|
||||
|
||||
# Add output template for this section
|
||||
cmd.extend(["-o", section_outtmpl])
|
||||
|
||||
|
||||
# Add cookies file if present
|
||||
if ytdl_options.get("cookiefile"):
|
||||
# Convert backslashes to forward slashes for better compatibility
|
||||
cookies_path = ytdl_options["cookiefile"].replace("\\", "/")
|
||||
cmd.extend(["--cookies", cookies_path])
|
||||
|
||||
|
||||
# Add no-playlist if specified
|
||||
if ytdl_options.get("noplaylist"):
|
||||
cmd.append("--no-playlist")
|
||||
|
||||
|
||||
# Add the URL
|
||||
cmd.append(url)
|
||||
|
||||
|
||||
if not quiet:
|
||||
debug(f"Running yt-dlp for section {section_idx}/{len(sections_list)}: {section}")
|
||||
debug(f"Command: {' '.join(cmd)}")
|
||||
|
||||
|
||||
# Run the subprocess - don't capture output so progress is shown
|
||||
try:
|
||||
result = subprocess.run(cmd)
|
||||
|
||||
|
||||
if result.returncode != 0:
|
||||
raise DownloadError(f"yt-dlp subprocess failed for section {section_idx} with code {result.returncode}")
|
||||
raise DownloadError(
|
||||
f"yt-dlp subprocess failed for section {section_idx} with code {result.returncode}"
|
||||
)
|
||||
except Exception as exc:
|
||||
raise DownloadError(f"yt-dlp subprocess error for section {section_idx}: {exc}") from exc
|
||||
|
||||
raise DownloadError(
|
||||
f"yt-dlp subprocess error for section {section_idx}: {exc}"
|
||||
) from exc
|
||||
|
||||
return session_id, first_section_info or {}
|
||||
|
||||
|
||||
@@ -299,33 +310,39 @@ def _build_ytdlp_options(opts: DownloadOptions) -> Dict[str, Any]:
|
||||
else: # video
|
||||
base_options["format"] = opts.ytdl_format or "bestvideo+bestaudio/best"
|
||||
base_options["format_sort"] = [
|
||||
"res:4320", "res:2880", "res:2160", "res:1440", "res:1080", "res:720", "res"
|
||||
"res:4320",
|
||||
"res:2880",
|
||||
"res:2160",
|
||||
"res:1440",
|
||||
"res:1080",
|
||||
"res:720",
|
||||
"res",
|
||||
]
|
||||
|
||||
# Add clip sections if provided (yt-dlp will download only these sections)
|
||||
if opts.clip_sections:
|
||||
# Parse section ranges like "48-65,120-152,196-205" (seconds)
|
||||
# Parse section ranges like "48-65,120-152,196-205" (seconds)
|
||||
# and convert to yt-dlp format: "*HH:MM:SS-HH:MM:SS,*HH:MM:SS-HH:MM:SS"
|
||||
sections = []
|
||||
for section_range in opts.clip_sections.split(','):
|
||||
for section_range in opts.clip_sections.split(","):
|
||||
try:
|
||||
start_str, end_str = section_range.strip().split('-')
|
||||
start_str, end_str = section_range.strip().split("-")
|
||||
start_sec = float(start_str)
|
||||
end_sec = float(end_str)
|
||||
|
||||
|
||||
# Convert seconds to HH:MM:SS format
|
||||
def sec_to_hhmmss(seconds):
|
||||
hours = int(seconds // 3600)
|
||||
minutes = int((seconds % 3600) // 60)
|
||||
secs = int(seconds % 60)
|
||||
return f"{hours:02d}:{minutes:02d}:{secs:02d}"
|
||||
|
||||
|
||||
start_time = sec_to_hhmmss(start_sec)
|
||||
end_time = sec_to_hhmmss(end_sec)
|
||||
sections.append(f"*{start_time}-{end_time}")
|
||||
except (ValueError, AttributeError):
|
||||
pass
|
||||
|
||||
|
||||
if sections:
|
||||
# Pass each section as a separate element in the list (yt-dlp expects multiple --download-sections args)
|
||||
base_options["download_sections"] = sections
|
||||
@@ -412,41 +429,41 @@ def _extract_sha256(info: Dict[str, Any]) -> Optional[str]:
|
||||
|
||||
def _get_libgen_download_url(libgen_url: str) -> Optional[str]:
|
||||
"""Extract the actual download link from LibGen redirect URL.
|
||||
|
||||
|
||||
LibGen url like https://libgen.gl/file.php?id=123456 redirect to
|
||||
actual mirror url. This follows the redirect chain to get the real file.
|
||||
|
||||
|
||||
Args:
|
||||
libgen_url: LibGen file.php URL
|
||||
|
||||
|
||||
Returns:
|
||||
Actual download URL or None if extraction fails
|
||||
"""
|
||||
try:
|
||||
import requests
|
||||
from urllib.parse import urlparse
|
||||
|
||||
|
||||
# Check if this is a LibGen URL
|
||||
parsed = urlparse(libgen_url)
|
||||
if 'libgen' not in parsed.netloc.lower():
|
||||
if "libgen" not in parsed.netloc.lower():
|
||||
return None
|
||||
|
||||
if '/file.php' not in parsed.path.lower():
|
||||
|
||||
if "/file.php" not in parsed.path.lower():
|
||||
return None
|
||||
|
||||
|
||||
# LibGen redirects to actual mirrors, follow redirects to get final URL
|
||||
session = requests.Session()
|
||||
session.headers.update({
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'
|
||||
})
|
||||
|
||||
session.headers.update(
|
||||
{"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"}
|
||||
)
|
||||
|
||||
debug(f"Following LibGen redirect chain for: {libgen_url}")
|
||||
|
||||
|
||||
# First, get the page and look for direct download link
|
||||
try:
|
||||
response = session.get(libgen_url, timeout=10, allow_redirects=True)
|
||||
final_url = response.url
|
||||
|
||||
|
||||
# Try to find actual download link in the page
|
||||
try:
|
||||
try:
|
||||
@@ -462,8 +479,12 @@ def _get_libgen_download_url(libgen_url: str) -> Optional[str]:
|
||||
continue
|
||||
|
||||
href_lower = href.lower()
|
||||
if "get.php" in href_lower or href_lower.endswith((".pdf", ".epub", ".djvu", ".mobi")):
|
||||
download_url = href if href.startswith("http") else urljoin(final_url, href)
|
||||
if "get.php" in href_lower or href_lower.endswith(
|
||||
(".pdf", ".epub", ".djvu", ".mobi")
|
||||
):
|
||||
download_url = (
|
||||
href if href.startswith("http") else urljoin(final_url, href)
|
||||
)
|
||||
debug(f"Found download link: {download_url}")
|
||||
return download_url
|
||||
else:
|
||||
@@ -477,19 +498,23 @@ def _get_libgen_download_url(libgen_url: str) -> Optional[str]:
|
||||
if not href or href.lower().startswith("javascript:"):
|
||||
continue
|
||||
href_lower = href.lower()
|
||||
if "get.php" in href_lower or href_lower.endswith((".pdf", ".epub", ".djvu", ".mobi")):
|
||||
download_url = href if href.startswith("http") else urljoin(final_url, href)
|
||||
if "get.php" in href_lower or href_lower.endswith(
|
||||
(".pdf", ".epub", ".djvu", ".mobi")
|
||||
):
|
||||
download_url = (
|
||||
href if href.startswith("http") else urljoin(final_url, href)
|
||||
)
|
||||
debug(f"Found download link: {download_url}")
|
||||
return download_url
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
# If we followed redirects successfully, return the final URL
|
||||
# This handles cases where libgen redirects to a direct download mirror
|
||||
if final_url != libgen_url:
|
||||
debug(f"LibGen resolved to mirror: {final_url}")
|
||||
return final_url
|
||||
|
||||
|
||||
except requests.RequestException as e:
|
||||
log(f"Error following LibGen redirects: {e}", file=sys.stderr)
|
||||
# Try head request as fallback
|
||||
@@ -500,9 +525,9 @@ def _get_libgen_download_url(libgen_url: str) -> Optional[str]:
|
||||
return response.url
|
||||
except:
|
||||
pass
|
||||
|
||||
|
||||
return None
|
||||
|
||||
|
||||
except Exception as e:
|
||||
log(f"Error resolving LibGen URL: {e}", file=sys.stderr)
|
||||
return None
|
||||
@@ -521,7 +546,7 @@ def _download_direct_file(
|
||||
|
||||
from urllib.parse import unquote, urlparse, parse_qs
|
||||
import re
|
||||
|
||||
|
||||
def _sanitize_filename(name: str) -> str:
|
||||
# Windows-safe filename sanitization.
|
||||
# Keep it simple: strip path parts, drop invalid chars, collapse whitespace.
|
||||
@@ -563,27 +588,27 @@ def _download_direct_file(
|
||||
# Extract filename from URL
|
||||
parsed_url = urlparse(url)
|
||||
url_path = parsed_url.path
|
||||
|
||||
|
||||
# Try to get filename from query parameters first (for LibGen and similar services)
|
||||
# e.g., ?filename=Book+Title.pdf or &download=filename.pdf
|
||||
filename = None
|
||||
if parsed_url.query:
|
||||
query_params = parse_qs(parsed_url.query)
|
||||
for param_name in ('filename', 'download', 'file', 'name'):
|
||||
for param_name in ("filename", "download", "file", "name"):
|
||||
if param_name in query_params and query_params[param_name]:
|
||||
filename = query_params[param_name][0]
|
||||
filename = unquote(filename)
|
||||
break
|
||||
|
||||
|
||||
# If not found in query params, extract from URL path
|
||||
if not filename or not filename.strip():
|
||||
filename = url_path.split("/")[-1] if url_path else ""
|
||||
filename = unquote(filename)
|
||||
|
||||
|
||||
# Remove query strings from filename if any
|
||||
if "?" in filename:
|
||||
filename = filename.split("?")[0]
|
||||
|
||||
|
||||
# Try to get real filename from Content-Disposition header (HEAD request)
|
||||
content_type = ""
|
||||
try:
|
||||
@@ -627,7 +652,12 @@ def _download_direct_file(
|
||||
with HTTPClient(timeout=10.0) as client:
|
||||
with client._request_stream("GET", url, follow_redirects=True) as resp:
|
||||
resp.raise_for_status()
|
||||
ct = str(resp.headers.get("content-type", "") or "").split(";", 1)[0].strip().lower()
|
||||
ct = (
|
||||
str(resp.headers.get("content-type", "") or "")
|
||||
.split(";", 1)[0]
|
||||
.strip()
|
||||
.lower()
|
||||
)
|
||||
if ct.startswith("text/html"):
|
||||
raise DownloadError("URL appears to be an HTML page, not a direct file")
|
||||
except DownloadError:
|
||||
@@ -635,7 +665,7 @@ def _download_direct_file(
|
||||
except Exception:
|
||||
# If we can't probe, keep going; later logic may still infer a safe extension.
|
||||
pass
|
||||
|
||||
|
||||
# Apply suggested filename (from provider title) if given.
|
||||
suggested = _sanitize_filename(suggested_filename) if suggested_filename else ""
|
||||
if suggested:
|
||||
@@ -683,7 +713,9 @@ def _download_direct_file(
|
||||
|
||||
# Final guardrail: if filename is empty, refuse rather than inventing `download.bin`.
|
||||
if not filename or not str(filename).strip():
|
||||
raise DownloadError("Could not determine filename for URL (no Content-Disposition and no path filename)")
|
||||
raise DownloadError(
|
||||
"Could not determine filename for URL (no Content-Disposition and no path filename)"
|
||||
)
|
||||
|
||||
file_path = _unique_path(output_dir / filename)
|
||||
|
||||
@@ -719,12 +751,18 @@ def _download_direct_file(
|
||||
if transfer_started[0]:
|
||||
return
|
||||
try:
|
||||
total_val: Optional[int] = int(content_length) if isinstance(content_length, int) and content_length > 0 else None
|
||||
total_val: Optional[int] = (
|
||||
int(content_length)
|
||||
if isinstance(content_length, int) and content_length > 0
|
||||
else None
|
||||
)
|
||||
except Exception:
|
||||
total_val = None
|
||||
try:
|
||||
if hasattr(pipeline_progress, "begin_transfer"):
|
||||
pipeline_progress.begin_transfer(label=str(filename or "download"), total=total_val)
|
||||
pipeline_progress.begin_transfer(
|
||||
label=str(filename or "download"), total=total_val
|
||||
)
|
||||
transfer_started[0] = True
|
||||
except Exception:
|
||||
return
|
||||
@@ -737,7 +775,11 @@ def _download_direct_file(
|
||||
try:
|
||||
if pipeline_progress is not None and hasattr(pipeline_progress, "update_transfer"):
|
||||
_maybe_begin_transfer(content_length)
|
||||
total_val: Optional[int] = int(content_length) if isinstance(content_length, int) and content_length > 0 else None
|
||||
total_val: Optional[int] = (
|
||||
int(content_length)
|
||||
if isinstance(content_length, int) and content_length > 0
|
||||
else None
|
||||
)
|
||||
pipeline_progress.update_transfer(
|
||||
label=str(filename or "download"),
|
||||
completed=int(bytes_downloaded) if bytes_downloaded is not None else None,
|
||||
@@ -790,14 +832,21 @@ def _download_direct_file(
|
||||
pass
|
||||
|
||||
try:
|
||||
if pipeline_progress is not None and transfer_started[0] and hasattr(pipeline_progress, "finish_transfer"):
|
||||
if (
|
||||
pipeline_progress is not None
|
||||
and transfer_started[0]
|
||||
and hasattr(pipeline_progress, "finish_transfer")
|
||||
):
|
||||
pipeline_progress.finish_transfer(label=str(filename or "download"))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
if progress_bar is not None:
|
||||
avg_speed_str = progress_bar.format_bytes(downloaded_bytes[0] / elapsed if elapsed > 0 else 0) + "/s"
|
||||
avg_speed_str = (
|
||||
progress_bar.format_bytes(downloaded_bytes[0] / elapsed if elapsed > 0 else 0)
|
||||
+ "/s"
|
||||
)
|
||||
else:
|
||||
avg_speed_str = f"{(downloaded_bytes[0] / elapsed if elapsed > 0 else 0):.1f} B/s"
|
||||
except Exception:
|
||||
@@ -835,9 +884,9 @@ def _download_direct_file(
|
||||
|
||||
# Only use filename as a title tag if we couldn't extract any meaningful tags
|
||||
# This prevents duplicate title: tags when the filename could be mistaken for metadata
|
||||
if not any(t.startswith('title:') for t in tags):
|
||||
if not any(t.startswith("title:") for t in tags):
|
||||
# Re-extract tags with filename as title only if needed
|
||||
info['title'] = filename
|
||||
info["title"] = filename
|
||||
tags = []
|
||||
if extract_ytdlp_tags:
|
||||
try:
|
||||
@@ -866,7 +915,11 @@ def _download_direct_file(
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
if pipeline_progress is not None and transfer_started[0] and hasattr(pipeline_progress, "finish_transfer"):
|
||||
if (
|
||||
pipeline_progress is not None
|
||||
and transfer_started[0]
|
||||
and hasattr(pipeline_progress, "finish_transfer")
|
||||
):
|
||||
pipeline_progress.finish_transfer(label=str(filename or "download"))
|
||||
except Exception:
|
||||
pass
|
||||
@@ -884,7 +937,11 @@ def _download_direct_file(
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
if pipeline_progress is not None and transfer_started[0] and hasattr(pipeline_progress, "finish_transfer"):
|
||||
if (
|
||||
pipeline_progress is not None
|
||||
and transfer_started[0]
|
||||
and hasattr(pipeline_progress, "finish_transfer")
|
||||
):
|
||||
pipeline_progress.finish_transfer(label=str(filename or "download"))
|
||||
except Exception:
|
||||
pass
|
||||
@@ -902,31 +959,33 @@ def _download_direct_file(
|
||||
raise DownloadError(f"Error downloading file: {exc}") from exc
|
||||
|
||||
|
||||
def probe_url(url: str, no_playlist: bool = False, timeout_seconds: int = 15) -> Optional[Dict[str, Any]]:
|
||||
def probe_url(
|
||||
url: str, no_playlist: bool = False, timeout_seconds: int = 15
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
"""Probe URL to extract metadata WITHOUT downloading.
|
||||
|
||||
|
||||
Args:
|
||||
url: URL to probe
|
||||
no_playlist: If True, ignore playlists and probe only the single video
|
||||
timeout_seconds: Max seconds to wait for probe (default 15s)
|
||||
|
||||
|
||||
Returns:
|
||||
Dict with keys: extractor, title, entries (if playlist), duration, etc.
|
||||
Returns None if not supported by yt-dlp or on timeout.
|
||||
"""
|
||||
if not is_url_supported_by_ytdlp(url):
|
||||
return None
|
||||
|
||||
|
||||
# Wrap probe in timeout to prevent hanging on large playlists
|
||||
import threading
|
||||
from typing import cast
|
||||
|
||||
|
||||
result_container: List[Optional[Any]] = [None, None] # [result, error]
|
||||
|
||||
|
||||
def _do_probe() -> None:
|
||||
try:
|
||||
_ensure_yt_dlp_ready()
|
||||
|
||||
|
||||
assert yt_dlp is not None
|
||||
# Extract info without downloading
|
||||
# Use extract_flat='in_playlist' to get full metadata for playlist items
|
||||
@@ -939,20 +998,20 @@ def probe_url(url: str, no_playlist: bool = False, timeout_seconds: int = 15) ->
|
||||
"extract_flat": "in_playlist", # Get playlist with metadata for each entry
|
||||
"noprogress": True, # No progress bars
|
||||
}
|
||||
|
||||
|
||||
# Cookies are optional for probing; callers should pass cookiefile via DownloadOptions when needed.
|
||||
|
||||
|
||||
# Add no_playlist option if specified
|
||||
if no_playlist:
|
||||
ydl_opts["noplaylist"] = True
|
||||
|
||||
|
||||
with yt_dlp.YoutubeDL(ydl_opts) as ydl: # type: ignore[arg-type]
|
||||
info = ydl.extract_info(url, download=False)
|
||||
|
||||
|
||||
if not isinstance(info, dict):
|
||||
result_container[0] = None
|
||||
return
|
||||
|
||||
|
||||
# Extract relevant fields
|
||||
result_container[0] = {
|
||||
"extractor": info.get("extractor", ""),
|
||||
@@ -966,20 +1025,20 @@ def probe_url(url: str, no_playlist: bool = False, timeout_seconds: int = 15) ->
|
||||
except Exception as exc:
|
||||
log(f"Probe error for {url}: {exc}")
|
||||
result_container[1] = exc
|
||||
|
||||
|
||||
thread = threading.Thread(target=_do_probe, daemon=False)
|
||||
thread.start()
|
||||
thread.join(timeout=timeout_seconds)
|
||||
|
||||
|
||||
if thread.is_alive():
|
||||
# Probe timed out - return None to fall back to direct download
|
||||
debug(f"Probe timeout for {url} (>={timeout_seconds}s), proceeding with download")
|
||||
return None
|
||||
|
||||
|
||||
if result_container[1] is not None:
|
||||
# Probe error - return None to proceed anyway
|
||||
return None
|
||||
|
||||
|
||||
return cast(Optional[Dict[str, Any]], result_container[0])
|
||||
|
||||
|
||||
@@ -991,4 +1050,3 @@ __all__ = [
|
||||
"DownloadOptions",
|
||||
"DownloadMediaResult",
|
||||
]
|
||||
|
||||
|
||||
Reference in New Issue
Block a user