This commit is contained in:
nose
2025-12-01 14:42:30 -08:00
parent 6b9ed7d4ab
commit 89aa24961b
8 changed files with 565 additions and 51 deletions

View File

@@ -29,7 +29,7 @@ from typing import Any, Dict, List, Optional, Sequence, Tuple
import uuid
from helper.logger import log, debug
from helper.download import download_media, probe_url
from helper.download import download_media, probe_url, is_url_supported_by_ytdlp
from helper.utils import sha256_file
from models import DownloadOptions
@@ -710,6 +710,97 @@ def _parse_time_range(clip_spec: str) -> Optional[Tuple[int, int]]:
return None
def _parse_section_ranges(section_spec: str) -> Optional[List[Tuple[int, int]]]:
"""Parse section ranges from comma-separated time ranges.
Args:
section_spec: Section ranges like "1:30-1:35,0:05-0:15" or "90-95,5-15"
May include quotes from CLI which will be stripped
Returns:
List of (start_seconds, end_seconds) tuples or None if invalid
"""
try:
# Strip quotes if present (from CLI parsing)
section_spec = section_spec.strip('"\'')
if not section_spec or ',' not in section_spec and '-' not in section_spec:
return None
ranges = []
# Handle both comma-separated ranges and single range
if ',' in section_spec:
section_parts = section_spec.split(',')
else:
section_parts = [section_spec]
for part in section_parts:
part = part.strip()
if not part:
continue
# Parse each range using the same logic as _parse_time_range
# Handle format like "1:30-1:35" or "90-95"
if '-' not in part:
return None
# Split carefully to handle cases like "1:30-1:35"
# We need to find the dash that separates start and end
# Look for pattern: something-something where first something may have colons
dash_pos = -1
colon_count = 0
for i, char in enumerate(part):
if char == ':':
colon_count += 1
elif char == '-':
# If we've seen a colon and this is a dash, check if it's the separator
# Could be "1:30-1:35" or just "90-95"
# The separator dash should come after the first number/time
if i > 0 and i < len(part) - 1:
dash_pos = i
break
if dash_pos == -1:
return None
start_str = part[:dash_pos]
end_str = part[dash_pos+1:]
# Parse start time
if ':' in start_str:
start_parts = start_str.split(':')
if len(start_parts) == 2:
start_sec = int(start_parts[0]) * 60 + int(start_parts[1])
elif len(start_parts) == 3:
start_sec = int(start_parts[0]) * 3600 + int(start_parts[1]) * 60 + int(start_parts[2])
else:
return None
else:
start_sec = int(start_str)
# Parse end time
if ':' in end_str:
end_parts = end_str.split(':')
if len(end_parts) == 2:
end_sec = int(end_parts[0]) * 60 + int(end_parts[1])
elif len(end_parts) == 3:
end_sec = int(end_parts[0]) * 3600 + int(end_parts[1]) * 60 + int(end_parts[2])
else:
return None
else:
end_sec = int(end_str)
if start_sec >= end_sec:
return None
ranges.append((start_sec, end_sec))
return ranges if ranges else None
except (ValueError, AttributeError, IndexError):
return None
# Lowercase, dot-prefixed audio/video file extensions.
# NOTE(review): presumably used to recognise media files; the consumers are not visible here — confirm.
MEDIA_EXTENSIONS = {'.mp3', '.m4a', '.mp4', '.mkv', '.webm', '.flac', '.wav', '.aac'}
@@ -1023,6 +1114,23 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any], emit_results:
else:
log(f"Invalid clip format: {clip_spec}", file=sys.stderr)
return 1
# Section download (yt-dlp only)
section_spec = parsed.get("section")
section_ranges = None
if section_spec:
# Parse section spec like "1:30-1:35,0:05-0:15" into list of (start, end) tuples
section_ranges = _parse_section_ranges(section_spec)
if section_ranges:
debug(f"Section ranges: {section_spec} ({len(section_ranges)} sections)")
# When downloading sections, auto-select best format if not specified
# Since we're only getting portions, quality matters less than completeness
if not format_selector:
format_selector = "bestvideo+bestaudio/best"
debug(f"Auto-selecting format for sections: {format_selector}")
else:
log(f"Invalid section format: {section_spec}", file=sys.stderr)
return 1
cookies_path = parsed.get("cookies")
storage_location = parsed.get("storage")
@@ -2361,7 +2469,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any], emit_results:
if (not current_format_selector and not list_formats_mode and
isinstance(url, str) and url.startswith(('http://', 'https://'))):
# Check if this is a yt-dlp supported URL (YouTube, Vimeo, etc.)
from helper.download import is_url_supported_by_ytdlp, list_formats
from helper.download import list_formats
from result_table import ResultTable
if is_url_supported_by_ytdlp(url):
@@ -2562,13 +2670,35 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any], emit_results:
# Detect YouTube URLs and set no_playlist to download only the single video
is_youtube_url = isinstance(url, str) and ('youtube.com' in url or 'youtu.be' in url)
# Determine clip_sections to pass to yt-dlp
# Sections take precedence over clip if both are specified
# Sections are for yt-dlp download-sections (merge multiple clips at source)
# Clip is for post-download extraction
clip_sections_str = None
if section_ranges:
# Check if this is a yt-dlp URL
if is_url_supported_by_ytdlp(url):
# Convert section ranges to yt-dlp format: "start1-end1,start2-end2"
# Use * prefix to indicate download_sections (yt-dlp convention in some contexts)
# But here we just pass the string and let helper/download.py parse it
clip_sections_str = ",".join(f"{start}-{end}" for start, end in section_ranges)
debug(f"Using yt-dlp sections: {clip_sections_str}")
else:
log(f"Warning: -section only works with yt-dlp supported URLs. Use -clip for {url}", file=sys.stderr)
elif clip_range:
# -clip reuses the clip_sections field and always carries exactly one range.
# NOTE: helper/download.py appears to treat clip_sections as yt-dlp
# download_sections, so -clip may also be applied at download time for
# yt-dlp URLs — TODO confirm the intended behaviour.
clip_sections_str = f"{clip_range[0]}-{clip_range[1]}"
download_opts = DownloadOptions(
url=url,
mode=mode,
output_dir=final_output_dir,
cookies_path=final_cookies_path,
ytdl_format=current_format_selector, # Use per-URL format override if available
clip_sections=f"{clip_range[0]}-{clip_range[1]}" if clip_range else None,
clip_sections=clip_sections_str,
playlist_items=playlist_items,
no_playlist=is_youtube_url, # For YouTube, ignore playlist URLs and download single video
)
@@ -2584,8 +2714,36 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any], emit_results:
file_path = result_data.path
if file_path.exists():
# Check if we have multiple section files to emit
if result_data.paths:
# Section download - emit each section file separately for merge-file
debug(f"📋 Section download: emitting {len(result_data.paths)} file(s) to merge-file")
for section_file in result_data.paths:
if section_file.exists():
file_hash = _compute_file_hash(section_file)
tags = result_data.tags if result_data.tags else []
pipe_obj = create_pipe_object_result(
source='download',
identifier=section_file.stem,
file_path=str(section_file),
cmdlet_name='download-data',
title=section_file.name,
file_hash=file_hash,
is_temp=False,
extra={
'url': url,
'tags': tags,
'audio_mode': audio_mode,
'format': format_selector,
'from_sections': True,
}
)
downloaded_files.append(section_file)
pipeline_context.emit(pipe_obj)
# Check if this was a playlist download (is_actual_playlist tracks if we have a multi-item playlist)
if is_actual_playlist:
elif is_actual_playlist:
if not selected_playlist_entries:
debug("⚠ Playlist metadata unavailable; cannot emit selected items for this stage.")
exit_code = 1
@@ -2788,6 +2946,11 @@ CMDLET = Cmdlet(
type="string",
description="Extract time range: MM:SS-MM:SS (e.g., 34:03-35:08) or seconds"
),
CmdletArg(
name="section",
type="string",
description="Download sections (yt-dlp only): TIME_RANGE[,TIME_RANGE...] (e.g., '1:30-1:35,0:05-0:15')"
),
CmdletArg(
name="cookies",
type="string",
@@ -2841,6 +3004,12 @@ CMDLET = Cmdlet(
" Format: MM:SS-MM:SS (e.g., 34:03-35:08)",
" Also accepts: 2043-2108 (seconds)",
"",
"SECTION DOWNLOAD (yt-dlp only):",
" -section RANGES Download specific time sections and merge them",
" Format: HH:MM:SS-HH:MM:SS[,HH:MM:SS-HH:MM:SS...]",
" Example: -section '1:30-1:35,0:05-0:15'",
" Each section is downloaded separately then merged in order",
"",
"PLAYLIST MODE:",
" Automatically detects playlists",
" Shows numbered list of tracks",
@@ -2866,6 +3035,9 @@ CMDLET = Cmdlet(
" # Extract specific clip from video",
" download-data https://vimeo.com/123456 -clip 1:30-2:45 -format best",
"",
" # Download multiple sections and merge them",
" download-data https://youtube.com/watch?v=xyz -section '1:30-1:35,0:05-0:15' | merge-file | add-file -storage local",
"",
" # Download specific tracks from playlist",
" download-data https://youtube.com/playlist?list=xyz -item '1,3,5-8'",
"",