hh

2025-12-01 14:42:30 -08:00
parent 6b9ed7d4ab
commit 89aa24961b
8 changed files with 565 additions and 51 deletions
--- a/cmdlets/download_data.py
+++ b/cmdlets/download_data.py
@@ -29,7 +29,7 @@ from typing import Any, Dict, List, Optional, Sequence, Tuple
 import uuid

 from helper.logger import log, debug
-from helper.download import download_media, probe_url
+from helper.download import download_media, probe_url, is_url_supported_by_ytdlp
 from helper.utils import sha256_file
 from models import DownloadOptions

@@ -710,6 +710,97 @@ def _parse_time_range(clip_spec: str) -> Optional[Tuple[int, int]]:
        return None


+def _parse_section_ranges(section_spec: str) -> Optional[List[Tuple[int, int]]]:
+    """Parse section ranges from comma-separated time ranges.
+
+    Each range is START-END, where START and END are plain seconds ("90"),
+    MM:SS ("1:30") or HH:MM:SS ("0:01:30"). Empty items between commas are
+    skipped, and whitespace around items and fields is ignored.
+
+    Minute and second fields are not range-checked, so "1:75" is accepted
+    and means 135 seconds.
+
+    Args:
+        section_spec: Section ranges like "1:30-1:35,0:05-0:15" or "90-95,5-15"
+                     May include quotes from CLI which will be stripped
+
+    Returns:
+        List of (start_seconds, end_seconds) tuples in input order, or None
+        if the spec is empty or not a string, or if any range is malformed,
+        contains a negative value, or does not end after it starts
+
+    Examples:
+        "1:30-1:35,0:05-0:15" -> [(90, 95), (5, 15)]
+        "'0:01:00-0:01:10'"   -> [(60, 70)]
+        "-5-10"               -> None (negative start)
+        "10-5"                -> None (end before start)
+    """
+    try:
+        # Strip quotes if present (from CLI parsing)
+        section_spec = section_spec.strip('"\'')
+    except AttributeError:
+        # Not a string (e.g. None): treat like any other invalid spec
+        return None
+
+    # Validate every item; a single bad range invalidates the whole spec
+    ranges = []
+    for part in section_spec.split(','):
+        part = part.strip()
+        if not part:
+            continue
+
+        # Timestamps never contain '-', so the first dash is the separator.
+        # A leading dash ("-5-10") therefore leaves the start empty and is
+        # rejected instead of being read as a negative start time.
+        start_str, dash, end_str = part.partition('-')
+        if not dash:
+            return None
+
+        try:
+            start_sec = _timestamp_to_seconds(start_str)
+            end_sec = _timestamp_to_seconds(end_str)
+        except ValueError:
+            return None
+
+        # A section must have positive length
+        if start_sec >= end_sec:
+            return None
+
+        ranges.append((start_sec, end_sec))
+
+    # An empty or comma-only spec yields no ranges, which is also invalid
+    return ranges if ranges else None
+
+
+def _timestamp_to_seconds(stamp: str) -> int:
+    """Convert a single timestamp to a whole number of seconds.
+
+    Args:
+        stamp: Plain seconds ("90"), MM:SS ("1:30") or HH:MM:SS ("0:01:30")
+
+    Returns:
+        The timestamp expressed in seconds
+
+    Raises:
+        ValueError: If a field is not an integer, a field is negative, or
+            there are more than three colon-separated fields
+
+    Examples:
+        "90" -> 90, "1:30" -> 90, "1:00:30" -> 3630
+    """
+    # int() raises ValueError for empty or non-numeric fields
+    fields = [int(field) for field in stamp.split(':')]
+    # int() accepts a sign, so a stray '-' inside the end time (e.g. the
+    # spec "0-1:-5") would silently shorten the offset; reject negatives.
+    if len(fields) > 3 or any(value < 0 for value in fields):
+        raise ValueError(f"invalid timestamp: {stamp!r}")
+    seconds = 0
+    for value in fields:
+        # Fold left to right: each earlier field is worth 60x the next one
+        seconds = seconds * 60 + value
+    return seconds
+
+
 MEDIA_EXTENSIONS = {'.mp3', '.m4a', '.mp4', '.mkv', '.webm', '.flac', '.wav', '.aac'}


@@ -1023,6 +1114,23 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any], emit_results:
        else:
            log(f"Invalid clip format: {clip_spec}", file=sys.stderr)
            return 1
+    
+    # Section download (yt-dlp only)
+    section_spec = parsed.get("section")
+    section_ranges = None
+    if section_spec:
+        # Parse section spec like "1:30-1:35,0:05-0:15" into list of (start, end) tuples
+        section_ranges = _parse_section_ranges(section_spec)
+        if section_ranges:
+            debug(f"Section ranges: {section_spec} ({len(section_ranges)} sections)")
+            # When downloading sections, auto-select best format if not specified
+            # Since we're only getting portions, quality matters less than completeness
+            if not format_selector:
+                format_selector = "bestvideo+bestaudio/best"
+                debug(f"Auto-selecting format for sections: {format_selector}")
+        else:
+            log(f"Invalid section format: {section_spec}", file=sys.stderr)
+            return 1
            
    cookies_path = parsed.get("cookies")
    storage_location = parsed.get("storage")
@@ -2361,7 +2469,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any], emit_results:
            if (not current_format_selector and not list_formats_mode and 
                isinstance(url, str) and url.startswith(('http://', 'https://'))):
                # Check if this is a yt-dlp supported URL (YouTube, Vimeo, etc.)
-                from helper.download import is_url_supported_by_ytdlp, list_formats
+                from helper.download import list_formats
                from result_table import ResultTable
                
                if is_url_supported_by_ytdlp(url):
@@ -2562,13 +2670,35 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any], emit_results:
            # Detect YouTube URLs and set no_playlist to download only the single video
            is_youtube_url = isinstance(url, str) and ('youtube.com' in url or 'youtu.be' in url)
            
+            # Determine clip_sections to pass to yt-dlp
+            # Sections take precedence over clip if both are specified
+            # Sections are for yt-dlp download-sections (merge multiple clips at source)
+            # Clip is for post-download extraction
+            clip_sections_str = None
+            if section_ranges:
+                # Check if this is a yt-dlp URL
+                if is_url_supported_by_ytdlp(url):
+                    # Convert section ranges to a comma-separated "start-end" string in seconds.
+                    # yt-dlp's own --download-sections syntax marks time ranges with a "*" prefix,
+                    # but no prefix is added here; helper/download.py parses this plain string.
+                    clip_sections_str = ",".join(f"{start}-{end}" for start, end in section_ranges)
+                    debug(f"Using yt-dlp sections: {clip_sections_str}")
+                else:
+                    log(f"Warning: -section only works with yt-dlp supported URLs. Use -clip for {url}", file=sys.stderr)
+            elif clip_range:
+                # -clip reuses the same field, carrying its single range.
+                # NOTE: helper/download.py treats clip_sections as yt-dlp download_sections,
+                # so -clip presumably also acts at download time for yt-dlp URLs.
+                # TODO: confirm that this is the intended behaviour for -clip.
+                clip_sections_str = f"{clip_range[0]}-{clip_range[1]}"
+            
            download_opts = DownloadOptions(
                url=url,
                mode=mode,
                output_dir=final_output_dir,
                cookies_path=final_cookies_path,
                ytdl_format=current_format_selector,  # Use per-URL format override if available
-                clip_sections=f"{clip_range[0]}-{clip_range[1]}" if clip_range else None,
+                clip_sections=clip_sections_str,
                playlist_items=playlist_items,
                no_playlist=is_youtube_url,  # For YouTube, ignore playlist URLs and download single video
            )
@@ -2584,8 +2714,36 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any], emit_results:
                file_path = result_data.path
                
                if file_path.exists():
+                    # Check if we have multiple section files to emit
+                    if result_data.paths:
+                        # Section download - emit each section file separately for merge-file
+                        debug(f"📋 Section download: emitting {len(result_data.paths)} file(s) to merge-file")
+                        for section_file in result_data.paths:
+                            if section_file.exists():
+                                file_hash = _compute_file_hash(section_file)
+                                tags = result_data.tags if result_data.tags else []
+                                
+                                pipe_obj = create_pipe_object_result(
+                                    source='download',
+                                    identifier=section_file.stem,
+                                    file_path=str(section_file),
+                                    cmdlet_name='download-data',
+                                    title=section_file.name,
+                                    file_hash=file_hash,
+                                    is_temp=False,
+                                    extra={
+                                        'url': url,
+                                        'tags': tags,
+                                        'audio_mode': audio_mode,
+                                        'format': format_selector,
+                                        'from_sections': True,
+                                    }
+                                )
+                                
+                                downloaded_files.append(section_file)
+                                pipeline_context.emit(pipe_obj)
                    # Check if this was a playlist download (is_actual_playlist tracks if we have a multi-item playlist)
-                    if is_actual_playlist:
+                    elif is_actual_playlist:
                        if not selected_playlist_entries:
                            debug("⚠ Playlist metadata unavailable; cannot emit selected items for this stage.")
                            exit_code = 1
@@ -2788,6 +2946,11 @@ CMDLET = Cmdlet(
            type="string",
            description="Extract time range: MM:SS-MM:SS (e.g., 34:03-35:08) or seconds"
        ),
+        CmdletArg(
+            name="section",
+            type="string",
+            description="Download sections (yt-dlp only): TIME_RANGE[,TIME_RANGE...] (e.g., '1:30-1:35,0:05-0:15')"
+        ),
        CmdletArg(
            name="cookies",
            type="string",
@@ -2841,6 +3004,12 @@ CMDLET = Cmdlet(
        "    Format: MM:SS-MM:SS (e.g., 34:03-35:08)",
        "    Also accepts: 2043-2108 (seconds)",
        "",
+        "SECTION DOWNLOAD (yt-dlp only):",
+        "  -section RANGES         Download specific time sections and merge them",
+        "    Format: HH:MM:SS-HH:MM:SS[,HH:MM:SS-HH:MM:SS...]",
+        "    Example: -section '1:30-1:35,0:05-0:15'",
+        "    Each section is downloaded separately then merged in order",
+        "",
        "PLAYLIST MODE:",
        "  Automatically detects playlists",
        "  Shows numbered list of tracks",
@@ -2866,6 +3035,9 @@ CMDLET = Cmdlet(
        "  # Extract specific clip from video",
        "  download-data https://vimeo.com/123456 -clip 1:30-2:45 -format best",
        "",
+        "  # Download multiple sections and merge them",
+        "  download-data https://youtube.com/watch?v=xyz -section '1:30-1:35,0:05-0:15' | merge-file | add-file -storage local",
+        "",
        "  # Download specific tracks from playlist",
        "  download-data https://youtube.com/playlist?list=xyz -item '1,3,5-8'",
        "",