"""Download data from URLs using yt-dlp with playlist, clipping, and format selection. This is a merged implementation combining: - cmdlets/download_data.py (pipeline wrapper) - funact/download_data.py (feature-rich implementation) - helper/download.py (low-level machinery) Features: - Direct file downloads and yt-dlp streaming sites - Playlist detection with interactive track selection - Clip extraction (time ranges like 34:03-35:08) - Format selection and audio/video toggles - Cookies file support - Tag extraction and metadata integration - Progress tracking and debug logging - Pipeline integration with result emission - Background torrent/magnet downloads via AllDebrid """ from __future__ import annotations import hashlib import re import sys import threading import time from pathlib import Path from typing import Any, Dict, List, Optional, Sequence, Tuple import uuid from helper.logger import log, debug from helper.download import download_media, probe_url from helper.utils import sha256_file from models import DownloadOptions from . import register from ._shared import Cmdlet, CmdletArg, SharedArgs, create_pipe_object_result, normalize_result_input, parse_cmdlet_args import models import pipeline as pipeline_context from config import resolve_output_dir from metadata import ( fetch_openlibrary_metadata_tags, format_playlist_entry, extract_ytdlp_tags ) # ============================================================================ # Try to import optional dependencies # ============================================================================ try: from yt_dlp.utils import sanitize_filename as ytdlp_sanitize_filename # type: ignore except Exception: # pragma: no cover - optional dependency ytdlp_sanitize_filename = None # ============================================================================ # Background Worker for AllDebrid Downloads # ============================================================================ def _download_torrent_worker( worker_id: str, magnet_url: str, output_dir: Path, config: Dict[str, Any], api_key: str, playlist_items: Optional[str] = None, audio_mode: bool = False, wait_timeout: int = 600, worker_manager: Optional[Any] = None, ) -> None: """Background worker to download torrent/magnet via AllDebrid. Runs in a separate thread and updates worker_manager with progress. Args: worker_id: Unique ID for this worker task magnet_url: Magnet link or .torrent URL to download output_dir: Directory to save downloaded files config: Configuration dict api_key: AllDebrid API key playlist_items: Optional file selection (e.g., "1,3,5-8") audio_mode: Whether to tag as audio or video wait_timeout: Timeout in seconds for magnet processing worker_manager: WorkerManager instance for progress updates """ worker = None downloaded_files = [] try: from helper.alldebrid import AllDebridClient # Get worker reference if manager provided if worker_manager: try: workers = worker_manager.get_active_workers() worker = next((w for w in workers if w.get('id') == worker_id), None) except: worker = None def log_progress(message: str) -> None: """Log progress to both console and worker manager.""" debug(message) if worker_manager and worker_id: try: worker_manager.log_step(worker_id, message) except: pass log_progress(f"[Worker {worker_id}] Submitting magnet to AllDebrid...") client = AllDebridClient(api_key) # Add magnet magnet_info = client.magnet_add(magnet_url) magnet_id = int(magnet_info.get('id', 0)) if magnet_id <= 0: log_progress(f"[Worker {worker_id}] ✗ Failed to add magnet to AllDebrid") if worker_manager: try: worker_manager.finish_worker(worker_id, "failed", f"Failed to add magnet") except: pass return log_progress(f"[Worker {worker_id}] ✓ Magnet added (ID: {magnet_id})") # Poll for ready status elapsed = 0 last_status_reported = 0 while elapsed < wait_timeout: try: status_info = client.magnet_status(magnet_id) except Exception as e: log_progress(f"[Worker {worker_id}] ⚠ Failed to get status: {e}") time.sleep(2) elapsed += 2 continue status_code = status_info.get('statusCode', -1) status_text = status_info.get('status', 'Unknown') # Report progress every 5 seconds (avoid log spam) if elapsed - last_status_reported >= 5 or elapsed < 2: downloaded = status_info.get('downloaded', 0) total_size = status_info.get('size', 0) seeders = status_info.get('seeders', 0) speed = status_info.get('downloadSpeed', 0) if total_size > 0: percent = (downloaded / total_size) * 100 speed_str = f" @ {speed / (1024**2):.1f} MB/s" if speed > 0 else "" seeders_str = f" ({seeders} seeders)" if seeders > 0 else "" progress_msg = f"[Worker {worker_id}] ⧗ {status_text}: {percent:.1f}% ({downloaded / (1024**3):.2f} / {total_size / (1024**3):.2f} GB){speed_str}{seeders_str}" log_progress(progress_msg) # Update worker with progress if worker_manager: try: worker_manager.update_worker( worker_id, status="running", progress=f"{percent:.1f}%", details=progress_msg ) except: pass else: log_progress(f"[Worker {worker_id}] ⧗ {status_text}...") last_status_reported = elapsed if status_code == 4: # Ready log_progress(f"[Worker {worker_id}] ✓ Files ready") break elif status_code >= 5: # Error error_status = { 5: "Upload failed", 6: "Internal error during unpacking", 7: "Not downloaded in 20 minutes", 8: "File too big (>1TB)", 9: "Internal error", 10: "Download took >72 hours", 11: "Deleted on hoster website", 12: "Processing failed", 13: "Processing failed", 14: "Tracker error", 15: "No peers available" } error_msg = error_status.get(status_code, f"Unknown error {status_code}") log_progress(f"[Worker {worker_id}] ✗ Magnet failed: {error_msg}") if worker_manager: try: worker_manager.finish_worker(worker_id, "failed", error_msg) except: pass return time.sleep(2) elapsed += 2 if elapsed >= wait_timeout: log_progress(f"[Worker {worker_id}] ✗ Timeout waiting for magnet (>{wait_timeout}s)") if worker_manager: try: worker_manager.finish_worker(worker_id, "failed", f"Timeout after {wait_timeout}s") except: pass return # Get files files_result = client.magnet_links([magnet_id]) magnet_files = files_result.get(str(magnet_id), {}) if not magnet_files and isinstance(magnet_id, int): # Try integer key as fallback for key in files_result: if str(key) == str(magnet_id): magnet_files = files_result[key] break files_array = magnet_files.get('files', []) if not files_array: log_progress(f"[Worker {worker_id}] ✗ No files found in magnet") if worker_manager: try: worker_manager.finish_worker(worker_id, "failed", "No files found in magnet") except: pass return log_progress(f"[Worker {worker_id}] ✓ Found {len(files_array)} file(s)") # Extract download links download_links = [] def extract_links(items, prefix=""): if not isinstance(items, list): return for item in items: if isinstance(item, dict): name = item.get('n', '') link = item.get('l', '') size = item.get('s', 0) entries = item.get('e', []) if link: download_links.append({ 'link': link, 'name': name, 'size': size, 'path': f"{prefix}/{name}" if prefix else name }) if entries: extract_links(entries, f"{prefix}/{name}" if prefix else name) extract_links(files_array) if not download_links: log_progress(f"[Worker {worker_id}] ✗ No downloadable files found") if worker_manager: try: worker_manager.finish_worker(worker_id, "failed", "No downloadable files") except: pass return # Filter by playlist_items if specified if playlist_items and playlist_items != '*': # Parse selection like "1,3,5-8" selected_indices = [] for part in playlist_items.split(','): part = part.strip() if '-' in part: start, end = part.split('-') selected_indices.extend(range(int(start)-1, int(end))) else: selected_indices.append(int(part)-1) download_links = [download_links[i] for i in selected_indices if i < len(download_links)] log_progress(f"[Worker {worker_id}] Downloading {len(download_links)} selected file(s)") # Download each file for idx, file_info in enumerate(download_links, 1): link = file_info['link'] name = file_info['name'] log_progress(f"[Worker {worker_id}] ({idx}/{len(download_links)}) Downloading: {name}") try: # Unlock the link try: actual_link = client.unlock_link(link) if actual_link and actual_link != link: link = actual_link except: pass # Download via HTTP from helper.http_client import HTTPClient output_dir.mkdir(parents=True, exist_ok=True) file_path = output_dir / name file_path.parent.mkdir(parents=True, exist_ok=True) with HTTPClient() as http_client: http_client.download(link, str(file_path)) log_progress(f"[Worker {worker_id}] ✓ Downloaded: {name}") # Compute hash and emit result file_hash = _compute_file_hash(file_path) result_obj = { 'file_path': str(file_path), 'source_url': magnet_url, 'file_hash': file_hash, 'media_kind': 'audio' if audio_mode else 'video', } pipeline_context.emit(result_obj) downloaded_files.append(file_path) except Exception as e: log_progress(f"[Worker {worker_id}] ⚠ Failed to download {name}: {e}") if downloaded_files: msg = f"✓ Torrent download complete ({len(downloaded_files)} file(s))" log_progress(f"[Worker {worker_id}] {msg}") if worker_manager: try: worker_manager.finish_worker(worker_id, "success", msg) except: pass else: if worker_manager: try: worker_manager.finish_worker(worker_id, "failed", "No files downloaded") except: pass except ImportError: log_progress(f"[Worker {worker_id}] ✗ AllDebrid client not available") if worker_manager: try: worker_manager.finish_worker(worker_id, "failed", "AllDebrid client not available") except: pass except Exception as e: import traceback log_progress(f"[Worker {worker_id}] ✗ Torrent download failed: {e}") if worker_manager: try: worker_manager.finish_worker(worker_id, "failed", str(e)) except: pass traceback.print_exc(file=sys.stderr) # ============================================================================ # CMDLET Metadata Declaration # ============================================================================ # ============================================================================ # Torrent File Parsing # ============================================================================ def _parse_torrent_file(file_path: str) -> Optional[str]: """Parse a .torrent file and extract magnet link. Args: file_path: Path to .torrent file Returns: Magnet link string or None if parsing fails """ try: import bencode3 except ImportError: log("⚠ bencode3 module not found. Install: pip install bencode3", file=sys.stderr) return None try: with open(file_path, 'rb') as f: torrent_data = bencode3.bdecode(f.read()) except Exception as e: log(f"✗ Failed to parse torrent file: {e}", file=sys.stderr) return None try: # Get info dict - bencode3 returns string keys, not bytes info = torrent_data.get('info') if not info: log("✗ No info dict in torrent file", file=sys.stderr) return None # Calculate info hash (SHA1 of bencoded info dict) import hashlib info_hash = hashlib.sha1(bencode3.bencode(info)).hexdigest() # Get name name = info.get('name', 'Unknown') if isinstance(name, bytes): name = name.decode('utf-8', errors='ignore') # Create magnet link magnet = f"magnet:?xt=urn:btih:{info_hash}&dn={name}" # Add trackers if available announce = torrent_data.get('announce') if announce: try: tracker = announce if isinstance(announce, str) else announce.decode('utf-8', errors='ignore') magnet += f"&tr={tracker}" except: pass announce_list = torrent_data.get('announce-list', []) for tier in announce_list: if isinstance(tier, list): for tracker_item in tier: try: tracker = tracker_item if isinstance(tracker_item, str) else tracker_item.decode('utf-8', errors='ignore') if tracker: magnet += f"&tr={tracker}" except: pass debug(f"✓ Parsed torrent: {name} (hash: {info_hash})") return magnet except Exception as e: log(f"✗ Error parsing torrent metadata: {e}", file=sys.stderr) return None def _download_torrent_file(url: str, temp_dir: Optional[Path] = None) -> Optional[str]: """Download a .torrent file from URL and parse it. Args: url: URL to .torrent file temp_dir: Optional temp directory for storing downloaded file Returns: Magnet link string or None if download/parsing fails """ try: from helper.http_client import HTTPClient except ImportError: log("⚠ HTTPClient not available", file=sys.stderr) return None try: # Download torrent file debug(f"⇓ Downloading torrent file: {url}") with HTTPClient(timeout=30.0) as client: response = client.get(url) response.raise_for_status() torrent_data = response.content # Create temp file if temp_dir is None: temp_dir = Path.home() / ".cache" / "downlow" temp_dir.mkdir(parents=True, exist_ok=True) # Save to temp file import hashlib url_hash = hashlib.md5(url.encode()).hexdigest()[:8] temp_file = temp_dir / f"torrent_{url_hash}.torrent" temp_file.write_bytes(torrent_data) debug(f"✓ Downloaded torrent file: {temp_file}") # Parse it magnet = _parse_torrent_file(str(temp_file)) # Clean up try: temp_file.unlink() except: pass return magnet except Exception as e: log(f"✗ Failed to download/parse torrent: {e}", file=sys.stderr) return None def _is_torrent_file_or_url(arg: str) -> bool: """Check if argument is a .torrent file path or URL. Args: arg: Argument to check Returns: True if it's a .torrent file or URL """ arg_lower = arg.lower() # Check if it's a .torrent file path if arg_lower.endswith('.torrent'): return Path(arg).exists() or arg_lower.startswith('http') # Check if it's a URL to .torrent file if arg_lower.startswith('http://') or arg_lower.startswith('https://'): return '.torrent' in arg_lower return False def _process_torrent_input(arg: str) -> Optional[str]: """Process torrent file or URL and convert to magnet link. Args: arg: .torrent file path or URL Returns: Magnet link or original argument if not processable """ try: if arg.lower().startswith('http://') or arg.lower().startswith('https://'): # It's a URL return _download_torrent_file(arg) or arg else: # It's a file path if Path(arg).exists(): return _parse_torrent_file(arg) or arg else: return arg except Exception as e: log(f"⚠ Error processing torrent: {e}", file=sys.stderr) return arg # ============================================================================ # Helper Functions # ============================================================================ def _show_playlist_table(url: str, probe_info: Dict[str, Any]) -> Optional[Dict[str, Any]]: """Show playlist result table and get user selection. Args: url: Original URL probe_info: Info dict from probe_url() Returns: Modified probe_info with selected_entries, or None if user cancelled """ entries = probe_info.get("entries", []) if not entries: return probe_info extractor = probe_info.get("extractor", "") title = probe_info.get("title", "Playlist") debug(f"📋 Detected playlist: {title} ({len(entries)} items) - {extractor}") # Skip full metadata enrichment for speed - extract_flat usually provides enough info # debug("📋 Fetching metadata for each item...") # entries = enrich_playlist_entries(entries, extractor) # Emit each playlist item as a separate result row for i, entry in enumerate(entries, 1): formatted = format_playlist_entry(entry, i, extractor) # Build tags from available metadata tags = [] artist = formatted.get("artist") or formatted.get("uploader", "") if artist: tags.append(artist) album = formatted.get("album", "") if album and album != title: # Don't repeat playlist title tags.append(album) # Extract individual fields for separate columns duration = formatted.get("duration", 0) duration_str = "" if duration: minutes = int(duration // 60) seconds = int(duration % 60) duration_str = f"{minutes}m{seconds}s" tags.append(duration_str) # Normalize extractor for comparison (remove special chars and case) ext_lower = extractor.lower().replace(":", "").replace(" ", "") track_number = None # Add site-specific tags and fields if "youtube" in ext_lower and formatted.get("channel"): tags.append(f"channel:{formatted.get('channel')}") elif "bandcamp" in ext_lower: track_number = formatted.get("track_number", i) tags.append(f"track:{track_number}") # Create result row with separate columns for important metadata # Build columns dynamically based on available data columns = [ ("#", i), ("Title", formatted["title"]), ] # Add Artist column if available if artist: columns.append(("Artist", artist)) # Add Duration column if available if duration_str: columns.append(("Duration", duration_str)) # Add Track number column for music platforms if track_number is not None: columns.append(("Track", str(track_number))) # Add Tags column for remaining tags (if any) remaining_tags = [t for t in tags if t not in [artist, duration_str]] if remaining_tags: columns.append(("Tags", ", ".join(remaining_tags))) # Create result row with compact columns display # Using "columns" field tells ResultTable which columns to show result_row = { "title": formatted["title"], "tags": tags, "index": i, # Store all metadata but don't display in table (use columns field) "__source": "playlist-probe", "__id": f"{i}", "__file_path": url, "__action": f"playlist-item:{i}", "__artist": formatted.get("artist", ""), "__duration": formatted.get("duration", 0), "__extractor": extractor, # Define which columns should be shown in the result table "columns": columns } # Add site-specific metadata for pipeline use if "youtube" in ext_lower: result_row["__video_id"] = formatted.get("video_id", "") result_row["__channel"] = formatted.get("channel", "") elif "bandcamp" in ext_lower: result_row["__track_number"] = formatted.get("track_number", i) result_row["__album"] = formatted.get("album") or title elif "spotify" in ext_lower: result_row["__artists"] = formatted.get("artists", "") result_row["__album"] = formatted.get("album", "") pipeline_context.emit(result_row) debug(f"ℹ️ Playlist items displayed. Use result table references (@1, @2, etc.) to select tracks.") # Return modified probe info return probe_info def _parse_time_range(clip_spec: str) -> Optional[Tuple[int, int]]: """Parse time range from MM:SS-MM:SS or seconds format. Args: clip_spec: Time range string like "34:03-35:08" or "2043-2108" Returns: Tuple of (start_seconds, end_seconds) or None if invalid """ try: if '-' not in clip_spec: return None parts = clip_spec.split('-') if len(parts) != 2: return None start_str, end_str = parts # Try MM:SS format first if ':' in start_str: start_parts = start_str.split(':') if len(start_parts) == 2: start_sec = int(start_parts[0]) * 60 + int(start_parts[1]) else: return None else: start_sec = int(start_str) if ':' in end_str: end_parts = end_str.split(':') if len(end_parts) == 2: end_sec = int(end_parts[0]) * 60 + int(end_parts[1]) else: return None else: end_sec = int(end_str) if start_sec >= end_sec: return None return (start_sec, end_sec) except (ValueError, AttributeError): return None MEDIA_EXTENSIONS = {'.mp3', '.m4a', '.mp4', '.mkv', '.webm', '.flac', '.wav', '.aac'} def _parse_playlist_selection_indices(selection: Optional[str], total_items: int) -> list[int]: """Convert playlist selection string to 0-based indices.""" if total_items <= 0: return [] if not selection or selection.strip() in {"*", ""}: return list(range(total_items)) indices: list[int] = [] for part in selection.split(','): part = part.strip() if not part: continue if '-' in part: bounds = part.split('-', 1) try: start = int(bounds[0]) end = int(bounds[1]) except ValueError: continue if start <= 0 or end <= 0: continue if start > end: start, end = end, start for idx in range(start - 1, end): if 0 <= idx < total_items: indices.append(idx) else: try: idx = int(part) - 1 except ValueError: continue if 0 <= idx < total_items: indices.append(idx) seen: set[int] = set() ordered: list[int] = [] for idx in indices: if idx not in seen: ordered.append(idx) seen.add(idx) return ordered def _select_playlist_entries(entries: Any, selection: Optional[str]) -> list[Dict[str, Any]]: """Pick playlist entries according to a selection string.""" if not isinstance(entries, list): return [] indices = _parse_playlist_selection_indices(selection, len(entries)) if not indices: return [] selected: list[Dict[str, Any]] = [] for idx in indices: entry = entries[idx] if isinstance(entry, dict): selected.append(entry) return selected def _sanitize_title_for_filename(title: Optional[str]) -> str: """Match yt-dlp's restricted filename sanitization for comparisons.""" if not title: return "" if ytdlp_sanitize_filename: try: return ytdlp_sanitize_filename(title, restricted=True) except Exception: pass sanitized = re.sub(r"[^0-9A-Za-z._-]+", "_", title) return sanitized.strip() or "" def _find_playlist_files_from_entries( entries: Sequence[Dict[str, Any]], output_dir: Path, ) -> list[Path]: """Resolve expected playlist files based on entry titles/exts.""" matched: list[Path] = [] seen: set[str] = set() for entry in entries: title = entry.get('title') if isinstance(entry, dict) else None sanitized = _sanitize_title_for_filename(title) if not sanitized: continue preferred_exts: list[str] = [] for key in ('ext', 'audio_ext', 'video_ext'): value = entry.get(key) if isinstance(entry, dict) else None if isinstance(value, str) and value: preferred_exts.append(value.lower()) if not preferred_exts: preferred_exts = [ext.strip('.') for ext in MEDIA_EXTENSIONS] candidate: Optional[Path] = None for ext in preferred_exts: ext = ext.lstrip('.').lower() path = output_dir / f"{sanitized}.{ext}" if path.exists(): candidate = path break if candidate is None: try: # Bandcamp/yt-dlp often prefixes uploader info, so fall back to a substring match. for f in output_dir.glob(f"*{sanitized}*"): if f.suffix.lower() in MEDIA_EXTENSIONS and f.is_file(): candidate = f break except OSError: candidate = None if candidate and str(candidate) not in seen: matched.append(candidate) seen.add(str(candidate)) return matched def _snapshot_playlist_paths( entries: Sequence[Dict[str, Any]], output_dir: Path, ) -> tuple[list[Path], set[str]]: """Capture current playlist file paths for a given selection.""" matches = _find_playlist_files_from_entries(entries, output_dir) resolved: set[str] = set() for path in matches: try: resolved.add(str(path.resolve())) except OSError: resolved.add(str(path)) return matches, resolved def _expand_playlist_selection(selection: str, num_items: int) -> str: """Expand playlist selection string, handling wildcards. Args: selection: Selection string like '1,3,5-8' or '*' num_items: Total number of items in playlist Returns: Expanded selection string like '1,3,5,6,7,8' or '1-18' for '*' """ if selection.strip() == "*": # Wildcard: select all items return f"1-{num_items}" # Return as-is if not wildcard (yt-dlp will handle ranges and lists) return selection def _parse_selection_string(selection: str) -> List[int]: """Parse selection string into list of integers. Handles formats like: - "2" -> [2] - "1,3,5" -> [1, 3, 5] - "1-3" -> [1, 2, 3] - "1,3-5,7" -> [1, 3, 4, 5, 7] Args: selection: Selection string Returns: List of integer indices """ result = [] for part in selection.split(','): part = part.strip() if '-' in part: # Range like "3-5" try: start, end = part.split('-') start_num = int(start.strip()) end_num = int(end.strip()) result.extend(range(start_num, end_num + 1)) except (ValueError, AttributeError): continue else: # Single number try: result.append(int(part)) except ValueError: continue return result def _filter_and_sort_formats(formats: List[Dict[str, Any]]) -> List[Dict[str, Any]]: """Filter and sort formats for user selection. Filters out: - Storyboards (webp, svg formats) - Low quality audio (below ~128 kbps, typically 48kHz audio) - Video below 360p Sorts to prioritize: - @1: Best combined audio+video (highest resolution, highest bitrate) - @2: Best audio-only (highest bitrate audio) - Then rest by quality Args: formats: List of format dicts from yt-dlp Returns: Filtered and sorted format list """ filtered = [] for fmt in formats: format_id = fmt.get("format_id", "") ext = fmt.get("ext", "") vcodec = fmt.get("vcodec", "") acodec = fmt.get("acodec", "") height = fmt.get("height") tbr = fmt.get("tbr") # Total bitrate # Skip storyboards (webp images, svg, etc.) if ext in {"webp", "svg", "mhtml"}: continue # Skip video-only formats below 360p if vcodec != "none" and acodec == "none": if height and height < 360: continue # Skip low-bitrate audio (typically 48kHz, very low quality) # Keep audio with tbr >= 64 kbps (reasonable quality threshold) if acodec != "none" and vcodec == "none": if tbr and tbr < 64: continue filtered.append(fmt) # Sort formats: best combined first, then best audio-only, then video-only def format_sort_key(fmt: Dict[str, Any]) -> tuple: vcodec = fmt.get("vcodec", "") acodec = fmt.get("acodec", "") height = fmt.get("height", 0) or 0 tbr = fmt.get("tbr", 0) or 0 # Category 0: has both audio and video (sort first) # Category 1: audio only (sort second) # Category 2: video only (sort last, by height desc) if vcodec != "none" and acodec != "none": category = 0 return (category, -height, -tbr) elif acodec != "none" and vcodec == "none": category = 1 return (category, -tbr) # Sort by bitrate descending else: # Video only category = 2 return (category, -height, -tbr) # Sort by height descending, then bitrate return sorted(filtered, key=format_sort_key) def _compute_file_hash(file_path: Path) -> Optional[str]: """Compute SHA256 hash of file.""" try: return sha256_file(file_path) except Exception: return None # ============================================================================ # Main Cmdlet Function # ============================================================================ def _run(result: Any, args: Sequence[str], config: Dict[str, Any], emit_results: bool = True) -> int: """Download data from URLs with advanced options. Accepts: - Single URL as string - Result object with 'url' or 'file_path' field - List of results - File containing URLs (one per line) Returns: Exit code (0 for success, 1 for failure) """ debug("Starting download-data") collected_results: List[Dict[str, Any]] = [] def _emit(obj: Any) -> None: """Internal helper to collect and optionally emit results.""" collected_results.append(obj) if emit_results: pipeline_context.emit(obj) # Track pipeline mode once so playlist handling can respect current run scope stage_ctx = pipeline_context.get_stage_context() in_pipeline = stage_ctx is not None and getattr(stage_ctx, 'total_stages', 1) > 1 # ======================================================================== # ARGUMENT PARSING # ======================================================================== # Parse arguments using shared parser parsed = parse_cmdlet_args(args, CMDLET) audio_mode = parsed.get("audio", False) format_selector = parsed.get("format") list_formats_mode = parsed.get("list-formats", False) clip_spec = parsed.get("clip") clip_range = None if clip_spec: clip_range = _parse_time_range(clip_spec) if clip_range: debug(f"Clip range: {clip_spec} ({clip_range[0]}-{clip_range[1]} seconds)") else: log(f"Invalid clip format: {clip_spec}", file=sys.stderr) return 1 cookies_path = parsed.get("cookies") storage_location = parsed.get("storage") torrent_mode = parsed.get("torrent", False) wait_timeout = float(parsed.get("wait", 1800)) # Collect URLs from positional args and -url flag # Both map to "url" in parsed result urls_to_download = [] raw_urls = parsed.get("url", []) if isinstance(raw_urls, str): raw_urls = [raw_urls] for arg in raw_urls: if arg.lower().startswith(('http://', 'https://')): # Check if it's a .torrent URL or file first if '.torrent' in arg.lower(): debug(f"Processing torrent URL: {arg}") magnet = _process_torrent_input(arg) if magnet and magnet.lower().startswith('magnet:'): urls_to_download.append(magnet) debug(f"✓ Converted to magnet: {magnet[:70]}...") elif magnet: urls_to_download.append(magnet) else: log(f"✗ Failed to process torrent: {arg}", file=sys.stderr) else: urls_to_download.append(arg) elif torrent_mode and (arg.lower().startswith('magnet:') or len(arg) == 40 or len(arg) == 64): # In torrent mode, accept magnet links or torrent hashes (40-char SHA1 or 64-char SHA256) urls_to_download.append(arg) debug(f"Torrent/magnet added: {arg[:50]}...") elif _is_torrent_file_or_url(arg): # Handle .torrent files and URLs debug(f"Processing torrent file/URL: {arg}") magnet = _process_torrent_input(arg) if magnet and magnet.lower().startswith('magnet:'): urls_to_download.append(magnet) debug(f"✓ Converted to magnet: {magnet[:70]}...") elif magnet: urls_to_download.append(magnet) else: log(f"✗ Failed to process torrent: {arg}", file=sys.stderr) else: # Treat as URL if it looks like one if arg.lower().startswith(('magnet:', 'ftp://')): urls_to_download.append(arg) else: # Check if it's a file containing URLs path = Path(arg) if path.exists() and path.is_file(): try: with open(arg, 'r') as f: for line in f: line = line.strip() if line and line.lower().startswith(('http://', 'https://')): urls_to_download.append(line) debug(f"Loaded URLs from file: {arg}") except Exception as e: log(f"Error reading file {arg}: {e}", file=sys.stderr) else: debug(f"Ignored argument: {arg}") # Item selection (for playlists/formats) # Note: -item flag is deprecated in favor of @N pipeline selection, but kept for compatibility playlist_items = parsed.get("item") if playlist_items: debug(f"Item selection: {playlist_items}") def _is_openlibrary_downloadable(ebook_access_val: Any, status_val: Any) -> bool: access = str(ebook_access_val or "").strip().lower() status = str(status_val or "").strip().lower() if status == "download": return True if access in {"borrowable", "public", "full", "open"} or access.startswith("full "): return True if "✓" in str(status_val or ""): return True return False # ======================================================================== # INPUT PROCESSING - Extract URLs from pipeline or arguments # ======================================================================== # Initialize worker tracking for downloads import uuid from helper.local_library import LocalLibraryDB from config import get_local_storage_path worker_id = str(uuid.uuid4()) library_root = get_local_storage_path(config or {}) db = None if library_root: try: db = LocalLibraryDB(library_root) db.insert_worker( worker_id, "download", title="Download Data", description="Downloading files from search results", pipe=pipeline_context.get_current_command_text() ) except Exception as e: log(f"⚠ Worker tracking unavailable: {e}", file=sys.stderr) piped_results = normalize_result_input(result) # Track files downloaded directly (e.g. Soulseek) to avoid "No URLs" error files_downloaded_directly = 0 # Only process piped results if no URLs were provided in arguments # This prevents picking up residue from previous commands when running standalone if piped_results and not urls_to_download: for item in piped_results: url = None origin = None # ====== CHECK FOR PLAYLIST ITEM MARKER FROM add-file ====== # When add-file detects a playlist item and wants to download it if isinstance(item, dict) and item.get('__playlist_url'): playlist_url = item.get('__playlist_url') item_num = item.get('__playlist_item', 1) debug(f"📍 Playlist item from add-file: #{item_num}") # Add to download list with marker urls_to_download.append({ '__playlist_url': playlist_url, '__playlist_item': int(item_num) }) continue # ====== CHECK FOR PLAYLIST ITEM SELECTION FIRST ====== # When user selects @12 from a playlist, item is emitted dict with __action: "playlist-item:12" if isinstance(item, dict) and '__action' in item and item['__action'].startswith('playlist-item:'): playlist_url = item.get('__file_path') playlist_action = item['__action'] # e.g., "playlist-item:12" item_num = playlist_action.split(':')[1] # Extract item number (1-based) if playlist_url: # Playlist item selected - need to download this specific track debug(f"📍 Playlist item selected: #{item_num} - {item.get('title', 'Unknown')}") # Add to download list - the playlist will be probed and item extracted # Store with special marker so we know which item to select urls_to_download.append({ '__playlist_url': playlist_url, '__playlist_item': int(item_num) }) continue # ====== CHECK FOR FORMAT SELECTION RESULT ====== if isinstance(item, dict) and item.get('format_id') is not None and item.get('source_url'): debug(f"🎬 Format selected from pipe: {item.get('format_id')}") debug(f" Source URL: {item.get('source_url')}") # Store as dict so we can extract format_id + source_url during download urls_to_download.append(item) continue elif hasattr(item, 'format_id') and hasattr(item, 'source_url') and item.format_id is not None: debug(f"🎬 Format selected from pipe: {item.format_id}") debug(f" Source URL: {item.source_url}") urls_to_download.append({ 'format_id': item.format_id, 'source_url': item.source_url, }) continue if isinstance(item, dict): # Check for search provider results first origin = item.get('origin') if origin in {'openlibrary', 'libgen', 'soulseek', 'debrid'}: # Handle search provider results title = item.get('title', 'Item') if origin == 'openlibrary': # OpenLibrary: First check if lendable/downloadable via Archive.org # Only route to LibGen if NOT available on Archive.org metadata = item.get('full_metadata', {}) if isinstance(item.get('full_metadata'), dict) else {} isbn = metadata.get('isbn') or item.get('isbn') olid = metadata.get('olid') or item.get('olid') debug(f"[search-result] OpenLibrary: '{title}'") if isbn: debug(f" ISBN: {isbn}") # Check if book is borrowable from ebook_access field or status ebook_access = metadata.get('ebook_access') or item.get('ebook_access', '') status_text = metadata.get('status') or item.get('status', '') archive_id = metadata.get('archive_id') or item.get('archive_id') # Determine if borrowable based on new status vocabulary is_borrowable = _is_openlibrary_downloadable(ebook_access, status_text) if is_borrowable: debug(f" ✓ Available for borrowing on Archive.org") debug(f" → Queued for auto-borrowing...") # Queue borrow request as special dict object # We need OCAID (Archive.org ID), not just numeric OLID ocaid = archive_id if not ocaid and isbn: # If no OCAID in metadata, fetch it from OpenLibrary ISBN lookup try: import requests ol_url = f'https://openlibrary.org/isbn/{isbn}.json' r = requests.get(ol_url, timeout=5) if r.status_code == 200: ol_data = r.json() ocaid = ol_data.get('ocaid') except Exception as e: debug(f" ⚠ Could not fetch OCAID from OpenLibrary: {e}") if ocaid: urls_to_download.append({ '__borrow_request__': True, 'book_id': ocaid, 'isbn': isbn, 'title': title, 'olid': olid }) else: # OCAID not found - book claims borrowable but not on Archive.org # Fall back to LibGen search instead debug(f" ⚠ Book marked borrowable but not found on Archive.org") if isbn: try: from helper.search_provider import get_provider libgen_provider = get_provider("libgen", config) if libgen_provider: libgen_results = libgen_provider.search(f"isbn:{isbn}", limit=1) if libgen_results: libgen_result = libgen_results[0] url = libgen_result.get('target') if isinstance(libgen_result, dict) else getattr(libgen_result, 'target', None) if url: urls_to_download.append(url) debug(f" ✓ Found on LibGen instead") else: debug(f" ⚠ Not found on LibGen") else: debug(f" ⚠ Not found on LibGen") else: debug(f" ⚠ LibGen provider not available") except Exception as e: debug(f" ✗ Error searching LibGen: {e}") else: # Book is NOT borrowable - route to LibGen if isbn: debug(f" ⚠ Not available on Archive.org - attempting LibGen...") try: from helper.search_provider import get_provider libgen_provider = get_provider("libgen", config) if libgen_provider: libgen_results = libgen_provider.search(f"isbn:{isbn}", limit=1) if libgen_results: libgen_result = libgen_results[0] url = libgen_result.get('target') if isinstance(libgen_result, dict) else getattr(libgen_result, 'target', None) if url: urls_to_download.append(url) debug(f" ✓ Found on LibGen") else: debug(f" ⚠ Not found on LibGen") else: debug(f" ⚠ Not found on LibGen") debug(f" ▶ To search LibGen: search-file -provider libgen 'isbn:{isbn}' | @1 | download-data") else: debug(f" ▶ To search LibGen: search-file -provider libgen 'isbn:{isbn}' | @1 | download-data") except Exception as e: debug(f" ⚠ Could not search LibGen: {e}") debug(f" ▶ To search LibGen: search-file -provider libgen 'isbn:{isbn}' | @1 | download-data") else: debug(f" ⚠ ISBN not available") debug(f" ▶ Visit: {item.get('target', 'https://openlibrary.org')}") debug(f" ▶ Or find ISBN and use: search-file -provider libgen 'isbn:\"\"'") elif origin == 'soulseek': # Handle Soulseek downloads using the provider metadata = item.get('full_metadata', {}) if isinstance(item.get('full_metadata'), dict) else {} username = metadata.get('username') filename = metadata.get('filename') size = item.get('size_bytes') or 0 if username and filename: try: import asyncio from helper.search_provider import SoulSeekProvider provider = SoulSeekProvider(config) log(f"[search-result] Soulseek: '{title}'", flush=True) log(f" ▶ Downloading from {username}...", flush=True) if db: db.append_worker_stdout(worker_id, f"Downloading from Soulseek: {title} (from {username})") # Get temp directory from config temp_dir = config.get('temp') if temp_dir: temp_dir = str(Path(temp_dir).expanduser()) # Call async download_file with asyncio.run() success = asyncio.run(provider.download_file( username=username, filename=filename, file_size=size, target_dir=temp_dir )) if success: downloaded_file = Path(provider.DOWNLOAD_DIR) / Path(filename).name if downloaded_file.exists(): log(f" ✓ Downloaded: {downloaded_file.name}", flush=True) files_downloaded_directly += 1 if db: db.append_worker_stdout(worker_id, f"✓ Downloaded: {downloaded_file.name}") if pipeline_context._PIPE_ACTIVE: # Create proper PipeObject result result_dict = create_pipe_object_result( source='soulseek', identifier=filename, file_path=str(downloaded_file), cmdlet_name='download-data', title=title, target=str(downloaded_file), # Explicit target for add-file extra={ "metadata": metadata, "origin": "soulseek" } ) pipeline_context.emit(result_dict) else: debug(f" ✗ Download failed (peer may be offline)") if db: db.append_worker_stdout(worker_id, f"✗ Download failed for {title}") debug(f" ▶ Try another result: search-file -provider soulseek \"...\" | @2 | download-data") except Exception as e: debug(f" ✗ Download error: {e}") if db: db.append_worker_stdout(worker_id, f"✗ Error: {e}") debug(f" ▶ Alternative: search-soulseek -download \"{title}\" -storage ") else: debug(f"[search-result] Soulseek: '{title}'") debug(f" ⚠ Missing download info (username/filename)") if db: db.append_worker_stdout(worker_id, f"⚠ Missing download info for {title}") elif origin == 'libgen': # LibGen results can use the direct URL # Also extract mirrors dict for fallback if primary fails url = item.get('target') # Extract mirrors and book_id from full_metadata metadata = item.get('full_metadata', {}) if isinstance(item.get('full_metadata'), dict) else {} mirrors = metadata.get('mirrors', {}) book_id = metadata.get('book_id', '') if url: url_entry = { 'url': str(url), 'mirrors': mirrors, # Alternative mirrors for fallback 'book_id': book_id, } urls_to_download.append(url_entry) debug(f"[search-result] LibGen: '{title}'") debug(f" ✓ Queued for download") if mirrors: debug(f" Mirrors available: {len(mirrors)}") elif origin == 'debrid': # Debrid results can use download-data url = item.get('target') if url: urls_to_download.append(str(url)) debug(f"[search-result] Debrid: '{title}'") debug(f" ✓ Queued for download") else: # Regular fields for non-search results url = item.get('url') or item.get('link') or item.get('href') or item.get('target') else: # Object attributes origin = getattr(item, 'origin', None) title = getattr(item, 'title', 'Item') if origin in {'openlibrary', 'libgen', 'soulseek', 'debrid'}: # Handle search provider results if origin == 'openlibrary': # OpenLibrary: First check if lendable/downloadable via Archive.org # Only route to LibGen if NOT available on Archive.org metadata = getattr(item, 'full_metadata', {}) if isinstance(getattr(item, 'full_metadata', None), dict) else {} isbn = metadata.get('isbn') or getattr(item, 'isbn', None) olid = metadata.get('olid') or getattr(item, 'olid', None) debug(f"[search-result] OpenLibrary: '{title}'") if isbn: debug(f" ISBN: {isbn}") # Check if book is borrowable from ebook_access field or status ebook_access = metadata.get('ebook_access') or getattr(item, 'ebook_access', '') status_text = metadata.get('status') or getattr(item, 'status', '') archive_id = metadata.get('archive_id') or getattr(item, 'archive_id', '') # Determine if borrowable using unified helper is_borrowable = _is_openlibrary_downloadable(ebook_access, status_text) if is_borrowable: # Book IS borrowable on Archive.org debug(f" ✓ Available for borrowing on Archive.org") debug(f" → Queued for auto-borrowing...") # Queue borrow request as special dict object ocaid = archive_id if not ocaid and isbn: try: import requests ol_url = f'https://openlibrary.org/isbn/{isbn}.json' r = requests.get(ol_url, timeout=5) if r.status_code == 200: ol_data = r.json() ocaid = ol_data.get('ocaid') except Exception as e: debug(f" ⚠ Could not fetch OCAID from OpenLibrary: {e}") if ocaid: urls_to_download.append({ '__borrow_request__': True, 'book_id': ocaid, 'isbn': isbn, 'title': title, 'olid': olid or getattr(item, 'openlibrary_id', '') }) else: # OCAID not found - book claims borrowable but not on Archive.org # Fall back to LibGen search instead debug(f" ⚠ No Archive.org ID found - attempting LibGen instead...") if isbn: try: from helper.search_provider import get_provider libgen_provider = get_provider("libgen", config) if libgen_provider: libgen_results = libgen_provider.search(f"isbn:{isbn}", limit=1) if libgen_results: libgen_result = libgen_results[0] url = libgen_result.get('target') if isinstance(libgen_result, dict) else getattr(libgen_result, 'target', None) if url: urls_to_download.append(url) debug(f" ✓ Found on LibGen instead") else: debug(f" ⚠ Not found on LibGen") else: debug(f" ⚠ Not found on LibGen") else: debug(f" ⚠ LibGen provider not available") except Exception as e: debug(f" ✗ Error searching LibGen: {e}") else: debug(f" ⚠ ISBN not available for LibGen fallback") else: # Book is NOT borrowable - route to LibGen if isbn: debug(f" ⚠ Not available on Archive.org - attempting LibGen...") try: from helper.search_provider import get_provider libgen_provider = get_provider("libgen", config) if libgen_provider: libgen_results = libgen_provider.search(f"isbn:{isbn}", limit=1) if libgen_results: libgen_result = libgen_results[0] url = libgen_result.get('target') if isinstance(libgen_result, dict) else getattr(libgen_result, 'target', None) if url: urls_to_download.append(url) debug(f" ✓ Found on LibGen") else: debug(f" ⚠ Not found on LibGen") else: debug(f" ⚠ Not found on LibGen") debug(f" ▶ To search LibGen: search-file -provider libgen 'isbn:{isbn}' | @1 | download-data") else: debug(f" ▶ To search LibGen: search-file -provider libgen 'isbn:{isbn}' | @1 | download-data") except Exception as e: debug(f" ⚠ Could not search LibGen: {e}") debug(f" ▶ To search LibGen: search-file -provider libgen 'isbn:{isbn}' | @1 | download-data") else: debug(f" ⚠ ISBN not available") debug(f" ▶ Visit: {getattr(item, 'target', 'https://openlibrary.org')}") debug(f" ▶ Or find ISBN and use: search-file -provider libgen 'isbn:\"\"'") elif origin == 'soulseek': # Handle Soulseek downloads using the provider metadata = getattr(item, 'full_metadata', {}) if isinstance(getattr(item, 'full_metadata', None), dict) else {} username = metadata.get('username') filename = metadata.get('filename') size = getattr(item, 'size_bytes', 0) or 0 if username and filename: try: import asyncio from helper.search_provider import SoulSeekProvider provider = SoulSeekProvider(config) debug(f"[search-result] Soulseek: '{title}'") debug(f" ▶ Downloading from {username}...") if db: db.append_worker_stdout(worker_id, f"Downloading from Soulseek: {title} (from {username})") # Get temp directory from config temp_dir = config.get('temp') if temp_dir: temp_dir = str(Path(temp_dir).expanduser()) # Call async download_file with asyncio.run() success = asyncio.run(provider.download_file( username=username, filename=filename, file_size=size, target_dir=temp_dir )) if success: downloaded_file = Path(provider.DOWNLOAD_DIR) / Path(filename).name if downloaded_file.exists(): debug(f" ✓ Downloaded: {downloaded_file.name}") files_downloaded_directly += 1 if db: db.append_worker_stdout(worker_id, f"✓ Downloaded: {downloaded_file.name}") if pipeline_context._PIPE_ACTIVE: # Create proper PipeObject result result_dict = create_pipe_object_result( source='soulseek', identifier=filename, file_path=str(downloaded_file), cmdlet_name='download-data', title=title, target=str(downloaded_file), # Explicit target for add-file extra={ "metadata": metadata, "origin": "soulseek" } ) pipeline_context.emit(result_dict) else: debug(f" ✗ Download failed (peer may be offline)") if db: db.append_worker_stdout(worker_id, f"✗ Download failed for {title}") debug(f" ▶ Try another result: search-file -provider soulseek \"...\" | @2 | download-data") except Exception as e: debug(f" ✗ Download error: {e}") if db: db.append_worker_stdout(worker_id, f"✗ Error: {e}") debug(f" ▶ Alternative: search-soulseek -download \"{title}\" -storage ") else: debug(f"[search-result] Soulseek: '{title}'") debug(f" ⚠ Missing download info (username/filename)") if db: db.append_worker_stdout(worker_id, f"⚠ Missing download info for {title}") elif origin == 'libgen': # LibGen results with mirrors dict for fallback url = getattr(item, 'target', None) # Extract mirrors and book_id from full_metadata metadata = getattr(item, 'full_metadata', {}) if isinstance(getattr(item, 'full_metadata', None), dict) else {} mirrors = metadata.get('mirrors', {}) book_id = metadata.get('book_id', '') if url: url_entry = { 'url': str(url), 'mirrors': mirrors, # Alternative mirrors for fallback 'book_id': book_id, } urls_to_download.append(url_entry) else: urls_to_download.append(url) if url else None elif origin == 'debrid': url = getattr(item, 'target', None) else: url = getattr(item, 'url', None) or getattr(item, 'link', None) or getattr(item, 'href', None) or getattr(item, 'target', None) if url: urls_to_download.append(str(url)) if not urls_to_download and files_downloaded_directly == 0: debug(f"No downloadable URLs found") return 1 debug(f"Processing {len(urls_to_download)} URL(s)") for i, u in enumerate(urls_to_download, 1): if isinstance(u, dict): debug(f" [{i}] Format: {u.get('format_id', '?')} from {u.get('source_url', '?')[:60]}...") else: debug(f" [{i}] URL: {str(u)[:60]}...") # ======================================================================== # RESOLVE OUTPUT DIRECTORY # ======================================================================== final_output_dir = None # Priority 1: --storage flag if storage_location: try: final_output_dir = SharedArgs.resolve_storage(storage_location) debug(f"Using storage location: {storage_location} → {final_output_dir}") except ValueError as e: log(str(e), file=sys.stderr) return 1 # Priority 2: Config resolver if final_output_dir is None and resolve_output_dir is not None: try: final_output_dir = resolve_output_dir(config) debug(f"Using config resolver: {final_output_dir}") except Exception: pass # Priority 4: Config outfile if final_output_dir is None and config and config.get("outfile"): try: final_output_dir = Path(config["outfile"]).expanduser() debug(f"Using config outfile: {final_output_dir}") except Exception: pass # Priority 5: Default (home/Videos) if final_output_dir is None: final_output_dir = Path.home() / "Videos" debug(f"Using default directory: {final_output_dir}") # Ensure directory exists try: final_output_dir.mkdir(parents=True, exist_ok=True) except Exception as e: log(f"Cannot create output directory {final_output_dir}: {e}", file=sys.stderr) return 1 # ======================================================================== # DOWNLOAD EACH URL # ======================================================================== downloaded_files = [] playlists_displayed = 0 formats_displayed = False # NEW: Track if we showed formats exit_code = 0 for url in urls_to_download: try: selected_playlist_entries: list[Dict[str, Any]] = [] playlist_existing_paths: set[str] = set() # ====== HANDLE FORMAT SELECTION FROM PIPED RESULT ====== # If url is a dict with format_id and source_url, extract them and override format_selector current_format_selector = format_selector actual_url = url if isinstance(url, dict) and url.get('format_id') and url.get('source_url'): debug(f"🎬 Format selected: {url.get('format_id')}") format_id = url.get('format_id') current_format_selector = format_id # If it's a video-only format (has vcodec but no acodec), add bestaudio vcodec = url.get('vcodec', '') acodec = url.get('acodec', '') if vcodec and vcodec != "none" and (not acodec or acodec == "none"): # Video-only format, add bestaudio automatically current_format_selector = f"{format_id}+bestaudio" debug(f" ℹ️ Video-only format detected, automatically adding bestaudio") actual_url = url.get('source_url') url = actual_url # Use the actual URL for further processing # ====== AUTO-BORROW MODE - INTERCEPT SPECIAL BORROW REQUEST DICTS ====== if isinstance(url, dict) and url.get('__borrow_request__'): try: from helper.archive_client import credential_openlibrary, loan, get_book_infos, download import tempfile import shutil book_id = url.get('book_id') if not book_id: debug(f" ✗ Missing book ID for borrowing") exit_code = 1 continue title_val = url.get('title', 'Unknown Book') book_id_str = str(book_id) debug(f"[auto-borrow] Starting borrow for: {title_val}") debug(f" Book ID: {book_id_str}") # Get Archive.org credentials email, password = credential_openlibrary(config) if not email or not password: log(f" ✗ Archive.org credentials not configured", file=sys.stderr) log(f" ▶ Set ARCHIVE_EMAIL and ARCHIVE_PASSWORD environment variables", file=sys.stderr) exit_code = 1 continue # Attempt to borrow and download try: debug(f" → Logging into Archive.org...") from helper.archive_client import login import requests try: session = login(email, password) except requests.exceptions.Timeout: debug(f" ✗ Timeout logging into Archive.org (server not responding)") exit_code = 1 continue except requests.exceptions.RequestException as e: debug(f" ✗ Error connecting to Archive.org: {e}") exit_code = 1 continue debug(f" → Borrowing book...") try: session = loan(session, book_id_str, verbose=True) except requests.exceptions.Timeout: debug(f" ✗ Timeout while borrowing (server not responding)") exit_code = 1 continue except requests.exceptions.RequestException as e: debug(f" ✗ Error while borrowing: {e}") exit_code = 1 continue debug(f" → Extracting page information...") # Try both URL formats book_urls = [ f"https://archive.org/borrow/{book_id_str}", f"https://archive.org/details/{book_id_str}" ] title = None links = None metadata = None last_error = None for book_url in book_urls: try: title, links, metadata = get_book_infos(session, book_url) if title and links: debug(f" → Found {len(links)} pages") break except requests.exceptions.Timeout: last_error = "Timeout while extracting pages" debug(f" ⚠ Timeout while extracting from {book_url}") continue except Exception as e: last_error = str(e) debug(f" ⚠ Failed to extract from {book_url}: {e}") continue if not links: debug(f" ✗ Could not extract book pages (Last error: {last_error})") exit_code = 1 continue # Download pages debug(f" → Downloading {len(links)} pages...") with tempfile.TemporaryDirectory() as temp_dir: # download(session, n_threads, directory, links, scale, book_id) images = download( session, n_threads=4, directory=temp_dir, links=links, scale=2, book_id=str(book_id) ) if not images: debug(f" ✗ No pages downloaded") exit_code = 1 continue debug(f" ✓ Downloaded {len(images)} pages") # Try to merge into PDF try: import img2pdf debug(f" → Merging pages into PDF...") filename = title if title else f"book_{book_id_str}" filename = "".join(c for c in filename if c.isalnum() or c in (' ', '.', '-'))[:100] output_path = Path(final_output_dir) / f"{filename}.pdf" # Make unique filename if needed i = 1 while output_path.exists(): output_path = Path(final_output_dir) / f"{filename}({i}).pdf" i += 1 pdf_content = img2pdf.convert(images) if pdf_content: with open(output_path, 'wb') as f: f.write(pdf_content) debug(f" ✓ Successfully borrowed and saved to: {output_path}") downloaded_files.append(str(output_path)) # Emit result for downstream cmdlets file_hash = _compute_file_hash(output_path) # Build tags including ISBN if available emit_tags = ['book', 'borrowed', 'pdf'] isbn_tag = url.get('isbn') if isbn_tag: emit_tags.append(f'isbn:{isbn_tag}') olid_tag = url.get('olid') if olid_tag: emit_tags.append(f'olid:{olid_tag}') # Fetch OpenLibrary metadata tags ol_tags = fetch_openlibrary_metadata_tags(isbn=isbn_tag, olid=olid_tag) emit_tags.extend(ol_tags) pipe_obj = create_pipe_object_result( source='archive.org', identifier=book_id_str, file_path=str(output_path), cmdlet_name='download-data', title=title_val, file_hash=file_hash, tags=emit_tags, source_url=url.get('source_url', f'archive.org/borrow/{book_id_str}') ) pipeline_context.emit(pipe_obj) exit_code = 0 except ImportError: debug(f" ⚠ img2pdf not available - saving pages as collection") # Just copy images to output dir filename = title if title else f"book_{book_id_str}" filename = "".join(c for c in filename if c.isalnum() or c in (' ', '.', '-'))[:100] output_dir = Path(final_output_dir) / filename i = 1 while output_dir.exists(): output_dir = Path(final_output_dir) / f"{filename}({i})" i += 1 shutil.copytree(temp_dir, str(output_dir)) debug(f" ✓ Successfully borrowed and saved to: {output_dir}") downloaded_files.append(str(output_dir)) # Emit result for downstream cmdlets # Build tags including ISBN if available emit_tags = ['book', 'borrowed', 'pages'] isbn_tag = url.get('isbn') if isbn_tag: emit_tags.append(f'isbn:{isbn_tag}') olid_tag = url.get('olid') if olid_tag: emit_tags.append(f'olid:{olid_tag}') # Fetch OpenLibrary metadata tags ol_tags = fetch_openlibrary_metadata_tags(isbn=isbn_tag, olid=olid_tag) emit_tags.extend(ol_tags) pipe_obj = create_pipe_object_result( source='archive.org', identifier=book_id_str, file_path=str(output_dir), cmdlet_name='download-data', title=title_val, tags=emit_tags, source_url=url.get('source_url', f'archive.org/borrow/{book_id_str}') ) pipeline_context.emit(pipe_obj) exit_code = 0 except Exception as e: debug(f" ✗ Borrow/download failed: {e}") import traceback traceback.print_exc() exit_code = 1 continue # Skip normal URL handling except ImportError as e: debug(f" ✗ Archive.org tools not available: {e}") exit_code = 1 continue except Exception as e: debug(f" ✗ Auto-borrow error: {e}") import traceback traceback.print_exc() exit_code = 1 continue # ====== LIBGEN MIRROR FALLBACK MODE ====== # Handle libgen results with mirrors dict for fallback on failure if isinstance(url, dict) and 'mirrors' in url: try: primary_url = url.get('url') mirrors_dict = url.get('mirrors', {}) book_id = url.get('book_id', '') if not primary_url: debug(f"Skipping libgen entry: no primary URL") exit_code = 1 continue # Build list of mirrors to try: primary first, then alternatives mirrors_to_try = [primary_url] mirrors_to_try.extend(mirrors_dict.values()) # Remove duplicates while preserving order mirrors_to_try = list(dict.fromkeys(mirrors_to_try)) debug(f"🔄 LibGen download with mirror fallback (book_id: {book_id})") debug(f" Primary: {primary_url[:80]}...") if len(mirrors_to_try) > 1: debug(f" {len(mirrors_to_try) - 1} alternative mirror(s) available") # Resolve cookies path final_cookies_path_libgen = None if cookies_path: if resolve_cookies_path: try: final_cookies_path_libgen = resolve_cookies_path(config, Path(cookies_path)) except Exception: final_cookies_path_libgen = Path(cookies_path).expanduser() if cookies_path else None else: final_cookies_path_libgen = Path(cookies_path).expanduser() download_succeeded = False last_error = None successful_mirror = None # Try each mirror in sequence using libgen_service's native download for mirror_idx, mirror_url in enumerate(mirrors_to_try, 1): try: if mirror_idx > 1: debug(f" → Trying mirror #{mirror_idx}: {mirror_url[:80]}...") # Use libgen_service's download_from_mirror for proper libgen handling from helper.libgen_service import download_from_mirror # Generate filename from book_id and title safe_title = "".join(c for c in str(title or "book") if c.isalnum() or c in (' ', '.', '-'))[:100] file_path = final_output_dir / f"{safe_title}_{book_id}.pdf" # Attempt download using libgen's native function success = download_from_mirror( mirror_url=mirror_url, output_path=file_path, log_info=lambda msg: debug(f" {msg}"), log_error=lambda msg: debug(f" ⚠ {msg}") ) if success and file_path.exists(): debug(f" ✓ Downloaded successfully from mirror #{mirror_idx}") successful_mirror = mirror_url download_succeeded = True # Emit result for downstream cmdlets file_hash = _compute_file_hash(file_path) emit_tags = ['libgen', 'book'] pipe_obj = create_pipe_object_result( source='libgen', identifier=book_id, file_path=str(file_path), cmdlet_name='download-data', file_hash=file_hash, tags=emit_tags, source_url=successful_mirror ) pipeline_context.emit(pipe_obj) downloaded_files.append(str(file_path)) exit_code = 0 break # Success, stop trying mirrors except Exception as e: last_error = str(e) if mirror_idx == 1: debug(f" ⚠ Primary mirror failed: {e}") else: debug(f" ⚠ Mirror #{mirror_idx} failed: {e}") if not download_succeeded: log(f" ✗ All mirrors failed. Last error: {last_error}", file=sys.stderr) if "getaddrinfo failed" in str(last_error) or "NameResolutionError" in str(last_error) or "Failed to resolve" in str(last_error): log(f" ⚠ Network issue detected: Cannot resolve LibGen mirror hostnames", file=sys.stderr) log(f" ▶ Check your network connection or try with a VPN/proxy", file=sys.stderr) exit_code = 1 continue # Skip to next URL except Exception as e: debug(f" ✗ LibGen mirror fallback error: {e}") import traceback traceback.print_exc(file=sys.stderr) exit_code = 1 continue # Ensure URL is a string for normal handling if not isinstance(url, str): # Check if it's a playlist item marker if isinstance(url, dict) and url.get('__playlist_url'): playlist_url = url.get('__playlist_url') item_num = url.get('__playlist_item', 1) debug(f"📍 Handling selected playlist item #{item_num}") # Convert to actual URL and set playlist_items to download only this item url = playlist_url playlist_items = str(item_num) # Fall through to normal handling below else: debug(f"Skipping invalid URL entry: {url}") continue debug(f"Probing URL: {url}") # ====== TORRENT MODE - INTERCEPT BEFORE NORMAL DOWNLOAD ====== if torrent_mode or url.lower().startswith('magnet:'): debug(f"🧲 Torrent/magnet mode - spawning background worker...") try: # Get API key from config from config import get_debrid_api_key api_key = get_debrid_api_key(config) if not api_key: log(f"✗ AllDebrid API key not found in config", file=sys.stderr) exit_code = 1 continue # Create a unique worker ID worker_id = f"torrent_{uuid.uuid4().hex[:8]}" # Get worker manager if available from config worker_manager = config.get('_worker_manager') # Create worker in manager if available if worker_manager: try: worker_manager.track_worker( worker_id, worker_type="download_torrent", title=f"Download: {url[:60]}...", description=f"Torrent/magnet download via AllDebrid", pipe=pipeline_context.get_current_command_text() ) debug(f"✓ Worker created (ID: {worker_id})") except Exception as e: debug(f"⚠ Failed to create worker: {e}") worker_manager = None # Spawn background thread to handle the download worker_thread = threading.Thread( target=_download_torrent_worker, args=( worker_id, url, final_output_dir, config, api_key, playlist_items, audio_mode, wait_timeout, worker_manager, ), daemon=False, name=f"TorrentWorker_{worker_id}" ) worker_thread.start() debug(f"✓ Background worker started (ID: {worker_id})") # Emit worker info so user can track it worker_info = { 'worker_id': worker_id, 'worker_type': 'download_torrent', 'source_url': url, 'status': 'running', 'message': 'Downloading in background...' } pipeline_context.emit(worker_info) continue except ImportError: log(f"✗ AllDebrid client not available", file=sys.stderr) exit_code = 1 except Exception as e: # Catches AllDebridError and other exceptions log(f"✗ Failed to spawn torrent worker: {e}", file=sys.stderr) import traceback traceback.print_exc(file=sys.stderr) exit_code = 1 continue # Skip to next URL # ====== NORMAL DOWNLOAD MODE (HTTP/HTTPS) ====== # First, probe the URL to detect playlists and get info # For YouTube URLs, ignore playlists and only probe the single video is_youtube_url = isinstance(url, str) and ('youtube.com' in url or 'youtu.be' in url) probe_info = probe_url(url, no_playlist=is_youtube_url) is_actual_playlist = False # Track if we have a real multi-item playlist if probe_info: debug(f"✓ Probed: {probe_info.get('title', url)} ({probe_info.get('extractor', 'unknown')})") # If it's a playlist, show the result table and skip download for now entries = probe_info.get("entries", []) if entries and not playlist_items: is_actual_playlist = True # We have a real playlist with multiple items # Playlist detected but NO selection provided # Always show table for user to select items debug(f"📋 Found playlist with {len(entries)} items") _show_playlist_table(url, probe_info) debug(f"ℹ️ Playlist displayed. To select items, use @* or @1,3,5-8 syntax after piping results") playlists_displayed += 1 continue # Skip to next URL - don't download playlist without selection elif entries and playlist_items: is_actual_playlist = True # We have a real playlist with item selection # Playlist detected WITH selection - will download below # Expand wildcard if present expanded_items = _expand_playlist_selection(playlist_items, len(entries)) playlist_items = expanded_items selected_playlist_entries = _select_playlist_entries(entries, playlist_items) debug(f"📋 Found playlist with {len(entries)} items - downloading selected: {playlist_items}") else: debug(f"Single item: {probe_info.get('title', 'Unknown')}") # ====== FORMAT LISTING MODE ====== if list_formats_mode and isinstance(url, str) and url.startswith(('http://', 'https://')): debug(f"Fetching formats for: {url}") from helper.download import list_formats from result_table import ResultTable all_formats = list_formats(url, no_playlist=is_youtube_url, playlist_items=playlist_items) if all_formats: # Filter and sort formats for better user experience formats = _filter_and_sort_formats(all_formats) # Create result table for format display table = ResultTable(title=f"Available Formats - {probe_info.get('title', 'Unknown')}") for fmt in formats: row = table.add_row() row.add_column("Format ID", fmt.get("format_id", "")) # Build resolution/bitrate string vcodec = fmt.get("vcodec", "") acodec = fmt.get("acodec", "") height = fmt.get("height") tbr = fmt.get("tbr") if vcodec != "none" and acodec != "none": # Video + audio res_str = fmt.get("resolution", "") elif acodec != "none" and vcodec == "none": # Audio only - show bitrate res_str = f"{tbr:.0f} kbps" if tbr else "audio" else: # Video only res_str = fmt.get("resolution", "") row.add_column("Resolution", res_str) # Build codec string (merged vcodec/acodec) codec_parts = [] if vcodec and vcodec != "none": codec_parts.append(f"v:{vcodec}") if acodec and acodec != "none": codec_parts.append(f"a:{acodec}") codec_str = " | ".join(codec_parts) if codec_parts else "unknown" row.add_column("Codec", codec_str) if fmt.get("filesize"): size_mb = fmt["filesize"] / (1024 * 1024) row.add_column("Size", f"{size_mb:.1f} MB") # Set source command for @N expansion table.set_source_command("download-data", [url]) # Note: Row selection args are not set - users select with @N syntax directly # Display table and emit as pipeline result log(str(table), flush=True) formats_displayed = True # Store table for @N expansion so CLI can reconstruct commands # Uses separate current_stage_table instead of result history table pipeline_context.set_current_stage_table(table) # Always emit formats so they can be selected with @N for i, fmt in enumerate(formats, 1): pipeline_context.emit({ "format_id": fmt.get("format_id", ""), "format_string": fmt.get("format", ""), "resolution": fmt.get("resolution", ""), "vcodec": fmt.get("vcodec", ""), "acodec": fmt.get("acodec", ""), "ext": fmt.get("ext", ""), "filesize": fmt.get("filesize"), "source_url": url, "index": i, }) debug(f"Use @N syntax to select a format and download") else: log(f"✗ No formats available for this URL", file=sys.stderr) continue # Skip download, just show formats # ====== AUTO-DETECT MULTIPLE FORMATS ====== # Check if multiple formats exist and handle based on -item flag if (not current_format_selector and not list_formats_mode and isinstance(url, str) and url.startswith(('http://', 'https://'))): # Check if this is a yt-dlp supported URL (YouTube, Vimeo, etc.) from helper.download import is_url_supported_by_ytdlp, list_formats from result_table import ResultTable if is_url_supported_by_ytdlp(url): debug(f"Checking available formats for: {url}") all_formats = list_formats(url, no_playlist=is_youtube_url, playlist_items=playlist_items) if all_formats: # Filter and sort formats for better user experience formats = _filter_and_sort_formats(all_formats) # Handle -item selection for formats (single video) if playlist_items and playlist_items.isdigit() and not is_actual_playlist: idx = int(playlist_items) if 0 < idx <= len(formats): fmt = formats[idx-1] current_format_selector = fmt.get("format_id") debug(f"Selected format #{idx}: {current_format_selector}") playlist_items = None # Clear so it doesn't affect download options else: log(f"Invalid format index: {idx}", file=sys.stderr) elif len(formats) > 1: # Multiple formats available debug(f"📊 Found {len(formats)} available formats for: {probe_info.get('title', 'Unknown')}") # Always show table for format selection via @N syntax # Show table and wait for @N selection table = ResultTable(title=f"Available Formats - {probe_info.get('title', 'Unknown')}") for fmt in formats: row = table.add_row() row.add_column("Format ID", fmt.get("format_id", "")) # Build resolution/bitrate string vcodec = fmt.get("vcodec", "") acodec = fmt.get("acodec", "") height = fmt.get("height") tbr = fmt.get("tbr") if vcodec != "none" and acodec != "none": # Video + audio res_str = fmt.get("resolution", "") elif acodec != "none" and vcodec == "none": # Audio only - show bitrate res_str = f"{tbr:.0f} kbps" if tbr else "audio" else: # Video only res_str = fmt.get("resolution", "") row.add_column("Resolution", res_str) # Build codec string (merged vcodec/acodec) codec_parts = [] if vcodec and vcodec != "none": codec_parts.append(f"v:{vcodec}") if acodec and acodec != "none": codec_parts.append(f"a:{acodec}") codec_str = " | ".join(codec_parts) if codec_parts else "unknown" row.add_column("Codec", codec_str) if fmt.get("filesize"): size_mb = fmt["filesize"] / (1024 * 1024) row.add_column("Size", f"{size_mb:.1f} MB") # Set source command for @N expansion table.set_source_command("download-data", [url]) # Set row selection args so @N expands to "download-data URL -item N" for i in range(len(formats)): # i is 0-based index, but -item expects 1-based index table.set_row_selection_args(i, ["-item", str(i + 1)]) # Display table and emit formats so they can be selected with @N debug(str(table)) debug(f"💡 Use @N syntax to select a format and download (e.g., @1)") # Store table for @N expansion so CLI can reconstruct commands pipeline_context.set_current_stage_table(table) # Emit formats as pipeline results for @N selection for i, fmt in enumerate(formats, 1): pipeline_context.emit({ "format_id": fmt.get("format_id", ""), "format_string": fmt.get("format", ""), "resolution": fmt.get("resolution", ""), "vcodec": fmt.get("vcodec", ""), "acodec": fmt.get("acodec", ""), "filesize": fmt.get("filesize"), "tbr": fmt.get("tbr"), "source_url": url, "index": i, }) formats_displayed = True # Mark that we displayed formats continue # Skip download, user must select format via @N debug(f"Downloading: {url}") # Resolve cookies path if specified final_cookies_path = None if cookies_path: if resolve_cookies_path: try: final_cookies_path = resolve_cookies_path(config, Path(cookies_path)) except Exception: final_cookies_path = Path(cookies_path).expanduser() if cookies_path else None else: final_cookies_path = Path(cookies_path).expanduser() # Create download options - use correct parameter names # Mode is "audio" or "video", required field mode = "audio" if audio_mode else "video" # Detect YouTube URLs and set no_playlist to download only the single video is_youtube_url = isinstance(url, str) and ('youtube.com' in url or 'youtu.be' in url) download_opts = DownloadOptions( url=url, mode=mode, output_dir=final_output_dir, cookies_path=final_cookies_path, ytdl_format=current_format_selector, # Use per-URL format override if available clip_sections=f"{clip_range[0]}-{clip_range[1]}" if clip_range else None, playlist_items=playlist_items, no_playlist=is_youtube_url, # For YouTube, ignore playlist URLs and download single video ) # For playlist downloads, capture existing files BEFORE download if playlist_items and selected_playlist_entries: _, playlist_existing_paths = _snapshot_playlist_paths(selected_playlist_entries, final_output_dir) # Call download_media from helper - no show_progress param result_data = download_media(download_opts) if result_data and result_data.path: file_path = result_data.path if file_path.exists(): # Check if this was a playlist download (is_actual_playlist tracks if we have a multi-item playlist) if is_actual_playlist: if not selected_playlist_entries: debug("⚠ Playlist metadata unavailable; cannot emit selected items for this stage.") exit_code = 1 continue matched_after, _ = _snapshot_playlist_paths(selected_playlist_entries, final_output_dir) if not matched_after: debug("⚠ No playlist files found for the selected items after download.") exit_code = 1 continue new_playlist_files: list[Path] = [] for playlist_file in matched_after: try: path_key = str(playlist_file.resolve()) except OSError: path_key = str(playlist_file) if path_key not in playlist_existing_paths: new_playlist_files.append(playlist_file) emit_targets = new_playlist_files if new_playlist_files else matched_after if new_playlist_files: debug(f"📋 Playlist download completed: {len(new_playlist_files)} new file(s)") else: debug(f"📁 Reusing {len(emit_targets)} cached playlist file(s)") for playlist_file in emit_targets: file_hash = _compute_file_hash(playlist_file) tags = [] if extract_ytdlp_tags and result_data.tags: tags = result_data.tags pipe_obj = create_pipe_object_result( source='download', identifier=playlist_file.stem, file_path=str(playlist_file), cmdlet_name='download-data', title=playlist_file.name, file_hash=file_hash, is_temp=False, extra={ 'url': url, 'tags': tags, 'audio_mode': audio_mode, 'format': format_selector, 'from_playlist': True, }, ) downloaded_files.append(playlist_file) pipeline_context.emit(pipe_obj) else: # Single file download file_hash = result_data.hash_value or _compute_file_hash(file_path) tags = result_data.tags if result_data.tags else [] pipe_obj = create_pipe_object_result( source='download', identifier=file_path.stem, file_path=str(file_path), cmdlet_name='download-data', title=file_path.name, file_hash=file_hash, is_temp=False, extra={ 'url': url, 'tags': tags, 'audio_mode': audio_mode, 'format': format_selector, 'clipped': clip_range is not None, } ) downloaded_files.append(file_path) pipeline_context.emit(pipe_obj) debug(f"✓ Downloaded: {file_path}") else: log(f"Download returned no result for {url}", file=sys.stderr) exit_code = 1 except Exception as e: log(f"Error downloading {url}: {e}", file=sys.stderr) import traceback traceback.print_exc(file=sys.stderr) exit_code = 1 # Success if we downloaded files or displayed playlists/formats if downloaded_files or files_downloaded_directly > 0: total_files = len(downloaded_files) + files_downloaded_directly debug(f"✓ Successfully downloaded {total_files} file(s)") # Create a result table for the downloaded files # This ensures that subsequent @N commands select from these files # instead of trying to expand the previous command (e.g. search-file) if downloaded_files: from result_table import ResultTable table = ResultTable("Downloaded Files") for i, file_path in enumerate(downloaded_files): row = table.add_row() row.add_column("#", str(i + 1)) row.add_column("File", file_path.name) row.add_column("Path", str(file_path)) try: size_mb = file_path.stat().st_size / (1024*1024) row.add_column("Size", f"{size_mb:.1f} MB") except OSError: row.add_column("Size", "?") # Set selection args to just the file path (or index if we want item selection) # For item selection fallback, we don't strictly need row args if source command is None # But setting them helps if we want to support command expansion later table.set_row_selection_args(i, [str(file_path)]) # Register the table but DO NOT set a source command # This forces CLI to use item-based selection (filtering the pipe) # instead of command expansion pipeline_context.set_last_result_table_overlay(table, downloaded_files) pipeline_context.set_current_stage_table(table) # Also print the table so user sees what they got log(str(table), flush=True) if db: db.update_worker_status(worker_id, 'completed') return 0 if playlists_displayed: debug(f"✓ Displayed {playlists_displayed} playlist(s) for selection") if db: db.update_worker_status(worker_id, 'completed') db.close() return 0 # Success - playlists shown if formats_displayed: debug(f"✓ Format selection table displayed - use @N to select and download") if db: db.update_worker_status(worker_id, 'completed') db.close() return 0 # Success - formats shown log(f"No files were downloaded or playlists displayed", file=sys.stderr) if db: db.update_worker_status(worker_id, 'completed') db.close() return 1 CMDLET = Cmdlet( name="download-data", exec=_run, summary="Download data from URLs with playlist/clip support using yt-dlp", usage="download-data [options] or search-file | download-data [options]", aliases=["download", "dl"], args=[ CmdletArg( name="url", type="string", required=False, description="URL to download (HTTP/HTTPS or file with URL list)", variadic=True ), CmdletArg( name="-url", type="string", description="URL to download (alias for positional argument)", variadic=True ), CmdletArg( name="list-formats", type="flag", description="List available formats without downloading" ), CmdletArg( name="audio", type="flag", alias="a", description="Download audio only (extract from video)" ), CmdletArg( name="video", type="flag", alias="v", description="Download video (default if not specified)" ), CmdletArg( name="format", type="string", alias="fmt", description="Explicit yt-dlp format selector (e.g., 'bestvideo+bestaudio')" ), CmdletArg( name="clip", type="string", description="Extract time range: MM:SS-MM:SS (e.g., 34:03-35:08) or seconds" ), CmdletArg( name="cookies", type="string", description="Path to cookies.txt file for authentication" ), CmdletArg( name="torrent", type="flag", description="Download torrent/magnet via AllDebrid (requires API key in config)" ), CmdletArg( name="wait", type="float", description="Wait time (seconds) for magnet processing timeout" ), CmdletArg( name="item", type="string", alias="items", description="Item selection for playlists/formats: use '-item N' to select format N, or '-item' to show table for @N selection in next command" ), SharedArgs.STORAGE, # Storage location: local, hydrus, 0x0, debrid, ftp ], details=[ "Download media from URLs with advanced features.", "", "BASIC USAGE:", " download-data https://youtube.com/watch?v=xyz", " download-data https://example.com/file.pdf -storage local", "", "AUDIO/VIDEO OPTIONS:", " -audio, -a Extract audio from video (M4A, MP3)", " -video, -v Download as video (default)", "", "FORMAT SELECTION:", " -format SELECTOR Specify yt-dlp format", " Examples: 'best', 'bestvideo+bestaudio', '22'", "", "FORMAT/RESULT ITEM SELECTION:", " -item Show available formats in table (see @N below)", " -item N Auto-select and download format #N (e.g., -item 1)", " Example: download-data URL -item 2 | add-file -storage local", "", "FORMAT SELECTION WITH @N SYNTAX:", " 1. Show formats: download-data URL", " 2. Select with @N: @1 | download-data | add-file", " OR use -item N to skip manual selection", "", "CLIPPING:", " -clip START-END Extract time range from media", " Format: MM:SS-MM:SS (e.g., 34:03-35:08)", " Also accepts: 2043-2108 (seconds)", "", "PLAYLIST MODE:", " Automatically detects playlists", " Shows numbered list of tracks", " Download specific items: -item '1,3,5-8'", " Download all items: -item '*'", "", "TORRENT MODE:", " Download torrents/magnets via AllDebrid (if configured)", " Usage: download-data -torrent magnet:?xt=urn:btih:... -item '1,3,5-8'", " -wait SECONDS Maximum wait time for magnet processing (default: 1800)", "", "STORAGE LOCATIONS:", " -storage local ~/Videos (default)", " -storage hydrus ~/.hydrus/client_files", " -storage 0x0 ~/Screenshots", " -storage debrid ~/Debrid", " -storage ftp ~/FTP", "", "EXAMPLES:", " # Download YouTube video as audio", " download-data https://youtube.com/watch?v=xyz -audio -storage local", "", " # Extract specific clip from video", " download-data https://vimeo.com/123456 -clip 1:30-2:45 -format best", "", " # Download specific tracks from playlist", " download-data https://youtube.com/playlist?list=xyz -item '1,3,5-8'", "", " # Download all items from playlist", " download-data https://youtube.com/playlist?list=xyz -item '*'", "", " # Download with authentication", " download-data https://example.com/content -cookies ~/cookies.txt", "", "TORRENT EXAMPLES:", " # Download specific tracks from magnet link", " download-data -torrent magnet:?xt=urn:btih:... -item '1,3,5-8' -storage local", "", " # Download all items from torrent and merge", " download-data -torrent magnet:?xt=urn:btih:... -item '*' | merge-file | add-file", "", " # Download with custom wait time (5 minutes)", " download-data -torrent magnet:?xt=urn:btih:... -wait 300 -item '1-5'", ] )