2634 lines
122 KiB
Python
2634 lines
122 KiB
Python
"""Download data from URLs using yt-dlp with playlist, clipping, and format selection.
|
||
|
||
This is a merged implementation combining:
|
||
- cmdlets/download_data.py (pipeline wrapper)
|
||
- funact/download_data.py (feature-rich implementation)
|
||
- helper/download.py (low-level machinery)
|
||
|
||
Features:
|
||
- Direct file downloads and yt-dlp streaming sites
|
||
- Playlist detection with interactive track selection
|
||
- Clip extraction (time ranges like 34:03-35:08)
|
||
- Format selection and audio/video toggles
|
||
- Cookies file support
|
||
- Tag extraction and metadata integration
|
||
- Progress tracking and debug logging
|
||
- Pipeline integration with result emission
|
||
- Background torrent/magnet downloads via AllDebrid
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import hashlib
|
||
import re
|
||
import sys
|
||
import threading
|
||
import time
|
||
from pathlib import Path
|
||
from typing import Any, Dict, List, Optional, Sequence, Tuple
|
||
import uuid
|
||
|
||
from helper.logger import log, debug
|
||
from helper.download import download_media, probe_url
|
||
from helper.utils import sha256_file
|
||
from models import DownloadOptions
|
||
|
||
from . import register
|
||
from ._shared import Cmdlet, CmdletArg, SharedArgs, create_pipe_object_result, normalize_result_input, parse_cmdlet_args
|
||
import models
|
||
import pipeline as pipeline_context
|
||
from config import resolve_output_dir
|
||
from metadata import (
|
||
fetch_openlibrary_metadata_tags,
|
||
format_playlist_entry,
|
||
extract_ytdlp_tags
|
||
)
|
||
|
||
# ============================================================================
|
||
# Try to import optional dependencies
|
||
# ============================================================================
|
||
|
||
try:
|
||
from yt_dlp.utils import sanitize_filename as ytdlp_sanitize_filename # type: ignore
|
||
except Exception: # pragma: no cover - optional dependency
|
||
ytdlp_sanitize_filename = None
|
||
|
||
|
||
# ============================================================================
|
||
# Background Worker for AllDebrid Downloads
|
||
# ============================================================================
|
||
|
||
def _download_torrent_worker(
|
||
worker_id: str,
|
||
magnet_url: str,
|
||
output_dir: Path,
|
||
config: Dict[str, Any],
|
||
api_key: str,
|
||
playlist_items: Optional[str] = None,
|
||
audio_mode: bool = False,
|
||
wait_timeout: int = 600,
|
||
worker_manager: Optional[Any] = None,
|
||
) -> None:
|
||
"""Background worker to download torrent/magnet via AllDebrid.
|
||
|
||
Runs in a separate thread and updates worker_manager with progress.
|
||
|
||
Args:
|
||
worker_id: Unique ID for this worker task
|
||
magnet_url: Magnet link or .torrent URL to download
|
||
output_dir: Directory to save downloaded files
|
||
config: Configuration dict
|
||
api_key: AllDebrid API key
|
||
playlist_items: Optional file selection (e.g., "1,3,5-8")
|
||
audio_mode: Whether to tag as audio or video
|
||
wait_timeout: Timeout in seconds for magnet processing
|
||
worker_manager: WorkerManager instance for progress updates
|
||
"""
|
||
worker = None
|
||
downloaded_files = []
|
||
|
||
try:
|
||
from helper.alldebrid import AllDebridClient
|
||
|
||
# Get worker reference if manager provided
|
||
if worker_manager:
|
||
try:
|
||
workers = worker_manager.get_active_workers()
|
||
worker = next((w for w in workers if w.get('id') == worker_id), None)
|
||
except:
|
||
worker = None
|
||
|
||
def log_progress(message: str) -> None:
|
||
"""Log progress to both console and worker manager."""
|
||
debug(message)
|
||
if worker_manager and worker_id:
|
||
try:
|
||
worker_manager.log_step(worker_id, message)
|
||
except:
|
||
pass
|
||
|
||
log_progress(f"[Worker {worker_id}] Submitting magnet to AllDebrid...")
|
||
client = AllDebridClient(api_key)
|
||
|
||
# Add magnet
|
||
magnet_info = client.magnet_add(magnet_url)
|
||
magnet_id = int(magnet_info.get('id', 0))
|
||
|
||
if magnet_id <= 0:
|
||
log_progress(f"[Worker {worker_id}] ✗ Failed to add magnet to AllDebrid")
|
||
if worker_manager:
|
||
try:
|
||
worker_manager.finish_worker(worker_id, "failed", f"Failed to add magnet")
|
||
except:
|
||
pass
|
||
return
|
||
|
||
log_progress(f"[Worker {worker_id}] ✓ Magnet added (ID: {magnet_id})")
|
||
|
||
# Poll for ready status
|
||
elapsed = 0
|
||
last_status_reported = 0
|
||
|
||
while elapsed < wait_timeout:
|
||
try:
|
||
status_info = client.magnet_status(magnet_id)
|
||
except Exception as e:
|
||
log_progress(f"[Worker {worker_id}] ⚠ Failed to get status: {e}")
|
||
time.sleep(2)
|
||
elapsed += 2
|
||
continue
|
||
|
||
status_code = status_info.get('statusCode', -1)
|
||
status_text = status_info.get('status', 'Unknown')
|
||
|
||
# Report progress every 5 seconds (avoid log spam)
|
||
if elapsed - last_status_reported >= 5 or elapsed < 2:
|
||
downloaded = status_info.get('downloaded', 0)
|
||
total_size = status_info.get('size', 0)
|
||
seeders = status_info.get('seeders', 0)
|
||
speed = status_info.get('downloadSpeed', 0)
|
||
|
||
if total_size > 0:
|
||
percent = (downloaded / total_size) * 100
|
||
speed_str = f" @ {speed / (1024**2):.1f} MB/s" if speed > 0 else ""
|
||
seeders_str = f" ({seeders} seeders)" if seeders > 0 else ""
|
||
progress_msg = f"[Worker {worker_id}] ⧗ {status_text}: {percent:.1f}% ({downloaded / (1024**3):.2f} / {total_size / (1024**3):.2f} GB){speed_str}{seeders_str}"
|
||
log_progress(progress_msg)
|
||
|
||
# Update worker with progress
|
||
if worker_manager:
|
||
try:
|
||
worker_manager.update_worker(
|
||
worker_id,
|
||
status="running",
|
||
progress=f"{percent:.1f}%",
|
||
details=progress_msg
|
||
)
|
||
except:
|
||
pass
|
||
else:
|
||
log_progress(f"[Worker {worker_id}] ⧗ {status_text}...")
|
||
|
||
last_status_reported = elapsed
|
||
|
||
if status_code == 4: # Ready
|
||
log_progress(f"[Worker {worker_id}] ✓ Files ready")
|
||
break
|
||
elif status_code >= 5: # Error
|
||
error_status = {
|
||
5: "Upload failed",
|
||
6: "Internal error during unpacking",
|
||
7: "Not downloaded in 20 minutes",
|
||
8: "File too big (>1TB)",
|
||
9: "Internal error",
|
||
10: "Download took >72 hours",
|
||
11: "Deleted on hoster website",
|
||
12: "Processing failed",
|
||
13: "Processing failed",
|
||
14: "Tracker error",
|
||
15: "No peers available"
|
||
}
|
||
error_msg = error_status.get(status_code, f"Unknown error {status_code}")
|
||
log_progress(f"[Worker {worker_id}] ✗ Magnet failed: {error_msg}")
|
||
if worker_manager:
|
||
try:
|
||
worker_manager.finish_worker(worker_id, "failed", error_msg)
|
||
except:
|
||
pass
|
||
return
|
||
|
||
time.sleep(2)
|
||
elapsed += 2
|
||
|
||
if elapsed >= wait_timeout:
|
||
log_progress(f"[Worker {worker_id}] ✗ Timeout waiting for magnet (>{wait_timeout}s)")
|
||
if worker_manager:
|
||
try:
|
||
worker_manager.finish_worker(worker_id, "failed", f"Timeout after {wait_timeout}s")
|
||
except:
|
||
pass
|
||
return
|
||
|
||
# Get files
|
||
files_result = client.magnet_links([magnet_id])
|
||
magnet_files = files_result.get(str(magnet_id), {})
|
||
if not magnet_files and isinstance(magnet_id, int):
|
||
# Try integer key as fallback
|
||
for key in files_result:
|
||
if str(key) == str(magnet_id):
|
||
magnet_files = files_result[key]
|
||
break
|
||
files_array = magnet_files.get('files', [])
|
||
|
||
if not files_array:
|
||
log_progress(f"[Worker {worker_id}] ✗ No files found in magnet")
|
||
if worker_manager:
|
||
try:
|
||
worker_manager.finish_worker(worker_id, "failed", "No files found in magnet")
|
||
except:
|
||
pass
|
||
return
|
||
|
||
log_progress(f"[Worker {worker_id}] ✓ Found {len(files_array)} file(s)")
|
||
|
||
# Extract download links
|
||
download_links = []
|
||
|
||
def extract_links(items, prefix=""):
|
||
if not isinstance(items, list):
|
||
return
|
||
for item in items:
|
||
if isinstance(item, dict):
|
||
name = item.get('n', '')
|
||
link = item.get('l', '')
|
||
size = item.get('s', 0)
|
||
entries = item.get('e', [])
|
||
|
||
if link:
|
||
download_links.append({
|
||
'link': link,
|
||
'name': name,
|
||
'size': size,
|
||
'path': f"{prefix}/{name}" if prefix else name
|
||
})
|
||
|
||
if entries:
|
||
extract_links(entries, f"{prefix}/{name}" if prefix else name)
|
||
|
||
extract_links(files_array)
|
||
|
||
if not download_links:
|
||
log_progress(f"[Worker {worker_id}] ✗ No downloadable files found")
|
||
if worker_manager:
|
||
try:
|
||
worker_manager.finish_worker(worker_id, "failed", "No downloadable files")
|
||
except:
|
||
pass
|
||
return
|
||
|
||
# Filter by playlist_items if specified
|
||
if playlist_items and playlist_items != '*':
|
||
# Parse selection like "1,3,5-8"
|
||
selected_indices = []
|
||
for part in playlist_items.split(','):
|
||
part = part.strip()
|
||
if '-' in part:
|
||
start, end = part.split('-')
|
||
selected_indices.extend(range(int(start)-1, int(end)))
|
||
else:
|
||
selected_indices.append(int(part)-1)
|
||
|
||
download_links = [download_links[i] for i in selected_indices if i < len(download_links)]
|
||
log_progress(f"[Worker {worker_id}] Downloading {len(download_links)} selected file(s)")
|
||
|
||
# Download each file
|
||
for idx, file_info in enumerate(download_links, 1):
|
||
link = file_info['link']
|
||
name = file_info['name']
|
||
|
||
log_progress(f"[Worker {worker_id}] ({idx}/{len(download_links)}) Downloading: {name}")
|
||
|
||
try:
|
||
# Unlock the link
|
||
try:
|
||
actual_link = client.unlock_link(link)
|
||
if actual_link and actual_link != link:
|
||
link = actual_link
|
||
except:
|
||
pass
|
||
|
||
# Download via HTTP
|
||
from helper.http_client import HTTPClient
|
||
|
||
output_dir.mkdir(parents=True, exist_ok=True)
|
||
file_path = output_dir / name
|
||
file_path.parent.mkdir(parents=True, exist_ok=True)
|
||
|
||
with HTTPClient() as http_client:
|
||
http_client.download(link, str(file_path))
|
||
|
||
log_progress(f"[Worker {worker_id}] ✓ Downloaded: {name}")
|
||
|
||
# Compute hash and emit result
|
||
file_hash = _compute_file_hash(file_path)
|
||
|
||
result_obj = {
|
||
'file_path': str(file_path),
|
||
'source_url': magnet_url,
|
||
'file_hash': file_hash,
|
||
'media_kind': 'audio' if audio_mode else 'video',
|
||
}
|
||
|
||
pipeline_context.emit(result_obj)
|
||
downloaded_files.append(file_path)
|
||
|
||
except Exception as e:
|
||
log_progress(f"[Worker {worker_id}] ⚠ Failed to download {name}: {e}")
|
||
|
||
if downloaded_files:
|
||
msg = f"✓ Torrent download complete ({len(downloaded_files)} file(s))"
|
||
log_progress(f"[Worker {worker_id}] {msg}")
|
||
if worker_manager:
|
||
try:
|
||
worker_manager.finish_worker(worker_id, "success", msg)
|
||
except:
|
||
pass
|
||
else:
|
||
if worker_manager:
|
||
try:
|
||
worker_manager.finish_worker(worker_id, "failed", "No files downloaded")
|
||
except:
|
||
pass
|
||
|
||
except ImportError:
|
||
log_progress(f"[Worker {worker_id}] ✗ AllDebrid client not available")
|
||
if worker_manager:
|
||
try:
|
||
worker_manager.finish_worker(worker_id, "failed", "AllDebrid client not available")
|
||
except:
|
||
pass
|
||
except Exception as e:
|
||
import traceback
|
||
log_progress(f"[Worker {worker_id}] ✗ Torrent download failed: {e}")
|
||
if worker_manager:
|
||
try:
|
||
worker_manager.finish_worker(worker_id, "failed", str(e))
|
||
except:
|
||
pass
|
||
traceback.print_exc(file=sys.stderr)
|
||
|
||
|
||
# ============================================================================
|
||
# CMDLET Metadata Declaration
|
||
# ============================================================================
|
||
|
||
|
||
|
||
|
||
# ============================================================================
|
||
# Torrent File Parsing
|
||
# ============================================================================
|
||
|
||
def _parse_torrent_file(file_path: str) -> Optional[str]:
|
||
"""Parse a .torrent file and extract magnet link.
|
||
|
||
Args:
|
||
file_path: Path to .torrent file
|
||
|
||
Returns:
|
||
Magnet link string or None if parsing fails
|
||
"""
|
||
try:
|
||
import bencode3
|
||
except ImportError:
|
||
log("⚠ bencode3 module not found. Install: pip install bencode3", file=sys.stderr)
|
||
return None
|
||
|
||
try:
|
||
with open(file_path, 'rb') as f:
|
||
torrent_data = bencode3.bdecode(f.read())
|
||
except Exception as e:
|
||
log(f"✗ Failed to parse torrent file: {e}", file=sys.stderr)
|
||
return None
|
||
|
||
try:
|
||
# Get info dict - bencode3 returns string keys, not bytes
|
||
info = torrent_data.get('info')
|
||
if not info:
|
||
log("✗ No info dict in torrent file", file=sys.stderr)
|
||
return None
|
||
|
||
# Calculate info hash (SHA1 of bencoded info dict)
|
||
import hashlib
|
||
info_hash = hashlib.sha1(bencode3.bencode(info)).hexdigest()
|
||
|
||
# Get name
|
||
name = info.get('name', 'Unknown')
|
||
if isinstance(name, bytes):
|
||
name = name.decode('utf-8', errors='ignore')
|
||
|
||
# Create magnet link
|
||
magnet = f"magnet:?xt=urn:btih:{info_hash}&dn={name}"
|
||
|
||
# Add trackers if available
|
||
announce = torrent_data.get('announce')
|
||
if announce:
|
||
try:
|
||
tracker = announce if isinstance(announce, str) else announce.decode('utf-8', errors='ignore')
|
||
magnet += f"&tr={tracker}"
|
||
except:
|
||
pass
|
||
|
||
announce_list = torrent_data.get('announce-list', [])
|
||
for tier in announce_list:
|
||
if isinstance(tier, list):
|
||
for tracker_item in tier:
|
||
try:
|
||
tracker = tracker_item if isinstance(tracker_item, str) else tracker_item.decode('utf-8', errors='ignore')
|
||
if tracker:
|
||
magnet += f"&tr={tracker}"
|
||
except:
|
||
pass
|
||
|
||
debug(f"✓ Parsed torrent: {name} (hash: {info_hash})")
|
||
return magnet
|
||
|
||
except Exception as e:
|
||
log(f"✗ Error parsing torrent metadata: {e}", file=sys.stderr)
|
||
return None
|
||
|
||
|
||
def _download_torrent_file(url: str, temp_dir: Optional[Path] = None) -> Optional[str]:
|
||
"""Download a .torrent file from URL and parse it.
|
||
|
||
Args:
|
||
url: URL to .torrent file
|
||
temp_dir: Optional temp directory for storing downloaded file
|
||
|
||
Returns:
|
||
Magnet link string or None if download/parsing fails
|
||
"""
|
||
try:
|
||
from helper.http_client import HTTPClient
|
||
except ImportError:
|
||
log("⚠ HTTPClient not available", file=sys.stderr)
|
||
return None
|
||
|
||
try:
|
||
# Download torrent file
|
||
debug(f"⇓ Downloading torrent file: {url}")
|
||
|
||
with HTTPClient(timeout=30.0) as client:
|
||
response = client.get(url)
|
||
response.raise_for_status()
|
||
torrent_data = response.content
|
||
|
||
# Create temp file
|
||
if temp_dir is None:
|
||
temp_dir = Path.home() / ".cache" / "downlow"
|
||
temp_dir.mkdir(parents=True, exist_ok=True)
|
||
|
||
# Save to temp file
|
||
import hashlib
|
||
url_hash = hashlib.md5(url.encode()).hexdigest()[:8]
|
||
temp_file = temp_dir / f"torrent_{url_hash}.torrent"
|
||
temp_file.write_bytes(torrent_data)
|
||
|
||
debug(f"✓ Downloaded torrent file: {temp_file}")
|
||
|
||
# Parse it
|
||
magnet = _parse_torrent_file(str(temp_file))
|
||
|
||
# Clean up
|
||
try:
|
||
temp_file.unlink()
|
||
except:
|
||
pass
|
||
|
||
return magnet
|
||
|
||
except Exception as e:
|
||
log(f"✗ Failed to download/parse torrent: {e}", file=sys.stderr)
|
||
return None
|
||
|
||
|
||
def _is_torrent_file_or_url(arg: str) -> bool:
|
||
"""Check if argument is a .torrent file path or URL.
|
||
|
||
Args:
|
||
arg: Argument to check
|
||
|
||
Returns:
|
||
True if it's a .torrent file or URL
|
||
"""
|
||
arg_lower = arg.lower()
|
||
|
||
# Check if it's a .torrent file path
|
||
if arg_lower.endswith('.torrent'):
|
||
return Path(arg).exists() or arg_lower.startswith('http')
|
||
|
||
# Check if it's a URL to .torrent file
|
||
if arg_lower.startswith('http://') or arg_lower.startswith('https://'):
|
||
return '.torrent' in arg_lower
|
||
|
||
return False
|
||
|
||
|
||
def _process_torrent_input(arg: str) -> Optional[str]:
|
||
"""Process torrent file or URL and convert to magnet link.
|
||
|
||
Args:
|
||
arg: .torrent file path or URL
|
||
|
||
Returns:
|
||
Magnet link or original argument if not processable
|
||
"""
|
||
try:
|
||
if arg.lower().startswith('http://') or arg.lower().startswith('https://'):
|
||
# It's a URL
|
||
return _download_torrent_file(arg) or arg
|
||
else:
|
||
# It's a file path
|
||
if Path(arg).exists():
|
||
return _parse_torrent_file(arg) or arg
|
||
else:
|
||
return arg
|
||
except Exception as e:
|
||
log(f"⚠ Error processing torrent: {e}", file=sys.stderr)
|
||
return arg
|
||
|
||
|
||
# ============================================================================
|
||
# Helper Functions
|
||
# ============================================================================
|
||
|
||
|
||
|
||
|
||
def _show_playlist_table(url: str, probe_info: Dict[str, Any]) -> Optional[Dict[str, Any]]:
|
||
"""Show playlist result table and get user selection.
|
||
|
||
Args:
|
||
url: Original URL
|
||
probe_info: Info dict from probe_url()
|
||
|
||
Returns:
|
||
Modified probe_info with selected_entries, or None if user cancelled
|
||
"""
|
||
entries = probe_info.get("entries", [])
|
||
if not entries:
|
||
return probe_info
|
||
|
||
extractor = probe_info.get("extractor", "")
|
||
title = probe_info.get("title", "Playlist")
|
||
|
||
debug(f"📋 Detected playlist: {title} ({len(entries)} items) - {extractor}")
|
||
|
||
# Skip full metadata enrichment for speed - extract_flat usually provides enough info
|
||
# debug("📋 Fetching metadata for each item...")
|
||
# entries = enrich_playlist_entries(entries, extractor)
|
||
|
||
# Emit each playlist item as a separate result row
|
||
for i, entry in enumerate(entries, 1):
|
||
formatted = format_playlist_entry(entry, i, extractor)
|
||
|
||
# Build tags from available metadata
|
||
tags = []
|
||
artist = formatted.get("artist") or formatted.get("uploader", "")
|
||
if artist:
|
||
tags.append(artist)
|
||
|
||
album = formatted.get("album", "")
|
||
if album and album != title: # Don't repeat playlist title
|
||
tags.append(album)
|
||
|
||
# Extract individual fields for separate columns
|
||
duration = formatted.get("duration", 0)
|
||
duration_str = ""
|
||
if duration:
|
||
minutes = int(duration // 60)
|
||
seconds = int(duration % 60)
|
||
duration_str = f"{minutes}m{seconds}s"
|
||
tags.append(duration_str)
|
||
|
||
# Normalize extractor for comparison (remove special chars and case)
|
||
ext_lower = extractor.lower().replace(":", "").replace(" ", "")
|
||
|
||
track_number = None
|
||
# Add site-specific tags and fields
|
||
if "youtube" in ext_lower and formatted.get("channel"):
|
||
tags.append(f"channel:{formatted.get('channel')}")
|
||
elif "bandcamp" in ext_lower:
|
||
track_number = formatted.get("track_number", i)
|
||
tags.append(f"track:{track_number}")
|
||
|
||
# Create result row with separate columns for important metadata
|
||
# Build columns dynamically based on available data
|
||
columns = [
|
||
("#", i),
|
||
("Title", formatted["title"]),
|
||
]
|
||
|
||
# Add Artist column if available
|
||
if artist:
|
||
columns.append(("Artist", artist))
|
||
|
||
# Add Duration column if available
|
||
if duration_str:
|
||
columns.append(("Duration", duration_str))
|
||
|
||
# Add Track number column for music platforms
|
||
if track_number is not None:
|
||
columns.append(("Track", str(track_number)))
|
||
|
||
# Add Tags column for remaining tags (if any)
|
||
remaining_tags = [t for t in tags if t not in [artist, duration_str]]
|
||
if remaining_tags:
|
||
columns.append(("Tags", ", ".join(remaining_tags)))
|
||
|
||
# Create result row with compact columns display
|
||
# Using "columns" field tells ResultTable which columns to show
|
||
result_row = {
|
||
"title": formatted["title"],
|
||
"tags": tags,
|
||
"index": i,
|
||
# Store all metadata but don't display in table (use columns field)
|
||
"__source": "playlist-probe",
|
||
"__id": f"{i}",
|
||
"__file_path": url,
|
||
"__action": f"playlist-item:{i}",
|
||
"__artist": formatted.get("artist", ""),
|
||
"__duration": formatted.get("duration", 0),
|
||
"__extractor": extractor,
|
||
# Define which columns should be shown in the result table
|
||
"columns": columns
|
||
}
|
||
|
||
# Add site-specific metadata for pipeline use
|
||
if "youtube" in ext_lower:
|
||
result_row["__video_id"] = formatted.get("video_id", "")
|
||
result_row["__channel"] = formatted.get("channel", "")
|
||
elif "bandcamp" in ext_lower:
|
||
result_row["__track_number"] = formatted.get("track_number", i)
|
||
result_row["__album"] = formatted.get("album") or title
|
||
elif "spotify" in ext_lower:
|
||
result_row["__artists"] = formatted.get("artists", "")
|
||
result_row["__album"] = formatted.get("album", "")
|
||
|
||
pipeline_context.emit(result_row)
|
||
|
||
debug(f"ℹ️ Playlist items displayed. Use result table references (@1, @2, etc.) to select tracks.")
|
||
|
||
# Return modified probe info
|
||
return probe_info
|
||
|
||
|
||
def _parse_time_range(clip_spec: str) -> Optional[Tuple[int, int]]:
|
||
"""Parse time range from MM:SS-MM:SS or seconds format.
|
||
|
||
Args:
|
||
clip_spec: Time range string like "34:03-35:08" or "2043-2108"
|
||
|
||
Returns:
|
||
Tuple of (start_seconds, end_seconds) or None if invalid
|
||
"""
|
||
try:
|
||
if '-' not in clip_spec:
|
||
return None
|
||
|
||
parts = clip_spec.split('-')
|
||
if len(parts) != 2:
|
||
return None
|
||
|
||
start_str, end_str = parts
|
||
|
||
# Try MM:SS format first
|
||
if ':' in start_str:
|
||
start_parts = start_str.split(':')
|
||
if len(start_parts) == 2:
|
||
start_sec = int(start_parts[0]) * 60 + int(start_parts[1])
|
||
else:
|
||
return None
|
||
else:
|
||
start_sec = int(start_str)
|
||
|
||
if ':' in end_str:
|
||
end_parts = end_str.split(':')
|
||
if len(end_parts) == 2:
|
||
end_sec = int(end_parts[0]) * 60 + int(end_parts[1])
|
||
else:
|
||
return None
|
||
else:
|
||
end_sec = int(end_str)
|
||
|
||
if start_sec >= end_sec:
|
||
return None
|
||
|
||
return (start_sec, end_sec)
|
||
|
||
except (ValueError, AttributeError):
|
||
return None
|
||
|
||
|
||
MEDIA_EXTENSIONS = {'.mp3', '.m4a', '.mp4', '.mkv', '.webm', '.flac', '.wav', '.aac'}
|
||
|
||
|
||
def _parse_playlist_selection_indices(selection: Optional[str], total_items: int) -> list[int]:
|
||
"""Convert playlist selection string to 0-based indices."""
|
||
if total_items <= 0:
|
||
return []
|
||
if not selection or selection.strip() in {"*", ""}:
|
||
return list(range(total_items))
|
||
indices: list[int] = []
|
||
for part in selection.split(','):
|
||
part = part.strip()
|
||
if not part:
|
||
continue
|
||
if '-' in part:
|
||
bounds = part.split('-', 1)
|
||
try:
|
||
start = int(bounds[0])
|
||
end = int(bounds[1])
|
||
except ValueError:
|
||
continue
|
||
if start <= 0 or end <= 0:
|
||
continue
|
||
if start > end:
|
||
start, end = end, start
|
||
for idx in range(start - 1, end):
|
||
if 0 <= idx < total_items:
|
||
indices.append(idx)
|
||
else:
|
||
try:
|
||
idx = int(part) - 1
|
||
except ValueError:
|
||
continue
|
||
if 0 <= idx < total_items:
|
||
indices.append(idx)
|
||
seen: set[int] = set()
|
||
ordered: list[int] = []
|
||
for idx in indices:
|
||
if idx not in seen:
|
||
ordered.append(idx)
|
||
seen.add(idx)
|
||
return ordered
|
||
|
||
|
||
def _select_playlist_entries(entries: Any, selection: Optional[str]) -> list[Dict[str, Any]]:
|
||
"""Pick playlist entries according to a selection string."""
|
||
if not isinstance(entries, list):
|
||
return []
|
||
indices = _parse_playlist_selection_indices(selection, len(entries))
|
||
if not indices:
|
||
return []
|
||
selected: list[Dict[str, Any]] = []
|
||
for idx in indices:
|
||
entry = entries[idx]
|
||
if isinstance(entry, dict):
|
||
selected.append(entry)
|
||
return selected
|
||
|
||
|
||
def _sanitize_title_for_filename(title: Optional[str]) -> str:
|
||
"""Match yt-dlp's restricted filename sanitization for comparisons."""
|
||
if not title:
|
||
return ""
|
||
if ytdlp_sanitize_filename:
|
||
try:
|
||
return ytdlp_sanitize_filename(title, restricted=True)
|
||
except Exception:
|
||
pass
|
||
sanitized = re.sub(r"[^0-9A-Za-z._-]+", "_", title)
|
||
return sanitized.strip() or ""
|
||
|
||
|
||
def _find_playlist_files_from_entries(
|
||
entries: Sequence[Dict[str, Any]],
|
||
output_dir: Path,
|
||
) -> list[Path]:
|
||
"""Resolve expected playlist files based on entry titles/exts."""
|
||
matched: list[Path] = []
|
||
seen: set[str] = set()
|
||
for entry in entries:
|
||
title = entry.get('title') if isinstance(entry, dict) else None
|
||
sanitized = _sanitize_title_for_filename(title)
|
||
if not sanitized:
|
||
continue
|
||
preferred_exts: list[str] = []
|
||
for key in ('ext', 'audio_ext', 'video_ext'):
|
||
value = entry.get(key) if isinstance(entry, dict) else None
|
||
if isinstance(value, str) and value:
|
||
preferred_exts.append(value.lower())
|
||
if not preferred_exts:
|
||
preferred_exts = [ext.strip('.') for ext in MEDIA_EXTENSIONS]
|
||
candidate: Optional[Path] = None
|
||
for ext in preferred_exts:
|
||
ext = ext.lstrip('.').lower()
|
||
path = output_dir / f"{sanitized}.{ext}"
|
||
if path.exists():
|
||
candidate = path
|
||
break
|
||
if candidate is None:
|
||
try:
|
||
# Bandcamp/yt-dlp often prefixes uploader info, so fall back to a substring match.
|
||
for f in output_dir.glob(f"*{sanitized}*"):
|
||
if f.suffix.lower() in MEDIA_EXTENSIONS and f.is_file():
|
||
candidate = f
|
||
break
|
||
except OSError:
|
||
candidate = None
|
||
if candidate and str(candidate) not in seen:
|
||
matched.append(candidate)
|
||
seen.add(str(candidate))
|
||
return matched
|
||
|
||
|
||
def _snapshot_playlist_paths(
|
||
entries: Sequence[Dict[str, Any]],
|
||
output_dir: Path,
|
||
) -> tuple[list[Path], set[str]]:
|
||
"""Capture current playlist file paths for a given selection."""
|
||
matches = _find_playlist_files_from_entries(entries, output_dir)
|
||
resolved: set[str] = set()
|
||
for path in matches:
|
||
try:
|
||
resolved.add(str(path.resolve()))
|
||
except OSError:
|
||
resolved.add(str(path))
|
||
return matches, resolved
|
||
|
||
|
||
def _expand_playlist_selection(selection: str, num_items: int) -> str:
|
||
"""Expand playlist selection string, handling wildcards.
|
||
|
||
Args:
|
||
selection: Selection string like '1,3,5-8' or '*'
|
||
num_items: Total number of items in playlist
|
||
|
||
Returns:
|
||
Expanded selection string like '1,3,5,6,7,8' or '1-18' for '*'
|
||
"""
|
||
if selection.strip() == "*":
|
||
# Wildcard: select all items
|
||
return f"1-{num_items}"
|
||
|
||
# Return as-is if not wildcard (yt-dlp will handle ranges and lists)
|
||
return selection
|
||
|
||
|
||
def _parse_selection_string(selection: str) -> List[int]:
|
||
"""Parse selection string into list of integers.
|
||
|
||
Handles formats like:
|
||
- "2" -> [2]
|
||
- "1,3,5" -> [1, 3, 5]
|
||
- "1-3" -> [1, 2, 3]
|
||
- "1,3-5,7" -> [1, 3, 4, 5, 7]
|
||
|
||
Args:
|
||
selection: Selection string
|
||
|
||
Returns:
|
||
List of integer indices
|
||
"""
|
||
result = []
|
||
for part in selection.split(','):
|
||
part = part.strip()
|
||
if '-' in part:
|
||
# Range like "3-5"
|
||
try:
|
||
start, end = part.split('-')
|
||
start_num = int(start.strip())
|
||
end_num = int(end.strip())
|
||
result.extend(range(start_num, end_num + 1))
|
||
except (ValueError, AttributeError):
|
||
continue
|
||
else:
|
||
# Single number
|
||
try:
|
||
result.append(int(part))
|
||
except ValueError:
|
||
continue
|
||
return result
|
||
|
||
|
||
def _filter_and_sort_formats(formats: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
||
"""Filter and sort formats for user selection.
|
||
|
||
Filters out:
|
||
- Storyboards (webp, svg formats)
|
||
- Low quality audio (below ~128 kbps, typically 48kHz audio)
|
||
- Video below 360p
|
||
|
||
Sorts to prioritize:
|
||
- @1: Best combined audio+video (highest resolution, highest bitrate)
|
||
- @2: Best audio-only (highest bitrate audio)
|
||
- Then rest by quality
|
||
|
||
Args:
|
||
formats: List of format dicts from yt-dlp
|
||
|
||
Returns:
|
||
Filtered and sorted format list
|
||
"""
|
||
filtered = []
|
||
|
||
for fmt in formats:
|
||
format_id = fmt.get("format_id", "")
|
||
ext = fmt.get("ext", "")
|
||
vcodec = fmt.get("vcodec", "")
|
||
acodec = fmt.get("acodec", "")
|
||
height = fmt.get("height")
|
||
tbr = fmt.get("tbr") # Total bitrate
|
||
|
||
# Skip storyboards (webp images, svg, etc.)
|
||
if ext in {"webp", "svg", "mhtml"}:
|
||
continue
|
||
|
||
# Skip video-only formats below 360p
|
||
if vcodec != "none" and acodec == "none":
|
||
if height and height < 360:
|
||
continue
|
||
|
||
# Skip low-bitrate audio (typically 48kHz, very low quality)
|
||
# Keep audio with tbr >= 64 kbps (reasonable quality threshold)
|
||
if acodec != "none" and vcodec == "none":
|
||
if tbr and tbr < 64:
|
||
continue
|
||
|
||
filtered.append(fmt)
|
||
|
||
# Sort formats: best combined first, then best audio-only, then video-only
|
||
def format_sort_key(fmt: Dict[str, Any]) -> tuple:
|
||
vcodec = fmt.get("vcodec", "")
|
||
acodec = fmt.get("acodec", "")
|
||
height = fmt.get("height", 0) or 0
|
||
tbr = fmt.get("tbr", 0) or 0
|
||
|
||
# Category 0: has both audio and video (sort first)
|
||
# Category 1: audio only (sort second)
|
||
# Category 2: video only (sort last, by height desc)
|
||
if vcodec != "none" and acodec != "none":
|
||
category = 0
|
||
return (category, -height, -tbr)
|
||
elif acodec != "none" and vcodec == "none":
|
||
category = 1
|
||
return (category, -tbr) # Sort by bitrate descending
|
||
else: # Video only
|
||
category = 2
|
||
return (category, -height, -tbr) # Sort by height descending, then bitrate
|
||
|
||
return sorted(filtered, key=format_sort_key)
|
||
|
||
|
||
def _compute_file_hash(file_path: Path) -> Optional[str]:
|
||
"""Compute SHA256 hash of file."""
|
||
try:
|
||
return sha256_file(file_path)
|
||
except Exception:
|
||
return None
|
||
|
||
|
||
|
||
|
||
|
||
# ============================================================================
|
||
# Main Cmdlet Function
|
||
# ============================================================================
|
||
|
||
def _run(result: Any, args: Sequence[str], config: Dict[str, Any], emit_results: bool = True) -> int:
|
||
"""Download data from URLs with advanced options.
|
||
|
||
Accepts:
|
||
- Single URL as string
|
||
- Result object with 'url' or 'file_path' field
|
||
- List of results
|
||
- File containing URLs (one per line)
|
||
|
||
Returns:
|
||
Exit code (0 for success, 1 for failure)
|
||
"""
|
||
|
||
debug("Starting download-data")
|
||
|
||
collected_results: List[Dict[str, Any]] = []
|
||
|
||
def _emit(obj: Any) -> None:
|
||
"""Internal helper to collect and optionally emit results."""
|
||
collected_results.append(obj)
|
||
if emit_results:
|
||
pipeline_context.emit(obj)
|
||
|
||
# Track pipeline mode once so playlist handling can respect current run scope
|
||
stage_ctx = pipeline_context.get_stage_context()
|
||
in_pipeline = stage_ctx is not None and getattr(stage_ctx, 'total_stages', 1) > 1
|
||
|
||
# ========================================================================
|
||
# ARGUMENT PARSING
|
||
# ========================================================================
|
||
|
||
# Parse arguments using shared parser
|
||
parsed = parse_cmdlet_args(args, CMDLET)
|
||
|
||
audio_mode = parsed.get("audio", False)
|
||
format_selector = parsed.get("format")
|
||
list_formats_mode = parsed.get("list-formats", False)
|
||
|
||
clip_spec = parsed.get("clip")
|
||
clip_range = None
|
||
if clip_spec:
|
||
clip_range = _parse_time_range(clip_spec)
|
||
if clip_range:
|
||
debug(f"Clip range: {clip_spec} ({clip_range[0]}-{clip_range[1]} seconds)")
|
||
else:
|
||
log(f"Invalid clip format: {clip_spec}", file=sys.stderr)
|
||
return 1
|
||
|
||
cookies_path = parsed.get("cookies")
|
||
storage_location = parsed.get("storage")
|
||
|
||
torrent_mode = parsed.get("torrent", False)
|
||
wait_timeout = float(parsed.get("wait", 1800))
|
||
|
||
# Collect URLs from positional args and -url flag
|
||
# Both map to "url" in parsed result
|
||
urls_to_download = []
|
||
raw_urls = parsed.get("url", [])
|
||
if isinstance(raw_urls, str):
|
||
raw_urls = [raw_urls]
|
||
|
||
for arg in raw_urls:
|
||
if arg.lower().startswith(('http://', 'https://')):
|
||
# Check if it's a .torrent URL or file first
|
||
if '.torrent' in arg.lower():
|
||
debug(f"Processing torrent URL: {arg}")
|
||
magnet = _process_torrent_input(arg)
|
||
if magnet and magnet.lower().startswith('magnet:'):
|
||
urls_to_download.append(magnet)
|
||
debug(f"✓ Converted to magnet: {magnet[:70]}...")
|
||
elif magnet:
|
||
urls_to_download.append(magnet)
|
||
else:
|
||
log(f"✗ Failed to process torrent: {arg}", file=sys.stderr)
|
||
else:
|
||
urls_to_download.append(arg)
|
||
elif torrent_mode and (arg.lower().startswith('magnet:') or len(arg) == 40 or len(arg) == 64):
|
||
# In torrent mode, accept magnet links or torrent hashes (40-char SHA1 or 64-char SHA256)
|
||
urls_to_download.append(arg)
|
||
debug(f"Torrent/magnet added: {arg[:50]}...")
|
||
elif _is_torrent_file_or_url(arg):
|
||
# Handle .torrent files and URLs
|
||
log(f"Processing torrent file/URL: {arg}", flush=True)
|
||
magnet = _process_torrent_input(arg)
|
||
if magnet and magnet.lower().startswith('magnet:'):
|
||
urls_to_download.append(magnet)
|
||
log(f"✓ Converted to magnet: {magnet[:70]}...", flush=True)
|
||
elif magnet:
|
||
urls_to_download.append(magnet)
|
||
else:
|
||
log(f"✗ Failed to process torrent: {arg}", file=sys.stderr)
|
||
else:
|
||
# Treat as URL if it looks like one
|
||
if arg.lower().startswith(('magnet:', 'ftp://')):
|
||
urls_to_download.append(arg)
|
||
else:
|
||
# Check if it's a file containing URLs
|
||
path = Path(arg)
|
||
if path.exists() and path.is_file():
|
||
try:
|
||
with open(arg, 'r') as f:
|
||
for line in f:
|
||
line = line.strip()
|
||
if line and line.lower().startswith(('http://', 'https://')):
|
||
urls_to_download.append(line)
|
||
log(f"Loaded URLs from file: {arg}", flush=True)
|
||
except Exception as e:
|
||
log(f"Error reading file {arg}: {e}", file=sys.stderr)
|
||
else:
|
||
log(f"Ignored argument: {arg}", file=sys.stderr)
|
||
|
||
# Item selection (for playlists/formats)
|
||
# Note: -item flag is deprecated in favor of @N pipeline selection, but kept for compatibility
|
||
playlist_items = parsed.get("item")
|
||
if playlist_items:
|
||
log(f"Item selection: {playlist_items}", flush=True)
|
||
|
||
|
||
|
||
|
||
def _is_openlibrary_downloadable(ebook_access_val: Any, status_val: Any) -> bool:
|
||
access = str(ebook_access_val or "").strip().lower()
|
||
status = str(status_val or "").strip().lower()
|
||
if status == "download":
|
||
return True
|
||
if access in {"borrowable", "public", "full", "open"} or access.startswith("full "):
|
||
return True
|
||
if "✓" in str(status_val or ""):
|
||
return True
|
||
return False
|
||
|
||
# ========================================================================
|
||
# INPUT PROCESSING - Extract URLs from pipeline or arguments
|
||
# ========================================================================
|
||
|
||
# Initialize worker tracking for downloads
|
||
import uuid
|
||
from helper.local_library import LocalLibraryDB
|
||
from config import get_local_storage_path
|
||
|
||
worker_id = str(uuid.uuid4())
|
||
library_root = get_local_storage_path(config or {})
|
||
db = None
|
||
if library_root:
|
||
try:
|
||
db = LocalLibraryDB(library_root)
|
||
db.insert_worker(
|
||
worker_id,
|
||
"download",
|
||
title="Download Data",
|
||
description="Downloading files from search results",
|
||
pipe=pipeline_context.get_current_command_text()
|
||
)
|
||
except Exception as e:
|
||
log(f"⚠ Worker tracking unavailable: {e}", file=sys.stderr)
|
||
|
||
piped_results = normalize_result_input(result)
|
||
|
||
# Track files downloaded directly (e.g. Soulseek) to avoid "No URLs" error
|
||
files_downloaded_directly = 0
|
||
|
||
# Only process piped results if no URLs were provided in arguments
|
||
# This prevents picking up residue from previous commands when running standalone
|
||
if piped_results and not urls_to_download:
|
||
for item in piped_results:
|
||
url = None
|
||
origin = None
|
||
|
||
# ====== CHECK FOR PLAYLIST ITEM MARKER FROM add-file ======
|
||
# When add-file detects a playlist item and wants to download it
|
||
if isinstance(item, dict) and item.get('__playlist_url'):
|
||
playlist_url = item.get('__playlist_url')
|
||
item_num = item.get('__playlist_item', 1)
|
||
log(f"📍 Playlist item from add-file: #{item_num}", flush=True)
|
||
# Add to download list with marker
|
||
urls_to_download.append({
|
||
'__playlist_url': playlist_url,
|
||
'__playlist_item': int(item_num)
|
||
})
|
||
continue
|
||
|
||
# ====== CHECK FOR PLAYLIST ITEM SELECTION FIRST ======
|
||
# When user selects @12 from a playlist, item is emitted dict with __action: "playlist-item:12"
|
||
if isinstance(item, dict) and '__action' in item and item['__action'].startswith('playlist-item:'):
|
||
playlist_url = item.get('__file_path')
|
||
playlist_action = item['__action'] # e.g., "playlist-item:12"
|
||
item_num = playlist_action.split(':')[1] # Extract item number (1-based)
|
||
|
||
if playlist_url:
|
||
# Playlist item selected - need to download this specific track
|
||
log(f"📍 Playlist item selected: #{item_num} - {item.get('title', 'Unknown')}", flush=True)
|
||
# Add to download list - the playlist will be probed and item extracted
|
||
# Store with special marker so we know which item to select
|
||
urls_to_download.append({
|
||
'__playlist_url': playlist_url,
|
||
'__playlist_item': int(item_num)
|
||
})
|
||
continue
|
||
|
||
# ====== CHECK FOR FORMAT SELECTION RESULT ======
|
||
if isinstance(item, dict) and item.get('format_id') is not None and item.get('source_url'):
|
||
log(f"🎬 Format selected from pipe: {item.get('format_id')}", flush=True)
|
||
log(f" Source URL: {item.get('source_url')}", flush=True)
|
||
# Store as dict so we can extract format_id + source_url during download
|
||
urls_to_download.append(item)
|
||
continue
|
||
elif hasattr(item, 'format_id') and hasattr(item, 'source_url') and item.format_id is not None:
|
||
log(f"🎬 Format selected from pipe: {item.format_id}", flush=True)
|
||
log(f" Source URL: {item.source_url}", flush=True)
|
||
urls_to_download.append({
|
||
'format_id': item.format_id,
|
||
'source_url': item.source_url,
|
||
})
|
||
continue
|
||
|
||
if isinstance(item, dict):
|
||
# Check for search provider results first
|
||
origin = item.get('origin')
|
||
if origin in {'openlibrary', 'libgen', 'soulseek', 'debrid'}:
|
||
# Handle search provider results
|
||
title = item.get('title', 'Item')
|
||
if origin == 'openlibrary':
|
||
# OpenLibrary: First check if lendable/downloadable via Archive.org
|
||
# Only route to LibGen if NOT available on Archive.org
|
||
metadata = item.get('full_metadata', {}) if isinstance(item.get('full_metadata'), dict) else {}
|
||
isbn = metadata.get('isbn') or item.get('isbn')
|
||
olid = metadata.get('olid') or item.get('olid')
|
||
|
||
log(f"[search-result] OpenLibrary: '{title}'", flush=True)
|
||
if isbn:
|
||
log(f" ISBN: {isbn}", flush=True)
|
||
|
||
# Check if book is borrowable from ebook_access field or status
|
||
ebook_access = metadata.get('ebook_access') or item.get('ebook_access', '')
|
||
status_text = metadata.get('status') or item.get('status', '')
|
||
archive_id = metadata.get('archive_id') or item.get('archive_id')
|
||
|
||
# Determine if borrowable based on new status vocabulary
|
||
is_borrowable = _is_openlibrary_downloadable(ebook_access, status_text)
|
||
|
||
if is_borrowable:
|
||
log(f" ✓ Available for borrowing on Archive.org", flush=True)
|
||
log(f" → Queued for auto-borrowing...", flush=True)
|
||
# Queue borrow request as special dict object
|
||
# We need OCAID (Archive.org ID), not just numeric OLID
|
||
ocaid = archive_id
|
||
|
||
if not ocaid and isbn:
|
||
# If no OCAID in metadata, fetch it from OpenLibrary ISBN lookup
|
||
try:
|
||
import requests
|
||
ol_url = f'https://openlibrary.org/isbn/{isbn}.json'
|
||
r = requests.get(ol_url, timeout=5)
|
||
if r.status_code == 200:
|
||
ol_data = r.json()
|
||
ocaid = ol_data.get('ocaid')
|
||
except Exception as e:
|
||
log(f" ⚠ Could not fetch OCAID from OpenLibrary: {e}", file=sys.stderr)
|
||
|
||
if ocaid:
|
||
urls_to_download.append({
|
||
'__borrow_request__': True,
|
||
'book_id': ocaid,
|
||
'isbn': isbn,
|
||
'title': title,
|
||
'olid': olid
|
||
})
|
||
else:
|
||
# OCAID not found - book claims borrowable but not on Archive.org
|
||
# Fall back to LibGen search instead
|
||
log(f" ⚠ Book marked borrowable but not found on Archive.org", file=sys.stderr)
|
||
if isbn:
|
||
try:
|
||
from helper.search_provider import get_provider
|
||
libgen_provider = get_provider("libgen", config)
|
||
if libgen_provider:
|
||
libgen_results = libgen_provider.search(f"isbn:{isbn}", limit=1)
|
||
if libgen_results:
|
||
libgen_result = libgen_results[0]
|
||
url = libgen_result.get('target') if isinstance(libgen_result, dict) else getattr(libgen_result, 'target', None)
|
||
if url:
|
||
urls_to_download.append(url)
|
||
log(f" ✓ Found on LibGen instead", flush=True)
|
||
else:
|
||
log(f" ⚠ Not found on LibGen", file=sys.stderr)
|
||
else:
|
||
log(f" ⚠ Not found on LibGen", file=sys.stderr)
|
||
else:
|
||
log(f" ⚠ LibGen provider not available", file=sys.stderr)
|
||
except Exception as e:
|
||
log(f" ✗ Error searching LibGen: {e}", file=sys.stderr)
|
||
else:
|
||
# Book is NOT borrowable - route to LibGen
|
||
if isbn:
|
||
log(f" ⚠ Not available on Archive.org - attempting LibGen...", flush=True)
|
||
try:
|
||
from helper.search_provider import get_provider
|
||
libgen_provider = get_provider("libgen", config)
|
||
if libgen_provider:
|
||
libgen_results = libgen_provider.search(f"isbn:{isbn}", limit=1)
|
||
if libgen_results:
|
||
libgen_result = libgen_results[0]
|
||
url = libgen_result.get('target') if isinstance(libgen_result, dict) else getattr(libgen_result, 'target', None)
|
||
if url:
|
||
urls_to_download.append(url)
|
||
log(f" ✓ Found on LibGen", flush=True)
|
||
else:
|
||
log(f" ⚠ Not found on LibGen", file=sys.stderr)
|
||
else:
|
||
log(f" ⚠ Not found on LibGen", flush=True)
|
||
log(f" ▶ To search LibGen: search-file -provider libgen 'isbn:{isbn}' | @1 | download-data", flush=True)
|
||
else:
|
||
log(f" ▶ To search LibGen: search-file -provider libgen 'isbn:{isbn}' | @1 | download-data", flush=True)
|
||
except Exception as e:
|
||
log(f" ⚠ Could not search LibGen: {e}", file=sys.stderr)
|
||
log(f" ▶ To search LibGen: search-file -provider libgen 'isbn:{isbn}' | @1 | download-data", flush=True)
|
||
else:
|
||
log(f" ⚠ ISBN not available", flush=True)
|
||
log(f" ▶ Visit: {item.get('target', 'https://openlibrary.org')}", flush=True)
|
||
log(f" ▶ Or find ISBN and use: search-file -provider libgen 'isbn:\"<ISBN>\"'", flush=True)
|
||
elif origin == 'soulseek':
|
||
# Handle Soulseek downloads using the provider
|
||
metadata = item.get('full_metadata', {}) if isinstance(item.get('full_metadata'), dict) else {}
|
||
username = metadata.get('username')
|
||
filename = metadata.get('filename')
|
||
size = item.get('size_bytes') or 0
|
||
|
||
if username and filename:
|
||
try:
|
||
import asyncio
|
||
from helper.search_provider import SoulSeekProvider
|
||
provider = SoulSeekProvider(config)
|
||
log(f"[search-result] Soulseek: '{title}'", flush=True)
|
||
log(f" ▶ Downloading from {username}...", flush=True)
|
||
|
||
if db:
|
||
db.append_worker_stdout(worker_id, f"Downloading from Soulseek: {title} (from {username})")
|
||
|
||
# Get temp directory from config
|
||
temp_dir = config.get('temp')
|
||
if temp_dir:
|
||
temp_dir = str(Path(temp_dir).expanduser())
|
||
|
||
# Call async download_file with asyncio.run()
|
||
success = asyncio.run(provider.download_file(
|
||
username=username,
|
||
filename=filename,
|
||
file_size=size,
|
||
target_dir=temp_dir
|
||
))
|
||
|
||
if success:
|
||
downloaded_file = Path(provider.DOWNLOAD_DIR) / Path(filename).name
|
||
if downloaded_file.exists():
|
||
log(f" ✓ Downloaded: {downloaded_file.name}", flush=True)
|
||
files_downloaded_directly += 1
|
||
if db:
|
||
db.append_worker_stdout(worker_id, f"✓ Downloaded: {downloaded_file.name}")
|
||
if pipeline_context._PIPE_ACTIVE:
|
||
# Create proper PipeObject result
|
||
result_dict = create_pipe_object_result(
|
||
source='soulseek',
|
||
identifier=filename,
|
||
file_path=str(downloaded_file),
|
||
cmdlet_name='download-data',
|
||
title=title,
|
||
target=str(downloaded_file), # Explicit target for add-file
|
||
extra={
|
||
"metadata": metadata,
|
||
"origin": "soulseek"
|
||
}
|
||
)
|
||
pipeline_context.emit(result_dict)
|
||
else:
|
||
log(f" ✗ Download failed (peer may be offline)", file=sys.stderr)
|
||
if db:
|
||
db.append_worker_stdout(worker_id, f"✗ Download failed for {title}")
|
||
log(f" ▶ Try another result: search-file -provider soulseek \"...\" | @2 | download-data", flush=True)
|
||
except Exception as e:
|
||
log(f" ✗ Download error: {e}", file=sys.stderr)
|
||
if db:
|
||
db.append_worker_stdout(worker_id, f"✗ Error: {e}")
|
||
log(f" ▶ Alternative: search-soulseek -download \"{title}\" -storage <location>", flush=True)
|
||
else:
|
||
log(f"[search-result] Soulseek: '{title}'", flush=True)
|
||
log(f" ⚠ Missing download info (username/filename)", flush=True)
|
||
if db:
|
||
db.append_worker_stdout(worker_id, f"⚠ Missing download info for {title}")
|
||
elif origin == 'libgen':
|
||
# LibGen results can use the direct URL
|
||
# Also extract mirrors dict for fallback if primary fails
|
||
url = item.get('target')
|
||
# Extract mirrors and book_id from full_metadata
|
||
metadata = item.get('full_metadata', {}) if isinstance(item.get('full_metadata'), dict) else {}
|
||
mirrors = metadata.get('mirrors', {})
|
||
book_id = metadata.get('book_id', '')
|
||
|
||
if url:
|
||
url_entry = {
|
||
'url': str(url),
|
||
'mirrors': mirrors, # Alternative mirrors for fallback
|
||
'book_id': book_id,
|
||
}
|
||
urls_to_download.append(url_entry)
|
||
log(f"[search-result] LibGen: '{title}'", flush=True)
|
||
log(f" ✓ Queued for download", flush=True)
|
||
if mirrors:
|
||
log(f" Mirrors available: {len(mirrors)}", flush=True)
|
||
elif origin == 'debrid':
|
||
# Debrid results can use download-data
|
||
url = item.get('target')
|
||
if url:
|
||
urls_to_download.append(str(url))
|
||
log(f"[search-result] Debrid: '{title}'", flush=True)
|
||
log(f" ✓ Queued for download", flush=True)
|
||
else:
|
||
# Regular fields for non-search results
|
||
url = item.get('url') or item.get('link') or item.get('href') or item.get('target')
|
||
else:
|
||
# Object attributes
|
||
origin = getattr(item, 'origin', None)
|
||
title = getattr(item, 'title', 'Item')
|
||
if origin in {'openlibrary', 'libgen', 'soulseek', 'debrid'}:
|
||
# Handle search provider results
|
||
if origin == 'openlibrary':
|
||
# OpenLibrary: First check if lendable/downloadable via Archive.org
|
||
# Only route to LibGen if NOT available on Archive.org
|
||
metadata = getattr(item, 'full_metadata', {}) if isinstance(getattr(item, 'full_metadata', None), dict) else {}
|
||
isbn = metadata.get('isbn') or getattr(item, 'isbn', None)
|
||
olid = metadata.get('olid') or getattr(item, 'olid', None)
|
||
|
||
log(f"[search-result] OpenLibrary: '{title}'", flush=True)
|
||
if isbn:
|
||
log(f" ISBN: {isbn}", flush=True)
|
||
|
||
# Check if book is borrowable from ebook_access field or status
|
||
ebook_access = metadata.get('ebook_access') or getattr(item, 'ebook_access', '')
|
||
status_text = metadata.get('status') or getattr(item, 'status', '')
|
||
archive_id = metadata.get('archive_id') or getattr(item, 'archive_id', '')
|
||
|
||
# Determine if borrowable using unified helper
|
||
is_borrowable = _is_openlibrary_downloadable(ebook_access, status_text)
|
||
|
||
if is_borrowable:
|
||
# Book IS borrowable on Archive.org
|
||
log(f" ✓ Available for borrowing on Archive.org", flush=True)
|
||
log(f" → Queued for auto-borrowing...", flush=True)
|
||
# Queue borrow request as special dict object
|
||
ocaid = archive_id
|
||
if not ocaid and isbn:
|
||
try:
|
||
import requests
|
||
ol_url = f'https://openlibrary.org/isbn/{isbn}.json'
|
||
r = requests.get(ol_url, timeout=5)
|
||
if r.status_code == 200:
|
||
ol_data = r.json()
|
||
ocaid = ol_data.get('ocaid')
|
||
except Exception as e:
|
||
log(f" ⚠ Could not fetch OCAID from OpenLibrary: {e}", file=sys.stderr)
|
||
if ocaid:
|
||
urls_to_download.append({
|
||
'__borrow_request__': True,
|
||
'book_id': ocaid,
|
||
'isbn': isbn,
|
||
'title': title,
|
||
'olid': olid or getattr(item, 'openlibrary_id', '')
|
||
})
|
||
else:
|
||
# OCAID not found - book claims borrowable but not on Archive.org
|
||
# Fall back to LibGen search instead
|
||
log(f" ⚠ No Archive.org ID found - attempting LibGen instead...", file=sys.stderr)
|
||
if isbn:
|
||
try:
|
||
from helper.search_provider import get_provider
|
||
libgen_provider = get_provider("libgen", config)
|
||
if libgen_provider:
|
||
libgen_results = libgen_provider.search(f"isbn:{isbn}", limit=1)
|
||
if libgen_results:
|
||
libgen_result = libgen_results[0]
|
||
url = libgen_result.get('target') if isinstance(libgen_result, dict) else getattr(libgen_result, 'target', None)
|
||
if url:
|
||
urls_to_download.append(url)
|
||
log(f" ✓ Found on LibGen instead", flush=True)
|
||
else:
|
||
log(f" ⚠ Not found on LibGen", file=sys.stderr)
|
||
else:
|
||
log(f" ⚠ Not found on LibGen", file=sys.stderr)
|
||
else:
|
||
log(f" ⚠ LibGen provider not available", file=sys.stderr)
|
||
except Exception as e:
|
||
log(f" ✗ Error searching LibGen: {e}", file=sys.stderr)
|
||
else:
|
||
log(f" ⚠ ISBN not available for LibGen fallback", file=sys.stderr)
|
||
else:
|
||
# Book is NOT borrowable - route to LibGen
|
||
if isbn:
|
||
log(f" ⚠ Not available on Archive.org - attempting LibGen...", flush=True)
|
||
try:
|
||
from helper.search_provider import get_provider
|
||
libgen_provider = get_provider("libgen", config)
|
||
if libgen_provider:
|
||
libgen_results = libgen_provider.search(f"isbn:{isbn}", limit=1)
|
||
if libgen_results:
|
||
libgen_result = libgen_results[0]
|
||
url = libgen_result.get('target') if isinstance(libgen_result, dict) else getattr(libgen_result, 'target', None)
|
||
if url:
|
||
urls_to_download.append(url)
|
||
log(f" ✓ Found on LibGen", flush=True)
|
||
else:
|
||
log(f" ⚠ Not found on LibGen", file=sys.stderr)
|
||
else:
|
||
log(f" ⚠ Not found on LibGen", flush=True)
|
||
log(f" ▶ To search LibGen: search-file -provider libgen 'isbn:{isbn}' | @1 | download-data", flush=True)
|
||
else:
|
||
log(f" ▶ To search LibGen: search-file -provider libgen 'isbn:{isbn}' | @1 | download-data", flush=True)
|
||
except Exception as e:
|
||
log(f" ⚠ Could not search LibGen: {e}", file=sys.stderr)
|
||
log(f" ▶ To search LibGen: search-file -provider libgen 'isbn:{isbn}' | @1 | download-data", flush=True)
|
||
else:
|
||
log(f" ⚠ ISBN not available", flush=True)
|
||
log(f" ▶ Visit: {getattr(item, 'target', 'https://openlibrary.org')}", flush=True)
|
||
log(f" ▶ Or find ISBN and use: search-file -provider libgen 'isbn:\"<ISBN>\"'", flush=True)
|
||
elif origin == 'soulseek':
|
||
# Handle Soulseek downloads using the provider
|
||
metadata = getattr(item, 'full_metadata', {}) if isinstance(getattr(item, 'full_metadata', None), dict) else {}
|
||
username = metadata.get('username')
|
||
filename = metadata.get('filename')
|
||
size = getattr(item, 'size_bytes', 0) or 0
|
||
|
||
if username and filename:
|
||
try:
|
||
import asyncio
|
||
from helper.search_provider import SoulSeekProvider
|
||
provider = SoulSeekProvider(config)
|
||
log(f"[search-result] Soulseek: '{title}'", flush=True)
|
||
log(f" ▶ Downloading from {username}...", flush=True)
|
||
|
||
if db:
|
||
db.append_worker_stdout(worker_id, f"Downloading from Soulseek: {title} (from {username})")
|
||
|
||
# Get temp directory from config
|
||
temp_dir = config.get('temp')
|
||
if temp_dir:
|
||
temp_dir = str(Path(temp_dir).expanduser())
|
||
|
||
# Call async download_file with asyncio.run()
|
||
success = asyncio.run(provider.download_file(
|
||
username=username,
|
||
filename=filename,
|
||
file_size=size,
|
||
target_dir=temp_dir
|
||
))
|
||
|
||
if success:
|
||
downloaded_file = Path(provider.DOWNLOAD_DIR) / Path(filename).name
|
||
if downloaded_file.exists():
|
||
log(f" ✓ Downloaded: {downloaded_file.name}", flush=True)
|
||
files_downloaded_directly += 1
|
||
if db:
|
||
db.append_worker_stdout(worker_id, f"✓ Downloaded: {downloaded_file.name}")
|
||
if pipeline_context._PIPE_ACTIVE:
|
||
# Create proper PipeObject result
|
||
result_dict = create_pipe_object_result(
|
||
source='soulseek',
|
||
identifier=filename,
|
||
file_path=str(downloaded_file),
|
||
cmdlet_name='download-data',
|
||
title=title,
|
||
target=str(downloaded_file), # Explicit target for add-file
|
||
extra={
|
||
"metadata": metadata,
|
||
"origin": "soulseek"
|
||
}
|
||
)
|
||
pipeline_context.emit(result_dict)
|
||
else:
|
||
log(f" ✗ Download failed (peer may be offline)", file=sys.stderr)
|
||
if db:
|
||
db.append_worker_stdout(worker_id, f"✗ Download failed for {title}")
|
||
log(f" ▶ Try another result: search-file -provider soulseek \"...\" | @2 | download-data", flush=True)
|
||
except Exception as e:
|
||
log(f" ✗ Download error: {e}", file=sys.stderr)
|
||
if db:
|
||
db.append_worker_stdout(worker_id, f"✗ Error: {e}")
|
||
log(f" ▶ Alternative: search-soulseek -download \"{title}\" -storage <location>", flush=True)
|
||
else:
|
||
log(f"[search-result] Soulseek: '{title}'", flush=True)
|
||
log(f" ⚠ Missing download info (username/filename)", flush=True)
|
||
if db:
|
||
db.append_worker_stdout(worker_id, f"⚠ Missing download info for {title}")
|
||
elif origin == 'libgen':
|
||
# LibGen results with mirrors dict for fallback
|
||
url = getattr(item, 'target', None)
|
||
# Extract mirrors and book_id from full_metadata
|
||
metadata = getattr(item, 'full_metadata', {}) if isinstance(getattr(item, 'full_metadata', None), dict) else {}
|
||
mirrors = metadata.get('mirrors', {})
|
||
book_id = metadata.get('book_id', '')
|
||
|
||
if url:
|
||
url_entry = {
|
||
'url': str(url),
|
||
'mirrors': mirrors, # Alternative mirrors for fallback
|
||
'book_id': book_id,
|
||
}
|
||
urls_to_download.append(url_entry)
|
||
else:
|
||
urls_to_download.append(url) if url else None
|
||
elif origin == 'debrid':
|
||
url = getattr(item, 'target', None)
|
||
else:
|
||
url = getattr(item, 'url', None) or getattr(item, 'link', None) or getattr(item, 'href', None) or getattr(item, 'target', None)
|
||
|
||
if url:
|
||
urls_to_download.append(str(url))
|
||
|
||
if not urls_to_download and files_downloaded_directly == 0:
|
||
log(f"No downloadable URLs found", file=sys.stderr)
|
||
return 1
|
||
|
||
log(f"Processing {len(urls_to_download)} URL(s)", flush=True)
|
||
for i, u in enumerate(urls_to_download, 1):
|
||
if isinstance(u, dict):
|
||
log(f" [{i}] Format: {u.get('format_id', '?')} from {u.get('source_url', '?')[:60]}...", flush=True)
|
||
else:
|
||
log(f" [{i}] URL: {str(u)[:60]}...", flush=True)
|
||
|
||
# ========================================================================
|
||
# RESOLVE OUTPUT DIRECTORY
|
||
# ========================================================================
|
||
|
||
final_output_dir = None
|
||
|
||
# Priority 1: --storage flag
|
||
if storage_location:
|
||
try:
|
||
final_output_dir = SharedArgs.resolve_storage(storage_location)
|
||
log(f"Using storage location: {storage_location} → {final_output_dir}", flush=True)
|
||
except ValueError as e:
|
||
log(str(e), file=sys.stderr)
|
||
return 1
|
||
|
||
# Priority 2: Config resolver
|
||
if final_output_dir is None and resolve_output_dir is not None:
|
||
try:
|
||
final_output_dir = resolve_output_dir(config)
|
||
log(f"Using config resolver: {final_output_dir}", flush=True)
|
||
except Exception:
|
||
pass
|
||
|
||
# Priority 4: Config outfile
|
||
if final_output_dir is None and config and config.get("outfile"):
|
||
try:
|
||
final_output_dir = Path(config["outfile"]).expanduser()
|
||
log(f"Using config outfile: {final_output_dir}", flush=True)
|
||
except Exception:
|
||
pass
|
||
|
||
# Priority 5: Default (home/Videos)
|
||
if final_output_dir is None:
|
||
final_output_dir = Path.home() / "Videos"
|
||
log(f"Using default directory: {final_output_dir}", flush=True)
|
||
|
||
# Ensure directory exists
|
||
try:
|
||
final_output_dir.mkdir(parents=True, exist_ok=True)
|
||
except Exception as e:
|
||
log(f"Cannot create output directory {final_output_dir}: {e}", file=sys.stderr)
|
||
return 1
|
||
|
||
# ========================================================================
|
||
# DOWNLOAD EACH URL
|
||
# ========================================================================
|
||
|
||
downloaded_files = []
|
||
playlists_displayed = 0
|
||
formats_displayed = False  # Track whether a format selection table was displayed
|
||
exit_code = 0
|
||
|
||
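# Entries in urls_to_download are either plain URL strings or dicts prepared by the result
# handling above (key names as used in the branches of this loop, values illustrative):
#   format selections      {'format_id': ..., 'source_url': ..., 'vcodec': ..., 'acodec': ...}
#   borrow requests        {'__borrow_request__': True, 'book_id': ..., 'title': ..., 'isbn': ..., 'olid': ...}
#   LibGen entries         {'url': ..., 'mirrors': {...}, 'book_id': ...}
#   playlist item markers  {'__playlist_url': ..., '__playlist_item': N}
# Each shape is dispatched to its own branch inside this loop.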
for url in urls_to_download:
|
||
try:
|
||
selected_playlist_entries: list[Dict[str, Any]] = []
|
||
playlist_existing_paths: set[str] = set()
|
||
|
||
# ====== HANDLE FORMAT SELECTION FROM PIPED RESULT ======
|
||
# If url is a dict with format_id and source_url, extract them and override format_selector
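# Illustrative shape of such a piped result (these are the fields emitted by the format
# table further below; the concrete values here are only examples):
#   {'format_id': '137', 'source_url': 'https://www.youtube.com/watch?v=...',
#    'vcodec': 'avc1.640028', 'acodec': 'none', 'ext': 'mp4', 'filesize': 12345678}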
|
||
current_format_selector = format_selector
|
||
actual_url = url
|
||
if isinstance(url, dict) and url.get('format_id') and url.get('source_url'):
|
||
log(f"🎬 Format selected: {url.get('format_id')}", flush=True)
|
||
format_id = url.get('format_id')
|
||
current_format_selector = format_id
|
||
|
||
# If it's a video-only format (has vcodec but no acodec), add bestaudio
|
||
vcodec = url.get('vcodec', '')
|
||
acodec = url.get('acodec', '')
|
||
if vcodec and vcodec != "none" and (not acodec or acodec == "none"):
|
||
# Video-only format, add bestaudio automatically
|
||
current_format_selector = f"{format_id}+bestaudio"
|
||
log(f" ℹ️ Video-only format detected, automatically adding bestaudio", flush=True)
|
||
|
||
actual_url = url.get('source_url')
|
||
url = actual_url # Use the actual URL for further processing
|
||
|
||
# ====== AUTO-BORROW MODE - INTERCEPT SPECIAL BORROW REQUEST DICTS ======
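# A borrow request is a plain dict produced upstream (e.g. by an OpenLibrary search result).
# Keys read here: 'book_id' (required), plus optional 'title', 'isbn', 'olid' and 'source_url'.
# Illustrative example:
#   {'__borrow_request__': True, 'book_id': 'someArchiveIdentifier', 'title': 'Example Title', 'isbn': '9780000000000'}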
|
||
if isinstance(url, dict) and url.get('__borrow_request__'):
|
||
try:
|
||
from helper.archive_client import credential_openlibrary, loan, get_book_infos, download
|
||
import tempfile
|
||
import shutil
|
||
|
||
book_id = url.get('book_id')
|
||
if not book_id:
|
||
log(f" ✗ Missing book ID for borrowing", file=sys.stderr)
|
||
exit_code = 1
|
||
continue
|
||
|
||
title_val = url.get('title', 'Unknown Book')
|
||
book_id_str = str(book_id)
|
||
|
||
log(f"[auto-borrow] Starting borrow for: {title_val}", flush=True)
|
||
log(f" Book ID: {book_id_str}", flush=True)
|
||
|
||
# Get Archive.org credentials
|
||
email, password = credential_openlibrary(config)
|
||
if not email or not password:
|
||
log(f" ✗ Archive.org credentials not configured", file=sys.stderr)
|
||
log(f" ▶ Set ARCHIVE_EMAIL and ARCHIVE_PASSWORD environment variables", file=sys.stderr)
|
||
exit_code = 1
|
||
continue
|
||
|
||
# Attempt to borrow and download
|
||
try:
|
||
log(f" → Logging into Archive.org...", flush=True)
|
||
from helper.archive_client import login
|
||
import requests
|
||
try:
|
||
session = login(email, password)
|
||
except requests.exceptions.Timeout:
|
||
log(f" ✗ Timeout logging into Archive.org (server not responding)", file=sys.stderr)
|
||
exit_code = 1
|
||
continue
|
||
except requests.exceptions.RequestException as e:
|
||
log(f" ✗ Error connecting to Archive.org: {e}", file=sys.stderr)
|
||
exit_code = 1
|
||
continue
|
||
|
||
log(f" → Borrowing book...", flush=True)
|
||
try:
|
||
session = loan(session, book_id_str, verbose=True)
|
||
except requests.exceptions.Timeout:
|
||
log(f" ✗ Timeout while borrowing (server not responding)", file=sys.stderr)
|
||
exit_code = 1
|
||
continue
|
||
except requests.exceptions.RequestException as e:
|
||
log(f" ✗ Error while borrowing: {e}", file=sys.stderr)
|
||
exit_code = 1
|
||
continue
|
||
|
||
log(f" → Extracting page information...", flush=True)
|
||
# Try both URL formats
|
||
book_urls = [
|
||
f"https://archive.org/borrow/{book_id_str}",
|
||
f"https://archive.org/details/{book_id_str}"
|
||
]
|
||
|
||
title = None
|
||
links = None
|
||
metadata = None
|
||
last_error = None
|
||
for book_url in book_urls:
|
||
try:
|
||
title, links, metadata = get_book_infos(session, book_url)
|
||
if title and links:
|
||
log(f" → Found {len(links)} pages", flush=True)
|
||
break
|
||
except requests.exceptions.Timeout:
|
||
last_error = "Timeout while extracting pages"
|
||
log(f" ⚠ Timeout while extracting from {book_url}", flush=True)
|
||
continue
|
||
except Exception as e:
|
||
last_error = str(e)
|
||
log(f" ⚠ Failed to extract from {book_url}: {e}", flush=True)
|
||
continue
|
||
|
||
if not links:
|
||
log(f" ✗ Could not extract book pages (Last error: {last_error})", file=sys.stderr)
|
||
exit_code = 1
|
||
continue
|
||
|
||
# Download pages
|
||
log(f" → Downloading {len(links)} pages...", flush=True)
|
||
with tempfile.TemporaryDirectory() as temp_dir:
|
||
# download(session, n_threads, directory, links, scale, book_id)
|
||
images = download(
|
||
session,
|
||
n_threads=4,
|
||
directory=temp_dir,
|
||
links=links,
|
||
scale=2,
|
||
book_id=str(book_id)
|
||
)
|
||
|
||
if not images:
|
||
log(f" ✗ No pages downloaded", file=sys.stderr)
|
||
exit_code = 1
|
||
continue
|
||
|
||
log(f" ✓ Downloaded {len(images)} pages", flush=True)
|
||
|
||
# Try to merge into PDF
|
||
try:
|
||
import img2pdf
|
||
log(f" → Merging pages into PDF...", flush=True)
|
||
|
||
filename = title if title else f"book_{book_id_str}"
|
||
filename = "".join(c for c in filename if c.isalnum() or c in (' ', '.', '-'))[:100]
|
||
output_path = Path(final_output_dir) / f"{filename}.pdf"
|
||
|
||
# Make unique filename if needed
|
||
i = 1
|
||
while output_path.exists():
|
||
output_path = Path(final_output_dir) / f"{filename}({i}).pdf"
|
||
i += 1
|
||
|
||
pdf_content = img2pdf.convert(images)
|
||
if pdf_content:
|
||
with open(output_path, 'wb') as f:
|
||
f.write(pdf_content)
|
||
|
||
log(f" ✓ Successfully borrowed and saved to: {output_path}", flush=True)
|
||
downloaded_files.append(str(output_path))
|
||
|
||
# Emit result for downstream cmdlets
|
||
file_hash = _compute_file_hash(output_path)
|
||
# Build tags including ISBN if available
|
||
emit_tags = ['book', 'borrowed', 'pdf']
|
||
isbn_tag = url.get('isbn')
|
||
if isbn_tag:
|
||
emit_tags.append(f'isbn:{isbn_tag}')
|
||
olid_tag = url.get('olid')
|
||
if olid_tag:
|
||
emit_tags.append(f'olid:{olid_tag}')
|
||
|
||
# Fetch OpenLibrary metadata tags
|
||
ol_tags = fetch_openlibrary_metadata_tags(isbn=isbn_tag, olid=olid_tag)
|
||
emit_tags.extend(ol_tags)
|
||
|
||
pipe_obj = create_pipe_object_result(
|
||
source='archive.org',
|
||
identifier=book_id_str,
|
||
file_path=str(output_path),
|
||
cmdlet_name='download-data',
|
||
title=title_val,
|
||
file_hash=file_hash,
|
||
tags=emit_tags,
|
||
source_url=url.get('source_url', f'archive.org/borrow/{book_id_str}')
|
||
)
|
||
pipeline_context.emit(pipe_obj)
|
||
exit_code = 0
|
||
except ImportError:
|
||
log(f" ⚠ img2pdf not available - saving pages as collection", file=sys.stderr)
|
||
# Just copy images to output dir
|
||
filename = title if title else f"book_{book_id_str}"
|
||
filename = "".join(c for c in filename if c.isalnum() or c in (' ', '.', '-'))[:100]
|
||
output_dir = Path(final_output_dir) / filename
|
||
i = 1
|
||
while output_dir.exists():
|
||
output_dir = Path(final_output_dir) / f"{filename}({i})"
|
||
i += 1
|
||
|
||
shutil.copytree(temp_dir, str(output_dir))
|
||
log(f" ✓ Successfully borrowed and saved to: {output_dir}", flush=True)
|
||
downloaded_files.append(str(output_dir))
|
||
|
||
# Emit result for downstream cmdlets
|
||
# Build tags including ISBN if available
|
||
emit_tags = ['book', 'borrowed', 'pages']
|
||
isbn_tag = url.get('isbn')
|
||
if isbn_tag:
|
||
emit_tags.append(f'isbn:{isbn_tag}')
|
||
olid_tag = url.get('olid')
|
||
if olid_tag:
|
||
emit_tags.append(f'olid:{olid_tag}')
|
||
|
||
# Fetch OpenLibrary metadata tags
|
||
ol_tags = fetch_openlibrary_metadata_tags(isbn=isbn_tag, olid=olid_tag)
|
||
emit_tags.extend(ol_tags)
|
||
|
||
pipe_obj = create_pipe_object_result(
|
||
source='archive.org',
|
||
identifier=book_id_str,
|
||
file_path=str(output_dir),
|
||
cmdlet_name='download-data',
|
||
title=title_val,
|
||
tags=emit_tags,
|
||
source_url=url.get('source_url', f'archive.org/borrow/{book_id_str}')
|
||
)
|
||
pipeline_context.emit(pipe_obj)
|
||
exit_code = 0
|
||
|
||
except Exception as e:
|
||
log(f" ✗ Borrow/download failed: {e}", file=sys.stderr)
|
||
import traceback
|
||
traceback.print_exc()
|
||
exit_code = 1
|
||
|
||
continue # Skip normal URL handling
|
||
|
||
except ImportError as e:
|
||
log(f" ✗ Archive.org tools not available: {e}", file=sys.stderr)
|
||
exit_code = 1
|
||
continue
|
||
except Exception as e:
|
||
log(f" ✗ Auto-borrow error: {e}", file=sys.stderr)
|
||
import traceback
|
||
traceback.print_exc()
|
||
exit_code = 1
|
||
continue
|
||
|
||
|
||
# ====== LIBGEN MIRROR FALLBACK MODE ======
|
||
# Handle libgen results with mirrors dict for fallback on failure
|
||
if isinstance(url, dict) and 'mirrors' in url:
|
||
try:
|
||
primary_url = url.get('url')
|
||
mirrors_dict = url.get('mirrors', {})
|
||
book_id = url.get('book_id', '')
|
||
|
||
if not primary_url:
|
||
log(f"Skipping libgen entry: no primary URL", file=sys.stderr)
|
||
exit_code = 1
|
||
continue
|
||
|
||
# Build list of mirrors to try: primary first, then alternatives
|
||
mirrors_to_try = [primary_url]
|
||
mirrors_to_try.extend(mirrors_dict.values())
|
||
|
||
# Remove duplicates while preserving order
|
||
mirrors_to_try = list(dict.fromkeys(mirrors_to_try))
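# dict.fromkeys() keeps the first occurrence of each mirror, e.g. ['a', 'b', 'a'] -> ['a', 'b'].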
|
||
|
||
log(f"🔄 LibGen download with mirror fallback (book_id: {book_id})", flush=True)
|
||
log(f" Primary: {primary_url[:80]}...", flush=True)
|
||
|
||
if len(mirrors_to_try) > 1:
|
||
log(f" {len(mirrors_to_try) - 1} alternative mirror(s) available", flush=True)
|
||
|
||
# Resolve cookies path
|
||
final_cookies_path_libgen = None
|
||
if cookies_path:
|
||
if resolve_cookies_path:
|
||
try:
|
||
final_cookies_path_libgen = resolve_cookies_path(config, Path(cookies_path))
|
||
except Exception:
|
||
final_cookies_path_libgen = Path(cookies_path).expanduser() if cookies_path else None
|
||
else:
|
||
final_cookies_path_libgen = Path(cookies_path).expanduser()
|
||
|
||
download_succeeded = False
|
||
last_error = None
|
||
successful_mirror = None
|
||
|
||
# Try each mirror in sequence using libgen_service's native download
|
||
for mirror_idx, mirror_url in enumerate(mirrors_to_try, 1):
|
||
try:
|
||
if mirror_idx > 1:
|
||
log(f" → Trying mirror #{mirror_idx}: {mirror_url[:80]}...", flush=True)
|
||
|
||
# Use libgen_service's download_from_mirror for proper libgen handling
|
||
from helper.libgen_service import download_from_mirror
|
||
|
||
# Generate filename from book_id and title
|
||
safe_title = "".join(c for c in str(title or "book") if c.isalnum() or c in (' ', '.', '-'))[:100]
|
||
file_path = final_output_dir / f"{safe_title}_{book_id}.pdf"
|
||
|
||
# Attempt download using libgen's native function
|
||
success = download_from_mirror(
|
||
mirror_url=mirror_url,
|
||
output_path=file_path,
|
||
log_info=lambda msg: log(f" {msg}", flush=True),
|
||
log_error=lambda msg: log(f" ⚠ {msg}", file=sys.stderr)
|
||
)
|
||
|
||
if success and file_path.exists():
|
||
log(f" ✓ Downloaded successfully from mirror #{mirror_idx}", flush=True)
|
||
successful_mirror = mirror_url
|
||
download_succeeded = True
|
||
|
||
# Emit result for downstream cmdlets
|
||
file_hash = _compute_file_hash(file_path)
|
||
emit_tags = ['libgen', 'book']
|
||
|
||
pipe_obj = create_pipe_object_result(
|
||
source='libgen',
|
||
identifier=book_id,
|
||
file_path=str(file_path),
|
||
cmdlet_name='download-data',
|
||
file_hash=file_hash,
|
||
tags=emit_tags,
|
||
source_url=successful_mirror
|
||
)
|
||
pipeline_context.emit(pipe_obj)
|
||
downloaded_files.append(str(file_path))
|
||
exit_code = 0
|
||
break # Success, stop trying mirrors
|
||
|
||
except Exception as e:
|
||
last_error = str(e)
|
||
if mirror_idx == 1:
|
||
log(f" ⚠ Primary mirror failed: {e}", flush=True)
|
||
else:
|
||
log(f" ⚠ Mirror #{mirror_idx} failed: {e}", flush=True)
|
||
|
||
if not download_succeeded:
|
||
log(f" ✗ All mirrors failed. Last error: {last_error}", file=sys.stderr)
|
||
if "getaddrinfo failed" in str(last_error) or "NameResolutionError" in str(last_error) or "Failed to resolve" in str(last_error):
|
||
log(f" ⚠ Network issue detected: Cannot resolve LibGen mirror hostnames", file=sys.stderr)
|
||
log(f" ▶ Check your network connection or try with a VPN/proxy", file=sys.stderr)
|
||
exit_code = 1
|
||
|
||
continue # Skip to next URL
|
||
|
||
except Exception as e:
|
||
log(f" ✗ LibGen mirror fallback error: {e}", file=sys.stderr)
|
||
import traceback
|
||
traceback.print_exc(file=sys.stderr)
|
||
exit_code = 1
|
||
continue
|
||
|
||
# Ensure URL is a string for normal handling
|
||
if not isinstance(url, str):
|
||
# Check if it's a playlist item marker
|
||
if isinstance(url, dict) and url.get('__playlist_url'):
|
||
playlist_url = url.get('__playlist_url')
|
||
item_num = url.get('__playlist_item', 1)
|
||
log(f"📍 Handling selected playlist item #{item_num}", flush=True)
|
||
# Convert to actual URL and set playlist_items to download only this item
|
||
url = playlist_url
|
||
playlist_items = str(item_num)
|
||
# Fall through to normal handling below
|
||
else:
|
||
log(f"Skipping invalid URL entry: {url}", file=sys.stderr)
|
||
continue
|
||
|
||
log(f"Probing URL: {url}", flush=True)
|
||
|
||
# ====== TORRENT MODE - INTERCEPT BEFORE NORMAL DOWNLOAD ======
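# Magnet/torrent URLs are not downloaded inline: they are handed to _download_torrent_worker on a
# background thread, a tracking record is emitted, and this loop moves on to the next URL.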
|
||
if torrent_mode or url.lower().startswith('magnet:'):
|
||
log(f"🧲 Torrent/magnet mode - spawning background worker...", flush=True)
|
||
|
||
try:
|
||
# Get API key from config
|
||
from config import get_debrid_api_key
|
||
api_key = get_debrid_api_key(config)
|
||
|
||
if not api_key:
|
||
log(f"✗ AllDebrid API key not found in config", file=sys.stderr)
|
||
exit_code = 1
|
||
continue
|
||
|
||
# Create a unique worker ID
|
||
worker_id = f"torrent_{uuid.uuid4().hex[:8]}"
|
||
|
||
# Get worker manager if available from config
|
||
worker_manager = config.get('_worker_manager')
|
||
|
||
# Create worker in manager if available
|
||
if worker_manager:
|
||
try:
|
||
worker_manager.track_worker(
|
||
worker_id,
|
||
worker_type="download_torrent",
|
||
title=f"Download: {url[:60]}...",
|
||
description=f"Torrent/magnet download via AllDebrid",
|
||
pipe=pipeline_context.get_current_command_text()
|
||
)
|
||
log(f"✓ Worker created (ID: {worker_id})", flush=True)
|
||
except Exception as e:
|
||
log(f"⚠ Failed to create worker: {e}", file=sys.stderr)
|
||
worker_manager = None
|
||
|
||
# Spawn background thread to handle the download
|
||
worker_thread = threading.Thread(
|
||
target=_download_torrent_worker,
|
||
args=(
|
||
worker_id,
|
||
url,
|
||
final_output_dir,
|
||
config,
|
||
api_key,
|
||
playlist_items,
|
||
audio_mode,
|
||
wait_timeout,
|
||
worker_manager,
|
||
),
|
||
daemon=False,
|
||
name=f"TorrentWorker_{worker_id}"
|
||
)
|
||
|
||
worker_thread.start()
|
||
log(f"✓ Background worker started (ID: {worker_id})", flush=True)
|
||
|
||
# Emit worker info so user can track it
|
||
worker_info = {
|
||
'worker_id': worker_id,
|
||
'worker_type': 'download_torrent',
|
||
'source_url': url,
|
||
'status': 'running',
|
||
'message': 'Downloading in background...'
|
||
}
|
||
pipeline_context.emit(worker_info)
|
||
|
||
continue
|
||
|
||
except ImportError:
|
||
log(f"✗ AllDebrid client not available", file=sys.stderr)
|
||
exit_code = 1
|
||
except Exception as e:
|
||
# Catches AllDebridError and other exceptions
|
||
log(f"✗ Failed to spawn torrent worker: {e}", file=sys.stderr)
|
||
import traceback
|
||
traceback.print_exc(file=sys.stderr)
|
||
exit_code = 1
|
||
|
||
continue # Skip to next URL
|
||
|
||
# ====== NORMAL DOWNLOAD MODE (HTTP/HTTPS) ======
|
||
|
||
# First, probe the URL to detect playlists and get info
|
||
# For YouTube URLs, ignore playlists and only probe the single video
|
||
is_youtube_url = isinstance(url, str) and ('youtube.com' in url or 'youtu.be' in url)
|
||
probe_info = probe_url(url, no_playlist=is_youtube_url)
|
||
is_actual_playlist = False # Track if we have a real multi-item playlist
|
||
|
||
if probe_info:
|
||
log(f"✓ Probed: {probe_info.get('title', url)} ({probe_info.get('extractor', 'unknown')})")
|
||
|
||
# If it's a playlist, show the result table and skip download for now
|
||
entries = probe_info.get("entries", [])
|
||
if entries and not playlist_items:
|
||
is_actual_playlist = True # We have a real playlist with multiple items
|
||
# Playlist detected but NO selection provided
|
||
# Always show table for user to select items
|
||
log(f"📋 Found playlist with {len(entries)} items")
|
||
_show_playlist_table(url, probe_info)
|
||
log(f"ℹ️ Playlist displayed. To select items, use @* or @1,3,5-8 syntax after piping results")
|
||
playlists_displayed += 1
|
||
continue # Skip to next URL - don't download playlist without selection
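# Typical round-trip (illustrative):
#   download-data https://youtube.com/playlist?list=XYZ   <- table shown, nothing downloaded yet
#   @1,3,5-8 | download-data                               <- re-run with those items selected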
|
||
elif entries and playlist_items:
|
||
is_actual_playlist = True # We have a real playlist with item selection
|
||
# Playlist detected WITH selection - will download below
|
||
# Expand wildcard if present
|
||
expanded_items = _expand_playlist_selection(playlist_items, len(entries))
|
||
playlist_items = expanded_items
|
||
selected_playlist_entries = _select_playlist_entries(entries, playlist_items)
|
||
log(f"📋 Found playlist with {len(entries)} items - downloading selected: {playlist_items}")
|
||
else:
|
||
log(f"Single item: {probe_info.get('title', 'Unknown')}")
|
||
|
||
# ====== FORMAT LISTING MODE ======
|
||
if list_formats_mode and isinstance(url, str) and url.startswith(('http://', 'https://')):
|
||
log(f"Fetching formats for: {url}", flush=True)
|
||
from helper.download import list_formats
|
||
from result_table import ResultTable
|
||
|
||
all_formats = list_formats(url, no_playlist=is_youtube_url, playlist_items=playlist_items)
|
||
if all_formats:
|
||
# Filter and sort formats for better user experience
|
||
formats = _filter_and_sort_formats(all_formats)
|
||
|
||
# Create result table for format display
|
||
table = ResultTable(title=f"Available Formats - {probe_info.get('title', 'Unknown')}")
|
||
|
||
for fmt in formats:
|
||
row = table.add_row()
|
||
row.add_column("Format ID", fmt.get("format_id", ""))
|
||
|
||
# Build resolution/bitrate string
|
||
vcodec = fmt.get("vcodec", "")
|
||
acodec = fmt.get("acodec", "")
|
||
height = fmt.get("height")
|
||
tbr = fmt.get("tbr")
|
||
|
||
if vcodec != "none" and acodec != "none":
|
||
# Video + audio
|
||
res_str = fmt.get("resolution", "")
|
||
elif acodec != "none" and vcodec == "none":
|
||
# Audio only - show bitrate
|
||
res_str = f"{tbr:.0f} kbps" if tbr else "audio"
|
||
else:
|
||
# Video only
|
||
res_str = fmt.get("resolution", "")
|
||
|
||
row.add_column("Resolution", res_str)
|
||
|
||
# Build codec string (merged vcodec/acodec)
|
||
codec_parts = []
|
||
if vcodec and vcodec != "none":
|
||
codec_parts.append(f"v:{vcodec}")
|
||
if acodec and acodec != "none":
|
||
codec_parts.append(f"a:{acodec}")
|
||
codec_str = " | ".join(codec_parts) if codec_parts else "unknown"
|
||
row.add_column("Codec", codec_str)
|
||
|
||
if fmt.get("filesize"):
|
||
size_mb = fmt["filesize"] / (1024 * 1024)
|
||
row.add_column("Size", f"{size_mb:.1f} MB")
|
||
|
||
# Set source command for @N expansion
|
||
table.set_source_command("download-data", [url])
|
||
|
||
# Note: Row selection args are not set - users select with @N syntax directly
|
||
|
||
# Display table and emit as pipeline result
|
||
log(str(table), flush=True)
|
||
formats_displayed = True
|
||
|
||
# Store table for @N expansion so CLI can reconstruct commands
|
||
# Uses separate current_stage_table instead of result history table
|
||
pipeline_context.set_current_stage_table(table)
|
||
|
||
# Always emit formats so they can be selected with @N
|
||
for i, fmt in enumerate(formats, 1):
|
||
pipeline_context.emit({
|
||
"format_id": fmt.get("format_id", ""),
|
||
"format_string": fmt.get("format", ""),
|
||
"resolution": fmt.get("resolution", ""),
|
||
"vcodec": fmt.get("vcodec", ""),
|
||
"acodec": fmt.get("acodec", ""),
|
||
"ext": fmt.get("ext", ""),
|
||
"filesize": fmt.get("filesize"),
|
||
"source_url": url,
|
||
"index": i,
|
||
})
|
||
log(f"Use @N syntax to select a format and download", flush=True)
|
||
else:
|
||
log(f"✗ No formats available for this URL", file=sys.stderr)
|
||
|
||
continue # Skip download, just show formats
|
||
|
||
# ====== AUTO-DETECT MULTIPLE FORMATS ======
|
||
# Check if multiple formats exist and handle based on -item flag
|
||
if (not current_format_selector and not list_formats_mode and
|
||
isinstance(url, str) and url.startswith(('http://', 'https://'))):
|
||
# Check if this is a yt-dlp supported URL (YouTube, Vimeo, etc.)
|
||
from helper.download import is_url_supported_by_ytdlp, list_formats
|
||
from result_table import ResultTable
|
||
|
||
if is_url_supported_by_ytdlp(url):
|
||
log(f"Checking available formats for: {url}", flush=True)
|
||
all_formats = list_formats(url, no_playlist=is_youtube_url, playlist_items=playlist_items)
|
||
|
||
if all_formats:
|
||
# Filter and sort formats for better user experience
|
||
formats = _filter_and_sort_formats(all_formats)
|
||
|
||
# Handle -item selection for formats (single video)
|
||
if playlist_items and playlist_items.isdigit() and not is_actual_playlist:
|
||
idx = int(playlist_items)
|
||
if 0 < idx <= len(formats):
|
||
fmt = formats[idx-1]
|
||
current_format_selector = fmt.get("format_id")
|
||
log(f"Selected format #{idx}: {current_format_selector}")
|
||
playlist_items = None # Clear so it doesn't affect download options
|
||
else:
|
||
log(f"Invalid format index: {idx}", file=sys.stderr)
|
||
|
||
elif len(formats) > 1:
|
||
# Multiple formats available
|
||
log(f"📊 Found {len(formats)} available formats for: {probe_info.get('title', 'Unknown')}", flush=True)
|
||
|
||
# Always show table for format selection via @N syntax
|
||
# Show table and wait for @N selection
|
||
table = ResultTable(title=f"Available Formats - {probe_info.get('title', 'Unknown')}")
|
||
|
||
for fmt in formats:
|
||
row = table.add_row()
|
||
row.add_column("Format ID", fmt.get("format_id", ""))
|
||
|
||
# Build resolution/bitrate string
|
||
vcodec = fmt.get("vcodec", "")
|
||
acodec = fmt.get("acodec", "")
|
||
height = fmt.get("height")
|
||
tbr = fmt.get("tbr")
|
||
|
||
if vcodec != "none" and acodec != "none":
|
||
# Video + audio
|
||
res_str = fmt.get("resolution", "")
|
||
elif acodec != "none" and vcodec == "none":
|
||
# Audio only - show bitrate
|
||
res_str = f"{tbr:.0f} kbps" if tbr else "audio"
|
||
else:
|
||
# Video only
|
||
res_str = fmt.get("resolution", "")
|
||
|
||
row.add_column("Resolution", res_str)
|
||
|
||
# Build codec string (merged vcodec/acodec)
|
||
codec_parts = []
|
||
if vcodec and vcodec != "none":
|
||
codec_parts.append(f"v:{vcodec}")
|
||
if acodec and acodec != "none":
|
||
codec_parts.append(f"a:{acodec}")
|
||
codec_str = " | ".join(codec_parts) if codec_parts else "unknown"
|
||
row.add_column("Codec", codec_str)
|
||
|
||
if fmt.get("filesize"):
|
||
size_mb = fmt["filesize"] / (1024 * 1024)
|
||
row.add_column("Size", f"{size_mb:.1f} MB")
|
||
|
||
# Set source command for @N expansion
|
||
table.set_source_command("download-data", [url])
|
||
|
||
# Set row selection args so @N expands to "download-data URL -item N"
|
||
for i in range(len(formats)):
|
||
# i is 0-based index, but -item expects 1-based index
|
||
table.set_row_selection_args(i, ["-item", str(i + 1)])
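# e.g. selecting row 2 via @2 re-runs: download-data <url> -item 2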
|
||
|
||
# Display table and emit formats so they can be selected with @N
|
||
log(str(table), flush=True)
|
||
log(f"💡 Use @N syntax to select a format and download (e.g., @1)", flush=True)
|
||
|
||
# Store table for @N expansion so CLI can reconstruct commands
|
||
pipeline_context.set_current_stage_table(table)
|
||
|
||
# Emit formats as pipeline results for @N selection
|
||
for i, fmt in enumerate(formats, 1):
|
||
pipeline_context.emit({
|
||
"format_id": fmt.get("format_id", ""),
|
||
"format_string": fmt.get("format", ""),
|
||
"resolution": fmt.get("resolution", ""),
|
||
"vcodec": fmt.get("vcodec", ""),
|
||
"acodec": fmt.get("acodec", ""),
|
||
"filesize": fmt.get("filesize"),
|
||
"tbr": fmt.get("tbr"),
|
||
"source_url": url,
|
||
"index": i,
|
||
})
|
||
|
||
formats_displayed = True # Mark that we displayed formats
|
||
continue # Skip download, user must select format via @N
|
||
|
||
log(f"Downloading: {url}", flush=True)
|
||
|
||
# Resolve cookies path if specified
|
||
final_cookies_path = None
|
||
if cookies_path:
|
||
if resolve_cookies_path:
|
||
try:
|
||
final_cookies_path = resolve_cookies_path(config, Path(cookies_path))
|
||
except Exception:
|
||
final_cookies_path = Path(cookies_path).expanduser() if cookies_path else None
|
||
else:
|
||
final_cookies_path = Path(cookies_path).expanduser()
|
||
|
||
# Create download options - use correct parameter names
|
||
# Mode is "audio" or "video", required field
|
||
mode = "audio" if audio_mode else "video"
|
||
|
||
# Detect YouTube URLs and set no_playlist to download only the single video
|
||
is_youtube_url = isinstance(url, str) and ('youtube.com' in url or 'youtu.be' in url)
|
||
|
||
download_opts = DownloadOptions(
|
||
url=url,
|
||
mode=mode,
|
||
output_dir=final_output_dir,
|
||
cookies_path=final_cookies_path,
|
||
ytdl_format=current_format_selector, # Use per-URL format override if available
|
||
clip_sections=f"{clip_range[0]}-{clip_range[1]}" if clip_range else None,
|
||
playlist_items=playlist_items,
|
||
no_playlist=is_youtube_url, # For YouTube, ignore playlist URLs and download single video
|
||
)
|
||
|
||
# For playlist downloads, capture existing files BEFORE download
|
||
if playlist_items and selected_playlist_entries:
|
||
_, playlist_existing_paths = _snapshot_playlist_paths(selected_playlist_entries, final_output_dir)
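# The same snapshot is taken again after download_media() returns; files present only in the
# second snapshot are treated as newly downloaded, the rest are reused as cached copies.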
|
||
|
||
# Call download_media from helper - no show_progress param
|
||
result_data = download_media(download_opts)
|
||
|
||
if result_data and result_data.path:
|
||
file_path = result_data.path
|
||
|
||
if file_path.exists():
|
||
# Check if this was a playlist download (is_actual_playlist tracks if we have a multi-item playlist)
|
||
if is_actual_playlist:
|
||
if not selected_playlist_entries:
|
||
log(
|
||
"⚠ Playlist metadata unavailable; cannot emit selected items for this stage.",
|
||
file=sys.stderr,
|
||
)
|
||
exit_code = 1
|
||
continue
|
||
|
||
matched_after, _ = _snapshot_playlist_paths(selected_playlist_entries, final_output_dir)
|
||
if not matched_after:
|
||
log(
|
||
"⚠ No playlist files found for the selected items after download.",
|
||
file=sys.stderr,
|
||
)
|
||
exit_code = 1
|
||
continue
|
||
|
||
new_playlist_files: list[Path] = []
|
||
for playlist_file in matched_after:
|
||
try:
|
||
path_key = str(playlist_file.resolve())
|
||
except OSError:
|
||
path_key = str(playlist_file)
|
||
if path_key not in playlist_existing_paths:
|
||
new_playlist_files.append(playlist_file)
|
||
|
||
emit_targets = new_playlist_files if new_playlist_files else matched_after
|
||
if new_playlist_files:
|
||
log(f"📋 Playlist download completed: {len(new_playlist_files)} new file(s)")
|
||
else:
|
||
log(f"📁 Reusing {len(emit_targets)} cached playlist file(s)", flush=True)
|
||
|
||
for playlist_file in emit_targets:
|
||
file_hash = _compute_file_hash(playlist_file)
|
||
|
||
tags = []
|
||
if extract_ytdlp_tags and result_data.tags:
|
||
tags = result_data.tags
|
||
|
||
pipe_obj = create_pipe_object_result(
|
||
source='download',
|
||
identifier=playlist_file.stem,
|
||
file_path=str(playlist_file),
|
||
cmdlet_name='download-data',
|
||
title=playlist_file.name,
|
||
file_hash=file_hash,
|
||
is_temp=False,
|
||
extra={
|
||
'url': url,
|
||
'tags': tags,
|
||
'audio_mode': audio_mode,
|
||
'format': format_selector,
|
||
'from_playlist': True,
|
||
},
|
||
)
|
||
|
||
downloaded_files.append(playlist_file)
|
||
pipeline_context.emit(pipe_obj)
|
||
else:
|
||
# Single file download
|
||
file_hash = result_data.hash_value or _compute_file_hash(file_path)
|
||
tags = result_data.tags if result_data.tags else []
|
||
|
||
pipe_obj = create_pipe_object_result(
|
||
source='download',
|
||
identifier=file_path.stem,
|
||
file_path=str(file_path),
|
||
cmdlet_name='download-data',
|
||
title=file_path.name,
|
||
file_hash=file_hash,
|
||
is_temp=False,
|
||
extra={
|
||
'url': url,
|
||
'tags': tags,
|
||
'audio_mode': audio_mode,
|
||
'format': format_selector,
|
||
'clipped': clip_range is not None,
|
||
}
|
||
)
|
||
|
||
downloaded_files.append(file_path)
|
||
pipeline_context.emit(pipe_obj)
|
||
|
||
log(f"✓ Downloaded: {file_path}", flush=True)
|
||
else:
|
||
log(f"Download returned no result for {url}", file=sys.stderr)
|
||
exit_code = 1
|
||
|
||
except Exception as e:
|
||
log(f"Error downloading {url}: {e}", file=sys.stderr)
|
||
import traceback
|
||
traceback.print_exc(file=sys.stderr)
|
||
exit_code = 1
|
||
|
||
# Success if we downloaded files or displayed playlists/formats
|
||
if downloaded_files or files_downloaded_directly > 0:
|
||
total_files = len(downloaded_files) + files_downloaded_directly
|
||
log(f"✓ Successfully downloaded {total_files} file(s)", flush=True)
|
||
if db:
    db.update_worker_status(worker_id, 'completed')
    db.close()
return 0
|
||
|
||
if playlists_displayed:
|
||
log(f"✓ Displayed {playlists_displayed} playlist(s) for selection", flush=True)
|
||
if db:
|
||
db.update_worker_status(worker_id, 'completed')
|
||
db.close()
|
||
return 0 # Success - playlists shown
|
||
|
||
if formats_displayed:
|
||
log(f"✓ Format selection table displayed - use @N to select and download", flush=True)
|
||
if db:
|
||
db.update_worker_status(worker_id, 'completed')
|
||
db.close()
|
||
return 0 # Success - formats shown
|
||
|
||
log(f"No files were downloaded or playlists displayed", file=sys.stderr)
|
||
if db:
|
||
db.update_worker_status(worker_id, 'completed')
|
||
db.close()
|
||
return 1
|
||
|
||
|
||
|
||
CMDLET = Cmdlet(
|
||
name="download-data",
|
||
exec=_run,
|
||
summary="Download data from URLs with playlist/clip support using yt-dlp",
|
||
usage="download-data <url> [options] or search-file | download-data [options]",
|
||
aliases=["download", "dl"],
|
||
args=[
|
||
CmdletArg(
|
||
name="url",
|
||
type="string",
|
||
required=False,
|
||
description="URL to download (HTTP/HTTPS or file with URL list)",
|
||
variadic=True
|
||
),
|
||
CmdletArg(
|
||
name="-url",
|
||
type="string",
|
||
description="URL to download (alias for positional argument)",
|
||
variadic=True
|
||
),
|
||
CmdletArg(
|
||
name="list-formats",
|
||
type="flag",
|
||
description="List available formats without downloading"
|
||
),
|
||
CmdletArg(
|
||
name="audio",
|
||
type="flag",
|
||
alias="a",
|
||
description="Download audio only (extract from video)"
|
||
),
|
||
CmdletArg(
|
||
name="video",
|
||
type="flag",
|
||
alias="v",
|
||
description="Download video (default if not specified)"
|
||
),
|
||
CmdletArg(
|
||
name="format",
|
||
type="string",
|
||
alias="fmt",
|
||
description="Explicit yt-dlp format selector (e.g., 'bestvideo+bestaudio')"
|
||
),
|
||
CmdletArg(
|
||
name="clip",
|
||
type="string",
|
||
description="Extract time range: MM:SS-MM:SS (e.g., 34:03-35:08) or seconds"
|
||
),
|
||
CmdletArg(
|
||
name="cookies",
|
||
type="string",
|
||
description="Path to cookies.txt file for authentication"
|
||
),
|
||
CmdletArg(
|
||
name="torrent",
|
||
type="flag",
|
||
description="Download torrent/magnet via AllDebrid (requires API key in config)"
|
||
),
|
||
CmdletArg(
|
||
name="wait",
|
||
type="float",
|
||
description="Wait time (seconds) for magnet processing timeout"
|
||
),
|
||
CmdletArg(
|
||
name="item",
|
||
type="string",
|
||
alias="items",
|
||
description="Item selection for playlists/formats: use '-item N' to select format N, or '-item' to show table for @N selection in next command"
|
||
),
|
||
SharedArgs.STORAGE, # Storage location: local, hydrus, 0x0, debrid, ftp
|
||
],
|
||
details=[
|
||
"Download media from URLs with advanced features.",
|
||
"",
|
||
"BASIC USAGE:",
|
||
" download-data https://youtube.com/watch?v=xyz",
|
||
" download-data https://example.com/file.pdf -storage local",
|
||
"",
|
||
"AUDIO/VIDEO OPTIONS:",
|
||
" -audio, -a Extract audio from video (M4A, MP3)",
|
||
" -video, -v Download as video (default)",
|
||
"",
|
||
"FORMAT SELECTION:",
|
||
" -format SELECTOR Specify yt-dlp format",
|
||
" Examples: 'best', 'bestvideo+bestaudio', '22'",
|
||
"",
|
||
"FORMAT/RESULT ITEM SELECTION:",
|
||
" -item Show available formats in table (see @N below)",
|
||
" -item N Auto-select and download format #N (e.g., -item 1)",
|
||
" Example: download-data URL -item 2 | add-file -storage local",
|
||
"",
|
||
"FORMAT SELECTION WITH @N SYNTAX:",
|
||
" 1. Show formats: download-data URL",
|
||
" 2. Select with @N: @1 | download-data | add-file",
|
||
" OR use -item N to skip manual selection",
|
||
"",
|
||
"CLIPPING:",
|
||
" -clip START-END Extract time range from media",
|
||
" Format: MM:SS-MM:SS (e.g., 34:03-35:08)",
|
||
" Also accepts: 2043-2108 (seconds)",
|
||
"",
|
||
"PLAYLIST MODE:",
|
||
" Automatically detects playlists",
|
||
" Shows numbered list of tracks",
|
||
" Download specific items: -item '1,3,5-8'",
|
||
" Download all items: -item '*'",
|
||
"",
|
||
"TORRENT MODE:",
|
||
" Download torrents/magnets via AllDebrid (if configured)",
|
||
" Usage: download-data -torrent magnet:?xt=urn:btih:... -item '1,3,5-8'",
|
||
" -wait SECONDS Maximum wait time for magnet processing (default: 1800)",
|
||
"",
|
||
"STORAGE LOCATIONS:",
|
||
" -storage local ~/Videos (default)",
|
||
" -storage hydrus ~/.hydrus/client_files",
|
||
" -storage 0x0 ~/Screenshots",
|
||
" -storage debrid ~/Debrid",
|
||
" -storage ftp ~/FTP",
|
||
"",
|
||
"EXAMPLES:",
|
||
" # Download YouTube video as audio",
|
||
" download-data https://youtube.com/watch?v=xyz -audio -storage local",
|
||
"",
|
||
" # Extract specific clip from video",
|
||
" download-data https://vimeo.com/123456 -clip 1:30-2:45 -format best",
|
||
"",
|
||
" # Download specific tracks from playlist",
|
||
" download-data https://youtube.com/playlist?list=xyz -item '1,3,5-8'",
|
||
"",
|
||
" # Download all items from playlist",
|
||
" download-data https://youtube.com/playlist?list=xyz -item '*'",
|
||
"",
|
||
" # Download with authentication",
|
||
" download-data https://example.com/content -cookies ~/cookies.txt",
|
||
"",
|
||
"TORRENT EXAMPLES:",
|
||
" # Download specific tracks from magnet link",
|
||
" download-data -torrent magnet:?xt=urn:btih:... -item '1,3,5-8' -storage local",
|
||
"",
|
||
" # Download all items from torrent and merge",
|
||
" download-data -torrent magnet:?xt=urn:btih:... -item '*' | merge-file | add-file",
|
||
"",
|
||
" # Download with custom wait time (5 minutes)",
|
||
" download-data -torrent magnet:?xt=urn:btih:... -wait 300 -item '1-5'",
|
||
]
|
||
)