2025-11-25 20:09:33 -08:00
|
|
|
|
"""Download data from URLs using yt-dlp with playlist, clipping, and format selection.
|
|
|
|
|
|
|
|
|
|
|
|
This is a merged implementation combining:
|
|
|
|
|
|
- cmdlets/download_data.py (pipeline wrapper)
|
|
|
|
|
|
- funact/download_data.py (feature-rich implementation)
|
|
|
|
|
|
- helper/download.py (low-level machinery)
|
|
|
|
|
|
|
|
|
|
|
|
Features:
|
|
|
|
|
|
- Direct file downloads and yt-dlp streaming sites
|
|
|
|
|
|
- Playlist detection with interactive track selection
|
|
|
|
|
|
- Clip extraction (time ranges like 34:03-35:08)
|
|
|
|
|
|
- Format selection and audio/video toggles
|
|
|
|
|
|
- Cookies file support
|
|
|
|
|
|
- Tag extraction and metadata integration
|
|
|
|
|
|
- Progress tracking and debug logging
|
|
|
|
|
|
- Pipeline integration with result emission
|
|
|
|
|
|
- Background torrent/magnet downloads via AllDebrid
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
from __future__ import annotations
|
|
|
|
|
|
|
|
|
|
|
|
import hashlib
|
|
|
|
|
|
import re
|
|
|
|
|
|
import sys
|
|
|
|
|
|
import threading
|
|
|
|
|
|
import time
|
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
from typing import Any, Dict, List, Optional, Sequence, Tuple
|
|
|
|
|
|
import uuid
|
|
|
|
|
|
|
|
|
|
|
|
from helper.logger import log, debug
|
|
|
|
|
|
from helper.download import download_media, probe_url
|
|
|
|
|
|
from helper.utils import sha256_file
|
|
|
|
|
|
from models import DownloadOptions
|
|
|
|
|
|
|
|
|
|
|
|
from . import register
|
|
|
|
|
|
from ._shared import Cmdlet, CmdletArg, SharedArgs, create_pipe_object_result, normalize_result_input, parse_cmdlet_args
|
|
|
|
|
|
import models
|
|
|
|
|
|
import pipeline as pipeline_context
|
|
|
|
|
|
from config import resolve_output_dir
|
|
|
|
|
|
from metadata import (
|
|
|
|
|
|
fetch_openlibrary_metadata_tags,
|
|
|
|
|
|
format_playlist_entry,
|
|
|
|
|
|
extract_ytdlp_tags
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
# ============================================================================
|
|
|
|
|
|
# Try to import optional dependencies
|
|
|
|
|
|
# ============================================================================
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
from yt_dlp.utils import sanitize_filename as ytdlp_sanitize_filename # type: ignore
|
|
|
|
|
|
except Exception: # pragma: no cover - optional dependency
|
|
|
|
|
|
ytdlp_sanitize_filename = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ============================================================================
|
|
|
|
|
|
# Background Worker for AllDebrid Downloads
|
|
|
|
|
|
# ============================================================================
|
|
|
|
|
|
|
|
|
|
|
|
def _download_torrent_worker(
|
|
|
|
|
|
worker_id: str,
|
|
|
|
|
|
magnet_url: str,
|
|
|
|
|
|
output_dir: Path,
|
|
|
|
|
|
config: Dict[str, Any],
|
|
|
|
|
|
api_key: str,
|
|
|
|
|
|
playlist_items: Optional[str] = None,
|
|
|
|
|
|
audio_mode: bool = False,
|
|
|
|
|
|
wait_timeout: int = 600,
|
|
|
|
|
|
worker_manager: Optional[Any] = None,
|
|
|
|
|
|
) -> None:
|
|
|
|
|
|
"""Background worker to download torrent/magnet via AllDebrid.
|
|
|
|
|
|
|
|
|
|
|
|
Runs in a separate thread and updates worker_manager with progress.
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
|
worker_id: Unique ID for this worker task
|
|
|
|
|
|
magnet_url: Magnet link or .torrent URL to download
|
|
|
|
|
|
output_dir: Directory to save downloaded files
|
|
|
|
|
|
config: Configuration dict
|
|
|
|
|
|
api_key: AllDebrid API key
|
|
|
|
|
|
playlist_items: Optional file selection (e.g., "1,3,5-8")
|
|
|
|
|
|
audio_mode: Whether to tag as audio or video
|
|
|
|
|
|
wait_timeout: Timeout in seconds for magnet processing
|
|
|
|
|
|
worker_manager: WorkerManager instance for progress updates
|
|
|
|
|
|
"""
|
|
|
|
|
|
worker = None
|
|
|
|
|
|
downloaded_files = []
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
from helper.alldebrid import AllDebridClient
|
|
|
|
|
|
|
|
|
|
|
|
# Get worker reference if manager provided
|
|
|
|
|
|
if worker_manager:
|
|
|
|
|
|
try:
|
|
|
|
|
|
workers = worker_manager.get_active_workers()
|
|
|
|
|
|
worker = next((w for w in workers if w.get('id') == worker_id), None)
|
|
|
|
|
|
except:
|
|
|
|
|
|
worker = None
|
|
|
|
|
|
|
|
|
|
|
|
def log_progress(message: str) -> None:
|
|
|
|
|
|
"""Log progress to both console and worker manager."""
|
|
|
|
|
|
debug(message)
|
|
|
|
|
|
if worker_manager and worker_id:
|
|
|
|
|
|
try:
|
|
|
|
|
|
worker_manager.log_step(worker_id, message)
|
|
|
|
|
|
except:
|
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
|
|
log_progress(f"[Worker {worker_id}] Submitting magnet to AllDebrid...")
|
|
|
|
|
|
client = AllDebridClient(api_key)
|
|
|
|
|
|
|
|
|
|
|
|
# Add magnet
|
|
|
|
|
|
magnet_info = client.magnet_add(magnet_url)
|
|
|
|
|
|
magnet_id = int(magnet_info.get('id', 0))
|
|
|
|
|
|
|
|
|
|
|
|
if magnet_id <= 0:
|
|
|
|
|
|
log_progress(f"[Worker {worker_id}] ✗ Failed to add magnet to AllDebrid")
|
|
|
|
|
|
if worker_manager:
|
|
|
|
|
|
try:
|
|
|
|
|
|
worker_manager.finish_worker(worker_id, "failed", f"Failed to add magnet")
|
|
|
|
|
|
except:
|
|
|
|
|
|
pass
|
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
log_progress(f"[Worker {worker_id}] ✓ Magnet added (ID: {magnet_id})")
|
|
|
|
|
|
|
|
|
|
|
|
# Poll for ready status
|
|
|
|
|
|
elapsed = 0
|
|
|
|
|
|
last_status_reported = 0
|
|
|
|
|
|
|
|
|
|
|
|
while elapsed < wait_timeout:
|
|
|
|
|
|
try:
|
|
|
|
|
|
status_info = client.magnet_status(magnet_id)
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log_progress(f"[Worker {worker_id}] ⚠ Failed to get status: {e}")
|
|
|
|
|
|
time.sleep(2)
|
|
|
|
|
|
elapsed += 2
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
|
|
status_code = status_info.get('statusCode', -1)
|
|
|
|
|
|
status_text = status_info.get('status', 'Unknown')
|
|
|
|
|
|
|
|
|
|
|
|
# Report progress every 5 seconds (avoid log spam)
|
|
|
|
|
|
if elapsed - last_status_reported >= 5 or elapsed < 2:
|
|
|
|
|
|
downloaded = status_info.get('downloaded', 0)
|
|
|
|
|
|
total_size = status_info.get('size', 0)
|
|
|
|
|
|
seeders = status_info.get('seeders', 0)
|
|
|
|
|
|
speed = status_info.get('downloadSpeed', 0)
|
|
|
|
|
|
|
|
|
|
|
|
if total_size > 0:
|
|
|
|
|
|
percent = (downloaded / total_size) * 100
|
|
|
|
|
|
speed_str = f" @ {speed / (1024**2):.1f} MB/s" if speed > 0 else ""
|
|
|
|
|
|
seeders_str = f" ({seeders} seeders)" if seeders > 0 else ""
|
|
|
|
|
|
progress_msg = f"[Worker {worker_id}] ⧗ {status_text}: {percent:.1f}% ({downloaded / (1024**3):.2f} / {total_size / (1024**3):.2f} GB){speed_str}{seeders_str}"
|
|
|
|
|
|
log_progress(progress_msg)
|
|
|
|
|
|
|
|
|
|
|
|
# Update worker with progress
|
|
|
|
|
|
if worker_manager:
|
|
|
|
|
|
try:
|
|
|
|
|
|
worker_manager.update_worker(
|
|
|
|
|
|
worker_id,
|
|
|
|
|
|
status="running",
|
|
|
|
|
|
progress=f"{percent:.1f}%",
|
|
|
|
|
|
details=progress_msg
|
|
|
|
|
|
)
|
|
|
|
|
|
except:
|
|
|
|
|
|
pass
|
|
|
|
|
|
else:
|
|
|
|
|
|
log_progress(f"[Worker {worker_id}] ⧗ {status_text}...")
|
|
|
|
|
|
|
|
|
|
|
|
last_status_reported = elapsed
|
|
|
|
|
|
|
|
|
|
|
|
if status_code == 4: # Ready
|
|
|
|
|
|
log_progress(f"[Worker {worker_id}] ✓ Files ready")
|
|
|
|
|
|
break
|
|
|
|
|
|
elif status_code >= 5: # Error
|
|
|
|
|
|
error_status = {
|
|
|
|
|
|
5: "Upload failed",
|
|
|
|
|
|
6: "Internal error during unpacking",
|
|
|
|
|
|
7: "Not downloaded in 20 minutes",
|
|
|
|
|
|
8: "File too big (>1TB)",
|
|
|
|
|
|
9: "Internal error",
|
|
|
|
|
|
10: "Download took >72 hours",
|
|
|
|
|
|
11: "Deleted on hoster website",
|
|
|
|
|
|
12: "Processing failed",
|
|
|
|
|
|
13: "Processing failed",
|
|
|
|
|
|
14: "Tracker error",
|
|
|
|
|
|
15: "No peers available"
|
|
|
|
|
|
}
|
|
|
|
|
|
error_msg = error_status.get(status_code, f"Unknown error {status_code}")
|
|
|
|
|
|
log_progress(f"[Worker {worker_id}] ✗ Magnet failed: {error_msg}")
|
|
|
|
|
|
if worker_manager:
|
|
|
|
|
|
try:
|
|
|
|
|
|
worker_manager.finish_worker(worker_id, "failed", error_msg)
|
|
|
|
|
|
except:
|
|
|
|
|
|
pass
|
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
time.sleep(2)
|
|
|
|
|
|
elapsed += 2
|
|
|
|
|
|
|
|
|
|
|
|
if elapsed >= wait_timeout:
|
|
|
|
|
|
log_progress(f"[Worker {worker_id}] ✗ Timeout waiting for magnet (>{wait_timeout}s)")
|
|
|
|
|
|
if worker_manager:
|
|
|
|
|
|
try:
|
|
|
|
|
|
worker_manager.finish_worker(worker_id, "failed", f"Timeout after {wait_timeout}s")
|
|
|
|
|
|
except:
|
|
|
|
|
|
pass
|
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
# Get files
|
|
|
|
|
|
files_result = client.magnet_links([magnet_id])
|
|
|
|
|
|
magnet_files = files_result.get(str(magnet_id), {})
|
|
|
|
|
|
if not magnet_files and isinstance(magnet_id, int):
|
|
|
|
|
|
# Try integer key as fallback
|
|
|
|
|
|
for key in files_result:
|
|
|
|
|
|
if str(key) == str(magnet_id):
|
|
|
|
|
|
magnet_files = files_result[key]
|
|
|
|
|
|
break
|
|
|
|
|
|
files_array = magnet_files.get('files', [])
|
|
|
|
|
|
|
|
|
|
|
|
if not files_array:
|
|
|
|
|
|
log_progress(f"[Worker {worker_id}] ✗ No files found in magnet")
|
|
|
|
|
|
if worker_manager:
|
|
|
|
|
|
try:
|
|
|
|
|
|
worker_manager.finish_worker(worker_id, "failed", "No files found in magnet")
|
|
|
|
|
|
except:
|
|
|
|
|
|
pass
|
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
log_progress(f"[Worker {worker_id}] ✓ Found {len(files_array)} file(s)")
|
|
|
|
|
|
|
|
|
|
|
|
# Extract download links
|
|
|
|
|
|
download_links = []
|
|
|
|
|
|
|
|
|
|
|
|
def extract_links(items, prefix=""):
|
|
|
|
|
|
if not isinstance(items, list):
|
|
|
|
|
|
return
|
|
|
|
|
|
for item in items:
|
|
|
|
|
|
if isinstance(item, dict):
|
|
|
|
|
|
name = item.get('n', '')
|
|
|
|
|
|
link = item.get('l', '')
|
|
|
|
|
|
size = item.get('s', 0)
|
|
|
|
|
|
entries = item.get('e', [])
|
|
|
|
|
|
|
|
|
|
|
|
if link:
|
|
|
|
|
|
download_links.append({
|
|
|
|
|
|
'link': link,
|
|
|
|
|
|
'name': name,
|
|
|
|
|
|
'size': size,
|
|
|
|
|
|
'path': f"{prefix}/{name}" if prefix else name
|
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
|
|
if entries:
|
|
|
|
|
|
extract_links(entries, f"{prefix}/{name}" if prefix else name)
|
|
|
|
|
|
|
|
|
|
|
|
extract_links(files_array)
|
|
|
|
|
|
|
|
|
|
|
|
if not download_links:
|
|
|
|
|
|
log_progress(f"[Worker {worker_id}] ✗ No downloadable files found")
|
|
|
|
|
|
if worker_manager:
|
|
|
|
|
|
try:
|
|
|
|
|
|
worker_manager.finish_worker(worker_id, "failed", "No downloadable files")
|
|
|
|
|
|
except:
|
|
|
|
|
|
pass
|
|
|
|
|
|
return
|
|
|
|
|
|
|
|
|
|
|
|
# Filter by playlist_items if specified
|
|
|
|
|
|
if playlist_items and playlist_items != '*':
|
|
|
|
|
|
# Parse selection like "1,3,5-8"
|
|
|
|
|
|
selected_indices = []
|
|
|
|
|
|
for part in playlist_items.split(','):
|
|
|
|
|
|
part = part.strip()
|
|
|
|
|
|
if '-' in part:
|
|
|
|
|
|
start, end = part.split('-')
|
|
|
|
|
|
selected_indices.extend(range(int(start)-1, int(end)))
|
|
|
|
|
|
else:
|
|
|
|
|
|
selected_indices.append(int(part)-1)
|
|
|
|
|
|
|
|
|
|
|
|
download_links = [download_links[i] for i in selected_indices if i < len(download_links)]
|
|
|
|
|
|
log_progress(f"[Worker {worker_id}] Downloading {len(download_links)} selected file(s)")
|
|
|
|
|
|
|
|
|
|
|
|
# Download each file
|
|
|
|
|
|
for idx, file_info in enumerate(download_links, 1):
|
|
|
|
|
|
link = file_info['link']
|
|
|
|
|
|
name = file_info['name']
|
|
|
|
|
|
|
|
|
|
|
|
log_progress(f"[Worker {worker_id}] ({idx}/{len(download_links)}) Downloading: {name}")
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
# Unlock the link
|
|
|
|
|
|
try:
|
|
|
|
|
|
actual_link = client.unlock_link(link)
|
|
|
|
|
|
if actual_link and actual_link != link:
|
|
|
|
|
|
link = actual_link
|
|
|
|
|
|
except:
|
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
|
|
# Download via HTTP
|
|
|
|
|
|
from helper.http_client import HTTPClient
|
|
|
|
|
|
|
|
|
|
|
|
output_dir.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
|
file_path = output_dir / name
|
|
|
|
|
|
file_path.parent.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
|
|
|
|
|
|
|
with HTTPClient() as http_client:
|
|
|
|
|
|
http_client.download(link, str(file_path))
|
|
|
|
|
|
|
|
|
|
|
|
log_progress(f"[Worker {worker_id}] ✓ Downloaded: {name}")
|
|
|
|
|
|
|
|
|
|
|
|
# Compute hash and emit result
|
|
|
|
|
|
file_hash = _compute_file_hash(file_path)
|
|
|
|
|
|
|
|
|
|
|
|
result_obj = {
|
|
|
|
|
|
'file_path': str(file_path),
|
|
|
|
|
|
'source_url': magnet_url,
|
|
|
|
|
|
'file_hash': file_hash,
|
|
|
|
|
|
'media_kind': 'audio' if audio_mode else 'video',
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
pipeline_context.emit(result_obj)
|
|
|
|
|
|
downloaded_files.append(file_path)
|
|
|
|
|
|
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log_progress(f"[Worker {worker_id}] ⚠ Failed to download {name}: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
if downloaded_files:
|
|
|
|
|
|
msg = f"✓ Torrent download complete ({len(downloaded_files)} file(s))"
|
|
|
|
|
|
log_progress(f"[Worker {worker_id}] {msg}")
|
|
|
|
|
|
if worker_manager:
|
|
|
|
|
|
try:
|
|
|
|
|
|
worker_manager.finish_worker(worker_id, "success", msg)
|
|
|
|
|
|
except:
|
|
|
|
|
|
pass
|
|
|
|
|
|
else:
|
|
|
|
|
|
if worker_manager:
|
|
|
|
|
|
try:
|
|
|
|
|
|
worker_manager.finish_worker(worker_id, "failed", "No files downloaded")
|
|
|
|
|
|
except:
|
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
|
|
except ImportError:
|
|
|
|
|
|
log_progress(f"[Worker {worker_id}] ✗ AllDebrid client not available")
|
|
|
|
|
|
if worker_manager:
|
|
|
|
|
|
try:
|
|
|
|
|
|
worker_manager.finish_worker(worker_id, "failed", "AllDebrid client not available")
|
|
|
|
|
|
except:
|
|
|
|
|
|
pass
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
import traceback
|
|
|
|
|
|
log_progress(f"[Worker {worker_id}] ✗ Torrent download failed: {e}")
|
|
|
|
|
|
if worker_manager:
|
|
|
|
|
|
try:
|
|
|
|
|
|
worker_manager.finish_worker(worker_id, "failed", str(e))
|
|
|
|
|
|
except:
|
|
|
|
|
|
pass
|
|
|
|
|
|
traceback.print_exc(file=sys.stderr)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ============================================================================
|
|
|
|
|
|
# CMDLET Metadata Declaration
|
|
|
|
|
|
# ============================================================================
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ============================================================================
|
|
|
|
|
|
# Torrent File Parsing
|
|
|
|
|
|
# ============================================================================
|
|
|
|
|
|
|
|
|
|
|
|
def _parse_torrent_file(file_path: str) -> Optional[str]:
|
|
|
|
|
|
"""Parse a .torrent file and extract magnet link.
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
|
file_path: Path to .torrent file
|
|
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
|
Magnet link string or None if parsing fails
|
|
|
|
|
|
"""
|
|
|
|
|
|
try:
|
|
|
|
|
|
import bencode3
|
|
|
|
|
|
except ImportError:
|
|
|
|
|
|
log("⚠ bencode3 module not found. Install: pip install bencode3", file=sys.stderr)
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
with open(file_path, 'rb') as f:
|
|
|
|
|
|
torrent_data = bencode3.bdecode(f.read())
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f"✗ Failed to parse torrent file: {e}", file=sys.stderr)
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
# Get info dict - bencode3 returns string keys, not bytes
|
|
|
|
|
|
info = torrent_data.get('info')
|
|
|
|
|
|
if not info:
|
|
|
|
|
|
log("✗ No info dict in torrent file", file=sys.stderr)
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
# Calculate info hash (SHA1 of bencoded info dict)
|
|
|
|
|
|
import hashlib
|
|
|
|
|
|
info_hash = hashlib.sha1(bencode3.bencode(info)).hexdigest()
|
|
|
|
|
|
|
|
|
|
|
|
# Get name
|
|
|
|
|
|
name = info.get('name', 'Unknown')
|
|
|
|
|
|
if isinstance(name, bytes):
|
|
|
|
|
|
name = name.decode('utf-8', errors='ignore')
|
|
|
|
|
|
|
|
|
|
|
|
# Create magnet link
|
|
|
|
|
|
magnet = f"magnet:?xt=urn:btih:{info_hash}&dn={name}"
|
|
|
|
|
|
|
|
|
|
|
|
# Add trackers if available
|
|
|
|
|
|
announce = torrent_data.get('announce')
|
|
|
|
|
|
if announce:
|
|
|
|
|
|
try:
|
|
|
|
|
|
tracker = announce if isinstance(announce, str) else announce.decode('utf-8', errors='ignore')
|
|
|
|
|
|
magnet += f"&tr={tracker}"
|
|
|
|
|
|
except:
|
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
|
|
announce_list = torrent_data.get('announce-list', [])
|
|
|
|
|
|
for tier in announce_list:
|
|
|
|
|
|
if isinstance(tier, list):
|
|
|
|
|
|
for tracker_item in tier:
|
|
|
|
|
|
try:
|
|
|
|
|
|
tracker = tracker_item if isinstance(tracker_item, str) else tracker_item.decode('utf-8', errors='ignore')
|
|
|
|
|
|
if tracker:
|
|
|
|
|
|
magnet += f"&tr={tracker}"
|
|
|
|
|
|
except:
|
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
|
|
debug(f"✓ Parsed torrent: {name} (hash: {info_hash})")
|
|
|
|
|
|
return magnet
|
|
|
|
|
|
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f"✗ Error parsing torrent metadata: {e}", file=sys.stderr)
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _download_torrent_file(url: str, temp_dir: Optional[Path] = None) -> Optional[str]:
|
|
|
|
|
|
"""Download a .torrent file from URL and parse it.
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
|
url: URL to .torrent file
|
|
|
|
|
|
temp_dir: Optional temp directory for storing downloaded file
|
|
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
|
Magnet link string or None if download/parsing fails
|
|
|
|
|
|
"""
|
|
|
|
|
|
try:
|
|
|
|
|
|
from helper.http_client import HTTPClient
|
|
|
|
|
|
except ImportError:
|
|
|
|
|
|
log("⚠ HTTPClient not available", file=sys.stderr)
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
# Download torrent file
|
|
|
|
|
|
debug(f"⇓ Downloading torrent file: {url}")
|
|
|
|
|
|
|
|
|
|
|
|
with HTTPClient(timeout=30.0) as client:
|
|
|
|
|
|
response = client.get(url)
|
|
|
|
|
|
response.raise_for_status()
|
|
|
|
|
|
torrent_data = response.content
|
|
|
|
|
|
|
|
|
|
|
|
# Create temp file
|
|
|
|
|
|
if temp_dir is None:
|
|
|
|
|
|
temp_dir = Path.home() / ".cache" / "downlow"
|
|
|
|
|
|
temp_dir.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
|
|
|
|
|
|
|
# Save to temp file
|
|
|
|
|
|
import hashlib
|
|
|
|
|
|
url_hash = hashlib.md5(url.encode()).hexdigest()[:8]
|
|
|
|
|
|
temp_file = temp_dir / f"torrent_{url_hash}.torrent"
|
|
|
|
|
|
temp_file.write_bytes(torrent_data)
|
|
|
|
|
|
|
|
|
|
|
|
debug(f"✓ Downloaded torrent file: {temp_file}")
|
|
|
|
|
|
|
|
|
|
|
|
# Parse it
|
|
|
|
|
|
magnet = _parse_torrent_file(str(temp_file))
|
|
|
|
|
|
|
|
|
|
|
|
# Clean up
|
|
|
|
|
|
try:
|
|
|
|
|
|
temp_file.unlink()
|
|
|
|
|
|
except:
|
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
|
|
return magnet
|
|
|
|
|
|
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f"✗ Failed to download/parse torrent: {e}", file=sys.stderr)
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _is_torrent_file_or_url(arg: str) -> bool:
|
|
|
|
|
|
"""Check if argument is a .torrent file path or URL.
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
|
arg: Argument to check
|
|
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
|
True if it's a .torrent file or URL
|
|
|
|
|
|
"""
|
|
|
|
|
|
arg_lower = arg.lower()
|
|
|
|
|
|
|
|
|
|
|
|
# Check if it's a .torrent file path
|
|
|
|
|
|
if arg_lower.endswith('.torrent'):
|
|
|
|
|
|
return Path(arg).exists() or arg_lower.startswith('http')
|
|
|
|
|
|
|
|
|
|
|
|
# Check if it's a URL to .torrent file
|
|
|
|
|
|
if arg_lower.startswith('http://') or arg_lower.startswith('https://'):
|
|
|
|
|
|
return '.torrent' in arg_lower
|
|
|
|
|
|
|
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _process_torrent_input(arg: str) -> Optional[str]:
|
|
|
|
|
|
"""Process torrent file or URL and convert to magnet link.
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
|
arg: .torrent file path or URL
|
|
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
|
Magnet link or original argument if not processable
|
|
|
|
|
|
"""
|
|
|
|
|
|
try:
|
|
|
|
|
|
if arg.lower().startswith('http://') or arg.lower().startswith('https://'):
|
|
|
|
|
|
# It's a URL
|
|
|
|
|
|
return _download_torrent_file(arg) or arg
|
|
|
|
|
|
else:
|
|
|
|
|
|
# It's a file path
|
|
|
|
|
|
if Path(arg).exists():
|
|
|
|
|
|
return _parse_torrent_file(arg) or arg
|
|
|
|
|
|
else:
|
|
|
|
|
|
return arg
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f"⚠ Error processing torrent: {e}", file=sys.stderr)
|
|
|
|
|
|
return arg
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ============================================================================
|
|
|
|
|
|
# Helper Functions
|
|
|
|
|
|
# ============================================================================
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _show_playlist_table(url: str, probe_info: Dict[str, Any]) -> Optional[Dict[str, Any]]:
|
|
|
|
|
|
"""Show playlist result table and get user selection.
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
|
url: Original URL
|
|
|
|
|
|
probe_info: Info dict from probe_url()
|
|
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
|
Modified probe_info with selected_entries, or None if user cancelled
|
|
|
|
|
|
"""
|
|
|
|
|
|
entries = probe_info.get("entries", [])
|
|
|
|
|
|
if not entries:
|
|
|
|
|
|
return probe_info
|
|
|
|
|
|
|
|
|
|
|
|
extractor = probe_info.get("extractor", "")
|
|
|
|
|
|
title = probe_info.get("title", "Playlist")
|
|
|
|
|
|
|
|
|
|
|
|
debug(f"📋 Detected playlist: {title} ({len(entries)} items) - {extractor}")
|
|
|
|
|
|
|
|
|
|
|
|
# Skip full metadata enrichment for speed - extract_flat usually provides enough info
|
|
|
|
|
|
# debug("📋 Fetching metadata for each item...")
|
|
|
|
|
|
# entries = enrich_playlist_entries(entries, extractor)
|
|
|
|
|
|
|
|
|
|
|
|
# Emit each playlist item as a separate result row
|
|
|
|
|
|
for i, entry in enumerate(entries, 1):
|
|
|
|
|
|
formatted = format_playlist_entry(entry, i, extractor)
|
|
|
|
|
|
|
|
|
|
|
|
# Build tags from available metadata
|
|
|
|
|
|
tags = []
|
|
|
|
|
|
artist = formatted.get("artist") or formatted.get("uploader", "")
|
|
|
|
|
|
if artist:
|
|
|
|
|
|
tags.append(artist)
|
|
|
|
|
|
|
|
|
|
|
|
album = formatted.get("album", "")
|
|
|
|
|
|
if album and album != title: # Don't repeat playlist title
|
|
|
|
|
|
tags.append(album)
|
|
|
|
|
|
|
|
|
|
|
|
# Extract individual fields for separate columns
|
|
|
|
|
|
duration = formatted.get("duration", 0)
|
|
|
|
|
|
duration_str = ""
|
|
|
|
|
|
if duration:
|
|
|
|
|
|
minutes = int(duration // 60)
|
|
|
|
|
|
seconds = int(duration % 60)
|
|
|
|
|
|
duration_str = f"{minutes}m{seconds}s"
|
|
|
|
|
|
tags.append(duration_str)
|
|
|
|
|
|
|
|
|
|
|
|
# Normalize extractor for comparison (remove special chars and case)
|
|
|
|
|
|
ext_lower = extractor.lower().replace(":", "").replace(" ", "")
|
|
|
|
|
|
|
|
|
|
|
|
track_number = None
|
|
|
|
|
|
# Add site-specific tags and fields
|
|
|
|
|
|
if "youtube" in ext_lower and formatted.get("channel"):
|
|
|
|
|
|
tags.append(f"channel:{formatted.get('channel')}")
|
|
|
|
|
|
elif "bandcamp" in ext_lower:
|
|
|
|
|
|
track_number = formatted.get("track_number", i)
|
|
|
|
|
|
tags.append(f"track:{track_number}")
|
|
|
|
|
|
|
|
|
|
|
|
# Create result row with separate columns for important metadata
|
|
|
|
|
|
# Build columns dynamically based on available data
|
|
|
|
|
|
columns = [
|
|
|
|
|
|
("#", i),
|
|
|
|
|
|
("Title", formatted["title"]),
|
|
|
|
|
|
]
|
|
|
|
|
|
|
|
|
|
|
|
# Add Artist column if available
|
|
|
|
|
|
if artist:
|
|
|
|
|
|
columns.append(("Artist", artist))
|
|
|
|
|
|
|
|
|
|
|
|
# Add Duration column if available
|
|
|
|
|
|
if duration_str:
|
|
|
|
|
|
columns.append(("Duration", duration_str))
|
|
|
|
|
|
|
|
|
|
|
|
# Add Track number column for music platforms
|
|
|
|
|
|
if track_number is not None:
|
|
|
|
|
|
columns.append(("Track", str(track_number)))
|
|
|
|
|
|
|
|
|
|
|
|
# Add Tags column for remaining tags (if any)
|
|
|
|
|
|
remaining_tags = [t for t in tags if t not in [artist, duration_str]]
|
|
|
|
|
|
if remaining_tags:
|
|
|
|
|
|
columns.append(("Tags", ", ".join(remaining_tags)))
|
|
|
|
|
|
|
|
|
|
|
|
# Create result row with compact columns display
|
|
|
|
|
|
# Using "columns" field tells ResultTable which columns to show
|
|
|
|
|
|
result_row = {
|
|
|
|
|
|
"title": formatted["title"],
|
|
|
|
|
|
"tags": tags,
|
|
|
|
|
|
"index": i,
|
|
|
|
|
|
# Store all metadata but don't display in table (use columns field)
|
|
|
|
|
|
"__source": "playlist-probe",
|
|
|
|
|
|
"__id": f"{i}",
|
|
|
|
|
|
"__file_path": url,
|
|
|
|
|
|
"__action": f"playlist-item:{i}",
|
|
|
|
|
|
"__artist": formatted.get("artist", ""),
|
|
|
|
|
|
"__duration": formatted.get("duration", 0),
|
|
|
|
|
|
"__extractor": extractor,
|
|
|
|
|
|
# Define which columns should be shown in the result table
|
|
|
|
|
|
"columns": columns
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
# Add site-specific metadata for pipeline use
|
|
|
|
|
|
if "youtube" in ext_lower:
|
|
|
|
|
|
result_row["__video_id"] = formatted.get("video_id", "")
|
|
|
|
|
|
result_row["__channel"] = formatted.get("channel", "")
|
|
|
|
|
|
elif "bandcamp" in ext_lower:
|
|
|
|
|
|
result_row["__track_number"] = formatted.get("track_number", i)
|
|
|
|
|
|
result_row["__album"] = formatted.get("album") or title
|
|
|
|
|
|
elif "spotify" in ext_lower:
|
|
|
|
|
|
result_row["__artists"] = formatted.get("artists", "")
|
|
|
|
|
|
result_row["__album"] = formatted.get("album", "")
|
|
|
|
|
|
|
|
|
|
|
|
pipeline_context.emit(result_row)
|
|
|
|
|
|
|
|
|
|
|
|
debug(f"ℹ️ Playlist items displayed. Use result table references (@1, @2, etc.) to select tracks.")
|
|
|
|
|
|
|
|
|
|
|
|
# Return modified probe info
|
|
|
|
|
|
return probe_info
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _parse_time_range(clip_spec: str) -> Optional[Tuple[int, int]]:
|
|
|
|
|
|
"""Parse time range from MM:SS-MM:SS or seconds format.
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
|
clip_spec: Time range string like "34:03-35:08" or "2043-2108"
|
|
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
|
Tuple of (start_seconds, end_seconds) or None if invalid
|
|
|
|
|
|
"""
|
|
|
|
|
|
try:
|
|
|
|
|
|
if '-' not in clip_spec:
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
parts = clip_spec.split('-')
|
|
|
|
|
|
if len(parts) != 2:
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
start_str, end_str = parts
|
|
|
|
|
|
|
|
|
|
|
|
# Try MM:SS format first
|
|
|
|
|
|
if ':' in start_str:
|
|
|
|
|
|
start_parts = start_str.split(':')
|
|
|
|
|
|
if len(start_parts) == 2:
|
|
|
|
|
|
start_sec = int(start_parts[0]) * 60 + int(start_parts[1])
|
|
|
|
|
|
else:
|
|
|
|
|
|
return None
|
|
|
|
|
|
else:
|
|
|
|
|
|
start_sec = int(start_str)
|
|
|
|
|
|
|
|
|
|
|
|
if ':' in end_str:
|
|
|
|
|
|
end_parts = end_str.split(':')
|
|
|
|
|
|
if len(end_parts) == 2:
|
|
|
|
|
|
end_sec = int(end_parts[0]) * 60 + int(end_parts[1])
|
|
|
|
|
|
else:
|
|
|
|
|
|
return None
|
|
|
|
|
|
else:
|
|
|
|
|
|
end_sec = int(end_str)
|
|
|
|
|
|
|
|
|
|
|
|
if start_sec >= end_sec:
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
return (start_sec, end_sec)
|
|
|
|
|
|
|
|
|
|
|
|
except (ValueError, AttributeError):
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
MEDIA_EXTENSIONS = {'.mp3', '.m4a', '.mp4', '.mkv', '.webm', '.flac', '.wav', '.aac'}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _parse_playlist_selection_indices(selection: Optional[str], total_items: int) -> list[int]:
|
|
|
|
|
|
"""Convert playlist selection string to 0-based indices."""
|
|
|
|
|
|
if total_items <= 0:
|
|
|
|
|
|
return []
|
|
|
|
|
|
if not selection or selection.strip() in {"*", ""}:
|
|
|
|
|
|
return list(range(total_items))
|
|
|
|
|
|
indices: list[int] = []
|
|
|
|
|
|
for part in selection.split(','):
|
|
|
|
|
|
part = part.strip()
|
|
|
|
|
|
if not part:
|
|
|
|
|
|
continue
|
|
|
|
|
|
if '-' in part:
|
|
|
|
|
|
bounds = part.split('-', 1)
|
|
|
|
|
|
try:
|
|
|
|
|
|
start = int(bounds[0])
|
|
|
|
|
|
end = int(bounds[1])
|
|
|
|
|
|
except ValueError:
|
|
|
|
|
|
continue
|
|
|
|
|
|
if start <= 0 or end <= 0:
|
|
|
|
|
|
continue
|
|
|
|
|
|
if start > end:
|
|
|
|
|
|
start, end = end, start
|
|
|
|
|
|
for idx in range(start - 1, end):
|
|
|
|
|
|
if 0 <= idx < total_items:
|
|
|
|
|
|
indices.append(idx)
|
|
|
|
|
|
else:
|
|
|
|
|
|
try:
|
|
|
|
|
|
idx = int(part) - 1
|
|
|
|
|
|
except ValueError:
|
|
|
|
|
|
continue
|
|
|
|
|
|
if 0 <= idx < total_items:
|
|
|
|
|
|
indices.append(idx)
|
|
|
|
|
|
seen: set[int] = set()
|
|
|
|
|
|
ordered: list[int] = []
|
|
|
|
|
|
for idx in indices:
|
|
|
|
|
|
if idx not in seen:
|
|
|
|
|
|
ordered.append(idx)
|
|
|
|
|
|
seen.add(idx)
|
|
|
|
|
|
return ordered
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _select_playlist_entries(entries: Any, selection: Optional[str]) -> list[Dict[str, Any]]:
|
|
|
|
|
|
"""Pick playlist entries according to a selection string."""
|
|
|
|
|
|
if not isinstance(entries, list):
|
|
|
|
|
|
return []
|
|
|
|
|
|
indices = _parse_playlist_selection_indices(selection, len(entries))
|
|
|
|
|
|
if not indices:
|
|
|
|
|
|
return []
|
|
|
|
|
|
selected: list[Dict[str, Any]] = []
|
|
|
|
|
|
for idx in indices:
|
|
|
|
|
|
entry = entries[idx]
|
|
|
|
|
|
if isinstance(entry, dict):
|
|
|
|
|
|
selected.append(entry)
|
|
|
|
|
|
return selected
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _sanitize_title_for_filename(title: Optional[str]) -> str:
|
|
|
|
|
|
"""Match yt-dlp's restricted filename sanitization for comparisons."""
|
|
|
|
|
|
if not title:
|
|
|
|
|
|
return ""
|
|
|
|
|
|
if ytdlp_sanitize_filename:
|
|
|
|
|
|
try:
|
|
|
|
|
|
return ytdlp_sanitize_filename(title, restricted=True)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
sanitized = re.sub(r"[^0-9A-Za-z._-]+", "_", title)
|
|
|
|
|
|
return sanitized.strip() or ""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _find_playlist_files_from_entries(
|
|
|
|
|
|
entries: Sequence[Dict[str, Any]],
|
|
|
|
|
|
output_dir: Path,
|
|
|
|
|
|
) -> list[Path]:
|
|
|
|
|
|
"""Resolve expected playlist files based on entry titles/exts."""
|
|
|
|
|
|
matched: list[Path] = []
|
|
|
|
|
|
seen: set[str] = set()
|
|
|
|
|
|
for entry in entries:
|
|
|
|
|
|
title = entry.get('title') if isinstance(entry, dict) else None
|
|
|
|
|
|
sanitized = _sanitize_title_for_filename(title)
|
|
|
|
|
|
if not sanitized:
|
|
|
|
|
|
continue
|
|
|
|
|
|
preferred_exts: list[str] = []
|
|
|
|
|
|
for key in ('ext', 'audio_ext', 'video_ext'):
|
|
|
|
|
|
value = entry.get(key) if isinstance(entry, dict) else None
|
|
|
|
|
|
if isinstance(value, str) and value:
|
|
|
|
|
|
preferred_exts.append(value.lower())
|
|
|
|
|
|
if not preferred_exts:
|
|
|
|
|
|
preferred_exts = [ext.strip('.') for ext in MEDIA_EXTENSIONS]
|
|
|
|
|
|
candidate: Optional[Path] = None
|
|
|
|
|
|
for ext in preferred_exts:
|
|
|
|
|
|
ext = ext.lstrip('.').lower()
|
|
|
|
|
|
path = output_dir / f"{sanitized}.{ext}"
|
|
|
|
|
|
if path.exists():
|
|
|
|
|
|
candidate = path
|
|
|
|
|
|
break
|
|
|
|
|
|
if candidate is None:
|
|
|
|
|
|
try:
|
|
|
|
|
|
# Bandcamp/yt-dlp often prefixes uploader info, so fall back to a substring match.
|
|
|
|
|
|
for f in output_dir.glob(f"*{sanitized}*"):
|
|
|
|
|
|
if f.suffix.lower() in MEDIA_EXTENSIONS and f.is_file():
|
|
|
|
|
|
candidate = f
|
|
|
|
|
|
break
|
|
|
|
|
|
except OSError:
|
|
|
|
|
|
candidate = None
|
|
|
|
|
|
if candidate and str(candidate) not in seen:
|
|
|
|
|
|
matched.append(candidate)
|
|
|
|
|
|
seen.add(str(candidate))
|
|
|
|
|
|
return matched
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _snapshot_playlist_paths(
|
|
|
|
|
|
entries: Sequence[Dict[str, Any]],
|
|
|
|
|
|
output_dir: Path,
|
|
|
|
|
|
) -> tuple[list[Path], set[str]]:
|
|
|
|
|
|
"""Capture current playlist file paths for a given selection."""
|
|
|
|
|
|
matches = _find_playlist_files_from_entries(entries, output_dir)
|
|
|
|
|
|
resolved: set[str] = set()
|
|
|
|
|
|
for path in matches:
|
|
|
|
|
|
try:
|
|
|
|
|
|
resolved.add(str(path.resolve()))
|
|
|
|
|
|
except OSError:
|
|
|
|
|
|
resolved.add(str(path))
|
|
|
|
|
|
return matches, resolved
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _expand_playlist_selection(selection: str, num_items: int) -> str:
|
|
|
|
|
|
"""Expand playlist selection string, handling wildcards.
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
|
selection: Selection string like '1,3,5-8' or '*'
|
|
|
|
|
|
num_items: Total number of items in playlist
|
|
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
|
Expanded selection string like '1,3,5,6,7,8' or '1-18' for '*'
|
|
|
|
|
|
"""
|
|
|
|
|
|
if selection.strip() == "*":
|
|
|
|
|
|
# Wildcard: select all items
|
|
|
|
|
|
return f"1-{num_items}"
|
|
|
|
|
|
|
|
|
|
|
|
# Return as-is if not wildcard (yt-dlp will handle ranges and lists)
|
|
|
|
|
|
return selection
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _parse_selection_string(selection: str) -> List[int]:
|
|
|
|
|
|
"""Parse selection string into list of integers.
|
|
|
|
|
|
|
|
|
|
|
|
Handles formats like:
|
|
|
|
|
|
- "2" -> [2]
|
|
|
|
|
|
- "1,3,5" -> [1, 3, 5]
|
|
|
|
|
|
- "1-3" -> [1, 2, 3]
|
|
|
|
|
|
- "1,3-5,7" -> [1, 3, 4, 5, 7]
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
|
selection: Selection string
|
|
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
|
List of integer indices
|
|
|
|
|
|
"""
|
|
|
|
|
|
result = []
|
|
|
|
|
|
for part in selection.split(','):
|
|
|
|
|
|
part = part.strip()
|
|
|
|
|
|
if '-' in part:
|
|
|
|
|
|
# Range like "3-5"
|
|
|
|
|
|
try:
|
|
|
|
|
|
start, end = part.split('-')
|
|
|
|
|
|
start_num = int(start.strip())
|
|
|
|
|
|
end_num = int(end.strip())
|
|
|
|
|
|
result.extend(range(start_num, end_num + 1))
|
|
|
|
|
|
except (ValueError, AttributeError):
|
|
|
|
|
|
continue
|
|
|
|
|
|
else:
|
|
|
|
|
|
# Single number
|
|
|
|
|
|
try:
|
|
|
|
|
|
result.append(int(part))
|
|
|
|
|
|
except ValueError:
|
|
|
|
|
|
continue
|
|
|
|
|
|
return result
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _filter_and_sort_formats(formats: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
|
|
|
|
|
|
"""Filter and sort formats for user selection.
|
|
|
|
|
|
|
|
|
|
|
|
Filters out:
|
|
|
|
|
|
- Storyboards (webp, svg formats)
|
|
|
|
|
|
- Low quality audio (below ~128 kbps, typically 48kHz audio)
|
|
|
|
|
|
- Video below 360p
|
|
|
|
|
|
|
|
|
|
|
|
Sorts to prioritize:
|
|
|
|
|
|
- @1: Best combined audio+video (highest resolution, highest bitrate)
|
|
|
|
|
|
- @2: Best audio-only (highest bitrate audio)
|
|
|
|
|
|
- Then rest by quality
|
|
|
|
|
|
|
|
|
|
|
|
Args:
|
|
|
|
|
|
formats: List of format dicts from yt-dlp
|
|
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
|
Filtered and sorted format list
|
|
|
|
|
|
"""
|
|
|
|
|
|
filtered = []
|
|
|
|
|
|
|
|
|
|
|
|
for fmt in formats:
|
|
|
|
|
|
format_id = fmt.get("format_id", "")
|
|
|
|
|
|
ext = fmt.get("ext", "")
|
|
|
|
|
|
vcodec = fmt.get("vcodec", "")
|
|
|
|
|
|
acodec = fmt.get("acodec", "")
|
|
|
|
|
|
height = fmt.get("height")
|
|
|
|
|
|
tbr = fmt.get("tbr") # Total bitrate
|
|
|
|
|
|
|
|
|
|
|
|
# Skip storyboards (webp images, svg, etc.)
|
|
|
|
|
|
if ext in {"webp", "svg", "mhtml"}:
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
|
|
# Skip video-only formats below 360p
|
|
|
|
|
|
if vcodec != "none" and acodec == "none":
|
|
|
|
|
|
if height and height < 360:
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
|
|
# Skip low-bitrate audio (typically 48kHz, very low quality)
|
|
|
|
|
|
# Keep audio with tbr >= 64 kbps (reasonable quality threshold)
|
|
|
|
|
|
if acodec != "none" and vcodec == "none":
|
|
|
|
|
|
if tbr and tbr < 64:
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
|
|
filtered.append(fmt)
|
|
|
|
|
|
|
|
|
|
|
|
# Sort formats: best combined first, then best audio-only, then video-only
|
|
|
|
|
|
def format_sort_key(fmt: Dict[str, Any]) -> tuple:
|
|
|
|
|
|
vcodec = fmt.get("vcodec", "")
|
|
|
|
|
|
acodec = fmt.get("acodec", "")
|
|
|
|
|
|
height = fmt.get("height", 0) or 0
|
|
|
|
|
|
tbr = fmt.get("tbr", 0) or 0
|
|
|
|
|
|
|
|
|
|
|
|
# Category 0: has both audio and video (sort first)
|
|
|
|
|
|
# Category 1: audio only (sort second)
|
|
|
|
|
|
# Category 2: video only (sort last, by height desc)
|
|
|
|
|
|
if vcodec != "none" and acodec != "none":
|
|
|
|
|
|
category = 0
|
|
|
|
|
|
return (category, -height, -tbr)
|
|
|
|
|
|
elif acodec != "none" and vcodec == "none":
|
|
|
|
|
|
category = 1
|
|
|
|
|
|
return (category, -tbr) # Sort by bitrate descending
|
|
|
|
|
|
else: # Video only
|
|
|
|
|
|
category = 2
|
|
|
|
|
|
return (category, -height, -tbr) # Sort by height descending, then bitrate
|
|
|
|
|
|
|
|
|
|
|
|
return sorted(filtered, key=format_sort_key)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _compute_file_hash(file_path: Path) -> Optional[str]:
|
|
|
|
|
|
"""Compute SHA256 hash of file."""
|
|
|
|
|
|
try:
|
|
|
|
|
|
return sha256_file(file_path)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ============================================================================
|
|
|
|
|
|
# Main Cmdlet Function
|
|
|
|
|
|
# ============================================================================
|
|
|
|
|
|
|
|
|
|
|
|
def _run(result: Any, args: Sequence[str], config: Dict[str, Any], emit_results: bool = True) -> int:
|
|
|
|
|
|
"""Download data from URLs with advanced options.
|
|
|
|
|
|
|
|
|
|
|
|
Accepts:
|
|
|
|
|
|
- Single URL as string
|
|
|
|
|
|
- Result object with 'url' or 'file_path' field
|
|
|
|
|
|
- List of results
|
|
|
|
|
|
- File containing URLs (one per line)
|
|
|
|
|
|
|
|
|
|
|
|
Returns:
|
|
|
|
|
|
Exit code (0 for success, 1 for failure)
|
|
|
|
|
|
"""
|
|
|
|
|
|
|
|
|
|
|
|
debug("Starting download-data")
|
|
|
|
|
|
|
|
|
|
|
|
collected_results: List[Dict[str, Any]] = []
|
|
|
|
|
|
|
|
|
|
|
|
def _emit(obj: Any) -> None:
|
|
|
|
|
|
"""Internal helper to collect and optionally emit results."""
|
|
|
|
|
|
collected_results.append(obj)
|
|
|
|
|
|
if emit_results:
|
|
|
|
|
|
pipeline_context.emit(obj)
|
|
|
|
|
|
|
|
|
|
|
|
# Track pipeline mode once so playlist handling can respect current run scope
|
|
|
|
|
|
stage_ctx = pipeline_context.get_stage_context()
|
|
|
|
|
|
in_pipeline = stage_ctx is not None and getattr(stage_ctx, 'total_stages', 1) > 1
|
|
|
|
|
|
|
|
|
|
|
|
# ========================================================================
|
|
|
|
|
|
# ARGUMENT PARSING
|
|
|
|
|
|
# ========================================================================
|
|
|
|
|
|
|
|
|
|
|
|
# Parse arguments using shared parser
|
|
|
|
|
|
parsed = parse_cmdlet_args(args, CMDLET)
|
|
|
|
|
|
|
|
|
|
|
|
audio_mode = parsed.get("audio", False)
|
|
|
|
|
|
format_selector = parsed.get("format")
|
|
|
|
|
|
list_formats_mode = parsed.get("list-formats", False)
|
|
|
|
|
|
|
|
|
|
|
|
clip_spec = parsed.get("clip")
|
|
|
|
|
|
clip_range = None
|
|
|
|
|
|
if clip_spec:
|
|
|
|
|
|
clip_range = _parse_time_range(clip_spec)
|
|
|
|
|
|
if clip_range:
|
|
|
|
|
|
debug(f"Clip range: {clip_spec} ({clip_range[0]}-{clip_range[1]} seconds)")
|
|
|
|
|
|
else:
|
|
|
|
|
|
log(f"Invalid clip format: {clip_spec}", file=sys.stderr)
|
|
|
|
|
|
return 1
|
|
|
|
|
|
|
|
|
|
|
|
cookies_path = parsed.get("cookies")
|
|
|
|
|
|
storage_location = parsed.get("storage")
|
|
|
|
|
|
|
|
|
|
|
|
torrent_mode = parsed.get("torrent", False)
|
|
|
|
|
|
wait_timeout = float(parsed.get("wait", 1800))
|
|
|
|
|
|
|
|
|
|
|
|
# Collect URLs from positional args and -url flag
|
|
|
|
|
|
# Both map to "url" in parsed result
|
|
|
|
|
|
urls_to_download = []
|
|
|
|
|
|
raw_urls = parsed.get("url", [])
|
|
|
|
|
|
if isinstance(raw_urls, str):
|
|
|
|
|
|
raw_urls = [raw_urls]
|
|
|
|
|
|
|
|
|
|
|
|
for arg in raw_urls:
|
|
|
|
|
|
if arg.lower().startswith(('http://', 'https://')):
|
|
|
|
|
|
# Check if it's a .torrent URL or file first
|
|
|
|
|
|
if '.torrent' in arg.lower():
|
|
|
|
|
|
debug(f"Processing torrent URL: {arg}")
|
|
|
|
|
|
magnet = _process_torrent_input(arg)
|
|
|
|
|
|
if magnet and magnet.lower().startswith('magnet:'):
|
|
|
|
|
|
urls_to_download.append(magnet)
|
|
|
|
|
|
debug(f"✓ Converted to magnet: {magnet[:70]}...")
|
|
|
|
|
|
elif magnet:
|
|
|
|
|
|
urls_to_download.append(magnet)
|
|
|
|
|
|
else:
|
|
|
|
|
|
log(f"✗ Failed to process torrent: {arg}", file=sys.stderr)
|
|
|
|
|
|
else:
|
|
|
|
|
|
urls_to_download.append(arg)
|
|
|
|
|
|
elif torrent_mode and (arg.lower().startswith('magnet:') or len(arg) == 40 or len(arg) == 64):
|
|
|
|
|
|
# In torrent mode, accept magnet links or torrent hashes (40-char SHA1 or 64-char SHA256)
|
|
|
|
|
|
urls_to_download.append(arg)
|
|
|
|
|
|
debug(f"Torrent/magnet added: {arg[:50]}...")
|
|
|
|
|
|
elif _is_torrent_file_or_url(arg):
|
|
|
|
|
|
# Handle .torrent files and URLs
|
|
|
|
|
|
log(f"Processing torrent file/URL: {arg}", flush=True)
|
|
|
|
|
|
magnet = _process_torrent_input(arg)
|
|
|
|
|
|
if magnet and magnet.lower().startswith('magnet:'):
|
|
|
|
|
|
urls_to_download.append(magnet)
|
|
|
|
|
|
log(f"✓ Converted to magnet: {magnet[:70]}...", flush=True)
|
|
|
|
|
|
elif magnet:
|
|
|
|
|
|
urls_to_download.append(magnet)
|
|
|
|
|
|
else:
|
|
|
|
|
|
log(f"✗ Failed to process torrent: {arg}", file=sys.stderr)
|
|
|
|
|
|
else:
|
|
|
|
|
|
# Treat as URL if it looks like one
|
|
|
|
|
|
if arg.lower().startswith(('magnet:', 'ftp://')):
|
|
|
|
|
|
urls_to_download.append(arg)
|
|
|
|
|
|
else:
|
|
|
|
|
|
# Check if it's a file containing URLs
|
|
|
|
|
|
path = Path(arg)
|
|
|
|
|
|
if path.exists() and path.is_file():
|
|
|
|
|
|
try:
|
|
|
|
|
|
with open(arg, 'r') as f:
|
|
|
|
|
|
for line in f:
|
|
|
|
|
|
line = line.strip()
|
|
|
|
|
|
if line and line.lower().startswith(('http://', 'https://')):
|
|
|
|
|
|
urls_to_download.append(line)
|
|
|
|
|
|
log(f"Loaded URLs from file: {arg}", flush=True)
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f"Error reading file {arg}: {e}", file=sys.stderr)
|
|
|
|
|
|
else:
|
|
|
|
|
|
log(f"Ignored argument: {arg}", file=sys.stderr)
|
|
|
|
|
|
|
|
|
|
|
|
# Item selection (for playlists/formats)
|
|
|
|
|
|
# Note: -item flag is deprecated in favor of @N pipeline selection, but kept for compatibility
|
|
|
|
|
|
playlist_items = parsed.get("item")
|
|
|
|
|
|
if playlist_items:
|
|
|
|
|
|
log(f"Item selection: {playlist_items}", flush=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _is_openlibrary_downloadable(ebook_access_val: Any, status_val: Any) -> bool:
|
|
|
|
|
|
access = str(ebook_access_val or "").strip().lower()
|
|
|
|
|
|
status = str(status_val or "").strip().lower()
|
|
|
|
|
|
if status == "download":
|
|
|
|
|
|
return True
|
|
|
|
|
|
if access in {"borrowable", "public", "full", "open"} or access.startswith("full "):
|
|
|
|
|
|
return True
|
|
|
|
|
|
if "✓" in str(status_val or ""):
|
|
|
|
|
|
return True
|
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
|
|
# ========================================================================
|
|
|
|
|
|
# INPUT PROCESSING - Extract URLs from pipeline or arguments
|
|
|
|
|
|
# ========================================================================
|
|
|
|
|
|
|
|
|
|
|
|
# Initialize worker tracking for downloads
|
|
|
|
|
|
import uuid
|
|
|
|
|
|
from helper.local_library import LocalLibraryDB
|
|
|
|
|
|
from config import get_local_storage_path
|
|
|
|
|
|
|
|
|
|
|
|
worker_id = str(uuid.uuid4())
|
|
|
|
|
|
library_root = get_local_storage_path(config or {})
|
|
|
|
|
|
db = None
|
|
|
|
|
|
if library_root:
|
|
|
|
|
|
try:
|
|
|
|
|
|
db = LocalLibraryDB(library_root)
|
|
|
|
|
|
db.insert_worker(
|
|
|
|
|
|
worker_id,
|
|
|
|
|
|
"download",
|
|
|
|
|
|
title="Download Data",
|
|
|
|
|
|
description="Downloading files from search results",
|
|
|
|
|
|
pipe=pipeline_context.get_current_command_text()
|
|
|
|
|
|
)
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f"⚠ Worker tracking unavailable: {e}", file=sys.stderr)
|
|
|
|
|
|
|
|
|
|
|
|
piped_results = normalize_result_input(result)
|
|
|
|
|
|
|
|
|
|
|
|
# Track files downloaded directly (e.g. Soulseek) to avoid "No URLs" error
|
|
|
|
|
|
files_downloaded_directly = 0
|
|
|
|
|
|
|
|
|
|
|
|
# Only process piped results if no URLs were provided in arguments
|
|
|
|
|
|
# This prevents picking up residue from previous commands when running standalone
|
|
|
|
|
|
if piped_results and not urls_to_download:
|
|
|
|
|
|
for item in piped_results:
|
|
|
|
|
|
url = None
|
|
|
|
|
|
origin = None
|
|
|
|
|
|
|
|
|
|
|
|
# ====== CHECK FOR PLAYLIST ITEM MARKER FROM add-file ======
|
|
|
|
|
|
# When add-file detects a playlist item and wants to download it
|
|
|
|
|
|
if isinstance(item, dict) and item.get('__playlist_url'):
|
|
|
|
|
|
playlist_url = item.get('__playlist_url')
|
|
|
|
|
|
item_num = item.get('__playlist_item', 1)
|
|
|
|
|
|
log(f"📍 Playlist item from add-file: #{item_num}", flush=True)
|
|
|
|
|
|
# Add to download list with marker
|
|
|
|
|
|
urls_to_download.append({
|
|
|
|
|
|
'__playlist_url': playlist_url,
|
|
|
|
|
|
'__playlist_item': int(item_num)
|
|
|
|
|
|
})
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
|
|
# ====== CHECK FOR PLAYLIST ITEM SELECTION FIRST ======
|
|
|
|
|
|
# When user selects @12 from a playlist, item is emitted dict with __action: "playlist-item:12"
|
|
|
|
|
|
if isinstance(item, dict) and '__action' in item and item['__action'].startswith('playlist-item:'):
|
|
|
|
|
|
playlist_url = item.get('__file_path')
|
|
|
|
|
|
playlist_action = item['__action'] # e.g., "playlist-item:12"
|
|
|
|
|
|
item_num = playlist_action.split(':')[1] # Extract item number (1-based)
|
|
|
|
|
|
|
|
|
|
|
|
if playlist_url:
|
|
|
|
|
|
# Playlist item selected - need to download this specific track
|
|
|
|
|
|
log(f"📍 Playlist item selected: #{item_num} - {item.get('title', 'Unknown')}", flush=True)
|
|
|
|
|
|
# Add to download list - the playlist will be probed and item extracted
|
|
|
|
|
|
# Store with special marker so we know which item to select
|
|
|
|
|
|
urls_to_download.append({
|
|
|
|
|
|
'__playlist_url': playlist_url,
|
|
|
|
|
|
'__playlist_item': int(item_num)
|
|
|
|
|
|
})
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
|
|
# ====== CHECK FOR FORMAT SELECTION RESULT ======
|
|
|
|
|
|
if isinstance(item, dict) and item.get('format_id') is not None and item.get('source_url'):
|
|
|
|
|
|
log(f"🎬 Format selected from pipe: {item.get('format_id')}", flush=True)
|
|
|
|
|
|
log(f" Source URL: {item.get('source_url')}", flush=True)
|
|
|
|
|
|
# Store as dict so we can extract format_id + source_url during download
|
|
|
|
|
|
urls_to_download.append(item)
|
|
|
|
|
|
continue
|
|
|
|
|
|
elif hasattr(item, 'format_id') and hasattr(item, 'source_url') and item.format_id is not None:
|
|
|
|
|
|
log(f"🎬 Format selected from pipe: {item.format_id}", flush=True)
|
|
|
|
|
|
log(f" Source URL: {item.source_url}", flush=True)
|
|
|
|
|
|
urls_to_download.append({
|
|
|
|
|
|
'format_id': item.format_id,
|
|
|
|
|
|
'source_url': item.source_url,
|
|
|
|
|
|
})
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
|
|
if isinstance(item, dict):
|
|
|
|
|
|
# Check for search provider results first
|
|
|
|
|
|
origin = item.get('origin')
|
|
|
|
|
|
if origin in {'openlibrary', 'libgen', 'soulseek', 'debrid'}:
|
|
|
|
|
|
# Handle search provider results
|
|
|
|
|
|
title = item.get('title', 'Item')
|
|
|
|
|
|
if origin == 'openlibrary':
|
|
|
|
|
|
# OpenLibrary: First check if lendable/downloadable via Archive.org
|
|
|
|
|
|
# Only route to LibGen if NOT available on Archive.org
|
|
|
|
|
|
metadata = item.get('full_metadata', {}) if isinstance(item.get('full_metadata'), dict) else {}
|
|
|
|
|
|
isbn = metadata.get('isbn') or item.get('isbn')
|
|
|
|
|
|
olid = metadata.get('olid') or item.get('olid')
|
|
|
|
|
|
|
|
|
|
|
|
log(f"[search-result] OpenLibrary: '{title}'", flush=True)
|
|
|
|
|
|
if isbn:
|
|
|
|
|
|
log(f" ISBN: {isbn}", flush=True)
|
|
|
|
|
|
|
|
|
|
|
|
# Check if book is borrowable from ebook_access field or status
|
|
|
|
|
|
ebook_access = metadata.get('ebook_access') or item.get('ebook_access', '')
|
|
|
|
|
|
status_text = metadata.get('status') or item.get('status', '')
|
|
|
|
|
|
archive_id = metadata.get('archive_id') or item.get('archive_id')
|
|
|
|
|
|
|
|
|
|
|
|
# Determine if borrowable based on new status vocabulary
|
|
|
|
|
|
is_borrowable = _is_openlibrary_downloadable(ebook_access, status_text)
|
|
|
|
|
|
|
|
|
|
|
|
if is_borrowable:
|
|
|
|
|
|
log(f" ✓ Available for borrowing on Archive.org", flush=True)
|
|
|
|
|
|
log(f" → Queued for auto-borrowing...", flush=True)
|
|
|
|
|
|
# Queue borrow request as special dict object
|
|
|
|
|
|
# We need OCAID (Archive.org ID), not just numeric OLID
|
|
|
|
|
|
ocaid = archive_id
|
|
|
|
|
|
|
|
|
|
|
|
if not ocaid and isbn:
|
|
|
|
|
|
# If no OCAID in metadata, fetch it from OpenLibrary ISBN lookup
|
|
|
|
|
|
try:
|
|
|
|
|
|
import requests
|
|
|
|
|
|
ol_url = f'https://openlibrary.org/isbn/{isbn}.json'
|
|
|
|
|
|
r = requests.get(ol_url, timeout=5)
|
|
|
|
|
|
if r.status_code == 200:
|
|
|
|
|
|
ol_data = r.json()
|
|
|
|
|
|
ocaid = ol_data.get('ocaid')
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f" ⚠ Could not fetch OCAID from OpenLibrary: {e}", file=sys.stderr)
|
|
|
|
|
|
|
|
|
|
|
|
if ocaid:
|
|
|
|
|
|
urls_to_download.append({
|
|
|
|
|
|
'__borrow_request__': True,
|
|
|
|
|
|
'book_id': ocaid,
|
|
|
|
|
|
'isbn': isbn,
|
|
|
|
|
|
'title': title,
|
|
|
|
|
|
'olid': olid
|
|
|
|
|
|
})
|
|
|
|
|
|
else:
|
|
|
|
|
|
# OCAID not found - book claims borrowable but not on Archive.org
|
|
|
|
|
|
# Fall back to LibGen search instead
|
|
|
|
|
|
log(f" ⚠ Book marked borrowable but not found on Archive.org", file=sys.stderr)
|
|
|
|
|
|
if isbn:
|
|
|
|
|
|
try:
|
|
|
|
|
|
from helper.search_provider import get_provider
|
|
|
|
|
|
libgen_provider = get_provider("libgen", config)
|
|
|
|
|
|
if libgen_provider:
|
|
|
|
|
|
libgen_results = libgen_provider.search(f"isbn:{isbn}", limit=1)
|
|
|
|
|
|
if libgen_results:
|
|
|
|
|
|
libgen_result = libgen_results[0]
|
|
|
|
|
|
url = libgen_result.get('target') if isinstance(libgen_result, dict) else getattr(libgen_result, 'target', None)
|
|
|
|
|
|
if url:
|
|
|
|
|
|
urls_to_download.append(url)
|
|
|
|
|
|
log(f" ✓ Found on LibGen instead", flush=True)
|
|
|
|
|
|
else:
|
|
|
|
|
|
log(f" ⚠ Not found on LibGen", file=sys.stderr)
|
|
|
|
|
|
else:
|
|
|
|
|
|
log(f" ⚠ Not found on LibGen", file=sys.stderr)
|
|
|
|
|
|
else:
|
|
|
|
|
|
log(f" ⚠ LibGen provider not available", file=sys.stderr)
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f" ✗ Error searching LibGen: {e}", file=sys.stderr)
|
|
|
|
|
|
else:
|
|
|
|
|
|
# Book is NOT borrowable - route to LibGen
|
|
|
|
|
|
if isbn:
|
|
|
|
|
|
log(f" ⚠ Not available on Archive.org - attempting LibGen...", flush=True)
|
|
|
|
|
|
try:
|
|
|
|
|
|
from helper.search_provider import get_provider
|
|
|
|
|
|
libgen_provider = get_provider("libgen", config)
|
|
|
|
|
|
if libgen_provider:
|
|
|
|
|
|
libgen_results = libgen_provider.search(f"isbn:{isbn}", limit=1)
|
|
|
|
|
|
if libgen_results:
|
|
|
|
|
|
libgen_result = libgen_results[0]
|
|
|
|
|
|
url = libgen_result.get('target') if isinstance(libgen_result, dict) else getattr(libgen_result, 'target', None)
|
|
|
|
|
|
if url:
|
|
|
|
|
|
urls_to_download.append(url)
|
|
|
|
|
|
log(f" ✓ Found on LibGen", flush=True)
|
|
|
|
|
|
else:
|
|
|
|
|
|
log(f" ⚠ Not found on LibGen", file=sys.stderr)
|
|
|
|
|
|
else:
|
|
|
|
|
|
log(f" ⚠ Not found on LibGen", flush=True)
|
|
|
|
|
|
log(f" ▶ To search LibGen: search-file -provider libgen 'isbn:{isbn}' | @1 | download-data", flush=True)
|
|
|
|
|
|
else:
|
|
|
|
|
|
log(f" ▶ To search LibGen: search-file -provider libgen 'isbn:{isbn}' | @1 | download-data", flush=True)
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f" ⚠ Could not search LibGen: {e}", file=sys.stderr)
|
|
|
|
|
|
log(f" ▶ To search LibGen: search-file -provider libgen 'isbn:{isbn}' | @1 | download-data", flush=True)
|
|
|
|
|
|
else:
|
|
|
|
|
|
log(f" ⚠ ISBN not available", flush=True)
|
|
|
|
|
|
log(f" ▶ Visit: {item.get('target', 'https://openlibrary.org')}", flush=True)
|
|
|
|
|
|
log(f" ▶ Or find ISBN and use: search-file -provider libgen 'isbn:\"<ISBN>\"'", flush=True)
|
|
|
|
|
|
elif origin == 'soulseek':
|
|
|
|
|
|
# Handle Soulseek downloads using the provider
|
|
|
|
|
|
metadata = item.get('full_metadata', {}) if isinstance(item.get('full_metadata'), dict) else {}
|
|
|
|
|
|
username = metadata.get('username')
|
|
|
|
|
|
filename = metadata.get('filename')
|
|
|
|
|
|
size = item.get('size_bytes') or 0
|
|
|
|
|
|
|
|
|
|
|
|
if username and filename:
|
|
|
|
|
|
try:
|
|
|
|
|
|
import asyncio
|
|
|
|
|
|
from helper.search_provider import SoulSeekProvider
|
|
|
|
|
|
provider = SoulSeekProvider(config)
|
|
|
|
|
|
log(f"[search-result] Soulseek: '{title}'", flush=True)
|
|
|
|
|
|
log(f" ▶ Downloading from {username}...", flush=True)
|
|
|
|
|
|
|
|
|
|
|
|
if db:
|
|
|
|
|
|
db.append_worker_stdout(worker_id, f"Downloading from Soulseek: {title} (from {username})")
|
|
|
|
|
|
|
|
|
|
|
|
# Get temp directory from config
|
|
|
|
|
|
temp_dir = config.get('temp')
|
|
|
|
|
|
if temp_dir:
|
|
|
|
|
|
temp_dir = str(Path(temp_dir).expanduser())
|
|
|
|
|
|
|
|
|
|
|
|
# Call async download_file with asyncio.run()
|
|
|
|
|
|
success = asyncio.run(provider.download_file(
|
|
|
|
|
|
username=username,
|
|
|
|
|
|
filename=filename,
|
|
|
|
|
|
file_size=size,
|
|
|
|
|
|
target_dir=temp_dir
|
|
|
|
|
|
))
|
|
|
|
|
|
|
|
|
|
|
|
if success:
|
|
|
|
|
|
downloaded_file = Path(provider.DOWNLOAD_DIR) / Path(filename).name
|
|
|
|
|
|
if downloaded_file.exists():
|
|
|
|
|
|
log(f" ✓ Downloaded: {downloaded_file.name}", flush=True)
|
|
|
|
|
|
files_downloaded_directly += 1
|
|
|
|
|
|
if db:
|
|
|
|
|
|
db.append_worker_stdout(worker_id, f"✓ Downloaded: {downloaded_file.name}")
|
|
|
|
|
|
if pipeline_context._PIPE_ACTIVE:
|
|
|
|
|
|
# Create proper PipeObject result
|
|
|
|
|
|
result_dict = create_pipe_object_result(
|
|
|
|
|
|
source='soulseek',
|
|
|
|
|
|
identifier=filename,
|
|
|
|
|
|
file_path=str(downloaded_file),
|
|
|
|
|
|
cmdlet_name='download-data',
|
|
|
|
|
|
title=title,
|
|
|
|
|
|
target=str(downloaded_file), # Explicit target for add-file
|
|
|
|
|
|
extra={
|
|
|
|
|
|
"metadata": metadata,
|
|
|
|
|
|
"origin": "soulseek"
|
|
|
|
|
|
}
|
|
|
|
|
|
)
|
|
|
|
|
|
pipeline_context.emit(result_dict)
|
|
|
|
|
|
else:
|
|
|
|
|
|
log(f" ✗ Download failed (peer may be offline)", file=sys.stderr)
|
|
|
|
|
|
if db:
|
|
|
|
|
|
db.append_worker_stdout(worker_id, f"✗ Download failed for {title}")
|
|
|
|
|
|
log(f" ▶ Try another result: search-file -provider soulseek \"...\" | @2 | download-data", flush=True)
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f" ✗ Download error: {e}", file=sys.stderr)
|
|
|
|
|
|
if db:
|
|
|
|
|
|
db.append_worker_stdout(worker_id, f"✗ Error: {e}")
|
|
|
|
|
|
log(f" ▶ Alternative: search-soulseek -download \"{title}\" -storage <location>", flush=True)
|
|
|
|
|
|
else:
|
|
|
|
|
|
log(f"[search-result] Soulseek: '{title}'", flush=True)
|
|
|
|
|
|
log(f" ⚠ Missing download info (username/filename)", flush=True)
|
|
|
|
|
|
if db:
|
|
|
|
|
|
db.append_worker_stdout(worker_id, f"⚠ Missing download info for {title}")
|
|
|
|
|
|
elif origin == 'libgen':
|
|
|
|
|
|
# LibGen results can use the direct URL
|
|
|
|
|
|
# Also extract mirrors dict for fallback if primary fails
|
|
|
|
|
|
url = item.get('target')
|
|
|
|
|
|
# Extract mirrors and book_id from full_metadata
|
|
|
|
|
|
metadata = item.get('full_metadata', {}) if isinstance(item.get('full_metadata'), dict) else {}
|
|
|
|
|
|
mirrors = metadata.get('mirrors', {})
|
|
|
|
|
|
book_id = metadata.get('book_id', '')
|
|
|
|
|
|
|
|
|
|
|
|
if url:
|
|
|
|
|
|
url_entry = {
|
|
|
|
|
|
'url': str(url),
|
|
|
|
|
|
'mirrors': mirrors, # Alternative mirrors for fallback
|
|
|
|
|
|
'book_id': book_id,
|
|
|
|
|
|
}
|
|
|
|
|
|
urls_to_download.append(url_entry)
|
|
|
|
|
|
log(f"[search-result] LibGen: '{title}'", flush=True)
|
|
|
|
|
|
log(f" ✓ Queued for download", flush=True)
|
|
|
|
|
|
if mirrors:
|
|
|
|
|
|
log(f" Mirrors available: {len(mirrors)}", flush=True)
|
|
|
|
|
|
elif origin == 'debrid':
|
|
|
|
|
|
# Debrid results can use download-data
|
|
|
|
|
|
url = item.get('target')
|
|
|
|
|
|
if url:
|
|
|
|
|
|
urls_to_download.append(str(url))
|
|
|
|
|
|
log(f"[search-result] Debrid: '{title}'", flush=True)
|
|
|
|
|
|
log(f" ✓ Queued for download", flush=True)
|
|
|
|
|
|
else:
|
|
|
|
|
|
# Regular fields for non-search results
|
|
|
|
|
|
url = item.get('url') or item.get('link') or item.get('href') or item.get('target')
|
|
|
|
|
|
else:
|
|
|
|
|
|
# Object attributes
|
|
|
|
|
|
origin = getattr(item, 'origin', None)
|
|
|
|
|
|
title = getattr(item, 'title', 'Item')
|
|
|
|
|
|
if origin in {'openlibrary', 'libgen', 'soulseek', 'debrid'}:
|
|
|
|
|
|
# Handle search provider results
|
|
|
|
|
|
if origin == 'openlibrary':
|
|
|
|
|
|
# OpenLibrary: First check if lendable/downloadable via Archive.org
|
|
|
|
|
|
# Only route to LibGen if NOT available on Archive.org
|
|
|
|
|
|
metadata = getattr(item, 'full_metadata', {}) if isinstance(getattr(item, 'full_metadata', None), dict) else {}
|
|
|
|
|
|
isbn = metadata.get('isbn') or getattr(item, 'isbn', None)
|
|
|
|
|
|
olid = metadata.get('olid') or getattr(item, 'olid', None)
|
|
|
|
|
|
|
|
|
|
|
|
log(f"[search-result] OpenLibrary: '{title}'", flush=True)
|
|
|
|
|
|
if isbn:
|
|
|
|
|
|
log(f" ISBN: {isbn}", flush=True)
|
|
|
|
|
|
|
|
|
|
|
|
# Check if book is borrowable from ebook_access field or status
|
|
|
|
|
|
ebook_access = metadata.get('ebook_access') or getattr(item, 'ebook_access', '')
|
|
|
|
|
|
status_text = metadata.get('status') or getattr(item, 'status', '')
|
|
|
|
|
|
archive_id = metadata.get('archive_id') or getattr(item, 'archive_id', '')
|
|
|
|
|
|
|
|
|
|
|
|
# Determine if borrowable using unified helper
|
|
|
|
|
|
is_borrowable = _is_openlibrary_downloadable(ebook_access, status_text)
|
|
|
|
|
|
|
|
|
|
|
|
if is_borrowable:
|
|
|
|
|
|
# Book IS borrowable on Archive.org
|
|
|
|
|
|
log(f" ✓ Available for borrowing on Archive.org", flush=True)
|
|
|
|
|
|
log(f" → Queued for auto-borrowing...", flush=True)
|
|
|
|
|
|
# Queue borrow request as special dict object
|
|
|
|
|
|
ocaid = archive_id
|
|
|
|
|
|
if not ocaid and isbn:
|
|
|
|
|
|
try:
|
|
|
|
|
|
import requests
|
|
|
|
|
|
ol_url = f'https://openlibrary.org/isbn/{isbn}.json'
|
|
|
|
|
|
r = requests.get(ol_url, timeout=5)
|
|
|
|
|
|
if r.status_code == 200:
|
|
|
|
|
|
ol_data = r.json()
|
|
|
|
|
|
ocaid = ol_data.get('ocaid')
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f" ⚠ Could not fetch OCAID from OpenLibrary: {e}", file=sys.stderr)
|
|
|
|
|
|
if ocaid:
|
|
|
|
|
|
urls_to_download.append({
|
|
|
|
|
|
'__borrow_request__': True,
|
|
|
|
|
|
'book_id': ocaid,
|
|
|
|
|
|
'isbn': isbn,
|
|
|
|
|
|
'title': title,
|
|
|
|
|
|
'olid': olid or getattr(item, 'openlibrary_id', '')
|
|
|
|
|
|
})
|
|
|
|
|
|
else:
|
|
|
|
|
|
# OCAID not found - book claims borrowable but not on Archive.org
|
|
|
|
|
|
# Fall back to LibGen search instead
|
|
|
|
|
|
log(f" ⚠ No Archive.org ID found - attempting LibGen instead...", file=sys.stderr)
|
|
|
|
|
|
if isbn:
|
|
|
|
|
|
try:
|
|
|
|
|
|
from helper.search_provider import get_provider
|
|
|
|
|
|
libgen_provider = get_provider("libgen", config)
|
|
|
|
|
|
if libgen_provider:
|
|
|
|
|
|
libgen_results = libgen_provider.search(f"isbn:{isbn}", limit=1)
|
|
|
|
|
|
if libgen_results:
|
|
|
|
|
|
libgen_result = libgen_results[0]
|
|
|
|
|
|
url = libgen_result.get('target') if isinstance(libgen_result, dict) else getattr(libgen_result, 'target', None)
|
|
|
|
|
|
if url:
|
|
|
|
|
|
urls_to_download.append(url)
|
|
|
|
|
|
log(f" ✓ Found on LibGen instead", flush=True)
|
|
|
|
|
|
else:
|
|
|
|
|
|
log(f" ⚠ Not found on LibGen", file=sys.stderr)
|
|
|
|
|
|
else:
|
|
|
|
|
|
log(f" ⚠ Not found on LibGen", file=sys.stderr)
|
|
|
|
|
|
else:
|
|
|
|
|
|
log(f" ⚠ LibGen provider not available", file=sys.stderr)
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f" ✗ Error searching LibGen: {e}", file=sys.stderr)
|
|
|
|
|
|
else:
|
|
|
|
|
|
log(f" ⚠ ISBN not available for LibGen fallback", file=sys.stderr)
|
|
|
|
|
|
else:
|
|
|
|
|
|
# Book is NOT borrowable - route to LibGen
|
|
|
|
|
|
if isbn:
|
|
|
|
|
|
log(f" ⚠ Not available on Archive.org - attempting LibGen...", flush=True)
|
|
|
|
|
|
try:
|
|
|
|
|
|
from helper.search_provider import get_provider
|
|
|
|
|
|
libgen_provider = get_provider("libgen", config)
|
|
|
|
|
|
if libgen_provider:
|
|
|
|
|
|
libgen_results = libgen_provider.search(f"isbn:{isbn}", limit=1)
|
|
|
|
|
|
if libgen_results:
|
|
|
|
|
|
libgen_result = libgen_results[0]
|
|
|
|
|
|
url = libgen_result.get('target') if isinstance(libgen_result, dict) else getattr(libgen_result, 'target', None)
|
|
|
|
|
|
if url:
|
|
|
|
|
|
urls_to_download.append(url)
|
|
|
|
|
|
log(f" ✓ Found on LibGen", flush=True)
|
|
|
|
|
|
else:
|
|
|
|
|
|
log(f" ⚠ Not found on LibGen", file=sys.stderr)
|
|
|
|
|
|
else:
|
|
|
|
|
|
log(f" ⚠ Not found on LibGen", flush=True)
|
|
|
|
|
|
log(f" ▶ To search LibGen: search-file -provider libgen 'isbn:{isbn}' | @1 | download-data", flush=True)
|
|
|
|
|
|
else:
|
|
|
|
|
|
log(f" ▶ To search LibGen: search-file -provider libgen 'isbn:{isbn}' | @1 | download-data", flush=True)
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f" ⚠ Could not search LibGen: {e}", file=sys.stderr)
|
|
|
|
|
|
log(f" ▶ To search LibGen: search-file -provider libgen 'isbn:{isbn}' | @1 | download-data", flush=True)
|
|
|
|
|
|
else:
|
|
|
|
|
|
log(f" ⚠ ISBN not available", flush=True)
|
|
|
|
|
|
log(f" ▶ Visit: {getattr(item, 'target', 'https://openlibrary.org')}", flush=True)
|
|
|
|
|
|
log(f" ▶ Or find ISBN and use: search-file -provider libgen 'isbn:\"<ISBN>\"'", flush=True)
|
|
|
|
|
|
elif origin == 'soulseek':
|
|
|
|
|
|
# Handle Soulseek downloads using the provider
|
|
|
|
|
|
metadata = getattr(item, 'full_metadata', {}) if isinstance(getattr(item, 'full_metadata', None), dict) else {}
|
|
|
|
|
|
username = metadata.get('username')
|
|
|
|
|
|
filename = metadata.get('filename')
|
|
|
|
|
|
size = getattr(item, 'size_bytes', 0) or 0
|
|
|
|
|
|
|
|
|
|
|
|
if username and filename:
|
|
|
|
|
|
try:
|
|
|
|
|
|
import asyncio
|
|
|
|
|
|
from helper.search_provider import SoulSeekProvider
|
|
|
|
|
|
provider = SoulSeekProvider(config)
|
|
|
|
|
|
log(f"[search-result] Soulseek: '{title}'", flush=True)
|
|
|
|
|
|
log(f" ▶ Downloading from {username}...", flush=True)
|
|
|
|
|
|
|
|
|
|
|
|
if db:
|
|
|
|
|
|
db.append_worker_stdout(worker_id, f"Downloading from Soulseek: {title} (from {username})")
|
|
|
|
|
|
|
|
|
|
|
|
# Get temp directory from config
|
|
|
|
|
|
temp_dir = config.get('temp')
|
|
|
|
|
|
if temp_dir:
|
|
|
|
|
|
temp_dir = str(Path(temp_dir).expanduser())
|
|
|
|
|
|
|
|
|
|
|
|
# Call async download_file with asyncio.run()
|
|
|
|
|
|
success = asyncio.run(provider.download_file(
|
|
|
|
|
|
username=username,
|
|
|
|
|
|
filename=filename,
|
|
|
|
|
|
file_size=size,
|
|
|
|
|
|
target_dir=temp_dir
|
|
|
|
|
|
))
|
|
|
|
|
|
|
|
|
|
|
|
if success:
|
|
|
|
|
|
downloaded_file = Path(provider.DOWNLOAD_DIR) / Path(filename).name
|
|
|
|
|
|
if downloaded_file.exists():
|
|
|
|
|
|
log(f" ✓ Downloaded: {downloaded_file.name}", flush=True)
|
|
|
|
|
|
files_downloaded_directly += 1
|
|
|
|
|
|
if db:
|
|
|
|
|
|
db.append_worker_stdout(worker_id, f"✓ Downloaded: {downloaded_file.name}")
|
|
|
|
|
|
if pipeline_context._PIPE_ACTIVE:
|
|
|
|
|
|
# Create proper PipeObject result
|
|
|
|
|
|
result_dict = create_pipe_object_result(
|
|
|
|
|
|
source='soulseek',
|
|
|
|
|
|
identifier=filename,
|
|
|
|
|
|
file_path=str(downloaded_file),
|
|
|
|
|
|
cmdlet_name='download-data',
|
|
|
|
|
|
title=title,
|
|
|
|
|
|
target=str(downloaded_file), # Explicit target for add-file
|
|
|
|
|
|
extra={
|
|
|
|
|
|
"metadata": metadata,
|
|
|
|
|
|
"origin": "soulseek"
|
|
|
|
|
|
}
|
|
|
|
|
|
)
|
|
|
|
|
|
pipeline_context.emit(result_dict)
|
|
|
|
|
|
else:
|
|
|
|
|
|
log(f" ✗ Download failed (peer may be offline)", file=sys.stderr)
|
|
|
|
|
|
if db:
|
|
|
|
|
|
db.append_worker_stdout(worker_id, f"✗ Download failed for {title}")
|
|
|
|
|
|
log(f" ▶ Try another result: search-file -provider soulseek \"...\" | @2 | download-data", flush=True)
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f" ✗ Download error: {e}", file=sys.stderr)
|
|
|
|
|
|
if db:
|
|
|
|
|
|
db.append_worker_stdout(worker_id, f"✗ Error: {e}")
|
|
|
|
|
|
log(f" ▶ Alternative: search-soulseek -download \"{title}\" -storage <location>", flush=True)
|
|
|
|
|
|
else:
|
|
|
|
|
|
log(f"[search-result] Soulseek: '{title}'", flush=True)
|
|
|
|
|
|
log(f" ⚠ Missing download info (username/filename)", flush=True)
|
|
|
|
|
|
if db:
|
|
|
|
|
|
db.append_worker_stdout(worker_id, f"⚠ Missing download info for {title}")
|
|
|
|
|
|
elif origin == 'libgen':
|
|
|
|
|
|
# LibGen results with mirrors dict for fallback
|
|
|
|
|
|
url = getattr(item, 'target', None)
|
|
|
|
|
|
# Extract mirrors and book_id from full_metadata
|
|
|
|
|
|
metadata = getattr(item, 'full_metadata', {}) if isinstance(getattr(item, 'full_metadata', None), dict) else {}
|
|
|
|
|
|
mirrors = metadata.get('mirrors', {})
|
|
|
|
|
|
book_id = metadata.get('book_id', '')
|
|
|
|
|
|
|
|
|
|
|
|
if url:
|
|
|
|
|
|
url_entry = {
|
|
|
|
|
|
'url': str(url),
|
|
|
|
|
|
'mirrors': mirrors, # Alternative mirrors for fallback
|
|
|
|
|
|
'book_id': book_id,
|
|
|
|
|
|
}
|
|
|
|
|
|
urls_to_download.append(url_entry)
|
|
|
|
|
|
else:
|
|
|
|
|
|
urls_to_download.append(url) if url else None
|
|
|
|
|
|
elif origin == 'debrid':
|
|
|
|
|
|
url = getattr(item, 'target', None)
|
|
|
|
|
|
else:
|
|
|
|
|
|
url = getattr(item, 'url', None) or getattr(item, 'link', None) or getattr(item, 'href', None) or getattr(item, 'target', None)
|
|
|
|
|
|
|
|
|
|
|
|
if url:
|
|
|
|
|
|
urls_to_download.append(str(url))
|
|
|
|
|
|
|
|
|
|
|
|
if not urls_to_download and files_downloaded_directly == 0:
|
|
|
|
|
|
log(f"No downloadable URLs found", file=sys.stderr)
|
|
|
|
|
|
return 1
|
|
|
|
|
|
|
|
|
|
|
|
log(f"Processing {len(urls_to_download)} URL(s)", flush=True)
|
|
|
|
|
|
for i, u in enumerate(urls_to_download, 1):
|
|
|
|
|
|
if isinstance(u, dict):
|
|
|
|
|
|
log(f" [{i}] Format: {u.get('format_id', '?')} from {u.get('source_url', '?')[:60]}...", flush=True)
|
|
|
|
|
|
else:
|
|
|
|
|
|
log(f" [{i}] URL: {str(u)[:60]}...", flush=True)
|
|
|
|
|
|
|
|
|
|
|
|
# ========================================================================
|
|
|
|
|
|
# RESOLVE OUTPUT DIRECTORY
|
|
|
|
|
|
# ========================================================================
|
|
|
|
|
|
|
|
|
|
|
|
final_output_dir = None
|
|
|
|
|
|
|
|
|
|
|
|
# Priority 1: --storage flag
|
|
|
|
|
|
if storage_location:
|
|
|
|
|
|
try:
|
|
|
|
|
|
final_output_dir = SharedArgs.resolve_storage(storage_location)
|
|
|
|
|
|
log(f"Using storage location: {storage_location} → {final_output_dir}", flush=True)
|
|
|
|
|
|
except ValueError as e:
|
|
|
|
|
|
log(str(e), file=sys.stderr)
|
|
|
|
|
|
return 1
|
|
|
|
|
|
|
|
|
|
|
|
# Priority 2: Config resolver
|
|
|
|
|
|
if final_output_dir is None and resolve_output_dir is not None:
|
|
|
|
|
|
try:
|
|
|
|
|
|
final_output_dir = resolve_output_dir(config)
|
|
|
|
|
|
log(f"Using config resolver: {final_output_dir}", flush=True)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
|
|
# Priority 4: Config outfile
|
|
|
|
|
|
if final_output_dir is None and config and config.get("outfile"):
|
|
|
|
|
|
try:
|
|
|
|
|
|
final_output_dir = Path(config["outfile"]).expanduser()
|
|
|
|
|
|
log(f"Using config outfile: {final_output_dir}", flush=True)
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
|
|
# Priority 5: Default (home/Videos)
|
|
|
|
|
|
if final_output_dir is None:
|
|
|
|
|
|
final_output_dir = Path.home() / "Videos"
|
|
|
|
|
|
log(f"Using default directory: {final_output_dir}", flush=True)
|
|
|
|
|
|
|
|
|
|
|
|
# Ensure directory exists
|
|
|
|
|
|
try:
|
|
|
|
|
|
final_output_dir.mkdir(parents=True, exist_ok=True)
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f"Cannot create output directory {final_output_dir}: {e}", file=sys.stderr)
|
|
|
|
|
|
return 1
|
|
|
|
|
|
|
|
|
|
|
|
# ========================================================================
|
|
|
|
|
|
# DOWNLOAD EACH URL
|
|
|
|
|
|
# ========================================================================
|
|
|
|
|
|
|
|
|
|
|
|
downloaded_files = []
|
|
|
|
|
|
playlists_displayed = 0
|
|
|
|
|
|
formats_displayed = False # NEW: Track if we showed formats
|
|
|
|
|
|
exit_code = 0
|
|
|
|
|
|
|
|
|
|
|
|
for url in urls_to_download:
|
|
|
|
|
|
try:
|
|
|
|
|
|
selected_playlist_entries: list[Dict[str, Any]] = []
|
|
|
|
|
|
playlist_existing_paths: set[str] = set()
|
|
|
|
|
|
|
|
|
|
|
|
# ====== HANDLE FORMAT SELECTION FROM PIPED RESULT ======
|
|
|
|
|
|
# If url is a dict with format_id and source_url, extract them and override format_selector
|
|
|
|
|
|
current_format_selector = format_selector
|
|
|
|
|
|
actual_url = url
|
|
|
|
|
|
if isinstance(url, dict) and url.get('format_id') and url.get('source_url'):
|
|
|
|
|
|
log(f"🎬 Format selected: {url.get('format_id')}", flush=True)
|
|
|
|
|
|
format_id = url.get('format_id')
|
|
|
|
|
|
current_format_selector = format_id
|
|
|
|
|
|
|
|
|
|
|
|
# If it's a video-only format (has vcodec but no acodec), add bestaudio
|
|
|
|
|
|
vcodec = url.get('vcodec', '')
|
|
|
|
|
|
acodec = url.get('acodec', '')
|
|
|
|
|
|
if vcodec and vcodec != "none" and (not acodec or acodec == "none"):
|
|
|
|
|
|
# Video-only format, add bestaudio automatically
|
|
|
|
|
|
current_format_selector = f"{format_id}+bestaudio"
|
|
|
|
|
|
log(f" ℹ️ Video-only format detected, automatically adding bestaudio", flush=True)
|
|
|
|
|
|
|
|
|
|
|
|
actual_url = url.get('source_url')
|
|
|
|
|
|
url = actual_url # Use the actual URL for further processing
|
|
|
|
|
|
|
|
|
|
|
|
# ====== AUTO-BORROW MODE - INTERCEPT SPECIAL BORROW REQUEST DICTS ======
|
|
|
|
|
|
if isinstance(url, dict) and url.get('__borrow_request__'):
|
|
|
|
|
|
try:
|
|
|
|
|
|
from helper.archive_client import credential_openlibrary, loan, get_book_infos, download
|
|
|
|
|
|
import tempfile
|
|
|
|
|
|
import shutil
|
|
|
|
|
|
|
|
|
|
|
|
book_id = url.get('book_id')
|
|
|
|
|
|
if not book_id:
|
|
|
|
|
|
log(f" ✗ Missing book ID for borrowing", file=sys.stderr)
|
|
|
|
|
|
exit_code = 1
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
|
|
title_val = url.get('title', 'Unknown Book')
|
|
|
|
|
|
book_id_str = str(book_id)
|
|
|
|
|
|
|
|
|
|
|
|
log(f"[auto-borrow] Starting borrow for: {title_val}", flush=True)
|
|
|
|
|
|
log(f" Book ID: {book_id_str}", flush=True)
|
|
|
|
|
|
|
|
|
|
|
|
# Get Archive.org credentials
|
|
|
|
|
|
email, password = credential_openlibrary(config)
|
|
|
|
|
|
if not email or not password:
|
|
|
|
|
|
log(f" ✗ Archive.org credentials not configured", file=sys.stderr)
|
|
|
|
|
|
log(f" ▶ Set ARCHIVE_EMAIL and ARCHIVE_PASSWORD environment variables", file=sys.stderr)
|
|
|
|
|
|
exit_code = 1
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
|
|
# Attempt to borrow and download
|
|
|
|
|
|
try:
|
|
|
|
|
|
log(f" → Logging into Archive.org...", flush=True)
|
|
|
|
|
|
from helper.archive_client import login
|
|
|
|
|
|
import requests
|
|
|
|
|
|
try:
|
|
|
|
|
|
session = login(email, password)
|
|
|
|
|
|
except requests.exceptions.Timeout:
|
|
|
|
|
|
log(f" ✗ Timeout logging into Archive.org (server not responding)", file=sys.stderr)
|
|
|
|
|
|
exit_code = 1
|
|
|
|
|
|
continue
|
|
|
|
|
|
except requests.exceptions.RequestException as e:
|
|
|
|
|
|
log(f" ✗ Error connecting to Archive.org: {e}", file=sys.stderr)
|
|
|
|
|
|
exit_code = 1
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
|
|
log(f" → Borrowing book...", flush=True)
|
|
|
|
|
|
try:
|
|
|
|
|
|
session = loan(session, book_id_str, verbose=True)
|
|
|
|
|
|
except requests.exceptions.Timeout:
|
|
|
|
|
|
log(f" ✗ Timeout while borrowing (server not responding)", file=sys.stderr)
|
|
|
|
|
|
exit_code = 1
|
|
|
|
|
|
continue
|
|
|
|
|
|
except requests.exceptions.RequestException as e:
|
|
|
|
|
|
log(f" ✗ Error while borrowing: {e}", file=sys.stderr)
|
|
|
|
|
|
exit_code = 1
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
|
|
log(f" → Extracting page information...", flush=True)
|
|
|
|
|
|
# Try both URL formats
|
|
|
|
|
|
book_urls = [
|
|
|
|
|
|
f"https://archive.org/borrow/{book_id_str}",
|
|
|
|
|
|
f"https://archive.org/details/{book_id_str}"
|
|
|
|
|
|
]
|
|
|
|
|
|
|
|
|
|
|
|
title = None
|
|
|
|
|
|
links = None
|
|
|
|
|
|
metadata = None
|
|
|
|
|
|
last_error = None
|
|
|
|
|
|
for book_url in book_urls:
|
|
|
|
|
|
try:
|
|
|
|
|
|
title, links, metadata = get_book_infos(session, book_url)
|
|
|
|
|
|
if title and links:
|
|
|
|
|
|
log(f" → Found {len(links)} pages", flush=True)
|
|
|
|
|
|
break
|
|
|
|
|
|
except requests.exceptions.Timeout:
|
|
|
|
|
|
last_error = "Timeout while extracting pages"
|
|
|
|
|
|
log(f" ⚠ Timeout while extracting from {book_url}", flush=True)
|
|
|
|
|
|
continue
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
last_error = str(e)
|
|
|
|
|
|
log(f" ⚠ Failed to extract from {book_url}: {e}", flush=True)
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
|
|
if not links:
|
|
|
|
|
|
log(f" ✗ Could not extract book pages (Last error: {last_error})", file=sys.stderr)
|
|
|
|
|
|
exit_code = 1
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
|
|
# Download pages
|
|
|
|
|
|
log(f" → Downloading {len(links)} pages...", flush=True)
|
|
|
|
|
|
with tempfile.TemporaryDirectory() as temp_dir:
|
|
|
|
|
|
# download(session, n_threads, directory, links, scale, book_id)
|
|
|
|
|
|
images = download(
|
|
|
|
|
|
session,
|
|
|
|
|
|
n_threads=4,
|
|
|
|
|
|
directory=temp_dir,
|
|
|
|
|
|
links=links,
|
|
|
|
|
|
scale=2,
|
|
|
|
|
|
book_id=str(book_id)
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
if not images:
|
|
|
|
|
|
log(f" ✗ No pages downloaded", file=sys.stderr)
|
|
|
|
|
|
exit_code = 1
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
|
|
log(f" ✓ Downloaded {len(images)} pages", flush=True)
|
|
|
|
|
|
|
|
|
|
|
|
# Try to merge into PDF
|
|
|
|
|
|
try:
|
|
|
|
|
|
import img2pdf
|
|
|
|
|
|
log(f" → Merging pages into PDF...", flush=True)
|
|
|
|
|
|
|
|
|
|
|
|
filename = title if title else f"book_{book_id_str}"
|
|
|
|
|
|
filename = "".join(c for c in filename if c.isalnum() or c in (' ', '.', '-'))[:100]
|
|
|
|
|
|
output_path = Path(final_output_dir) / f"{filename}.pdf"
|
|
|
|
|
|
|
|
|
|
|
|
# Make unique filename if needed
|
|
|
|
|
|
i = 1
|
|
|
|
|
|
while output_path.exists():
|
|
|
|
|
|
output_path = Path(final_output_dir) / f"{filename}({i}).pdf"
|
|
|
|
|
|
i += 1
|
|
|
|
|
|
|
|
|
|
|
|
pdf_content = img2pdf.convert(images)
|
|
|
|
|
|
if pdf_content:
|
|
|
|
|
|
with open(output_path, 'wb') as f:
|
|
|
|
|
|
f.write(pdf_content)
|
|
|
|
|
|
|
|
|
|
|
|
log(f" ✓ Successfully borrowed and saved to: {output_path}", flush=True)
|
|
|
|
|
|
downloaded_files.append(str(output_path))
|
|
|
|
|
|
|
|
|
|
|
|
# Emit result for downstream cmdlets
|
|
|
|
|
|
file_hash = _compute_file_hash(output_path)
|
|
|
|
|
|
# Build tags including ISBN if available
|
|
|
|
|
|
emit_tags = ['book', 'borrowed', 'pdf']
|
|
|
|
|
|
isbn_tag = url.get('isbn')
|
|
|
|
|
|
if isbn_tag:
|
|
|
|
|
|
emit_tags.append(f'isbn:{isbn_tag}')
|
|
|
|
|
|
olid_tag = url.get('olid')
|
|
|
|
|
|
if olid_tag:
|
|
|
|
|
|
emit_tags.append(f'olid:{olid_tag}')
|
|
|
|
|
|
|
|
|
|
|
|
# Fetch OpenLibrary metadata tags
|
|
|
|
|
|
ol_tags = fetch_openlibrary_metadata_tags(isbn=isbn_tag, olid=olid_tag)
|
|
|
|
|
|
emit_tags.extend(ol_tags)
|
|
|
|
|
|
|
|
|
|
|
|
pipe_obj = create_pipe_object_result(
|
|
|
|
|
|
source='archive.org',
|
|
|
|
|
|
identifier=book_id_str,
|
|
|
|
|
|
file_path=str(output_path),
|
|
|
|
|
|
cmdlet_name='download-data',
|
|
|
|
|
|
title=title_val,
|
|
|
|
|
|
file_hash=file_hash,
|
|
|
|
|
|
tags=emit_tags,
|
|
|
|
|
|
source_url=url.get('source_url', f'archive.org/borrow/{book_id_str}')
|
|
|
|
|
|
)
|
|
|
|
|
|
pipeline_context.emit(pipe_obj)
|
|
|
|
|
|
exit_code = 0
|
|
|
|
|
|
except ImportError:
|
|
|
|
|
|
log(f" ⚠ img2pdf not available - saving pages as collection", file=sys.stderr)
|
|
|
|
|
|
# Just copy images to output dir
|
|
|
|
|
|
filename = title if title else f"book_{book_id_str}"
|
|
|
|
|
|
filename = "".join(c for c in filename if c.isalnum() or c in (' ', '.', '-'))[:100]
|
|
|
|
|
|
output_dir = Path(final_output_dir) / filename
|
|
|
|
|
|
i = 1
|
|
|
|
|
|
while output_dir.exists():
|
|
|
|
|
|
output_dir = Path(final_output_dir) / f"{filename}({i})"
|
|
|
|
|
|
i += 1
|
|
|
|
|
|
|
|
|
|
|
|
shutil.copytree(temp_dir, str(output_dir))
|
|
|
|
|
|
log(f" ✓ Successfully borrowed and saved to: {output_dir}", flush=True)
|
|
|
|
|
|
downloaded_files.append(str(output_dir))
|
|
|
|
|
|
|
|
|
|
|
|
# Emit result for downstream cmdlets
|
|
|
|
|
|
# Build tags including ISBN if available
|
|
|
|
|
|
emit_tags = ['book', 'borrowed', 'pages']
|
|
|
|
|
|
isbn_tag = url.get('isbn')
|
|
|
|
|
|
if isbn_tag:
|
|
|
|
|
|
emit_tags.append(f'isbn:{isbn_tag}')
|
|
|
|
|
|
olid_tag = url.get('olid')
|
|
|
|
|
|
if olid_tag:
|
|
|
|
|
|
emit_tags.append(f'olid:{olid_tag}')
|
|
|
|
|
|
|
|
|
|
|
|
# Fetch OpenLibrary metadata tags
|
|
|
|
|
|
ol_tags = fetch_openlibrary_metadata_tags(isbn=isbn_tag, olid=olid_tag)
|
|
|
|
|
|
emit_tags.extend(ol_tags)
|
|
|
|
|
|
|
|
|
|
|
|
pipe_obj = create_pipe_object_result(
|
|
|
|
|
|
source='archive.org',
|
|
|
|
|
|
identifier=book_id_str,
|
|
|
|
|
|
file_path=str(output_dir),
|
|
|
|
|
|
cmdlet_name='download-data',
|
|
|
|
|
|
title=title_val,
|
|
|
|
|
|
tags=emit_tags,
|
|
|
|
|
|
source_url=url.get('source_url', f'archive.org/borrow/{book_id_str}')
|
|
|
|
|
|
)
|
|
|
|
|
|
pipeline_context.emit(pipe_obj)
|
|
|
|
|
|
exit_code = 0
|
|
|
|
|
|
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f" ✗ Borrow/download failed: {e}", file=sys.stderr)
|
|
|
|
|
|
import traceback
|
|
|
|
|
|
traceback.print_exc()
|
|
|
|
|
|
exit_code = 1
|
|
|
|
|
|
|
|
|
|
|
|
continue # Skip normal URL handling
|
|
|
|
|
|
|
|
|
|
|
|
except ImportError as e:
|
|
|
|
|
|
log(f" ✗ Archive.org tools not available: {e}", file=sys.stderr)
|
|
|
|
|
|
exit_code = 1
|
|
|
|
|
|
continue
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f" ✗ Auto-borrow error: {e}", file=sys.stderr)
|
|
|
|
|
|
import traceback
|
|
|
|
|
|
traceback.print_exc()
|
|
|
|
|
|
exit_code = 1
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# ====== LIBGEN MIRROR FALLBACK MODE ======
|
|
|
|
|
|
# Handle libgen results with mirrors dict for fallback on failure
|
|
|
|
|
|
if isinstance(url, dict) and 'mirrors' in url:
|
|
|
|
|
|
try:
|
|
|
|
|
|
primary_url = url.get('url')
|
|
|
|
|
|
mirrors_dict = url.get('mirrors', {})
|
|
|
|
|
|
book_id = url.get('book_id', '')
|
|
|
|
|
|
|
|
|
|
|
|
if not primary_url:
|
|
|
|
|
|
log(f"Skipping libgen entry: no primary URL", file=sys.stderr)
|
|
|
|
|
|
exit_code = 1
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
|
|
# Build list of mirrors to try: primary first, then alternatives
|
|
|
|
|
|
mirrors_to_try = [primary_url]
|
|
|
|
|
|
mirrors_to_try.extend(mirrors_dict.values())
|
|
|
|
|
|
|
|
|
|
|
|
# Remove duplicates while preserving order
|
|
|
|
|
|
mirrors_to_try = list(dict.fromkeys(mirrors_to_try))
|
|
|
|
|
|
|
|
|
|
|
|
log(f"🔄 LibGen download with mirror fallback (book_id: {book_id})", flush=True)
|
|
|
|
|
|
log(f" Primary: {primary_url[:80]}...", flush=True)
|
|
|
|
|
|
|
|
|
|
|
|
if len(mirrors_to_try) > 1:
|
|
|
|
|
|
log(f" {len(mirrors_to_try) - 1} alternative mirror(s) available", flush=True)
|
|
|
|
|
|
|
|
|
|
|
|
# Resolve cookies path
|
|
|
|
|
|
final_cookies_path_libgen = None
|
|
|
|
|
|
if cookies_path:
|
|
|
|
|
|
if resolve_cookies_path:
|
|
|
|
|
|
try:
|
|
|
|
|
|
final_cookies_path_libgen = resolve_cookies_path(config, Path(cookies_path))
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
final_cookies_path_libgen = Path(cookies_path).expanduser() if cookies_path else None
|
|
|
|
|
|
else:
|
|
|
|
|
|
final_cookies_path_libgen = Path(cookies_path).expanduser()
|
|
|
|
|
|
|
|
|
|
|
|
download_succeeded = False
|
|
|
|
|
|
last_error = None
|
|
|
|
|
|
successful_mirror = None
|
|
|
|
|
|
|
|
|
|
|
|
# Try each mirror in sequence using libgen_service's native download
|
|
|
|
|
|
for mirror_idx, mirror_url in enumerate(mirrors_to_try, 1):
|
|
|
|
|
|
try:
|
|
|
|
|
|
if mirror_idx > 1:
|
|
|
|
|
|
log(f" → Trying mirror #{mirror_idx}: {mirror_url[:80]}...", flush=True)
|
|
|
|
|
|
|
|
|
|
|
|
# Use libgen_service's download_from_mirror for proper libgen handling
|
|
|
|
|
|
from helper.libgen_service import download_from_mirror
|
|
|
|
|
|
|
|
|
|
|
|
# Generate filename from book_id and title
|
|
|
|
|
|
safe_title = "".join(c for c in str(title or "book") if c.isalnum() or c in (' ', '.', '-'))[:100]
|
|
|
|
|
|
file_path = final_output_dir / f"{safe_title}_{book_id}.pdf"
|
|
|
|
|
|
|
|
|
|
|
|
# Attempt download using libgen's native function
|
|
|
|
|
|
success = download_from_mirror(
|
|
|
|
|
|
mirror_url=mirror_url,
|
|
|
|
|
|
output_path=file_path,
|
|
|
|
|
|
log_info=lambda msg: log(f" {msg}", flush=True),
|
|
|
|
|
|
log_error=lambda msg: log(f" ⚠ {msg}", file=sys.stderr)
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
if success and file_path.exists():
|
|
|
|
|
|
log(f" ✓ Downloaded successfully from mirror #{mirror_idx}", flush=True)
|
|
|
|
|
|
successful_mirror = mirror_url
|
|
|
|
|
|
download_succeeded = True
|
|
|
|
|
|
|
|
|
|
|
|
# Emit result for downstream cmdlets
|
|
|
|
|
|
file_hash = _compute_file_hash(file_path)
|
|
|
|
|
|
emit_tags = ['libgen', 'book']
|
|
|
|
|
|
|
|
|
|
|
|
pipe_obj = create_pipe_object_result(
|
|
|
|
|
|
source='libgen',
|
|
|
|
|
|
identifier=book_id,
|
|
|
|
|
|
file_path=str(file_path),
|
|
|
|
|
|
cmdlet_name='download-data',
|
|
|
|
|
|
file_hash=file_hash,
|
|
|
|
|
|
tags=emit_tags,
|
|
|
|
|
|
source_url=successful_mirror
|
|
|
|
|
|
)
|
|
|
|
|
|
pipeline_context.emit(pipe_obj)
|
|
|
|
|
|
downloaded_files.append(str(file_path))
|
|
|
|
|
|
exit_code = 0
|
|
|
|
|
|
break # Success, stop trying mirrors
|
|
|
|
|
|
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
last_error = str(e)
|
|
|
|
|
|
if mirror_idx == 1:
|
|
|
|
|
|
log(f" ⚠ Primary mirror failed: {e}", flush=True)
|
|
|
|
|
|
else:
|
|
|
|
|
|
log(f" ⚠ Mirror #{mirror_idx} failed: {e}", flush=True)
|
|
|
|
|
|
|
|
|
|
|
|
if not download_succeeded:
|
|
|
|
|
|
log(f" ✗ All mirrors failed. Last error: {last_error}", file=sys.stderr)
|
|
|
|
|
|
if "getaddrinfo failed" in str(last_error) or "NameResolutionError" in str(last_error) or "Failed to resolve" in str(last_error):
|
|
|
|
|
|
log(f" ⚠ Network issue detected: Cannot resolve LibGen mirror hostnames", file=sys.stderr)
|
|
|
|
|
|
log(f" ▶ Check your network connection or try with a VPN/proxy", file=sys.stderr)
|
|
|
|
|
|
exit_code = 1
|
|
|
|
|
|
|
|
|
|
|
|
continue # Skip to next URL
|
|
|
|
|
|
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f" ✗ LibGen mirror fallback error: {e}", file=sys.stderr)
|
|
|
|
|
|
import traceback
|
|
|
|
|
|
traceback.print_exc(file=sys.stderr)
|
|
|
|
|
|
exit_code = 1
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
|
|
# Ensure URL is a string for normal handling
|
|
|
|
|
|
if not isinstance(url, str):
|
|
|
|
|
|
# Check if it's a playlist item marker
|
|
|
|
|
|
if isinstance(url, dict) and url.get('__playlist_url'):
|
|
|
|
|
|
playlist_url = url.get('__playlist_url')
|
|
|
|
|
|
item_num = url.get('__playlist_item', 1)
|
|
|
|
|
|
log(f"📍 Handling selected playlist item #{item_num}", flush=True)
|
|
|
|
|
|
# Convert to actual URL and set playlist_items to download only this item
|
|
|
|
|
|
url = playlist_url
|
|
|
|
|
|
playlist_items = str(item_num)
|
|
|
|
|
|
# Fall through to normal handling below
|
|
|
|
|
|
else:
|
|
|
|
|
|
log(f"Skipping invalid URL entry: {url}", file=sys.stderr)
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
|
|
log(f"Probing URL: {url}", flush=True)
|
|
|
|
|
|
|
|
|
|
|
|
# ====== TORRENT MODE - INTERCEPT BEFORE NORMAL DOWNLOAD ======
|
|
|
|
|
|
if torrent_mode or url.lower().startswith('magnet:'):
|
|
|
|
|
|
log(f"🧲 Torrent/magnet mode - spawning background worker...", flush=True)
|
|
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
|
# Get API key from config
|
|
|
|
|
|
from config import get_debrid_api_key
|
|
|
|
|
|
api_key = get_debrid_api_key(config)
|
|
|
|
|
|
|
|
|
|
|
|
if not api_key:
|
|
|
|
|
|
log(f"✗ AllDebrid API key not found in config", file=sys.stderr)
|
|
|
|
|
|
exit_code = 1
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
|
|
# Create a unique worker ID
|
|
|
|
|
|
worker_id = f"torrent_{uuid.uuid4().hex[:8]}"
|
|
|
|
|
|
|
|
|
|
|
|
# Get worker manager if available from config
|
|
|
|
|
|
worker_manager = config.get('_worker_manager')
|
|
|
|
|
|
|
|
|
|
|
|
# Create worker in manager if available
|
|
|
|
|
|
if worker_manager:
|
|
|
|
|
|
try:
|
|
|
|
|
|
worker_manager.track_worker(
|
|
|
|
|
|
worker_id,
|
|
|
|
|
|
worker_type="download_torrent",
|
|
|
|
|
|
title=f"Download: {url[:60]}...",
|
|
|
|
|
|
description=f"Torrent/magnet download via AllDebrid",
|
|
|
|
|
|
pipe=pipeline_context.get_current_command_text()
|
|
|
|
|
|
)
|
|
|
|
|
|
log(f"✓ Worker created (ID: {worker_id})", flush=True)
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f"⚠ Failed to create worker: {e}", file=sys.stderr)
|
|
|
|
|
|
worker_manager = None
|
|
|
|
|
|
|
|
|
|
|
|
# Spawn background thread to handle the download
|
|
|
|
|
|
worker_thread = threading.Thread(
|
|
|
|
|
|
target=_download_torrent_worker,
|
|
|
|
|
|
args=(
|
|
|
|
|
|
worker_id,
|
|
|
|
|
|
url,
|
|
|
|
|
|
final_output_dir,
|
|
|
|
|
|
config,
|
|
|
|
|
|
api_key,
|
|
|
|
|
|
playlist_items,
|
|
|
|
|
|
audio_mode,
|
|
|
|
|
|
wait_timeout,
|
|
|
|
|
|
worker_manager,
|
|
|
|
|
|
),
|
|
|
|
|
|
daemon=False,
|
|
|
|
|
|
name=f"TorrentWorker_{worker_id}"
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
worker_thread.start()
|
|
|
|
|
|
log(f"✓ Background worker started (ID: {worker_id})", flush=True)
|
|
|
|
|
|
|
|
|
|
|
|
# Emit worker info so user can track it
|
|
|
|
|
|
worker_info = {
|
|
|
|
|
|
'worker_id': worker_id,
|
|
|
|
|
|
'worker_type': 'download_torrent',
|
|
|
|
|
|
'source_url': url,
|
|
|
|
|
|
'status': 'running',
|
|
|
|
|
|
'message': 'Downloading in background...'
|
|
|
|
|
|
}
|
|
|
|
|
|
pipeline_context.emit(worker_info)
|
|
|
|
|
|
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
|
|
except ImportError:
|
|
|
|
|
|
log(f"✗ AllDebrid client not available", file=sys.stderr)
|
|
|
|
|
|
exit_code = 1
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
# Catches AllDebridError and other exceptions
|
|
|
|
|
|
log(f"✗ Failed to spawn torrent worker: {e}", file=sys.stderr)
|
|
|
|
|
|
import traceback
|
|
|
|
|
|
traceback.print_exc(file=sys.stderr)
|
|
|
|
|
|
exit_code = 1
|
|
|
|
|
|
|
|
|
|
|
|
continue # Skip to next URL
|
|
|
|
|
|
|
|
|
|
|
|
# ====== NORMAL DOWNLOAD MODE (HTTP/HTTPS) ======
|
|
|
|
|
|
|
|
|
|
|
|
# First, probe the URL to detect playlists and get info
|
|
|
|
|
|
# For YouTube URLs, ignore playlists and only probe the single video
|
|
|
|
|
|
is_youtube_url = isinstance(url, str) and ('youtube.com' in url or 'youtu.be' in url)
|
|
|
|
|
|
probe_info = probe_url(url, no_playlist=is_youtube_url)
|
|
|
|
|
|
is_actual_playlist = False # Track if we have a real multi-item playlist
|
|
|
|
|
|
|
|
|
|
|
|
if probe_info:
|
|
|
|
|
|
log(f"✓ Probed: {probe_info.get('title', url)} ({probe_info.get('extractor', 'unknown')})")
|
|
|
|
|
|
|
|
|
|
|
|
# If it's a playlist, show the result table and skip download for now
|
|
|
|
|
|
entries = probe_info.get("entries", [])
|
|
|
|
|
|
if entries and not playlist_items:
|
|
|
|
|
|
is_actual_playlist = True # We have a real playlist with multiple items
|
|
|
|
|
|
# Playlist detected but NO selection provided
|
|
|
|
|
|
# Always show table for user to select items
|
|
|
|
|
|
log(f"📋 Found playlist with {len(entries)} items")
|
|
|
|
|
|
_show_playlist_table(url, probe_info)
|
|
|
|
|
|
log(f"ℹ️ Playlist displayed. To select items, use @* or @1,3,5-8 syntax after piping results")
|
|
|
|
|
|
playlists_displayed += 1
|
|
|
|
|
|
continue # Skip to next URL - don't download playlist without selection
|
|
|
|
|
|
elif entries and playlist_items:
|
|
|
|
|
|
is_actual_playlist = True # We have a real playlist with item selection
|
|
|
|
|
|
# Playlist detected WITH selection - will download below
|
|
|
|
|
|
# Expand wildcard if present
|
|
|
|
|
|
expanded_items = _expand_playlist_selection(playlist_items, len(entries))
|
|
|
|
|
|
playlist_items = expanded_items
|
|
|
|
|
|
selected_playlist_entries = _select_playlist_entries(entries, playlist_items)
|
|
|
|
|
|
log(f"📋 Found playlist with {len(entries)} items - downloading selected: {playlist_items}")
|
|
|
|
|
|
else:
|
|
|
|
|
|
log(f"Single item: {probe_info.get('title', 'Unknown')}")
|
|
|
|
|
|
|
|
|
|
|
|
# ====== FORMAT LISTING MODE ======
|
|
|
|
|
|
if list_formats_mode and isinstance(url, str) and url.startswith(('http://', 'https://')):
|
|
|
|
|
|
log(f"Fetching formats for: {url}", flush=True)
|
|
|
|
|
|
from helper.download import list_formats
|
|
|
|
|
|
from result_table import ResultTable
|
|
|
|
|
|
|
|
|
|
|
|
all_formats = list_formats(url, no_playlist=is_youtube_url, playlist_items=playlist_items)
|
|
|
|
|
|
if all_formats:
|
|
|
|
|
|
# Filter and sort formats for better user experience
|
|
|
|
|
|
formats = _filter_and_sort_formats(all_formats)
|
|
|
|
|
|
|
|
|
|
|
|
# Create result table for format display
|
|
|
|
|
|
table = ResultTable(title=f"Available Formats - {probe_info.get('title', 'Unknown')}")
|
|
|
|
|
|
|
|
|
|
|
|
for fmt in formats:
|
|
|
|
|
|
row = table.add_row()
|
|
|
|
|
|
row.add_column("Format ID", fmt.get("format_id", ""))
|
|
|
|
|
|
|
|
|
|
|
|
# Build resolution/bitrate string
|
|
|
|
|
|
vcodec = fmt.get("vcodec", "")
|
|
|
|
|
|
acodec = fmt.get("acodec", "")
|
|
|
|
|
|
height = fmt.get("height")
|
|
|
|
|
|
tbr = fmt.get("tbr")
|
|
|
|
|
|
|
|
|
|
|
|
if vcodec != "none" and acodec != "none":
|
|
|
|
|
|
# Video + audio
|
|
|
|
|
|
res_str = fmt.get("resolution", "")
|
|
|
|
|
|
elif acodec != "none" and vcodec == "none":
|
|
|
|
|
|
# Audio only - show bitrate
|
|
|
|
|
|
res_str = f"{tbr:.0f} kbps" if tbr else "audio"
|
|
|
|
|
|
else:
|
|
|
|
|
|
# Video only
|
|
|
|
|
|
res_str = fmt.get("resolution", "")
|
|
|
|
|
|
|
|
|
|
|
|
row.add_column("Resolution", res_str)
|
|
|
|
|
|
|
|
|
|
|
|
# Build codec string (merged vcodec/acodec)
|
|
|
|
|
|
codec_parts = []
|
|
|
|
|
|
if vcodec and vcodec != "none":
|
|
|
|
|
|
codec_parts.append(f"v:{vcodec}")
|
|
|
|
|
|
if acodec and acodec != "none":
|
|
|
|
|
|
codec_parts.append(f"a:{acodec}")
|
|
|
|
|
|
codec_str = " | ".join(codec_parts) if codec_parts else "unknown"
|
|
|
|
|
|
row.add_column("Codec", codec_str)
|
|
|
|
|
|
|
|
|
|
|
|
if fmt.get("filesize"):
|
|
|
|
|
|
size_mb = fmt["filesize"] / (1024 * 1024)
|
|
|
|
|
|
row.add_column("Size", f"{size_mb:.1f} MB")
|
|
|
|
|
|
|
|
|
|
|
|
# Set source command for @N expansion
|
|
|
|
|
|
table.set_source_command("download-data", [url])
|
|
|
|
|
|
|
|
|
|
|
|
# Note: Row selection args are not set - users select with @N syntax directly
|
|
|
|
|
|
|
|
|
|
|
|
# Display table and emit as pipeline result
|
|
|
|
|
|
log(str(table), flush=True)
|
|
|
|
|
|
formats_displayed = True
|
|
|
|
|
|
|
|
|
|
|
|
# Store table for @N expansion so CLI can reconstruct commands
|
|
|
|
|
|
# Uses separate current_stage_table instead of result history table
|
|
|
|
|
|
pipeline_context.set_current_stage_table(table)
|
|
|
|
|
|
|
|
|
|
|
|
# Always emit formats so they can be selected with @N
|
|
|
|
|
|
for i, fmt in enumerate(formats, 1):
|
|
|
|
|
|
pipeline_context.emit({
|
|
|
|
|
|
"format_id": fmt.get("format_id", ""),
|
|
|
|
|
|
"format_string": fmt.get("format", ""),
|
|
|
|
|
|
"resolution": fmt.get("resolution", ""),
|
|
|
|
|
|
"vcodec": fmt.get("vcodec", ""),
|
|
|
|
|
|
"acodec": fmt.get("acodec", ""),
|
|
|
|
|
|
"ext": fmt.get("ext", ""),
|
|
|
|
|
|
"filesize": fmt.get("filesize"),
|
|
|
|
|
|
"source_url": url,
|
|
|
|
|
|
"index": i,
|
|
|
|
|
|
})
|
|
|
|
|
|
log(f"Use @N syntax to select a format and download", flush=True)
|
|
|
|
|
|
else:
|
|
|
|
|
|
log(f"✗ No formats available for this URL", file=sys.stderr)
|
|
|
|
|
|
|
|
|
|
|
|
continue # Skip download, just show formats
|
|
|
|
|
|
|
|
|
|
|
|
# ====== AUTO-DETECT MULTIPLE FORMATS ======
|
|
|
|
|
|
# Check if multiple formats exist and handle based on -item flag
|
|
|
|
|
|
if (not current_format_selector and not list_formats_mode and
|
|
|
|
|
|
isinstance(url, str) and url.startswith(('http://', 'https://'))):
|
|
|
|
|
|
# Check if this is a yt-dlp supported URL (YouTube, Vimeo, etc.)
|
|
|
|
|
|
from helper.download import is_url_supported_by_ytdlp, list_formats
|
|
|
|
|
|
from result_table import ResultTable
|
|
|
|
|
|
|
|
|
|
|
|
if is_url_supported_by_ytdlp(url):
|
|
|
|
|
|
log(f"Checking available formats for: {url}", flush=True)
|
|
|
|
|
|
all_formats = list_formats(url, no_playlist=is_youtube_url, playlist_items=playlist_items)
|
|
|
|
|
|
|
|
|
|
|
|
if all_formats:
|
|
|
|
|
|
# Filter and sort formats for better user experience
|
|
|
|
|
|
formats = _filter_and_sort_formats(all_formats)
|
|
|
|
|
|
|
|
|
|
|
|
# Handle -item selection for formats (single video)
|
|
|
|
|
|
if playlist_items and playlist_items.isdigit() and not is_actual_playlist:
|
|
|
|
|
|
idx = int(playlist_items)
|
|
|
|
|
|
if 0 < idx <= len(formats):
|
|
|
|
|
|
fmt = formats[idx-1]
|
|
|
|
|
|
current_format_selector = fmt.get("format_id")
|
|
|
|
|
|
log(f"Selected format #{idx}: {current_format_selector}")
|
|
|
|
|
|
playlist_items = None # Clear so it doesn't affect download options
|
|
|
|
|
|
else:
|
|
|
|
|
|
log(f"Invalid format index: {idx}", file=sys.stderr)
|
|
|
|
|
|
|
|
|
|
|
|
elif len(formats) > 1:
|
|
|
|
|
|
# Multiple formats available
|
|
|
|
|
|
log(f"📊 Found {len(formats)} available formats for: {probe_info.get('title', 'Unknown')}", flush=True)
|
|
|
|
|
|
|
|
|
|
|
|
# Always show table for format selection via @N syntax
|
|
|
|
|
|
# Show table and wait for @N selection
|
|
|
|
|
|
table = ResultTable(title=f"Available Formats - {probe_info.get('title', 'Unknown')}")
|
|
|
|
|
|
|
|
|
|
|
|
for fmt in formats:
|
|
|
|
|
|
row = table.add_row()
|
|
|
|
|
|
row.add_column("Format ID", fmt.get("format_id", ""))
|
|
|
|
|
|
|
|
|
|
|
|
# Build resolution/bitrate string
|
|
|
|
|
|
vcodec = fmt.get("vcodec", "")
|
|
|
|
|
|
acodec = fmt.get("acodec", "")
|
|
|
|
|
|
height = fmt.get("height")
|
|
|
|
|
|
tbr = fmt.get("tbr")
|
|
|
|
|
|
|
|
|
|
|
|
if vcodec != "none" and acodec != "none":
|
|
|
|
|
|
# Video + audio
|
|
|
|
|
|
res_str = fmt.get("resolution", "")
|
|
|
|
|
|
elif acodec != "none" and vcodec == "none":
|
|
|
|
|
|
# Audio only - show bitrate
|
|
|
|
|
|
res_str = f"{tbr:.0f} kbps" if tbr else "audio"
|
|
|
|
|
|
else:
|
|
|
|
|
|
# Video only
|
|
|
|
|
|
res_str = fmt.get("resolution", "")
|
|
|
|
|
|
|
|
|
|
|
|
row.add_column("Resolution", res_str)
|
|
|
|
|
|
|
|
|
|
|
|
# Build codec string (merged vcodec/acodec)
|
|
|
|
|
|
codec_parts = []
|
|
|
|
|
|
if vcodec and vcodec != "none":
|
|
|
|
|
|
codec_parts.append(f"v:{vcodec}")
|
|
|
|
|
|
if acodec and acodec != "none":
|
|
|
|
|
|
codec_parts.append(f"a:{acodec}")
|
|
|
|
|
|
codec_str = " | ".join(codec_parts) if codec_parts else "unknown"
|
|
|
|
|
|
row.add_column("Codec", codec_str)
|
|
|
|
|
|
|
|
|
|
|
|
if fmt.get("filesize"):
|
|
|
|
|
|
size_mb = fmt["filesize"] / (1024 * 1024)
|
|
|
|
|
|
row.add_column("Size", f"{size_mb:.1f} MB")
|
|
|
|
|
|
|
|
|
|
|
|
# Set source command for @N expansion
|
|
|
|
|
|
table.set_source_command("download-data", [url])
|
|
|
|
|
|
|
|
|
|
|
|
# Set row selection args so @N expands to "download-data URL -item N"
|
|
|
|
|
|
for i in range(len(formats)):
|
|
|
|
|
|
# i is 0-based index, but -item expects 1-based index
|
|
|
|
|
|
table.set_row_selection_args(i, ["-item", str(i + 1)])
|
|
|
|
|
|
|
|
|
|
|
|
# Display table and emit formats so they can be selected with @N
|
|
|
|
|
|
log(str(table), flush=True)
|
|
|
|
|
|
log(f"💡 Use @N syntax to select a format and download (e.g., @1)", flush=True)
|
|
|
|
|
|
|
|
|
|
|
|
# Store table for @N expansion so CLI can reconstruct commands
|
|
|
|
|
|
pipeline_context.set_current_stage_table(table)
|
|
|
|
|
|
|
|
|
|
|
|
# Emit formats as pipeline results for @N selection
|
|
|
|
|
|
for i, fmt in enumerate(formats, 1):
|
|
|
|
|
|
pipeline_context.emit({
|
|
|
|
|
|
"format_id": fmt.get("format_id", ""),
|
|
|
|
|
|
"format_string": fmt.get("format", ""),
|
|
|
|
|
|
"resolution": fmt.get("resolution", ""),
|
|
|
|
|
|
"vcodec": fmt.get("vcodec", ""),
|
|
|
|
|
|
"acodec": fmt.get("acodec", ""),
|
|
|
|
|
|
"filesize": fmt.get("filesize"),
|
|
|
|
|
|
"tbr": fmt.get("tbr"),
|
|
|
|
|
|
"source_url": url,
|
|
|
|
|
|
"index": i,
|
|
|
|
|
|
})
|
|
|
|
|
|
|
|
|
|
|
|
formats_displayed = True # Mark that we displayed formats
|
|
|
|
|
|
continue # Skip download, user must select format via @N
|
|
|
|
|
|
|
|
|
|
|
|
log(f"Downloading: {url}", flush=True)
|
|
|
|
|
|
|
|
|
|
|
|
# Resolve cookies path if specified
|
|
|
|
|
|
final_cookies_path = None
|
|
|
|
|
|
if cookies_path:
|
|
|
|
|
|
if resolve_cookies_path:
|
|
|
|
|
|
try:
|
|
|
|
|
|
final_cookies_path = resolve_cookies_path(config, Path(cookies_path))
|
|
|
|
|
|
except Exception:
|
|
|
|
|
|
final_cookies_path = Path(cookies_path).expanduser() if cookies_path else None
|
|
|
|
|
|
else:
|
|
|
|
|
|
final_cookies_path = Path(cookies_path).expanduser()
|
|
|
|
|
|
|
|
|
|
|
|
# Create download options - use correct parameter names
|
|
|
|
|
|
# Mode is "audio" or "video", required field
|
|
|
|
|
|
mode = "audio" if audio_mode else "video"
|
|
|
|
|
|
|
|
|
|
|
|
# Detect YouTube URLs and set no_playlist to download only the single video
|
|
|
|
|
|
is_youtube_url = isinstance(url, str) and ('youtube.com' in url or 'youtu.be' in url)
|
|
|
|
|
|
|
|
|
|
|
|
download_opts = DownloadOptions(
|
|
|
|
|
|
url=url,
|
|
|
|
|
|
mode=mode,
|
|
|
|
|
|
output_dir=final_output_dir,
|
|
|
|
|
|
cookies_path=final_cookies_path,
|
|
|
|
|
|
ytdl_format=current_format_selector, # Use per-URL format override if available
|
|
|
|
|
|
clip_sections=f"{clip_range[0]}-{clip_range[1]}" if clip_range else None,
|
|
|
|
|
|
playlist_items=playlist_items,
|
|
|
|
|
|
no_playlist=is_youtube_url, # For YouTube, ignore playlist URLs and download single video
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
# For playlist downloads, capture existing files BEFORE download
|
|
|
|
|
|
if playlist_items and selected_playlist_entries:
|
|
|
|
|
|
_, playlist_existing_paths = _snapshot_playlist_paths(selected_playlist_entries, final_output_dir)
|
|
|
|
|
|
|
|
|
|
|
|
# Call download_media from helper - no show_progress param
|
|
|
|
|
|
result_data = download_media(download_opts)
|
|
|
|
|
|
|
|
|
|
|
|
if result_data and result_data.path:
|
|
|
|
|
|
file_path = result_data.path
|
|
|
|
|
|
|
|
|
|
|
|
if file_path.exists():
|
|
|
|
|
|
# Check if this was a playlist download (is_actual_playlist tracks if we have a multi-item playlist)
|
|
|
|
|
|
if is_actual_playlist:
|
|
|
|
|
|
if not selected_playlist_entries:
|
|
|
|
|
|
log(
|
|
|
|
|
|
"⚠ Playlist metadata unavailable; cannot emit selected items for this stage.",
|
|
|
|
|
|
file=sys.stderr,
|
|
|
|
|
|
)
|
|
|
|
|
|
exit_code = 1
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
|
|
matched_after, _ = _snapshot_playlist_paths(selected_playlist_entries, final_output_dir)
|
|
|
|
|
|
if not matched_after:
|
|
|
|
|
|
log(
|
|
|
|
|
|
"⚠ No playlist files found for the selected items after download.",
|
|
|
|
|
|
file=sys.stderr,
|
|
|
|
|
|
)
|
|
|
|
|
|
exit_code = 1
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
|
|
new_playlist_files: list[Path] = []
|
|
|
|
|
|
for playlist_file in matched_after:
|
|
|
|
|
|
try:
|
|
|
|
|
|
path_key = str(playlist_file.resolve())
|
|
|
|
|
|
except OSError:
|
|
|
|
|
|
path_key = str(playlist_file)
|
|
|
|
|
|
if path_key not in playlist_existing_paths:
|
|
|
|
|
|
new_playlist_files.append(playlist_file)
|
|
|
|
|
|
|
|
|
|
|
|
emit_targets = new_playlist_files if new_playlist_files else matched_after
|
|
|
|
|
|
if new_playlist_files:
|
|
|
|
|
|
log(f"📋 Playlist download completed: {len(new_playlist_files)} new file(s)")
|
|
|
|
|
|
else:
|
|
|
|
|
|
log(f"📁 Reusing {len(emit_targets)} cached playlist file(s)", flush=True)
|
|
|
|
|
|
|
|
|
|
|
|
for playlist_file in emit_targets:
|
|
|
|
|
|
file_hash = _compute_file_hash(playlist_file)
|
|
|
|
|
|
|
|
|
|
|
|
tags = []
|
|
|
|
|
|
if extract_ytdlp_tags and result_data.tags:
|
|
|
|
|
|
tags = result_data.tags
|
|
|
|
|
|
|
|
|
|
|
|
pipe_obj = create_pipe_object_result(
|
|
|
|
|
|
source='download',
|
|
|
|
|
|
identifier=playlist_file.stem,
|
|
|
|
|
|
file_path=str(playlist_file),
|
|
|
|
|
|
cmdlet_name='download-data',
|
|
|
|
|
|
title=playlist_file.name,
|
|
|
|
|
|
file_hash=file_hash,
|
|
|
|
|
|
is_temp=False,
|
|
|
|
|
|
extra={
|
|
|
|
|
|
'url': url,
|
|
|
|
|
|
'tags': tags,
|
|
|
|
|
|
'audio_mode': audio_mode,
|
|
|
|
|
|
'format': format_selector,
|
|
|
|
|
|
'from_playlist': True,
|
|
|
|
|
|
},
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
downloaded_files.append(playlist_file)
|
|
|
|
|
|
pipeline_context.emit(pipe_obj)
|
|
|
|
|
|
else:
|
|
|
|
|
|
# Single file download
|
|
|
|
|
|
file_hash = result_data.hash_value or _compute_file_hash(file_path)
|
|
|
|
|
|
tags = result_data.tags if result_data.tags else []
|
|
|
|
|
|
|
|
|
|
|
|
pipe_obj = create_pipe_object_result(
|
|
|
|
|
|
source='download',
|
|
|
|
|
|
identifier=file_path.stem,
|
|
|
|
|
|
file_path=str(file_path),
|
|
|
|
|
|
cmdlet_name='download-data',
|
|
|
|
|
|
title=file_path.name,
|
|
|
|
|
|
file_hash=file_hash,
|
|
|
|
|
|
is_temp=False,
|
|
|
|
|
|
extra={
|
|
|
|
|
|
'url': url,
|
|
|
|
|
|
'tags': tags,
|
|
|
|
|
|
'audio_mode': audio_mode,
|
|
|
|
|
|
'format': format_selector,
|
|
|
|
|
|
'clipped': clip_range is not None,
|
|
|
|
|
|
}
|
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
downloaded_files.append(file_path)
|
|
|
|
|
|
pipeline_context.emit(pipe_obj)
|
|
|
|
|
|
|
|
|
|
|
|
log(f"✓ Downloaded: {file_path}", flush=True)
|
|
|
|
|
|
else:
|
|
|
|
|
|
log(f"Download returned no result for {url}", file=sys.stderr)
|
|
|
|
|
|
exit_code = 1
|
|
|
|
|
|
|
|
|
|
|
|
except Exception as e:
|
|
|
|
|
|
log(f"Error downloading {url}: {e}", file=sys.stderr)
|
|
|
|
|
|
import traceback
|
|
|
|
|
|
traceback.print_exc(file=sys.stderr)
|
|
|
|
|
|
exit_code = 1
|
|
|
|
|
|
|
|
|
|
|
|
# Success if we downloaded files or displayed playlists/formats
|
|
|
|
|
|
if downloaded_files or files_downloaded_directly > 0:
|
|
|
|
|
|
total_files = len(downloaded_files) + files_downloaded_directly
|
|
|
|
|
|
log(f"✓ Successfully downloaded {total_files} file(s)", flush=True)
|
2025-11-25 22:34:41 -08:00
|
|
|
|
|
|
|
|
|
|
# Create a result table for the downloaded files
|
|
|
|
|
|
# This ensures that subsequent @N commands select from these files
|
|
|
|
|
|
# instead of trying to expand the previous command (e.g. search-file)
|
|
|
|
|
|
if downloaded_files:
|
|
|
|
|
|
from result_table import ResultTable
|
|
|
|
|
|
table = ResultTable("Downloaded Files")
|
|
|
|
|
|
for i, file_path in enumerate(downloaded_files):
|
|
|
|
|
|
row = table.add_row()
|
|
|
|
|
|
row.add_column("#", str(i + 1))
|
|
|
|
|
|
row.add_column("File", file_path.name)
|
|
|
|
|
|
row.add_column("Path", str(file_path))
|
|
|
|
|
|
try:
|
|
|
|
|
|
size_mb = file_path.stat().st_size / (1024*1024)
|
|
|
|
|
|
row.add_column("Size", f"{size_mb:.1f} MB")
|
|
|
|
|
|
except OSError:
|
|
|
|
|
|
row.add_column("Size", "?")
|
|
|
|
|
|
|
|
|
|
|
|
# Set selection args to just the file path (or index if we want item selection)
|
|
|
|
|
|
# For item selection fallback, we don't strictly need row args if source command is None
|
|
|
|
|
|
# But setting them helps if we want to support command expansion later
|
|
|
|
|
|
table.set_row_selection_args(i, [str(file_path)])
|
|
|
|
|
|
|
|
|
|
|
|
# Register the table but DO NOT set a source command
|
|
|
|
|
|
# This forces CLI to use item-based selection (filtering the pipe)
|
|
|
|
|
|
# instead of command expansion
|
|
|
|
|
|
pipeline_context.set_last_result_table_overlay(table, downloaded_files)
|
|
|
|
|
|
pipeline_context.set_current_stage_table(table)
|
|
|
|
|
|
|
|
|
|
|
|
# Also print the table so user sees what they got
|
|
|
|
|
|
log(str(table), flush=True)
|
|
|
|
|
|
|
2025-11-25 20:09:33 -08:00
|
|
|
|
if db:
|
|
|
|
|
|
db.update_worker_status(worker_id, 'completed')
|
|
|
|
|
|
return 0
|
|
|
|
|
|
|
|
|
|
|
|
if playlists_displayed:
|
|
|
|
|
|
log(f"✓ Displayed {playlists_displayed} playlist(s) for selection", flush=True)
|
|
|
|
|
|
if db:
|
|
|
|
|
|
db.update_worker_status(worker_id, 'completed')
|
|
|
|
|
|
db.close()
|
|
|
|
|
|
return 0 # Success - playlists shown
|
|
|
|
|
|
|
|
|
|
|
|
if formats_displayed:
|
|
|
|
|
|
log(f"✓ Format selection table displayed - use @N to select and download", flush=True)
|
|
|
|
|
|
if db:
|
|
|
|
|
|
db.update_worker_status(worker_id, 'completed')
|
|
|
|
|
|
db.close()
|
|
|
|
|
|
return 0 # Success - formats shown
|
|
|
|
|
|
|
|
|
|
|
|
log(f"No files were downloaded or playlists displayed", file=sys.stderr)
|
|
|
|
|
|
if db:
|
|
|
|
|
|
db.update_worker_status(worker_id, 'completed')
|
|
|
|
|
|
db.close()
|
|
|
|
|
|
return 1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CMDLET = Cmdlet(
|
|
|
|
|
|
name="download-data",
|
|
|
|
|
|
exec=_run,
|
|
|
|
|
|
summary="Download data from URLs with playlist/clip support using yt-dlp",
|
|
|
|
|
|
usage="download-data <url> [options] or search-file | download-data [options]",
|
|
|
|
|
|
aliases=["download", "dl"],
|
|
|
|
|
|
args=[
|
|
|
|
|
|
CmdletArg(
|
|
|
|
|
|
name="url",
|
|
|
|
|
|
type="string",
|
|
|
|
|
|
required=False,
|
|
|
|
|
|
description="URL to download (HTTP/HTTPS or file with URL list)",
|
|
|
|
|
|
variadic=True
|
|
|
|
|
|
),
|
|
|
|
|
|
CmdletArg(
|
|
|
|
|
|
name="-url",
|
|
|
|
|
|
type="string",
|
|
|
|
|
|
description="URL to download (alias for positional argument)",
|
|
|
|
|
|
variadic=True
|
|
|
|
|
|
),
|
|
|
|
|
|
CmdletArg(
|
|
|
|
|
|
name="list-formats",
|
|
|
|
|
|
type="flag",
|
|
|
|
|
|
description="List available formats without downloading"
|
|
|
|
|
|
),
|
|
|
|
|
|
CmdletArg(
|
|
|
|
|
|
name="audio",
|
|
|
|
|
|
type="flag",
|
|
|
|
|
|
alias="a",
|
|
|
|
|
|
description="Download audio only (extract from video)"
|
|
|
|
|
|
),
|
|
|
|
|
|
CmdletArg(
|
|
|
|
|
|
name="video",
|
|
|
|
|
|
type="flag",
|
|
|
|
|
|
alias="v",
|
|
|
|
|
|
description="Download video (default if not specified)"
|
|
|
|
|
|
),
|
|
|
|
|
|
CmdletArg(
|
|
|
|
|
|
name="format",
|
|
|
|
|
|
type="string",
|
|
|
|
|
|
alias="fmt",
|
|
|
|
|
|
description="Explicit yt-dlp format selector (e.g., 'bestvideo+bestaudio')"
|
|
|
|
|
|
),
|
|
|
|
|
|
CmdletArg(
|
|
|
|
|
|
name="clip",
|
|
|
|
|
|
type="string",
|
|
|
|
|
|
description="Extract time range: MM:SS-MM:SS (e.g., 34:03-35:08) or seconds"
|
|
|
|
|
|
),
|
|
|
|
|
|
CmdletArg(
|
|
|
|
|
|
name="cookies",
|
|
|
|
|
|
type="string",
|
|
|
|
|
|
description="Path to cookies.txt file for authentication"
|
|
|
|
|
|
),
|
|
|
|
|
|
CmdletArg(
|
|
|
|
|
|
name="torrent",
|
|
|
|
|
|
type="flag",
|
|
|
|
|
|
description="Download torrent/magnet via AllDebrid (requires API key in config)"
|
|
|
|
|
|
),
|
|
|
|
|
|
CmdletArg(
|
|
|
|
|
|
name="wait",
|
|
|
|
|
|
type="float",
|
|
|
|
|
|
description="Wait time (seconds) for magnet processing timeout"
|
|
|
|
|
|
),
|
|
|
|
|
|
CmdletArg(
|
|
|
|
|
|
name="item",
|
|
|
|
|
|
type="string",
|
|
|
|
|
|
alias="items",
|
|
|
|
|
|
description="Item selection for playlists/formats: use '-item N' to select format N, or '-item' to show table for @N selection in next command"
|
|
|
|
|
|
),
|
|
|
|
|
|
SharedArgs.STORAGE, # Storage location: local, hydrus, 0x0, debrid, ftp
|
|
|
|
|
|
],
|
|
|
|
|
|
details=[
|
|
|
|
|
|
"Download media from URLs with advanced features.",
|
|
|
|
|
|
"",
|
|
|
|
|
|
"BASIC USAGE:",
|
|
|
|
|
|
" download-data https://youtube.com/watch?v=xyz",
|
|
|
|
|
|
" download-data https://example.com/file.pdf -storage local",
|
|
|
|
|
|
"",
|
|
|
|
|
|
"AUDIO/VIDEO OPTIONS:",
|
|
|
|
|
|
" -audio, -a Extract audio from video (M4A, MP3)",
|
|
|
|
|
|
" -video, -v Download as video (default)",
|
|
|
|
|
|
"",
|
|
|
|
|
|
"FORMAT SELECTION:",
|
|
|
|
|
|
" -format SELECTOR Specify yt-dlp format",
|
|
|
|
|
|
" Examples: 'best', 'bestvideo+bestaudio', '22'",
|
|
|
|
|
|
"",
|
|
|
|
|
|
"FORMAT/RESULT ITEM SELECTION:",
|
|
|
|
|
|
" -item Show available formats in table (see @N below)",
|
|
|
|
|
|
" -item N Auto-select and download format #N (e.g., -item 1)",
|
|
|
|
|
|
" Example: download-data URL -item 2 | add-file -storage local",
|
|
|
|
|
|
"",
|
|
|
|
|
|
"FORMAT SELECTION WITH @N SYNTAX:",
|
|
|
|
|
|
" 1. Show formats: download-data URL",
|
|
|
|
|
|
" 2. Select with @N: @1 | download-data | add-file",
|
|
|
|
|
|
" OR use -item N to skip manual selection",
|
|
|
|
|
|
"",
|
|
|
|
|
|
"CLIPPING:",
|
|
|
|
|
|
" -clip START-END Extract time range from media",
|
|
|
|
|
|
" Format: MM:SS-MM:SS (e.g., 34:03-35:08)",
|
|
|
|
|
|
" Also accepts: 2043-2108 (seconds)",
|
|
|
|
|
|
"",
|
|
|
|
|
|
"PLAYLIST MODE:",
|
|
|
|
|
|
" Automatically detects playlists",
|
|
|
|
|
|
" Shows numbered list of tracks",
|
|
|
|
|
|
" Download specific items: -item '1,3,5-8'",
|
|
|
|
|
|
" Download all items: -item '*'",
|
|
|
|
|
|
"",
|
|
|
|
|
|
"TORRENT MODE:",
|
|
|
|
|
|
" Download torrents/magnets via AllDebrid (if configured)",
|
|
|
|
|
|
" Usage: download-data -torrent magnet:?xt=urn:btih:... -item '1,3,5-8'",
|
|
|
|
|
|
" -wait SECONDS Maximum wait time for magnet processing (default: 1800)",
|
|
|
|
|
|
"",
|
|
|
|
|
|
"STORAGE LOCATIONS:",
|
|
|
|
|
|
" -storage local ~/Videos (default)",
|
|
|
|
|
|
" -storage hydrus ~/.hydrus/client_files",
|
|
|
|
|
|
" -storage 0x0 ~/Screenshots",
|
|
|
|
|
|
" -storage debrid ~/Debrid",
|
|
|
|
|
|
" -storage ftp ~/FTP",
|
|
|
|
|
|
"",
|
|
|
|
|
|
"EXAMPLES:",
|
|
|
|
|
|
" # Download YouTube video as audio",
|
|
|
|
|
|
" download-data https://youtube.com/watch?v=xyz -audio -storage local",
|
|
|
|
|
|
"",
|
|
|
|
|
|
" # Extract specific clip from video",
|
|
|
|
|
|
" download-data https://vimeo.com/123456 -clip 1:30-2:45 -format best",
|
|
|
|
|
|
"",
|
|
|
|
|
|
" # Download specific tracks from playlist",
|
|
|
|
|
|
" download-data https://youtube.com/playlist?list=xyz -item '1,3,5-8'",
|
|
|
|
|
|
"",
|
|
|
|
|
|
" # Download all items from playlist",
|
|
|
|
|
|
" download-data https://youtube.com/playlist?list=xyz -item '*'",
|
|
|
|
|
|
"",
|
|
|
|
|
|
" # Download with authentication",
|
|
|
|
|
|
" download-data https://example.com/content -cookies ~/cookies.txt",
|
|
|
|
|
|
"",
|
|
|
|
|
|
"TORRENT EXAMPLES:",
|
|
|
|
|
|
" # Download specific tracks from magnet link",
|
|
|
|
|
|
" download-data -torrent magnet:?xt=urn:btih:... -item '1,3,5-8' -storage local",
|
|
|
|
|
|
"",
|
|
|
|
|
|
" # Download all items from torrent and merge",
|
|
|
|
|
|
" download-data -torrent magnet:?xt=urn:btih:... -item '*' | merge-file | add-file",
|
|
|
|
|
|
"",
|
|
|
|
|
|
" # Download with custom wait time (5 minutes)",
|
|
|
|
|
|
" download-data -torrent magnet:?xt=urn:btih:... -wait 300 -item '1-5'",
|
|
|
|
|
|
]
|
|
|
|
|
|
)
|