"""Generic file/stream downloader.
|
|
|
|
Supports:
|
|
- Direct HTTP file URLs (PDFs, images, documents; non-yt-dlp)
|
|
- Piped provider items (uses provider.download when available)
|
|
- Streaming sites via yt-dlp (YouTube, Bandcamp, etc.)
|
|
"""

from __future__ import annotations

import sys
import re
from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence
from urllib.parse import urlparse
from contextlib import AbstractContextManager, nullcontext

import requests

from API.alldebrid import is_magnet_link
from Provider import internetarchive as ia_provider
from Provider import alldebrid as ad_provider
from Provider import openlibrary as ol_provider

from API.HTTP import _download_direct_file
from SYS.models import DownloadError, DownloadOptions, DownloadMediaResult
from SYS.logger import log, debug
from SYS.pipeline_progress import PipelineProgress
from SYS.result_table import ResultTable
from SYS.rich_display import stderr_console as get_stderr_console
from SYS import pipeline as pipeline_context
from SYS.utils import sha256_file
from rich.prompt import Confirm

from tool.ytdlp import (
    YtDlpTool,
    _best_subtitle_sidecar,
    _SUBTITLE_EXTS,
    _download_with_timeout,
    _format_chapters_note,
    _read_text_file,
    is_url_supported_by_ytdlp,
    list_formats,
    probe_url,
)

from . import _shared as sh

Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
SharedArgs = sh.SharedArgs
QueryArg = sh.QueryArg
parse_cmdlet_args = sh.parse_cmdlet_args
register_url_with_local_library = sh.register_url_with_local_library
coerce_to_pipe_object = sh.coerce_to_pipe_object
get_field = sh.get_field


class Download_File(Cmdlet):
    """Class-based download-file cmdlet - direct HTTP downloads."""

    def __init__(self) -> None:
        """Initialize download-file cmdlet."""
        super().__init__(
            name="download-file",
            summary="Download files or streaming media",
            usage="download-file <url> [-path DIR] [options] OR @N | download-file [-path DIR|DIR] [options]",
            alias=["dl-file", "download-http"],
            arg=[
                SharedArgs.URL,
                SharedArgs.PROVIDER,
                SharedArgs.PATH,
                SharedArgs.QUERY,
                # Prefer -path for output directory to match other cmdlets; keep -output for backwards compatibility.
                CmdletArg(
                    name="-output",
                    type="string",
                    alias="o",
                    description="(deprecated) Output directory (use -path instead)",
                ),
                CmdletArg(
                    name="audio",
                    type="flag",
                    alias="a",
                    description="Download audio only (yt-dlp)",
                ),
                CmdletArg(
                    name="format",
                    type="string",
                    alias="fmt",
                    description="Explicit yt-dlp format selector",
                ),
                QueryArg(
                    "clip",
                    key="clip",
                    aliases=["range", "section", "sections"],
                    type="string",
                    required=False,
                    description=(
                        "Clip time ranges via -query keyed fields (e.g. clip:1m-2m or clip:00:01-00:10). "
                        "Comma-separated values supported."
                    ),
                    query_only=True,
                ),
                CmdletArg(
                    name="item",
                    type="string",
                    description="Item selection for playlists/formats",
                ),
            ],
            detail=[
                "Download files directly via HTTP or streaming media via yt-dlp.",
                "For Internet Archive item pages (archive.org/details/...), shows a selectable file/format list; pick with @N to download.",
            ],
            exec=self.run,
        )
        self.register()

    def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
        """Main execution method."""
        debug(f"[download-file] run invoked with args: {list(args)}")
        return self._run_impl(result, args, config)

    @staticmethod
    def _normalize_urls(parsed: Dict[str, Any]) -> List[str]:
        raw_url = parsed.get("url", [])
        if isinstance(raw_url, str):
            raw_url = [raw_url]

        expanded_urls: List[str] = []
        for u in raw_url or []:
            if u is None:
                continue
            s = str(u).strip()
            if not s:
                continue
            if "," in s:
                parts = [p.strip() for p in s.split(",")]
                expanded_urls.extend([p for p in parts if p])
            else:
                expanded_urls.append(s)

        return expanded_urls

    @staticmethod
    def _rewrite_archive_org_urls(raw_urls: Sequence[str]) -> List[str]:
        """Rewrite Archive.org URLs using metadata JSON to pick the right flow.

        - /metadata/<id>:
          - if lendable (collection contains inlibrary/lendinglibrary) -> /borrow/<id>
          - else -> /details/<id>
        - /details/<id>:
          - if lendable -> /borrow/<id>

        This makes `download-file` do the right thing for borrow-only items.
        """
        out: List[str] = []
        for u in list(raw_urls or []):
            s = str(u or "").strip()
            if not s:
                continue

            try:
                p = urlparse(s)
                host = (p.hostname or "").strip().lower()
                path = (p.path or "").strip()
            except Exception:
                out.append(s)
                continue

            if not host or (host != "archive.org" and not host.endswith(".archive.org")):
                out.append(s)
                continue

            low_path = path.lower().strip()
            if not (low_path.startswith("/metadata/") or low_path.startswith("/details/")):
                out.append(s)
                continue

            parts = [x for x in path.split("/") if x]
            if len(parts) < 2:
                out.append(s)
                continue
            head = str(parts[0] or "").strip().lower()
            archive_id = str(parts[1] or "").strip()
            if head not in {"metadata", "details"} or not archive_id:
                out.append(s)
                continue

            lendable = False
            try:
                meta_url = f"https://archive.org/metadata/{archive_id}"
                resp = requests.get(meta_url, timeout=8)
                resp.raise_for_status()
                data = resp.json() if resp is not None else {}
                meta = data.get("metadata", {}) if isinstance(data, dict) else {}
                collection = meta.get("collection") if isinstance(meta, dict) else None

                values: List[str] = []
                if isinstance(collection, list):
                    values = [str(x).strip().lower() for x in collection if str(x).strip()]
                elif isinstance(collection, str):
                    values = [collection.strip().lower()] if collection.strip() else []

                lendable = any(v in {"inlibrary", "lendinglibrary"} for v in values)
            except Exception:
                lendable = False

            if lendable:
                debug(f"[download-file] archive.org item '{archive_id}' looks lendable; using borrow flow")
                out.append(f"https://archive.org/borrow/{archive_id}")
                continue

            # Non-lendable: turn metadata URLs into details URLs so IA picker can show files.
            if head == "metadata":
                out.append(f"https://archive.org/details/{archive_id}")
                continue

            out.append(s)

        return out

    @staticmethod
    def _collect_piped_items_if_no_urls(result: Any, raw_urls: Sequence[str]) -> List[Any]:
        if raw_urls:
            return []
        if isinstance(result, list):
            return list(result)
        if result:
            return [result]
        return []

    @staticmethod
    def _safe_total_items(raw_urls: Sequence[str], piped_items: Sequence[Any]) -> int:
        try:
            return int(len(raw_urls or []) + len(piped_items or []))
        except Exception:
            return 1

    @staticmethod
    def _build_preview(
        raw_urls: Sequence[str],
        piped_items: Sequence[Any],
        total_items: int,
    ) -> List[Any]:
        try:
            preview: List[Any] = []
            preview.extend(list(raw_urls or [])[:max(0, total_items)])
            if len(preview) < total_items:
                preview.extend(
                    list(piped_items or [])[:max(0, total_items - len(preview))]
                )
            return preview
        except Exception:
            return []

    @staticmethod
    def _load_provider_registry() -> Dict[str, Any]:
        try:
            from ProviderCore.registry import (
                get_search_provider as _get_search_provider,
                get_provider as _get_provider,
                match_provider_name_for_url as _match_provider_name_for_url,
                SearchResult as _SearchResult,
            )

            return {
                "get_search_provider": _get_search_provider,
                "get_provider": _get_provider,
                "match_provider_name_for_url": _match_provider_name_for_url,
                "SearchResult": _SearchResult,
            }

        except Exception:
            return {
                "get_search_provider": None,
                "get_provider": None,
                "match_provider_name_for_url": None,
                "SearchResult": None,
            }

    @staticmethod
    def _path_from_download_result(result_obj: Any) -> Path:
        file_path = None
        if hasattr(result_obj, "path"):
            file_path = getattr(result_obj, "path")
        elif isinstance(result_obj, dict):
            file_path = result_obj.get("path")
        if not file_path:
            file_path = str(result_obj)
        return Path(str(file_path))

    def _emit_local_file(
        self,
        *,
        downloaded_path: Path,
        source: Optional[str],
        title_hint: Optional[str],
        tags_hint: Optional[List[str]],
        media_kind_hint: Optional[str],
        full_metadata: Optional[Dict[str, Any]],
        progress: PipelineProgress,
        config: Dict[str, Any],
        provider_hint: Optional[str] = None,
    ) -> None:
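        """Emit a pipeline payload for a downloaded file and register its URL with the local library."""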
        title_val = (title_hint or downloaded_path.stem or "Unknown").strip() or downloaded_path.stem
        hash_value = self._compute_file_hash(downloaded_path)
        notes: Optional[Dict[str, str]] = None
        try:
            if isinstance(full_metadata, dict):
                subtitles = full_metadata.get("_tidal_lyrics_subtitles")
                if isinstance(subtitles, str) and subtitles.strip():
                    notes = {"lyric": subtitles}
        except Exception:
            notes = None
        tag: List[str] = []
        if tags_hint:
            tag.extend([str(t) for t in tags_hint if t])
        if not any(str(t).lower().startswith("title:") for t in tag):
            tag.insert(0, f"title:{title_val}")

        payload: Dict[str, Any] = {
            "path": str(downloaded_path),
            "hash": hash_value,
            "title": title_val,
            "action": "cmdlet:download-file",
            "download_mode": "file",
            "store": "local",
            "media_kind": media_kind_hint or "file",
            "tag": tag,
        }
        if provider_hint:
            payload["provider"] = str(provider_hint)
        if full_metadata:
            payload["full_metadata"] = full_metadata
        if notes:
            payload["notes"] = notes
        if source and str(source).startswith("http"):
            payload["url"] = source
        elif source:
            payload["source_url"] = source

        pipeline_context.emit(payload)

        # When running with a local progress UI (standalone cmdlet), ensure
        # the pipe advances on emit.
        progress.on_emit(payload)

        # Automatically register url with local library
        if payload.get("url"):
            pipe_obj = coerce_to_pipe_object(payload)
            register_url_with_local_library(pipe_obj, config)

    def _process_explicit_urls(
        self,
        *,
        raw_urls: Sequence[str],
        final_output_dir: Path,
        config: Dict[str, Any],
        quiet_mode: bool,
        registry: Dict[str, Any],
        progress: PipelineProgress,
    ) -> tuple[int, Optional[int]]:
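        """Download each explicit URL, routing to a provider when one matches.

        Returns (downloaded_count, early_exit_code); the exit code is non-None only
        when a fallback search takes over the pipeline (e.g. the LibGen fallback).
        """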
        downloaded_count = 0

        SearchResult = registry.get("SearchResult")
        get_provider = registry.get("get_provider")
        match_provider_name_for_url = registry.get("match_provider_name_for_url")

        for url in raw_urls:
            try:
                debug(f"Processing URL: {url}")

                # Telegram message URLs are not direct files; route through the provider.
                try:
                    parsed_url = urlparse(str(url))
                    host = (parsed_url.hostname or "").lower().strip()
                except Exception:
                    host = ""

                is_telegram = host in {"t.me", "telegram.me"} or host.endswith(".t.me")
                if is_telegram and SearchResult:
                    try:
                        from ProviderCore.registry import get_provider as _get_provider
                    except Exception:
                        _get_provider = None

                    if _get_provider is None:
                        raise DownloadError("Telegram provider registry not available")

                    provider = _get_provider("telegram", config)
                    if provider is None:
                        raise DownloadError(
                            "Telegram provider not configured or not available (check telethon/app_id/api_hash)"
                        )

                    sr = SearchResult(
                        table="telegram",
                        title=str(url),
                        path=str(url),
                        full_metadata={},
                    )
                    downloaded_path = None
                    telegram_info: Optional[Dict[str, Any]] = None
                    if hasattr(provider, "download_url"):
                        try:
                            downloaded_path, telegram_info = provider.download_url(str(url), final_output_dir)  # type: ignore[attr-defined]
                        except Exception as exc:
                            raise DownloadError(str(exc))
                    else:
                        downloaded_path = provider.download(sr, final_output_dir)

                    if not downloaded_path:
                        raise DownloadError("Telegram download returned no file")

                    channel = ""
                    post = None
                    if isinstance(telegram_info, dict):
                        try:
                            chat_info_raw = telegram_info.get("chat")
                            msg_info_raw = telegram_info.get("message")
                            chat_info: Dict[str, Any] = (
                                chat_info_raw if isinstance(chat_info_raw, dict) else {}
                            )
                            msg_info: Dict[str, Any] = (
                                msg_info_raw if isinstance(msg_info_raw, dict) else {}
                            )
                            channel = str(
                                chat_info.get("title") or chat_info.get("username") or ""
                            ).strip()
                            post = msg_info.get("id")
                        except Exception:
                            channel = ""
                            post = None

                    title_hint = None
                    tg_tags: List[str] = []
                    if channel:
                        tg_tags.append(f"channel:{channel}")
                    if post is not None:
                        tg_tags.append(f"post:{post}")
                    if channel and post is not None:
                        title_hint = f"{channel} {post}"
                    elif post is not None:
                        title_hint = f"post:{post}"
                    else:
                        title_hint = downloaded_path.stem

                    self._emit_local_file(
                        downloaded_path=downloaded_path,
                        source=str(url),
                        title_hint=title_hint,
                        tags_hint=tg_tags,
                        media_kind_hint="file",
                        full_metadata=telegram_info,
                        provider_hint="telegram",
                        progress=progress,
                        config=config,
                    )
                    downloaded_count += 1
                    debug("✓ Downloaded via Telegram provider and emitted")
                    continue

                # Provider URL routing (e.g. OpenLibrary book pages).
                provider_name = None
                if match_provider_name_for_url is not None:
                    try:
                        provider_name = match_provider_name_for_url(str(url))
                    except Exception:
                        provider_name = None

                # Heuristic: LibGen often uses landing pages like edition.php/file.php.
                # These should never be treated as direct file URLs.
                if not provider_name:
                    try:
                        p = urlparse(str(url))
                        h = (p.hostname or "").strip().lower()
                        path = (p.path or "").strip().lower()
                        if "libgen" in h and any(x in path for x in (
                            "/edition.php",
                            "/file.php",
                            "/ads.php",
                            "/get.php",
                            "/series.php",
                        )):
                            provider_name = "libgen"
                    except Exception:
                        pass

                if (provider_name
                        and str(provider_name).lower() == "alldebrid"
                        and is_magnet_link(str(url))):
                    magnet_spec = ad_provider.resolve_magnet_spec(str(url))
                    if magnet_spec:
                        _, magnet_id = ad_provider.prepare_magnet(magnet_spec, config)
                        if magnet_id is not None:
                            downloaded_count += 1
                            continue

                if provider_name and get_provider is not None and SearchResult is not None:
                    # OpenLibrary URLs should be handled by the OpenLibrary provider.
                    if provider_name == "openlibrary":
                        provider = get_provider("openlibrary", config)
                        if provider is None:
                            raise DownloadError(
                                "OpenLibrary provider not configured or not available"
                            )

                        edition_id = ol_provider.edition_id_from_url(str(url))
                        title_hint = ol_provider.title_hint_from_url_slug(str(url))

                        download_payload: Optional[Dict[str, Any]] = None
                        try:
                            ui, _pipe_idx = progress.ui_and_pipe_index()
                            progress_cb = None
                            if ui is not None:
                                # High-level steps for OpenLibrary borrow/download flow.
                                progress.begin_steps(5)

                                def _progress(
                                    kind: str,
                                    done: int,
                                    total: Optional[int],
                                    label: str,
                                ) -> None:
                                    # kind:
                                    # - "step": advance step text
                                    # - "pages": update pipe percent/status
                                    # - "bytes": update transfer bar
                                    if kind == "step":
                                        progress.step(label)
                                        return

                                    if kind == "pages":
                                        t = int(total) if isinstance(total, int) else 0
                                        d = int(done) if isinstance(done, int) else 0
                                        if t > 0:
                                            pct = int(round((max(0, min(d, t)) / max(1, t)) * 100.0))
                                            progress.set_percent(pct)
                                            progress.set_status(f"downloading pages {d}/{t}")
                                        else:
                                            progress.set_status(f"downloading pages {d}")
                                        return

                                    if kind == "bytes":
                                        try:
                                            lbl = str(label or "download")
                                        except Exception:
                                            lbl = "download"
                                        progress.begin_transfer(label=lbl, total=total)
                                        progress.update_transfer(
                                            label=lbl,
                                            completed=done,
                                            total=total,
                                        )
                                        try:
                                            if (isinstance(total, int) and total > 0
                                                    and int(done) >= int(total)):
                                                progress.finish_transfer(label=lbl)
                                        except Exception:
                                            pass
                                        return

                                progress_cb = _progress

                            if hasattr(provider, "download_url"):
                                download_payload = provider.download_url(  # type: ignore[attr-defined]
                                    str(url),
                                    final_output_dir,
                                    progress_cb,
                                )

                            if download_payload is None:
                                sr = None
                                if hasattr(provider, "search_result_from_url"):
                                    sr = provider.search_result_from_url(str(url))  # type: ignore[attr-defined]
                                if sr is None:
                                    sr = SearchResult(
                                        table="openlibrary",
                                        title=title_hint,
                                        path=str(url),
                                        media_kind="book",
                                        full_metadata={
                                            "openlibrary_id": edition_id,
                                        },
                                    )

                                downloaded_path = provider.download(
                                    sr,
                                    final_output_dir,
                                    progress_callback=progress_cb,
                                )  # type: ignore[call-arg]

                                if downloaded_path:
                                    download_payload = {
                                        "path": Path(downloaded_path),
                                        "search_result": sr,
                                    }
                        except Exception as exc:
                            raise DownloadError(str(exc))

                        # Clear long-running status line after the download attempt.
                        progress.clear_status()

                        if download_payload and download_payload.get("path"):
                            downloaded_path = Path(download_payload["path"])
                            sr_obj = download_payload.get("search_result")

                            tags_hint: Optional[List[str]] = None
                            full_md: Optional[Dict[str, Any]] = None
                            resolved_title = title_hint
                            if sr_obj is not None:
                                try:
                                    resolved_title = getattr(sr_obj, "title", None) or resolved_title
                                except Exception:
                                    pass
                                try:
                                    sr_tags = getattr(sr_obj, "tag", None)
                                    if isinstance(sr_tags, set) and sr_tags:
                                        tags_hint = sorted([str(t) for t in sr_tags if t])
                                except Exception:
                                    tags_hint = None
                                try:
                                    full_md = getattr(sr_obj, "full_metadata", None)
                                except Exception:
                                    full_md = None

                            self._emit_local_file(
                                downloaded_path=downloaded_path,
                                source=str(url),
                                title_hint=resolved_title,
                                tags_hint=tags_hint,
                                media_kind_hint="book",
                                full_metadata=full_md,
                                provider_hint="openlibrary",
                                progress=progress,
                                config=config,
                            )
                            downloaded_count += 1
                            continue

                        # If OpenLibrary can't provide it (not lendable, no creds, etc), auto-search LibGen.
                        try:
                            fallback_query = str(title_hint or "").strip()
                            if fallback_query:
                                log(
                                    f"[download-file] Not available on OpenLibrary; searching LibGen for: {fallback_query}",
                                    file=sys.stderr,
                                )
                                from cmdlet.search_provider import CMDLET as _SEARCH_PROVIDER_CMDLET

                                exec_fn = getattr(_SEARCH_PROVIDER_CMDLET, "exec", None)
                                if callable(exec_fn):
                                    ret = exec_fn(
                                        None,
                                        ["-provider", "libgen", "-query", fallback_query],
                                        config,
                                    )
                                    try:
                                        table = pipeline_context.get_last_result_table()
                                        items = pipeline_context.get_last_result_items()
                                        if table is not None:
                                            pipeline_context.set_last_result_table_overlay(
                                                table,
                                                items,
                                            )
                                    except Exception:
                                        pass

                                    try:
                                        return downloaded_count, int(ret)  # type: ignore[arg-type]
                                    except Exception:
                                        return downloaded_count, 1
                        except Exception:
                            pass

                        log(
                            "[download-file] OpenLibrary URL could not be downloaded",
                            file=sys.stderr,
                        )
                        continue

                    # Generic provider URL handler (if a provider implements `download_url`).
                    provider = get_provider(provider_name, config)
                    if provider is not None and hasattr(provider, "download_url"):
                        try:
                            downloaded_path = provider.download_url(
                                str(url),
                                final_output_dir,
                            )  # type: ignore[attr-defined]
                        except Exception as exc:
                            raise DownloadError(str(exc))

                        if downloaded_path:
                            self._emit_local_file(
                                downloaded_path=Path(downloaded_path),
                                source=str(url),
                                title_hint=Path(str(downloaded_path)).stem,
                                tags_hint=None,
                                media_kind_hint="file",
                                full_metadata=None,
                                provider_hint=str(provider_name),
                                progress=progress,
                                config=config,
                            )
                            downloaded_count += 1
                            continue

                    # Otherwise, try provider.download(SearchResult) with the URL as the target.
                    if provider is not None:
                        sr_obj = None
                        try:
                            sr_obj = SearchResult(
                                table=str(provider_name),
                                title=str(url),
                                path=str(url),
                                full_metadata={},
                            )
                            downloaded_path = provider.download(
                                sr_obj,
                                final_output_dir,
                            )  # type: ignore[call-arg]
                        except Exception:
                            downloaded_path = None

                        # Refuse to fall back to direct-download for LibGen landing pages.
                        # This prevents saving HTML (e.g. edition.php) as a bogus file.
                        if (not downloaded_path) and str(provider_name).lower() == "libgen":
                            raise DownloadError(
                                "LibGen URL did not resolve to a downloadable file"
                            )

                        if downloaded_path:
                            emit_tags: Optional[List[str]] = None
                            full_md: Optional[Dict[str, Any]] = None
                            title_hint = Path(str(downloaded_path)).stem
                            media_kind_hint = "file"

                            if str(provider_name).lower() == "libgen" and sr_obj is not None:
                                media_kind_hint = "book"
                                try:
                                    sr_tags = getattr(sr_obj, "tag", None)
                                    if isinstance(sr_tags, set) and sr_tags:
                                        emit_tags = sorted([str(t) for t in sr_tags if t])
                                except Exception:
                                    emit_tags = None

                                try:
                                    sr_full_md = getattr(sr_obj, "full_metadata", None)
                                    if isinstance(sr_full_md, dict):
                                        full_md = sr_full_md
                                        t = str(sr_full_md.get("title") or "").strip()
                                        if t:
                                            title_hint = t
                                except Exception:
                                    full_md = None

                            self._emit_local_file(
                                downloaded_path=Path(downloaded_path),
                                source=str(url),
                                title_hint=title_hint,
                                tags_hint=emit_tags,
                                media_kind_hint=media_kind_hint,
                                full_metadata=full_md,
                                provider_hint=str(provider_name),
                                progress=progress,
                                config=config,
                            )
                            downloaded_count += 1
                            continue

                result_obj = _download_direct_file(
                    str(url),
                    final_output_dir,
                    quiet=quiet_mode,
                    pipeline_progress=progress,
                )
                downloaded_path = self._path_from_download_result(result_obj)

                self._emit_local_file(
                    downloaded_path=downloaded_path,
                    source=str(url),
                    title_hint=downloaded_path.stem,
                    tags_hint=[f"title:{downloaded_path.stem}"],
                    media_kind_hint="file",
                    full_metadata=None,
                    progress=progress,
                    config=config,
                )
                downloaded_count += 1
                debug("✓ Downloaded and emitted")

            except DownloadError as e:
                log(f"Download failed for {url}: {e}", file=sys.stderr)
            except Exception as e:
                log(f"Error processing {url}: {e}", file=sys.stderr)

        return downloaded_count, None

    def _expand_provider_items(
        self,
        *,
        piped_items: Sequence[Any],
        registry: Dict[str, Any],
        config: Dict[str, Any],
    ) -> List[Any]:
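        """Expand folder-like provider items (currently AllDebrid magnet folders) into per-file items."""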
        get_search_provider = registry.get("get_search_provider")
        expanded_items: List[Any] = []
        for item in piped_items:
            try:
                table = get_field(item, "table")
                media_kind = get_field(item, "media_kind")
                full_metadata = get_field(item, "full_metadata")
                target = get_field(item, "path") or get_field(item, "url")

                if (str(table or "").lower() == "alldebrid"
                        and str(media_kind or "").lower() == "folder"):
                    magnet_id = None
                    if isinstance(full_metadata, dict):
                        magnet_id = full_metadata.get("magnet_id")
                    if (magnet_id is None and isinstance(target, str)
                            and target.lower().startswith("alldebrid:magnet:")):
                        try:
                            magnet_id = int(target.split(":")[-1])
                        except Exception:
                            magnet_id = None

                    expanded, detail = ad_provider.expand_folder_item(
                        item,
                        get_search_provider,
                        config,
                    )
                    if detail:
                        log(
                            f"[download-file] AllDebrid magnet {magnet_id or 'unknown'} not ready ({detail or 'unknown'})",
                            file=sys.stderr,
                        )
                        continue
                    if expanded:
                        expanded_items.extend(expanded)
                        continue

                expanded_items.append(item)
            except Exception:
                expanded_items.append(item)

        return expanded_items

    def _process_provider_items(
        self,
        *,
        piped_items: Sequence[Any],
        final_output_dir: Path,
        config: Dict[str, Any],
        quiet_mode: bool,
        registry: Dict[str, Any],
        progress: PipelineProgress,
    ) -> tuple[int, int]:
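        """Download piped provider items; returns (downloaded_count, queued_magnet_submissions)."""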
        downloaded_count = 0
        queued_magnet_submissions = 0
        get_search_provider = registry.get("get_search_provider")
        SearchResult = registry.get("SearchResult")

        expanded_items = self._expand_provider_items(
            piped_items=piped_items,
            registry=registry,
            config=config,
        )

        total_items = len(expanded_items)
        processed_items = 0
        try:
            if total_items:
                progress.set_percent(0)
        except Exception:
            pass

        for item in expanded_items:
            try:
                label = "item"
                table = get_field(item, "table")
                title = get_field(item, "title")
                target = get_field(item, "path") or get_field(item, "url")
                media_kind = get_field(item, "media_kind")
                tags_val = get_field(item, "tag")
                tags_list: Optional[List[str]]
                if isinstance(tags_val, list):
                    tags_list = [str(t) for t in tags_val if t]
                else:
                    tags_list = None

                full_metadata = get_field(item, "full_metadata")
                if ((not full_metadata) and isinstance(item, dict)
                        and isinstance(item.get("extra"), dict)):
                    extra_md = item["extra"].get("full_metadata")
                    if isinstance(extra_md, dict):
                        full_metadata = extra_md

                try:
                    label = title or target
                    label = str(label or "item").strip()
                    if total_items:
                        pct = int(round((processed_items / max(1, total_items)) * 100))
                        progress.set_percent(pct)
                        progress.set_status(
                            f"downloading {processed_items + 1}/{total_items}: {label}"
                        )
                except Exception:
                    pass

                transfer_label = label
                if str(table or "").lower() == "hifi":
                    try:
                        progress.begin_transfer(label=transfer_label, total=None)
                    except Exception:
                        pass

                # If this looks like a provider item and providers are available, prefer provider.download()
                downloaded_path: Optional[Path] = None
                attempted_provider_download = False
                provider_sr = None
                if table and get_search_provider and SearchResult:
                    provider = get_search_provider(str(table), config)
                    if provider is not None:
                        attempted_provider_download = True
                        sr = SearchResult(
                            table=str(table),
                            title=str(title or "Unknown"),
                            path=str(target or ""),
                            full_metadata=full_metadata if isinstance(full_metadata, dict) else {},
                        )
                        debug(
                            f"[download-file] Downloading provider item via {table}: {sr.title}"
                        )

                        # Preserve provider structure when possible (AllDebrid folders -> subfolders).
                        output_dir = final_output_dir
                        try:
                            if str(table).strip().lower() == "alldebrid":
                                output_dir = ad_provider.adjust_output_dir_for_alldebrid(
                                    final_output_dir,
                                    full_metadata if isinstance(full_metadata, dict) else None,
                                    item,
                                )
                        except Exception:
                            output_dir = final_output_dir

                        downloaded_path = provider.download(sr, output_dir)
                        provider_sr = sr

                # OpenLibrary: if provider download failed, do NOT try to download the OpenLibrary page HTML.
                if (downloaded_path is None and attempted_provider_download
                        and str(table or "").lower() == "openlibrary"):
                    availability = None
                    reason = None
                    if isinstance(full_metadata, dict):
                        availability = full_metadata.get("availability")
                        reason = full_metadata.get("availability_reason")
                    msg = "[download-file] OpenLibrary item not downloadable"
                    if availability or reason:
                        msg += f" (availability={availability or ''} reason={reason or ''})"
                    log(msg, file=sys.stderr)

                    # Fallback: show a LibGen selectable ResultTable (no emits) so the user can pick @N.
                    # This intentionally mirrors `search-file -provider libgen` UX: results table + selection.
                    try:
                        title_text = str(title or "").strip()
                        if not title_text and isinstance(full_metadata, dict):
                            title_text = str(full_metadata.get("title") or "").strip()
                        if title_text and get_search_provider and SearchResult:
                            log(
                                f"[download-file] Not available on OpenLibrary; searching LibGen for: {title_text}",
                                file=sys.stderr,
                            )
                            libgen_provider = get_search_provider("libgen", config)
                            if libgen_provider is None:
                                log(
                                    "[download-file] LibGen provider unavailable; cannot run fallback search",
                                    file=sys.stderr,
                                )
                                continue

                            try:
                                from SYS.result_table import ResultTable
                            except Exception:
                                ResultTable = None  # type: ignore[assignment]

                            if ResultTable is None:
                                log(
                                    "[download-file] ResultTable unavailable; cannot render LibGen fallback search",
                                    file=sys.stderr,
                                )
                                continue

                            fallback_query = title_text
                            # Keep parity with search-file provider default when user didn't specify a limit.
                            results = libgen_provider.search(fallback_query, limit=50)
                            if not results:
                                log(
                                    f"[download-file] LibGen: no results found for: {fallback_query}",
                                    file=sys.stderr,
                                )
                                continue

                            table_title = f"Libgen: {fallback_query}".strip().rstrip(":")
                            table_obj = ResultTable(table_title).set_preserve_order(False)
                            table_obj.set_table("libgen")
                            try:
                                table_obj.set_table_metadata({"provider": "libgen"})
                            except Exception:
                                pass

                            # Mark as produced by download-file so the pipeline runner pauses and stores tail stages.
                            table_obj.set_source_command("download-file", [])

                            results_list: List[Dict[str, Any]] = []
                            for search_result in results:
                                item_dict = (
                                    search_result.to_dict()
                                    if hasattr(search_result, "to_dict")
                                    else dict(search_result)
                                    if isinstance(search_result, dict)
                                    else {"title": str(search_result)}
                                )
                                if "table" not in item_dict:
                                    item_dict["table"] = "libgen"
                                table_obj.add_result(search_result)
                                results_list.append(item_dict)

                            # Seed selection state for @N and pause the pipeline.
                            try:
                                pipeline_context.set_last_result_table(table_obj, results_list)
                            except Exception:
                                pass
                            try:
                                pipeline_context.set_current_stage_table(table_obj)
                            except Exception:
                                pass

                            # Returning 0 with a selectable stage table and no emits causes the CLI to render
                            # the table and pause, preserving the downstream pipeline tail.
                            return 0
                    except Exception:
                        pass

                    continue

                # Magnet targets (e.g., torrent provider results) -> submit/download via AllDebrid
                if downloaded_path is None and isinstance(target, str) and is_magnet_link(str(target)):
                    magnet_spec = ad_provider.resolve_magnet_spec(str(target))
                    if magnet_spec:

                        def _on_emit(path: Path, file_url: str, relpath: str, metadata: Dict[str, Any]) -> None:
                            title_hint = metadata.get("name") or relpath or title
                            self._emit_local_file(
                                downloaded_path=path,
                                source=file_url or target,
                                title_hint=title_hint,
                                tags_hint=None,
                                media_kind_hint="file",
                                full_metadata=metadata,
                                progress=progress,
                                config=config,
                                provider_hint="alldebrid",
                            )

                        downloaded, magnet_id = ad_provider.download_magnet(
                            magnet_spec,
                            str(target),
                            final_output_dir,
                            config,
                            progress,
                            quiet_mode,
                            self._path_from_download_result,
                            _on_emit,
                        )

                        if downloaded > 0:
                            downloaded_count += downloaded
                            continue

                        # If queued but not yet ready, skip the generic unsupported-target error.
                        if magnet_id is not None:
                            queued_magnet_submissions += 1
                            continue

                # Fallback: if we have a direct HTTP URL, download it directly
                if (downloaded_path is None and isinstance(target, str)
                        and target.startswith("http")):
                    # Guard: provider landing pages (e.g. LibGen ads.php) are HTML, not files.
                    # Never download these as "files".
                    if str(table or "").lower() == "libgen":
                        low = target.lower()
                        if ("/ads.php" in low) or ("/file.php" in low) or ("/index.php" in low):
                            log(
                                "[download-file] Refusing to download LibGen landing page (expected provider to resolve file link)",
                                file=sys.stderr,
                            )
                            continue

                    debug(
                        f"[download-file] Provider item looks like direct URL, downloading: {target}"
                    )
                    suggested_name = str(title).strip() if title is not None else None
                    result_obj = _download_direct_file(
                        target,
                        final_output_dir,
                        quiet=quiet_mode,
                        suggested_filename=suggested_name,
                        pipeline_progress=progress,
                    )
                    downloaded_path = self._path_from_download_result(result_obj)

                if downloaded_path is None:
                    log(
                        f"Cannot download item (no provider handler / unsupported target): {title or target}",
                        file=sys.stderr,
                    )
                    continue

                # Prefer provider-enriched metadata (providers may mutate sr.full_metadata).
                if provider_sr is not None:
                    try:
                        sr_md = getattr(provider_sr, "full_metadata", None)
                        if isinstance(sr_md, dict) and sr_md:
                            full_metadata = sr_md
                    except Exception:
                        pass

                # Allow providers to add/enrich tags and metadata during download.
                if str(table or "").lower() == "libgen" and provider_sr is not None:
                    try:
                        sr_tags = getattr(provider_sr, "tag", None)
                        if tags_list is None and isinstance(sr_tags, set) and sr_tags:
                            tags_list = sorted([str(t) for t in sr_tags if t])
                    except Exception:
                        pass

                    try:
                        sr_md = getattr(provider_sr, "full_metadata", None)
                        if isinstance(sr_md, dict) and sr_md:
                            full_metadata = sr_md
                    except Exception:
                        pass

                try:
                    if isinstance(full_metadata, dict):
                        t = str(full_metadata.get("title") or "").strip()
                        if t:
                            title = t
                except Exception:
                    pass

                self._emit_local_file(
                    downloaded_path=downloaded_path,
                    source=str(target) if target else None,
                    title_hint=str(title) if title else downloaded_path.stem,
                    tags_hint=tags_list,
                    media_kind_hint=str(media_kind) if media_kind else None,
                    full_metadata=full_metadata if isinstance(full_metadata, dict) else None,
                    progress=progress,
                    config=config,
                )
                downloaded_count += 1

            except DownloadError as e:
                log(f"Download failed: {e}", file=sys.stderr)
            except Exception as e:
                log(f"Error downloading item: {e}", file=sys.stderr)
            finally:
                if str(table or "").lower() == "hifi":
                    try:
                        progress.finish_transfer(label=transfer_label)
                    except Exception:
                        pass
                processed_items += 1
                try:
                    pct = int(round((processed_items / max(1, total_items)) * 100))
                    progress.set_percent(pct)
                    if processed_items >= total_items:
                        progress.clear_status()
                except Exception:
                    pass

        return downloaded_count, queued_magnet_submissions

    # === Streaming helpers (yt-dlp) ===

    @staticmethod
    def _append_urls_from_piped_result(raw_urls: List[str], result: Any) -> List[str]:
        if raw_urls:
            return raw_urls
        if not result:
            return raw_urls

        results_to_check = result if isinstance(result, list) else [result]
        for item in results_to_check:
            try:
                url = get_field(item, "url") or get_field(item, "target")
            except Exception:
                url = None
            if url:
                raw_urls.append(url)
        return raw_urls

    @staticmethod
    def _filter_supported_urls(raw_urls: Sequence[str]) -> tuple[List[str], List[str]]:
        supported = [url for url in (raw_urls or []) if is_url_supported_by_ytdlp(url)]
        unsupported = list(set(raw_urls or []) - set(supported or []))
        return supported, unsupported

    def _parse_query_keyed_spec(self, query_spec: Optional[str]) -> Dict[str, List[str]]:
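        """Parse a -query keyed spec into {key: [values]}, folding aliases onto canonical keys.

        Illustrative: "clip:1m-2m,fmt:720p" -> {"clip": ["1m-2m"], "format": ["720p"]}
        (bare values default to the "hash" key).
        """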
        if not query_spec:
            return {}
        try:
            keyed = self._parse_keyed_csv_spec(str(query_spec), default_key="hash")
            if not keyed:
                return {}

            def _alias(src: str, dest: str) -> None:
                try:
                    values = keyed.get(src)
                except Exception:
                    values = None
                if not values:
                    return
                try:
                    keyed.setdefault(dest, []).extend(list(values))
                except Exception:
                    pass
                try:
                    keyed.pop(src, None)
                except Exception:
                    pass

            for src in ("range", "ranges", "section", "sections"):
                _alias(src, "clip")
            for src in ("fmt", "f"):
                _alias(src, "format")
            for src in ("aud", "a"):
                _alias(src, "audio")

            return keyed
        except Exception:
            return {}

    @staticmethod
    def _extract_hash_override(query_spec: Optional[str], query_keyed: Dict[str, List[str]]) -> Optional[str]:
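        """Return a normalized hash override from the -query spec; None when non-hash keyed fields are present."""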
        try:
            hash_values = query_keyed.get("hash", []) if isinstance(query_keyed, dict) else []
            hash_candidate = hash_values[-1] if hash_values else None
            if hash_candidate:
                return sh.parse_single_hash_query(f"hash:{hash_candidate}")

            try:
                has_non_hash_keys = bool(
                    query_keyed
                    and isinstance(query_keyed, dict)
                    and any(k for k in query_keyed.keys() if str(k).strip().lower() != "hash")
                )
            except Exception:
                has_non_hash_keys = False
            if has_non_hash_keys:
                return None
            return sh.parse_single_hash_query(str(query_spec)) if query_spec else None
        except Exception:
            return None

    def _parse_clip_ranges_and_apply_items(
        self,
        *,
        clip_spec: Optional[str],
        query_keyed: Dict[str, List[str]],
        parsed: Dict[str, Any],
        query_spec: Optional[str],
    ) -> tuple[Optional[List[tuple[int, int]]], bool, List[str]]:
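        """Merge clip/item values from -clip and -query; returns (clip_ranges, had_error, clip_values)."""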
        clip_ranges: Optional[List[tuple[int, int]]] = None
        clip_values: List[str] = []
        item_values: List[str] = []

        def _uniq(values: Sequence[str]) -> List[str]:
            seen: set[str] = set()
            out: List[str] = []
            for v in values:
                key = str(v)
                if key in seen:
                    continue
                seen.add(key)
                out.append(v)
            return out

        if clip_spec:
            keyed = self._parse_keyed_csv_spec(str(clip_spec), default_key="clip")
            clip_values.extend(keyed.get("clip", []) or [])
            item_values.extend(keyed.get("item", []) or [])

        if query_keyed:
            clip_values.extend(query_keyed.get("clip", []) or [])
            item_values.extend(query_keyed.get("item", []) or [])

        clip_values = _uniq(clip_values)
        item_values = _uniq(item_values)

        if item_values and not parsed.get("item"):
            parsed["item"] = ",".join([v for v in item_values if v])

        if clip_values:
            clip_ranges = self._parse_time_ranges(",".join([v for v in clip_values if v]))
            if not clip_ranges:
                bad_spec = clip_spec or query_spec
                log(f"Invalid clip format: {bad_spec}", file=sys.stderr)
                return None, True, clip_values

        return clip_ranges, False, clip_values

    @staticmethod
    def _init_storage(config: Dict[str, Any]) -> tuple[Optional[Any], bool]:
        storage = None
        hydrus_available = True
        try:
            from Store import Store
            from API.HydrusNetwork import is_hydrus_available

            storage = Store(config=config or {}, suppress_debug=True)
            hydrus_available = bool(is_hydrus_available(config or {}))
        except Exception:
            storage = None
        return storage, hydrus_available

    @staticmethod
    def _cookiefile_str(ytdlp_tool: YtDlpTool) -> Optional[str]:
        try:
            cookie_path = ytdlp_tool.resolve_cookiefile()
            if cookie_path is not None and cookie_path.is_file():
                return str(cookie_path)
        except Exception:
            pass
        return None

    def _list_formats_cached(
        self,
        u: str,
        *,
        playlist_items_value: Optional[str],
        formats_cache: Dict[str, Optional[List[Dict[str, Any]]]],
        ytdlp_tool: YtDlpTool,
    ) -> Optional[List[Dict[str, Any]]]:
        key = f"{u}||{playlist_items_value or ''}"
        if key in formats_cache:
            return formats_cache[key]
        fmts = list_formats(
            u,
            no_playlist=False,
            playlist_items=playlist_items_value,
            cookiefile=self._cookiefile_str(ytdlp_tool),
        )
        formats_cache[key] = fmts
        return fmts

    def _is_browseable_format(self, fmt: Any) -> bool:
        if not isinstance(fmt, dict):
            return False
        format_id = str(fmt.get("format_id") or "").strip()
        if not format_id:
            return False
        ext = str(fmt.get("ext") or "").strip().lower()
        if ext in {"mhtml", "json"}:
            return False
        note = str(fmt.get("format_note") or "").lower()
        if "storyboard" in note:
            return False
        if format_id.lower().startswith("sb"):
            return False
        vcodec = str(fmt.get("vcodec", "none"))
        acodec = str(fmt.get("acodec", "none"))
        return not (vcodec == "none" and acodec == "none")

    def _format_id_for_query_index(
        self,
        query_format: str,
        url: str,
        formats_cache: Dict[str, Optional[List[Dict[str, Any]]]],
        ytdlp_tool: YtDlpTool,
    ) -> Optional[str]:
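        """Resolve a 1-based numeric selection (e.g. "3" or "#3") against the browseable format list.

        Video-only picks get "+ba" appended so yt-dlp merges the best audio.
        """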
        if not query_format or not re.match(r"^\s*#?\d+\s*$", str(query_format)):
            return None

        try:
            idx = int(str(query_format).lstrip("#").strip())
        except Exception:
            raise ValueError(f"Invalid format index: {query_format}")

        fmts = self._list_formats_cached(
            url,
            playlist_items_value=None,
            formats_cache=formats_cache,
            ytdlp_tool=ytdlp_tool,
        )
        if not fmts:
            raise ValueError("Unable to list formats for the URL; cannot resolve numeric format index")

        candidate_formats = [f for f in fmts if self._is_browseable_format(f)]
        filtered_formats = candidate_formats if candidate_formats else list(fmts)

        if not filtered_formats:
            raise ValueError("No formats available for selection")

        if idx <= 0 or idx > len(filtered_formats):
            raise ValueError(f"Format index {idx} out of range (1..{len(filtered_formats)})")

        chosen = filtered_formats[idx - 1]
        selection_format_id = str(chosen.get("format_id") or "").strip()
        if not selection_format_id:
            raise ValueError("Selected format has no format_id")

        try:
            vcodec = str(chosen.get("vcodec", "none"))
            acodec = str(chosen.get("acodec", "none"))
            if vcodec != "none" and acodec == "none":
                selection_format_id = f"{selection_format_id}+ba"
        except Exception:
            pass

        return selection_format_id

    @staticmethod
    def _format_selector_for_query_height(query_format: str) -> Optional[str]:
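        """Map a height spec such as "720p" to a yt-dlp selector, e.g. "bv*[height<=720]+ba"."""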
        if query_format is None:
            return None

        s = str(query_format).strip().lower()
        m = re.match(r"^(\d{2,5})p$", s)
        if not m:
            return None

        try:
            height = int(m.group(1))
        except Exception:
            return None

        if height <= 0:
            raise ValueError(f"Invalid height selection: {query_format}")

        return f"bv*[height<={height}]+ba"

    @staticmethod
    def _canonicalize_url_for_storage(*, requested_url: str, ytdlp_tool: YtDlpTool, playlist_items: Optional[str]) -> str:
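        """Prefer the probed canonical page URL (webpage_url/original_url) as the storage/dedupe key."""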
        if playlist_items:
            return str(requested_url)
        try:
            cf = None
            try:
                cookie_path = ytdlp_tool.resolve_cookiefile()
                if cookie_path is not None and cookie_path.is_file():
                    cf = str(cookie_path)
            except Exception:
                cf = None
            pr = probe_url(requested_url, no_playlist=False, timeout_seconds=15, cookiefile=cf)
            if isinstance(pr, dict):
                for key in ("webpage_url", "original_url", "url", "requested_url"):
                    value = pr.get(key)
                    if isinstance(value, str) and value.strip():
                        return value.strip()
        except Exception:
            pass
        return str(requested_url)

    def _preflight_url_duplicate(
        self,
        *,
        storage: Any,
        hydrus_available: bool,
        final_output_dir: Path,
        candidate_url: str,
        extra_urls: Optional[Sequence[str]] = None,
    ) -> bool:
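        """Check storage backends for the URL; return False if it already exists and the user declines."""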
        if storage is None:
            debug("Preflight URL check skipped: storage unavailable")
            return True

        debug(f"Preflight URL check: candidate={candidate_url}")

        try:
            from SYS.metadata import normalize_urls
        except Exception:
            normalize_urls = None  # type: ignore[assignment]

        needles: List[str] = []
        if normalize_urls is not None:
            for raw in [candidate_url, *(list(extra_urls) if extra_urls else [])]:
                try:
                    needles.extend(normalize_urls(raw))
                except Exception:
                    continue
        if not needles:
            needles = [str(candidate_url)]

        seen_needles: List[str] = []
        for needle in needles:
            if needle and needle not in seen_needles:
                seen_needles.append(needle)
        needles = seen_needles

        try:
            debug(f"Preflight URL needles: {needles}")
        except Exception:
            pass

        url_matches: List[Dict[str, Any]] = []
        try:
            from Store.HydrusNetwork import HydrusNetwork

            backend_names_all = storage.list_searchable_backends()
            backend_names: List[str] = []
            skipped: List[str] = []
            for backend_name in backend_names_all:
                try:
                    backend = storage[backend_name]
                except Exception:
                    continue

                try:
                    if str(backend_name).strip().lower() == "temp":
                        skipped.append(backend_name)
                        continue
                except Exception:
                    pass

                try:
                    backend_location = getattr(backend, "_location", None)
                    if backend_location and final_output_dir:
                        backend_path = Path(str(backend_location)).expanduser().resolve()
                        temp_path = Path(str(final_output_dir)).expanduser().resolve()
                        if backend_path == temp_path:
                            skipped.append(backend_name)
                            continue
                except Exception:
                    pass

                backend_names.append(backend_name)

            try:
                if skipped:
                    debug(f"Preflight backends: {backend_names} (skipped temp: {skipped})")
                else:
                    debug(f"Preflight backends: {backend_names}")
            except Exception:
                pass

            for backend_name in backend_names:
                backend = storage[backend_name]
                if isinstance(backend, HydrusNetwork) and not hydrus_available:
                    continue

                backend_hits: List[Dict[str, Any]] = []
                for needle in needles:
                    try:
                        backend_hits = backend.search(f"url:{needle}", limit=25) or []
                        if backend_hits:
                            break
                    except Exception:
                        continue
                if backend_hits:
                    url_matches.extend(
                        [
                            dict(x) if isinstance(x, dict) else {"title": str(x)}
                            for x in backend_hits
                        ]
                    )

                if len(url_matches) >= 25:
                    url_matches = url_matches[:25]
                    break
        except Exception:
            url_matches = []

        if not url_matches:
            debug("Preflight URL check: no matches")
            return True

        try:
            current_cmd_text = pipeline_context.get_current_command_text("")
        except Exception:
            current_cmd_text = ""

        try:
            stage_ctx = pipeline_context.get_stage_context()
        except Exception:
            stage_ctx = None

        in_pipeline = bool(stage_ctx is not None or ("|" in str(current_cmd_text or "")))
        if in_pipeline:
            try:
                cached_cmd = pipeline_context.load_value("preflight.url_duplicates.command", default="")
                cached_decision = pipeline_context.load_value("preflight.url_duplicates.continue", default=None)
            except Exception:
                cached_cmd = ""
                cached_decision = None

            if cached_decision is not None and str(cached_cmd or "") == str(current_cmd_text or ""):
                if bool(cached_decision):
                    return True
                try:
                    pipeline_context.request_pipeline_stop(reason="duplicate-url declined", exit_code=0)
                except Exception:
                    pass
                return False

        table = ResultTable(f"URL already exists ({len(url_matches)} match(es))")
        results_list: List[Dict[str, Any]] = []
        for item in url_matches:
            if "title" not in item:
                item["title"] = item.get("name") or item.get("target") or item.get("path") or "Result"

            try:
                from SYS.result_table import build_display_row
            except Exception:
                build_display_row = None  # type: ignore

            if callable(build_display_row):
                display_row = build_display_row(item, keys=["title", "store", "hash", "ext", "size"])
            else:
                display_row = {
                    "title": item.get("title"),
                    "store": item.get("store"),
                    "hash": item.get("hash") or item.get("file_hash") or item.get("sha256"),
                    "ext": str(item.get("ext") or ""),
                    "size": item.get("size") or item.get("size_bytes"),
                }
            table.add_result(display_row)
            results_list.append(item)

        pipeline_context.set_current_stage_table(table)
        pipeline_context.set_last_result_table(table, results_list)

        suspend = getattr(pipeline_context, "suspend_live_progress", None)
        used_suspend = False

        cm: AbstractContextManager[Any] = nullcontext()
        if callable(suspend):
            try:
                maybe_cm = suspend()
                if maybe_cm is not None:
                    cm = maybe_cm  # type: ignore[assignment]
                    used_suspend = True
            except Exception:
                cm = nullcontext()
                used_suspend = False

        with cm:
            get_stderr_console().print(table)
            setattr(table, "_rendered_by_cmdlet", True)
            answered_yes = bool(Confirm.ask("Continue anyway?", default=False, console=get_stderr_console()))

        if in_pipeline:
            try:
                existing = pipeline_context.load_value("preflight", default=None)
            except Exception:
                existing = None
            preflight_cache: Dict[str, Any] = existing if isinstance(existing, dict) else {}
            url_dup_cache = preflight_cache.get("url_duplicates")
            if not isinstance(url_dup_cache, dict):
                url_dup_cache = {}
            url_dup_cache["command"] = str(current_cmd_text or "")
            url_dup_cache["continue"] = bool(answered_yes)
            preflight_cache["url_duplicates"] = url_dup_cache
            try:
                pipeline_context.store_value("preflight", preflight_cache)
            except Exception:
                pass

        if not answered_yes:
            if in_pipeline and used_suspend:
                try:
                    pipeline_context.request_pipeline_stop(reason="duplicate-url declined", exit_code=0)
                except Exception:
                    pass
            return False
        return True

    def _preflight_url_duplicates_bulk(
        self,
        *,
        storage: Any,
        hydrus_available: bool,
        final_output_dir: Path,
        urls: Sequence[str],
    ) -> bool:
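        """Bulk variant of the URL preflight; returns False when the user declines after seeing matches."""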
        if storage is None:
            debug("Bulk URL preflight skipped: storage unavailable")
            return True

        try:
            current_cmd_text = pipeline_context.get_current_command_text("")
        except Exception:
            current_cmd_text = ""

        try:
            stage_ctx = pipeline_context.get_stage_context()
        except Exception:
            stage_ctx = None

        in_pipeline = bool(stage_ctx is not None or ("|" in str(current_cmd_text or "")))
        if in_pipeline:
            try:
                cached_cmd = pipeline_context.load_value("preflight.url_duplicates.command", default="")
                cached_decision = pipeline_context.load_value("preflight.url_duplicates.continue", default=None)
            except Exception:
                cached_cmd = ""
                cached_decision = None

            if cached_decision is not None and str(cached_cmd or "") == str(current_cmd_text or ""):
                if bool(cached_decision):
                    return True
                try:
                    pipeline_context.request_pipeline_stop(reason="duplicate-url declined", exit_code=0)
                except Exception:
                    pass
                return False

        unique_urls: List[str] = []
        for u in urls or []:
            s = str(u or "").strip()
            if s and s not in unique_urls:
                unique_urls.append(s)
        if len(unique_urls) <= 1:
            return True

        try:
            from SYS.metadata import normalize_urls
        except Exception:
            normalize_urls = None  # type: ignore[assignment]

        def _httpish(value: str) -> bool:
            try:
                return bool(value) and (value.startswith("http://") or value.startswith("https://"))
            except Exception:
                return False

        url_needles: Dict[str, List[str]] = {}
        for u in unique_urls:
            needles: List[str] = []
            if normalize_urls is not None:
                try:
                    needles.extend([n for n in (normalize_urls(u) or []) if isinstance(n, str)])
                except Exception:
                    needles = []
            if not needles:
                needles = [u]
            filtered: List[str] = []
            for n in needles:
                n2 = str(n or "").strip()
                if not n2:
                    continue
                if not _httpish(n2):
                    continue
                if n2 not in filtered:
                    filtered.append(n2)
            url_needles[u] = filtered if filtered else [u]

        backend_names: List[str] = []
        try:
            backend_names_all = storage.list_searchable_backends()
        except Exception:
            backend_names_all = []

        for backend_name in backend_names_all:
            try:
                backend = storage[backend_name]
            except Exception:
                continue

            try:
                if str(backend_name).strip().lower() == "temp":
                    continue
            except Exception:
                pass

            try:
                backend_location = getattr(backend, "_location", None)
                if backend_location and final_output_dir:
                    backend_path = Path(str(backend_location)).expanduser().resolve()
                    temp_path = Path(str(final_output_dir)).expanduser().resolve()
                    if backend_path == temp_path:
                        continue
            except Exception:
                pass

            backend_names.append(backend_name)

        if not backend_names:
            debug("Bulk URL preflight skipped: no searchable backends")
            return True

        seen_pairs: set[tuple[str, str]] = set()
        matched_urls: set[str] = set()
        match_rows: List[Dict[str, Any]] = []
        max_rows = 200

        try:
            from Store.HydrusNetwork import HydrusNetwork
        except Exception:
            HydrusNetwork = None  # type: ignore

        for backend_name in backend_names:
            if len(match_rows) >= max_rows:
                break
            try:
                backend = storage[backend_name]
            except Exception:
                continue

            if HydrusNetwork is not None and isinstance(backend, HydrusNetwork):
                if not hydrus_available:
                    continue

                client = getattr(backend, "_client", None)
                if client is None:
                    continue

                for original_url, needles in url_needles.items():
                    if len(match_rows) >= max_rows:
                        break
                    if (original_url, str(backend_name)) in seen_pairs:
                        continue

                    found_hash: Optional[str] = None
                    found = False
                    for needle in (needles or [])[:3]:
                        if not _httpish(needle):
                            continue
                        try:
                            from API.HydrusNetwork import HydrusRequestSpec

                            spec = HydrusRequestSpec(
                                method="GET",
                                endpoint="/add_urls/get_url_files",
                                query={"url": needle},
                            )
                            response = client._perform_request(spec)  # type: ignore[attr-defined]
                            raw_hashes = None
                            if isinstance(response, dict):
                                raw_hashes = response.get("hashes") or response.get("file_hashes")
                                raw_ids = response.get("file_ids")
                                has_ids = isinstance(raw_ids, list) and len(raw_ids) > 0
                                has_hashes = isinstance(raw_hashes, list) and len(raw_hashes) > 0
                                if has_hashes:
                                    try:
                                        found_hash = str(raw_hashes[0]).strip()  # type: ignore[index]
                                    except Exception:
                                        found_hash = None
                                if has_ids or has_hashes:
                                    found = True
                                    break
                        except Exception:
                            continue

                    if not found:
                        continue

                    seen_pairs.add((original_url, str(backend_name)))
                    matched_urls.add(original_url)
                    display_row = {
                        "title": "(exists)",
                        "store": str(backend_name),
                        "hash": found_hash or "",
                        "url": original_url,
                        "columns": [
                            ("Title", "(exists)"),
                            ("Store", str(backend_name)),
                            ("Hash", found_hash or ""),
                            ("URL", original_url),
                        ],
                    }
                    match_rows.append(display_row)
                continue

            for original_url, needles in url_needles.items():
                if len(match_rows) >= max_rows:
                    break
                if (original_url, str(backend_name)) in seen_pairs:
                    continue

                backend_hits: List[Dict[str, Any]] = []
                for needle in (needles or [])[:3]:
                    try:
                        backend_hits = backend.search(f"url:{needle}", limit=1) or []
                        if backend_hits:
                            break
                    except Exception:
                        continue

                if not backend_hits:
                    continue

                seen_pairs.add((original_url, str(backend_name)))
                matched_urls.add(original_url)
                hit = backend_hits[0]
                title = hit.get("title") or hit.get("name") or hit.get("target") or hit.get("path") or "(exists)"
                file_hash = hit.get("hash") or hit.get("file_hash") or hit.get("sha256") or ""

                try:
                    from SYS.result_table import build_display_row
                except Exception:
                    build_display_row = None  # type: ignore

                extracted = {
                    "title": str(title),
                    "store": str(hit.get("store") or backend_name),
                    "hash": str(file_hash or ""),
                    "ext": "",
                    "size": None,
                }
                if callable(build_display_row):
                    try:
                        extracted = build_display_row(hit, keys=["title", "store", "hash", "ext", "size"])
                    except Exception:
                        pass
                extracted["title"] = str(title)
                extracted["store"] = str(hit.get("store") or backend_name)
                extracted["hash"] = str(file_hash or "")

                ext = extracted.get("ext")
                size_val = extracted.get("size")

                display_row = {
                    "title": str(title),
                    "store": str(hit.get("store") or backend_name),
                    "hash": str(file_hash or ""),
                    "ext": str(ext or ""),
                    "size": size_val,
                    "url": original_url,
                    "columns": [
                        ("Title", str(title)),
                        ("Store", str(hit.get("store") or backend_name)),
                        ("Hash", str(file_hash or "")),
                        ("Ext", str(ext or "")),
                        ("Size", size_val),
                        ("URL", original_url),
                    ],
                }
                match_rows.append(display_row)

        if not match_rows:
            debug("Bulk URL preflight: no matches")
            return True

        table = ResultTable(f"URL already exists ({len(matched_urls)} url(s))", max_columns=10)
        table.set_no_choice(True)
        try:
            table.set_preserve_order(True)
        except Exception:
            pass

        for row in match_rows:
            table.add_result(row)

        try:
            pipeline_context.set_last_result_table_overlay(table, match_rows)
        except Exception:
            pass

        suspend = getattr(pipeline_context, "suspend_live_progress", None)
        cm: AbstractContextManager[Any] = nullcontext()
        if callable(suspend):
            try:
                maybe_cm = suspend()
                if maybe_cm is not None:
                    cm = maybe_cm  # type: ignore[assignment]
            except Exception:
                cm = nullcontext()

        with cm:
            get_stderr_console().print(table)
            setattr(table, "_rendered_by_cmdlet", True)
            answered_yes = bool(Confirm.ask("Continue anyway?", default=False, console=get_stderr_console()))

        if in_pipeline:
            try:
                existing = pipeline_context.load_value("preflight", default=None)
            except Exception:
                existing = None
            preflight_cache: Dict[str, Any] = existing if isinstance(existing, dict) else {}
            url_dup_cache = preflight_cache.get("url_duplicates")
            if not isinstance(url_dup_cache, dict):
                url_dup_cache = {}
            url_dup_cache["command"] = str(current_cmd_text or "")
            url_dup_cache["continue"] = bool(answered_yes)
            preflight_cache["url_duplicates"] = url_dup_cache
            try:
                pipeline_context.store_value("preflight", preflight_cache)
            except Exception:
                pass

        if not answered_yes:
            if in_pipeline:
                try:
                    pipeline_context.request_pipeline_stop(reason="duplicate-url declined", exit_code=0)
                except Exception:
                    pass
            return False
        return True
|
|
|
|
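    # Sketch of the cache written above when running inside a pipeline (keys taken
    # from this method; the consumer is assumed to be a later preflight pass):
    #   pipeline_context.load_value("preflight") ->
    #       {"url_duplicates": {"command": "<current pipeline text>", "continue": False}}
    # "continue" records the operator's answer so a repeated stage need not re-prompt.
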
    def _maybe_show_playlist_table(self, *, url: str, ytdlp_tool: YtDlpTool) -> bool:
        try:
            cf = self._cookiefile_str(ytdlp_tool)
            pr = probe_url(url, no_playlist=False, timeout_seconds=15, cookiefile=cf)
        except Exception:
            pr = None
        if not isinstance(pr, dict):
            return False
        entries = pr.get("entries")
        if not isinstance(entries, list) or len(entries) <= 1:
            return False

        extractor_name = ""
        try:
            extractor_name = str(pr.get("extractor") or pr.get("extractor_key") or "").strip().lower()
        except Exception:
            extractor_name = ""
        table_type: Optional[str] = None
        if "bandcamp" in extractor_name:
            table_type = "bandcamp"
        elif "youtube" in extractor_name:
            table_type = "youtube"

        max_rows = 200
        display_entries = entries[:max_rows]

        def _entry_to_url(entry: Any) -> Optional[str]:
            if not isinstance(entry, dict):
                return None
            for key in ("webpage_url", "original_url", "url"):
                v = entry.get(key)
                if isinstance(v, str) and v.strip():
                    s_val = v.strip()
                    try:
                        if urlparse(s_val).scheme in {"http", "https"}:
                            return s_val
                    except Exception:
                        return s_val

            entry_id = entry.get("id")
            if isinstance(entry_id, str) and entry_id.strip():
                extractor_name_inner = str(pr.get("extractor") or pr.get("extractor_key") or "").lower()
                if "youtube" in extractor_name_inner:
                    return f"https://www.youtube.com/watch?v={entry_id.strip()}"
            return None

        table = ResultTable()
        safe_url = str(url or "").strip()
        table.title = f'download-file -url "{safe_url}"' if safe_url else "download-file"
        if table_type:
            try:
                table.set_table(table_type)
            except Exception:
                table.table = table_type
        table.set_source_command("download-file", [])
        try:
            table.set_preserve_order(True)
        except Exception:
            pass

        results_list: List[Dict[str, Any]] = []
        for idx, entry in enumerate(display_entries, 1):
            title = None
            uploader = None
            duration = None
            entry_url = _entry_to_url(entry)
            try:
                if isinstance(entry, dict):
                    title = entry.get("title")
                    uploader = entry.get("uploader") or pr.get("uploader")
                    duration = entry.get("duration")
            except Exception:
                pass

            row: Dict[str, Any] = {
                "table": "download-file",
                "title": str(title or f"Item {idx}"),
                "detail": str(uploader or ""),
                "media_kind": "playlist-item",
                "playlist_index": idx,
                "_selection_args": (["-url", str(entry_url)] if entry_url else ["-url", str(url), "-item", str(idx)]),
                "url": entry_url,
                "target": entry_url,
                "columns": [
                    ("#", str(idx)),
                    ("Title", str(title or "")),
                    ("Duration", str(duration or "")),
                    ("Uploader", str(uploader or "")),
                ],
            }
            results_list.append(row)
            table.add_result(row)

        pipeline_context.set_current_stage_table(table)
        pipeline_context.set_last_result_table(table, results_list)

        try:
            suspend = getattr(pipeline_context, "suspend_live_progress", None)
            cm: AbstractContextManager[Any] = nullcontext()
            if callable(suspend):
                maybe_cm = suspend()
                if maybe_cm is not None:
                    cm = maybe_cm  # type: ignore[assignment]
            with cm:
                get_stderr_console().print(table)
        except Exception:
            pass
        setattr(table, "_rendered_by_cmdlet", True)
        return True

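    # _entry_to_url above, sketched with illustrative entries: a usable URL field wins
    # outright; otherwise a bare id is rebuilt only for YouTube extractors.
    #   {"webpage_url": "https://example.com/track/1"}  -> "https://example.com/track/1"
    #   {"id": "abc123"} (extractor contains "youtube") -> "https://www.youtube.com/watch?v=abc123"
    #   {"id": "abc123"} (any other extractor)          -> None
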
    def _maybe_show_format_table_for_single_url(
        self,
        *,
        mode: str,
        clip_spec: Any,
        clip_values: Sequence[str],
        playlist_items: Optional[str],
        ytdl_format: Any,
        supported_url: Sequence[str],
        playlist_selection_handled: bool,
        ytdlp_tool: YtDlpTool,
        formats_cache: Dict[str, Optional[List[Dict[str, Any]]]],
        storage: Any,
        hydrus_available: bool,
        final_output_dir: Path,
        args: Sequence[str],
    ) -> Optional[int]:
        if (
            mode != "audio"
            and not clip_spec
            and not clip_values
            and not playlist_items
            and not ytdl_format
            and len(supported_url) == 1
            and not playlist_selection_handled
        ):
            url = supported_url[0]

            canonical_url = self._canonicalize_url_for_storage(
                requested_url=url,
                ytdlp_tool=ytdlp_tool,
                playlist_items=playlist_items,
            )
            if not self._preflight_url_duplicate(
                storage=storage,
                hydrus_available=hydrus_available,
                final_output_dir=final_output_dir,
                candidate_url=canonical_url,
                extra_urls=[url],
            ):
                log(f"Skipping download: {url}", file=sys.stderr)
                return 0

            formats = self._list_formats_cached(
                url,
                playlist_items_value=None,
                formats_cache=formats_cache,
                ytdlp_tool=ytdlp_tool,
            )

            if formats and len(formats) > 1:
                candidate_formats = [f for f in formats if self._is_browseable_format(f)]
                filtered_formats = candidate_formats if candidate_formats else list(formats)

                debug(f"Formatlist: showing {len(filtered_formats)} formats (raw={len(formats)})")

                base_cmd = f'download-file "{url}"'
                remaining_args = [arg for arg in args if arg not in [url] and not arg.startswith("-")]
                if remaining_args:
                    base_cmd += " " + " ".join(remaining_args)

                table = ResultTable(title=f"Available formats for {url}", max_columns=10, preserve_order=True)
                table.set_table("ytdlp.formatlist")
                table.set_source_command("download-file", [url])

                results_list: List[Dict[str, Any]] = []
                for idx, fmt in enumerate(filtered_formats, 1):
                    resolution = fmt.get("resolution", "")
                    ext = fmt.get("ext", "")
                    vcodec = fmt.get("vcodec", "none")
                    acodec = fmt.get("acodec", "none")
                    filesize = fmt.get("filesize")
                    filesize_approx = fmt.get("filesize_approx")
                    format_id = fmt.get("format_id", "")

                    selection_format_id = format_id
                    try:
                        if vcodec != "none" and acodec == "none" and format_id:
                            selection_format_id = f"{format_id}+ba"
                    except Exception:
                        selection_format_id = format_id

                    size_str = ""
                    size_prefix = ""
                    size_bytes = filesize
                    if not size_bytes:
                        size_bytes = filesize_approx
                        if size_bytes:
                            size_prefix = "~"
                    try:
                        if isinstance(size_bytes, (int, float)) and size_bytes > 0:
                            size_mb = float(size_bytes) / (1024 * 1024)
                            size_str = f"{size_prefix}{size_mb:.1f}MB"
                    except Exception:
                        size_str = ""

                    desc_parts: List[str] = []
                    if resolution and resolution != "audio only":
                        desc_parts.append(resolution)
                    if ext:
                        desc_parts.append(str(ext).upper())
                    if vcodec != "none":
                        desc_parts.append(f"v:{vcodec}")
                    if acodec != "none":
                        desc_parts.append(f"a:{acodec}")
                    if size_str:
                        desc_parts.append(size_str)
                    format_desc = " | ".join(desc_parts)

                    format_dict = {
                        "table": "download-file",
                        "title": f"Format {format_id}",
                        "url": url,
                        "target": url,
                        "detail": format_desc,
                        "annotations": [ext, resolution] if resolution else [ext],
                        "media_kind": "format",
                        "cmd": base_cmd,
                        "columns": [
                            ("ID", format_id),
                            ("Resolution", resolution or "N/A"),
                            ("Ext", ext),
                            ("Size", size_str or ""),
                            ("Video", vcodec),
                            ("Audio", acodec),
                        ],
                        "full_metadata": {
                            "format_id": format_id,
                            "url": url,
                            "item_selector": selection_format_id,
                        },
                        "_selection_args": None,
                    }

                    selection_args: List[str] = ["-format", selection_format_id]
                    try:
                        if (not clip_spec) and clip_values:
                            selection_args.extend(["-query", f"clip:{','.join([v for v in clip_values if v])}"])
                    except Exception:
                        pass
                    format_dict["_selection_args"] = selection_args

                    results_list.append(format_dict)
                    table.add_result(format_dict)

                try:
                    suspend = getattr(pipeline_context, "suspend_live_progress", None)
                    cm: AbstractContextManager[Any] = nullcontext()
                    if callable(suspend):
                        maybe_cm = suspend()
                        if maybe_cm is not None:
                            cm = maybe_cm  # type: ignore[assignment]
                    with cm:
                        get_stderr_console().print(table)
                except Exception:
                    pass

                setattr(table, "_rendered_by_cmdlet", True)
                pipeline_context.set_current_stage_table(table)
                pipeline_context.set_last_result_table(table, results_list)

                log("", file=sys.stderr)
                return 0

        return None

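    # Selection sketch for the table above: choosing row @N re-runs download-file with
    # that row's _selection_args. A video-only format id is paired with bestaudio, so a
    # hypothetical id 137 yields ["-format", "137+ba"]; pending clip values ride along
    # as ["-query", "clip:1m-2m"].
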
    def _download_supported_urls(
        self,
        *,
        supported_url: Sequence[str],
        ytdlp_tool: YtDlpTool,
        args: Sequence[str],
        config: Dict[str, Any],
        final_output_dir: Path,
        mode: str,
        clip_spec: Any,
        clip_ranges: Optional[List[tuple[int, int]]],
        query_hash_override: Optional[str],
        embed_chapters: bool,
        write_sub: bool,
        quiet_mode: bool,
        playlist_items: Optional[str],
        ytdl_format: Any,
        skip_per_url_preflight: bool,
        forced_single_format_id: Optional[str],
        forced_single_format_for_batch: bool,
        formats_cache: Dict[str, Optional[List[Dict[str, Any]]]],
        storage: Any,
        hydrus_available: bool,
    ) -> int:
        downloaded_count = 0
        downloaded_pipe_objects: List[Dict[str, Any]] = []
        pipe_seq = 0
        clip_sections_spec = self._build_clip_sections_spec(clip_ranges)

        if clip_sections_spec:
            try:
                debug(f"Clip sections spec: {clip_sections_spec}")
            except Exception:
                pass

        for url in supported_url:
            try:
                debug(f"Processing: {url}")

                canonical_url = self._canonicalize_url_for_storage(
                    requested_url=url,
                    ytdlp_tool=ytdlp_tool,
                    playlist_items=playlist_items,
                )

                if not skip_per_url_preflight:
                    if not self._preflight_url_duplicate(
                        storage=storage,
                        hydrus_available=hydrus_available,
                        final_output_dir=final_output_dir,
                        candidate_url=canonical_url,
                        extra_urls=[url],
                    ):
                        log(f"Skipping download: {url}", file=sys.stderr)
                        continue

                PipelineProgress(pipeline_context).begin_steps(2)

                actual_format = ytdl_format
                actual_playlist_items = playlist_items

                if playlist_items and not ytdl_format:
                    if re.search(r"[^0-9,-]", playlist_items):
                        actual_format = playlist_items
                        actual_playlist_items = None

                if mode == "audio" and not actual_format:
                    actual_format = "bestaudio"

                if mode == "video" and not actual_format:
                    configured = (ytdlp_tool.default_format("video") or "").strip()
                    if configured and configured != "bestvideo+bestaudio/best":
                        actual_format = configured

                forced_single_applied = False
                if (
                    forced_single_format_for_batch
                    and forced_single_format_id
                    and not ytdl_format
                    and not actual_playlist_items
                ):
                    actual_format = forced_single_format_id
                    forced_single_applied = True

                if (
                    actual_format
                    and isinstance(actual_format, str)
                    and mode != "audio"
                    and "+" not in actual_format
                    and "/" not in actual_format
                    and "[" not in actual_format
                    and actual_format not in {"best", "bv", "ba", "b"}
                    and not forced_single_applied
                ):
                    try:
                        formats = self._list_formats_cached(
                            url,
                            playlist_items_value=actual_playlist_items,
                            formats_cache=formats_cache,
                            ytdlp_tool=ytdlp_tool,
                        )
                        if formats:
                            fmt_match = next((f for f in formats if str(f.get("format_id", "")) == actual_format), None)
                            if fmt_match:
                                vcodec = str(fmt_match.get("vcodec", "none"))
                                acodec = str(fmt_match.get("acodec", "none"))
                                if vcodec != "none" and acodec == "none":
                                    debug(f"Selected video-only format {actual_format}; using {actual_format}+ba for audio")
                                    actual_format = f"{actual_format}+ba"
                    except Exception:
                        pass

                attempted_single_format_fallback = False
                while True:
                    try:
                        opts = DownloadOptions(
                            url=url,
                            mode=mode,
                            output_dir=final_output_dir,
                            ytdl_format=actual_format,
                            cookies_path=ytdlp_tool.resolve_cookiefile(),
                            clip_sections=clip_sections_spec,
                            playlist_items=actual_playlist_items,
                            quiet=quiet_mode,
                            no_playlist=False,
                            embed_chapters=embed_chapters,
                            write_sub=write_sub,
                        )

                        PipelineProgress(pipeline_context).step("downloading")
                        debug("Starting download with 5-minute timeout...")
                        result_obj = _download_with_timeout(opts, timeout_seconds=300)
                        debug("Download completed, building pipe object...")
                        break
                    except DownloadError as e:
                        cause = getattr(e, "__cause__", None)
                        detail = ""
                        try:
                            detail = str(cause or "")
                        except Exception:
                            detail = ""

                        if ("requested format is not available" in (detail or "").lower()) and mode != "audio":
                            if (
                                forced_single_format_for_batch
                                and forced_single_format_id
                                and not ytdl_format
                                and not actual_playlist_items
                                and not attempted_single_format_fallback
                            ):
                                attempted_single_format_fallback = True
                                actual_format = forced_single_format_id
                                debug(f"Only one format available (playlist preflight); retrying with: {actual_format}")
                                continue

                            formats = self._list_formats_cached(
                                url,
                                playlist_items_value=actual_playlist_items,
                                formats_cache=formats_cache,
                                ytdlp_tool=ytdlp_tool,
                            )
                            if (
                                (not attempted_single_format_fallback)
                                and isinstance(formats, list)
                                and len(formats) == 1
                                and isinstance(formats[0], dict)
                            ):
                                only = formats[0]
                                fallback_format = str(only.get("format_id") or "").strip()
                                selection_format_id = fallback_format
                                try:
                                    vcodec = str(only.get("vcodec", "none"))
                                    acodec = str(only.get("acodec", "none"))
                                    if vcodec != "none" and acodec == "none" and fallback_format:
                                        selection_format_id = f"{fallback_format}+ba"
                                except Exception:
                                    selection_format_id = fallback_format

                                if selection_format_id:
                                    attempted_single_format_fallback = True
                                    actual_format = selection_format_id
                                    debug(f"Only one format available; retrying with: {actual_format}")
                                    continue

                            if formats:
                                formats_to_show = formats

                                table = ResultTable(title=f"Available formats for {url}", max_columns=10, preserve_order=True)
                                table.set_table("ytdlp.formatlist")
                                table.set_source_command("download-file", [url])

                                results_list: List[Dict[str, Any]] = []
                                for idx, fmt in enumerate(formats_to_show, 1):
                                    resolution = fmt.get("resolution", "")
                                    ext = fmt.get("ext", "")
                                    vcodec = fmt.get("vcodec", "none")
                                    acodec = fmt.get("acodec", "none")
                                    filesize = fmt.get("filesize")
                                    filesize_approx = fmt.get("filesize_approx")
                                    format_id = fmt.get("format_id", "")

                                    selection_format_id = format_id
                                    try:
                                        if vcodec != "none" and acodec == "none" and format_id:
                                            selection_format_id = f"{format_id}+ba"
                                    except Exception:
                                        selection_format_id = format_id

                                    size_str = ""
                                    size_prefix = ""
                                    size_bytes = filesize
                                    if not size_bytes:
                                        size_bytes = filesize_approx
                                        if size_bytes:
                                            size_prefix = "~"
                                    try:
                                        if isinstance(size_bytes, (int, float)) and size_bytes > 0:
                                            size_mb = float(size_bytes) / (1024 * 1024)
                                            size_str = f"{size_prefix}{size_mb:.1f}MB"
                                    except Exception:
                                        size_str = ""

                                    desc_parts: List[str] = []
                                    if resolution and resolution != "audio only":
                                        desc_parts.append(str(resolution))
                                    if ext:
                                        desc_parts.append(str(ext).upper())
                                    if vcodec != "none":
                                        desc_parts.append(f"v:{vcodec}")
                                    if acodec != "none":
                                        desc_parts.append(f"a:{acodec}")
                                    if size_str:
                                        desc_parts.append(size_str)
                                    format_desc = " | ".join(desc_parts)

                                    format_dict: Dict[str, Any] = {
                                        "table": "download-file",
                                        "title": f"Format {format_id}",
                                        "url": url,
                                        "target": url,
                                        "detail": format_desc,
                                        "media_kind": "format",
                                        "columns": [
                                            ("ID", format_id),
                                            ("Resolution", resolution or "N/A"),
                                            ("Ext", ext),
                                            ("Size", size_str or ""),
                                            ("Video", vcodec),
                                            ("Audio", acodec),
                                        ],
                                        "full_metadata": {
                                            "format_id": format_id,
                                            "url": url,
                                            "item_selector": selection_format_id,
                                        },
                                        "_selection_args": ["-format", selection_format_id],
                                    }

                                    results_list.append(format_dict)
                                    table.add_result(format_dict)

                                pipeline_context.set_current_stage_table(table)
                                pipeline_context.set_last_result_table(table, results_list)

                                try:
                                    suspend = getattr(pipeline_context, "suspend_live_progress", None)
                                    cm: AbstractContextManager[Any] = nullcontext()
                                    if callable(suspend):
                                        maybe_cm = suspend()
                                        if maybe_cm is not None:
                                            cm = maybe_cm  # type: ignore[assignment]
                                    with cm:
                                        get_stderr_console().print(table)
                                except Exception:
                                    pass

                                PipelineProgress(pipeline_context).step("awaiting selection")

                                log("Requested format is not available; select a working format with @N", file=sys.stderr)
                                return 0

                        raise

                results_to_emit: List[Any] = []
                if isinstance(result_obj, list):
                    results_to_emit = list(result_obj)
                else:
                    paths = getattr(result_obj, "paths", None)
                    if isinstance(paths, list) and paths:
                        for p in paths:
                            try:
                                p_path = Path(p)
                            except Exception:
                                continue
                            try:
                                if p_path.suffix.lower() in _SUBTITLE_EXTS:
                                    continue
                            except Exception:
                                pass
                            if not p_path.exists() or p_path.is_dir():
                                continue
                            try:
                                hv = sha256_file(p_path)
                            except Exception:
                                hv = None
                            results_to_emit.append(
                                DownloadMediaResult(
                                    path=p_path,
                                    info=getattr(result_obj, "info", {}) or {},
                                    tag=list(getattr(result_obj, "tag", []) or []),
                                    source_url=getattr(result_obj, "source_url", None) or opts.url,
                                    hash_value=hv,
                                )
                            )
                    else:
                        results_to_emit = [result_obj]

                pipe_objects: List[Dict[str, Any]] = []
                for downloaded in results_to_emit:
                    po = self._build_pipe_object(downloaded, url, opts)
                    pipe_seq += 1
                    try:
                        po.setdefault("pipe_index", pipe_seq)
                    except Exception:
                        pass

                    try:
                        info = downloaded.info if isinstance(getattr(downloaded, "info", None), dict) else {}
                    except Exception:
                        info = {}
                    chapters_text = _format_chapters_note(info) if embed_chapters else None
                    if chapters_text:
                        notes = po.get("notes")
                        if not isinstance(notes, dict):
                            notes = {}
                        notes.setdefault("chapters", chapters_text)
                        po["notes"] = notes

                    if write_sub:
                        try:
                            media_path = Path(str(po.get("path") or ""))
                        except Exception:
                            media_path = None

                        if media_path is not None and media_path.exists() and media_path.is_file():
                            sub_path = _best_subtitle_sidecar(media_path)
                            if sub_path is not None:
                                sub_text = _read_text_file(sub_path)
                                if sub_text:
                                    notes = po.get("notes")
                                    if not isinstance(notes, dict):
                                        notes = {}
                                    notes["sub"] = sub_text
                                    po["notes"] = notes
                                    try:
                                        sub_path.unlink()
                                    except Exception:
                                        pass

                    pipe_objects.append(po)

                try:
                    if clip_ranges and len(pipe_objects) == len(clip_ranges):
                        source_hash = query_hash_override or self._find_existing_hash_for_url(
                            storage,
                            canonical_url,
                            hydrus_available=hydrus_available,
                        )
                        self._apply_clip_decorations(pipe_objects, clip_ranges, source_king_hash=source_hash)
                except Exception:
                    pass

                debug(f"Emitting {len(pipe_objects)} result(s) to pipeline...")

                PipelineProgress(pipeline_context).step("finalized")

                stage_ctx = pipeline_context.get_stage_context()
                emit_enabled = bool(stage_ctx is not None)
                for pipe_obj_dict in pipe_objects:
                    if emit_enabled:
                        pipeline_context.emit(pipe_obj_dict)

                    if pipe_obj_dict.get("url"):
                        pipe_obj = coerce_to_pipe_object(pipe_obj_dict)
                        register_url_with_local_library(pipe_obj, config)

                    try:
                        downloaded_pipe_objects.append(pipe_obj_dict)
                    except Exception:
                        pass

                downloaded_count += len(pipe_objects)
                debug("✓ Downloaded and emitted")

            except DownloadError as e:
                log(f"Download failed for {url}: {e}", file=sys.stderr)
            except Exception as e:
                log(f"Error processing {url}: {e}", file=sys.stderr)

        if downloaded_count > 0:
            debug(f"✓ Successfully processed {downloaded_count} URL(s)")
            return 0

        log("No downloads completed", file=sys.stderr)
        return 1

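    # Retry sketch for the while-loop in _download_supported_urls: on a "Requested
    # format is not available" DownloadError it first retries the batch-preflighted
    # single format, then a sole listed format (video-only ids get "+ba"), and only
    # then renders the format table and returns 0 so the operator can pick with @N.
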
    def _run_streaming_urls(
        self,
        *,
        streaming_urls: List[str],
        args: Sequence[str],
        config: Dict[str, Any],
        parsed: Dict[str, Any],
    ) -> int:
        try:
            debug("Starting streaming download handler")

            ytdlp_tool = YtDlpTool(config)

            raw_url = list(streaming_urls)
            supported_url, unsupported_list = self._filter_supported_urls(raw_url)

            if not supported_url:
                log("No yt-dlp-supported url to download", file=sys.stderr)
                return 1

            if unsupported_list:
                debug(f"Skipping {len(unsupported_list)} unsupported url (use direct HTTP mode)")

            final_output_dir = self._resolve_streaming_output_dir(parsed, config)
            if not final_output_dir:
                return 1

            debug(f"Output directory: {final_output_dir}")

            try:
                PipelineProgress(pipeline_context).ensure_local_ui(
                    label="download-file",
                    total_items=len(supported_url),
                    items_preview=supported_url,
                )
            except Exception:
                pass

            clip_spec = parsed.get("clip")
            query_spec = parsed.get("query")

            query_keyed = self._parse_query_keyed_spec(str(query_spec) if query_spec is not None else None)

            query_hash_override = self._extract_hash_override(str(query_spec) if query_spec is not None else None, query_keyed)

            embed_chapters = True
            write_sub = True

            query_format: Optional[str] = None
            try:
                fmt_values = query_keyed.get("format", []) if isinstance(query_keyed, dict) else []
                fmt_candidate = fmt_values[-1] if fmt_values else None
                if fmt_candidate is not None:
                    query_format = str(fmt_candidate).strip()
            except Exception:
                query_format = None

            query_audio: Optional[bool] = None
            try:
                audio_values = query_keyed.get("audio", []) if isinstance(query_keyed, dict) else []
                audio_candidate = audio_values[-1] if audio_values else None
                if audio_candidate is not None:
                    s_val = str(audio_candidate).strip().lower()
                    if s_val in {"1", "true", "t", "yes", "y", "on"}:
                        query_audio = True
                    elif s_val in {"0", "false", "f", "no", "n", "off"}:
                        query_audio = False
                    elif s_val:
                        query_audio = True
            except Exception:
                query_audio = None

            query_wants_audio = False
            if query_format:
                try:
                    query_wants_audio = str(query_format).strip().lower() == "audio"
                except Exception:
                    query_wants_audio = False

            audio_flag = bool(parsed.get("audio") is True)
            wants_audio = audio_flag
            if query_audio is not None:
                wants_audio = wants_audio or bool(query_audio)
            else:
                wants_audio = wants_audio or bool(query_wants_audio)
            mode = "audio" if wants_audio else "video"

            clip_ranges, clip_invalid, clip_values = self._parse_clip_ranges_and_apply_items(
                clip_spec=str(clip_spec) if clip_spec is not None else None,
                query_keyed=query_keyed,
                parsed=parsed,
                query_spec=str(query_spec) if query_spec is not None else None,
            )
            if clip_invalid:
                return 1

            if clip_ranges:
                try:
                    debug(f"Clip ranges: {clip_ranges}")
                except Exception:
                    pass

            quiet_mode = bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False

            storage, hydrus_available = self._init_storage(config if isinstance(config, dict) else {})

            formats_cache: Dict[str, Optional[List[Dict[str, Any]]]] = {}
            playlist_items = str(parsed.get("item")) if parsed.get("item") else None
            ytdl_format = parsed.get("format")
            if not ytdl_format and query_format and not query_wants_audio:
                try:
                    height_selector = self._format_selector_for_query_height(query_format)
                except ValueError as e:
                    log(f"Error parsing format selection: {e}", file=sys.stderr)
                    return 1

                if height_selector:
                    ytdl_format = height_selector
                else:
                    if not re.match(r"^\s*#?\d+\s*$", str(query_format)):
                        ytdl_format = query_format
            playlist_selection_handled = False

            if len(supported_url) == 1 and not playlist_items and not ytdl_format:
                candidate_url = supported_url[0]

                if query_format and not query_wants_audio:
                    try:
                        idx_fmt = self._format_id_for_query_index(query_format, candidate_url, formats_cache, ytdlp_tool)
                    except ValueError as e:
                        log(f"Error parsing format selection: {e}", file=sys.stderr)
                        return 1
                    if idx_fmt:
                        debug(f"Resolved numeric format selection '{query_format}' -> {idx_fmt}")
                        ytdl_format = idx_fmt

                if not ytdl_format:
                    if self._maybe_show_playlist_table(url=candidate_url, ytdlp_tool=ytdlp_tool):
                        playlist_selection_handled = True
                        try:
                            last_table = pipeline_context.get_last_result_table() if hasattr(pipeline_context, "get_last_result_table") else None
                            if hasattr(last_table, "rows") and getattr(last_table, "rows", None):
                                sample_index = 1
                                sample_fmt_id = None
                                try:
                                    sample_row = last_table.rows[0]
                                    sample_fmt_id = sample_row._full_metadata.get("item_selector") if getattr(sample_row, "_full_metadata", None) else None
                                except Exception:
                                    sample_fmt_id = None

                                try:
                                    sample_pipeline = f'download-file "{candidate_url}"'
                                    hint = (
                                        "To select non-interactively, re-run with an explicit format: "
                                        "e.g. mm \"{pipeline} -format {fmt} | add-file -store <store>\" or "
                                        "mm \"{pipeline} -query 'format:{index}' | add-file -store <store>\""
                                    ).format(
                                        pipeline=sample_pipeline,
                                        fmt=sample_fmt_id or "<format_id>",
                                        index=sample_index,
                                    )
                                    log(hint, file=sys.stderr)
                                except Exception:
                                    pass
                        except Exception:
                            pass

                        return 0

            skip_per_url_preflight = False
            if len(supported_url) > 1:
                if not self._preflight_url_duplicates_bulk(
                    storage=storage,
                    hydrus_available=hydrus_available,
                    final_output_dir=final_output_dir,
                    urls=list(supported_url),
                ):
                    return 0
                skip_per_url_preflight = True

            forced_single_format_id: Optional[str] = None
            forced_single_format_for_batch = False
            if len(supported_url) > 1 and not playlist_items and not ytdl_format:
                try:
                    sample_url = str(supported_url[0])
                    fmts = self._list_formats_cached(
                        sample_url,
                        playlist_items_value=None,
                        formats_cache=formats_cache,
                        ytdlp_tool=ytdlp_tool,
                    )
                    if isinstance(fmts, list) and len(fmts) == 1 and isinstance(fmts[0], dict):
                        only_id = str(fmts[0].get("format_id") or "").strip()
                        if only_id:
                            forced_single_format_id = only_id
                            forced_single_format_for_batch = True
                            debug(
                                f"Playlist format preflight: only one format available; using {forced_single_format_id} for all items"
                            )
                except Exception:
                    forced_single_format_id = None
                    forced_single_format_for_batch = False

            early_ret = self._maybe_show_format_table_for_single_url(
                mode=mode,
                clip_spec=clip_spec,
                clip_values=clip_values,
                playlist_items=playlist_items,
                ytdl_format=ytdl_format,
                supported_url=supported_url,
                playlist_selection_handled=playlist_selection_handled,
                ytdlp_tool=ytdlp_tool,
                formats_cache=formats_cache,
                storage=storage,
                hydrus_available=hydrus_available,
                final_output_dir=final_output_dir,
                args=args,
            )
            if early_ret is not None:
                return int(early_ret)

            return self._download_supported_urls(
                supported_url=supported_url,
                ytdlp_tool=ytdlp_tool,
                args=args,
                config=config,
                final_output_dir=final_output_dir,
                mode=mode,
                clip_spec=clip_spec,
                clip_ranges=clip_ranges,
                query_hash_override=query_hash_override,
                embed_chapters=embed_chapters,
                write_sub=write_sub,
                quiet_mode=quiet_mode,
                playlist_items=playlist_items,
                ytdl_format=ytdl_format,
                skip_per_url_preflight=skip_per_url_preflight,
                forced_single_format_id=forced_single_format_id,
                forced_single_format_for_batch=forced_single_format_for_batch,
                formats_cache=formats_cache,
                storage=storage,
                hydrus_available=hydrus_available,
            )

        except Exception as e:
            log(f"Error in streaming download handler: {e}", file=sys.stderr)
            return 1

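    # Query sketch (assuming _parse_query_keyed_spec follows the keyed-CSV convention
    # of _parse_keyed_csv_spec below): -query "format:720, audio:yes" would yield
    # {"format": ["720"], "audio": ["yes"]}; truthy "audio" values force mode="audio",
    # as does "format:audio" on its own.
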
    def _resolve_streaming_output_dir(self, parsed: Dict[str, Any], config: Dict[str, Any]) -> Optional[Path]:
        path_override = parsed.get("path")
        if path_override:
            try:
                candidate = Path(str(path_override)).expanduser()
                if candidate.suffix:
                    candidate = candidate.parent
                candidate.mkdir(parents=True, exist_ok=True)
                debug(f"Using output directory override: {candidate}")
                return candidate
            except Exception as e:
                log(f"Invalid -path output directory: {e}", file=sys.stderr)
                return None

        try:
            temp_value = (config or {}).get("temp") if isinstance(config, dict) else None
        except Exception:
            temp_value = None
        if temp_value:
            try:
                candidate = Path(str(temp_value)).expanduser()
                candidate.mkdir(parents=True, exist_ok=True)
                debug(f"Using config temp directory: {candidate}")
                return candidate
            except Exception as e:
                log(f"Cannot use configured temp directory '{temp_value}': {e}", file=sys.stderr)
                return None

        try:
            import tempfile

            candidate = Path(tempfile.gettempdir()) / "Medios-Macina"
            candidate.mkdir(parents=True, exist_ok=True)
            debug(f"Using OS temp directory: {candidate}")
            return candidate
        except Exception as e:
            log(f"Cannot create OS temp directory: {e}", file=sys.stderr)
            return None

    def _parse_time_ranges(self, spec: str) -> List[tuple[int, int]]:
        def _to_seconds(ts: str) -> Optional[int]:
            ts = str(ts).strip()
            if not ts:
                return None

            try:
                unit_match = re.fullmatch(r"(?i)\s*(?:(?P<h>\d+)h)?\s*(?:(?P<m>\d+)m)?\s*(?:(?P<s>\d+(?:\.\d+)?)s)?\s*", ts)
            except Exception:
                unit_match = None
            if unit_match and unit_match.group(0).strip() and any(unit_match.group(g) for g in ("h", "m", "s")):
                try:
                    hours = int(unit_match.group("h") or 0)
                    minutes = int(unit_match.group("m") or 0)
                    seconds = float(unit_match.group("s") or 0)
                    total = (hours * 3600) + (minutes * 60) + seconds
                    return int(total)
                except Exception:
                    return None

            if ":" in ts:
                parts = [p.strip() for p in ts.split(":")]
                if len(parts) == 2:
                    hh_s = "0"
                    mm_s, ss_s = parts
                elif len(parts) == 3:
                    hh_s, mm_s, ss_s = parts
                else:
                    return None

                try:
                    hours = int(hh_s)
                    minutes = int(mm_s)
                    seconds = float(ss_s)
                    total = (hours * 3600) + (minutes * 60) + seconds
                    return int(total)
                except Exception:
                    return None

            try:
                return int(float(ts))
            except Exception:
                return None

        ranges: List[tuple[int, int]] = []
        if not spec:
            return ranges

        for piece in str(spec).split(","):
            piece = piece.strip()
            if not piece:
                continue
            if "-" not in piece:
                return []
            start_s, end_s = [p.strip() for p in piece.split("-", 1)]
            start = _to_seconds(start_s)
            end = _to_seconds(end_s)
            if start is None or end is None or start >= end:
                return []
            ranges.append((start, end))

        return ranges

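    # _parse_time_ranges above accepts unit, colon, and bare-second forms; one bad
    # piece invalidates the whole spec:
    #   "1m-2m"              -> [(60, 120)]
    #   "00:01-00:10,90-120" -> [(1, 10), (90, 120)]
    #   "2m-1m"              -> []   (start must precede end)
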
    @staticmethod
    def _parse_keyed_csv_spec(spec: str, *, default_key: str) -> Dict[str, List[str]]:
        out: Dict[str, List[str]] = {}
        if not isinstance(spec, str):
            spec = str(spec)
        text = spec.strip()
        if not text:
            return out

        active = (default_key or "").strip().lower() or "clip"
        key_pattern = re.compile(r"^([A-Za-z_][A-Za-z0-9_-]*)\s*:\s*(.*)$")

        for raw_piece in text.split(","):
            piece = raw_piece.strip()
            if not piece:
                continue

            m = key_pattern.match(piece)
            if m:
                active = (m.group(1) or "").strip().lower() or active
                value = (m.group(2) or "").strip()
                if value:
                    out.setdefault(active, []).append(value)
                continue

            out.setdefault(active, []).append(piece)

        return out

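    # _parse_keyed_csv_spec above: a "key:" piece switches the active key, bare pieces
    # attach to it:
    #   _parse_keyed_csv_spec("clip:1m-2m,3m-4m,format:720", default_key="clip")
    #   -> {"clip": ["1m-2m", "3m-4m"], "format": ["720"]}
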
    def _build_clip_sections_spec(self, clip_ranges: Optional[List[tuple[int, int]]]) -> Optional[str]:
        ranges: List[str] = []
        if clip_ranges:
            for start_s, end_s in clip_ranges:
                ranges.append(f"{start_s}-{end_s}")
        return ",".join(ranges) if ranges else None

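    # Example: _build_clip_sections_spec([(60, 120), (150, 180)]) -> "60-120,150-180"
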
    def _build_pipe_object(self, download_result: Any, url: str, opts: DownloadOptions) -> Dict[str, Any]:
        info: Dict[str, Any] = download_result.info if isinstance(download_result.info, dict) else {}
        media_path = Path(download_result.path)
        hash_value = download_result.hash_value or self._compute_file_hash(media_path)
        title = info.get("title") or media_path.stem
        tag = list(download_result.tag or [])

        if title and f"title:{title}" not in tag:
            tag.insert(0, f"title:{title}")

        final_url = None
        try:
            page_url = info.get("webpage_url") or info.get("original_url") or info.get("url")
            if page_url:
                final_url = str(page_url)
        except Exception:
            final_url = None
        if not final_url and url:
            final_url = str(url)

        return {
            "path": str(media_path),
            "hash": hash_value,
            "title": title,
            "url": final_url,
            "tag": tag,
            "action": "cmdlet:download-file",
            "is_temp": True,
            "ytdl_format": getattr(opts, "ytdl_format", None),
            "store": getattr(opts, "storage_name", None) or getattr(opts, "storage_location", None) or "PATH",
            "media_kind": "video" if opts.mode == "video" else "audio",
        }

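    # Shape of the dict returned by _build_pipe_object (values illustrative):
    #   {"path": "C:/temp/My Video.mp4", "hash": "<sha256 hex>", "title": "My Video",
    #    "url": "https://example.com/watch?v=abc123", "tag": ["title:My Video"],
    #    "action": "cmdlet:download-file", "is_temp": True, "ytdl_format": "137+ba",
    #    "store": "PATH", "media_kind": "video"}
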
    @staticmethod
    def download_streaming_url_as_pipe_objects(
        url: str,
        config: Dict[str, Any],
        *,
        mode_hint: Optional[str] = None,
        ytdl_format_hint: Optional[str] = None,
    ) -> List[Dict[str, Any]]:
        """Download a yt-dlp-supported URL and return PipeObject-style dict(s).

        This is a lightweight helper intended for cmdlets that need to expand streaming URLs
        into local files without re-implementing yt-dlp glue.
        """
        url_str = str(url or "").strip()
        if not url_str:
            return []

        if not is_url_supported_by_ytdlp(url_str):
            return []

        try:
            from SYS.config import resolve_output_dir

            out_dir = resolve_output_dir(config)
            if out_dir is None:
                return []
        except Exception:
            return []

        cookies_path = None
        try:
            cookie_candidate = YtDlpTool(config).resolve_cookiefile()
            if cookie_candidate is not None and cookie_candidate.is_file():
                cookies_path = cookie_candidate
        except Exception:
            cookies_path = None

        quiet_download = False
        try:
            quiet_download = bool((config or {}).get("_quiet_background_output"))
        except Exception:
            quiet_download = False

        mode = str(mode_hint or "").strip().lower() if mode_hint else ""
        if mode not in {"audio", "video"}:
            mode = "video"
            try:
                cf = (
                    str(cookies_path)
                    if cookies_path is not None and cookies_path.is_file() else None
                )
                fmts_probe = list_formats(
                    url_str,
                    no_playlist=False,
                    playlist_items=None,
                    cookiefile=cf,
                )
                if isinstance(fmts_probe, list) and fmts_probe:
                    has_video = False
                    for f in fmts_probe:
                        if not isinstance(f, dict):
                            continue
                        vcodec = str(f.get("vcodec", "none") or "none").strip().lower()
                        if vcodec and vcodec != "none":
                            has_video = True
                            break
                    mode = "video" if has_video else "audio"
            except Exception:
                mode = "video"

        fmt_hint = str(ytdl_format_hint).strip() if ytdl_format_hint else ""
        chosen_format: Optional[str]
        if fmt_hint:
            chosen_format = fmt_hint
        else:
            chosen_format = None
            if mode == "audio":
                chosen_format = "bestaudio/best"

        opts = DownloadOptions(
            url=url_str,
            mode=mode,
            output_dir=Path(out_dir),
            cookies_path=cookies_path,
            ytdl_format=chosen_format,
            quiet=quiet_download,
            embed_chapters=True,
            write_sub=True,
        )

        try:
            result_obj = _download_with_timeout(opts, timeout_seconds=300)
        except Exception as exc:
            log(f"[download-file] Download failed for {url_str}: {exc}", file=sys.stderr)
            return []

        results: List[Any]
        if isinstance(result_obj, list):
            results = list(result_obj)
        else:
            paths = getattr(result_obj, "paths", None)
            if isinstance(paths, list) and paths:
                results = []
                for p in paths:
                    try:
                        p_path = Path(p)
                    except Exception:
                        continue
                    if not p_path.exists() or p_path.is_dir():
                        continue
                    try:
                        hv = sha256_file(p_path)
                    except Exception:
                        hv = None
                    try:
                        results.append(
                            DownloadMediaResult(
                                path=p_path,
                                info=getattr(result_obj, "info", {}) or {},
                                tag=list(getattr(result_obj, "tag", []) or []),
                                source_url=getattr(result_obj, "source_url", None) or url_str,
                                hash_value=hv,
                            )
                        )
                    except Exception:
                        continue
            else:
                results = [result_obj]

        out: List[Dict[str, Any]] = []
        for downloaded in results:
            try:
                info = (
                    downloaded.info
                    if isinstance(getattr(downloaded, "info", None), dict) else {}
                )
            except Exception:
                info = {}

            try:
                media_path = Path(str(getattr(downloaded, "path", "") or ""))
            except Exception:
                continue
            if not media_path.exists() or media_path.is_dir():
                continue

            try:
                hash_value = getattr(downloaded, "hash_value", None) or sha256_file(media_path)
            except Exception:
                hash_value = None

            title = None
            try:
                title = info.get("title")
            except Exception:
                title = None
            title = title or media_path.stem

            tags = list(getattr(downloaded, "tag", []) or [])
            if title and f"title:{title}" not in tags:
                tags.insert(0, f"title:{title}")

            final_url = None
            try:
                page_url = info.get("webpage_url") or info.get("original_url") or info.get("url")
                if page_url:
                    final_url = str(page_url)
            except Exception:
                final_url = None
            if not final_url:
                final_url = url_str

            po: Dict[str, Any] = {
                "path": str(media_path),
                "hash": hash_value,
                "title": title,
                "url": final_url,
                "tag": tags,
                "action": "cmdlet:download-file",
                "is_temp": True,
                "ytdl_format": getattr(opts, "ytdl_format", None),
                "store": getattr(opts, "storage_name", None) or getattr(opts, "storage_location", None) or "PATH",
                "media_kind": "video" if opts.mode == "video" else "audio",
            }

            try:
                chapters_text = _format_chapters_note(info)
            except Exception:
                chapters_text = None
            if chapters_text:
                notes = po.get("notes")
                if not isinstance(notes, dict):
                    notes = {}
                notes.setdefault("chapters", chapters_text)
                po["notes"] = notes

            try:
                sub_path = _best_subtitle_sidecar(media_path)
            except Exception:
                sub_path = None
            if sub_path is not None:
                sub_text = _read_text_file(sub_path)
                if sub_text:
                    notes = po.get("notes")
                    if not isinstance(notes, dict):
                        notes = {}
                    notes["sub"] = sub_text
                    po["notes"] = notes
                    try:
                        sub_path.unlink()
                    except Exception:
                        pass

            out.append(po)

        return out

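    # Usage sketch for the helper above (the caller and emit() are hypothetical):
    #   pos = Download_File.download_streaming_url_as_pipe_objects(
    #       "https://www.youtube.com/watch?v=abc123", config, mode_hint="audio")
    #   for po in pos:
    #       emit(po)  # hand each PipeObject-style dict to the calling cmdlet
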
    @staticmethod
    def _normalise_hash_hex(value: Optional[str]) -> Optional[str]:
        if not value or not isinstance(value, str):
            return None
        candidate = value.strip().lower()
        if len(candidate) == 64 and all(c in "0123456789abcdef" for c in candidate):
            return candidate
        return None

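    # _normalise_hash_hex accepts only a 64-character hex digest (case-folded):
    #   _normalise_hash_hex("A3" * 32) -> "a3" * 32
    #   _normalise_hash_hex("deadbeef") -> None   (too short)
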
    @classmethod
    def _extract_hash_from_search_hit(cls, hit: Any) -> Optional[str]:
        if not isinstance(hit, dict):
            return None
        for key in ("hash", "hash_hex", "file_hash", "hydrus_hash"):
            v = hit.get(key)
            normalized = cls._normalise_hash_hex(str(v) if v is not None else None)
            if normalized:
                return normalized
        return None

    @classmethod
    def _find_existing_hash_for_url(
        cls, storage: Any, canonical_url: str, *, hydrus_available: bool
    ) -> Optional[str]:
        if storage is None or not canonical_url:
            return None
        try:
            from Store.HydrusNetwork import HydrusNetwork
        except Exception:
            HydrusNetwork = None  # type: ignore

        try:
            backend_names = list(storage.list_searchable_backends() or [])
        except Exception:
            backend_names = []

        for backend_name in backend_names:
            try:
                backend = storage[backend_name]
            except Exception:
                continue
            try:
                if str(backend_name).strip().lower() == "temp":
                    continue
            except Exception:
                pass
            try:
                if HydrusNetwork is not None and isinstance(backend, HydrusNetwork) and not hydrus_available:
                    continue
            except Exception:
                pass

            try:
                hits = backend.search(f"url:{canonical_url}", limit=5) or []
            except Exception:
                hits = []
            for hit in hits:
                extracted = cls._extract_hash_from_search_hit(hit)
                if extracted:
                    return extracted

        return None

    @staticmethod
    def _format_timecode(seconds: int, *, force_hours: bool) -> str:
        total = max(0, int(seconds))
        minutes, secs = divmod(total, 60)
        hours, minutes = divmod(minutes, 60)
        if force_hours:
            return f"{hours:02d}:{minutes:02d}:{secs:02d}"
        return f"{minutes:02d}:{secs:02d}"

    @classmethod
    def _format_clip_range(cls, start_s: int, end_s: int) -> str:
        force_hours = bool(start_s >= 3600 or end_s >= 3600)
        return f"{cls._format_timecode(start_s, force_hours=force_hours)}-{cls._format_timecode(end_s, force_hours=force_hours)}"

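    # _format_clip_range shows hours only when either endpoint reaches one hour:
    #   _format_clip_range(60, 120)    -> "01:00-02:00"
    #   _format_clip_range(3500, 3700) -> "00:58:20-01:01:40"
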
    @classmethod
    def _apply_clip_decorations(
        cls, pipe_objects: List[Dict[str, Any]], clip_ranges: List[tuple[int, int]], *, source_king_hash: Optional[str]
    ) -> None:
        if not pipe_objects or len(pipe_objects) != len(clip_ranges):
            return

        for po, (start_s, end_s) in zip(pipe_objects, clip_ranges):
            clip_range = cls._format_clip_range(start_s, end_s)
            clip_tag = f"clip:{clip_range}"

            po["title"] = clip_tag

            tags = po.get("tag")
            if not isinstance(tags, list):
                tags = []

            tags = [t for t in tags if not str(t).strip().lower().startswith("title:")]
            tags = [t for t in tags if not str(t).strip().lower().startswith("relationship:")]
            tags.insert(0, f"title:{clip_tag}")

            if clip_tag not in tags:
                tags.append(clip_tag)

            po["tag"] = tags

        if len(pipe_objects) < 2:
            return

        hashes: List[str] = []
        for po in pipe_objects:
            h_val = cls._normalise_hash_hex(str(po.get("hash") or ""))
            hashes.append(h_val or "")

        king_hash = cls._normalise_hash_hex(source_king_hash) if source_king_hash else None
        if not king_hash:
            king_hash = hashes[0] if hashes and hashes[0] else None
        if not king_hash:
            return

        alt_hashes: List[str] = [h for h in hashes if h and h != king_hash]
        if not alt_hashes:
            return

        for po in pipe_objects:
            po["relationships"] = {"king": [king_hash], "alt": list(alt_hashes)}

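    # Relationship sketch: for clips with hashes h1..h3 and a known source hash K,
    # every clip gets {"king": [K], "alt": [h1, h2, h3]}; without K the first clip
    # hash is promoted to king and the rest become alts.
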
    def _run_impl(
        self,
        result: Any,
        args: Sequence[str],
        config: Dict[str, Any],
    ) -> int:
        """Main download implementation for direct HTTP files."""
        progress = PipelineProgress(pipeline_context)
        prev_progress = None
        had_progress_key = False
        try:
            debug("Starting download-file")

            # Allow providers to tap into the active PipelineProgress (optional).
            try:
                if isinstance(config, dict):
                    had_progress_key = "_pipeline_progress" in config
                    prev_progress = config.get("_pipeline_progress")
                    config["_pipeline_progress"] = progress
            except Exception:
                pass

            # Parse arguments
            parsed = parse_cmdlet_args(args, self)

            raw_url = self._normalize_urls(parsed)
            raw_url = self._rewrite_archive_org_urls(raw_url)
            piped_items = self._collect_piped_items_if_no_urls(result, raw_url)

            try:
                had_piped_input = bool(result)
            except Exception:
                had_piped_input = False

            # UX: In piped mode, allow a single positional arg to be the destination directory.
            # Example: @1-4 | download-file "C:\\Users\\Me\\Downloads\\yoyo"
            if (had_piped_input and raw_url and len(raw_url) == 1
                    and (not parsed.get("path")) and (not parsed.get("output"))):
                candidate = str(raw_url[0] or "").strip()
                low = candidate.lower()
                looks_like_url = low.startswith(("http://", "https://", "ftp://"))
                looks_like_provider = low.startswith(
                    ("magnet:", "alldebrid:", "hydrus:", "ia:", "internetarchive:")
                )
                looks_like_windows_path = (
                    (len(candidate) >= 2 and candidate[1] == ":")
                    or candidate.startswith("\\\\")
                    or candidate.startswith("\\")
                    or candidate.endswith(("\\", "/"))
                )
                if (not looks_like_url) and (not looks_like_provider) and looks_like_windows_path:
                    parsed["path"] = candidate
                    raw_url = []
                    piped_items = self._collect_piped_items_if_no_urls(result, raw_url)

            if not raw_url and not piped_items:
                log("No url or piped items to download", file=sys.stderr)
                return 1

            # Internet Archive details URLs should present a downloadable file picker
            # before we try any streaming/ytdlp probing.
            try:
                quiet_mode = (
                    bool(config.get("_quiet_background_output"))
                    if isinstance(config, dict) else False
                )
            except Exception:
                quiet_mode = False
            ia_picker_exit = ia_provider.maybe_show_formats_table(
                raw_urls=raw_url,
                piped_items=piped_items,
                parsed=parsed,
                config=config,
                quiet_mode=quiet_mode,
                get_field=get_field,
            )
            if ia_picker_exit is not None:
                return int(ia_picker_exit)

            streaming_candidates = self._append_urls_from_piped_result(list(raw_url), result)
            supported_streaming, unsupported_streaming = self._filter_supported_urls(streaming_candidates)

            streaming_exit_code: Optional[int] = None
            streaming_downloaded = 0
            if supported_streaming:
                streaming_exit_code = self._run_streaming_urls(
                    streaming_urls=supported_streaming,
                    args=args,
                    config=config,
                    parsed=parsed,
                )
                if streaming_exit_code == 0:
                    streaming_downloaded += 1
                    # Only remove URLs from further processing when streaming succeeded.
                    raw_url = [u for u in raw_url if u not in supported_streaming]
                    if not raw_url and not unsupported_streaming:
                        piped_items = []

                if not raw_url and not piped_items:
                    return int(streaming_exit_code or 0)

            quiet_mode = (
                bool(config.get("_quiet_background_output"))
                if isinstance(config, dict) else False
            )
            ia_picker_exit = ia_provider.maybe_show_formats_table(
                raw_urls=raw_url,
                piped_items=piped_items,
                parsed=parsed,
                config=config,
                quiet_mode=quiet_mode,
                get_field=get_field,
            )
            if ia_picker_exit is not None:
                return int(ia_picker_exit)

            # Get output directory
            final_output_dir = self._resolve_output_dir(parsed, config)
            if not final_output_dir:
                return 1

            debug(f"Output directory: {final_output_dir}")

            # If the caller isn't running the shared pipeline Live progress UI (e.g. direct
            # cmdlet execution), start a minimal local pipeline progress panel so downloads
            # show consistent, Rich-formatted progress (like download-media).
            total_items = self._safe_total_items(raw_url, piped_items)
            preview = self._build_preview(raw_url, piped_items, total_items)

            progress.ensure_local_ui(
                label="download-file",
                total_items=total_items,
                items_preview=preview,
            )

            registry = self._load_provider_registry()

            downloaded_count = 0
            urls_downloaded, early_exit = self._process_explicit_urls(
                raw_urls=raw_url,
                final_output_dir=final_output_dir,
                config=config,
                quiet_mode=quiet_mode,
                registry=registry,
                progress=progress,
            )
            downloaded_count += int(urls_downloaded)
            if early_exit is not None:
                return int(early_exit)

            provider_downloaded, magnet_submissions = self._process_provider_items(
                piped_items=piped_items,
                final_output_dir=final_output_dir,
                config=config,
                quiet_mode=quiet_mode,
                registry=registry,
                progress=progress,
            )
            downloaded_count += provider_downloaded

            if downloaded_count > 0 or streaming_downloaded > 0 or magnet_submissions > 0:
                msg = f"✓ Successfully processed {downloaded_count} file(s)"
                if magnet_submissions:
                    msg += f" and queued {magnet_submissions} magnet(s)"
                debug(msg)
                return 0

            if streaming_exit_code is not None:
                return int(streaming_exit_code)

            log("No downloads completed", file=sys.stderr)
            return 1

        except Exception as e:
            log(f"Error in download-file: {e}", file=sys.stderr)
            return 1

        finally:
            try:
                if isinstance(config, dict):
                    if had_progress_key:
                        config["_pipeline_progress"] = prev_progress
                    else:
                        config.pop("_pipeline_progress", None)
            except Exception:
                pass
            progress.close_local_ui(force_complete=True)

    def _resolve_output_dir(self, parsed: Dict[str, Any], config: Dict[str, Any]) -> Optional[Path]:
        """Resolve the output directory from storage location or config."""
        output_dir_arg = parsed.get("path") or parsed.get("output")
        if output_dir_arg:
            try:
                out_path = Path(str(output_dir_arg)).expanduser()
                out_path.mkdir(parents=True, exist_ok=True)
                return out_path
            except Exception as e:
                log(
                    f"Cannot use output directory {output_dir_arg}: {e}",
                    file=sys.stderr
                )
                return None

        storage_location = parsed.get("storage")

        # Priority 1: --storage flag
        if storage_location:
            try:
                return SharedArgs.resolve_storage(storage_location)
            except Exception as e:
                log(f"Invalid storage location: {e}", file=sys.stderr)
                return None

        # Priority 2: Config default output/temp directory
        try:
            from SYS.config import resolve_output_dir

            final_output_dir = resolve_output_dir(config)
        except Exception:
            final_output_dir = Path.home() / "Downloads"

        debug(f"Using default directory: {final_output_dir}")

        # Ensure directory exists
        try:
            final_output_dir.mkdir(parents=True, exist_ok=True)
        except Exception as e:
            log(
                f"Cannot create output directory {final_output_dir}: {e}",
                file=sys.stderr
            )
            return None

        return final_output_dir

    def _compute_file_hash(self, filepath: Path) -> str:
        """Compute SHA256 hash of a file."""
        import hashlib

        sha256_hash = hashlib.sha256()
        with open(filepath, "rb") as f:
            for byte_block in iter(lambda: f.read(4096), b""):
                sha256_hash.update(byte_block)
        return sha256_hash.hexdigest()


# Module-level singleton registration
CMDLET = Download_File()