diff --git a/API/Tidal.py b/API/Tidal.py
index aba691c..bf03473 100644
--- a/API/Tidal.py
+++ b/API/Tidal.py
@@ -3,6 +3,7 @@ from __future__ import annotations
from typing import Any, Dict, List, Optional, Set
from .base import API, ApiError
+from SYS.logger import debug
DEFAULT_BASE_URL = "https://tidal-api.binimum.org"
@@ -241,21 +242,24 @@ class Tidal(API):
# 1. Fetch info (metadata) - fetch raw to ensure all fields are available for merging
info_resp = self._get_json("info/", params={"id": track_int})
+ debug(f"[API.Tidal] info_resp (len={len(str(info_resp))}): {info_resp}")
info_data = info_resp.get("data") if isinstance(info_resp, dict) else info_resp
if not isinstance(info_data, dict) or "id" not in info_data:
info_data = info_resp if isinstance(info_resp, dict) and "id" in info_resp else {}
# 2. Fetch track (manifest/bit depth)
track_resp = self.track(track_id)
+ debug(f"[API.Tidal] track_resp (len={len(str(track_resp))}): {track_resp}")
# Note: track() method in this class currently returns raw JSON, so we handle it similarly.
track_data = track_resp.get("data") if isinstance(track_resp, dict) else track_resp
- if not isinstance(track_data, dict) or "id" not in track_data:
- track_data = track_resp if isinstance(track_resp, dict) and "id" in track_resp else {}
+ if not isinstance(track_data, dict):
+ track_data = track_resp if isinstance(track_resp, dict) else {}
# 3. Fetch lyrics
lyrics_data = {}
try:
lyr_resp = self.lyrics(track_id)
+ debug(f"[API.Tidal] lyrics_resp (len={len(str(lyr_resp))}): {lyr_resp}")
lyrics_data = lyr_resp.get("lyrics") or lyr_resp if isinstance(lyr_resp, dict) else {}
except Exception:
pass
@@ -267,18 +271,24 @@ class Tidal(API):
if isinstance(track_data, dict):
merged_md.update(track_data)
+ debug(f"[API.Tidal] merged_md keys: {list(merged_md.keys())}")
+
# Derived tags and normalized/parsed info
tags = build_track_tags(merged_md)
+ debug(f"[API.Tidal] generated tags: {tags}")
parsed_info = parse_track_item(merged_md)
# Structure for return
- return {
+ res = {
"metadata": merged_md,
"parsed": parsed_info,
"tags": list(tags),
"lyrics": lyrics_data,
}
+ debug(f"[API.Tidal] returning full_track_metadata keys: {list(res.keys())}")
+ return res
# Legacy alias for TidalApiClient
TidalApiClient = Tidal
+HifiApiClient = Tidal
diff --git a/CLI.py b/CLI.py
index f3e6154..125b20e 100644
--- a/CLI.py
+++ b/CLI.py
@@ -3731,18 +3731,32 @@ class PipelineExecutor:
if emits:
try:
from cmdlet import _shared as sh
+ from SYS import models
+ # 1. Apply -path persistence (moves temp files to final destination)
emits = sh.apply_output_path_from_pipeobjects(
cmd_name=cmd_name,
args=list(stage_args),
emits=emits,
)
+
+ # 2. METADATA STICKINESS / PROPAGATION
+ # We normalize all emitted items and merge metadata/tags from the previous stage.
+ # This ensures info like track titles/lyrics survive downloads/conversions.
+ # See cmdlet._shared.propagate_metadata for the merge logic.
+ prev_items = piped_result
+ if not isinstance(prev_items, (list, tuple)):
+ prev_items = [prev_items] if prev_items else []
+
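+                        # propagate_metadata returns normalized PipeObjects with parent tags, metadata, and relationships merged in.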
+ emits = sh.propagate_metadata(prev_items, emits)
+
try:
pipeline_ctx.emits = list(emits)
except Exception:
pass
except Exception:
pass
+
if emits:
# If the cmdlet already installed an overlay table (e.g. get-tag),
# don't overwrite it: set_last_result_items_only() would clear the
diff --git a/Provider/HIFI.py b/Provider/HIFI.py
index 23ebe79..7b6795f 100644
--- a/Provider/HIFI.py
+++ b/Provider/HIFI.py
@@ -13,13 +13,15 @@ from typing import Any, Dict, Iterable, List, Optional, Tuple
from urllib.parse import urlparse
from API.Tidal import (
- HifiApiClient,
+ Tidal as TidalApiClient,
build_track_tags,
coerce_duration_seconds,
extract_artists,
stringify,
)
from ProviderCore.base import Provider, SearchResult, parse_inline_query_arguments
+from ProviderCore.inline_utils import collect_choice
+from cmdlet._shared import get_field
from SYS import pipeline as pipeline_context
from SYS.logger import debug, log
@@ -64,7 +66,9 @@ def _format_total_seconds(seconds: Any) -> str:
return f"{mins}:{secs:02d}"
-class Tidal(Provider):
+class HIFI(Provider):
+
+ PROVIDER_NAME = "hifi"
TABLE_AUTO_STAGES = {
"hifi.track": ["download-file"],
@@ -97,7 +101,7 @@ class Tidal(Provider):
self.api_timeout = float(self.config.get("timeout", 10.0))
except Exception:
self.api_timeout = 10.0
- self.api_clients = [HifiApiClient(base_url=url, timeout=self.api_timeout) for url in self.api_urls]
+ self.api_clients = [TidalApiClient(base_url=url, timeout=self.api_timeout) for url in self.api_urls]
def extract_query_arguments(self, query: str) -> Tuple[str, Dict[str, Any]]:
normalized, parsed = parse_inline_query_arguments(query)
@@ -281,7 +285,7 @@ class Tidal(Provider):
if isinstance(detail, dict):
title = self._stringify(detail.get("title")) or title
- return SearchResult(
+ res = SearchResult(
table="hifi.track",
title=title,
path=f"hifi://track/{track_id}",
@@ -291,6 +295,12 @@ class Tidal(Provider):
full_metadata=dict(detail) if isinstance(detail, dict) else {},
selection_args=["-url", f"hifi://track/{track_id}"],
)
+ if isinstance(detail, dict):
+ try:
+ res.tag = self._build_track_tags(detail)
+ except Exception:
+ pass
+ return res
def _extract_artist_selection_context(self, selected_items: List[Any]) -> List[Tuple[int, str]]:
contexts: List[Tuple[int, str]] = []
@@ -1130,25 +1140,36 @@ class Tidal(Provider):
md = dict(getattr(result, "full_metadata") or {})
track_id = self._extract_track_id_from_result(result)
- if track_id:
+ debug(f"[hifi] download: track_id={track_id}, manifest_present={bool(md.get('manifest'))}, tag_count={len(result.tag) if result.tag else 0}")
+
+ # Enrichment: fetch full metadata if manifest or detailed info (like tags/lyrics) is missing.
+ # We check for 'manifest' because it's required for DASH playback.
+ # We also check for lyrics/subtitles to ensure they are available for add-file.
+ has_lyrics = bool(md.get("_tidal_lyrics_subtitles")) or bool(md.get("lyrics"))
+
+ if track_id and (not md.get("manifest") or not md.get("artist") or len(result.tag or []) <= 1 or not has_lyrics):
+ debug(f"[hifi] Enriching track data (reason: manifest={not md.get('manifest')}, lyrics={not has_lyrics}, tags={len(result.tag or [])})")
# Multi-part enrichment from API: metadata, tags, and lyrics.
full_data = self._fetch_all_track_data(track_id)
+ debug(f"[hifi] download: enrichment full_data present={bool(full_data)}")
if isinstance(full_data, dict):
# 1. Update metadata
api_md = full_data.get("metadata")
if isinstance(api_md, dict):
+ debug(f"[hifi] download: updating metadata with {len(api_md)} keys")
md.update(api_md)
# 2. Update tags (re-sync result.tag so cmdlet sees them)
api_tags = full_data.get("tags")
+ debug(f"[hifi] download: enrichment tags={api_tags}")
if isinstance(api_tags, list) and api_tags:
result.tag = set(api_tags)
# 3. Handle lyrics
- lyrics = full_data.get("lyrics")
- if isinstance(lyrics, dict) and lyrics:
- md.setdefault("lyrics", lyrics)
- subtitles = lyrics.get("subtitles")
+ lyrics_dict = full_data.get("lyrics")
+ if isinstance(lyrics_dict, dict) and lyrics_dict:
+ md.setdefault("lyrics", lyrics_dict)
+ subtitles = lyrics_dict.get("subtitles")
if isinstance(subtitles, str) and subtitles.strip():
md["_tidal_lyrics_subtitles"] = subtitles.strip()
@@ -1328,7 +1349,7 @@ class Tidal(Provider):
return False, None
- def _get_api_client_for_base(self, base_url: str) -> Optional[HifiApiClient]:
+ def _get_api_client_for_base(self, base_url: str) -> Optional[TidalApiClient]:
base = base_url.rstrip("/")
for client in self.api_clients:
if getattr(client, "base_url", "").rstrip("/") == base:
@@ -1739,6 +1760,10 @@ class Tidal(Provider):
or payload.get("path")
or payload.get("url")
)
+        # Guard: a bound method (e.g. str.title) can leak through here instead of a string; treat it as missing.
+ if callable(title):
+ title = None
+
if not title:
title = f"Track {track_id}"
path = (
@@ -1983,12 +2008,6 @@ class Tidal(Provider):
return True
- # Optimization: If we are selecting tracks, do NOT force a "Detail View" (resolving manifest) here.
- # This allows batch selection to flow immediately to `download-file` (via TABLE_AUTO_STAGES)
- # or other downstream cmdlets. The download logic (HIFI.download) handles manifest resolution locally.
- if table_type == "hifi.track" or (is_generic_hifi and any(str(get_field(i, "path")).startswith("hifi://track/") for i in selected_items)):
- return False
-
contexts = self._extract_track_selection_context(selected_items)
try:
debug(f"[hifi.selector] track contexts={len(contexts)}")
diff --git a/Provider/internetarchive.py b/Provider/internetarchive.py
index 6dbbba5..d92358e 100644
--- a/Provider/internetarchive.py
+++ b/Provider/internetarchive.py
@@ -501,6 +501,26 @@ class InternetArchive(Provider):
"internetarchive.formats": ["download-file"],
}
+ def maybe_show_picker(
+ self,
+ *,
+ url: str,
+ item: Optional[Any] = None,
+ parsed: Dict[str, Any],
+ config: Dict[str, Any],
+ quiet_mode: bool,
+ ) -> Optional[int]:
+ """Generic hook for download-file to show a selection table for IA items."""
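+        # Wraps the single url/item into the list-based arguments maybe_show_formats_table expects.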
+ from cmdlet._shared import get_field as sh_get_field
+ return maybe_show_formats_table(
+ raw_urls=[url] if url else [],
+ piped_items=[item] if item else [],
+ parsed=parsed,
+ config=config,
+ quiet_mode=quiet_mode,
+ get_field=sh_get_field,
+ )
+
def __init__(self, config: Optional[Dict[str, Any]] = None):
super().__init__(config)
conf = _pick_provider_config(self.config)
diff --git a/cmdlet/_shared.py b/cmdlet/_shared.py
index 67e55df..0a47cee 100644
--- a/cmdlet/_shared.py
+++ b/cmdlet/_shared.py
@@ -11,11 +11,16 @@ import sys
import tempfile
from collections.abc import Iterable as IterableABC
-from SYS.logger import log
+from SYS.logger import log, debug
from pathlib import Path
from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Set
from dataclasses import dataclass, field
from SYS import models
+from SYS import pipeline as pipeline_context
+from SYS.result_table import ResultTable
+from SYS.rich_display import stderr_console as get_stderr_console
+from rich.prompt import Confirm
+from contextlib import AbstractContextManager, nullcontext
@dataclass
@@ -2405,6 +2410,117 @@ def coerce_to_pipe_object(
return pipe_obj
+def propagate_metadata(
+ previous_items: Sequence[Any],
+ new_items: Sequence[Any]
+) -> List[Any]:
+ """Merge metadata/tags from previous pipeline stage into new items.
+
+ Implements "sticky metadata": items generated by a transformation (download, convert)
+ should inherit rich info (lyrics, art, tags) from their source.
+
+    Strategies (tried in this order; the first that yields a parent wins):
+      A. Hash Match: if an output shares a hash with an input, they are the same item.
+      B. Index Match: if both lists have the same length, assume a 1:1 mapping (heuristic).
+      C. Explicit Parent: if an output carries `parent_hash`, link it to the input with that hash.
+ """
+ if not previous_items or not new_items:
+ return list(new_items)
+
+ try:
+ prev_normalized = [coerce_to_pipe_object(p) for p in previous_items]
+ except Exception:
+ return list(new_items)
+
+ prev_by_hash: Dict[str, models.PipeObject] = {}
+ for p_obj in prev_normalized:
+ if p_obj.hash and p_obj.hash != "unknown":
+ prev_by_hash[p_obj.hash] = p_obj
+
+ normalized: List[models.PipeObject] = []
+
+ # Pre-calculate length matching for heuristic
+ is_same_length = len(new_items) == len(prev_normalized)
+
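+    # A 1:1 length match usually means the stage emitted exactly one output per input, making positional pairing a reasonable fallback.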
+ for i, item in enumerate(new_items):
+ try:
+ obj = coerce_to_pipe_object(item)
+ except Exception:
+ normalized.append(item) # Should not happen given coerce guards
+ continue
+
+ parent: Optional[models.PipeObject] = None
+
+ # Strategy A: Precise Hash Match
+ if obj.hash in prev_by_hash:
+ parent = prev_by_hash[obj.hash]
+
+ # Strategy B: Index Match (Heuristic)
+ if not parent and is_same_length:
+ parent = prev_normalized[i]
+
+ # Strategy C: Explicit Parent Hash
+ if not parent and obj.parent_hash and obj.parent_hash in prev_by_hash:
+ parent = prev_by_hash[obj.parent_hash]
+
+ if parent:
+ # 1. Tags: Merge unique tags
+ if parent.tag:
+ if not obj.tag:
+ obj.tag = list(parent.tag)
+ else:
+ curr_tags = {str(t).lower() for t in obj.tag}
+ for pt in parent.tag:
+ if str(pt).lower() not in curr_tags:
+ obj.tag.append(pt)
+
+ # 2. Metadata: Merge missing keys
+ if parent.metadata:
+ if not obj.metadata:
+ obj.metadata = parent.metadata.copy()
+ else:
+ for mk, mv in parent.metadata.items():
+ if mk not in obj.metadata:
+ obj.metadata[mk] = mv
+
+ # 3. Source URL: Propagate if missing
+ if parent.source_url and not obj.source_url:
+ obj.source_url = parent.source_url
+ elif parent.url and not obj.source_url and not obj.url:
+ # If parent had a URL and child has none, it's likely the source
+ obj.source_url = parent.url
+
+ # 4. Relationships: Merge missing keys
+ if parent.relationships:
+ if not obj.relationships:
+ obj.relationships = parent.relationships.copy()
+ else:
+ for rk, rv in parent.relationships.items():
+ if rk not in obj.relationships:
+ obj.relationships[rk] = rv
+
+ # 5. Extra (Notes/etc): Merge missing keys
+ # Important for passing 'notes' payload (lyrics, captions)
+ if parent.extra:
+ if not obj.extra:
+ obj.extra = parent.extra.copy()
+ else:
+                    # Shallow-merge keys, but merge the 'notes' dict key-by-key so lyric/caption
+                    # payloads from both stages are preserved.
+ for ek, ev in parent.extra.items():
+ if ek not in obj.extra:
+ obj.extra[ek] = ev
+ elif ek == "notes" and isinstance(ev, dict) and isinstance(obj.extra[ek], dict):
+ # Merge notes dict
+ for nk, nv in ev.items():
+ if nk not in obj.extra[ek]:
+ obj.extra[ek][nk] = nv
+
+ normalized.append(obj)
+
+ return normalized
+
+
def register_url_with_local_library(
pipe_obj: models.PipeObject,
config: Dict[str,
@@ -2518,12 +2634,12 @@ def resolve_tidal_manifest_path(item: Any) -> Optional[str]:
if candidate_path:
m = re.search(
- r"tidal:(?://)?track[\\/](\d+)",
+ r"(tidal|hifi):(?://)?track[\\/](\d+)",
str(candidate_path),
flags=re.IGNORECASE,
)
if m:
- track_id = m.group(1)
+ track_id = m.group(2)
if (not already) and track_id is not None:
try:
@@ -2706,3 +2822,327 @@ def resolve_tidal_manifest_path(item: Any) -> Optional[str]:
return None
return str(target_path)
+
+def check_url_exists_in_storage(
+ urls: Sequence[str],
+ storage: Any,
+ hydrus_available: bool,
+ final_output_dir: Optional[Path] = None,
+) -> bool:
+ """Pre-flight check to see if URLs already exist in storage.
+
+ Args:
+ urls: List of URLs to check
+ storage: The storage interface
+ hydrus_available: Whether Hydrus is available
+ final_output_dir: Final output directory (to skip if same as storage)
+
+ Returns:
+        True if it is safe to continue (no duplicates found, or the user confirmed), False if the user declined.
+ """
+ if storage is None:
+ debug("Bulk URL preflight skipped: storage unavailable")
+ return True
+
+ try:
+ current_cmd_text = pipeline_context.get_current_command_text("")
+ except Exception:
+ current_cmd_text = ""
+
+ try:
+ stage_ctx = pipeline_context.get_stage_context()
+ except Exception:
+ stage_ctx = None
+
+ in_pipeline = bool(stage_ctx is not None or ("|" in str(current_cmd_text or "")))
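+    # Inside a pipeline, reuse a cached yes/no decision for the same command text so the user is prompted at most once per run.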
+ if in_pipeline:
+ try:
+ cached_cmd = pipeline_context.load_value("preflight.url_duplicates.command", default="")
+ cached_decision = pipeline_context.load_value("preflight.url_duplicates.continue", default=None)
+ except Exception:
+ cached_cmd = ""
+ cached_decision = None
+
+ if cached_decision is not None and str(cached_cmd or "") == str(current_cmd_text or ""):
+ if bool(cached_decision):
+ return True
+ try:
+ pipeline_context.request_pipeline_stop(reason="duplicate-url declined", exit_code=0)
+ except Exception:
+ pass
+ return False
+
+ unique_urls: List[str] = []
+ for u in urls or []:
+ s = str(u or "").strip()
+ if s and s not in unique_urls:
+ unique_urls.append(s)
+ if len(unique_urls) == 0:
+ return True
+
+ try:
+ from SYS.metadata import normalize_urls
+ except Exception:
+ normalize_urls = None # type: ignore[assignment]
+
+ def _httpish(value: str) -> bool:
+ try:
+ return bool(value) and (value.startswith("http://") or value.startswith("https://"))
+ except Exception:
+ return False
+
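+    # Build normalized "needle" URLs for each input URL; only http(s) variants are actually searched against the backends.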
+ url_needles: Dict[str, List[str]] = {}
+ for u in unique_urls:
+ needles: List[str] = []
+ if normalize_urls is not None:
+ try:
+ needles.extend([n for n in (normalize_urls(u) or []) if isinstance(n, str)])
+ except Exception:
+ needles = []
+ if not needles:
+ needles = [u]
+ filtered: List[str] = []
+ for n in needles:
+ n2 = str(n or "").strip()
+ if not n2:
+ continue
+ if not _httpish(n2):
+ continue
+ if n2 not in filtered:
+ filtered.append(n2)
+ url_needles[u] = filtered if filtered else [u]
+
+ backend_names: List[str] = []
+ try:
+ backend_names_all = storage.list_searchable_backends()
+ except Exception:
+ backend_names_all = []
+
+ for backend_name in backend_names_all:
+ try:
+ backend = storage[backend_name]
+ except Exception:
+ continue
+
+ try:
+ if str(backend_name).strip().lower() == "temp":
+ continue
+ except Exception:
+ pass
+
+ try:
+ backend_location = getattr(backend, "_location", None)
+ if backend_location and final_output_dir:
+ backend_path = Path(str(backend_location)).expanduser().resolve()
+ temp_path = Path(str(final_output_dir)).expanduser().resolve()
+ if backend_path == temp_path:
+ continue
+ except Exception:
+ pass
+
+ backend_names.append(backend_name)
+
+ if not backend_names:
+ debug("Bulk URL preflight skipped: no searchable backends")
+ return True
+
+ seen_pairs: set[tuple[str, str]] = set()
+ matched_urls: set[str] = set()
+ match_rows: List[Dict[str, Any]] = []
+ max_rows = 200
+
+ try:
+ from Store.HydrusNetwork import HydrusNetwork
+ except Exception:
+ HydrusNetwork = None # type: ignore
+
+ for backend_name in backend_names:
+ if len(match_rows) >= max_rows:
+ break
+ try:
+ backend = storage[backend_name]
+ except Exception:
+ continue
+
+ if HydrusNetwork is not None and isinstance(backend, HydrusNetwork):
+ if not hydrus_available:
+ continue
+
+ client = getattr(backend, "_client", None)
+ if client is None:
+ continue
+
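+            # Hydrus: /add_urls/get_url_files maps a URL to known file ids/hashes; any hit counts as an existing copy.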
+ for original_url, needles in url_needles.items():
+ if len(match_rows) >= max_rows:
+ break
+ if (original_url, str(backend_name)) in seen_pairs:
+ continue
+
+ found_hash: Optional[str] = None
+ found = False
+ for needle in (needles or [])[:3]:
+ if not _httpish(needle):
+ continue
+ try:
+ from API.HydrusNetwork import HydrusRequestSpec
+
+ spec = HydrusRequestSpec(
+ method="GET",
+ endpoint="/add_urls/get_url_files",
+ query={"url": needle},
+ )
+ # Access internal client safely if possible, else skip check
+ if hasattr(client, "_perform_request"):
+ response = client._perform_request(spec)
+ raw_hashes = None
+ if isinstance(response, dict):
+ raw_hashes = response.get("hashes") or response.get("file_hashes")
+ raw_ids = response.get("file_ids")
+ has_ids = isinstance(raw_ids, list) and len(raw_ids) > 0
+ has_hashes = isinstance(raw_hashes, list) and len(raw_hashes) > 0
+ if has_hashes:
+ try:
+ found_hash = str(raw_hashes[0]).strip()
+ except Exception:
+ found_hash = None
+ if has_ids or has_hashes:
+ found = True
+ break
+ except Exception:
+ continue
+
+ if not found:
+ continue
+
+ seen_pairs.add((original_url, str(backend_name)))
+ matched_urls.add(original_url)
+ display_row = {
+ "title": "(exists)",
+ "store": str(backend_name),
+ "hash": found_hash or "",
+ "url": original_url,
+ "columns": [
+ ("Title", "(exists)"),
+ ("Store", str(backend_name)),
+ ("Hash", found_hash or ""),
+ ("URL", original_url),
+ ],
+ }
+ match_rows.append(display_row)
+ continue
+
+ for original_url, needles in url_needles.items():
+ if len(match_rows) >= max_rows:
+ break
+ if (original_url, str(backend_name)) in seen_pairs:
+ continue
+
+ backend_hits: List[Dict[str, Any]] = []
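+            # Non-Hydrus backends are queried with a plain "url:<needle>" search; a single hit is enough to flag a duplicate.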
+ for needle in (needles or [])[:3]:
+ try:
+ backend_hits = backend.search(f"url:{needle}", limit=1) or []
+ if backend_hits:
+ break
+ except Exception:
+ continue
+
+ if not backend_hits:
+ continue
+
+ seen_pairs.add((original_url, str(backend_name)))
+ matched_urls.add(original_url)
+ hit = backend_hits[0]
+ title = hit.get("title") or hit.get("name") or hit.get("target") or hit.get("path") or "(exists)"
+ file_hash = hit.get("hash") or hit.get("file_hash") or hit.get("sha256") or ""
+
+ try:
+ from SYS.result_table import build_display_row
+ extracted = build_display_row(hit, keys=["title", "store", "hash", "ext", "size"])
+ except Exception:
+ extracted = {}
+
+ extracted["title"] = str(title)
+ extracted["store"] = str(hit.get("store") or backend_name)
+ extracted["hash"] = str(file_hash or "")
+
+ ext = extracted.get("ext")
+ size_val = extracted.get("size")
+
+ display_row = {
+ "title": str(title),
+ "store": str(hit.get("store") or backend_name),
+ "hash": str(file_hash or ""),
+ "ext": str(ext or ""),
+ "size": size_val,
+ "url": original_url,
+ "columns": [
+ ("Title", str(title)),
+ ("Store", str(hit.get("store") or backend_name)),
+ ("Hash", str(file_hash or "")),
+ ("Ext", str(ext or "")),
+ ("Size", size_val),
+ ("URL", original_url),
+ ],
+ }
+ match_rows.append(display_row)
+
+ if not match_rows:
+ debug("Bulk URL preflight: no matches")
+ return True
+
+ table = ResultTable(f"URL already exists ({len(matched_urls)} url(s))", max_columns=10)
+ table.set_no_choice(True)
+ try:
+ table.set_preserve_order(True)
+ except Exception:
+ pass
+
+ for row in match_rows:
+ table.add_result(row)
+
+ try:
+ pipeline_context.set_last_result_table_overlay(table, match_rows)
+ except Exception:
+ pass
+
+ suspend = getattr(pipeline_context, "suspend_live_progress", None)
+ cm: AbstractContextManager[Any] = nullcontext()
+ if callable(suspend):
+ try:
+ maybe_cm = suspend()
+ if maybe_cm is not None:
+ cm = maybe_cm # type: ignore[assignment]
+ except Exception:
+ cm = nullcontext()
+
+ with cm:
+ get_stderr_console().print(table)
+ setattr(table, "_rendered_by_cmdlet", True)
+ answered_yes = bool(Confirm.ask("Continue anyway?", default=False, console=get_stderr_console()))
+
+ if in_pipeline:
+ try:
+ existing = pipeline_context.load_value("preflight", default=None)
+ except Exception:
+ existing = None
+ preflight_cache: Dict[str, Any] = existing if isinstance(existing, dict) else {}
+ url_dup_cache = preflight_cache.get("url_duplicates")
+ if not isinstance(url_dup_cache, dict):
+ url_dup_cache = {}
+ url_dup_cache["command"] = str(current_cmd_text or "")
+ url_dup_cache["continue"] = bool(answered_yes)
+ preflight_cache["url_duplicates"] = url_dup_cache
+ try:
+ pipeline_context.store_value("preflight", preflight_cache)
+ except Exception:
+ pass
+
+ if not answered_yes:
+ if in_pipeline:
+ try:
+ pipeline_context.request_pipeline_stop(reason="duplicate-url declined", exit_code=0)
+ except Exception:
+ pass
+ return False
+ return True
+
diff --git a/cmdlet/add_note.py b/cmdlet/add_note.py
index 7c61bbb..fced5e1 100644
--- a/cmdlet/add_note.py
+++ b/cmdlet/add_note.py
@@ -209,11 +209,8 @@ class Add_Note(Cmdlet):
note_name = str(note_name or "").strip()
note_text = str(note_text or "").strip()
if not note_name or not note_text:
- log(
-                "[add_note] Error: -query must include title: and text:",
- file=sys.stderr,
- )
- return 1
+            pass  # Implicit pipeline notes are now supported when -query is missing;
+                  # explicit targeting (store+hash) still requires title/text (checked below).
if hash_override and not store_override:
log(
@@ -224,6 +221,14 @@ class Add_Note(Cmdlet):
explicit_target = bool(hash_override and store_override)
results = normalize_result_input(result)
+
+ if explicit_target and (not note_name or not note_text):
+ log(
+ "[add_note] Error: Explicit target (store+hash) requires -query with title/text",
+ file=sys.stderr,
+ )
+ return 1
+
if results and explicit_target:
# Direct targeting mode: apply note once to the explicit target and
# pass through any piped items unchanged.
@@ -287,7 +292,36 @@ class Add_Note(Cmdlet):
ctx.emit(res)
continue
- item_note_text = note_text
+ # Determine notes to write for this item
+ notes_to_write: List[Tuple[str, str]] = []
+
+ # 1. Explicit arguments always take precedence
+ if note_name and note_text:
+ notes_to_write.append((note_name, note_text))
+
+ # 2. Pipeline notes auto-ingestion
+ # Look for 'notes' dictionary in the item (propagated by pipeline/download-file)
+ # Structure: {'notes': {'lyric': '...', 'sub': '...'}}
+ # Check both root and nested 'extra'
+
+ # Check root 'notes' (dict or extra.notes)
+ pipeline_notes = res.get("notes")
+ if not isinstance(pipeline_notes, dict):
+ extra = res.get("extra")
+ if isinstance(extra, dict):
+ pipeline_notes = extra.get("notes")
+
+ if isinstance(pipeline_notes, dict):
+ for k, v in pipeline_notes.items():
+                    # If an explicit -query note and a pipeline note share a name, both are queued;
+                    # no de-duplication is attempted here.
+ if v and str(v).strip():
+ notes_to_write.append((str(k), str(v)))
+
+ if not notes_to_write:
+ # Pass through items that have nothing to add
+ ctx.emit(res)
+ continue
store_name = str(store_override or res.get("store") or "").strip()
raw_hash = res.get("hash")
@@ -298,7 +332,7 @@ class Add_Note(Cmdlet):
"[add_note] Error: Missing -store and item has no store field",
file=sys.stderr
)
- return 1
+ continue
resolved_hash = self._resolve_hash(
raw_hash=str(raw_hash) if raw_hash else None,
@@ -312,80 +346,43 @@ class Add_Note(Cmdlet):
)
ctx.emit(res)
continue
-
- try:
- backend = store_registry[store_name]
- except Exception as exc:
- log(
- f"[add_note] Error: Unknown store '{store_name}': {exc}",
- file=sys.stderr
- )
- return 1
-
- # Queue for bulk write per store. We still emit items immediately;
- # the pipeline only advances after this cmdlet returns.
- note_ops.setdefault(store_name,
- []).append((resolved_hash,
- note_name,
- item_note_text))
- planned_ops += 1
-
+
+ # Queue operations
+ if store_name not in note_ops:
+ note_ops[store_name] = []
+
+ for (n_name, n_text) in notes_to_write:
+ note_ops[store_name].append((resolved_hash, n_name, n_text))
+ planned_ops += 1
+
ctx.emit(res)
- # Execute bulk writes per store.
- successful_writes = 0
+
+ # Execute batch operations
+ success_count = 0
for store_name, ops in note_ops.items():
- if not ops:
- continue
try:
backend = store_registry[store_name]
- except Exception:
- continue
+ if not hasattr(backend, "set_note"):
+ log(f"[add_note] Store '{store_name}' does not support notes", file=sys.stderr)
+ continue
+
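+                # Write each queued note individually; a failure on one note does not abort the rest of the batch.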
+ for (h, name, text) in ops:
+ try:
+ if backend.set_note(h, name, text, config=config):
+ success_count += 1
+ except Exception as e:
+ log(f"[add_note] Write failed {store_name}:{h} ({name}): {e}", file=sys.stderr)
+
+ except Exception as e:
+ log(f"[add_note] Store access failed '{store_name}': {e}", file=sys.stderr)
- store_success = 0
- bulk_fn = getattr(backend, "set_note_bulk", None)
- if callable(bulk_fn):
- try:
- ok = bool(bulk_fn(list(ops), config=config))
- if ok:
- store_success += len(ops)
- ctx.print_if_visible(
- f"✓ add-note: {len(ops)} item(s) in '{store_name}'",
- file=sys.stderr
- )
- successful_writes += store_success
- continue
- log(
- f"[add_note] Warning: bulk set_note returned False for '{store_name}'",
- file=sys.stderr,
- )
- except Exception as exc:
- log(
- f"[add_note] Warning: bulk set_note failed for '{store_name}': {exc}; falling back",
- file=sys.stderr,
- )
-
- # Fallback: per-item writes
- for file_hash, name, text in ops:
- try:
- ok = bool(backend.set_note(file_hash, name, text, config=config))
- if ok:
- store_success += 1
- except Exception:
- continue
-
- if store_success:
- successful_writes += store_success
- ctx.print_if_visible(
- f"✓ add-note: {store_success} item(s) in '{store_name}'",
- file=sys.stderr
- )
-
- log(
- f"[add_note] Updated {successful_writes}/{planned_ops} item(s)",
- file=sys.stderr
- )
- return 0 if successful_writes > 0 else 1
+ if planned_ops > 0:
+ msg = f"✓ add-note: Updated {success_count}/{planned_ops} notes across {len(note_ops)} stores"
+ ctx.print_if_visible(msg, file=sys.stderr)
+
+ return 0
CMDLET = Add_Note()
+
diff --git a/cmdlet/download_file.py b/cmdlet/download_file.py
index 05eedfc..4e8a82e 100644
--- a/cmdlet/download_file.py
+++ b/cmdlet/download_file.py
@@ -17,10 +17,6 @@ from contextlib import AbstractContextManager, nullcontext
import requests
-from Provider import internetarchive as ia_provider
-from Provider import alldebrid as ad_provider
-from Provider import openlibrary as ol_provider
-
from API.HTTP import _download_direct_file
from SYS.models import DownloadError, DownloadOptions, DownloadMediaResult
from SYS.logger import log, debug
@@ -152,639 +148,71 @@ class Download_File(Cmdlet):
get_provider = registry.get("get_provider")
match_provider_name_for_url = registry.get("match_provider_name_for_url")
- context_items_list: List[Any]
- try:
- context_items_list = list(context_items) if context_items else []
- except Exception:
- context_items_list = []
-
for url in raw_urls:
try:
debug(f"Processing URL: {url}")
-
- # Telegram message URLs are not direct files; route through the provider.
- try:
- parsed_url = urlparse(str(url))
- host = (parsed_url.hostname or "").lower().strip()
- except Exception:
- host = ""
-
- is_telegram = host in {"t.me",
- "telegram.me"} or host.endswith(".t.me")
- if is_telegram and SearchResult:
- try:
- from ProviderCore.registry import get_provider as _get_provider
- except Exception:
- _get_provider = None
-
- if _get_provider is None:
- raise DownloadError("Telegram provider registry not available")
-
- provider = _get_provider("telegram", config)
- if provider is None:
- raise DownloadError(
- "Telegram provider not configured or not available (check telethon/app_id/api_hash)"
- )
-
- sr = SearchResult(
- table="telegram",
- title=str(url),
- path=str(url),
- full_metadata={}
- )
- downloaded_path = None
- telegram_info: Optional[Dict[str, Any]] = None
- if hasattr(provider, "download_url"):
- try:
- downloaded_path, telegram_info = provider.download_url(str(url), final_output_dir) # type: ignore[attr-defined]
- except Exception as exc:
- raise DownloadError(str(exc))
- else:
- downloaded_path = provider.download(sr, final_output_dir)
-
- if not downloaded_path:
- raise DownloadError("Telegram download returned no file")
-
- channel = ""
- post = None
- if isinstance(telegram_info, dict):
- try:
- chat_info_raw = telegram_info.get("chat")
- msg_info_raw = telegram_info.get("message")
- chat_info: Dict[str,
- Any] = (
- chat_info_raw
- if isinstance(chat_info_raw,
- dict) else {}
- )
- msg_info: Dict[str,
- Any] = (
- msg_info_raw
- if isinstance(msg_info_raw,
- dict) else {}
- )
- channel = str(
- chat_info.get("title") or chat_info.get("username")
- or ""
- ).strip()
- post = msg_info.get("id")
- except Exception:
- channel = ""
- post = None
-
- title_hint = None
- tg_tags: List[str] = []
- if channel:
- tg_tags.append(f"channel:{channel}")
- if post is not None:
- tg_tags.append(f"post:{post}")
- if channel and post is not None:
- title_hint = f"{channel} {post}"
- elif post is not None:
- title_hint = f"post:{post}"
- else:
- title_hint = downloaded_path.stem
-
- self._emit_local_file(
- downloaded_path=downloaded_path,
- source=str(url),
- title_hint=title_hint,
- tags_hint=tg_tags,
- media_kind_hint="file",
- full_metadata=telegram_info,
- provider_hint="telegram",
- progress=progress,
- config=config,
- )
- downloaded_count += 1
- debug("✓ Downloaded via Telegram provider and emitted")
- continue
-
- # Provider URL routing (e.g. OpenLibrary book pages).
+
+ # Check providers first
provider_name = None
- if match_provider_name_for_url is not None:
+ if match_provider_name_for_url:
try:
provider_name = match_provider_name_for_url(str(url))
- except Exception:
- provider_name = None
-
- # Heuristic: LibGen often uses landing pages like edition.php/file.php.
- # These should never be treated as direct file URLs.
- if not provider_name:
- try:
- p = urlparse(str(url))
- h = (p.hostname or "").strip().lower()
- path = (p.path or "").strip().lower()
- if "libgen" in h and any(x in path for x in (
- "/edition.php",
- "/file.php",
- "/ads.php",
- "/get.php",
- "/series.php", )):
- provider_name = "libgen"
except Exception:
pass
-
- provider_for_url = None
- if provider_name and get_provider is not None:
- provider_for_url = get_provider(provider_name, config)
-
- if provider_for_url is not None:
+
+ provider = None
+ if provider_name and get_provider:
+ provider = get_provider(provider_name, config)
+
+ if provider:
+ debug(f"Provider {provider_name} claimed {url}")
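+                # Prefer the provider's handle_url() hook; fall back to download_url() when it is not implemented.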
try:
- handled, handled_path = provider_for_url.handle_url(
- str(url),
- output_dir=final_output_dir,
- )
- except Exception as exc:
- raise DownloadError(str(exc))
- if handled:
- if handled_path:
- downloaded_path = Path(handled_path)
- self._emit_local_file(
- downloaded_path=downloaded_path,
- source=str(url),
- title_hint=downloaded_path.stem,
- tags_hint=None,
- media_kind_hint="file",
- full_metadata=None,
- provider_hint=str(provider_name),
- progress=progress,
- config=config,
- )
- downloaded_count += 1
+ # Try generic handle_url
+ if hasattr(provider, "handle_url"):
+ handled, path = provider.handle_url(str(url), output_dir=final_output_dir)
+ if handled:
+ if path:
+ self._emit_local_file(
+ downloaded_path=Path(str(path)),
+ source=str(url),
+ title_hint=Path(str(path)).stem,
+ tags_hint=None,
+ media_kind_hint="file",
+ full_metadata=None,
+ progress=progress,
+ config=config,
+ provider_hint=provider_name
+ )
+ downloaded_count += 1
+ continue
+
+ # Try generic download_url
+ elif hasattr(provider, "download_url"):
+ downloaded_path = provider.download_url(str(url), final_output_dir)
+ if downloaded_path:
+ self._emit_local_file(
+ downloaded_path=Path(downloaded_path),
+ source=str(url),
+ title_hint=Path(str(downloaded_path)).stem,
+ tags_hint=None,
+ media_kind_hint="file",
+ full_metadata=None,
+ provider_hint=provider_name,
+ progress=progress,
+ config=config,
+ )
+ downloaded_count += 1
+ continue
+
+ except Exception as e:
+ log(f"Provider {provider_name} error handling {url}: {e}", file=sys.stderr)
+                    # The provider claimed this URL but failed; we deliberately do not fall back to a
+                    # direct download (the URL may be a landing page rather than a file) and simply
+                    # move on to the next URL.
continue
- if provider_name and get_provider is not None and SearchResult is not None:
- # OpenLibrary URLs should be handled by the OpenLibrary provider.
- if provider_name == "openlibrary":
- url_str = str(url).strip()
- provider = get_provider("openlibrary", config)
- if provider is None:
- raise DownloadError(
- "OpenLibrary provider not configured or not available"
- )
-
- edition_id = ol_provider.edition_id_from_url(url_str)
- title_hint = ol_provider.title_hint_from_url_slug(url_str)
-
- download_payload: Optional[Dict[str, Any]] = None
- try:
- ui, _pipe_idx = progress.ui_and_pipe_index()
- progress_cb = None
- if ui is not None:
- # High-level steps for OpenLibrary borrow/download flow.
- progress.begin_steps(5)
-
- def _progress(
- kind: str,
- done: int,
- total: Optional[int],
- label: str
- ) -> None:
- # kind:
- # - "step": advance step text
- # - "pages": update pipe percent/status
- # - "bytes": update transfer bar
- if kind == "step":
- progress.step(label)
- return
-
- if kind == "pages":
- t = int(total) if isinstance(total, int) else 0
- d = int(done) if isinstance(done, int) else 0
- if t > 0:
- pct = int(
- round(
- (max(0,
- min(d,
- t)) / max(1,
- t)) * 100.0
- )
- )
- progress.set_percent(pct)
- progress.set_status(
- f"downloading pages {d}/{t}"
- )
- else:
- progress.set_status(
- f"downloading pages {d}"
- )
- return
-
- if kind == "bytes":
- try:
- lbl = str(label or "download")
- except Exception:
- lbl = "download"
- progress.begin_transfer(label=lbl, total=total)
- progress.update_transfer(
- label=lbl,
- completed=done,
- total=total
- )
- try:
- if (isinstance(total,
- int) and total > 0
- and int(done) >= int(total)):
- progress.finish_transfer(label=lbl)
- except Exception:
- pass
- return
-
- progress_cb = _progress
-
- # Prefer piped OpenLibrary context (selection row) when present so we keep
- # resolved metadata like archive_id and availability.
- ctx_item = None
- ctx_md: Dict[str, Any] = {}
- ctx_title: Optional[str] = None
- ctx_tags: Optional[List[str]] = None
- ctx_media_kind: Optional[str] = None
- for candidate in context_items_list:
- try:
- table_val = get_field(candidate, "table")
- except Exception:
- table_val = None
- if str(table_val or "").lower() != "openlibrary":
- continue
-
- md_val = get_field(candidate, "full_metadata")
- md_dict = md_val if isinstance(md_val, dict) else {}
- cand_olid = str(md_dict.get("openlibrary_id") or md_dict.get("olid") or "").strip()
- cand_archive = str(md_dict.get("archive_id") or "").strip()
- cand_url = str(
- get_field(candidate, "path")
- or get_field(candidate, "url")
- or md_dict.get("selection_url")
- or ""
- ).strip()
-
- matched = False
- if edition_id and cand_olid and cand_olid == edition_id:
- matched = True
- elif cand_url and url_str and cand_url == url_str:
- matched = True
- elif (not edition_id) and cand_archive and cand_archive in url_str:
- matched = True
-
- if matched:
- ctx_item = candidate
- ctx_md = md_dict
- ctx_title = get_field(candidate, "title")
- ctx_media_kind = get_field(candidate, "media_kind")
- tags_val = get_field(candidate, "tag")
- if isinstance(tags_val, list):
- ctx_tags = [str(t) for t in tags_val if t]
- break
-
- if ctx_item is not None and SearchResult is not None:
- sr_meta = dict(ctx_md) if isinstance(ctx_md, dict) else {}
- if edition_id and not sr_meta.get("openlibrary_id"):
- sr_meta["openlibrary_id"] = edition_id
-
- sr_title = str(ctx_title or title_hint or "").strip() or title_hint
- sr_media_kind = str(ctx_media_kind or "book")
-
- sr_obj = (
- ctx_item
- if isinstance(ctx_item, SearchResult)
- else SearchResult(
- table="openlibrary",
- title=sr_title,
- path=url_str,
- media_kind=sr_media_kind,
- full_metadata=sr_meta,
- )
- )
-
- try:
- sr_obj.path = url_str # type: ignore[attr-defined]
- except Exception:
- pass
- try:
- if ctx_tags:
- sr_obj.tag = set(ctx_tags) # type: ignore[attr-defined]
- except Exception:
- pass
-
- downloaded_path = provider.download(
- sr_obj,
- final_output_dir,
- progress_callback=progress_cb
- ) # type: ignore[call-arg]
-
- if downloaded_path:
- download_payload = {
- "path": Path(downloaded_path),
- "search_result": sr_obj,
- }
-
- if download_payload is None and hasattr(provider, "download_url"):
- download_payload = provider.download_url( # type: ignore[attr-defined]
- url_str,
- final_output_dir,
- progress_cb,
- )
-
- if download_payload is None:
- sr = None
- if hasattr(provider, "search_result_from_url"):
- sr = provider.search_result_from_url(url_str) # type: ignore[attr-defined]
- if sr is None:
- sr = SearchResult(
- table="openlibrary",
- title=title_hint,
- path=url_str,
- media_kind="book",
- full_metadata={
- "openlibrary_id": edition_id,
- },
- )
-
- downloaded_path = provider.download(
- sr,
- final_output_dir,
- progress_callback=progress_cb
- ) # type: ignore[call-arg]
-
- if downloaded_path:
- download_payload = {
- "path": Path(downloaded_path),
- "search_result": sr,
- }
- except Exception as exc:
- raise DownloadError(str(exc))
-
- # Clear long-running status line after the download attempt.
- progress.clear_status()
-
- if download_payload and download_payload.get("path"):
- downloaded_path = Path(download_payload["path"])
- sr_obj = download_payload.get("search_result")
-
- tags_hint: Optional[List[str]] = None
- full_md: Optional[Dict[str, Any]] = None
- resolved_title = title_hint
- if sr_obj is not None:
- try:
- resolved_title = getattr(sr_obj, "title", None) or resolved_title
- except Exception:
- pass
- try:
- sr_tags = getattr(sr_obj, "tag", None)
- if isinstance(sr_tags, set) and sr_tags:
- tags_hint = sorted([str(t) for t in sr_tags if t])
- except Exception:
- tags_hint = None
- try:
- full_md = getattr(sr_obj, "full_metadata", None)
- except Exception:
- full_md = None
-
- self._emit_local_file(
- downloaded_path=downloaded_path,
- source=str(url),
- title_hint=resolved_title,
- tags_hint=tags_hint,
- media_kind_hint="book",
- full_metadata=full_md,
- provider_hint="openlibrary",
- progress=progress,
- config=config,
- )
- downloaded_count += 1
- continue
-
- # If OpenLibrary can't provide it (not lendable, no creds, etc), auto-search LibGen.
- try:
- fallback_query = str(title_hint or "").strip()
- if fallback_query:
- log(
- f"[download-file] Not available on OpenLibrary; searching LibGen for: {fallback_query}",
- file=sys.stderr,
- )
- from cmdlet.search_provider import CMDLET as _SEARCH_PROVIDER_CMDLET
-
- exec_fn = getattr(_SEARCH_PROVIDER_CMDLET, "exec", None)
- if callable(exec_fn):
- ret = exec_fn(
- None,
- [
- "-provider",
- "libgen",
- "-query",
- fallback_query
- ],
- config,
- )
- try:
- table = pipeline_context.get_last_result_table()
- items = pipeline_context.get_last_result_items()
- if table is not None:
- pipeline_context.set_last_result_table_overlay(
- table,
- items
- )
- except Exception:
- pass
-
- try:
- return downloaded_count, int(ret) # type: ignore[arg-type]
- except Exception:
- return downloaded_count, 1
- except Exception:
- pass
-
- log(
- "[download-file] OpenLibrary URL could not be downloaded",
- file=sys.stderr,
- )
- continue
-
- # Generic provider URL handler (if a provider implements `download_url`).
- provider = get_provider(provider_name, config)
- if provider is not None and hasattr(provider, "download_url"):
- try:
- downloaded_path = provider.download_url(
- str(url),
- final_output_dir
- ) # type: ignore[attr-defined]
- except Exception as exc:
- raise DownloadError(str(exc))
-
- if downloaded_path:
- self._emit_local_file(
- downloaded_path=Path(downloaded_path),
- source=str(url),
- title_hint=Path(str(downloaded_path)).stem,
- tags_hint=None,
- media_kind_hint="file",
- full_metadata=None,
- provider_hint=str(provider_name),
- progress=progress,
- config=config,
- )
- downloaded_count += 1
- continue
-
- # Otherwise, try provider.download(SearchResult) with the URL as the target.
- if provider is not None:
- sr_obj = None
- try:
- sr_obj = SearchResult(
- table=str(provider_name),
- title=str(url),
- path=str(url),
- full_metadata={},
- )
- downloaded_path = provider.download(
- sr_obj,
- final_output_dir
- ) # type: ignore[call-arg]
- except Exception:
- downloaded_path = None
-
- # Refuse to fall back to direct-download for LibGen landing pages.
- # This prevents saving HTML (e.g. edition.php) as a bogus file.
- if (not downloaded_path
- ) and str(provider_name).lower() == "libgen":
- raise DownloadError(
- "LibGen URL did not resolve to a downloadable file"
- )
-
- if downloaded_path:
- emit_tags: Optional[List[str]] = None
- full_md: Optional[Dict[str, Any]] = None
- title_hint = Path(str(downloaded_path)).stem
- media_kind_hint = "file"
-
- if str(provider_name
- ).lower() == "libgen" and sr_obj is not None:
- media_kind_hint = "book"
- try:
- sr_tags = getattr(sr_obj, "tag", None)
- if isinstance(sr_tags, set) and sr_tags:
- emit_tags = sorted(
- [str(t) for t in sr_tags if t]
- )
- except Exception:
- emit_tags = None
-
- try:
- sr_full_md = getattr(sr_obj, "full_metadata", None)
- if isinstance(sr_full_md, dict):
- full_md = sr_full_md
- t = str(sr_full_md.get("title") or "").strip()
- if t:
- title_hint = t
- except Exception:
- full_md = None
-
- self._emit_local_file(
- downloaded_path=Path(downloaded_path),
- source=str(url),
- title_hint=title_hint,
- tags_hint=emit_tags,
- media_kind_hint=media_kind_hint,
- full_metadata=full_md,
- provider_hint=str(provider_name),
- progress=progress,
- config=config,
- )
- downloaded_count += 1
- continue
-
- if provider_name and get_provider is not None and SearchResult is not None:
- provider = get_provider(provider_name, config)
-
- if provider is not None and hasattr(provider, "download_url"):
- try:
- downloaded_path = provider.download_url(
- str(url),
- final_output_dir
- ) # type: ignore[attr-defined]
- except Exception as exc:
- raise DownloadError(str(exc))
-
- if downloaded_path:
- self._emit_local_file(
- downloaded_path=Path(downloaded_path),
- source=str(url),
- title_hint=Path(str(downloaded_path)).stem,
- tags_hint=None,
- media_kind_hint="file",
- full_metadata=None,
- provider_hint=str(provider_name),
- progress=progress,
- config=config,
- )
- downloaded_count += 1
- continue
-
- if provider is not None:
- sr_obj = None
- try:
- sr_obj = SearchResult(
- table=str(provider_name),
- title=str(url),
- path=str(url),
- full_metadata={},
- )
- downloaded_path = provider.download(
- sr_obj,
- final_output_dir
- ) # type: ignore[call-arg]
- except Exception:
- downloaded_path = None
-
- if (not downloaded_path
- ) and str(provider_name).lower() == "libgen":
- raise DownloadError(
- "LibGen URL did not resolve to a downloadable file"
- )
-
- if downloaded_path:
- emit_tags: Optional[List[str]] = None
- full_md: Optional[Dict[str, Any]] = None
- title_hint = Path(str(downloaded_path)).stem
- media_kind_hint = "file"
-
- if str(provider_name
- ).lower() == "libgen" and sr_obj is not None:
- media_kind_hint = "book"
- try:
- sr_tags = getattr(sr_obj, "tag", None)
- if isinstance(sr_tags, set) and sr_tags:
- emit_tags = sorted(
- [str(t) for t in sr_tags if t]
- )
- except Exception:
- emit_tags = None
-
- try:
- sr_full_md = getattr(sr_obj, "full_metadata", None)
- if isinstance(sr_full_md, dict):
- full_md = sr_full_md
- t = str(sr_full_md.get("title") or "").strip()
- if t:
- title_hint = t
- except Exception:
- full_md = None
-
- self._emit_local_file(
- downloaded_path=Path(downloaded_path),
- source=str(url),
- title_hint=title_hint,
- tags_hint=emit_tags,
- media_kind_hint=media_kind_hint,
- full_metadata=full_md,
- provider_hint=str(provider_name),
- progress=progress,
- config=config,
- )
- downloaded_count += 1
- continue
-
+ # Direct Download Fallback
result_obj = _download_direct_file(
str(url),
final_output_dir,
@@ -824,40 +252,22 @@ class Download_File(Cmdlet):
) -> List[Any]:
get_search_provider = registry.get("get_search_provider")
expanded_items: List[Any] = []
+
for item in piped_items:
try:
table = get_field(item, "table")
- media_kind = get_field(item, "media_kind")
- full_metadata = get_field(item, "full_metadata")
- target = get_field(item, "path") or get_field(item, "url")
+ provider_key = str(table).split(".")[0] if table else None
+ provider = get_search_provider(provider_key, config) if provider_key and get_search_provider else None
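+                # The provider key is the table prefix, e.g. "hifi.track" -> "hifi".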
- if (str(table or "").lower() == "alldebrid"
- and str(media_kind or "").lower() == "folder"):
- magnet_id = None
- if isinstance(full_metadata, dict):
- magnet_id = full_metadata.get("magnet_id")
- if (magnet_id is None and isinstance(target,
- str)
- and target.lower().startswith("alldebrid:magnet:")):
- try:
- magnet_id = int(target.split(":")[-1])
- except Exception:
- magnet_id = None
-
- expanded, detail = ad_provider.expand_folder_item(
- item,
- get_search_provider,
- config,
- )
- if detail:
- log(
- f"[download-file] AllDebrid magnet {magnet_id or 'unknown'} not ready ({detail or 'unknown'})",
- file=sys.stderr,
- )
- continue
- if expanded:
- expanded_items.extend(expanded)
- continue
+ # Generic hook: If provider has expand_item(item), use it.
+ if provider and hasattr(provider, "expand_item") and callable(provider.expand_item):
+ try:
+ sub_items = provider.expand_item(item)
+ if sub_items:
+ expanded_items.extend(sub_items)
+ continue
+ except Exception as e:
+ debug(f"Provider {provider_key} expand_item failed: {e}")
expanded_items.append(item)
except Exception:
@@ -904,8 +314,8 @@ class Download_File(Cmdlet):
media_kind = get_field(item, "media_kind")
tags_val = get_field(item, "tag")
tags_list: Optional[List[str]]
- if isinstance(tags_val, list):
- tags_list = [str(t) for t in tags_val if t]
+ if isinstance(tags_val, (list, set)):
+ tags_list = sorted([str(t) for t in tags_val if t])
else:
tags_list = None
@@ -953,6 +363,8 @@ class Download_File(Cmdlet):
table=str(table),
title=str(title or "Unknown"),
path=str(target or ""),
+ tag=set(tags_list) if tags_list else set(),
+ media_kind=str(media_kind or "file"),
full_metadata=full_metadata
if isinstance(full_metadata,
dict) else {},
@@ -963,179 +375,32 @@ class Download_File(Cmdlet):
# Preserve provider structure when possible (AllDebrid folders -> subfolders).
output_dir = final_output_dir
- try:
- if str(table).strip().lower() == "alldebrid":
- output_dir = ad_provider.adjust_output_dir_for_alldebrid(
- final_output_dir,
- full_metadata if isinstance(full_metadata, dict) else None,
- item,
- )
- except Exception:
- output_dir = final_output_dir
-
+            # Generic hook point: a provider could adjust output_dir here (e.g. to preserve its own
+            # folder structure). We keep the default output_dir for now.
+
downloaded_path = provider_obj.download(sr, output_dir)
provider_sr = sr
if downloaded_path is None:
- download_items = getattr(provider_obj, "download_items", None)
- if callable(download_items):
-
- def _on_emit(path: Path, file_url: str, relpath: str, metadata: Dict[str, Any]) -> None:
- title_hint = metadata.get("name") or relpath or title
- self._emit_local_file(
- downloaded_path=path,
- source=file_url or target,
- title_hint=title_hint,
- tags_hint=tags_list,
- media_kind_hint="file",
- full_metadata=metadata,
- progress=progress,
- config=config,
- provider_hint=str(table) if table else None,
- )
-
- try:
- downloaded_extra = download_items(
- sr,
- output_dir,
- emit=_on_emit,
- progress=progress,
- quiet_mode=quiet_mode,
- path_from_result=self._path_from_download_result,
- config=config,
- )
- except TypeError:
- downloaded_extra = download_items(
- sr,
- output_dir,
- emit=_on_emit,
- progress=progress,
- quiet_mode=quiet_mode,
- path_from_result=self._path_from_download_result,
- )
- except Exception:
- downloaded_extra = 0
-
- if downloaded_extra:
- downloaded_count += int(downloaded_extra)
- continue
-
- # OpenLibrary: if provider download failed, do NOT try to download the OpenLibrary page HTML.
- if (downloaded_path is None and attempted_provider_download
- and str(table or "").lower() == "openlibrary"):
- availability = None
- reason = None
- if isinstance(full_metadata, dict):
- availability = full_metadata.get("availability")
- reason = full_metadata.get("availability_reason")
- msg = "[download-file] OpenLibrary item not downloadable"
- if availability or reason:
- msg += f" (availability={availability or ''} reason={reason or ''})"
- log(msg, file=sys.stderr)
-
- # Fallback: show a LibGen selectable ResultTable (no emits) so the user can pick @N.
- # This intentionally mirrors `search-file -provider libgen` UX: results table + selection.
- try:
- title_text = str(title or "").strip()
- if not title_text and isinstance(full_metadata, dict):
- title_text = str(full_metadata.get("title") or "").strip()
- if title_text and get_search_provider and SearchResult:
- log(
- f"[download-file] Not available on OpenLibrary; searching LibGen for: {title_text}",
- file=sys.stderr,
- )
- libgen_provider = get_search_provider("libgen", config)
- if libgen_provider is None:
- log(
- "[download-file] LibGen provider unavailable; cannot run fallback search",
- file=sys.stderr,
- )
- continue
-
- try:
- from SYS.result_table import ResultTable
- except Exception:
- ResultTable = None # type: ignore[assignment]
-
- if ResultTable is None:
- log(
- "[download-file] ResultTable unavailable; cannot render LibGen fallback search",
- file=sys.stderr,
- )
- continue
-
- fallback_query = title_text
- # Keep parity with search-file provider default when user didn't specify a limit.
- results = libgen_provider.search(fallback_query, limit=50)
- if not results:
- log(
- f"[download-file] LibGen: no results found for: {fallback_query}",
- file=sys.stderr,
- )
- continue
-
- table_title = f"Libgen: {fallback_query}".strip().rstrip(":")
- table_obj = ResultTable(table_title).set_preserve_order(False)
- table_obj.set_table("libgen")
- try:
- table_obj.set_table_metadata({"provider": "libgen"})
- except Exception:
- pass
-
- # Mark as produced by download-file so the pipeline runner pauses and stores tail stages.
- table_obj.set_source_command("download-file", [])
-
- results_list: List[Dict[str, Any]] = []
- for search_result in results:
- item_dict = (
- search_result.to_dict()
- if hasattr(search_result, "to_dict")
- else dict(search_result)
- if isinstance(search_result, dict)
- else {"title": str(search_result)}
- )
- if "table" not in item_dict:
- item_dict["table"] = "libgen"
- table_obj.add_result(search_result)
- results_list.append(item_dict)
-
- # Seed selection state for @N and pause the pipeline.
- try:
- pipeline_context.set_last_result_table(table_obj, results_list)
- except Exception:
- pass
- try:
- pipeline_context.set_current_stage_table(table_obj)
- except Exception:
- pass
-
- # Returning 0 with a selectable stage table and no emits causes the CLI to render
- # the table and pause, preserving the downstream pipeline tail.
- return 0
- except Exception:
- pass
-
- continue
+                # Some legacy providers expose a callback-style 'download_items' API instead of a
+                # plain download(); generic support for it is not wired up here, so we fall through
+                # to the direct-URL fallback below.
+                download_items = getattr(provider_obj, "download_items", None)
+                if callable(download_items):
+                    pass  # Intentionally a no-op for now.
# Fallback: if we have a direct HTTP URL, download it directly
if (downloaded_path is None and isinstance(target,
str)
and target.startswith("http")):
- # Guard: provider landing pages (e.g. LibGen ads.php) are HTML, not files.
- # Never download these as "files".
- if str(table or "").lower() == "libgen":
- low = target.lower()
- if ("/ads.php" in low) or ("/file.php" in low) or ("/index.php"
- in low):
- log(
- "[download-file] Refusing to download LibGen landing page (expected provider to resolve file link)",
- file=sys.stderr,
- )
- continue
-
+
+                # A generic guard against known "not-a-file" landing-page URLs could go here (or in a
+                # helper); for now we rely on the provider (or the user) to avoid such URLs.
+
debug(
f"[download-file] Provider item looks like direct URL, downloading: {target}"
)
+
suggested_name = str(title).strip() if title is not None else None
result_obj = _download_direct_file(
target,
@@ -1153,20 +418,12 @@ class Download_File(Cmdlet):
)
continue
- # Prefer provider-enriched metadata (providers may mutate sr.full_metadata).
- if provider_sr is not None:
- try:
- sr_md = getattr(provider_sr, "full_metadata", None)
- if isinstance(sr_md, dict) and sr_md:
- full_metadata = sr_md
- except Exception:
- pass
-
# Allow providers to add/enrich tags and metadata during download.
if provider_sr is not None:
try:
sr_md = getattr(provider_sr, "full_metadata", None)
if isinstance(sr_md, dict) and sr_md:
+ debug(f"[download-file] Syncing full_metadata from provider_sr (keys={list(sr_md.keys())})")
full_metadata = sr_md
except Exception:
pass
@@ -1183,6 +440,7 @@ class Download_File(Cmdlet):
try:
sr_tags = getattr(provider_sr, "tag", None)
if isinstance(sr_tags, (set, list)) and sr_tags:
+ debug(f"[download-file] Syncing tags_list from provider_sr (count={len(sr_tags)})")
# Re-sync tags_list with the potentially enriched provider_sr.tag
tags_list = sorted([str(t) for t in sr_tags if t])
except Exception:
@@ -1276,7 +534,7 @@ class Download_File(Cmdlet):
if provider_hint:
payload["provider"] = str(provider_hint)
if full_metadata:
- payload["full_metadata"] = full_metadata
+ payload["metadata"] = full_metadata
if notes:
payload["notes"] = notes
if source and str(source).startswith("http"):
@@ -1658,541 +916,46 @@ class Download_File(Cmdlet):
pass
return str(requested_url)
+
def _preflight_url_duplicate(
self,
*,
storage: Any,
hydrus_available: bool,
final_output_dir: Path,
- candidate_url: str,
- extra_urls: Optional[Sequence[str]] = None,
+ candidate_url: Optional[str] = None,
+ extra_urls: Optional[List[str]] = None,
+ **kwargs: Any,
) -> bool:
- if storage is None:
- debug("Preflight URL check skipped: storage unavailable")
- return True
+ to_check = []
+ if candidate_url:
+ to_check.append(candidate_url)
+ if extra_urls:
+ to_check.extend(extra_urls)
- debug(f"Preflight URL check: candidate={candidate_url}")
-
- try:
- from SYS.metadata import normalize_urls
- except Exception:
- normalize_urls = None # type: ignore[assignment]
-
- needles: List[str] = []
- if normalize_urls is not None:
- for raw in [candidate_url, *(list(extra_urls) if extra_urls else [])]:
- try:
- needles.extend(normalize_urls(raw))
- except Exception:
- continue
- if not needles:
- needles = [str(candidate_url)]
-
- seen_needles: List[str] = []
- for needle in needles:
- if needle and needle not in seen_needles:
- seen_needles.append(needle)
- needles = seen_needles
-
- try:
- debug(f"Preflight URL needles: {needles}")
- except Exception:
- pass
-
- url_matches: List[Dict[str, Any]] = []
- try:
- from Store.HydrusNetwork import HydrusNetwork
-
- backend_names_all = storage.list_searchable_backends()
- backend_names: List[str] = []
- skipped: List[str] = []
- for backend_name in backend_names_all:
- try:
- backend = storage[backend_name]
- except Exception:
- continue
-
- try:
- if str(backend_name).strip().lower() == "temp":
- skipped.append(backend_name)
- continue
- except Exception:
- pass
-
- try:
- backend_location = getattr(backend, "_location", None)
- if backend_location and final_output_dir:
- backend_path = Path(str(backend_location)).expanduser().resolve()
- temp_path = Path(str(final_output_dir)).expanduser().resolve()
- if backend_path == temp_path:
- skipped.append(backend_name)
- continue
- except Exception:
- pass
-
- backend_names.append(backend_name)
-
- try:
- if skipped:
- debug(f"Preflight backends: {backend_names} (skipped temp: {skipped})")
- else:
- debug(f"Preflight backends: {backend_names}")
- except Exception:
- pass
-
- for backend_name in backend_names:
- backend = storage[backend_name]
- if isinstance(backend, HydrusNetwork) and not hydrus_available:
- continue
-
- backend_hits: List[Dict[str, Any]] = []
- for needle in needles:
- try:
- backend_hits = backend.search(f"url:{needle}", limit=25) or []
- if backend_hits:
- break
- except Exception:
- continue
- if backend_hits:
- url_matches.extend(
- [
- dict(x) if isinstance(x, dict) else {"title": str(x)}
- for x in backend_hits
- ]
- )
-
- if len(url_matches) >= 25:
- url_matches = url_matches[:25]
- break
- except Exception:
- url_matches = []
-
- if not url_matches:
- debug("Preflight URL check: no matches")
- return True
-
- try:
- current_cmd_text = pipeline_context.get_current_command_text("")
- except Exception:
- current_cmd_text = ""
-
- try:
- stage_ctx = pipeline_context.get_stage_context()
- except Exception:
- stage_ctx = None
-
- in_pipeline = bool(stage_ctx is not None or ("|" in str(current_cmd_text or "")))
- if in_pipeline:
- try:
- cached_cmd = pipeline_context.load_value("preflight.url_duplicates.command", default="")
- cached_decision = pipeline_context.load_value("preflight.url_duplicates.continue", default=None)
- except Exception:
- cached_cmd = ""
- cached_decision = None
-
- if cached_decision is not None and str(cached_cmd or "") == str(current_cmd_text or ""):
- if bool(cached_decision):
- return True
- try:
- pipeline_context.request_pipeline_stop(reason="duplicate-url declined", exit_code=0)
- except Exception:
- pass
- return False
-
- table = ResultTable(f"URL already exists ({len(url_matches)} match(es))")
- results_list: List[Dict[str, Any]] = []
- for item in url_matches:
- if "title" not in item:
- item["title"] = item.get("name") or item.get("target") or item.get("path") or "Result"
-
- try:
- from SYS.result_table import build_display_row
- except Exception:
- build_display_row = None # type: ignore
-
- if callable(build_display_row):
- display_row = build_display_row(item, keys=["title", "store", "hash", "ext", "size"])
- else:
- display_row = {
- "title": item.get("title"),
- "store": item.get("store"),
- "hash": item.get("hash") or item.get("file_hash") or item.get("sha256"),
- "ext": str(item.get("ext") or ""),
- "size": item.get("size") or item.get("size_bytes"),
- }
- table.add_result(display_row)
- results_list.append(item)
-
- pipeline_context.set_current_stage_table(table)
- pipeline_context.set_last_result_table(table, results_list)
-
- suspend = getattr(pipeline_context, "suspend_live_progress", None)
- used_suspend = False
-
- cm: AbstractContextManager[Any] = nullcontext()
- if callable(suspend):
- try:
- maybe_cm = suspend()
- if maybe_cm is not None:
- cm = maybe_cm # type: ignore[assignment]
- used_suspend = True
- except Exception:
- cm = nullcontext()
- used_suspend = False
-
- with cm:
- get_stderr_console().print(table)
- setattr(table, "_rendered_by_cmdlet", True)
- answered_yes = bool(Confirm.ask("Continue anyway?", default=False, console=get_stderr_console()))
-
- if in_pipeline:
- try:
- existing = pipeline_context.load_value("preflight", default=None)
- except Exception:
- existing = None
- preflight_cache: Dict[str, Any] = existing if isinstance(existing, dict) else {}
- url_dup_cache = preflight_cache.get("url_duplicates")
- if not isinstance(url_dup_cache, dict):
- url_dup_cache = {}
- url_dup_cache["command"] = str(current_cmd_text or "")
- url_dup_cache["continue"] = bool(answered_yes)
- preflight_cache["url_duplicates"] = url_dup_cache
- try:
- pipeline_context.store_value("preflight", preflight_cache)
- except Exception:
- pass
-
- if not answered_yes:
- if in_pipeline and used_suspend:
- try:
- pipeline_context.request_pipeline_stop(reason="duplicate-url declined", exit_code=0)
- except Exception:
- pass
- return False
- return True
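+ # Assumption about the shared helper: check_url_exists_in_storage is expected to cover
+ # what the removed inline implementation did, i.e. normalize the candidate URLs, search
+ # every searchable backend except "temp" and the current output dir, render the duplicate
+ # table with a Confirm prompt, and return True to proceed or False to stop.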
+ return sh.check_url_exists_in_storage(
+ urls=to_check,
+ storage=storage,
+ hydrus_available=hydrus_available,
+ final_output_dir=final_output_dir
+ )
def _preflight_url_duplicates_bulk(
self,
*,
+ urls: List[str],
storage: Any,
hydrus_available: bool,
final_output_dir: Path,
- urls: Sequence[str],
+ **kwargs: Any,
) -> bool:
- if storage is None:
- debug("Bulk URL preflight skipped: storage unavailable")
- return True
+ return sh.check_url_exists_in_storage(
+ urls=urls,
+ storage=storage,
+ hydrus_available=hydrus_available,
+ final_output_dir=final_output_dir
+ )
- try:
- current_cmd_text = pipeline_context.get_current_command_text("")
- except Exception:
- current_cmd_text = ""
-
- try:
- stage_ctx = pipeline_context.get_stage_context()
- except Exception:
- stage_ctx = None
-
- in_pipeline = bool(stage_ctx is not None or ("|" in str(current_cmd_text or "")))
- if in_pipeline:
- try:
- cached_cmd = pipeline_context.load_value("preflight.url_duplicates.command", default="")
- cached_decision = pipeline_context.load_value("preflight.url_duplicates.continue", default=None)
- except Exception:
- cached_cmd = ""
- cached_decision = None
-
- if cached_decision is not None and str(cached_cmd or "") == str(current_cmd_text or ""):
- if bool(cached_decision):
- return True
- try:
- pipeline_context.request_pipeline_stop(reason="duplicate-url declined", exit_code=0)
- except Exception:
- pass
- return False
-
- unique_urls: List[str] = []
- for u in urls or []:
- s = str(u or "").strip()
- if s and s not in unique_urls:
- unique_urls.append(s)
- if len(unique_urls) <= 1:
- return True
-
- try:
- from SYS.metadata import normalize_urls
- except Exception:
- normalize_urls = None # type: ignore[assignment]
-
- def _httpish(value: str) -> bool:
- try:
- return bool(value) and (value.startswith("http://") or value.startswith("https://"))
- except Exception:
- return False
-
- url_needles: Dict[str, List[str]] = {}
- for u in unique_urls:
- needles: List[str] = []
- if normalize_urls is not None:
- try:
- needles.extend([n for n in (normalize_urls(u) or []) if isinstance(n, str)])
- except Exception:
- needles = []
- if not needles:
- needles = [u]
- filtered: List[str] = []
- for n in needles:
- n2 = str(n or "").strip()
- if not n2:
- continue
- if not _httpish(n2):
- continue
- if n2 not in filtered:
- filtered.append(n2)
- url_needles[u] = filtered if filtered else [u]
-
- backend_names: List[str] = []
- try:
- backend_names_all = storage.list_searchable_backends()
- except Exception:
- backend_names_all = []
-
- for backend_name in backend_names_all:
- try:
- backend = storage[backend_name]
- except Exception:
- continue
-
- try:
- if str(backend_name).strip().lower() == "temp":
- continue
- except Exception:
- pass
-
- try:
- backend_location = getattr(backend, "_location", None)
- if backend_location and final_output_dir:
- backend_path = Path(str(backend_location)).expanduser().resolve()
- temp_path = Path(str(final_output_dir)).expanduser().resolve()
- if backend_path == temp_path:
- continue
- except Exception:
- pass
-
- backend_names.append(backend_name)
-
- if not backend_names:
- debug("Bulk URL preflight skipped: no searchable backends")
- return True
-
- seen_pairs: set[tuple[str, str]] = set()
- matched_urls: set[str] = set()
- match_rows: List[Dict[str, Any]] = []
- max_rows = 200
-
- try:
- from Store.HydrusNetwork import HydrusNetwork
- except Exception:
- HydrusNetwork = None # type: ignore
-
- for backend_name in backend_names:
- if len(match_rows) >= max_rows:
- break
- try:
- backend = storage[backend_name]
- except Exception:
- continue
-
- if HydrusNetwork is not None and isinstance(backend, HydrusNetwork):
- if not hydrus_available:
- continue
-
- client = getattr(backend, "_client", None)
- if client is None:
- continue
-
- for original_url, needles in url_needles.items():
- if len(match_rows) >= max_rows:
- break
- if (original_url, str(backend_name)) in seen_pairs:
- continue
-
- found_hash: Optional[str] = None
- found = False
- for needle in (needles or [])[:3]:
- if not _httpish(needle):
- continue
- try:
- from API.HydrusNetwork import HydrusRequestSpec
-
- spec = HydrusRequestSpec(
- method="GET",
- endpoint="/add_urls/get_url_files",
- query={"url": needle},
- )
- response = client._perform_request(spec) # type: ignore[attr-defined]
- raw_hashes = None
- if isinstance(response, dict):
- raw_hashes = response.get("hashes") or response.get("file_hashes")
- raw_ids = response.get("file_ids")
- has_ids = isinstance(raw_ids, list) and len(raw_ids) > 0
- has_hashes = isinstance(raw_hashes, list) and len(raw_hashes) > 0
- if has_hashes:
- try:
- found_hash = str(raw_hashes[0]).strip() # type: ignore[index]
- except Exception:
- found_hash = None
- if has_ids or has_hashes:
- found = True
- break
- except Exception:
- continue
-
- if not found:
- continue
-
- seen_pairs.add((original_url, str(backend_name)))
- matched_urls.add(original_url)
- display_row = {
- "title": "(exists)",
- "store": str(backend_name),
- "hash": found_hash or "",
- "url": original_url,
- "columns": [
- ("Title", "(exists)"),
- ("Store", str(backend_name)),
- ("Hash", found_hash or ""),
- ("URL", original_url),
- ],
- }
- match_rows.append(display_row)
- continue
-
- for original_url, needles in url_needles.items():
- if len(match_rows) >= max_rows:
- break
- if (original_url, str(backend_name)) in seen_pairs:
- continue
-
- backend_hits: List[Dict[str, Any]] = []
- for needle in (needles or [])[:3]:
- try:
- backend_hits = backend.search(f"url:{needle}", limit=1) or []
- if backend_hits:
- break
- except Exception:
- continue
-
- if not backend_hits:
- continue
-
- seen_pairs.add((original_url, str(backend_name)))
- matched_urls.add(original_url)
- hit = backend_hits[0]
- title = hit.get("title") or hit.get("name") or hit.get("target") or hit.get("path") or "(exists)"
- file_hash = hit.get("hash") or hit.get("file_hash") or hit.get("sha256") or ""
-
- try:
- from SYS.result_table import build_display_row
- except Exception:
- build_display_row = None # type: ignore
-
- extracted = {
- "title": str(title),
- "store": str(hit.get("store") or backend_name),
- "hash": str(file_hash or ""),
- "ext": "",
- "size": None,
- }
- if callable(build_display_row):
- try:
- extracted = build_display_row(hit, keys=["title", "store", "hash", "ext", "size"])
- except Exception:
- pass
- extracted["title"] = str(title)
- extracted["store"] = str(hit.get("store") or backend_name)
- extracted["hash"] = str(file_hash or "")
-
- ext = extracted.get("ext")
- size_val = extracted.get("size")
-
- display_row = {
- "title": str(title),
- "store": str(hit.get("store") or backend_name),
- "hash": str(file_hash or ""),
- "ext": str(ext or ""),
- "size": size_val,
- "url": original_url,
- "columns": [
- ("Title", str(title)),
- ("Store", str(hit.get("store") or backend_name)),
- ("Hash", str(file_hash or "")),
- ("Ext", str(ext or "")),
- ("Size", size_val),
- ("URL", original_url),
- ],
- }
- match_rows.append(display_row)
-
- if not match_rows:
- debug("Bulk URL preflight: no matches")
- return True
-
- table = ResultTable(f"URL already exists ({len(matched_urls)} url(s))", max_columns=10)
- table.set_no_choice(True)
- try:
- table.set_preserve_order(True)
- except Exception:
- pass
-
- for row in match_rows:
- table.add_result(row)
-
- try:
- pipeline_context.set_last_result_table_overlay(table, match_rows)
- except Exception:
- pass
-
- suspend = getattr(pipeline_context, "suspend_live_progress", None)
- cm: AbstractContextManager[Any] = nullcontext()
- if callable(suspend):
- try:
- maybe_cm = suspend()
- if maybe_cm is not None:
- cm = maybe_cm # type: ignore[assignment]
- except Exception:
- cm = nullcontext()
-
- with cm:
- get_stderr_console().print(table)
- setattr(table, "_rendered_by_cmdlet", True)
- answered_yes = bool(Confirm.ask("Continue anyway?", default=False, console=get_stderr_console()))
-
- if in_pipeline:
- try:
- existing = pipeline_context.load_value("preflight", default=None)
- except Exception:
- existing = None
- preflight_cache: Dict[str, Any] = existing if isinstance(existing, dict) else {}
- url_dup_cache = preflight_cache.get("url_duplicates")
- if not isinstance(url_dup_cache, dict):
- url_dup_cache = {}
- url_dup_cache["command"] = str(current_cmd_text or "")
- url_dup_cache["continue"] = bool(answered_yes)
- preflight_cache["url_duplicates"] = url_dup_cache
- try:
- pipeline_context.store_value("preflight", preflight_cache)
- except Exception:
- pass
-
- if not answered_yes:
- if in_pipeline:
- try:
- pipeline_context.request_pipeline_stop(reason="duplicate-url declined", exit_code=0)
- except Exception:
- pass
- return False
- return True
def _maybe_show_playlist_table(self, *, url: str, ytdlp_tool: YtDlpTool) -> bool:
try:
@@ -3675,12 +2438,8 @@ class Download_File(Cmdlet):
candidate = str(raw_url[0] or "").strip()
low = candidate.lower()
looks_like_url = low.startswith(("http://", "https://", "ftp://"))
- looks_like_provider = low.startswith(
- ("magnet:",
- "alldebrid:",
- "hydrus:",
- "ia:",
- "internetarchive:")
+ looks_like_provider = (
+ ":" in candidate and not candidate.startswith(("http:", "https:", "ftp:", "ftps:", "file:"))
)
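+ # Scheme-style prefixes such as "magnet:", "alldebrid:", "hydrus:" or "ia:" are what this
+ # heuristic is meant to catch; note that bare Windows paths ("C:\...") also contain ":",
+ # which is why the separate looks_like_windows_path check below still matters.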
looks_like_windows_path = (
(len(candidate) >= 2 and candidate[1] == ":")
@@ -3698,25 +2457,18 @@ class Download_File(Cmdlet):
log("No url or piped items to download", file=sys.stderr)
return 1
- # Internet Archive details URLs should present a downloadable file picker
- # before we try any streaming/ytdlp probing.
- try:
- quiet_mode = (
- bool(config.get("_quiet_background_output"))
- if isinstance(config, dict) else False
- )
- except Exception:
- quiet_mode = False
- ia_picker_exit = ia_provider.maybe_show_formats_table(
+ registry = self._load_provider_registry()
+
+ # Provider pre-check (e.g. Internet Archive format picker)
+ picker_result = self._maybe_show_provider_picker(
raw_urls=raw_url,
piped_items=piped_items,
parsed=parsed,
config=config,
- quiet_mode=quiet_mode,
- get_field=get_field,
+ registry=registry,
)
- if ia_picker_exit is not None:
- return int(ia_picker_exit)
+ if picker_result is not None:
+ return int(picker_result)
streaming_candidates = self._append_urls_from_piped_result(list(raw_url), result)
supported_streaming, unsupported_streaming = self._filter_supported_urls(streaming_candidates)
@@ -3740,21 +2492,16 @@ class Download_File(Cmdlet):
if not raw_url and not piped_items:
return int(streaming_exit_code or 0)
- quiet_mode = (
- bool(config.get("_quiet_background_output"))
- if isinstance(config, dict) else False
- )
- ia_picker_exit = ia_provider.maybe_show_formats_table(
+ # Re-check picker if partial processing occurred
+ picker_result = self._maybe_show_provider_picker(
raw_urls=raw_url,
piped_items=piped_items,
parsed=parsed,
config=config,
- quiet_mode=quiet_mode,
- get_field=get_field,
+ registry=registry,
)
- if ia_picker_exit is not None:
- return int(ia_picker_exit)
+ if picker_result is not None:
+ return int(picker_result)
# Get output directory
final_output_dir = self._resolve_output_dir(parsed, config)
@@ -3775,8 +2522,6 @@ class Download_File(Cmdlet):
items_preview=preview
)
- registry = self._load_provider_registry()
-
downloaded_count = 0
# Special-case: support selection-inserted magnet-id arg to drive provider downloads
@@ -3917,6 +2662,58 @@ class Download_File(Cmdlet):
pass
progress.close_local_ui(force_complete=True)
+ def _maybe_show_provider_picker(
+ self,
+ *,
+ raw_urls: Sequence[str],
+ piped_items: Sequence[Any],
+ parsed: Dict[str, Any],
+ config: Dict[str, Any],
+ registry: Dict[str, Any],
+ ) -> Optional[int]:
+ """Generic hook for providers to show a selection table (e.g. Internet Archive format picker)."""
+ total_inputs = len(raw_urls or []) + len(piped_items or [])
+ if total_inputs != 1:
+ return None
+
+ target_url = None
+ if raw_urls:
+ target_url = str(raw_urls[0])
+ elif piped_items:
+ target_url = str(get_field(piped_items[0], "path") or get_field(piped_items[0], "url") or "")
+
+ if not target_url:
+ return None
+
+ match_provider_name_for_url = registry.get("match_provider_name_for_url")
+ get_provider = registry.get("get_provider")
+
+ provider_name = None
+ if match_provider_name_for_url:
+ try:
+ provider_name = match_provider_name_for_url(target_url)
+ except Exception:
+ pass
+
+ if provider_name and get_provider:
+ provider = get_provider(provider_name, config)
+ if provider and hasattr(provider, "maybe_show_picker"):
+ try:
+ quiet_mode = bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False
+ res = provider.maybe_show_picker(
+ url=target_url,
+ item=piped_items[0] if piped_items else None,
+ parsed=parsed,
+ config=config,
+ quiet_mode=quiet_mode,
+ )
+ if res is not None:
+ return int(res)
+ except Exception as e:
+ debug(f"Provider {provider_name} picker error: {e}")
+
+ return None
+
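+ # Provider-side counterpart, sketched under assumptions (the Provider base class and the
+ # exact hook signature are not shown in this diff; names are illustrative):
+ #
+ #     class InternetArchive(Provider):
+ #         def maybe_show_picker(self, *, url, item, parsed, config, quiet_mode):
+ #             """Return an exit code to short-circuit download-file, or None to continue."""
+ #             if "/details/" not in str(url):
+ #                 return None
+ #             ...  # render the format ResultTable, then return 0 after the user picks
+ #
+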
def _resolve_output_dir(self, parsed: Dict[str, Any],