diff --git a/API/Tidal.py b/API/Tidal.py
index aba691c..bf03473 100644
--- a/API/Tidal.py
+++ b/API/Tidal.py
@@ -3,6 +3,7 @@ from __future__ import annotations
 from typing import Any, Dict, List, Optional, Set
 from .base import API, ApiError
+from SYS.logger import debug
 DEFAULT_BASE_URL = "https://tidal-api.binimum.org"
@@ -241,21 +242,24 @@ class Tidal(API):
         # 1. Fetch info (metadata) - fetch raw to ensure all fields are available for merging
         info_resp = self._get_json("info/", params={"id": track_int})
+        debug(f"[API.Tidal] info_resp (len={len(str(info_resp))}): {info_resp}")
         info_data = info_resp.get("data") if isinstance(info_resp, dict) else info_resp
         if not isinstance(info_data, dict) or "id" not in info_data:
             info_data = info_resp if isinstance(info_resp, dict) and "id" in info_resp else {}
 
         # 2. Fetch track (manifest/bit depth)
         track_resp = self.track(track_id)
+        debug(f"[API.Tidal] track_resp (len={len(str(track_resp))}): {track_resp}")
         # Note: track() method in this class currently returns raw JSON, so we handle it similarly.
         track_data = track_resp.get("data") if isinstance(track_resp, dict) else track_resp
-        if not isinstance(track_data, dict) or "id" not in track_data:
-            track_data = track_resp if isinstance(track_resp, dict) and "id" in track_resp else {}
+        if not isinstance(track_data, dict):
+            track_data = track_resp if isinstance(track_resp, dict) else {}
 
         # 3. Fetch lyrics
         lyrics_data = {}
         try:
             lyr_resp = self.lyrics(track_id)
+            debug(f"[API.Tidal] lyrics_resp (len={len(str(lyr_resp))}): {lyr_resp}")
             lyrics_data = lyr_resp.get("lyrics") or lyr_resp if isinstance(lyr_resp, dict) else {}
         except Exception:
             pass
@@ -267,18 +271,24 @@ class Tidal(API):
         if isinstance(track_data, dict):
             merged_md.update(track_data)
 
+        debug(f"[API.Tidal] merged_md keys: {list(merged_md.keys())}")
+
         # Derived tags and normalized/parsed info
         tags = build_track_tags(merged_md)
+        debug(f"[API.Tidal] generated tags: {tags}")
         parsed_info = parse_track_item(merged_md)
 
         # Structure for return
-        return {
+        res = {
             "metadata": merged_md,
             "parsed": parsed_info,
             "tags": list(tags),
             "lyrics": lyrics_data,
         }
+        debug(f"[API.Tidal] returning full_track_metadata keys: {list(res.keys())}")
+        return res
 
 # Legacy alias for TidalApiClient
 TidalApiClient = Tidal
+HifiApiClient = Tidal
diff --git a/CLI.py b/CLI.py
index f3e6154..125b20e 100644
--- a/CLI.py
+++ b/CLI.py
@@ -3731,18 +3731,32 @@ class PipelineExecutor:
             if emits:
                 try:
                     from cmdlet import _shared as sh
+                    from SYS import models
+
+                    # 1. Apply -path persistence (moves temp files to final destination)
                     emits = sh.apply_output_path_from_pipeobjects(
                         cmd_name=cmd_name,
                         args=list(stage_args),
                         emits=emits,
                     )
+
+                    # 2. METADATA STICKINESS / PROPAGATION
+                    # We normalize all emitted items and merge metadata/tags from the previous stage.
+                    # This ensures info like track titles/lyrics survive downloads/conversions.
+                    # See cmdlet._shared.propagate_metadata for the merge logic.
+                    prev_items = piped_result
+                    if not isinstance(prev_items, (list, tuple)):
+                        prev_items = [prev_items] if prev_items else []
+
+                    emits = sh.propagate_metadata(prev_items, emits)
+
                     try:
                         pipeline_ctx.emits = list(emits)
                     except Exception:
                         pass
                 except Exception:
                     pass
+
                 if emits:
                     # If the cmdlet already installed an overlay table (e.g.
get-tag), # don't overwrite it: set_last_result_items_only() would clear the diff --git a/Provider/HIFI.py b/Provider/HIFI.py index 23ebe79..7b6795f 100644 --- a/Provider/HIFI.py +++ b/Provider/HIFI.py @@ -13,13 +13,15 @@ from typing import Any, Dict, Iterable, List, Optional, Tuple from urllib.parse import urlparse from API.Tidal import ( - HifiApiClient, + Tidal as TidalApiClient, build_track_tags, coerce_duration_seconds, extract_artists, stringify, ) from ProviderCore.base import Provider, SearchResult, parse_inline_query_arguments +from ProviderCore.inline_utils import collect_choice +from cmdlet._shared import get_field from SYS import pipeline as pipeline_context from SYS.logger import debug, log @@ -64,7 +66,9 @@ def _format_total_seconds(seconds: Any) -> str: return f"{mins}:{secs:02d}" -class Tidal(Provider): +class HIFI(Provider): + + PROVIDER_NAME = "hifi" TABLE_AUTO_STAGES = { "hifi.track": ["download-file"], @@ -97,7 +101,7 @@ class Tidal(Provider): self.api_timeout = float(self.config.get("timeout", 10.0)) except Exception: self.api_timeout = 10.0 - self.api_clients = [HifiApiClient(base_url=url, timeout=self.api_timeout) for url in self.api_urls] + self.api_clients = [TidalApiClient(base_url=url, timeout=self.api_timeout) for url in self.api_urls] def extract_query_arguments(self, query: str) -> Tuple[str, Dict[str, Any]]: normalized, parsed = parse_inline_query_arguments(query) @@ -281,7 +285,7 @@ class Tidal(Provider): if isinstance(detail, dict): title = self._stringify(detail.get("title")) or title - return SearchResult( + res = SearchResult( table="hifi.track", title=title, path=f"hifi://track/{track_id}", @@ -291,6 +295,12 @@ class Tidal(Provider): full_metadata=dict(detail) if isinstance(detail, dict) else {}, selection_args=["-url", f"hifi://track/{track_id}"], ) + if isinstance(detail, dict): + try: + res.tag = self._build_track_tags(detail) + except Exception: + pass + return res def _extract_artist_selection_context(self, selected_items: List[Any]) -> List[Tuple[int, str]]: contexts: List[Tuple[int, str]] = [] @@ -1130,25 +1140,36 @@ class Tidal(Provider): md = dict(getattr(result, "full_metadata") or {}) track_id = self._extract_track_id_from_result(result) - if track_id: + debug(f"[hifi] download: track_id={track_id}, manifest_present={bool(md.get('manifest'))}, tag_count={len(result.tag) if result.tag else 0}") + + # Enrichment: fetch full metadata if manifest or detailed info (like tags/lyrics) is missing. + # We check for 'manifest' because it's required for DASH playback. + # We also check for lyrics/subtitles to ensure they are available for add-file. + has_lyrics = bool(md.get("_tidal_lyrics_subtitles")) or bool(md.get("lyrics")) + + if track_id and (not md.get("manifest") or not md.get("artist") or len(result.tag or []) <= 1 or not has_lyrics): + debug(f"[hifi] Enriching track data (reason: manifest={not md.get('manifest')}, lyrics={not has_lyrics}, tags={len(result.tag or [])})") # Multi-part enrichment from API: metadata, tags, and lyrics. full_data = self._fetch_all_track_data(track_id) + debug(f"[hifi] download: enrichment full_data present={bool(full_data)}") if isinstance(full_data, dict): # 1. Update metadata api_md = full_data.get("metadata") if isinstance(api_md, dict): + debug(f"[hifi] download: updating metadata with {len(api_md)} keys") md.update(api_md) # 2. 
Update tags (re-sync result.tag so cmdlet sees them) api_tags = full_data.get("tags") + debug(f"[hifi] download: enrichment tags={api_tags}") if isinstance(api_tags, list) and api_tags: result.tag = set(api_tags) # 3. Handle lyrics - lyrics = full_data.get("lyrics") - if isinstance(lyrics, dict) and lyrics: - md.setdefault("lyrics", lyrics) - subtitles = lyrics.get("subtitles") + lyrics_dict = full_data.get("lyrics") + if isinstance(lyrics_dict, dict) and lyrics_dict: + md.setdefault("lyrics", lyrics_dict) + subtitles = lyrics_dict.get("subtitles") if isinstance(subtitles, str) and subtitles.strip(): md["_tidal_lyrics_subtitles"] = subtitles.strip() @@ -1328,7 +1349,7 @@ class Tidal(Provider): return False, None - def _get_api_client_for_base(self, base_url: str) -> Optional[HifiApiClient]: + def _get_api_client_for_base(self, base_url: str) -> Optional[TidalApiClient]: base = base_url.rstrip("/") for client in self.api_clients: if getattr(client, "base_url", "").rstrip("/") == base: @@ -1739,6 +1760,10 @@ class Tidal(Provider): or payload.get("path") or payload.get("url") ) + # Guard against method binding (e.g. str.title) being returned by getattr(str, "title") + if callable(title): + title = None + if not title: title = f"Track {track_id}" path = ( @@ -1983,12 +2008,6 @@ class Tidal(Provider): return True - # Optimization: If we are selecting tracks, do NOT force a "Detail View" (resolving manifest) here. - # This allows batch selection to flow immediately to `download-file` (via TABLE_AUTO_STAGES) - # or other downstream cmdlets. The download logic (HIFI.download) handles manifest resolution locally. - if table_type == "hifi.track" or (is_generic_hifi and any(str(get_field(i, "path")).startswith("hifi://track/") for i in selected_items)): - return False - contexts = self._extract_track_selection_context(selected_items) try: debug(f"[hifi.selector] track contexts={len(contexts)}") diff --git a/Provider/internetarchive.py b/Provider/internetarchive.py index 6dbbba5..d92358e 100644 --- a/Provider/internetarchive.py +++ b/Provider/internetarchive.py @@ -501,6 +501,26 @@ class InternetArchive(Provider): "internetarchive.formats": ["download-file"], } + def maybe_show_picker( + self, + *, + url: str, + item: Optional[Any] = None, + parsed: Dict[str, Any], + config: Dict[str, Any], + quiet_mode: bool, + ) -> Optional[int]: + """Generic hook for download-file to show a selection table for IA items.""" + from cmdlet._shared import get_field as sh_get_field + return maybe_show_formats_table( + raw_urls=[url] if url else [], + piped_items=[item] if item else [], + parsed=parsed, + config=config, + quiet_mode=quiet_mode, + get_field=sh_get_field, + ) + def __init__(self, config: Optional[Dict[str, Any]] = None): super().__init__(config) conf = _pick_provider_config(self.config) diff --git a/cmdlet/_shared.py b/cmdlet/_shared.py index 67e55df..0a47cee 100644 --- a/cmdlet/_shared.py +++ b/cmdlet/_shared.py @@ -11,11 +11,16 @@ import sys import tempfile from collections.abc import Iterable as IterableABC -from SYS.logger import log +from SYS.logger import log, debug from pathlib import Path from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Set from dataclasses import dataclass, field from SYS import models +from SYS import pipeline as pipeline_context +from SYS.result_table import ResultTable +from SYS.rich_display import stderr_console as get_stderr_console +from rich.prompt import Confirm +from contextlib import AbstractContextManager, nullcontext @dataclass @@ -2405,6 
+2410,117 @@ def coerce_to_pipe_object(
     return pipe_obj
+
+def propagate_metadata(
+    previous_items: Sequence[Any],
+    new_items: Sequence[Any]
+) -> List[Any]:
+    """Merge metadata/tags from previous pipeline stage into new items.
+
+    Implements "sticky metadata": items generated by a transformation (download, convert)
+    should inherit rich info (lyrics, art, tags) from their source.
+
+    Strategies:
+      A. Hash Match: If inputs/outputs share a hash, they are the same item.
+      B. Index Match: If lists are same length, assume 1:1 mapping (heuristic).
+      C. Explicit Parent: If output has `parent_hash`, link to input with that hash.
+    """
+    if not previous_items or not new_items:
+        return list(new_items)
+
+    try:
+        prev_normalized = [coerce_to_pipe_object(p) for p in previous_items]
+    except Exception:
+        return list(new_items)
+
+    prev_by_hash: Dict[str, models.PipeObject] = {}
+    for p_obj in prev_normalized:
+        if p_obj.hash and p_obj.hash != "unknown":
+            prev_by_hash[p_obj.hash] = p_obj
+
+    normalized: List[models.PipeObject] = []
+
+    # Pre-calculate length matching for heuristic
+    is_same_length = len(new_items) == len(prev_normalized)
+
+    for i, item in enumerate(new_items):
+        try:
+            obj = coerce_to_pipe_object(item)
+        except Exception:
+            normalized.append(item)  # Should not happen given coerce guards
+            continue
+
+        parent: Optional[models.PipeObject] = None
+
+        # Strategy A: Precise Hash Match
+        if obj.hash in prev_by_hash:
+            parent = prev_by_hash[obj.hash]
+
+        # Strategy B: Index Match (Heuristic)
+        if not parent and is_same_length:
+            parent = prev_normalized[i]
+
+        # Strategy C: Explicit Parent Hash
+        if not parent and obj.parent_hash and obj.parent_hash in prev_by_hash:
+            parent = prev_by_hash[obj.parent_hash]
+
+        if parent:
+            # 1. Tags: Merge unique tags
+            if parent.tag:
+                if not obj.tag:
+                    obj.tag = list(parent.tag)
+                else:
+                    curr_tags = {str(t).lower() for t in obj.tag}
+                    for pt in parent.tag:
+                        if str(pt).lower() not in curr_tags:
+                            obj.tag.append(pt)
+
+            # 2. Metadata: Merge missing keys
+            if parent.metadata:
+                if not obj.metadata:
+                    obj.metadata = parent.metadata.copy()
+                else:
+                    for mk, mv in parent.metadata.items():
+                        if mk not in obj.metadata:
+                            obj.metadata[mk] = mv
+
+            # 3. Source URL: Propagate if missing
+            if parent.source_url and not obj.source_url:
+                obj.source_url = parent.source_url
+            elif parent.url and not obj.source_url and not obj.url:
+                # If parent had a URL and child has none, it's likely the source
+                obj.source_url = parent.url
+
+            # 4. Relationships: Merge missing keys
+            if parent.relationships:
+                if not obj.relationships:
+                    obj.relationships = parent.relationships.copy()
+                else:
+                    for rk, rv in parent.relationships.items():
+                        if rk not in obj.relationships:
+                            obj.relationships[rk] = rv
+
+            # 5. Extra (Notes/etc): Merge missing keys
+            # Important for passing 'notes' payload (lyrics, captions)
+            if parent.extra:
+                if not obj.extra:
+                    obj.extra = parent.extra.copy()
+                else:
+                    # Recursive merge for 'notes' dict specifically?
+                    # For now just shallow merge keys, but handle 'notes' specially if valid.
+ for ek, ev in parent.extra.items(): + if ek not in obj.extra: + obj.extra[ek] = ev + elif ek == "notes" and isinstance(ev, dict) and isinstance(obj.extra[ek], dict): + # Merge notes dict + for nk, nv in ev.items(): + if nk not in obj.extra[ek]: + obj.extra[ek][nk] = nv + + normalized.append(obj) + + return normalized + + def register_url_with_local_library( pipe_obj: models.PipeObject, config: Dict[str, @@ -2518,12 +2634,12 @@ def resolve_tidal_manifest_path(item: Any) -> Optional[str]: if candidate_path: m = re.search( - r"tidal:(?://)?track[\\/](\d+)", + r"(tidal|hifi):(?://)?track[\\/](\d+)", str(candidate_path), flags=re.IGNORECASE, ) if m: - track_id = m.group(1) + track_id = m.group(2) if (not already) and track_id is not None: try: @@ -2706,3 +2822,327 @@ def resolve_tidal_manifest_path(item: Any) -> Optional[str]: return None return str(target_path) + +def check_url_exists_in_storage( + urls: Sequence[str], + storage: Any, + hydrus_available: bool, + final_output_dir: Optional[Path] = None, +) -> bool: + """Pre-flight check to see if URLs already exist in storage. + + Args: + urls: List of URLs to check + storage: The storage interface + hydrus_available: Whether Hydrus is available + final_output_dir: Final output directory (to skip if same as storage) + + Returns: + True if check passed (user said yes or no dups), False if user said no (stop). + """ + if storage is None: + debug("Bulk URL preflight skipped: storage unavailable") + return True + + try: + current_cmd_text = pipeline_context.get_current_command_text("") + except Exception: + current_cmd_text = "" + + try: + stage_ctx = pipeline_context.get_stage_context() + except Exception: + stage_ctx = None + + in_pipeline = bool(stage_ctx is not None or ("|" in str(current_cmd_text or ""))) + if in_pipeline: + try: + cached_cmd = pipeline_context.load_value("preflight.url_duplicates.command", default="") + cached_decision = pipeline_context.load_value("preflight.url_duplicates.continue", default=None) + except Exception: + cached_cmd = "" + cached_decision = None + + if cached_decision is not None and str(cached_cmd or "") == str(current_cmd_text or ""): + if bool(cached_decision): + return True + try: + pipeline_context.request_pipeline_stop(reason="duplicate-url declined", exit_code=0) + except Exception: + pass + return False + + unique_urls: List[str] = [] + for u in urls or []: + s = str(u or "").strip() + if s and s not in unique_urls: + unique_urls.append(s) + if len(unique_urls) == 0: + return True + + try: + from SYS.metadata import normalize_urls + except Exception: + normalize_urls = None # type: ignore[assignment] + + def _httpish(value: str) -> bool: + try: + return bool(value) and (value.startswith("http://") or value.startswith("https://")) + except Exception: + return False + + url_needles: Dict[str, List[str]] = {} + for u in unique_urls: + needles: List[str] = [] + if normalize_urls is not None: + try: + needles.extend([n for n in (normalize_urls(u) or []) if isinstance(n, str)]) + except Exception: + needles = [] + if not needles: + needles = [u] + filtered: List[str] = [] + for n in needles: + n2 = str(n or "").strip() + if not n2: + continue + if not _httpish(n2): + continue + if n2 not in filtered: + filtered.append(n2) + url_needles[u] = filtered if filtered else [u] + + backend_names: List[str] = [] + try: + backend_names_all = storage.list_searchable_backends() + except Exception: + backend_names_all = [] + + for backend_name in backend_names_all: + try: + backend = storage[backend_name] + except 
Exception: + continue + + try: + if str(backend_name).strip().lower() == "temp": + continue + except Exception: + pass + + try: + backend_location = getattr(backend, "_location", None) + if backend_location and final_output_dir: + backend_path = Path(str(backend_location)).expanduser().resolve() + temp_path = Path(str(final_output_dir)).expanduser().resolve() + if backend_path == temp_path: + continue + except Exception: + pass + + backend_names.append(backend_name) + + if not backend_names: + debug("Bulk URL preflight skipped: no searchable backends") + return True + + seen_pairs: set[tuple[str, str]] = set() + matched_urls: set[str] = set() + match_rows: List[Dict[str, Any]] = [] + max_rows = 200 + + try: + from Store.HydrusNetwork import HydrusNetwork + except Exception: + HydrusNetwork = None # type: ignore + + for backend_name in backend_names: + if len(match_rows) >= max_rows: + break + try: + backend = storage[backend_name] + except Exception: + continue + + if HydrusNetwork is not None and isinstance(backend, HydrusNetwork): + if not hydrus_available: + continue + + client = getattr(backend, "_client", None) + if client is None: + continue + + for original_url, needles in url_needles.items(): + if len(match_rows) >= max_rows: + break + if (original_url, str(backend_name)) in seen_pairs: + continue + + found_hash: Optional[str] = None + found = False + for needle in (needles or [])[:3]: + if not _httpish(needle): + continue + try: + from API.HydrusNetwork import HydrusRequestSpec + + spec = HydrusRequestSpec( + method="GET", + endpoint="/add_urls/get_url_files", + query={"url": needle}, + ) + # Access internal client safely if possible, else skip check + if hasattr(client, "_perform_request"): + response = client._perform_request(spec) + raw_hashes = None + if isinstance(response, dict): + raw_hashes = response.get("hashes") or response.get("file_hashes") + raw_ids = response.get("file_ids") + has_ids = isinstance(raw_ids, list) and len(raw_ids) > 0 + has_hashes = isinstance(raw_hashes, list) and len(raw_hashes) > 0 + if has_hashes: + try: + found_hash = str(raw_hashes[0]).strip() + except Exception: + found_hash = None + if has_ids or has_hashes: + found = True + break + except Exception: + continue + + if not found: + continue + + seen_pairs.add((original_url, str(backend_name))) + matched_urls.add(original_url) + display_row = { + "title": "(exists)", + "store": str(backend_name), + "hash": found_hash or "", + "url": original_url, + "columns": [ + ("Title", "(exists)"), + ("Store", str(backend_name)), + ("Hash", found_hash or ""), + ("URL", original_url), + ], + } + match_rows.append(display_row) + continue + + for original_url, needles in url_needles.items(): + if len(match_rows) >= max_rows: + break + if (original_url, str(backend_name)) in seen_pairs: + continue + + backend_hits: List[Dict[str, Any]] = [] + for needle in (needles or [])[:3]: + try: + backend_hits = backend.search(f"url:{needle}", limit=1) or [] + if backend_hits: + break + except Exception: + continue + + if not backend_hits: + continue + + seen_pairs.add((original_url, str(backend_name))) + matched_urls.add(original_url) + hit = backend_hits[0] + title = hit.get("title") or hit.get("name") or hit.get("target") or hit.get("path") or "(exists)" + file_hash = hit.get("hash") or hit.get("file_hash") or hit.get("sha256") or "" + + try: + from SYS.result_table import build_display_row + extracted = build_display_row(hit, keys=["title", "store", "hash", "ext", "size"]) + except Exception: + extracted = {} + + 
extracted["title"] = str(title) + extracted["store"] = str(hit.get("store") or backend_name) + extracted["hash"] = str(file_hash or "") + + ext = extracted.get("ext") + size_val = extracted.get("size") + + display_row = { + "title": str(title), + "store": str(hit.get("store") or backend_name), + "hash": str(file_hash or ""), + "ext": str(ext or ""), + "size": size_val, + "url": original_url, + "columns": [ + ("Title", str(title)), + ("Store", str(hit.get("store") or backend_name)), + ("Hash", str(file_hash or "")), + ("Ext", str(ext or "")), + ("Size", size_val), + ("URL", original_url), + ], + } + match_rows.append(display_row) + + if not match_rows: + debug("Bulk URL preflight: no matches") + return True + + table = ResultTable(f"URL already exists ({len(matched_urls)} url(s))", max_columns=10) + table.set_no_choice(True) + try: + table.set_preserve_order(True) + except Exception: + pass + + for row in match_rows: + table.add_result(row) + + try: + pipeline_context.set_last_result_table_overlay(table, match_rows) + except Exception: + pass + + suspend = getattr(pipeline_context, "suspend_live_progress", None) + cm: AbstractContextManager[Any] = nullcontext() + if callable(suspend): + try: + maybe_cm = suspend() + if maybe_cm is not None: + cm = maybe_cm # type: ignore[assignment] + except Exception: + cm = nullcontext() + + with cm: + get_stderr_console().print(table) + setattr(table, "_rendered_by_cmdlet", True) + answered_yes = bool(Confirm.ask("Continue anyway?", default=False, console=get_stderr_console())) + + if in_pipeline: + try: + existing = pipeline_context.load_value("preflight", default=None) + except Exception: + existing = None + preflight_cache: Dict[str, Any] = existing if isinstance(existing, dict) else {} + url_dup_cache = preflight_cache.get("url_duplicates") + if not isinstance(url_dup_cache, dict): + url_dup_cache = {} + url_dup_cache["command"] = str(current_cmd_text or "") + url_dup_cache["continue"] = bool(answered_yes) + preflight_cache["url_duplicates"] = url_dup_cache + try: + pipeline_context.store_value("preflight", preflight_cache) + except Exception: + pass + + if not answered_yes: + if in_pipeline: + try: + pipeline_context.request_pipeline_stop(reason="duplicate-url declined", exit_code=0) + except Exception: + pass + return False + return True + diff --git a/cmdlet/add_note.py b/cmdlet/add_note.py index 7c61bbb..fced5e1 100644 --- a/cmdlet/add_note.py +++ b/cmdlet/add_note.py @@ -209,11 +209,8 @@ class Add_Note(Cmdlet): note_name = str(note_name or "").strip() note_text = str(note_text or "").strip() if not note_name or not note_text: - log( - "[add_note] Error: -query must include title: and text:<text>", - file=sys.stderr, - ) - return 1 + pass # We now support implicit pipeline notes if -query is missing + # But if explicit targeting (store+hash) is used, we still demand args below. if hash_override and not store_override: log( @@ -224,6 +221,14 @@ class Add_Note(Cmdlet): explicit_target = bool(hash_override and store_override) results = normalize_result_input(result) + + if explicit_target and (not note_name or not note_text): + log( + "[add_note] Error: Explicit target (store+hash) requires -query with title/text", + file=sys.stderr, + ) + return 1 + if results and explicit_target: # Direct targeting mode: apply note once to the explicit target and # pass through any piped items unchanged. 
@@ -287,7 +292,36 @@ class Add_Note(Cmdlet): ctx.emit(res) continue - item_note_text = note_text + # Determine notes to write for this item + notes_to_write: List[Tuple[str, str]] = [] + + # 1. Explicit arguments always take precedence + if note_name and note_text: + notes_to_write.append((note_name, note_text)) + + # 2. Pipeline notes auto-ingestion + # Look for 'notes' dictionary in the item (propagated by pipeline/download-file) + # Structure: {'notes': {'lyric': '...', 'sub': '...'}} + # Check both root and nested 'extra' + + # Check root 'notes' (dict or extra.notes) + pipeline_notes = res.get("notes") + if not isinstance(pipeline_notes, dict): + extra = res.get("extra") + if isinstance(extra, dict): + pipeline_notes = extra.get("notes") + + if isinstance(pipeline_notes, dict): + for k, v in pipeline_notes.items(): + # If arg-provided note conflicts effectively with pipeline note? + # We just append both. + if v and str(v).strip(): + notes_to_write.append((str(k), str(v))) + + if not notes_to_write: + # Pass through items that have nothing to add + ctx.emit(res) + continue store_name = str(store_override or res.get("store") or "").strip() raw_hash = res.get("hash") @@ -298,7 +332,7 @@ class Add_Note(Cmdlet): "[add_note] Error: Missing -store and item has no store field", file=sys.stderr ) - return 1 + continue resolved_hash = self._resolve_hash( raw_hash=str(raw_hash) if raw_hash else None, @@ -312,80 +346,43 @@ class Add_Note(Cmdlet): ) ctx.emit(res) continue - - try: - backend = store_registry[store_name] - except Exception as exc: - log( - f"[add_note] Error: Unknown store '{store_name}': {exc}", - file=sys.stderr - ) - return 1 - - # Queue for bulk write per store. We still emit items immediately; - # the pipeline only advances after this cmdlet returns. - note_ops.setdefault(store_name, - []).append((resolved_hash, - note_name, - item_note_text)) - planned_ops += 1 - + + # Queue operations + if store_name not in note_ops: + note_ops[store_name] = [] + + for (n_name, n_text) in notes_to_write: + note_ops[store_name].append((resolved_hash, n_name, n_text)) + planned_ops += 1 + ctx.emit(res) - # Execute bulk writes per store. 
- successful_writes = 0 + + # Execute batch operations + success_count = 0 for store_name, ops in note_ops.items(): - if not ops: - continue try: backend = store_registry[store_name] - except Exception: - continue + if not hasattr(backend, "set_note"): + log(f"[add_note] Store '{store_name}' does not support notes", file=sys.stderr) + continue + + for (h, name, text) in ops: + try: + if backend.set_note(h, name, text, config=config): + success_count += 1 + except Exception as e: + log(f"[add_note] Write failed {store_name}:{h} ({name}): {e}", file=sys.stderr) + + except Exception as e: + log(f"[add_note] Store access failed '{store_name}': {e}", file=sys.stderr) - store_success = 0 - bulk_fn = getattr(backend, "set_note_bulk", None) - if callable(bulk_fn): - try: - ok = bool(bulk_fn(list(ops), config=config)) - if ok: - store_success += len(ops) - ctx.print_if_visible( - f"✓ add-note: {len(ops)} item(s) in '{store_name}'", - file=sys.stderr - ) - successful_writes += store_success - continue - log( - f"[add_note] Warning: bulk set_note returned False for '{store_name}'", - file=sys.stderr, - ) - except Exception as exc: - log( - f"[add_note] Warning: bulk set_note failed for '{store_name}': {exc}; falling back", - file=sys.stderr, - ) - - # Fallback: per-item writes - for file_hash, name, text in ops: - try: - ok = bool(backend.set_note(file_hash, name, text, config=config)) - if ok: - store_success += 1 - except Exception: - continue - - if store_success: - successful_writes += store_success - ctx.print_if_visible( - f"✓ add-note: {store_success} item(s) in '{store_name}'", - file=sys.stderr - ) - - log( - f"[add_note] Updated {successful_writes}/{planned_ops} item(s)", - file=sys.stderr - ) - return 0 if successful_writes > 0 else 1 + if planned_ops > 0: + msg = f"✓ add-note: Updated {success_count}/{planned_ops} notes across {len(note_ops)} stores" + ctx.print_if_visible(msg, file=sys.stderr) + + return 0 CMDLET = Add_Note() + diff --git a/cmdlet/download_file.py b/cmdlet/download_file.py index 05eedfc..4e8a82e 100644 --- a/cmdlet/download_file.py +++ b/cmdlet/download_file.py @@ -17,10 +17,6 @@ from contextlib import AbstractContextManager, nullcontext import requests -from Provider import internetarchive as ia_provider -from Provider import alldebrid as ad_provider -from Provider import openlibrary as ol_provider - from API.HTTP import _download_direct_file from SYS.models import DownloadError, DownloadOptions, DownloadMediaResult from SYS.logger import log, debug @@ -152,639 +148,71 @@ class Download_File(Cmdlet): get_provider = registry.get("get_provider") match_provider_name_for_url = registry.get("match_provider_name_for_url") - context_items_list: List[Any] - try: - context_items_list = list(context_items) if context_items else [] - except Exception: - context_items_list = [] - for url in raw_urls: try: debug(f"Processing URL: {url}") - - # Telegram message URLs are not direct files; route through the provider. 
- try: - parsed_url = urlparse(str(url)) - host = (parsed_url.hostname or "").lower().strip() - except Exception: - host = "" - - is_telegram = host in {"t.me", - "telegram.me"} or host.endswith(".t.me") - if is_telegram and SearchResult: - try: - from ProviderCore.registry import get_provider as _get_provider - except Exception: - _get_provider = None - - if _get_provider is None: - raise DownloadError("Telegram provider registry not available") - - provider = _get_provider("telegram", config) - if provider is None: - raise DownloadError( - "Telegram provider not configured or not available (check telethon/app_id/api_hash)" - ) - - sr = SearchResult( - table="telegram", - title=str(url), - path=str(url), - full_metadata={} - ) - downloaded_path = None - telegram_info: Optional[Dict[str, Any]] = None - if hasattr(provider, "download_url"): - try: - downloaded_path, telegram_info = provider.download_url(str(url), final_output_dir) # type: ignore[attr-defined] - except Exception as exc: - raise DownloadError(str(exc)) - else: - downloaded_path = provider.download(sr, final_output_dir) - - if not downloaded_path: - raise DownloadError("Telegram download returned no file") - - channel = "" - post = None - if isinstance(telegram_info, dict): - try: - chat_info_raw = telegram_info.get("chat") - msg_info_raw = telegram_info.get("message") - chat_info: Dict[str, - Any] = ( - chat_info_raw - if isinstance(chat_info_raw, - dict) else {} - ) - msg_info: Dict[str, - Any] = ( - msg_info_raw - if isinstance(msg_info_raw, - dict) else {} - ) - channel = str( - chat_info.get("title") or chat_info.get("username") - or "" - ).strip() - post = msg_info.get("id") - except Exception: - channel = "" - post = None - - title_hint = None - tg_tags: List[str] = [] - if channel: - tg_tags.append(f"channel:{channel}") - if post is not None: - tg_tags.append(f"post:{post}") - if channel and post is not None: - title_hint = f"{channel} {post}" - elif post is not None: - title_hint = f"post:{post}" - else: - title_hint = downloaded_path.stem - - self._emit_local_file( - downloaded_path=downloaded_path, - source=str(url), - title_hint=title_hint, - tags_hint=tg_tags, - media_kind_hint="file", - full_metadata=telegram_info, - provider_hint="telegram", - progress=progress, - config=config, - ) - downloaded_count += 1 - debug("✓ Downloaded via Telegram provider and emitted") - continue - - # Provider URL routing (e.g. OpenLibrary book pages). + + # Check providers first provider_name = None - if match_provider_name_for_url is not None: + if match_provider_name_for_url: try: provider_name = match_provider_name_for_url(str(url)) - except Exception: - provider_name = None - - # Heuristic: LibGen often uses landing pages like edition.php/file.php. - # These should never be treated as direct file URLs. 
- if not provider_name: - try: - p = urlparse(str(url)) - h = (p.hostname or "").strip().lower() - path = (p.path or "").strip().lower() - if "libgen" in h and any(x in path for x in ( - "/edition.php", - "/file.php", - "/ads.php", - "/get.php", - "/series.php", )): - provider_name = "libgen" except Exception: pass - - provider_for_url = None - if provider_name and get_provider is not None: - provider_for_url = get_provider(provider_name, config) - - if provider_for_url is not None: + + provider = None + if provider_name and get_provider: + provider = get_provider(provider_name, config) + + if provider: + debug(f"Provider {provider_name} claimed {url}") try: - handled, handled_path = provider_for_url.handle_url( - str(url), - output_dir=final_output_dir, - ) - except Exception as exc: - raise DownloadError(str(exc)) - if handled: - if handled_path: - downloaded_path = Path(handled_path) - self._emit_local_file( - downloaded_path=downloaded_path, - source=str(url), - title_hint=downloaded_path.stem, - tags_hint=None, - media_kind_hint="file", - full_metadata=None, - provider_hint=str(provider_name), - progress=progress, - config=config, - ) - downloaded_count += 1 + # Try generic handle_url + if hasattr(provider, "handle_url"): + handled, path = provider.handle_url(str(url), output_dir=final_output_dir) + if handled: + if path: + self._emit_local_file( + downloaded_path=Path(str(path)), + source=str(url), + title_hint=Path(str(path)).stem, + tags_hint=None, + media_kind_hint="file", + full_metadata=None, + progress=progress, + config=config, + provider_hint=provider_name + ) + downloaded_count += 1 + continue + + # Try generic download_url + elif hasattr(provider, "download_url"): + downloaded_path = provider.download_url(str(url), final_output_dir) + if downloaded_path: + self._emit_local_file( + downloaded_path=Path(downloaded_path), + source=str(url), + title_hint=Path(str(downloaded_path)).stem, + tags_hint=None, + media_kind_hint="file", + full_metadata=None, + provider_hint=provider_name, + progress=progress, + config=config, + ) + downloaded_count += 1 + continue + + except Exception as e: + log(f"Provider {provider_name} error handling {url}: {e}", file=sys.stderr) + # Fallthrough to direct download? + # If a provider explicitly claimed it but failed, maybe we shouldn't fallback? + # But "barebones" implies robustness might be up to user. + # We'll continue to next URL. continue - if provider_name and get_provider is not None and SearchResult is not None: - # OpenLibrary URLs should be handled by the OpenLibrary provider. - if provider_name == "openlibrary": - url_str = str(url).strip() - provider = get_provider("openlibrary", config) - if provider is None: - raise DownloadError( - "OpenLibrary provider not configured or not available" - ) - - edition_id = ol_provider.edition_id_from_url(url_str) - title_hint = ol_provider.title_hint_from_url_slug(url_str) - - download_payload: Optional[Dict[str, Any]] = None - try: - ui, _pipe_idx = progress.ui_and_pipe_index() - progress_cb = None - if ui is not None: - # High-level steps for OpenLibrary borrow/download flow. 
- progress.begin_steps(5) - - def _progress( - kind: str, - done: int, - total: Optional[int], - label: str - ) -> None: - # kind: - # - "step": advance step text - # - "pages": update pipe percent/status - # - "bytes": update transfer bar - if kind == "step": - progress.step(label) - return - - if kind == "pages": - t = int(total) if isinstance(total, int) else 0 - d = int(done) if isinstance(done, int) else 0 - if t > 0: - pct = int( - round( - (max(0, - min(d, - t)) / max(1, - t)) * 100.0 - ) - ) - progress.set_percent(pct) - progress.set_status( - f"downloading pages {d}/{t}" - ) - else: - progress.set_status( - f"downloading pages {d}" - ) - return - - if kind == "bytes": - try: - lbl = str(label or "download") - except Exception: - lbl = "download" - progress.begin_transfer(label=lbl, total=total) - progress.update_transfer( - label=lbl, - completed=done, - total=total - ) - try: - if (isinstance(total, - int) and total > 0 - and int(done) >= int(total)): - progress.finish_transfer(label=lbl) - except Exception: - pass - return - - progress_cb = _progress - - # Prefer piped OpenLibrary context (selection row) when present so we keep - # resolved metadata like archive_id and availability. - ctx_item = None - ctx_md: Dict[str, Any] = {} - ctx_title: Optional[str] = None - ctx_tags: Optional[List[str]] = None - ctx_media_kind: Optional[str] = None - for candidate in context_items_list: - try: - table_val = get_field(candidate, "table") - except Exception: - table_val = None - if str(table_val or "").lower() != "openlibrary": - continue - - md_val = get_field(candidate, "full_metadata") - md_dict = md_val if isinstance(md_val, dict) else {} - cand_olid = str(md_dict.get("openlibrary_id") or md_dict.get("olid") or "").strip() - cand_archive = str(md_dict.get("archive_id") or "").strip() - cand_url = str( - get_field(candidate, "path") - or get_field(candidate, "url") - or md_dict.get("selection_url") - or "" - ).strip() - - matched = False - if edition_id and cand_olid and cand_olid == edition_id: - matched = True - elif cand_url and url_str and cand_url == url_str: - matched = True - elif (not edition_id) and cand_archive and cand_archive in url_str: - matched = True - - if matched: - ctx_item = candidate - ctx_md = md_dict - ctx_title = get_field(candidate, "title") - ctx_media_kind = get_field(candidate, "media_kind") - tags_val = get_field(candidate, "tag") - if isinstance(tags_val, list): - ctx_tags = [str(t) for t in tags_val if t] - break - - if ctx_item is not None and SearchResult is not None: - sr_meta = dict(ctx_md) if isinstance(ctx_md, dict) else {} - if edition_id and not sr_meta.get("openlibrary_id"): - sr_meta["openlibrary_id"] = edition_id - - sr_title = str(ctx_title or title_hint or "").strip() or title_hint - sr_media_kind = str(ctx_media_kind or "book") - - sr_obj = ( - ctx_item - if isinstance(ctx_item, SearchResult) - else SearchResult( - table="openlibrary", - title=sr_title, - path=url_str, - media_kind=sr_media_kind, - full_metadata=sr_meta, - ) - ) - - try: - sr_obj.path = url_str # type: ignore[attr-defined] - except Exception: - pass - try: - if ctx_tags: - sr_obj.tag = set(ctx_tags) # type: ignore[attr-defined] - except Exception: - pass - - downloaded_path = provider.download( - sr_obj, - final_output_dir, - progress_callback=progress_cb - ) # type: ignore[call-arg] - - if downloaded_path: - download_payload = { - "path": Path(downloaded_path), - "search_result": sr_obj, - } - - if download_payload is None and hasattr(provider, "download_url"): - 
download_payload = provider.download_url( # type: ignore[attr-defined] - url_str, - final_output_dir, - progress_cb, - ) - - if download_payload is None: - sr = None - if hasattr(provider, "search_result_from_url"): - sr = provider.search_result_from_url(url_str) # type: ignore[attr-defined] - if sr is None: - sr = SearchResult( - table="openlibrary", - title=title_hint, - path=url_str, - media_kind="book", - full_metadata={ - "openlibrary_id": edition_id, - }, - ) - - downloaded_path = provider.download( - sr, - final_output_dir, - progress_callback=progress_cb - ) # type: ignore[call-arg] - - if downloaded_path: - download_payload = { - "path": Path(downloaded_path), - "search_result": sr, - } - except Exception as exc: - raise DownloadError(str(exc)) - - # Clear long-running status line after the download attempt. - progress.clear_status() - - if download_payload and download_payload.get("path"): - downloaded_path = Path(download_payload["path"]) - sr_obj = download_payload.get("search_result") - - tags_hint: Optional[List[str]] = None - full_md: Optional[Dict[str, Any]] = None - resolved_title = title_hint - if sr_obj is not None: - try: - resolved_title = getattr(sr_obj, "title", None) or resolved_title - except Exception: - pass - try: - sr_tags = getattr(sr_obj, "tag", None) - if isinstance(sr_tags, set) and sr_tags: - tags_hint = sorted([str(t) for t in sr_tags if t]) - except Exception: - tags_hint = None - try: - full_md = getattr(sr_obj, "full_metadata", None) - except Exception: - full_md = None - - self._emit_local_file( - downloaded_path=downloaded_path, - source=str(url), - title_hint=resolved_title, - tags_hint=tags_hint, - media_kind_hint="book", - full_metadata=full_md, - provider_hint="openlibrary", - progress=progress, - config=config, - ) - downloaded_count += 1 - continue - - # If OpenLibrary can't provide it (not lendable, no creds, etc), auto-search LibGen. - try: - fallback_query = str(title_hint or "").strip() - if fallback_query: - log( - f"[download-file] Not available on OpenLibrary; searching LibGen for: {fallback_query}", - file=sys.stderr, - ) - from cmdlet.search_provider import CMDLET as _SEARCH_PROVIDER_CMDLET - - exec_fn = getattr(_SEARCH_PROVIDER_CMDLET, "exec", None) - if callable(exec_fn): - ret = exec_fn( - None, - [ - "-provider", - "libgen", - "-query", - fallback_query - ], - config, - ) - try: - table = pipeline_context.get_last_result_table() - items = pipeline_context.get_last_result_items() - if table is not None: - pipeline_context.set_last_result_table_overlay( - table, - items - ) - except Exception: - pass - - try: - return downloaded_count, int(ret) # type: ignore[arg-type] - except Exception: - return downloaded_count, 1 - except Exception: - pass - - log( - "[download-file] OpenLibrary URL could not be downloaded", - file=sys.stderr, - ) - continue - - # Generic provider URL handler (if a provider implements `download_url`). 
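
[Illustrative sketch: not part of the patch] The slimmed-down routing loop in download_file now relies on just two optional provider hooks, handle_url and download_url. A hypothetical provider exposing them could look like this; the class name, URL pattern, and fetch step are assumptions, while the call shapes match the diff.

    from pathlib import Path
    from typing import Optional, Tuple

    class ExampleProvider:
        """Hypothetical provider exposing the hooks download-file probes for."""

        def handle_url(self, url: str, *, output_dir: Path) -> Tuple[bool, Optional[str]]:
            # (True, path): handled and produced a file; (True, None): handled, nothing to emit;
            # (False, None): not ours, let download-file keep routing.
            if "example.org/item/" not in url:
                return False, None
            target = output_dir / "item.bin"
            # ... fetch the payload into `target` here ...
            return True, str(target)

        def download_url(self, url: str, output_dir: Path) -> Optional[str]:
            # Simpler fallback hook: return the downloaded path, or None on failure.
            return None
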
- provider = get_provider(provider_name, config) - if provider is not None and hasattr(provider, "download_url"): - try: - downloaded_path = provider.download_url( - str(url), - final_output_dir - ) # type: ignore[attr-defined] - except Exception as exc: - raise DownloadError(str(exc)) - - if downloaded_path: - self._emit_local_file( - downloaded_path=Path(downloaded_path), - source=str(url), - title_hint=Path(str(downloaded_path)).stem, - tags_hint=None, - media_kind_hint="file", - full_metadata=None, - provider_hint=str(provider_name), - progress=progress, - config=config, - ) - downloaded_count += 1 - continue - - # Otherwise, try provider.download(SearchResult) with the URL as the target. - if provider is not None: - sr_obj = None - try: - sr_obj = SearchResult( - table=str(provider_name), - title=str(url), - path=str(url), - full_metadata={}, - ) - downloaded_path = provider.download( - sr_obj, - final_output_dir - ) # type: ignore[call-arg] - except Exception: - downloaded_path = None - - # Refuse to fall back to direct-download for LibGen landing pages. - # This prevents saving HTML (e.g. edition.php) as a bogus file. - if (not downloaded_path - ) and str(provider_name).lower() == "libgen": - raise DownloadError( - "LibGen URL did not resolve to a downloadable file" - ) - - if downloaded_path: - emit_tags: Optional[List[str]] = None - full_md: Optional[Dict[str, Any]] = None - title_hint = Path(str(downloaded_path)).stem - media_kind_hint = "file" - - if str(provider_name - ).lower() == "libgen" and sr_obj is not None: - media_kind_hint = "book" - try: - sr_tags = getattr(sr_obj, "tag", None) - if isinstance(sr_tags, set) and sr_tags: - emit_tags = sorted( - [str(t) for t in sr_tags if t] - ) - except Exception: - emit_tags = None - - try: - sr_full_md = getattr(sr_obj, "full_metadata", None) - if isinstance(sr_full_md, dict): - full_md = sr_full_md - t = str(sr_full_md.get("title") or "").strip() - if t: - title_hint = t - except Exception: - full_md = None - - self._emit_local_file( - downloaded_path=Path(downloaded_path), - source=str(url), - title_hint=title_hint, - tags_hint=emit_tags, - media_kind_hint=media_kind_hint, - full_metadata=full_md, - provider_hint=str(provider_name), - progress=progress, - config=config, - ) - downloaded_count += 1 - continue - - if provider_name and get_provider is not None and SearchResult is not None: - provider = get_provider(provider_name, config) - - if provider is not None and hasattr(provider, "download_url"): - try: - downloaded_path = provider.download_url( - str(url), - final_output_dir - ) # type: ignore[attr-defined] - except Exception as exc: - raise DownloadError(str(exc)) - - if downloaded_path: - self._emit_local_file( - downloaded_path=Path(downloaded_path), - source=str(url), - title_hint=Path(str(downloaded_path)).stem, - tags_hint=None, - media_kind_hint="file", - full_metadata=None, - provider_hint=str(provider_name), - progress=progress, - config=config, - ) - downloaded_count += 1 - continue - - if provider is not None: - sr_obj = None - try: - sr_obj = SearchResult( - table=str(provider_name), - title=str(url), - path=str(url), - full_metadata={}, - ) - downloaded_path = provider.download( - sr_obj, - final_output_dir - ) # type: ignore[call-arg] - except Exception: - downloaded_path = None - - if (not downloaded_path - ) and str(provider_name).lower() == "libgen": - raise DownloadError( - "LibGen URL did not resolve to a downloadable file" - ) - - if downloaded_path: - emit_tags: Optional[List[str]] = None - full_md: 
Optional[Dict[str, Any]] = None - title_hint = Path(str(downloaded_path)).stem - media_kind_hint = "file" - - if str(provider_name - ).lower() == "libgen" and sr_obj is not None: - media_kind_hint = "book" - try: - sr_tags = getattr(sr_obj, "tag", None) - if isinstance(sr_tags, set) and sr_tags: - emit_tags = sorted( - [str(t) for t in sr_tags if t] - ) - except Exception: - emit_tags = None - - try: - sr_full_md = getattr(sr_obj, "full_metadata", None) - if isinstance(sr_full_md, dict): - full_md = sr_full_md - t = str(sr_full_md.get("title") or "").strip() - if t: - title_hint = t - except Exception: - full_md = None - - self._emit_local_file( - downloaded_path=Path(downloaded_path), - source=str(url), - title_hint=title_hint, - tags_hint=emit_tags, - media_kind_hint=media_kind_hint, - full_metadata=full_md, - provider_hint=str(provider_name), - progress=progress, - config=config, - ) - downloaded_count += 1 - continue - + # Direct Download Fallback result_obj = _download_direct_file( str(url), final_output_dir, @@ -824,40 +252,22 @@ class Download_File(Cmdlet): ) -> List[Any]: get_search_provider = registry.get("get_search_provider") expanded_items: List[Any] = [] + for item in piped_items: try: table = get_field(item, "table") - media_kind = get_field(item, "media_kind") - full_metadata = get_field(item, "full_metadata") - target = get_field(item, "path") or get_field(item, "url") + provider_key = str(table).split(".")[0] if table else None + provider = get_search_provider(provider_key, config) if provider_key and get_search_provider else None - if (str(table or "").lower() == "alldebrid" - and str(media_kind or "").lower() == "folder"): - magnet_id = None - if isinstance(full_metadata, dict): - magnet_id = full_metadata.get("magnet_id") - if (magnet_id is None and isinstance(target, - str) - and target.lower().startswith("alldebrid:magnet:")): - try: - magnet_id = int(target.split(":")[-1]) - except Exception: - magnet_id = None - - expanded, detail = ad_provider.expand_folder_item( - item, - get_search_provider, - config, - ) - if detail: - log( - f"[download-file] AllDebrid magnet {magnet_id or 'unknown'} not ready ({detail or 'unknown'})", - file=sys.stderr, - ) - continue - if expanded: - expanded_items.extend(expanded) - continue + # Generic hook: If provider has expand_item(item), use it. + if provider and hasattr(provider, "expand_item") and callable(provider.expand_item): + try: + sub_items = provider.expand_item(item) + if sub_items: + expanded_items.extend(sub_items) + continue + except Exception as e: + debug(f"Provider {provider_key} expand_item failed: {e}") expanded_items.append(item) except Exception: @@ -904,8 +314,8 @@ class Download_File(Cmdlet): media_kind = get_field(item, "media_kind") tags_val = get_field(item, "tag") tags_list: Optional[List[str]] - if isinstance(tags_val, list): - tags_list = [str(t) for t in tags_val if t] + if isinstance(tags_val, (list, set)): + tags_list = sorted([str(t) for t in tags_val if t]) else: tags_list = None @@ -953,6 +363,8 @@ class Download_File(Cmdlet): table=str(table), title=str(title or "Unknown"), path=str(target or ""), + tag=set(tags_list) if tags_list else set(), + media_kind=str(media_kind or "file"), full_metadata=full_metadata if isinstance(full_metadata, dict) else {}, @@ -963,179 +375,32 @@ class Download_File(Cmdlet): # Preserve provider structure when possible (AllDebrid folders -> subfolders). 
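
[Illustrative sketch: not part of the patch] What a provider-side expand_item hook could return for the generic expansion pass above, which replaces the AllDebrid-specific folder handling. The table name and metadata layout are hypothetical; returning an empty list (or raising) leaves the original item to be downloaded as-is.

    from typing import Any, Dict, List

    class FolderProvider:
        """Hypothetical provider that expands a folder result into per-file items."""

        def expand_item(self, item: Any) -> List[Dict[str, Any]]:
            md = getattr(item, "full_metadata", None) or {}
            files = md.get("files") or []          # assumed metadata layout
            return [
                {
                    "table": "folderprovider",
                    "title": f.get("name"),
                    "url": f.get("link"),
                    "full_metadata": f,
                }
                for f in files
            ]
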
output_dir = final_output_dir - try: - if str(table).strip().lower() == "alldebrid": - output_dir = ad_provider.adjust_output_dir_for_alldebrid( - final_output_dir, - full_metadata if isinstance(full_metadata, dict) else None, - item, - ) - except Exception: - output_dir = final_output_dir - + # Generic: allow provider to strict output_dir? + # Using default output_dir for now. + downloaded_path = provider_obj.download(sr, output_dir) provider_sr = sr if downloaded_path is None: - download_items = getattr(provider_obj, "download_items", None) - if callable(download_items): - - def _on_emit(path: Path, file_url: str, relpath: str, metadata: Dict[str, Any]) -> None: - title_hint = metadata.get("name") or relpath or title - self._emit_local_file( - downloaded_path=path, - source=file_url or target, - title_hint=title_hint, - tags_hint=tags_list, - media_kind_hint="file", - full_metadata=metadata, - progress=progress, - config=config, - provider_hint=str(table) if table else None, - ) - - try: - downloaded_extra = download_items( - sr, - output_dir, - emit=_on_emit, - progress=progress, - quiet_mode=quiet_mode, - path_from_result=self._path_from_download_result, - config=config, - ) - except TypeError: - downloaded_extra = download_items( - sr, - output_dir, - emit=_on_emit, - progress=progress, - quiet_mode=quiet_mode, - path_from_result=self._path_from_download_result, - ) - except Exception: - downloaded_extra = 0 - - if downloaded_extra: - downloaded_count += int(downloaded_extra) - continue - - # OpenLibrary: if provider download failed, do NOT try to download the OpenLibrary page HTML. - if (downloaded_path is None and attempted_provider_download - and str(table or "").lower() == "openlibrary"): - availability = None - reason = None - if isinstance(full_metadata, dict): - availability = full_metadata.get("availability") - reason = full_metadata.get("availability_reason") - msg = "[download-file] OpenLibrary item not downloadable" - if availability or reason: - msg += f" (availability={availability or ''} reason={reason or ''})" - log(msg, file=sys.stderr) - - # Fallback: show a LibGen selectable ResultTable (no emits) so the user can pick @N. - # This intentionally mirrors `search-file -provider libgen` UX: results table + selection. - try: - title_text = str(title or "").strip() - if not title_text and isinstance(full_metadata, dict): - title_text = str(full_metadata.get("title") or "").strip() - if title_text and get_search_provider and SearchResult: - log( - f"[download-file] Not available on OpenLibrary; searching LibGen for: {title_text}", - file=sys.stderr, - ) - libgen_provider = get_search_provider("libgen", config) - if libgen_provider is None: - log( - "[download-file] LibGen provider unavailable; cannot run fallback search", - file=sys.stderr, - ) - continue - - try: - from SYS.result_table import ResultTable - except Exception: - ResultTable = None # type: ignore[assignment] - - if ResultTable is None: - log( - "[download-file] ResultTable unavailable; cannot render LibGen fallback search", - file=sys.stderr, - ) - continue - - fallback_query = title_text - # Keep parity with search-file provider default when user didn't specify a limit. 
- results = libgen_provider.search(fallback_query, limit=50) - if not results: - log( - f"[download-file] LibGen: no results found for: {fallback_query}", - file=sys.stderr, - ) - continue - - table_title = f"Libgen: {fallback_query}".strip().rstrip(":") - table_obj = ResultTable(table_title).set_preserve_order(False) - table_obj.set_table("libgen") - try: - table_obj.set_table_metadata({"provider": "libgen"}) - except Exception: - pass - - # Mark as produced by download-file so the pipeline runner pauses and stores tail stages. - table_obj.set_source_command("download-file", []) - - results_list: List[Dict[str, Any]] = [] - for search_result in results: - item_dict = ( - search_result.to_dict() - if hasattr(search_result, "to_dict") - else dict(search_result) - if isinstance(search_result, dict) - else {"title": str(search_result)} - ) - if "table" not in item_dict: - item_dict["table"] = "libgen" - table_obj.add_result(search_result) - results_list.append(item_dict) - - # Seed selection state for @N and pause the pipeline. - try: - pipeline_context.set_last_result_table(table_obj, results_list) - except Exception: - pass - try: - pipeline_context.set_current_stage_table(table_obj) - except Exception: - pass - - # Returning 0 with a selectable stage table and no emits causes the CLI to render - # the table and pause, preserving the downstream pipeline tail. - return 0 - except Exception: - pass - - continue + # Some providers might work via callback 'download_items', mostly legacy. + # If relevant, check for it. + download_items = getattr(provider_obj, "download_items", None) + if callable(download_items): + pass # We can implement generic callback support if needed, + # but pure download() is preferred. # Fallback: if we have a direct HTTP URL, download it directly if (downloaded_path is None and isinstance(target, str) and target.startswith("http")): - # Guard: provider landing pages (e.g. LibGen ads.php) are HTML, not files. - # Never download these as "files". - if str(table or "").lower() == "libgen": - low = target.lower() - if ("/ads.php" in low) or ("/file.php" in low) or ("/index.php" - in low): - log( - "[download-file] Refusing to download LibGen landing page (expected provider to resolve file link)", - file=sys.stderr, - ) - continue - + + # Generic guard for known "not-a-file" URLs could go here or in a helper, + # but for now we rely on user or provider. + debug( f"[download-file] Provider item looks like direct URL, downloading: {target}" ) + suggested_name = str(title).strip() if title is not None else None result_obj = _download_direct_file( target, @@ -1153,20 +418,12 @@ class Download_File(Cmdlet): ) continue - # Prefer provider-enriched metadata (providers may mutate sr.full_metadata). - if provider_sr is not None: - try: - sr_md = getattr(provider_sr, "full_metadata", None) - if isinstance(sr_md, dict) and sr_md: - full_metadata = sr_md - except Exception: - pass - # Allow providers to add/enrich tags and metadata during download. 
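
[Illustrative sketch: not part of the patch] The re-sync that follows exists because a provider may mutate the SearchResult it is handed during download(); a hypothetical provider doing that enrichment might look like this (the fetch helper and tag values are assumptions).

    class EnrichingProvider:
        """Hypothetical provider that enriches the SearchResult it is given."""

        def download(self, sr, output_dir):
            path = self._fetch(sr.path, output_dir)        # hypothetical fetch helper
            # Mutate the SearchResult in place; download-file re-reads these fields afterwards.
            sr.full_metadata.update({"artist": "Example", "album": "Example"})
            sr.tag = set(sr.tag or []) | {"artist:example", "album:example"}
            return path
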
if provider_sr is not None: try: sr_md = getattr(provider_sr, "full_metadata", None) if isinstance(sr_md, dict) and sr_md: + debug(f"[download-file] Syncing full_metadata from provider_sr (keys={list(sr_md.keys())})") full_metadata = sr_md except Exception: pass @@ -1183,6 +440,7 @@ class Download_File(Cmdlet): try: sr_tags = getattr(provider_sr, "tag", None) if isinstance(sr_tags, (set, list)) and sr_tags: + debug(f"[download-file] Syncing tags_list from provider_sr (count={len(sr_tags)})") # Re-sync tags_list with the potentially enriched provider_sr.tag tags_list = sorted([str(t) for t in sr_tags if t]) except Exception: @@ -1276,7 +534,7 @@ class Download_File(Cmdlet): if provider_hint: payload["provider"] = str(provider_hint) if full_metadata: - payload["full_metadata"] = full_metadata + payload["metadata"] = full_metadata if notes: payload["notes"] = notes if source and str(source).startswith("http"): @@ -1658,541 +916,46 @@ class Download_File(Cmdlet): pass return str(requested_url) + def _preflight_url_duplicate( self, *, storage: Any, hydrus_available: bool, final_output_dir: Path, - candidate_url: str, - extra_urls: Optional[Sequence[str]] = None, + candidate_url: Optional[str] = None, + extra_urls: Optional[List[str]] = None, + **kwargs: Any, ) -> bool: - if storage is None: - debug("Preflight URL check skipped: storage unavailable") - return True + to_check = [] + if candidate_url: + to_check.append(candidate_url) + if extra_urls: + to_check.extend(extra_urls) - debug(f"Preflight URL check: candidate={candidate_url}") - - try: - from SYS.metadata import normalize_urls - except Exception: - normalize_urls = None # type: ignore[assignment] - - needles: List[str] = [] - if normalize_urls is not None: - for raw in [candidate_url, *(list(extra_urls) if extra_urls else [])]: - try: - needles.extend(normalize_urls(raw)) - except Exception: - continue - if not needles: - needles = [str(candidate_url)] - - seen_needles: List[str] = [] - for needle in needles: - if needle and needle not in seen_needles: - seen_needles.append(needle) - needles = seen_needles - - try: - debug(f"Preflight URL needles: {needles}") - except Exception: - pass - - url_matches: List[Dict[str, Any]] = [] - try: - from Store.HydrusNetwork import HydrusNetwork - - backend_names_all = storage.list_searchable_backends() - backend_names: List[str] = [] - skipped: List[str] = [] - for backend_name in backend_names_all: - try: - backend = storage[backend_name] - except Exception: - continue - - try: - if str(backend_name).strip().lower() == "temp": - skipped.append(backend_name) - continue - except Exception: - pass - - try: - backend_location = getattr(backend, "_location", None) - if backend_location and final_output_dir: - backend_path = Path(str(backend_location)).expanduser().resolve() - temp_path = Path(str(final_output_dir)).expanduser().resolve() - if backend_path == temp_path: - skipped.append(backend_name) - continue - except Exception: - pass - - backend_names.append(backend_name) - - try: - if skipped: - debug(f"Preflight backends: {backend_names} (skipped temp: {skipped})") - else: - debug(f"Preflight backends: {backend_names}") - except Exception: - pass - - for backend_name in backend_names: - backend = storage[backend_name] - if isinstance(backend, HydrusNetwork) and not hydrus_available: - continue - - backend_hits: List[Dict[str, Any]] = [] - for needle in needles: - try: - backend_hits = backend.search(f"url:{needle}", limit=25) or [] - if backend_hits: - break - except Exception: - continue - if 
backend_hits: - url_matches.extend( - [ - dict(x) if isinstance(x, dict) else {"title": str(x)} - for x in backend_hits - ] - ) - - if len(url_matches) >= 25: - url_matches = url_matches[:25] - break - except Exception: - url_matches = [] - - if not url_matches: - debug("Preflight URL check: no matches") - return True - - try: - current_cmd_text = pipeline_context.get_current_command_text("") - except Exception: - current_cmd_text = "" - - try: - stage_ctx = pipeline_context.get_stage_context() - except Exception: - stage_ctx = None - - in_pipeline = bool(stage_ctx is not None or ("|" in str(current_cmd_text or ""))) - if in_pipeline: - try: - cached_cmd = pipeline_context.load_value("preflight.url_duplicates.command", default="") - cached_decision = pipeline_context.load_value("preflight.url_duplicates.continue", default=None) - except Exception: - cached_cmd = "" - cached_decision = None - - if cached_decision is not None and str(cached_cmd or "") == str(current_cmd_text or ""): - if bool(cached_decision): - return True - try: - pipeline_context.request_pipeline_stop(reason="duplicate-url declined", exit_code=0) - except Exception: - pass - return False - - table = ResultTable(f"URL already exists ({len(url_matches)} match(es))") - results_list: List[Dict[str, Any]] = [] - for item in url_matches: - if "title" not in item: - item["title"] = item.get("name") or item.get("target") or item.get("path") or "Result" - - try: - from SYS.result_table import build_display_row - except Exception: - build_display_row = None # type: ignore - - if callable(build_display_row): - display_row = build_display_row(item, keys=["title", "store", "hash", "ext", "size"]) - else: - display_row = { - "title": item.get("title"), - "store": item.get("store"), - "hash": item.get("hash") or item.get("file_hash") or item.get("sha256"), - "ext": str(item.get("ext") or ""), - "size": item.get("size") or item.get("size_bytes"), - } - table.add_result(display_row) - results_list.append(item) - - pipeline_context.set_current_stage_table(table) - pipeline_context.set_last_result_table(table, results_list) - - suspend = getattr(pipeline_context, "suspend_live_progress", None) - used_suspend = False - - cm: AbstractContextManager[Any] = nullcontext() - if callable(suspend): - try: - maybe_cm = suspend() - if maybe_cm is not None: - cm = maybe_cm # type: ignore[assignment] - used_suspend = True - except Exception: - cm = nullcontext() - used_suspend = False - - with cm: - get_stderr_console().print(table) - setattr(table, "_rendered_by_cmdlet", True) - answered_yes = bool(Confirm.ask("Continue anyway?", default=False, console=get_stderr_console())) - - if in_pipeline: - try: - existing = pipeline_context.load_value("preflight", default=None) - except Exception: - existing = None - preflight_cache: Dict[str, Any] = existing if isinstance(existing, dict) else {} - url_dup_cache = preflight_cache.get("url_duplicates") - if not isinstance(url_dup_cache, dict): - url_dup_cache = {} - url_dup_cache["command"] = str(current_cmd_text or "") - url_dup_cache["continue"] = bool(answered_yes) - preflight_cache["url_duplicates"] = url_dup_cache - try: - pipeline_context.store_value("preflight", preflight_cache) - except Exception: - pass - - if not answered_yes: - if in_pipeline and used_suspend: - try: - pipeline_context.request_pipeline_stop(reason="duplicate-url declined", exit_code=0) - except Exception: - pass - return False - return True + return sh.check_url_exists_in_storage( + urls=to_check, + storage=storage, + 
hydrus_available=hydrus_available, + final_output_dir=final_output_dir + ) def _preflight_url_duplicates_bulk( self, *, + urls: List[str], storage: Any, hydrus_available: bool, final_output_dir: Path, - urls: Sequence[str], + **kwargs: Any, ) -> bool: - if storage is None: - debug("Bulk URL preflight skipped: storage unavailable") - return True + return sh.check_url_exists_in_storage( + urls=urls, + storage=storage, + hydrus_available=hydrus_available, + final_output_dir=final_output_dir + ) - try: - current_cmd_text = pipeline_context.get_current_command_text("") - except Exception: - current_cmd_text = "" - - try: - stage_ctx = pipeline_context.get_stage_context() - except Exception: - stage_ctx = None - - in_pipeline = bool(stage_ctx is not None or ("|" in str(current_cmd_text or ""))) - if in_pipeline: - try: - cached_cmd = pipeline_context.load_value("preflight.url_duplicates.command", default="") - cached_decision = pipeline_context.load_value("preflight.url_duplicates.continue", default=None) - except Exception: - cached_cmd = "" - cached_decision = None - - if cached_decision is not None and str(cached_cmd or "") == str(current_cmd_text or ""): - if bool(cached_decision): - return True - try: - pipeline_context.request_pipeline_stop(reason="duplicate-url declined", exit_code=0) - except Exception: - pass - return False - - unique_urls: List[str] = [] - for u in urls or []: - s = str(u or "").strip() - if s and s not in unique_urls: - unique_urls.append(s) - if len(unique_urls) <= 1: - return True - - try: - from SYS.metadata import normalize_urls - except Exception: - normalize_urls = None # type: ignore[assignment] - - def _httpish(value: str) -> bool: - try: - return bool(value) and (value.startswith("http://") or value.startswith("https://")) - except Exception: - return False - - url_needles: Dict[str, List[str]] = {} - for u in unique_urls: - needles: List[str] = [] - if normalize_urls is not None: - try: - needles.extend([n for n in (normalize_urls(u) or []) if isinstance(n, str)]) - except Exception: - needles = [] - if not needles: - needles = [u] - filtered: List[str] = [] - for n in needles: - n2 = str(n or "").strip() - if not n2: - continue - if not _httpish(n2): - continue - if n2 not in filtered: - filtered.append(n2) - url_needles[u] = filtered if filtered else [u] - - backend_names: List[str] = [] - try: - backend_names_all = storage.list_searchable_backends() - except Exception: - backend_names_all = [] - - for backend_name in backend_names_all: - try: - backend = storage[backend_name] - except Exception: - continue - - try: - if str(backend_name).strip().lower() == "temp": - continue - except Exception: - pass - - try: - backend_location = getattr(backend, "_location", None) - if backend_location and final_output_dir: - backend_path = Path(str(backend_location)).expanduser().resolve() - temp_path = Path(str(final_output_dir)).expanduser().resolve() - if backend_path == temp_path: - continue - except Exception: - pass - - backend_names.append(backend_name) - - if not backend_names: - debug("Bulk URL preflight skipped: no searchable backends") - return True - - seen_pairs: set[tuple[str, str]] = set() - matched_urls: set[str] = set() - match_rows: List[Dict[str, Any]] = [] - max_rows = 200 - - try: - from Store.HydrusNetwork import HydrusNetwork - except Exception: - HydrusNetwork = None # type: ignore - - for backend_name in backend_names: - if len(match_rows) >= max_rows: - break - try: - backend = storage[backend_name] - except Exception: - continue - - if 
HydrusNetwork is not None and isinstance(backend, HydrusNetwork): - if not hydrus_available: - continue - - client = getattr(backend, "_client", None) - if client is None: - continue - - for original_url, needles in url_needles.items(): - if len(match_rows) >= max_rows: - break - if (original_url, str(backend_name)) in seen_pairs: - continue - - found_hash: Optional[str] = None - found = False - for needle in (needles or [])[:3]: - if not _httpish(needle): - continue - try: - from API.HydrusNetwork import HydrusRequestSpec - - spec = HydrusRequestSpec( - method="GET", - endpoint="/add_urls/get_url_files", - query={"url": needle}, - ) - response = client._perform_request(spec) # type: ignore[attr-defined] - raw_hashes = None - if isinstance(response, dict): - raw_hashes = response.get("hashes") or response.get("file_hashes") - raw_ids = response.get("file_ids") - has_ids = isinstance(raw_ids, list) and len(raw_ids) > 0 - has_hashes = isinstance(raw_hashes, list) and len(raw_hashes) > 0 - if has_hashes: - try: - found_hash = str(raw_hashes[0]).strip() # type: ignore[index] - except Exception: - found_hash = None - if has_ids or has_hashes: - found = True - break - except Exception: - continue - - if not found: - continue - - seen_pairs.add((original_url, str(backend_name))) - matched_urls.add(original_url) - display_row = { - "title": "(exists)", - "store": str(backend_name), - "hash": found_hash or "", - "url": original_url, - "columns": [ - ("Title", "(exists)"), - ("Store", str(backend_name)), - ("Hash", found_hash or ""), - ("URL", original_url), - ], - } - match_rows.append(display_row) - continue - - for original_url, needles in url_needles.items(): - if len(match_rows) >= max_rows: - break - if (original_url, str(backend_name)) in seen_pairs: - continue - - backend_hits: List[Dict[str, Any]] = [] - for needle in (needles or [])[:3]: - try: - backend_hits = backend.search(f"url:{needle}", limit=1) or [] - if backend_hits: - break - except Exception: - continue - - if not backend_hits: - continue - - seen_pairs.add((original_url, str(backend_name))) - matched_urls.add(original_url) - hit = backend_hits[0] - title = hit.get("title") or hit.get("name") or hit.get("target") or hit.get("path") or "(exists)" - file_hash = hit.get("hash") or hit.get("file_hash") or hit.get("sha256") or "" - - try: - from SYS.result_table import build_display_row - except Exception: - build_display_row = None # type: ignore - - extracted = { - "title": str(title), - "store": str(hit.get("store") or backend_name), - "hash": str(file_hash or ""), - "ext": "", - "size": None, - } - if callable(build_display_row): - try: - extracted = build_display_row(hit, keys=["title", "store", "hash", "ext", "size"]) - except Exception: - pass - extracted["title"] = str(title) - extracted["store"] = str(hit.get("store") or backend_name) - extracted["hash"] = str(file_hash or "") - - ext = extracted.get("ext") - size_val = extracted.get("size") - - display_row = { - "title": str(title), - "store": str(hit.get("store") or backend_name), - "hash": str(file_hash or ""), - "ext": str(ext or ""), - "size": size_val, - "url": original_url, - "columns": [ - ("Title", str(title)), - ("Store", str(hit.get("store") or backend_name)), - ("Hash", str(file_hash or "")), - ("Ext", str(ext or "")), - ("Size", size_val), - ("URL", original_url), - ], - } - match_rows.append(display_row) - - if not match_rows: - debug("Bulk URL preflight: no matches") - return True - - table = ResultTable(f"URL already exists ({len(matched_urls)} url(s))", 
max_columns=10) - table.set_no_choice(True) - try: - table.set_preserve_order(True) - except Exception: - pass - - for row in match_rows: - table.add_result(row) - - try: - pipeline_context.set_last_result_table_overlay(table, match_rows) - except Exception: - pass - - suspend = getattr(pipeline_context, "suspend_live_progress", None) - cm: AbstractContextManager[Any] = nullcontext() - if callable(suspend): - try: - maybe_cm = suspend() - if maybe_cm is not None: - cm = maybe_cm # type: ignore[assignment] - except Exception: - cm = nullcontext() - - with cm: - get_stderr_console().print(table) - setattr(table, "_rendered_by_cmdlet", True) - answered_yes = bool(Confirm.ask("Continue anyway?", default=False, console=get_stderr_console())) - - if in_pipeline: - try: - existing = pipeline_context.load_value("preflight", default=None) - except Exception: - existing = None - preflight_cache: Dict[str, Any] = existing if isinstance(existing, dict) else {} - url_dup_cache = preflight_cache.get("url_duplicates") - if not isinstance(url_dup_cache, dict): - url_dup_cache = {} - url_dup_cache["command"] = str(current_cmd_text or "") - url_dup_cache["continue"] = bool(answered_yes) - preflight_cache["url_duplicates"] = url_dup_cache - try: - pipeline_context.store_value("preflight", preflight_cache) - except Exception: - pass - - if not answered_yes: - if in_pipeline: - try: - pipeline_context.request_pipeline_stop(reason="duplicate-url declined", exit_code=0) - except Exception: - pass - return False - return True def _maybe_show_playlist_table(self, *, url: str, ytdlp_tool: YtDlpTool) -> bool: try: @@ -3675,12 +2438,8 @@ class Download_File(Cmdlet): candidate = str(raw_url[0] or "").strip() low = candidate.lower() looks_like_url = low.startswith(("http://", "https://", "ftp://")) - looks_like_provider = low.startswith( - ("magnet:", - "alldebrid:", - "hydrus:", - "ia:", - "internetarchive:") + looks_like_provider = ( + ":" in candidate and not candidate.startswith(("http:", "https:", "ftp:", "ftps:", "file:")) ) looks_like_windows_path = ( (len(candidate) >= 2 and candidate[1] == ":") @@ -3698,25 +2457,18 @@ class Download_File(Cmdlet): log("No url or piped items to download", file=sys.stderr) return 1 - # Internet Archive details URLs should present a downloadable file picker - # before we try any streaming/ytdlp probing. - try: - quiet_mode = ( - bool(config.get("_quiet_background_output")) - if isinstance(config, dict) else False - ) - except Exception: - quiet_mode = False - ia_picker_exit = ia_provider.maybe_show_formats_table( + registry = self._load_provider_registry() + + # Provider-pre-check (e.g. 
Internet Archive format picker) + picker_result = self._maybe_show_provider_picker( raw_urls=raw_url, piped_items=piped_items, parsed=parsed, config=config, - quiet_mode=quiet_mode, - get_field=get_field, + registry=registry, ) - if ia_picker_exit is not None: - return int(ia_picker_exit) + if picker_result is not None: + return int(picker_result) streaming_candidates = self._append_urls_from_piped_result(list(raw_url), result) supported_streaming, unsupported_streaming = self._filter_supported_urls(streaming_candidates) @@ -3740,21 +2492,16 @@ class Download_File(Cmdlet): if not raw_url and not piped_items: return int(streaming_exit_code or 0) - quiet_mode = ( - bool(config.get("_quiet_background_output")) - if isinstance(config, - dict) else False - ) - ia_picker_exit = ia_provider.maybe_show_formats_table( + # Re-check picker if partial processing occurred + picker_result = self._maybe_show_provider_picker( raw_urls=raw_url, piped_items=piped_items, parsed=parsed, config=config, - quiet_mode=quiet_mode, - get_field=get_field, + registry=registry, ) - if ia_picker_exit is not None: - return int(ia_picker_exit) + if picker_result is not None: + return int(picker_result) # Get output directory final_output_dir = self._resolve_output_dir(parsed, config) @@ -3775,8 +2522,6 @@ class Download_File(Cmdlet): items_preview=preview ) - registry = self._load_provider_registry() - downloaded_count = 0 # Special-case: support selection-inserted magnet-id arg to drive provider downloads @@ -3917,6 +2662,58 @@ class Download_File(Cmdlet): pass progress.close_local_ui(force_complete=True) + def _maybe_show_provider_picker( + self, + *, + raw_urls: Sequence[str], + piped_items: Sequence[Any], + parsed: Dict[str, Any], + config: Dict[str, Any], + registry: Dict[str, Any], + ) -> Optional[int]: + """Generic hook for providers to show a selection table (e.g. Internet Archive format picker).""" + total_inputs = len(raw_urls or []) + len(piped_items or []) + if total_inputs != 1: + return None + + target_url = None + if raw_urls: + target_url = str(raw_urls[0]) + elif piped_items: + target_url = str(get_field(piped_items[0], "path") or get_field(piped_items[0], "url") or "") + + if not target_url: + return None + + match_provider_name_for_url = registry.get("match_provider_name_for_url") + get_provider = registry.get("get_provider") + + provider_name = None + if match_provider_name_for_url: + try: + provider_name = match_provider_name_for_url(target_url) + except Exception: + pass + + if provider_name and get_provider: + provider = get_provider(provider_name, config) + if provider and hasattr(provider, "maybe_show_picker"): + try: + quiet_mode = bool(config.get("_quiet_background_output")) + res = provider.maybe_show_picker( + url=target_url, + item=piped_items[0] if piped_items else None, + parsed=parsed, + config=config, + quiet_mode=quiet_mode, + ) + if res is not None: + return int(res) + except Exception as e: + debug(f"Provider {provider_name} picker error: {e}") + + return None + def _resolve_output_dir(self, parsed: Dict[str, Any],
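The new _maybe_show_provider_picker hook generalizes the Internet Archive format picker into a provider protocol: when exactly one input is given, the provider matched by match_provider_name_for_url may expose a maybe_show_picker method, and any non-None return value is coerced with int() and used as the cmdlet's exit code, while None lets the normal download path continue. A sketch of the expected shape, using a made-up provider and URL prefix (the keyword arguments follow the call site above; nothing else here is prescribed by the patch):

# Illustrative only: shows the calling contract download-file now expects, not a real provider.
from typing import Any, Dict, Optional

class ExampleProvider:
    def maybe_show_picker(
        self,
        *,
        url: str,
        item: Optional[Any],
        parsed: Dict[str, Any],
        config: Dict[str, Any],
        quiet_mode: bool,
    ) -> Optional[int]:
        # Return None to let download-file proceed with its ordinary flow.
        if quiet_mode or not url.startswith("https://example.org/details/"):
            return None
        # Otherwise render a selection table for the user and return an exit code;
        # download-file returns that code and pauses the pipeline at the table.
        # ... build and display the format table here ...
        return 0

Exceptions raised by the hook are caught and reported via debug, so a misbehaving provider simply falls through to the ordinary download path.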