diff --git a/API/HydrusNetwork.py b/API/HydrusNetwork.py index 5fe3321..f8d0a44 100644 --- a/API/HydrusNetwork.py +++ b/API/HydrusNetwork.py @@ -542,6 +542,53 @@ class HydrusNetwork: } return self._post("/add_tags/add_tags", data=body) + def mutate_tags_by_key( + self, + hash: Union[str, + Iterable[str]], + service_key: str, + *, + add_tags: Optional[Iterable[str]] = None, + remove_tags: Optional[Iterable[str]] = None, + ) -> dict[str, + Any]: + """Add or remove tags with a single /add_tags/add_tags call. + + Hydrus Client API: POST /add_tags/add_tags + Use `service_keys_to_actions_to_tags` so the client can apply additions + and removals in a single request (action '0' = add, '1' = remove). + """ + hash_list = self._ensure_hashes(hash) + def _clean(tags: Optional[Iterable[str]]) -> list[str]: + if not tags: + return [] + clean_list: list[str] = [] + for tag in tags: + if not isinstance(tag, str): + continue + text = tag.strip() + if not text: + continue + clean_list.append(text) + return clean_list + + actions: dict[str, list[str]] = {} + adds = _clean(add_tags) + removes = _clean(remove_tags) + if adds: + actions["0"] = adds + if removes: + actions["1"] = removes + if not actions: + return {} + body = { + "hashes": hash_list, + "service_keys_to_actions_to_tags": { + str(service_key): actions + }, + } + return self._post("/add_tags/add_tags", data=body) + def associate_url(self, file_hashes: Union[str, Iterable[str]], diff --git a/API/alldebrid.py b/API/alldebrid.py index 05d3e73..deaaf0a 100644 --- a/API/alldebrid.py +++ b/API/alldebrid.py @@ -7,13 +7,15 @@ API docs: https://docs.alldebrid.com/#general-informations from __future__ import annotations import json -import sys - -from SYS.logger import log, debug -import time import logging +import sys +import time + from typing import Any, Dict, Optional, Set, List, Sequence, Tuple from urllib.parse import urlparse + +from SYS.logger import log, debug +from SYS.rich_display import show_provider_config_panel from .HTTP import HTTPClient logger = logging.getLogger(__name__) @@ -1035,6 +1037,7 @@ def unlock_link_cmdlet(result: Any, args: Sequence[str], config: Dict[str, Any]) api_key = _get_alldebrid_api_key_from_config(config) if not api_key: + show_provider_config_panel("alldebrid", ["api_key"]) log( "AllDebrid API key not configured (provider.alldebrid.api_key)", file=sys.stderr diff --git a/CLI.py b/CLI.py index 6a46cf9..5d45ed1 100644 --- a/CLI.py +++ b/CLI.py @@ -17,6 +17,7 @@ import threading import time import uuid from copy import deepcopy +from datetime import datetime from pathlib import Path from typing import Any, Callable, Dict, List, Optional, Sequence, Set, TextIO, Tuple, cast @@ -57,7 +58,6 @@ from SYS.logger import debug, set_debug from SYS.worker_manager import WorkerManager from SYS.cmdlet_catalog import ( - ensure_registry_loaded, get_cmdlet_arg_choices, get_cmdlet_arg_flags, get_cmdlet_metadata, @@ -871,7 +871,7 @@ class CmdletCompleter(Completer): ) -> Set[str]: """Return logical argument names already used in this cmdlet stage. - Example: if the user has typed `download-media -url ...`, then `url` + Example: if the user has typed `download-file -url ...`, then `url` is considered used and should not be suggested again (even as `--url`). 
""" arg_flags = CmdletIntrospection.cmdlet_args(cmd_name, config) @@ -970,8 +970,9 @@ class CmdletCompleter(Completer): ) if choices: for choice in choices: - if choice.lower().startswith(current_token): - yield Completion(choice, start_position=-len(current_token)) + yield Completion(choice, start_position=-len(current_token)) + # Example: if the user has typed `download-file -url ...`, then `url` + # is considered used and should not be suggested again (even as `--url`). return arg_names = CmdletIntrospection.cmdlet_args(cmd_name, config) @@ -1347,8 +1348,6 @@ class CmdletExecutor: from SYS import pipeline as ctx from cmdlet import REGISTRY - ensure_registry_loaded() - # REPL guard: stage-local selection tables should not leak across independent # commands. @ selection can always re-seed from the last result table. try: @@ -1732,11 +1731,9 @@ class CmdletExecutor: selectable_commands = { "search-file", "download-data", - "download-media", "download-file", "search_file", "download_data", - "download_media", "download_file", ".config", ".worker", @@ -1924,14 +1921,14 @@ class PipelineExecutor: return stages @staticmethod - def _validate_download_media_relationship_order(stages: List[List[str]]) -> bool: - """Guard against running add-relationship on unstored download-media results. + def _validate_download_file_relationship_order(stages: List[List[str]]) -> bool: + """Guard against running add-relationship on unstored download-file results. Intended UX: - download-media ... | add-file -store | add-relationship + download-file ... | add-file -store | add-relationship Rationale: - download-media outputs items that may not yet have a stable store+hash. + download-file outputs items that may not yet have a stable store+hash. add-relationship is designed to operate in store/hash mode. """ @@ -1944,14 +1941,14 @@ class PipelineExecutor: continue names.append(_norm(stage[0])) - dl_idxs = [i for i, n in enumerate(names) if n == "download-media"] + dl_idxs = [i for i, n in enumerate(names) if n == "download-file"] rel_idxs = [i for i, n in enumerate(names) if n == "add-relationship"] add_file_idxs = [i for i, n in enumerate(names) if n == "add-file"] if not dl_idxs or not rel_idxs: return True - # If download-media is upstream of add-relationship, require an add-file in between. + # If download-file is upstream of add-relationship, require an add-file in between. for rel_i in rel_idxs: dl_before = [d for d in dl_idxs if d < rel_i] if not dl_before: @@ -1959,9 +1956,9 @@ class PipelineExecutor: dl_i = max(dl_before) if not any(dl_i < a < rel_i for a in add_file_idxs): print( - "Pipeline order error: when using download-media with add-relationship, " + "Pipeline order error: when using download-file with add-relationship, " "add-relationship must come after add-file (so items are stored and have store+hash).\n" - "Example: download-media <...> | add-file -store | add-relationship\n" + "Example: download-file <...> | add-file -store | add-relationship\n" ) return False @@ -2238,6 +2235,37 @@ class PipelineExecutor: return False + @staticmethod + def _summarize_stage_text(stage_tokens: Sequence[str], limit: int = 140) -> str: + combined = " ".join(str(tok) for tok in stage_tokens if tok is not None).strip() + if not combined: + return "" + normalized = re.sub(r"\s+", " ", combined) + if len(normalized) <= limit: + return normalized + return normalized[:limit - 3].rstrip() + "..." 
+ + @staticmethod + def _log_pipeline_event( + worker_manager: Any, + worker_id: Optional[str], + message: str, + ) -> None: + if not worker_manager or not worker_id or not message: + return + try: + timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + except Exception: + timestamp = "" + if timestamp: + text = f"{timestamp} - PIPELINE - {message}" + else: + text = f"PIPELINE - {message}" + try: + worker_manager.append_stdout(worker_id, text + "\n", channel="log") + except Exception: + pass + @staticmethod def _maybe_open_url_selection( current_table: Any, @@ -2571,11 +2599,11 @@ class PipelineExecutor: if not stages: if table_type == "youtube": - print("Auto-running YouTube selection via download-media") - stages.append(["download-media"]) + print("Auto-running YouTube selection via download-file") + stages.append(["download-file"]) elif table_type == "bandcamp": - print("Auto-running Bandcamp selection via download-media") - stages.append(["download-media"]) + print("Auto-running Bandcamp selection via download-file") + stages.append(["download-file"]) elif table_type == "internetarchive": print("Auto-loading Internet Archive item via download-file") stages.append(["download-file"]) @@ -2594,32 +2622,24 @@ class PipelineExecutor: first_cmd = stages[0][0] if stages and stages[0] else None if table_type == "soulseek" and first_cmd not in ( "download-file", - "download-media", - "download_media", ".pipe", ): debug("Auto-inserting download-file after Soulseek selection") stages.insert(0, ["download-file"]) if table_type == "youtube" and first_cmd not in ( - "download-media", - "download_media", "download-file", ".pipe", ): - debug("Auto-inserting download-media after YouTube selection") - stages.insert(0, ["download-media"]) + debug("Auto-inserting download-file after YouTube selection") + stages.insert(0, ["download-file"]) if table_type == "bandcamp" and first_cmd not in ( - "download-media", - "download_media", "download-file", ".pipe", ): - print("Auto-inserting download-media after Bandcamp selection") - stages.insert(0, ["download-media"]) + print("Auto-inserting download-file after Bandcamp selection") + stages.insert(0, ["download-file"]) if table_type == "internetarchive" and first_cmd not in ( "download-file", - "download-media", - "download_media", ".pipe", ): debug( @@ -2628,16 +2648,12 @@ class PipelineExecutor: stages.insert(0, ["download-file"]) if table_type == "podcastindex.episodes" and first_cmd not in ( "download-file", - "download-media", - "download_media", ".pipe", ): print("Auto-inserting download-file after PodcastIndex episode selection") stages.insert(0, ["download-file"]) if table_type == "libgen" and first_cmd not in ( "download-file", - "download-media", - "download_media", ".pipe", ): print("Auto-inserting download-file after Libgen selection") @@ -2814,6 +2830,12 @@ class PipelineExecutor: pipeline_text=pipeline_text, config=config ) + if pipeline_session and worker_manager: + self._log_pipeline_event( + worker_manager, + pipeline_session.worker_id, + f"Pipeline start: {pipeline_text or '(empty pipeline)'}", + ) raw_stage_texts = self._get_raw_stage_texts(ctx) self._maybe_enable_background_notifier( worker_manager, @@ -2843,8 +2865,8 @@ class PipelineExecutor: if initial_piped is not None: piped_result = initial_piped - # REPL guard: prevent add-relationship before add-file for download-media pipelines. - if not self._validate_download_media_relationship_order(stages): + # REPL guard: prevent add-relationship before add-file for download-file pipelines. 
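+            # Intended shape: download-file <...> | add-file -store | add-relationship,
+            # so items are stored and carry a stable store+hash before relationships
+            # are added.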
+ if not self._validate_download_file_relationship_order(stages): pipeline_status = "failed" pipeline_error = "Invalid pipeline order" return @@ -3144,11 +3166,11 @@ class PipelineExecutor: if filter_spec is None: if stage_index + 1 >= len(stages): if table_type == "youtube": - print("Auto-running YouTube selection via download-media") - stages.append(["download-media", *stage_args]) + print("Auto-running YouTube selection via download-file") + stages.append(["download-file", *stage_args]) elif table_type == "bandcamp": - print("Auto-running Bandcamp selection via download-media") - stages.append(["download-media"]) + print("Auto-running Bandcamp selection via download-file") + stages.append(["download-file"]) elif table_type == "internetarchive": print("Auto-loading Internet Archive item via download-file") stages.append(["download-file"]) @@ -3161,56 +3183,53 @@ class PipelineExecutor: else: if table_type == "soulseek" and next_cmd not in ( "download-file", - "download-media", - "download_media", ".pipe", ): debug("Auto-inserting download-file after Soulseek selection") stages.insert(stage_index + 1, ["download-file"]) if table_type == "youtube" and next_cmd not in ( - "download-media", - "download_media", "download-file", ".pipe", ): - debug("Auto-inserting download-media after YouTube selection") - stages.insert(stage_index + 1, ["download-media"]) + debug("Auto-inserting download-file after YouTube selection") + stages.insert(stage_index + 1, ["download-file"]) if table_type == "bandcamp" and next_cmd not in ( - "download-media", - "download_media", "download-file", ".pipe", ): - print("Auto-inserting download-media after Bandcamp selection") - stages.insert(stage_index + 1, ["download-media"]) + print("Auto-inserting download-file after Bandcamp selection") + stages.insert(stage_index + 1, ["download-file"]) if table_type == "internetarchive" and next_cmd not in ( "download-file", - "download-media", - "download_media", ".pipe", ): debug("Auto-inserting download-file after Internet Archive selection") stages.insert(stage_index + 1, ["download-file"]) if table_type == "podcastindex.episodes" and next_cmd not in ( "download-file", - "download-media", - "download_media", ".pipe", ): print("Auto-inserting download-file after PodcastIndex episode selection") stages.insert(stage_index + 1, ["download-file"]) if table_type == "libgen" and next_cmd not in ( "download-file", - "download-media", - "download_media", ".pipe", ): print("Auto-inserting download-file after Libgen selection") stages.insert(stage_index + 1, ["download-file"]) continue - ensure_registry_loaded() cmd_fn = REGISTRY.get(cmd_name) + if not cmd_fn: + try: + mod = import_cmd_module(cmd_name) + data = getattr(mod, "CMDLET", None) if mod else None + if data and hasattr(data, "exec") and callable(getattr(data, "exec")): + run_fn = getattr(data, "exec") + REGISTRY[cmd_name] = run_fn + cmd_fn = run_fn + except Exception: + cmd_fn = None if not cmd_fn: print(f"Unknown command: {cmd_name}\n") pipeline_status = "failed" @@ -3226,6 +3245,14 @@ class PipelineExecutor: ) stage_worker_id = stage_session.worker_id if stage_session else None + stage_summary = self._summarize_stage_text(stage_tokens) + if pipeline_session and worker_manager: + summary_text = stage_summary or cmd_name + self._log_pipeline_event( + worker_manager, + pipeline_session.worker_id, + f"Stage {stage_index + 1}/{len(stages)} start: {summary_text}", + ) # Estimate how many per-item tasks this pipe will run. 
pipe_idx = pipe_index_by_stage.get(stage_index) @@ -3433,7 +3460,7 @@ class PipelineExecutor: else: piped_result = None - # Some cmdlets (notably download-media format selection) populate a selectable + # Some cmdlets (notably download-file format selection) populate a selectable # current-stage table without emitting pipeline items. In these cases, render # the table and pause the pipeline so the user can pick @N. stage_table = ( @@ -3458,21 +3485,18 @@ class PipelineExecutor: except Exception: stage_table_source = "" if ((not stage_is_last) and (not emits) and cmd_name in { - "download-media", - "download_media", - "download-data", - "download_data", + "download-file", + "download-data", + "download_data", } and stage_table is not None - and (stage_table_type in { - "ytdlp.formatlist", - "download-media", - "download_media", - "bandcamp", - "youtube", - } or stage_table_source in {"download-media", - "download_media"} - or stage_table_type in {"internetarchive.formats"} - or stage_table_source in {"download-file"})): + and (stage_table_type in { + "ytdlp.formatlist", + "download-file", + "bandcamp", + "youtube", + } or stage_table_source in {"download-file"} + or stage_table_type in {"internetarchive.formats"} + or stage_table_source in {"download-file"})): try: is_selectable = not bool( getattr(stage_table, @@ -3671,6 +3695,18 @@ class PipelineExecutor: pipeline_error = f"{stage_label} error: {exc}" return finally: + if pipeline_session and worker_manager: + status_label = ( + "completed" if stage_status == "completed" else "failed" + ) + msg = f"{stage_label} {status_label}" + if stage_error and stage_status != "completed": + msg += f": {stage_error}" + self._log_pipeline_event( + worker_manager, + pipeline_session.worker_id, + msg, + ) if progress_ui is not None and pipe_idx is not None: try: progress_ui.finish_pipe( @@ -3820,6 +3856,17 @@ class PipelineExecutor: ctx.set_current_stage_table(None) except Exception: pass + if pipeline_session and worker_manager: + final_msg = f"Pipeline {pipeline_status}" + if pipeline_error: + final_msg += f": {pipeline_error}" + else: + final_msg += " (ok)" + self._log_pipeline_event( + worker_manager, + pipeline_session.worker_id, + final_msg, + ) if pipeline_session: pipeline_session.close( status=pipeline_status, diff --git a/MPV/LUA/main.lua b/MPV/LUA/main.lua index 39c862e..86b26da 100644 --- a/MPV/LUA/main.lua +++ b/MPV/LUA/main.lua @@ -1987,7 +1987,7 @@ mp.register_script_message('medios-download-pick-store', function(json) local url = tostring(_pending_download.url) local fmt = tostring(_pending_download.format) - local pipeline_cmd = 'download-media -url ' .. quote_pipeline_arg(url) .. ' -format ' .. quote_pipeline_arg(fmt) + local pipeline_cmd = 'download-file -url ' .. quote_pipeline_arg(url) .. ' -format ' .. quote_pipeline_arg(fmt) .. ' | add-file -store ' .. quote_pipeline_arg(store) if not _run_pipeline_detached(pipeline_cmd) then @@ -2015,7 +2015,7 @@ mp.register_script_message('medios-download-pick-path', function() local url = tostring(_pending_download.url) local fmt = tostring(_pending_download.format) - local pipeline_cmd = 'download-media -url ' .. quote_pipeline_arg(url) .. ' -format ' .. quote_pipeline_arg(fmt) + local pipeline_cmd = 'download-file -url ' .. quote_pipeline_arg(url) .. ' -format ' .. quote_pipeline_arg(fmt) .. ' | add-file -path ' .. 
quote_pipeline_arg(folder) if not _run_pipeline_detached(pipeline_cmd) then diff --git a/MPV/mpv_ipc.py b/MPV/mpv_ipc.py index c4efc7f..7d47f96 100644 --- a/MPV/mpv_ipc.py +++ b/MPV/mpv_ipc.py @@ -343,7 +343,7 @@ class MPV: def _q(s: str) -> str: return '"' + s.replace("\\", "\\\\").replace('"', '\\"') + '"' - pipeline = f"download-media -url {_q(url)} -format {_q(fmt)}" + pipeline = f"download-file -url {_q(url)} -format {_q(fmt)}" if store: pipeline += f" | add-file -store {_q(store)}" else: diff --git a/Provider/alldebrid.py b/Provider/alldebrid.py index e55fac1..773fd56 100644 --- a/Provider/alldebrid.py +++ b/Provider/alldebrid.py @@ -1,11 +1,17 @@ from __future__ import annotations -from pathlib import Path +import hashlib import sys -from typing import Any, Dict, Iterable, List, Optional +import time +from pathlib import Path +from typing import Any, Dict, Iterable, List, Optional, Callable, Tuple +from urllib.parse import urlparse +from API.HTTP import HTTPClient +from API.alldebrid import AllDebridClient, parse_magnet_or_hash, is_magnet_link, is_torrent_file from ProviderCore.base import Provider, SearchResult from ProviderCore.download import sanitize_filename +from SYS.download import _download_direct_file from SYS.logger import log @@ -53,7 +59,356 @@ def _get_debrid_api_key(config: Dict[str, Any]) -> Optional[str]: return None +def _consume_bencoded_value(data: bytes, pos: int) -> int: + if pos >= len(data): + raise ValueError("Unexpected end of bencode") + token = data[pos:pos + 1] + if token == b"i": + end = data.find(b"e", pos + 1) + if end == -1: + raise ValueError("Unterminated integer") + return end + 1 + if token == b"l" or token == b"d": + cursor = pos + 1 + while cursor < len(data): + if data[cursor:cursor + 1] == b"e": + return cursor + 1 + cursor = _consume_bencoded_value(data, cursor) + raise ValueError("Unterminated list/dict") + if token and b"0" <= token <= b"9": + colon = data.find(b":", pos) + if colon == -1: + raise ValueError("Invalid string length") + length = int(data[pos:colon]) + return colon + 1 + length + raise ValueError("Unknown bencode token") + + +def _info_hash_from_torrent_bytes(data: bytes) -> Optional[str]: + needle = b"4:info" + idx = data.find(needle) + if idx == -1: + return None + + start = idx + len(needle) + try: + end = _consume_bencoded_value(data, start) + except ValueError: + return None + + info_bytes = data[start:end] + try: + return hashlib.sha1(info_bytes).hexdigest() + except Exception: + return None + + +def _fetch_torrent_bytes(target: str) -> Optional[bytes]: + path_obj = Path(str(target)) + try: + if path_obj.exists() and path_obj.is_file(): + return path_obj.read_bytes() + except Exception: + pass + + try: + parsed = urlparse(target) + except Exception: + parsed = None + + if parsed is None or not parsed.scheme or parsed.scheme.lower() not in {"http", "https"}: + return None + + if not target.lower().endswith(".torrent"): + return None + + try: + with HTTPClient(timeout=30.0) as client: + response = client.get(target) + return response.content + except Exception as exc: + log(f"Failed to download .torrent from {target}: {exc}", file=sys.stderr) + return None + + +def resolve_magnet_spec(target: str) -> Optional[str]: + """Resolve a magnet/hash/torrent URL into a magnet/hash string.""" + candidate = str(target or "").strip() + if not candidate: + return None + + parsed = parse_magnet_or_hash(candidate) + if parsed: + return parsed + + if is_torrent_file(candidate): + torrent_bytes = _fetch_torrent_bytes(candidate) + if not 
torrent_bytes: + return None + hash_value = _info_hash_from_torrent_bytes(torrent_bytes) + if hash_value: + return hash_value + return None + + +def _dispatch_alldebrid_magnet_search( + magnet_id: int, + config: Dict[str, Any], +) -> None: + try: + from cmdlet.search_file import CMDLET as _SEARCH_FILE_CMDLET + + exec_fn = getattr(_SEARCH_FILE_CMDLET, "exec", None) + if callable(exec_fn): + exec_fn( + None, + ["-provider", "alldebrid", f"ID={magnet_id}"], + config, + ) + except Exception: + pass + log(f"[alldebrid] Sent magnet {magnet_id} to AllDebrid for download", file=sys.stderr) + + +def prepare_magnet( + magnet_spec: str, + config: Dict[str, Any], +) -> tuple[Optional[AllDebridClient], Optional[int]]: + api_key = _get_debrid_api_key(config or {}) + if not api_key: + try: + from ProviderCore.registry import show_provider_config_panel + + show_provider_config_panel("alldebrid", ["api_key"]) + except Exception: + pass + log("AllDebrid API key not configured (provider.alldebrid.api_key)", file=sys.stderr) + return None, None + + try: + client = AllDebridClient(api_key) + except Exception as exc: + log(f"Failed to initialize AllDebrid client: {exc}", file=sys.stderr) + return None, None + + try: + magnet_info = client.magnet_add(magnet_spec) + magnet_id = int(magnet_info.get("id", 0)) + if magnet_id <= 0: + log(f"AllDebrid magnet submission failed: {magnet_info}", file=sys.stderr) + return None, None + except Exception as exc: + log(f"Failed to submit magnet to AllDebrid: {exc}", file=sys.stderr) + return None, None + + _dispatch_alldebrid_magnet_search(magnet_id, config) + return client, magnet_id + + +def _flatten_files_with_relpath(items: Any) -> Iterable[Dict[str, Any]]: + for node in AllDebrid._flatten_files(items): + enriched = dict(node) + rel = node.get("_relpath") or node.get("relpath") + if not rel: + name = node.get("n") or node.get("name") + rel = str(name or "").strip() + enriched["relpath"] = rel + yield enriched + + +def download_magnet( + magnet_spec: str, + original_url: str, + final_output_dir: Path, + config: Dict[str, Any], + progress: Any, + quiet_mode: bool, + path_from_result: Callable[[Any], Path], + on_emit: Callable[[Path, str, str, Dict[str, Any]], None], +) -> tuple[int, Optional[int]]: + client, magnet_id = prepare_magnet(magnet_spec, config) + if client is None or magnet_id is None: + return 0, None + + wait_timeout = 300 + try: + streaming_config = config.get("streaming", {}) if isinstance(config, dict) else {} + wait_timeout = int(streaming_config.get("wait_timeout", 300)) + except Exception: + wait_timeout = 300 + + elapsed = 0 + while elapsed < wait_timeout: + try: + status = client.magnet_status(magnet_id) + except Exception as exc: + log(f"Failed to read magnet status {magnet_id}: {exc}", file=sys.stderr) + return 0, magnet_id + ready = bool(status.get("ready")) or status.get("statusCode") == 4 + if ready: + break + time.sleep(5) + elapsed += 5 + else: + log(f"AllDebrid magnet {magnet_id} timed out after {wait_timeout}s", file=sys.stderr) + return 0, magnet_id + + try: + files_result = client.magnet_links([magnet_id]) + except Exception as exc: + log(f"Failed to list AllDebrid magnet files: {exc}", file=sys.stderr) + return 0, magnet_id + + magnet_files = files_result.get(str(magnet_id), {}) if isinstance(files_result, dict) else {} + file_nodes = magnet_files.get("files") if isinstance(magnet_files, dict) else [] + if not file_nodes: + log(f"AllDebrid magnet {magnet_id} produced no files", file=sys.stderr) + return 0, magnet_id + + downloaded = 0 + for 
node in _flatten_files_with_relpath(file_nodes): + file_url = str(node.get("link") or "").strip() + file_name = str(node.get("name") or "").strip() + relpath = str(node.get("relpath") or file_name).strip() + if not file_url or not relpath: + continue + + target_path = final_output_dir + rel_path_obj = Path(relpath) + output_dir = target_path + if rel_path_obj.parent: + output_dir = target_path / rel_path_obj.parent + try: + output_dir.mkdir(parents=True, exist_ok=True) + except Exception: + output_dir = target_path + + try: + result_obj = _download_direct_file( + file_url, + output_dir, + quiet=quiet_mode, + suggested_filename=rel_path_obj.name, + pipeline_progress=progress, + ) + except Exception as exc: + log(f"Failed to download AllDebrid file {file_url}: {exc}", file=sys.stderr) + continue + + downloaded_path = path_from_result(result_obj) + metadata = { + "magnet_id": magnet_id, + "relpath": relpath, + "name": file_name, + } + on_emit(downloaded_path, file_url or original_url, relpath, metadata) + downloaded += 1 + + return downloaded, magnet_id + + +def expand_folder_item( + item: Any, + get_search_provider: Optional[Callable[[str, Dict[str, Any]], Any]], + config: Dict[str, Any], +) -> Tuple[List[Any], Optional[str]]: + table = getattr(item, "table", None) if not isinstance(item, dict) else item.get("table") + media_kind = getattr(item, "media_kind", None) if not isinstance(item, dict) else item.get("media_kind") + full_metadata = getattr(item, "full_metadata", None) if not isinstance(item, dict) else item.get("full_metadata") + target = None + if isinstance(item, dict): + target = item.get("path") or item.get("url") + else: + target = getattr(item, "path", None) or getattr(item, "url", None) + + if (str(table or "").lower() != "alldebrid") or (str(media_kind or "").lower() != "folder"): + return [], None + + magnet_id = None + if isinstance(full_metadata, dict): + magnet_id = full_metadata.get("magnet_id") + if magnet_id is None and isinstance(target, str) and target.lower().startswith("alldebrid:magnet:"): + try: + magnet_id = int(target.split(":")[-1]) + except Exception: + magnet_id = None + + if magnet_id is None or get_search_provider is None: + return [], None + + provider = get_search_provider("alldebrid", config) if get_search_provider else None + if provider is None: + return [], None + + try: + files = provider.search("*", limit=10_000, filters={"view": "files", "magnet_id": int(magnet_id)}) + except Exception: + files = [] + + if files and len(files) == 1 and getattr(files[0], "media_kind", "") == "folder": + detail = getattr(files[0], "detail", "") + return [], str(detail or "unknown") + + expanded: List[Any] = [] + for sr in files: + expanded.append(sr.to_dict() if hasattr(sr, "to_dict") else sr) + return expanded, None + + +def adjust_output_dir_for_alldebrid( + base_output_dir: Path, + full_metadata: Optional[Dict[str, Any]], + item: Any, +) -> Path: + from ProviderCore.download import sanitize_filename as _sf + + output_dir = base_output_dir + md = full_metadata if isinstance(full_metadata, dict) else {} + magnet_name = md.get("magnet_name") or md.get("folder") + if not magnet_name: + try: + detail_val = getattr(item, "detail", None) if not isinstance(item, dict) else item.get("detail") + magnet_name = str(detail_val or "").strip() or None + except Exception: + magnet_name = None + + magnet_dir_name = _sf(str(magnet_name)) if magnet_name else "" + try: + base_tail = str(Path(output_dir).name or "") + except Exception: + base_tail = "" + base_tail_norm = 
_sf(base_tail).lower() if base_tail.strip() else "" + magnet_dir_norm = magnet_dir_name.lower() if magnet_dir_name else "" + + if magnet_dir_name and (not base_tail_norm or base_tail_norm != magnet_dir_norm): + output_dir = Path(output_dir) / magnet_dir_name + + relpath = md.get("relpath") if isinstance(md, dict) else None + if (not relpath) and isinstance(md.get("file"), dict): + relpath = md["file"].get("_relpath") + + if relpath: + parts = [p for p in str(relpath).replace("\\", "/").split("/") if p and p not in {".", ".."}] + if magnet_dir_name and parts: + try: + if _sf(parts[0]).lower() == magnet_dir_norm: + parts = parts[1:] + except Exception: + pass + for part in parts[:-1]: + output_dir = Path(output_dir) / _sf(part) + + try: + Path(output_dir).mkdir(parents=True, exist_ok=True) + except Exception: + output_dir = base_output_dir + + return output_dir + + class AllDebrid(Provider): + # Magnet URIs should be routed through this provider. + URL = ("magnet:",) """Search provider for AllDebrid account content. This provider lists and searches the files/magnets already present in the @@ -311,7 +666,10 @@ class AllDebrid(Provider): ], full_metadata={ "magnet": magnet_status, - "magnet_id": magnet_id + "magnet_id": magnet_id, + "provider": "alldebrid", + "provider_view": "files", + "magnet_name": magnet_name, }, ) ] @@ -382,6 +740,8 @@ class AllDebrid(Provider): "magnet_name": magnet_name, "relpath": relpath, "file": file_node, + "provider": "alldebrid", + "provider_view": "files", }, ) ) @@ -465,7 +825,10 @@ class AllDebrid(Provider): ], full_metadata={ "magnet": magnet, - "magnet_id": magnet_id + "magnet_id": magnet_id, + "provider": "alldebrid", + "provider_view": "folders", + "magnet_name": magnet_name, }, ) ) @@ -474,3 +837,128 @@ class AllDebrid(Provider): break return results + + def selector( + self, + selected_items: List[Any], + *, + ctx: Any, + stage_is_last: bool = True, + **_kwargs: Any, + ) -> bool: + """Handle AllDebrid `@N` selection by drilling into magnet files.""" + if not stage_is_last: + return False + + def _as_payload(item: Any) -> Dict[str, Any]: + if isinstance(item, dict): + return dict(item) + try: + if hasattr(item, "to_dict"): + maybe = item.to_dict() # type: ignore[attr-defined] + if isinstance(maybe, dict): + return maybe + except Exception: + pass + payload: Dict[str, Any] = {} + try: + payload = { + "title": getattr(item, "title", None), + "path": getattr(item, "path", None), + "table": getattr(item, "table", None), + "annotations": getattr(item, "annotations", None), + "media_kind": getattr(item, "media_kind", None), + "full_metadata": getattr(item, "full_metadata", None), + } + except Exception: + payload = {} + return payload + + chosen: List[Dict[str, Any]] = [] + for item in selected_items or []: + payload = _as_payload(item) + meta = payload.get("full_metadata") or payload.get("metadata") or {} + if not isinstance(meta, dict): + meta = {} + + ann_set: set[str] = set() + for ann_source in (payload.get("annotations"), meta.get("annotations")): + if isinstance(ann_source, (list, tuple, set)): + for ann in ann_source: + ann_text = str(ann or "").strip().lower() + if ann_text: + ann_set.add(ann_text) + + media_kind = str(payload.get("media_kind") or meta.get("media_kind") or "").strip().lower() + is_folder = (media_kind == "folder") or ("folder" in ann_set) + magnet_id = meta.get("magnet_id") + if magnet_id is None or (not is_folder): + continue + + title = str(payload.get("title") or meta.get("magnet_name") or meta.get("name") or "").strip() + if not 
title: + title = f"magnet-{magnet_id}" + + chosen.append({ + "magnet_id": magnet_id, + "title": title, + }) + + if not chosen: + return False + + target = chosen[0] + magnet_id = target.get("magnet_id") + title = target.get("title") or f"magnet-{magnet_id}" + + try: + files = self.search("*", limit=200, filters={"view": "files", "magnet_id": magnet_id}) + except Exception as exc: + print(f"alldebrid selector failed: {exc}\n") + return True + + try: + from SYS.result_table import ResultTable + from SYS.rich_display import stdout_console + except Exception: + return True + + table = ResultTable(f"AllDebrid Files: {title}").set_preserve_order(True) + table.set_table("alldebrid") + try: + table.set_table_metadata({"provider": "alldebrid", "view": "files", "magnet_id": magnet_id}) + except Exception: + pass + table.set_source_command( + "search-file", + ["-provider", "alldebrid", "-open", str(magnet_id), "-query", "*"], + ) + + results_payload: List[Dict[str, Any]] = [] + for r in files or []: + table.add_result(r) + try: + results_payload.append(r.to_dict()) + except Exception: + results_payload.append( + { + "table": getattr(r, "table", "alldebrid"), + "title": getattr(r, "title", ""), + "path": getattr(r, "path", ""), + "full_metadata": getattr(r, "full_metadata", None), + } + ) + + try: + ctx.set_last_result_table(table, results_payload) + ctx.set_current_stage_table(table) + except Exception: + pass + + try: + stdout_console().print() + stdout_console().print(table) + except Exception: + pass + + return True diff --git a/Provider/internetarchive.py b/Provider/internetarchive.py index 863e9f3..5964c78 100644 --- a/Provider/internetarchive.py +++ b/Provider/internetarchive.py @@ -13,6 +13,156 @@ from ProviderCore.base import Provider, SearchResult from ProviderCore.download import sanitize_filename from SYS.logger import log +# Helper for download-file: render selectable formats for a details URL. +def maybe_show_formats_table( + *, + raw_urls: Any, + piped_items: Any, + parsed: Dict[str, Any], + config: Dict[str, Any], + quiet_mode: bool, + get_field: Any, +) -> Optional[int]: + """If input is a single Internet Archive details URL, render a formats table. + + Returns an exit code when handled; otherwise None. + """ + if quiet_mode: + return None + + try: + total_inputs = int(len(raw_urls or []) + len(piped_items or [])) + except Exception: + total_inputs = 0 + + if total_inputs != 1: + return None + + item = piped_items[0] if piped_items else None + target = "" + if item is not None: + try: + target = str(get_field(item, + "path") or get_field(item, + "url") or "").strip() + except Exception: + target = "" + if not target and raw_urls: + target = str(raw_urls[0]).strip() + if not target: + return None + + identifier = "" + try: + md = get_field(item, "full_metadata") if item is not None else None + if isinstance(md, dict): + identifier = str(md.get("identifier") or "").strip() + except Exception: + identifier = "" + if not identifier: + try: + identifier = str(extract_identifier(target) or "").strip() + except Exception: + identifier = "" + if not identifier: + return None + + # Only show picker for item pages (details); direct download URLs should download immediately. 
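+    # Item pages look like archive.org/details/<identifier>; direct file links
+    # (typically under /download/) bypass the picker and fall through to a normal
+    # download.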
+ try: + if not is_details_url(target): + return None + except Exception: + return None + + try: + files = list_download_files(identifier) + except Exception as exc: + log(f"download-file: Internet Archive lookup failed: {exc}", file=sys.stderr) + return 1 + + if not files: + log("download-file: Internet Archive item has no downloadable files", file=sys.stderr) + return 1 + + title = "" + try: + title = str(get_field(item, "title") or "").strip() if item is not None else "" + except Exception: + title = "" + table_title = ( + f"Internet Archive: {title}".strip().rstrip(":") + if title else f"Internet Archive: {identifier}" + ) + + try: + from SYS.result_table import ResultTable + from SYS import pipeline as pipeline_context + except Exception as exc: + log(f"download-file: ResultTable unavailable: {exc}", file=sys.stderr) + return 1 + + base_args: List[str] = [] + out_arg = parsed.get("path") or parsed.get("output") + if out_arg: + base_args.extend(["-path", str(out_arg)]) + + table = ResultTable(table_title).set_preserve_order(True) + table.set_table("internetarchive.formats") + table.set_source_command("download-file", base_args) + + rows: List[Dict[str, Any]] = [] + for f in files: + name = str(f.get("name") or "").strip() + if not name: + continue + fmt = str(f.get("format") or "").strip() + src = str(f.get("source") or "").strip() + direct_url = str(f.get("direct_url") or "").strip() + if not direct_url: + continue + + size_val: Any = f.get("size") + try: + size_val = int(size_val) if size_val not in (None, "") else "" + except Exception: + pass + + row_item: Dict[str, Any] = { + "table": "internetarchive", + "title": fmt or name, + "path": direct_url, + "url": direct_url, + "columns": [ + ("Format", fmt), + ("Name", name), + ("Size", size_val), + ("Source", src), + ], + "_selection_args": [direct_url], + "full_metadata": { + "identifier": identifier, + "name": name, + "format": fmt, + "source": src, + "size": f.get("size"), + }, + } + rows.append(row_item) + table.add_result(row_item) + + if not rows: + log("download-file: no downloadable files found for this item", file=sys.stderr) + return 1 + + try: + pipeline_context.set_last_result_table(table, rows, subject=item) + pipeline_context.set_current_stage_table(table) + except Exception: + pass + + log("Internet Archive item detected: select a file with @N to download", file=sys.stderr) + return 0 + def _ia() -> Any: try: @@ -322,6 +472,7 @@ class InternetArchive(Provider): collection="..." # optional (upload) mediatype="..." 
# optional (upload) """ + URL = ("archive.org",) def __init__(self, config: Optional[Dict[str, Any]] = None): super().__init__(config) diff --git a/Provider/libgen.py b/Provider/libgen.py index d049aed..54e27e9 100644 --- a/Provider/libgen.py +++ b/Provider/libgen.py @@ -665,6 +665,7 @@ class Libgen(Provider): "libgen.rs", "libgen.st", ) + URL = URL_DOMAINS """Search provider for Library Genesis books.""" def search( diff --git a/Provider/loc.py b/Provider/loc.py index fe57690..8f1582e 100644 --- a/Provider/loc.py +++ b/Provider/loc.py @@ -15,6 +15,7 @@ class LOC(Provider): """ URL_DOMAINS = ["www.loc.gov"] + URL = URL_DOMAINS def validate(self) -> bool: return True diff --git a/Provider/openlibrary.py b/Provider/openlibrary.py index ae7e368..6012934 100644 --- a/Provider/openlibrary.py +++ b/Provider/openlibrary.py @@ -229,6 +229,30 @@ def _archive_id_from_url(url: str) -> str: "advancedsearch.php"}: return first + + def edition_id_from_url(u: str) -> str: + """Extract an OpenLibrary edition id (OL...M) from a book URL.""" + try: + p = urlparse(str(u)) + parts = [x for x in (p.path or "").split("/") if x] + except Exception: + parts = [] + if len(parts) >= 2 and str(parts[0]).lower() == "books": + return str(parts[1]).strip() + return "" + + + def title_hint_from_url_slug(u: str) -> str: + """Derive a human-friendly title hint from the URL slug.""" + try: + p = urlparse(str(u)) + parts = [x for x in (p.path or "").split("/") if x] + slug = parts[-1] if parts else "" + except Exception: + slug = "" + slug = (slug or "").strip().replace("_", " ") + return slug or "OpenLibrary" + return "" @@ -415,6 +439,7 @@ class OpenLibrary(Provider): "openlibrary.org", "archive.org", ) + URL = URL_DOMAINS """Search provider for OpenLibrary books + Archive.org direct/borrow download.""" def __init__(self, config: Optional[Dict[str, Any]] = None): @@ -1419,6 +1444,64 @@ class OpenLibrary(Provider): log("[openlibrary] Direct download failed", file=sys.stderr) return None + # --- Convenience helpers for URL-driven downloads (used by download-file) --- + + def search_result_from_url(self, url: str) -> Optional[SearchResult]: + """Build a minimal SearchResult from a bare OpenLibrary URL.""" + edition_id = edition_id_from_url(url) + title_hint = title_hint_from_url_slug(url) + return SearchResult( + table="openlibrary", + title=title_hint, + path=str(url), + media_kind="book", + full_metadata={"openlibrary_id": edition_id} if edition_id else {}, + ) + + def download_url( + self, + url: str, + output_dir: Path, + progress_callback: Optional[Callable[[str, int, Optional[int], str], None]] = None, + ) -> Optional[Dict[str, Any]]: + """Download a book directly from an OpenLibrary URL. + + Returns a dict with the downloaded path and SearchResult when successful. 
+ """ + sr = self.search_result_from_url(url) + if sr is None: + return None + + downloaded = self.download(sr, output_dir, progress_callback) + if not downloaded: + return None + + return { + "path": Path(downloaded), + "search_result": sr, + } + try: + if progress_callback is not None: + progress_callback("step", 0, None, "direct download") + except Exception: + pass + out_path = unique_path(output_dir / f"{safe_title}.pdf") + ok = download_file( + pdf_url, + out_path, + session=self._session, + progress_callback=( + ( + lambda downloaded, total, label: + progress_callback("bytes", downloaded, total, label) + ) if progress_callback is not None else None + ), + ) + if ok: + return out_path + log("[openlibrary] Direct download failed", file=sys.stderr) + return None + # 2) Borrow flow (credentials required). try: email, password = self._credential_archive(self.config or {}) diff --git a/Provider/telegram.py b/Provider/telegram.py index fffe3c0..e2632cd 100644 --- a/Provider/telegram.py +++ b/Provider/telegram.py @@ -145,7 +145,9 @@ class Telegram(Provider): [provider=telegram] app_id= api_hash= + bot_token= """ + URL = ("t.me", "telegram.me") def __init__(self, config: Optional[Dict[str, Any]] = None): super().__init__(config) diff --git a/ProviderCore/base.py b/ProviderCore/base.py index 2ff4004..467b542 100644 --- a/ProviderCore/base.py +++ b/ProviderCore/base.py @@ -3,7 +3,7 @@ from __future__ import annotations from abc import ABC, abstractmethod from dataclasses import dataclass, field from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, Dict, List, Optional, Sequence, Tuple @dataclass @@ -53,6 +53,8 @@ class Provider(ABC): - validate() """ + URL: Sequence[str] = () + def __init__(self, config: Optional[Dict[str, Any]] = None): self.config = config or {} self.name = self.__class__.__name__.lower() @@ -107,6 +109,30 @@ class Provider(ABC): _ = stage_is_last return False + @classmethod + def url_patterns(cls) -> Tuple[str, ...]: + """Return normalized URL patterns that this provider handles.""" + patterns: List[str] = [] + maybe_urls = getattr(cls, "URL", None) + if isinstance(maybe_urls, (list, tuple)): + for entry in maybe_urls: + try: + candidate = str(entry or "").strip().lower() + except Exception: + continue + if candidate: + patterns.append(candidate) + maybe_domains = getattr(cls, "URL_DOMAINS", None) + if isinstance(maybe_domains, (list, tuple)): + for entry in maybe_domains: + try: + candidate = str(entry or "").strip().lower() + except Exception: + continue + if candidate and candidate not in patterns: + patterns.append(candidate) + return tuple(patterns) + class SearchProvider(Provider): """Compatibility alias for older code. diff --git a/ProviderCore/registry.py b/ProviderCore/registry.py index 1db4dc0..93f6b6f 100644 --- a/ProviderCore/registry.py +++ b/ProviderCore/registry.py @@ -68,6 +68,13 @@ def _supports_upload(provider: Provider) -> bool: return provider.__class__.upload is not Provider.upload +def _provider_url_patterns(provider_class: Type[Provider]) -> Sequence[str]: + try: + return list(provider_class.url_patterns()) + except Exception: + return [] + + def get_provider(name: str, config: Optional[Dict[str, Any]] = None) -> Optional[Provider]: @@ -166,47 +173,53 @@ def list_file_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bo def match_provider_name_for_url(url: str) -> Optional[str]: """Return a registered provider name that claims the URL's domain. 
- Providers can declare domains via a class attribute `URL_DOMAINS` (sequence of strings). + Providers can declare domains via class attribute `URL` (preferred) or `URL_DOMAINS`. This matcher is intentionally cheap (no provider instantiation, no network). """ + raw_url = str(url or "").strip() + raw_url_lower = raw_url.lower() try: - parsed = urlparse(str(url)) + parsed = urlparse(raw_url) host = (parsed.hostname or "").strip().lower() path = (parsed.path or "").strip() except Exception: host = "" path = "" - if not host: - return None - # Prefer Internet Archive for archive.org links unless the URL clearly refers # to a borrow/loan flow (handled by OpenLibrary provider). # # This keeps direct downloads and item pages routed to `internetarchive`, while # preserving OpenLibrary's scripted borrow pipeline for loan/reader URLs. - if host == "openlibrary.org" or host.endswith(".openlibrary.org"): - return "openlibrary" if "openlibrary" in _PROVIDERS else None - - if host == "archive.org" or host.endswith(".archive.org"): - low_path = str(path or "").lower() - is_borrowish = ( - low_path.startswith("/borrow/") or low_path.startswith("/stream/") - or low_path.startswith("/services/loans/") or "/services/loans/" in low_path - ) - if is_borrowish: + if host: + if host == "openlibrary.org" or host.endswith(".openlibrary.org"): return "openlibrary" if "openlibrary" in _PROVIDERS else None - return "internetarchive" if "internetarchive" in _PROVIDERS else None + + if host == "archive.org" or host.endswith(".archive.org"): + low_path = str(path or "").lower() + is_borrowish = ( + low_path.startswith("/borrow/") or low_path.startswith("/stream/") + or low_path.startswith("/services/loans/") or "/services/loans/" in low_path + ) + if is_borrowish: + return "openlibrary" if "openlibrary" in _PROVIDERS else None + return "internetarchive" if "internetarchive" in _PROVIDERS else None for name, provider_class in _PROVIDERS.items(): - domains = getattr(provider_class, "URL_DOMAINS", None) - if not isinstance(domains, (list, tuple)): + domains = _provider_url_patterns(provider_class) + if not domains: continue for d in domains: dom = str(d or "").strip().lower() if not dom: continue + if raw_url_lower.startswith(dom): + return name + for d in domains: + dom = str(d or "").strip().lower() + if not dom or not host: + continue if host == dom or host.endswith("." 
+ dom): return name diff --git a/SYS/cmdlet_catalog.py b/SYS/cmdlet_catalog.py index 92ce848..feb446d 100644 --- a/SYS/cmdlet_catalog.py +++ b/SYS/cmdlet_catalog.py @@ -1,6 +1,7 @@ from __future__ import annotations from importlib import import_module +from types import ModuleType from typing import Any, Dict, List, Optional try: @@ -21,22 +22,36 @@ def _should_hide_db_args(config: Optional[Dict[str, Any]]) -> bool: return False -try: - from cmdlet import REGISTRY -except Exception: - REGISTRY = {} # type: ignore +_cmdlet_pkg: ModuleType | None = None -try: - from cmdnat import register_native_commands as _register_native_commands -except Exception: - _register_native_commands = None + +def _get_cmdlet_package() -> Optional[ModuleType]: + global _cmdlet_pkg + if _cmdlet_pkg is not None: + return _cmdlet_pkg + try: + _cmdlet_pkg = import_module("cmdlet") + except Exception: + _cmdlet_pkg = None + return _cmdlet_pkg + + +def _get_registry() -> Dict[str, Any]: + pkg = _get_cmdlet_package() + if pkg is None: + return {} + return getattr(pkg, "REGISTRY", {}) or {} def ensure_registry_loaded() -> None: """Ensure native commands are registered into REGISTRY (idempotent).""" - if _register_native_commands and REGISTRY is not None: + pkg = _get_cmdlet_package() + if pkg is None: + return + ensure_fn = getattr(pkg, "ensure_cmdlet_modules_loaded", None) + if callable(ensure_fn): try: - _register_native_commands(REGISTRY) + ensure_fn() except Exception: pass @@ -105,7 +120,8 @@ def get_cmdlet_metadata( if data is None: try: - reg_fn = (REGISTRY or {}).get(cmd_name.replace("_", "-").lower()) + registry = _get_registry() + reg_fn = registry.get(cmd_name.replace("_", "-").lower()) if reg_fn: owner_mod = getattr(reg_fn, "__module__", "") if owner_mod: @@ -150,7 +166,8 @@ def list_cmdlet_metadata(config: Optional[Dict[str, Any]] = None) -> Dict[str, D """Collect metadata for all registered cmdlet keyed by canonical name.""" ensure_registry_loaded() entries: Dict[str, Dict[str, Any]] = {} - for reg_name in (REGISTRY or {}).keys(): + registry = _get_registry() + for reg_name in registry.keys(): meta = get_cmdlet_metadata(reg_name, config=config) canonical = str(reg_name).replace("_", "-").lower() diff --git a/SYS/models.py b/SYS/models.py index 375d9e6..07bb199 100644 --- a/SYS/models.py +++ b/SYS/models.py @@ -103,7 +103,7 @@ class PipeObject: return # Prefer a stable, human-friendly title: - # "1 - download-media", "2 - download-media", ... + # "1 - download-file", "2 - download-file", ... # The index is preserved when possible via `pipe_index` in the PipeObject's extra. idx = None try: @@ -875,7 +875,7 @@ class PipelineLiveProgress: # IMPORTANT: use the shared stderr Console instance so that any # `stderr_console().print(...)` calls from inside cmdlets (e.g. preflight - # tables/prompts in download-media) cooperate with Rich Live rendering. + # tables/prompts in download-file) cooperate with Rich Live rendering. # If we create a separate Console(file=sys.stderr), output will fight for # terminal cursor control and appear "blocked"/truncated. 
from SYS.rich_display import stderr_console diff --git a/SYS/result_table.py b/SYS/result_table.py index a21cb96..e417f31 100644 --- a/SYS/result_table.py +++ b/SYS/result_table.py @@ -361,6 +361,8 @@ class ResultRow: """Arguments to use for this row when selected via @N syntax (e.g., ['-item', '3'])""" source_index: Optional[int] = None """Original insertion order index (used to map sorted views back to source items).""" + payload: Optional[Any] = None + """Original object that contributed to this row.""" def add_column(self, name: str, value: Any) -> None: """Add a column to this row.""" @@ -498,6 +500,9 @@ class ResultTable: self.table: Optional[str] = None """Table type (e.g., 'youtube', 'soulseek') for context-aware selection logic.""" + self.table_metadata: Dict[str, Any] = {} + """Optional provider/table metadata (e.g., provider name, view).""" + self.value_case: str = "lower" """Display-only value casing: 'lower' (default), 'upper', or 'preserve'.""" @@ -525,6 +530,18 @@ class ResultTable: self.table = table return self + def set_table_metadata(self, metadata: Optional[Dict[str, Any]]) -> "ResultTable": + """Attach provider/table metadata for downstream selection logic.""" + self.table_metadata = dict(metadata or {}) + return self + + def get_table_metadata(self) -> Dict[str, Any]: + """Return attached provider/table metadata (copy to avoid mutation).""" + try: + return dict(self.table_metadata) + except Exception: + return {} + def set_no_choice(self, no_choice: bool = True) -> "ResultTable": """Mark the table as non-interactive (no row numbers, no selection parsing).""" self.no_choice = bool(no_choice) @@ -612,6 +629,9 @@ class ResultTable: new_table.input_options = dict(self.input_options) if self.input_options else {} new_table.no_choice = self.no_choice new_table.table = self.table + new_table.table_metadata = ( + dict(self.table_metadata) if getattr(self, "table_metadata", None) else {} + ) new_table.header_lines = list(self.header_lines) if self.header_lines else [] return new_table @@ -712,6 +732,7 @@ class ResultTable: Self for chaining """ row = self.add_row() + row.payload = result # Handle TagItem from get_tag.py (tag display with index) if hasattr(result, "__class__") and result.__class__.__name__ == "TagItem": @@ -738,6 +759,21 @@ class ResultTable: return self + def get_row_payload(self, row_index: int) -> Optional[Any]: + """Return the original payload for the row at ``row_index`` if available.""" + if 0 <= row_index < len(self.rows): + return getattr(self.rows[row_index], "payload", None) + return None + + def get_payloads(self) -> List[Any]: + """Return the payloads for every row, preserving table order.""" + payloads: List[Any] = [] + for row in self.rows: + payload = getattr(row, "payload", None) + if payload is not None: + payloads.append(payload) + return payloads + def _add_search_result(self, row: ResultRow, result: Any) -> None: """Extract and add SearchResult fields to row.""" # If provider supplied explicit columns, render those and skip legacy defaults diff --git a/SYS/rich_display.py b/SYS/rich_display.py index 65af2fb..bd27ec1 100644 --- a/SYS/rich_display.py +++ b/SYS/rich_display.py @@ -11,9 +11,11 @@ from __future__ import annotations import contextlib import sys -from typing import Any, Iterator, TextIO +from typing import Any, Iterator, Sequence, TextIO from rich.console import Console +from rich.panel import Panel +from rich.text import Text # Configure Rich pretty-printing to avoid truncating long strings (hashes/paths). 
# This is version-safe: older Rich versions may not support the max_* arguments. @@ -70,3 +72,33 @@ def capture_rich_output(*, stdout: TextIO, stderr: TextIO) -> Iterator[None]: finally: _STDOUT_CONSOLE = previous_stdout _STDERR_CONSOLE = previous_stderr + + +def show_provider_config_panel( + provider_name: str, + keys: Sequence[str] | None = None, + *, + config_hint: str = "config.conf" +) -> None: + """Show a Rich panel explaining how to configure a provider.""" + + normalized = str(provider_name or "").strip() or "provider" + pre = Text("Add this to your config", style="bold") + footer = Text( + f"Place this block in {config_hint} or config.d/*.conf", + style="dim" + ) + body = Text() + body.append(f"[provider={normalized}]\n", style="bold cyan") + for key in keys or []: + body.append(f'{key}=""\n', style="yellow") + + stderr_console().print(pre) + stderr_console().print( + Panel( + body, + title=f"{normalized} configuration", + expand=False + ) + ) + stderr_console().print(footer) diff --git a/Store/HydrusNetwork.py b/Store/HydrusNetwork.py index 0f122de..24677f4 100644 --- a/Store/HydrusNetwork.py +++ b/Store/HydrusNetwork.py @@ -1389,19 +1389,51 @@ class HydrusNetwork(Store): if not tags_to_add and not tags_to_remove: return True + service_key: Optional[str] = None + try: + from API import HydrusNetwork as hydrus_wrapper + + service_key = hydrus_wrapper.get_tag_service_key( + client, service_name + ) + except Exception: + service_key = None + + mutate_success = False + if service_key: + try: + client.mutate_tags_by_key( + file_hash, + service_key, + add_tags=tags_to_add, + remove_tags=tags_to_remove, + ) + mutate_success = True + except Exception as exc: + debug( + f"{self._log_prefix()} add_tag: mutate_tags_by_key failed: {exc}" + ) + did_any = False - if tags_to_remove: - try: - client.delete_tag(file_hash, tags_to_remove, service_name) - did_any = True - except Exception as exc: - debug(f"{self._log_prefix()} add_tag: delete_tag failed: {exc}") - if tags_to_add: - try: - client.add_tag(file_hash, tags_to_add, service_name) - did_any = True - except Exception as exc: - debug(f"{self._log_prefix()} add_tag: add_tag failed: {exc}") + if not mutate_success: + if tags_to_remove: + try: + client.delete_tag(file_hash, tags_to_remove, service_name) + did_any = True + except Exception as exc: + debug( + f"{self._log_prefix()} add_tag: delete_tag failed: {exc}" + ) + if tags_to_add: + try: + client.add_tag(file_hash, tags_to_add, service_name) + did_any = True + except Exception as exc: + debug( + f"{self._log_prefix()} add_tag: add_tag failed: {exc}" + ) + else: + did_any = bool(tags_to_add or tags_to_remove) return did_any except Exception as exc: diff --git a/TUI.py b/TUI.py index 98fdddd..51b2d8d 100644 --- a/TUI.py +++ b/TUI.py @@ -6,7 +6,7 @@ import json import re import sys from pathlib import Path -from typing import Any, List, Optional, Sequence, Tuple +from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple from textual import on, work from textual.app import App, ComposeResult @@ -28,6 +28,7 @@ from textual.widgets import ( ) from textual.widgets.option_list import Option + BASE_DIR = Path(__file__).resolve().parent REPO_ROOT = BASE_DIR TUI_DIR = REPO_ROOT / "TUI" @@ -37,7 +38,7 @@ for path in (REPO_ROOT, TUI_DIR): sys.path.insert(0, str_path) from TUI.pipeline_runner import PipelineRunResult # type: ignore # noqa: E402 -from SYS.result_table import ResultTable # type: ignore # noqa: E402 +from SYS.result_table import ResultTable, extract_hash_value, 
extract_store_value # type: ignore # noqa: E402 from SYS.config import load_config # type: ignore # noqa: E402 from Store.registry import Store as StoreRegistry # type: ignore # noqa: E402 @@ -68,14 +69,23 @@ def _extract_tag_names(emitted: Sequence[Any]) -> List[str]: try: if hasattr(obj, "tag_name"): val = getattr(obj, "tag_name") - if val: - tags.append(str(val)) + if val and isinstance(val, str): + tags.append(val) continue except Exception: pass if isinstance(obj, dict): - for k in ("tag_name", "tag", "name", "value"): + # Prefer explicit tag lists + tag_list = obj.get("tag") + if isinstance(tag_list, (list, tuple)): + for t in tag_list: + if isinstance(t, str) and t.strip(): + tags.append(t.strip()) + if tag_list: + continue + # Fall back to individual tag_name/value/name strings + for k in ("tag_name", "value", "name"): v = obj.get(k) if isinstance(v, str) and v.strip(): tags.append(v.strip()) @@ -84,6 +94,30 @@ def _extract_tag_names(emitted: Sequence[Any]) -> List[str]: return _dedup_preserve_order(tags) +def _extract_tag_names_from_table(table: Any) -> List[str]: + if not table: + return [] + sources: List[Any] = [] + get_payloads = getattr(table, "get_payloads", None) + if callable(get_payloads): + try: + payloads = get_payloads() + if payloads: + sources.extend(payloads) + except Exception: + pass + rows = getattr(table, "rows", []) or [] + for row in rows: + for col in getattr(row, "columns", []) or []: + if str(getattr(col, "name", "") or "").strip().lower() == "tag": + val = getattr(col, "value", None) + if val: + sources.append({"tag_name": val}) + if not sources: + return [] + return _extract_tag_names(sources) + + class TextPopup(ModalScreen[None]): def __init__(self, *, title: str, text: str) -> None: @@ -139,20 +173,24 @@ class TagEditorPopup(ModalScreen[None]): @work(thread=True) def _load_tags_background(self) -> None: app = self.app # PipelineHubApp - try: - runner: PipelineRunner = getattr(app, "executor") - cmd = f"@1 | get-tag -emit" - res = runner.run_pipeline(cmd, seeds=self._seeds, isolate=True) - tags = _extract_tag_names(res.emitted) - except Exception as exc: - tags = [] + tags = self._fetch_tags_from_store() + if not tags: try: - app.call_from_thread( - self._set_status, - f"Error: {type(exc).__name__}: {exc}" - ) - except Exception: - self._set_status(f"Error: {type(exc).__name__}: {exc}") + runner: PipelineRunner = getattr(app, "executor") + cmd = "@1 | get-tag" + res = runner.run_pipeline(cmd, seeds=self._seeds, isolate=True) + tags = _extract_tag_names_from_table(getattr(res, "result_table", None)) + if not tags: + tags = _extract_tag_names(getattr(res, "emitted", [])) + except Exception as exc: + tags = [] + try: + app.call_from_thread( + self._set_status, + f"Error: {type(exc).__name__}: {exc}" + ) + except Exception: + self._set_status(f"Error: {type(exc).__name__}: {exc}") self._original_tags = tags try: app.call_from_thread(self._apply_loaded_tags, tags) @@ -164,6 +202,42 @@ class TagEditorPopup(ModalScreen[None]): self._editor.text = "\n".join(tags) self._set_status(f"Loaded {len(tags)} tag(s)") + def _fetch_tags_from_store(self) -> Optional[List[str]]: + if not self._store or not self._hash: + return None + try: + cfg = load_config() or {} + except Exception: + cfg = {} + store_key = str(self._store or "").strip() + hash_value = str(self._hash or "").strip().lower() + if not store_key or not hash_value: + return None + try: + registry = StoreRegistry(config=cfg, suppress_debug=True) + except Exception: + return [] + match = None + normalized = 
store_key.lower() + for name in registry.list_backends(): + if str(name or "").strip().lower() == normalized: + match = name + break + if match is None: + return None + try: + backend = registry[match] + except KeyError: + return None + try: + tags, _src = backend.get_tag(hash_value, config=cfg) + if not tags: + return [] + filtered = [str(t).strip() for t in tags if str(t).strip()] + return _dedup_preserve_order(filtered) + except Exception: + return None + def _parse_editor_tags(self) -> List[str]: raw = "" try: @@ -207,6 +281,33 @@ class TagEditorPopup(ModalScreen[None]): desired: List[str] ) -> None: app = self.app # PipelineHubApp + def _log_message(msg: str) -> None: + if not msg: + return + try: + app.call_from_thread(app._append_log_line, msg) + except Exception: + pass + + def _log_pipeline_command(stage: str, cmd: str) -> None: + if not cmd: + return + _log_message(f"tags-save: {stage}: {cmd}") + + def _log_pipeline_result(stage: str, result: PipelineRunResult | None) -> None: + if result is None: + return + status = "success" if getattr(result, "success", False) else "failed" + _log_message(f"tags-save: {stage} result: {status}") + error = str(getattr(result, "error", "") or "").strip() + if error: + _log_message(f"tags-save: {stage} error: {error}") + for attr in ("stdout", "stderr"): + raw = str(getattr(result, attr, "") or "").strip() + if not raw: + continue + for line in raw.splitlines(): + _log_message(f"tags-save: {stage} {attr}: {line}") try: runner: PipelineRunner = getattr(app, "executor") store_tok = json.dumps(self._store) @@ -216,8 +317,10 @@ class TagEditorPopup(ModalScreen[None]): if to_del: del_args = " ".join(json.dumps(t) for t in to_del) - del_cmd = f"@1 | delete-tag -store {store_tok}{query_chunk} {del_args}" + del_cmd = f"delete-tag -store {store_tok}{query_chunk} {del_args}" + _log_pipeline_command("delete-tag", del_cmd) del_res = runner.run_pipeline(del_cmd, seeds=self._seeds, isolate=True) + _log_pipeline_result("delete-tag", del_res) if not getattr(del_res, "success", False): failures.append( str( @@ -231,8 +334,10 @@ class TagEditorPopup(ModalScreen[None]): if to_add: add_args = " ".join(json.dumps(t) for t in to_add) - add_cmd = f"@1 | add-tag -store {store_tok}{query_chunk} {add_args}" + add_cmd = f"add-tag -store {store_tok}{query_chunk} {add_args}" + _log_pipeline_command("add-tag", add_cmd) add_res = runner.run_pipeline(add_cmd, seeds=self._seeds, isolate=True) + _log_pipeline_result("add-tag", add_res) if not getattr(add_res, "success", False): failures.append( str( @@ -252,14 +357,38 @@ class TagEditorPopup(ModalScreen[None]): self._set_status(f"Error: {msg}") return - self._original_tags = list(desired) + reloaded = self._fetch_tags_from_store() + refreshed = reloaded is not None + tags_to_show = list(reloaded or []) if refreshed else list(desired) + self._original_tags = list(tags_to_show) try: - app.call_from_thread( - self._set_status, - f"Saved (+{len(to_add)}, -{len(to_del)})" - ) + app.call_from_thread(self._apply_loaded_tags, tags_to_show) except Exception: - self._set_status(f"Saved (+{len(to_add)}, -{len(to_del)})") + self._apply_loaded_tags(tags_to_show) + + def _refresh_overlay() -> None: + try: + app.refresh_tag_overlay( + self._store, + self._hash, + tags_to_show, + self._seeds, + ) + except Exception: + pass + + try: + app.call_from_thread(_refresh_overlay) + except Exception: + _refresh_overlay() + + status_msg = f"Saved (+{len(to_add)}, -{len(to_del)})" + if refreshed: + status_msg += f"; loaded {len(tags_to_show)} tag(s)" + try: 
+ app.call_from_thread(self._set_status, status_msg) + except Exception: + self._set_status(status_msg) except Exception as exc: try: app.call_from_thread( @@ -547,7 +676,7 @@ class PipelineHubApp(App): """Apply store/path/tags UI fields to the pipeline text. Rules (simple + non-destructive): - - If output path is set and the first stage is download-media and has no -path/--path, append -path. + - If output path is set and the first stage is download-file and has no -path/--path, append -path. - If a store is selected and pipeline has no add-file stage, append add-file -store . """ base = str(pipeline_text or "").strip() @@ -578,11 +707,11 @@ class PipelineHubApp(App): except Exception: first_stage_cmd = "" - # Apply -path to download-media first stage (only if missing) + # Apply -path to download-file first stage (only if missing) if output_path: first = stages[0] low = first.lower() - if low.startswith("download-media" + if low.startswith("download-file" ) and " -path" not in low and " --path" not in low: stages[0] = f"{first} -path {json.dumps(output_path)}" @@ -594,9 +723,7 @@ class PipelineHubApp(App): should_auto_add_file = bool( selected_store and ("add-file" not in low_joined) and ( first_stage_cmd - in {"download-media", - "download-file", - "download-torrent"} + in {"download-file"} ) ) @@ -714,6 +841,42 @@ class PipelineHubApp(App): key=str(idx - 1) ) + def refresh_tag_overlay(self, + store_name: str, + file_hash: str, + tags: List[str], + subject: Any) -> None: + """Update the shared get-tag overlay after manual tag edits.""" + if not store_name or not file_hash: + return + try: + from cmdlet.get_tag import _emit_tags_as_table + except Exception: + return + + try: + cfg = load_config() or {} + except Exception: + cfg = {} + + payload_subject = subject if subject is not None else None + if not isinstance(payload_subject, dict): + payload_subject = { + "store": store_name, + "hash": file_hash, + } + + try: + _emit_tags_as_table( + list(tags), + file_hash=file_hash, + store=store_name, + config=cfg, + subject=payload_subject, + ) + except Exception: + pass + def _load_cmdlet_names(self) -> None: try: ensure_registry_loaded() @@ -825,6 +988,10 @@ class PipelineHubApp(App): index = 0 item: Any = None + row_payload: Any = None + row = None + column_store: Optional[str] = None + column_hash: Optional[str] = None # Prefer mapping displayed table row -> source item. 
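+ # Resolution order in the logic below: the row's payload first, then the source item looked up via source_index, then any Store/Hash column values on the displayed row, and finally the store currently selected in the UI.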
if self.current_result_table and 0 <= index < len( @@ -832,30 +999,66 @@ class PipelineHubApp(App): "rows", []) or []): row = self.current_result_table.rows[index] + row_payload = getattr(row, "payload", None) src_idx = getattr(row, "source_index", None) if isinstance(src_idx, int) and 0 <= src_idx < len(self.result_items): item = self.result_items[src_idx] + for col in getattr(row, "columns", []) or []: + name = str(getattr(col, "name", "") or "").strip().lower() + value = str(getattr(col, "value", "") or "").strip() + if not column_store and name in {"store", "storage", "source", "table"}: + column_store = value + if not column_hash and name in {"hash", "hash_hex", "file_hash", "sha256"}: + column_hash = value if item is None and 0 <= index < len(self.result_items): item = self.result_items[index] - store_name = None - file_hash = None - if isinstance(item, dict): - store_name = item.get("store") - file_hash = item.get("hash") - else: - store_name = getattr(item, "store", None) - file_hash = getattr(item, "hash", None) + def _pick_from_candidates( + candidates: List[Any], extractor: Callable[[Any], str] + ) -> str: + for candidate in candidates: + if candidate is None: + continue + try: + value = extractor(candidate) + except Exception: + value = "" + if value and str(value).strip(): + return str(value).strip() + return "" - store_text = str(store_name).strip() if store_name is not None else "" - hash_text = str(file_hash).strip() if file_hash is not None else "" + candidate_sources: List[Any] = [] + if row_payload is not None: + candidate_sources.append(row_payload) + if item is not None: + candidate_sources.append(item) + + store_name = _pick_from_candidates(candidate_sources, extract_store_value) + file_hash = _pick_from_candidates(candidate_sources, extract_hash_value) + + if not store_name and column_store: + store_name = column_store + if not file_hash and column_hash: + file_hash = column_hash + + store_text = str(store_name).strip() if store_name else "" + hash_text = str(file_hash).strip() if file_hash else "" if not store_text: # Fallback to UI store selection when item doesn't carry it. 
store_text = self._get_selected_store() or "" - return item, (store_text or None), (hash_text or None) + final_item = row_payload if row_payload is not None else item + if final_item is None and (store_text or hash_text): + fallback: Dict[str, str] = {} + if store_text: + fallback["store"] = store_text + if hash_text: + fallback["hash"] = hash_text + final_item = fallback + + return final_item, (store_text or None), (hash_text or None) def _open_tags_popup(self) -> None: if self._pipeline_running: diff --git a/TUI/menu_actions.py b/TUI/menu_actions.py index 22d0994..d6e66db 100644 --- a/TUI/menu_actions.py +++ b/TUI/menu_actions.py @@ -30,15 +30,15 @@ PIPELINE_PRESETS: List[PipelinePreset] = [ PipelinePreset( label="Download → Merge → Local", description= - "Use download-media with playlist auto-selection, merge the pieces, tag, then import into local storage.", + "Use download-file with playlist auto-selection, merge the pieces, tag, then import into local storage.", pipeline= - 'download-media "" | merge-file | add-tags -store local | add-file -storage local', + 'download-file "" | merge-file | add-tags -store local | add-file -storage local', ), PipelinePreset( label="Download → Hydrus", description="Fetch media, auto-tag, and push directly into Hydrus.", pipeline= - 'download-media "" | merge-file | add-tags -store hydrus | add-file -storage hydrus', + 'download-file "" | merge-file | add-tags -store hydrus | add-file -storage hydrus', ), PipelinePreset( label="Search Local Library", diff --git a/TUI/modalscreen/download.py b/TUI/modalscreen/download.py index 764abb3..477ac9c 100644 --- a/TUI/modalscreen/download.py +++ b/TUI/modalscreen/download.py @@ -405,7 +405,7 @@ class DownloadModal(ModalScreen): download_succeeded = False download_stderr_text = "" # Store for merge stage if download_enabled: - download_cmdlet_name = "download-media" if self.is_playlist else "download-file" + download_cmdlet_name = "download-file" download_cmdlet = get_cmdlet(download_cmdlet_name) if download_cmdlet: logger.info(f"📥 Executing {download_cmdlet_name} stage") @@ -416,9 +416,9 @@ class DownloadModal(ModalScreen): if worker: worker.log_step(f"Starting {download_cmdlet_name} stage...") - # Build arguments for download-media (yt-dlp) playlists; download-file takes no yt-dlp args. + # Build yt-dlp playlist arguments for download-file streaming (if applicable). 
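+ # Illustrative example (not part of this change): picking playlist entries 1-3 and 7 would presumably yield cmdlet_args like ["--playlist-items", "1-3,7"] for yt-dlp.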
cmdlet_args = [] - if download_cmdlet_name == "download-media" and self.is_playlist: + if self.is_playlist: # Always use yt-dlp's native --playlist-items for playlists if playlist_selection: # User provided specific selection diff --git a/cmdlet/__init__.py b/cmdlet/__init__.py index b577d7d..8032801 100644 --- a/cmdlet/__init__.py +++ b/cmdlet/__init__.py @@ -1,6 +1,8 @@ from __future__ import annotations -from typing import Any, Callable, Dict, Iterable, Sequence +import os +import sys +from typing import Any, Callable, Dict, Iterable, Iterator, Sequence from importlib import import_module as _import_module # A cmdlet is a callable taking (result, args, config) -> int @@ -47,51 +49,71 @@ def get(cmd_name: str) -> Cmdlet | None: return REGISTRY.get(_normalize_cmd_name(cmd_name)) -# Dynamically import all cmdlet modules in this directory (ignore files starting with _ and __init__.py) -# cmdlet self-register when instantiated via their __init__ method -import os +_MODULES_LOADED = False -cmdlet_dir = os.path.dirname(__file__) -for filename in os.listdir(cmdlet_dir): - if not (filename.endswith(".py") and not filename.startswith("_") - and filename != "__init__.py"): - continue +def _iter_cmdlet_module_names() -> Iterator[str]: + cmdlet_dir = os.path.dirname(__file__) + try: + entries = os.listdir(cmdlet_dir) + except Exception: + return iter(()) - mod_name = filename[:-3] + def _generator() -> Iterator[str]: + for filename in entries: + if not (filename.endswith(".py") and not filename.startswith("_") + and filename != "__init__.py"): + continue + mod_name = filename[:-3] + if "_" not in mod_name: + continue + yield mod_name - # Enforce Powershell-style two-word cmdlet naming (e.g., add_file, get_file) - # Skip native/utility scripts that are not cmdlet (e.g., adjective, worker, matrix, pipe) - if "_" not in mod_name: - continue + return _generator() + +def _load_cmdlet_module(mod_name: str) -> None: try: _import_module(f".{mod_name}", __name__) - except Exception as e: - import sys + except Exception as exc: + print(f"Error importing cmdlet '{mod_name}': {exc}", file=sys.stderr) - print(f"Error importing cmdlet '{mod_name}': {e}", file=sys.stderr) - continue -# Import and register native commands that are not considered cmdlet -try: - from cmdnat import register_native_commands as _register_native_commands +def _load_root_modules() -> None: + for root in ("select_cmdlet",): + try: + _import_module(root) + except Exception: + continue - _register_native_commands(REGISTRY) -except Exception: - # Native commands are optional; ignore if unavailable - pass -# Import root-level modules that also register cmdlet -for _root_mod in ("select_cmdlet", - ): +def _load_helper_modules() -> None: try: - _import_module(_root_mod) + import API.alldebrid as _alldebrid except Exception: - # Allow missing optional modules - continue + pass -# Also import helper modules that register cmdlet -try: - import API.alldebrid as _alldebrid -except Exception: - pass + +def _register_native_commands() -> None: + try: + from cmdnat import register_native_commands + except Exception: + return + try: + register_native_commands(REGISTRY) + except Exception: + pass + + +def ensure_cmdlet_modules_loaded() -> None: + global _MODULES_LOADED + + if _MODULES_LOADED: + return + + for mod_name in _iter_cmdlet_module_names(): + _load_cmdlet_module(mod_name) + + _load_root_modules() + _load_helper_modules() + _register_native_commands() + _MODULES_LOADED = True diff --git a/cmdlet/add_file.py b/cmdlet/add_file.py index 
3815877..4b36174 100644 --- a/cmdlet/add_file.py +++ b/cmdlet/add_file.py @@ -519,8 +519,11 @@ class Add_File(Cmdlet): # - If the sample URL only has one available format, force it for the batch. # - If the sample URL appears audio-only (no video codecs), prefer audio mode. try: - from cmdlet.download_media import is_url_supported_by_ytdlp, list_formats - from tool.ytdlp import YtDlpTool + from tool.ytdlp import ( + YtDlpTool, + is_url_supported_by_ytdlp, + list_formats, + ) sample_url = unique_urls[0] if unique_urls else None if sample_url and is_url_supported_by_ytdlp(str(sample_url)): @@ -677,6 +680,59 @@ class Add_File(Cmdlet): # Update pipe_obj with resolved path pipe_obj.path = str(media_path_or_url) + table = None + full_metadata = None + if isinstance(pipe_obj.extra, dict): + table = pipe_obj.extra.get("table") + full_metadata = pipe_obj.extra.get("full_metadata") + + provider_table = str( + table or getattr(pipe_obj, "provider", "") + ).strip().lower() + if (provider_table == "alldebrid" + and isinstance(media_path_or_url, str) + and media_path_or_url.lower().startswith( + ("http://", "https://")) + and (provider_name or location)): + url_str = str(media_path_or_url) + if url_str in skip_url_downloads: + log( + f"Skipping download (already stored): {url_str}", + file=sys.stderr, + ) + successes += 1 + continue + + temp_dir_candidate = Path( + tempfile.mkdtemp(prefix="medios_alldebrid_") + ) + downloaded_path: Optional[Path] = None + try: + from ProviderCore.registry import get_search_provider + + provider = get_search_provider("alldebrid", config) + if provider is not None: + downloaded = provider.download( + pipe_obj, + temp_dir_candidate, + ) + if downloaded: + downloaded_path = Path(downloaded) + except Exception as exc: + log( + f"[add-file] AllDebrid download failed: {exc}", + file=sys.stderr, + ) + if downloaded_path and downloaded_path.exists(): + media_path_or_url = downloaded_path + pipe_obj.path = str(downloaded_path) + pipe_obj.is_temp = True + delete_after_item = True + temp_dir_to_cleanup = temp_dir_candidate + processed_url_items.add(url_str) + else: + shutil.rmtree(temp_dir_candidate, ignore_errors=True) + # URL targets: prefer provider-aware download for OpenLibrary selections. if isinstance(media_path_or_url, str) and media_path_or_url.lower().startswith( @@ -684,12 +740,6 @@ class Add_File(Cmdlet): "https://", "magnet:", "torrent:")): - table = None - full_metadata = None - if isinstance(pipe_obj.extra, dict): - table = pipe_obj.extra.get("table") - full_metadata = pipe_obj.extra.get("full_metadata") - is_openlibrary = (str(table or "").lower() == "openlibrary") or ( "openlibrary.org/books/" in media_path_or_url.lower() ) @@ -1079,7 +1129,7 @@ class Add_File(Cmdlet): continue # No destination specified: keep legacy behavior (download-media only). 
- code = self._delegate_to_download_media( + code = self._delegate_to_download_file( item, url_str, location, @@ -2052,7 +2102,7 @@ class Add_File(Cmdlet): pass return None - def _delegate_to_download_media( + def _delegate_to_download_file( self, result: Any, url_str: str, @@ -2062,13 +2112,13 @@ class Add_File(Cmdlet): config: Dict[str, Any], ) -> int: - """Delegate URL handling to download-media cmdlet.""" + """Delegate URL handling to download-file cmdlet (yt-dlp path).""" log( - f"Target is a URL, delegating to download-media: {url_str}", + f"Target is a URL, delegating to download-file: {url_str}", file=sys.stderr ) # Reuse the globally-registered cmdlet instance to avoid duplicative registration - from cmdlet.download_media import CMDLET as dl_cmdlet + from cmdlet.download_file import CMDLET as dl_cmdlet dl_args = list(args) if args else [] @@ -2087,11 +2137,11 @@ class Add_File(Cmdlet): if selection_args: dl_args.extend(selection_args) - # download-media doesn't support -storage flag + # download-file doesn't support -storage flag # It downloads to the configured directory, then add-file will handle storage # Note: Provider uploads (0x0) are not supported via this path - # Call download-media with the URL in args + # Call download-file with the URL in args return dl_cmdlet.run(None, dl_args, config) @staticmethod @@ -2832,17 +2882,16 @@ class Add_File(Cmdlet): return [] try: - from cmdlet.download_media import ( - CMDLET as dl_cmdlet, + from SYS.models import DownloadOptions + from tool.ytdlp import ( + YtDlpTool, + _best_subtitle_sidecar, _download_with_timeout, + _format_chapters_note, + _read_text_file, is_url_supported_by_ytdlp, list_formats, - _format_chapters_note, - _best_subtitle_sidecar, - _read_text_file, ) - from SYS.models import DownloadOptions - from tool.ytdlp import YtDlpTool except Exception: return [] diff --git a/cmdlet/add_tag.py b/cmdlet/add_tag.py index 12b374d..daa955f 100644 --- a/cmdlet/add_tag.py +++ b/cmdlet/add_tag.py @@ -540,9 +540,11 @@ class Add_Tag(Cmdlet): ) return 1 + hash_override = normalize_hash(query_hash) if query_hash else None + # If add-tag is in the middle of a pipeline (has downstream stages), default to # including temp files. This enables common flows like: - # @N | download-media | add-tag ... | add-file ... + # @N | download-file | add-tag ... | add-file ... store_override = parsed.get("store") stage_ctx = ctx.get_stage_context() has_downstream = bool( @@ -562,6 +564,10 @@ class Add_Tag(Cmdlet): if not include_temp: results = filter_results_by_temp(results, include_temp=False) + # When no pipeline payload is present but -query/-store pinpoints a hash, tag it directly. + if not results and hash_override and store_override: + results = [{"hash": hash_override, "store": store_override}] + if not results: log( "No valid files to tag (all results were temporary; use --all to include temporary files)", @@ -628,7 +634,6 @@ class Add_Tag(Cmdlet): return 1 # Get other flags - hash_override = normalize_hash(query_hash) if query_hash else None duplicate_arg = parsed.get("duplicate") # tag ARE provided - apply them to each store-backed result diff --git a/cmdlet/download_file.py b/cmdlet/download_file.py index 8aeaf9e..a1308a3 100644 --- a/cmdlet/download_file.py +++ b/cmdlet/download_file.py @@ -1,29 +1,52 @@ -"""Generic file downloader. +"""Generic file/stream downloader. 
Supports: - Direct HTTP file URLs (PDFs, images, documents; non-yt-dlp) - Piped provider items (uses provider.download when available) - -No streaming site logic; use download-media for yt-dlp/streaming. +- Streaming sites via yt-dlp (YouTube, Bandcamp, etc.) """ from __future__ import annotations import sys +import re from pathlib import Path from typing import Any, Dict, List, Optional, Sequence from urllib.parse import urlparse +from contextlib import AbstractContextManager, nullcontext + +from API.alldebrid import is_magnet_link +from Provider import internetarchive as ia_provider +from Provider import alldebrid as ad_provider +from Provider import openlibrary as ol_provider from SYS.download import DownloadError, _download_direct_file +from SYS.models import DownloadOptions, DownloadMediaResult from SYS.logger import log, debug from SYS.pipeline_progress import PipelineProgress +from SYS.result_table import ResultTable +from SYS.rich_display import stderr_console as get_stderr_console from SYS import pipeline as pipeline_context +from SYS.utils import sha256_file +from rich.prompt import Confirm + +from tool.ytdlp import ( + YtDlpTool, + _best_subtitle_sidecar, + _download_with_timeout, + _format_chapters_note, + _read_text_file, + is_url_supported_by_ytdlp, + list_formats, + probe_url, +) from . import _shared as sh Cmdlet = sh.Cmdlet CmdletArg = sh.CmdletArg SharedArgs = sh.SharedArgs +QueryArg = sh.QueryArg parse_cmdlet_args = sh.parse_cmdlet_args register_url_with_local_library = sh.register_url_with_local_library coerce_to_pipe_object = sh.coerce_to_pipe_object @@ -37,7 +60,7 @@ class Download_File(Cmdlet): """Initialize download-file cmdlet.""" super().__init__( name="download-file", - summary="Download files via HTTP or provider handlers", + summary="Download files or streaming media", usage= "download-file [-path DIR] [options] OR @N | download-file [-path DIR|DIR] [options]", alias=["dl-file", @@ -45,6 +68,7 @@ class Download_File(Cmdlet): arg=[ SharedArgs.URL, SharedArgs.PATH, + SharedArgs.QUERY, # Prefer -path for output directory to match other cmdlets; keep -output for backwards compatibility. CmdletArg( name="-output", @@ -52,11 +76,41 @@ class Download_File(Cmdlet): alias="o", description="(deprecated) Output directory (use -path instead)", ), + CmdletArg( + name="audio", + type="flag", + alias="a", + description="Download audio only (yt-dlp)", + ), + CmdletArg( + name="format", + type="string", + alias="fmt", + description="Explicit yt-dlp format selector", + ), + QueryArg( + "clip", + key="clip", + aliases=["range", + "section", + "sections"], + type="string", + required=False, + description=( + "Clip time ranges via -query keyed fields (e.g. clip:1m-2m or clip:00:01-00:10). " + "Comma-separated values supported." 
+ ), + query_only=True, + ), + CmdletArg( + name="item", + type="string", + description="Item selection for playlists/formats", + ), ], detail=[ - "Download files directly via HTTP without yt-dlp processing.", - "For streaming sites, use download-media.", - "For Internet Archive item pages (archive.org/details/...), shows a selectable file list; pick with @N to download.", + "Download files directly via HTTP or streaming media via yt-dlp.", + "For Internet Archive item pages (archive.org/details/...), shows a selectable file/format list; pick with @N to download.", ], exec=self.run, ) @@ -148,206 +202,6 @@ class Download_File(Cmdlet): "SearchResult": None, } - @staticmethod - def _maybe_show_internetarchive_formats( - *, - raw_urls: Sequence[str], - piped_items: Sequence[Any], - parsed: Dict[str, - Any], - config: Dict[str, - Any], - quiet_mode: bool, - ) -> Optional[int]: - """If the input is an IA item page, show a selectable formats table. - - Returns an exit code when handled; otherwise None. - """ - if quiet_mode: - return None - - try: - total_inputs = int(len(raw_urls or []) + len(piped_items or [])) - except Exception: - total_inputs = 0 - - if total_inputs != 1: - return None - - item = piped_items[0] if piped_items else None - target = "" - if item is not None: - try: - target = str(get_field(item, - "path") or get_field(item, - "url") or "").strip() - except Exception: - target = "" - if not target and raw_urls: - target = str(raw_urls[0]).strip() - if not target: - return None - - try: - from Provider import internetarchive as ia - except Exception: - return None - - identifier = "" - try: - md = get_field(item, "full_metadata") if item is not None else None - if isinstance(md, dict): - identifier = str(md.get("identifier") or "").strip() - except Exception: - identifier = "" - if not identifier: - try: - identifier = str(ia.extract_identifier(target) or "").strip() - except Exception: - identifier = "" - if not identifier: - return None - - # Only show picker for item pages (details); direct download URLs should download immediately. 
- try: - if not ia.is_details_url(target): - return None - except Exception: - return None - - try: - files = ia.list_download_files(identifier) - except Exception as exc: - log( - f"download-file: Internet Archive lookup failed: {exc}", - file=sys.stderr - ) - return 1 - - if not files: - log( - "download-file: Internet Archive item has no downloadable files", - file=sys.stderr - ) - return 1 - - title = "" - try: - title = str(get_field(item, - "title") or "").strip() if item is not None else "" - except Exception: - title = "" - table_title = ( - f"Internet Archive: {title}".strip().rstrip(":") - if title else f"Internet Archive: {identifier}" - ) - - try: - from SYS.result_table import ResultTable - except Exception as exc: - log(f"download-file: ResultTable unavailable: {exc}", file=sys.stderr) - return 1 - - base_args: List[str] = [] - out_arg = parsed.get("path") or parsed.get("output") - if out_arg: - base_args.extend(["-path", str(out_arg)]) - - table = ResultTable(table_title).set_preserve_order(True) - table.set_table("internetarchive.formats") - table.set_source_command("download-file", base_args) - - rows: List[Dict[str, Any]] = [] - for f in files: - name = str(f.get("name") or "").strip() - if not name: - continue - fmt = str(f.get("format") or "").strip() - src = str(f.get("source") or "").strip() - direct_url = str(f.get("direct_url") or "").strip() - if not direct_url: - continue - - size_val: Any = f.get("size") - try: - size_val = int(size_val) if size_val not in (None, "") else "" - except Exception: - pass - - row_item: Dict[str, - Any] = { - "table": - "internetarchive", - "title": - fmt or name, - "path": - direct_url, - "url": - direct_url, - "columns": [ - ("Format", - fmt), - ("Name", - name), - ("Size", - size_val), - ("Source", - src), - ], - "_selection_args": [direct_url], - "full_metadata": { - "identifier": identifier, - "name": name, - "format": fmt, - "source": src, - "size": f.get("size"), - }, - } - rows.append(row_item) - table.add_result(row_item) - - if not rows: - log( - "download-file: no downloadable files found for this item", - file=sys.stderr - ) - return 1 - - try: - pipeline_context.set_last_result_table(table, rows, subject=item) - pipeline_context.set_current_stage_table(table) - except Exception: - pass - - log( - "Internet Archive item detected: select a file with @N to download", - file=sys.stderr - ) - return 0 - - @staticmethod - def _openlibrary_edition_id_from_url(u: str) -> str: - try: - p = urlparse(str(u)) - parts = [x for x in (p.path or "").split("/") if x] - except Exception: - parts = [] - # /books/OL35443598M/... 
- if len(parts) >= 2 and str(parts[0]).lower() == "books": - return str(parts[1]).strip() - return "" - - @staticmethod - def _title_hint_from_url_slug(u: str) -> str: - try: - p = urlparse(str(u)) - parts = [x for x in (p.path or "").split("/") if x] - slug = parts[-1] if parts else "" - except Exception: - slug = "" - slug = (slug or "").strip().replace("_", " ") - return slug or "OpenLibrary" - @staticmethod def _path_from_download_result(result_obj: Any) -> Path: file_path = None @@ -560,6 +414,16 @@ class Download_File(Cmdlet): except Exception: pass + if (provider_name + and str(provider_name).lower() == "alldebrid" + and is_magnet_link(str(url))): + magnet_spec = ad_provider.resolve_magnet_spec(str(url)) + if magnet_spec: + _, magnet_id = ad_provider.prepare_magnet(magnet_spec, config) + if magnet_id is not None: + downloaded_count += 1 + continue + if provider_name and get_provider is not None and SearchResult is not None: # OpenLibrary URLs should be handled by the OpenLibrary provider. if provider_name == "openlibrary": @@ -569,20 +433,10 @@ class Download_File(Cmdlet): "OpenLibrary provider not configured or not available" ) - edition_id = self._openlibrary_edition_id_from_url(str(url)) - title_hint = self._title_hint_from_url_slug(str(url)) + edition_id = ol_provider.edition_id_from_url(str(url)) + title_hint = ol_provider.title_hint_from_url_slug(str(url)) - sr = SearchResult( - table="openlibrary", - title=title_hint, - path=str(url), - media_kind="book", - full_metadata={ - "openlibrary_id": edition_id, - }, - ) - - downloaded_path = None + download_payload: Optional[Dict[str, Any]] = None try: ui, _pipe_idx = progress.ui_and_pipe_index() progress_cb = None @@ -648,33 +502,75 @@ class Download_File(Cmdlet): progress_cb = _progress - downloaded_path = provider.download( - sr, - final_output_dir, - progress_callback=progress_cb - ) # type: ignore[call-arg] + if hasattr(provider, "download_url"): + download_payload = provider.download_url( # type: ignore[attr-defined] + str(url), + final_output_dir, + progress_cb, + ) + + if download_payload is None: + sr = None + if hasattr(provider, "search_result_from_url"): + sr = provider.search_result_from_url(str(url)) # type: ignore[attr-defined] + if sr is None: + sr = SearchResult( + table="openlibrary", + title=title_hint, + path=str(url), + media_kind="book", + full_metadata={ + "openlibrary_id": edition_id, + }, + ) + + downloaded_path = provider.download( + sr, + final_output_dir, + progress_callback=progress_cb + ) # type: ignore[call-arg] + + if downloaded_path: + download_payload = { + "path": Path(downloaded_path), + "search_result": sr, + } except Exception as exc: raise DownloadError(str(exc)) # Clear long-running status line after the download attempt. 
progress.clear_status() - if downloaded_path: + if download_payload and download_payload.get("path"): + downloaded_path = Path(download_payload["path"]) + sr_obj = download_payload.get("search_result") + tags_hint: Optional[List[str]] = None - try: - sr_tags = getattr(sr, "tag", None) - if isinstance(sr_tags, set) and sr_tags: - tags_hint = sorted([str(t) for t in sr_tags if t]) - except Exception: - tags_hint = None + full_md: Optional[Dict[str, Any]] = None + resolved_title = title_hint + if sr_obj is not None: + try: + resolved_title = getattr(sr_obj, "title", None) or resolved_title + except Exception: + pass + try: + sr_tags = getattr(sr_obj, "tag", None) + if isinstance(sr_tags, set) and sr_tags: + tags_hint = sorted([str(t) for t in sr_tags if t]) + except Exception: + tags_hint = None + try: + full_md = getattr(sr_obj, "full_metadata", None) + except Exception: + full_md = None self._emit_local_file( - downloaded_path=Path(downloaded_path), + downloaded_path=downloaded_path, source=str(url), - title_hint=title_hint, + title_hint=resolved_title, tags_hint=tags_hint, media_kind_hint="book", - full_metadata=sr.full_metadata, + full_metadata=full_md, provider_hint="openlibrary", progress=progress, config=config, @@ -880,37 +776,20 @@ class Download_File(Cmdlet): except Exception: magnet_id = None - if magnet_id is not None and get_search_provider is not None: - provider = get_search_provider("alldebrid", config) - if provider is not None: - try: - files = provider.search( - "*", - limit=10_000, - filters={ - "view": "files", - "magnet_id": int(magnet_id) - }, - ) - except Exception: - files = [] - - # If the magnet isn't ready, provider.search returns a single not-ready folder row. - if (files and len(files) == 1 and getattr(files[0], - "media_kind", - "") == "folder"): - detail = getattr(files[0], "detail", "") - log( - f"[download-file] AllDebrid magnet {magnet_id} not ready ({detail or 'unknown'})", - file=sys.stderr, - ) - else: - for sr in files: - expanded_items.append( - sr.to_dict() if hasattr(sr, - "to_dict") else sr - ) - continue + expanded, detail = ad_provider.expand_folder_item( + item, + get_search_provider, + config, + ) + if detail: + log( + f"[download-file] AllDebrid magnet {magnet_id or 'unknown'} not ready ({detail or 'unknown'})", + file=sys.stderr, + ) + continue + if expanded: + expanded_items.extend(expanded) + continue expanded_items.append(item) except Exception: @@ -986,70 +865,11 @@ class Download_File(Cmdlet): output_dir = final_output_dir try: if str(table).strip().lower() == "alldebrid": - from ProviderCore.download import sanitize_filename as _sf - - md = full_metadata if isinstance(full_metadata, - dict) else {} - magnet_name = None - if isinstance(md, dict): - magnet_name = md.get("magnet_name" - ) or md.get("folder") - if not magnet_name: - magnet_name = ( - str(get_field(item, - "detail") or "").strip() or None - ) - - magnet_dir_name = _sf( - str(magnet_name) - ) if magnet_name else "" - - # If user already chose -path that ends with the magnet folder name, - # don't create a duplicate nested folder. 
- try: - base_tail = str(Path(output_dir).name or "") - except Exception: - base_tail = "" - base_tail_norm = _sf(base_tail).lower( - ) if base_tail.strip() else "" - magnet_dir_norm = magnet_dir_name.lower( - ) if magnet_dir_name else "" - - if magnet_dir_name and (not base_tail_norm - or base_tail_norm - != magnet_dir_norm): - output_dir = Path(output_dir) / magnet_dir_name - - relpath = None - if isinstance(md, dict): - relpath = md.get("relpath") - if not relpath and isinstance(md.get("file"), dict): - relpath = md["file"].get("_relpath") - - if relpath: - parts = [ - p for p in str(relpath).replace("\\", "/" - ).split("/") - if p and p not in {".", ".."} - ] - - # If the provider relpath already includes the magnet folder name as a - # root directory (common), strip it to prevent double nesting. - if magnet_dir_name and parts: - try: - if _sf(parts[0]).lower() == magnet_dir_norm: - parts = parts[1:] - except Exception: - pass - - # relpath includes the filename; only join parent directories. - for part in parts[:-1]: - output_dir = Path(output_dir) / _sf(part) - - try: - Path(output_dir).mkdir(parents=True, exist_ok=True) - except Exception: - output_dir = final_output_dir + output_dir = ad_provider.adjust_output_dir_for_alldebrid( + final_output_dir, + full_metadata if isinstance(full_metadata, dict) else None, + item, + ) except Exception: output_dir = final_output_dir @@ -1199,6 +1019,2047 @@ class Download_File(Cmdlet): return downloaded_count + # === Streaming helpers (yt-dlp) === + + @staticmethod + def _append_urls_from_piped_result(raw_urls: List[str], result: Any) -> List[str]: + if raw_urls: + return raw_urls + if not result: + return raw_urls + + results_to_check = result if isinstance(result, list) else [result] + for item in results_to_check: + try: + url = get_field(item, "url") or get_field(item, "target") + except Exception: + url = None + if url: + raw_urls.append(url) + return raw_urls + + @staticmethod + def _filter_supported_urls(raw_urls: Sequence[str]) -> tuple[List[str], List[str]]: + supported = [url for url in (raw_urls or []) if is_url_supported_by_ytdlp(url)] + unsupported = list(set(raw_urls or []) - set(supported or [])) + return supported, unsupported + + def _parse_query_keyed_spec(self, query_spec: Optional[str]) -> Dict[str, List[str]]: + if not query_spec: + return {} + try: + keyed = self._parse_keyed_csv_spec(str(query_spec), default_key="hash") + if not keyed: + return {} + + def _alias(src: str, dest: str) -> None: + try: + values = keyed.get(src) + except Exception: + values = None + if not values: + return + try: + keyed.setdefault(dest, []).extend(list(values)) + except Exception: + pass + try: + keyed.pop(src, None) + except Exception: + pass + + for src in ("range", "ranges", "section", "sections"): + _alias(src, "clip") + for src in ("fmt", "f"): + _alias(src, "format") + for src in ("aud", "a"): + _alias(src, "audio") + + return keyed + except Exception: + return {} + + @staticmethod + def _extract_hash_override(query_spec: Optional[str], query_keyed: Dict[str, List[str]]) -> Optional[str]: + try: + hash_values = query_keyed.get("hash", []) if isinstance(query_keyed, dict) else [] + hash_candidate = hash_values[-1] if hash_values else None + if hash_candidate: + return sh.parse_single_hash_query(f"hash:{hash_candidate}") + + try: + has_non_hash_keys = bool( + query_keyed + and isinstance(query_keyed, dict) + and any(k for k in query_keyed.keys() if str(k).strip().lower() != "hash") + ) + except Exception: + has_non_hash_keys = False + if 
has_non_hash_keys: + return None + return sh.parse_single_hash_query(str(query_spec)) if query_spec else None + except Exception: + return None + + def _parse_clip_ranges_and_apply_items( + self, + *, + clip_spec: Optional[str], + query_keyed: Dict[str, List[str]], + parsed: Dict[str, Any], + query_spec: Optional[str], + ) -> tuple[Optional[List[tuple[int, int]]], bool, List[str]]: + clip_ranges: Optional[List[tuple[int, int]]] = None + clip_values: List[str] = [] + item_values: List[str] = [] + + def _uniq(values: Sequence[str]) -> List[str]: + seen: set[str] = set() + out: List[str] = [] + for v in values: + key = str(v) + if key in seen: + continue + seen.add(key) + out.append(v) + return out + + if clip_spec: + keyed = self._parse_keyed_csv_spec(str(clip_spec), default_key="clip") + clip_values.extend(keyed.get("clip", []) or []) + item_values.extend(keyed.get("item", []) or []) + + if query_keyed: + clip_values.extend(query_keyed.get("clip", []) or []) + item_values.extend(query_keyed.get("item", []) or []) + + clip_values = _uniq(clip_values) + item_values = _uniq(item_values) + + if item_values and not parsed.get("item"): + parsed["item"] = ",".join([v for v in item_values if v]) + + if clip_values: + clip_ranges = self._parse_time_ranges(",".join([v for v in clip_values if v])) + if not clip_ranges: + bad_spec = clip_spec or query_spec + log(f"Invalid clip format: {bad_spec}", file=sys.stderr) + return None, True, clip_values + + return clip_ranges, False, clip_values + + @staticmethod + def _init_storage(config: Dict[str, Any]) -> tuple[Optional[Any], bool]: + storage = None + hydrus_available = True + try: + from Store import Store + from API.HydrusNetwork import is_hydrus_available + + storage = Store(config=config or {}, suppress_debug=True) + hydrus_available = bool(is_hydrus_available(config or {})) + except Exception: + storage = None + return storage, hydrus_available + + @staticmethod + def _cookiefile_str(ytdlp_tool: YtDlpTool) -> Optional[str]: + try: + cookie_path = ytdlp_tool.resolve_cookiefile() + if cookie_path is not None and cookie_path.is_file(): + return str(cookie_path) + except Exception: + pass + return None + + def _list_formats_cached( + self, + u: str, + *, + playlist_items_value: Optional[str], + formats_cache: Dict[str, Optional[List[Dict[str, Any]]]], + ytdlp_tool: YtDlpTool, + ) -> Optional[List[Dict[str, Any]]]: + key = f"{u}||{playlist_items_value or ''}" + if key in formats_cache: + return formats_cache[key] + fmts = list_formats( + u, + no_playlist=False, + playlist_items=playlist_items_value, + cookiefile=self._cookiefile_str(ytdlp_tool), + ) + formats_cache[key] = fmts + return fmts + + def _is_browseable_format(self, fmt: Any) -> bool: + if not isinstance(fmt, dict): + return False + format_id = str(fmt.get("format_id") or "").strip() + if not format_id: + return False + ext = str(fmt.get("ext") or "").strip().lower() + if ext in {"mhtml", "json"}: + return False + note = str(fmt.get("format_note") or "").lower() + if "storyboard" in note: + return False + if format_id.lower().startswith("sb"): + return False + vcodec = str(fmt.get("vcodec", "none")) + acodec = str(fmt.get("acodec", "none")) + return not (vcodec == "none" and acodec == "none") + + def _format_id_for_query_index( + self, + query_format: str, + url: str, + formats_cache: Dict[str, Optional[List[Dict[str, Any]]]], + ytdlp_tool: YtDlpTool, + ) -> Optional[str]: + import re + + if not query_format or not re.match(r"^\s*#?\d+\s*$", str(query_format)): + return None + + try: + idx = 
int(str(query_format).lstrip("#").strip()) + except Exception: + raise ValueError(f"Invalid format index: {query_format}") + + fmts = self._list_formats_cached( + url, + playlist_items_value=None, + formats_cache=formats_cache, + ytdlp_tool=ytdlp_tool, + ) + if not fmts: + raise ValueError("Unable to list formats for the URL; cannot resolve numeric format index") + + candidate_formats = [f for f in fmts if self._is_browseable_format(f)] + filtered_formats = candidate_formats if candidate_formats else list(fmts) + + if not filtered_formats: + raise ValueError("No formats available for selection") + + if idx <= 0 or idx > len(filtered_formats): + raise ValueError(f"Format index {idx} out of range (1..{len(filtered_formats)})") + + chosen = filtered_formats[idx - 1] + selection_format_id = str(chosen.get("format_id") or "").strip() + if not selection_format_id: + raise ValueError("Selected format has no format_id") + + try: + vcodec = str(chosen.get("vcodec", "none")) + acodec = str(chosen.get("acodec", "none")) + if vcodec != "none" and acodec == "none": + selection_format_id = f"{selection_format_id}+ba" + except Exception: + pass + + return selection_format_id + + @staticmethod + def _format_selector_for_query_height(query_format: str) -> Optional[str]: + import re + + if query_format is None: + return None + + s = str(query_format).strip().lower() + m = re.match(r"^(\d{2,5})p$", s) + if not m: + return None + + try: + height = int(m.group(1)) + except Exception: + return None + + if height <= 0: + raise ValueError(f"Invalid height selection: {query_format}") + + return f"bv*[height<={height}]+ba" + + @staticmethod + def _canonicalize_url_for_storage(*, requested_url: str, ytdlp_tool: YtDlpTool, playlist_items: Optional[str]) -> str: + if playlist_items: + return str(requested_url) + try: + cf = None + try: + cookie_path = ytdlp_tool.resolve_cookiefile() + if cookie_path is not None and cookie_path.is_file(): + cf = str(cookie_path) + except Exception: + cf = None + pr = probe_url(requested_url, no_playlist=False, timeout_seconds=15, cookiefile=cf) + if isinstance(pr, dict): + for key in ("webpage_url", "original_url", "url", "requested_url"): + value = pr.get(key) + if isinstance(value, str) and value.strip(): + return value.strip() + except Exception: + pass + return str(requested_url) + + def _preflight_url_duplicate( + self, + *, + storage: Any, + hydrus_available: bool, + final_output_dir: Path, + candidate_url: str, + extra_urls: Optional[Sequence[str]] = None, + ) -> bool: + if storage is None: + debug("Preflight URL check skipped: storage unavailable") + return True + + debug(f"Preflight URL check: candidate={candidate_url}") + + try: + from SYS.metadata import normalize_urls + except Exception: + normalize_urls = None # type: ignore[assignment] + + needles: List[str] = [] + if normalize_urls is not None: + for raw in [candidate_url, *(list(extra_urls) if extra_urls else [])]: + try: + needles.extend(normalize_urls(raw)) + except Exception: + continue + if not needles: + needles = [str(candidate_url)] + + seen_needles: List[str] = [] + for needle in needles: + if needle and needle not in seen_needles: + seen_needles.append(needle) + needles = seen_needles + + try: + debug(f"Preflight URL needles: {needles}") + except Exception: + pass + + url_matches: List[Dict[str, Any]] = [] + try: + from Store.HydrusNetwork import HydrusNetwork + + backend_names_all = storage.list_searchable_backends() + backend_names: List[str] = [] + skipped: List[str] = [] + for backend_name in 
backend_names_all: + try: + backend = storage[backend_name] + except Exception: + continue + + try: + if str(backend_name).strip().lower() == "temp": + skipped.append(backend_name) + continue + except Exception: + pass + + try: + backend_location = getattr(backend, "_location", None) + if backend_location and final_output_dir: + backend_path = Path(str(backend_location)).expanduser().resolve() + temp_path = Path(str(final_output_dir)).expanduser().resolve() + if backend_path == temp_path: + skipped.append(backend_name) + continue + except Exception: + pass + + backend_names.append(backend_name) + + try: + if skipped: + debug(f"Preflight backends: {backend_names} (skipped temp: {skipped})") + else: + debug(f"Preflight backends: {backend_names}") + except Exception: + pass + + for backend_name in backend_names: + backend = storage[backend_name] + if isinstance(backend, HydrusNetwork) and not hydrus_available: + continue + + backend_hits: List[Dict[str, Any]] = [] + for needle in needles: + try: + backend_hits = backend.search(f"url:{needle}", limit=25) or [] + if backend_hits: + break + except Exception: + continue + if backend_hits: + url_matches.extend( + [ + dict(x) if isinstance(x, dict) else {"title": str(x)} + for x in backend_hits + ] + ) + + if len(url_matches) >= 25: + url_matches = url_matches[:25] + break + except Exception: + url_matches = [] + + if not url_matches: + debug("Preflight URL check: no matches") + return True + + try: + current_cmd_text = pipeline_context.get_current_command_text("") + except Exception: + current_cmd_text = "" + + try: + stage_ctx = pipeline_context.get_stage_context() + except Exception: + stage_ctx = None + + in_pipeline = bool(stage_ctx is not None or ("|" in str(current_cmd_text or ""))) + if in_pipeline: + try: + cached_cmd = pipeline_context.load_value("preflight.url_duplicates.command", default="") + cached_decision = pipeline_context.load_value("preflight.url_duplicates.continue", default=None) + except Exception: + cached_cmd = "" + cached_decision = None + + if cached_decision is not None and str(cached_cmd or "") == str(current_cmd_text or ""): + if bool(cached_decision): + return True + try: + pipeline_context.request_pipeline_stop(reason="duplicate-url declined", exit_code=0) + except Exception: + pass + return False + + table = ResultTable(f"URL already exists ({len(url_matches)} match(es))") + results_list: List[Dict[str, Any]] = [] + for item in url_matches: + if "title" not in item: + item["title"] = item.get("name") or item.get("target") or item.get("path") or "Result" + + try: + from SYS.result_table import build_display_row + except Exception: + build_display_row = None # type: ignore + + if callable(build_display_row): + display_row = build_display_row(item, keys=["title", "store", "hash", "ext", "size"]) + else: + display_row = { + "title": item.get("title"), + "store": item.get("store"), + "hash": item.get("hash") or item.get("file_hash") or item.get("sha256"), + "ext": str(item.get("ext") or ""), + "size": item.get("size") or item.get("size_bytes"), + } + table.add_result(display_row) + results_list.append(item) + + pipeline_context.set_current_stage_table(table) + pipeline_context.set_last_result_table(table, results_list) + + suspend = getattr(pipeline_context, "suspend_live_progress", None) + used_suspend = False + + cm: AbstractContextManager[Any] = nullcontext() + if callable(suspend): + try: + maybe_cm = suspend() + if maybe_cm is not None: + cm = maybe_cm # type: ignore[assignment] + used_suspend = True + except 
Exception: + cm = nullcontext() + used_suspend = False + + with cm: + get_stderr_console().print(table) + setattr(table, "_rendered_by_cmdlet", True) + answered_yes = bool(Confirm.ask("Continue anyway?", default=False, console=get_stderr_console())) + + if in_pipeline: + try: + existing = pipeline_context.load_value("preflight", default=None) + except Exception: + existing = None + preflight_cache: Dict[str, Any] = existing if isinstance(existing, dict) else {} + url_dup_cache = preflight_cache.get("url_duplicates") + if not isinstance(url_dup_cache, dict): + url_dup_cache = {} + url_dup_cache["command"] = str(current_cmd_text or "") + url_dup_cache["continue"] = bool(answered_yes) + preflight_cache["url_duplicates"] = url_dup_cache + try: + pipeline_context.store_value("preflight", preflight_cache) + except Exception: + pass + + if not answered_yes: + if in_pipeline and used_suspend: + try: + pipeline_context.request_pipeline_stop(reason="duplicate-url declined", exit_code=0) + except Exception: + pass + return False + return True + + def _preflight_url_duplicates_bulk( + self, + *, + storage: Any, + hydrus_available: bool, + final_output_dir: Path, + urls: Sequence[str], + ) -> bool: + if storage is None: + debug("Bulk URL preflight skipped: storage unavailable") + return True + + try: + current_cmd_text = pipeline_context.get_current_command_text("") + except Exception: + current_cmd_text = "" + + try: + stage_ctx = pipeline_context.get_stage_context() + except Exception: + stage_ctx = None + + in_pipeline = bool(stage_ctx is not None or ("|" in str(current_cmd_text or ""))) + if in_pipeline: + try: + cached_cmd = pipeline_context.load_value("preflight.url_duplicates.command", default="") + cached_decision = pipeline_context.load_value("preflight.url_duplicates.continue", default=None) + except Exception: + cached_cmd = "" + cached_decision = None + + if cached_decision is not None and str(cached_cmd or "") == str(current_cmd_text or ""): + if bool(cached_decision): + return True + try: + pipeline_context.request_pipeline_stop(reason="duplicate-url declined", exit_code=0) + except Exception: + pass + return False + + unique_urls: List[str] = [] + for u in urls or []: + s = str(u or "").strip() + if s and s not in unique_urls: + unique_urls.append(s) + if len(unique_urls) <= 1: + return True + + try: + from SYS.metadata import normalize_urls + except Exception: + normalize_urls = None # type: ignore[assignment] + + def _httpish(value: str) -> bool: + try: + return bool(value) and (value.startswith("http://") or value.startswith("https://")) + except Exception: + return False + + url_needles: Dict[str, List[str]] = {} + for u in unique_urls: + needles: List[str] = [] + if normalize_urls is not None: + try: + needles.extend([n for n in (normalize_urls(u) or []) if isinstance(n, str)]) + except Exception: + needles = [] + if not needles: + needles = [u] + filtered: List[str] = [] + for n in needles: + n2 = str(n or "").strip() + if not n2: + continue + if not _httpish(n2): + continue + if n2 not in filtered: + filtered.append(n2) + url_needles[u] = filtered if filtered else [u] + + backend_names: List[str] = [] + try: + backend_names_all = storage.list_searchable_backends() + except Exception: + backend_names_all = [] + + for backend_name in backend_names_all: + try: + backend = storage[backend_name] + except Exception: + continue + + try: + if str(backend_name).strip().lower() == "temp": + continue + except Exception: + pass + + try: + backend_location = getattr(backend, "_location", None) 
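+ # Skip backends whose on-disk location is the pending output directory, so the file about to be downloaded is not reported as a duplicate of itself.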
+ if backend_location and final_output_dir: + backend_path = Path(str(backend_location)).expanduser().resolve() + temp_path = Path(str(final_output_dir)).expanduser().resolve() + if backend_path == temp_path: + continue + except Exception: + pass + + backend_names.append(backend_name) + + if not backend_names: + debug("Bulk URL preflight skipped: no searchable backends") + return True + + seen_pairs: set[tuple[str, str]] = set() + matched_urls: set[str] = set() + match_rows: List[Dict[str, Any]] = [] + max_rows = 200 + + try: + from Store.HydrusNetwork import HydrusNetwork + except Exception: + HydrusNetwork = None # type: ignore + + for backend_name in backend_names: + if len(match_rows) >= max_rows: + break + try: + backend = storage[backend_name] + except Exception: + continue + + if HydrusNetwork is not None and isinstance(backend, HydrusNetwork): + if not hydrus_available: + continue + + client = getattr(backend, "_client", None) + if client is None: + continue + + for original_url, needles in url_needles.items(): + if len(match_rows) >= max_rows: + break + if (original_url, str(backend_name)) in seen_pairs: + continue + + found_hash: Optional[str] = None + found = False + for needle in (needles or [])[:3]: + if not _httpish(needle): + continue + try: + from API.HydrusNetwork import HydrusRequestSpec + + spec = HydrusRequestSpec( + method="GET", + endpoint="/add_urls/get_url_files", + query={"url": needle}, + ) + response = client._perform_request(spec) # type: ignore[attr-defined] + raw_hashes = None + if isinstance(response, dict): + raw_hashes = response.get("hashes") or response.get("file_hashes") + raw_ids = response.get("file_ids") + has_ids = isinstance(raw_ids, list) and len(raw_ids) > 0 + has_hashes = isinstance(raw_hashes, list) and len(raw_hashes) > 0 + if has_hashes: + try: + found_hash = str(raw_hashes[0]).strip() # type: ignore[index] + except Exception: + found_hash = None + if has_ids or has_hashes: + found = True + break + except Exception: + continue + + if not found: + continue + + seen_pairs.add((original_url, str(backend_name))) + matched_urls.add(original_url) + display_row = { + "title": "(exists)", + "store": str(backend_name), + "hash": found_hash or "", + "url": original_url, + "columns": [ + ("Title", "(exists)"), + ("Store", str(backend_name)), + ("Hash", found_hash or ""), + ("URL", original_url), + ], + } + match_rows.append(display_row) + continue + + for original_url, needles in url_needles.items(): + if len(match_rows) >= max_rows: + break + if (original_url, str(backend_name)) in seen_pairs: + continue + + backend_hits: List[Dict[str, Any]] = [] + for needle in (needles or [])[:3]: + try: + backend_hits = backend.search(f"url:{needle}", limit=1) or [] + if backend_hits: + break + except Exception: + continue + + if not backend_hits: + continue + + seen_pairs.add((original_url, str(backend_name))) + matched_urls.add(original_url) + hit = backend_hits[0] + title = hit.get("title") or hit.get("name") or hit.get("target") or hit.get("path") or "(exists)" + file_hash = hit.get("hash") or hit.get("file_hash") or hit.get("sha256") or "" + + try: + from SYS.result_table import build_display_row + except Exception: + build_display_row = None # type: ignore + + extracted = { + "title": str(title), + "store": str(hit.get("store") or backend_name), + "hash": str(file_hash or ""), + "ext": "", + "size": None, + } + if callable(build_display_row): + try: + extracted = build_display_row(hit, keys=["title", "store", "hash", "ext", "size"]) + except Exception: + pass 
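+ # Re-assert title/store/hash from the backend hit so the duplicate row stays populated regardless of what build_display_row returned.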
+ extracted["title"] = str(title) + extracted["store"] = str(hit.get("store") or backend_name) + extracted["hash"] = str(file_hash or "") + + ext = extracted.get("ext") + size_val = extracted.get("size") + + display_row = { + "title": str(title), + "store": str(hit.get("store") or backend_name), + "hash": str(file_hash or ""), + "ext": str(ext or ""), + "size": size_val, + "url": original_url, + "columns": [ + ("Title", str(title)), + ("Store", str(hit.get("store") or backend_name)), + ("Hash", str(file_hash or "")), + ("Ext", str(ext or "")), + ("Size", size_val), + ("URL", original_url), + ], + } + match_rows.append(display_row) + + if not match_rows: + debug("Bulk URL preflight: no matches") + return True + + table = ResultTable(f"URL already exists ({len(matched_urls)} url(s))", max_columns=10) + table.set_no_choice(True) + try: + table.set_preserve_order(True) + except Exception: + pass + + for row in match_rows: + table.add_result(row) + + try: + pipeline_context.set_last_result_table_overlay(table, match_rows) + except Exception: + pass + + suspend = getattr(pipeline_context, "suspend_live_progress", None) + cm: AbstractContextManager[Any] = nullcontext() + if callable(suspend): + try: + maybe_cm = suspend() + if maybe_cm is not None: + cm = maybe_cm # type: ignore[assignment] + except Exception: + cm = nullcontext() + + with cm: + get_stderr_console().print(table) + setattr(table, "_rendered_by_cmdlet", True) + answered_yes = bool(Confirm.ask("Continue anyway?", default=False, console=get_stderr_console())) + + if in_pipeline: + try: + existing = pipeline_context.load_value("preflight", default=None) + except Exception: + existing = None + preflight_cache: Dict[str, Any] = existing if isinstance(existing, dict) else {} + url_dup_cache = preflight_cache.get("url_duplicates") + if not isinstance(url_dup_cache, dict): + url_dup_cache = {} + url_dup_cache["command"] = str(current_cmd_text or "") + url_dup_cache["continue"] = bool(answered_yes) + preflight_cache["url_duplicates"] = url_dup_cache + try: + pipeline_context.store_value("preflight", preflight_cache) + except Exception: + pass + + if not answered_yes: + if in_pipeline: + try: + pipeline_context.request_pipeline_stop(reason="duplicate-url declined", exit_code=0) + except Exception: + pass + return False + return True + + def _maybe_show_playlist_table(self, *, url: str, ytdlp_tool: YtDlpTool) -> bool: + try: + cf = self._cookiefile_str(ytdlp_tool) + pr = probe_url(url, no_playlist=False, timeout_seconds=15, cookiefile=cf) + except Exception: + pr = None + if not isinstance(pr, dict): + return False + entries = pr.get("entries") + if not isinstance(entries, list) or len(entries) <= 1: + return False + + extractor_name = "" + try: + extractor_name = str(pr.get("extractor") or pr.get("extractor_key") or "").strip().lower() + except Exception: + extractor_name = "" + table_type: Optional[str] = None + if "bandcamp" in extractor_name: + table_type = "bandcamp" + elif "youtube" in extractor_name: + table_type = "youtube" + + max_rows = 200 + display_entries = entries[:max_rows] + + def _entry_to_url(entry: Any) -> Optional[str]: + if not isinstance(entry, dict): + return None + for key in ("webpage_url", "original_url", "url"): + v = entry.get(key) + if isinstance(v, str) and v.strip(): + s_val = v.strip() + try: + if urlparse(s_val).scheme in {"http", "https"}: + return s_val + except Exception: + return s_val + + entry_id = entry.get("id") + if isinstance(entry_id, str) and entry_id.strip(): + extractor_name_inner = 
str(pr.get("extractor") or pr.get("extractor_key") or "").lower() + if "youtube" in extractor_name_inner: + return f"https://www.youtube.com/watch?v={entry_id.strip()}" + return None + + table = ResultTable() + safe_url = str(url or "").strip() + table.title = f'download-file -url "{safe_url}"' if safe_url else "download-file" + if table_type: + try: + table.set_table(table_type) + except Exception: + table.table = table_type + table.set_source_command("download-file", []) + try: + table.set_preserve_order(True) + except Exception: + pass + + results_list: List[Dict[str, Any]] = [] + for idx, entry in enumerate(display_entries, 1): + title = None + uploader = None + duration = None + entry_url = _entry_to_url(entry) + try: + if isinstance(entry, dict): + title = entry.get("title") + uploader = entry.get("uploader") or pr.get("uploader") + duration = entry.get("duration") + except Exception: + pass + + row: Dict[str, Any] = { + "table": "download-file", + "title": str(title or f"Item {idx}"), + "detail": str(uploader or ""), + "media_kind": "playlist-item", + "playlist_index": idx, + "_selection_args": (["-url", str(entry_url)] if entry_url else ["-url", str(url), "-item", str(idx)]), + "url": entry_url, + "target": entry_url, + "columns": [ + ("#", str(idx)), + ("Title", str(title or "")), + ("Duration", str(duration or "")), + ("Uploader", str(uploader or "")), + ], + } + results_list.append(row) + table.add_result(row) + + pipeline_context.set_current_stage_table(table) + pipeline_context.set_last_result_table(table, results_list) + + try: + suspend = getattr(pipeline_context, "suspend_live_progress", None) + cm: AbstractContextManager[Any] = nullcontext() + if callable(suspend): + maybe_cm = suspend() + if maybe_cm is not None: + cm = maybe_cm # type: ignore[assignment] + with cm: + get_stderr_console().print(table) + except Exception: + pass + setattr(table, "_rendered_by_cmdlet", True) + return True + + def _maybe_show_format_table_for_single_url( + self, + *, + mode: str, + clip_spec: Any, + clip_values: Sequence[str], + playlist_items: Optional[str], + ytdl_format: Any, + supported_url: Sequence[str], + playlist_selection_handled: bool, + ytdlp_tool: YtDlpTool, + formats_cache: Dict[str, Optional[List[Dict[str, Any]]]], + storage: Any, + hydrus_available: bool, + final_output_dir: Path, + args: Sequence[str], + ) -> Optional[int]: + if ( + mode != "audio" + and not clip_spec + and not clip_values + and not playlist_items + and not ytdl_format + and len(supported_url) == 1 + and not playlist_selection_handled + ): + url = supported_url[0] + + canonical_url = self._canonicalize_url_for_storage( + requested_url=url, + ytdlp_tool=ytdlp_tool, + playlist_items=playlist_items, + ) + if not self._preflight_url_duplicate( + storage=storage, + hydrus_available=hydrus_available, + final_output_dir=final_output_dir, + candidate_url=canonical_url, + extra_urls=[url], + ): + log(f"Skipping download: {url}", file=sys.stderr) + return 0 + + formats = self._list_formats_cached( + url, + playlist_items_value=None, + formats_cache=formats_cache, + ytdlp_tool=ytdlp_tool, + ) + + if formats and len(formats) > 1: + candidate_formats = [f for f in formats if self._is_browseable_format(f)] + filtered_formats = candidate_formats if candidate_formats else list(formats) + + debug(f"Formatlist: showing {len(filtered_formats)} formats (raw={len(formats)})") + + base_cmd = f'download-file "{url}"' + remaining_args = [arg for arg in args if arg not in [url] and not arg.startswith("-")] + if remaining_args: + 
base_cmd += " " + " ".join(remaining_args) + + table = ResultTable(title=f"Available formats for {url}", max_columns=10, preserve_order=True) + table.set_table("ytdlp.formatlist") + table.set_source_command("download-file", [url]) + + results_list: List[Dict[str, Any]] = [] + for idx, fmt in enumerate(filtered_formats, 1): + resolution = fmt.get("resolution", "") + ext = fmt.get("ext", "") + vcodec = fmt.get("vcodec", "none") + acodec = fmt.get("acodec", "none") + filesize = fmt.get("filesize") + filesize_approx = fmt.get("filesize_approx") + format_id = fmt.get("format_id", "") + + selection_format_id = format_id + try: + if vcodec != "none" and acodec == "none" and format_id: + selection_format_id = f"{format_id}+ba" + except Exception: + selection_format_id = format_id + + size_str = "" + size_prefix = "" + size_bytes = filesize + if not size_bytes: + size_bytes = filesize_approx + if size_bytes: + size_prefix = "~" + try: + if isinstance(size_bytes, (int, float)) and size_bytes > 0: + size_mb = float(size_bytes) / (1024 * 1024) + size_str = f"{size_prefix}{size_mb:.1f}MB" + except Exception: + size_str = "" + + desc_parts: List[str] = [] + if resolution and resolution != "audio only": + desc_parts.append(resolution) + if ext: + desc_parts.append(str(ext).upper()) + if vcodec != "none": + desc_parts.append(f"v:{vcodec}") + if acodec != "none": + desc_parts.append(f"a:{acodec}") + if size_str: + desc_parts.append(size_str) + format_desc = " | ".join(desc_parts) + + format_dict = { + "table": "download-file", + "title": f"Format {format_id}", + "url": url, + "target": url, + "detail": format_desc, + "annotations": [ext, resolution] if resolution else [ext], + "media_kind": "format", + "cmd": base_cmd, + "columns": [ + ("ID", format_id), + ("Resolution", resolution or "N/A"), + ("Ext", ext), + ("Size", size_str or ""), + ("Video", vcodec), + ("Audio", acodec), + ], + "full_metadata": { + "format_id": format_id, + "url": url, + "item_selector": selection_format_id, + }, + "_selection_args": None, + } + + selection_args: List[str] = ["-format", selection_format_id] + try: + if (not clip_spec) and clip_values: + selection_args.extend(["-query", f"clip:{','.join([v for v in clip_values if v])}"]) + except Exception: + pass + format_dict["_selection_args"] = selection_args + + results_list.append(format_dict) + table.add_result(format_dict) + + try: + suspend = getattr(pipeline_context, "suspend_live_progress", None) + cm: AbstractContextManager[Any] = nullcontext() + if callable(suspend): + maybe_cm = suspend() + if maybe_cm is not None: + cm = maybe_cm # type: ignore[assignment] + with cm: + get_stderr_console().print(table) + except Exception: + pass + + setattr(table, "_rendered_by_cmdlet", True) + pipeline_context.set_current_stage_table(table) + pipeline_context.set_last_result_table(table, results_list) + + log(f"", file=sys.stderr) + return 0 + + return None + + def _download_supported_urls( + self, + *, + supported_url: Sequence[str], + ytdlp_tool: YtDlpTool, + args: Sequence[str], + config: Dict[str, Any], + final_output_dir: Path, + mode: str, + clip_spec: Any, + clip_ranges: Optional[List[tuple[int, int]]], + query_hash_override: Optional[str], + embed_chapters: bool, + write_sub: bool, + quiet_mode: bool, + playlist_items: Optional[str], + ytdl_format: Any, + skip_per_url_preflight: bool, + forced_single_format_id: Optional[str], + forced_single_format_for_batch: bool, + formats_cache: Dict[str, Optional[List[Dict[str, Any]]]], + storage: Any, + hydrus_available: bool, + ) -> int: 
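+        """Download every supported yt-dlp URL and emit pipe objects downstream.
+
+        Per URL this runs the duplicate-URL preflight (unless the bulk preflight
+        already handled it), resolves the yt-dlp format string (including the
+        forced single-format fallback for playlists), downloads with a 5-minute
+        timeout, then builds pipe objects with chapter/subtitle notes and clip
+        decorations before emitting them to the pipeline.
+        """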
+ downloaded_count = 0 + downloaded_pipe_objects: List[Dict[str, Any]] = [] + pipe_seq = 0 + clip_sections_spec = self._build_clip_sections_spec(clip_ranges) + + if clip_sections_spec: + try: + debug(f"Clip sections spec: {clip_sections_spec}") + except Exception: + pass + + for url in supported_url: + try: + debug(f"Processing: {url}") + + canonical_url = self._canonicalize_url_for_storage( + requested_url=url, + ytdlp_tool=ytdlp_tool, + playlist_items=playlist_items, + ) + + if not skip_per_url_preflight: + if not self._preflight_url_duplicate( + storage=storage, + hydrus_available=hydrus_available, + final_output_dir=final_output_dir, + candidate_url=canonical_url, + extra_urls=[url], + ): + log(f"Skipping download: {url}", file=sys.stderr) + continue + + PipelineProgress(pipeline_context).begin_steps(2) + + actual_format = ytdl_format + actual_playlist_items = playlist_items + + if playlist_items and not ytdl_format: + import re + + if re.search(r"[^0-9,-]", playlist_items): + actual_format = playlist_items + actual_playlist_items = None + + if mode == "audio" and not actual_format: + actual_format = "bestaudio" + + if mode == "video" and not actual_format: + configured = (ytdlp_tool.default_format("video") or "").strip() + if configured and configured != "bestvideo+bestaudio/best": + actual_format = configured + + forced_single_applied = False + if ( + forced_single_format_for_batch + and forced_single_format_id + and not ytdl_format + and not actual_playlist_items + ): + actual_format = forced_single_format_id + forced_single_applied = True + + if ( + actual_format + and isinstance(actual_format, str) + and mode != "audio" + and "+" not in actual_format + and "/" not in actual_format + and "[" not in actual_format + and actual_format not in {"best", "bv", "ba", "b"} + and not forced_single_applied + ): + try: + formats = self._list_formats_cached( + url, + playlist_items_value=actual_playlist_items, + formats_cache=formats_cache, + ytdlp_tool=ytdlp_tool, + ) + if formats: + fmt_match = next((f for f in formats if str(f.get("format_id", "")) == actual_format), None) + if fmt_match: + vcodec = str(fmt_match.get("vcodec", "none")) + acodec = str(fmt_match.get("acodec", "none")) + if vcodec != "none" and acodec == "none": + debug(f"Selected video-only format {actual_format}; using {actual_format}+ba for audio") + actual_format = f"{actual_format}+ba" + except Exception: + pass + + attempted_single_format_fallback = False + while True: + try: + opts = DownloadOptions( + url=url, + mode=mode, + output_dir=final_output_dir, + ytdl_format=actual_format, + cookies_path=ytdlp_tool.resolve_cookiefile(), + clip_sections=clip_sections_spec, + playlist_items=actual_playlist_items, + quiet=quiet_mode, + no_playlist=False, + embed_chapters=embed_chapters, + write_sub=write_sub, + ) + + PipelineProgress(pipeline_context).step("downloading") + debug(f"Starting download with 5-minute timeout...") + result_obj = _download_with_timeout(opts, timeout_seconds=300) + debug(f"Download completed, building pipe object...") + break + except DownloadError as e: + cause = getattr(e, "__cause__", None) + detail = "" + try: + detail = str(cause or "") + except Exception: + detail = "" + + if ("requested format is not available" in (detail or "").lower()) and mode != "audio": + if ( + forced_single_format_for_batch + and forced_single_format_id + and not ytdl_format + and not actual_playlist_items + and not attempted_single_format_fallback + ): + attempted_single_format_fallback = True + actual_format = 
forced_single_format_id + debug(f"Only one format available (playlist preflight); retrying with: {actual_format}") + continue + + formats = self._list_formats_cached( + url, + playlist_items_value=actual_playlist_items, + formats_cache=formats_cache, + ytdlp_tool=ytdlp_tool, + ) + if ( + (not attempted_single_format_fallback) + and isinstance(formats, list) + and len(formats) == 1 + and isinstance(formats[0], dict) + ): + only = formats[0] + fallback_format = str(only.get("format_id") or "").strip() + selection_format_id = fallback_format + try: + vcodec = str(only.get("vcodec", "none")) + acodec = str(only.get("acodec", "none")) + if vcodec != "none" and acodec == "none" and fallback_format: + selection_format_id = f"{fallback_format}+ba" + except Exception: + selection_format_id = fallback_format + + if selection_format_id: + attempted_single_format_fallback = True + actual_format = selection_format_id + debug(f"Only one format available; retrying with: {actual_format}") + continue + + if formats: + formats_to_show = formats + + table = ResultTable(title=f"Available formats for {url}", max_columns=10, preserve_order=True) + table.set_table("ytdlp.formatlist") + table.set_source_command("download-file", [url]) + + results_list: List[Dict[str, Any]] = [] + for idx, fmt in enumerate(formats_to_show, 1): + resolution = fmt.get("resolution", "") + ext = fmt.get("ext", "") + vcodec = fmt.get("vcodec", "none") + acodec = fmt.get("acodec", "none") + filesize = fmt.get("filesize") + filesize_approx = fmt.get("filesize_approx") + format_id = fmt.get("format_id", "") + + selection_format_id = format_id + try: + if vcodec != "none" and acodec == "none" and format_id: + selection_format_id = f"{format_id}+ba" + except Exception: + selection_format_id = format_id + + size_str = "" + size_prefix = "" + size_bytes = filesize + if not size_bytes: + size_bytes = filesize_approx + if size_bytes: + size_prefix = "~" + try: + if isinstance(size_bytes, (int, float)) and size_bytes > 0: + size_mb = float(size_bytes) / (1024 * 1024) + size_str = f"{size_prefix}{size_mb:.1f}MB" + except Exception: + size_str = "" + + desc_parts: List[str] = [] + if resolution and resolution != "audio only": + desc_parts.append(str(resolution)) + if ext: + desc_parts.append(str(ext).upper()) + if vcodec != "none": + desc_parts.append(f"v:{vcodec}") + if acodec != "none": + desc_parts.append(f"a:{acodec}") + if size_str: + desc_parts.append(size_str) + format_desc = " | ".join(desc_parts) + + format_dict: Dict[str, Any] = { + "table": "download-file", + "title": f"Format {format_id}", + "url": url, + "target": url, + "detail": format_desc, + "media_kind": "format", + "columns": [ + ("ID", format_id), + ("Resolution", resolution or "N/A"), + ("Ext", ext), + ("Size", size_str or ""), + ("Video", vcodec), + ("Audio", acodec), + ], + "full_metadata": { + "format_id": format_id, + "url": url, + "item_selector": selection_format_id, + }, + "_selection_args": ["-format", selection_format_id], + } + + results_list.append(format_dict) + table.add_result(format_dict) + + pipeline_context.set_current_stage_table(table) + pipeline_context.set_last_result_table(table, results_list) + + try: + suspend = getattr(pipeline_context, "suspend_live_progress", None) + cm: AbstractContextManager[Any] = nullcontext() + if callable(suspend): + maybe_cm = suspend() + if maybe_cm is not None: + cm = maybe_cm # type: ignore[assignment] + with cm: + get_stderr_console().print(table) + except Exception: + pass + + 
PipelineProgress(pipeline_context).step("awaiting selection") + + log("Requested format is not available; select a working format with @N", file=sys.stderr) + return 0 + + raise + + results_to_emit: List[Any] = [] + if isinstance(result_obj, list): + results_to_emit = list(result_obj) + else: + paths = getattr(result_obj, "paths", None) + if isinstance(paths, list) and paths: + for p in paths: + try: + p_path = Path(p) + except Exception: + continue + try: + if p_path.suffix.lower() in _best_subtitle_sidecar.__defaults__[0]: + continue + except Exception: + pass + if not p_path.exists() or p_path.is_dir(): + continue + try: + hv = sha256_file(p_path) + except Exception: + hv = None + results_to_emit.append( + DownloadMediaResult( + path=p_path, + info=getattr(result_obj, "info", {}) or {}, + tag=list(getattr(result_obj, "tag", []) or []), + source_url=getattr(result_obj, "source_url", None) or opts.url, + hash_value=hv, + ) + ) + else: + results_to_emit = [result_obj] + + pipe_objects: List[Dict[str, Any]] = [] + for downloaded in results_to_emit: + po = self._build_pipe_object(downloaded, url, opts) + pipe_seq += 1 + try: + po.setdefault("pipe_index", pipe_seq) + except Exception: + pass + + try: + info = downloaded.info if isinstance(getattr(downloaded, "info", None), dict) else {} + except Exception: + info = {} + chapters_text = _format_chapters_note(info) if embed_chapters else None + if chapters_text: + notes = po.get("notes") + if not isinstance(notes, dict): + notes = {} + notes.setdefault("chapters", chapters_text) + po["notes"] = notes + + if write_sub: + try: + media_path = Path(str(po.get("path") or "")) + except Exception: + media_path = None + + if media_path is not None and media_path.exists() and media_path.is_file(): + sub_path = _best_subtitle_sidecar(media_path) + if sub_path is not None: + sub_text = _read_text_file(sub_path) + if sub_text: + notes = po.get("notes") + if not isinstance(notes, dict): + notes = {} + notes["sub"] = sub_text + po["notes"] = notes + try: + sub_path.unlink() + except Exception: + pass + + pipe_objects.append(po) + + try: + if clip_ranges and len(pipe_objects) == len(clip_ranges): + source_hash = query_hash_override or self._find_existing_hash_for_url( + storage, + canonical_url, + hydrus_available=hydrus_available, + ) + self._apply_clip_decorations(pipe_objects, clip_ranges, source_king_hash=source_hash) + except Exception: + pass + + debug(f"Emitting {len(pipe_objects)} result(s) to pipeline...") + + PipelineProgress(pipeline_context).step("finalized") + + stage_ctx = pipeline_context.get_stage_context() + emit_enabled = bool(stage_ctx is not None) + for pipe_obj_dict in pipe_objects: + if emit_enabled: + pipeline_context.emit(pipe_obj_dict) + + if pipe_obj_dict.get("url"): + pipe_obj = coerce_to_pipe_object(pipe_obj_dict) + register_url_with_local_library(pipe_obj, config) + + try: + downloaded_pipe_objects.append(pipe_obj_dict) + except Exception: + pass + + downloaded_count += len(pipe_objects) + debug("✓ Downloaded and emitted") + + except DownloadError as e: + log(f"Download failed for {url}: {e}", file=sys.stderr) + except Exception as e: + log(f"Error processing {url}: {e}", file=sys.stderr) + + if downloaded_count > 0: + debug(f"✓ Successfully processed {downloaded_count} URL(s)") + return 0 + + log("No downloads completed", file=sys.stderr) + return 1 + + def _run_streaming_urls( + self, + *, + streaming_urls: List[str], + args: Sequence[str], + config: Dict[str, Any], + parsed: Dict[str, Any], + ) -> int: + try: + debug("Starting 
streaming download handler") + + ytdlp_tool = YtDlpTool(config) + + raw_url = list(streaming_urls) + supported_url, unsupported_list = self._filter_supported_urls(raw_url) + + if not supported_url: + log("No yt-dlp-supported url to download", file=sys.stderr) + return 1 + + if unsupported_list: + debug(f"Skipping {len(unsupported_list)} unsupported url (use direct HTTP mode)") + + final_output_dir = self._resolve_streaming_output_dir(parsed, config) + if not final_output_dir: + return 1 + + debug(f"Output directory: {final_output_dir}") + + clip_spec = parsed.get("clip") + query_spec = parsed.get("query") + + query_keyed = self._parse_query_keyed_spec(str(query_spec) if query_spec is not None else None) + + query_hash_override = self._extract_hash_override(str(query_spec) if query_spec is not None else None, query_keyed) + + embed_chapters = True + write_sub = True + + query_format: Optional[str] = None + try: + fmt_values = query_keyed.get("format", []) if isinstance(query_keyed, dict) else [] + fmt_candidate = fmt_values[-1] if fmt_values else None + if fmt_candidate is not None: + query_format = str(fmt_candidate).strip() + except Exception: + query_format = None + + query_audio: Optional[bool] = None + try: + audio_values = query_keyed.get("audio", []) if isinstance(query_keyed, dict) else [] + audio_candidate = audio_values[-1] if audio_values else None + if audio_candidate is not None: + s_val = str(audio_candidate).strip().lower() + if s_val in {"1", "true", "t", "yes", "y", "on"}: + query_audio = True + elif s_val in {"0", "false", "f", "no", "n", "off"}: + query_audio = False + elif s_val: + query_audio = True + except Exception: + query_audio = None + + query_wants_audio = False + if query_format: + try: + query_wants_audio = str(query_format).strip().lower() == "audio" + except Exception: + query_wants_audio = False + + audio_flag = bool(parsed.get("audio") is True) + wants_audio = audio_flag + if query_audio is not None: + wants_audio = wants_audio or bool(query_audio) + else: + wants_audio = wants_audio or bool(query_wants_audio) + mode = "audio" if wants_audio else "video" + + clip_ranges, clip_invalid, clip_values = self._parse_clip_ranges_and_apply_items( + clip_spec=str(clip_spec) if clip_spec is not None else None, + query_keyed=query_keyed, + parsed=parsed, + query_spec=str(query_spec) if query_spec is not None else None, + ) + if clip_invalid: + return 1 + + if clip_ranges: + try: + debug(f"Clip ranges: {clip_ranges}") + except Exception: + pass + + quiet_mode = bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False + + storage, hydrus_available = self._init_storage(config if isinstance(config, dict) else {}) + + formats_cache: Dict[str, Optional[List[Dict[str, Any]]]] = {} + playlist_items = str(parsed.get("item")) if parsed.get("item") else None + ytdl_format = parsed.get("format") + if not ytdl_format and query_format and not query_wants_audio: + try: + height_selector = self._format_selector_for_query_height(query_format) + except ValueError as e: + log(f"Error parsing format selection: {e}", file=sys.stderr) + return 1 + + if height_selector: + ytdl_format = height_selector + else: + import re + + if not re.match(r"^\s*#?\d+\s*$", str(query_format)): + ytdl_format = query_format + playlist_selection_handled = False + + if len(supported_url) == 1 and not playlist_items and not ytdl_format: + candidate_url = supported_url[0] + + if query_format and not query_wants_audio: + try: + idx_fmt = self._format_id_for_query_index(query_format, 
candidate_url, formats_cache, ytdlp_tool) + except ValueError as e: + log(f"Error parsing format selection: {e}", file=sys.stderr) + return 1 + if idx_fmt: + debug(f"Resolved numeric format selection '{query_format}' -> {idx_fmt}") + ytdl_format = idx_fmt + + if not ytdl_format: + if self._maybe_show_playlist_table(url=candidate_url, ytdlp_tool=ytdlp_tool): + playlist_selection_handled = True + try: + last_table = pipeline_context.get_last_result_table() if hasattr(pipeline_context, "get_last_result_table") else None + if hasattr(last_table, "rows") and getattr(last_table, "rows", None): + sample_index = 1 + sample_fmt_id = None + try: + sample_row = last_table.rows[0] + sample_fmt_id = sample_row._full_metadata.get("item_selector") if getattr(sample_row, "_full_metadata", None) else None + except Exception: + sample_fmt_id = None + + try: + sample_pipeline = f'download-file "{candidate_url}"' + hint = ( + "To select non-interactively, re-run with an explicit format: " + "e.g. mm \"{pipeline} -format {fmt} | add-file -store \" or " + "mm \"{pipeline} -query 'format:{index}' | add-file -store \"" + ).format( + pipeline=sample_pipeline, + fmt=sample_fmt_id or "", + index=sample_index, + ) + log(hint, file=sys.stderr) + except Exception: + pass + except Exception: + pass + + return 0 + + skip_per_url_preflight = False + if len(supported_url) > 1: + if not self._preflight_url_duplicates_bulk( + storage=storage, + hydrus_available=hydrus_available, + final_output_dir=final_output_dir, + urls=list(supported_url), + ): + return 0 + skip_per_url_preflight = True + + forced_single_format_id: Optional[str] = None + forced_single_format_for_batch = False + if len(supported_url) > 1 and not playlist_items and not ytdl_format: + try: + sample_url = str(supported_url[0]) + fmts = self._list_formats_cached( + sample_url, + playlist_items_value=None, + formats_cache=formats_cache, + ytdlp_tool=ytdlp_tool, + ) + if isinstance(fmts, list) and len(fmts) == 1 and isinstance(fmts[0], dict): + only_id = str(fmts[0].get("format_id") or "").strip() + if only_id: + forced_single_format_id = only_id + forced_single_format_for_batch = True + debug( + f"Playlist format preflight: only one format available; using {forced_single_format_id} for all items" + ) + except Exception: + forced_single_format_id = None + forced_single_format_for_batch = False + + early_ret = self._maybe_show_format_table_for_single_url( + mode=mode, + clip_spec=clip_spec, + clip_values=clip_values, + playlist_items=playlist_items, + ytdl_format=ytdl_format, + supported_url=supported_url, + playlist_selection_handled=playlist_selection_handled, + ytdlp_tool=ytdlp_tool, + formats_cache=formats_cache, + storage=storage, + hydrus_available=hydrus_available, + final_output_dir=final_output_dir, + args=args, + ) + if early_ret is not None: + return int(early_ret) + + return self._download_supported_urls( + supported_url=supported_url, + ytdlp_tool=ytdlp_tool, + args=args, + config=config, + final_output_dir=final_output_dir, + mode=mode, + clip_spec=clip_spec, + clip_ranges=clip_ranges, + query_hash_override=query_hash_override, + embed_chapters=embed_chapters, + write_sub=write_sub, + quiet_mode=quiet_mode, + playlist_items=playlist_items, + ytdl_format=ytdl_format, + skip_per_url_preflight=skip_per_url_preflight, + forced_single_format_id=forced_single_format_id, + forced_single_format_for_batch=forced_single_format_for_batch, + formats_cache=formats_cache, + storage=storage, + hydrus_available=hydrus_available, + ) + + except Exception as e: + 
log(f"Error in streaming download handler: {e}", file=sys.stderr) + return 1 + + def _resolve_streaming_output_dir(self, parsed: Dict[str, Any], config: Dict[str, Any]) -> Optional[Path]: + path_override = parsed.get("path") + if path_override: + try: + candidate = Path(str(path_override)).expanduser() + if candidate.suffix: + candidate = candidate.parent + candidate.mkdir(parents=True, exist_ok=True) + debug(f"Using output directory override: {candidate}") + return candidate + except Exception as e: + log(f"Invalid -path output directory: {e}", file=sys.stderr) + return None + + try: + temp_value = (config or {}).get("temp") if isinstance(config, dict) else None + except Exception: + temp_value = None + if temp_value: + try: + candidate = Path(str(temp_value)).expanduser() + candidate.mkdir(parents=True, exist_ok=True) + debug(f"Using config temp directory: {candidate}") + return candidate + except Exception as e: + log(f"Cannot use configured temp directory '{temp_value}': {e}", file=sys.stderr) + return None + + try: + import tempfile + + candidate = Path(tempfile.gettempdir()) / "Medios-Macina" + candidate.mkdir(parents=True, exist_ok=True) + debug(f"Using OS temp directory: {candidate}") + return candidate + except Exception as e: + log(f"Cannot create OS temp directory: {e}", file=sys.stderr) + return None + + def _parse_time_ranges(self, spec: str) -> List[tuple[int, int]]: + def _to_seconds(ts: str) -> Optional[int]: + ts = str(ts).strip() + if not ts: + return None + + try: + unit_match = re.fullmatch(r"(?i)\s*(?:(?P\d+)h)?\s*(?:(?P\d+)m)?\s*(?:(?P\d+(?:\.\d+)?)s)?\s*", ts) + except Exception: + unit_match = None + if unit_match and unit_match.group(0).strip() and any(unit_match.group(g) for g in ("h", "m", "s")): + try: + hours = int(unit_match.group("h") or 0) + minutes = int(unit_match.group("m") or 0) + seconds = float(unit_match.group("s") or 0) + total = (hours * 3600) + (minutes * 60) + seconds + return int(total) + except Exception: + return None + + if ":" in ts: + parts = [p.strip() for p in ts.split(":")] + if len(parts) == 2: + hh_s = "0" + mm_s, ss_s = parts + elif len(parts) == 3: + hh_s, mm_s, ss_s = parts + else: + return None + + try: + hours = int(hh_s) + minutes = int(mm_s) + seconds = float(ss_s) + total = (hours * 3600) + (minutes * 60) + seconds + return int(total) + except Exception: + return None + + try: + return int(float(ts)) + except Exception: + return None + + ranges: List[tuple[int, int]] = [] + if not spec: + return ranges + + for piece in str(spec).split(","): + piece = piece.strip() + if not piece: + continue + if "-" not in piece: + return [] + start_s, end_s = [p.strip() for p in piece.split("-", 1)] + start = _to_seconds(start_s) + end = _to_seconds(end_s) + if start is None or end is None or start >= end: + return [] + ranges.append((start, end)) + + return ranges + + @staticmethod + def _parse_keyed_csv_spec(spec: str, *, default_key: str) -> Dict[str, List[str]]: + out: Dict[str, List[str]] = {} + if not isinstance(spec, str): + spec = str(spec) + text = spec.strip() + if not text: + return out + + active = (default_key or "").strip().lower() or "clip" + key_pattern = re.compile(r"^([A-Za-z_][A-Za-z0-9_-]*)\s*:\s*(.*)$") + + for raw_piece in text.split(","): + piece = raw_piece.strip() + if not piece: + continue + + m = key_pattern.match(piece) + if m: + active = (m.group(1) or "").strip().lower() or active + value = (m.group(2) or "").strip() + if value: + out.setdefault(active, []).append(value) + continue + + out.setdefault(active, 
[]).append(piece) + + return out + + def _build_clip_sections_spec(self, clip_ranges: Optional[List[tuple[int, int]]]) -> Optional[str]: + ranges: List[str] = [] + if clip_ranges: + for start_s, end_s in clip_ranges: + ranges.append(f"{start_s}-{end_s}") + return ",".join(ranges) if ranges else None + + def _build_pipe_object(self, download_result: Any, url: str, opts: DownloadOptions) -> Dict[str, Any]: + info: Dict[str, Any] = download_result.info if isinstance(download_result.info, dict) else {} + media_path = Path(download_result.path) + hash_value = download_result.hash_value or self._compute_file_hash(media_path) + title = info.get("title") or media_path.stem + tag = list(download_result.tag or []) + + if title and f"title:{title}" not in tag: + tag.insert(0, f"title:{title}") + + final_url = None + try: + page_url = info.get("webpage_url") or info.get("original_url") or info.get("url") + if page_url: + final_url = str(page_url) + except Exception: + final_url = None + if not final_url and url: + final_url = str(url) + + return { + "path": str(media_path), + "hash": hash_value, + "title": title, + "url": final_url, + "tag": tag, + "action": "cmdlet:download-file", + "is_temp": True, + "ytdl_format": getattr(opts, "ytdl_format", None), + "store": getattr(opts, "storage_name", None) or getattr(opts, "storage_location", None) or "PATH", + "media_kind": "video" if opts.mode == "video" else "audio", + } + + @staticmethod + def _normalise_hash_hex(value: Optional[str]) -> Optional[str]: + if not value or not isinstance(value, str): + return None + candidate = value.strip().lower() + if len(candidate) == 64 and all(c in "0123456789abcdef" for c in candidate): + return candidate + return None + + @classmethod + def _extract_hash_from_search_hit(cls, hit: Any) -> Optional[str]: + if not isinstance(hit, dict): + return None + for key in ("hash", "hash_hex", "file_hash", "hydrus_hash"): + v = hit.get(key) + normalized = cls._normalise_hash_hex(str(v) if v is not None else None) + if normalized: + return normalized + return None + + @classmethod + def _find_existing_hash_for_url( + cls, storage: Any, canonical_url: str, *, hydrus_available: bool + ) -> Optional[str]: + if storage is None or not canonical_url: + return None + try: + from Store.HydrusNetwork import HydrusNetwork + except Exception: + HydrusNetwork = None # type: ignore + + try: + backend_names = list(storage.list_searchable_backends() or []) + except Exception: + backend_names = [] + + for backend_name in backend_names: + try: + backend = storage[backend_name] + except Exception: + continue + try: + if str(backend_name).strip().lower() == "temp": + continue + except Exception: + pass + try: + if HydrusNetwork is not None and isinstance(backend, HydrusNetwork) and not hydrus_available: + continue + except Exception: + pass + + try: + hits = backend.search(f"url:{canonical_url}", limit=5) or [] + except Exception: + hits = [] + for hit in hits: + extracted = cls._extract_hash_from_search_hit(hit) + if extracted: + return extracted + + return None + + @staticmethod + def _format_timecode(seconds: int, *, force_hours: bool) -> str: + total = max(0, int(seconds)) + minutes, secs = divmod(total, 60) + hours, minutes = divmod(minutes, 60) + if force_hours: + return f"{hours:02d}:{minutes:02d}:{secs:02d}" + return f"{minutes:02d}:{secs:02d}" + + @classmethod + def _format_clip_range(cls, start_s: int, end_s: int) -> str: + force_hours = bool(start_s >= 3600 or end_s >= 3600) + return f"{cls._format_timecode(start_s, 
force_hours=force_hours)}-{cls._format_timecode(end_s, force_hours=force_hours)}" + + @classmethod + def _apply_clip_decorations( + cls, pipe_objects: List[Dict[str, Any]], clip_ranges: List[tuple[int, int]], *, source_king_hash: Optional[str] + ) -> None: + if not pipe_objects or len(pipe_objects) != len(clip_ranges): + return + + for po, (start_s, end_s) in zip(pipe_objects, clip_ranges): + clip_range = cls._format_clip_range(start_s, end_s) + clip_tag = f"clip:{clip_range}" + + po["title"] = clip_tag + + tags = po.get("tag") + if not isinstance(tags, list): + tags = [] + + tags = [t for t in tags if not str(t).strip().lower().startswith("title:")] + tags = [t for t in tags if not str(t).strip().lower().startswith("relationship:")] + tags.insert(0, f"title:{clip_tag}") + + if clip_tag not in tags: + tags.append(clip_tag) + + po["tag"] = tags + + if len(pipe_objects) < 2: + return + + hashes: List[str] = [] + for po in pipe_objects: + h_val = cls._normalise_hash_hex(str(po.get("hash") or "")) + hashes.append(h_val or "") + + king_hash = cls._normalise_hash_hex(source_king_hash) if source_king_hash else None + if not king_hash: + king_hash = hashes[0] if hashes and hashes[0] else None + if not king_hash: + return + + alt_hashes: List[str] = [h for h in hashes if h and h != king_hash] + if not alt_hashes: + return + + for po in pipe_objects: + po["relationships"] = {"king": [king_hash], "alt": list(alt_hashes)} + def _run_impl( self, result: Any, @@ -1267,17 +3128,40 @@ class Download_File(Cmdlet): log("No url or piped items to download", file=sys.stderr) return 1 + streaming_candidates = self._append_urls_from_piped_result(list(raw_url), result) + supported_streaming, unsupported_streaming = self._filter_supported_urls(streaming_candidates) + + streaming_exit_code: Optional[int] = None + streaming_downloaded = 0 + if supported_streaming: + streaming_exit_code = self._run_streaming_urls( + streaming_urls=supported_streaming, + args=args, + config=config, + parsed=parsed, + ) + if streaming_exit_code == 0: + streaming_downloaded += 1 + + raw_url = [u for u in raw_url if u not in supported_streaming] + if not raw_url and not unsupported_streaming: + piped_items = [] + + if not raw_url and not piped_items: + return int(streaming_exit_code or 0) + quiet_mode = ( bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False ) - ia_picker_exit = self._maybe_show_internetarchive_formats( + ia_picker_exit = ia_provider.maybe_show_formats_table( raw_urls=raw_url, piped_items=piped_items, parsed=parsed, config=config, quiet_mode=quiet_mode, + get_field=get_field, ) if ia_picker_exit is not None: return int(ia_picker_exit) @@ -1325,10 +3209,13 @@ class Download_File(Cmdlet): progress=progress, ) - if downloaded_count > 0: + if downloaded_count > 0 or streaming_downloaded > 0: debug(f"✓ Successfully processed {downloaded_count} file(s)") return 0 + if streaming_exit_code is not None: + return int(streaming_exit_code) + log("No downloads completed", file=sys.stderr) return 1 diff --git a/cmdlet/download_media.py b/cmdlet/download_media.py deleted file mode 100644 index 1acce20..0000000 --- a/cmdlet/download_media.py +++ /dev/null @@ -1,3980 +0,0 @@ -"""Download media from url using yt-dlp (streaming sites only). - -Focused cmdlet for video/audio downloads from yt-dlp-supported sites: -- YouTube, Twitch, Dailymotion, Vimeo, etc. 
-- No direct file downloads (use download-file for that) -- Playlist detection with item selection -- Clip extraction (time ranges) -- Format selection and audio/video modes -- Tags extraction and metadata integration -""" - -from __future__ import annotations - -import glob # noqa: F401 -import hashlib -import json # noqa: F401 -import random -import re -import string -import subprocess -import sys -import tempfile -import time -import traceback -from contextlib import AbstractContextManager, nullcontext -from pathlib import Path -from typing import Any, Dict, Iterator, List, Optional, Sequence, cast -from urllib.parse import urlparse - -from SYS.logger import log, debug -from SYS.pipeline_progress import PipelineProgress -from SYS.utils import sha256_file -from SYS.models import DownloadError, DownloadOptions, DownloadMediaResult, DebugLogger, ProgressBar -from SYS import pipeline as pipeline_context -from SYS.result_table import ResultTable -from rich.prompt import Confirm -from SYS.rich_display import stderr_console as get_stderr_console -from . import _shared as sh - -QueryArg = sh.QueryArg - -from tool.ytdlp import YtDlpTool - -from . import _shared as sh - -Cmdlet = sh.Cmdlet -CmdletArg = sh.CmdletArg -SharedArgs = sh.SharedArgs -create_pipe_object_result = sh.create_pipe_object_result -parse_cmdlet_args = sh.parse_cmdlet_args -register_url_with_local_library = sh.register_url_with_local_library -coerce_to_pipe_object = sh.coerce_to_pipe_object -get_field = sh.get_field - - -def _live_ui_and_pipe_index() -> tuple[Optional[Any], int]: - ui = None - try: - ui = ( - pipeline_context.get_live_progress() - if hasattr(pipeline_context, - "get_live_progress") else None - ) - except Exception: - ui = None - - pipe_idx: int = 0 - try: - stage_ctx = ( - pipeline_context.get_stage_context() - if hasattr(pipeline_context, - "get_stage_context") else None - ) - maybe_idx = getattr( - stage_ctx, - "pipe_index", - None - ) if stage_ctx is not None else None - if isinstance(maybe_idx, int): - pipe_idx = int(maybe_idx) - except Exception: - pipe_idx = 0 - - return ui, pipe_idx - - -def _begin_live_steps(total_steps: int) -> None: - """Declare the total number of steps for the current pipe.""" - ui, pipe_idx = _live_ui_and_pipe_index() - if ui is None: - return - try: - begin = getattr(ui, "begin_pipe_steps", None) - if callable(begin): - begin(int(pipe_idx), total_steps=int(total_steps)) - except Exception: - return - - -def _step(text: str) -> None: - """Emit a *new* step (increments i/N and advances percent automatically).""" - ui, pipe_idx = _live_ui_and_pipe_index() - if ui is None: - return - try: - adv = getattr(ui, "advance_pipe_step", None) - if callable(adv): - adv(int(pipe_idx), str(text)) - except Exception: - return - - -def _set_pipe_percent(percent: int) -> None: - """Best-effort percent update without changing step text.""" - ui, pipe_idx = _live_ui_and_pipe_index() - if ui is None: - return - try: - set_pct = getattr(ui, "set_pipe_percent", None) - if callable(set_pct): - set_pct(int(pipe_idx), int(percent)) - except Exception: - return - - -def _print_table_suspended(table: Any) -> None: - """Print a Rich table while pausing Live progress if active.""" - suspend = getattr(pipeline_context, "suspend_live_progress", None) - cm: AbstractContextManager[Any] = nullcontext() - if callable(suspend): - try: - maybe_cm = suspend() - if maybe_cm is not None: - cm = maybe_cm # type: ignore[assignment] - except Exception: - cm = nullcontext() - with cm: - get_stderr_console().print(table) - - 
-# Minimal inlined helpers from helper/download.py (is_url_supported_by_ytdlp, list_formats) -try: - import yt_dlp # type: ignore - from yt_dlp.extractor import gen_extractors # type: ignore -except Exception as exc: - yt_dlp = None # type: ignore - gen_extractors = None # type: ignore - YTDLP_IMPORT_ERROR = exc -else: - YTDLP_IMPORT_ERROR = None - -try: - from SYS.metadata import extract_ytdlp_tags -except ImportError: - extract_ytdlp_tags = None - -_EXTRACTOR_CACHE: List[Any] | None = None - -# Reused progress formatter for yt-dlp callbacks (stderr only). -_YTDLP_PROGRESS_BAR = ProgressBar() - -_SUBTITLE_EXTS = (".vtt", ".srt", ".ass", ".ssa", ".lrc") - - -def _format_chapters_note(info: Dict[str, Any]) -> Optional[str]: - """Format yt-dlp chapter metadata into a stable, note-friendly text. - - Output is one chapter per line, e.g.: - 00:00 Intro - 01:23-02:10 Topic name - """ - try: - chapters = info.get("chapters") - except Exception: - chapters = None - - if not isinstance(chapters, list) or not chapters: - return None - - rows: List[tuple[int, Optional[int], str]] = [] - max_t = 0 - for ch in chapters: - if not isinstance(ch, dict): - continue - start_raw = ch.get("start_time") - end_raw = ch.get("end_time") - title_raw = ch.get("title") or ch.get("name") or ch.get("chapter") - - try: - if start_raw is None: - continue - start_s = int(float(start_raw)) - except Exception: - continue - - end_s: Optional[int] = None - try: - if end_raw is not None: - end_s = int(float(end_raw)) - except Exception: - end_s = None - - title = str(title_raw).strip() if title_raw is not None else "" - rows.append((start_s, end_s, title)) - try: - max_t = max(max_t, start_s, end_s or 0) - except Exception: - max_t = max(max_t, start_s) - - if not rows: - return None - - force_hours = bool(max_t >= 3600) - - def _tc(seconds: int) -> str: - total = max(0, int(seconds)) - minutes, secs = divmod(total, 60) - hours, minutes = divmod(minutes, 60) - if force_hours: - return f"{hours:02d}:{minutes:02d}:{secs:02d}" - return f"{minutes:02d}:{secs:02d}" - - lines: List[str] = [] - for start_s, end_s, title in sorted( - rows, key=lambda r: (r[0], r[1] if r[1] is not None else 10**9, r[2]) - ): - if end_s is not None and end_s > start_s: - prefix = f"{_tc(start_s)}-{_tc(end_s)}" - else: - prefix = _tc(start_s) - line = f"{prefix} {title}".strip() - if line: - lines.append(line) - - text = "\n".join(lines).strip() - return text or None - - -def _best_subtitle_sidecar(media_path: Path) -> Optional[Path]: - """Find the most likely subtitle sidecar file for a downloaded media file.""" - try: - base_dir = media_path.parent - stem = media_path.stem - if not stem: - return None - - candidates: List[Path] = [] - for p in base_dir.glob(stem + ".*"): - try: - if not p.is_file(): - continue - except Exception: - continue - if p.suffix.lower() in _SUBTITLE_EXTS: - candidates.append(p) - - # Prefer VTT then SRT then others. 
- preferred_order = [".vtt", ".srt", ".ass", ".ssa", ".lrc"] - for ext in preferred_order: - for p in candidates: - if p.suffix.lower() == ext: - return p - - return candidates[0] if candidates else None - except Exception: - return None - - -def _read_text_file(path: Path) -> Optional[str]: - try: - return path.read_text(encoding="utf-8", errors="ignore") - except Exception: - return None - - -def _ensure_yt_dlp_ready() -> None: - if YTDLP_IMPORT_ERROR is not None: - raise DownloadError(f"yt-dlp import error: {YTDLP_IMPORT_ERROR}") - if yt_dlp is None: - raise DownloadError("yt-dlp is not available") - - -def _get_extractors() -> List[Any]: - global _EXTRACTOR_CACHE - if _EXTRACTOR_CACHE is not None: - return _EXTRACTOR_CACHE - _ensure_yt_dlp_ready() - assert gen_extractors is not None - try: - _EXTRACTOR_CACHE = list(gen_extractors()) - except Exception: - _EXTRACTOR_CACHE = [] - return _EXTRACTOR_CACHE - - -def is_url_supported_by_ytdlp(url: str) -> bool: - if not url or not isinstance(url, str): - return False - if YTDLP_IMPORT_ERROR is not None: - return False - try: - parsed = urlparse(url) - if not parsed.scheme or not parsed.netloc: - return False - except Exception: - return False - try: - for ie in _get_extractors(): - try: - if ie.suitable(url) and ie.IE_NAME != "generic": - return True - except Exception: - continue - except Exception: - return False - return False - - -def list_formats( - url: str, - *, - no_playlist: bool = False, - playlist_items: Optional[str] = None, - cookiefile: Optional[str] = None, -) -> Optional[List[Dict[str, - Any]]]: - if not is_url_supported_by_ytdlp(url): - return None - _ensure_yt_dlp_ready() - assert yt_dlp is not None - - ydl_opts: Dict[str, - Any] = { - "quiet": True, - "no_warnings": True, - "skip_download": True, - "noprogress": True, - } - if cookiefile: - ydl_opts["cookiefile"] = str(cookiefile) - if no_playlist: - ydl_opts["noplaylist"] = True - if playlist_items: - ydl_opts["playlist_items"] = str(playlist_items) - - try: - with yt_dlp.YoutubeDL(ydl_opts) as ydl: # type: ignore[arg-type] - info = ydl.extract_info(url, download=False) - except Exception: - return None - - if not isinstance(info, dict): - return None - formats = info.get("formats") - if not isinstance(formats, list): - return None - out: List[Dict[str, Any]] = [] - for f in formats: - if isinstance(f, dict): - out.append(f) - return out - - -def _download_with_sections_via_cli( - url: str, - ytdl_options: Dict[str, - Any], - sections: List[str], - quiet: bool = False, -) -> tuple[Optional[str], - Dict[str, - Any]]: - sections_list = ytdl_options.get("download_sections", []) - if not sections_list: - return "", {} - - session_id = hashlib.md5( - (url + str(time.time()) + "".join(random.choices(string.ascii_letters, - k=10))).encode() - ).hexdigest()[:12] - first_section_info = None - - total_sections = len(sections_list) - for section_idx, section in enumerate(sections_list, 1): - # While step 1/2 is "downloading", keep the pipe bar moving for multi-section clips. - # Map sections onto 50..99 so step 2/2 can still jump to 100. 
- try: - if total_sections > 0: - pct = 50 + int(((section_idx - 1) / max(1, total_sections)) * 49) - _set_pipe_percent(pct) - except Exception: - pass - - base_outtmpl = ytdl_options.get("outtmpl", "%(title)s.%(ext)s") - output_dir_path = Path(base_outtmpl).parent - filename_tmpl = f"{session_id}_{section_idx}" - if base_outtmpl.endswith(".%(ext)s"): - filename_tmpl += ".%(ext)s" - section_outtmpl = str(output_dir_path / filename_tmpl) - - if section_idx == 1: - metadata_cmd = ["yt-dlp", "--dump-json", "--skip-download"] - if ytdl_options.get("cookiefile"): - cookies_path = ytdl_options["cookiefile"].replace("\\", "/") - metadata_cmd.extend(["--cookies", cookies_path]) - if ytdl_options.get("noplaylist"): - metadata_cmd.append("--no-playlist") - metadata_cmd.append(url) - try: - meta_result = subprocess.run( - metadata_cmd, - capture_output=True, - text=True - ) - if meta_result.returncode == 0 and meta_result.stdout: - try: - info_dict = json.loads(meta_result.stdout.strip()) - first_section_info = info_dict - if not quiet: - debug( - f"Extracted title from metadata: {info_dict.get('title')}" - ) - except json.JSONDecodeError: - if not quiet: - debug("Could not parse JSON metadata") - except Exception as e: - if not quiet: - debug(f"Error extracting metadata: {e}") - - cmd = ["yt-dlp"] - if quiet: - cmd.append("--quiet") - cmd.append("--no-warnings") - cmd.append("--no-progress") - # Keep ffmpeg/merger output from taking over the terminal. - cmd.extend(["--postprocessor-args", "ffmpeg:-hide_banner -loglevel error"]) - if ytdl_options.get("ffmpeg_location"): - try: - cmd.extend(["--ffmpeg-location", str(ytdl_options["ffmpeg_location"])]) - except Exception: - pass - if ytdl_options.get("format"): - cmd.extend(["-f", ytdl_options["format"]]) - if ytdl_options.get("merge_output_format"): - cmd.extend( - ["--merge-output-format", - str(ytdl_options["merge_output_format"])] - ) - - # For CLI downloads, infer chapter/metadata embedding from either legacy flags - # or explicit FFmpegMetadata postprocessor entries. 
- postprocessors = ytdl_options.get("postprocessors") - want_add_metadata = bool(ytdl_options.get("addmetadata")) - want_embed_chapters = bool(ytdl_options.get("embedchapters")) - if isinstance(postprocessors, list): - for pp in postprocessors: - if not isinstance(pp, dict): - continue - if str(pp.get("key") or "") == "FFmpegMetadata": - want_add_metadata = True - if bool(pp.get("add_chapters", True)): - want_embed_chapters = True - - if want_add_metadata: - cmd.append("--add-metadata") - if want_embed_chapters: - cmd.append("--embed-chapters") - if ytdl_options.get("writesubtitles"): - cmd.append("--write-sub") - cmd.append("--write-auto-sub") - cmd.extend(["--sub-format", "vtt"]) - if ytdl_options.get("force_keyframes_at_cuts"): - cmd.append("--force-keyframes-at-cuts") - cmd.extend(["-o", section_outtmpl]) - if ytdl_options.get("cookiefile"): - cookies_path = ytdl_options["cookiefile"].replace("\\", "/") - cmd.extend(["--cookies", cookies_path]) - if ytdl_options.get("noplaylist"): - cmd.append("--no-playlist") - - # Apply clip/section selection - cmd.extend(["--download-sections", section]) - - cmd.append(url) - if not quiet: - debug(f"Running yt-dlp for section: {section}") - try: - if quiet: - subprocess.run(cmd, check=True, capture_output=True, text=True) - else: - subprocess.run(cmd, check=True) - except subprocess.CalledProcessError as exc: - stderr_text = exc.stderr or "" - tail = "\n".join(stderr_text.splitlines()[-12:]).strip() - details = f"\n{tail}" if tail else "" - raise DownloadError( - f"yt-dlp failed for section {section} (exit {exc.returncode}){details}" - ) from exc - except Exception as exc: - raise DownloadError(f"yt-dlp failed for section {section}: {exc}") from exc - - # Mark near-complete before returning so the runner can finalize cleanly. 
- try: - _set_pipe_percent(99) - except Exception: - pass - - return session_id, first_section_info or {} - - -def _iter_download_entries(info: Dict[str, Any]) -> Iterator[Dict[str, Any]]: - queue: List[Dict[str, Any]] = [info] - seen: set[int] = set() - while queue: - current = queue.pop(0) - obj_id = id(current) - if obj_id in seen: - continue - seen.add(obj_id) - entries = current.get("entries") - if isinstance(entries, list): - for entry in entries: - queue.append(entry) - if current.get("requested_downloads") or not entries: - yield current - - -def _candidate_paths(entry: Dict[str, Any], output_dir: Path) -> Iterator[Path]: - requested = entry.get("requested_downloads") - if isinstance(requested, list): - for item in requested: - if isinstance(item, dict): - fp = item.get("filepath") or item.get("_filename") - if fp: - yield Path(fp) - for key in ("filepath", "_filename", "filename"): - value = entry.get(key) - if value: - yield Path(value) - if entry.get("filename"): - yield output_dir / entry["filename"] - - -def _resolve_entry_and_path(info: Dict[str, - Any], - output_dir: Path) -> tuple[Dict[str, - Any], - Path]: - for entry in _iter_download_entries(info): - for candidate in _candidate_paths(entry, output_dir): - if candidate.is_file(): - return entry, candidate - if not candidate.is_absolute(): - maybe = output_dir / candidate - if maybe.is_file(): - return entry, maybe - raise FileNotFoundError("yt-dlp did not report a downloaded media file") - - -def _resolve_entries_and_paths(info: Dict[str, - Any], - output_dir: Path) -> List[tuple[Dict[str, - Any], - Path]]: - resolved: List[tuple[Dict[str, Any], Path]] = [] - seen: set[str] = set() - for entry in _iter_download_entries(info): - chosen: Optional[Path] = None - for candidate in _candidate_paths(entry, output_dir): - if candidate.is_file(): - chosen = candidate - break - if not candidate.is_absolute(): - maybe = output_dir / candidate - if maybe.is_file(): - chosen = maybe - break - if chosen is None: - continue - key = str(chosen.resolve()) - if key in seen: - continue - seen.add(key) - resolved.append((entry, chosen)) - return resolved - - -def _extract_sha256(info: Dict[str, Any]) -> Optional[str]: - for payload in [info] + info.get("entries", []): - if not isinstance(payload, dict): - continue - hashes = payload.get("hashes") - if isinstance(hashes, dict): - for key in ("sha256", "sha-256", "sha_256"): - if key in hashes and isinstance(hashes[key], - str) and hashes[key].strip(): - return hashes[key].strip() - for key in ("sha256", "sha-256", "sha_256"): - value = payload.get(key) - if isinstance(value, str) and value.strip(): - return value.strip() - return None - - -def _progress_callback(status: Dict[str, Any]) -> None: - """Simple progress callback using logger.""" - event = status.get("status") - if event == "downloading": - # Always print progress to stderr so piped stdout remains clean. 
- percent = status.get("_percent_str") - downloaded = status.get("downloaded_bytes") - total = status.get("total_bytes") or status.get("total_bytes_estimate") - speed = status.get("_speed_str") - eta = status.get("_eta_str") - - _YTDLP_PROGRESS_BAR.update( - downloaded=int(downloaded) if downloaded is not None else None, - total=int(total) if total is not None else None, - label="download", - file=sys.stderr, - ) - elif event == "finished": - _YTDLP_PROGRESS_BAR.finish() - elif event in ("postprocessing", "processing"): - return - - -def probe_url( - url: str, - no_playlist: bool = False, - timeout_seconds: int = 15, - *, - cookiefile: Optional[str] = None, -) -> Optional[Dict[str, - Any]]: - """Probe URL to extract metadata WITHOUT downloading. - - Args: - url: URL to probe - no_playlist: If True, ignore playlists and probe only the single video - timeout_seconds: Max seconds to wait for probe (default 15s) - - Returns: - Dict with keys: extractor, title, entries (if playlist), duration, etc. - Returns None if not supported by yt-dlp or on timeout. - """ - if not is_url_supported_by_ytdlp(url): - return None - - # Wrap probe in timeout to prevent hanging on large playlists - import threading - from typing import cast - - result_container: List[Optional[Any]] = [None, None] # [result, error] - - def _do_probe() -> None: - try: - _ensure_yt_dlp_ready() - - assert yt_dlp is not None - # Extract info without downloading - # Use extract_flat='in_playlist' to get full metadata for playlist items - ydl_opts = { - "quiet": True, # Suppress all output - "no_warnings": True, - "socket_timeout": 10, - "retries": 2, # Reduce retries for faster timeout - "skip_download": True, # Don't actually download - "extract_flat": "in_playlist", # Get playlist with metadata for each entry - "noprogress": True, # No progress bars - } - - if cookiefile: - ydl_opts["cookiefile"] = str(cookiefile) - - # Add no_playlist option if specified - if no_playlist: - ydl_opts["noplaylist"] = True - - with yt_dlp.YoutubeDL(ydl_opts) as ydl: # type: ignore[arg-type] - info = ydl.extract_info(url, download=False) - - if not isinstance(info, dict): - result_container[0] = None - return - - # Extract relevant fields - webpage_url = info.get("webpage_url") or info.get("original_url" - ) or info.get("url") - result_container[0] = { - "extractor": info.get("extractor", ""), - "title": info.get("title", ""), - "entries": info.get("entries", []), # Will be populated if playlist - "duration": info.get("duration"), - "uploader": info.get("uploader"), - "description": info.get("description"), - # Keep both the requested and canonical URL forms; callers should prefer webpage_url. - "requested_url": url, - "webpage_url": webpage_url, - } - except Exception as exc: - log(f"Probe error for {url}: {exc}") - result_container[1] = exc - - thread = threading.Thread(target=_do_probe, daemon=False) - thread.start() - thread.join(timeout=timeout_seconds) - - if thread.is_alive(): - # Probe timed out - return None so the caller can raise an error - debug( - f"Probe timeout for {url} (>={timeout_seconds}s), proceeding with download" - ) - return None - - if result_container[1] is not None: - # Probe error - return None to proceed anyway - return None - - return cast(Optional[Dict[str, Any]], result_container[0]) - - -def download_media( - opts: DownloadOptions, - *, - debug_logger: Optional[DebugLogger] = None, -) -> Any: - """Download streaming media exclusively via yt-dlp. - - Args: - opts: DownloadOptions with url, mode, output_dir, etc. 
- debug_logger: Optional debug logger for troubleshooting - - Returns: - DownloadMediaResult with path, info, tags, hash - - Raises: - DownloadError: If the URL is unsupported or yt-dlp detects no media - """ - # Handle GoFile shares before yt-dlp (they remain unsupported) - try: - netloc = urlparse(opts.url).netloc.lower() - except Exception: - netloc = "" - if "gofile.io" in netloc: - msg = "GoFile links are currently unsupported" - if not opts.quiet: - debug(msg) - if debug_logger is not None: - debug_logger.write_record("gofile-unsupported", - { - "url": opts.url - }) - raise DownloadError(msg) - - # Determine if yt-dlp should be used - ytdlp_supported = is_url_supported_by_ytdlp(opts.url) - if not ytdlp_supported: - msg = "URL not supported by yt-dlp; try download-file for manual downloads" - if not opts.quiet: - log(msg) - if debug_logger is not None: - debug_logger.write_record("ytdlp-unsupported", - { - "url": opts.url - }) - raise DownloadError(msg) - - # Skip probe for playlists with item selection (probe can hang on large playlists) - # Just proceed straight to download which will handle item selection - if opts.playlist_items: - debug( - f"Skipping probe for playlist (item selection: {opts.playlist_items}), proceeding with download" - ) - probe_result = { - "url": opts.url - } # Minimal probe result - else: - probe_cookiefile = None - try: - if opts.cookies_path and opts.cookies_path.is_file(): - probe_cookiefile = str(opts.cookies_path) - except Exception: - probe_cookiefile = None - - probe_result = probe_url( - opts.url, - no_playlist=opts.no_playlist, - timeout_seconds=15, - cookiefile=probe_cookiefile - ) - - if probe_result is None: - msg = "yt-dlp could not detect media for this URL; use download-file for direct downloads" - if not opts.quiet: - log(msg) - if debug_logger is not None: - debug_logger.write_record("ytdlp-skip-no-media", - { - "url": opts.url - }) - raise DownloadError(msg) - - _ensure_yt_dlp_ready() - - ytdlp_tool = YtDlpTool() - ytdl_options = ytdlp_tool.build_ytdlp_options(opts) - hooks = ytdl_options.get("progress_hooks") - if not isinstance(hooks, list): - hooks = [] - ytdl_options["progress_hooks"] = hooks - if _progress_callback not in hooks: - hooks.append(_progress_callback) - if not opts.quiet: - debug(f"Starting yt-dlp download: {opts.url}") - if debug_logger is not None: - debug_logger.write_record("ytdlp-start", - { - "url": opts.url - }) - - assert yt_dlp is not None - try: - # Debug: show what options we're using - if not opts.quiet: - if ytdl_options.get("download_sections"): - debug( - f"[yt-dlp] download_sections: {ytdl_options['download_sections']}" - ) - debug( - f"[yt-dlp] force_keyframes_at_cuts: {ytdl_options.get('force_keyframes_at_cuts', False)}" - ) - - # Use subprocess when download_sections are present (Python API doesn't support them properly) - session_id = None - first_section_info = {} - if ytdl_options.get("download_sections"): - # For clip (download_sections), keep pipeline Live UI active and suppress - # yt-dlp/ffmpeg CLI spam when running in quiet/pipeline mode. 
- live_ui, _ = PipelineProgress(pipeline_context).ui_and_pipe_index() - quiet_sections = bool(opts.quiet) or (live_ui is not None) - session_id, first_section_info = _download_with_sections_via_cli( - opts.url, - ytdl_options, - ytdl_options.get("download_sections", []), - quiet=quiet_sections, - ) - info = None - else: - with yt_dlp.YoutubeDL(ytdl_options) as ydl: # type: ignore[arg-type] - info = ydl.extract_info(opts.url, download=True) - except Exception as exc: - log(f"yt-dlp failed: {exc}", file=sys.stderr) - if debug_logger is not None: - debug_logger.write_record( - "exception", - { - "phase": "yt-dlp", - "error": str(exc), - "traceback": traceback.format_exc(), - }, - ) - raise DownloadError("yt-dlp download failed") from exc - - # If we used subprocess, we need to find the file manually - if info is None: - # Find files created/modified during this download (after we started) - # Look for files matching the expected output template pattern - try: - import glob - import time - import re - - # Get the expected filename pattern from outtmpl - # For sections: "C:\path\{session_id}.section_1_of_3.ext", etc. - # For non-sections: "C:\path\title.ext" - - # Wait a moment to ensure files are fully written - time.sleep(0.5) - - # List all files in output_dir, sorted by modification time - files = sorted( - opts.output_dir.iterdir(), - key=lambda p: p.stat().st_mtime, - reverse=True - ) - if not files: - raise FileNotFoundError(f"No files found in {opts.output_dir}") - - # If we downloaded sections, look for files with the session_id pattern - if opts.clip_sections and session_id: - # Pattern: "{session_id}_1.ext", "{session_id}_2.ext", etc. - # Also includes sidecars like "{session_id}_1.en.vtt". - section_pattern = re.compile(rf"^{re.escape(session_id)}_(\d+)") - matching_files = [f for f in files if section_pattern.search(f.name)] - - if matching_files: - # Sort by section number to ensure correct order - def extract_section_num(path: Path) -> int: - match = section_pattern.search(path.name) - return int(match.group(1)) if match else 999 - - matching_files.sort(key=extract_section_num) - debug( - f"Found {len(matching_files)} section file(s) matching pattern" - ) - - # Now rename section *media* files to use hash-based names. - # Sidecars (subtitles) are renamed to match the media hash so they can be - # attached as notes later (and not emitted as separate pipeline items). - by_index: Dict[int, - List[Path]] = {} - for f in matching_files: - m = section_pattern.search(f.name) - if not m: - continue - try: - n = int(m.group(1)) - except Exception: - continue - by_index.setdefault(n, []).append(f) - - renamed_media_files: List[Path] = [] - - for sec_num in sorted(by_index.keys()): - group = by_index.get(sec_num) or [] - if not group: - continue - - def _is_subtitle(p: Path) -> bool: - try: - return p.suffix.lower() in _SUBTITLE_EXTS - except Exception: - return False - - media_candidates = [p for p in group if not _is_subtitle(p)] - subtitle_candidates = [p for p in group if _is_subtitle(p)] - - # Pick the primary media file for this section. - # Prefer non-json, non-info sidecars. - media_file: Optional[Path] = None - for cand in media_candidates: - try: - if cand.suffix.lower() in {".json", - ".info.json"}: - continue - except Exception: - pass - media_file = cand - break - if media_file is None and media_candidates: - media_file = media_candidates[0] - if media_file is None: - # No media file found for this section; skip. 
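# `sha256_file` is used throughout this hunk (section renaming below, playlist
# results, the final single-file path) but is defined elsewhere in the module.
# A typical chunked implementation, shown here only as an assumption about its
# behaviour: hash the file in fixed-size blocks so large downloads never need to
# fit in memory, and return the lowercase hex digest used for renaming.
import hashlib
from pathlib import Path

def sha256_file(path: Path, chunk_size: int = 1 << 20) -> str:
    digest = hashlib.sha256()
    with path.open("rb") as handle:
        for chunk in iter(lambda: handle.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Section media files below are then renamed to f"{sha256_file(media_file)}{tail}",
# so subtitle sidecars can share the media file's hash-based stem.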
- continue - - try: - media_hash = sha256_file(media_file) - except Exception as e: - debug( - f"Failed to hash section media file {media_file.name}: {e}" - ) - renamed_media_files.append(media_file) - continue - - # Preserve any suffix tail after the section index so language tags survive. - # Example: _1.en.vtt -> .en.vtt - prefix = f"{session_id}_{sec_num}" - - def _tail(name: str) -> str: - try: - if name.startswith(prefix): - return name[len(prefix):] - except Exception: - pass - # Fallback: keep just the last suffix. - try: - return Path(name).suffix - except Exception: - return "" - - # Rename media file to (tail typically like .mkv). - try: - new_media_name = f"{media_hash}{_tail(media_file.name)}" - new_media_path = opts.output_dir / new_media_name - if new_media_path.exists() and new_media_path != media_file: - debug( - f"File with hash {media_hash} already exists, using existing file." - ) - try: - media_file.unlink() - except OSError: - pass - else: - media_file.rename(new_media_path) - debug( - f"Renamed section file: {media_file.name} -> {new_media_name}" - ) - renamed_media_files.append(new_media_path) - except Exception as e: - debug( - f"Failed to rename section media file {media_file.name}: {e}" - ) - renamed_media_files.append(media_file) - new_media_path = media_file - - # Rename subtitle sidecars to match media hash for later note attachment. - for sub_file in subtitle_candidates: - try: - new_sub_name = f"{media_hash}{_tail(sub_file.name)}" - new_sub_path = opts.output_dir / new_sub_name - if new_sub_path.exists() and new_sub_path != sub_file: - try: - sub_file.unlink() - except OSError: - pass - else: - sub_file.rename(new_sub_path) - debug( - f"Renamed section file: {sub_file.name} -> {new_sub_name}" - ) - except Exception as e: - debug( - f"Failed to rename section subtitle file {sub_file.name}: {e}" - ) - - media_path = ( - renamed_media_files[0] - if renamed_media_files else matching_files[0] - ) - media_paths = renamed_media_files if renamed_media_files else None - if not opts.quiet: - count = len(media_paths) if isinstance(media_paths, list) else 1 - debug( - f"✓ Downloaded {count} section media file(s) (session: {session_id})" - ) - else: - # Fallback to most recent file if pattern not found - media_path = files[0] - media_paths = None - if not opts.quiet: - debug( - f"✓ Downloaded section file (pattern not found): {media_path.name}" - ) - else: - # No sections, just take the most recent file - media_path = files[0] - media_paths = None - - if not opts.quiet: - debug(f"✓ Downloaded: {media_path.name}") - if debug_logger is not None: - debug_logger.write_record( - "ytdlp-file-found", - { - "path": str(media_path) - } - ) - except Exception as exc: - log(f"Error finding downloaded file: {exc}", file=sys.stderr) - if debug_logger is not None: - debug_logger.write_record( - "exception", - { - "phase": "find-file", - "error": str(exc) - }, - ) - raise DownloadError(str(exc)) from exc - - # Create result with minimal data extracted from filename - file_hash = sha256_file(media_path) - - # For section downloads, create tags with the title and build proper info dict - tags = [] - title = "" - if first_section_info: - title = first_section_info.get("title", "") - if title: - tags.append(f"title:{title}") - debug(f"Added title tag for section download: {title}") - - # Build info dict - always use extracted title if available, not hash - if first_section_info: - info_dict = first_section_info - else: - info_dict = { - "id": media_path.stem, - "title": title or 
media_path.stem, - "ext": media_path.suffix.lstrip("."), - } - - return DownloadMediaResult( - path=media_path, - info=info_dict, - tag=tags, - source_url=opts.url, - hash_value=file_hash, - paths=media_paths, # Include all section files if present - ) - - if not isinstance(info, dict): - log(f"Unexpected yt-dlp response: {type(info)}", file=sys.stderr) - raise DownloadError("Unexpected yt-dlp response type") - - info_dict: Dict[str, Any] = cast(Dict[str, Any], info) - if debug_logger is not None: - debug_logger.write_record( - "ytdlp-info", - { - "keys": sorted(info_dict.keys()), - "is_playlist": bool(info_dict.get("entries")), - }, - ) - - # Playlist/album handling: resolve ALL downloaded entries and return multiple results. - # The cmdlet will emit one PipeObject per downloaded file. - if info_dict.get("entries") and not opts.no_playlist: - resolved = _resolve_entries_and_paths(info_dict, opts.output_dir) - if resolved: - results: List[DownloadMediaResult] = [] - for entry, media_path in resolved: - hash_value = _extract_sha256(entry) or _extract_sha256(info_dict) - if not hash_value: - try: - hash_value = sha256_file(media_path) - except OSError: - hash_value = None - - tags: List[str] = [] - if extract_ytdlp_tags: - try: - tags = extract_ytdlp_tags(entry) - except Exception as e: - log(f"Error extracting tags: {e}", file=sys.stderr) - - source_url = ( - entry.get("webpage_url") or entry.get("original_url") - or entry.get("url") or opts.url - ) - - results.append( - DownloadMediaResult( - path=media_path, - info=entry, - tag=tags, - source_url=source_url, - hash_value=hash_value, - ) - ) - - if not opts.quiet: - debug(f"✓ Downloaded playlist items: {len(results)}") - return results - - try: - entry, media_path = _resolve_entry_and_path(info_dict, opts.output_dir) - except FileNotFoundError as exc: - log(f"Error: {exc}", file=sys.stderr) - if debug_logger is not None: - debug_logger.write_record( - "exception", - { - "phase": "resolve-path", - "error": str(exc) - }, - ) - raise DownloadError(str(exc)) from exc - - if debug_logger is not None: - debug_logger.write_record( - "resolved-media", - { - "path": str(media_path), - "entry_keys": sorted(entry.keys()) - }, - ) - - # Extract hash from metadata or compute - hash_value = _extract_sha256(entry) or _extract_sha256(info_dict) - if not hash_value: - try: - hash_value = sha256_file(media_path) - except OSError as exc: - if debug_logger is not None: - debug_logger.write_record( - "hash-error", - { - "path": str(media_path), - "error": str(exc) - }, - ) - - # Extract tags using metadata.py - tags = [] - if extract_ytdlp_tags: - try: - tags = extract_ytdlp_tags(entry) - except Exception as e: - log(f"Error extracting tags: {e}", file=sys.stderr) - - source_url = entry.get("webpage_url") or entry.get("original_url" - ) or entry.get("url") - - if not opts.quiet: - debug(f"✓ Downloaded: {media_path.name} ({len(tags)} tags)") - if debug_logger is not None: - debug_logger.write_record( - "downloaded", - { - "path": str(media_path), - "tag_count": len(tags), - "source_url": source_url, - "sha256": hash_value, - }, - ) - - return DownloadMediaResult( - path=media_path, - info=entry, - tag=tags, - source_url=source_url, - hash_value=hash_value, - ) - - -# Timeout handler to prevent yt-dlp hangs -def _download_with_timeout(opts: DownloadOptions, timeout_seconds: int = 300) -> Any: - """Download with timeout protection. 
- - Args: - opts: DownloadOptions - timeout_seconds: Max seconds to wait (default 300s = 5 min) - - Returns: - DownloadMediaResult or List[DownloadMediaResult] - - Raises: - DownloadError: If timeout exceeded - """ - import threading - from typing import cast - - result_container: List[Optional[Any]] = [None, None] # [result, error] - - def _do_download() -> None: - try: - result_container[0] = download_media(opts) - except Exception as e: - result_container[1] = e - - thread = threading.Thread(target=_do_download, daemon=False) - thread.start() - thread.join(timeout=timeout_seconds) - - if thread.is_alive(): - # Thread still running - timeout - raise DownloadError( - f"Download timeout after {timeout_seconds} seconds for {opts.url}" - ) - - if result_container[1] is not None: - raise cast(Exception, result_container[1]) - - if result_container[0] is None: - raise DownloadError(f"Download failed for {opts.url}") - - return cast(Any, result_container[0]) - - -class Download_Media(Cmdlet): - """Class-based download-media cmdlet - yt-dlp only, streaming sites.""" - - def __init__(self) -> None: - """Initialize download-media cmdlet.""" - super().__init__( - name="download-media", - summary="Download media from streaming sites (YouTube, Twitch, etc.)", - usage= - "download-media [options] or search-file | download-media [options]", - alias=[""], - arg=[ - SharedArgs.URL, - SharedArgs.QUERY, - CmdletArg( - name="audio", - type="flag", - alias="a", - description="Download audio only" - ), - CmdletArg( - name="format", - type="string", - alias="fmt", - description="Explicit yt-dlp format selector", - ), - QueryArg( - "clip", - key="clip", - aliases=["range", - "section", - "sections"], - type="string", - required=False, - description=( - "Clip time ranges via -query keyed fields (e.g. clip:1m-2m or clip:00:01-00:10). " - "Comma-separated values supported." 
- ), - query_only=True, - ), - CmdletArg( - name="item", - type="string", - description="Item selection for playlists/formats" - ), - SharedArgs.PATH, - ], - detail=[ - "Download media from streaming sites using yt-dlp.", - "For direct file downloads, use download-file.", - ], - exec=self.run, - ) - self.register() - - def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: - """Main execution method.""" - stage_ctx = pipeline_context.get_stage_context() - in_pipeline = stage_ctx is not None and getattr( - stage_ctx, - "total_stages", - 1 - ) > 1 - if in_pipeline and isinstance(config, dict): - config["_quiet_background_output"] = True - return self._run_impl(result, args, config) - - @staticmethod - def _normalize_urls(parsed: Dict[str, Any]) -> List[str]: - raw_url = parsed.get("url", []) - if isinstance(raw_url, str): - raw_url = [raw_url] - - expanded_urls: List[str] = [] - for u in raw_url or []: - if u is None: - continue - s = str(u).strip() - if not s: - continue - if "," in s: - parts = [p.strip() for p in s.split(",")] - expanded_urls.extend([p for p in parts if p]) - else: - expanded_urls.append(s) - return expanded_urls - - @staticmethod - def _append_urls_from_piped_result(raw_urls: List[str], result: Any) -> List[str]: - if raw_urls: - return raw_urls - if not result: - return raw_urls - - results_to_check = result if isinstance(result, list) else [result] - for item in results_to_check: - try: - url = get_field(item, "url") or get_field(item, "target") - except Exception: - url = None - if url: - raw_urls.append(url) - return raw_urls - - @staticmethod - def _filter_supported_urls(raw_urls: Sequence[str]) -> tuple[List[str], List[str]]: - supported = [url for url in (raw_urls or []) if is_url_supported_by_ytdlp(url)] - # Preserve original debug semantics: count unique unsupported URLs. - unsupported = list(set(raw_urls or []) - set(supported or [])) - return supported, unsupported - - def _parse_query_keyed_spec(self, - query_spec: Optional[str]) -> Dict[str, - List[str]]: - if not query_spec: - return {} - try: - keyed = self._parse_keyed_csv_spec(str(query_spec), default_key="hash") - if not keyed: - return {} - - # Normalize aliases so users can write shorter/alternate keys. - # Note: download-media uses a comma-separated keyed spec language inside -query. - def _alias(src: str, dest: str) -> None: - try: - values = keyed.get(src) - except Exception: - values = None - if not values: - return - try: - keyed.setdefault(dest, []).extend(list(values)) - except Exception: - pass - try: - keyed.pop(src, None) - except Exception: - pass - - for src in ("range", "ranges", "section", "sections"): - _alias(src, "clip") - for src in ("fmt", "f"): - _alias(src, "format") - for src in ("aud", "a"): - _alias(src, "audio") - - return keyed - except Exception: - return {} - - @staticmethod - def _extract_hash_override( - query_spec: Optional[str], - query_keyed: Dict[str, - List[str]] - ) -> Optional[str]: - try: - hash_values = query_keyed.get("hash", - []) if isinstance(query_keyed, - dict) else [] - hash_candidate = hash_values[-1] if hash_values else None - if hash_candidate: - return sh.parse_single_hash_query(f"hash:{hash_candidate}") - - # Backwards-compatible: treat a non-keyed query as a hash query. - # If the query uses keyed specs (e.g. format:, item:, clip:), do NOT attempt - # to interpret the whole string as a hash. 
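# download-media's -query value is a small comma-separated keyed spec language,
# e.g. "clip:3m4s-3m14s,1h22m-1h33m,item:2-3" or "hash:<sha256>".  The actual
# parser, `_parse_keyed_csv_spec`, is not part of this hunk, so the function
# below is only an illustrative stand-in: values without an alphabetic "key:"
# prefix attach to the most recent key (or the default), and repeated keys
# accumulate into lists.
from typing import Dict, List

def parse_keyed_csv_spec(spec: str, default_key: str = "hash") -> Dict[str, List[str]]:
    keyed: Dict[str, List[str]] = {}
    current_key = default_key
    for part in spec.split(","):
        part = part.strip()
        if not part:
            continue
        head, sep, tail = part.partition(":")
        # Only alphabetic prefixes count as keys, so "00:01-00:10" stays a value.
        if sep and head.strip().isalpha():
            current_key = head.strip().lower()
            part = tail.strip()
        if part:
            keyed.setdefault(current_key, []).append(part)
    return keyed

# parse_keyed_csv_spec("clip:1m-1m15s,2m1s-2m11s,item:2-3")
# -> {"clip": ["1m-1m15s", "2m1s-2m11s"], "item": ["2-3"]}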
- try: - has_non_hash_keys = bool( - query_keyed and isinstance(query_keyed, - dict) - and any( - k - for k in query_keyed.keys() if str(k).strip().lower() != "hash" - ) - ) - except Exception: - has_non_hash_keys = False - if has_non_hash_keys: - return None - return sh.parse_single_hash_query(str(query_spec)) if query_spec else None - except Exception: - return None - - def _parse_clip_ranges_and_apply_items( - self, - *, - clip_spec: Optional[str], - query_keyed: Dict[str, - List[str]], - parsed: Dict[str, - Any], - query_spec: Optional[str], - ) -> tuple[Optional[List[tuple[int, - int]]], - bool, - List[str]]: - clip_ranges: Optional[List[tuple[int, int]]] = None - clip_values: List[str] = [] - item_values: List[str] = [] - - def _uniq(values: Sequence[str]) -> List[str]: - seen: set[str] = set() - out: List[str] = [] - for v in values: - key = str(v) - if key in seen: - continue - seen.add(key) - out.append(v) - return out - - if clip_spec: - # Support keyed clip syntax: - # -query "clip:3m4s-3m14s,1h22m-1h33m,item:2-3" - keyed = self._parse_keyed_csv_spec(str(clip_spec), default_key="clip") - clip_values.extend(keyed.get("clip", []) or []) - item_values.extend(keyed.get("item", []) or []) - - # Allow the same keyed spec language inside -query so users can do: - # download-media -query "clip:1m-1m15s,2m1s-2m11s" - if query_keyed: - clip_values.extend(query_keyed.get("clip", []) or []) - item_values.extend(query_keyed.get("item", []) or []) - - # QueryArg also hydrates clip via -query, so combine and deduplicate here - clip_values = _uniq(clip_values) - item_values = _uniq(item_values) - - if item_values and not parsed.get("item"): - parsed["item"] = ",".join([v for v in item_values if v]) - - if clip_values: - clip_ranges = self._parse_time_ranges( - ",".join([v for v in clip_values if v]) - ) - if not clip_ranges: - bad_spec = clip_spec or query_spec - log(f"Invalid clip format: {bad_spec}", file=sys.stderr) - return None, True, clip_values - - return clip_ranges, False, clip_values - - @staticmethod - def _init_storage(config: Dict[str, Any]) -> tuple[Optional[Any], bool]: - storage = None - hydrus_available = True - try: - from Store import Store - - storage = Store( - config=config or {}, - suppress_debug=True - ) - from API.HydrusNetwork import is_hydrus_available - - hydrus_available = bool(is_hydrus_available(config or {})) - except Exception: - storage = None - return storage, hydrus_available - - @staticmethod - def _cookiefile_str(ytdlp_tool: YtDlpTool) -> Optional[str]: - try: - cookie_path = ytdlp_tool.resolve_cookiefile() - if cookie_path is not None and cookie_path.is_file(): - return str(cookie_path) - except Exception: - pass - return None - - def _list_formats_cached( - self, - u: str, - *, - playlist_items_value: Optional[str], - formats_cache: Dict[str, - Optional[List[Dict[str, - Any]]]], - ytdlp_tool: YtDlpTool, - ) -> Optional[List[Dict[str, - Any]]]: - key = f"{u}||{playlist_items_value or ''}" - if key in formats_cache: - return formats_cache[key] - fmts = list_formats( - u, - no_playlist=False, - playlist_items=playlist_items_value, - cookiefile=self._cookiefile_str(ytdlp_tool), - ) - formats_cache[key] = fmts - return fmts - - def _is_browseable_format(self, fmt: Any) -> bool: - """Return True for formats that are sensible to show in the format table.""" - if not isinstance(fmt, dict): - return False - format_id = str(fmt.get("format_id") or "").strip() - if not format_id: - return False - ext = str(fmt.get("ext") or "").strip().lower() - if ext in {"mhtml", 
- "json"}: - return False - note = str(fmt.get("format_note") or "").lower() - if "storyboard" in note: - return False - if format_id.lower().startswith("sb"): - return False - vcodec = str(fmt.get("vcodec", "none")) - acodec = str(fmt.get("acodec", "none")) - # Keep anything with at least one stream. - return not (vcodec == "none" and acodec == "none") - - def _format_id_for_query_index( - self, - query_format: str, - url: str, - formats_cache: Dict[str, - Optional[List[Dict[str, - Any]]]], - ytdlp_tool: YtDlpTool, - ) -> Optional[str]: - """Resolve a numeric 'format:N' query into an actual yt-dlp format selector. - - Acceptable forms: '7', '#7', ' 7 ' (whitespace allowed). Uses the same - browseable filtering rules as the interactive table and selects the - 1-based index. Returns a yt-dlp format string (possibly with +ba added - for video-only formats). Raises ValueError when the index is invalid or - formats cannot be listed. - """ - import re - - if not query_format or not re.match(r"^\s*#?\d+\s*$", str(query_format)): - return None - - try: - idx = int(str(query_format).lstrip("#").strip()) - except Exception: - raise ValueError(f"Invalid format index: {query_format}") - - fmts = self._list_formats_cached( - url, - playlist_items_value=None, - formats_cache=formats_cache, - ytdlp_tool=ytdlp_tool, - ) - if not fmts: - raise ValueError( - "Unable to list formats for the URL; cannot resolve numeric format index" - ) - - candidate_formats = [f for f in fmts if self._is_browseable_format(f)] - filtered_formats = candidate_formats if candidate_formats else list(fmts) - - if not filtered_formats: - raise ValueError("No formats available for selection") - - if idx <= 0 or idx > len(filtered_formats): - raise ValueError( - f"Format index {idx} out of range (1..{len(filtered_formats)})" - ) - - chosen = filtered_formats[idx - 1] - selection_format_id = str(chosen.get("format_id") or "").strip() - if not selection_format_id: - raise ValueError("Selected format has no format_id") - - try: - vcodec = str(chosen.get("vcodec", "none")) - acodec = str(chosen.get("acodec", "none")) - if vcodec != "none" and acodec == "none": - selection_format_id = f"{selection_format_id}+ba" - except Exception: - pass - - return selection_format_id - - @staticmethod - def _format_selector_for_query_height(query_format: str) -> Optional[str]: - """Translate a query value like '720p' into a yt-dlp -f selector. - - Returns a selector that chooses the best video at or under the requested - height and always pairs it with audio. - - Example: '640p' -> 'bv*[height<=640]+ba' - - Notes: - - Only the 'p' form is treated as a height cap to avoid - ambiguity with numeric format IDs and numeric index selection. - """ - import re - - if query_format is None: - return None - - s = str(query_format).strip().lower() - m = re.match(r"^(\d{2,5})p$", s) - if not m: - return None - - try: - height = int(m.group(1)) - except Exception: - return None - - if height <= 0: - raise ValueError(f"Invalid height selection: {query_format}") - - return f"bv*[height<={height}]+ba" - - @staticmethod - def _canonicalize_url_for_storage( - *, - requested_url: str, - ytdlp_tool: YtDlpTool, - playlist_items: Optional[str] - ) -> str: - # Prefer yt-dlp's canonical webpage URL (e.g. strips timestamps/redirects). - # Fall back to the requested URL if probing fails. - # Important: when playlist item selection is used, avoid probing (can hang on large playlists). 
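# The canonicalization above prefers yt-dlp's `webpage_url` (which drops
# timestamp/redirect clutter) from a metadata-only probe and falls back to the
# requested URL when probing fails.  A stand-alone sketch, assuming the yt_dlp
# package is importable; `quiet`, `no_warnings`, `skip_download` and `cookiefile`
# are real YoutubeDL options, and extract_info(download=False) does not download.
from typing import Optional

import yt_dlp

def canonical_url(requested_url: str, cookiefile: Optional[str] = None) -> str:
    opts = {"quiet": True, "no_warnings": True, "skip_download": True}
    if cookiefile:
        opts["cookiefile"] = cookiefile
    try:
        with yt_dlp.YoutubeDL(opts) as ydl:
            info = ydl.extract_info(requested_url, download=False)
    except Exception:
        return requested_url
    if isinstance(info, dict):
        for key in ("webpage_url", "original_url", "url"):
            value = info.get(key)
            if isinstance(value, str) and value.strip():
                return value.strip()
    return requested_url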
- if playlist_items: - return str(requested_url) - try: - cf = None - try: - cookie_path = ytdlp_tool.resolve_cookiefile() - if cookie_path is not None and cookie_path.is_file(): - cf = str(cookie_path) - except Exception: - cf = None - pr = probe_url( - requested_url, - no_playlist=False, - timeout_seconds=15, - cookiefile=cf - ) - if isinstance(pr, dict): - for key in ("webpage_url", "original_url", "url", "requested_url"): - value = pr.get(key) - if isinstance(value, str) and value.strip(): - return value.strip() - except Exception: - pass - return str(requested_url) - - def _preflight_url_duplicate( - self, - *, - storage: Any, - hydrus_available: bool, - final_output_dir: Path, - candidate_url: str, - extra_urls: Optional[Sequence[str]] = None, - ) -> bool: - # NOTE: download-media sets _quiet_background_output=True when running in a pipeline to - # reduce background noise. URL de-dup is interactive and must still run in pipelines. - if storage is None: - debug("Preflight URL check skipped: storage unavailable") - return True - - debug(f"Preflight URL check: candidate={candidate_url}") - - try: - from SYS.metadata import normalize_urls - except Exception: - normalize_urls = None # type: ignore[assignment] - - needles: List[str] = [] - if normalize_urls is not None: - for raw in [candidate_url, *(list(extra_urls) if extra_urls else [])]: - try: - needles.extend(normalize_urls(raw)) - except Exception: - continue - # Fallback: always have at least one needle - if not needles: - needles = [str(candidate_url)] - - # Deduplicate needles (preserve order) - seen_needles: List[str] = [] - for needle in needles: - if needle and needle not in seen_needles: - seen_needles.append(needle) - needles = seen_needles - - try: - debug(f"Preflight URL needles: {needles}") - except Exception: - pass - - url_matches: List[Dict[str, Any]] = [] - try: - from Store.HydrusNetwork import HydrusNetwork - - # Avoid searching the temp/download directory backend during dedup. - # We only want to warn about duplicates in real stores. - backend_names_all = storage.list_searchable_backends() - backend_names: List[str] = [] - skipped: List[str] = [] - for backend_name in backend_names_all: - try: - backend = storage[backend_name] - except Exception: - continue - - try: - if str(backend_name).strip().lower() == "temp": - skipped.append(backend_name) - continue - except Exception: - pass - - # Heuristic: if a Folder backend points at the configured temp output dir, skip it. 
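# Isolated form of the heuristic named in the comment above: a Folder backend
# whose location resolves to the temp/download directory should not count for
# duplicate detection.  `_location` is the attribute name the removed code reads;
# everything else is plain pathlib.
from pathlib import Path
from typing import Any

def backend_is_temp_dir(backend: Any, temp_output_dir: Path) -> bool:
    location = getattr(backend, "_location", None)
    if not location:
        return False
    backend_path = Path(str(location)).expanduser().resolve()
    temp_path = Path(str(temp_output_dir)).expanduser().resolve()
    return backend_path == temp_path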
- try: - backend_location = getattr(backend, "_location", None) - if backend_location and final_output_dir: - backend_path = Path(str(backend_location) - ).expanduser().resolve() - temp_path = Path(str(final_output_dir)).expanduser().resolve() - if backend_path == temp_path: - skipped.append(backend_name) - continue - except Exception: - pass - - backend_names.append(backend_name) - - try: - if skipped: - debug( - f"Preflight backends: {backend_names} (skipped temp: {skipped})" - ) - else: - debug(f"Preflight backends: {backend_names}") - except Exception: - pass - - for backend_name in backend_names: - backend = storage[backend_name] - if isinstance(backend, HydrusNetwork) and not hydrus_available: - continue - - backend_hits: List[Dict[str, Any]] = [] - for needle in needles: - try: - backend_hits = backend.search(f"url:{needle}", limit=25) or [] - if backend_hits: - break - except Exception: - continue - if backend_hits: - url_matches.extend( - [ - dict(x) if isinstance(x, - dict) else { - "title": str(x) - } for x in backend_hits - ] - ) - - if len(url_matches) >= 25: - url_matches = url_matches[:25] - break - except Exception: - url_matches = [] - - if not url_matches: - debug("Preflight URL check: no matches") - return True - - # If the user already answered the duplicate URL prompt for this pipeline/command, - # respect that decision and don't re-prompt for every item. - try: - current_cmd_text = pipeline_context.get_current_command_text("") - except Exception: - current_cmd_text = "" - - try: - stage_ctx = pipeline_context.get_stage_context() - except Exception: - stage_ctx = None - - in_pipeline = bool( - stage_ctx is not None or ("|" in str(current_cmd_text or "")) - ) - if in_pipeline: - try: - cached_cmd = pipeline_context.load_value( - "preflight.url_duplicates.command", - default="" - ) - cached_decision = pipeline_context.load_value( - "preflight.url_duplicates.continue", - default=None - ) - except Exception: - cached_cmd = "" - cached_decision = None - - if cached_decision is not None and str(cached_cmd or "") == str( - current_cmd_text or ""): - if bool(cached_decision): - return True - try: - pipeline_context.request_pipeline_stop( - reason="duplicate-url declined", - exit_code=0 - ) - except Exception: - pass - return False - - table = ResultTable(f"URL already exists ({len(url_matches)} match(es))") - results_list: List[Dict[str, Any]] = [] - for item in url_matches: - if "title" not in item: - item["title"] = ( - item.get("name") or item.get("target") or item.get("path") - or "Result" - ) - - # Keep the full payload for history/inspection, but display a focused table. - # Use shared extractors so Ext/Size/Store/Hash remain consistent everywhere. 
- try: - from SYS.result_table import build_display_row - except Exception: - build_display_row = None # type: ignore - - if callable(build_display_row): - display_row = build_display_row( - item, - keys=["title", - "store", - "hash", - "ext", - "size"] - ) - else: - display_row = { - "title": item.get("title"), - "store": item.get("store"), - "hash": item.get("hash") or item.get("file_hash") - or item.get("sha256"), - "ext": str(item.get("ext") or ""), - "size": item.get("size") or item.get("size_bytes"), - } - table.add_result(display_row) - results_list.append(item) - - pipeline_context.set_current_stage_table(table) - pipeline_context.set_last_result_table(table, results_list) - - suspend = getattr(pipeline_context, "suspend_live_progress", None) - used_suspend = False - - cm: AbstractContextManager[Any] = nullcontext() - if callable(suspend): - try: - maybe_cm = suspend() - if maybe_cm is not None: - cm = maybe_cm # type: ignore[assignment] - used_suspend = True - except Exception: - cm = nullcontext() - used_suspend = False - - with cm: - get_stderr_console().print(table) - setattr(table, "_rendered_by_cmdlet", True) - answered_yes = bool( - Confirm.ask( - "Continue anyway?", - default=False, - console=get_stderr_console() - ) - ) - - # Cache decision for the duration of this pipeline/command. - if in_pipeline: - try: - existing = pipeline_context.load_value("preflight", default=None) - except Exception: - existing = None - preflight_cache: Dict[str, - Any] = existing if isinstance(existing, - dict) else {} - url_dup_cache = preflight_cache.get("url_duplicates") - if not isinstance(url_dup_cache, dict): - url_dup_cache = {} - url_dup_cache["command"] = str(current_cmd_text or "") - url_dup_cache["continue"] = bool(answered_yes) - preflight_cache["url_duplicates"] = url_dup_cache - try: - pipeline_context.store_value("preflight", preflight_cache) - except Exception: - pass - - if not answered_yes: - if in_pipeline and used_suspend: - try: - pipeline_context.request_pipeline_stop( - reason="duplicate-url declined", - exit_code=0 - ) - except Exception: - pass - return False - return True - - def _preflight_url_duplicates_bulk( - self, - *, - storage: Any, - hydrus_available: bool, - final_output_dir: Path, - urls: Sequence[str], - ) -> bool: - """Preflight URL de-dup for a batch of URLs. - - Purpose: - - Avoid per-item interactive URL checks inside a playlist loop. - - Let the user see ALL duplicates up front, before any downloads start. - """ - if storage is None: - debug("Bulk URL preflight skipped: storage unavailable") - return True - - # Honor any prior duplicate URL decision for this pipeline/command. 
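# The "honor any prior decision" logic that follows remembers the user's answer
# keyed by the current command text, so one pipeline only prompts once per
# duplicate-URL question.  A self-contained sketch of that idea using a plain
# dict; the removed code persists the same information through
# pipeline_context.load_value()/store_value() under a "preflight" key instead.
from typing import Dict, Optional

class DecisionCache:
    def __init__(self) -> None:
        self._answers: Dict[str, bool] = {}

    def lookup(self, command_text: str) -> Optional[bool]:
        """Return the cached yes/no answer for this exact command, if any."""
        return self._answers.get(command_text)

    def remember(self, command_text: str, answer: bool) -> None:
        self._answers[command_text] = answer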
- try: - current_cmd_text = pipeline_context.get_current_command_text("") - except Exception: - current_cmd_text = "" - - try: - stage_ctx = pipeline_context.get_stage_context() - except Exception: - stage_ctx = None - - in_pipeline = bool( - stage_ctx is not None or ("|" in str(current_cmd_text or "")) - ) - if in_pipeline: - try: - cached_cmd = pipeline_context.load_value( - "preflight.url_duplicates.command", - default="" - ) - cached_decision = pipeline_context.load_value( - "preflight.url_duplicates.continue", - default=None - ) - except Exception: - cached_cmd = "" - cached_decision = None - - if cached_decision is not None and str(cached_cmd or "") == str( - current_cmd_text or ""): - if bool(cached_decision): - return True - try: - pipeline_context.request_pipeline_stop( - reason="duplicate-url declined", - exit_code=0 - ) - except Exception: - pass - return False - - unique_urls: List[str] = [] - for u in urls or []: - s = str(u or "").strip() - if s and s not in unique_urls: - unique_urls.append(s) - if len(unique_urls) <= 1: - return True - - try: - from SYS.metadata import normalize_urls - except Exception: - normalize_urls = None # type: ignore[assignment] - - def _httpish(value: str) -> bool: - try: - return bool(value) and ( - value.startswith("http://") or value.startswith("https://") - ) - except Exception: - return False - - url_needles: Dict[str, - List[str]] = {} - for u in unique_urls: - needles: List[str] = [] - if normalize_urls is not None: - try: - needles.extend( - [n for n in (normalize_urls(u) or []) if isinstance(n, str)] - ) - except Exception: - needles = [] - if not needles: - needles = [u] - # Prefer http(s) needles for store lookups. - filtered: List[str] = [] - for n in needles: - n2 = str(n or "").strip() - if not n2: - continue - if not _httpish(n2): - continue - if n2 not in filtered: - filtered.append(n2) - url_needles[u] = filtered if filtered else [u] - - # Determine backends once (same filtering as per-URL preflight). 
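# The needle-building loop above reduces each URL to de-duplicated http(s)
# variants before querying stores.  `normalize_urls` is project-specific and not
# part of this hunk, so the sketch below only shows the scheme filter and the
# order-preserving de-duplication applied to whatever variants it returns.
from typing import Iterable, List
from urllib.parse import urlparse

def http_needles(candidates: Iterable[str], fallback: str) -> List[str]:
    needles: List[str] = []
    for candidate in candidates:
        text = str(candidate or "").strip()
        if not text or urlparse(text).scheme not in {"http", "https"}:
            continue
        if text not in needles:
            needles.append(text)
    return needles or [fallback]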
- backend_names: List[str] = [] - try: - backend_names_all = storage.list_searchable_backends() - except Exception: - backend_names_all = [] - - for backend_name in backend_names_all: - try: - backend = storage[backend_name] - except Exception: - continue - - try: - if str(backend_name).strip().lower() == "temp": - continue - except Exception: - pass - - try: - backend_location = getattr(backend, "_location", None) - if backend_location and final_output_dir: - backend_path = Path(str(backend_location)).expanduser().resolve() - temp_path = Path(str(final_output_dir)).expanduser().resolve() - if backend_path == temp_path: - continue - except Exception: - pass - - backend_names.append(backend_name) - - if not backend_names: - debug("Bulk URL preflight skipped: no searchable backends") - return True - - # Collect matches as display rows (cap to keep output reasonable) - seen_pairs: set[tuple[str, str]] = set() - matched_urls: set[str] = set() - match_rows: List[Dict[str, Any]] = [] - max_rows = 200 - - try: - from Store.HydrusNetwork import HydrusNetwork - except Exception: - HydrusNetwork = None # type: ignore - - for backend_name in backend_names: - if len(match_rows) >= max_rows: - break - try: - backend = storage[backend_name] - except Exception: - continue - - if HydrusNetwork is not None and isinstance(backend, HydrusNetwork): - if not hydrus_available: - continue - - client = getattr(backend, "_client", None) - if client is None: - continue - - for original_url, needles in url_needles.items(): - if len(match_rows) >= max_rows: - break - if (original_url, str(backend_name)) in seen_pairs: - continue - - # Fast-path: ask Hydrus whether it already knows this URL. - found_hash: Optional[str] = None - found = False - for needle in (needles or [])[:3]: - if not _httpish(needle): - continue - try: - from API.HydrusNetwork import HydrusRequestSpec - - spec = HydrusRequestSpec( - method="GET", - endpoint="/add_urls/get_url_files", - query={ - "url": needle - }, - ) - response = client._perform_request( - spec - ) # type: ignore[attr-defined] - raw_hashes = None - if isinstance(response, dict): - raw_hashes = response.get("hashes") or response.get( - "file_hashes" - ) - raw_ids = response.get("file_ids") - has_ids = isinstance(raw_ids, list) and len(raw_ids) > 0 - has_hashes = isinstance(raw_hashes, - list) and len(raw_hashes) > 0 - if has_hashes: - try: - found_hash = str( - raw_hashes[0] - ).strip() # type: ignore[index] - except Exception: - found_hash = None - if has_ids or has_hashes: - found = True - break - except Exception: - continue - - if not found: - continue - - seen_pairs.add((original_url, str(backend_name))) - matched_urls.add(original_url) - display_row = { - "title": - "(exists)", - "store": - str(backend_name), - "hash": - found_hash or "", - "url": - original_url, - "columns": [ - ("Title", - "(exists)"), - ("Store", - str(backend_name)), - ("Hash", - found_hash or ""), - ("URL", - original_url), - ], - } - match_rows.append(display_row) - continue - - # Generic backends: use the existing search() contract. 
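# The Hydrus fast path above asks the client whether it already knows a URL via
# GET /add_urls/get_url_files instead of running a full search.  The removed code
# goes through the project's client wrapper; the sketch below talks to the
# documented endpoint directly with `requests` (an assumed dependency), and the
# response key names are taken from the Hydrus Client API docs, so they may
# differ between Hydrus versions.
from typing import Optional

import requests

def hydrus_known_url_hash(base_url: str, access_key: str, url: str) -> Optional[str]:
    """Return a known file hash for `url`, or None when Hydrus has no match."""
    response = requests.get(
        f"{base_url.rstrip('/')}/add_urls/get_url_files",
        params={"url": url},
        headers={"Hydrus-Client-API-Access-Key": access_key},
        timeout=10,
    )
    response.raise_for_status()
    for entry in response.json().get("url_file_statuses") or []:
        file_hash = str(entry.get("hash") or "").strip()
        if file_hash:
            return file_hash
    return None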
- for original_url, needles in url_needles.items(): - if len(match_rows) >= max_rows: - break - if (original_url, str(backend_name)) in seen_pairs: - continue - - backend_hits: List[Dict[str, Any]] = [] - for needle in (needles or [])[:3]: - try: - backend_hits = backend.search(f"url:{needle}", limit=1) or [] - if backend_hits: - break - except Exception: - continue - - if not backend_hits: - continue - - seen_pairs.add((original_url, str(backend_name))) - matched_urls.add(original_url) - hit = backend_hits[0] - title = ( - hit.get("title") or hit.get("name") or hit.get("target") - or hit.get("path") or "(exists)" - ) - file_hash = hit.get("hash") or hit.get("file_hash" - ) or hit.get("sha256") or "" - - try: - from SYS.result_table import build_display_row - except Exception: - build_display_row = None # type: ignore - - extracted = { - "title": str(title), - "store": str(hit.get("store") or backend_name), - "hash": str(file_hash or ""), - "ext": "", - "size": None, - } - if callable(build_display_row): - try: - extracted = build_display_row( - hit, - keys=["title", - "store", - "hash", - "ext", - "size"] - ) - except Exception: - pass - # Ensure we still prefer the precomputed values for title/store/hash. - extracted["title"] = str(title) - extracted["store"] = str(hit.get("store") or backend_name) - extracted["hash"] = str(file_hash or "") - - ext = extracted.get("ext") - size_val = extracted.get("size") - - display_row = { - "title": - str(title), - "store": - str(hit.get("store") or backend_name), - "hash": - str(file_hash or ""), - "ext": - str(ext or ""), - "size": - size_val, - "url": - original_url, - "columns": [ - ("Title", - str(title)), - ("Store", - str(hit.get("store") or backend_name)), - ("Hash", - str(file_hash or "")), - ("Ext", - str(ext or "")), - ("Size", - size_val), - ("URL", - original_url), - ], - } - match_rows.append(display_row) - - if not match_rows: - debug("Bulk URL preflight: no matches") - return True - - # This table is non-interactive and intentionally wide (we want URL + ext/size). - table = ResultTable( - f"URL already exists ({len(matched_urls)} url(s))", - max_columns=10 - ) - table.set_no_choice(True) - try: - table.set_preserve_order(True) - except Exception: - pass - - for row in match_rows: - table.add_result(row) - - # Display as an overlay so we don't clobber the current selectable table/history. - try: - pipeline_context.set_last_result_table_overlay(table, match_rows) - except Exception: - pass - - _print_table_suspended(table) - setattr(table, "_rendered_by_cmdlet", True) - - suspend = getattr(pipeline_context, "suspend_live_progress", None) - cm: AbstractContextManager[Any] = nullcontext() - if callable(suspend): - try: - maybe_cm = suspend() - if maybe_cm is not None: - cm = maybe_cm # type: ignore[assignment] - except Exception: - cm = nullcontext() - - with cm: - answered_yes = bool( - Confirm.ask( - "Continue anyway?", - default=False, - console=get_stderr_console() - ) - ) - - # Cache decision for the duration of this pipeline/command. 
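# Interactive prompts in this hunk (both here and in the per-URL preflight)
# follow the same pattern: suspend the Live progress UI when the pipeline
# context exposes `suspend_live_progress()`, otherwise fall back to a no-op
# context manager, then ask with rich's Confirm.  `contextlib.nullcontext` and
# `rich.prompt.Confirm.ask` are real APIs; `pipeline_context` is this project's
# object and only assumed here to optionally provide the suspend hook.
from contextlib import AbstractContextManager, nullcontext
from typing import Any

from rich.prompt import Confirm

def confirm_with_suspended_progress(pipeline_context: Any, question: str) -> bool:
    suspend = getattr(pipeline_context, "suspend_live_progress", None)
    cm: AbstractContextManager[Any] = nullcontext()
    if callable(suspend):
        try:
            maybe_cm = suspend()
            if maybe_cm is not None:
                cm = maybe_cm
        except Exception:
            cm = nullcontext()
    with cm:
        return bool(Confirm.ask(question, default=False))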
- if in_pipeline: - try: - existing = pipeline_context.load_value("preflight", default=None) - except Exception: - existing = None - preflight_cache: Dict[str, - Any] = existing if isinstance(existing, - dict) else {} - url_dup_cache = preflight_cache.get("url_duplicates") - if not isinstance(url_dup_cache, dict): - url_dup_cache = {} - url_dup_cache["command"] = str(current_cmd_text or "") - url_dup_cache["continue"] = bool(answered_yes) - preflight_cache["url_duplicates"] = url_dup_cache - try: - pipeline_context.store_value("preflight", preflight_cache) - except Exception: - pass - - if not answered_yes: - if in_pipeline: - try: - pipeline_context.request_pipeline_stop( - reason="duplicate-url declined", - exit_code=0 - ) - except Exception: - pass - return False - return True - - def _maybe_show_playlist_table(self, *, url: str, ytdlp_tool: YtDlpTool) -> bool: - """Show a normal selectable playlist table when URL yields multiple entries.""" - try: - cf = self._cookiefile_str(ytdlp_tool) - pr = probe_url(url, no_playlist=False, timeout_seconds=15, cookiefile=cf) - except Exception: - pr = None - if not isinstance(pr, dict): - return False - entries = pr.get("entries") - if not isinstance(entries, list) or len(entries) <= 1: - return False - - # Identify a stable table type so `@* | ...` pipelines can auto-insert the - # appropriate downloader stage (e.g., Bandcamp selections should insert - # `download-media` before `merge-file`). - extractor_name = "" - try: - extractor_name = ( - str(pr.get("extractor") or pr.get("extractor_key") - or "").strip().lower() - ) - except Exception: - extractor_name = "" - table_type: Optional[str] = None - if "bandcamp" in extractor_name: - table_type = "bandcamp" - elif "youtube" in extractor_name: - table_type = "youtube" - - # Display table (limit rows to keep output reasonable) - max_rows = 200 - display_entries = entries[:max_rows] - - def _entry_to_url(entry: Any) -> Optional[str]: - if not isinstance(entry, dict): - return None - # Prefer explicit absolute URLs when present - for key in ("webpage_url", "original_url", "url"): - v = entry.get(key) - if isinstance(v, str) and v.strip(): - s = v.strip() - try: - if urlparse(s).scheme in {"http", - "https"}: - return s - except Exception: - return s - - # Best-effort YouTube fallback from id - entry_id = entry.get("id") - if isinstance(entry_id, str) and entry_id.strip(): - extractor_name = str( - pr.get("extractor") or pr.get("extractor_key") or "" - ).lower() - if "youtube" in extractor_name: - return f"https://www.youtube.com/watch?v={entry_id.strip()}" - return None - - table = ResultTable() - safe_url = str(url or "").strip() - table.title = f'download-media -url "{safe_url}"' if safe_url else "download-media" - if table_type: - try: - table.set_table(table_type) - except Exception: - table.table = table_type - table.set_source_command("download-media", []) - try: - table.set_preserve_order(True) - except Exception: - pass - - results_list: List[Dict[str, Any]] = [] - for idx, entry in enumerate(display_entries, 1): - title = None - uploader = None - duration = None - entry_url = _entry_to_url(entry) - try: - if isinstance(entry, dict): - title = entry.get("title") - uploader = entry.get("uploader") or pr.get("uploader") - duration = entry.get("duration") - except Exception: - pass - - row: Dict[str, - Any] = { - "table": - "download-media", - "title": - str(title or f"Item {idx}"), - "detail": - str(uploader or ""), - "media_kind": - "playlist-item", - "playlist_index": - idx, - 
"_selection_args": ( - ["-url", - str(entry_url)] - if entry_url else ["-url", - str(url), - "-item", - str(idx)] - ), - "url": - entry_url, - "target": - entry_url, - "columns": [ - ("#", - str(idx)), - ("Title", - str(title or "")), - ("Duration", - str(duration or "")), - ("Uploader", - str(uploader or "")), - ], - } - results_list.append(row) - table.add_result(row) - - pipeline_context.set_current_stage_table(table) - pipeline_context.set_last_result_table(table, results_list) - - _print_table_suspended(table) - setattr(table, "_rendered_by_cmdlet", True) - return True - - def _maybe_show_format_table_for_single_url( - self, - *, - mode: str, - clip_spec: Any, - clip_values: Sequence[str], - playlist_items: Optional[str], - ytdl_format: Any, - supported_url: Sequence[str], - playlist_selection_handled: bool, - ytdlp_tool: YtDlpTool, - formats_cache: Dict[str, - Optional[List[Dict[str, - Any]]]], - storage: Any, - hydrus_available: bool, - final_output_dir: Path, - args: Sequence[str], - ) -> Optional[int]: - # If no -item, no explicit -format specified, and single URL, show the format table. - # Do NOT stop to show formats when -audio is used (auto-pick) or when clip ranges are requested via -query. - if (mode != "audio" and not clip_spec and not clip_values and not playlist_items - and not ytdl_format and len(supported_url) == 1 - and not playlist_selection_handled): - url = supported_url[0] - - canonical_url = self._canonicalize_url_for_storage( - requested_url=url, - ytdlp_tool=ytdlp_tool, - playlist_items=playlist_items, - ) - if not self._preflight_url_duplicate( - storage=storage, - hydrus_available=hydrus_available, - final_output_dir=final_output_dir, - candidate_url=canonical_url, - extra_urls=[url], - ): - log(f"Skipping download: {url}", file=sys.stderr) - return 0 - - formats = self._list_formats_cached( - url, - playlist_items_value=None, - formats_cache=formats_cache, - ytdlp_tool=ytdlp_tool, - ) - - if formats and len(formats) > 1: - # Formatlist filtering - # - # Goal: - # - Keep the list useful (hide non-media entries like storyboards) - # - But NEVER filter down so far that the user can't browse/pick formats. - def _is_browseable_format(fmt: Any) -> bool: - if not isinstance(fmt, dict): - return False - format_id = str(fmt.get("format_id") or "").strip() - if not format_id: - return False - ext = str(fmt.get("ext") or "").strip().lower() - if ext in {"mhtml", - "json"}: - return False - note = str(fmt.get("format_note") or "").lower() - if "storyboard" in note: - return False - if format_id.lower().startswith("sb"): - return False - vcodec = str(fmt.get("vcodec", "none")) - acodec = str(fmt.get("acodec", "none")) - # Keep anything with at least one stream. 
- return not (vcodec == "none" and acodec == "none") - - candidate_formats = [f for f in formats if _is_browseable_format(f)] - filtered_formats = candidate_formats if candidate_formats else list( - formats - ) - - debug( - f"Formatlist: showing {len(filtered_formats)} formats (raw={len(formats)})" - ) - - # Build the base command that will be replayed with @N selection - base_cmd = f'download-media "{url}"' - remaining_args = [ - arg for arg in args if arg not in [url] and not arg.startswith("-") - ] - if remaining_args: - base_cmd += " " + " ".join(remaining_args) - - # Create result table for display - table = ResultTable( - title=f"Available formats for {url}", - max_columns=10, - preserve_order=True - ) - table.set_table("ytdlp.formatlist") - table.set_source_command("download-media", [url]) - - results_list: List[Dict[str, Any]] = [] - for idx, fmt in enumerate(filtered_formats, 1): - resolution = fmt.get("resolution", "") - ext = fmt.get("ext", "") - vcodec = fmt.get("vcodec", "none") - acodec = fmt.get("acodec", "none") - filesize = fmt.get("filesize") - filesize_approx = fmt.get("filesize_approx") - format_id = fmt.get("format_id", "") - - # If the chosen format is video-only (no audio stream), automatically - # request best audio too so the resulting file has sound. - selection_format_id = format_id - try: - if vcodec != "none" and acodec == "none" and format_id: - selection_format_id = f"{format_id}+ba" - except Exception: - selection_format_id = format_id - - size_str = "" - size_prefix = "" - size_bytes = filesize - if not size_bytes: - size_bytes = filesize_approx - if size_bytes: - size_prefix = "~" - try: - if isinstance(size_bytes, (int, float)) and size_bytes > 0: - size_mb = float(size_bytes) / (1024 * 1024) - size_str = f"{size_prefix}{size_mb:.1f}MB" - except Exception: - size_str = "" - - desc_parts: List[str] = [] - if resolution and resolution != "audio only": - desc_parts.append(resolution) - if ext: - desc_parts.append(str(ext).upper()) - if vcodec != "none": - desc_parts.append(f"v:{vcodec}") - if acodec != "none": - desc_parts.append(f"a:{acodec}") - if size_str: - desc_parts.append(size_str) - format_desc = " | ".join(desc_parts) - - format_dict = { - "table": - "download-media", - "title": - f"Format {format_id}", - "url": - url, - "target": - url, - "detail": - format_desc, - "annotations": [ext, - resolution] if resolution else [ext], - "media_kind": - "format", - "cmd": - base_cmd, - "columns": [ - ("ID", - format_id), - ("Resolution", - resolution or "N/A"), - ("Ext", - ext), - ("Size", - size_str or ""), - ("Video", - vcodec), - ("Audio", - acodec), - ], - "full_metadata": { - "format_id": format_id, - "url": url, - "item_selector": selection_format_id, - }, - "_selection_args": - None, - } - - selection_args: List[str] = ["-format", selection_format_id] - try: - if (not clip_spec) and clip_values: - selection_args.extend( - [ - "-query", - f"clip:{','.join([v for v in clip_values if v])}" - ] - ) - except Exception: - pass - format_dict["_selection_args"] = selection_args - - results_list.append(format_dict) - table.add_result(format_dict) - - try: - _print_table_suspended(table) - setattr(table, "_rendered_by_cmdlet", True) - except Exception: - pass - - pipeline_context.set_current_stage_table(table) - pipeline_context.set_last_result_table(table, results_list) - - log(f"", file=sys.stderr) - return 0 - - return None - - def _download_supported_urls( - self, - *, - supported_url: Sequence[str], - ytdlp_tool: YtDlpTool, - args: Sequence[str], - config: 
Dict[str, - Any], - final_output_dir: Path, - mode: str, - clip_spec: Any, - clip_ranges: Optional[List[tuple[int, - int]]], - query_hash_override: Optional[str], - embed_chapters: bool, - write_sub: bool, - quiet_mode: bool, - playlist_items: Optional[str], - ytdl_format: Any, - skip_per_url_preflight: bool, - forced_single_format_id: Optional[str], - forced_single_format_for_batch: bool, - formats_cache: Dict[str, - Optional[List[Dict[str, - Any]]]], - storage: Any, - hydrus_available: bool, - ) -> int: - downloaded_count = 0 - downloaded_pipe_objects: List[Dict[str, Any]] = [] - pipe_seq = 0 - clip_sections_spec = self._build_clip_sections_spec(clip_ranges) - - if clip_sections_spec: - try: - debug(f"Clip sections spec: {clip_sections_spec}") - except Exception: - pass - - for url in supported_url: - try: - debug(f"Processing: {url}") - - canonical_url = self._canonicalize_url_for_storage( - requested_url=url, - ytdlp_tool=ytdlp_tool, - playlist_items=playlist_items, - ) - - if not skip_per_url_preflight: - if not self._preflight_url_duplicate( - storage=storage, - hydrus_available=hydrus_available, - final_output_dir=final_output_dir, - candidate_url=canonical_url, - extra_urls=[url], - ): - log(f"Skipping download: {url}", file=sys.stderr) - continue - - PipelineProgress(pipeline_context).begin_steps(2) - - actual_format = ytdl_format - actual_playlist_items = playlist_items - - if playlist_items and not ytdl_format: - import re - - if re.search(r"[^0-9,-]", playlist_items): - actual_format = playlist_items - actual_playlist_items = None - - if mode == "audio" and not actual_format: - actual_format = "bestaudio" - - if mode == "video" and not actual_format: - configured = (ytdlp_tool.default_format("video") or "").strip() - if configured and configured != "bestvideo+bestaudio/best": - actual_format = configured - - forced_single_applied = False - if (forced_single_format_for_batch and forced_single_format_id - and not ytdl_format and not actual_playlist_items): - actual_format = forced_single_format_id - forced_single_applied = True - - if (actual_format and isinstance(actual_format, - str) and mode != "audio" - and "+" not in actual_format and "/" not in actual_format - and "[" not in actual_format and actual_format not in {"best", - "bv", - "ba", - "b"} - and not forced_single_applied): - try: - formats = self._list_formats_cached( - url, - playlist_items_value=actual_playlist_items, - formats_cache=formats_cache, - ytdlp_tool=ytdlp_tool, - ) - if formats: - fmt_match = next( - ( - f for f in formats - if str(f.get("format_id", "")) == actual_format - ), - None, - ) - if fmt_match: - vcodec = str(fmt_match.get("vcodec", "none")) - acodec = str(fmt_match.get("acodec", "none")) - if vcodec != "none" and acodec == "none": - debug( - f"Selected video-only format {actual_format}; using {actual_format}+ba for audio" - ) - actual_format = f"{actual_format}+ba" - except Exception: - pass - - attempted_single_format_fallback = False - while True: - try: - opts = DownloadOptions( - url=url, - mode=mode, - output_dir=final_output_dir, - ytdl_format=actual_format, - cookies_path=ytdlp_tool.resolve_cookiefile(), - clip_sections=clip_sections_spec, - playlist_items=actual_playlist_items, - quiet=quiet_mode, - no_playlist=False, - embed_chapters=embed_chapters, - write_sub=write_sub, - ) - - PipelineProgress(pipeline_context).step("downloading") - debug(f"Starting download with 5-minute timeout...") - result_obj = _download_with_timeout(opts, timeout_seconds=300) - debug(f"Download completed, 
building pipe object...") - break - except DownloadError as e: - cause = getattr(e, "__cause__", None) - detail = "" - try: - detail = str(cause or "") - except Exception: - detail = "" - - if ("requested format is not available" - in (detail or "").lower()) and mode != "audio": - if (forced_single_format_for_batch - and forced_single_format_id and not ytdl_format - and not actual_playlist_items - and not attempted_single_format_fallback): - attempted_single_format_fallback = True - actual_format = forced_single_format_id - debug( - f"Only one format available (playlist preflight); retrying with: {actual_format}" - ) - continue - - formats = self._list_formats_cached( - url, - playlist_items_value=actual_playlist_items, - formats_cache=formats_cache, - ytdlp_tool=ytdlp_tool, - ) - if ((not attempted_single_format_fallback) - and isinstance(formats, - list) and len(formats) == 1 - and isinstance(formats[0], - dict)): - only = formats[0] - fallback_format = str(only.get("format_id") - or "").strip() - selection_format_id = fallback_format - try: - vcodec = str(only.get("vcodec", "none")) - acodec = str(only.get("acodec", "none")) - if vcodec != "none" and acodec == "none" and fallback_format: - selection_format_id = f"{fallback_format}+ba" - except Exception: - selection_format_id = fallback_format - - if selection_format_id: - attempted_single_format_fallback = True - actual_format = selection_format_id - debug( - f"Only one format available; retrying with: {actual_format}" - ) - continue - - if formats: - formats_to_show = formats - - table = ResultTable( - title=f"Available formats for {url}", - max_columns=10, - preserve_order=True, - ) - table.set_table("ytdlp.formatlist") - table.set_source_command("download-media", [url]) - - results_list: List[Dict[str, Any]] = [] - for idx, fmt in enumerate(formats_to_show, 1): - resolution = fmt.get("resolution", "") - ext = fmt.get("ext", "") - vcodec = fmt.get("vcodec", "none") - acodec = fmt.get("acodec", "none") - filesize = fmt.get("filesize") - filesize_approx = fmt.get("filesize_approx") - format_id = fmt.get("format_id", "") - - selection_format_id = format_id - try: - if vcodec != "none" and acodec == "none" and format_id: - selection_format_id = f"{format_id}+ba" - except Exception: - selection_format_id = format_id - - size_str = "" - size_prefix = "" - size_bytes = filesize - if not size_bytes: - size_bytes = filesize_approx - if size_bytes: - size_prefix = "~" - try: - if isinstance(size_bytes, - (int, - float)) and size_bytes > 0: - size_mb = float(size_bytes) / (1024 * 1024) - size_str = f"{size_prefix}{size_mb:.1f}MB" - except Exception: - size_str = "" - - desc_parts: List[str] = [] - if resolution and resolution != "audio only": - desc_parts.append(str(resolution)) - if ext: - desc_parts.append(str(ext).upper()) - if vcodec != "none": - desc_parts.append(f"v:{vcodec}") - if acodec != "none": - desc_parts.append(f"a:{acodec}") - if size_str: - desc_parts.append(size_str) - format_desc = " | ".join(desc_parts) - - format_dict: Dict[ - str, - Any] = { - "table": - "download-media", - "title": - f"Format {format_id}", - "url": - url, - "target": - url, - "detail": - format_desc, - "media_kind": - "format", - "columns": [ - ("ID", - format_id), - ("Resolution", - resolution or "N/A"), - ("Ext", - ext), - ("Size", - size_str or ""), - ("Video", - vcodec), - ("Audio", - acodec), - ], - "full_metadata": { - "format_id": format_id, - "url": url, - "item_selector": selection_format_id, - }, - "_selection_args": - ["-format", - 
selection_format_id], - } - - results_list.append(format_dict) - table.add_result(format_dict) - - pipeline_context.set_current_stage_table(table) - pipeline_context.set_last_result_table( - table, - results_list - ) - - try: - _print_table_suspended(table) - setattr(table, "_rendered_by_cmdlet", True) - except Exception: - pass - - PipelineProgress(pipeline_context - ).step("awaiting selection") - - log( - "Requested format is not available; select a working format with @N", - file=sys.stderr, - ) - return 0 - - raise - - results_to_emit: List[Any] = [] - if isinstance(result_obj, list): - results_to_emit = list(result_obj) - else: - paths = getattr(result_obj, "paths", None) - if isinstance(paths, list) and paths: - for p in paths: - try: - p_path = Path(p) - except Exception: - continue - try: - if p_path.suffix.lower() in _SUBTITLE_EXTS: - continue - except Exception: - pass - if not p_path.exists() or p_path.is_dir(): - continue - try: - hv = sha256_file(p_path) - except Exception: - hv = None - results_to_emit.append( - DownloadMediaResult( - path=p_path, - info=getattr(result_obj, - "info", - {}) or {}, - tag=list(getattr(result_obj, - "tag", - []) or []), - source_url=getattr(result_obj, - "source_url", - None) or opts.url, - hash_value=hv, - ) - ) - else: - results_to_emit = [result_obj] - - pipe_objects: List[Dict[str, Any]] = [] - for downloaded in results_to_emit: - po = self._build_pipe_object(downloaded, url, opts) - pipe_seq += 1 - try: - po.setdefault("pipe_index", pipe_seq) - except Exception: - pass - - try: - info = ( - downloaded.info - if isinstance(getattr(downloaded, - "info", - None), - dict) else {} - ) - except Exception: - info = {} - chapters_text = _format_chapters_note( - info - ) if embed_chapters else None - if chapters_text: - notes = po.get("notes") - if not isinstance(notes, dict): - notes = {} - notes.setdefault("chapters", chapters_text) - po["notes"] = notes - - if write_sub: - try: - media_path = Path(str(po.get("path") or "")) - except Exception: - media_path = None - - if media_path is not None and media_path.exists( - ) and media_path.is_file(): - sub_path = _best_subtitle_sidecar(media_path) - if sub_path is not None: - sub_text = _read_text_file(sub_path) - if sub_text: - notes = po.get("notes") - if not isinstance(notes, dict): - notes = {} - notes["sub"] = sub_text - po["notes"] = notes - try: - sub_path.unlink() - except Exception: - pass - - pipe_objects.append(po) - - try: - if clip_ranges and len(pipe_objects) == len(clip_ranges): - source_hash = query_hash_override or self._find_existing_hash_for_url( - storage, - canonical_url, - hydrus_available=hydrus_available, - ) - self._apply_clip_decorations( - pipe_objects, - clip_ranges, - source_king_hash=source_hash - ) - except Exception: - pass - - debug(f"Emitting {len(pipe_objects)} result(s) to pipeline...") - - PipelineProgress(pipeline_context).step("finalized") - - stage_ctx = pipeline_context.get_stage_context() - # Emit per downloaded item whenever we're running under the pipeline runner. - # Live progress advances on emit(), and suppressing emits for the last stage - # causes the pipe bar to stay at 0% even while downloads complete. 
- emit_enabled = bool(stage_ctx is not None) - for pipe_obj_dict in pipe_objects: - if emit_enabled: - pipeline_context.emit(pipe_obj_dict) - - if pipe_obj_dict.get("url"): - pipe_obj = coerce_to_pipe_object(pipe_obj_dict) - register_url_with_local_library(pipe_obj, config) - - try: - downloaded_pipe_objects.append(pipe_obj_dict) - except Exception: - pass - - downloaded_count += len(pipe_objects) - debug("✓ Downloaded and emitted") - - except DownloadError as e: - log(f"Download failed for {url}: {e}", file=sys.stderr) - except Exception as e: - log(f"Error processing {url}: {e}", file=sys.stderr) - - if downloaded_count > 0: - debug(f"✓ Successfully processed {downloaded_count} URL(s)") - return 0 - - log("No downloads completed", file=sys.stderr) - return 1 - - def _run_impl( - self, - result: Any, - args: Sequence[str], - config: Dict[str, - Any] - ) -> int: - """Main download implementation for yt-dlp-supported url.""" - try: - debug("Starting download-media") - - ytdlp_tool = YtDlpTool(config) - - # Parse arguments - parsed = parse_cmdlet_args(args, self) - - raw_url = self._normalize_urls(parsed) - raw_url = self._append_urls_from_piped_result(raw_url, result) - - supported_url, unsupported_list = self._filter_supported_urls(raw_url) - - if not supported_url: - log("No yt-dlp-supported url to download", file=sys.stderr) - return 1 - - if unsupported_list: - debug( - f"Skipping {len(unsupported_list)} unsupported url (use download-file for direct downloads)" - ) - - # Get output directory - final_output_dir = self._resolve_output_dir(parsed, config) - if not final_output_dir: - return 1 - - debug(f"Output directory: {final_output_dir}") - - # Get other options - clip_spec = parsed.get("clip") - query_spec = parsed.get("query") - - # download-media supports a small keyed spec language inside -query. - # Examples: - # -query "hash:" - # -query "clip:1m-1m15s,2m1s-2m11s" - # -query "hash:,clip:1m-1m15s,item:2-3" - # -query "format:audio,item:1-3" (audio-only + playlist selection) - query_keyed = self._parse_query_keyed_spec( - str(query_spec) if query_spec is not None else None - ) - - # Optional: allow an explicit hash via -query "hash:". - # This is used as the preferred king hash for multi-clip relationships. - query_hash_override = self._extract_hash_override( - str(query_spec) if query_spec is not None else None, - query_keyed, - ) - - # Always enable chapters + subtitles so downstream pipes (e.g. mpv) can consume them. - embed_chapters = True - write_sub = True - - # QueryArgs: - # - format:audio => audio-only (highest quality audio) - # - format: => equivalent to -format - # - audio:true|false => audio-only mode toggle (preferred over format:audio when provided) - query_format: Optional[str] = None - try: - fmt_values = query_keyed.get("format", - []) if isinstance(query_keyed, - dict) else [] - fmt_candidate = fmt_values[-1] if fmt_values else None - if fmt_candidate is not None: - query_format = str(fmt_candidate).strip() - except Exception: - query_format = None - - query_audio: Optional[bool] = None - try: - audio_values = query_keyed.get("audio", - []) if isinstance(query_keyed, - dict) else [] - audio_candidate = audio_values[-1] if audio_values else None - if audio_candidate is not None: - s = str(audio_candidate).strip().lower() - if s in {"1", - "true", - "t", - "yes", - "y", - "on"}: - query_audio = True - elif s in {"0", - "false", - "f", - "no", - "n", - "off"}: - query_audio = False - elif s: - # Any other non-empty value implies "on". 
- query_audio = True - except Exception: - query_audio = None - - query_wants_audio = False - if query_format: - try: - query_wants_audio = str(query_format).strip().lower() == "audio" - except Exception: - query_wants_audio = False - - # Explicit CLI flag wins; else query audio: can select audio mode. - # Back-compat: format:audio is still supported. - audio_flag = bool(parsed.get("audio") is True) - wants_audio = audio_flag - if query_audio is not None: - wants_audio = wants_audio or bool(query_audio) - else: - wants_audio = wants_audio or bool(query_wants_audio) - mode = "audio" if wants_audio else "video" - - clip_ranges, clip_invalid, clip_values = self._parse_clip_ranges_and_apply_items( - clip_spec=str(clip_spec) if clip_spec is not None else None, - query_keyed=query_keyed, - parsed=parsed, - query_spec=str(query_spec) if query_spec is not None else None, - ) - if clip_invalid: - return 1 - - if clip_ranges: - try: - debug(f"Clip ranges: {clip_ranges}") - except Exception: - pass - - quiet_mode = ( - bool(config.get("_quiet_background_output")) - if isinstance(config, - dict) else False - ) - - storage, hydrus_available = self._init_storage( - config if isinstance(config, dict) else {} - ) - - # Check if we need to show format selection - formats_cache: Dict[str, - Optional[List[Dict[str, - Any]]]] = {} - playlist_items = str(parsed.get("item")) if parsed.get("item") else None - ytdl_format = parsed.get("format") - # If user didn't pass -format, allow -query "format:<...>" to provide it. - # Supported query forms: - # - format:audio => audio-only mode (handled above) - # - format:720p => pick best video <= 720p and always include audio - # - format: => treated as a raw yt-dlp selector (non-numeric) - # - format: => treated as a 1-based index into the shown format list (resolved below) - if not ytdl_format and query_format and not query_wants_audio: - try: - height_selector = self._format_selector_for_query_height( - query_format - ) - except ValueError as e: - log(f"Error parsing format selection: {e}", file=sys.stderr) - return 1 - - if height_selector: - ytdl_format = height_selector - else: - import re - - # Preserve numeric index selection and avoid ambiguity with numeric format IDs. - if not re.match(r"^\s*#?\d+\s*$", str(query_format)): - ytdl_format = query_format - playlist_selection_handled = False - - # Playlist/multi-entry detection: if the URL has multiple items and the user didn't - # specify -item or -format, show a normal selectable table and return. - if len(supported_url) == 1 and not playlist_items and not ytdl_format: - candidate_url = supported_url[0] - - # Support numeric index selection via -query "format:" where N is 1-based index - # into the filtered format list (e.g., -query "format:7" selects the 7th listed format). - # This allows non-interactive invocation from shells (PowerShell treats '@' specially). - if query_format and not query_wants_audio: - try: - idx_fmt = self._format_id_for_query_index( - query_format, - candidate_url, - formats_cache, - ytdlp_tool - ) - except ValueError as e: - log(f"Error parsing format selection: {e}", file=sys.stderr) - return 1 - if idx_fmt: - debug( - f"Resolved numeric format selection '{query_format}' -> {idx_fmt}" - ) - ytdl_format = idx_fmt - - if not ytdl_format: - if self._maybe_show_playlist_table(url=candidate_url, - ytdlp_tool=ytdlp_tool): - playlist_selection_handled = True - # Let the user pick items using the normal REPL prompt: - # @* | download-media ... 
- - # If we printed a format table, give a quick hint for non-interactive selection. - try: - last_table = ( - pipeline_context.get_last_result_table() - if hasattr(pipeline_context, - "get_last_result_table") else None - ) - if hasattr(last_table, - "rows") and getattr(last_table, - "rows", - None): - # Build user-friendly examples using the base command we already constructed - sample_index = 1 - sample_fmt_id = None - try: - sample_row = last_table.rows[0] - sample_fmt_id = ( - sample_row._full_metadata.get("item_selector") - if getattr(sample_row, - "_full_metadata", - None) else None - ) - except Exception: - sample_fmt_id = None - - try: - # Use single quotes inside the outer quotes so PowerShell doesn't interpret the pipe character - sample_pipeline = base_cmd.replace( - f'"{candidate_url}"', - f"'{candidate_url}'" - ) - hint = ( - "To select non-interactively, re-run with an explicit format: " - 'e.g. mm "{pipeline} -format {fmt} | add-file -store " or ' - "mm \"{pipeline} -query 'format:{index}' | add-file -store \"" - ).format( - pipeline=sample_pipeline, - fmt=sample_fmt_id or "", - index=sample_index, - ) - log(hint, file=sys.stderr) - except Exception: - pass - except Exception: - pass - - return 0 - - # Bulk preflight for playlist selections (per-entry URLs): check all URLs once before downloading. - skip_per_url_preflight = False - if len(supported_url) > 1: - if not self._preflight_url_duplicates_bulk( - storage=storage, - hydrus_available=hydrus_available, - final_output_dir=final_output_dir, - urls=list(supported_url), - ): - return 0 - skip_per_url_preflight = True - - # Playlist-level format preflight: if the batch has only one available format, - # discover it once and force it for every item. This avoids per-item failures - # and per-item --list-formats calls (e.g. Bandcamp albums). 
- - forced_single_format_id: Optional[str] = None - forced_single_format_for_batch = False - if len(supported_url) > 1 and not playlist_items and not ytdl_format: - try: - sample_url = str(supported_url[0]) - fmts = self._list_formats_cached( - sample_url, - playlist_items_value=None, - formats_cache=formats_cache, - ytdlp_tool=ytdlp_tool, - ) - if isinstance(fmts, - list) and len(fmts) == 1 and isinstance(fmts[0], - dict): - only_id = str(fmts[0].get("format_id") or "").strip() - if only_id: - forced_single_format_id = only_id - forced_single_format_for_batch = True - debug( - f"Playlist format preflight: only one format available; using {forced_single_format_id} for all items" - ) - except Exception: - forced_single_format_id = None - forced_single_format_for_batch = False - - early_ret = self._maybe_show_format_table_for_single_url( - mode=mode, - clip_spec=clip_spec, - clip_values=clip_values, - playlist_items=playlist_items, - ytdl_format=ytdl_format, - supported_url=supported_url, - playlist_selection_handled=playlist_selection_handled, - ytdlp_tool=ytdlp_tool, - formats_cache=formats_cache, - storage=storage, - hydrus_available=hydrus_available, - final_output_dir=final_output_dir, - args=args, - ) - if early_ret is not None: - return int(early_ret) - - return self._download_supported_urls( - supported_url=supported_url, - ytdlp_tool=ytdlp_tool, - args=args, - config=config, - final_output_dir=final_output_dir, - mode=mode, - clip_spec=clip_spec, - clip_ranges=clip_ranges, - query_hash_override=query_hash_override, - embed_chapters=embed_chapters, - write_sub=write_sub, - quiet_mode=quiet_mode, - playlist_items=playlist_items, - ytdl_format=ytdl_format, - skip_per_url_preflight=skip_per_url_preflight, - forced_single_format_id=forced_single_format_id, - forced_single_format_for_batch=forced_single_format_for_batch, - formats_cache=formats_cache, - storage=storage, - hydrus_available=hydrus_available, - ) - - except Exception as e: - log(f"Error in download-media: {e}", file=sys.stderr) - return 1 - - def _resolve_output_dir(self, - parsed: Dict[str, - Any], - config: Dict[str, - Any]) -> Optional[Path]: - """Resolve the output directory. - - Rules: - - If user passes `-path`, use that directory (override). - - Otherwise default to a temp directory (config["temp"] if present, else OS temp). - """ - - # Priority 1: explicit output directory override - path_override = parsed.get("path") - if path_override: - try: - candidate = Path(str(path_override)).expanduser() - # If user passed a file path, treat its parent as output dir. 
- if candidate.suffix: - candidate = candidate.parent - candidate.mkdir(parents=True, exist_ok=True) - debug(f"Using output directory override: {candidate}") - return candidate - except Exception as e: - log(f"Invalid -path output directory: {e}", file=sys.stderr) - return None - - # Priority 2: config-provided temp/output directory - try: - temp_value = (config - or {}).get("temp") if isinstance(config, - dict) else None - except Exception: - temp_value = None - if temp_value: - try: - candidate = Path(str(temp_value)).expanduser() - candidate.mkdir(parents=True, exist_ok=True) - debug(f"Using config temp directory: {candidate}") - return candidate - except Exception as e: - log( - f"Cannot use configured temp directory '{temp_value}': {e}", - file=sys.stderr - ) - return None - - # Priority 3: OS temp fallback - try: - candidate = Path(tempfile.gettempdir()) / "Medios-Macina" - candidate.mkdir(parents=True, exist_ok=True) - debug(f"Using OS temp directory: {candidate}") - return candidate - except Exception as e: - log(f"Cannot create OS temp directory: {e}", file=sys.stderr) - return None - - def _parse_time_ranges(self, spec: str) -> List[tuple[int, int]]: - """Parse clip specs into a list of (start_seconds, end_seconds). - - Supported inputs: - - "MM:SS-MM:SS" - - "HH:MM:SS-HH:MM:SS" - - seconds: "280-300" - - multiple ranges separated by commas: "4:40-5:00,5:15-5:25" - """ - - def _to_seconds(ts: str) -> Optional[int]: - ts = str(ts).strip() - if not ts: - return None - - # Support compact units like 3m4s, 1h22m, 1h2m3s - # (case-insensitive; seconds may be fractional but are truncated to int) - try: - unit_match = re.fullmatch( - r"(?i)\s*(?:(?P\d+)h)?\s*(?:(?P\d+)m)?\s*(?:(?P\d+(?:\.\d+)?)s)?\s*", - ts, - ) - except Exception: - unit_match = None - if (unit_match and unit_match.group(0).strip() - and any(unit_match.group(g) for g in ("h", "m", "s"))): - try: - hours = int(unit_match.group("h") or 0) - minutes = int(unit_match.group("m") or 0) - seconds = float(unit_match.group("s") or 0) - total = (hours * 3600) + (minutes * 60) + seconds - return int(total) - except Exception: - return None - - if ":" in ts: - parts = [p.strip() for p in ts.split(":")] - if len(parts) == 2: - hh_s = "0" - mm_s, ss_s = parts - elif len(parts) == 3: - hh_s, mm_s, ss_s = parts - else: - return None - - try: - hours = int(hh_s) - minutes = int(mm_s) - seconds = float(ss_s) - total = (hours * 3600) + (minutes * 60) + seconds - return int(total) - except Exception: - return None - - try: - return int(float(ts)) - except Exception: - return None - - ranges: List[tuple[int, int]] = [] - if not spec: - return ranges - - for piece in str(spec).split(","): - piece = piece.strip() - if not piece: - continue - if "-" not in piece: - return [] - start_s, end_s = [p.strip() for p in piece.split("-", 1)] - start = _to_seconds(start_s) - end = _to_seconds(end_s) - if start is None or end is None or start >= end: - return [] - ranges.append((start, end)) - - return ranges - - @staticmethod - def _parse_keyed_csv_spec(spec: str, *, default_key: str) -> Dict[str, List[str]]: - """Parse comma-separated values with optional sticky `key:` prefixes. - - Example: - clip:3m4s-3m14s,1h22m-1h33m,item:2-3 - - Rules: - - Items are split on commas. - - If an item begins with `key:` then key becomes active for subsequent items. - - If an item has no `key:` prefix, it belongs to the last active key. - - If no key has been set yet, values belong to default_key. 
- """ - out: Dict[str, - List[str]] = {} - if not isinstance(spec, str): - spec = str(spec) - text = spec.strip() - if not text: - return out - - active = (default_key or "").strip().lower() or "clip" - key_pattern = re.compile(r"^([A-Za-z_][A-Za-z0-9_-]*)\s*:\s*(.*)$") - - for raw_piece in text.split(","): - piece = raw_piece.strip() - if not piece: - continue - - m = key_pattern.match(piece) - if m: - active = (m.group(1) or "").strip().lower() or active - value = (m.group(2) or "").strip() - if value: - out.setdefault(active, []).append(value) - continue - - out.setdefault(active, []).append(piece) - - return out - - def _build_clip_sections_spec( - self, - clip_ranges: Optional[List[tuple[int, - int]]], - ) -> Optional[str]: - """Convert parsed clip range into downloader spec (seconds).""" - ranges: List[str] = [] - if clip_ranges: - for start_s, end_s in clip_ranges: - ranges.append(f"{start_s}-{end_s}") - return ",".join(ranges) if ranges else None - - def _build_pipe_object(self, - download_result: Any, - url: str, - opts: DownloadOptions) -> Dict[str, - Any]: - """Create a PipeObject-compatible dict from a DownloadMediaResult.""" - info: Dict[str, - Any] = ( - download_result.info if isinstance(download_result.info, - dict) else {} - ) - media_path = Path(download_result.path) - hash_value = download_result.hash_value or self._compute_file_hash(media_path) - title = info.get("title") or media_path.stem - tag = list(download_result.tag or []) - - # Add title tag for searchability - if title and f"title:{title}" not in tag: - tag.insert(0, f"title:{title}") - - # Store the canonical URL for de-dup/search purposes. - # Prefer yt-dlp's webpage_url, and do not mix in the raw requested URL (which may contain timestamps). - final_url = None - try: - page_url = info.get("webpage_url") or info.get("original_url" - ) or info.get("url") - if page_url: - final_url = str(page_url) - except Exception: - final_url = None - if not final_url and url: - final_url = str(url) - - # Construct canonical PipeObject dict: hash, store, path, url, title, tags - # Prefer explicit backend names (storage_name/storage_location). If none, default to PATH - # which indicates the file is available at a filesystem path and hasn't been added to a backend yet. 
- return { - "path": - str(media_path), - "hash": - hash_value, - "title": - title, - "url": - final_url, - "tag": - tag, - "action": - "cmdlet:download-media", - "is_temp": - True, - "ytdl_format": - getattr(opts, - "ytdl_format", - None), - # download_mode removed (deprecated), keep media_kind - "store": - getattr(opts, - "storage_name", - None) or getattr(opts, - "storage_location", - None) or "PATH", - "media_kind": - "video" if opts.mode == "video" else "audio", - } - - @staticmethod - def _normalise_hash_hex(value: Optional[str]) -> Optional[str]: - if not value or not isinstance(value, str): - return None - candidate = value.strip().lower() - if len(candidate) == 64 and all(c in "0123456789abcdef" for c in candidate): - return candidate - return None - - @classmethod - def _extract_hash_from_search_hit(cls, hit: Any) -> Optional[str]: - if not isinstance(hit, dict): - return None - for key in ("hash", "hash_hex", "file_hash", "hydrus_hash"): - v = hit.get(key) - normalized = cls._normalise_hash_hex(str(v) if v is not None else None) - if normalized: - return normalized - return None - - @classmethod - def _find_existing_hash_for_url( - cls, - storage: Any, - canonical_url: str, - *, - hydrus_available: bool, - ) -> Optional[str]: - """Best-effort lookup of an existing stored item hash by url:. - - Used to make the stored source video the king for multi-clip relationships. - """ - if storage is None or not canonical_url: - return None - try: - from Store.HydrusNetwork import HydrusNetwork - except Exception: - HydrusNetwork = None # type: ignore - - try: - backend_names = list(storage.list_searchable_backends() or []) - except Exception: - backend_names = [] - - for backend_name in backend_names: - try: - backend = storage[backend_name] - except Exception: - continue - try: - if str(backend_name).strip().lower() == "temp": - continue - except Exception: - pass - try: - if (HydrusNetwork is not None and isinstance(backend, - HydrusNetwork) - and not hydrus_available): - continue - except Exception: - pass - - try: - hits = backend.search(f"url:{canonical_url}", limit=5) or [] - except Exception: - hits = [] - for hit in hits: - extracted = cls._extract_hash_from_search_hit(hit) - if extracted: - return extracted - - return None - - @staticmethod - def _format_timecode(seconds: int, *, force_hours: bool) -> str: - total = max(0, int(seconds)) - minutes, secs = divmod(total, 60) - hours, minutes = divmod(minutes, 60) - if force_hours: - return f"{hours:02d}:{minutes:02d}:{secs:02d}" - return f"{minutes:02d}:{secs:02d}" - - @classmethod - def _format_clip_range(cls, start_s: int, end_s: int) -> str: - force_hours = bool(start_s >= 3600 or end_s >= 3600) - return f"{cls._format_timecode(start_s, force_hours=force_hours)}-{cls._format_timecode(end_s, force_hours=force_hours)}" - - @classmethod - def _apply_clip_decorations( - cls, - pipe_objects: List[Dict[str, - Any]], - clip_ranges: List[tuple[int, - int]], - *, - source_king_hash: Optional[str], - ) -> None: - """Apply clip:{range} tags/titles and relationship metadata for multi-clip downloads. - - - Sets the clip title (and title: tag) to exactly `clip:{range}`. - - Adds `clip:{range}` tag. - - Sets `relationships` on each emitted item (king hash first, then alt hashes) - so downstream can persist relationships into a DB/API without storing relationship tags. - """ - if not pipe_objects or len(pipe_objects) != len(clip_ranges): - return - - # Always apply clip titles/tags (even for a single clip). 
- for po, (start_s, end_s) in zip(pipe_objects, clip_ranges): - clip_range = cls._format_clip_range(start_s, end_s) - clip_tag = f"clip:{clip_range}" - - # Title: make it generic/consistent for clips. - po["title"] = clip_tag - - tags = po.get("tag") - if not isinstance(tags, list): - tags = [] - - # Replace any existing title: tags with title: - tags = [t for t in tags if not str(t).strip().lower().startswith("title:")] - - # Relationships must not be stored as tags. - tags = [ - t for t in tags - if not str(t).strip().lower().startswith("relationship:") - ] - tags.insert(0, f"title:{clip_tag}") - - # Ensure clip tag exists - if clip_tag not in tags: - tags.append(clip_tag) - - po["tag"] = tags - - # Relationship tagging only makes sense when multiple clips exist. - if len(pipe_objects) < 2: - return - - hashes: List[str] = [] - for po in pipe_objects: - h = cls._normalise_hash_hex(str(po.get("hash") or "")) - hashes.append(h or "") - - # Determine king: prefer an existing source video hash if present; else first clip becomes king. - king_hash = cls._normalise_hash_hex( - source_king_hash - ) if source_king_hash else None - if not king_hash: - king_hash = hashes[0] if hashes and hashes[0] else None - if not king_hash: - return - - alt_hashes: List[str] = [h for h in hashes if h and h != king_hash] - if not alt_hashes: - return - - # Carry relationship metadata through the pipeline without using tags. - rel_payload = { - "king": [king_hash], - "alt": list(alt_hashes) - } - for po in pipe_objects: - po["relationships"] = { - "king": [king_hash], - "alt": list(alt_hashes) - } - - def _compute_file_hash(self, filepath: Path) -> str: - """Compute SHA256 hash of a file.""" - import hashlib - - sha256_hash = hashlib.sha256() - with open(filepath, "rb") as f: - for byte_block in iter(lambda: f.read(4096), b""): - sha256_hash.update(byte_block) - return sha256_hash.hexdigest() - - -# Module-level singleton registration -CMDLET = Download_Media() diff --git a/cmdlet/download_torrent.py b/cmdlet/download_torrent.py deleted file mode 100644 index 14937f9..0000000 --- a/cmdlet/download_torrent.py +++ /dev/null @@ -1,280 +0,0 @@ -"""Download torrent/magnet links via AllDebrid in a dedicated cmdlet. - -Features: -- Accepts magnet links and .torrent files/url -- Uses AllDebrid API for background downloads -- Progress tracking and worker management -- Self-registering class-based cmdlet -""" - -from __future__ import annotations -import sys -import uuid -import threading -from pathlib import Path -from typing import Any, Dict, Optional, Sequence - -from SYS.logger import log -from . 
import _shared as sh - - -class Download_Torrent(sh.Cmdlet): - """Class-based download-torrent cmdlet with self-registration.""" - - def __init__(self) -> None: - super().__init__( - name="download-torrent", - summary="Download torrent/magnet links via AllDebrid", - usage="download-torrent [options]", - alias=["torrent", - "magnet"], - arg=[ - sh.CmdletArg( - name="magnet", - type="string", - required=False, - description="Magnet link or .torrent file/URL", - variadic=True, - ), - sh.CmdletArg( - name="output", - type="string", - description="Output directory for downloaded files", - ), - sh.CmdletArg( - name="wait", - type="float", - description="Wait time (seconds) for magnet processing timeout", - ), - sh.CmdletArg( - name="background", - type="flag", - alias="bg", - description="Start download in background", - ), - ], - detail=["Download torrents/magnets via AllDebrid API."], - exec=self.run, - ) - self.register() - - def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: - parsed = sh.parse_cmdlet_args(args, self) - magnet_args = parsed.get("magnet", []) - output_dir = Path(parsed.get("output") or Path.home() / "Downloads") - wait_timeout = int(float(parsed.get("wait", 600))) - background_mode = parsed.get("background", False) - api_key = None - try: - from Provider.alldebrid import _get_debrid_api_key # type: ignore - - api_key = _get_debrid_api_key(config) - except Exception: - api_key = None - if not api_key: - log( - "AllDebrid API key not configured (check config.conf [provider=alldebrid] api_key=...)", - file=sys.stderr, - ) - return 1 - for magnet_url in magnet_args: - if background_mode: - self._start_background_worker( - magnet_url, - output_dir, - config, - api_key, - wait_timeout - ) - log(f"⧗ Torrent download queued in background: {magnet_url}") - else: - # Foreground mode: submit quickly, then continue processing in background - # so we return control to the REPL immediately. - worker_id = str(uuid.uuid4()) - magnet_id = self._submit_magnet(worker_id, magnet_url, api_key) - if magnet_id <= 0: - continue - self._start_background_magnet_worker( - worker_id, - magnet_id, - output_dir, - api_key, - wait_timeout - ) - log(f"⧗ Torrent processing started (ID: {magnet_id})") - return 0 - - @staticmethod - def _submit_magnet(worker_id: str, magnet_url: str, api_key: str) -> int: - """Submit a magnet and return its AllDebrid magnet ID. - - This is intentionally fast so the caller can return to the REPL. 
- """ - try: - from API.alldebrid import AllDebridClient - - client = AllDebridClient(api_key) - log(f"[Worker {worker_id}] Submitting magnet to AllDebrid...") - magnet_info = client.magnet_add(magnet_url) - magnet_id = int(magnet_info.get("id", 0)) - if magnet_id <= 0: - log(f"[Worker {worker_id}] Magnet add failed", file=sys.stderr) - return 0 - log(f"[Worker {worker_id}] ✓ Magnet added (ID: {magnet_id})") - return magnet_id - except Exception as e: - log(f"[Worker {worker_id}] Magnet submit failed: {e}", file=sys.stderr) - return 0 - - def _start_background_magnet_worker( - self, - worker_id: str, - magnet_id: int, - output_dir: Path, - api_key: str, - wait_timeout: int - ) -> None: - thread = threading.Thread( - target=self._download_magnet_worker, - args=(worker_id, - magnet_id, - output_dir, - api_key, - wait_timeout), - daemon=True, - name=f"TorrentWorker_{worker_id}", - ) - thread.start() - - @staticmethod - def _download_magnet_worker( - worker_id: str, - magnet_id: int, - output_dir: Path, - api_key: str, - wait_timeout: int = 600, - ) -> None: - """Poll AllDebrid magnet status until ready, then download the files.""" - try: - from API.alldebrid import AllDebridClient - - client = AllDebridClient(api_key) - - # Poll for ready status (simplified) - import time - - elapsed = 0 - while elapsed < wait_timeout: - status = client.magnet_status(magnet_id) - if status.get("ready"): - break - time.sleep(5) - elapsed += 5 - if elapsed >= wait_timeout: - log(f"[Worker {worker_id}] Timeout waiting for magnet", file=sys.stderr) - return - - files_result = client.magnet_links([magnet_id]) - magnet_files = files_result.get(str(magnet_id), - {}) - files_array = magnet_files.get("files", []) - if not files_array: - log(f"[Worker {worker_id}] No files found", file=sys.stderr) - return - for file_info in files_array: - file_url = file_info.get("link") - file_name = file_info.get("name") - if file_url and file_name: - Download_Torrent._download_file(file_url, output_dir / file_name) - log(f"[Worker {worker_id}] ✓ Downloaded {file_name}") - except Exception as e: - log(f"[Worker {worker_id}] Torrent download failed: {e}", file=sys.stderr) - - @staticmethod - def _download_torrent_worker( - worker_id: str, - magnet_url: str, - output_dir: Path, - config: Dict[str, - Any], - api_key: str, - wait_timeout: int = 600, - worker_manager: Optional[Any] = None, - ) -> None: - try: - from API.alldebrid import AllDebridClient - - client = AllDebridClient(api_key) - log(f"[Worker {worker_id}] Submitting magnet to AllDebrid...") - magnet_info = client.magnet_add(magnet_url) - magnet_id = int(magnet_info.get("id", 0)) - if magnet_id <= 0: - log(f"[Worker {worker_id}] Magnet add failed", file=sys.stderr) - return - log(f"[Worker {worker_id}] ✓ Magnet added (ID: {magnet_id})") - # Poll for ready status (simplified) - import time - - elapsed = 0 - while elapsed < wait_timeout: - status = client.magnet_status(magnet_id) - if status.get("ready"): - break - time.sleep(5) - elapsed += 5 - if elapsed >= wait_timeout: - log(f"[Worker {worker_id}] Timeout waiting for magnet", file=sys.stderr) - return - files_result = client.magnet_links([magnet_id]) - magnet_files = files_result.get(str(magnet_id), - {}) - files_array = magnet_files.get("files", []) - if not files_array: - log(f"[Worker {worker_id}] No files found", file=sys.stderr) - return - for file_info in files_array: - file_url = file_info.get("link") - file_name = file_info.get("name") - if file_url: - Download_Torrent._download_file(file_url, output_dir / 
file_name) - log(f"[Worker {worker_id}] ✓ Downloaded {file_name}") - except Exception as e: - log(f"[Worker {worker_id}] Torrent download failed: {e}", file=sys.stderr) - - @staticmethod - def _download_file(url: str, dest: Path) -> None: - try: - import requests - - resp = requests.get(url, stream=True) - with open(dest, "wb") as f: - for chunk in resp.iter_content(chunk_size=8192): - if chunk: - f.write(chunk) - except Exception as e: - log(f"File download failed: {e}", file=sys.stderr) - - def _start_background_worker( - self, - magnet_url, - output_dir, - config, - api_key, - wait_timeout - ): - worker_id = f"torrent_{uuid.uuid4().hex[:6]}" - thread = threading.Thread( - target=self._download_torrent_worker, - args=(worker_id, - magnet_url, - output_dir, - config, - api_key, - wait_timeout), - daemon=True, - name=f"TorrentWorker_{worker_id}", - ) - thread.start() - - -CMDLET = Download_Torrent() diff --git a/cmdlet/merge_file.py b/cmdlet/merge_file.py index cc94c67..474e2c9 100644 --- a/cmdlet/merge_file.py +++ b/cmdlet/merge_file.py @@ -155,8 +155,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: mode_hint: Optional[str] = None forced_format: Optional[str] = None try: - from cmdlet.download_media import list_formats - from tool.ytdlp import YtDlpTool + from tool.ytdlp import YtDlpTool, list_formats sample_url = urls_to_download[0] cookiefile = None diff --git a/cmdlet/search_file.py b/cmdlet/search_file.py index c84273d..2497d8b 100644 --- a/cmdlet/search_file.py +++ b/cmdlet/search_file.py @@ -209,6 +209,18 @@ class search_file(Cmdlet): provider_text = str(provider_name or "").strip() provider_lower = provider_text.lower() + id_match = re.search(r"\bid\s*[=:]\s*(\d+)", query, flags=re.IGNORECASE) + parsed_open_id = open_id + if id_match and parsed_open_id is None: + try: + parsed_open_id = int(id_match.group(1)) + except Exception: + parsed_open_id = None + query = re.sub(r"\bid\s*[=:]\s*\d+", "", query, flags=re.IGNORECASE).strip() + if not query: + query = "*" + + effective_open_id = parsed_open_id if parsed_open_id is not None else open_id if provider_lower == "youtube": provider_label = "Youtube" elif provider_lower == "openlibrary": @@ -218,22 +230,32 @@ class search_file(Cmdlet): else: provider_label = provider_text[:1].upper() + provider_text[1:] if provider_text else "Provider" - if provider_lower == "alldebrid" and open_id is not None: - table_title = f"{provider_label} Files: {open_id}".strip().rstrip(":") + if provider_lower == "alldebrid" and effective_open_id is not None: + table_title = f"{provider_label} Files: {effective_open_id}".strip().rstrip(":") else: table_title = f"{provider_label}: {query}".strip().rstrip(":") preserve_order = provider_lower in {"youtube", "openlibrary", "loc"} table = ResultTable(table_title).set_preserve_order(preserve_order) table.set_table(provider_name) + table_meta: Dict[str, Any] = {"provider": provider_name} + if provider_lower == "alldebrid": + table_meta["view"] = "files" if effective_open_id is not None else "folders" + if effective_open_id is not None: + table_meta["magnet_id"] = effective_open_id + try: + table.set_table_metadata(table_meta) + except Exception: + pass table.set_source_command("search-file", list(args_list)) debug(f"[search-file] Calling {provider_name}.search()") if provider_lower == "alldebrid": - if open_id is not None: - results = provider.search(query, limit=limit, filters={"view": "files", "magnet_id": open_id}) - else: - results = provider.search(query, limit=limit, 
filters={"view": "folders"}) + filters = {"view": "folders"} + search_open_id = parsed_open_id if parsed_open_id is not None else open_id + if search_open_id is not None: + filters = {"view": "files", "magnet_id": search_open_id} + results = provider.search(query, limit=limit, filters=filters) else: results = provider.search(query, limit=limit) debug(f"[search-file] {provider_name} -> {len(results or [])} result(s)") @@ -260,17 +282,6 @@ class search_file(Cmdlet): row_index = len(table.rows) table.add_result(search_result) - try: - if provider_lower == "alldebrid" and getattr(search_result, "media_kind", "") == "folder": - magnet_id = None - meta = getattr(search_result, "full_metadata", None) - if isinstance(meta, dict): - magnet_id = meta.get("magnet_id") - if magnet_id is not None: - table.set_row_selection_args(row_index, ["-open", str(magnet_id), "-query", "*"]) - except Exception: - pass - results_list.append(item_dict) ctx.emit(item_dict) diff --git a/cmdnat/worker.py b/cmdnat/worker.py index 2b218b6..216c0b5 100644 --- a/cmdnat/worker.py +++ b/cmdnat/worker.py @@ -2,6 +2,7 @@ from __future__ import annotations +import re import sys from dataclasses import dataclass from datetime import datetime, timezone @@ -199,27 +200,40 @@ def _render_worker_list(db, status_filter: str | None, limit: int) -> int: date_str = _extract_date(started) start_time = _format_event_timestamp(started) end_time = _format_event_timestamp(ended) + worker_id = str(worker.get("worker_id") or worker.get("id") or "unknown") + status = str(worker.get("status") or "unknown") + result_state = str(worker.get("result") or "") + status_label = status + if result_state and result_state.lower() not in {"", status.lower()}: + status_label = f"{status_label} ({result_state})" + pipe_display = _summarize_pipe(worker.get("pipe")) + error_message = _normalize_text(worker.get("error_message")) + description = _normalize_text(worker.get("description")) + + columns = [ + ("ID", worker_id[:8]), + ("Status", status_label), + ("Pipe", pipe_display), + ("Date", date_str), + ("Start", start_time), + ("End", end_time), + ] + if error_message: + columns.append(("Error", error_message[:140])) + if description and description != error_message: + columns.append(("Details", description[:200])) item = { - "columns": [ - ("Status", - worker.get("status", - "")), - ("Pipe", - _summarize_pipe(worker.get("pipe"))), - ("Date", - date_str), - ("Start Time", - start_time), - ("End Time", - end_time), - ], - "__worker_metadata": - worker, - "_selection_args": ["-id", - worker.get("worker_id")], + "columns": columns, + "__worker_metadata": worker, + "_selection_args": ["-id", worker.get("worker_id")], } ctx.emit(item) + log( + f"Worker {worker_id[:8]} status={status_label} pipe={pipe_display} " + f"error={error_message or 'none'}", + file=sys.stderr, + ) return 0 @@ -311,11 +325,25 @@ def _emit_worker_detail(worker: Dict[str, Any], events: List[Dict[str, Any]]) -> # Events are already always derived from stdout for now. -def _summarize_pipe(pipe_value: Any, limit: int = 60) -> str: - text = str(pipe_value or "").strip() +def _summarize_pipe(pipe_value: Any, limit: int = 200) -> str: + text = _normalize_text(pipe_value) if not text: return "(none)" - return text if len(text) <= limit else text[:limit - 3] + "..." + + stage_count = text.count("|") + 1 if text else 0 + display = text + if len(display) > limit: + trimmed = display[:max(limit - 3, 0)].rstrip() + if not trimmed: + trimmed = display[:limit] + display = f"{trimmed}..." 
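+    # Multi-stage pipes get a "(N stages)" suffix appended after the (possibly truncated) text.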
+ if stage_count > 1: + suffix = f" ({stage_count} stages)" + if not display.endswith("..."): + display = f"{display}{suffix}" + else: + display = f"{display}{suffix}" + return display def _format_event_timestamp(raw_timestamp: Any) -> str: @@ -378,3 +406,24 @@ def _extract_date(raw_timestamp: Any) -> str: except Exception: pass return date_part + + +def _normalize_text(value: Any) -> str: + text = str(value or "").strip() + if not text: + return "" + # collapse whitespace to keep table columns aligned + normalized = re.sub(r"\s+", " ", text) + return normalized + + +def _truncate_text(value: str, limit: int) -> str: + if limit <= 0: + return "" + if len(value) <= limit: + return value + cutoff = max(limit - 3, 0) + trimmed = value[:cutoff].rstrip() + if not trimmed: + return value[:limit] + return f"{trimmed}..." diff --git a/docs/img/download-media-bandcamp.svg b/docs/img/download-media-bandcamp.svg index e63dace..d58b145 100644 --- a/docs/img/download-media-bandcamp.svg +++ b/docs/img/download-media-bandcamp.svg @@ -121,7 +121,7 @@ - ╭───────────────────────────────────────────── download-media -url "https://altrusiangrace.bandcamp.com/album/zetetic-astronomy-earth-not-a-globe-full-audiobook" ─────────────────────────────────────────────╮ + ╭───────────────────────────────────────────── download-file -url "https://altrusiangrace.bandcamp.com/album/zetetic-astronomy-earth-not-a-globe-full-audiobook" ─────────────────────────────────────────────╮                #TITLE                                                                                      DURATION                                  UPLOADER                                    ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────  diff --git a/docs/result_table.md b/docs/result_table.md new file mode 100644 index 0000000..46c8ef6 --- /dev/null +++ b/docs/result_table.md @@ -0,0 +1,226 @@ +# ResultTable system — Overview & usage + +This document explains the `ResultTable` system used across the CLI and TUI: how tables are built, how providers integrate with them, and how `@N` selection/expansion and provider selectors work. + +## TL;DR ✅ +- `ResultTable` is the unified object used to render tabular results and drive selection (`@N`) behavior. +- Providers should return `SearchResult` objects (or dicts) and can either supply `selection_args` per row or implement a `selector()` method to handle `@N` selections. +- Table metadata (`set_table_metadata`) helps providers attach context (e.g., `provider_view`, `magnet_id`) that selectors can use. + +--- + +## Key concepts + +- **ResultTable** (`SYS/result_table.py`) + - Renders rows as a rich table and stores metadata used for selection expansion. + - Important APIs: `add_result()`, `set_table()`, `set_source_command()`, `set_row_selection_args()`, `set_table_metadata()`, and `select_interactive()`. + +- **ResultRow** + - Holds columns plus `selection_args` (used for `@N` expansion) and `payload` (original object). + +- **Provider selector** + - If a provider implements `selector(selected_items, ctx=..., stage_is_last=True)`, it is run first when `@N` is used; if the selector returns `True` it has handled the selection (e.g., drilling into a folder and publishing a new ResultTable). + +- **Pipeline / CLI expansion** + - When the user types `@N`, CLI tries provider selectors first. 
If none handle it, CLI re-runs `source_command + source_args + row_selection_args` (for single-selection) or pipes items downstream for multi-selection. + +- **Table metadata** + - `ResultTable.set_table_metadata(dict)` allows attaching provider-specific context (for example: `{"provider":"alldebrid","view":"files","magnet_id":123}`) for selectors and other code to use. + +--- + +## How to build a table (provider pattern) + +Typical provider flow (pseudocode): + +```py +from SYS.result_table import ResultTable + +table = ResultTable("Provider: X result").set_preserve_order(True) +table.set_table("provider_name") +table.set_table_metadata({"provider":"provider_name","view":"folders"}) +table.set_source_command("search-file", ["-provider","provider_name","query"]) + +for r in results: + table.add_result(r) # r can be a SearchResult, dict, or PipeObject + +ctx.set_last_result_table(table, payloads) +ctx.set_current_stage_table(table) +``` + +Notes: +- To drive a direct `@N` re-run, call `table.set_row_selection_args(row_index, ["-open", ""])`. +- For more advanced or interactive behavior (e.g., drill-into, fetch more rows), implement `provider.selector()` and return `True` when handled. + +--- + +## Selection (@N) flow (brief) + +1. User enters `@N` in the CLI. +2. CLI chooses the appropriate table (overlay > last table > history) and gathers the selected payload(s). +3. `PipelineExecutor._maybe_run_class_selector()` runs provider `selector()` hooks for the provider inferred from table or payloads. If any selector returns `True`, expansion stops. +4. Otherwise, for single selections, CLI grabs `row.selection_args` and expands: `source_command + source_args + row_selection_args` and inserts it as the expanded stage. For multi-selections, items are piped downstream. + +--- + +## Columns & display + +- Providers can pass a `columns` list ([(name, value), ...]) in the result dict/SearchResult to control which columns are shown and their order. +- Otherwise, `ResultTable` uses a priority list (title/store/size/ext) and sensible defaults. +- The table rendering functions (`to_rich`, `format_json`, `format_compact`) are available for different UIs. + +--- + +## Provider-specific examples + +### AllDebrid (debrid file hosting) + +AllDebrid exposes a list of magnets (folder rows) and the files inside each magnet. The provider returns `folder` SearchResults for magnets and `file` SearchResults for individual files. The provider includes a `selector()` that drills into a magnet by calling `search(..., filters={"view":"files","magnet_id":...})` and builds a new `ResultTable` of files. 
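+A minimal drill-in selector, sketched under the assumption that the provider already implements `search(query, limit, filters)`; the row/metadata handling is illustrative, and only the `ResultTable`/`ctx` calls shown are the documented API:
+
+```py
+from SYS.result_table import ResultTable
+
+class AllDebridProviderSketch:
+    """Hypothetical provider class used only to illustrate the selector pattern."""
+
+    def selector(self, selected_items, ctx=None, stage_is_last=True):
+        handled = False
+        for item in selected_items:
+            meta = item.get("full_metadata", {}) if isinstance(item, dict) else {}
+            magnet_id = meta.get("magnet_id")
+            if magnet_id is None:
+                continue  # not a folder row; let normal @N expansion handle it
+            # Re-query the provider for the files inside the selected magnet.
+            files = self.search("*", limit=200, filters={"view": "files", "magnet_id": magnet_id})
+            table = ResultTable(f"AllDebrid Files: {magnet_id}").set_preserve_order(True)
+            table.set_table("alldebrid")
+            table.set_table_metadata({"provider": "alldebrid", "view": "files", "magnet_id": magnet_id})
+            table.set_source_command("search-file", ["-provider", "alldebrid", "-open", str(magnet_id), "*"])
+            for f in files:
+                table.add_result(f)
+            if ctx is not None:
+                ctx.set_last_result_table(table, files)
+                ctx.set_current_stage_table(table)
+            handled = True
+        return handled  # True tells the CLI the selection was consumed
+```
+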
+ +Example commands: + +``` +# List magnets in your account +search-file -provider alldebrid "*" + +# Open magnet id 123 and list its files +search-file -provider alldebrid -open 123 "*" + +# Or expand via @ selection (selector handles drilling): +search-file -provider alldebrid "*" +@3 # selector will open the magnet referenced by row #3 and show the file table +``` + +Illustrative folder (magnet) SearchResult: + +```py +SearchResult( + table="alldebrid", + title="My Magnet Title", + path="alldebrid:magnet:123", + detail="OK", + annotations=["folder", "ready"], + media_kind="folder", + columns=[("Folder", "My Magnet Title"), ("ID", "123"), ("Status", "ready"), ("Ready", "yes")], + full_metadata={ + "magnet": {...}, + "magnet_id": 123, + "provider": "alldebrid", + "provider_view": "folders", + "magnet_name": "My Magnet Title", + }, +) +``` + +Illustrative file SearchResult (after drilling): + +```py +SearchResult( + table="alldebrid", + title="Episode 01.mkv", + path="https://.../unlocked_direct_url", + detail="My Magnet Title", + annotations=["file"], + media_kind="file", + size_bytes=123456789, + columns=[("File", "Episode 01.mkv"), ("Folder", "My Magnet Title"), ("ID", "123")], + full_metadata={ + "magnet": {...}, + "magnet_id": 123, + "magnet_name": "My Magnet Title", + "relpath": "Season 1/E01.mkv", + "provider": "alldebrid", + "provider_view": "files", + "file": {...}, + }, +) +``` + +Selection & download flows + +- Drill-in (selector): `@3` on a magnet row runs the provider's `selector()` to build a new file table and show it. The selector uses `search(..., filters={"view":"files","magnet_id":...})` to fetch file rows. + +- `download-file` integration: With a file row (http(s) path), `@2 | download-file` will download the file. The `download-file` cmdlet expands AllDebrid magnet folders and will call the provider layer to fetch file bytes as appropriate. + +- `add-file` convenience: Piping a file row into `add-file -path ` will trigger add-file's provider-aware logic. If the piped item has `table == 'alldebrid'` and a http(s) `path`, `add-file` will call `provider.download()` into a temporary directory and then ingest the downloaded file, cleaning up the temp when done. Example: + +``` +# Expand magnet and add first file to local directory +search-file -provider alldebrid "*" +@3 # view files +@1 | add-file -path C:\mydir +``` + +Notes & troubleshooting + +- Configure an AllDebrid API key (see `Provider/alldebrid._get_debrid_api_key()`). +- If a magnet isn't ready the selector or `download-file` will log the magnet status and avoid attempting file downloads. + +--- + +### Bandcamp (artist → discography drill-in) + +Bandcamp search supports `artist:` queries. Bandcamp's provider implements a `selector()` that detects `artist` results and scrapes the artist's page using Playwright to build a discography `ResultTable`. + +Example usage: + +``` +# Search for an artist +search-file -provider bandcamp "artist:radiohead" + +# Select an artist row to expand into releases +@1 +``` + +Bandcamp SearchResult (artist / album rows): + +```py +SearchResult( + table="bandcamp", + title="Album Title", + path="https://bandcamp.com/album_url", + detail="By: Artist", + annotations=["album"], + media_kind="audio", + columns=[("Title","Album Title"), ("Location","Artist"), ("Type","album"), ("Url","https://...")], + full_metadata={"artist":"Artist","type":"album","url":"https://..."} +) +``` + +Notes: +- Playwright is required for Bandcamp scraping. 
The selector will log an informative message if Playwright is missing. +- Provider selectors are ideal when you need to replace one table with another (artist → discography). + +--- + +## Provider author checklist (short) + +- Implement `search(query, limit, filters)` and return `SearchResult` objects or dicts; include useful `full_metadata` (IDs, view names) for selection/drilling. +- If you support fetching downloadable file bytes, implement `download(result, output_dir) -> Optional[Path]`. +- For drill-in or interactive transforms, implement `selector(selected_items, ctx=..., stage_is_last=True)` and call `ctx.set_last_result_table(...)` / `ctx.set_current_stage_table(...)`; return `True` when handled. +- Add tests (unit/integration) that exercise search → select → download flows. + +--- + +## Debugging tips +- Use `ctx.set_last_result_table(table, payloads)` to immediately show a table while developing a selector. +- Add `log(...)` messages in provider code to capture fail points. +- Check `full_metadata` attached to SearchResults to pass extra context (IDs, view names, provider names). + +--- + +## Quick reference +- ResultTable location: `SYS/result_table.py` +- Pipeline helpers: `SYS/pipeline.py` (`set_last_result_table`, `set_current_stage_table`, `get_current_stage_table_row_selection_args`) +- CLI expansion: `CLI.py` (handles `@N`, provider selectors, and insertion of expanded stages) +- Provider selector pattern: Implement `.selector(selected_items, ctx=..., stage_is_last=True)` in provider class. + +--- + +If you'd like, I can also: +- Add provider-specific examples (AllDebrid, Bandcamp) into this doc ✅ +- Add a short checklist for PR reviewers when adding new providers + +--- + +Created by GitHub Copilot (Raptor mini - Preview) — brief guide to the ResultTable system. Feedback welcome! diff --git a/readme.md b/readme.md index 0ec0278..a22efcb 100644 --- a/readme.md +++ b/readme.md @@ -1,7 +1,5 @@ # Medeia-Macina -This repository now keeps its detailed guide inside `scripts/README.md` to keep the root folder lean. See that file for installation, usage, and contributor information.# Medios-Macina - Medios-Macina is a CLI media manager and toolkit focused on downloading, tagging, and media storage (audio, video, images, and text) from a variety of providers and sources. It is designed around a compact, pipeable command language ("cmdlets") so complex workflows can be composed simply and repeatably. ## Features @@ -22,6 +20,7 @@ GIT CLONE https://code.glowers.club/goyimnose/Medios-Macina 2. 
rename config.conf.remove to config.conf, [config tutorial](https://code.glowers.club/goyimnose/Medios-Macina/wiki/Config.conf) +### MINIMAL EXAMPLE CONFIG - CHANGE VALUES ```Minimal config temp="C:\\Users\\Admin\\Downloads" diff --git a/scripts/cli_entry.py b/scripts/cli_entry.py index 04ae572..bd9c160 100644 Binary files a/scripts/cli_entry.py and b/scripts/cli_entry.py differ diff --git a/tool/ytdlp.py b/tool/ytdlp.py index f751834..7e691cd 100644 --- a/tool/ytdlp.py +++ b/tool/ytdlp.py @@ -1,14 +1,45 @@ from __future__ import annotations +import hashlib +import json import os - +import random +import re +import string +import subprocess +import sys +import threading +import time +import traceback +from contextlib import AbstractContextManager, nullcontext from dataclasses import dataclass from pathlib import Path -from typing import Any, Dict, List, Optional, Sequence +from typing import Any, Dict, Iterator, List, Optional, Sequence, cast +from urllib.parse import urlparse -from SYS.logger import debug -from SYS.utils import ensure_directory -from SYS.models import DownloadOptions +from SYS import pipeline as pipeline_context +from SYS.logger import debug, log +from SYS.models import ( + DebugLogger, + DownloadError, + DownloadMediaResult, + DownloadOptions, + ProgressBar, +) +from SYS.pipeline_progress import PipelineProgress +from SYS.utils import ensure_directory, sha256_file + +try: + import yt_dlp # type: ignore + from yt_dlp.extractor import gen_extractors # type: ignore +except Exception as exc: # pragma: no cover - handled at runtime + yt_dlp = None # type: ignore + gen_extractors = None # type: ignore + YTDLP_IMPORT_ERROR = exc +else: + YTDLP_IMPORT_ERROR = None + +_EXTRACTOR_CACHE: List[Any] | None = None def _get_nested(config: Dict[str, Any], *path: str) -> Any: @@ -41,6 +72,199 @@ def _parse_csv_list(value: Any) -> Optional[List[str]]: return parts or None +def ensure_yt_dlp_ready() -> None: + """Verify yt-dlp is importable, raising DownloadError if missing.""" + + if yt_dlp is not None: + return + + detail = str(YTDLP_IMPORT_ERROR or "yt-dlp is not installed") + raise DownloadError(f"yt-dlp module not available: {detail}") + + +def _get_extractors() -> List[Any]: + global _EXTRACTOR_CACHE + + if _EXTRACTOR_CACHE is not None: + return _EXTRACTOR_CACHE + + ensure_yt_dlp_ready() + + if gen_extractors is None: + _EXTRACTOR_CACHE = [] + return _EXTRACTOR_CACHE + + try: + _EXTRACTOR_CACHE = [ie for ie in gen_extractors()] + except Exception: + _EXTRACTOR_CACHE = [] + + return _EXTRACTOR_CACHE + + +def is_url_supported_by_ytdlp(url: str) -> bool: + """Return True if yt-dlp has a non-generic extractor for the URL.""" + + if not url or not isinstance(url, str): + return False + + if YTDLP_IMPORT_ERROR is not None: + return False + + try: + parsed = urlparse(url) + if not parsed.scheme or not parsed.netloc: + return False + except Exception: + return False + + try: + for extractor in _get_extractors(): + try: + if not extractor.suitable(url): + continue + except Exception: + continue + + name = getattr(extractor, "IE_NAME", "").lower() + if name == "generic": + continue + return True + except Exception: + return False + + return False + + +def list_formats( + url: str, + *, + no_playlist: bool = False, + playlist_items: Optional[str] = None, + cookiefile: Optional[str] = None, +) -> Optional[List[Dict[str, Any]]]: + """Get available formats for a URL. + + Returns a list of format dicts or None if unsupported or probing fails. 
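+
+    Each entry is yt-dlp's native format mapping; callers typically read keys such as
+    format_id, ext, resolution, vcodec, acodec and filesize/filesize_approx.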
+ """ + + if not is_url_supported_by_ytdlp(url): + return None + + ensure_yt_dlp_ready() + assert yt_dlp is not None + + ydl_opts: Dict[str, Any] = { + "quiet": True, + "no_warnings": True, + "skip_download": True, + "noprogress": True, + } + + if cookiefile: + ydl_opts["cookiefile"] = str(cookiefile) + if no_playlist: + ydl_opts["noplaylist"] = True + if playlist_items: + ydl_opts["playlist_items"] = str(playlist_items) + + try: + with yt_dlp.YoutubeDL(ydl_opts) as ydl: # type: ignore[arg-type] + info = ydl.extract_info(url, download=False) + except Exception as exc: + debug(f"yt-dlp format probe failed for {url}: {exc}") + return None + + if not isinstance(info, dict): + return None + + formats = info.get("formats") + if not isinstance(formats, list): + return None + + out: List[Dict[str, Any]] = [] + for fmt in formats: + if isinstance(fmt, dict): + out.append(fmt) + + return out + + +def probe_url( + url: str, + no_playlist: bool = False, + timeout_seconds: int = 15, + *, + cookiefile: Optional[str] = None, +) -> Optional[Dict[str, Any]]: + """Probe URL metadata without downloading. + + Returns None if unsupported, errors, or times out. + """ + + if not is_url_supported_by_ytdlp(url): + return None + + result_container: List[Optional[Any]] = [None, None] # [result, error] + + def _do_probe() -> None: + try: + ensure_yt_dlp_ready() + + assert yt_dlp is not None + ydl_opts: Dict[str, Any] = { + "quiet": True, + "no_warnings": True, + "socket_timeout": 10, + "retries": 2, + "skip_download": True, + "extract_flat": "in_playlist", + "noprogress": True, + } + + if cookiefile: + ydl_opts["cookiefile"] = str(cookiefile) + if no_playlist: + ydl_opts["noplaylist"] = True + + with yt_dlp.YoutubeDL(ydl_opts) as ydl: # type: ignore[arg-type] + info = ydl.extract_info(url, download=False) + + if not isinstance(info, dict): + result_container[0] = None + return + + webpage_url = info.get("webpage_url") or info.get("original_url") or info.get("url") + + result_container[0] = { + "extractor": info.get("extractor", ""), + "title": info.get("title", ""), + "entries": info.get("entries", []), + "duration": info.get("duration"), + "uploader": info.get("uploader"), + "description": info.get("description"), + "requested_url": url, + "webpage_url": webpage_url, + "url": webpage_url or url, + } + except Exception as exc: + debug(f"Probe error for {url}: {exc}") + result_container[1] = exc + + thread = threading.Thread(target=_do_probe, daemon=False) + thread.start() + thread.join(timeout=timeout_seconds) + + if thread.is_alive(): + debug(f"Probe timeout for {url} (>={timeout_seconds}s), proceeding without probe") + return None + + if result_container[1] is not None: + return None + + return cast(Optional[Dict[str, Any]], result_container[0]) + + @dataclass(slots=True) class YtDlpDefaults: """User-tunable defaults for yt-dlp behavior. @@ -337,3 +561,761 @@ class YtDlpTool: debug("yt-dlp argv: " + " ".join(str(a) for a in argv)) except Exception: pass + + +# Progress + utility helpers for yt-dlp driven downloads (previously in cmdlet/download_media). 
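+# These helpers resolve the live pipeline UI (if any) via pipeline_context and become
+# no-ops when no UI is attached, so they are safe to call from non-interactive runs.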
+_YTDLP_PROGRESS_BAR = ProgressBar() +_SUBTITLE_EXTS = (".vtt", ".srt", ".ass", ".ssa", ".lrc") + + +def _live_ui_and_pipe_index() -> tuple[Optional[Any], int]: + ui = None + try: + ui = pipeline_context.get_live_progress() if hasattr(pipeline_context, "get_live_progress") else None + except Exception: + ui = None + + pipe_idx: int = 0 + try: + stage_ctx = pipeline_context.get_stage_context() if hasattr(pipeline_context, "get_stage_context") else None + maybe_idx = getattr(stage_ctx, "pipe_index", None) if stage_ctx is not None else None + if isinstance(maybe_idx, int): + pipe_idx = int(maybe_idx) + except Exception: + pipe_idx = 0 + + return ui, pipe_idx + + +def _begin_live_steps(total_steps: int) -> None: + ui, pipe_idx = _live_ui_and_pipe_index() + if ui is None: + return + try: + begin = getattr(ui, "begin_pipe_steps", None) + if callable(begin): + begin(int(pipe_idx), total_steps=int(total_steps)) + except Exception: + return + + +def _step(text: str) -> None: + ui, pipe_idx = _live_ui_and_pipe_index() + if ui is None: + return + try: + adv = getattr(ui, "advance_pipe_step", None) + if callable(adv): + adv(int(pipe_idx), str(text)) + except Exception: + return + + +def _set_pipe_percent(percent: int) -> None: + ui, pipe_idx = _live_ui_and_pipe_index() + if ui is None: + return + try: + set_pct = getattr(ui, "set_pipe_percent", None) + if callable(set_pct): + set_pct(int(pipe_idx), int(percent)) + except Exception: + return + + +def _format_chapters_note(info: Dict[str, Any]) -> Optional[str]: + """Format yt-dlp chapter metadata into a stable, note-friendly text.""" + try: + chapters = info.get("chapters") + except Exception: + chapters = None + + if not isinstance(chapters, list) or not chapters: + return None + + rows: List[tuple[int, Optional[int], str]] = [] + max_t = 0 + for ch in chapters: + if not isinstance(ch, dict): + continue + start_raw = ch.get("start_time") + end_raw = ch.get("end_time") + title_raw = ch.get("title") or ch.get("name") or ch.get("chapter") + + try: + if start_raw is None: + continue + start_s = int(float(start_raw)) + except Exception: + continue + + end_s: Optional[int] = None + try: + if end_raw is not None: + end_s = int(float(end_raw)) + except Exception: + end_s = None + + title = str(title_raw).strip() if title_raw is not None else "" + rows.append((start_s, end_s, title)) + try: + max_t = max(max_t, start_s, end_s or 0) + except Exception: + max_t = max(max_t, start_s) + + if not rows: + return None + + force_hours = bool(max_t >= 3600) + + def _tc(seconds: int) -> str: + total = max(0, int(seconds)) + minutes, secs = divmod(total, 60) + hours, minutes = divmod(minutes, 60) + if force_hours: + return f"{hours:02d}:{minutes:02d}:{secs:02d}" + return f"{minutes:02d}:{secs:02d}" + + lines: List[str] = [] + for start_s, end_s, title in sorted( + rows, key=lambda r: (r[0], r[1] if r[1] is not None else 10**9, r[2]) + ): + if end_s is not None and end_s > start_s: + prefix = f"{_tc(start_s)}-{_tc(end_s)}" + else: + prefix = _tc(start_s) + line = f"{prefix} {title}".strip() + if line: + lines.append(line) + + text = "\n".join(lines).strip() + return text or None + + +def _best_subtitle_sidecar(media_path: Path) -> Optional[Path]: + """Find the most likely subtitle sidecar file for a downloaded media file.""" + try: + base_dir = media_path.parent + stem = media_path.stem + if not stem: + return None + + candidates: List[Path] = [] + for p in base_dir.glob(stem + ".*"): + try: + if not p.is_file(): + continue + except Exception: + continue + if 
p.suffix.lower() in _SUBTITLE_EXTS: + candidates.append(p) + + preferred_order = [".vtt", ".srt", ".ass", ".ssa", ".lrc"] + for ext in preferred_order: + for p in candidates: + if p.suffix.lower() == ext: + return p + + return candidates[0] if candidates else None + except Exception: + return None + + +def _read_text_file(path: Path) -> Optional[str]: + try: + return path.read_text(encoding="utf-8", errors="ignore") + except Exception: + return None + + +def _download_with_sections_via_cli( + url: str, + ytdl_options: Dict[str, Any], + sections: List[str], + quiet: bool = False, +) -> tuple[Optional[str], Dict[str, Any]]: + sections_list = ytdl_options.get("download_sections", []) + if not sections_list: + return "", {} + + session_id = hashlib.md5((url + str(time.time()) + "".join(random.choices(string.ascii_letters, k=10))).encode()).hexdigest()[:12] + first_section_info = None + + total_sections = len(sections_list) + for section_idx, section in enumerate(sections_list, 1): + try: + if total_sections > 0: + pct = 50 + int(((section_idx - 1) / max(1, total_sections)) * 49) + _set_pipe_percent(pct) + except Exception: + pass + + base_outtmpl = ytdl_options.get("outtmpl", "%(title)s.%(ext)s") + output_dir_path = Path(base_outtmpl).parent + filename_tmpl = f"{session_id}_{section_idx}" + if base_outtmpl.endswith(".%(ext)s"): + filename_tmpl += ".%(ext)s" + section_outtmpl = str(output_dir_path / filename_tmpl) + + if section_idx == 1: + metadata_cmd = ["yt-dlp", "--dump-json", "--skip-download"] + if ytdl_options.get("cookiefile"): + cookies_path = ytdl_options["cookiefile"].replace("\\", "/") + metadata_cmd.extend(["--cookies", cookies_path]) + if ytdl_options.get("noplaylist"): + metadata_cmd.append("--no-playlist") + metadata_cmd.append(url) + try: + meta_result = subprocess.run(metadata_cmd, capture_output=True, text=True) + if meta_result.returncode == 0 and meta_result.stdout: + try: + info_dict = json.loads(meta_result.stdout.strip()) + first_section_info = info_dict + if not quiet: + debug(f"Extracted title from metadata: {info_dict.get('title')}") + except json.JSONDecodeError: + if not quiet: + debug("Could not parse JSON metadata") + except Exception as exc: + if not quiet: + debug(f"Error extracting metadata: {exc}") + + cmd = ["yt-dlp"] + if quiet: + cmd.append("--quiet") + cmd.append("--no-warnings") + cmd.append("--no-progress") + cmd.extend(["--postprocessor-args", "ffmpeg:-hide_banner -loglevel error"]) + if ytdl_options.get("ffmpeg_location"): + try: + cmd.extend(["--ffmpeg-location", str(ytdl_options["ffmpeg_location"])]) + except Exception: + pass + if ytdl_options.get("format"): + cmd.extend(["-f", ytdl_options["format"]]) + if ytdl_options.get("merge_output_format"): + cmd.extend(["--merge-output-format", str(ytdl_options["merge_output_format"])]) + + postprocessors = ytdl_options.get("postprocessors") + want_add_metadata = bool(ytdl_options.get("addmetadata")) + want_embed_chapters = bool(ytdl_options.get("embedchapters")) + if isinstance(postprocessors, list): + for pp in postprocessors: + if not isinstance(pp, dict): + continue + if str(pp.get("key") or "") == "FFmpegMetadata": + want_add_metadata = True + if bool(pp.get("add_chapters", True)): + want_embed_chapters = True + + if want_add_metadata: + cmd.append("--add-metadata") + if want_embed_chapters: + cmd.append("--embed-chapters") + if ytdl_options.get("writesubtitles"): + cmd.append("--write-sub") + cmd.append("--write-auto-sub") + cmd.extend(["--sub-format", "vtt"]) + if 
ytdl_options.get("force_keyframes_at_cuts"): + cmd.append("--force-keyframes-at-cuts") + cmd.extend(["-o", section_outtmpl]) + if ytdl_options.get("cookiefile"): + cookies_path = ytdl_options["cookiefile"].replace("\\", "/") + cmd.extend(["--cookies", cookies_path]) + if ytdl_options.get("noplaylist"): + cmd.append("--no-playlist") + + cmd.extend(["--download-sections", section]) + + cmd.append(url) + if not quiet: + debug(f"Running yt-dlp for section: {section}") + try: + if quiet: + subprocess.run(cmd, check=True, capture_output=True, text=True) + else: + subprocess.run(cmd, check=True) + except subprocess.CalledProcessError as exc: + stderr_text = exc.stderr or "" + tail = "\n".join(stderr_text.splitlines()[-12:]).strip() + details = f"\n{tail}" if tail else "" + raise DownloadError(f"yt-dlp failed for section {section} (exit {exc.returncode}){details}") from exc + except Exception as exc: + raise DownloadError(f"yt-dlp failed for section {section}: {exc}") from exc + + try: + _set_pipe_percent(99) + except Exception: + pass + + return session_id, first_section_info or {} + + +def _iter_download_entries(info: Dict[str, Any]) -> Iterator[Dict[str, Any]]: + queue: List[Dict[str, Any]] = [info] + seen: set[int] = set() + while queue: + current = queue.pop(0) + obj_id = id(current) + if obj_id in seen: + continue + seen.add(obj_id) + entries = current.get("entries") + if isinstance(entries, list): + for entry in entries: + queue.append(entry) + if current.get("requested_downloads") or not entries: + yield current + + +def _candidate_paths(entry: Dict[str, Any], output_dir: Path) -> Iterator[Path]: + requested = entry.get("requested_downloads") + if isinstance(requested, list): + for item in requested: + if isinstance(item, dict): + fp = item.get("filepath") or item.get("_filename") + if fp: + yield Path(fp) + for key in ("filepath", "_filename", "filename"): + value = entry.get(key) + if value: + yield Path(value) + if entry.get("filename"): + yield output_dir / entry["filename"] + + +def _resolve_entry_and_path(info: Dict[str, Any], output_dir: Path) -> tuple[Dict[str, Any], Path]: + for entry in _iter_download_entries(info): + for candidate in _candidate_paths(entry, output_dir): + if candidate.is_file(): + return entry, candidate + if not candidate.is_absolute(): + maybe = output_dir / candidate + if maybe.is_file(): + return entry, maybe + raise FileNotFoundError("yt-dlp did not report a downloaded media file") + + +def _resolve_entries_and_paths(info: Dict[str, Any], output_dir: Path) -> List[tuple[Dict[str, Any], Path]]: + resolved: List[tuple[Dict[str, Any], Path]] = [] + seen: set[str] = set() + for entry in _iter_download_entries(info): + chosen: Optional[Path] = None + for candidate in _candidate_paths(entry, output_dir): + if candidate.is_file(): + chosen = candidate + break + if not candidate.is_absolute(): + maybe = output_dir / candidate + if maybe.is_file(): + chosen = maybe + break + if chosen is None: + continue + key = str(chosen.resolve()) + if key in seen: + continue + seen.add(key) + resolved.append((entry, chosen)) + return resolved + + +def _extract_sha256(info: Dict[str, Any]) -> Optional[str]: + for payload in [info] + info.get("entries", []): + if not isinstance(payload, dict): + continue + hashes = payload.get("hashes") + if isinstance(hashes, dict): + for key in ("sha256", "sha-256", "sha_256"): + if key in hashes and isinstance(hashes[key], str) and hashes[key].strip(): + return hashes[key].strip() + for key in ("sha256", "sha-256", "sha_256"): + value = 
payload.get(key) + if isinstance(value, str) and value.strip(): + return value.strip() + return None + + +def _progress_callback(status: Dict[str, Any]) -> None: + event = status.get("status") + if event == "downloading": + downloaded = status.get("downloaded_bytes") + total = status.get("total_bytes") or status.get("total_bytes_estimate") + + _YTDLP_PROGRESS_BAR.update( + downloaded=int(downloaded) if downloaded is not None else None, + total=int(total) if total is not None else None, + label="download", + file=sys.stderr, + ) + elif event == "finished": + _YTDLP_PROGRESS_BAR.finish() + elif event in ("postprocessing", "processing"): + return + + +try: + from SYS.metadata import extract_ytdlp_tags +except ImportError: + extract_ytdlp_tags = None # type: ignore + + +def download_media(opts: DownloadOptions, *, debug_logger: Optional[DebugLogger] = None) -> Any: + """Download streaming media exclusively via yt-dlp.""" + + try: + netloc = urlparse(opts.url).netloc.lower() + except Exception: + netloc = "" + if "gofile.io" in netloc: + msg = "GoFile links are currently unsupported" + if not opts.quiet: + debug(msg) + if debug_logger is not None: + debug_logger.write_record("gofile-unsupported", {"url": opts.url}) + raise DownloadError(msg) + + ytdlp_supported = is_url_supported_by_ytdlp(opts.url) + if not ytdlp_supported: + msg = "URL not supported by yt-dlp; try download-file for manual downloads" + if not opts.quiet: + log(msg) + if debug_logger is not None: + debug_logger.write_record("ytdlp-unsupported", {"url": opts.url}) + raise DownloadError(msg) + + if opts.playlist_items: + debug( + f"Skipping probe for playlist (item selection: {opts.playlist_items}), proceeding with download" + ) + probe_result = {"url": opts.url} + else: + probe_cookiefile = None + try: + if opts.cookies_path and opts.cookies_path.is_file(): + probe_cookiefile = str(opts.cookies_path) + except Exception: + probe_cookiefile = None + + probe_result = probe_url(opts.url, no_playlist=opts.no_playlist, timeout_seconds=15, cookiefile=probe_cookiefile) + + if probe_result is None: + msg = "yt-dlp could not detect media for this URL; use download-file for direct downloads" + if not opts.quiet: + log(msg) + if debug_logger is not None: + debug_logger.write_record("ytdlp-skip-no-media", {"url": opts.url}) + raise DownloadError(msg) + + ensure_yt_dlp_ready() + + ytdlp_tool = YtDlpTool() + ytdl_options = ytdlp_tool.build_ytdlp_options(opts) + hooks = ytdl_options.get("progress_hooks") + if not isinstance(hooks, list): + hooks = [] + ytdl_options["progress_hooks"] = hooks + if _progress_callback not in hooks: + hooks.append(_progress_callback) + if not opts.quiet: + debug(f"Starting yt-dlp download: {opts.url}") + if debug_logger is not None: + debug_logger.write_record("ytdlp-start", {"url": opts.url}) + + assert yt_dlp is not None + try: + if not opts.quiet: + if ytdl_options.get("download_sections"): + debug(f"[yt-dlp] download_sections: {ytdl_options['download_sections']}") + debug(f"[yt-dlp] force_keyframes_at_cuts: {ytdl_options.get('force_keyframes_at_cuts', False)}") + + session_id = None + first_section_info = {} + if ytdl_options.get("download_sections"): + live_ui, _ = PipelineProgress(pipeline_context).ui_and_pipe_index() + quiet_sections = bool(opts.quiet) or (live_ui is not None) + session_id, first_section_info = _download_with_sections_via_cli( + opts.url, + ytdl_options, + ytdl_options.get("download_sections", []), + quiet=quiet_sections, + ) + info = None + else: + with yt_dlp.YoutubeDL(ytdl_options) as ydl: 
# type: ignore[arg-type] + info = ydl.extract_info(opts.url, download=True) + except Exception as exc: + log(f"yt-dlp failed: {exc}", file=sys.stderr) + if debug_logger is not None: + debug_logger.write_record( + "exception", + {"phase": "yt-dlp", "error": str(exc), "traceback": traceback.format_exc()}, + ) + raise DownloadError("yt-dlp download failed") from exc + + if info is None: + try: + time.sleep(0.5) + files = sorted(opts.output_dir.iterdir(), key=lambda p: p.stat().st_mtime, reverse=True) + if not files: + raise FileNotFoundError(f"No files found in {opts.output_dir}") + + if opts.clip_sections and session_id: + section_pattern = re.compile(rf"^{re.escape(session_id)}_(\d+)") + matching_files = [f for f in files if section_pattern.search(f.name)] + + if matching_files: + def extract_section_num(path: Path) -> int: + match = section_pattern.search(path.name) + return int(match.group(1)) if match else 999 + + matching_files.sort(key=extract_section_num) + debug(f"Found {len(matching_files)} section file(s) matching pattern") + + by_index: Dict[int, List[Path]] = {} + for f in matching_files: + m = section_pattern.search(f.name) + if not m: + continue + try: + n = int(m.group(1)) + except Exception: + continue + by_index.setdefault(n, []).append(f) + + renamed_media_files: List[Path] = [] + + for sec_num in sorted(by_index.keys()): + group = by_index.get(sec_num) or [] + if not group: + continue + + def _is_subtitle(p: Path) -> bool: + try: + return p.suffix.lower() in _SUBTITLE_EXTS + except Exception: + return False + + media_candidates = [p for p in group if not _is_subtitle(p)] + subtitle_candidates = [p for p in group if _is_subtitle(p)] + + media_file: Optional[Path] = None + for cand in media_candidates: + try: + if cand.suffix.lower() in {".json", ".info.json"}: + continue + except Exception: + pass + media_file = cand + break + if media_file is None and media_candidates: + media_file = media_candidates[0] + if media_file is None: + continue + + try: + media_hash = sha256_file(media_file) + except Exception as exc: + debug(f"Failed to hash section media file {media_file.name}: {exc}") + renamed_media_files.append(media_file) + continue + + prefix = f"{session_id}_{sec_num}" + + def _tail(name: str) -> str: + try: + if name.startswith(prefix): + return name[len(prefix):] + except Exception: + pass + try: + return Path(name).suffix + except Exception: + return "" + + try: + new_media_name = f"{media_hash}{_tail(media_file.name)}" + new_media_path = opts.output_dir / new_media_name + if new_media_path.exists() and new_media_path != media_file: + debug(f"File with hash {media_hash} already exists, using existing file.") + try: + media_file.unlink() + except OSError: + pass + else: + media_file.rename(new_media_path) + debug(f"Renamed section file: {media_file.name} -> {new_media_name}") + renamed_media_files.append(new_media_path) + except Exception as exc: + debug(f"Failed to rename section media file {media_file.name}: {exc}") + renamed_media_files.append(media_file) + new_media_path = media_file + + for sub_file in subtitle_candidates: + try: + new_sub_name = f"{media_hash}{_tail(sub_file.name)}" + new_sub_path = opts.output_dir / new_sub_name + if new_sub_path.exists() and new_sub_path != sub_file: + try: + sub_file.unlink() + except OSError: + pass + else: + sub_file.rename(new_sub_path) + debug(f"Renamed section file: {sub_file.name} -> {new_sub_name}") + except Exception as exc: + debug(f"Failed to rename section subtitle file {sub_file.name}: {exc}") + + media_path = 
renamed_media_files[0] if renamed_media_files else matching_files[0] + media_paths = renamed_media_files if renamed_media_files else None + if not opts.quiet: + count = len(media_paths) if isinstance(media_paths, list) else 1 + debug(f"✓ Downloaded {count} section media file(s) (session: {session_id})") + else: + media_path = files[0] + media_paths = None + if not opts.quiet: + debug(f"✓ Downloaded section file (pattern not found): {media_path.name}") + else: + media_path = files[0] + media_paths = None + + if not opts.quiet: + debug(f"✓ Downloaded: {media_path.name}") + if debug_logger is not None: + debug_logger.write_record("ytdlp-file-found", {"path": str(media_path)}) + except Exception as exc: + log(f"Error finding downloaded file: {exc}", file=sys.stderr) + if debug_logger is not None: + debug_logger.write_record("exception", {"phase": "find-file", "error": str(exc)}) + raise DownloadError(str(exc)) from exc + + file_hash = sha256_file(media_path) + tags = [] + title = "" + if first_section_info: + title = first_section_info.get("title", "") + if title: + tags.append(f"title:{title}") + debug(f"Added title tag for section download: {title}") + + if first_section_info: + info_dict = first_section_info + else: + info_dict = {"id": media_path.stem, "title": title or media_path.stem, "ext": media_path.suffix.lstrip(".")} + + return DownloadMediaResult(path=media_path, info=info_dict, tag=tags, source_url=opts.url, hash_value=file_hash, paths=media_paths) + + if not isinstance(info, dict): + log(f"Unexpected yt-dlp response: {type(info)}", file=sys.stderr) + raise DownloadError("Unexpected yt-dlp response type") + + info_dict: Dict[str, Any] = cast(Dict[str, Any], info) + if debug_logger is not None: + debug_logger.write_record("ytdlp-info", {"keys": sorted(info_dict.keys()), "is_playlist": bool(info_dict.get("entries"))}) + + if info_dict.get("entries") and not opts.no_playlist: + resolved = _resolve_entries_and_paths(info_dict, opts.output_dir) + if resolved: + results: List[DownloadMediaResult] = [] + for entry, media_path in resolved: + hash_value = _extract_sha256(entry) or _extract_sha256(info_dict) + if not hash_value: + try: + hash_value = sha256_file(media_path) + except OSError: + hash_value = None + + tags: List[str] = [] + if extract_ytdlp_tags: + try: + tags = extract_ytdlp_tags(entry) + except Exception as exc: + log(f"Error extracting tags: {exc}", file=sys.stderr) + + source_url = entry.get("webpage_url") or entry.get("original_url") or entry.get("url") or opts.url + + results.append( + DownloadMediaResult( + path=media_path, + info=entry, + tag=tags, + source_url=source_url, + hash_value=hash_value, + ) + ) + + if not opts.quiet: + debug(f"✓ Downloaded playlist items: {len(results)}") + return results + + try: + entry, media_path = _resolve_entry_and_path(info_dict, opts.output_dir) + except FileNotFoundError as exc: + log(f"Error: {exc}", file=sys.stderr) + if debug_logger is not None: + debug_logger.write_record("exception", {"phase": "resolve-path", "error": str(exc)}) + raise DownloadError(str(exc)) from exc + + if debug_logger is not None: + debug_logger.write_record("resolved-media", {"path": str(media_path), "entry_keys": sorted(entry.keys())}) + + hash_value = _extract_sha256(entry) or _extract_sha256(info_dict) + if not hash_value: + try: + hash_value = sha256_file(media_path) + except OSError as exc: + if debug_logger is not None: + debug_logger.write_record("hash-error", {"path": str(media_path), "error": str(exc)}) + + tags = [] + if extract_ytdlp_tags: + 
try:
+            tags = extract_ytdlp_tags(entry)
+        except Exception as exc:
+            log(f"Error extracting tags: {exc}", file=sys.stderr)
+
+    source_url = entry.get("webpage_url") or entry.get("original_url") or entry.get("url")
+
+    if not opts.quiet:
+        debug(f"✓ Downloaded: {media_path.name} ({len(tags)} tags)")
+    if debug_logger is not None:
+        debug_logger.write_record(
+            "downloaded",
+            {
+                "path": str(media_path),
+                "tag_count": len(tags),
+                "source_url": source_url,
+                "sha256": hash_value,
+            },
+        )
+
+    return DownloadMediaResult(path=media_path, info=entry, tag=tags, source_url=source_url, hash_value=hash_value)
+
+
+def _download_with_timeout(opts: DownloadOptions, timeout_seconds: int = 300) -> Any:
+    """Run download_media in a worker thread and enforce a wall-clock timeout.
+
+    If the timeout elapses, a DownloadError is raised; the worker thread (non-daemon)
+    is left to finish on its own and its eventual result or exception is discarded.
+    """
+
+    result_container: List[Optional[Any]] = [None, None]  # [result, error]
+
+    def _do_download() -> None:
+        try:
+            result_container[0] = download_media(opts)
+        except Exception as exc:
+            result_container[1] = exc
+
+    thread = threading.Thread(target=_do_download, daemon=False)
+    thread.start()
+    thread.join(timeout=timeout_seconds)
+
+    if thread.is_alive():
+        raise DownloadError(f"Download timeout after {timeout_seconds} seconds for {opts.url}")
+
+    if result_container[1] is not None:
+        raise cast(Exception, result_container[1])
+
+    if result_container[0] is None:
+        raise DownloadError(f"Download failed for {opts.url}")
+
+    return cast(Any, result_container[0])
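+
+
+# Illustrative usage sketch (comment only). The DownloadOptions fields are assumed
+# from their usage above (url, output_dir, quiet, cookies_path, ...); the exact
+# constructor signature is not shown in this diff:
+#
+#     opts = DownloadOptions(url="https://www.youtube.com/watch?v=example",
+#                            output_dir=Path("downloads"))
+#     try:
+#         result = _download_with_timeout(opts, timeout_seconds=600)
+#         print(result.path, result.hash_value)
+#     except DownloadError as exc:
+#         print(f"download failed: {exc}")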