diff --git a/API/HTTP.py b/API/HTTP.py index 1c8e350..886b540 100644 --- a/API/HTTP.py +++ b/API/HTTP.py @@ -229,6 +229,13 @@ class HTTPClient: response.raise_for_status() total_bytes = int(response.headers.get("content-length", 0)) bytes_downloaded = 0 + + # Render progress immediately (even if the transfer is very fast) + if progress_callback: + try: + progress_callback(0, total_bytes) + except Exception: + pass with open(path, "wb") as f: for chunk in response.iter_bytes(chunk_size): @@ -237,6 +244,13 @@ class HTTPClient: bytes_downloaded += len(chunk) if progress_callback: progress_callback(bytes_downloaded, total_bytes) + + # Ensure a final callback is emitted. + if progress_callback: + try: + progress_callback(bytes_downloaded, total_bytes) + except Exception: + pass return path diff --git a/API/HydrusNetwork.py b/API/HydrusNetwork.py index c5c0e0a..387f28e 100644 --- a/API/HydrusNetwork.py +++ b/API/HydrusNetwork.py @@ -152,55 +152,24 @@ class HydrusNetwork: logger.debug(f"{self._log_prefix()} Uploading file {file_path.name} ({file_size} bytes)") # Stream upload body with a stderr progress bar (pipeline-safe). - try: - from models import ProgressBar - except Exception: - ProgressBar = None # type: ignore[assignment] + from models import ProgressBar - bar = ProgressBar() if ProgressBar is not None else None + bar = ProgressBar() label = f"{self._log_prefix().strip('[]')} upload" start_t = time.time() last_render_t = [start_t] - last_log_t = [start_t] sent = [0] - tty = bool(getattr(sys.stderr, "isatty", lambda: False)()) def _render_progress(final: bool = False) -> None: - if bar is None: - return if file_size <= 0: return now = time.time() if not final and (now - float(last_render_t[0])) < 0.25: return last_render_t[0] = now - elapsed = max(0.001, now - start_t) - speed = float(sent[0]) / elapsed - eta_s = (float(file_size) - float(sent[0])) / speed if speed > 0 else 0.0 - minutes, seconds = divmod(int(max(0.0, eta_s)), 60) - hours, minutes = divmod(minutes, 60) - eta_str = f"{hours:02d}:{minutes:02d}:{seconds:02d}" - speed_str = bar.format_bytes(speed) + "/s" - - line = bar.format_progress( - percent_str=None, - downloaded=int(sent[0]), - total=int(file_size), - speed_str=speed_str, - eta_str=eta_str, - ) - - try: - if tty: - sys.stderr.write("\r" + f"[{label}] " + line + " ") - sys.stderr.flush() - else: - # Non-interactive: keep it quiet-ish. 
- if final or (now - float(last_log_t[0])) >= 2.0: - log(f"[{label}] {line}", file=sys.stderr) - last_log_t[0] = now - except Exception: - pass + bar.update(downloaded=int(sent[0]), total=int(file_size), label=str(label), file=sys.stderr) + if final: + bar.finish() def file_gen(): try: @@ -214,12 +183,6 @@ class HydrusNetwork: yield chunk finally: _render_progress(final=True) - if tty: - try: - sys.stderr.write("\n") - sys.stderr.flush() - except Exception: - pass response = client.request( spec.method, diff --git a/API/folder.py b/API/folder.py index d39024e..9584c0f 100644 --- a/API/folder.py +++ b/API/folder.py @@ -258,6 +258,7 @@ class API_folder_store: cursor.execute("CREATE INDEX IF NOT EXISTS idx_files_path ON files(file_path)") cursor.execute("CREATE INDEX IF NOT EXISTS idx_tags_hash ON tags(hash)") cursor.execute("CREATE INDEX IF NOT EXISTS idx_tags_tag ON tags(tag)") + cursor.execute("CREATE INDEX IF NOT EXISTS idx_metadata_ext ON metadata(ext)") cursor.execute("CREATE INDEX IF NOT EXISTS idx_worker_id ON worker(worker_id)") cursor.execute("CREATE INDEX IF NOT EXISTS idx_worker_status ON worker(status)") cursor.execute("CREATE INDEX IF NOT EXISTS idx_worker_type ON worker(worker_type)") @@ -1858,6 +1859,73 @@ class DatabaseAPI: ) return {row[0] for row in cursor.fetchall()} + def get_file_hashes_by_ext(self, ext_value: str, limit: Optional[int] = None) -> Set[str]: + """Get hashes of files whose metadata ext matches the given extension. + + Matches case-insensitively and ignores any leading '.' in stored ext. + Supports glob wildcards '*' and '?' in the query. + """ + ext_clean = str(ext_value or "").strip().lower().lstrip(".") + ext_clean = "".join(ch for ch in ext_clean if ch.isalnum()) + if not ext_clean: + return set() + + cursor = self.get_cursor() + + has_glob = ("*" in ext_value) or ("?" in ext_value) + if has_glob: + pattern = str(ext_value or "").strip().lower().lstrip(".") + pattern = pattern.replace("%", "\\%").replace("_", "\\_") + pattern = pattern.replace("*", "%").replace("?", "_") + cursor.execute( + """ + SELECT DISTINCT f.hash + FROM files f + JOIN metadata m ON f.hash = m.hash + WHERE LOWER(LTRIM(COALESCE(m.ext, ''), '.')) LIKE ? ESCAPE '\\' + LIMIT ? + """, + (pattern, limit or 10000), + ) + else: + cursor.execute( + """ + SELECT DISTINCT f.hash + FROM files f + JOIN metadata m ON f.hash = m.hash + WHERE LOWER(LTRIM(COALESCE(m.ext, ''), '.')) = ? + LIMIT ? + """, + (ext_clean, limit or 10000), + ) + return {row[0] for row in cursor.fetchall()} + + def get_files_by_ext(self, ext_value: str, limit: Optional[int] = None) -> List[tuple]: + """Get files whose metadata ext matches the given extension. + + Returns (hash, file_path, size, ext) tuples. + """ + ext_clean = str(ext_value or "").strip().lower().lstrip(".") + ext_clean = "".join(ch for ch in ext_clean if ch.isalnum()) + if not ext_clean: + return [] + + cursor = self.get_cursor() + cursor.execute( + """ + SELECT f.hash, f.file_path, + COALESCE((SELECT size FROM metadata WHERE hash = f.hash), 0) as size, + COALESCE((SELECT ext FROM metadata WHERE hash = f.hash), '') as ext + FROM files f + JOIN metadata m ON f.hash = m.hash + WHERE LOWER(LTRIM(COALESCE(m.ext, ''), '.')) = ? + ORDER BY f.file_path + LIMIT ? + """, + (ext_clean, limit or 10000), + ) + return cursor.fetchall() + def get_files_with_any_url(self, limit: Optional[int] = None) -> List[tuple]: """Get files that have any non-empty URL metadata. 
diff --git a/CLI.py b/CLI.py index d10678d..c4a0c50 100644 --- a/CLI.py +++ b/CLI.py @@ -28,6 +28,26 @@ from prompt_toolkit.document import Document from prompt_toolkit.lexers import Lexer from prompt_toolkit.styles import Style +from rich_display import stderr_console, stdout_console + + +def _install_rich_traceback(*, show_locals: bool = False) -> None: + """Install Rich traceback handler as the default excepthook. + + This keeps uncaught exceptions readable in the terminal. + """ + try: + from rich.traceback import install as rich_traceback_install + + rich_traceback_install(show_locals=bool(show_locals)) + except Exception: + # Fall back to the standard Python traceback if Rich isn't available. + return + + +# Default to Rich tracebacks for the whole process. +_install_rich_traceback(show_locals=False) + from SYS.background_notifier import ensure_background_notifier from SYS.logger import debug, set_debug from SYS.worker_manager import WorkerManager @@ -530,6 +550,32 @@ class CmdletCompleter(Completer): self._config_loader = config_loader self.cmdlet_names = CmdletIntrospection.cmdlet_names() + @staticmethod + def _used_arg_logicals(cmd_name: str, stage_tokens: List[str]) -> Set[str]: + """Return logical argument names already used in this cmdlet stage. + + Example: if the user has typed `download-media -url ...`, then `url` + is considered used and should not be suggested again (even as `--url`). + """ + arg_flags = CmdletIntrospection.cmdlet_args(cmd_name) + allowed = {a.lstrip("-").strip().lower() for a in arg_flags if a} + if not allowed: + return set() + + used: Set[str] = set() + for tok in stage_tokens[1:]: + if not tok or not tok.startswith("-"): + continue + if tok in {"-", "--"}: + continue + # Handle common `-arg=value` form. + raw = tok.split("=", 1)[0] + logical = raw.lstrip("-").strip().lower() + if logical and logical in allowed: + used.add(logical) + + return used + def get_completions(self, document: Document, complete_event): # type: ignore[override] text = document.text_before_cursor tokens = text.split() @@ -600,6 +646,7 @@ class CmdletCompleter(Completer): return arg_names = CmdletIntrospection.cmdlet_args(cmd_name) + used_logicals = self._used_arg_logicals(cmd_name, stage_tokens) logical_seen: Set[str] = set() for arg in arg_names: arg_low = arg.lower() @@ -607,6 +654,8 @@ class CmdletCompleter(Completer): if prefer_single_dash and arg_low.startswith("--"): continue logical = arg.lstrip("-").lower() + if logical in used_logicals: + continue if prefer_single_dash and logical in logical_seen: continue if arg_low.startswith(current_token): @@ -751,26 +800,32 @@ class CmdletHelp: def show_cmdlet_list() -> None: try: metadata = list_cmdlet_metadata() or {} - print("\nAvailable cmdlet:") + from rich.box import SIMPLE + from rich.panel import Panel + from rich.table import Table as RichTable + + table = RichTable(show_header=True, header_style="bold", box=SIMPLE, expand=True) + table.add_column("Cmdlet", no_wrap=True) + table.add_column("Aliases") + table.add_column("Args") + table.add_column("Summary") + for cmd_name in sorted(metadata.keys()): info = metadata[cmd_name] aliases = info.get("aliases", []) args = info.get("args", []) + summary = info.get("summary") or "" + alias_str = ", ".join([str(a) for a in (aliases or []) if str(a).strip()]) + arg_names = [a.get("name") for a in (args or []) if isinstance(a, dict) and a.get("name")] + args_str = ", ".join([str(a) for a in arg_names if str(a).strip()]) + table.add_row(str(cmd_name), alias_str, args_str, str(summary)) - 
display = f" cmd:{cmd_name}" - if aliases: - display += f" alias:{', '.join(aliases)}" - if args: - arg_names = [a.get("name") for a in args if a.get("name")] - if arg_names: - display += f" args:{', '.join(arg_names)}" - summary = info.get("summary") - if summary: - display += f" - {summary}" - print(display) - print() + stdout_console().print(Panel(table, title="Cmdlets", expand=False)) except Exception as exc: - print(f"Error: {exc}\n") + from rich.panel import Panel + from rich.text import Text + + stderr_console().print(Panel(Text(f"Error: {exc}"), title="Error", expand=False)) @staticmethod def show_cmdlet_help(cmd_name: str) -> None: @@ -787,7 +842,10 @@ class CmdletHelp: def _print_metadata(cmd_name: str, data: Any) -> None: d = data.to_dict() if hasattr(data, "to_dict") else data if not isinstance(d, dict): - print(f"Invalid metadata for {cmd_name}\n") + from rich.panel import Panel + from rich.text import Text + + stderr_console().print(Panel(Text(f"Invalid metadata for {cmd_name}"), title="Error", expand=False)) return name = d.get("name", cmd_name) @@ -797,45 +855,48 @@ class CmdletHelp: args = d.get("args", []) details = d.get("details", []) - print("\nNAME") - print(f" {name}") + from rich.box import SIMPLE + from rich.console import Group + from rich.panel import Panel + from rich.table import Table as RichTable + from rich.text import Text - print("\nSYNOPSIS") - print(f" {usage or name}") + header = Text.assemble((str(name), "bold")) + synopsis = Text(str(usage or name)) + stdout_console().print(Panel(Group(header, synopsis), title="Help", expand=False)) if summary or description: - print("\nDESCRIPTION") + desc_bits: List[Text] = [] if summary: - print(f" {summary}") + desc_bits.append(Text(str(summary))) if description: - print(f" {description}") + desc_bits.append(Text(str(description))) + stdout_console().print(Panel(Group(*desc_bits), title="Description", expand=False)) if args and isinstance(args, list): - print("\nPARAMETERS") + param_table = RichTable(show_header=True, header_style="bold", box=SIMPLE, expand=True) + param_table.add_column("Arg", no_wrap=True) + param_table.add_column("Type", no_wrap=True) + param_table.add_column("Required", no_wrap=True) + param_table.add_column("Description") for arg in args: if isinstance(arg, dict): name_str = arg.get("name", "?") typ = arg.get("type", "string") - required = arg.get("required", False) + required = bool(arg.get("required", False)) desc = arg.get("description", "") else: name_str = getattr(arg, "name", "?") typ = getattr(arg, "type", "string") - required = getattr(arg, "required", False) + required = bool(getattr(arg, "required", False)) desc = getattr(arg, "description", "") - req_marker = "[required]" if required else "[optional]" - print(f" -{name_str} <{typ}>") - if desc: - print(f" {desc}") - print(f" {req_marker}") - print() + param_table.add_row(f"-{name_str}", str(typ), "yes" if required else "no", str(desc or "")) + + stdout_console().print(Panel(param_table, title="Parameters", expand=False)) if details: - print("REMARKS") - for detail in details: - print(f" {detail}") - print() + stdout_console().print(Panel(Group(*[Text(str(x)) for x in details]), title="Remarks", expand=False)) class CmdletExecutor: @@ -1044,6 +1105,26 @@ class CmdletExecutor: ctx.set_last_selection(selected_indices) try: + try: + if hasattr(ctx, "set_current_cmdlet_name"): + ctx.set_current_cmdlet_name(cmd_name) + except Exception: + pass + + try: + if hasattr(ctx, "set_current_stage_text"): + raw_stage = "" + try: + raw_stage = 
ctx.get_current_command_text("") if hasattr(ctx, "get_current_command_text") else "" + except Exception: + raw_stage = "" + if raw_stage: + ctx.set_current_stage_text(raw_stage) + else: + ctx.set_current_stage_text(" ".join([cmd_name, *filtered_args]).strip() or cmd_name) + except Exception: + pass + ret_code = cmd_fn(result, filtered_args, config) if getattr(pipeline_ctx, "emits", None): @@ -1113,8 +1194,8 @@ class CmdletExecutor: else: ctx.set_last_result_items_only(emits) - print() - print(table.format_plain()) + stdout_console().print() + stdout_console().print(table) if ret_code != 0: stage_status = "failed" @@ -1125,6 +1206,16 @@ class CmdletExecutor: stage_error = f"{type(exc).__name__}: {exc}" print(f"[error] {type(exc).__name__}: {exc}\n") finally: + try: + if hasattr(ctx, "clear_current_cmdlet_name"): + ctx.clear_current_cmdlet_name() + except Exception: + pass + try: + if hasattr(ctx, "clear_current_stage_text"): + ctx.clear_current_stage_text() + except Exception: + pass ctx.clear_last_selection() if stage_session: stage_session.close(status=stage_status, error_msg=stage_error) @@ -1322,6 +1413,13 @@ class PipelineExecutor: pipeline_text = " | ".join(" ".join(stage) for stage in stages) pipeline_session = WorkerStages.begin_pipeline(worker_manager, pipeline_text=pipeline_text, config=config) + raw_stage_texts: List[str] = [] + try: + if hasattr(ctx, "get_current_command_stages"): + raw_stage_texts = ctx.get_current_command_stages() or [] + except Exception: + raw_stage_texts = [] + if pipeline_session and worker_manager and isinstance(config, dict): session_worker_ids = config.get("_session_worker_ids") if session_worker_ids: @@ -1452,6 +1550,9 @@ class PipelineExecutor: if table_type == "youtube": print("Auto-running YouTube selection via download-media") stages.append(["download-media"]) + elif table_type == "bandcamp": + print("Auto-running Bandcamp selection via download-media") + stages.append(["download-media"]) elif table_type in {"soulseek", "openlibrary", "libgen"}: print("Auto-piping selection to download-file") stages.append(["download-file"]) @@ -1473,6 +1574,14 @@ class PipelineExecutor: ): print("Auto-inserting download-media after YouTube selection") stages.insert(0, ["download-media"]) + if table_type == "bandcamp" and first_cmd not in ( + "download-media", + "download_media", + "download-file", + ".pipe", + ): + print("Auto-inserting download-media after Bandcamp selection") + stages.insert(0, ["download-media"]) if table_type == "libgen" and first_cmd not in ( "download-file", "download-media", @@ -1645,6 +1754,32 @@ class PipelineExecutor: except Exception: pass + try: + if hasattr(ctx, "set_current_cmdlet_name"): + ctx.set_current_cmdlet_name(cmd_name) + except Exception: + pass + + try: + if hasattr(ctx, "set_current_stage_text"): + stage_text = "" + if raw_stage_texts and stage_index < len(raw_stage_texts): + candidate = str(raw_stage_texts[stage_index] or "").strip() + if candidate: + try: + cand_tokens = shlex.split(candidate) + except Exception: + cand_tokens = candidate.split() + if cand_tokens: + first = str(cand_tokens[0]).replace("_", "-").lower() + if first == cmd_name: + stage_text = candidate + if not stage_text: + stage_text = " ".join(stage_tokens).strip() + ctx.set_current_stage_text(stage_text) + except Exception: + pass + ret_code = cmd_fn(piped_result, list(stage_args), config) stage_is_last = stage_index + 1 >= len(stages) @@ -1676,7 +1811,6 @@ class PipelineExecutor: and (not emits) and cmd_name in {"download-media", "download_media"} and 
stage_table is not None - and hasattr(stage_table, "format_plain") and stage_table_type in {"ytdlp.formatlist", "download-media", "download_media"} ): try: @@ -1691,8 +1825,8 @@ class PipelineExecutor: already_rendered = False if not already_rendered: - print() - print(stage_table.format_plain()) + stdout_console().print() + stdout_console().print(stage_table) try: remaining = stages[stage_index + 1 :] @@ -1719,15 +1853,15 @@ class PipelineExecutor: if final_table is None: final_table = stage_table - if final_table is not None and hasattr(final_table, "format_plain"): + if final_table is not None: try: already_rendered = bool(getattr(final_table, "_rendered_by_cmdlet", False)) except Exception: already_rendered = False if not already_rendered: - print() - print(final_table.format_plain()) + stdout_console().print() + stdout_console().print(final_table) # Fallback: if a cmdlet emitted results but did not provide a table, # render a standard ResultTable so last-stage pipelines still show output. @@ -1739,8 +1873,8 @@ class PipelineExecutor: table = ResultTable(table_title) for item in emits: table.add_result(item) - print() - print(table.format_plain()) + stdout_console().print() + stdout_console().print(table) if isinstance(ret_code, int) and ret_code != 0: stage_status = "failed" @@ -1757,6 +1891,16 @@ class PipelineExecutor: pipeline_error = f"{stage_label} error: {exc}" return finally: + try: + if hasattr(ctx, "clear_current_cmdlet_name"): + ctx.clear_current_cmdlet_name() + except Exception: + pass + try: + if hasattr(ctx, "clear_current_stage_text"): + ctx.clear_current_stage_text() + except Exception: + pass if stage_session: stage_session.close(status=stage_status, error_msg=stage_error) elif pipeline_session and worker_manager: @@ -1774,8 +1918,8 @@ class PipelineExecutor: for item in items: table.add_result(item) ctx.set_last_result_items_only(items) - print() - print(table.format_plain()) + stdout_console().print() + stdout_console().print(table) except Exception as exc: pipeline_status = "failed" pipeline_error = str(exc) @@ -1786,7 +1930,20 @@ class PipelineExecutor: except Exception as exc: print(f"[error] Failed to execute pipeline: {exc}\n") +Welcome = """ +# MEDIOS-MACINA +Rich can do a pretty *decent* job of rendering markdown. + +1. This is a list item +2. This is another list item +""" +from rich.markdown import Markdown +from rich.console import Console + +console = Console() +md = Markdown(Welcome) +console.print(md) class MedeiaCLI: """Main CLI application object.""" @@ -1892,25 +2049,20 @@ class MedeiaCLI: return app def run(self) -> None: + # Ensure Rich tracebacks are active even when invoking subcommands. + try: + config = self._config_loader.load() + debug_enabled = bool(config.get("debug", False)) if isinstance(config, dict) else False + except Exception: + debug_enabled = False + + set_debug(debug_enabled) + _install_rich_traceback(show_locals=debug_enabled) + self.build_app()() def run_repl(self) -> None: - banner = r""" - Medeia-Macina -===================== -|123456789|ABCDEFGHI| -|246813579|JKLMNOPQR| -|369369369|STUVWXYZ0| -|483726159|ABCDEFGHI| -|=========+=========| -|516273849|JKLMNOPQR| -|639639639|STUVWXYZ0| -|753186429|ABCDEFGHI| -|876543219|JKLMNOPQR| -|999999999|STUVWXYZ0| -===================== - """ - print(banner) + # (Startup banner is optional; keep the REPL quiet by default.) 
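+        # (A Rich Markdown welcome is rendered once at module import time instead.)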
prompt_text = "🜂🜄🜁🜃|" @@ -1918,6 +2070,11 @@ class MedeiaCLI: "*********************************************" ) startup_table.set_no_choice(True).set_preserve_order(True) + startup_table.set_value_case("upper") + + def _upper(value: Any) -> str: + text = "" if value is None else str(value) + return text.upper() def _add_startup_check( status: str, @@ -1929,12 +2086,12 @@ class MedeiaCLI: detail: str = "", ) -> None: row = startup_table.add_row() - row.add_column("Status", status) - row.add_column("Name", name) - row.add_column("Provider", provider or "") - row.add_column("Store", store or "") - row.add_column("Files", "" if files is None else str(files)) - row.add_column("Detail", detail or "") + row.add_column("STATUS", _upper(status)) + row.add_column("NAME", _upper(name)) + row.add_column("PROVIDER", _upper(provider or "")) + row.add_column("STORE", _upper(store or "")) + row.add_column("FILES", "" if files is None else str(files)) + row.add_column("DETAIL", _upper(detail or "")) def _has_store_subtype(cfg: dict, subtype: str) -> bool: store_cfg = cfg.get("store") @@ -1967,8 +2124,8 @@ class MedeiaCLI: config = self._config_loader.load() debug_enabled = bool(config.get("debug", False)) set_debug(debug_enabled) - if debug_enabled: - debug("✓ Debug logging enabled") + _install_rich_traceback(show_locals=debug_enabled) + _add_startup_check("ENABLED" if debug_enabled else "DISABLED", "DEBUGGING") try: try: @@ -2226,8 +2383,8 @@ class MedeiaCLI: _add_startup_check("ERROR", "Cookies", detail=str(exc)) if startup_table.rows: - print() - print(startup_table.format_plain()) + stdout_console().print() + stdout_console().print(startup_table) except Exception as exc: if debug_enabled: debug(f"⚠ Could not check service availability: {exc}") @@ -2349,9 +2506,9 @@ class MedeiaCLI: if last_table is None: last_table = ctx.get_last_result_table() if last_table: - print() + stdout_console().print() ctx.set_current_stage_table(last_table) - print(last_table.format_plain()) + stdout_console().print(last_table) else: items = ctx.get_last_result_items() if items: @@ -2370,10 +2527,44 @@ class MedeiaCLI: last_table = ctx.get_display_table() if hasattr(ctx, "get_display_table") else None if last_table is None: last_table = ctx.get_last_result_table() + + # Auto-refresh search-store tables when navigating back, + # so row payloads (titles/tags) reflect latest store state. 
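+            # The table's original argv is replayed with --refresh appended
+            # (any pre-existing --refresh/-refresh flags are stripped first).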
+ try: + src_cmd = getattr(last_table, "source_command", None) if last_table else None + if isinstance(src_cmd, str) and src_cmd.lower().replace("_", "-") == "search-store": + src_args = getattr(last_table, "source_args", None) if last_table else None + base_args = list(src_args) if isinstance(src_args, list) else [] + cleaned_args = [ + str(a) + for a in base_args + if str(a).strip().lower() not in {"--refresh", "-refresh"} + ] + if hasattr(ctx, "set_current_command_text"): + try: + title_text = getattr(last_table, "title", None) if last_table else None + if isinstance(title_text, str) and title_text.strip(): + ctx.set_current_command_text(title_text.strip()) + else: + ctx.set_current_command_text(" ".join(["search-store", *cleaned_args]).strip()) + except Exception: + pass + try: + self._cmdlet_executor.execute("search-store", cleaned_args + ["--refresh"]) + finally: + if hasattr(ctx, "clear_current_command_text"): + try: + ctx.clear_current_command_text() + except Exception: + pass + continue + except Exception as exc: + print(f"Error refreshing search-store table: {exc}", file=sys.stderr) + if last_table: - print() + stdout_console().print() ctx.set_current_stage_table(last_table) - print(last_table.format_plain()) + stdout_console().print(last_table) else: items = ctx.get_last_result_items() if items: diff --git a/Provider/bandcamp.py b/Provider/bandcamp.py index 129e5f4..fd89ec2 100644 --- a/Provider/bandcamp.py +++ b/Provider/bandcamp.py @@ -1,6 +1,7 @@ from __future__ import annotations import sys +from urllib.parse import urlparse from typing import Any, Dict, List, Optional from ProviderCore.base import Provider, SearchResult @@ -15,6 +16,204 @@ except ImportError: # pragma: no cover class Bandcamp(Provider): """Search provider for Bandcamp.""" + @staticmethod + def _base_url(raw_url: str) -> str: + """Normalize a Bandcamp URL down to scheme://netloc.""" + text = str(raw_url or "").strip() + if not text: + return "" + try: + parsed = urlparse(text) + if not parsed.scheme or not parsed.netloc: + return text + return f"{parsed.scheme}://{parsed.netloc}" + except Exception: + return text + + @classmethod + def _discography_url(cls, raw_url: str) -> str: + base = cls._base_url(raw_url) + if not base: + return "" + # Bandcamp discography lives under /music. 
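+        # e.g. https://artist.bandcamp.com -> https://artist.bandcamp.com/music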
+ return base.rstrip("/") + "/music" + + def _scrape_artist_page(self, page: Any, artist_url: str, limit: int = 50) -> List[SearchResult]: + """Scrape an artist page for albums/tracks (discography).""" + base = self._base_url(artist_url) + discography_url = self._discography_url(artist_url) + if not base or not discography_url: + return [] + + debug(f"[bandcamp] Scraping artist page: {discography_url}") + page.goto(discography_url) + page.wait_for_load_state("domcontentloaded") + + results: List[SearchResult] = [] + cards = page.query_selector_all("li.music-grid-item") or [] + if not cards: + # Fallback selector + cards = page.query_selector_all(".music-grid-item") or [] + + for item in cards[:limit]: + try: + link = item.query_selector("a") + if not link: + continue + + href = link.get_attribute("href") or "" + href = str(href).strip() + if not href: + continue + + if href.startswith("/"): + target = base.rstrip("/") + href + elif href.startswith("http://") or href.startswith("https://"): + target = href + else: + target = base.rstrip("/") + "/" + href + + title_node = item.query_selector("p.title") or item.query_selector(".title") + title = (title_node.inner_text().strip() if title_node else "") + if title: + title = " ".join(title.split()) + if not title: + title = target.rsplit("/", 1)[-1] + + kind = "album" if "/album/" in target else ("track" if "/track/" in target else "item") + + results.append( + SearchResult( + table="bandcamp", + title=title, + path=target, + detail="", + annotations=[kind], + media_kind="audio", + columns=[ + ("Title", title), + ("Type", kind), + ("Url", target), + ], + full_metadata={ + "type": kind, + "url": target, + "artist_url": base, + }, + ) + ) + except Exception as exc: + debug(f"[bandcamp] Error parsing artist item: {exc}") + + return results + + def selector(self, selected_items: List[Any], *, ctx: Any, stage_is_last: bool = True, **_kwargs: Any) -> bool: + """Handle Bandcamp `@N` selection. + + If the selected item is an ARTIST result, selecting it auto-expands into + a discography table by scraping the artist URL. + """ + if not stage_is_last: + return False + + if sync_playwright is None: + return False + + # Only handle artist selections. + chosen: List[Dict[str, Any]] = [] + for item in selected_items or []: + payload: Dict[str, Any] = {} + if isinstance(item, dict): + payload = item + else: + try: + if hasattr(item, "to_dict"): + payload = item.to_dict() # type: ignore[assignment] + except Exception: + payload = {} + if not payload: + try: + payload = { + "title": getattr(item, "title", None), + "url": getattr(item, "url", None), + "path": getattr(item, "path", None), + "metadata": getattr(item, "metadata", None), + "extra": getattr(item, "extra", None), + } + except Exception: + payload = {} + + meta = payload.get("metadata") or payload.get("full_metadata") or {} + if not isinstance(meta, dict): + meta = {} + extra = payload.get("extra") + if isinstance(extra, dict): + meta = {**meta, **extra} + + type_val = str(meta.get("type") or "").strip().lower() + if type_val != "artist": + continue + + title = str(payload.get("title") or "").strip() + url_val = str(payload.get("url") or payload.get("path") or meta.get("url") or "").strip() + base = self._base_url(url_val) + if not base: + continue + + chosen.append({"title": title, "url": base, "location": str(meta.get("artist") or "").strip()}) + + if not chosen: + return False + + # Build a new table from artist discography. 
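+        # ResultTable / stdout_console are imported lazily; if either import
+        # fails, the selector reports the selection as unhandled (returns False).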
+ try: + from result_table import ResultTable + from rich_display import stdout_console + except Exception: + return False + + artist_title = chosen[0].get("title") or "artist" + artist_url = chosen[0].get("url") or "" + + try: + with sync_playwright() as p: + browser = p.chromium.launch(headless=True) + page = browser.new_page() + discography = self._scrape_artist_page(page, artist_url, limit=50) + browser.close() + except Exception as exc: + print(f"bandcamp artist lookup failed: {exc}\n") + return True + + table = ResultTable(f"Bandcamp: artist:{artist_title}").set_preserve_order(True) + table.set_table("bandcamp") + try: + table.set_value_case("lower") + except Exception: + pass + + results_payload: List[Dict[str, Any]] = [] + for r in discography: + table.add_result(r) + try: + results_payload.append(r.to_dict()) + except Exception: + results_payload.append({"table": "bandcamp", "title": getattr(r, "title", ""), "path": getattr(r, "path", "")}) + + try: + ctx.set_last_result_table(table, results_payload) + ctx.set_current_stage_table(table) + except Exception: + pass + + try: + stdout_console().print() + stdout_console().print(table) + except Exception: + pass + + return True + def search( self, query: str, @@ -73,6 +272,7 @@ class Bandcamp(Provider): title = link.inner_text().strip() target_url = link.get_attribute("href") + base_url = self._base_url(str(target_url or "")) subhead = item.query_selector(".subhead") artist = subhead.inner_text().strip() if subhead else "Unknown" @@ -89,13 +289,15 @@ class Bandcamp(Provider): annotations=[media_type], media_kind="audio", columns=[ - ("Name", title), - ("Artist", artist), + ("Title", title), + ("Location", artist), ("Type", media_type), + ("Url", base_url or str(target_url or "")), ], full_metadata={ "artist": artist, "type": media_type, + "url": base_url or str(target_url or ""), }, ) ) diff --git a/Provider/libgen.py b/Provider/libgen.py index 522f117..e6467da 100644 --- a/Provider/libgen.py +++ b/Provider/libgen.py @@ -175,42 +175,11 @@ class Libgen(Provider): elapsed = max(0.001, now - start_time) speed = downloaded / elapsed - eta_seconds = 0.0 - if total and total > 0 and speed > 0: - eta_seconds = max(0.0, float(total - downloaded) / float(speed)) - minutes, seconds = divmod(int(eta_seconds), 60) - hours, minutes = divmod(minutes, 60) - eta_str = f"{hours:02d}:{minutes:02d}:{seconds:02d}" if total else "?:?:?" - speed_str = progress_bar.format_bytes(speed) + "/s" - - percent_str = None - if total and total > 0: - percent = (downloaded / total) * 100.0 - percent_str = f"{percent:.1f}%" - - line = progress_bar.format_progress( - percent_str=percent_str, - downloaded=downloaded, - total=total, - speed_str=speed_str, - eta_str=eta_str, - ) - - # Prefix with filename for clarity when downloading multiple items. - if label: - line = f"{label} {line}" - - if getattr(sys.stderr, "isatty", lambda: True)(): - sys.stderr.write("\r" + line + " ") - sys.stderr.flush() + progress_bar.update(downloaded=downloaded, total=total, label=str(label or "download"), file=sys.stderr) last_progress_time[0] = now ok, final_path = download_from_mirror(target, out_path, progress_callback=progress_callback) - # Clear the in-place progress line. 
- if getattr(sys.stderr, "isatty", lambda: True)(): - sys.stderr.write("\r" + (" " * 180) + "\r") - sys.stderr.write("\n") - sys.stderr.flush() + progress_bar.finish() if ok and final_path: return Path(final_path) return None diff --git a/Provider/soulseek.py b/Provider/soulseek.py index fb340bc..65abfb8 100644 --- a/Provider/soulseek.py +++ b/Provider/soulseek.py @@ -584,48 +584,19 @@ async def download_soulseek_file( log(f"[soulseek] Download timeout after {timeout}s", file=sys.stderr) bytes_done = int(getattr(transfer, "bytes_transfered", 0) or 0) state_val = getattr(getattr(transfer, "state", None), "VALUE", None) - try: - if getattr(sys.stderr, "isatty", lambda: False)(): - sys.stderr.write("\r" + (" " * 140) + "\r") - sys.stderr.flush() - except Exception: - pass + progress_bar.finish() return None, state_val, bytes_done, elapsed bytes_done = int(getattr(transfer, "bytes_transfered", 0) or 0) total_bytes = int(getattr(transfer, "filesize", 0) or 0) now = time.time() if now - last_progress_time >= 0.5: - percent = (bytes_done / total_bytes) * 100.0 if total_bytes > 0 else 0.0 - speed = bytes_done / elapsed if elapsed > 0 else 0.0 - eta_str: Optional[str] = None - if total_bytes > 0 and speed > 0: - try: - eta_seconds = max(0.0, float(total_bytes - bytes_done) / float(speed)) - minutes, seconds = divmod(int(eta_seconds), 60) - hours, minutes = divmod(minutes, 60) - eta_str = f"{hours:02d}:{minutes:02d}:{seconds:02d}" - except Exception: - eta_str = None - - speed_str = progress_bar.format_bytes(speed) + "/s" - progress_line = progress_bar.format_progress( - percent_str=f"{percent:.1f}%", + progress_bar.update( downloaded=bytes_done, total=total_bytes if total_bytes > 0 else None, - speed_str=speed_str, - eta_str=eta_str, + label="download", + file=sys.stderr, ) - - try: - if getattr(sys.stderr, "isatty", lambda: False)(): - sys.stderr.write("\r" + progress_line + " ") - sys.stderr.flush() - else: - log(progress_line, file=sys.stderr) - except Exception: - pass - last_progress_time = now await asyncio.sleep(1) @@ -635,12 +606,7 @@ async def download_soulseek_file( final_elapsed = time.time() - start_time # Clear in-place progress bar. - try: - if getattr(sys.stderr, "isatty", lambda: False)(): - sys.stderr.write("\r" + (" " * 140) + "\r") - sys.stderr.flush() - except Exception: - pass + progress_bar.finish() # If a file was written, treat it as success even if state is odd. try: diff --git a/Provider/telegram.py b/Provider/telegram.py index ec0e094..fc97e2a 100644 --- a/Provider/telegram.py +++ b/Provider/telegram.py @@ -467,27 +467,16 @@ class Telegram(Provider): pass # Progress callback: prints to stderr so it doesn't interfere with pipeline stdout. + from models import ProgressBar + progress_bar = ProgressBar() last_print = {"t": 0.0} def _progress(current: int, total: int) -> None: - try: - now = time.monotonic() - # Throttle to avoid spamming. - if now - float(last_print.get("t", 0.0)) < 0.25 and current < total: - return - last_print["t"] = now - - pct = "" - try: - if total and total > 0: - pct = f" {min(100.0, (current / total) * 100.0):5.1f}%" - except Exception: - pct = "" - - line = f"[telegram] Downloading{pct} ({_format_bytes(current)}/{_format_bytes(total)})" - sys.stderr.write("\r" + line) - sys.stderr.flush() - except Exception: + now = time.monotonic() + # Throttle to avoid spamming. 
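+            # At most ~4 renders per second; the final call (current >= total)
+            # bypasses the throttle so the bar always reaches completion.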
+ if now - float(last_print.get("t", 0.0)) < 0.25 and current < total: return + last_print["t"] = now + progress_bar.update(downloaded=int(current), total=int(total), label="telegram", file=sys.stderr) part_kb = self._resolve_part_size_kb(file_size) try: @@ -502,11 +491,7 @@ class Telegram(Provider): except TypeError: # Older/newer Telethon versions may not accept part_size_kb on download_media. downloaded = _resolve(client.download_media(message, file=str(output_dir), progress_callback=_progress)) - try: - sys.stderr.write("\n") - sys.stderr.flush() - except Exception: - pass + progress_bar.finish() if not downloaded: raise Exception("Telegram download returned no file") downloaded_path = Path(str(downloaded)) diff --git a/ProviderCore/download.py b/ProviderCore/download.py index 3cb68ed..27d616a 100644 --- a/ProviderCore/download.py +++ b/ProviderCore/download.py @@ -2,9 +2,12 @@ from __future__ import annotations from pathlib import Path from typing import Optional +import sys import requests +from models import ProgressBar + def sanitize_filename(name: str, *, max_len: int = 150) -> str: text = str(name or "").strip() @@ -25,15 +28,45 @@ def download_file(url: str, output_path: Path, *, session: Optional[requests.Ses s = session or requests.Session() + bar = ProgressBar() + downloaded = 0 + total = None + try: with s.get(url, stream=True, timeout=timeout_s) as resp: resp.raise_for_status() + try: + total_val = int(resp.headers.get("content-length") or 0) + total = total_val if total_val > 0 else None + except Exception: + total = None + + # Render once immediately so fast downloads still show something. + try: + bar.update(downloaded=0, total=total, label=str(output_path.name or "download"), file=sys.stderr) + except Exception: + pass + with open(output_path, "wb") as f: for chunk in resp.iter_content(chunk_size=1024 * 256): if chunk: f.write(chunk) + downloaded += len(chunk) + try: + bar.update(downloaded=downloaded, total=total, label=str(output_path.name or "download"), file=sys.stderr) + except Exception: + pass + + try: + bar.finish() + except Exception: + pass return output_path.exists() and output_path.stat().st_size > 0 except Exception: + try: + bar.finish() + except Exception: + pass try: if output_path.exists(): output_path.unlink() diff --git a/SYS/download.py b/SYS/download.py index 990c340..f508ad8 100644 --- a/SYS/download.py +++ b/SYS/download.py @@ -44,6 +44,7 @@ except ImportError: extract_ytdlp_tags = None _EXTRACTOR_CACHE: List[Any] | None = None +_YTDLP_PROGRESS = ProgressBar() def _ensure_yt_dlp_ready() -> None: @@ -58,14 +59,16 @@ def _progress_callback(status: Dict[str, Any]) -> None: """Simple progress callback using logger.""" event = status.get("status") if event == "downloading": - percent = status.get("_percent_str", "?") - speed = status.get("_speed_str", "?") - eta = status.get("_eta_str", "?") - sys.stdout.write(f"\r[download] {percent} at {speed} ETA {eta} ") - sys.stdout.flush() + downloaded = status.get("downloaded_bytes") + total = status.get("total_bytes") or status.get("total_bytes_estimate") + _YTDLP_PROGRESS.update( + downloaded=int(downloaded or 0), + total=int(total) if total else None, + label="download", + file=sys.stderr, + ) elif event == "finished": - sys.stdout.write("\r" + " " * 70 + "\r") - sys.stdout.flush() + _YTDLP_PROGRESS.finish() debug(f"✓ Download finished: {status.get('filename')}") elif event in ("postprocessing", "processing"): debug(f"Post-processing: {status.get('postprocessor')}") @@ -632,13 +635,17 @@ def 
_download_direct_file( downloaded_bytes = [0] total_bytes = [0] last_progress_time = [start_time] + rendered_once = [False] def progress_callback(bytes_downloaded: int, content_length: int) -> None: downloaded_bytes[0] = bytes_downloaded total_bytes[0] = content_length now = time.time() - if now - last_progress_time[0] < 0.5: + is_final = bool(content_length > 0 and bytes_downloaded >= content_length) + if (not rendered_once[0]) or is_final: + pass + elif now - last_progress_time[0] < 0.5: return elapsed = now - start_time @@ -654,26 +661,14 @@ def _download_direct_file( except Exception: eta_str = None - speed_str = progress_bar.format_bytes(speed) + "/s" - - progress_line = progress_bar.format_progress( - percent_str=f"{percent:.1f}%", + progress_bar.update( downloaded=bytes_downloaded, total=content_length if content_length > 0 else None, - speed_str=speed_str, - eta_str=eta_str, + label=str(filename or "download"), + file=sys.stderr, ) - if not quiet: - try: - if getattr(sys.stderr, "isatty", lambda: False)(): - sys.stderr.write("\r" + progress_line + " ") - sys.stderr.flush() - else: - # Non-interactive: print occasional progress lines. - log(progress_line, file=sys.stderr) - except Exception: - pass + rendered_once[0] = True last_progress_time[0] = now @@ -681,14 +676,7 @@ def _download_direct_file( client.download(url, str(file_path), progress_callback=progress_callback) elapsed = time.time() - start_time - # Clear in-place progress bar. - if not quiet: - try: - if getattr(sys.stderr, "isatty", lambda: False)(): - sys.stderr.write("\r" + (" " * 140) + "\r") - sys.stderr.flush() - except Exception: - pass + progress_bar.finish() avg_speed_str = progress_bar.format_bytes(downloaded_bytes[0] / elapsed if elapsed > 0 else 0) + "/s" if not quiet: debug(f"✓ Downloaded in {elapsed:.1f}s at {avg_speed_str}") @@ -742,6 +730,10 @@ def _download_direct_file( ) except (httpx.HTTPError, httpx.RequestError) as exc: + try: + progress_bar.finish() + except Exception: + pass log(f"Download error: {exc}", file=sys.stderr) if debug_logger is not None: debug_logger.write_record( @@ -750,6 +742,10 @@ def _download_direct_file( ) raise DownloadError(f"Failed to download {url}: {exc}") from exc except Exception as exc: + try: + progress_bar.finish() + except Exception: + pass log(f"Error downloading file: {exc}", file=sys.stderr) if debug_logger is not None: debug_logger.write_record( diff --git a/SYS/logger.py b/SYS/logger.py index b41b423..02894d0 100644 --- a/SYS/logger.py +++ b/SYS/logger.py @@ -5,6 +5,8 @@ import inspect import threading from pathlib import Path +from rich_display import console_for + _DEBUG_ENABLED = False _thread_local = threading.local() @@ -56,6 +58,80 @@ def debug(*args, **kwargs) -> None: # Use the same logic as log() log(*args, **kwargs) + +def debug_inspect( + obj, + *, + title: str | None = None, + file=None, + methods: bool = False, + docs: bool = False, + private: bool = False, + dunder: bool = False, + sort: bool = True, + all: bool = False, + value: bool = True, +) -> None: + """Rich-inspect an object when debug logging is enabled. + + Uses the same stream / quiet-mode behavior as `debug()` and prepends a + `[file.function]` prefix when debug is enabled. + """ + if not _DEBUG_ENABLED: + return + + # Mirror debug() quiet-mode guard. + try: + stderr_name = getattr(sys.stderr, "name", "") + if "nul" in str(stderr_name).lower() or "/dev/null" in str(stderr_name): + return + except Exception: + pass + + # Resolve destination stream. 
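+    # Precedence: thread-local stream, then an explicit file= argument, then stderr.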
+ stream = get_thread_stream() + if stream is not None: + file = stream + elif file is None: + file = sys.stderr + + # Compute caller prefix (same as log()). + prefix = None + frame = inspect.currentframe() + if frame is not None and frame.f_back is not None: + caller_frame = frame.f_back + try: + file_name = Path(caller_frame.f_code.co_filename).stem + func_name = caller_frame.f_code.co_name + prefix = f"[{file_name}.{func_name}]" + finally: + del caller_frame + if frame is not None: + del frame + + # Render. + from rich import inspect as rich_inspect + + console = console_for(file) + # If the caller provides a title, treat it as authoritative. + # Only fall back to the automatic [file.func] prefix when no title is supplied. + effective_title = title + if not effective_title and prefix: + effective_title = prefix + + rich_inspect( + obj, + console=console, + title=effective_title, + methods=methods, + docs=docs, + private=private, + dunder=dunder, + sort=sort, + all=all, + value=value, + ) + def log(*args, **kwargs) -> None: """Print with automatic file.function prefix. @@ -71,12 +147,18 @@ def log(*args, **kwargs) -> None: # Get the calling frame frame = inspect.currentframe() if frame is None: - print(*args, **kwargs) + file = kwargs.pop("file", sys.stdout) + sep = kwargs.pop("sep", " ") + end = kwargs.pop("end", "\n") + console_for(file).print(*args, sep=sep, end=end) return caller_frame = frame.f_back if caller_frame is None: - print(*args, **kwargs) + file = kwargs.pop("file", sys.stdout) + sep = kwargs.pop("sep", " ") + end = kwargs.pop("end", "\n") + console_for(file).print(*args, sep=sep, end=end) return try: @@ -93,12 +175,15 @@ def log(*args, **kwargs) -> None: # Set default to stdout if not specified elif 'file' not in kwargs: kwargs['file'] = sys.stdout - + + file = kwargs.pop("file", sys.stdout) + sep = kwargs.pop("sep", " ") + end = kwargs.pop("end", "\n") if add_prefix: prefix = f"[{file_name}.{func_name}]" - print(prefix, *args, **kwargs) + console_for(file).print(prefix, *args, sep=sep, end=end) else: - print(*args, **kwargs) + console_for(file).print(*args, sep=sep, end=end) finally: del frame del caller_frame diff --git a/SYS/progress.py b/SYS/progress.py index 121ddc1..909ff68 100644 --- a/SYS/progress.py +++ b/SYS/progress.py @@ -1,102 +1,22 @@ -#!/usr/bin/env python3 -"""Text-based progress bar utilities for consistent display across all downloads.""" +"""Rich-only progress helpers. + +These functions preserve the legacy call signatures used around the codebase, +but all rendering is performed via Rich (no ASCII progress bars). +""" + +from __future__ import annotations import sys -from SYS.logger import log +from models import ProgressBar -def format_progress_bar(current: int, total: int, width: int = 40, label: str = "") -> str: - """Create a text-based progress bar. 
- - Args: - current: Current progress (bytes/items) - total: Total to complete (bytes/items) - width: Width of the bar in characters (default 40) - label: Optional label prefix - - Returns: - Formatted progress bar string - - Examples: - format_progress_bar(50, 100) - # Returns: "[████████████████░░░░░░░░░░░░░░░░░░░░] 50.0%" - - format_progress_bar(256*1024*1024, 1024*1024*1024, label="download.zip") - # Returns: "download.zip: [████████░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░] 25.0%" - """ - if total <= 0: - percentage = 0 - filled = 0 - else: - percentage = (current / total) * 100 - filled = int((current / total) * width) - - bar = "█" * filled + "░" * (width - filled) - pct_str = f"{percentage:.1f}%" - - if label: - result = f"{label}: [{bar}] {pct_str}" - else: - result = f"[{bar}] {pct_str}" - - return result - - -def format_size(bytes_val: float) -> str: - """Format bytes to human-readable size.""" - for unit in ['B', 'KB', 'MB', 'GB', 'TB']: - if bytes_val < 1024: - return f"{bytes_val:.2f} {unit}" - bytes_val /= 1024 - return f"{bytes_val:.2f} PB" - - -def format_download_status(filename: str, current: int, total: int, speed: float = 0) -> str: - """Format download status with progress bar and details.""" - bar = format_progress_bar(current, total, width=30) - size_current = format_size(current) - size_total = format_size(total) - - if speed > 0: - speed_str = f" @ {format_size(speed)}/s" - else: - speed_str = "" - - return f"{bar} ({size_current} / {size_total}{speed_str})" +_BAR = ProgressBar() def print_progress(filename: str, current: int, total: int, speed: float = 0, end: str = "\r") -> None: - """Print download progress to stderr (doesn't interfere with piped output).""" - status = format_download_status(filename, current, total, speed) - print(status, file=sys.stderr, end=end, flush=True) + _BAR.update(downloaded=int(current), total=int(total) if total else None, label=str(filename or "progress"), file=sys.stderr) def print_final_progress(filename: str, total: int, elapsed: float) -> None: - """Print final progress line (100%) with time elapsed.""" - bar = format_progress_bar(total, total, width=30) - size_str = format_size(total) - - if elapsed < 60: - time_str = f"{elapsed:.1f}s" - elif elapsed < 3600: - minutes = elapsed / 60 - time_str = f"{minutes:.1f}m" - else: - hours = elapsed / 3600 - time_str = f"{hours:.2f}h" - - print(f"{bar} ({size_str}) - {time_str}", file=sys.stderr, flush=True) - - -if __name__ == "__main__": - import time - - log("Progress Bar Demo:", file=sys.stderr) - - for i in range(101): - print_progress("demo.bin", i * 10 * 1024 * 1024, 1024 * 1024 * 1024) - time.sleep(0.02) - - print_final_progress("demo.bin", 1024 * 1024 * 1024, 2.0) - log() + _BAR.finish() diff --git a/SYS/utils.py b/SYS/utils.py index 17db8d3..0cac422 100644 --- a/SYS/utils.py +++ b/SYS/utils.py @@ -124,7 +124,7 @@ def create_tags_sidecar(file_path: Path, tags: set) -> None: try: with open(tags_path, 'w', encoding='utf-8') as f: for tag in sorted(tags): - f.write(f"{tag}\n") + f.write(f"{str(tag).strip().lower()}\n") except Exception as e: raise RuntimeError(f"Failed to create tags sidecar {tags_path}: {e}") from e diff --git a/Store/Folder.py b/Store/Folder.py index f41d9f2..d9a9ffb 100644 --- a/Store/Folder.py +++ b/Store/Folder.py @@ -452,7 +452,44 @@ class Folder(Store): query = query.lower() query_lower = query # Ensure query_lower is defined for all code paths - match_all = query == "*" + + def _normalize_ext_filter(value: str) -> str: + v = str(value or 
"").strip().lower().lstrip('.') + v = "".join(ch for ch in v if ch.isalnum()) + return v + + def _extract_system_filetype_ext(text: str) -> Optional[str]: + # Match: system:filetype = png (allow optional '=' and flexible spaces) + m = re.search(r"\bsystem:filetype\s*(?:=\s*)?([^\s,]+)", text) + if not m: + m = re.search(r"\bsystem:filetype\s*=\s*([^\s,]+)", text) + if not m: + return None + return _normalize_ext_filter(m.group(1)) or None + + # Support `ext:` and Hydrus-style `system:filetype = ` anywhere + # in the query (space or comma separated). + ext_filter: Optional[str] = None + try: + sys_ext = _extract_system_filetype_ext(query_lower) + if sys_ext: + ext_filter = sys_ext + query_lower = re.sub(r"\s*\bsystem:filetype\s*(?:=\s*)?[^\s,]+", " ", query_lower) + query_lower = re.sub(r"\s{2,}", " ", query_lower).strip().strip(',') + query = query_lower + + m = re.search(r"\bext:([^\s,]+)", query_lower) + if not m: + m = re.search(r"\bextension:([^\s,]+)", query_lower) + if m: + ext_filter = _normalize_ext_filter(m.group(1)) or None + query_lower = re.sub(r"\s*\b(?:ext|extension):[^\s,]+", " ", query_lower) + query_lower = re.sub(r"\s{2,}", " ", query_lower).strip().strip(',') + query = query_lower + except Exception: + ext_filter = None + + match_all = query == "*" or (not query and bool(ext_filter)) results = [] search_dir = Path(self._location).expanduser() @@ -518,6 +555,41 @@ class Folder(Store): try: with DatabaseAPI(search_dir) as api: + ext_hashes: set[str] | None = None + if ext_filter: + # Fetch a bounded set of hashes to intersect with other filters. + ext_fetch_limit = (limit or 45) * 50 + ext_hashes = api.get_file_hashes_by_ext(ext_filter, limit=ext_fetch_limit) + + # ext-only search: query is empty (or coerced to match_all above). + if ext_filter and (not query_lower or query_lower == "*"): + rows = api.get_files_by_ext(ext_filter, limit) + for file_hash, file_path_str, size_bytes, ext in rows: + if not file_path_str: + continue + file_path = Path(file_path_str) + if not file_path.exists(): + continue + if size_bytes is None: + try: + size_bytes = file_path.stat().st_size + except OSError: + size_bytes = None + tags = api.get_tags_for_file(file_hash) + entry = _create_entry(file_path, tags, size_bytes, file_hash) + try: + db_ext = str(ext or "").strip().lstrip('.') + if db_ext: + entry["ext"] = db_ext + except Exception: + pass + results.append(entry) + if limit is not None and len(results) >= limit: + return results + backend_label = str(getattr(self, "_name", "") or getattr(self, "NAME", "") or "folder") + debug(f"[folder:{backend_label}] {len(results)} result(s)") + return results + if tokens and len(tokens) > 1: url_fetch_limit = (limit or 45) * 50 @@ -546,6 +618,22 @@ class Folder(Store): return api.get_file_hashes_with_any_url(limit=url_fetch_limit) return api.get_file_hashes_by_url_like(_url_like_pattern(pattern), limit=url_fetch_limit) + if namespace == 'system': + # Hydrus-compatible query: system:filetype = png + m_ft = re.match(r"^filetype\s*(?:=\s*)?(.+)$", pattern) + if m_ft: + normalized_ext = _normalize_ext_filter(m_ft.group(1)) + if not normalized_ext: + return set() + return api.get_file_hashes_by_ext(normalized_ext, limit=url_fetch_limit) + return set() + + if namespace in {'ext', 'extension'}: + normalized_ext = _normalize_ext_filter(pattern) + if not normalized_ext: + return set() + return api.get_file_hashes_by_ext(normalized_ext, limit=url_fetch_limit) + if namespace == 'store': if pattern not in {'local', 'file', 'filesystem'}: return set() @@ -579,6 
+667,11 @@ class Folder(Store): if not matching_hashes: return results + if ext_hashes is not None: + matching_hashes = (matching_hashes or set()) & ext_hashes + if not matching_hashes: + return results + if not matching_hashes: return results @@ -596,6 +689,12 @@ class Folder(Store): size_bytes = None tags = api.get_tags_for_file(file_hash) entry = _create_entry(file_path, tags, size_bytes, file_hash) + try: + db_ext = str(ext or "").strip().lstrip('.') + if db_ext: + entry["ext"] = db_ext + except Exception: + pass results.append(entry) if limit is not None and len(results) >= limit: return results @@ -631,6 +730,12 @@ class Folder(Store): size_bytes = None tags = api.get_tags_for_file(file_hash) entry = _create_entry(file_path, tags, size_bytes, file_hash) + try: + db_ext = str(ext or "").strip().lstrip('.') + if db_ext: + entry["ext"] = db_ext + except Exception: + pass results.append(entry) if limit is not None and len(results) >= limit: return results @@ -658,6 +763,67 @@ class Folder(Store): if limit is not None and len(results) >= limit: return results return results + + if namespace == "system": + # Hydrus-compatible query: system:filetype = png + m_ft = re.match(r"^filetype\s*(?:=\s*)?(.+)$", pattern) + if m_ft: + normalized_ext = _normalize_ext_filter(m_ft.group(1)) + if not normalized_ext: + return results + rows = api.get_files_by_ext(normalized_ext, limit) + for file_hash, file_path_str, size_bytes, ext in rows: + if not file_path_str: + continue + file_path = Path(file_path_str) + if not file_path.exists(): + continue + if size_bytes is None: + try: + size_bytes = file_path.stat().st_size + except OSError: + size_bytes = None + tags = api.get_tags_for_file(file_hash) + entry = _create_entry(file_path, tags, size_bytes, file_hash) + try: + db_ext = str(ext or "").strip().lstrip('.') + if db_ext: + entry["ext"] = db_ext + except Exception: + pass + results.append(entry) + if limit is not None and len(results) >= limit: + return results + return results + + if namespace in {"ext", "extension"}: + normalized_ext = _normalize_ext_filter(pattern) + if not normalized_ext: + return results + rows = api.get_files_by_ext(normalized_ext, limit) + for file_hash, file_path_str, size_bytes, ext in rows: + if not file_path_str: + continue + file_path = Path(file_path_str) + if not file_path.exists(): + continue + if size_bytes is None: + try: + size_bytes = file_path.stat().st_size + except OSError: + size_bytes = None + tags = api.get_tags_for_file(file_hash) + entry = _create_entry(file_path, tags, size_bytes, file_hash) + try: + db_ext = str(ext or "").strip().lstrip('.') + if db_ext: + entry["ext"] = db_ext + except Exception: + pass + results.append(entry) + if limit is not None and len(results) >= limit: + return results + return results query_pattern = f"{namespace}:%" rows = api.get_files_by_namespace_pattern(query_pattern, limit) @@ -674,12 +840,20 @@ class Folder(Store): if tag_lower.startswith(f"{namespace}:"): value = tag_lower[len(namespace)+1:] if fnmatch(value, pattern): + if ext_hashes is not None and file_hash not in ext_hashes: + break file_path = Path(file_path_str) if file_path.exists(): if size_bytes is None: size_bytes = file_path.stat().st_size all_tags = api.get_tags_for_file(file_hash) entry = _create_entry(file_path, all_tags, size_bytes, file_hash) + try: + db_ext = str(ext or "").strip().lstrip('.') + if db_ext: + entry["ext"] = db_ext + except Exception: + pass results.append(entry) else: debug(f"File missing on disk: {file_path}") @@ -703,6 +877,8 @@ class 
Folder(Store): for file_hash, file_path_str, size_bytes, ext in term_rows: if not file_path_str: continue + if ext_hashes is not None and file_hash not in ext_hashes: + continue entry = hits.get(file_hash) if entry: entry["count"] += 1 @@ -746,6 +922,8 @@ class Folder(Store): rows = api.get_all_files(limit) for file_hash, file_path_str, size_bytes, ext in rows: if file_path_str: + if ext_hashes is not None and file_hash not in ext_hashes: + continue file_path = Path(file_path_str) if file_path.exists(): if size_bytes is None: @@ -753,6 +931,12 @@ class Folder(Store): tags = api.get_tags_for_file(file_hash) entry = _create_entry(file_path, tags, size_bytes, file_hash) + try: + db_ext = str(ext or "").strip().lstrip('.') + if db_ext: + entry["ext"] = db_ext + except Exception: + pass results.append(entry) backend_label = str(getattr(self, "_name", "") or getattr(self, "NAME", "") or "folder") @@ -896,7 +1080,7 @@ class Folder(Store): if db_tags: # Return actual store name instead of generic "local_db" store_name = self._name if self._name else "local" - return list(db_tags), store_name + return [str(t).strip().lower() for t in db_tags if isinstance(t, str) and t.strip()], store_name except Exception as exc: debug(f"Local DB lookup failed: {exc}") return [], "unknown" @@ -917,22 +1101,30 @@ class Folder(Store): try: with API_folder_store(Path(self._location)) as db: - # Get existing tags - existing_tags = list(db.get_tags(hash) or []) - original_tags_lower = {t.lower() for t in existing_tags} - - # Merge new tags, handling namespace overwrites - for new_tag in tag: - if ':' in new_tag: - namespace = new_tag.split(':', 1)[0] - # Remove existing tags in same namespace - existing_tags = [t for t in existing_tags if not t.startswith(namespace + ':')] - # Add new tag if not already present (case-insensitive check) - if new_tag.lower() not in original_tags_lower: - existing_tags.append(new_tag) - - # Save merged tags - db.add_tags_to_hash(hash, existing_tags) + existing_tags = [t for t in (db.get_tags(hash) or []) if isinstance(t, str) and t.strip()] + + from metadata import compute_namespaced_tag_overwrite + + _to_remove, _to_add, merged = compute_namespaced_tag_overwrite(existing_tags, tag or []) + if not _to_remove and not _to_add: + return True + + # Folder DB tag table is case-sensitive and add_tags_to_hash() is additive. + # To enforce lowercase-only tags and namespace overwrites, rewrite the full tag set. + cursor = db.connection.cursor() + cursor.execute("DELETE FROM tags WHERE hash = ?", (hash,)) + for t in merged: + t = str(t).strip().lower() + if t: + cursor.execute( + "INSERT OR IGNORE INTO tags (hash, tag) VALUES (?, ?)", + (hash, t), + ) + db.connection.commit() + try: + db._update_metadata_modified_time(hash) + except Exception: + pass return True except Exception as exc: debug(f"Local DB add_tags failed: {exc}") @@ -949,7 +1141,10 @@ class Folder(Store): if self._location: try: with API_folder_store(Path(self._location)) as db: - db.remove_tags_from_hash(file_hash, list(tags)) + tag_list = [str(t).strip().lower() for t in (tags or []) if isinstance(t, str) and str(t).strip()] + if not tag_list: + return True + db.remove_tags_from_hash(file_hash, tag_list) return True except Exception as exc: debug(f"Local DB remove_tags failed: {exc}") @@ -1006,6 +1201,130 @@ class Folder(Store): debug(f"add_url failed for local file: {exc}") return False + def add_url_bulk(self, items: List[tuple[str, List[str]]], **kwargs: Any) -> bool: + """Add known urls to many local files in one DB session. 
+ + This is a performance optimization used by cmdlets that receive many PipeObjects. + """ + from API.folder import API_folder_store + try: + if not self._location: + return False + + # Normalize + coalesce duplicates per hash. + try: + from metadata import normalize_urls + except Exception: + normalize_urls = None # type: ignore + + merged_by_hash: Dict[str, List[str]] = {} + for file_identifier, url_list in (items or []): + file_hash = str(file_identifier or "").strip().lower() + if not file_hash: + continue + + incoming: List[str] + if normalize_urls is not None: + try: + incoming = normalize_urls(url_list) + except Exception: + incoming = [str(u).strip() for u in (url_list or []) if str(u).strip()] + else: + incoming = [str(u).strip() for u in (url_list or []) if str(u).strip()] + + if not incoming: + continue + + existing = merged_by_hash.get(file_hash) or [] + for u in incoming: + if u and u not in existing: + existing.append(u) + merged_by_hash[file_hash] = existing + + if not merged_by_hash: + return True + + import json + + with API_folder_store(Path(self._location)) as db: + conn = getattr(db, "connection", None) + if conn is None: + return False + cursor = conn.cursor() + + # Ensure metadata rows exist (may be needed for older entries). + for file_hash in merged_by_hash.keys(): + try: + cursor.execute("INSERT OR IGNORE INTO metadata (hash) VALUES (?)", (file_hash,)) + except Exception: + continue + + # Load existing urls for all hashes in chunks. + existing_urls_by_hash: Dict[str, List[str]] = {h: [] for h in merged_by_hash.keys()} + hashes = list(merged_by_hash.keys()) + chunk_size = 400 + for i in range(0, len(hashes), chunk_size): + chunk = hashes[i : i + chunk_size] + if not chunk: + continue + placeholders = ",".join(["?"] * len(chunk)) + try: + cursor.execute(f"SELECT hash, url FROM metadata WHERE hash IN ({placeholders})", chunk) + rows = cursor.fetchall() or [] + except Exception: + rows = [] + + for row in rows: + try: + row_hash = str(row[0]).strip().lower() + except Exception: + continue + raw_urls = None + try: + raw_urls = row[1] + except Exception: + raw_urls = None + + parsed_urls: List[str] = [] + if raw_urls: + try: + parsed = json.loads(raw_urls) + if normalize_urls is not None: + parsed_urls = normalize_urls(parsed) + else: + if isinstance(parsed, list): + parsed_urls = [str(u).strip() for u in parsed if str(u).strip()] + except Exception: + parsed_urls = [] + + existing_urls_by_hash[row_hash] = parsed_urls + + # Compute updates and write in one commit. 
+ updates: List[tuple[str, str]] = [] + for file_hash, incoming_urls in merged_by_hash.items(): + existing_urls = existing_urls_by_hash.get(file_hash) or [] + final = list(existing_urls) + for u in incoming_urls: + if u and u not in final: + final.append(u) + if final != existing_urls: + try: + updates.append((json.dumps(final), file_hash)) + except Exception: + continue + + if updates: + cursor.executemany( + "UPDATE metadata SET url = ?, time_modified = CURRENT_TIMESTAMP, updated_at = CURRENT_TIMESTAMP WHERE hash = ?", + updates, + ) + + conn.commit() + return True + except Exception as exc: + debug(f"add_url_bulk failed for local file: {exc}") + return False + def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool: """Delete known url from a local file by hash.""" from API.folder import API_folder_store @@ -1031,6 +1350,119 @@ class Folder(Store): debug(f"delete_url failed for local file: {exc}") return False + def delete_url_bulk(self, items: List[tuple[str, List[str]]], **kwargs: Any) -> bool: + """Delete known urls from many local files in one DB session.""" + from API.folder import API_folder_store + try: + if not self._location: + return False + + try: + from metadata import normalize_urls + except Exception: + normalize_urls = None # type: ignore + + remove_by_hash: Dict[str, set[str]] = {} + for file_identifier, url_list in (items or []): + file_hash = str(file_identifier or "").strip().lower() + if not file_hash: + continue + + incoming: List[str] + if normalize_urls is not None: + try: + incoming = normalize_urls(url_list) + except Exception: + incoming = [str(u).strip() for u in (url_list or []) if str(u).strip()] + else: + incoming = [str(u).strip() for u in (url_list or []) if str(u).strip()] + + remove = {u for u in incoming if u} + if not remove: + continue + remove_by_hash.setdefault(file_hash, set()).update(remove) + + if not remove_by_hash: + return True + + import json + + with API_folder_store(Path(self._location)) as db: + conn = getattr(db, "connection", None) + if conn is None: + return False + cursor = conn.cursor() + + # Ensure metadata rows exist. + for file_hash in remove_by_hash.keys(): + try: + cursor.execute("INSERT OR IGNORE INTO metadata (hash) VALUES (?)", (file_hash,)) + except Exception: + continue + + # Load existing urls for hashes in chunks. + existing_urls_by_hash: Dict[str, List[str]] = {h: [] for h in remove_by_hash.keys()} + hashes = list(remove_by_hash.keys()) + chunk_size = 400 + for i in range(0, len(hashes), chunk_size): + chunk = hashes[i : i + chunk_size] + if not chunk: + continue + placeholders = ",".join(["?"] * len(chunk)) + try: + cursor.execute(f"SELECT hash, url FROM metadata WHERE hash IN ({placeholders})", chunk) + rows = cursor.fetchall() or [] + except Exception: + rows = [] + + for row in rows: + try: + row_hash = str(row[0]).strip().lower() + except Exception: + continue + raw_urls = None + try: + raw_urls = row[1] + except Exception: + raw_urls = None + + parsed_urls: List[str] = [] + if raw_urls: + try: + parsed = json.loads(raw_urls) + if normalize_urls is not None: + parsed_urls = normalize_urls(parsed) + else: + if isinstance(parsed, list): + parsed_urls = [str(u).strip() for u in parsed if str(u).strip()] + except Exception: + parsed_urls = [] + + existing_urls_by_hash[row_hash] = parsed_urls + + # Apply removals + write updates. 
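
Both bulk URL helpers above load existing rows with the same chunked IN (...) pattern. SQLite caps the number of bound parameters per statement (999 in older builds), which is why hashes are looked up 400 at a time rather than in one arbitrarily large IN clause. A self-contained sketch of the pattern, with the table and column names taken from this patch and the helper name fetch_url_rows invented for illustration:

import sqlite3

def fetch_url_rows(conn: sqlite3.Connection, hashes: list[str], chunk_size: int = 400) -> dict[str, str | None]:
    # Map hash -> raw JSON "url" column, fetched in bounded chunks so the number of
    # "?" placeholders per statement stays well under SQLite's variable limit.
    out: dict[str, str | None] = {}
    cur = conn.cursor()
    for i in range(0, len(hashes), chunk_size):
        chunk = hashes[i : i + chunk_size]
        placeholders = ",".join(["?"] * len(chunk))
        cur.execute(f"SELECT hash, url FROM metadata WHERE hash IN ({placeholders})", chunk)
        for row_hash, raw_url in cur.fetchall():
            out[str(row_hash)] = raw_url
    return out
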
+ updates: List[tuple[str, str]] = [] + for file_hash, remove_set in remove_by_hash.items(): + existing_urls = existing_urls_by_hash.get(file_hash) or [] + new_urls = [u for u in existing_urls if u not in remove_set] + if new_urls != existing_urls: + try: + updates.append((json.dumps(new_urls), file_hash)) + except Exception: + continue + + if updates: + cursor.executemany( + "UPDATE metadata SET url = ?, time_modified = CURRENT_TIMESTAMP, updated_at = CURRENT_TIMESTAMP WHERE hash = ?", + updates, + ) + + conn.commit() + return True + except Exception as exc: + debug(f"delete_url_bulk failed for local file: {exc}") + return False + def get_note(self, file_identifier: str, **kwargs: Any) -> Dict[str, str]: """Get notes for a local file by hash.""" from API.folder import API_folder_store @@ -1077,6 +1509,94 @@ class Folder(Store): debug(f"set_note failed for local file: {exc}") return False + def set_note_bulk(self, items: List[tuple[str, str, str]], **kwargs: Any) -> bool: + """Set notes for many local files in one DB session. + + Preserves existing semantics by only setting notes for hashes that still + map to a file path that exists on disk. + """ + from API.folder import API_folder_store + try: + if not self._location: + return False + + # Normalize input. + normalized: List[tuple[str, str, str]] = [] + for file_identifier, name, text in (items or []): + file_hash = str(file_identifier or "").strip().lower() + note_name = str(name or "").strip() + note_text = str(text or "") + if not file_hash or not _normalize_hash(file_hash) or not note_name: + continue + normalized.append((file_hash, note_name, note_text)) + + if not normalized: + return True + + with API_folder_store(Path(self._location)) as db: + conn = getattr(db, "connection", None) + if conn is None: + return False + cursor = conn.cursor() + + # Look up file paths for hashes in chunks (to verify existence). + wanted_hashes = sorted({h for (h, _n, _t) in normalized}) + hash_to_path: Dict[str, str] = {} + chunk_size = 400 + for i in range(0, len(wanted_hashes), chunk_size): + chunk = wanted_hashes[i : i + chunk_size] + if not chunk: + continue + placeholders = ",".join(["?"] * len(chunk)) + try: + cursor.execute(f"SELECT hash, file_path FROM files WHERE hash IN ({placeholders})", chunk) + rows = cursor.fetchall() or [] + except Exception: + rows = [] + for row in rows: + try: + h = str(row[0]).strip().lower() + p = str(row[1]).strip() + except Exception: + continue + if h and p: + hash_to_path[h] = p + + # Ensure notes rows exist and only write for existing files. + inserts: List[tuple[str, str, str]] = [] + for h, note_name, note_text in normalized: + p = hash_to_path.get(h) + if not p: + continue + try: + if not Path(p).exists(): + continue + except Exception: + continue + inserts.append((h, note_name, note_text)) + + if not inserts: + return False + + # Prefer upsert when supported, else fall back to INSERT OR REPLACE. + try: + cursor.executemany( + "INSERT INTO notes (hash, name, note) VALUES (?, ?, ?) 
" + "ON CONFLICT(hash, name) DO UPDATE SET note = excluded.note, updated_at = CURRENT_TIMESTAMP", + inserts, + ) + except Exception: + cursor.executemany( + "INSERT OR REPLACE INTO notes (hash, name, note) VALUES (?, ?, ?)", + inserts, + ) + + conn.commit() + return True + except Exception as exc: + debug(f"set_note_bulk failed for local file: {exc}") + return False + def delete_note(self, file_identifier: str, name: str, **kwargs: Any) -> bool: """Delete a named note for a local file by hash.""" from API.folder import API_folder_store diff --git a/Store/HydrusNetwork.py b/Store/HydrusNetwork.py index cb9f78d..f93d4bf 100644 --- a/Store/HydrusNetwork.py +++ b/Store/HydrusNetwork.py @@ -217,10 +217,13 @@ class HydrusNetwork(Store): # Add title to tags if provided and not already present if title: - title_tag = f"title:{title}" + title_tag = f"title:{title}".strip().lower() if not any(str(candidate).lower().startswith("title:") for candidate in tag_list): tag_list = [title_tag] + list(tag_list) + # Hydrus is lowercase-only tags; normalize here for consistency. + tag_list = [str(t).strip().lower() for t in (tag_list or []) if isinstance(t, str) and str(t).strip()] + try: # Compute file hash file_hash = sha256_file(file_path) @@ -445,6 +448,36 @@ class HydrusNetwork(Store): query_lower = query.lower().strip() + # Support `ext:` anywhere in the query. We filter results by the + # Hydrus metadata extension field. + def _normalize_ext_filter(value: str) -> str: + v = str(value or "").strip().lower().lstrip('.') + v = "".join(ch for ch in v if ch.isalnum()) + return v + + ext_filter: str | None = None + ext_only: bool = False + try: + m = re.search(r"\bext:([^\s,]+)", query_lower) + if not m: + m = re.search(r"\bextension:([^\s,]+)", query_lower) + if m: + ext_filter = _normalize_ext_filter(m.group(1)) or None + query_lower = re.sub(r"\s*\b(?:ext|extension):[^\s,]+", " ", query_lower) + query_lower = re.sub(r"\s{2,}", " ", query_lower).strip().strip(',') + query = query_lower + if ext_filter and not query_lower: + query = "*" + query_lower = "*" + ext_only = True + except Exception: + ext_filter = None + ext_only = False + + # Split into meaningful terms for AND logic. + # Avoid punctuation tokens like '-' that would make matching brittle. + search_terms = [t for t in re.findall(r"[a-z0-9]+", query_lower) if t] + # Special case: url:* and url: metadata_list: list[dict[str, Any]] | None = None if ":" in query_lower and not query_lower.startswith(":"): @@ -508,54 +541,268 @@ class HydrusNetwork(Store): metadata_list = _iter_url_filtered_metadata(pattern, want_any=False, fetch_limit=int(limit) if limit else 100) # Parse the query into tags - # Handle both simple tags and complex queries # "*" means "match all" - use system:everything tag in Hydrus + # If query has explicit namespace, use it as a tag search. 
+ # If query is free-form, search BOTH: + # - title:*term* (title: is the only namespace searched implicitly) + # - *term* (freeform tags; we will filter out other namespace matches client-side) + tags: list[str] = [] + freeform_union_search: bool = False + title_predicates: list[str] = [] + freeform_predicates: list[str] = [] + if query.strip() == "*": - # Use system:everything to match all files in Hydrus tags = ["system:everything"] + elif ':' in query_lower: + tags = [query_lower] else: - # If query doesn't have a namespace (no ':'), search all files and filter by title/tags - # If query has explicit namespace, use it as a tag search - if ':' not in query_lower: - # No namespace provided: search all files, then filter by title/tags containing the query - tags = ["system:everything"] + freeform_union_search = True + if search_terms: + # Hydrus supports wildcard matching primarily as a prefix (e.g., tag*). + # Use per-term prefix matching for both title: and freeform tags. + title_predicates = [f"title:{term}*" for term in search_terms] + freeform_predicates = [f"{term}*" for term in search_terms] else: - # User provided explicit namespace (e.g., "creator:john" or "system:has_audio") - # Use it as a tag search - tags = [query_lower] - - if not tags: - debug(f"{prefix} 0 result(s)") - return [] + # If we can't extract alnum terms, fall back to the raw query text. + title_predicates = [f"title:{query_lower}*"] + freeform_predicates = [f"{query_lower}*"] # Search files with the tags (unless url: search already produced metadata) results = [] - # Split by comma or space for AND logic - search_terms = set(query_lower.replace(',', ' ').split()) # For substring matching + + def _extract_search_ids(payload: Any) -> tuple[list[int], list[str]]: + if not isinstance(payload, dict): + return [], [] + raw_ids = payload.get("file_ids", []) + raw_hashes = payload.get("hashes", []) + ids_out: list[int] = [] + hashes_out: list[str] = [] + if isinstance(raw_ids, list): + for item in raw_ids: + try: + ids_out.append(int(item)) + except (TypeError, ValueError): + continue + if isinstance(raw_hashes, list): + hashes_out = [str(h).strip() for h in raw_hashes if isinstance(h, str) and str(h).strip()] + return ids_out, hashes_out if metadata_list is None: - search_result = client.search_files( - tags=tags, - return_hashes=True, - return_file_ids=True - ) + file_ids: list[int] = [] + hashes: list[str] = [] - file_ids = search_result.get("file_ids", []) if isinstance(search_result, dict) else [] - hashes = search_result.get("hashes", []) if isinstance(search_result, dict) else [] + if freeform_union_search: + if not title_predicates and not freeform_predicates: + debug(f"{prefix} 0 result(s)") + return [] + + payloads: list[Any] = [] + try: + payloads.append( + client.search_files( + tags=title_predicates, + return_hashes=True, + return_file_ids=True, + ) + ) + except Exception: + pass + + try: + payloads.append( + client.search_files( + tags=freeform_predicates, + return_hashes=True, + return_file_ids=True, + ) + ) + except Exception: + pass + + id_set: set[int] = set() + hash_set: set[str] = set() + for payload in payloads: + ids_part, hashes_part = _extract_search_ids(payload) + for fid in ids_part: + id_set.add(fid) + for hh in hashes_part: + hash_set.add(hh) + file_ids = list(id_set) + hashes = list(hash_set) + else: + if not tags: + debug(f"{prefix} 0 result(s)") + return [] + + search_result = client.search_files( + tags=tags, + return_hashes=True, + return_file_ids=True + ) + file_ids, hashes = 
_extract_search_ids(search_result) + + # Fast path: ext-only search. Avoid fetching metadata for an unbounded + # system:everything result set; fetch in chunks until we have enough. + if ext_only and ext_filter: + results: list[dict[str, Any]] = [] + if not file_ids and not hashes: + debug(f"{prefix} 0 result(s)") + return [] + + # Prefer file_ids if available. + if file_ids: + chunk_size = 200 + for start in range(0, len(file_ids), chunk_size): + if len(results) >= limit: + break + chunk = file_ids[start : start + chunk_size] + try: + payload = client.fetch_file_metadata( + file_ids=chunk, + include_service_keys_to_tags=True, + include_file_url=False, + include_duration=True, + include_size=True, + include_mime=True, + ) + except Exception: + continue + metas = payload.get("metadata", []) if isinstance(payload, dict) else [] + if not isinstance(metas, list): + continue + for meta in metas: + if len(results) >= limit: + break + if not isinstance(meta, dict): + continue + mime_type = meta.get("mime") + ext = str(meta.get("ext") or "").strip().lstrip('.') + if not ext and mime_type: + for category in mime_maps.values(): + for _ext_key, info in category.items(): + if mime_type in info.get("mimes", []): + ext = str(info.get("ext", "")).strip().lstrip('.') + break + if ext: + break + if _normalize_ext_filter(ext) != ext_filter: + continue + + file_id = meta.get("file_id") + hash_hex = meta.get("hash") + size = meta.get("size", 0) + + tags_set = meta.get("tags", {}) + all_tags: list[str] = [] + title = f"Hydrus File {file_id}" + if isinstance(tags_set, dict): + def _collect(tag_list: Any) -> None: + nonlocal title + if not isinstance(tag_list, list): + return + for tag in tag_list: + tag_text = str(tag) if tag else "" + if not tag_text: + continue + tag_l = tag_text.strip().lower() + if not tag_l: + continue + all_tags.append(tag_l) + if tag_l.startswith("title:") and title == f"Hydrus File {file_id}": + title = tag_l.split(":", 1)[1].strip() + + for _service_name, service_tags in tags_set.items(): + if not isinstance(service_tags, dict): + continue + storage_tags = service_tags.get("storage_tags", {}) + if isinstance(storage_tags, dict): + for tag_list in storage_tags.values(): + _collect(tag_list) + display_tags = service_tags.get("display_tags", []) + _collect(display_tags) + + file_url = f"{self.URL.rstrip('/')}/get_files/file?hash={hash_hex}" + results.append( + { + "hash": hash_hex, + "url": file_url, + "name": title, + "title": title, + "size": size, + "size_bytes": size, + "store": self.NAME, + "tag": all_tags, + "file_id": file_id, + "mime": mime_type, + "ext": ext, + } + ) + + debug(f"{prefix} {len(results)} result(s)") + return results[:limit] + + # If we only got hashes, fall back to the normal flow below. 
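
The ext-only fast path above is driven by how the ext:/extension: token was peeled out of the query near the top of search(). A standalone sketch of that parsing, using the same regexes as this hunk; the wrapper name split_ext_filter is illustrative and not part of the patch:

import re

def _normalize_ext_filter(value: str) -> str:
    # Lowercase, drop a leading dot, keep only alphanumerics ("*.PNG" -> "png").
    v = str(value or "").strip().lower().lstrip(".")
    return "".join(ch for ch in v if ch.isalnum())

def split_ext_filter(query: str) -> tuple[str, str | None, bool]:
    # Returns (remaining_query, ext_filter, ext_only).
    q = query.lower().strip()
    m = re.search(r"\bext:([^\s,]+)", q) or re.search(r"\bextension:([^\s,]+)", q)
    if not m:
        return q, None, False
    ext = _normalize_ext_filter(m.group(1)) or None
    q = re.sub(r"\s*\b(?:ext|extension):[^\s,]+", " ", q)
    q = re.sub(r"\s{2,}", " ", q).strip().strip(",")
    if ext and not q:
        return "*", ext, True   # ext-only search: match everything, then filter by extension
    return q, ext, False

# e.g. split_ext_filter("ext:png")        -> ("*", "png", True)
#      split_ext_filter("beach ext:.JPG") -> ("beach", "jpg", False)
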
if not file_ids and not hashes: debug(f"{prefix} 0 result(s)") return [] if file_ids: - metadata = client.fetch_file_metadata(file_ids=file_ids) + metadata = client.fetch_file_metadata( + file_ids=file_ids, + include_service_keys_to_tags=True, + include_file_url=False, + include_duration=True, + include_size=True, + include_mime=True, + ) metadata_list = metadata.get("metadata", []) elif hashes: - metadata = client.fetch_file_metadata(hashes=hashes) + metadata = client.fetch_file_metadata( + hashes=hashes, + include_service_keys_to_tags=True, + include_file_url=False, + include_duration=True, + include_size=True, + include_mime=True, + ) metadata_list = metadata.get("metadata", []) else: metadata_list = [] + # If our free-text searches produce nothing (or nothing survived downstream filtering), fallback to scanning. + if (not metadata_list) and (query_lower != "*") and (":" not in query_lower): + try: + search_result = client.search_files( + tags=["system:everything"], + return_hashes=True, + return_file_ids=True, + ) + file_ids, hashes = _extract_search_ids(search_result) + + if file_ids: + metadata = client.fetch_file_metadata( + file_ids=file_ids, + include_service_keys_to_tags=True, + include_file_url=False, + include_duration=True, + include_size=True, + include_mime=True, + ) + metadata_list = metadata.get("metadata", []) + elif hashes: + metadata = client.fetch_file_metadata( + hashes=hashes, + include_service_keys_to_tags=True, + include_file_url=False, + include_duration=True, + include_size=True, + include_mime=True, + ) + metadata_list = metadata.get("metadata", []) + except Exception: + pass + if not isinstance(metadata_list, list): metadata_list = [] @@ -585,10 +832,13 @@ class HydrusNetwork(Store): tag_text = str(tag) if tag else "" if not tag_text: continue - all_tags.append(tag_text) - all_tags_str += " " + tag_text.lower() - if tag_text.lower().startswith("title:") and title == f"Hydrus File {file_id}": - title = tag_text.split(":", 1)[1].strip() + tag_l = tag_text.strip().lower() + if not tag_l: + continue + all_tags.append(tag_l) + all_tags_str += " " + tag_l + if tag_l.startswith("title:") and title == f"Hydrus File {file_id}": + title = tag_l.split(":", 1)[1].strip() for _service_name, service_tags in tags_set.items(): if not isinstance(service_tags, dict): @@ -641,20 +891,15 @@ class HydrusNetwork(Store): "ext": ext, }) else: - # Free-form search: check if search terms match the title or tags - # Match if ALL search terms are found in title or tags (AND logic) - # AND use whole word matching - - # Combine title and tags for searching - searchable_text = (title + " " + all_tags_str).lower() - + # Free-form search: check if search terms match title or FREEFORM tags. + # Do NOT implicitly match other namespace tags (except title:). 
+ freeform_tags = [t for t in all_tags if isinstance(t, str) and t and (":" not in t)] + searchable_text = (title + " " + " ".join(freeform_tags)).lower() + match = True - if query_lower != "*": + if query_lower != "*" and search_terms: for term in search_terms: - # Regex for whole word: \bterm\b - # Escape term to handle special chars - pattern = r'\b' + re.escape(term) + r'\b' - if not re.search(pattern, searchable_text): + if term not in searchable_text: match = False break @@ -675,6 +920,17 @@ class HydrusNetwork(Store): }) debug(f"{prefix} {len(results)} result(s)") + if ext_filter: + wanted = ext_filter + filtered: list[dict[str, Any]] = [] + for item in results: + try: + if _normalize_ext_filter(str(item.get("ext") or "")) == wanted: + filtered.append(item) + except Exception: + continue + results = filtered + return results[:limit] except Exception as exc: @@ -903,8 +1159,8 @@ class HydrusNetwork(Store): # Extract tags from metadata tags = self._extract_tags_from_hydrus_meta(meta, service_key, service_name) - - return tags, "hydrus" + + return [str(t).strip().lower() for t in tags if isinstance(t, str) and t.strip()], "hydrus" except Exception as exc: debug(f"{self._log_prefix()} get_tags failed: {exc}") @@ -924,12 +1180,38 @@ class HydrusNetwork(Store): debug(f"{self._log_prefix()} add_tag: invalid file hash '{file_identifier}'") return False service_name = kwargs.get("service_name") or "my tags" - # Ensure tags is a list - tag_list = list(tags) if isinstance(tags, (list, tuple)) else [str(tags)] - if not tag_list: - return False - client.add_tag(file_hash, tag_list, service_name) - return True + + incoming_tags = [str(t).strip().lower() for t in (tags or []) if isinstance(t, str) and str(t).strip()] + if not incoming_tags: + return True + + try: + existing_tags, _src = self.get_tag(file_hash) + except Exception: + existing_tags = [] + + from metadata import compute_namespaced_tag_overwrite + + tags_to_remove, tags_to_add, _merged = compute_namespaced_tag_overwrite(existing_tags, incoming_tags) + + if not tags_to_add and not tags_to_remove: + return True + + did_any = False + if tags_to_remove: + try: + client.delete_tag(file_hash, tags_to_remove, service_name) + did_any = True + except Exception as exc: + debug(f"{self._log_prefix()} add_tag: delete_tag failed: {exc}") + if tags_to_add: + try: + client.add_tag(file_hash, tags_to_add, service_name) + did_any = True + except Exception as exc: + debug(f"{self._log_prefix()} add_tag: add_tag failed: {exc}") + + return did_any except Exception as exc: debug(f"{self._log_prefix()} add_tag failed: {exc}") return False @@ -948,7 +1230,8 @@ class HydrusNetwork(Store): debug(f"{self._log_prefix()} delete_tag: invalid file hash '{file_identifier}'") return False service_name = kwargs.get("service_name") or "my tags" - tag_list = list(tags) if isinstance(tags, (list, tuple)) else [str(tags)] + raw_list = list(tags) if isinstance(tags, (list, tuple)) else [str(tags)] + tag_list = [str(t).strip().lower() for t in raw_list if isinstance(t, str) and str(t).strip()] if not tag_list: return False client.delete_tag(file_hash, tag_list, service_name) @@ -1014,6 +1297,38 @@ class HydrusNetwork(Store): debug(f"{self._log_prefix()} add_url failed: {exc}") return False + def add_url_bulk(self, items: List[tuple[str, List[str]]], **kwargs: Any) -> bool: + """Bulk associate urls with Hydrus files. + + This is a best-effort convenience wrapper used by cmdlets to batch url associations. 
+ Hydrus' client API is still called per (hash,url) pair, but this consolidates the + cmdlet-level control flow so url association can be deferred until the end. + """ + try: + client = self._client + if client is None: + debug(f"{self._log_prefix()} add_url_bulk: client unavailable") + return False + + any_success = False + for file_identifier, urls in (items or []): + h = str(file_identifier or "").strip().lower() + if len(h) != 64: + continue + for u in (urls or []): + s = str(u or "").strip() + if not s: + continue + try: + client.associate_url(h, s) + any_success = True + except Exception: + continue + return any_success + except Exception as exc: + debug(f"{self._log_prefix()} add_url_bulk failed: {exc}") + return False + def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool: """Delete one or more url from a Hydrus file. """ diff --git a/Store/_base.py b/Store/_base.py index 0d6f137..22e24b6 100644 --- a/Store/_base.py +++ b/Store/_base.py @@ -50,6 +50,51 @@ class Store(ABC): def add_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool: raise NotImplementedError + def add_url_bulk(self, items: List[Tuple[str, List[str]]], **kwargs: Any) -> bool: + """Optional bulk url association. + + Backends may override this to batch writes (single transaction / request). + Default behavior is to call add_url() per file. + """ + changed_any = False + for file_identifier, urls in (items or []): + try: + ok = self.add_url(file_identifier, urls, **kwargs) + changed_any = changed_any or bool(ok) + except Exception: + continue + return changed_any + + def delete_url_bulk(self, items: List[Tuple[str, List[str]]], **kwargs: Any) -> bool: + """Optional bulk url deletion. + + Backends may override this to batch writes (single transaction / request). + Default behavior is to call delete_url() per file. + """ + changed_any = False + for file_identifier, urls in (items or []): + try: + ok = self.delete_url(file_identifier, urls, **kwargs) + changed_any = changed_any or bool(ok) + except Exception: + continue + return changed_any + + def set_note_bulk(self, items: List[Tuple[str, str, str]], **kwargs: Any) -> bool: + """Optional bulk note set. + + Backends may override this to batch writes (single transaction / request). + Default behavior is to call set_note() per file. + """ + changed_any = False + for file_identifier, name, text in (items or []): + try: + ok = self.set_note(file_identifier, name, text, **kwargs) + changed_any = changed_any or bool(ok) + except Exception: + continue + return changed_any + @abstractmethod def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool: raise NotImplementedError diff --git a/cmdlet/add_file.py b/cmdlet/add_file.py index 91bfd55..8a20233 100644 --- a/cmdlet/add_file.py +++ b/cmdlet/add_file.py @@ -109,6 +109,7 @@ class Add_File(Cmdlet): collected_payloads: List[Dict[str, Any]] = [] pending_relationship_pairs: Dict[str, set[tuple[str, str]]] = {} + pending_url_associations: Dict[str, List[tuple[str, List[str]]]] = {} successes = 0 failures = 0 @@ -118,6 +119,110 @@ class Add_File(Cmdlet): want_final_search_store = bool(is_last_stage) and bool(is_storage_backend_location) and bool(location) auto_search_store_after_add = False + # When ingesting multiple items into a backend store, defer URL association and + # apply it once at the end (bulk) to avoid per-item URL API calls. 
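
The comment above introduces the defer-and-flush scheme built out over the rest of this file: while ingesting a batch, (hash, urls) pairs are parked per backend in pending_url_associations, and only after the loop does _apply_pending_url_associations() hand them to add_url_bulk(), falling back to per-item add_url() when the bulk method is missing or fails. A compressed sketch of that control flow, assuming only the add_url/add_url_bulk methods defined in Store/_base.py; the function names here are illustrative:

from collections import defaultdict
from typing import Any

pending: dict[str, list[tuple[str, list[str]]]] = defaultdict(list)

def ingest_one(backend_name: str, file_hash: str, urls: list[str], *, defer: bool, backend: Any) -> None:
    # During the per-item loop: either associate immediately, or park the pair for later.
    if defer:
        pending[backend_name].append((file_hash, urls))
    else:
        backend.add_url(file_hash, urls)

def flush(backends: dict[str, Any]) -> None:
    # After the loop: one bulk call per backend, with a per-item fallback on error.
    for name, pairs in pending.items():
        backend = backends[name]
        bulk = getattr(backend, "add_url_bulk", None)
        if callable(bulk):
            try:
                bulk(pairs)
                continue
            except Exception:
                pass
        for file_hash, urls in pairs:
            backend.add_url(file_hash, urls)
    pending.clear()
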
+ defer_url_association = bool(is_storage_backend_location) and bool(location) and len(items_to_process) > 1 + + # If we are going to persist results (-store / -provider) and the piped input contains + # URL download targets (e.g. playlist rows), preflight URL duplicates once up-front. + # IMPORTANT: Do not treat a *source URL* on an already-local file (e.g. screen-shot) + # as a download target; that would trigger yt-dlp preflights for non-yt-dlp URLs. + skip_url_downloads: set[str] = set() + download_mode_hint: Optional[str] = None + forced_ytdl_format: Optional[str] = None + if (provider_name or location) and isinstance(items_to_process, list) and items_to_process: + url_candidates: List[str] = [] + for it in items_to_process: + try: + po_probe = coerce_to_pipe_object(it, path_arg) + except Exception: + continue + + # If the piped item already points at a local file, we are *ingesting* it, + # not downloading it. Skip URL-preflight and yt-dlp probing for those. + try: + po_path = getattr(po_probe, "path", None) + po_path_s = str(po_path or "").strip() + if po_path_s and not po_path_s.lower().startswith(("http://", "https://", "magnet:", "torrent:")): + continue + except Exception: + pass + + try: + for u in (self._get_url(it, po_probe) or []): + s = str(u or "").strip() + if not s: + continue + if s.lower().startswith(("http://", "https://", "magnet:", "torrent:")): + url_candidates.append(s) + except Exception: + continue + + # Only meaningful when targeting a registered backend store. + if url_candidates and is_storage_backend_location and location: + # De-dupe in-order to keep logs stable. + seen: set[str] = set() + unique_urls: List[str] = [] + for u in url_candidates: + if u in seen: + continue + seen.add(u) + unique_urls.append(u) + + try: + skip_url_downloads = self._preflight_url_duplicates_bulk(unique_urls, config) + except Exception: + skip_url_downloads = set() + + # Batch-level format preflight: + # - If the sample URL only has one available format, force it for the batch. + # - If the sample URL appears audio-only (no video codecs), prefer audio mode. 
+ try: + from cmdlet.download_media import is_url_supported_by_ytdlp, list_formats + from tool.ytdlp import YtDlpTool + + sample_url = unique_urls[0] if unique_urls else None + if sample_url and is_url_supported_by_ytdlp(str(sample_url)): + cf = None + try: + cookie_path = YtDlpTool(config).resolve_cookiefile() + if cookie_path is not None and cookie_path.is_file(): + cf = str(cookie_path) + except Exception: + cf = None + + fmts = list_formats( + str(sample_url), + no_playlist=False, + playlist_items=None, + cookiefile=cf, + ) + + if isinstance(fmts, list) and fmts: + has_video = False + try: + for f in fmts: + if not isinstance(f, dict): + continue + vcodec = str(f.get("vcodec", "none") or "none").strip().lower() + if vcodec and vcodec != "none": + has_video = True + break + except Exception: + has_video = False + + download_mode_hint = "video" if has_video else "audio" + + if len(fmts) == 1 and isinstance(fmts[0], dict): + fid = str(fmts[0].get("format_id") or "").strip() + if fid: + forced_ytdl_format = fid + except Exception: + download_mode_hint = download_mode_hint + forced_ytdl_format = forced_ytdl_format + + processed_url_items: set[str] = set() + for item in items_to_process: pipe_obj = coerce_to_pipe_object(item, path_arg) @@ -244,7 +349,148 @@ class Add_File(Cmdlet): if isinstance(media_path_or_url, str) and media_path_or_url.lower().startswith( ("http://", "https://", "magnet:", "torrent:") ): - code = self._delegate_to_download_data(item, media_path_or_url, location, provider_name, args, config) + # If the user provided a destination (-store / -provider), download here and then + # continue normal add-file logic so the downloaded file is actually ingested. + url_str = str(media_path_or_url) + if (provider_name or location): + # Avoid re-processing the same URL multiple times in a batch. + if url_str in processed_url_items: + successes += 1 + continue + processed_url_items.add(url_str) + + # If bulk preflight found this URL already stored, skip downloading. + if url_str in skip_url_downloads: + log(f"Skipping download (already stored): {url_str}", file=sys.stderr) + successes += 1 + continue + + downloaded_pipe_dicts = self._download_streaming_url_as_pipe_objects( + url_str, + config, + mode_hint=download_mode_hint, + ytdl_format_hint=forced_ytdl_format, + ) + if not downloaded_pipe_dicts: + failures += 1 + continue + + # Merge original tags/notes/relationships into each downloaded item and ingest. + for dl_item in downloaded_pipe_dicts: + try: + if isinstance(dl_item, dict): + # Merge tags + base_tags = list(getattr(pipe_obj, "tag", None) or []) + if base_tags: + dl_tags = list(dl_item.get("tag") or []) + dl_item["tag"] = merge_sequences(dl_tags, base_tags, case_sensitive=False) + + # Carry notes/relationships forward when present on the original. 
+ base_notes = getattr(pipe_obj, "notes", None) + if base_notes and ("notes" not in dl_item): + dl_item["notes"] = base_notes + base_rels = getattr(pipe_obj, "relationships", None) + if base_rels and ("relationships" not in dl_item): + dl_item["relationships"] = base_rels + except Exception: + pass + + dl_pipe_obj = coerce_to_pipe_object(dl_item, None) + try: + dl_media_path = Path(str(getattr(dl_pipe_obj, "path", "") or "")) + except Exception: + dl_media_path = None + + if dl_media_path is None or not self._validate_source(dl_media_path): + failures += 1 + continue + + if provider_name: + if str(provider_name).strip().lower() == "matrix": + room_id = None + if provider_room: + room_id = str(provider_room).strip() + if not room_id: + try: + matrix_conf = config.get("provider", {}).get("matrix", {}) if isinstance(config, dict) else {} + room_id = str(matrix_conf.get("room_id") or "").strip() or None + except Exception: + room_id = None + if not room_id: + pending = [ + { + "path": str(dl_media_path), + "pipe_obj": dl_pipe_obj, + "delete_after": bool(delete_after_item), + } + ] + return self._matrix_prompt_room_selection(pending, config, list(args)) + + code = self._handle_matrix_upload( + dl_media_path, + dl_pipe_obj, + config, + delete_after_item, + room_id=room_id, + ) + else: + code = self._handle_provider_upload( + dl_media_path, + provider_name, + dl_pipe_obj, + config, + delete_after_item, + ) + if code == 0: + successes += 1 + else: + failures += 1 + continue + + if location: + try: + store = Store(config) + backends = store.list_backends() + if location in backends: + code = self._handle_storage_backend( + dl_item, + dl_media_path, + location, + dl_pipe_obj, + config, + delete_after_item, + collect_payloads=collected_payloads, + collect_relationship_pairs=pending_relationship_pairs, + defer_url_association=defer_url_association, + pending_url_associations=pending_url_associations, + suppress_last_stage_overlay=want_final_search_store, + auto_search_store=auto_search_store_after_add, + ) + else: + code = self._handle_local_export( + dl_media_path, + location, + dl_pipe_obj, + config, + delete_after_item, + ) + except Exception as exc: + debug(f"[add-file] ERROR: Failed to resolve location: {exc}") + log(f"Invalid location: {location}", file=sys.stderr) + failures += 1 + continue + + if code == 0: + successes += 1 + else: + failures += 1 + continue + + # Finished processing all downloaded items for this URL. + continue + + # No destination specified: keep legacy behavior (download-media only). + code = self._delegate_to_download_data(item, url_str, location, provider_name, args, config) if code == 0: successes += 1 else: @@ -303,6 +549,8 @@ class Add_File(Cmdlet): delete_after_item, collect_payloads=collected_payloads, collect_relationship_pairs=pending_relationship_pairs, + defer_url_association=defer_url_association, + pending_url_associations=pending_url_associations, suppress_last_stage_overlay=want_final_search_store, auto_search_store=auto_search_store_after_add, ) @@ -329,6 +577,13 @@ class Add_File(Cmdlet): except Exception: pass + # Apply deferred url associations (bulk) before showing the final store table. + if pending_url_associations: + try: + Add_File._apply_pending_url_associations(pending_url_associations, config) + except Exception: + pass + # Always end add-file -store (when last stage) by showing the canonical store table. # This keeps output consistent and ensures @N selection works for multi-item ingests. 
if want_final_search_store and collected_payloads: @@ -383,7 +638,7 @@ class Add_File(Cmdlet): query = "hash:" + ",".join(hashes) args = ["-store", str(store), query] - log(f"[add-file] Refresh: search-store -store {store} \"{query}\"", file=sys.stderr) + debug(f"[add-file] Refresh: search-store -store {store} \"{query}\"") # Run search-store under a temporary stage context so its ctx.emit() calls # don't interfere with the outer add-file pipeline stage. @@ -1440,6 +1695,292 @@ class Add_File(Cmdlet): return 0 + @staticmethod + def _preflight_url_duplicates_bulk(urls: Sequence[str], config: Dict[str, Any]) -> set[str]: + """Return a set of URLs that appear to already exist in any searchable backend. + + This is a best-effort check used to avoid re-downloading already-stored media when + a batch of URL items is piped into add-file. + """ + skip: set[str] = set() + try: + storage = Store(config) + backend_names = list(storage.list_searchable_backends() or []) + except Exception: + return skip + + for raw in urls: + u = str(raw or "").strip() + if not u: + continue + + for backend_name in backend_names: + try: + if str(backend_name).strip().lower() == "temp": + continue + except Exception: + pass + try: + backend = storage[backend_name] + except Exception: + continue + + try: + hits = backend.search(f"url:{u}", limit=1) or [] + except Exception: + hits = [] + if hits: + skip.add(u) + break + + return skip + + @staticmethod + def _download_streaming_url_as_pipe_objects( + url: str, + config: Dict[str, Any], + *, + mode_hint: Optional[str] = None, + ytdl_format_hint: Optional[str] = None, + ) -> List[Dict[str, Any]]: + """Download a yt-dlp-supported URL and return PipeObject-style dict(s). + + This does not rely on pipeline stage context and is used so add-file can ingest + URL selections directly (download -> add to store/provider) in one invocation. + """ + url_str = str(url or "").strip() + if not url_str: + return [] + + try: + from cmdlet.download_media import ( + CMDLET as dl_cmdlet, + _download_with_timeout, + is_url_supported_by_ytdlp, + list_formats, + _format_chapters_note, + _best_subtitle_sidecar, + _read_text_file, + ) + from models import DownloadOptions + from tool.ytdlp import YtDlpTool + except Exception: + return [] + + if not is_url_supported_by_ytdlp(url_str): + return [] + + try: + from config import resolve_output_dir + + out_dir = resolve_output_dir(config) + if out_dir is None: + return [] + except Exception: + return [] + + cookies_path = None + try: + cookie_candidate = YtDlpTool(config).resolve_cookiefile() + if cookie_candidate is not None and cookie_candidate.is_file(): + cookies_path = cookie_candidate + except Exception: + cookies_path = None + + quiet_download = False + try: + quiet_download = bool((config or {}).get("_quiet_background_output")) + except Exception: + quiet_download = False + + # Decide download mode. + # Default to video unless we have a hint or the URL appears to be audio-only. + mode = str(mode_hint or "").strip().lower() if mode_hint else "" + if mode not in {"audio", "video"}: + mode = "video" + # Best-effort: infer from formats for this URL (one-time, no playlist probing). 
+ try: + cf = str(cookies_path) if cookies_path is not None and cookies_path.is_file() else None + fmts_probe = list_formats(url_str, no_playlist=False, playlist_items=None, cookiefile=cf) + if isinstance(fmts_probe, list) and fmts_probe: + has_video = False + for f in fmts_probe: + if not isinstance(f, dict): + continue + vcodec = str(f.get("vcodec", "none") or "none").strip().lower() + if vcodec and vcodec != "none": + has_video = True + break + mode = "video" if has_video else "audio" + except Exception: + mode = "video" + + # Pick a safe initial format selector. + # Important: yt-dlp defaults like "251/140" are YouTube-specific and break Bandcamp. + fmt_hint = str(ytdl_format_hint).strip() if ytdl_format_hint else "" + if fmt_hint: + chosen_format: Optional[str] = fmt_hint + else: + chosen_format = None + if mode == "audio": + # Generic audio selector that works across extractors. + chosen_format = "bestaudio/best" + + opts = DownloadOptions( + url=url_str, + mode=mode, + output_dir=Path(out_dir), + cookies_path=cookies_path, + ytdl_format=chosen_format, + quiet=quiet_download, + embed_chapters=True, + write_sub=True, + ) + + # Download with a small amount of resilience for format errors. + try: + result_obj = _download_with_timeout(opts, timeout_seconds=300) + except Exception as exc: + msg = str(exc) + # If a format is invalid/unsupported, try: + # - if only one format exists, retry with that id + # - else for audio-only sources, retry with bestaudio/best + try: + format_error = "Requested format is not available" in msg + except Exception: + format_error = False + + if format_error: + try: + cf = str(cookies_path) if cookies_path is not None and cookies_path.is_file() else None + fmts = list_formats(url_str, no_playlist=False, playlist_items=None, cookiefile=cf) + if isinstance(fmts, list) and len(fmts) == 1 and isinstance(fmts[0], dict): + fid = str(fmts[0].get("format_id") or "").strip() + if fid: + opts = DownloadOptions( + url=url_str, + mode=mode, + output_dir=Path(out_dir), + cookies_path=cookies_path, + ytdl_format=fid, + quiet=quiet_download, + embed_chapters=True, + write_sub=True, + ) + result_obj = _download_with_timeout(opts, timeout_seconds=300) + # proceed + else: + raise + elif mode == "audio" and (not chosen_format or chosen_format != "bestaudio/best"): + opts = DownloadOptions( + url=url_str, + mode=mode, + output_dir=Path(out_dir), + cookies_path=cookies_path, + ytdl_format="bestaudio/best", + quiet=quiet_download, + embed_chapters=True, + write_sub=True, + ) + result_obj = _download_with_timeout(opts, timeout_seconds=300) + else: + raise + except Exception as exc2: + log(f"[add-file] Download failed for {url_str}: {exc2}", file=sys.stderr) + return [] + else: + log(f"[add-file] Download failed for {url_str}: {exc}", file=sys.stderr) + return [] + + results: List[Any] + if isinstance(result_obj, list): + results = list(result_obj) + else: + paths = getattr(result_obj, "paths", None) + if isinstance(paths, list) and paths: + # Section downloads: create one result per file. 
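
Both the batch-level preflight earlier in run() and the per-URL probe above reduce a yt-dlp format listing to two decisions: whether any video stream exists, and whether a single available format should be forced. A standalone sketch of that reduction, assuming the format dicts carry the usual vcodec and format_id keys that list_formats() is expected to return; the function name is illustrative:

def infer_download_plan(formats: list[dict]) -> tuple[str, str | None]:
    # Returns (mode, forced_format_id): mode is "video" if any format has a real video
    # codec, else "audio"; a format_id is forced only when exactly one format exists
    # (YouTube-style defaults like "251/140" break on other extractors such as Bandcamp).
    has_video = any(
        str(f.get("vcodec", "none") or "none").strip().lower() not in {"", "none"}
        for f in formats
        if isinstance(f, dict)
    )
    forced = None
    if len(formats) == 1 and isinstance(formats[0], dict):
        forced = str(formats[0].get("format_id") or "").strip() or None
    return ("video" if has_video else "audio"), forced

# e.g. an audio-only listing:
# infer_download_plan([{"format_id": "mp3-128", "vcodec": "none"}]) -> ("audio", "mp3-128")
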
+ from models import DownloadMediaResult + + results = [] + for p in paths: + try: + p_path = Path(p) + except Exception: + continue + if not p_path.exists() or p_path.is_dir(): + continue + try: + hv = sha256_file(p_path) + except Exception: + hv = None + try: + results.append( + DownloadMediaResult( + path=p_path, + info=getattr(result_obj, "info", {}) or {}, + tag=list(getattr(result_obj, "tag", []) or []), + source_url=getattr(result_obj, "source_url", None) or url_str, + hash_value=hv, + ) + ) + except Exception: + continue + else: + results = [result_obj] + + out: List[Dict[str, Any]] = [] + for downloaded in results: + try: + po = dl_cmdlet._build_pipe_object(downloaded, url_str, opts) + + # Attach chapter timestamps note (best-effort). + try: + info = downloaded.info if isinstance(getattr(downloaded, "info", None), dict) else {} + except Exception: + info = {} + try: + chapters_text = _format_chapters_note(info) + except Exception: + chapters_text = None + if chapters_text: + notes = po.get("notes") + if not isinstance(notes, dict): + notes = {} + notes.setdefault("chapters", chapters_text) + po["notes"] = notes + + # Capture subtitle sidecar into notes and remove it so add-file won't ingest it later. + try: + media_path = Path(str(po.get("path") or "")) + except Exception: + media_path = None + if media_path is not None and media_path.exists() and media_path.is_file(): + try: + sub_path = _best_subtitle_sidecar(media_path) + except Exception: + sub_path = None + if sub_path is not None: + sub_text = _read_text_file(sub_path) + if sub_text: + notes = po.get("notes") + if not isinstance(notes, dict): + notes = {} + notes["sub"] = sub_text + po["notes"] = notes + try: + sub_path.unlink() + except Exception: + pass + + # Mark as temp artifact from download-media so add-file can auto-delete after ingest. + po["action"] = "cmdlet:download-media" + po["is_temp"] = True + out.append(po) + except Exception: + continue + + return out + @staticmethod def _download_soulseek_file( result: Any, @@ -1640,7 +2181,9 @@ class Add_File(Cmdlet): ctx.set_current_stage_table(table) print() - print(table.format_plain()) + from rich_display import stdout_console + + stdout_console().print(table) print("\nSelect room(s) with @N (e.g. @1 or @1-3) to upload the selected item(s)") return 0 @@ -1710,6 +2253,8 @@ class Add_File(Cmdlet): *, collect_payloads: Optional[List[Dict[str, Any]]] = None, collect_relationship_pairs: Optional[Dict[str, set[tuple[str, str]]]] = None, + defer_url_association: bool = False, + pending_url_associations: Optional[Dict[str, List[tuple[str, List[str]]]]] = None, suppress_last_stage_overlay: bool = False, auto_search_store: bool = True, ) -> int: @@ -1822,7 +2367,7 @@ class Add_File(Cmdlet): media_path, title=title, tag=tags, - url=url + url=[] if (defer_url_association and url) else url ) ##log(f"✓ File added to '{backend_name}': {file_identifier}", file=sys.stderr) @@ -1859,10 +2404,16 @@ class Add_File(Cmdlet): # If we have url(s), ensure they get associated with the destination file. # This mirrors `add-url` behavior but avoids emitting extra pipeline noise. 
if url: - try: - backend.add_url(resolved_hash, list(url)) - except Exception: - pass + if defer_url_association and pending_url_associations is not None: + try: + pending_url_associations.setdefault(str(backend_name), []).append((str(resolved_hash), list(url))) + except Exception: + pass + else: + try: + backend.add_url(resolved_hash, list(url)) + except Exception: + pass # If a subtitle note was provided upstream (e.g., download-media writes notes.sub), # persist it automatically like add-note would. @@ -1965,6 +2516,68 @@ class Add_File(Cmdlet): # --- Helpers --- + @staticmethod + def _apply_pending_url_associations(pending: Dict[str, List[tuple[str, List[str]]]], config: Dict[str, Any]) -> None: + """Apply deferred URL associations in bulk, grouped per backend.""" + + try: + store = Store(config) + except Exception: + return + + for backend_name, pairs in (pending or {}).items(): + if not pairs: + continue + try: + backend = store[backend_name] + except Exception: + continue + + # Merge URLs per hash and de-duplicate. + merged: Dict[str, List[str]] = {} + for file_hash, urls in pairs: + h = str(file_hash or "").strip().lower() + if len(h) != 64: + continue + url_list: List[str] = [] + try: + for u in (urls or []): + s = str(u or "").strip() + if s: + url_list.append(s) + except Exception: + url_list = [] + if not url_list: + continue + + bucket = merged.setdefault(h, []) + seen = set(bucket) + for u in url_list: + if u in seen: + continue + seen.add(u) + bucket.append(u) + + items: List[tuple[str, List[str]]] = [(h, u) for h, u in merged.items() if u] + if not items: + continue + + bulk = getattr(backend, "add_url_bulk", None) + if callable(bulk): + try: + bulk(items) + continue + except Exception: + pass + + single = getattr(backend, "add_url", None) + if callable(single): + for h, u in items: + try: + single(h, u) + except Exception: + continue + @staticmethod def _load_sidecar_bundle( media_path: Path, diff --git a/cmdlet/add_note.py b/cmdlet/add_note.py index ddbf0a1..07dcb9d 100644 --- a/cmdlet/add_note.py +++ b/cmdlet/add_note.py @@ -1,7 +1,7 @@ from __future__ import annotations from pathlib import Path -from typing import Any, Dict, Optional, Sequence +from typing import Any, Dict, List, Optional, Sequence, Tuple import sys from SYS.logger import log @@ -103,6 +103,9 @@ class Add_Note(Cmdlet): store_registry = Store(config) updated = 0 + # Batch write plan: store -> [(hash, name, text), ...] + note_ops: Dict[str, List[Tuple[str, str, str]]] = {} + # Optional global fallback for note text from pipeline values. # Allows patterns like: ... | add-note sub pipeline_default_text = None @@ -177,20 +180,43 @@ class Add_Note(Cmdlet): log(f"[add_note] Error: Unknown store '{store_name}': {exc}", file=sys.stderr) return 1 - ok = False - try: - ok = bool(backend.set_note(resolved_hash, note_name, item_note_text, config=config)) - except Exception as exc: - log(f"[add_note] Error: Failed to set note: {exc}", file=sys.stderr) - ok = False - - if ok: - updated += 1 + # Queue for bulk write per store. We still emit items immediately; + # the pipeline only advances after this cmdlet returns. + note_ops.setdefault(store_name, []).append((resolved_hash, note_name, item_note_text)) + updated += 1 ctx.emit(res) + # Execute bulk writes per store. 
+ wrote_any = False + for store_name, ops in note_ops.items(): + if not ops: + continue + try: + backend = store_registry[store_name] + except Exception: + continue + + bulk_fn = getattr(backend, "set_note_bulk", None) + if callable(bulk_fn): + try: + ok = bool(bulk_fn(list(ops), config=config)) + wrote_any = wrote_any or ok or True + ctx.print_if_visible(f"✓ add-note: {len(ops)} item(s) in '{store_name}'", file=sys.stderr) + continue + except Exception as exc: + log(f"[add_note] Warning: bulk set_note failed for '{store_name}': {exc}; falling back", file=sys.stderr) + + # Fallback: per-item writes + for file_hash, name, text in ops: + try: + ok = bool(backend.set_note(file_hash, name, text, config=config)) + wrote_any = wrote_any or ok + except Exception: + continue + log(f"[add_note] Updated {updated} item(s)", file=sys.stderr) - return 0 if updated > 0 else 1 + return 0 if (updated > 0 and wrote_any) else (0 if updated > 0 else 1) CMDLET = Add_Note() diff --git a/cmdlet/add_tag.py b/cmdlet/add_tag.py index d08c08f..b08abc0 100644 --- a/cmdlet/add_tag.py +++ b/cmdlet/add_tag.py @@ -520,45 +520,13 @@ class Add_Tag(Cmdlet): if new_tag.lower() not in existing_lower: item_tag_to_add.append(new_tag) - # Namespace replacement: delete old namespace:* when adding namespace:value - removed_namespace_tag: list[str] = [] - for new_tag in item_tag_to_add: - if not isinstance(new_tag, str) or ":" not in new_tag: - continue - ns = new_tag.split(":", 1)[0].strip() - if not ns: - continue - ns_prefix = ns.lower() + ":" - for t in existing_tag_list: - if t.lower().startswith(ns_prefix) and t.lower() != new_tag.lower(): - removed_namespace_tag.append(t) - - removed_namespace_tag = sorted({t for t in removed_namespace_tag}) - - actual_tag_to_add = [t for t in item_tag_to_add if isinstance(t, str) and t.lower() not in existing_lower] - changed = False - if removed_namespace_tag: - try: - ok_del = backend.delete_tag(resolved_hash, removed_namespace_tag, config=config) - if ok_del: - changed = True - except Exception as exc: - log(f"[add_tag] Warning: Failed deleting namespace tag: {exc}", file=sys.stderr) - - if actual_tag_to_add: - try: - ok_add = backend.add_tag(resolved_hash, actual_tag_to_add, config=config) - if ok_add: - changed = True - else: - log("[add_tag] Warning: Store rejected tag update", file=sys.stderr) - except Exception as exc: - log(f"[add_tag] Warning: Failed adding tag: {exc}", file=sys.stderr) - - if changed: - total_added += len(actual_tag_to_add) - total_modified += 1 + try: + ok_add = backend.add_tag(resolved_hash, item_tag_to_add, config=config) + if not ok_add: + log("[add_tag] Warning: Store rejected tag update", file=sys.stderr) + except Exception as exc: + log(f"[add_tag] Warning: Failed adding tag: {exc}", file=sys.stderr) try: refreshed_tag, _src2 = backend.get_tag(resolved_hash, config=config) @@ -566,6 +534,14 @@ class Add_Tag(Cmdlet): except Exception: refreshed_list = existing_tag_list + # Decide whether anything actually changed (case-sensitive so title casing updates count). 
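
The namespace-replacement loop deleted above now lives behind backend.add_tag(), which in both the Folder and HydrusNetwork backends of this patch delegates to compute_namespaced_tag_overwrite() from metadata. That helper's real body is not shown anywhere in the diff; the sketch below only illustrates the contract the callers rely on, namely that an incoming namespace:value tag displaces any existing tag in the same namespace, plain tags are additive, and comparison is lowercase:

def compute_namespaced_tag_overwrite(existing: list[str], incoming: list[str]) -> tuple[list[str], list[str], list[str]]:
    # Illustrative only; the shipped helper in metadata.py may differ in detail.
    existing = [str(t).strip().lower() for t in existing if str(t).strip()]
    incoming = [str(t).strip().lower() for t in incoming if str(t).strip()]
    to_remove: list[str] = []
    to_add: list[str] = []
    merged = list(existing)
    for tag in incoming:
        if ":" in tag:
            ns_prefix = tag.split(":", 1)[0] + ":"
            stale = [t for t in merged if t.startswith(ns_prefix) and t != tag]
            to_remove.extend(stale)
            merged = [t for t in merged if t not in stale]
        if tag not in merged:
            to_add.append(tag)
            merged.append(tag)
    return to_remove, to_add, merged

# e.g. existing=["title:old name", "blue"], incoming=["title:New Name"]
# -> to_remove=["title:old name"], to_add=["title:new name"], merged=["blue", "title:new name"]
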
+ if set(refreshed_list) != set(existing_tag_list): + changed = True + before_lower = {t.lower() for t in existing_tag_list} + after_lower = {t.lower() for t in refreshed_list} + total_added += len(after_lower - before_lower) + total_modified += 1 + # Update the result's tag using canonical field if isinstance(res, models.PipeObject): res.tag = refreshed_list @@ -575,7 +551,7 @@ class Add_Tag(Cmdlet): final_title = _extract_title_tag(refreshed_list) _apply_title_to_result(res, final_title) - if final_title and (not original_title or final_title.lower() != original_title.lower()): + if final_title and (not original_title or final_title != original_title): _refresh_result_table_title(final_title, resolved_hash, str(store_name), raw_path) if changed: diff --git a/cmdlet/add_url.py b/cmdlet/add_url.py index b1bfc04..eb04fe7 100644 --- a/cmdlet/add_url.py +++ b/cmdlet/add_url.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import Any, Dict, Sequence +from typing import Any, Dict, List, Optional, Sequence, Tuple import sys import pipeline as ctx @@ -39,28 +39,37 @@ class Add_Url(sh.Cmdlet): log("Error: -query must be of the form hash:") return 1 + # Bulk input is common in pipelines; treat a list of PipeObjects as a batch. + results: List[Any] = result if isinstance(result, list) else ([result] if result is not None else []) + + if query_hash and len(results) > 1: + log("Error: -query hash: cannot be used with multiple piped items") + return 1 + # Extract hash and store from result or args - file_hash = query_hash or sh.get_field(result, "hash") - store_name = parsed.get("store") or sh.get_field(result, "store") + file_hash = query_hash or (sh.get_field(result, "hash") if result is not None else None) + store_name = parsed.get("store") or (sh.get_field(result, "store") if result is not None else None) url_arg = parsed.get("url") - if not file_hash: - log("Error: No file hash provided (pipe an item or use -query \"hash:\")") - return 1 - - if not store_name: - log("Error: No store name provided") - return 1 + # If we have multiple piped items, we will resolve hash/store per item below. 
+ if not results: + if not file_hash: + log("Error: No file hash provided (pipe an item or use -query \"hash:\")") + return 1 + if not store_name: + log("Error: No store name provided") + return 1 if not url_arg: log("Error: No URL provided") return 1 - # Normalize hash - file_hash = sh.normalize_hash(file_hash) - if not file_hash: - log("Error: Invalid hash format") - return 1 + # Normalize hash (single-item mode) + if not results and file_hash: + file_hash = sh.normalize_hash(file_hash) + if not file_hash: + log("Error: Invalid hash format") + return 1 # Parse url (comma-separated) urls = [u.strip() for u in str(url_arg).split(',') if u.strip()] @@ -71,12 +80,118 @@ class Add_Url(sh.Cmdlet): # Get backend and add url try: storage = Store(config) - backend = storage[store_name] - backend.add_url(file_hash, urls) - for u in urls: - ctx.emit(f"Added URL: {u}") - + def _merge_urls(existing: Any, incoming: List[str]) -> List[str]: + out: List[str] = [] + try: + if isinstance(existing, str): + out.extend([p.strip() for p in existing.split(",") if p.strip()]) + elif isinstance(existing, (list, tuple)): + out.extend([str(u).strip() for u in existing if str(u).strip()]) + except Exception: + out = [] + for u in incoming: + if u and u not in out: + out.append(u) + return out + + def _set_item_url(item: Any, merged: List[str]) -> None: + try: + if isinstance(item, dict): + if len(merged) == 1: + item["url"] = merged[0] + else: + item["url"] = list(merged) + return + # PipeObject-like + if hasattr(item, "url"): + if len(merged) == 1: + setattr(item, "url", merged[0]) + else: + setattr(item, "url", list(merged)) + except Exception: + return + + # Build batches per store. + store_override = parsed.get("store") + batch: Dict[str, List[Tuple[str, List[str]]]] = {} + pass_through: List[Any] = [] + + if results: + for item in results: + pass_through.append(item) + + raw_hash = query_hash or sh.get_field(item, "hash") + raw_store = store_override or sh.get_field(item, "store") + if not raw_hash or not raw_store: + ctx.print_if_visible("[add-url] Warning: Item missing hash/store; skipping", file=sys.stderr) + continue + + normalized = sh.normalize_hash(raw_hash) + if not normalized: + ctx.print_if_visible("[add-url] Warning: Item has invalid hash; skipping", file=sys.stderr) + continue + + store_text = str(raw_store).strip() + if not store_text: + ctx.print_if_visible("[add-url] Warning: Item has empty store; skipping", file=sys.stderr) + continue + + # Validate backend exists (skip PATH/unknown). + if not storage.is_available(store_text): + ctx.print_if_visible( + f"[add-url] Warning: Store '{store_text}' not configured; skipping", file=sys.stderr + ) + continue + + batch.setdefault(store_text, []).append((normalized, list(urls))) + + # Execute per-store batches. + for store_text, pairs in batch.items(): + try: + backend = storage[store_text] + except Exception: + continue + + # Coalesce duplicates per hash before passing to backend. 
+ merged: Dict[str, List[str]] = {} + for h, ulist in pairs: + merged.setdefault(h, []) + for u in (ulist or []): + if u and u not in merged[h]: + merged[h].append(u) + + bulk_pairs = [(h, merged[h]) for h in merged.keys()] + + bulk_fn = getattr(backend, "add_url_bulk", None) + if callable(bulk_fn): + bulk_fn(bulk_pairs, config=config) + else: + for h, ulist in bulk_pairs: + backend.add_url(h, ulist, config=config) + + ctx.print_if_visible( + f"✓ add-url: {len(urls)} url(s) for {len(bulk_pairs)} item(s) in '{store_text}'", + file=sys.stderr, + ) + + # Pass items through unchanged (but update url field for convenience). + for item in pass_through: + existing = sh.get_field(item, "url") + merged = _merge_urls(existing, list(urls)) + _set_item_url(item, merged) + ctx.emit(item) + return 0 + + # Single-item mode + backend = storage[str(store_name)] + backend.add_url(str(file_hash), urls, config=config) + ctx.print_if_visible(f"✓ add-url: {len(urls)} url(s) added", file=sys.stderr) + if result is not None: + existing = sh.get_field(result, "url") + merged = _merge_urls(existing, list(urls)) + _set_item_url(result, merged) + ctx.emit(result) return 0 except KeyError: diff --git a/cmdlet/delete_file.py b/cmdlet/delete_file.py index 7679962..5b91c78 100644 --- a/cmdlet/delete_file.py +++ b/cmdlet/delete_file.py @@ -1,16 +1,19 @@ """Delete-file cmdlet: Delete files from local storage and/or Hydrus.""" from __future__ import annotations -from typing import Any, Dict, Sequence +from typing import Any, Dict, List, Sequence import sys from pathlib import Path from SYS.logger import debug, log +from SYS.utils import format_bytes from Store.Folder import Folder from Store import Store from . import _shared as sh from API import HydrusNetwork as hydrus_wrapper import pipeline as ctx +from result_table import ResultTable, _format_size +from rich_display import stdout_console class Delete_File(sh.Cmdlet): @@ -38,9 +41,20 @@ class Delete_File(sh.Cmdlet): ) self.register() - def _process_single_item(self, item: Any, override_hash: str | None, conserve: str | None, - lib_root: str | None, reason: str, config: Dict[str, Any]) -> bool: - """Process deletion for a single item.""" + def _process_single_item( + self, + item: Any, + override_hash: str | None, + conserve: str | None, + lib_root: str | None, + reason: str, + config: Dict[str, Any], + ) -> List[Dict[str, Any]]: + """Process deletion for a single item. + + Returns display rows (for the final Rich table). Returning an empty list + indicates no delete occurred. 
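+        A single item can yield one row per location it was deleted from (local file and/or Hydrus).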
+ """ # Handle item as either dict or object if isinstance(item, dict): hash_hex_raw = item.get("hash_hex") or item.get("hash") @@ -50,6 +64,44 @@ class Delete_File(sh.Cmdlet): hash_hex_raw = sh.get_field(item, "hash_hex") or sh.get_field(item, "hash") target = sh.get_field(item, "target") or sh.get_field(item, "file_path") or sh.get_field(item, "path") title_val = sh.get_field(item, "title") or sh.get_field(item, "name") + + def _get_ext_from_item() -> str: + try: + if isinstance(item, dict): + ext_val = item.get("ext") + if ext_val: + return str(ext_val) + extra = item.get("extra") + if isinstance(extra, dict) and extra.get("ext"): + return str(extra.get("ext")) + else: + ext_val = sh.get_field(item, "ext") + if ext_val: + return str(ext_val) + extra = sh.get_field(item, "extra") + if isinstance(extra, dict) and extra.get("ext"): + return str(extra.get("ext")) + except Exception: + pass + + # Fallback: infer from target path or title if it looks like a filename + try: + if isinstance(target, str) and target: + suffix = Path(target).suffix + if suffix: + return suffix.lstrip(".") + except Exception: + pass + + try: + if title_val: + suffix = Path(str(title_val)).suffix + if suffix: + return suffix.lstrip(".") + except Exception: + pass + + return "" store = None if isinstance(item, dict): @@ -70,9 +122,16 @@ class Delete_File(sh.Cmdlet): local_deleted = False local_target = isinstance(target, str) and target.strip() and not str(target).lower().startswith(("http://", "https://")) + deleted_rows: List[Dict[str, Any]] = [] if conserve != "local" and local_target: path = Path(str(target)) + size_bytes: int | None = None + try: + if path.exists() and path.is_file(): + size_bytes = int(path.stat().st_size) + except Exception: + size_bytes = None # If lib_root is provided and this is from a folder store, use the Folder class if lib_root: @@ -80,8 +139,15 @@ class Delete_File(sh.Cmdlet): folder = Folder(Path(lib_root), name=store or "local") if folder.delete_file(str(path)): local_deleted = True - ctx.emit(f"Removed file: {path.name}") - log(f"Deleted: {path.name}", file=sys.stderr) + deleted_rows.append( + { + "title": str(title_val).strip() if title_val else path.name, + "store": store_label, + "hash": hash_hex or sh.normalize_hash(path.stem) or "", + "size_bytes": size_bytes, + "ext": _get_ext_from_item() or path.suffix.lstrip("."), + } + ) except Exception as exc: debug(f"Folder.delete_file failed: {exc}", file=sys.stderr) # Fallback to manual deletion @@ -89,8 +155,15 @@ class Delete_File(sh.Cmdlet): if path.exists() and path.is_file(): path.unlink() local_deleted = True - ctx.emit(f"Removed local file: {path}") - log(f"Deleted: {path.name}", file=sys.stderr) + deleted_rows.append( + { + "title": str(title_val).strip() if title_val else path.name, + "store": store_label, + "hash": hash_hex or sh.normalize_hash(path.stem) or "", + "size_bytes": size_bytes, + "ext": _get_ext_from_item() or path.suffix.lstrip("."), + } + ) except Exception as exc: log(f"Local delete failed: {exc}", file=sys.stderr) else: @@ -99,8 +172,15 @@ class Delete_File(sh.Cmdlet): if path.exists() and path.is_file(): path.unlink() local_deleted = True - ctx.emit(f"Removed local file: {path}") - log(f"Deleted: {path.name}", file=sys.stderr) + deleted_rows.append( + { + "title": str(title_val).strip() if title_val else path.name, + "store": store_label, + "hash": hash_hex or sh.normalize_hash(path.stem) or "", + "size_bytes": size_bytes, + "ext": _get_ext_from_item() or path.suffix.lstrip("."), + } + ) except Exception as 
exc: log(f"Local delete failed: {exc}", file=sys.stderr) @@ -168,26 +248,32 @@ class Delete_File(sh.Cmdlet): except Exception: # If it's not in Hydrus (e.g. 404 or similar), that's fine if not local_deleted: - return False + return [] if hydrus_deleted and hash_hex: - title_str = str(title_val).strip() if title_val else "" - if reason: - if title_str: - ctx.emit(f"{hydrus_prefix} Deleted title:{title_str} hash:{hash_hex} (reason: {reason}).") + size_hint = None + try: + if isinstance(item, dict): + size_hint = item.get("size_bytes") or item.get("size") else: - ctx.emit(f"{hydrus_prefix} Deleted hash:{hash_hex} (reason: {reason}).") - else: - if title_str: - ctx.emit(f"{hydrus_prefix} Deleted title:{title_str} hash:{hash_hex}.") - else: - ctx.emit(f"{hydrus_prefix} Deleted hash:{hash_hex}.") + size_hint = sh.get_field(item, "size_bytes") or sh.get_field(item, "size") + except Exception: + size_hint = None + deleted_rows.append( + { + "title": str(title_val).strip() if title_val else "", + "store": store_label, + "hash": hash_hex, + "size_bytes": size_hint, + "ext": _get_ext_from_item(), + } + ) if hydrus_deleted or local_deleted: - return True + return deleted_rows log("Selected result has neither Hydrus hash nor local file target") - return False + return [] def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: """Execute delete-file command.""" @@ -257,15 +343,34 @@ class Delete_File(sh.Cmdlet): return 1 success_count = 0 + deleted_rows: List[Dict[str, Any]] = [] for item in items: - if self._process_single_item(item, override_hash, conserve, lib_root, reason, config): + rows = self._process_single_item(item, override_hash, conserve, lib_root, reason, config) + if rows: success_count += 1 + deleted_rows.extend(rows) - if success_count > 0: - # Clear cached tables/items so deleted entries are not redisplayed + if deleted_rows: + table = ResultTable("Deleted") + table.set_no_choice(True).set_preserve_order(True) + for row in deleted_rows: + result_row = table.add_row() + result_row.add_column("Title", row.get("title", "")) + result_row.add_column("Store", row.get("store", "")) + result_row.add_column("Hash", row.get("hash", "")) + result_row.add_column("Size", _format_size(row.get("size_bytes"), integer_only=False)) + result_row.add_column("Ext", row.get("ext", "")) + + # Display-only: print directly and do not affect selection/history. + try: + stdout_console().print() + stdout_console().print(table) + setattr(table, "_rendered_by_cmdlet", True) + except Exception: + pass + + # Ensure no stale overlay/selection carries forward. try: - ctx.set_last_result_table_overlay(None, None, None) - ctx.set_last_result_table(None, []) ctx.set_last_result_items_only([]) ctx.set_current_stage_table(None) except Exception: diff --git a/cmdlet/delete_url.py b/cmdlet/delete_url.py index ac652be..c9dd999 100644 --- a/cmdlet/delete_url.py +++ b/cmdlet/delete_url.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import Any, Dict, Sequence +from typing import Any, Dict, List, Optional, Sequence, Tuple import sys import pipeline as ctx @@ -48,28 +48,37 @@ class Delete_Url(Cmdlet): log("Error: -query must be of the form hash:") return 1 + # Bulk input is common in pipelines; treat a list of PipeObjects as a batch. 
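+        # A lone piped item becomes a one-element batch; only -query with nothing piped takes the single-item path below.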
+ results: List[Any] = result if isinstance(result, list) else ([result] if result is not None else []) + + if query_hash and len(results) > 1: + log("Error: -query hash: cannot be used with multiple piped items") + return 1 + # Extract hash and store from result or args - file_hash = query_hash or get_field(result, "hash") - store_name = parsed.get("store") or get_field(result, "store") + file_hash = query_hash or (get_field(result, "hash") if result is not None else None) + store_name = parsed.get("store") or (get_field(result, "store") if result is not None else None) url_arg = parsed.get("url") - if not file_hash: - log("Error: No file hash provided (pipe an item or use -query \"hash:\")") - return 1 - - if not store_name: - log("Error: No store name provided") - return 1 + # If we have multiple piped items, we will resolve hash/store per item below. + if not results: + if not file_hash: + log("Error: No file hash provided (pipe an item or use -query \"hash:\")") + return 1 + if not store_name: + log("Error: No store name provided") + return 1 if not url_arg: log("Error: No URL provided") return 1 - # Normalize hash - file_hash = normalize_hash(file_hash) - if not file_hash: - log("Error: Invalid hash format") - return 1 + # Normalize hash (single-item mode) + if not results and file_hash: + file_hash = normalize_hash(file_hash) + if not file_hash: + log("Error: Invalid hash format") + return 1 # Parse url (comma-separated) urls = [u.strip() for u in str(url_arg).split(',') if u.strip()] @@ -80,12 +89,104 @@ class Delete_Url(Cmdlet): # Get backend and delete url try: storage = Store(config) - backend = storage[store_name] - backend.delete_url(file_hash, urls) - for u in urls: - ctx.emit(f"Deleted URL: {u}") - + def _remove_urls(existing: Any, remove: List[str]) -> Any: + # Preserve prior shape: keep str when 1 url, list when multiple. 
+ current: List[str] = [] + try: + if isinstance(existing, str): + current = [p.strip() for p in existing.split(",") if p.strip()] + elif isinstance(existing, (list, tuple)): + current = [str(u).strip() for u in existing if str(u).strip()] + except Exception: + current = [] + remove_set = {u for u in (remove or []) if u} + new_urls = [u for u in current if u not in remove_set] + if len(new_urls) == 1: + return new_urls[0] + return new_urls + + def _set_item_url(item: Any, merged: Any) -> None: + try: + if isinstance(item, dict): + item["url"] = merged + return + if hasattr(item, "url"): + setattr(item, "url", merged) + except Exception: + return + + store_override = parsed.get("store") + batch: Dict[str, List[Tuple[str, List[str]]]] = {} + pass_through: List[Any] = [] + + if results: + for item in results: + pass_through.append(item) + + raw_hash = query_hash or get_field(item, "hash") + raw_store = store_override or get_field(item, "store") + if not raw_hash or not raw_store: + ctx.print_if_visible("[delete-url] Warning: Item missing hash/store; skipping", file=sys.stderr) + continue + + normalized = normalize_hash(raw_hash) + if not normalized: + ctx.print_if_visible("[delete-url] Warning: Item has invalid hash; skipping", file=sys.stderr) + continue + + store_text = str(raw_store).strip() + if not store_text: + ctx.print_if_visible("[delete-url] Warning: Item has empty store; skipping", file=sys.stderr) + continue + if not storage.is_available(store_text): + ctx.print_if_visible( + f"[delete-url] Warning: Store '{store_text}' not configured; skipping", file=sys.stderr + ) + continue + + batch.setdefault(store_text, []).append((normalized, list(urls))) + + for store_text, pairs in batch.items(): + try: + backend = storage[store_text] + except Exception: + continue + + merged: Dict[str, List[str]] = {} + for h, ulist in pairs: + merged.setdefault(h, []) + for u in (ulist or []): + if u and u not in merged[h]: + merged[h].append(u) + bulk_pairs = [(h, merged[h]) for h in merged.keys()] + + bulk_fn = getattr(backend, "delete_url_bulk", None) + if callable(bulk_fn): + bulk_fn(bulk_pairs, config=config) + else: + for h, ulist in bulk_pairs: + backend.delete_url(h, ulist, config=config) + + ctx.print_if_visible( + f"✓ delete-url: {len(urls)} url(s) for {len(bulk_pairs)} item(s) in '{store_text}'", + file=sys.stderr, + ) + + for item in pass_through: + existing = get_field(item, "url") + _set_item_url(item, _remove_urls(existing, list(urls))) + ctx.emit(item) + return 0 + + # Single-item mode + backend = storage[str(store_name)] + backend.delete_url(str(file_hash), urls, config=config) + ctx.print_if_visible(f"✓ delete-url: {len(urls)} url(s) removed", file=sys.stderr) + if result is not None: + existing = get_field(result, "url") + _set_item_url(result, _remove_urls(existing, list(urls))) + ctx.emit(result) return 0 except KeyError: diff --git a/cmdlet/download_media.py b/cmdlet/download_media.py index 99388a9..c04180c 100644 --- a/cmdlet/download_media.py +++ b/cmdlet/download_media.py @@ -11,11 +11,6 @@ Focused cmdlet for video/audio downloads from yt-dlp-supported sites: from __future__ import annotations -import sys -import os -from pathlib import Path -from typing import Any, Dict, List, Optional, Sequence - import glob # noqa: F401 import hashlib import json # noqa: F401 @@ -27,17 +22,17 @@ import sys import tempfile import time import traceback -from typing import Any, Dict, Iterator, List, Optional -from urllib.parse import urljoin, urlparse - -import httpx +from pathlib import Path 
+from typing import Any, Dict, Iterator, List, Optional, Sequence +from urllib.parse import urlparse from SYS.logger import log, debug -from SYS.utils import ensure_directory, sha256_file -from API.HTTP import HTTPClient +from SYS.utils import sha256_file from models import DownloadError, DownloadOptions, DownloadMediaResult, DebugLogger, ProgressBar import pipeline as pipeline_context from result_table import ResultTable +from rich.prompt import Confirm +from rich_display import stderr_console as get_stderr_console from tool.ytdlp import YtDlpTool @@ -233,7 +228,12 @@ def is_url_supported_by_ytdlp(url: str) -> bool: return False -def list_formats(url: str, no_playlist: bool = False, playlist_items: Optional[str] = None) -> Optional[List[Dict[str, Any]]]: +def list_formats( + url: str, + no_playlist: bool = False, + playlist_items: Optional[str] = None, + cookiefile: Optional[str] = None, +) -> Optional[List[Dict[str, Any]]]: _ensure_yt_dlp_ready() try: assert yt_dlp is not None @@ -242,6 +242,8 @@ def list_formats(url: str, no_playlist: bool = False, playlist_items: Optional[s ydl_opts["noplaylist"] = True if playlist_items: ydl_opts["playlist_items"] = playlist_items + if cookiefile: + ydl_opts["cookiefile"] = cookiefile debug(f"Fetching format list for: {url}") with yt_dlp.YoutubeDL(ydl_opts) as ydl: # type: ignore[arg-type] @@ -418,119 +420,6 @@ def _download_with_sections_via_cli(url: str, ytdl_options: Dict[str, Any], sect return session_id, first_section_info or {} -def _build_ytdlp_options(opts: DownloadOptions) -> Dict[str, Any]: - ensure_directory(opts.output_dir) - outtmpl = str((opts.output_dir / "%(title)s.%(ext)s").resolve()) - base_options: Dict[str, Any] = { - "outtmpl": outtmpl, - "quiet": True, - "no_warnings": True, - "noprogress": True, - "socket_timeout": 30, - "retries": 10, - "fragment_retries": 10, - "http_chunk_size": 10_485_760, - "restrictfilenames": True, - } - - # Prefer the bundled ffmpeg shipped with the repo (used for merges/remux/postproc). - try: - repo_root = Path(__file__).resolve().parents[1] - bundled_ffmpeg_dir = repo_root / "MPV" / "ffmpeg" / "bin" - if bundled_ffmpeg_dir.exists(): - base_options.setdefault("ffmpeg_location", str(bundled_ffmpeg_dir)) - except Exception: - pass - - # On Windows, AV/indexers can transiently lock files at the end of a download. - # yt-dlp uses file_access_retries for renames (e.g. .part -> final). Default is low. - try: - if os.name == "nt": - base_options.setdefault("file_access_retries", 40) - except Exception: - pass - - # Always show a progress bar. The hook prints to stderr so piped stdout stays clean. - base_options["progress_hooks"] = [_progress_callback] - - if opts.cookies_path and opts.cookies_path.is_file(): - base_options["cookiefile"] = str(opts.cookies_path) - - if opts.no_playlist: - base_options["noplaylist"] = True - - if opts.mode == "audio": - base_options["format"] = opts.ytdl_format or "251/140/bestaudio" - base_options["postprocessors"] = [{"key": "FFmpegExtractAudio"}] - else: - base_options["format"] = opts.ytdl_format or "bestvideo+bestaudio/best" - base_options["format_sort"] = ["res:4320", "res:2880", "res:2160", "res:1440", "res:1080", "res:720", "res"] - - # Optional yt-dlp features - if getattr(opts, "embed_chapters", False): - # Prefer explicit FFmpegMetadata PP so chapter embedding runs even when - # we already specified other postprocessors (e.g. FFmpegExtractAudio). 
- pps = base_options.get("postprocessors") - if not isinstance(pps, list): - pps = [] - already_has_metadata = any( - isinstance(pp, dict) and str(pp.get("key") or "") == "FFmpegMetadata" for pp in pps - ) - if not already_has_metadata: - pps.append( - { - "key": "FFmpegMetadata", - "add_metadata": True, - "add_chapters": True, - "add_infojson": "if_exists", - } - ) - base_options["postprocessors"] = pps - - # Chapter embedding is most reliable in mkv/mp4 containers. - # When merging separate video+audio streams, prefer mkv so mpv sees chapters. - if opts.mode != "audio": - base_options.setdefault("merge_output_format", "mkv") - - if getattr(opts, "write_sub", False): - base_options["writesubtitles"] = True - base_options["writeautomaticsub"] = True - base_options["subtitlesformat"] = "vtt" - - if opts.clip_sections: - sections: List[str] = [] - - def _secs_to_hms(seconds: float) -> str: - total = max(0, int(seconds)) - minutes, secs = divmod(total, 60) - hours, minutes = divmod(minutes, 60) - return f"{hours:02d}:{minutes:02d}:{secs:02d}" - - for section_range in str(opts.clip_sections).split(","): - section_range = section_range.strip() - if not section_range: - continue - try: - start_s_raw, end_s_raw = section_range.split("-", 1) - start_s = float(start_s_raw.strip()) - end_s = float(end_s_raw.strip()) - if start_s >= end_s: - continue - sections.append(f"*{_secs_to_hms(start_s)}-{_secs_to_hms(end_s)}") - except (ValueError, AttributeError): - continue - - if sections: - base_options["download_sections"] = sections - debug(f"Download sections configured: {', '.join(sections)}") - - if opts.playlist_items: - base_options["playlist_items"] = opts.playlist_items - - if not opts.quiet: - debug(f"yt-dlp: mode={opts.mode}, format={base_options.get('format')}") - return base_options - def _iter_download_entries(info: Dict[str, Any]) -> Iterator[Dict[str, Any]]: queue: List[Dict[str, Any]] = [info] @@ -617,61 +506,6 @@ def _extract_sha256(info: Dict[str, Any]) -> Optional[str]: return None -def _get_libgen_download_url(libgen_url: str) -> Optional[str]: - try: - from urllib.parse import urlparse - import requests - parsed = urlparse(libgen_url) - if 'libgen' not in parsed.netloc.lower(): - return None - if '/file.php' not in parsed.path.lower(): - return None - session = requests.Session() - session.headers.update({'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}) - debug(f"Following LibGen redirect chain for: {libgen_url}") - try: - response = session.get(libgen_url, timeout=10, allow_redirects=True) - final_url = response.url - try: - try: - from lxml import html as lxml_html - except ImportError: - lxml_html = None - - if lxml_html is not None: - doc = lxml_html.fromstring(response.content) - for a in doc.xpath("//a[@href]"): - href = str(a.get("href") or "").strip() - if href and "get.php" in href.lower(): - return urljoin(final_url, href) - else: - for m in re.finditer( - r"href=[\"\']([^\"\']+)[\"\']", - response.text or "", - flags=re.IGNORECASE, - ): - href = str(m.group(1) or "").strip() - if href and "get.php" in href.lower(): - return urljoin(final_url, href) - except Exception: - pass - if final_url != libgen_url: - debug(f"LibGen resolved to mirror: {final_url}") - return final_url - except requests.RequestException as e: - log(f"Error following LibGen redirects: {e}", file=sys.stderr) - try: - response = session.head(libgen_url, allow_redirects=True, timeout=10) - if response.url != libgen_url: - return response.url - except: - pass - return None - 
except Exception as e: - log(f"Error resolving LibGen URL: {e}", file=sys.stderr) - return None - - def _progress_callback(status: Dict[str, Any]) -> None: """Simple progress callback using logger.""" event = status.get("status") @@ -683,208 +517,18 @@ def _progress_callback(status: Dict[str, Any]) -> None: speed = status.get("_speed_str") eta = status.get("_eta_str") - try: - line = _YTDLP_PROGRESS_BAR.format_progress( - percent_str=str(percent) if percent is not None else None, - downloaded=int(downloaded) if downloaded is not None else None, - total=int(total) if total is not None else None, - speed_str=str(speed) if speed is not None else None, - eta_str=str(eta) if eta is not None else None, - ) - except Exception: - pct = str(percent) if percent is not None else "?" - spd = str(speed) if speed is not None else "?" - et = str(eta) if eta is not None else "?" - line = f"[download] {pct} at {spd} ETA {et}" - - sys.stderr.write("\r" + line + " ") - sys.stderr.flush() + _YTDLP_PROGRESS_BAR.update( + downloaded=int(downloaded) if downloaded is not None else None, + total=int(total) if total is not None else None, + label="download", + file=sys.stderr, + ) elif event == "finished": - # Clear the in-place progress line. - sys.stderr.write("\r" + (" " * 140) + "\r") - sys.stderr.write("\n") - sys.stderr.flush() + _YTDLP_PROGRESS_BAR.finish() elif event in ("postprocessing", "processing"): return -def _download_direct_file( - url: str, - output_dir: Path, - debug_logger: Optional[DebugLogger] = None, - quiet: bool = False, -) -> DownloadMediaResult: - """Download a direct file (PDF, image, document, etc.) without yt-dlp.""" - ensure_directory(output_dir) - - from urllib.parse import unquote, urlparse, parse_qs - import re - - # Extract filename from URL - parsed_url = urlparse(url) - url_path = parsed_url.path - - # Try to get filename from query parameters first (for LibGen and similar services) - # e.g., ?filename=Book+Title.pdf or &download=filename.pdf - filename = None - if parsed_url.query: - query_params = parse_qs(parsed_url.query) - for param_name in ('filename', 'download', 'file', 'name'): - if param_name in query_params and query_params[param_name]: - filename = query_params[param_name][0] - filename = unquote(filename) - break - - # If not found in query params, extract from URL path - if not filename or not filename.strip(): - filename = url_path.split("/")[-1] if url_path else "" - filename = unquote(filename) - - # Remove query strings from filename if any - if "?" in filename: - filename = filename.split("?")[0] - - # Try to get real filename from Content-Disposition header (HEAD request) - try: - with HTTPClient(timeout=10.0) as client: - response = client._request("HEAD", url, follow_redirects=True) - content_disposition = response.headers.get("content-disposition", "") - if content_disposition: - # Extract filename from Content-Disposition header - # Format: attachment; filename="filename.pdf" or filename=filename.pdf - match = re.search(r'filename\*?=(?:"([^"]*)"|([^;\s]*))', content_disposition) - if match: - extracted_name = match.group(1) or match.group(2) - if extracted_name: - filename = unquote(extracted_name) - if not quiet: - debug(f"Filename from Content-Disposition: {filename}") - except Exception as e: - if not quiet: - log(f"Could not get filename from headers: {e}", file=sys.stderr) - - # Fallback if we still don't have a good filename - if not filename or "." 
not in filename: - filename = "downloaded_file.bin" - - file_path = output_dir / filename - progress_bar = ProgressBar() - - if not quiet: - debug(f"Direct download: {filename}") - - try: - start_time = time.time() - downloaded_bytes = [0] - total_bytes = [0] - last_progress_time = [start_time] - - def progress_callback(bytes_downloaded: int, content_length: int) -> None: - downloaded_bytes[0] = bytes_downloaded - total_bytes[0] = content_length - - now = time.time() - if now - last_progress_time[0] >= 0.5 and total_bytes[0] > 0: - elapsed = now - start_time - percent = (bytes_downloaded / content_length) * 100 if content_length > 0 else 0 - speed = bytes_downloaded / elapsed if elapsed > 0 else 0 - eta_seconds = (content_length - bytes_downloaded) / speed if speed > 0 else 0 - - speed_str = progress_bar.format_bytes(speed) + "/s" - minutes, seconds = divmod(int(eta_seconds), 60) - hours, minutes = divmod(minutes, 60) - eta_str = f"{hours:02d}:{minutes:02d}:{seconds:02d}" - - progress_line = progress_bar.format_progress( - percent_str=f"{percent:.1f}%", - downloaded=bytes_downloaded, - total=content_length, - speed_str=speed_str, - eta_str=eta_str, - ) - sys.stderr.write("\r" + progress_line + " ") - sys.stderr.flush() - last_progress_time[0] = now - - with HTTPClient(timeout=30.0) as client: - client.download(url, str(file_path), progress_callback=progress_callback) - - # Clear progress line after completion. - sys.stderr.write("\r" + (" " * 140) + "\r") - sys.stderr.write("\n") - sys.stderr.flush() - - # For direct file downloads, create minimal info dict without filename as title - # This prevents creating duplicate title: tags when filename gets auto-generated - # We'll add title back later only if we couldn't extract meaningful tags - info = { - "id": filename.rsplit(".", 1)[0], - "ext": filename.rsplit(".", 1)[1] if "." 
in filename else "bin", - "webpage_url": url, - } - - hash_value = None - try: - hash_value = sha256_file(file_path) - except Exception: - pass - - tags = [] - if extract_ytdlp_tags: - try: - tags = extract_ytdlp_tags(info) - except Exception as e: - log(f"Error extracting tags: {e}", file=sys.stderr) - - # Only use filename as a title tag if we couldn't extract any meaningful tags - # This prevents duplicate title: tags when the filename could be mistaken for metadata - if not any(t.startswith('title:') for t in tags): - # Re-extract tags with filename as title only if needed - info['title'] = filename - tags = [] - if extract_ytdlp_tags: - try: - tags = extract_ytdlp_tags(info) - except Exception as e: - log(f"Error extracting tags with filename: {e}", file=sys.stderr) - - if debug_logger is not None: - debug_logger.write_record( - "direct-file-downloaded", - {"url": url, "path": str(file_path), "hash": hash_value}, - ) - - return DownloadMediaResult( - path=file_path, - info=info, - tag=tags, - source_url=url, - hash_value=hash_value, - ) - - except (httpx.HTTPError, httpx.RequestError) as exc: - log(f"Download error: {exc}", file=sys.stderr) - if debug_logger is not None: - debug_logger.write_record( - "exception", - {"phase": "direct-file", "url": url, "error": str(exc)}, - ) - raise DownloadError(f"Failed to download {url}: {exc}") from exc - except Exception as exc: - log(f"Error downloading file: {exc}", file=sys.stderr) - if debug_logger is not None: - debug_logger.write_record( - "exception", - { - "phase": "direct-file", - "url": url, - "error": str(exc), - "traceback": traceback.format_exc(), - }, - ) - raise DownloadError(f"Error downloading file: {exc}") from exc - - def probe_url(url: str, no_playlist: bool = False, timeout_seconds: int = 15, *, cookiefile: Optional[str] = None) -> Optional[Dict[str, Any]]: """Probe URL to extract metadata WITHOUT downloading. @@ -959,7 +603,7 @@ def probe_url(url: str, no_playlist: bool = False, timeout_seconds: int = 15, *, thread.join(timeout=timeout_seconds) if thread.is_alive(): - # Probe timed out - return None to fall back to direct download + # Probe timed out - return None so the caller can raise an error debug(f"Probe timeout for {url} (>={timeout_seconds}s), proceeding with download") return None @@ -975,7 +619,7 @@ def download_media( *, debug_logger: Optional[DebugLogger] = None, ) -> Any: - """Download media from URL using yt-dlp or direct HTTP download. + """Download streaming media exclusively via yt-dlp. Args: opts: DownloadOptions with url, mode, output_dir, etc. 
@@ -985,40 +629,9 @@ def download_media( DownloadMediaResult with path, info, tags, hash Raises: - DownloadError: If download fails + DownloadError: If the URL is unsupported or yt-dlp detects no media """ - # Handle LibGen url specially - # file.php redirects to mirrors, get.php is direct from modern API - if 'libgen' in opts.url.lower(): - if '/get.php' in opts.url.lower(): - # Modern API get.php links are direct downloads from mirrors (not file redirects) - if not opts.quiet: - log(f"Detected LibGen get.php URL, downloading directly...") - if debug_logger is not None: - debug_logger.write_record("libgen-direct", {"url": opts.url}) - return _download_direct_file(opts.url, opts.output_dir, debug_logger, quiet=opts.quiet) - elif '/file.php' in opts.url.lower(): - # Old-style file.php redirects to mirrors, we need to resolve - if not opts.quiet: - log(f"Detected LibGen file.php URL, resolving to actual mirror...") - actual_url = _get_libgen_download_url(opts.url) - if actual_url and actual_url != opts.url: - if not opts.quiet: - log(f"Resolved LibGen URL to mirror: {actual_url}") - opts.url = actual_url - # After resolution, this will typically be an onion link or direct file - # Skip yt-dlp for this (it won't support onion/mirrors), go direct - if debug_logger is not None: - debug_logger.write_record("libgen-resolved", {"original": opts.url, "resolved": actual_url}) - return _download_direct_file(opts.url, opts.output_dir, debug_logger, quiet=opts.quiet) - else: - if not opts.quiet: - log(f"Could not resolve LibGen URL, trying direct download anyway", file=sys.stderr) - if debug_logger is not None: - debug_logger.write_record("libgen-resolve-failed", {"url": opts.url}) - return _download_direct_file(opts.url, opts.output_dir, debug_logger, quiet=opts.quiet) - - # Handle GoFile shares with a dedicated resolver before yt-dlp/direct fallbacks + # Handle GoFile shares before yt-dlp (they remain unsupported) try: netloc = urlparse(opts.url).netloc.lower() except Exception: @@ -1033,38 +646,47 @@ def download_media( # Determine if yt-dlp should be used ytdlp_supported = is_url_supported_by_ytdlp(opts.url) - if ytdlp_supported: - # Skip probe for playlists with item selection (probe can hang on large playlists) - # Just proceed straight to download which will handle item selection - if opts.playlist_items: - debug(f"Skipping probe for playlist (item selection: {opts.playlist_items}), proceeding with download") - probe_result = {"url": opts.url} # Minimal probe result - else: - probe_cookiefile = None - try: - if opts.cookies_path and opts.cookies_path.is_file(): - probe_cookiefile = str(opts.cookies_path) - except Exception: - probe_cookiefile = None - - probe_result = probe_url(opts.url, no_playlist=opts.no_playlist, timeout_seconds=15, cookiefile=probe_cookiefile) - - if probe_result is None: - if not opts.quiet: - log(f"URL supported by yt-dlp but no media detected, falling back to direct download: {opts.url}") - if debug_logger is not None: - debug_logger.write_record("ytdlp-skip-no-media", {"url": opts.url}) - return _download_direct_file(opts.url, opts.output_dir, debug_logger, quiet=opts.quiet) - else: + if not ytdlp_supported: + msg = "URL not supported by yt-dlp; try download-file for manual downloads" if not opts.quiet: - log(f"URL not supported by yt-dlp, trying direct download: {opts.url}") + log(msg) if debug_logger is not None: - debug_logger.write_record("direct-file-attempt", {"url": opts.url}) - return _download_direct_file(opts.url, opts.output_dir, debug_logger, 
quiet=opts.quiet) + debug_logger.write_record("ytdlp-unsupported", {"url": opts.url}) + raise DownloadError(msg) + + # Skip probe for playlists with item selection (probe can hang on large playlists) + # Just proceed straight to download which will handle item selection + if opts.playlist_items: + debug(f"Skipping probe for playlist (item selection: {opts.playlist_items}), proceeding with download") + probe_result = {"url": opts.url} # Minimal probe result + else: + probe_cookiefile = None + try: + if opts.cookies_path and opts.cookies_path.is_file(): + probe_cookiefile = str(opts.cookies_path) + except Exception: + probe_cookiefile = None + + probe_result = probe_url(opts.url, no_playlist=opts.no_playlist, timeout_seconds=15, cookiefile=probe_cookiefile) + + if probe_result is None: + msg = "yt-dlp could not detect media for this URL; use download-file for direct downloads" + if not opts.quiet: + log(msg) + if debug_logger is not None: + debug_logger.write_record("ytdlp-skip-no-media", {"url": opts.url}) + raise DownloadError(msg) _ensure_yt_dlp_ready() - ytdl_options = _build_ytdlp_options(opts) + ytdlp_tool = YtDlpTool() + ytdl_options = ytdlp_tool.build_ytdlp_options(opts) + hooks = ytdl_options.get("progress_hooks") + if not isinstance(hooks, list): + hooks = [] + ytdl_options["progress_hooks"] = hooks + if _progress_callback not in hooks: + hooks.append(_progress_callback) if not opts.quiet: debug(f"Starting yt-dlp download: {opts.url}") if debug_logger is not None: @@ -1760,15 +1382,259 @@ class Download_Media(Cmdlet): for item in url_matches: if "title" not in item: item["title"] = item.get("name") or item.get("target") or item.get("path") or "Result" - table.add_result(item) + + # Keep the full payload for history/inspection, but display a focused table. + display_row = { + "title": item.get("title"), + "store": item.get("store"), + "hash": item.get("hash") or item.get("file_hash") or item.get("sha256"), + } + table.add_result(display_row) results_list.append(item) pipeline_context.set_current_stage_table(table) pipeline_context.set_last_result_table(table, results_list) - print(f"\n{table}") - response = input("Continue anyway? (y/n): ").strip().lower() - if response not in {"y", "yes"}: + get_stderr_console().print(table) + setattr(table, "_rendered_by_cmdlet", True) + if not Confirm.ask("Continue anyway?", default=False, console=get_stderr_console()): + return False + return True + + def _preflight_url_duplicates_bulk(urls: Sequence[str]) -> bool: + """Preflight URL de-dup for a batch of URLs. + + Purpose: + - Avoid per-item interactive URL checks inside a playlist loop. + - Let the user see ALL duplicates up front, before any downloads start. 
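+        - Return False only when the user declines the "Continue anyway?" confirmation.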
+ """ + if storage is None: + debug("Bulk URL preflight skipped: storage unavailable") + return True + + unique_urls: List[str] = [] + for u in urls or []: + s = str(u or "").strip() + if s and s not in unique_urls: + unique_urls.append(s) + if len(unique_urls) <= 1: + return True + + try: + from metadata import normalize_urls + except Exception: + normalize_urls = None # type: ignore[assignment] + + def _httpish(value: str) -> bool: + try: + return bool(value) and (value.startswith("http://") or value.startswith("https://")) + except Exception: + return False + + url_needles: Dict[str, List[str]] = {} + for u in unique_urls: + needles: List[str] = [] + if normalize_urls is not None: + try: + needles.extend([n for n in (normalize_urls(u) or []) if isinstance(n, str)]) + except Exception: + needles = [] + if not needles: + needles = [u] + # Prefer http(s) needles for store lookups. + filtered: List[str] = [] + for n in needles: + n2 = str(n or "").strip() + if not n2: + continue + if not _httpish(n2): + continue + if n2 not in filtered: + filtered.append(n2) + url_needles[u] = filtered if filtered else [u] + + # Determine backends once (same filtering as per-URL preflight). + backend_names: List[str] = [] + try: + backend_names_all = storage.list_searchable_backends() + except Exception: + backend_names_all = [] + + for backend_name in backend_names_all: + try: + backend = storage[backend_name] + except Exception: + continue + + try: + if str(backend_name).strip().lower() == "temp": + continue + except Exception: + pass + + try: + backend_location = getattr(backend, "_location", None) + if backend_location and final_output_dir: + backend_path = Path(str(backend_location)).expanduser().resolve() + temp_path = Path(str(final_output_dir)).expanduser().resolve() + if backend_path == temp_path: + continue + except Exception: + pass + + backend_names.append(backend_name) + + if not backend_names: + debug("Bulk URL preflight skipped: no searchable backends") + return True + + # Collect matches as display rows (cap to keep output reasonable) + seen_pairs: set[tuple[str, str]] = set() + matched_urls: set[str] = set() + match_rows: List[Dict[str, Any]] = [] + max_rows = 200 + + try: + from Store.HydrusNetwork import HydrusNetwork + except Exception: + HydrusNetwork = None # type: ignore + + for backend_name in backend_names: + if len(match_rows) >= max_rows: + break + try: + backend = storage[backend_name] + except Exception: + continue + + if HydrusNetwork is not None and isinstance(backend, HydrusNetwork): + if not hydrus_available: + continue + + client = getattr(backend, "_client", None) + if client is None: + continue + + for original_url, needles in url_needles.items(): + if len(match_rows) >= max_rows: + break + if (original_url, str(backend_name)) in seen_pairs: + continue + + # Fast-path: ask Hydrus whether it already knows this URL. 
+ found_hash: Optional[str] = None + found = False + for needle in (needles or [])[:3]: + if not _httpish(needle): + continue + try: + from API.HydrusNetwork import HydrusRequestSpec + + spec = HydrusRequestSpec( + method="GET", + endpoint="/add_urls/get_url_files", + query={"url": needle}, + ) + response = client._perform_request(spec) # type: ignore[attr-defined] + raw_hashes = None + if isinstance(response, dict): + raw_hashes = response.get("hashes") or response.get("file_hashes") + raw_ids = response.get("file_ids") + has_ids = isinstance(raw_ids, list) and len(raw_ids) > 0 + has_hashes = isinstance(raw_hashes, list) and len(raw_hashes) > 0 + if has_hashes: + try: + found_hash = str(raw_hashes[0]).strip() # type: ignore[index] + except Exception: + found_hash = None + if has_ids or has_hashes: + found = True + break + except Exception: + continue + + if not found: + continue + + seen_pairs.add((original_url, str(backend_name))) + matched_urls.add(original_url) + display_row = { + "title": "(exists)", + "store": str(backend_name), + "hash": found_hash or "", + "url": original_url, + "columns": [ + ("Title", "(exists)"), + ("Store", str(backend_name)), + ("Hash", found_hash or ""), + ("URL", original_url), + ], + } + match_rows.append(display_row) + continue + + # Generic backends: use the existing search() contract. + for original_url, needles in url_needles.items(): + if len(match_rows) >= max_rows: + break + if (original_url, str(backend_name)) in seen_pairs: + continue + + backend_hits: List[Dict[str, Any]] = [] + for needle in (needles or [])[:3]: + try: + backend_hits = backend.search(f"url:{needle}", limit=1) or [] + if backend_hits: + break + except Exception: + continue + + if not backend_hits: + continue + + seen_pairs.add((original_url, str(backend_name))) + matched_urls.add(original_url) + hit = backend_hits[0] + title = hit.get("title") or hit.get("name") or hit.get("target") or hit.get("path") or "(exists)" + file_hash = hit.get("hash") or hit.get("file_hash") or hit.get("sha256") or "" + display_row = { + "title": str(title), + "store": str(hit.get("store") or backend_name), + "hash": str(file_hash or ""), + "url": original_url, + "columns": [ + ("Title", str(title)), + ("Store", str(hit.get("store") or backend_name)), + ("Hash", str(file_hash or "")), + ("URL", original_url), + ], + } + match_rows.append(display_row) + + if not match_rows: + debug("Bulk URL preflight: no matches") + return True + + table = ResultTable(f"URL already exists ({len(matched_urls)} url(s))") + table.set_no_choice(True) + try: + table.set_preserve_order(True) + except Exception: + pass + + for row in match_rows: + table.add_result(row) + + # Display as an overlay so we don't clobber the current selectable table/history. + try: + pipeline_context.set_last_result_table_overlay(table, match_rows) + except Exception: + pass + + get_stderr_console().print(table) + setattr(table, "_rendered_by_cmdlet", True) + + if not Confirm.ask("Continue anyway?", default=False, console=get_stderr_console()): return False return True @@ -1854,15 +1720,15 @@ class Download_Media(Cmdlet): return None return sorted(indices) - def _maybe_prompt_playlist_items(url: str) -> Optional[Dict[str, Any]]: - """If URL appears to be a playlist/channel/collection, prompt user for @ selection. + def _maybe_show_playlist_table(url: str) -> bool: + """If URL appears to be a playlist/channel/collection, show a normal selectable table. 
- Returns: - - None if URL is not a playlist-like multi-entry page (or probe fails) - - Dict with keys: - - cancel: bool - - playlist_items: Optional[str] (None means download all) - - selected_urls: Optional[List[str]] (expanded per-entry urls when available) + This intentionally avoids a special input() prompt so the user can use + the regular REPL prompt with autocomplete and standard @ selection: + download-media -url "" (shows table) + @* | download-media [options] | add-file ... + + Returns True if a playlist table was shown. """ try: cf = None @@ -1876,10 +1742,10 @@ class Download_Media(Cmdlet): except Exception: pr = None if not isinstance(pr, dict): - return None + return False entries = pr.get("entries") if not isinstance(entries, list) or len(entries) <= 1: - return None + return False # Display table (limit rows to keep output reasonable) max_rows = 200 @@ -1909,7 +1775,8 @@ class Download_Media(Cmdlet): return None table = ResultTable() - table.title = f"Playlist items ({total}{' shown ' + str(len(display_entries)) if total > max_rows else ''})" + safe_url = str(url or "").strip() + table.title = f'download-media -url "{safe_url}"' if safe_url else "download-media" table.set_source_command("download-media", [url]) try: table.set_preserve_order(True) @@ -1921,6 +1788,7 @@ class Download_Media(Cmdlet): title = None uploader = None duration = None + entry_url = _entry_to_url(entry) try: if isinstance(entry, dict): title = entry.get("title") @@ -1935,6 +1803,10 @@ class Download_Media(Cmdlet): "detail": str(uploader or ""), "media_kind": "playlist-item", "playlist_index": idx, + # Critical for normal @ selection piping: downstream cmdlets + # (including download-media itself) look for url/target. + "url": entry_url, + "target": entry_url, "columns": [ ("#", str(idx)), ("Title", str(title or "")), @@ -1948,61 +1820,71 @@ class Download_Media(Cmdlet): pipeline_context.set_current_stage_table(table) pipeline_context.set_last_result_table(table, results_list) - print(f"\n{table}") - choice = input("Select items to download (@N, @2-5, @{1,3}, @*, or 'q' to cancel): ").strip() - if not choice or choice.lower() in {"q", "quit", "cancel"}: - return {"cancel": True, "playlist_items": None, "selected_urls": []} - if choice.strip() == "@*" or choice.strip() == "*": - # @* means all entries, not just displayed rows. - selected_urls: List[str] = [] - for entry in entries: - u = _entry_to_url(entry) - if u and u not in selected_urls: - selected_urls.append(u) - # Only expand when we can derive URLs for all entries; otherwise fall back to yt-dlp playlist handling. - if len(selected_urls) == len(entries): - return {"cancel": False, "playlist_items": None, "selected_urls": selected_urls} - return {"cancel": False, "playlist_items": None, "selected_urls": []} - - parsed_indices = _parse_at_selection(choice, max_index=len(display_entries)) - if not parsed_indices: - log("Invalid selection. Use @N, @2-5, @{1,3}, or @*", file=sys.stderr) - return {"cancel": True, "playlist_items": None, "selected_urls": []} - - selected_urls: List[str] = [] - for i in parsed_indices: - try: - entry = display_entries[i - 1] - except Exception: - continue - u = _entry_to_url(entry) - if u and u not in selected_urls: - selected_urls.append(u) - - # If we can expand per-entry URLs, return them. 
- if selected_urls and len(selected_urls) == len(parsed_indices): - return {"cancel": False, "playlist_items": None, "selected_urls": selected_urls} - - # yt-dlp accepts comma-separated 1-based indices for playlist_items - return {"cancel": False, "playlist_items": ",".join(str(i) for i in parsed_indices), "selected_urls": []} + get_stderr_console().print(table) + setattr(table, "_rendered_by_cmdlet", True) + return True # Playlist/multi-entry detection: if the URL has multiple items and the user didn't - # specify -item, prompt for @ selection (supports @* for all). + # specify -item or -format, show a normal selectable table and return. if len(supported_url) == 1 and not playlist_items and not ytdl_format: candidate_url = supported_url[0] - selection_info = _maybe_prompt_playlist_items(candidate_url) - if selection_info is not None: + if _maybe_show_playlist_table(candidate_url): playlist_selection_handled = True - if bool(selection_info.get("cancel")): - return 0 - selected_urls = selection_info.get("selected_urls") - if isinstance(selected_urls, list) and selected_urls: - # Expand playlist/channel URL into per-entry URLs so that de-dup preflight - # and downloads operate per file. - supported_url = selected_urls - playlist_items = None - else: - playlist_items = selection_info.get("playlist_items") + # Let the user pick items using the normal REPL prompt: + # @* | download-media ... + return 0 + + # Bulk preflight for playlist selections (per-entry URLs): check all URLs once before downloading. + skip_per_url_preflight = False + if len(supported_url) > 1: + if not _preflight_url_duplicates_bulk(list(supported_url)): + return 0 + skip_per_url_preflight = True + + # Playlist-level format preflight: if the batch has only one available format, + # discover it once and force it for every item. This avoids per-item failures + # and per-item --list-formats calls (e.g. Bandcamp albums). + formats_cache: Dict[str, Optional[List[Dict[str, Any]]]] = {} + + def _cookiefile_str() -> Optional[str]: + try: + cookie_path = ytdlp_tool.resolve_cookiefile() + if cookie_path is not None and cookie_path.is_file(): + return str(cookie_path) + except Exception: + pass + return None + + def _list_formats_cached(u: str, *, playlist_items_value: Optional[str]) -> Optional[List[Dict[str, Any]]]: + key = f"{u}||{playlist_items_value or ''}" + if key in formats_cache: + return formats_cache[key] + fmts = list_formats( + u, + no_playlist=False, + playlist_items=playlist_items_value, + cookiefile=_cookiefile_str(), + ) + formats_cache[key] = fmts + return fmts + + forced_single_format_id: Optional[str] = None + forced_single_format_for_batch = False + if len(supported_url) > 1 and not playlist_items and not ytdl_format: + try: + sample_url = str(supported_url[0]) + fmts = _list_formats_cached(sample_url, playlist_items_value=None) + if isinstance(fmts, list) and len(fmts) == 1 and isinstance(fmts[0], dict): + only_id = str(fmts[0].get("format_id") or "").strip() + if only_id: + forced_single_format_id = only_id + forced_single_format_for_batch = True + debug( + f"Playlist format preflight: only one format available; using {forced_single_format_id} for all items" + ) + except Exception: + forced_single_format_id = None + forced_single_format_for_batch = False # If no -item, no explicit -format specified, and single URL, show the format table. # Do NOT stop to show formats when -audio is used (auto-pick) or when -clip is used. 
@@ -2021,7 +1903,7 @@ class Download_Media(Cmdlet): log(f"Skipping download: {url}", file=sys.stderr) return 0 - formats = list_formats(url, no_playlist=False) + formats = _list_formats_cached(url, playlist_items_value=None) if formats and len(formats) > 1: # Formatlist filtering @@ -2171,7 +2053,7 @@ class Download_Media(Cmdlet): # print the table here and mark it as already rendered to avoid duplicates # in runners that also print tables (e.g. CLI.py). try: - sys.stderr.write(table.format_plain() + "\n") + get_stderr_console().print(table) setattr(table, "_rendered_by_cmdlet", True) except Exception: pass @@ -2186,6 +2068,8 @@ class Download_Media(Cmdlet): # Download each URL downloaded_count = 0 + downloaded_pipe_objects: List[Dict[str, Any]] = [] + pipe_seq = 0 clip_sections_spec = self._build_clip_sections_spec(clip_ranges) if clip_sections_spec: @@ -2201,21 +2085,23 @@ class Download_Media(Cmdlet): canonical_url = _canonicalize_url_for_storage(url) # Preflight: warn if URL already exists in storage backends. - if not _preflight_url_duplicate(canonical_url, extra_urls=[url]): - log(f"Skipping download: {url}", file=sys.stderr) - continue + # For playlist selections we already did a bulk preflight; skip per-item checks. + if not skip_per_url_preflight: + if not _preflight_url_duplicate(canonical_url, extra_urls=[url]): + log(f"Skipping download: {url}", file=sys.stderr) + continue # If playlist_items is specified but looks like a format ID (e.g. from table selection), # treat it as a format selector instead of playlist items. # This handles the case where @N selection passes -item actual_format = ytdl_format actual_playlist_items = playlist_items - + if playlist_items and not ytdl_format: # Heuristic: if it contains non-numeric chars (excluding ranges/commas) # it is likely a format ID (e.g. '140-drc', 'best', '137+140') import re - if re.search(r'[^0-9,-]', playlist_items): + if re.search(r"[^0-9,-]", playlist_items): actual_format = playlist_items actual_playlist_items = None @@ -2230,6 +2116,13 @@ class Download_Media(Cmdlet): if configured and configured != "bestvideo+bestaudio/best": actual_format = configured + # If the batch has exactly one available format, force it. + # This prevents the "Requested format is not available" error loop entirely. + forced_single_applied = False + if forced_single_format_for_batch and forced_single_format_id and not ytdl_format and not actual_playlist_items: + actual_format = forced_single_format_id + forced_single_applied = True + # If a single format id was chosen and it is video-only, auto-merge best audio. 
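+            # (yt-dlp merges a video-only selection with audio when the format string includes "+ba"/"+bestaudio".)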
if ( actual_format @@ -2239,9 +2132,10 @@ class Download_Media(Cmdlet): and "/" not in actual_format and "[" not in actual_format and actual_format not in {"best", "bv", "ba", "b"} + and not forced_single_applied ): try: - formats = list_formats(url, no_playlist=False, playlist_items=actual_playlist_items) + formats = _list_formats_cached(url, playlist_items_value=actual_playlist_items) if formats: fmt_match = next( (f for f in formats if str(f.get("format_id", "")) == actual_format), @@ -2258,24 +2152,170 @@ class Download_Media(Cmdlet): except Exception: pass - opts = DownloadOptions( - url=url, - mode=mode, - output_dir=final_output_dir, - ytdl_format=actual_format, - cookies_path=ytdlp_tool.resolve_cookiefile(), - clip_sections=clip_sections_spec, - playlist_items=actual_playlist_items, - quiet=quiet_mode, - no_playlist=False, - embed_chapters=embed_chapters, - write_sub=write_sub, - ) + attempted_single_format_fallback = False + while True: + try: + opts = DownloadOptions( + url=url, + mode=mode, + output_dir=final_output_dir, + ytdl_format=actual_format, + cookies_path=ytdlp_tool.resolve_cookiefile(), + clip_sections=clip_sections_spec, + playlist_items=actual_playlist_items, + quiet=quiet_mode, + no_playlist=False, + embed_chapters=embed_chapters, + write_sub=write_sub, + ) - # Use timeout wrapper to prevent hanging - debug(f"Starting download with 5-minute timeout...") - result_obj = _download_with_timeout(opts, timeout_seconds=300) - debug(f"Download completed, building pipe object...") + # Use timeout wrapper to prevent hanging + debug(f"Starting download with 5-minute timeout...") + result_obj = _download_with_timeout(opts, timeout_seconds=300) + debug(f"Download completed, building pipe object...") + break + except DownloadError as e: + # If yt-dlp failed due to an unavailable default format and there is only + # one available format, auto-retry with that format instead of prompting. + cause = getattr(e, "__cause__", None) + detail = "" + try: + detail = str(cause or "") + except Exception: + detail = "" + + if ("requested format is not available" in (detail or "").lower()) and mode != "audio": + # If we already discovered there's only one format for the batch, + # retry directly with it instead of re-listing formats per item. + if ( + forced_single_format_for_batch + and forced_single_format_id + and not ytdl_format + and not actual_playlist_items + and not attempted_single_format_fallback + ): + attempted_single_format_fallback = True + actual_format = forced_single_format_id + debug(f"Only one format available (playlist preflight); retrying with: {actual_format}") + continue + + formats = _list_formats_cached(url, playlist_items_value=actual_playlist_items) + if ( + (not attempted_single_format_fallback) + and isinstance(formats, list) + and len(formats) == 1 + and isinstance(formats[0], dict) + ): + only = formats[0] + fallback_format = str(only.get("format_id") or "").strip() + selection_format_id = fallback_format + try: + vcodec = str(only.get("vcodec", "none")) + acodec = str(only.get("acodec", "none")) + if vcodec != "none" and acodec == "none" and fallback_format: + selection_format_id = f"{fallback_format}+ba" + except Exception: + selection_format_id = fallback_format + + if selection_format_id: + attempted_single_format_fallback = True + actual_format = selection_format_id + debug(f"Only one format available; retrying with: {actual_format}") + continue + + # Fall back to interactive selection when there are multiple formats. 
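+                        # Each row carries "_selection_args" (["-format", format_id]), so an @N pick can re-run download-media with a working format id.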
+ if formats: + formats_to_show = formats + + table = ResultTable(title=f"Available formats for {url}", max_columns=10, preserve_order=True) + table.set_table("ytdlp.formatlist") + table.set_source_command("download-media", [url]) + + results_list: List[Dict[str, Any]] = [] + for idx, fmt in enumerate(formats_to_show, 1): + resolution = fmt.get("resolution", "") + ext = fmt.get("ext", "") + vcodec = fmt.get("vcodec", "none") + acodec = fmt.get("acodec", "none") + filesize = fmt.get("filesize") + filesize_approx = fmt.get("filesize_approx") + format_id = fmt.get("format_id", "") + + selection_format_id = format_id + try: + if vcodec != "none" and acodec == "none" and format_id: + selection_format_id = f"{format_id}+ba" + except Exception: + selection_format_id = format_id + + size_str = "" + size_prefix = "" + size_bytes = filesize + if not size_bytes: + size_bytes = filesize_approx + if size_bytes: + size_prefix = "~" + try: + if isinstance(size_bytes, (int, float)) and size_bytes > 0: + size_mb = float(size_bytes) / (1024 * 1024) + size_str = f"{size_prefix}{size_mb:.1f}MB" + except Exception: + size_str = "" + + desc_parts: List[str] = [] + if resolution and resolution != "audio only": + desc_parts.append(str(resolution)) + if ext: + desc_parts.append(str(ext).upper()) + if vcodec != "none": + desc_parts.append(f"v:{vcodec}") + if acodec != "none": + desc_parts.append(f"a:{acodec}") + if size_str: + desc_parts.append(size_str) + format_desc = " | ".join(desc_parts) + + format_dict: Dict[str, Any] = { + "table": "download-media", + "title": f"Format {format_id}", + "url": url, + "target": url, + "detail": format_desc, + "media_kind": "format", + "columns": [ + ("ID", format_id), + ("Resolution", resolution or "N/A"), + ("Ext", ext), + ("Size", size_str or ""), + ("Video", vcodec), + ("Audio", acodec), + ], + "full_metadata": { + "format_id": format_id, + "url": url, + "item_selector": selection_format_id, + }, + "_selection_args": ["-format", selection_format_id], + } + + results_list.append(format_dict) + table.add_result(format_dict) + + pipeline_context.set_current_stage_table(table) + pipeline_context.set_last_result_table(table, results_list) + + # Always print for interactive selection. + try: + get_stderr_console().print(table) + setattr(table, "_rendered_by_cmdlet", True) + except Exception: + pass + + log("Requested format is not available; select a working format with @N", file=sys.stderr) + return 0 + + # Non-format DownloadError: surface and skip this URL. + raise # Expand result set: # - playlists return a list @@ -2321,6 +2361,11 @@ class Download_Media(Cmdlet): pipe_objects: List[Dict[str, Any]] = [] for downloaded in results_to_emit: po = self._build_pipe_object(downloaded, url, opts) + pipe_seq += 1 + try: + po.setdefault("pipe_index", pipe_seq) + except Exception: + pass # Attach chapter timestamps for downstream consumers (e.g., mpv scripts) # even if container embedding fails. @@ -2385,118 +2430,23 @@ class Download_Media(Cmdlet): pipe_obj = coerce_to_pipe_object(pipe_obj_dict) register_url_with_local_library(pipe_obj, config) + # Keep a copy so we can optionally run a tail pipeline entered at the playlist prompt. + try: + downloaded_pipe_objects.append(pipe_obj_dict) + except Exception: + pass + downloaded_count += len(pipe_objects) debug("✓ Downloaded and emitted") except DownloadError as e: - # Special-case yt-dlp format errors: show a selectable format list table so - # the user can pick a working format_id and continue the pipeline via @N. 
- cause = getattr(e, "__cause__", None) - detail = "" - try: - detail = str(cause or "") - except Exception: - detail = "" - - if "requested format is not available" in (detail or "").lower() and mode != "audio": - formats = list_formats(url, no_playlist=False, playlist_items=actual_playlist_items) - if formats: - formats_to_show = formats - - table = ResultTable(title=f"Available formats for {url}", max_columns=10, preserve_order=True) - table.set_table("ytdlp.formatlist") - table.set_source_command("download-media", [str(a) for a in (args or [])]) - - results_list: List[Dict[str, Any]] = [] - for idx, fmt in enumerate(formats_to_show, 1): - resolution = fmt.get("resolution", "") - ext = fmt.get("ext", "") - vcodec = fmt.get("vcodec", "none") - acodec = fmt.get("acodec", "none") - filesize = fmt.get("filesize") - filesize_approx = fmt.get("filesize_approx") - format_id = fmt.get("format_id", "") - - selection_format_id = format_id - try: - if vcodec != "none" and acodec == "none" and format_id: - selection_format_id = f"{format_id}+ba" - except Exception: - selection_format_id = format_id - - size_str = "" - size_prefix = "" - size_bytes = filesize - if not size_bytes: - size_bytes = filesize_approx - if size_bytes: - size_prefix = "~" - try: - if isinstance(size_bytes, (int, float)) and size_bytes > 0: - size_mb = float(size_bytes) / (1024 * 1024) - size_str = f"{size_prefix}{size_mb:.1f}MB" - except Exception: - size_str = "" - - desc_parts: List[str] = [] - if resolution and resolution != "audio only": - desc_parts.append(str(resolution)) - if ext: - desc_parts.append(str(ext).upper()) - if vcodec != "none": - desc_parts.append(f"v:{vcodec}") - if acodec != "none": - desc_parts.append(f"a:{acodec}") - if size_str: - desc_parts.append(size_str) - format_desc = " | ".join(desc_parts) - - format_dict: Dict[str, Any] = { - "table": "download-media", - "title": f"Format {format_id}", - "url": url, - "target": url, - "detail": format_desc, - "media_kind": "format", - "columns": [ - ("ID", format_id), - ("Resolution", resolution or "N/A"), - ("Ext", ext), - ("Size", size_str or ""), - ("Video", vcodec), - ("Audio", acodec), - ], - "full_metadata": { - "format_id": format_id, - "url": url, - "item_selector": selection_format_id, - }, - "_selection_args": ["-format", selection_format_id], - } - - results_list.append(format_dict) - table.add_result(format_dict) - - pipeline_context.set_current_stage_table(table) - pipeline_context.set_last_result_table(table, results_list) - - # See comment in the main formatlist path: always print for interactive selection. - try: - sys.stderr.write(table.format_plain() + "\n") - setattr(table, "_rendered_by_cmdlet", True) - except Exception: - pass - - # Returning 0 with no emits lets the CLI pause the pipeline for @N selection. 
- log("Requested format is not available; select a working format with @N", file=sys.stderr) - return 0 - log(f"Download failed for {url}: {e}", file=sys.stderr) except Exception as e: log(f"Error processing {url}: {e}", file=sys.stderr) if downloaded_count > 0: debug(f"✓ Successfully processed {downloaded_count} URL(s)") + return 0 log("No downloads completed", file=sys.stderr) @@ -2717,6 +2667,7 @@ class Download_Media(Cmdlet): "tag": tag, "action": "cmdlet:download-media", "is_temp": True, + "ytdl_format": getattr(opts, "ytdl_format", None), # download_mode removed (deprecated), keep media_kind "store": getattr(opts, "storage_name", None) or getattr(opts, "storage_location", None) or "PATH", "media_kind": "video" if opts.mode == "video" else "audio", diff --git a/cmdlet/get_file.py b/cmdlet/get_file.py index 8028901..73aaf03 100644 --- a/cmdlet/get_file.py +++ b/cmdlet/get_file.py @@ -6,7 +6,14 @@ import os import sys import shutil import subprocess +import tempfile +import threading +import time +import http.server +from urllib.parse import quote import webbrowser +from urllib.parse import urljoin +from urllib.request import pathname2url import pipeline as ctx from . import _shared as sh @@ -56,7 +63,7 @@ class Get_File(sh.Cmdlet): output_path = parsed.get("path") output_name = parsed.get("name") - debug(f"[get-file] file_hash={file_hash[:12] if file_hash else None}... store_name={store_name}") + debug(f"[get-file] file_hash={file_hash} store_name={store_name}") if not file_hash: log("Error: No file hash provided (pipe an item or use -query \"hash:\")") @@ -83,7 +90,7 @@ class Get_File(sh.Cmdlet): debug(f"[get-file] Getting metadata for hash...") metadata = backend.get_metadata(file_hash) if not metadata: - log(f"Error: File metadata not found for hash {file_hash[:12]}...") + log(f"Error: File metadata not found for hash {file_hash}") return 1 debug(f"[get-file] Metadata retrieved: title={metadata.get('title')}, ext={metadata.get('ext')}") @@ -104,7 +111,7 @@ class Get_File(sh.Cmdlet): return text return "" - debug(f"[get-file] Calling backend.get_file({file_hash[:12]}...)") + debug(f"[get-file] Calling backend.get_file({file_hash})") # Get file from backend (may return Path or URL string depending on backend) source_path = backend.get_file(file_hash) @@ -135,7 +142,7 @@ class Get_File(sh.Cmdlet): source_path = Path(source_path) if not source_path or not source_path.exists(): - log(f"Error: Backend could not retrieve file for hash {file_hash[:12]}...") + log(f"Error: Backend could not retrieve file for hash {file_hash}") return 1 # Folder store UX: without -path, just open the file in the default app. @@ -202,6 +209,18 @@ class Get_File(sh.Cmdlet): def _open_file_default(self, path: Path) -> None: """Open a local file in the OS default application.""" try: + suffix = str(path.suffix or "").lower() + if sys.platform.startswith("win"): + # On Windows, file associations for common media types can point at + # editors (Paint/VS Code). Prefer opening a localhost URL. + if self._open_local_file_in_browser_via_http(path): + return + + if suffix in {".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp", ".tif", ".tiff", ".svg"}: + # Use default web browser for images. 
+ if self._open_image_in_default_browser(path): + return + if sys.platform.startswith("win"): os.startfile(str(path)) # type: ignore[attr-defined] return @@ -211,6 +230,122 @@ class Get_File(sh.Cmdlet): subprocess.Popen(["xdg-open", str(path)], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) except Exception as exc: log(f"Error opening file: {exc}", file=sys.stderr) + + def _open_local_file_in_browser_via_http(self, file_path: Path) -> bool: + """Serve a single local file via localhost HTTP and open in browser. + + This avoids Windows file-association issues (e.g., PNG -> Paint, HTML -> VS Code). + The server is bound to 127.0.0.1 on an ephemeral port and is shut down after + a timeout. + """ + try: + resolved = file_path.resolve() + directory = resolved.parent + filename = resolved.name + except Exception: + return False + + class OneFileHandler(http.server.SimpleHTTPRequestHandler): + def __init__(self, *handler_args, **handler_kwargs): + super().__init__(*handler_args, directory=str(directory), **handler_kwargs) + + def log_message(self, format: str, *args) -> None: # noqa: A003 + # Keep normal output clean. + return + + def do_GET(self) -> None: # noqa: N802 + if self.path in {"/", ""}: + self.path = "/" + filename + return super().do_GET() + + if self.path == "/" + filename or self.path == "/" + quote(filename): + return super().do_GET() + + self.send_error(404) + + def do_HEAD(self) -> None: # noqa: N802 + if self.path in {"/", ""}: + self.path = "/" + filename + return super().do_HEAD() + + if self.path == "/" + filename or self.path == "/" + quote(filename): + return super().do_HEAD() + + self.send_error(404) + + try: + httpd = http.server.ThreadingHTTPServer(("127.0.0.1", 0), OneFileHandler) + except Exception: + return False + + port = httpd.server_address[1] + url = f"http://127.0.0.1:{port}/{quote(filename)}" + + # Run server in the background. + server_thread = threading.Thread(target=httpd.serve_forever, kwargs={"poll_interval": 0.2}, daemon=True) + server_thread.start() + + # Auto-shutdown after a timeout to avoid lingering servers. + def shutdown_later() -> None: + time.sleep(10 * 60) + try: + httpd.shutdown() + except Exception: + pass + try: + httpd.server_close() + except Exception: + pass + + threading.Thread(target=shutdown_later, daemon=True).start() + + try: + debug(f"[get-file] Opening via localhost: {url}") + return bool(webbrowser.open(url)) + except Exception: + return False + + def _open_image_in_default_browser(self, image_path: Path) -> bool: + """Open an image file in the user's default web browser. + + We intentionally avoid opening the image path directly on Windows because + file associations may point to editors/viewers (e.g., Paint). Instead we + generate a tiny HTML wrapper and open that (HTML is typically associated + with the default browser). + """ + try: + resolved = image_path.resolve() + image_url = urljoin("file:", pathname2url(str(resolved))) + except Exception: + return False + + # Create a stable wrapper filename to reduce temp-file spam. + wrapper_path = Path(tempfile.gettempdir()) / f"medeia-open-image-{resolved.stem}.html" + try: + wrapper_path.write_text( + "\n".join( + [ + "", + "", + f"{resolved.name}", + "", + f"\"{resolved.name}\"", + ] + ), + encoding="utf-8", + ) + except Exception: + return False + + # Prefer localhost server when possible (reliable on Windows). 
+ if self._open_local_file_in_browser_via_http(image_path): + return True + + wrapper_url = wrapper_path.as_uri() + try: + return bool(webbrowser.open(wrapper_url)) + except Exception: + return False def _sanitize_filename(self, name: str) -> str: """Sanitize filename by removing invalid characters.""" diff --git a/cmdlet/get_relationship.py b/cmdlet/get_relationship.py index f1e3676..eec1787 100644 --- a/cmdlet/get_relationship.py +++ b/cmdlet/get_relationship.py @@ -450,7 +450,9 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int: table.set_row_selection_args(i, ["-store", str(item['store']), "-query", f"hash:{item['hash']}"]) ctx.set_last_result_table(table, pipeline_results) - print(table) + from rich_display import stdout_console + + stdout_console().print(table) return 0 diff --git a/cmdlet/merge_file.py b/cmdlet/merge_file.py index 9ec97ea..8f01b32 100644 --- a/cmdlet/merge_file.py +++ b/cmdlet/merge_file.py @@ -112,6 +112,107 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: item = files_to_merge[0] ctx.emit(item) return 0 + + def _resolve_existing_path(item: Dict[str, Any]) -> Optional[Path]: + raw_path = get_pipe_object_path(item) + target_path: Optional[Path] = None + if isinstance(raw_path, Path): + target_path = raw_path + elif isinstance(raw_path, str) and raw_path.strip(): + candidate = Path(raw_path).expanduser() + if candidate.exists(): + target_path = candidate + if target_path and target_path.exists(): + return target_path + return None + + def _extract_url(item: Dict[str, Any]) -> Optional[str]: + u = get_field(item, "url") or get_field(item, "target") + if isinstance(u, str): + s = u.strip() + if s.lower().startswith(("http://", "https://")): + return s + return None + + # If the user piped URL-only playlist selections (no local paths yet), download first. + # This keeps the pipeline order intuitive: + # @* | merge-file | add-file -store ... + urls_to_download: List[str] = [] + for it in files_to_merge: + if _resolve_existing_path(it) is not None: + continue + u = _extract_url(it) + if u: + urls_to_download.append(u) + + if urls_to_download and len(urls_to_download) >= 2: + try: + # Compute a batch hint (audio vs video + single-format id) once. 
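+            # Only the first URL is probed: any video codec in its formats marks the whole
+            # batch as video, and a lone available format id becomes the hint passed to each
+            # per-item download below.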
+ mode_hint: Optional[str] = None + forced_format: Optional[str] = None + try: + from cmdlet.download_media import list_formats + from tool.ytdlp import YtDlpTool + + sample_url = urls_to_download[0] + cookiefile = None + try: + cookie_path = YtDlpTool(config).resolve_cookiefile() + if cookie_path is not None and cookie_path.is_file(): + cookiefile = str(cookie_path) + except Exception: + cookiefile = None + + fmts = list_formats(sample_url, no_playlist=False, playlist_items=None, cookiefile=cookiefile) + if isinstance(fmts, list) and fmts: + has_video = False + for f in fmts: + if not isinstance(f, dict): + continue + vcodec = str(f.get("vcodec", "none") or "none").strip().lower() + if vcodec and vcodec != "none": + has_video = True + break + mode_hint = "video" if has_video else "audio" + + if len(fmts) == 1 and isinstance(fmts[0], dict): + fid = str(fmts[0].get("format_id") or "").strip() + if fid: + forced_format = fid + except Exception: + mode_hint = None + forced_format = None + + from cmdlet.add_file import Add_File + + expanded: List[Dict[str, Any]] = [] + downloaded_any = False + for it in files_to_merge: + if _resolve_existing_path(it) is not None: + expanded.append(it) + continue + u = _extract_url(it) + if not u: + expanded.append(it) + continue + + downloaded = Add_File._download_streaming_url_as_pipe_objects( + u, + config, + mode_hint=mode_hint, + ytdl_format_hint=forced_format, + ) + if downloaded: + expanded.extend(downloaded) + downloaded_any = True + else: + expanded.append(it) + + if downloaded_any: + files_to_merge = expanded + except Exception: + # If downloads fail, we fall back to the existing path-based merge behavior. + pass # Extract file paths and metadata from result objects source_files: List[Path] = [] @@ -120,14 +221,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: source_tags: List[str] = [] # tags read from .tag sidecars source_item_tag_lists: List[List[str]] = [] # tags carried in-memory on piped items for item in files_to_merge: - raw_path = get_pipe_object_path(item) - target_path = None - if isinstance(raw_path, Path): - target_path = raw_path - elif isinstance(raw_path, str) and raw_path.strip(): - candidate = Path(raw_path).expanduser() - if candidate.exists(): - target_path = candidate + target_path = _resolve_existing_path(item) if target_path and target_path.exists(): source_files.append(target_path) diff --git a/cmdlet/screen_shot.py b/cmdlet/screen_shot.py index 98c4ee0..cf77630 100644 --- a/cmdlet/screen_shot.py +++ b/cmdlet/screen_shot.py @@ -266,27 +266,27 @@ def _archive_url(url: str, timeout: float) -> Tuple[List[str], List[str]]: (_submit_archive_ph, "archive.ph"), ): try: - log(f"Archiving to {label}...", flush=True) + debug(f"Archiving to {label}...") archived = submitter(url, timeout) except httpx.HTTPStatusError as exc: if exc.response.status_code == 429: warnings.append(f"archive {label} rate limited (HTTP 429)") - log(f"{label}: Rate limited (HTTP 429)", flush=True) + debug(f"{label}: Rate limited (HTTP 429)") else: warnings.append(f"archive {label} failed: HTTP {exc.response.status_code}") - log(f"{label}: HTTP {exc.response.status_code}", flush=True) + debug(f"{label}: HTTP {exc.response.status_code}") except httpx.RequestError as exc: warnings.append(f"archive {label} failed: {exc}") - log(f"{label}: Connection error: {exc}", flush=True) + debug(f"{label}: Connection error: {exc}") except Exception as exc: warnings.append(f"archive {label} failed: {exc}") - log(f"{label}: {exc}", flush=True) + 
debug(f"{label}: {exc}") else: if archived: archives.append(archived) - log(f"{label}: Success - {archived}", flush=True) + debug(f"{label}: Success - {archived}") else: - log(f"{label}: No archive link returned", flush=True) + debug(f"{label}: No archive link returned") return archives, warnings @@ -335,7 +335,7 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str]) tool.debug_dump() - log("Launching browser...", flush=True) + debug("Launching browser...") format_name = _normalise_format(options.output_format) headless = options.headless or format_name == "pdf" debug(f"[_capture] Format: {format_name}, Headless: {headless}") @@ -345,29 +345,29 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str]) try: with tool.open_page(headless=headless) as page: - log(f"Navigating to {options.url}...", flush=True) + debug(f"Navigating to {options.url}...") try: tool.goto(page, options.url) - log("Page loaded successfully", flush=True) + debug("Page loaded successfully") except PlaywrightTimeoutError: warnings.append("navigation timeout; capturing current page state") - log("Navigation timeout; proceeding with current state", flush=True) + debug("Navigation timeout; proceeding with current state") # Skip article lookup by default (wait_for_article defaults to False) if options.wait_for_article: try: - log("Waiting for article element...", flush=True) + debug("Waiting for article element...") page.wait_for_selector("article", timeout=10_000) - log("Article element found", flush=True) + debug("Article element found") except PlaywrightTimeoutError: warnings.append("
selector not found; capturing fallback") - log("Article element not found; using fallback", flush=True) + debug("Article element not found; using fallback") if options.wait_after_load > 0: - log(f"Waiting {options.wait_after_load}s for page stabilization...", flush=True) + debug(f"Waiting {options.wait_after_load}s for page stabilization...") time.sleep(min(10.0, max(0.0, options.wait_after_load))) if options.replace_video_posters: - log("Replacing video elements with posters...", flush=True) + debug("Replacing video elements with posters...") page.evaluate( """ document.querySelectorAll('video').forEach(v => { @@ -384,7 +384,7 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str]) # Attempt platform-specific target capture if requested (and not PDF) element_captured = False if options.prefer_platform_target and format_name != "pdf": - log("Attempting platform-specific content capture...", flush=True) + debug("Attempting platform-specific content capture...") try: _platform_preprocess(options.url, page, warnings) except Exception as e: @@ -397,36 +397,36 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str]) debug(f"[_capture] Trying selectors: {selectors}") for sel in selectors: try: - log(f"Trying selector: {sel}", flush=True) + debug(f"Trying selector: {sel}") el = page.wait_for_selector(sel, timeout=max(0, int(options.selector_timeout_ms))) except PlaywrightTimeoutError: - log(f"Selector not found: {sel}", flush=True) + debug(f"Selector not found: {sel}") continue try: if el is not None: - log(f"Found element with selector: {sel}", flush=True) + debug(f"Found element with selector: {sel}") try: el.scroll_into_view_if_needed(timeout=1000) except Exception: pass - log(f"Capturing element to {destination}...", flush=True) + debug(f"Capturing element to {destination}...") el.screenshot(path=str(destination), type=("jpeg" if format_name == "jpeg" else None)) element_captured = True - log("Element captured successfully", flush=True) + debug("Element captured successfully") break except Exception as exc: warnings.append(f"element capture failed for '{sel}': {exc}") - log(f"Failed to capture element: {exc}", flush=True) + debug(f"Failed to capture element: {exc}") # Fallback to default capture paths if element_captured: pass elif format_name == "pdf": - log("Generating PDF...", flush=True) + debug("Generating PDF...") page.emulate_media(media="print") page.pdf(path=str(destination), print_background=True) - log(f"PDF saved to {destination}", flush=True) + debug(f"PDF saved to {destination}") else: - log(f"Capturing full page to {destination}...", flush=True) + debug(f"Capturing full page to {destination}...") screenshot_kwargs: Dict[str, Any] = {"path": str(destination)} if format_name == "jpeg": screenshot_kwargs["type"] = "jpeg" @@ -441,7 +441,7 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str]) article.screenshot(**article_kwargs) else: page.screenshot(**screenshot_kwargs) - log(f"Screenshot saved to {destination}", flush=True) + debug(f"Screenshot saved to {destination}") except Exception as exc: debug(f"[_capture] Exception launching browser/page: {exc}") msg = str(exc).lower() @@ -587,7 +587,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: if storage_value: try: screenshot_dir = SharedArgs.resolve_storage(storage_value) - log(f"[screen_shot] Using --storage {storage_value}: {screenshot_dir}", flush=True) + debug(f"[screen_shot] Using --storage {storage_value}: 
{screenshot_dir}") except ValueError as e: log(str(e), file=sys.stderr) return 1 @@ -596,7 +596,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: if screenshot_dir is None and resolve_output_dir is not None: try: screenshot_dir = resolve_output_dir(config) - log(f"[screen_shot] Using config resolver: {screenshot_dir}", flush=True) + debug(f"[screen_shot] Using config resolver: {screenshot_dir}") except Exception: pass @@ -604,14 +604,14 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: if screenshot_dir is None and config and config.get("outfile"): try: screenshot_dir = Path(config["outfile"]).expanduser() - log(f"[screen_shot] Using config outfile: {screenshot_dir}", flush=True) + debug(f"[screen_shot] Using config outfile: {screenshot_dir}") except Exception: pass # Default: User's Videos directory if screenshot_dir is None: screenshot_dir = Path.home() / "Videos" - log(f"[screen_shot] Using default directory: {screenshot_dir}", flush=True) + debug(f"[screen_shot] Using default directory: {screenshot_dir}") ensure_directory(screenshot_dir) @@ -693,11 +693,11 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: screenshot_result = _capture_screenshot(options) # Log results and warnings - log(f"Screenshot captured to {screenshot_result.path}", flush=True) + debug(f"Screenshot captured to {screenshot_result.path}") if screenshot_result.archive_url: - log(f"Archives: {', '.join(screenshot_result.archive_url)}", flush=True) + debug(f"Archives: {', '.join(screenshot_result.archive_url)}") for warning in screenshot_result.warnings: - log(f"Warning: {warning}", flush=True) + debug(f"Warning: {warning}") # Compute hash of screenshot file screenshot_hash = None @@ -762,8 +762,8 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: log(f"No screenshots were successfully captured", file=sys.stderr) return 1 - # Log completion message - log(f"✓ Successfully captured {len(all_emitted)} screenshot(s)", flush=True) + # Log completion message (keep this as normal output) + log(f"✓ Successfully captured {len(all_emitted)} screenshot(s)") return exit_code CMDLET = Cmdlet( diff --git a/cmdlet/search_store.py b/cmdlet/search_store.py index ce406f4..614c9b6 100644 --- a/cmdlet/search_store.py +++ b/cmdlet/search_store.py @@ -45,6 +45,8 @@ class Search_Store(Cmdlet): "Search across storage backends: Folder stores and Hydrus instances", "Use -store to search a specific backend by name", "URL search: url:* (any URL) or url: (URL substring)", + "Extension search: ext: (e.g., ext:png)", + "Hydrus-style extension: system:filetype = png", "Results include hash for downstream commands (get-file, add-tag, etc.)", "Examples:", "search-store -query foo # Search all storage backends", @@ -53,6 +55,8 @@ class Search_Store(Cmdlet): "search-store -query 'hash:deadbeef...' 
# Search by SHA256 hash", "search-store -query 'url:*' # Files that have any URL", "search-store -query 'url:youtube.com' # Files whose URL contains substring", + "search-store -query 'ext:png' # Files whose metadata ext is png", + "search-store -query 'system:filetype = png' # Hydrus: native; Folder: maps to metadata.ext", ], exec=self.run, ) @@ -107,6 +111,35 @@ class Search_Store(Cmdlet): args_list = [str(arg) for arg in (args or [])] + refresh_mode = any(str(a).strip().lower() in {"--refresh", "-refresh"} for a in args_list) + + def _format_command_title(command: str, raw_args: List[str]) -> str: + def _quote(value: str) -> str: + text = str(value) + if not text: + return '""' + needs_quotes = any(ch.isspace() for ch in text) or '"' in text + if not needs_quotes: + return text + return '"' + text.replace('"', '\\"') + '"' + + cleaned = [ + str(a) + for a in (raw_args or []) + if str(a).strip().lower() not in {"--refresh", "-refresh"} + ] + if not cleaned: + return command + return " ".join([command, *[_quote(a) for a in cleaned]]) + + raw_title = None + try: + raw_title = ctx.get_current_stage_text("") if hasattr(ctx, "get_current_stage_text") else None + except Exception: + raw_title = None + + command_title = (str(raw_title).strip() if raw_title else "") or _format_command_title("search-store", list(args_list)) + # Build dynamic flag variants from cmdlet arg definitions. # This avoids hardcoding flag spellings in parsing loops. flag_registry = self.build_flag_registry() @@ -188,11 +221,7 @@ class Search_Store(Cmdlet): importlib.reload(result_table) from result_table import ResultTable - table_title = f"Search: {query}" - if storage_backend: - table_title += f" [{storage_backend}]" - - table = ResultTable(table_title) + table = ResultTable(command_title) try: table.set_source_command("search-store", list(args_list)) except Exception: @@ -326,26 +355,23 @@ class Search_Store(Cmdlet): ctx.emit(payload) if found_any: - # Title should reflect the command, query, and only stores present in the table. - store_counts: "OrderedDict[str, int]" = OrderedDict() - for row_item in results_list: - store_val = str(row_item.get("store") or "").strip() - if not store_val: - continue - if store_val not in store_counts: - store_counts[store_val] = 0 - store_counts[store_val] += 1 + table.title = command_title - counts_part = " ".join(f"{name}:{count}" for name, count in store_counts.items() if count > 0) - base_title = f"search-store: {query}".strip() - table.title = f"{base_title} | {counts_part}" if counts_part else base_title - - ctx.set_last_result_table(table, results_list) + if refresh_mode: + ctx.set_last_result_table_preserve_history(table, results_list) + else: + ctx.set_last_result_table(table, results_list) db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2)) db.update_worker_status(worker_id, 'completed') return 0 log("No results found", file=sys.stderr) + if refresh_mode: + try: + table.title = command_title + ctx.set_last_result_table_preserve_history(table, []) + except Exception: + pass db.append_worker_stdout(worker_id, json.dumps([], indent=2)) db.update_worker_status(worker_id, 'completed') return 0 @@ -413,24 +439,21 @@ class Search_Store(Cmdlet): results_list.append(normalized) ctx.emit(normalized) - # Title should reflect the command, query, and only stores present in the table. 
- store_counts: "OrderedDict[str, int]" = OrderedDict() - for row_item in results_list: - store_val = str(row_item.get("store") or "").strip() - if not store_val: - continue - if store_val not in store_counts: - store_counts[store_val] = 0 - store_counts[store_val] += 1 + table.title = command_title - counts_part = " ".join(f"{name}:{count}" for name, count in store_counts.items() if count > 0) - base_title = f"search-store: {query}".strip() - table.title = f"{base_title} | {counts_part}" if counts_part else base_title - - ctx.set_last_result_table(table, results_list) + if refresh_mode: + ctx.set_last_result_table_preserve_history(table, results_list) + else: + ctx.set_last_result_table(table, results_list) db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2)) else: log("No results found", file=sys.stderr) + if refresh_mode: + try: + table.title = command_title + ctx.set_last_result_table_preserve_history(table, []) + except Exception: + pass db.append_worker_stdout(worker_id, json.dumps([], indent=2)) db.update_worker_status(worker_id, 'completed') diff --git a/cmdnat/adjective.py b/cmdnat/adjective.py index 3b9476c..1337524 100644 --- a/cmdnat/adjective.py +++ b/cmdnat/adjective.py @@ -48,7 +48,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: table.set_source_command(".adjective") ctx.set_last_result_table_overlay(table, list(data.keys())) ctx.set_current_stage_table(table) - print(table) + from rich_display import stdout_console + + stdout_console().print(table) return 0 # We have args. First arg is likely category. @@ -129,7 +131,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: table.set_source_command(".adjective") ctx.set_last_result_table_overlay(table, tags) ctx.set_current_stage_table(table) - print(table) + from rich_display import stdout_console + + stdout_console().print(table) return 0 diff --git a/cmdnat/help.py b/cmdnat/help.py index 82a394c..bfc2373 100644 --- a/cmdnat/help.py +++ b/cmdnat/help.py @@ -62,7 +62,9 @@ def _render_list(metadata: Dict[str, Dict[str, Any]], filter_text: Optional[str] ctx.set_last_result_table(table, items) ctx.set_current_stage_table(table) - print(table) + from rich_display import stdout_console + + stdout_console().print(table) def _render_detail(meta: Dict[str, Any], args: Sequence[str]) -> None: @@ -130,7 +132,9 @@ def _render_detail(meta: Dict[str, Any], args: Sequence[str]) -> None: ctx.set_last_result_table_overlay(table, [meta]) ctx.set_current_stage_table(table) - print(table) + from rich_display import stdout_console + + stdout_console().print(table) def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: diff --git a/cmdnat/matrix.py b/cmdnat/matrix.py index a9880a7..e973461 100644 --- a/cmdnat/matrix.py +++ b/cmdnat/matrix.py @@ -463,7 +463,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: ctx.set_pending_pipeline_tail([[".matrix", "-send"]], ".matrix") print() - print(table.format_plain()) + from rich_display import stdout_console + + stdout_console().print(table) print("\nSelect room(s) with @N (e.g. @1 or @1-3) to send the selected item(s)") return 0 diff --git a/cmdnat/pipe.py b/cmdnat/pipe.py index 2c1020f..f2a40e5 100644 --- a/cmdnat/pipe.py +++ b/cmdnat/pipe.py @@ -1196,7 +1196,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: # In pipeline mode, the CLI renders current-stage tables; printing here duplicates output. 
suppress_direct_print = bool(isinstance(config, dict) and config.get("_quiet_background_output")) if not suppress_direct_print: - print(table) + from rich_display import stdout_console + + stdout_console().print(table) return 0 # Everything below was originally outside a try block; keep it inside so `start_opts` is in scope. @@ -1514,7 +1516,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: # In pipeline mode, the CLI renders current-stage tables; printing here duplicates output. suppress_direct_print = bool(isinstance(config, dict) and config.get("_quiet_background_output")) if not suppress_direct_print: - print(table) + from rich_display import stdout_console + + stdout_console().print(table) return 0 finally: diff --git a/metadata.py b/metadata.py index ea53a14..9f5aa1e 100644 --- a/metadata.py +++ b/metadata.py @@ -1515,7 +1515,7 @@ def _normalise_string_list(values: Optional[Iterable[Any]]) -> List[str]: for value in values: if value is None: continue - text = str(value).strip() + text = str(value).strip().lower() if not text: continue if text in seen: @@ -1569,7 +1569,7 @@ def _read_sidecar_metadata(sidecar_path: Path) -> tuple[Optional[str], List[str] urls.append(url_clean) else: # Everything else is a tag (including relationship: lines) - tags.append(line) + tags.append(line.lower()) return hash_value, tags, urls @@ -1644,11 +1644,12 @@ def write_tags(media_path: Path, tags: Iterable[str], url: Iterable[str], hash_v # Prepare tags lines and convert to list if needed (tags only) tag_list = list(tags) if not isinstance(tags, list) else tags + tag_list = [str(tag).strip().lower() for tag in tag_list if str(tag).strip()] # If database provided, insert directly and skip sidecar if db is not None: try: - db_tags = [str(tag).strip() for tag in tag_list if str(tag).strip()] + db_tags = [str(tag).strip().lower() for tag in tag_list if str(tag).strip()] if db_tags: db.add_tags(media_path, db_tags) @@ -1675,7 +1676,7 @@ def write_tags(media_path: Path, tags: Iterable[str], url: Iterable[str], hash_v # Write via consolidated function try: lines = [] - lines.extend(str(tag).strip() for tag in tag_list if str(tag).strip()) + lines.extend(str(tag).strip().lower() for tag in tag_list if str(tag).strip()) if lines: sidecar.write_text("\n".join(lines) + "\n", encoding="utf-8") @@ -2263,7 +2264,7 @@ def read_tags_from_file(file_path: Path) -> List[str]: continue # Normalize the tag - normalized = value_normalize(line) + normalized = value_normalize(line).lower() if normalized and normalized not in seen: seen.add(normalized) tags.append(normalized) @@ -2443,7 +2444,7 @@ def write_tags_to_file( # Add tags if tags: - content_lines.extend(tags) + content_lines.extend([str(t).strip().lower() for t in tags if str(t).strip()]) # Write to file mode = 'a' if (append and file_path.exists()) else 'w' @@ -2969,6 +2970,86 @@ def normalize_tags(tags: List[Any]) -> List[str]: return sort_tags(normalized) +def compute_namespaced_tag_overwrite( + existing_tags: Sequence[Any], + incoming_tags: Sequence[Any], +) -> Tuple[List[str], List[str], List[str]]: + """Compute a tag mutation with namespace overwrite semantics. + + Rules: + - Incoming namespaced tags ("ns:value") overwrite any existing tags in that namespace. + - Overwrite is based on namespace match (case-insensitive). + - Additions are deduped case-insensitively against kept existing tags and within the incoming list. + - If an existing tag matches an incoming tag exactly, it is kept (no remove/add). 
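+
+    Example (illustrative):
+        existing ["creator:old", "misc"] with incoming ["creator:new"] yields
+        remove=["creator:old"], add=["creator:new"], merged=["misc", "creator:new"].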
+ + Returns: + (tags_to_remove, tags_to_add, merged_tags) + + Notes: + This is intentionally store-agnostic: stores decide how to persist/apply + the returned mutation (DB merge write, Hydrus delete/add, etc.). + """ + + def _clean(values: Sequence[Any]) -> List[str]: + out: List[str] = [] + for v in values or []: + if not isinstance(v, str): + continue + t = v.strip() + if t: + out.append(t.lower()) + return out + + def _ns_of(tag: str) -> str: + if ":" not in tag: + return "" + return tag.split(":", 1)[0].strip().lower() + + existing = _clean(existing_tags) + incoming = _clean(incoming_tags) + if not incoming: + return [], [], existing + + namespaces_to_replace: Set[str] = set() + for t in incoming: + ns = _ns_of(t) + if ns: + namespaces_to_replace.add(ns) + + kept_existing: List[str] = [] + kept_existing_lower: Set[str] = set() + tags_to_remove: List[str] = [] + + for t in existing: + ns = _ns_of(t) + if ns and ns in namespaces_to_replace: + # If it matches exactly, keep it; otherwise remove it. + if t in incoming: + kept_existing.append(t) + kept_existing_lower.add(t.lower()) + else: + # If incoming has the same tag value but different casing, treat as replace. + tags_to_remove.append(t) + continue + + kept_existing.append(t) + kept_existing_lower.add(t.lower()) + + tags_to_add: List[str] = [] + added_lower: Set[str] = set() + for t in incoming: + tl = t.lower() + if tl in kept_existing_lower: + continue + if tl in added_lower: + continue + tags_to_add.append(t) + added_lower.add(tl) + + merged = kept_existing + tags_to_add + return tags_to_remove, tags_to_add, merged + + def merge_tag_lists(*tag_lists: List[str]) -> List[str]: """ Merge multiple tag lists, removing duplicates. diff --git a/models.py b/models.py index 50dd484..3557644 100644 --- a/models.py +++ b/models.py @@ -3,14 +3,25 @@ import datetime import hashlib import json -import math import os import shutil import sys import time from dataclasses import dataclass, field from pathlib import Path -from typing import Any, Callable, Dict, List, Optional, Protocol, TextIO, Tuple +from typing import Any, Callable, Dict, List, Optional, Protocol, TextIO + +from rich.console import Console +from rich.progress import ( + BarColumn, + DownloadColumn, + Progress, + TaskID, + TaskProgressColumn, + TextColumn, + TimeRemainingColumn, + TransferSpeedColumn, +) @dataclass(slots=True) @@ -75,154 +86,49 @@ class PipeObject: return self.relationships.copy() if self.relationships else {} def debug_table(self) -> None: - """Print a formatted debug table showing PipeObject state. - - Only prints when debug logging is enabled. Useful for tracking - object state throughout the pipeline. 
- """ + """Rich-inspect the PipeObject when debug logging is enabled.""" try: - from SYS.logger import is_debug_enabled, debug - import shutil - - if not is_debug_enabled(): - return + from SYS.logger import is_debug_enabled, debug_inspect except Exception: return - - # Prepare display values - hash_display = str(self.hash or "N/A") - store_display = str(self.store or "N/A") - provider_display = str(self.provider or "N/A") - title_display = str(self.title or "N/A") - tag_display = ", ".join(self.tag[:3]) if self.tag else "[]" - if len(self.tag) > 3: - tag_display += f" (+{len(self.tag) - 3} more)" - file_path_display = str(self.path or "N/A") - url_display: Any = self.url or "N/A" - if isinstance(url_display, (list, tuple, set)): - parts = [str(x) for x in url_display if x] - url_display = ", ".join(parts) if parts else "N/A" - else: - url_display = str(url_display) - - relationships_display = "N/A" - if self.relationships: - rel_parts = [] - for key, val in self.relationships.items(): - if isinstance(val, list): - rel_parts.append(f"{key}({len(val)})") - else: - rel_parts.append(key) - relationships_display = ", ".join(rel_parts) - - warnings_display = f"{len(self.warnings)} warning(s)" if self.warnings else "none" + if not is_debug_enabled(): + return - def _fit(text: str, max_len: int) -> str: - if max_len <= 0: - return "" - if len(text) <= max_len: - return text - if max_len <= 3: - return text[:max_len] - return text[: max_len - 3] + "..." - - # Compute box width from terminal size, but never allow overflow. + # Prefer a stable, human-friendly title: + # "1 - download-media", "2 - download-media", ... + # The index is preserved when possible via `pipe_index` in the PipeObject's extra. + idx = None try: - term_cols = int(getattr(shutil.get_terminal_size((120, 20)), "columns", 120)) + if isinstance(self.extra, dict): + idx = self.extra.get("pipe_index") except Exception: - term_cols = 120 - box_inner_max = max(60, term_cols - 3) # line length = box_inner + 3 + idx = None - rows = [ - ("Hash", hash_display), - ("Store", store_display), - ("Provider", provider_display), - ("Title", title_display), - ("Tag", tag_display), - ("URL", str(url_display)), - ("File Path", file_path_display), - ("Relationships", relationships_display), - ("Warnings", warnings_display), - ] - label_width = max(len(k) for k, _ in rows) + cmdlet_name = "PipeObject" + try: + import pipeline as ctx + current = ctx.get_current_cmdlet_name("") if hasattr(ctx, "get_current_cmdlet_name") else "" + if current: + cmdlet_name = current + else: + action = str(self.action or "").strip() + if action.lower().startswith("cmdlet:"): + cmdlet_name = action.split(":", 1)[1].strip() or cmdlet_name + elif action: + cmdlet_name = action + except Exception: + cmdlet_name = "PipeObject" - # Estimate a good inner width from current content, capped to terminal. 
- base_contents = [f"{k:<{label_width}} : {v}" for k, v in rows] - desired_inner = max([len("PipeObject Debug Info"), *[len(x) for x in base_contents], 60]) - box_inner = min(desired_inner, box_inner_max) + title_text = cmdlet_name + try: + if idx is not None and str(idx).strip(): + title_text = f"{idx} - {cmdlet_name}" + except Exception: + title_text = cmdlet_name - def _line(content: str) -> str: - return f"│ {_fit(content, box_inner):<{box_inner}}│" - - # Print table - debug("┌" + ("─" * (box_inner + 1)) + "┐") - debug(_line("PipeObject Debug Info")) - debug("├" + ("─" * (box_inner + 1)) + "┤") - for key, val in rows: - content = f"{key:<{label_width}} : {val}" - debug(_line(content)) - - # Show extra keys as individual rows - if self.extra: - debug("├" + ("─" * (box_inner + 1)) + "┤") - debug(_line("Extra Fields:")) - for key, val in self.extra.items(): - # Format value for display - if isinstance(val, (list, set)): - val_display = f"{type(val).__name__}({len(val)})" - elif isinstance(val, dict): - val_display = f"dict({len(val)})" - elif isinstance(val, (int, float)): - val_display = str(val) - else: - val_str = str(val) - val_display = val_str if len(val_str) <= 40 else val_str[:37] + "..." - - # Truncate key if needed - key_display = str(key) - key_display = key_display if len(key_display) <= 15 else key_display[:12] + "..." - content = f" {key_display:<15}: {val_display}" - debug(_line(content)) - - # If we have structured provider metadata, expand it for debugging. - full_md = self.extra.get("full_metadata") - if isinstance(full_md, dict) and full_md: - debug("├" + ("─" * (box_inner + 1)) + "┤") - debug(_line("full_metadata:")) - for md_key in sorted(full_md.keys(), key=lambda x: str(x)): - md_val = full_md.get(md_key) - if isinstance(md_val, (str, int, float)) or md_val is None or isinstance(md_val, bool): - md_display = str(md_val) - elif isinstance(md_val, list): - if len(md_val) <= 6 and all(isinstance(x, (str, int, float, bool)) or x is None for x in md_val): - md_display = "[" + ", ".join(str(x) for x in md_val) + "]" - else: - md_display = f"list({len(md_val)})" - elif isinstance(md_val, dict): - # Avoid dumping huge nested dicts (like raw provider docs). - keys = list(md_val.keys()) - preview = ",".join(str(k) for k in keys[:6]) - md_display = f"dict({len(keys)})[{preview}{',...' if len(keys) > 6 else ''}]" - else: - md_str = str(md_val) - md_display = md_str if len(md_str) <= 40 else md_str[:37] + "..." - - md_key_display = str(md_key) - md_key_display = md_key_display if len(md_key_display) <= 15 else md_key_display[:12] + "..." - content = f" {md_key_display:<15}: {md_display}" - debug(_line(content)) - - if self.action: - debug("├─────────────────────────────────────────────────────────────┤") - action_display = self.action[:48] - debug(f"│ Action : {action_display:<48}│") - if self.parent_hash: - if not self.action: - debug("├─────────────────────────────────────────────────────────────┤") - parent_display = self.parent_hash[:12] + "..." if len(self.parent_hash) > 12 else self.parent_hash - debug(f"│ Parent Hash : {parent_display:<48}│") - debug("└─────────────────────────────────────────────────────────────┘") + # Color the title (requested: yellow instead of Rich's default blue-ish title). 
+ debug_inspect(self, title=f"[yellow]{title_text}[/yellow]") def to_dict(self) -> Dict[str, Any]: """Serialize to dictionary, excluding None and empty values.""" @@ -482,18 +388,76 @@ def _sanitise_for_json(value: Any, *, max_depth: int = 8, _seen: Optional[set[in return repr(value) class ProgressBar: - """Formats download progress with visual bar, speed, ETA, and file size.""" + """Rich progress helper for byte-based transfers. + + Opinionated: requires `rich` and always renders via Rich. + """ def __init__(self, width: Optional[int] = None): - """Initialize progress bar with optional custom width. - - Args: - width: Terminal width, defaults to auto-detect. - """ + """Initialize progress bar with optional custom width.""" if width is None: width = shutil.get_terminal_size((80, 20))[0] self.width = max(40, width) # Minimum 40 chars for readability + self._console: Optional[Console] = None + self._progress: Optional[Progress] = None + self._task_id: Optional[TaskID] = None + + def _ensure_started(self, *, label: str, total: Optional[int], file: Any = None) -> None: + if self._progress is not None and self._task_id is not None: + if total is not None and total > 0: + self._progress.update(self._task_id, total=int(total)) + return + + stream = file if file is not None else sys.stderr + console = Console(file=stream) + progress = Progress( + TextColumn("[progress.description]{task.description}"), + BarColumn(), + TaskProgressColumn(), + DownloadColumn(), + TransferSpeedColumn(), + TimeRemainingColumn(), + console=console, + transient=True, + ) + progress.start() + + task_total = int(total) if isinstance(total, int) and total > 0 else None + task_id: TaskID = progress.add_task(str(label or "download"), total=task_total) + + self._console = console + self._progress = progress + self._task_id = task_id + + def update( + self, + *, + downloaded: Optional[int], + total: Optional[int], + label: str = "download", + file: Any = None, + ) -> None: + if downloaded is None and total is None: + return + self._ensure_started(label=label, total=total, file=file) + if self._progress is None or self._task_id is None: + return + if total is not None and total > 0: + self._progress.update(self._task_id, completed=int(downloaded or 0), total=int(total), refresh=True) + else: + self._progress.update(self._task_id, completed=int(downloaded or 0), refresh=True) + + def finish(self) -> None: + if self._progress is None: + return + try: + self._progress.stop() + finally: + self._console = None + self._progress = None + self._task_id = None + def format_bytes(self, bytes_val: Optional[float]) -> str: """Format bytes to human-readable size. @@ -513,152 +477,7 @@ class ProgressBar: return f"{bytes_val:.1f} PB" - def format_speed(self, speed_str: Optional[str]) -> str: - """Format download speed. - - Args: - speed_str: Speed string from yt-dlp (e.g., "1.23MiB/s"). - - Returns: - Formatted speed string or "?.? KB/s". - """ - if not speed_str or speed_str.strip() == "": - return "?.? KB/s" - return speed_str.strip() - - def format_eta(self, eta_str: Optional[str]) -> str: - """Format estimated time remaining. - - Args: - eta_str: ETA string from yt-dlp (e.g., "00:12:34"). - - Returns: - Formatted ETA string or "?:?:?". - """ - if not eta_str or eta_str.strip() == "": - return "?:?:?" - return eta_str.strip() - - def format_percent(self, percent_str: Optional[str]) -> float: - """Extract percent as float. - - Args: - percent_str: Percent string from yt-dlp (e.g., "45.2%"). - - Returns: - Float 0-100 or 0 if invalid. 
- """ - if not percent_str: - return 0.0 - try: - return float(percent_str.replace("%", "").strip()) - except ValueError: - return 0.0 - - def build_bar(self, percent: float, width: int = 30) -> str: - """Build ASCII progress bar. - - Args: - percent: Completion percentage (0-100). - width: Bar width in characters. - - Returns: - Progress bar string (e.g., "[████████░░░░░░░░░░░░░░░░░░]"). - """ - percent = max(0, min(100, percent)) # Clamp to 0-100 - filled = int(percent * width / 100) - empty = width - filled - - # Use box-drawing characters for nice appearance - bar = "█" * filled + "░" * empty - return f"[{bar}]" - - def format_progress( - self, - percent_str: Optional[str] = None, - downloaded: Optional[int] = None, - total: Optional[int] = None, - speed_str: Optional[str] = None, - eta_str: Optional[str] = None, - ) -> str: - """Format complete progress line. - - Args: - percent_str: Percent string (e.g., "45.2%"). - downloaded: Downloaded bytes. - total: Total bytes. - speed_str: Speed string (e.g., "1.23MiB/s"). - eta_str: ETA string (e.g., "00:12:34"). - - Returns: - Formatted progress string. - """ - percent = self.format_percent(percent_str) - # Some callers (e.g. yt-dlp hooks) may not provide a stable percent string. - # When we have byte counts, derive percent from them so the bar advances. - if (not percent_str or percent == 0.0) and downloaded is not None and total is not None and total > 0: - try: - percent = (float(downloaded) / float(total)) * 100.0 - except Exception: - percent = percent - bar = self.build_bar(percent) - - # Format sizes - if downloaded is not None and total is not None and total > 0: - size_str = f"{self.format_bytes(downloaded)} / {self.format_bytes(total)}" - elif total is not None and total > 0: - size_str = f"/ {self.format_bytes(total)}" - elif downloaded is not None and downloaded > 0: - size_str = f"{self.format_bytes(downloaded)} downloaded" - else: - size_str = "" - - speed = self.format_speed(speed_str) - eta = self.format_eta(eta_str) - - # Build complete line - # Format: [████░░░░] 45.2% | 125.5 MB / 278.3 MB | 1.23 MB/s | ETA 00:12:34 - parts = [ - bar, - f"{percent:5.1f}%", - ] - - if size_str: - parts.append(f"| {size_str}") - - parts.append(f"| {speed}") - parts.append(f"| ETA {eta}") - - return " ".join(parts) - - def format_summary( - self, - total: Optional[int] = None, - speed_str: Optional[str] = None, - elapsed_str: Optional[str] = None, - ) -> str: - """Format completion summary. - - Args: - total: Total bytes downloaded. - speed_str: Average speed. - elapsed_str: Total time elapsed. - - Returns: - Summary string. - """ - parts = ["✓ Download complete"] - - if total is not None and total > 0: - parts.append(f"| {self.format_bytes(total)}") - - if speed_str: - parts.append(f"| {speed_str.strip()}") - - if elapsed_str: - parts.append(f"| {elapsed_str.strip()}") - - return " ".join(parts) + # NOTE: rich.Progress handles the visual formatting; format_bytes remains as a general utility. 
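+    #
+    # Minimal usage sketch (mirrors how ProgressFileReader drives the bar below):
+    #
+    #     bar = ProgressBar()
+    #     bar.update(downloaded=512, total=2048, label="upload", file=sys.stderr)
+    #     bar.finish()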
class ProgressFileReader: @@ -687,34 +506,14 @@ class ProgressFileReader: now = time.time() if now - self._last < self._min_interval_s: return - elapsed = max(0.001, now - self._start) - speed = float(self._read) / elapsed - eta_s = (float(self._total) - float(self._read)) / speed if speed > 0 else 0.0 - - minutes, seconds = divmod(int(max(0.0, eta_s)), 60) - hours, minutes = divmod(minutes, 60) - eta_str = f"{hours:02d}:{minutes:02d}:{seconds:02d}" - speed_str = self._bar.format_bytes(speed) + "/s" - percent = (float(self._read) / float(self._total)) * 100.0 if self._total > 0 else 0.0 - - line = self._bar.format_progress( - percent_str=f"{percent:.1f}%", - downloaded=int(self._read), - total=int(self._total), - speed_str=speed_str, - eta_str=eta_str, - ) - sys.stderr.write("\r" + f"[{self._label}] " + line + " ") - sys.stderr.flush() + self._bar.update(downloaded=int(self._read), total=int(self._total), label=str(self._label or "upload"), file=sys.stderr) self._last = now def _finish(self) -> None: if self._done: return self._done = True - sys.stderr.write("\r" + (" " * 180) + "\r") - sys.stderr.write("\n") - sys.stderr.flush() + self._bar.finish() def read(self, size: int = -1) -> Any: chunk = self._f.read(size) diff --git a/pipeline.py b/pipeline.py index 39395dc..bd6f23a 100644 --- a/pipeline.py +++ b/pipeline.py @@ -19,6 +19,7 @@ PowerShell-like piping model: from __future__ import annotations import sys +import shlex from typing import Any, Dict, List, Optional, Sequence from models import PipelineStageContext @@ -76,6 +77,13 @@ _PIPELINE_LAST_SELECTION: List[int] = [] # Track the currently executing command/pipeline string for worker attribution _PIPELINE_COMMAND_TEXT: str = "" +# Track the currently executing cmdlet name so debug helpers can label objects +# with the active stage (e.g., "1 - add-file"). +_CURRENT_CMDLET_NAME: str = "" + +# Track the currently executing stage text (best-effort, quotes preserved). +_CURRENT_STAGE_TEXT: str = "" + # Shared scratchpad for cmdlet/funacts to stash structured data between stages _PIPELINE_VALUES: Dict[str, Any] = {} _PIPELINE_MISSING = object() @@ -367,6 +375,93 @@ def clear_current_command_text() -> None: _PIPELINE_COMMAND_TEXT = "" +def split_pipeline_text(pipeline_text: str) -> List[str]: + """Split a pipeline string on unquoted '|' characters. + + Preserves original quoting/spacing within each returned stage segment. 
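+
+    Example (illustrative):
+        'search-store -query "a | b" | get-file'
+        -> ['search-store -query "a | b"', 'get-file']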
+ """ + text = str(pipeline_text or "") + if not text: + return [] + + stages: List[str] = [] + buf: List[str] = [] + quote: Optional[str] = None + escape = False + + for ch in text: + if escape: + buf.append(ch) + escape = False + continue + + if ch == "\\" and quote is not None: + buf.append(ch) + escape = True + continue + + if ch in ("\"", "'"): + if quote is None: + quote = ch + elif quote == ch: + quote = None + buf.append(ch) + continue + + if ch == "|" and quote is None: + stages.append("".join(buf).strip()) + buf = [] + continue + + buf.append(ch) + + tail = "".join(buf).strip() + if tail: + stages.append(tail) + return [s for s in stages if s] + + +def get_current_command_stages() -> List[str]: + """Return the raw stage segments for the current command text.""" + return split_pipeline_text(get_current_command_text("")) + + +def set_current_stage_text(stage_text: Optional[str]) -> None: + """Record the raw stage text currently being executed.""" + global _CURRENT_STAGE_TEXT + _CURRENT_STAGE_TEXT = str(stage_text or "").strip() + + +def get_current_stage_text(default: str = "") -> str: + """Return the raw stage text currently being executed.""" + text = _CURRENT_STAGE_TEXT.strip() + return text if text else default + + +def clear_current_stage_text() -> None: + """Clear the cached stage text after a stage completes.""" + global _CURRENT_STAGE_TEXT + _CURRENT_STAGE_TEXT = "" + + +def set_current_cmdlet_name(cmdlet_name: Optional[str]) -> None: + """Record the currently executing cmdlet name (stage-local).""" + global _CURRENT_CMDLET_NAME + _CURRENT_CMDLET_NAME = str(cmdlet_name or "").strip() + + +def get_current_cmdlet_name(default: str = "") -> str: + """Return the currently executing cmdlet name (stage-local).""" + text = _CURRENT_CMDLET_NAME.strip() + return text if text else default + + +def clear_current_cmdlet_name() -> None: + """Clear the cached cmdlet name after a stage completes.""" + global _CURRENT_CMDLET_NAME + _CURRENT_CMDLET_NAME = "" + + def set_search_query(query: Optional[str]) -> None: """Set the last search query for refresh purposes.""" global _LAST_SEARCH_QUERY diff --git a/requirements.txt b/requirements.txt index e027b51..ca42b16 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,6 @@ # Core CLI and TUI frameworks typer>=0.9.0 +rich>=13.7.0 prompt-toolkit>=3.0.0 textual>=0.30.0 diff --git a/result_table.py b/result_table.py index 0e64f63..9c39891 100644 --- a/result_table.py +++ b/result_table.py @@ -12,11 +12,18 @@ Features: from __future__ import annotations from dataclasses import dataclass, field -from typing import Any, Dict, List, Optional, Callable +from typing import Any, Dict, List, Optional, Callable, Set from pathlib import Path import json import shutil +from rich.box import SIMPLE +from rich.console import Group +from rich.panel import Panel +from rich.prompt import Prompt +from rich.table import Table as RichTable +from rich.text import Text + # Optional Textual imports - graceful fallback if not available try: from textual.widgets import Tree @@ -26,7 +33,7 @@ except ImportError: def _sanitize_cell_text(value: Any) -> str: - """Coerce to a single-line, tab-free string suitable for ASCII tables.""" + """Coerce to a single-line, tab-free string suitable for terminal display.""" if value is None: return "" text = str(value) @@ -136,10 +143,15 @@ class ResultRow: def add_column(self, name: str, value: Any) -> None: """Add a column to this row.""" + # Normalize column header names. 
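+        # A "Name" header is treated as "Title" so both spellings land in the same column.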
+ normalized_name = str(name or "").strip() + if normalized_name.lower() == "name": + normalized_name = "Title" + str_value = _sanitize_cell_text(value) # Normalize extension columns globally and cap to 5 characters - if str(name).strip().lower() == "ext": + if normalized_name.lower() == "ext": str_value = str_value.strip().lstrip(".") for idx, ch in enumerate(str_value): if not ch.isalnum(): @@ -147,7 +159,7 @@ class ResultRow: break str_value = str_value[:5] - self.columns.append(ResultColumn(name, str_value)) + self.columns.append(ResultColumn(normalized_name, str_value)) def get_column(self, name: str) -> Optional[str]: """Get column value by name.""" @@ -195,6 +207,30 @@ class ResultTable: preserve_order: When True, skip automatic sorting so row order matches source """ self.title = title + try: + import pipeline as ctx + + cmdlet_name = "" + try: + cmdlet_name = ctx.get_current_cmdlet_name("") if hasattr(ctx, "get_current_cmdlet_name") else "" + except Exception: + cmdlet_name = "" + + stage_text = "" + try: + stage_text = ctx.get_current_stage_text("") if hasattr(ctx, "get_current_stage_text") else "" + except Exception: + stage_text = "" + + if cmdlet_name and stage_text: + normalized_cmd = str(cmdlet_name).replace("_", "-").strip().lower() + normalized_title = str(self.title or "").strip().lower() + normalized_stage = str(stage_text).strip() + if normalized_stage and normalized_stage.lower().startswith(normalized_cmd): + if (not normalized_title) or normalized_title.replace("_", "-").startswith(normalized_cmd): + self.title = normalized_stage + except Exception: + pass self.title_width = title_width self.max_columns = max_columns if max_columns is not None else 5 # Default 5 for cleaner display self.rows: List[ResultRow] = [] @@ -214,6 +250,26 @@ class ResultTable: self.table: Optional[str] = None """Table type (e.g., 'youtube', 'soulseek') for context-aware selection logic.""" + self.value_case: str = "lower" + """Display-only value casing: 'lower' (default), 'upper', or 'preserve'.""" + + def set_value_case(self, value_case: str) -> "ResultTable": + """Configure display-only casing for rendered cell values.""" + case = str(value_case or "").strip().lower() + if case not in {"lower", "upper", "preserve"}: + case = "lower" + self.value_case = case + return self + + def _apply_value_case(self, text: str) -> str: + if not text: + return "" + if self.value_case == "upper": + return text.upper() + if self.value_case == "preserve": + return text + return text.lower() + def set_table(self, table: str) -> "ResultTable": """Set the table type for context-aware selection logic.""" self.table = table @@ -459,7 +515,7 @@ class ResultTable: # Size (for files) if hasattr(result, 'size_bytes') and result.size_bytes: - row.add_column("Size (Mb)", _format_size(result.size_bytes, integer_only=True)) + row.add_column("Size", _format_size(result.size_bytes, integer_only=False)) # Annotations if hasattr(result, 'annotations') and result.annotations: @@ -505,9 +561,9 @@ class ResultTable: elif getattr(item, 'store', None): row.add_column("Storage", str(getattr(item, 'store'))) - # Size (for files) - integer MB only + # Size (for files) if hasattr(item, 'size_bytes') and item.size_bytes: - row.add_column("Size (Mb)", _format_size(item.size_bytes, integer_only=True)) + row.add_column("Size", _format_size(item.size_bytes, integer_only=False)) def _add_tag_item(self, row: ResultRow, item: Any) -> None: """Extract and add TagItem fields to row (compact tag display). 
@@ -575,9 +631,9 @@ class ResultTable: Priority field groups (first match per group): - title | name | filename - - ext - - size | size_bytes - store | table | source + - size | size_bytes + - ext """ # Helper to determine if a field should be hidden from display def is_hidden_field(field_name: Any) -> bool: @@ -670,9 +726,9 @@ class ResultTable: # Explicitly set which columns to display in order priority_groups = [ ('title', ['title', 'name', 'filename']), - ('ext', ['ext']), - ('size', ['size', 'size_bytes']), ('store', ['store', 'table', 'source']), + ('size', ['size', 'size_bytes']), + ('ext', ['ext']), ] # Add priority field groups first - use first match in each group @@ -681,9 +737,9 @@ class ResultTable: break for field in field_options: if field in visible_data and field not in added_fields: - # Special handling for size fields - format as MB integer + # Special handling for size fields - format with unit and decimals if field in ['size', 'size_bytes']: - value_str = _format_size(visible_data[field], integer_only=True) + value_str = _format_size(visible_data[field], integer_only=False) else: value_str = format_value(visible_data[field]) @@ -694,7 +750,7 @@ class ResultTable: if field in ['store', 'table', 'source']: col_name = "Store" elif field in ['size', 'size_bytes']: - col_name = "Size (Mb)" + col_name = "Size" elif field in ['title', 'name', 'filename']: col_name = "Title" else: @@ -727,115 +783,56 @@ class ResultTable: row.add_column(key.replace('_', ' ').title(), value_str) - def format_plain(self) -> str: - """Format table as plain text with aligned columns and row numbers. - - Returns: - Formatted table string - """ + def to_rich(self): + """Return a Rich renderable representing this table.""" if not self.rows: - return "No results" + empty = Text("No results") + return Panel(empty, title=self.title) if self.title else empty - # Cap rendering to terminal width so long tables don't hard-wrap and - # visually break the border/shape. - term_width = shutil.get_terminal_size(fallback=(120, 24)).columns - if not term_width or term_width <= 0: - term_width = 120 - - # Calculate column widths - col_widths: Dict[str, int] = {} + col_names: List[str] = [] + seen: Set[str] = set() for row in self.rows: for col in row.columns: - col_name = col.name - value_width = len(col.value) - if col_name.lower() == "ext": - value_width = min(value_width, 5) - col_widths[col_name] = max( - col_widths.get(col_name, 0), - len(col.name), - value_width - ) - - # Calculate row number column width (skip if no-choice) - num_width = 0 if self.no_choice else len(str(len(self.rows))) + 1 + if col.name not in seen: + seen.add(col.name) + col_names.append(col.name) - # Preserve column order - column_names = list(col_widths.keys()) + table = RichTable( + show_header=True, + header_style="bold", + box=SIMPLE, + expand=True, + show_lines=False, + ) - def capped_width(name: str) -> int: + if not self.no_choice: + table.add_column("#", justify="right", no_wrap=True) + + # Render headers in uppercase, but keep original column keys for lookup. + header_by_key: Dict[str, str] = {name: str(name).upper() for name in col_names} + + for name in col_names: + header = header_by_key.get(name, str(name).upper()) if name.lower() == "ext": - cap = 5 + table.add_column(header, no_wrap=True) else: - # Single-column tables (e.g., get-tag) can use more horizontal space, - # but still must stay within the terminal to avoid hard wrapping. - if len(column_names) == 1: - # Keep room for side walls and optional row-number column. 
- cap = max(30, min(240, term_width - 6)) - else: - cap = 90 - return min(col_widths[name], cap) + table.add_column(header) - widths = ([] if self.no_choice else [num_width]) + [capped_width(name) for name in column_names] - base_inner_width = sum(widths) + (len(widths) - 1) * 3 # account for " | " separators + for row_idx, row in enumerate(self.rows, 1): + cells: List[str] = [] + if not self.no_choice: + cells.append(str(row_idx)) + for name in col_names: + val = row.get_column(name) or "" + cells.append(self._apply_value_case(_sanitize_cell_text(val))) + table.add_row(*cells) - # Compute final table width (with side walls) to accommodate headers/titles - table_width = base_inner_width + 2 # side walls - if self.title: - table_width = max(table_width, len(self.title) + 2) - if self.header_lines: - table_width = max(table_width, max(len(line) for line in self.header_lines) + 2) + if self.title or self.header_lines: + header_bits = [Text(line) for line in (self.header_lines or [])] + renderable = Group(*header_bits, table) if header_bits else table + return Panel(renderable, title=self.title) if self.title else renderable - # Ensure final render doesn't exceed terminal width (minus 1 safety column). - safe_term_width = max(20, term_width - 1) - table_width = min(table_width, safe_term_width) - - def wrap(text: str) -> str: - """Wrap content with side walls and pad to table width.""" - if len(text) > table_width - 2: - text = text[: table_width - 5] + "..." # keep walls intact - return "|" + text.ljust(table_width - 2) + "|" - - lines = [] - - # Title block - if self.title: - lines.append("|" + "=" * (table_width - 2) + "|") - safe_title = _sanitize_cell_text(self.title) - lines.append(wrap(safe_title.ljust(table_width - 2))) - lines.append("|" + "=" * (table_width - 2) + "|") - - # Optional header metadata lines - for meta in self.header_lines: - safe_meta = _sanitize_cell_text(meta) - lines.append(wrap(safe_meta)) - - # Add header with # column - header_parts = [] if self.no_choice else ["#".ljust(num_width)] - separator_parts = [] if self.no_choice else ["-" * num_width] - for col_name in column_names: - width = capped_width(col_name) - header_parts.append(col_name.ljust(width)) - separator_parts.append("-" * width) - - lines.append(wrap(" | ".join(header_parts))) - lines.append(wrap("-+-".join(separator_parts))) - - # Add rows with row numbers - for row_num, row in enumerate(self.rows, 1): - row_parts = [] if self.no_choice else [str(row_num).ljust(num_width)] - for col_name in column_names: - width = capped_width(col_name) - col_value = row.get_column(col_name) or "" - col_value = _sanitize_cell_text(col_value) - if len(col_value) > width: - col_value = col_value[: width - 3] + "..." - row_parts.append(col_value.ljust(width)) - lines.append(wrap(" | ".join(row_parts))) - - # Bottom border to close the rectangle - lines.append("|" + "=" * (table_width - 2) + "|") - - return "\n".join(lines) + return table def format_compact(self) -> str: """Format table in compact form (one line per row). @@ -880,8 +877,16 @@ class ResultTable: } def __str__(self) -> str: - """String representation (plain text format).""" - return self.format_plain() + """String representation. + + Rich is the primary rendering path. This keeps accidental `print(table)` + usage from emitting ASCII box-drawn tables. 
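+
+        Example (hypothetical title and row count):
+
+            >>> str(table)
+            'Search Results (3 rows)'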
+ """ + label = self.title or "ResultTable" + return f"{label} ({len(self.rows)} rows)" + + def __rich__(self): + return self.to_rich() def __repr__(self) -> str: """Developer representation.""" @@ -921,20 +926,24 @@ class ResultTable: If accept_args=True: Dict with "indices" and "args" keys, or None if cancelled """ if self.no_choice: - print(f"\n{self}") - print("Selection is disabled for this table.") + from rich_display import stdout_console + + stdout_console().print(self) + stdout_console().print(Panel(Text("Selection is disabled for this table."))) return None # Display the table - print(f"\n{self}") + from rich_display import stdout_console + + stdout_console().print(self) # Get user input while True: try: if accept_args: - choice = input(f"\n{prompt} (e.g., '5' or '2 -storage hydrus' or 'q' to quit): ").strip() + choice = Prompt.ask(f"{prompt} (e.g., '5' or '2 -storage hydrus' or 'q' to quit)").strip() else: - choice = input(f"\n{prompt} (e.g., '5' or '3-5' or '1,3,5' or 'q' to quit): ").strip() + choice = Prompt.ask(f"{prompt} (e.g., '5' or '3-5' or '1,3,5' or 'q' to quit)").strip() if choice.lower() == 'q': return None @@ -944,18 +953,18 @@ class ResultTable: result = self._parse_selection_with_args(choice) if result is not None: return result - print(f"Invalid format. Use: selection (5 or 3-5 or 1,3,5) optionally followed by flags (e.g., '5 -storage hydrus').") + stdout_console().print(Panel(Text("Invalid format. Use: selection (5 or 3-5 or 1,3,5) optionally followed by flags (e.g., '5 -storage hydrus')."))) else: # Parse just the selection selected_indices = self._parse_selection(choice) if selected_indices is not None: return selected_indices - print(f"Invalid format. Use: single (5), range (3-5), list (1,3,5), combined (1-3,7,9-11), or 'q' to quit.") + stdout_console().print(Panel(Text("Invalid format. Use: single (5), range (3-5), list (1,3,5), combined (1-3,7,9-11), or 'q' to quit."))) except (ValueError, EOFError): if accept_args: - print(f"Invalid format. Use: selection (5 or 3-5 or 1,3,5) optionally followed by flags (e.g., '5 -storage hydrus').") + stdout_console().print(Panel(Text("Invalid format. Use: selection (5 or 3-5 or 1,3,5) optionally followed by flags (e.g., '5 -storage hydrus')."))) else: - print(f"Invalid format. Use: single (5), range (3-5), list (1,3,5), combined (1-3,7,9-11), or 'q' to quit.") + stdout_console().print(Panel(Text("Invalid format. Use: single (5), range (3-5), list (1,3,5), combined (1-3,7,9-11), or 'q' to quit."))) def _parse_selection(self, selection_str: str) -> Optional[List[int]]: """Parse user selection string into list of 0-based indices. 
@@ -1317,10 +1326,10 @@ def _format_size(size: Any, integer_only: bool = False) -> str: Args: size: Size in bytes or already formatted string - integer_only: If True, show MB as integer only (e.g., "250 MB" not "250.5 MB") + integer_only: If True, show MB as an integer (e.g., "250 MB") Returns: - Formatted size string (e.g., "250 MB", "1.5 MB" or "250 MB" if integer_only=True) + Formatted size string with units (e.g., "3.53 MB", "0.57 MB", "1.2 GB") """ if isinstance(size, str): return size if size else "" @@ -1329,23 +1338,22 @@ def _format_size(size: Any, integer_only: bool = False) -> str: bytes_val = int(size) if bytes_val < 0: return "" - - if integer_only: - # For table display: always show as integer MB if >= 1MB - mb_val = int(bytes_val / (1024 * 1024)) - if mb_val > 0: - return str(mb_val) - kb_val = int(bytes_val / 1024) - if kb_val > 0: - return str(kb_val) - return str(bytes_val) + + # Keep display consistent with the CLI expectation: show MB with unit + # (including values under 1 MB as fractional MB), and show GB for very + # large sizes. + if bytes_val >= 1024**3: + value = bytes_val / (1024**3) + unit = "GB" else: - # For descriptions: show with one decimal place - for unit, divisor in [("GB", 1024**3), ("MB", 1024**2), ("KB", 1024)]: - if bytes_val >= divisor: - return f"{bytes_val / divisor:.1f} {unit}" - - return f"{bytes_val} B" + value = bytes_val / (1024**2) + unit = "MB" + + if integer_only: + return f"{int(round(value))} {unit}" + + num = f"{value:.2f}".rstrip("0").rstrip(".") + return f"{num} {unit}" except (ValueError, TypeError): return "" diff --git a/rich_display.py b/rich_display.py new file mode 100644 index 0000000..2bcee4d --- /dev/null +++ b/rich_display.py @@ -0,0 +1,39 @@ +"""Central Rich output helpers. + +Opinionated: `rich` is a required dependency. + +This module centralizes Console instances so tables/panels render consistently and +so callers can choose stdout vs stderr explicitly (important for pipeline-safe +output). 
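+
+Minimal usage sketch (assumes `table` is any Rich renderable, such as a
+ResultTable, which implements __rich__):
+
+    from rich_display import stdout_console, stderr_console
+
+    stdout_console().print(table)         # selectable results go to stdout
+    stderr_console().print("working...")  # status stays off stdout (pipeline-safe)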
+"""
+
+from __future__ import annotations
+
+import sys
+from typing import Any, TextIO
+
+from rich.console import Console
+
+
+_STDOUT_CONSOLE = Console(file=sys.stdout)
+_STDERR_CONSOLE = Console(file=sys.stderr)
+
+
+def stdout_console() -> Console:
+    return _STDOUT_CONSOLE
+
+
+def stderr_console() -> Console:
+    return _STDERR_CONSOLE
+
+
+def console_for(file: TextIO | None) -> Console:
+    if file is None or file is sys.stdout:
+        return _STDOUT_CONSOLE
+    if file is sys.stderr:
+        return _STDERR_CONSOLE
+    return Console(file=file)
+
+
+def rprint(renderable: Any = "", *, file: TextIO | None = None) -> None:
+    console_for(file).print(renderable)
diff --git a/tool/ytdlp.py b/tool/ytdlp.py
index b774675..f379565 100644
--- a/tool/ytdlp.py
+++ b/tool/ytdlp.py
@@ -1,10 +1,14 @@
 from __future__ import annotations
 
+import os
+
 from dataclasses import dataclass
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Sequence
 
 from SYS.logger import debug
+from SYS.utils import ensure_directory
+from models import DownloadOptions
 
 
 def _get_nested(config: Dict[str, Any], *path: str) -> Any:
@@ -140,6 +144,124 @@ class YtDlpTool:
             return self.defaults.audio_format
         return self.defaults.video_format
 
+    def build_ytdlp_options(self, opts: DownloadOptions) -> Dict[str, Any]:
+        """Translate DownloadOptions into yt-dlp API options."""
+        ensure_directory(opts.output_dir)
+        outtmpl = str((opts.output_dir / "%(title)s.%(ext)s").resolve())
+        base_options: Dict[str, Any] = {
+            "outtmpl": outtmpl,
+            "quiet": True,
+            "no_warnings": True,
+            "noprogress": True,
+            "socket_timeout": 30,
+            "retries": 10,
+            "fragment_retries": 10,
+            "http_chunk_size": 10_485_760,
+            "restrictfilenames": True,
+        }
+
+        try:
+            repo_root = Path(__file__).resolve().parents[1]
+            bundled_ffmpeg_dir = repo_root / "MPV" / "ffmpeg" / "bin"
+            if bundled_ffmpeg_dir.exists():
+                base_options.setdefault("ffmpeg_location", str(bundled_ffmpeg_dir))
+        except Exception:
+            pass
+
+        try:
+            if os.name == "nt":
+                base_options.setdefault("file_access_retries", 40)
+        except Exception:
+            pass
+
+        if opts.cookies_path and opts.cookies_path.is_file():
+            base_options["cookiefile"] = str(opts.cookies_path)
+        else:
+            cookiefile = self.resolve_cookiefile()
+            if cookiefile is not None and cookiefile.is_file():
+                base_options["cookiefile"] = str(cookiefile)
+
+        if opts.no_playlist:
+            base_options["noplaylist"] = True
+
+        fmt = opts.ytdl_format or self.default_format(opts.mode)
+        base_options["format"] = fmt
+
+        if opts.mode == "audio":
+            base_options["postprocessors"] = [{"key": "FFmpegExtractAudio"}]
+        else:
+            format_sort = self.defaults.format_sort or [
+                "res:4320",
+                "res:2880",
+                "res:2160",
+                "res:1440",
+                "res:1080",
+                "res:720",
+                "res",
+            ]
+            base_options["format_sort"] = format_sort
+
+        if getattr(opts, "embed_chapters", False):
+            pps = base_options.get("postprocessors")
+            if not isinstance(pps, list):
+                pps = []
+            already_has_metadata = any(
+                isinstance(pp, dict) and str(pp.get("key") or "") == "FFmpegMetadata" for pp in pps
+            )
+            if not already_has_metadata:
+                pps.append(
+                    {
+                        "key": "FFmpegMetadata",
+                        "add_metadata": True,
+                        "add_chapters": True,
+                        "add_infojson": "if_exists",
+                    }
+                )
+            base_options["postprocessors"] = pps
+
+        if opts.mode != "audio":
+            base_options.setdefault("merge_output_format", "mkv")
+
+        if getattr(opts, "write_sub", False):
+            base_options["writesubtitles"] = True
+            base_options["writeautomaticsub"] = True
+            base_options["subtitlesformat"] = "vtt"
+
+        if opts.clip_sections:
+            sections: List[str] = []
+
+            def _secs_to_hms(seconds: float) -> str:
+                total = max(0, int(seconds))
+                minutes, secs = divmod(total, 60)
+                hours, minutes = divmod(minutes, 60)
+                return f"{hours:02d}:{minutes:02d}:{secs:02d}"
+
+            for section_range in str(opts.clip_sections).split(","):
+                section_range = section_range.strip()
+                if not section_range:
+                    continue
+                try:
+                    start_s_raw, end_s_raw = section_range.split("-", 1)
+                    start_s = float(start_s_raw.strip())
+                    end_s = float(end_s_raw.strip())
+                    if start_s >= end_s:
+                        continue
+                    sections.append(f"*{_secs_to_hms(start_s)}-{_secs_to_hms(end_s)}")
+                except (ValueError, AttributeError):
+                    continue
+
+            if sections:
+                base_options["download_sections"] = sections
+                debug(f"Download sections configured: {', '.join(sections)}")
+
+        if opts.playlist_items:
+            base_options["playlist_items"] = opts.playlist_items
+
+        if not opts.quiet:
+            debug(f"yt-dlp: mode={opts.mode}, format={base_options.get('format')}")
+
+        return base_options
+
     def build_yt_dlp_cli_args(
         self,
         *,