nose
2025-12-22 02:11:53 -08:00
parent d0b821b5dd
commit 16316bb3fd
20 changed files with 4218 additions and 2422 deletions


@@ -147,7 +147,11 @@ class HydrusNetwork:
         file_size = file_path.stat().st_size
         headers["Content-Type"] = spec.content_type or "application/octet-stream"
-        headers["Content-Length"] = str(file_size)
+        # Do not set Content-Length when streaming an iterator body.
+        # If the file size changes between stat() and read() (or the source is truncated),
+        # h11 will raise: "Too little data for declared Content-Length".
+        # Let httpx choose chunked transfer encoding for safety.
+        headers.pop("Content-Length", None)
         logger.debug(f"{self._log_prefix()} Uploading file {file_path.name} ({file_size} bytes)")

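Why this matters, as a minimal sketch (assuming httpx; the URL and client wiring are illustrative, not from this commit): with an iterator body, a Content-Length computed from an earlier stat() can disagree with the bytes actually read, and h11 aborts mid-request. Omitting the header makes httpx fall back to chunked transfer:

import httpx

def upload_streaming(path: str, url: str) -> httpx.Response:
    def body():
        with open(path, "rb") as f:
            while chunk := f.read(64 * 1024):
                yield chunk
    # No Content-Length header: httpx sends "Transfer-Encoding: chunked",
    # so a file that shrinks or grows mid-upload cannot trip h11's length check.
    headers = {"Content-Type": "application/octet-stream"}
    with httpx.Client() as client:
        return client.post(url, content=body(), headers=headers)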
CLI.py

@@ -1245,25 +1245,17 @@ class PipelineExecutor:
         stages.append(current)
         return stages

-    def execute_tokens(self, tokens: List[str]) -> None:
-        from cmdlet import REGISTRY
-        import pipeline as ctx
-        try:
-            try:
-                if hasattr(ctx, "clear_pipeline_stop"):
-                    ctx.clear_pipeline_stop()
-            except Exception:
-                pass
-            stages = self._split_stages(tokens)
-            if not stages:
-                print("Invalid pipeline syntax\n")
-                return
-            pending_tail = ctx.get_pending_pipeline_tail() if hasattr(ctx, "get_pending_pipeline_tail") else []
-            pending_source = ctx.get_pending_pipeline_source() if hasattr(ctx, "get_pending_pipeline_source") else None
+    @staticmethod
+    def _try_clear_pipeline_stop(ctx: Any) -> None:
+        try:
+            if hasattr(ctx, "clear_pipeline_stop"):
+                ctx.clear_pipeline_stop()
+        except Exception:
+            pass
+
+    @staticmethod
+    def _maybe_seed_current_stage_table(ctx: Any) -> None:
+        try:
             if hasattr(ctx, "get_current_stage_table") and not ctx.get_current_stage_table():
                 display_table = ctx.get_display_table() if hasattr(ctx, "get_display_table") else None
                 if display_table:
@@ -1272,188 +1264,512 @@ class PipelineExecutor:
                 last_table = ctx.get_last_result_table() if hasattr(ctx, "get_last_result_table") else None
                 if last_table:
                     ctx.set_current_stage_table(last_table)
-            current_source = (
-                ctx.get_current_stage_table_source_command() if hasattr(ctx, "get_current_stage_table_source_command") else None
-            )
-            effective_source = current_source or (
-                ctx.get_last_result_table_source_command() if hasattr(ctx, "get_last_result_table_source_command") else None
-            )
-            selection_only = len(stages) == 1 and stages[0] and stages[0][0].startswith("@")
-            if pending_tail and selection_only:
-                if (pending_source is None) or (effective_source and pending_source == effective_source):
-                    stages.extend(pending_tail)
-                    if hasattr(ctx, "clear_pending_pipeline_tail"):
-                        ctx.clear_pending_pipeline_tail()
-                elif hasattr(ctx, "clear_pending_pipeline_tail"):
-                    ctx.clear_pending_pipeline_tail()
-            config = self._config_loader.load()
-            if isinstance(config, dict):
-                # This executor is used by both the REPL and the `pipeline` subcommand.
-                # Quiet/background mode is helpful for detached/background runners, but
-                # it suppresses interactive UX (like the pipeline Live progress UI).
-                config["_quiet_background_output"] = bool(self._toolbar_output is None)
-
-            def _resolve_items_for_selection(table_obj, items_list):
-                return items_list if items_list else []
-
-            def _maybe_run_class_selector(selected_items: list, *, stage_is_last: bool) -> bool:
-                if not stage_is_last:
-                    return False
-                candidates: list[str] = []
-                seen: set[str] = set()
-
-                def _add(value) -> None:
-                    try:
-                        text = str(value or "").strip().lower()
-                    except Exception:
-                        return
-                    if not text or text in seen:
-                        return
-                    seen.add(text)
-                    candidates.append(text)
-
-                try:
-                    current_table = ctx.get_current_stage_table() or ctx.get_last_result_table()
-                    _add(current_table.table if current_table and hasattr(current_table, "table") else None)
-                except Exception:
-                    pass
-                else:
-                    for item in selected_items or []:
-                        if isinstance(item, dict):
-                            _add(item.get("provider"))
-                            _add(item.get("store"))
-                            _add(item.get("table"))
-                        else:
-                            _add(getattr(item, "provider", None))
-                            _add(getattr(item, "store", None))
-                            _add(getattr(item, "table", None))
-                try:
-                    from ProviderCore.registry import get_provider, is_known_provider_name
-                except Exception:
-                    get_provider = None  # type: ignore
-                    is_known_provider_name = None  # type: ignore
-                if get_provider is not None:
-                    for key in candidates:
-                        try:
-                            if is_known_provider_name is not None and (not is_known_provider_name(key)):
-                                continue
-                        except Exception:
-                            # If the predicate fails for any reason, fall back to legacy behavior.
-                            pass
-                        try:
-                            provider = get_provider(key, config)
-                        except Exception:
-                            continue
-                        selector = getattr(provider, "selector", None)
-                        if selector is None:
-                            continue
-                        try:
-                            handled = bool(selector(selected_items, ctx=ctx, stage_is_last=True))
-                        except Exception as exc:
-                            print(f"{key} selector failed: {exc}\n")
-                            return True
-                        if handled:
-                            return True
-                store_keys: list[str] = []
-                for item in selected_items or []:
-                    if isinstance(item, dict):
-                        v = item.get("store")
-                    else:
-                        v = getattr(item, "store", None)
-                    name = str(v or "").strip()
-                    if name:
-                        store_keys.append(name)
-                if store_keys:
-                    try:
-                        from Store.registry import Store as StoreRegistry
-
-                        store_registry = StoreRegistry(config, suppress_debug=True)
-                        _backend_names = list(store_registry.list_backends() or [])
-                        _backend_by_lower = {str(n).lower(): str(n) for n in _backend_names if str(n).strip()}
-                        for name in store_keys:
-                            resolved_name = name
-                            if not store_registry.is_available(resolved_name):
-                                resolved_name = _backend_by_lower.get(str(name).lower(), name)
-                            if not store_registry.is_available(resolved_name):
-                                continue
-                            backend = store_registry[resolved_name]
-                            selector = getattr(backend, "selector", None)
-                            if selector is None:
-                                continue
-                            handled = bool(selector(selected_items, ctx=ctx, stage_is_last=True))
-                            if handled:
-                                return True
-                    except Exception:
-                        pass
-                return False
-
-            first_stage_tokens = stages[0] if stages else []
-            first_stage_selection_indices: List[int] = []
-            first_stage_had_extra_args = False
-            first_stage_select_all = False
-            if first_stage_tokens:
-                new_first_stage: List[str] = []
-                for token in first_stage_tokens:
-                    if token.startswith("@"):  # selection
-                        selection = SelectionSyntax.parse(token)
-                        if selection is not None:
-                            first_stage_selection_indices = sorted([i - 1 for i in selection])
-                            continue
-                        if token == "@*":
-                            first_stage_select_all = True
-                            continue
-                    new_first_stage.append(token)
-                if new_first_stage:
-                    stages[0] = new_first_stage
-                    if first_stage_selection_indices or first_stage_select_all:
-                        first_stage_had_extra_args = True
-                elif first_stage_selection_indices or first_stage_select_all:
-                    stages.pop(0)
-            if first_stage_select_all:
-                last_items = ctx.get_last_result_items()
-                if last_items:
-                    first_stage_selection_indices = list(range(len(last_items)))
+        except Exception:
+            pass
+
+    @staticmethod
+    def _maybe_apply_pending_pipeline_tail(ctx: Any, stages: List[List[str]]) -> List[List[str]]:
+        try:
+            pending_tail = ctx.get_pending_pipeline_tail() if hasattr(ctx, "get_pending_pipeline_tail") else []
+            pending_source = ctx.get_pending_pipeline_source() if hasattr(ctx, "get_pending_pipeline_source") else None
+        except Exception:
+            pending_tail = []
+            pending_source = None
+        try:
+            current_source = (
+                ctx.get_current_stage_table_source_command()
+                if hasattr(ctx, "get_current_stage_table_source_command")
+                else None
+            )
+        except Exception:
+            current_source = None
+        try:
+            effective_source = current_source or (
+                ctx.get_last_result_table_source_command()
+                if hasattr(ctx, "get_last_result_table_source_command")
+                else None
+            )
+        except Exception:
+            effective_source = current_source
+        selection_only = bool(len(stages) == 1 and stages[0] and stages[0][0].startswith("@"))
+        if pending_tail and selection_only:
+            if (pending_source is None) or (effective_source and pending_source == effective_source):
+                stages = list(stages) + list(pending_tail)
+                try:
+                    if hasattr(ctx, "clear_pending_pipeline_tail"):
+                        ctx.clear_pending_pipeline_tail()
+                except Exception:
+                    pass
+            else:
+                try:
+                    if hasattr(ctx, "clear_pending_pipeline_tail"):
+                        ctx.clear_pending_pipeline_tail()
+                except Exception:
+                    pass
+        return stages
+
+    def _apply_quiet_background_flag(self, config: Any) -> Any:
+        if isinstance(config, dict):
+            # This executor is used by both the REPL and the `pipeline` subcommand.
+            # Quiet/background mode is helpful for detached/background runners, but
+            # it suppresses interactive UX (like the pipeline Live progress UI).
+            config["_quiet_background_output"] = bool(self._toolbar_output is None)
+        return config
+
+    @staticmethod
+    def _extract_first_stage_selection_tokens(stages: List[List[str]]) -> tuple[List[List[str]], List[int], bool, bool]:
+        first_stage_tokens = stages[0] if stages else []
+        first_stage_selection_indices: List[int] = []
+        first_stage_had_extra_args = False
+        first_stage_select_all = False
+        if first_stage_tokens:
+            new_first_stage: List[str] = []
+            for token in first_stage_tokens:
+                if token.startswith("@"):  # selection
+                    selection = SelectionSyntax.parse(token)
+                    if selection is not None:
+                        first_stage_selection_indices = sorted([i - 1 for i in selection])
+                        continue
+                    if token == "@*":
+                        first_stage_select_all = True
+                        continue
+                new_first_stage.append(token)
+            if new_first_stage:
+                stages = list(stages)
+                stages[0] = new_first_stage
+                if first_stage_selection_indices or first_stage_select_all:
+                    first_stage_had_extra_args = True
+            elif first_stage_selection_indices or first_stage_select_all:
+                stages = list(stages)
+                stages.pop(0)
+        return stages, first_stage_selection_indices, first_stage_had_extra_args, first_stage_select_all
+
+    @staticmethod
+    def _apply_select_all_if_requested(ctx: Any, indices: List[int], select_all: bool) -> List[int]:
+        if not select_all:
+            return indices
+        try:
+            last_items = ctx.get_last_result_items()
+        except Exception:
+            last_items = None
+        if last_items:
+            return list(range(len(last_items)))
+        return indices
+
+    @staticmethod
+    def _maybe_run_class_selector(ctx: Any, config: Any, selected_items: list, *, stage_is_last: bool) -> bool:
+        if not stage_is_last:
+            return False
+        candidates: list[str] = []
+        seen: set[str] = set()
+
+        def _add(value) -> None:
+            try:
+                text = str(value or "").strip().lower()
+            except Exception:
+                return
+            if not text or text in seen:
+                return
+            seen.add(text)
+            candidates.append(text)
+
+        try:
+            current_table = ctx.get_current_stage_table() or ctx.get_last_result_table()
+            _add(current_table.table if current_table and hasattr(current_table, "table") else None)
+        except Exception:
+            pass
+        for item in selected_items or []:
+            if isinstance(item, dict):
+                _add(item.get("provider"))
+                _add(item.get("store"))
+                _add(item.get("table"))
+            else:
+                _add(getattr(item, "provider", None))
+                _add(getattr(item, "store", None))
+                _add(getattr(item, "table", None))
+        try:
+            from ProviderCore.registry import get_provider, is_known_provider_name
+        except Exception:
+            get_provider = None  # type: ignore
+            is_known_provider_name = None  # type: ignore
+        if get_provider is not None:
+            for key in candidates:
+                try:
+                    if is_known_provider_name is not None and (not is_known_provider_name(key)):
+                        continue
+                except Exception:
+                    # If the predicate fails for any reason, fall back to legacy behavior.
+                    pass
+                try:
+                    provider = get_provider(key, config)
+                except Exception:
+                    continue
+                selector = getattr(provider, "selector", None)
+                if selector is None:
+                    continue
+                try:
+                    handled = bool(selector(selected_items, ctx=ctx, stage_is_last=True))
+                except Exception as exc:
+                    print(f"{key} selector failed: {exc}\n")
+                    return True
+                if handled:
+                    return True
+        store_keys: list[str] = []
+        for item in selected_items or []:
+            if isinstance(item, dict):
+                v = item.get("store")
+            else:
+                v = getattr(item, "store", None)
+            name = str(v or "").strip()
+            if name:
+                store_keys.append(name)
+        if store_keys:
+            try:
+                from Store.registry import Store as StoreRegistry
+
+                store_registry = StoreRegistry(config, suppress_debug=True)
+                _backend_names = list(store_registry.list_backends() or [])
+                _backend_by_lower = {str(n).lower(): str(n) for n in _backend_names if str(n).strip()}
+                for name in store_keys:
+                    resolved_name = name
+                    if not store_registry.is_available(resolved_name):
+                        resolved_name = _backend_by_lower.get(str(name).lower(), name)
+                    if not store_registry.is_available(resolved_name):
+                        continue
+                    backend = store_registry[resolved_name]
+                    selector = getattr(backend, "selector", None)
+                    if selector is None:
+                        continue
+                    handled = bool(selector(selected_items, ctx=ctx, stage_is_last=True))
+                    if handled:
+                        return True
+            except Exception:
+                pass
+        return False
+
+    def _maybe_enable_background_notifier(self, worker_manager: Any, config: Any, pipeline_session: Any) -> None:
+        if not (pipeline_session and worker_manager and isinstance(config, dict)):
+            return
+        session_worker_ids = config.get("_session_worker_ids")
+        if not session_worker_ids:
+            return
+        try:
+            output_fn = self._toolbar_output
+            quiet_mode = bool(config.get("_quiet_background_output"))
+            terminal_only = quiet_mode and not output_fn
+            kwargs: Dict[str, Any] = {
+                "session_worker_ids": session_worker_ids,
+                "only_terminal_updates": terminal_only,
+                "overlay_mode": bool(output_fn),
+            }
+            if output_fn:
+                kwargs["output"] = output_fn
+            ensure_background_notifier(worker_manager, **kwargs)
+        except Exception:
+            pass
+
+    @staticmethod
+    def _get_raw_stage_texts(ctx: Any) -> List[str]:
+        raw_stage_texts: List[str] = []
+        try:
+            if hasattr(ctx, "get_current_command_stages"):
+                raw_stage_texts = ctx.get_current_command_stages() or []
+        except Exception:
+            raw_stage_texts = []
+        return raw_stage_texts
+
+    def _maybe_apply_initial_selection(
+        self,
+        ctx: Any,
+        config: Any,
+        stages: List[List[str]],
+        *,
+        selection_indices: List[int],
+        first_stage_had_extra_args: bool,
+        worker_manager: Any,
+        pipeline_session: Any,
+    ) -> tuple[bool, Any]:
+        if not selection_indices:
+            return True, None
+        try:
+            if not ctx.get_current_stage_table_source_command():
+                display_table = ctx.get_display_table() if hasattr(ctx, "get_display_table") else None
+                table_for_stage = display_table or ctx.get_last_result_table()
+                if table_for_stage:
+                    ctx.set_current_stage_table(table_for_stage)
+        except Exception:
+            pass
+        source_cmd = None
+        source_args_raw = None
+        try:
+            source_cmd = ctx.get_current_stage_table_source_command()
+            source_args_raw = ctx.get_current_stage_table_source_args()
+        except Exception:
+            source_cmd = None
+            source_args_raw = None
+        if isinstance(source_args_raw, str):
+            source_args: List[str] = [source_args_raw]
+        elif isinstance(source_args_raw, list):
+            source_args = [str(x) for x in source_args_raw if x is not None]
+        else:
+            source_args = []
+        current_table = None
+        try:
+            current_table = ctx.get_current_stage_table()
+        except Exception:
+            current_table = None
+        table_type = current_table.table if current_table and hasattr(current_table, "table") else None
+        command_expanded = False
+        if table_type in {"youtube", "soulseek"}:
+            command_expanded = False
+        elif source_cmd == "search-file" and source_args and "youtube" in source_args:
+            command_expanded = False
+        else:
+            selected_row_args: List[str] = []
+            skip_pipe_expansion = source_cmd == ".pipe" and len(stages) > 0
+            if source_cmd and not skip_pipe_expansion:
+                for idx in selection_indices:
+                    row_args = ctx.get_current_stage_table_row_selection_args(idx)
+                    if row_args:
+                        selected_row_args.extend(row_args)
+                        break
+            if selected_row_args:
+                if isinstance(source_cmd, list):
+                    cmd_list: List[str] = [str(x) for x in source_cmd if x is not None]
+                elif isinstance(source_cmd, str):
+                    cmd_list = [source_cmd]
+                else:
+                    cmd_list = []
+                expanded_stage: List[str] = cmd_list + source_args + selected_row_args
+                if first_stage_had_extra_args and stages:
+                    expanded_stage += stages[0]
+                    stages[0] = expanded_stage
+                else:
+                    stages.insert(0, expanded_stage)
+                if pipeline_session and worker_manager:
+                    try:
+                        worker_manager.log_step(
+                            pipeline_session.worker_id,
+                            f"@N expansion: {source_cmd} + {' '.join(str(x) for x in selected_row_args)}",
+                        )
+                    except Exception:
+                        pass
+                selection_indices = []
+                command_expanded = True
+        if (not command_expanded) and selection_indices:
+            last_piped_items = None
+            try:
+                last_piped_items = ctx.get_last_result_items()
+            except Exception:
+                last_piped_items = None
+            stage_table = None
+            try:
+                stage_table = ctx.get_current_stage_table()
+            except Exception:
+                stage_table = None
+            if not stage_table and hasattr(ctx, "get_display_table"):
+                try:
+                    stage_table = ctx.get_display_table()
+                except Exception:
+                    stage_table = None
+            if not stage_table:
+                try:
+                    stage_table = ctx.get_last_result_table()
+                except Exception:
+                    stage_table = None
+            resolved_items = last_piped_items if last_piped_items else []
+            if last_piped_items:
+                filtered = [resolved_items[i] for i in selection_indices if 0 <= i < len(resolved_items)]
+                if not filtered:
+                    print("No items matched selection in pipeline\n")
+                    return False, None
+                if PipelineExecutor._maybe_run_class_selector(ctx, config, filtered, stage_is_last=(not stages)):
+                    return False, None
+
+                from cmdlet._shared import coerce_to_pipe_object
+
+                filtered_pipe_objs = [coerce_to_pipe_object(item) for item in filtered]
+                piped_result = filtered_pipe_objs if len(filtered_pipe_objs) > 1 else filtered_pipe_objs[0]
+                if pipeline_session and worker_manager:
+                    try:
+                        selection_parts = [f"@{i+1}" for i in selection_indices]
+                        worker_manager.log_step(
+                            pipeline_session.worker_id,
+                            f"Applied @N selection {' | '.join(selection_parts)}",
+                        )
+                    except Exception:
+                        pass
+                # Auto-insert downloader stages for provider tables.
+                try:
+                    current_table = ctx.get_current_stage_table() or ctx.get_last_result_table()
+                except Exception:
+                    current_table = None
+                table_type = current_table.table if current_table and hasattr(current_table, "table") else None
+                if not stages:
+                    if table_type == "youtube":
+                        print("Auto-running YouTube selection via download-media")
+                        stages.append(["download-media"])
+                    elif table_type == "bandcamp":
+                        print("Auto-running Bandcamp selection via download-media")
+                        stages.append(["download-media"])
+                    elif table_type in {"soulseek", "openlibrary", "libgen"}:
+                        print("Auto-piping selection to download-file")
+                        stages.append(["download-file"])
+                else:
+                    first_cmd = stages[0][0] if stages and stages[0] else None
+                    if table_type == "soulseek" and first_cmd not in (
+                        "download-file",
+                        "download-media",
+                        "download_media",
+                        ".pipe",
+                    ):
+                        debug("Auto-inserting download-file after Soulseek selection")
+                        stages.insert(0, ["download-file"])
+                    if table_type == "youtube" and first_cmd not in (
+                        "download-media",
+                        "download_media",
+                        "download-file",
+                        ".pipe",
+                    ):
+                        debug("Auto-inserting download-media after YouTube selection")
+                        stages.insert(0, ["download-media"])
+                    if table_type == "bandcamp" and first_cmd not in (
+                        "download-media",
+                        "download_media",
+                        "download-file",
+                        ".pipe",
+                    ):
+                        print("Auto-inserting download-media after Bandcamp selection")
+                        stages.insert(0, ["download-media"])
+                    if table_type == "libgen" and first_cmd not in (
+                        "download-file",
+                        "download-media",
+                        "download_media",
+                        ".pipe",
+                    ):
+                        print("Auto-inserting download-file after Libgen selection")
+                        stages.insert(0, ["download-file"])
+                return True, piped_result
+            else:
+                print("No previous results to select from\n")
+                return False, None
+        return True, None
+
+    @staticmethod
+    def _maybe_start_live_progress(config: Any, stages: List[List[str]]) -> tuple[Any, Dict[int, int]]:
+        progress_ui = None
+        pipe_index_by_stage: Dict[int, int] = {}
+        try:
+            quiet_mode = bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False
+        except Exception:
+            quiet_mode = False
+        try:
+            import sys as _sys
+
+            if (not quiet_mode) and bool(getattr(_sys.stderr, "isatty", lambda: False)()):
+                from models import PipelineLiveProgress
+
+                pipe_stage_indices: List[int] = []
+                pipe_labels: List[str] = []
+                for idx, stage_tokens in enumerate(stages):
+                    if not stage_tokens:
+                        continue
+                    name = str(stage_tokens[0]).replace("_", "-").lower()
+                    if name == "@" or name.startswith("@"):
+                        continue
+                    # `.pipe` (MPV) is an interactive launcher; disable pipeline Live progress
+                    # for it because it doesn't meaningfully "complete" (mpv may keep running)
+                    # and Live output interferes with MPV playlist UI.
+                    if name == ".pipe":
+                        continue
+                    # `.matrix` uses a two-phase picker (@N then .matrix -send). Pipeline Live
+                    # progress can linger across those phases and interfere with interactive output.
+                    if name == ".matrix":
+                        continue
+                    pipe_stage_indices.append(idx)
+                    pipe_labels.append(name)
+                if pipe_labels:
+                    progress_ui = PipelineLiveProgress(pipe_labels, enabled=True)
+                    progress_ui.start()
+                    try:
+                        import pipeline as _pipeline_ctx
+
+                        if hasattr(_pipeline_ctx, "set_live_progress"):
+                            _pipeline_ctx.set_live_progress(progress_ui)
+                    except Exception:
+                        pass
+                pipe_index_by_stage = {stage_idx: pipe_idx for pipe_idx, stage_idx in enumerate(pipe_stage_indices)}
+        except Exception:
+            progress_ui = None
+            pipe_index_by_stage = {}
+        return progress_ui, pipe_index_by_stage
+
+    def execute_tokens(self, tokens: List[str]) -> None:
+        from cmdlet import REGISTRY
+        import pipeline as ctx
+
+        try:
+            self._try_clear_pipeline_stop(ctx)
+            stages = self._split_stages(tokens)
+            if not stages:
+                print("Invalid pipeline syntax\n")
+                return
+            self._maybe_seed_current_stage_table(ctx)
+            stages = self._maybe_apply_pending_pipeline_tail(ctx, stages)
+            config = self._config_loader.load()
+            config = self._apply_quiet_background_flag(config)
+            stages, first_stage_selection_indices, first_stage_had_extra_args, first_stage_select_all = (
+                self._extract_first_stage_selection_tokens(stages)
+            )
+            first_stage_selection_indices = self._apply_select_all_if_requested(
+                ctx, first_stage_selection_indices, first_stage_select_all
+            )
             piped_result: Any = None
             worker_manager = WorkerManagerRegistry.ensure(config)
             pipeline_text = " | ".join(" ".join(stage) for stage in stages)
             pipeline_session = WorkerStages.begin_pipeline(worker_manager, pipeline_text=pipeline_text, config=config)
-            raw_stage_texts: List[str] = []
-            try:
-                if hasattr(ctx, "get_current_command_stages"):
-                    raw_stage_texts = ctx.get_current_command_stages() or []
-            except Exception:
-                raw_stage_texts = []
-            if pipeline_session and worker_manager and isinstance(config, dict):
-                session_worker_ids = config.get("_session_worker_ids")
-                if session_worker_ids:
-                    try:
-                        output_fn = self._toolbar_output
-                        quiet_mode = bool(config.get("_quiet_background_output"))
-                        terminal_only = quiet_mode and not output_fn
-                        kwargs: Dict[str, Any] = {
-                            "session_worker_ids": session_worker_ids,
-                            "only_terminal_updates": terminal_only,
-                            "overlay_mode": bool(output_fn),
-                        }
-                        if output_fn:
-                            kwargs["output"] = output_fn
-                        ensure_background_notifier(worker_manager, **kwargs)
-                    except Exception:
-                        pass
+            raw_stage_texts = self._get_raw_stage_texts(ctx)
+            self._maybe_enable_background_notifier(worker_manager, config, pipeline_session)
             pipeline_status = "completed"
             pipeline_error = ""
@@ -1462,201 +1778,24 @@ class PipelineExecutor:
             pipe_index_by_stage: Dict[int, int] = {}
             try:
-                if first_stage_selection_indices:
-                    if not ctx.get_current_stage_table_source_command():
-                        display_table = ctx.get_display_table() if hasattr(ctx, "get_display_table") else None
-                        table_for_stage = display_table or ctx.get_last_result_table()
-                        if table_for_stage:
-                            ctx.set_current_stage_table(table_for_stage)
-                    source_cmd = ctx.get_current_stage_table_source_command()
-                    source_args_raw = ctx.get_current_stage_table_source_args()
-                    if isinstance(source_args_raw, str):
-                        source_args: List[str] = [source_args_raw]
-                    elif isinstance(source_args_raw, list):
-                        source_args = [str(x) for x in source_args_raw if x is not None]
-                    else:
-                        source_args = []
-                    current_table = ctx.get_current_stage_table()
-                    table_type = current_table.table if current_table and hasattr(current_table, "table") else None
-                    command_expanded = False
-                    if table_type in {"youtube", "soulseek"}:
-                        command_expanded = False
-                    elif source_cmd == "search-file" and source_args and "youtube" in source_args:
-                        command_expanded = False
-                    else:
-                        selected_row_args: List[str] = []
-                        skip_pipe_expansion = source_cmd == ".pipe" and len(stages) > 0
-                        if source_cmd and not skip_pipe_expansion:
-                            for idx in first_stage_selection_indices:
-                                row_args = ctx.get_current_stage_table_row_selection_args(idx)
-                                if row_args:
-                                    selected_row_args.extend(row_args)
-                                    break
-                        if selected_row_args:
-                            if isinstance(source_cmd, list):
-                                cmd_list: List[str] = [str(x) for x in source_cmd if x is not None]
-                            elif isinstance(source_cmd, str):
-                                cmd_list = [source_cmd]
-                            else:
-                                cmd_list = []
-                            expanded_stage: List[str] = cmd_list + source_args + selected_row_args
-                            if first_stage_had_extra_args and stages:
-                                expanded_stage += stages[0]
-                                stages[0] = expanded_stage
-                            else:
-                                stages.insert(0, expanded_stage)
-                            if pipeline_session and worker_manager:
-                                try:
-                                    worker_manager.log_step(
-                                        pipeline_session.worker_id,
-                                        f"@N expansion: {source_cmd} + {' '.join(str(x) for x in selected_row_args)}",
-                                    )
-                                except Exception:
-                                    pass
-                            first_stage_selection_indices = []
-                            command_expanded = True
-                    if not command_expanded and first_stage_selection_indices:
-                        last_piped_items = ctx.get_last_result_items()
-                        stage_table = ctx.get_current_stage_table()
-                        if not stage_table and hasattr(ctx, "get_display_table"):
-                            stage_table = ctx.get_display_table()
-                        if not stage_table:
-                            stage_table = ctx.get_last_result_table()
-                        resolved_items = _resolve_items_for_selection(stage_table, last_piped_items)
-                        if last_piped_items:
-                            filtered = [
-                                resolved_items[i]
-                                for i in first_stage_selection_indices
-                                if 0 <= i < len(resolved_items)
-                            ]
-                            if not filtered:
-                                print("No items matched selection in pipeline\n")
-                                return
-                            if _maybe_run_class_selector(filtered, stage_is_last=(not stages)):
-                                return
-                            from cmdlet._shared import coerce_to_pipe_object
-                            filtered_pipe_objs = [coerce_to_pipe_object(item) for item in filtered]
-                            piped_result = filtered_pipe_objs if len(filtered_pipe_objs) > 1 else filtered_pipe_objs[0]
-                            if pipeline_session and worker_manager:
-                                try:
-                                    selection_parts = [f"@{i+1}" for i in first_stage_selection_indices]
-                                    worker_manager.log_step(
-                                        pipeline_session.worker_id,
-                                        f"Applied @N selection {' | '.join(selection_parts)}",
-                                    )
-                                except Exception:
-                                    pass
-                            # Auto-insert downloader stages for provider tables.
-                            current_table = ctx.get_current_stage_table() or ctx.get_last_result_table()
-                            table_type = current_table.table if current_table and hasattr(current_table, "table") else None
-                            if not stages:
-                                if table_type == "youtube":
-                                    print("Auto-running YouTube selection via download-media")
-                                    stages.append(["download-media"])
-                                elif table_type == "bandcamp":
-                                    print("Auto-running Bandcamp selection via download-media")
-                                    stages.append(["download-media"])
-                                elif table_type in {"soulseek", "openlibrary", "libgen"}:
-                                    print("Auto-piping selection to download-file")
-                                    stages.append(["download-file"])
-                            else:
-                                first_cmd = stages[0][0] if stages and stages[0] else None
-                                if table_type == "soulseek" and first_cmd not in (
-                                    "download-file",
-                                    "download-media",
-                                    "download_media",
-                                    ".pipe",
-                                ):
-                                    debug("Auto-inserting download-file after Soulseek selection")
-                                    stages.insert(0, ["download-file"])
-                                if table_type == "youtube" and first_cmd not in (
-                                    "download-media",
-                                    "download_media",
-                                    "download-file",
-                                    ".pipe",
-                                ):
-                                    debug("Auto-inserting download-media after YouTube selection")
-                                    stages.insert(0, ["download-media"])
-                                if table_type == "bandcamp" and first_cmd not in (
-                                    "download-media",
-                                    "download_media",
-                                    "download-file",
-                                    ".pipe",
-                                ):
-                                    print("Auto-inserting download-media after Bandcamp selection")
-                                    stages.insert(0, ["download-media"])
-                                if table_type == "libgen" and first_cmd not in (
-                                    "download-file",
-                                    "download-media",
-                                    "download_media",
-                                    ".pipe",
-                                ):
-                                    print("Auto-inserting download-file after Libgen selection")
-                                    stages.insert(0, ["download-file"])
-                        else:
-                            print("No previous results to select from\n")
-                            return
+                ok, initial_piped = self._maybe_apply_initial_selection(
+                    ctx,
+                    config,
+                    stages,
+                    selection_indices=first_stage_selection_indices,
+                    first_stage_had_extra_args=first_stage_had_extra_args,
+                    worker_manager=worker_manager,
+                    pipeline_session=pipeline_session,
+                )
+                if not ok:
+                    return
+                if initial_piped is not None:
+                    piped_result = initial_piped

                 # ------------------------------------------------------------------
                 # Multi-level pipeline progress (pipes = stages, tasks = items)
                 # ------------------------------------------------------------------
-                try:
-                    quiet_mode = bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False
-                except Exception:
-                    quiet_mode = False
-                try:
-                    import sys as _sys
-                    if (not quiet_mode) and bool(getattr(_sys.stderr, "isatty", lambda: False)()):
-                        from models import PipelineLiveProgress
-                        pipe_stage_indices: List[int] = []
-                        pipe_labels: List[str] = []
-                        for idx, tokens in enumerate(stages):
-                            if not tokens:
-                                continue
-                            name = str(tokens[0]).replace("_", "-").lower()
-                            if name == "@" or name.startswith("@"):
-                                continue
-                            # `.pipe` (MPV) is an interactive launcher; disable pipeline Live progress
-                            # for it because it doesn't meaningfully "complete" (mpv may keep running)
-                            # and Live output interferes with MPV playlist UI.
-                            if name == ".pipe":
-                                continue
-                            pipe_stage_indices.append(idx)
-                            pipe_labels.append(name)
-                        if pipe_labels:
-                            progress_ui = PipelineLiveProgress(pipe_labels, enabled=True)
-                            progress_ui.start()
-                            try:
-                                import pipeline as _pipeline_ctx
-                                if hasattr(_pipeline_ctx, "set_live_progress"):
-                                    _pipeline_ctx.set_live_progress(progress_ui)
-                            except Exception:
-                                pass
-                        pipe_index_by_stage = {stage_idx: pipe_idx for pipe_idx, stage_idx in enumerate(pipe_stage_indices)}
-                except Exception:
-                    progress_ui = None
-                    pipe_index_by_stage = {}
+                progress_ui, pipe_index_by_stage = self._maybe_start_live_progress(config, stages)

             for stage_index, stage_tokens in enumerate(stages):
                 if not stage_tokens:
@@ -1707,7 +1846,7 @@ class PipelineExecutor:
                 if not stage_table:
                     stage_table = ctx.get_last_result_table()
                 items_list = ctx.get_last_result_items() or []
-                resolved_items = _resolve_items_for_selection(stage_table, items_list)
+                resolved_items = items_list if items_list else []
                 filtered = [resolved_items[i] for i in selected_indices if 0 <= i < len(resolved_items)]
                 if not filtered:
                     print("No items matched selection\n")
@@ -1715,7 +1854,7 @@ class PipelineExecutor:
pipeline_error = "Empty selection" pipeline_error = "Empty selection"
return return
if _maybe_run_class_selector(filtered, stage_is_last=(stage_index + 1 >= len(stages))): if PipelineExecutor._maybe_run_class_selector(ctx, config, filtered, stage_is_last=(stage_index + 1 >= len(stages))):
return return
# Special case: selecting multiple tags from get-tag and piping into delete-tag # Special case: selecting multiple tags from get-tag and piping into delete-tag
@@ -1841,9 +1980,11 @@ class PipelineExecutor:
                 on_emit = None
                 if progress_ui is not None and pipe_idx is not None:
-                    def _on_emit(obj: Any, _idx: int = int(pipe_idx)) -> None:
+                    _ui = cast(Any, progress_ui)
+
+                    def _on_emit(obj: Any, _idx: int = int(pipe_idx), _progress=_ui) -> None:
                         try:
-                            progress_ui.on_emit(_idx, obj)
+                            _progress.on_emit(_idx, obj)
                         except Exception:
                             pass
                     on_emit = _on_emit
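The `_progress=_ui` default argument is the standard fix for Python's late-binding closures: a closure reads the enclosing variable at call time, so if `progress_ui` is later rebound (for example, cleared when the Live UI shuts down), an in-flight emit would hit the new value. Defaults are evaluated once, at def time. A self-contained illustration (not from this commit):

def make_handlers_buggy(uis):
    # Every lambda closes over the same `ui` variable; all call the last one.
    return [lambda: ui.refresh() for ui in uis]

def make_handlers_fixed(uis):
    # Each lambda captures its own value via a default argument.
    return [lambda _ui=ui: _ui.refresh() for ui in uis]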


@@ -23,6 +23,15 @@ except ImportError:
 class Libgen(Provider):
+    # Domains that should be routed to this provider when the user supplies a URL.
+    # (Used by ProviderCore.registry.match_provider_name_for_url)
+    URL_DOMAINS = (
+        "libgen.gl",
+        "libgen.li",
+        "libgen.is",
+        "libgen.rs",
+        "libgen.st",
+    )
+
     """Search provider for Library Genesis books."""

     def search(

View File

@@ -1,9 +1,11 @@
 from __future__ import annotations

 from abc import ABC, abstractmethod
-from typing import Any, Dict, List, Optional, Type
+from typing import Any, Dict, List, Optional, Type, cast

 import requests
 import sys
+import json
+import subprocess

 from SYS.logger import log, debug
@@ -13,6 +15,12 @@ except ImportError: # pragma: no cover - optional
     musicbrainzngs = None

+try:  # Optional dependency
+    import yt_dlp  # type: ignore
+except ImportError:  # pragma: no cover - optional
+    yt_dlp = None
+

 class MetadataProvider(ABC):
     """Base class for metadata providers (music, movies, books, etc.)."""
@@ -351,6 +359,157 @@ class MusicBrainzMetadataProvider(MetadataProvider):
         return tags

+
+class YtdlpMetadataProvider(MetadataProvider):
+    """Metadata provider that extracts tags from a supported URL using yt-dlp.
+
+    This does NOT download media; it only probes metadata.
+    """
+
+    @property
+    def name(self) -> str:  # type: ignore[override]
+        return "ytdlp"
+
+    def _extract_info(self, url: str) -> Optional[Dict[str, Any]]:
+        url = (url or "").strip()
+        if not url:
+            return None
+        # Prefer Python module when available.
+        if yt_dlp is not None:
+            try:
+                opts: Any = {
+                    "quiet": True,
+                    "no_warnings": True,
+                    "skip_download": True,
+                    "noprogress": True,
+                    "socket_timeout": 15,
+                    "retries": 1,
+                    "playlist_items": "1-10",
+                }
+                with yt_dlp.YoutubeDL(opts) as ydl:  # type: ignore[attr-defined]
+                    info = ydl.extract_info(url, download=False)
+                return cast(Dict[str, Any], info) if isinstance(info, dict) else None
+            except Exception:
+                pass
+        # Fallback to CLI.
+        try:
+            cmd = [
+                "yt-dlp",
+                "-J",
+                "--no-warnings",
+                "--skip-download",
+                "--playlist-items",
+                "1-10",
+                url,
+            ]
+            proc = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
+            if proc.returncode != 0:
+                return None
+            payload = (proc.stdout or "").strip()
+            if not payload:
+                return None
+            data = json.loads(payload)
+            return data if isinstance(data, dict) else None
+        except Exception:
+            return None
+
+    def search(self, query: str, limit: int = 10) -> List[Dict[str, Any]]:
+        url = (query or "").strip()
+        if not url.startswith(("http://", "https://")):
+            return []
+        info = self._extract_info(url)
+        if not isinstance(info, dict):
+            return []
+        upload_date = str(info.get("upload_date") or "")
+        release_date = str(info.get("release_date") or "")
+        year = (release_date or upload_date)[:4] if (release_date or upload_date) else ""
+        # Provide basic columns for the standard metadata selection table.
+        # NOTE: This is best-effort; many extractors don't provide artist/album.
+        artist = (
+            info.get("artist")
+            or info.get("uploader")
+            or info.get("channel")
+            or ""
+        )
+        album = info.get("album") or info.get("playlist_title") or ""
+        title = info.get("title") or ""
+        return [
+            {
+                "title": title,
+                "artist": str(artist or ""),
+                "album": str(album or ""),
+                "year": str(year or ""),
+                "provider": self.name,
+                "url": url,
+                "raw": info,
+            }
+        ]
+
+    def to_tags(self, item: Dict[str, Any]) -> List[str]:
+        raw = item.get("raw")
+        if not isinstance(raw, dict):
+            return super().to_tags(item)
+        tags: List[str] = []
+        try:
+            from metadata import extract_ytdlp_tags
+        except Exception:
+            extract_ytdlp_tags = None  # type: ignore[assignment]
+        if extract_ytdlp_tags:
+            try:
+                tags.extend(extract_ytdlp_tags(raw))
+            except Exception:
+                pass
+
+        # Subtitle availability tags
+        def _langs(value: Any) -> List[str]:
+            if not isinstance(value, dict):
+                return []
+            out: List[str] = []
+            for k in value.keys():
+                if isinstance(k, str) and k.strip():
+                    out.append(k.strip().lower())
+            return sorted(set(out))
+
+        # If this is a playlist container, subtitle/captions are usually per-entry.
+        info_for_subs: Dict[str, Any] = raw
+        entries = raw.get("entries")
+        if isinstance(entries, list) and entries:
+            first = entries[0]
+            if isinstance(first, dict):
+                info_for_subs = first
+        for lang in _langs(info_for_subs.get("subtitles")):
+            tags.append(f"subs:{lang}")
+        for lang in _langs(info_for_subs.get("automatic_captions")):
+            tags.append(f"subs_auto:{lang}")
+        # Always include source tag for parity with other providers.
+        tags.append(f"source:{self.name}")
+        # Dedup case-insensitively, preserve order.
+        seen = set()
+        out: List[str] = []
+        for t in tags:
+            if not isinstance(t, str):
+                continue
+            s = t.strip()
+            if not s:
+                continue
+            k = s.lower()
+            if k in seen:
+                continue
+            seen.add(k)
+            out.append(s)
+        return out
+

 # Registry ---------------------------------------------------------------

 _METADATA_PROVIDERS: Dict[str, Type[MetadataProvider]] = {
@@ -359,6 +518,7 @@ _METADATA_PROVIDERS: Dict[str, Type[MetadataProvider]] = {
"googlebooks": GoogleBooksMetadataProvider, "googlebooks": GoogleBooksMetadataProvider,
"google": GoogleBooksMetadataProvider, "google": GoogleBooksMetadataProvider,
"musicbrainz": MusicBrainzMetadataProvider, "musicbrainz": MusicBrainzMetadataProvider,
"ytdlp": YtdlpMetadataProvider,
} }
@@ -370,7 +530,7 @@ def list_metadata_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str
     availability: Dict[str, bool] = {}
     for name, cls in _METADATA_PROVIDERS.items():
         try:
-            provider = cls(config)
+            _ = cls(config)
             # Basic availability check: perform lightweight validation if defined
             availability[name] = True
         except Exception:
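A quick usage sketch of the new provider (the URL is illustrative, the constructor is assumed to accept a config dict as the registry's `cls(config)` call implies, and the exact tag strings depend on what the extractor returns):

provider = YtdlpMetadataProvider({})
rows = provider.search("https://www.youtube.com/watch?v=abc123")  # probes metadata, no download
if rows:
    tags = provider.to_tags(rows[0])
    # e.g. ["title:...", "subs:en", "subs_auto:en", "source:ytdlp"]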

View File

@@ -11,7 +11,8 @@ import sys
 import tempfile
 import time
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any, Callable, Dict, List, Optional, Tuple
+from urllib.parse import urlparse

 import requests
@@ -183,7 +184,44 @@ def _resolve_archive_id(session: requests.Session, edition_id: str, ia_candidate
return "" return ""
def _archive_id_from_url(url: str) -> str:
"""Best-effort extraction of an Archive.org item identifier from a URL."""
u = str(url or "").strip()
if not u:
return ""
try:
p = urlparse(u)
host = (p.hostname or "").lower().strip()
if not host.endswith("archive.org"):
return ""
parts = [x for x in (p.path or "").split("/") if x]
except Exception:
return ""
# Common patterns:
# - /details/<id>/...
# - /borrow/<id>
# - /download/<id>/...
if len(parts) >= 2 and parts[0].lower() in {"details", "borrow", "download", "stream"}:
return str(parts[1]).strip()
# Sometimes the identifier is the first segment.
if len(parts) >= 1:
first = str(parts[0]).strip()
if first and first.lower() not in {"account", "services", "search", "advancedsearch.php"}:
return first
return ""
class OpenLibrary(Provider): class OpenLibrary(Provider):
# Domains that should be routed to this provider when the user supplies a URL.
# (Used by ProviderCore.registry.match_provider_name_for_url)
URL_DOMAINS = (
"openlibrary.org",
"archive.org",
)
"""Search provider for OpenLibrary books + Archive.org direct/borrow download.""" """Search provider for OpenLibrary books + Archive.org direct/borrow download."""
def __init__(self, config: Optional[Dict[str, Any]] = None): def __init__(self, config: Optional[Dict[str, Any]] = None):
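Behavior of the new extractor on typical inputs (identifiers are illustrative):

_archive_id_from_url("https://archive.org/details/somebook/page/n5")  # -> "somebook"
_archive_id_from_url("https://archive.org/borrow/somebook")           # -> "somebook"
_archive_id_from_url("https://web.archive.org/details/somebook")      # -> "somebook" (subdomain matches the archive.org suffix check)
_archive_id_from_url("https://openlibrary.org/books/OL1M")            # -> "" (wrong host)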
@@ -311,6 +349,60 @@ class OpenLibrary(Provider):
             pass
         raise RuntimeError("Something went wrong when trying to return the book")

+    @staticmethod
+    def _archive_logout(session: requests.Session) -> None:
+        """Best-effort logout from archive.org.
+
+        Archive sessions are cookie-based; returning the loan is the critical step.
+        Logout is attempted for cleanliness but failures should not abort the workflow.
+        """
+        if session is None:
+            return
+        for url in (
+            "https://archive.org/account/logout",
+            "https://archive.org/account/logout.php",
+        ):
+            try:
+                resp = session.get(url, timeout=15, allow_redirects=True)
+                code = int(getattr(resp, "status_code", 0) or 0)
+                if code and code < 500:
+                    return
+            except Exception:
+                continue
+
+    @staticmethod
+    def _archive_is_lendable(book_id: str) -> tuple[bool, str]:
+        """Heuristic lendable check using Archive.org item metadata.
+
+        Some lendable items do not map cleanly to an OpenLibrary edition id.
+        In practice, Archive metadata collections often include markers like:
+          - inlibrary
+          - printdisabled
+        """
+        ident = str(book_id or "").strip()
+        if not ident:
+            return False, "no-archive-id"
+        try:
+            resp = requests.get(f"https://archive.org/metadata/{ident}", timeout=8)
+            resp.raise_for_status()
+            data = resp.json() if resp is not None else {}
+            meta = data.get("metadata", {}) if isinstance(data, dict) else {}
+            collection = meta.get("collection") if isinstance(meta, dict) else None
+            values: List[str] = []
+            if isinstance(collection, list):
+                values = [str(x).strip().lower() for x in collection if str(x).strip()]
+            elif isinstance(collection, str):
+                values = [collection.strip().lower()]
+            if any(v in {"inlibrary", "printdisabled", "lendinglibrary"} for v in values):
+                return True, "archive-collection"
+            return False, "archive-not-lendable"
+        except Exception:
+            return False, "archive-metadata-error"
+
     @staticmethod
     def _archive_get_book_infos(session: requests.Session, url: str) -> Tuple[str, List[str], Dict[str, Any]]:
         """Extract page links from Archive.org book reader."""
@@ -430,6 +522,7 @@ class OpenLibrary(Provider):
         links: List[str],
         scale: int,
         book_id: str,
+        progress_callback: Optional[Callable[[int, int], None]] = None,
     ) -> List[str]:
         links_scaled = [f"{link}&rotate=0&scale={scale}" for link in links]
         pages = len(links_scaled)
@@ -448,7 +541,20 @@ class OpenLibrary(Provider):
                     pages=pages,
                 )
             )
-        if tqdm:
+        if progress_callback is not None:
+            done = 0
+            total = len(tasks)
+            for fut in futures.as_completed(tasks):
+                try:
+                    _ = fut.result()
+                except Exception:
+                    pass
+                done += 1
+                try:
+                    progress_callback(done, total)
+                except Exception:
+                    pass
+        elif tqdm:
             for _ in tqdm(futures.as_completed(tasks), total=len(tasks)):  # type: ignore
                 pass
         else:
@@ -904,15 +1010,20 @@ class OpenLibrary(Provider):
         return results

-    def download(self, result: SearchResult, output_dir: Path) -> Optional[Path]:
+    def download(
+        self,
+        result: SearchResult,
+        output_dir: Path,
+        progress_callback: Optional[Callable[[str, int, Optional[int], str], None]] = None,
+    ) -> Optional[Path]:
         output_dir = Path(output_dir)
         output_dir.mkdir(parents=True, exist_ok=True)

         meta = result.full_metadata or {}
         edition_id = str(meta.get("openlibrary_id") or "").strip()
-        if not edition_id:
-            log("[openlibrary] Missing openlibrary_id; cannot download", file=sys.stderr)
-            return None
+        # Accept direct Archive.org URLs too (details/borrow/download) even when no OL edition id is known.
+        archive_id = str(meta.get("archive_id") or "").strip()

         ia_ids = meta.get("ia") or []
         if isinstance(ia_ids, str):
@@ -921,12 +1032,23 @@ class OpenLibrary(Provider):
             ia_ids = []
         ia_candidates = [str(x) for x in ia_ids if x]

-        archive_id = _resolve_archive_id(self._session, edition_id, ia_candidates)
+        if not archive_id:
+            archive_id = _first_str(ia_candidates) or ""
+        if not archive_id and edition_id:
+            archive_id = _resolve_archive_id(self._session, edition_id, ia_candidates)
+        if not archive_id:
+            # Try to extract identifier from the SearchResult path (URL).
+            archive_id = _archive_id_from_url(str(getattr(result, "path", "") or ""))
         if not archive_id:
             log("[openlibrary] No archive identifier available; cannot download", file=sys.stderr)
             return None

         safe_title = sanitize_filename(result.title)
+        if not safe_title or "http" in safe_title.lower():
+            safe_title = sanitize_filename(archive_id) or "archive"

         # 1) Direct download if available.
         try:
@@ -935,8 +1057,22 @@ class OpenLibrary(Provider):
can_direct, pdf_url = False, "" can_direct, pdf_url = False, ""
if can_direct and pdf_url: if can_direct and pdf_url:
try:
if progress_callback is not None:
progress_callback("step", 0, None, "direct download")
except Exception:
pass
out_path = unique_path(output_dir / f"{safe_title}.pdf") out_path = unique_path(output_dir / f"{safe_title}.pdf")
ok = download_file(pdf_url, out_path, session=self._session) ok = download_file(
pdf_url,
out_path,
session=self._session,
progress_callback=(
(lambda downloaded, total, label: progress_callback("bytes", downloaded, total, label))
if progress_callback is not None
else None
),
)
if ok: if ok:
return out_path return out_path
log("[openlibrary] Direct download failed", file=sys.stderr) log("[openlibrary] Direct download failed", file=sys.stderr)
@@ -949,65 +1085,131 @@ class OpenLibrary(Provider):
log("[openlibrary] Archive credentials missing; cannot borrow", file=sys.stderr) log("[openlibrary] Archive credentials missing; cannot borrow", file=sys.stderr)
return None return None
lendable, reason = _check_lendable(self._session, edition_id) lendable = True
reason = ""
if edition_id:
lendable, reason = _check_lendable(self._session, edition_id)
if not lendable:
# OpenLibrary API can be a false-negative; fall back to Archive metadata.
lendable2, reason2 = self._archive_is_lendable(archive_id)
if lendable2:
lendable, reason = True, reason2
else:
lendable, reason = self._archive_is_lendable(archive_id)
if not lendable: if not lendable:
log(f"[openlibrary] Not lendable: {reason}", file=sys.stderr) log(f"[openlibrary] Not lendable: {reason}", file=sys.stderr)
return None return None
session = self._archive_login(email, password) session = self._archive_login(email, password)
loaned = False
try: try:
session = self._archive_loan(session, archive_id, verbose=False)
except self.BookNotAvailableError:
log("[openlibrary] Book not available to borrow", file=sys.stderr)
return None
except Exception:
log("[openlibrary] Borrow failed", file=sys.stderr)
return None
urls = [f"https://archive.org/borrow/{archive_id}", f"https://archive.org/details/{archive_id}"]
title = safe_title
links: Optional[List[str]] = None
last_exc: Optional[Exception] = None
for u in urls:
try: try:
title_raw, links, _metadata = self._archive_get_book_infos(session, u) if progress_callback is not None:
if title_raw: progress_callback("step", 0, None, "login")
title = sanitize_filename(title_raw)
break
except Exception as exc:
last_exc = exc
continue
if not links:
log(f"[openlibrary] Failed to extract pages: {last_exc}", file=sys.stderr)
return None
temp_dir = tempfile.mkdtemp(prefix=f"{title}_", dir=str(output_dir))
try:
images = self._archive_download(session=session, n_threads=10, directory=temp_dir, links=links, scale=3, book_id=archive_id)
pdf_bytes = _image_paths_to_pdf_bytes(images)
if not pdf_bytes:
# Keep images folder for manual conversion.
log("[openlibrary] PDF conversion failed; keeping images folder", file=sys.stderr)
return Path(temp_dir)
pdf_path = unique_path(output_dir / f"{title}.pdf")
with open(pdf_path, "wb") as f:
f.write(pdf_bytes)
try:
shutil.rmtree(temp_dir)
except Exception: except Exception:
pass pass
return pdf_path
except Exception:
try: try:
shutil.rmtree(temp_dir) session = self._archive_loan(session, archive_id, verbose=False)
loaned = True
except self.BookNotAvailableError:
log("[openlibrary] Book not available to borrow", file=sys.stderr)
return None
except Exception:
log("[openlibrary] Borrow failed", file=sys.stderr)
return None
try:
if progress_callback is not None:
progress_callback("step", 0, None, "borrow")
except Exception:
pass
urls = [f"https://archive.org/borrow/{archive_id}", f"https://archive.org/details/{archive_id}"]
title = safe_title
links: Optional[List[str]] = None
last_exc: Optional[Exception] = None
for u in urls:
try:
title_raw, links, _metadata = self._archive_get_book_infos(session, u)
if title_raw:
title = sanitize_filename(title_raw)
break
except Exception as exc:
last_exc = exc
continue
if not links:
log(f"[openlibrary] Failed to extract pages: {last_exc}", file=sys.stderr)
return None
try:
if progress_callback is not None:
progress_callback("step", 0, None, "download pages")
except Exception:
pass
temp_dir = tempfile.mkdtemp(prefix=f"{title}_", dir=str(output_dir))
try:
images = self._archive_download(
session=session,
n_threads=10,
directory=temp_dir,
links=links,
scale=3,
book_id=archive_id,
progress_callback=(
(lambda done, total: progress_callback("pages", done, total, "pages"))
if progress_callback is not None
else None
),
)
pdf_bytes = _image_paths_to_pdf_bytes(images)
if not pdf_bytes:
# Keep images folder for manual conversion.
log("[openlibrary] PDF conversion failed; keeping images folder", file=sys.stderr)
return Path(temp_dir)
try:
if progress_callback is not None:
progress_callback("step", 0, None, "stitch pdf")
except Exception:
pass
pdf_path = unique_path(output_dir / f"{title}.pdf")
with open(pdf_path, "wb") as f:
f.write(pdf_bytes)
try:
shutil.rmtree(temp_dir)
except Exception:
pass
return pdf_path
except Exception:
try:
shutil.rmtree(temp_dir)
except Exception:
pass
raise
finally:
# Always return the loan after a successful borrow, even if download/stitch fails.
if loaned:
try:
if progress_callback is not None:
progress_callback("step", 0, None, "return book")
except Exception:
pass
try:
self._archive_return_loan(session, archive_id)
except Exception as exc:
log(f"[openlibrary] Warning: failed to return loan: {exc}", file=sys.stderr)
try:
self._archive_logout(session)
except Exception: except Exception:
pass pass
raise
except Exception as exc: except Exception as exc:
log(f"[openlibrary] Borrow workflow error: {exc}", file=sys.stderr) log(f"[openlibrary] Borrow workflow error: {exc}", file=sys.stderr)


@@ -1,7 +1,7 @@
 from __future__ import annotations

 from pathlib import Path
-from typing import Optional
+from typing import Callable, Optional

 import sys
 import requests
@@ -22,13 +22,20 @@ def sanitize_filename(name: str, *, max_len: int = 150) -> str:
     return cleaned[:max_len]

-def download_file(url: str, output_path: Path, *, session: Optional[requests.Session] = None, timeout_s: float = 30.0) -> bool:
+def download_file(
+    url: str,
+    output_path: Path,
+    *,
+    session: Optional[requests.Session] = None,
+    timeout_s: float = 30.0,
+    progress_callback: Optional[Callable[[int, Optional[int], str], None]] = None,
+) -> bool:
     output_path = Path(output_path)
     output_path.parent.mkdir(parents=True, exist_ok=True)
     s = session or requests.Session()
-    bar = ProgressBar()
+    bar = ProgressBar() if progress_callback is None else None
     downloaded = 0
     total = None
@@ -41,9 +48,14 @@ def download_file(url: str, output_path: Path, *, session: Optional[requests.Ses
         except Exception:
             total = None

+        label = str(output_path.name or "download")
         # Render once immediately so fast downloads still show something.
         try:
-            bar.update(downloaded=0, total=total, label=str(output_path.name or "download"), file=sys.stderr)
+            if progress_callback is not None:
+                progress_callback(0, total, label)
+            elif bar is not None:
+                bar.update(downloaded=0, total=total, label=label, file=sys.stderr)
         except Exception:
             pass
@@ -53,18 +65,23 @@ def download_file(url: str, output_path: Path, *, session: Optional[requests.Ses
                     f.write(chunk)
                     downloaded += len(chunk)
                     try:
-                        bar.update(downloaded=downloaded, total=total, label=str(output_path.name or "download"), file=sys.stderr)
+                        if progress_callback is not None:
+                            progress_callback(downloaded, total, label)
+                        elif bar is not None:
+                            bar.update(downloaded=downloaded, total=total, label=label, file=sys.stderr)
                     except Exception:
                         pass
         try:
-            bar.finish()
+            if bar is not None:
+                bar.finish()
         except Exception:
             pass
         return output_path.exists() and output_path.stat().st_size > 0
     except Exception:
         try:
-            bar.finish()
+            if bar is not None:
+                bar.finish()
         except Exception:
             pass
         try:
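Usage sketch for the new hook (URL and rendering are illustrative): passing a callback suppresses the built-in ProgressBar and routes byte counts to the caller, which is how the OpenLibrary provider forwards them to the pipeline Live UI.

def on_bytes(downloaded: int, total, label: str) -> None:
    pct = f"{100 * downloaded // total}%" if total else f"{downloaded}B"
    print(f"\r{label}: {pct}", end="", file=sys.stderr)

ok = download_file("https://example.org/book.pdf", Path("out/book.pdf"), progress_callback=on_bytes)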

View File

@@ -6,8 +6,9 @@ This module is the single source of truth for provider discovery.
 from __future__ import annotations

-from typing import Any, Dict, Optional, Type
+from typing import Any, Dict, Optional, Sequence, Type
 import sys
+from urllib.parse import urlparse

 from SYS.logger import log
@@ -141,6 +142,45 @@ def list_file_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bo
     return availability

+
+def match_provider_name_for_url(url: str) -> Optional[str]:
+    """Return a registered provider name that claims the URL's domain.
+
+    Providers can declare domains via a class attribute `URL_DOMAINS` (sequence of strings).
+    This matcher is intentionally cheap (no provider instantiation, no network).
+    """
+    try:
+        parsed = urlparse(str(url))
+        host = (parsed.hostname or "").strip().lower()
+    except Exception:
+        host = ""
+    if not host:
+        return None
+    for name, provider_class in _PROVIDERS.items():
+        domains = getattr(provider_class, "URL_DOMAINS", None)
+        if not isinstance(domains, (list, tuple)):
+            continue
+        for d in domains:
+            dom = str(d or "").strip().lower()
+            if not dom:
+                continue
+            if host == dom or host.endswith("." + dom):
+                return name
+    return None
+
+
+def get_provider_for_url(url: str, config: Optional[Dict[str, Any]] = None) -> Optional[Provider]:
+    """Instantiate and return the matching provider for a URL, if any."""
+    name = match_provider_name_for_url(url)
+    if not name:
+        return None
+    return get_provider(name, config)
+
+
 __all__ = [
     "SearchResult",
     "Provider",
@@ -152,5 +192,7 @@ __all__ = [
"list_search_providers", "list_search_providers",
"get_file_provider", "get_file_provider",
"list_file_providers", "list_file_providers",
"match_provider_name_for_url",
"get_provider_for_url",
"download_soulseek_file", "download_soulseek_file",
] ]
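Given the URL_DOMAINS declared on Libgen and OpenLibrary in this commit, the matcher resolves like this (the registry key names "libgen"/"openlibrary" are assumptions based on the class names):

match_provider_name_for_url("https://libgen.rs/search.php?req=...")  # -> "libgen"
match_provider_name_for_url("https://www.libgen.is/book/...")        # -> "libgen" (subdomain via the "." suffix rule)
match_provider_name_for_url("https://archive.org/details/item")      # -> "openlibrary"
match_provider_name_for_url("https://example.com/file.pdf")          # -> None (no provider claims it)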

View File

@@ -584,10 +584,15 @@ def _download_direct_file(
filename = filename.split("?")[0] filename = filename.split("?")[0]
# Try to get real filename from Content-Disposition header (HEAD request) # Try to get real filename from Content-Disposition header (HEAD request)
content_type = ""
try: try:
with HTTPClient(timeout=10.0) as client: with HTTPClient(timeout=10.0) as client:
response = client._request("HEAD", url, follow_redirects=True) response = client._request("HEAD", url, follow_redirects=True)
content_disposition = response.headers.get("content-disposition", "") content_disposition = response.headers.get("content-disposition", "")
try:
content_type = str(response.headers.get("content-type", "") or "").strip().lower()
except Exception:
content_type = ""
if content_disposition: if content_disposition:
# Extract filename from Content-Disposition header # Extract filename from Content-Disposition header
# Format: attachment; filename="filename.pdf" or filename=filename.pdf # Format: attachment; filename="filename.pdf" or filename=filename.pdf
@@ -620,9 +625,36 @@ def _download_direct_file(
             else:
                 filename = suggested

-    # Final fallback if we still don't have a good filename
-    if not filename or "." not in filename:
-        filename = "downloaded_file.bin"
+    # If we still don't have an extension, try to infer one from Content-Type.
+    # Never fall back to a generic `.bin` extension.
+    try:
+        has_ext = bool(filename and Path(str(filename)).suffix)
+    except Exception:
+        has_ext = False
+    if filename and (not has_ext):
+        ct = (content_type or "").split(";")[0].strip().lower()
+        ext_by_ct = {
+            "application/pdf": ".pdf",
+            "application/epub+zip": ".epub",
+            "application/x-mobipocket-ebook": ".mobi",
+            "image/jpeg": ".jpg",
+            "image/png": ".png",
+            "image/webp": ".webp",
+            "image/gif": ".gif",
+            "text/plain": ".txt",
+            "application/zip": ".zip",
+        }
+        if ct in ext_by_ct:
+            filename = f"{filename}{ext_by_ct[ct]}"
+        elif ct.startswith("text/html"):
+            # Guardrail: HTML landing pages should not be downloaded as opaque files.
+            raise DownloadError("URL appears to be an HTML page, not a direct file")
+
+    # Final guardrail: if filename is empty, refuse rather than inventing `download.bin`.
+    if not filename or not str(filename).strip():
+        raise DownloadError("Could not determine filename for URL (no Content-Disposition and no path filename)")

     file_path = _unique_path(output_dir / filename)
     progress_bar = ProgressBar()
@@ -684,9 +716,15 @@ def _download_direct_file(
# For direct file downloads, create minimal info dict without filename as title # For direct file downloads, create minimal info dict without filename as title
# This prevents creating duplicate title: tags when filename gets auto-generated # This prevents creating duplicate title: tags when filename gets auto-generated
# We'll add title back later only if we couldn't extract meaningful tags # We'll add title back later only if we couldn't extract meaningful tags
ext = ""
try:
ext = Path(str(filename)).suffix.lstrip(".")
except Exception:
ext = ""
info = { info = {
"id": filename.rsplit(".", 1)[0], "id": str(filename).rsplit(".", 1)[0] if "." in str(filename) else str(filename),
"ext": filename.rsplit(".", 1)[1] if "." in filename else "bin", "ext": ext,
"webpage_url": url, "webpage_url": url,
} }
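# Illustration of the inference above (values invented): a HEAD response with
# Content-Type "application/epub+zip" and a bare filename "mybook" yields
# "mybook.epub"; "text/html" raises DownloadError instead of saving a landing page;
# an empty filename now refuses rather than writing "downloaded_file.bin".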

SYS/pipeline_progress.py (new file, 218 lines)
View File

@@ -0,0 +1,218 @@
from __future__ import annotations
import sys
from contextlib import contextmanager
from typing import Any, Iterator, Optional, Sequence, Tuple
class PipelineProgress:
"""Small adapter around PipelineLiveProgress.
This centralizes the boilerplate used across cmdlets:
- locating the active Live UI (if any)
- resolving the current pipe_index from stage context
- step-based progress (begin_pipe_steps/advance_pipe_step)
- optional pipe percent/status updates
- optional byte transfer bars
- optional local Live panel when a cmdlet runs standalone
The class is intentionally defensive: all UI operations are best-effort.
"""
def __init__(self, pipeline_module: Any):
self._ctx = pipeline_module
self._local_ui: Optional[Any] = None
self._local_attached: bool = False
def ui_and_pipe_index(self) -> Tuple[Optional[Any], int]:
ui = None
try:
ui = self._ctx.get_live_progress() if hasattr(self._ctx, "get_live_progress") else None
except Exception:
ui = None
pipe_idx: int = 0
try:
stage_ctx = self._ctx.get_stage_context() if hasattr(self._ctx, "get_stage_context") else None
maybe_idx = getattr(stage_ctx, "pipe_index", None) if stage_ctx is not None else None
if isinstance(maybe_idx, int):
pipe_idx = int(maybe_idx)
except Exception:
pipe_idx = 0
return ui, pipe_idx
def begin_steps(self, total_steps: int) -> None:
ui, pipe_idx = self.ui_and_pipe_index()
if ui is None:
return
try:
begin = getattr(ui, "begin_pipe_steps", None)
if callable(begin):
begin(int(pipe_idx), total_steps=int(total_steps))
except Exception:
return
def step(self, text: str) -> None:
ui, pipe_idx = self.ui_and_pipe_index()
if ui is None:
return
try:
adv = getattr(ui, "advance_pipe_step", None)
if callable(adv):
adv(int(pipe_idx), str(text))
except Exception:
return
def set_percent(self, percent: int) -> None:
ui, pipe_idx = self.ui_and_pipe_index()
if ui is None:
return
try:
set_pct = getattr(ui, "set_pipe_percent", None)
if callable(set_pct):
set_pct(int(pipe_idx), int(percent))
except Exception:
return
def set_status(self, text: str) -> None:
ui, pipe_idx = self.ui_and_pipe_index()
if ui is None:
return
try:
setter = getattr(ui, "set_pipe_status_text", None)
if callable(setter):
setter(int(pipe_idx), str(text))
except Exception:
return
def clear_status(self) -> None:
ui, pipe_idx = self.ui_and_pipe_index()
if ui is None:
return
try:
clr = getattr(ui, "clear_pipe_status_text", None)
if callable(clr):
clr(int(pipe_idx))
except Exception:
return
def begin_transfer(self, *, label: str, total: Optional[int] = None) -> None:
ui, _ = self.ui_and_pipe_index()
if ui is None:
return
try:
fn = getattr(ui, "begin_transfer", None)
if callable(fn):
fn(label=str(label or "transfer"), total=total)
except Exception:
return
def update_transfer(self, *, label: str, completed: Optional[int], total: Optional[int] = None) -> None:
ui, _ = self.ui_and_pipe_index()
if ui is None:
return
try:
fn = getattr(ui, "update_transfer", None)
if callable(fn):
fn(label=str(label or "transfer"), completed=completed, total=total)
except Exception:
return
def finish_transfer(self, *, label: str) -> None:
ui, _ = self.ui_and_pipe_index()
if ui is None:
return
try:
fn = getattr(ui, "finish_transfer", None)
if callable(fn):
fn(label=str(label or "transfer"))
except Exception:
return
def on_emit(self, emitted: Any) -> None:
"""Advance local pipe progress after pipeline_context.emit().
The shared PipelineExecutor wires on_emit automatically for pipelines.
Standalone cmdlet runs do not, so cmdlets call this explicitly.
"""
if self._local_ui is None:
return
try:
self._local_ui.on_emit(0, emitted)
except Exception:
return
def ensure_local_ui(self, *, label: str, total_items: int, items_preview: Optional[Sequence[Any]] = None) -> bool:
"""Start a local PipelineLiveProgress panel if no shared UI exists."""
try:
existing = self._ctx.get_live_progress() if hasattr(self._ctx, "get_live_progress") else None
except Exception:
existing = None
if existing is not None:
return False
if not bool(getattr(sys.stderr, "isatty", lambda: False)()):
return False
try:
from models import PipelineLiveProgress
ui = PipelineLiveProgress([str(label or "pipeline")], enabled=True)
ui.start()
try:
if hasattr(self._ctx, "set_live_progress"):
self._ctx.set_live_progress(ui)
self._local_attached = True
except Exception:
self._local_attached = False
try:
ui.begin_pipe(0, total_items=max(1, int(total_items)), items_preview=list(items_preview or []))
except Exception:
pass
self._local_ui = ui
return True
except Exception:
self._local_ui = None
self._local_attached = False
return False
def close_local_ui(self, *, force_complete: bool = True) -> None:
if self._local_ui is None:
return
try:
try:
self._local_ui.finish_pipe(0, force_complete=bool(force_complete))
except Exception:
pass
try:
self._local_ui.stop()
except Exception:
pass
finally:
self._local_ui = None
try:
if self._local_attached and hasattr(self._ctx, "set_live_progress"):
self._ctx.set_live_progress(None)
except Exception:
pass
self._local_attached = False
@contextmanager
def local_ui_if_needed(
self,
*,
label: str,
total_items: int,
items_preview: Optional[Sequence[Any]] = None,
) -> Iterator["PipelineProgress"]:
created = self.ensure_local_ui(label=label, total_items=total_items, items_preview=items_preview)
try:
yield self
finally:
if created:
self.close_local_ui(force_complete=True)
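# Usage sketch (illustrative, not part of the module). A cmdlet running standalone
# gets a local Live panel only when no shared UI exists and stderr is a TTY:
#
#     import pipeline
#     progress = PipelineProgress(pipeline)
#     with progress.local_ui_if_needed(label="demo", total_items=1):
#         progress.begin_steps(2)
#         progress.step("fetching")      # advances the per-pipe step counter
#         progress.step("finalizing")    # every call is a no-op without a UI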

View File

@@ -1585,9 +1585,46 @@ def coerce_to_pipe_object(value: Any, default_path: Optional[str] = None) -> mod
        "warnings", "path", "relationships", "is_temp", "action", "parent_hash",
    }

-   # Convert ResultItem to dict to preserve all attributes
    # Convert common object-like results into a dict so we can preserve fields like
    # hash/store/url when they come from result tables (e.g., get-url emits UrlItem).
    #
    # Priority:
    #   1) explicit to_dict()
    #   2) best-effort attribute extraction for known PipeObject-ish fields
    if hasattr(value, 'to_dict'):
        value = value.to_dict()
    elif not isinstance(value, dict):
        try:
            obj_map: Dict[str, Any] = {}
            for k in (
                "hash",
                "store",
                "provider",
                "prov",
                "tag",
                "title",
                "url",
                "source_url",
                "duration",
                "duration_seconds",
                "metadata",
                "full_metadata",
                "warnings",
                "path",
                "target",
                "relationships",
                "is_temp",
                "action",
                "parent_hash",
                "extra",
                "media_kind",
            ):
                if hasattr(value, k):
                    obj_map[k] = getattr(value, k)
            if obj_map:
                value = obj_map
        except Exception:
            pass

    if isinstance(value, dict):
        # Extract hash and store (canonical identifiers)
@@ -1695,8 +1732,19 @@ def coerce_to_pipe_object(value: Any, default_path: Optional[str] = None) -> mod
    # Fallback: build from path argument or bare value
    hash_val = "unknown"
    path_val = default_path or getattr(value, "path", None)
    url_val: Optional[str] = None
    title_val = None

    # If the raw value is a string, treat it as either a URL or a file path.
    # This is important for @-selection results that are plain URL strings.
    if isinstance(value, str):
        s = value.strip()
        if s.lower().startswith(("http://", "https://")):
            url_val = s
            path_val = None
        else:
            path_val = s

    if path_val and path_val != "unknown":
        try:
            from SYS.utils import sha256_file
@@ -1708,8 +1756,9 @@ def coerce_to_pipe_object(value: Any, default_path: Optional[str] = None) -> mod
        except Exception:
            pass

-   # When coming from path argument, store should be "PATH" (file path, not a backend)
-   store_val = "PATH"
    # When coming from a raw URL string, mark it explicitly as URL.
    # Otherwise treat it as a local path.
    store_val = "URL" if url_val else "PATH"

    pipe_obj = models.PipeObject(
        hash=hash_val,
@@ -1717,6 +1766,8 @@ def coerce_to_pipe_object(value: Any, default_path: Optional[str] = None) -> mod
        provider=None,
        path=str(path_val) if path_val and path_val != "unknown" else None,
        title=title_val,
        url=url_val,
        source_url=url_val,
        tag=[],
        extra={},
    )
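# Illustration of the string-coercion rule above (values invented):
#   coerce_to_pipe_object("https://example.com/a.mp3")
#     -> PipeObject(store="URL", url="https://example.com/a.mp3", path=None)
#   coerce_to_pipe_object("C:/media/a.mp3")
#     -> PipeObject(store="PATH", path="C:/media/a.mp3"), hash computed when the file exists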

View File

@@ -12,6 +12,7 @@ import models
import pipeline as ctx
from API import HydrusNetwork as hydrus_wrapper
from SYS.logger import log, debug
from SYS.pipeline_progress import PipelineProgress
from SYS.utils_constant import ALL_SUPPORTED_EXTENSIONS
from Store import Store
from . import _shared as sh
@@ -73,6 +74,7 @@ class Add_File(Cmdlet):
    def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
        """Main execution entry point."""
        parsed = parse_cmdlet_args(args, self)
        progress = PipelineProgress(ctx)

        path_arg = parsed.get("path")
        location = parsed.get("store")
@@ -80,6 +82,35 @@
        provider_room = parsed.get("room")
        delete_after = parsed.get("delete", False)

        # Convenience: when piping a file into add-file, allow `-path <existing dir>`
        # to act as the destination export directory.
        # Example: screen-shot "https://..." | add-file -path "C:\Users\Admin\Desktop"
        if path_arg and not location and not provider_name:
            try:
                candidate_dir = Path(str(path_arg))
                if candidate_dir.exists() and candidate_dir.is_dir():
                    piped_items = result if isinstance(result, list) else [result]
                    has_local_source = False
                    for it in piped_items:
                        try:
                            po = coerce_to_pipe_object(it, None)
                            src = str(getattr(po, "path", "") or "").strip()
                            if not src:
                                continue
                            if src.lower().startswith(("http://", "https://", "magnet:", "torrent:")):
                                continue
                            if Path(src).is_file():
                                has_local_source = True
                                break
                        except Exception:
                            continue
                    if has_local_source:
                        debug(f"[add-file] Treating -path directory as destination: {candidate_dir}")
                        location = str(candidate_dir)
                        path_arg = None
            except Exception:
                pass

        stage_ctx = ctx.get_stage_context()
        is_last_stage = (stage_ctx is None) or bool(getattr(stage_ctx, "is_last_stage", False))
@@ -93,7 +124,7 @@
        is_storage_backend_location = False

        # Decide which items to process.
-       # - If user provided -path, treat this invocation as single-item.
        # - If user provided -path (and it was not reinterpreted as destination), treat this invocation as single-item.
        # - Otherwise, if piped input is a list, ingest each item.
        if path_arg:
            items_to_process: List[Any] = [result]
@@ -102,6 +133,17 @@
        else:
            items_to_process = [result]

        # Minimal step-based progress for single-item runs.
        # Many add-file flows don't emit intermediate items, so without steps the pipe can look "stuck".
        use_steps = False
        steps_started = False
        step2_done = False
        try:
            ui, _ = progress.ui_and_pipe_index()
            use_steps = (ui is not None) and (len(items_to_process) == 1)
        except Exception:
            use_steps = False

        debug(f"[add-file] INPUT result type={type(result).__name__}")
        if isinstance(result, list):
            debug(f"[add-file] INPUT result is list with {len(result)} items")
@@ -235,6 +277,14 @@
                    failures += 1
                    continue

                is_url_target = isinstance(media_path_or_url, str) and str(media_path_or_url).lower().startswith(
                    ("http://", "https://", "magnet:", "torrent:")
                )
                if use_steps and (not steps_started) and (not is_url_target):
                    progress.begin_steps(3)
                    progress.step("resolving source")
                    steps_started = True

                # Update pipe_obj with resolved path
                pipe_obj.path = str(media_path_or_url)
@@ -300,13 +350,34 @@
                            pass
                    temp_dir_to_cleanup = Path(tempfile.mkdtemp(prefix="medios_openlibrary_"))

                    # Wire OpenLibrary download progress into pipeline Live UI (no tqdm spam).
                    def _ol_progress(kind: str, completed: int, total: Optional[int], label: str) -> None:
                        try:
                            if kind == "pages" and total:
                                progress.set_status(f"downloading pages {completed}/{total}")
                                progress.set_percent(int(round((completed / max(1, total)) * 100.0)))
                            elif kind == "bytes" and total:
                                progress.set_status(f"downloading {label} {completed}/{total} bytes")
                                progress.set_percent(int(round((completed / max(1, total)) * 100.0)))
                            else:
                                progress.set_status("downloading")
                        except Exception:
                            return

                    try:
                        progress.set_percent(0)
                        progress.set_status("downloading openlibrary")
                    except Exception:
                        pass

                    sr = SearchResult(
                        table="openlibrary",
                        title=str(getattr(pipe_obj, "title", None) or "Unknown"),
                        path=str(media_path_or_url),
                        full_metadata=full_metadata if isinstance(full_metadata, dict) else {},
                    )
-                   downloaded = provider.download(sr, temp_dir_to_cleanup)
                    downloaded = provider.download(sr, temp_dir_to_cleanup, progress_callback=_ol_progress)
                    if downloaded is None:
                        log("[add-file] OpenLibrary download failed", file=sys.stderr)
                        failures += 1
@@ -325,6 +396,13 @@
                        pipe_obj.path = str(downloaded_path)
                        delete_after_item = True

                        # Use the adapter here: `pipe_idx` is not in scope at this point,
                        # and progress.* calls already no-op when no Live UI exists.
                        try:
                            progress.set_percent(100)
                            progress.set_status("downloaded")
                        except Exception:
                            pass

                # For non-provider URLs, or if still a URL after provider attempt, delegate to download-media.
                if isinstance(media_path_or_url, str) and media_path_or_url.lower().startswith(
                    ("http://", "https://", "magnet:", "torrent:")
@@ -562,6 +640,10 @@
                        failures += 1
                        continue

                if use_steps and steps_started and (not step2_done):
                    progress.step("writing destination")
                    step2_done = True

                if code == 0:
                    successes += 1
                else:
@@ -619,6 +701,9 @@
        except Exception:
            pass

        if use_steps and steps_started:
            progress.step("finalized")

        if successes > 0:
            return 0
        return 1
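# Shape of the progress_callback protocol wired to provider.download above
# (numbers invented): _ol_progress("pages", 3, 10, "") sets status
# "downloading pages 3/10" at 30%; _ol_progress("bytes", 512, 2048, "a.pdf")
# sets "downloading a.pdf 512/2048 bytes" at 25%.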

View File

@@ -34,6 +34,19 @@ class Add_Url(sh.Cmdlet):
        """Add URL to file via hash+store backend."""
        parsed = sh.parse_cmdlet_args(args, self)

        # Compatibility/piping fix:
        # `SharedArgs.QUERY` is positional in the shared parser, so `add-url <url>`
        # (and `@N | add-url <url>`) can mistakenly parse the URL into `query`.
        # If `url` is missing and `query` looks like an http(s) URL, treat it as `url`.
        try:
            if (not parsed.get("url")) and isinstance(parsed.get("query"), str):
                q = str(parsed.get("query") or "").strip()
                if q.startswith(("http://", "https://")):
                    parsed["url"] = q
                    parsed.pop("query", None)
        except Exception:
            pass

        query_hash = sh.parse_single_hash_query(parsed.get("query"))
        if parsed.get("query") and not query_hash:
            log("Error: -query must be of the form hash:<sha256>")

View File

@@ -29,7 +29,7 @@ class Delete_Url(Cmdlet):
        arg=[
            SharedArgs.QUERY,
            SharedArgs.STORE,
-           CmdletArg("url", required=True, description="URL to remove"),
            CmdletArg("url", required=False, description="URL to remove (optional when piping url rows)"),
        ],
        detail=[
            "- Removes URL association from file identified by hash+store",
@@ -69,22 +69,24 @@ class Delete_Url(Cmdlet):
            log("Error: No store name provided")
            return 1

-       if not url_arg:
-           log("Error: No URL provided")
-           return 1

        # Normalize hash (single-item mode)
        if not results and file_hash:
            file_hash = normalize_hash(file_hash)
            if not file_hash:
                log("Error: Invalid hash format")
                return 1

-       # Parse url (comma-separated)
-       urls = [u.strip() for u in str(url_arg).split(',') if u.strip()]
-       if not urls:
-           log("Error: No valid url provided")
-           return 1
        from metadata import normalize_urls

        def _urls_from_arg(raw: Any) -> List[str]:
            if raw is None:
                return []
            # Support comma-separated input for backwards compatibility
            if isinstance(raw, str) and "," in raw:
                return [u.strip() for u in raw.split(",") if u.strip()]
            return [u.strip() for u in normalize_urls(raw) if str(u).strip()]

        urls_from_cli = _urls_from_arg(url_arg)

        # Get backend and delete url
        try:
@@ -145,7 +147,17 @@ class Delete_Url(Cmdlet):
                    )
                    continue

-               batch.setdefault(store_text, []).append((normalized, list(urls)))
                # Determine which URLs to delete.
                # - If user passed an explicit <url>, apply it to all items.
                # - Otherwise, when piping url rows from get-url, delete the url(s) from each item.
                item_urls = list(urls_from_cli)
                if not item_urls:
                    item_urls = [u.strip() for u in normalize_urls(get_field(item, "url") or get_field(item, "source_url")) if str(u).strip()]
                if not item_urls:
                    ctx.print_if_visible("[delete-url] Warning: Item has no url field; skipping", file=sys.stderr)
                    continue

                batch.setdefault(store_text, []).append((normalized, item_urls))

            for store_text, pairs in batch.items():
                try:
@@ -168,24 +180,39 @@ class Delete_Url(Cmdlet):
                    for h, ulist in bulk_pairs:
                        backend.delete_url(h, ulist, config=config)

                deleted_count = 0
                for _h, ulist in bulk_pairs:
                    deleted_count += len(ulist or [])
                ctx.print_if_visible(
-                   f"✓ delete-url: {len(urls)} url(s) for {len(bulk_pairs)} item(s) in '{store_text}'",
                    f"✓ delete-url: {deleted_count} url(s) for {len(bulk_pairs)} item(s) in '{store_text}'",
                    file=sys.stderr,
                )

            for item in pass_through:
                existing = get_field(item, "url")
-               _set_item_url(item, _remove_urls(existing, list(urls)))
                # In batch mode we removed the union of requested urls for the file.
                # Using urls_from_cli (if present) matches the user's explicit intent; otherwise
                # remove the piped url row(s).
                remove_set = urls_from_cli
                if not remove_set:
                    remove_set = [u.strip() for u in normalize_urls(get_field(item, "url") or get_field(item, "source_url")) if str(u).strip()]
                _set_item_url(item, _remove_urls(existing, list(remove_set)))
                ctx.emit(item)

            return 0

        # Single-item mode
        if not urls_from_cli:
            urls_from_cli = [u.strip() for u in normalize_urls(get_field(result, "url") or get_field(result, "source_url")) if str(u).strip()]
        if not urls_from_cli:
            log("Error: No URL provided")
            return 1

        backend = storage[str(store_name)]
-       backend.delete_url(str(file_hash), urls, config=config)
-       ctx.print_if_visible(f"✓ delete-url: {len(urls)} url(s) removed", file=sys.stderr)
        backend.delete_url(str(file_hash), list(urls_from_cli), config=config)
        ctx.print_if_visible(f"✓ delete-url: {len(urls_from_cli)} url(s) removed", file=sys.stderr)
        if result is not None:
            existing = get_field(result, "url")
-           _set_item_url(result, _remove_urls(existing, list(urls)))
            _set_item_url(result, _remove_urls(existing, list(urls_from_cli)))
            ctx.emit(result)
        return 0
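# Hypothetical session showing the piped mode (hash and store names invented):
#   get-url -query hash:ab12cd -store local | @2 | delete-url
# removes the second listed URL row from the file without retyping the URL,
# while `delete-url <url>` still applies one explicit URL to every piped item.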

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -126,7 +126,7 @@ class Get_File(sh.Cmdlet):
                except Exception as exc:
                    log(f"Error opening browser: {exc}", file=sys.stderr)
            else:
-               log(f"Opened in browser: {source_path}", file=sys.stderr)
                debug(f"Opened in browser: {source_path}", file=sys.stderr)

        # Emit result for pipeline
        ctx.emit({

View File

@@ -47,6 +47,210 @@ except ImportError:
    extract_title = None
def _dedup_tags_preserve_order(tags: List[str]) -> List[str]:
"""Deduplicate tags case-insensitively while preserving order."""
out: List[str] = []
seen: set[str] = set()
for t in tags or []:
if not isinstance(t, str):
continue
s = t.strip()
if not s:
continue
key = s.lower()
if key in seen:
continue
seen.add(key)
out.append(s)
return out
def _extract_subtitle_tags(info: Dict[str, Any]) -> List[str]:
"""Extract subtitle availability tags from a yt-dlp info dict.
Produces multi-valued tags so languages can coexist:
- subs:<lang>
- subs_auto:<lang>
"""
def _langs(value: Any) -> List[str]:
if not isinstance(value, dict):
return []
langs: List[str] = []
for k in value.keys():
if not isinstance(k, str):
continue
lang = k.strip().lower()
if lang:
langs.append(lang)
return sorted(set(langs))
out: List[str] = []
for lang in _langs(info.get("subtitles")):
out.append(f"subs:{lang}")
for lang in _langs(info.get("automatic_captions")):
out.append(f"subs_auto:{lang}")
return out
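# Example of the tag shape produced above (trimmed yt-dlp info dict):
#   _extract_subtitle_tags({"subtitles": {"en": [], "de": []},
#                           "automatic_captions": {"en": []}})
#   -> ["subs:de", "subs:en", "subs_auto:en"]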
def _scrape_ytdlp_info(url: str) -> Optional[Dict[str, Any]]:
"""Fetch a yt-dlp info dict without downloading media."""
if not isinstance(url, str) or not url.strip():
return None
url = url.strip()
# Prefer the Python module when available (faster, avoids shell quoting issues).
try:
import yt_dlp # type: ignore
opts: Any = {
"quiet": True,
"no_warnings": True,
"skip_download": True,
"noprogress": True,
"socket_timeout": 15,
"retries": 1,
"playlist_items": "1-10",
}
with yt_dlp.YoutubeDL(opts) as ydl:
info = ydl.extract_info(url, download=False)
return info if isinstance(info, dict) else None
except Exception:
pass
# Fallback to yt-dlp CLI if the module isn't available.
try:
import json as json_module
cmd = [
"yt-dlp",
"-J",
"--no-warnings",
"--skip-download",
"--playlist-items",
"1-10",
url,
]
result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
if result.returncode != 0:
return None
payload = (result.stdout or "").strip()
if not payload:
return None
data = json_module.loads(payload)
return data if isinstance(data, dict) else None
except Exception:
return None
def _resolve_candidate_urls_for_item(
result: Any,
backend: Any,
file_hash: str,
config: Dict[str, Any],
) -> List[str]:
"""Get candidate URLs from backend and/or piped result."""
try:
from metadata import normalize_urls
except Exception:
normalize_urls = None # type: ignore[assignment]
urls: List[str] = []
# 1) Backend URL association (best source of truth)
try:
backend_urls = backend.get_url(file_hash, config=config)
if backend_urls:
if normalize_urls:
urls.extend(normalize_urls(backend_urls))
else:
urls.extend([str(u).strip() for u in backend_urls if isinstance(u, str) and str(u).strip()])
except Exception:
pass
# 2) Backend metadata url field
try:
meta = backend.get_metadata(file_hash, config=config)
if isinstance(meta, dict) and meta.get("url"):
if normalize_urls:
urls.extend(normalize_urls(meta.get("url")))
else:
raw = meta.get("url")
if isinstance(raw, list):
urls.extend([str(u).strip() for u in raw if isinstance(u, str) and str(u).strip()])
elif isinstance(raw, str) and raw.strip():
urls.append(raw.strip())
except Exception:
pass
# 3) Piped result fields
def _get(obj: Any, key: str, default: Any = None) -> Any:
if isinstance(obj, dict):
return obj.get(key, default)
return getattr(obj, key, default)
for key in ("url", "webpage_url", "source_url", "target"):
val = _get(result, key, None)
if not val:
continue
if normalize_urls:
urls.extend(normalize_urls(val))
continue
if isinstance(val, str) and val.strip():
urls.append(val.strip())
elif isinstance(val, list):
urls.extend([str(u).strip() for u in val if isinstance(u, str) and str(u).strip()])
meta_field = _get(result, "metadata", None)
if isinstance(meta_field, dict) and meta_field.get("url"):
val = meta_field.get("url")
if normalize_urls:
urls.extend(normalize_urls(val))
elif isinstance(val, list):
urls.extend([str(u).strip() for u in val if isinstance(u, str) and str(u).strip()])
elif isinstance(val, str) and val.strip():
urls.append(val.strip())
# Dedup
return _dedup_tags_preserve_order(urls)
def _pick_supported_ytdlp_url(urls: List[str]) -> Optional[str]:
"""Pick the first URL that looks supported by yt-dlp (best effort)."""
if not urls:
return None
def _is_hydrus_file_url(u: str) -> bool:
text = str(u or "").strip().lower()
if not text:
return False
# Hydrus-local file URLs are retrievable blobs, not original source pages.
# yt-dlp generally can't extract meaningful metadata from these.
return ("/get_files/file" in text) and ("hash=" in text)
http_urls: List[str] = []
for u in urls:
text = str(u or "").strip()
if text.lower().startswith(("http://", "https://")):
http_urls.append(text)
# Prefer non-Hydrus URLs for yt-dlp scraping.
candidates = [u for u in http_urls if not _is_hydrus_file_url(u)]
if not candidates:
return None
# Prefer a true support check when the Python module is available.
try:
from SYS.download import is_url_supported_by_ytdlp
for text in candidates:
try:
if is_url_supported_by_ytdlp(text):
return text
except Exception:
continue
except Exception:
pass
# Fallback: use the first non-Hydrus http(s) URL and let extraction decide.
return candidates[0] if candidates else None
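# Illustration (URLs invented): Hydrus blob URLs are filtered out, so
#   _pick_supported_ytdlp_url([
#       "http://127.0.0.1:45869/get_files/file?hash=ab12",
#       "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
#   ])
# returns the YouTube URL (or the first non-Hydrus http(s) URL as fallback).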
_scrape_isbn_metadata = _ol_scrape_isbn_metadata  # type: ignore[assignment]
_scrape_openlibrary_metadata = _ol_scrape_openlibrary_metadata  # type: ignore[assignment]
@@ -853,7 +1057,12 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
    scrape_url = parsed_args.get("scrape")
    scrape_requested = scrape_flag_present or scrape_url is not None

-   if scrape_requested and (not scrape_url or str(scrape_url).strip() == ""):
    # Convenience: `-scrape` with no value defaults to `ytdlp` (store-backed URL scrape).
    if scrape_flag_present and (scrape_url is None or str(scrape_url).strip() == ""):
        scrape_url = "ytdlp"
        scrape_requested = True
    if scrape_requested and (scrape_url is None or str(scrape_url).strip() == ""):
        log("-scrape requires a URL or provider name", file=sys.stderr)
        return 1

@@ -861,6 +1070,123 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
    if scrape_requested and scrape_url:
        import json as json_module
if str(scrape_url).strip().lower() == "ytdlp":
# Scrape metadata from the selected item's URL via yt-dlp (no download),
# then OVERWRITE all existing tags (including title:).
#
# This mode requires a store-backed item (hash + store).
#
# NOTE: We intentionally do not reuse _scrape_url_metadata() here because it
# performs namespace deduplication that would collapse multi-valued tags.
file_hash = normalize_hash(hash_override) or normalize_hash(get_field(result, "hash", None))
store_name = get_field(result, "store", None)
subject_path = get_field(result, "path", None) or get_field(result, "target", None) or get_field(result, "filename", None)
item_title = get_field(result, "title", None) or get_field(result, "name", None) or get_field(result, "filename", None)
# Only run overwrite-apply when the item is store-backed.
# If this is a URL-only PipeObject, fall through to provider mode below.
if file_hash and store_name and str(file_hash).strip().lower() != "unknown" and str(store_name).strip().upper() not in {"PATH", "URL"}:
try:
from Store import Store
storage = Store(config)
backend = storage[str(store_name)]
except Exception as exc:
log(f"Failed to resolve store backend '{store_name}': {exc}", file=sys.stderr)
return 1
candidate_urls = _resolve_candidate_urls_for_item(result, backend, file_hash, config)
scrape_target = _pick_supported_ytdlp_url(candidate_urls)
if not scrape_target:
log(
"No yt-dlp-supported source URL found for this item (Hydrus /get_files/file URLs are ignored). ",
file=sys.stderr,
)
log(
"Add the original page URL to the file (e.g. via add-url), then retry get-tag -scrape.",
file=sys.stderr,
)
return 1
info = _scrape_ytdlp_info(scrape_target)
if not info:
log("yt-dlp could not extract metadata for this URL (unsupported or failed)", file=sys.stderr)
return 1
try:
from metadata import extract_ytdlp_tags
except Exception:
extract_ytdlp_tags = None # type: ignore[assignment]
# Prefer the top-level metadata, but if this is a playlist container, use
# the first entry for per-item fields like subtitles.
info_for_subs = info
entries = info.get("entries") if isinstance(info, dict) else None
if isinstance(entries, list) and entries:
first = entries[0]
if isinstance(first, dict):
info_for_subs = first
tags: List[str] = []
if extract_ytdlp_tags:
try:
tags.extend(extract_ytdlp_tags(info))
except Exception:
pass
# Subtitle availability tags
try:
tags.extend(_extract_subtitle_tags(info_for_subs if isinstance(info_for_subs, dict) else {}))
except Exception:
pass
# Ensure we actually have something to apply.
tags = _dedup_tags_preserve_order(tags)
if not tags:
log("No tags extracted from yt-dlp metadata", file=sys.stderr)
return 1
# Full overwrite: delete all existing tags, then add the new set.
try:
existing_tags, _src = backend.get_tag(file_hash, config=config)
except Exception:
existing_tags = []
try:
if existing_tags:
backend.delete_tag(file_hash, list(existing_tags), config=config)
except Exception as exc:
debug(f"[get_tag] ytdlp overwrite: delete_tag failed: {exc}")
try:
backend.add_tag(file_hash, list(tags), config=config)
except Exception as exc:
log(f"Failed to apply yt-dlp tags: {exc}", file=sys.stderr)
return 1
# Show updated tags
try:
updated_tags, _src = backend.get_tag(file_hash, config=config)
except Exception:
updated_tags = tags
if not updated_tags:
updated_tags = tags
_emit_tags_as_table(
tags_list=list(updated_tags),
file_hash=file_hash,
store=str(store_name),
service_name=None,
config=config,
item_title=str(item_title or "ytdlp"),
path=str(subject_path) if subject_path else None,
subject={
"hash": file_hash,
"store": str(store_name),
"path": str(subject_path) if subject_path else None,
"title": item_title,
"extra": {"applied_provider": "ytdlp", "scrape_url": scrape_target},
},
)
return 0
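# Hypothetical invocation of this overwrite mode:
#   @1 | get-tag -scrape ytdlp
# resolves the item's stored source URL, scrapes tags via yt-dlp, and replaces
# the existing tag set (including title:) on the backing store.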
        if scrape_url.startswith("http://") or scrape_url.startswith("https://"):
            # URL scraping (existing behavior)
            title, tags, formats, playlist_items = _scrape_url_metadata(scrape_url)
@@ -951,7 +1277,16 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
    else:
        combined_query = f"{title_hint} {artist_hint}"

-   query_hint = identifier_query or combined_query or title_hint
    # yt-dlp isn't a search provider; it requires a URL.
    url_hint: Optional[str] = None
    if provider.name == "ytdlp":
        raw_url = get_field(result, "url", None) or get_field(result, "source_url", None) or get_field(result, "target", None)
        if isinstance(raw_url, list) and raw_url:
            raw_url = raw_url[0]
        if isinstance(raw_url, str) and raw_url.strip().startswith(("http://", "https://")):
            url_hint = raw_url.strip()
    query_hint = url_hint or identifier_query or combined_query or title_hint

    if not query_hint:
        log("No title or identifier available to search for metadata", file=sys.stderr)
        return 1
@@ -967,6 +1302,27 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
    if not items:
        log("No metadata results found", file=sys.stderr)
        return 1
# For yt-dlp, emit tags directly (there is no meaningful multi-result selection step).
if provider.name == "ytdlp":
try:
tags = [str(t) for t in provider.to_tags(items[0]) if t is not None]
except Exception:
tags = []
if not tags:
log("No tags extracted from yt-dlp metadata", file=sys.stderr)
return 1
_emit_tags_as_table(
tags_list=list(tags),
file_hash=None,
store="url",
service_name=None,
config=config,
item_title=str(items[0].get("title") or "ytdlp"),
path=None,
subject={"provider": "ytdlp", "url": str(query_hint)},
)
return 0
    from result_table import ResultTable

    table = ResultTable(f"Metadata: {provider.name}")
@@ -1040,7 +1396,10 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
        return 0

    # Apply tags to the store backend (no sidecar writing here).
-   apply_tags = _filter_scraped_tags([str(t) for t in result_tags if t is not None])
    if str(result_provider).strip().lower() == "ytdlp":
        apply_tags = [str(t) for t in result_tags if t is not None]
    else:
        apply_tags = _filter_scraped_tags([str(t) for t in result_tags if t is not None])
    if not apply_tags:
        log("No applicable scraped tags to apply (title:/artist:/source: are skipped)", file=sys.stderr)
        return 0
@@ -1167,6 +1526,11 @@ try:
except Exception:
    _SCRAPE_CHOICES = ["itunes", "openlibrary", "googlebooks", "google", "musicbrainz"]

# Special scrape mode: pull tags from an item's URL via yt-dlp (no download)
if "ytdlp" not in _SCRAPE_CHOICES:
    _SCRAPE_CHOICES.append("ytdlp")
    _SCRAPE_CHOICES = sorted(_SCRAPE_CHOICES)

class Get_Tag(Cmdlet):
    """Class-based get-tag cmdlet with self-registration."""
@@ -1195,7 +1559,7 @@ class Get_Tag(Cmdlet):
            CmdletArg(
                name="-scrape",
                type="string",
-               description="Scrape metadata from URL or provider name (returns tags as JSON or table)",
                description="Scrape metadata from URL/provider, or use 'ytdlp' to scrape from the item's URL and overwrite tags",
                required=False,
                choices=_SCRAPE_CHOICES,
            )

View File

@@ -14,10 +14,11 @@ import httpx
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence, Tuple
-from urllib.parse import urlsplit, quote, urljoin
from urllib.parse import urlsplit, quote, urljoin, unquote

from SYS.logger import log, debug
from API.HTTP import HTTPClient
from SYS.pipeline_progress import PipelineProgress
from SYS.utils import ensure_directory, unique_path, unique_preserve_order
from . import _shared as sh
@@ -31,54 +32,6 @@ get_field = sh.get_field
parse_cmdlet_args = sh.parse_cmdlet_args

import pipeline as pipeline_context

-def _live_ui_and_pipe_index() -> tuple[Optional[Any], int]:
-    ui = None
-    try:
-        ui = pipeline_context.get_live_progress() if hasattr(pipeline_context, "get_live_progress") else None
-    except Exception:
-        ui = None
-    pipe_idx: int = 0
-    try:
-        stage_ctx = pipeline_context.get_stage_context() if hasattr(pipeline_context, "get_stage_context") else None
-        maybe_idx = getattr(stage_ctx, "pipe_index", None) if stage_ctx is not None else None
-        if isinstance(maybe_idx, int):
-            pipe_idx = int(maybe_idx)
-    except Exception:
-        pipe_idx = 0
-    return ui, pipe_idx
-
-def _begin_live_steps(total_steps: int) -> None:
-    """Declare the total number of steps for this cmdlet run (per-pipe)."""
-    ui, pipe_idx = _live_ui_and_pipe_index()
-    if ui is None:
-        return
-    try:
-        begin = getattr(ui, "begin_pipe_steps", None)
-        if callable(begin):
-            begin(int(pipe_idx), total_steps=int(total_steps))
-    except Exception:
-        return
-
-def _step(text: str) -> None:
-    """Emit a *new* step.
-
-    Each call increments the step counter and advances percent automatically.
-    """
-    ui, pipe_idx = _live_ui_and_pipe_index()
-    if ui is None:
-        return
-    try:
-        adv = getattr(ui, "advance_pipe_step", None)
-        if callable(adv):
-            adv(int(pipe_idx), str(text))
-    except Exception:
-        return

# ============================================================================
# CMDLET Metadata Declaration
# ============================================================================
@@ -115,6 +68,10 @@ USER_AGENT = (
DEFAULT_VIEWPORT: dict[str, int] = {"width": 1920, "height": 1080}
ARCHIVE_TIMEOUT = 30.0

# WebP has a hard maximum dimension per side.
# Pillow typically fails with: "encoding error 5: Image size exceeds WebP limit of 16383 pixels"
WEBP_MAX_DIM = 16_383

# Configurable selectors for specific websites
SITE_SELECTORS: Dict[str, List[str]] = {
    "twitter.com": [
@@ -200,6 +157,80 @@ def _slugify_url(url: str) -> str:
    return slug[:100]
def _tags_from_url(url: str) -> List[str]:
"""Derive simple tags from a URL.
- site:<domain> (strips leading www.)
- title:<slug> derived from the last path segment, with extension removed
and separators (-, _, %) normalized to spaces.
"""
u = str(url or "").strip()
if not u:
return []
parsed = None
try:
parsed = urlsplit(u)
host = str(getattr(parsed, "hostname", None) or getattr(parsed, "netloc", "") or "").strip().lower()
except Exception:
parsed = None
host = ""
if host:
# Drop credentials and port if present.
if "@" in host:
host = host.rsplit("@", 1)[-1]
if ":" in host:
host = host.split(":", 1)[0]
if host.startswith("www."):
host = host[len("www.") :]
path = ""
if parsed is not None:
try:
path = str(getattr(parsed, "path", "") or "")
except Exception:
path = ""
last = ""
if path:
try:
last = path.rsplit("/", 1)[-1]
except Exception:
last = ""
try:
last = unquote(last or "")
except Exception:
last = last or ""
if last and "." in last:
# Drop a single trailing extension (e.g. .html, .php).
last = last.rsplit(".", 1)[0]
for sep in ("_", "-", "%"):
if last and sep in last:
last = last.replace(sep, " ")
title = " ".join(str(last or "").split()).strip().lower()
tags: List[str] = []
if host:
tags.append(f"site:{host}")
if title:
tags.append(f"title:{title}")
return tags
def _title_from_url(url: str) -> str:
"""Return the normalized title derived from a URL's last path segment."""
for t in _tags_from_url(url):
if str(t).lower().startswith("title:"):
return str(t)[len("title:") :].strip()
return ""
def _normalise_format(fmt: Optional[str]) -> str:
    """Normalize output format to valid values."""
    if not fmt:
@@ -218,6 +249,89 @@ def _format_suffix(fmt: str) -> str:
        return ".jpg"
    return f".{fmt}"
def _convert_to_webp(
src_png: Path,
dst_webp: Path,
*,
quality: int = 90,
method: int = 6,
max_dim: int = WEBP_MAX_DIM,
downscale_if_oversize: bool = True,
) -> bool:
"""Convert a PNG screenshot to WebP via Pillow.
Playwright does not currently support emitting WebP directly.
"""
if not src_png or not Path(src_png).is_file():
raise ScreenshotError(f"Source image not found: {src_png}")
dst_webp = Path(dst_webp)
try:
dst_webp.parent.mkdir(parents=True, exist_ok=True)
except Exception:
pass
try:
from PIL import Image
except Exception as exc:
raise ScreenshotError(f"Pillow is required for webp conversion: {exc}") from exc
# Write atomically to avoid partial files if conversion is interrupted.
tmp_path = unique_path(dst_webp.with_suffix(".tmp.webp"))
try:
with Image.open(src_png) as im:
did_downscale = False
save_kwargs: Dict[str, Any] = {
"format": "WEBP",
"quality": int(quality),
"method": int(method),
}
# Preserve alpha when present; Pillow handles it for WEBP.
# Normalize palette images to RGBA to avoid odd palette artifacts.
if im.mode == "P":
im = im.convert("RGBA")
# WebP enforces a hard max dimension per side (16383px).
# When full-page captures are very tall, downscale proportionally to fit.
try:
w, h = im.size
except Exception:
w, h = 0, 0
if downscale_if_oversize and isinstance(max_dim, int) and max_dim > 0 and (w > max_dim or h > max_dim):
scale = 1.0
try:
scale = min(float(max_dim) / float(w), float(max_dim) / float(h))
except Exception:
scale = 1.0
if scale > 0.0 and scale < 1.0:
new_w = max(1, int(w * scale))
new_h = max(1, int(h * scale))
debug(
f"[_convert_to_webp] Image exceeds WebP limit ({w}x{h}); downscaling -> {new_w}x{new_h}"
)
try:
resample = getattr(getattr(Image, "Resampling", Image), "LANCZOS", None)
if resample is None:
resample = getattr(Image, "LANCZOS", 1)
im = im.resize((new_w, new_h), resample=resample)
did_downscale = True
except Exception as exc:
debug(f"[_convert_to_webp] Downscale failed; attempting direct WEBP save anyway: {exc}")
im.save(tmp_path, **save_kwargs)
tmp_path.replace(dst_webp)
return bool(did_downscale)
finally:
try:
tmp_path.unlink(missing_ok=True)
except Exception:
pass
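# Minimal call sketch (paths hypothetical):
#   downscaled = _convert_to_webp(Path("shot.png"), Path("shot.webp"), quality=90)
#   # downscaled is True only when a side exceeded WEBP_MAX_DIM and the image was
#   # proportionally resized; the caller then keeps the original PNG as well.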
def _matched_site_selectors(url: str) -> List[str]:
    """Return SITE_SELECTORS for a matched domain; empty if no match.
@@ -231,6 +345,16 @@ def _matched_site_selectors(url: str) -> List[str]:
    return sels

def _selectors_for_url(url: str) -> List[str]:
    """Return selectors to try for a URL.

    For now, prefer a minimal behavior: only return known SITE_SELECTORS.
    (The cmdlet already falls back to full-page capture when no selectors match.)
    """
    return _matched_site_selectors(url)

def _platform_preprocess(url: str, page: Any, warnings: List[str], timeout_ms: int = 10_000) -> None:
    """Best-effort page tweaks for popular platforms before capture."""
    try:
@@ -366,11 +490,11 @@ def _prepare_output_path(options: ScreenshotOptions) -> Path:
    return unique_path(path)

-def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str]) -> None:
def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str], progress: PipelineProgress) -> None:
    """Capture screenshot using Playwright."""
    debug(f"[_capture] Starting capture for {options.url} -> {destination}")
    try:
-       _step("loading launching browser")
        progress.step("loading launching browser")
        tool = options.playwright_tool or PlaywrightTool({})
        # Ensure Chromium engine is used for the screen-shot cmdlet (force for consistency)
@@ -405,16 +529,16 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str])
        try:
            with tool.open_page(headless=headless) as page:
-               _step("loading navigating")
                progress.step("loading navigating")
                debug(f"Navigating to {options.url}...")
                try:
                    tool.goto(page, options.url)
                    debug("Page loaded successfully")
-                   _step("loading page loaded")
                    progress.step("loading page loaded")
                except PlaywrightTimeoutError:
                    warnings.append("navigation timeout; capturing current page state")
                    debug("Navigation timeout; proceeding with current state")
-                   _step("loading navigation timeout")
                    progress.step("loading navigation timeout")

                # Skip article lookup by default (wait_for_article defaults to False)
                if options.wait_for_article:
@@ -430,9 +554,9 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str])
                    debug(f"Waiting {options.wait_after_load}s for page stabilization...")
                    time.sleep(min(10.0, max(0.0, options.wait_after_load)))

-               _step("loading stabilized")
                progress.step("loading stabilized")

-               _step("capturing preparing")
                progress.step("capturing preparing")
                if options.replace_video_posters:
                    debug("Replacing video elements with posters...")
                    page.evaluate(
@@ -453,7 +577,7 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str])
                if options.prefer_platform_target and format_name != "pdf":
                    debug(f"[_capture] Target capture enabled")
                    debug("Attempting platform-specific content capture...")
-                   _step("capturing locating target")
                    progress.step("capturing locating target")
                    try:
                        _platform_preprocess(options.url, page, warnings)
                    except Exception as e:
@@ -478,7 +602,7 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str])
                                    el.scroll_into_view_if_needed(timeout=1000)
                                except Exception:
                                    pass
-                               _step("capturing output")
                                progress.step("capturing output")
                                debug(f"Capturing element to {destination}...")
                                el.screenshot(path=str(destination), type=("jpeg" if format_name == "jpeg" else None))
                                element_captured = True
@@ -489,14 +613,14 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str])
                        debug(f"Failed to capture element: {exc}")
                        # Fallback to default capture paths

                if element_captured:
-                   _step("capturing saved")
                    progress.step("capturing saved")
                elif format_name == "pdf":
                    debug("Generating PDF...")
                    page.emulate_media(media="print")
-                   _step("capturing output")
                    progress.step("capturing output")
                    page.pdf(path=str(destination), print_background=True)
                    debug(f"PDF saved to {destination}")
-                   _step("capturing saved")
                    progress.step("capturing saved")
                else:
                    debug(f"Capturing full page to {destination}...")
                    screenshot_kwargs: Dict[str, Any] = {"path": str(destination)}
@@ -504,20 +628,20 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str])
                        screenshot_kwargs["type"] = "jpeg"
                        screenshot_kwargs["quality"] = 90
                    if options.full_page:
-                       _step("capturing output")
                        progress.step("capturing output")
                        page.screenshot(full_page=True, **screenshot_kwargs)
                    else:
                        article = page.query_selector("article")
                        if article is not None:
                            article_kwargs = dict(screenshot_kwargs)
                            article_kwargs.pop("full_page", None)
-                           _step("capturing output")
                            progress.step("capturing output")
                            article.screenshot(**article_kwargs)
                        else:
-                           _step("capturing output")
                            progress.step("capturing output")
                            page.screenshot(**screenshot_kwargs)
                debug(f"Screenshot saved to {destination}")
-               _step("capturing saved")
                progress.step("capturing saved")
    except Exception as exc:
        debug(f"[_capture] Exception launching browser/page: {exc}")
        msg = str(exc).lower()
@@ -532,7 +656,7 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str])
        raise ScreenshotError(f"Failed to capture screenshot: {exc}") from exc

-def _capture_screenshot(options: ScreenshotOptions) -> ScreenshotResult:
def _capture_screenshot(options: ScreenshotOptions, progress: PipelineProgress) -> ScreenshotResult:
    """Capture a screenshot for the given options."""
    debug(f"[_capture_screenshot] Preparing capture for {options.url}")
    requested_format = _normalise_format(options.output_format)
@@ -543,8 +667,8 @@ def _capture_screenshot(options: ScreenshotOptions) -> ScreenshotResult:
    will_convert = requested_format == "webp"
    will_archive = bool(options.archive and options.url)
    total_steps = 9 + (1 if will_target else 0) + (1 if will_convert else 0) + (1 if will_archive else 0)
-   _begin_live_steps(total_steps)
-   _step("loading starting")
    progress.begin_steps(total_steps)
    progress.step("loading starting")

    # Playwright screenshots do not natively support WebP output.
    # Capture as PNG, then convert via Pillow.
@@ -553,17 +677,22 @@ def _capture_screenshot(options: ScreenshotOptions) -> ScreenshotResult:
        capture_path = unique_path(destination.with_suffix(".png"))
        debug(f"[_capture_screenshot] Requested webp; capturing intermediate png -> {capture_path}")
        options.output_format = "png"

-   _capture(options, capture_path, warnings)
    _capture(options, capture_path, warnings, progress)

    if requested_format == "webp":
-       _step("capturing converting to webp")
        progress.step("capturing converting to webp")
        debug(f"[_capture_screenshot] Converting png -> webp: {destination}")
        try:
-           _convert_to_webp(capture_path, destination)
-           try:
-               capture_path.unlink(missing_ok=True)
-           except Exception:
-               pass
            did_downscale = _convert_to_webp(capture_path, destination)
            if did_downscale:
                warnings.append(
                    f"webp conversion used downscaling to fit {WEBP_MAX_DIM}px limit; keeping original png: {capture_path.name}"
                )
            else:
                try:
                    capture_path.unlink(missing_ok=True)
                except Exception:
                    pass
        except Exception as exc:
            warnings.append(f"webp conversion failed; keeping png: {exc}")
            destination = capture_path
@@ -572,7 +701,7 @@ def _capture_screenshot(options: ScreenshotOptions) -> ScreenshotResult:
    url: List[str] = [options.url] if options.url else []
    archive_url: List[str] = []
    if options.archive and options.url:
-       _step("capturing archiving")
        progress.step("capturing archiving")
        debug(f"[_capture_screenshot] Archiving enabled for {options.url}")
        archives, archive_warnings = _archive_url(options.url, options.archive_timeout)
        archive_url.extend(archives)
@@ -580,7 +709,7 @@ def _capture_screenshot(options: ScreenshotOptions) -> ScreenshotResult:
        if archives:
            url = unique_preserve_order([*url, *archives])

-   _step("capturing finalized")
    progress.step("capturing finalized")

    applied_tag = unique_preserve_order(list(tag for tag in options.tag if tag.strip()))
@@ -627,6 +756,8 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
        )
        return 1

    progress = PipelineProgress(pipeline_context)

    # ========================================================================
    # ARGUMENT PARSING
    # ========================================================================
@@ -685,32 +816,6 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
    debug(f"[_run] url to process: {[u for u, _ in url_to_process]}")

-   # If the caller isn't running the shared pipeline Live progress UI (e.g. direct
-   # cmdlet execution), start a minimal local pipeline progress panel so this cmdlet
-   # still shows step-level progress.
-   local_progress_ui = None
-   try:
-       existing_ui = pipeline_context.get_live_progress() if hasattr(pipeline_context, "get_live_progress") else None
-   except Exception:
-       existing_ui = None
-   try:
-       if existing_ui is None and bool(getattr(sys.stderr, "isatty", lambda: False)()):
-           from models import PipelineLiveProgress
-           local_progress_ui = PipelineLiveProgress(["screen-shot"], enabled=True)
-           local_progress_ui.start()
-           try:
-               if hasattr(pipeline_context, "set_live_progress"):
-                   pipeline_context.set_live_progress(local_progress_ui)
-           except Exception:
-               pass
-           try:
-               local_progress_ui.begin_pipe(0, total_items=len(url_to_process), items_preview=[u for u, _ in url_to_process])
-           except Exception:
-               pass
-   except Exception:
-       local_progress_ui = None

    # ========================================================================
    # OUTPUT DIRECTORY RESOLUTION - Priority chain
    # ========================================================================
@@ -749,6 +854,18 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
     ensure_directory(screenshot_dir)
 
+    # If the caller isn't running the shared pipeline Live progress UI (e.g. direct
+    # cmdlet execution), start a minimal local pipeline progress panel so this cmdlet
+    # still shows step-level progress.
+    try:
+        progress.ensure_local_ui(
+            label="screen-shot",
+            total_items=len(url_to_process),
+            items_preview=[u for u, _ in url_to_process],
+        )
+    except Exception:
+        pass
+
     # ========================================================================
     # PREPARE SCREENSHOT OPTIONS
     # ========================================================================
@@ -850,7 +967,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
                 options.target_selectors = auto_selectors
                 debug(f"[screen_shot] Auto selectors matched for url: {auto_selectors}")
 
-            screenshot_result = _capture_screenshot(options)
+            screenshot_result = _capture_screenshot(options, progress)
 
             # Log results and warnings
             debug(f"Screenshot captured to {screenshot_result.path}")
@@ -875,15 +992,18 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
             capture_date = datetime.now().date().isoformat()
             upstream_title = _clean_title(_extract_item_title(origin_item))
-            display_title = upstream_title or url
+            url_title = _title_from_url(url)
+            display_title = upstream_title or url_title or url
             upstream_tags = _extract_item_tags(origin_item)
             filtered_upstream_tags = [
                 t for t in upstream_tags
                 if not str(t).strip().lower().startswith(("type:", "date:"))
             ]
+            url_tags = _tags_from_url(url)
             merged_tags = unique_preserve_order(
-                ["type:screenshot", f"date:{capture_date}"] + filtered_upstream_tags
+                ["type:screenshot", f"date:{capture_date}"] + filtered_upstream_tags + url_tags
             )
 
             pipe_obj = create_pipe_object_result(
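Both helpers introduced in this hunk, `_title_from_url` and `_tags_from_url`, appear only by name in the diff. A hedged sketch of plausible stdlib-only implementations; the real ones may extract more (query parameters, per-site patterns), and the `site:` tag namespace here is an assumption:

from typing import List
from urllib.parse import urlsplit

def _title_from_url(url: str) -> str:
    """Fallback title: the last path segment, de-slugged ('my-page' -> 'my page')."""
    path = urlsplit(url).path.rstrip("/")
    segment = path.rsplit("/", 1)[-1] if path else ""
    return segment.replace("-", " ").replace("_", " ").strip()

def _tags_from_url(url: str) -> List[str]:
    """Derive cheap provenance tags, such as the source host, from the URL."""
    host = urlsplit(url).hostname or ""
    if host.startswith("www."):
        host = host[4:]
    return [f"site:{host}"] if host else []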
@@ -910,11 +1030,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
             all_emitted.append(pipe_obj)
 
             # If we created a local progress UI, advance it per completed item.
-            if local_progress_ui is not None:
-                try:
-                    local_progress_ui.on_emit(0, pipe_obj)
-                except Exception:
-                    pass
+            progress.on_emit(pipe_obj)
 
         except ScreenshotError as exc:
             log(f"Error taking screenshot of {url}: {exc}", file=sys.stderr)
@@ -925,23 +1041,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
             traceback.print_exc(file=sys.stderr)
             exit_code = 1
 
-    try:
-        if local_progress_ui is not None:
-            try:
-                local_progress_ui.finish_pipe(0, force_complete=True)
-            except Exception:
-                pass
-    finally:
-        if local_progress_ui is not None:
-            try:
-                local_progress_ui.stop()
-            except Exception:
-                pass
-            try:
-                if hasattr(pipeline_context, "set_live_progress"):
-                    pipeline_context.set_live_progress(None)
-            except Exception:
-                pass
+    progress.close_local_ui(force_complete=True)
 
     if not all_emitted:
         log(f"No screenshots were successfully captured", file=sys.stderr)
View File
@@ -336,6 +336,18 @@ def _resolve_upload_path(item: Any, config: Dict[str, Any]) -> Optional[str]:
 def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
     # Internal stage: send previously selected items to selected rooms.
     if any(str(a).lower() == "-send" for a in (args or [])):
+        # Ensure we don't re-print the rooms picker table on the send stage.
+        try:
+            if hasattr(ctx, "set_last_result_table_overlay"):
+                ctx.set_last_result_table_overlay(None, None, None)
+        except Exception:
+            pass
+        try:
+            if hasattr(ctx, "set_current_stage_table"):
+                ctx.set_current_stage_table(None)
+        except Exception:
+            pass
+
         rooms = _normalize_to_list(result)
         room_ids: List[str] = []
         for r in rooms:
@@ -430,7 +442,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
         log("No joined rooms found.", file=sys.stderr)
         return 0
 
-    table = ResultTable("Matrix Rooms")
+    table = ResultTable("Matrix Rooms (select with @N)")
     table.set_table("matrix")
     table.set_source_command(".matrix", [])
@@ -461,12 +473,6 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
     ctx.set_last_result_table_overlay(table, room_items)
     ctx.set_current_stage_table(table)
     ctx.set_pending_pipeline_tail([[".matrix", "-send"]], ".matrix")
-
-    print()
-    from rich_display import stdout_console
-    stdout_console().print(table)
-    print("\nSelect room(s) with @N (e.g. @1 or @1-3) to send the selected item(s)")
-
     return 0
 
 CMDLET = Cmdlet(
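With the inline printing removed, the picker table is rendered once by the REPL from the stored overlay, and `set_pending_pipeline_tail` is what turns a later `@N` selection into a resumed pipeline. The intended interaction, sketched with illustrative prompt text:

# > .matrix
#   ...the REPL renders the "Matrix Rooms (select with @N)" table once...
# > @1-3
#   the stored tail [[".matrix", "-send"]] runs with rooms 1-3 as piped input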
View File
@@ -1,6 +1,6 @@
 # Medios-Macina
-Medios-Macina is a CLI-first media ingestion and management toolkit focused on reliably downloading, tagging, and storing media (audio, video, images, and text) from a variety of providers and sources. It is designed around a compact, pipeable command language ("cmdlets") so complex workflows can be composed simply and repeatably.
+Medios-Macina is a CLI media manager and toolkit focused on downloading, tagging, and storing media (audio, video, images, and text) from a variety of providers and sources. It is designed around a compact, pipeable command language ("cmdlets") so complex workflows can be composed simply and repeatably.
 
 ## Highlights ✅
 - Flexible pipeline-based CLI: chain cmdlets with `|` and use saved selections with `@N`.