nose
2025-12-22 02:11:53 -08:00
parent d0b821b5dd
commit 16316bb3fd
20 changed files with 4218 additions and 2422 deletions

View File

@@ -1585,9 +1585,46 @@ def coerce_to_pipe_object(value: Any, default_path: Optional[str] = None) -> mod
"warnings", "path", "relationships", "is_temp", "action", "parent_hash",
}
# Convert ResultItem to dict to preserve all attributes
# Convert common object-like results into a dict so we can preserve fields like
# hash/store/url when they come from result tables (e.g., get-url emits UrlItem).
#
# Priority:
# 1) explicit to_dict()
# 2) best-effort attribute extraction for known PipeObject-ish fields
if hasattr(value, 'to_dict'):
value = value.to_dict()
elif not isinstance(value, dict):
try:
obj_map: Dict[str, Any] = {}
for k in (
"hash",
"store",
"provider",
"prov",
"tag",
"title",
"url",
"source_url",
"duration",
"duration_seconds",
"metadata",
"full_metadata",
"warnings",
"path",
"target",
"relationships",
"is_temp",
"action",
"parent_hash",
"extra",
"media_kind",
):
if hasattr(value, k):
obj_map[k] = getattr(value, k)
if obj_map:
value = obj_map
except Exception:
pass
if isinstance(value, dict):
# Extract hash and store (canonical identifiers)
@@ -1695,8 +1732,19 @@ def coerce_to_pipe_object(value: Any, default_path: Optional[str] = None) -> mod
# Fallback: build from path argument or bare value
hash_val = "unknown"
path_val = default_path or getattr(value, "path", None)
url_val: Optional[str] = None
title_val = None
# If the raw value is a string, treat it as either a URL or a file path.
# This is important for @-selection results that are plain URL strings.
if isinstance(value, str):
s = value.strip()
if s.lower().startswith(("http://", "https://")):
url_val = s
path_val = None
else:
path_val = s
if path_val and path_val != "unknown":
try:
from SYS.utils import sha256_file
@@ -1708,8 +1756,9 @@ def coerce_to_pipe_object(value: Any, default_path: Optional[str] = None) -> mod
except Exception:
pass
# When coming from path argument, store should be "PATH" (file path, not a backend)
store_val = "PATH"
# When coming from a raw URL string, mark it explicitly as URL.
# Otherwise treat it as a local path.
store_val = "URL" if url_val else "PATH"
pipe_obj = models.PipeObject(
hash=hash_val,
@@ -1717,6 +1766,8 @@ def coerce_to_pipe_object(value: Any, default_path: Optional[str] = None) -> mod
provider=None,
path=str(path_val) if path_val and path_val != "unknown" else None,
title=title_val,
url=url_val,
source_url=url_val,
tag=[],
extra={},
)
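For reference, a minimal standalone sketch of the new string-handling branch above (illustrative names only; the real logic lives inside coerce_to_pipe_object):

def _classify_raw_value(value: str):
    # Mirrors the fallback branch: URL-looking strings map to store "URL",
    # everything else is treated as a local path with store "PATH".
    s = value.strip()
    if s.lower().startswith(("http://", "https://")):
        return s, None, "URL"      # (url_val, path_val, store_val)
    return None, s, "PATH"

# _classify_raw_value("https://example.com/page") -> ("https://example.com/page", None, "URL")
# _classify_raw_value("C:/media/clip.mp4")        -> (None, "C:/media/clip.mp4", "PATH")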

View File

@@ -12,6 +12,7 @@ import models
import pipeline as ctx
from API import HydrusNetwork as hydrus_wrapper
from SYS.logger import log, debug
from SYS.pipeline_progress import PipelineProgress
from SYS.utils_constant import ALL_SUPPORTED_EXTENSIONS
from Store import Store
from . import _shared as sh
@@ -73,6 +74,7 @@ class Add_File(Cmdlet):
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Main execution entry point."""
parsed = parse_cmdlet_args(args, self)
progress = PipelineProgress(ctx)
path_arg = parsed.get("path")
location = parsed.get("store")
@@ -80,6 +82,35 @@ class Add_File(Cmdlet):
provider_room = parsed.get("room")
delete_after = parsed.get("delete", False)
# Convenience: when piping a file into add-file, allow `-path <existing dir>`
# to act as the destination export directory.
# Example: screen-shot "https://..." | add-file -path "C:\Users\Admin\Desktop"
if path_arg and not location and not provider_name:
try:
candidate_dir = Path(str(path_arg))
if candidate_dir.exists() and candidate_dir.is_dir():
piped_items = result if isinstance(result, list) else [result]
has_local_source = False
for it in piped_items:
try:
po = coerce_to_pipe_object(it, None)
src = str(getattr(po, "path", "") or "").strip()
if not src:
continue
if src.lower().startswith(("http://", "https://", "magnet:", "torrent:")):
continue
if Path(src).is_file():
has_local_source = True
break
except Exception:
continue
if has_local_source:
debug(f"[add-file] Treating -path directory as destination: {candidate_dir}")
location = str(candidate_dir)
path_arg = None
except Exception:
pass
stage_ctx = ctx.get_stage_context()
is_last_stage = (stage_ctx is None) or bool(getattr(stage_ctx, "is_last_stage", False))
@@ -93,7 +124,7 @@ class Add_File(Cmdlet):
is_storage_backend_location = False
# Decide which items to process.
# - If user provided -path, treat this invocation as single-item.
# - If user provided -path (and it was not reinterpreted as destination), treat this invocation as single-item.
# - Otherwise, if piped input is a list, ingest each item.
if path_arg:
items_to_process: List[Any] = [result]
@@ -102,6 +133,17 @@ class Add_File(Cmdlet):
else:
items_to_process = [result]
# Minimal step-based progress for single-item runs.
# Many add-file flows don't emit intermediate items, so without steps the pipe can look "stuck".
use_steps = False
steps_started = False
step2_done = False
try:
ui, _ = progress.ui_and_pipe_index()
use_steps = (ui is not None) and (len(items_to_process) == 1)
except Exception:
use_steps = False
debug(f"[add-file] INPUT result type={type(result).__name__}")
if isinstance(result, list):
debug(f"[add-file] INPUT result is list with {len(result)} items")
@@ -235,6 +277,14 @@ class Add_File(Cmdlet):
failures += 1
continue
is_url_target = isinstance(media_path_or_url, str) and str(media_path_or_url).lower().startswith(
("http://", "https://", "magnet:", "torrent:")
)
if use_steps and (not steps_started) and (not is_url_target):
progress.begin_steps(3)
progress.step("resolving source")
steps_started = True
# Update pipe_obj with resolved path
pipe_obj.path = str(media_path_or_url)
@@ -300,13 +350,34 @@ class Add_File(Cmdlet):
pass
temp_dir_to_cleanup = Path(tempfile.mkdtemp(prefix="medios_openlibrary_"))
# Wire OpenLibrary download progress into pipeline Live UI (no tqdm spam).
def _ol_progress(kind: str, completed: int, total: Optional[int], label: str) -> None:
try:
if kind == "pages" and total:
progress.set_status(f"downloading pages {completed}/{total}")
progress.set_percent(int(round((completed / max(1, total)) * 100.0)))
elif kind == "bytes" and total:
progress.set_status(f"downloading {label} {completed}/{total} bytes")
progress.set_percent(int(round((completed / max(1, total)) * 100.0)))
else:
progress.set_status("downloading")
except Exception:
return
try:
progress.set_percent(0)
progress.set_status("downloading openlibrary")
except Exception:
pass
sr = SearchResult(
table="openlibrary",
title=str(getattr(pipe_obj, "title", None) or "Unknown"),
path=str(media_path_or_url),
full_metadata=full_metadata if isinstance(full_metadata, dict) else {},
)
downloaded = provider.download(sr, temp_dir_to_cleanup)
downloaded = provider.download(sr, temp_dir_to_cleanup, progress_callback=_ol_progress)
if downloaded is None:
log("[add-file] OpenLibrary download failed", file=sys.stderr)
failures += 1
@@ -325,6 +396,13 @@ class Add_File(Cmdlet):
pipe_obj.path = str(downloaded_path)
delete_after_item = True
try:
progress.set_percent(100)
progress.set_status("downloaded")
except Exception:
pass
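The percent math the callback applies, condensed into a runnable sketch (the provider-side call pattern into progress_callback is an assumption; only the _ol_progress callback above is part of this diff):

def _percent(completed: int, total: int) -> int:
    # Same formula as _ol_progress: clamp total to at least 1, round to an int percent.
    return int(round((completed / max(1, total)) * 100.0))

assert _percent(3, 120) == 2               # e.g. "downloading pages 3/120"
assert _percent(512_000, 2_048_000) == 25  # e.g. "downloading book.pdf 512000/2048000 bytes"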
# For non-provider URLs, or if still a URL after provider attempt, delegate to download-media.
if isinstance(media_path_or_url, str) and media_path_or_url.lower().startswith(
("http://", "https://", "magnet:", "torrent:")
@@ -562,6 +640,10 @@ class Add_File(Cmdlet):
failures += 1
continue
if use_steps and steps_started and (not step2_done):
progress.step("writing destination")
step2_done = True
if code == 0:
successes += 1
else:
@@ -619,6 +701,9 @@ class Add_File(Cmdlet):
except Exception:
pass
if use_steps and steps_started:
progress.step("finalized")
if successes > 0:
return 0
return 1
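Condensed, the single-item step pattern add-file now follows (PipelineProgress lives in the new SYS.pipeline_progress module, which is not shown in this diff):

progress = PipelineProgress(ctx)
ui, _ = progress.ui_and_pipe_index()
if ui is not None:                        # only show steps when a Live UI is attached
    progress.begin_steps(3)
    progress.step("resolving source")     # step 1: locate/download the input
    # ... resolve the source ...
    progress.step("writing destination")  # step 2: hand off to the backend
    # ... write to store/export dir ...
    progress.step("finalized")            # step 3: wrap up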

View File

@@ -34,6 +34,19 @@ class Add_Url(sh.Cmdlet):
"""Add URL to file via hash+store backend."""
parsed = sh.parse_cmdlet_args(args, self)
# Compatibility/piping fix:
# `SharedArgs.QUERY` is positional in the shared parser, so `add-url <url>`
# (and `@N | add-url <url>`) can mistakenly parse the URL into `query`.
# If `url` is missing and `query` looks like an http(s) URL, treat it as `url`.
try:
if (not parsed.get("url")) and isinstance(parsed.get("query"), str):
q = str(parsed.get("query") or "").strip()
if q.startswith(("http://", "https://")):
parsed["url"] = q
parsed.pop("query", None)
except Exception:
pass
query_hash = sh.parse_single_hash_query(parsed.get("query"))
if parsed.get("query") and not query_hash:
log("Error: -query must be of the form hash:<sha256>")

View File

@@ -29,7 +29,7 @@ class Delete_Url(Cmdlet):
arg=[
SharedArgs.QUERY,
SharedArgs.STORE,
CmdletArg("url", required=True, description="URL to remove"),
CmdletArg("url", required=False, description="URL to remove (optional when piping url rows)"),
],
detail=[
"- Removes URL association from file identified by hash+store",
@@ -69,22 +69,24 @@ class Delete_Url(Cmdlet):
log("Error: No store name provided")
return 1
if not url_arg:
log("Error: No URL provided")
return 1
# Normalize hash (single-item mode)
if not results and file_hash:
file_hash = normalize_hash(file_hash)
if not file_hash:
log("Error: Invalid hash format")
return 1
# Parse url (comma-separated)
urls = [u.strip() for u in str(url_arg).split(',') if u.strip()]
if not urls:
log("Error: No valid url provided")
return 1
from metadata import normalize_urls
def _urls_from_arg(raw: Any) -> List[str]:
if raw is None:
return []
# Support comma-separated input for backwards compatibility
if isinstance(raw, str) and "," in raw:
return [u.strip() for u in raw.split(",") if u.strip()]
return [u.strip() for u in normalize_urls(raw) if str(u).strip()]
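Illustrative calls into the helper above (normalize_urls comes from the metadata module; its exact normalization rules are assumed):

parsed_urls = _urls_from_arg("https://a.example/x, https://b.example/y")
# -> ["https://a.example/x", "https://b.example/y"]   (comma-separated branch)
parsed_urls = _urls_from_arg(None)
# -> []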
urls_from_cli = _urls_from_arg(url_arg)
# Get backend and delete url
try:
@@ -145,7 +147,17 @@ class Delete_Url(Cmdlet):
)
continue
batch.setdefault(store_text, []).append((normalized, list(urls)))
# Determine which URLs to delete.
# - If user passed an explicit <url>, apply it to all items.
# - Otherwise, when piping url rows from get-url, delete the url(s) from each item.
item_urls = list(urls_from_cli)
if not item_urls:
item_urls = [u.strip() for u in normalize_urls(get_field(item, "url") or get_field(item, "source_url")) if str(u).strip()]
if not item_urls:
ctx.print_if_visible("[delete-url] Warning: Item has no url field; skipping", file=sys.stderr)
continue
batch.setdefault(store_text, []).append((normalized, item_urls))
for store_text, pairs in batch.items():
try:
@@ -168,24 +180,39 @@ class Delete_Url(Cmdlet):
for h, ulist in bulk_pairs:
backend.delete_url(h, ulist, config=config)
deleted_count = 0
for _h, ulist in bulk_pairs:
deleted_count += len(ulist or [])
ctx.print_if_visible(
f"✓ delete-url: {len(urls)} url(s) for {len(bulk_pairs)} item(s) in '{store_text}'",
f"✓ delete-url: {deleted_count} url(s) for {len(bulk_pairs)} item(s) in '{store_text}'",
file=sys.stderr,
)
for item in pass_through:
existing = get_field(item, "url")
_set_item_url(item, _remove_urls(existing, list(urls)))
# In batch mode the urls for each file were already deleted above.
# When updating the pass-through items, prefer the user's explicit urls
# (urls_from_cli); otherwise strip the piped url row(s) from each item.
remove_set = urls_from_cli
if not remove_set:
remove_set = [u.strip() for u in normalize_urls(get_field(item, "url") or get_field(item, "source_url")) if str(u).strip()]
_set_item_url(item, _remove_urls(existing, list(remove_set)))
ctx.emit(item)
return 0
# Single-item mode
if not urls_from_cli:
urls_from_cli = [u.strip() for u in normalize_urls(get_field(result, "url") or get_field(result, "source_url")) if str(u).strip()]
if not urls_from_cli:
log("Error: No URL provided")
return 1
backend = storage[str(store_name)]
backend.delete_url(str(file_hash), urls, config=config)
ctx.print_if_visible(f"✓ delete-url: {len(urls)} url(s) removed", file=sys.stderr)
backend.delete_url(str(file_hash), list(urls_from_cli), config=config)
ctx.print_if_visible(f"✓ delete-url: {len(urls_from_cli)} url(s) removed", file=sys.stderr)
if result is not None:
existing = get_field(result, "url")
_set_item_url(result, _remove_urls(existing, list(urls)))
_set_item_url(result, _remove_urls(existing, list(urls_from_cli)))
ctx.emit(result)
return 0

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -126,7 +126,7 @@ class Get_File(sh.Cmdlet):
except Exception as exc:
log(f"Error opening browser: {exc}", file=sys.stderr)
else:
log(f"Opened in browser: {source_path}", file=sys.stderr)
debug(f"Opened in browser: {source_path}", file=sys.stderr)
# Emit result for pipeline
ctx.emit({

View File

@@ -47,6 +47,210 @@ except ImportError:
extract_title = None
def _dedup_tags_preserve_order(tags: List[str]) -> List[str]:
"""Deduplicate tags case-insensitively while preserving order."""
out: List[str] = []
seen: set[str] = set()
for t in tags or []:
if not isinstance(t, str):
continue
s = t.strip()
if not s:
continue
key = s.lower()
if key in seen:
continue
seen.add(key)
out.append(s)
return out
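A quick example of the dedup behavior (first-seen casing wins; blanks and non-strings are dropped):

assert _dedup_tags_preserve_order(["Rock", "rock", " ROCK ", "", "jazz"]) == ["Rock", "jazz"]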
def _extract_subtitle_tags(info: Dict[str, Any]) -> List[str]:
"""Extract subtitle availability tags from a yt-dlp info dict.
Produces multi-valued tags so languages can coexist:
- subs:<lang>
- subs_auto:<lang>
"""
def _langs(value: Any) -> List[str]:
if not isinstance(value, dict):
return []
langs: List[str] = []
for k in value.keys():
if not isinstance(k, str):
continue
lang = k.strip().lower()
if lang:
langs.append(lang)
return sorted(set(langs))
out: List[str] = []
for lang in _langs(info.get("subtitles")):
out.append(f"subs:{lang}")
for lang in _langs(info.get("automatic_captions")):
out.append(f"subs_auto:{lang}")
return out
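Example of the subtitle tags produced for a small yt-dlp info dict (languages are lower-cased, de-duplicated, and sorted within each group):

info = {
    "subtitles": {"EN": [], "de": []},
    "automatic_captions": {"en": []},
}
assert _extract_subtitle_tags(info) == ["subs:de", "subs:en", "subs_auto:en"]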
def _scrape_ytdlp_info(url: str) -> Optional[Dict[str, Any]]:
"""Fetch a yt-dlp info dict without downloading media."""
if not isinstance(url, str) or not url.strip():
return None
url = url.strip()
# Prefer the Python module when available (faster, avoids shell quoting issues).
try:
import yt_dlp # type: ignore
opts: Any = {
"quiet": True,
"no_warnings": True,
"skip_download": True,
"noprogress": True,
"socket_timeout": 15,
"retries": 1,
"playlist_items": "1-10",
}
with yt_dlp.YoutubeDL(opts) as ydl:
info = ydl.extract_info(url, download=False)
return info if isinstance(info, dict) else None
except Exception:
pass
# Fallback to yt-dlp CLI if the module isn't available.
try:
import json as json_module
cmd = [
"yt-dlp",
"-J",
"--no-warnings",
"--skip-download",
"--playlist-items",
"1-10",
url,
]
result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
if result.returncode != 0:
return None
payload = (result.stdout or "").strip()
if not payload:
return None
data = json_module.loads(payload)
return data if isinstance(data, dict) else None
except Exception:
return None
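Usage sketch (placeholder URL; the call returns None when neither the yt_dlp module nor the yt-dlp CLI can extract anything):

info = _scrape_ytdlp_info("https://www.youtube.com/watch?v=<id>")  # <id> is a placeholder
if info:
    print(info.get("title"), info.get("extractor"))  # standard yt-dlp info keys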
def _resolve_candidate_urls_for_item(
result: Any,
backend: Any,
file_hash: str,
config: Dict[str, Any],
) -> List[str]:
"""Get candidate URLs from backend and/or piped result."""
try:
from metadata import normalize_urls
except Exception:
normalize_urls = None # type: ignore[assignment]
urls: List[str] = []
# 1) Backend URL association (best source of truth)
try:
backend_urls = backend.get_url(file_hash, config=config)
if backend_urls:
if normalize_urls:
urls.extend(normalize_urls(backend_urls))
else:
urls.extend([str(u).strip() for u in backend_urls if isinstance(u, str) and str(u).strip()])
except Exception:
pass
# 2) Backend metadata url field
try:
meta = backend.get_metadata(file_hash, config=config)
if isinstance(meta, dict) and meta.get("url"):
if normalize_urls:
urls.extend(normalize_urls(meta.get("url")))
else:
raw = meta.get("url")
if isinstance(raw, list):
urls.extend([str(u).strip() for u in raw if isinstance(u, str) and str(u).strip()])
elif isinstance(raw, str) and raw.strip():
urls.append(raw.strip())
except Exception:
pass
# 3) Piped result fields
def _get(obj: Any, key: str, default: Any = None) -> Any:
if isinstance(obj, dict):
return obj.get(key, default)
return getattr(obj, key, default)
for key in ("url", "webpage_url", "source_url", "target"):
val = _get(result, key, None)
if not val:
continue
if normalize_urls:
urls.extend(normalize_urls(val))
continue
if isinstance(val, str) and val.strip():
urls.append(val.strip())
elif isinstance(val, list):
urls.extend([str(u).strip() for u in val if isinstance(u, str) and str(u).strip()])
meta_field = _get(result, "metadata", None)
if isinstance(meta_field, dict) and meta_field.get("url"):
val = meta_field.get("url")
if normalize_urls:
urls.extend(normalize_urls(val))
elif isinstance(val, list):
urls.extend([str(u).strip() for u in val if isinstance(u, str) and str(u).strip()])
elif isinstance(val, str) and val.strip():
urls.append(val.strip())
# Dedup
return _dedup_tags_preserve_order(urls)
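An illustrative call with a stub backend (real backends come from Store; only get_url/get_metadata are exercised here, and normalize_urls is assumed to pass plain http(s) URLs through unchanged):

class _StubBackend:
    def get_url(self, file_hash, config=None):
        return ["https://example.com/watch/123"]
    def get_metadata(self, file_hash, config=None):
        return {"url": "https://example.com/watch/123"}

candidates = _resolve_candidate_urls_for_item(
    result={"source_url": "https://alt.example/page"},
    backend=_StubBackend(),
    file_hash="deadbeef",
    config={},
)
# -> ["https://example.com/watch/123", "https://alt.example/page"]  (deduplicated)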
def _pick_supported_ytdlp_url(urls: List[str]) -> Optional[str]:
"""Pick the first URL that looks supported by yt-dlp (best effort)."""
if not urls:
return None
def _is_hydrus_file_url(u: str) -> bool:
text = str(u or "").strip().lower()
if not text:
return False
# Hydrus-local file URLs are retrievable blobs, not original source pages.
# yt-dlp generally can't extract meaningful metadata from these.
return ("/get_files/file" in text) and ("hash=" in text)
http_urls: List[str] = []
for u in urls:
text = str(u or "").strip()
if text.lower().startswith(("http://", "https://")):
http_urls.append(text)
# Prefer non-Hydrus URLs for yt-dlp scraping.
candidates = [u for u in http_urls if not _is_hydrus_file_url(u)]
if not candidates:
return None
# Prefer a true support check when the Python module is available.
try:
from SYS.download import is_url_supported_by_ytdlp
for text in candidates:
try:
if is_url_supported_by_ytdlp(text):
return text
except Exception:
continue
except Exception:
pass
# Fallback: use the first non-Hydrus http(s) URL and let extraction decide.
return candidates[0] if candidates else None
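Example: Hydrus /get_files/file blob URLs are skipped, so the original page URL wins (assuming is_url_supported_by_ytdlp accepts it, or the fallback branch is taken):

picked = _pick_supported_ytdlp_url([
    "http://127.0.0.1:45869/get_files/file?hash=abc123",  # Hydrus blob URL: ignored
    "https://www.youtube.com/watch?v=<id>",                # placeholder page URL
])
# -> "https://www.youtube.com/watch?v=<id>"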
_scrape_isbn_metadata = _ol_scrape_isbn_metadata # type: ignore[assignment]
_scrape_openlibrary_metadata = _ol_scrape_openlibrary_metadata # type: ignore[assignment]
@@ -853,7 +1057,12 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
scrape_url = parsed_args.get("scrape")
scrape_requested = scrape_flag_present or scrape_url is not None
if scrape_requested and (not scrape_url or str(scrape_url).strip() == ""):
# Convenience: `-scrape` with no value defaults to `ytdlp` (store-backed URL scrape).
if scrape_flag_present and (scrape_url is None or str(scrape_url).strip() == ""):
scrape_url = "ytdlp"
scrape_requested = True
if scrape_requested and (scrape_url is None or str(scrape_url).strip() == ""):
log("-scrape requires a URL or provider name", file=sys.stderr)
return 1
@@ -861,6 +1070,123 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
if scrape_requested and scrape_url:
import json as json_module
if str(scrape_url).strip().lower() == "ytdlp":
# Scrape metadata from the selected item's URL via yt-dlp (no download),
# then OVERWRITE all existing tags (including title:).
#
# This mode requires a store-backed item (hash + store).
#
# NOTE: We intentionally do not reuse _scrape_url_metadata() here because it
# performs namespace deduplication that would collapse multi-valued tags.
file_hash = normalize_hash(hash_override) or normalize_hash(get_field(result, "hash", None))
store_name = get_field(result, "store", None)
subject_path = get_field(result, "path", None) or get_field(result, "target", None) or get_field(result, "filename", None)
item_title = get_field(result, "title", None) or get_field(result, "name", None) or get_field(result, "filename", None)
# Only run overwrite-apply when the item is store-backed.
# If this is a URL-only PipeObject, fall through to provider mode below.
if file_hash and store_name and str(file_hash).strip().lower() != "unknown" and str(store_name).strip().upper() not in {"PATH", "URL"}:
try:
from Store import Store
storage = Store(config)
backend = storage[str(store_name)]
except Exception as exc:
log(f"Failed to resolve store backend '{store_name}': {exc}", file=sys.stderr)
return 1
candidate_urls = _resolve_candidate_urls_for_item(result, backend, file_hash, config)
scrape_target = _pick_supported_ytdlp_url(candidate_urls)
if not scrape_target:
log(
"No yt-dlp-supported source URL found for this item (Hydrus /get_files/file URLs are ignored). ",
file=sys.stderr,
)
log(
"Add the original page URL to the file (e.g. via add-url), then retry get-tag -scrape.",
file=sys.stderr,
)
return 1
info = _scrape_ytdlp_info(scrape_target)
if not info:
log("yt-dlp could not extract metadata for this URL (unsupported or failed)", file=sys.stderr)
return 1
try:
from metadata import extract_ytdlp_tags
except Exception:
extract_ytdlp_tags = None # type: ignore[assignment]
# Prefer the top-level metadata, but if this is a playlist container, use
# the first entry for per-item fields like subtitles.
info_for_subs = info
entries = info.get("entries") if isinstance(info, dict) else None
if isinstance(entries, list) and entries:
first = entries[0]
if isinstance(first, dict):
info_for_subs = first
tags: List[str] = []
if extract_ytdlp_tags:
try:
tags.extend(extract_ytdlp_tags(info))
except Exception:
pass
# Subtitle availability tags
try:
tags.extend(_extract_subtitle_tags(info_for_subs if isinstance(info_for_subs, dict) else {}))
except Exception:
pass
# Ensure we actually have something to apply.
tags = _dedup_tags_preserve_order(tags)
if not tags:
log("No tags extracted from yt-dlp metadata", file=sys.stderr)
return 1
# Full overwrite: delete all existing tags, then add the new set.
try:
existing_tags, _src = backend.get_tag(file_hash, config=config)
except Exception:
existing_tags = []
try:
if existing_tags:
backend.delete_tag(file_hash, list(existing_tags), config=config)
except Exception as exc:
debug(f"[get_tag] ytdlp overwrite: delete_tag failed: {exc}")
try:
backend.add_tag(file_hash, list(tags), config=config)
except Exception as exc:
log(f"Failed to apply yt-dlp tags: {exc}", file=sys.stderr)
return 1
# Show updated tags
try:
updated_tags, _src = backend.get_tag(file_hash, config=config)
except Exception:
updated_tags = tags
if not updated_tags:
updated_tags = tags
_emit_tags_as_table(
tags_list=list(updated_tags),
file_hash=file_hash,
store=str(store_name),
service_name=None,
config=config,
item_title=str(item_title or "ytdlp"),
path=str(subject_path) if subject_path else None,
subject={
"hash": file_hash,
"store": str(store_name),
"path": str(subject_path) if subject_path else None,
"title": item_title,
"extra": {"applied_provider": "ytdlp", "scrape_url": scrape_target},
},
)
return 0
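Pipeline usage for this mode (illustrative; the selected item must be store-backed with an original page URL attached):

# @1 | get-tag -scrape ytdlp
# @1 | get-tag -scrape          (bare -scrape now defaults to ytdlp)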
if scrape_url.startswith("http://") or scrape_url.startswith("https://"):
# URL scraping (existing behavior)
title, tags, formats, playlist_items = _scrape_url_metadata(scrape_url)
@@ -951,7 +1277,16 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
else:
combined_query = f"{title_hint} {artist_hint}"
query_hint = identifier_query or combined_query or title_hint
# yt-dlp isn't a search provider; it requires a URL.
url_hint: Optional[str] = None
if provider.name == "ytdlp":
raw_url = get_field(result, "url", None) or get_field(result, "source_url", None) or get_field(result, "target", None)
if isinstance(raw_url, list) and raw_url:
raw_url = raw_url[0]
if isinstance(raw_url, str) and raw_url.strip().startswith(("http://", "https://")):
url_hint = raw_url.strip()
query_hint = url_hint or identifier_query or combined_query or title_hint
if not query_hint:
log("No title or identifier available to search for metadata", file=sys.stderr)
return 1
@@ -967,6 +1302,27 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
if not items:
log("No metadata results found", file=sys.stderr)
return 1
# For yt-dlp, emit tags directly (there is no meaningful multi-result selection step).
if provider.name == "ytdlp":
try:
tags = [str(t) for t in provider.to_tags(items[0]) if t is not None]
except Exception:
tags = []
if not tags:
log("No tags extracted from yt-dlp metadata", file=sys.stderr)
return 1
_emit_tags_as_table(
tags_list=list(tags),
file_hash=None,
store="url",
service_name=None,
config=config,
item_title=str(items[0].get("title") or "ytdlp"),
path=None,
subject={"provider": "ytdlp", "url": str(query_hint)},
)
return 0
from result_table import ResultTable
table = ResultTable(f"Metadata: {provider.name}")
@@ -1040,7 +1396,10 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
return 0
# Apply tags to the store backend (no sidecar writing here).
apply_tags = _filter_scraped_tags([str(t) for t in result_tags if t is not None])
if str(result_provider).strip().lower() == "ytdlp":
apply_tags = [str(t) for t in result_tags if t is not None]
else:
apply_tags = _filter_scraped_tags([str(t) for t in result_tags if t is not None])
if not apply_tags:
log("No applicable scraped tags to apply (title:/artist:/source: are skipped)", file=sys.stderr)
return 0
@@ -1167,6 +1526,11 @@ try:
except Exception:
_SCRAPE_CHOICES = ["itunes", "openlibrary", "googlebooks", "google", "musicbrainz"]
# Special scrape mode: pull tags from an item's URL via yt-dlp (no download)
if "ytdlp" not in _SCRAPE_CHOICES:
_SCRAPE_CHOICES.append("ytdlp")
_SCRAPE_CHOICES = sorted(_SCRAPE_CHOICES)
class Get_Tag(Cmdlet):
"""Class-based get-tag cmdlet with self-registration."""
@@ -1195,7 +1559,7 @@ class Get_Tag(Cmdlet):
CmdletArg(
name="-scrape",
type="string",
description="Scrape metadata from URL or provider name (returns tags as JSON or table)",
description="Scrape metadata from URL/provider, or use 'ytdlp' to scrape from the item's URL and overwrite tags",
required=False,
choices=_SCRAPE_CHOICES,
)

View File

@@ -14,10 +14,11 @@ import httpx
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence, Tuple
from urllib.parse import urlsplit, quote, urljoin
from urllib.parse import urlsplit, quote, urljoin, unquote
from SYS.logger import log, debug
from API.HTTP import HTTPClient
from SYS.pipeline_progress import PipelineProgress
from SYS.utils import ensure_directory, unique_path, unique_preserve_order
from . import _shared as sh
@@ -31,54 +32,6 @@ get_field = sh.get_field
parse_cmdlet_args = sh.parse_cmdlet_args
import pipeline as pipeline_context
def _live_ui_and_pipe_index() -> tuple[Optional[Any], int]:
ui = None
try:
ui = pipeline_context.get_live_progress() if hasattr(pipeline_context, "get_live_progress") else None
except Exception:
ui = None
pipe_idx: int = 0
try:
stage_ctx = pipeline_context.get_stage_context() if hasattr(pipeline_context, "get_stage_context") else None
maybe_idx = getattr(stage_ctx, "pipe_index", None) if stage_ctx is not None else None
if isinstance(maybe_idx, int):
pipe_idx = int(maybe_idx)
except Exception:
pipe_idx = 0
return ui, pipe_idx
def _begin_live_steps(total_steps: int) -> None:
"""Declare the total number of steps for this cmdlet run (per-pipe)."""
ui, pipe_idx = _live_ui_and_pipe_index()
if ui is None:
return
try:
begin = getattr(ui, "begin_pipe_steps", None)
if callable(begin):
begin(int(pipe_idx), total_steps=int(total_steps))
except Exception:
return
def _step(text: str) -> None:
"""Emit a *new* step.
Each call increments the step counter and advances percent automatically.
"""
ui, pipe_idx = _live_ui_and_pipe_index()
if ui is None:
return
try:
adv = getattr(ui, "advance_pipe_step", None)
if callable(adv):
adv(int(pipe_idx), str(text))
except Exception:
return
# ============================================================================
# CMDLET Metadata Declaration
# ============================================================================
@@ -115,6 +68,10 @@ USER_AGENT = (
DEFAULT_VIEWPORT: dict[str, int] = {"width": 1920, "height": 1080}
ARCHIVE_TIMEOUT = 30.0
# WebP has a hard maximum dimension per side.
# Pillow typically fails with: "encoding error 5: Image size exceeds WebP limit of 16383 pixels"
WEBP_MAX_DIM = 16_383
# Configurable selectors for specific websites
SITE_SELECTORS: Dict[str, List[str]] = {
"twitter.com": [
@@ -200,6 +157,80 @@ def _slugify_url(url: str) -> str:
return slug[:100]
def _tags_from_url(url: str) -> List[str]:
"""Derive simple tags from a URL.
- site:<domain> (strips leading www.)
- title:<slug> derived from the last path segment, with extension removed
and separators (-, _, %) normalized to spaces.
"""
u = str(url or "").strip()
if not u:
return []
parsed = None
try:
parsed = urlsplit(u)
host = str(getattr(parsed, "hostname", None) or getattr(parsed, "netloc", "") or "").strip().lower()
except Exception:
parsed = None
host = ""
if host:
# Drop credentials and port if present.
if "@" in host:
host = host.rsplit("@", 1)[-1]
if ":" in host:
host = host.split(":", 1)[0]
if host.startswith("www."):
host = host[len("www.") :]
path = ""
if parsed is not None:
try:
path = str(getattr(parsed, "path", "") or "")
except Exception:
path = ""
last = ""
if path:
try:
last = path.rsplit("/", 1)[-1]
except Exception:
last = ""
try:
last = unquote(last or "")
except Exception:
last = last or ""
if last and "." in last:
# Drop a single trailing extension (e.g. .html, .php).
last = last.rsplit(".", 1)[0]
for sep in ("_", "-", "%"):
if last and sep in last:
last = last.replace(sep, " ")
title = " ".join(str(last or "").split()).strip().lower()
tags: List[str] = []
if host:
tags.append(f"site:{host}")
if title:
tags.append(f"title:{title}")
return tags
def _title_from_url(url: str) -> str:
"""Return the normalized title derived from a URL's last path segment."""
for t in _tags_from_url(url):
if str(t).lower().startswith("title:"):
return str(t)[len("title:") :].strip()
return ""
def _normalise_format(fmt: Optional[str]) -> str:
"""Normalize output format to valid values."""
if not fmt:
@@ -218,6 +249,89 @@ def _format_suffix(fmt: str) -> str:
return ".jpg"
return f".{fmt}"
def _convert_to_webp(
src_png: Path,
dst_webp: Path,
*,
quality: int = 90,
method: int = 6,
max_dim: int = WEBP_MAX_DIM,
downscale_if_oversize: bool = True,
) -> bool:
"""Convert a PNG screenshot to WebP via Pillow.
Playwright does not currently support emitting WebP directly.
"""
if not src_png or not Path(src_png).is_file():
raise ScreenshotError(f"Source image not found: {src_png}")
dst_webp = Path(dst_webp)
try:
dst_webp.parent.mkdir(parents=True, exist_ok=True)
except Exception:
pass
try:
from PIL import Image
except Exception as exc:
raise ScreenshotError(f"Pillow is required for webp conversion: {exc}") from exc
# Write atomically to avoid partial files if conversion is interrupted.
tmp_path = unique_path(dst_webp.with_suffix(".tmp.webp"))
try:
with Image.open(src_png) as im:
did_downscale = False
save_kwargs: Dict[str, Any] = {
"format": "WEBP",
"quality": int(quality),
"method": int(method),
}
# Preserve alpha when present; Pillow handles it for WEBP.
# Normalize palette images to RGBA to avoid odd palette artifacts.
if im.mode == "P":
im = im.convert("RGBA")
# WebP enforces a hard max dimension per side (16383px).
# When full-page captures are very tall, downscale proportionally to fit.
try:
w, h = im.size
except Exception:
w, h = 0, 0
if downscale_if_oversize and isinstance(max_dim, int) and max_dim > 0 and (w > max_dim or h > max_dim):
scale = 1.0
try:
scale = min(float(max_dim) / float(w), float(max_dim) / float(h))
except Exception:
scale = 1.0
if scale > 0.0 and scale < 1.0:
new_w = max(1, int(w * scale))
new_h = max(1, int(h * scale))
debug(
f"[_convert_to_webp] Image exceeds WebP limit ({w}x{h}); downscaling -> {new_w}x{new_h}"
)
try:
resample = getattr(getattr(Image, "Resampling", Image), "LANCZOS", None)
if resample is None:
resample = getattr(Image, "LANCZOS", 1)
im = im.resize((new_w, new_h), resample=resample)
did_downscale = True
except Exception as exc:
debug(f"[_convert_to_webp] Downscale failed; attempting direct WEBP save anyway: {exc}")
im.save(tmp_path, **save_kwargs)
tmp_path.replace(dst_webp)
return bool(did_downscale)
finally:
try:
tmp_path.unlink(missing_ok=True)
except Exception:
pass
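Usage sketch (illustrative paths; Pillow must be installed, and the return value reports whether the image had to be downscaled to fit the WebP limit):

src = Path("capture.png")
dst = Path("capture.webp")
downscaled = _convert_to_webp(src, dst, quality=90)
if downscaled:
    # The caller above keeps the original PNG in this case (see the warning it appends).
    print(f"downscaled to fit {WEBP_MAX_DIM}px; kept {src.name}")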
def _matched_site_selectors(url: str) -> List[str]:
"""Return SITE_SELECTORS for a matched domain; empty if no match.
@@ -231,6 +345,16 @@ def _matched_site_selectors(url: str) -> List[str]:
return sels
def _selectors_for_url(url: str) -> List[str]:
"""Return selectors to try for a URL.
For now, prefer a minimal behavior: only return known SITE_SELECTORS.
(The cmdlet already falls back to full-page capture when no selectors match.)
"""
return _matched_site_selectors(url)
def _platform_preprocess(url: str, page: Any, warnings: List[str], timeout_ms: int = 10_000) -> None:
"""Best-effort page tweaks for popular platforms before capture."""
try:
@@ -366,11 +490,11 @@ def _prepare_output_path(options: ScreenshotOptions) -> Path:
return unique_path(path)
def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str]) -> None:
def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str], progress: PipelineProgress) -> None:
"""Capture screenshot using Playwright."""
debug(f"[_capture] Starting capture for {options.url} -> {destination}")
try:
_step("loading launching browser")
progress.step("loading launching browser")
tool = options.playwright_tool or PlaywrightTool({})
# Ensure Chromium engine is used for the screen-shot cmdlet (force for consistency)
@@ -405,16 +529,16 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str])
try:
with tool.open_page(headless=headless) as page:
_step("loading navigating")
progress.step("loading navigating")
debug(f"Navigating to {options.url}...")
try:
tool.goto(page, options.url)
debug("Page loaded successfully")
_step("loading page loaded")
progress.step("loading page loaded")
except PlaywrightTimeoutError:
warnings.append("navigation timeout; capturing current page state")
debug("Navigation timeout; proceeding with current state")
_step("loading navigation timeout")
progress.step("loading navigation timeout")
# Skip article lookup by default (wait_for_article defaults to False)
if options.wait_for_article:
@@ -430,9 +554,9 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str])
debug(f"Waiting {options.wait_after_load}s for page stabilization...")
time.sleep(min(10.0, max(0.0, options.wait_after_load)))
_step("loading stabilized")
progress.step("loading stabilized")
_step("capturing preparing")
progress.step("capturing preparing")
if options.replace_video_posters:
debug("Replacing video elements with posters...")
page.evaluate(
@@ -453,7 +577,7 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str])
if options.prefer_platform_target and format_name != "pdf":
debug(f"[_capture] Target capture enabled")
debug("Attempting platform-specific content capture...")
_step("capturing locating target")
progress.step("capturing locating target")
try:
_platform_preprocess(options.url, page, warnings)
except Exception as e:
@@ -478,7 +602,7 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str])
el.scroll_into_view_if_needed(timeout=1000)
except Exception:
pass
_step("capturing output")
progress.step("capturing output")
debug(f"Capturing element to {destination}...")
el.screenshot(path=str(destination), type=("jpeg" if format_name == "jpeg" else None))
element_captured = True
@@ -489,14 +613,14 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str])
debug(f"Failed to capture element: {exc}")
# Fallback to default capture paths
if element_captured:
_step("capturing saved")
progress.step("capturing saved")
elif format_name == "pdf":
debug("Generating PDF...")
page.emulate_media(media="print")
_step("capturing output")
progress.step("capturing output")
page.pdf(path=str(destination), print_background=True)
debug(f"PDF saved to {destination}")
_step("capturing saved")
progress.step("capturing saved")
else:
debug(f"Capturing full page to {destination}...")
screenshot_kwargs: Dict[str, Any] = {"path": str(destination)}
@@ -504,20 +628,20 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str])
screenshot_kwargs["type"] = "jpeg"
screenshot_kwargs["quality"] = 90
if options.full_page:
_step("capturing output")
progress.step("capturing output")
page.screenshot(full_page=True, **screenshot_kwargs)
else:
article = page.query_selector("article")
if article is not None:
article_kwargs = dict(screenshot_kwargs)
article_kwargs.pop("full_page", None)
_step("capturing output")
progress.step("capturing output")
article.screenshot(**article_kwargs)
else:
_step("capturing output")
progress.step("capturing output")
page.screenshot(**screenshot_kwargs)
debug(f"Screenshot saved to {destination}")
_step("capturing saved")
progress.step("capturing saved")
except Exception as exc:
debug(f"[_capture] Exception launching browser/page: {exc}")
msg = str(exc).lower()
@@ -532,7 +656,7 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str])
raise ScreenshotError(f"Failed to capture screenshot: {exc}") from exc
def _capture_screenshot(options: ScreenshotOptions) -> ScreenshotResult:
def _capture_screenshot(options: ScreenshotOptions, progress: PipelineProgress) -> ScreenshotResult:
"""Capture a screenshot for the given options."""
debug(f"[_capture_screenshot] Preparing capture for {options.url}")
requested_format = _normalise_format(options.output_format)
@@ -543,8 +667,8 @@ def _capture_screenshot(options: ScreenshotOptions) -> ScreenshotResult:
will_convert = requested_format == "webp"
will_archive = bool(options.archive and options.url)
total_steps = 9 + (1 if will_target else 0) + (1 if will_convert else 0) + (1 if will_archive else 0)
_begin_live_steps(total_steps)
_step("loading starting")
progress.begin_steps(total_steps)
progress.step("loading starting")
# Playwright screenshots do not natively support WebP output.
# Capture as PNG, then convert via Pillow.
@@ -553,17 +677,22 @@ def _capture_screenshot(options: ScreenshotOptions) -> ScreenshotResult:
capture_path = unique_path(destination.with_suffix(".png"))
debug(f"[_capture_screenshot] Requested webp; capturing intermediate png -> {capture_path}")
options.output_format = "png"
_capture(options, capture_path, warnings)
_capture(options, capture_path, warnings, progress)
if requested_format == "webp":
_step("capturing converting to webp")
progress.step("capturing converting to webp")
debug(f"[_capture_screenshot] Converting png -> webp: {destination}")
try:
_convert_to_webp(capture_path, destination)
try:
capture_path.unlink(missing_ok=True)
except Exception:
pass
did_downscale = _convert_to_webp(capture_path, destination)
if did_downscale:
warnings.append(
f"webp conversion used downscaling to fit {WEBP_MAX_DIM}px limit; keeping original png: {capture_path.name}"
)
else:
try:
capture_path.unlink(missing_ok=True)
except Exception:
pass
except Exception as exc:
warnings.append(f"webp conversion failed; keeping png: {exc}")
destination = capture_path
@@ -572,7 +701,7 @@ def _capture_screenshot(options: ScreenshotOptions) -> ScreenshotResult:
url: List[str] = [options.url] if options.url else []
archive_url: List[str] = []
if options.archive and options.url:
_step("capturing archiving")
progress.step("capturing archiving")
debug(f"[_capture_screenshot] Archiving enabled for {options.url}")
archives, archive_warnings = _archive_url(options.url, options.archive_timeout)
archive_url.extend(archives)
@@ -580,7 +709,7 @@ def _capture_screenshot(options: ScreenshotOptions) -> ScreenshotResult:
if archives:
url = unique_preserve_order([*url, *archives])
_step("capturing finalized")
progress.step("capturing finalized")
applied_tag = unique_preserve_order(list(tag for tag in options.tag if tag.strip()))
@@ -627,6 +756,8 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
)
return 1
progress = PipelineProgress(pipeline_context)
# ========================================================================
# ARGUMENT PARSING
# ========================================================================
@@ -685,32 +816,6 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
debug(f"[_run] url to process: {[u for u, _ in url_to_process]}")
# If the caller isn't running the shared pipeline Live progress UI (e.g. direct
# cmdlet execution), start a minimal local pipeline progress panel so this cmdlet
# still shows step-level progress.
local_progress_ui = None
try:
existing_ui = pipeline_context.get_live_progress() if hasattr(pipeline_context, "get_live_progress") else None
except Exception:
existing_ui = None
try:
if existing_ui is None and bool(getattr(sys.stderr, "isatty", lambda: False)()):
from models import PipelineLiveProgress
local_progress_ui = PipelineLiveProgress(["screen-shot"], enabled=True)
local_progress_ui.start()
try:
if hasattr(pipeline_context, "set_live_progress"):
pipeline_context.set_live_progress(local_progress_ui)
except Exception:
pass
try:
local_progress_ui.begin_pipe(0, total_items=len(url_to_process), items_preview=[u for u, _ in url_to_process])
except Exception:
pass
except Exception:
local_progress_ui = None
# ========================================================================
# OUTPUT DIRECTORY RESOLUTION - Priority chain
# ========================================================================
@@ -749,6 +854,18 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
ensure_directory(screenshot_dir)
# If the caller isn't running the shared pipeline Live progress UI (e.g. direct
# cmdlet execution), start a minimal local pipeline progress panel so this cmdlet
# still shows step-level progress.
try:
progress.ensure_local_ui(
label="screen-shot",
total_items=len(url_to_process),
items_preview=[u for u, _ in url_to_process],
)
except Exception:
pass
# ========================================================================
# PREPARE SCREENSHOT OPTIONS
# ========================================================================
@@ -850,7 +967,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
options.target_selectors = auto_selectors
debug(f"[screen_shot] Auto selectors matched for url: {auto_selectors}")
screenshot_result = _capture_screenshot(options)
screenshot_result = _capture_screenshot(options, progress)
# Log results and warnings
debug(f"Screenshot captured to {screenshot_result.path}")
@@ -875,15 +992,18 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
capture_date = datetime.now().date().isoformat()
upstream_title = _clean_title(_extract_item_title(origin_item))
display_title = upstream_title or url
url_title = _title_from_url(url)
display_title = upstream_title or url_title or url
upstream_tags = _extract_item_tags(origin_item)
filtered_upstream_tags = [
t for t in upstream_tags
if not str(t).strip().lower().startswith(("type:", "date:"))
]
url_tags = _tags_from_url(url)
merged_tags = unique_preserve_order(
["type:screenshot", f"date:{capture_date}"] + filtered_upstream_tags
["type:screenshot", f"date:{capture_date}"] + filtered_upstream_tags + url_tags
)
pipe_obj = create_pipe_object_result(
@@ -910,11 +1030,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
all_emitted.append(pipe_obj)
# If we created a local progress UI, advance it per completed item.
if local_progress_ui is not None:
try:
local_progress_ui.on_emit(0, pipe_obj)
except Exception:
pass
progress.on_emit(pipe_obj)
except ScreenshotError as exc:
log(f"Error taking screenshot of {url}: {exc}", file=sys.stderr)
@@ -925,23 +1041,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
traceback.print_exc(file=sys.stderr)
exit_code = 1
try:
if local_progress_ui is not None:
try:
local_progress_ui.finish_pipe(0, force_complete=True)
except Exception:
pass
finally:
if local_progress_ui is not None:
try:
local_progress_ui.stop()
except Exception:
pass
try:
if hasattr(pipeline_context, "set_live_progress"):
pipeline_context.set_live_progress(None)
except Exception:
pass
progress.close_local_ui(force_complete=True)
if not all_emitted:
log(f"No screenshots were successfully captured", file=sys.stderr)