dfdsf
This commit is contained in:
@@ -525,8 +525,8 @@ def parse_cmdlet_args(args: Sequence[str], cmdlet_spec: Dict[str, Any] | Cmdlet)
|
||||
token_lower = token.lower()
|
||||
|
||||
# Legacy guidance: -hash/--hash was removed in favor of -query "hash:...".
|
||||
# We don't error hard here because some cmdlets also accept free-form args.
|
||||
if token_lower in {"-hash", "--hash"}:
|
||||
# However, some cmdlets may explicitly re-introduce a -hash flag.
|
||||
if token_lower in {"-hash", "--hash"} and token_lower not in arg_spec_map:
|
||||
try:
|
||||
log("Legacy flag -hash is no longer supported. Use: -query \"hash:<sha256>\"", file=sys.stderr)
|
||||
except Exception:
|
||||
|
||||
@@ -3,6 +3,7 @@ from __future__ import annotations
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Sequence, Tuple
|
||||
import sys
|
||||
import re
|
||||
|
||||
from SYS.logger import log
|
||||
|
||||
@@ -25,13 +26,12 @@ class Add_Note(Cmdlet):
|
||||
super().__init__(
|
||||
name="add-note",
|
||||
summary="Add file store note",
|
||||
usage="add-note -store <store> [-query \"hash:<sha256>\"] <name> <text...>",
|
||||
usage="add-note (-query \"title:<title>,text:<text>\") [ -store <store> -hash <sha256> | <piped> ]",
|
||||
alias=[""],
|
||||
arg=[
|
||||
SharedArgs.STORE,
|
||||
CmdletArg("hash", type="string", required=False, description="Target file hash (sha256). When omitted, uses piped item hash."),
|
||||
SharedArgs.QUERY,
|
||||
CmdletArg("name", type="string", required=True, description="The note name/key to set (e.g. 'comment', 'lyric')."),
|
||||
CmdletArg("text", type="string", required=True, description="Note text/content to store.", variadic=True),
|
||||
],
|
||||
detail=[
|
||||
"""
|
||||
@@ -47,6 +47,68 @@ class Add_Note(Cmdlet):
|
||||
pass
|
||||
self.register()
|
||||
|
||||
@staticmethod
|
||||
def _commas_to_spaces_outside_quotes(text: str) -> str:
|
||||
buf: List[str] = []
|
||||
quote: Optional[str] = None
|
||||
escaped = False
|
||||
for ch in str(text or ""):
|
||||
if escaped:
|
||||
buf.append(ch)
|
||||
escaped = False
|
||||
continue
|
||||
if ch == "\\" and quote is not None:
|
||||
buf.append(ch)
|
||||
escaped = True
|
||||
continue
|
||||
if ch in ('"', "'"):
|
||||
if quote is None:
|
||||
quote = ch
|
||||
elif quote == ch:
|
||||
quote = None
|
||||
buf.append(ch)
|
||||
continue
|
||||
if ch == "," and quote is None:
|
||||
buf.append(" ")
|
||||
continue
|
||||
buf.append(ch)
|
||||
return "".join(buf)
|
||||
|
||||
@staticmethod
|
||||
def _parse_note_query(query: str) -> Tuple[Optional[str], Optional[str]]:
    """Parse the note payload (title and text) from a ``-query`` value.

    Expected form::

        title:<title>,text:<text>

    Commas are treated as separators when not inside quotes: they are
    rewritten to spaces before parsing, so an unquoted comma inside the
    text is also turned into a space.

    When ``cli_syntax.parse_query`` / ``get_field`` can be imported they do
    the parsing. Otherwise a best-effort regex fallback is used, in which
    ``text:`` captures everything up to the end of the query.

    Args:
        query: Raw ``-query`` string; falsy values are treated as empty.

    Returns:
        ``(title, text)``. Each element is ``None`` when the field is
        missing or empty after stripping.
    """
    raw = str(query or "").strip()
    if not raw:
        return None, None

    # Deliberately broad: any failure while importing the optional parser
    # simply selects the regex fallback below.
    try:
        from cli_syntax import parse_query, get_field
    except Exception:
        parse_query = None  # type: ignore
        get_field = None  # type: ignore

    normalized = Add_Note._commas_to_spaces_outside_quotes(raw)

    if callable(parse_query) and callable(get_field):
        parsed = parse_query(normalized)
        title_s = str(get_field(parsed, "title") or "").strip()
        text_s = str(get_field(parsed, "text") or "").strip()
        return (title_s or None, text_s or None)

    # Fallback: best-effort regex.
    # The title may be quoted ("my title" / 'my title') or a bare token; a
    # bare-token-only pattern would cut a quoted title at its first space.
    title_match = re.search(
        r"""\btitle\s*:\s*(?:"([^"]*)"|'([^']*)'|([^,\s]+))""",
        normalized,
        flags=re.IGNORECASE,
    )
    # DOTALL lets multi-line note text match; without it ".+$" cannot cross a
    # newline and the whole text field would be reported as missing.
    text_match = re.search(
        r"\btext\s*:\s*(.+)$",
        normalized,
        flags=re.IGNORECASE | re.DOTALL,
    )

    note_name = ""
    if title_match:
        note_name = next(
            (grp for grp in title_match.groups() if grp is not None), ""
        ).strip()

    note_text = text_match.group(1).strip() if text_match else ""
    # Drop one pair of enclosing quotes, but only when that quote character
    # does not also occur inside (so '"a" and "b"' is left untouched).
    if (
        len(note_text) >= 2
        and note_text[0] in ('"', "'")
        and note_text[-1] == note_text[0]
        and note_text[0] not in note_text[1:-1]
    ):
        note_text = note_text[1:-1].strip()

    return (note_name or None, note_text or None)
|
||||
|
||||
def _resolve_hash(self, raw_hash: Optional[str], raw_path: Optional[str], override_hash: Optional[str]) -> Optional[str]:
|
||||
resolved = normalize_hash(override_hash) if override_hash else normalize_hash(raw_hash)
|
||||
if resolved:
|
||||
@@ -72,32 +134,42 @@ class Add_Note(Cmdlet):
|
||||
parsed = parse_cmdlet_args(args, self)
|
||||
|
||||
store_override = parsed.get("store")
|
||||
query_hash = sh.parse_single_hash_query(parsed.get("query"))
|
||||
if parsed.get("query") and not query_hash:
|
||||
log("[add_note] Error: -query must be of the form hash:<sha256>", file=sys.stderr)
|
||||
return 1
|
||||
note_name = str(parsed.get("name") or "").strip()
|
||||
text_parts = parsed.get("text")
|
||||
|
||||
if not note_name:
|
||||
log("[add_note] Error: Requires <name>", file=sys.stderr)
|
||||
hash_override = normalize_hash(parsed.get("hash"))
|
||||
note_name, note_text = self._parse_note_query(str(parsed.get("query") or ""))
|
||||
if not note_name or not note_text:
|
||||
log("[add_note] Error: -query must include title:<title> and text:<text>", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
if isinstance(text_parts, list):
|
||||
note_text = " ".join([str(p) for p in text_parts]).strip()
|
||||
else:
|
||||
note_text = str(text_parts or "").strip()
|
||||
if hash_override and not store_override:
|
||||
log("[add_note] Error: -hash requires -store <store>", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Note text can be omitted when upstream stages provide it (e.g. download-media --write-sub
|
||||
# attaches notes.sub). In that case we resolve per-item below.
|
||||
user_provided_text = bool(note_text)
|
||||
explicit_target = bool(hash_override and store_override)
|
||||
|
||||
results = normalize_result_input(result)
|
||||
if results and explicit_target:
|
||||
# Direct targeting mode: apply note once to the explicit target and
|
||||
# pass through any piped items unchanged.
|
||||
try:
|
||||
store_registry = Store(config)
|
||||
backend = store_registry[str(store_override)]
|
||||
ok = bool(backend.set_note(str(hash_override), note_name, note_text, config=config))
|
||||
if ok:
|
||||
ctx.print_if_visible(f"✓ add-note: 1 item in '{store_override}'", file=sys.stderr)
|
||||
except Exception as exc:
|
||||
log(f"[add_note] Error: Failed to set note: {exc}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
for res in results:
|
||||
ctx.emit(res)
|
||||
return 0
|
||||
|
||||
if not results:
|
||||
if store_override and query_hash:
|
||||
results = [{"store": str(store_override), "hash": query_hash}]
|
||||
if explicit_target:
|
||||
# Allow standalone use (no piped input) and enable piping the target forward.
|
||||
results = [{"store": str(store_override), "hash": hash_override}]
|
||||
else:
|
||||
log("[add_note] Error: Requires piped item(s) or -store and -query \"hash:<sha256>\"", file=sys.stderr)
|
||||
log("[add_note] Error: Requires piped item(s) from add-file, or explicit -store <store> and -hash <sha256>", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
store_registry = Store(config)
|
||||
@@ -106,55 +178,12 @@ class Add_Note(Cmdlet):
|
||||
# Batch write plan: store -> [(hash, name, text), ...]
|
||||
note_ops: Dict[str, List[Tuple[str, str, str]]] = {}
|
||||
|
||||
# Optional global fallback for note text from pipeline values.
|
||||
# Allows patterns like: ... | add-note sub
|
||||
pipeline_default_text = None
|
||||
if not user_provided_text:
|
||||
try:
|
||||
pipeline_default_text = ctx.load_value(note_name)
|
||||
except Exception:
|
||||
pipeline_default_text = None
|
||||
if isinstance(pipeline_default_text, list):
|
||||
pipeline_default_text = " ".join([str(x) for x in pipeline_default_text]).strip()
|
||||
elif pipeline_default_text is not None:
|
||||
pipeline_default_text = str(pipeline_default_text).strip()
|
||||
|
||||
for res in results:
|
||||
if not isinstance(res, dict):
|
||||
ctx.emit(res)
|
||||
continue
|
||||
|
||||
# Resolve note text for this item when not provided explicitly.
|
||||
item_note_text = note_text
|
||||
if not user_provided_text:
|
||||
# Prefer item-scoped notes dict.
|
||||
candidate = None
|
||||
try:
|
||||
notes = res.get("notes")
|
||||
if isinstance(notes, dict):
|
||||
candidate = notes.get(note_name)
|
||||
except Exception:
|
||||
candidate = None
|
||||
|
||||
# Also allow direct field fallback: res["sub"], etc.
|
||||
if candidate is None:
|
||||
try:
|
||||
candidate = res.get(note_name)
|
||||
except Exception:
|
||||
candidate = None
|
||||
|
||||
if candidate is None:
|
||||
candidate = pipeline_default_text
|
||||
|
||||
if isinstance(candidate, list):
|
||||
item_note_text = " ".join([str(x) for x in candidate]).strip()
|
||||
else:
|
||||
item_note_text = str(candidate or "").strip()
|
||||
|
||||
if not item_note_text:
|
||||
log(f"[add_note] Warning: No note text found for '{note_name}'; skipping", file=sys.stderr)
|
||||
ctx.emit(res)
|
||||
continue
|
||||
|
||||
store_name = str(store_override or res.get("store") or "").strip()
|
||||
raw_hash = res.get("hash")
|
||||
@@ -167,7 +196,7 @@ class Add_Note(Cmdlet):
|
||||
resolved_hash = self._resolve_hash(
|
||||
raw_hash=str(raw_hash) if raw_hash else None,
|
||||
raw_path=str(raw_path) if raw_path else None,
|
||||
override_hash=str(query_hash) if query_hash else None,
|
||||
override_hash=str(hash_override) if hash_override else None,
|
||||
)
|
||||
if not resolved_hash:
|
||||
log("[add_note] Warning: Item missing usable hash; skipping", file=sys.stderr)
|
||||
|
||||
@@ -254,6 +254,22 @@ def list_formats(
|
||||
return None
|
||||
|
||||
formats = info.get("formats") or []
|
||||
|
||||
# Some URLs (notably playlist contexts) yield a playlist-shaped payload with
|
||||
# `entries` rather than a direct video payload. If so, try to pull formats
|
||||
# from the first concrete entry.
|
||||
if (not formats) and isinstance(info.get("entries"), list):
|
||||
try:
|
||||
for entry in info.get("entries") or []:
|
||||
if not isinstance(entry, dict):
|
||||
continue
|
||||
entry_formats = entry.get("formats")
|
||||
if isinstance(entry_formats, list) and entry_formats:
|
||||
formats = entry_formats
|
||||
break
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if not isinstance(formats, list) or not formats:
|
||||
log("No formats available", file=sys.stderr)
|
||||
return None
|
||||
@@ -704,7 +720,30 @@ def download_media(
|
||||
session_id = None
|
||||
first_section_info = {}
|
||||
if ytdl_options.get("download_sections"):
|
||||
session_id, first_section_info = _download_with_sections_via_cli(opts.url, ytdl_options, ytdl_options.get("download_sections", []), quiet=opts.quiet)
|
||||
# The CLI path emits yt-dlp's own progress output; pause the pipeline Live UI
|
||||
# so those progress bars remain visible instead of being clobbered.
|
||||
try:
|
||||
from contextlib import nullcontext
|
||||
except Exception:
|
||||
nullcontext = None # type: ignore
|
||||
|
||||
suspend = getattr(pipeline_context, "suspend_live_progress", None)
|
||||
cm = suspend() if callable(suspend) else (nullcontext() if nullcontext else None)
|
||||
if cm is None:
|
||||
session_id, first_section_info = _download_with_sections_via_cli(
|
||||
opts.url,
|
||||
ytdl_options,
|
||||
ytdl_options.get("download_sections", []),
|
||||
quiet=opts.quiet,
|
||||
)
|
||||
else:
|
||||
with cm:
|
||||
session_id, first_section_info = _download_with_sections_via_cli(
|
||||
opts.url,
|
||||
ytdl_options,
|
||||
ytdl_options.get("download_sections", []),
|
||||
quiet=opts.quiet,
|
||||
)
|
||||
info = None
|
||||
else:
|
||||
with yt_dlp.YoutubeDL(ytdl_options) as ydl: # type: ignore[arg-type]
|
||||
@@ -1384,21 +1423,50 @@ class Download_Media(Cmdlet):
|
||||
item["title"] = item.get("name") or item.get("target") or item.get("path") or "Result"
|
||||
|
||||
# Keep the full payload for history/inspection, but display a focused table.
|
||||
display_row = {
|
||||
"title": item.get("title"),
|
||||
"store": item.get("store"),
|
||||
"hash": item.get("hash") or item.get("file_hash") or item.get("sha256"),
|
||||
}
|
||||
# Use shared extractors so Ext/Size/Store/Hash remain consistent everywhere.
|
||||
try:
|
||||
from result_table import build_display_row
|
||||
except Exception:
|
||||
build_display_row = None # type: ignore
|
||||
|
||||
if callable(build_display_row):
|
||||
display_row = build_display_row(item, keys=["title", "store", "hash", "ext", "size"])
|
||||
else:
|
||||
display_row = {
|
||||
"title": item.get("title"),
|
||||
"store": item.get("store"),
|
||||
"hash": item.get("hash") or item.get("file_hash") or item.get("sha256"),
|
||||
"ext": str(item.get("ext") or ""),
|
||||
"size": item.get("size") or item.get("size_bytes"),
|
||||
}
|
||||
table.add_result(display_row)
|
||||
results_list.append(item)
|
||||
|
||||
pipeline_context.set_current_stage_table(table)
|
||||
pipeline_context.set_last_result_table(table, results_list)
|
||||
|
||||
get_stderr_console().print(table)
|
||||
setattr(table, "_rendered_by_cmdlet", True)
|
||||
if not Confirm.ask("Continue anyway?", default=False, console=get_stderr_console()):
|
||||
return False
|
||||
try:
|
||||
from contextlib import nullcontext
|
||||
except Exception:
|
||||
nullcontext = None # type: ignore
|
||||
|
||||
suspend = getattr(pipeline_context, "suspend_live_progress", None)
|
||||
cm = suspend() if callable(suspend) else (nullcontext() if nullcontext else None)
|
||||
if cm is None:
|
||||
get_stderr_console().print(table)
|
||||
setattr(table, "_rendered_by_cmdlet", True)
|
||||
if not Confirm.ask("Continue anyway?", default=False, console=get_stderr_console()):
|
||||
return False
|
||||
else:
|
||||
with cm:
|
||||
get_stderr_console().print(table)
|
||||
setattr(table, "_rendered_by_cmdlet", True)
|
||||
if not Confirm.ask("Continue anyway?", default=False, console=get_stderr_console()):
|
||||
try:
|
||||
pipeline_context.request_pipeline_stop(reason="duplicate-url declined", exit_code=0)
|
||||
except Exception:
|
||||
pass
|
||||
return False
|
||||
return True
|
||||
|
||||
def _preflight_url_duplicates_bulk(urls: Sequence[str]) -> bool:
|
||||
@@ -1597,15 +1665,45 @@ class Download_Media(Cmdlet):
|
||||
hit = backend_hits[0]
|
||||
title = hit.get("title") or hit.get("name") or hit.get("target") or hit.get("path") or "(exists)"
|
||||
file_hash = hit.get("hash") or hit.get("file_hash") or hit.get("sha256") or ""
|
||||
|
||||
try:
|
||||
from result_table import build_display_row
|
||||
except Exception:
|
||||
build_display_row = None # type: ignore
|
||||
|
||||
extracted = {
|
||||
"title": str(title),
|
||||
"store": str(hit.get("store") or backend_name),
|
||||
"hash": str(file_hash or ""),
|
||||
"ext": "",
|
||||
"size": None,
|
||||
}
|
||||
if callable(build_display_row):
|
||||
try:
|
||||
extracted = build_display_row(hit, keys=["title", "store", "hash", "ext", "size"])
|
||||
except Exception:
|
||||
pass
|
||||
# Ensure we still prefer the precomputed values for title/store/hash.
|
||||
extracted["title"] = str(title)
|
||||
extracted["store"] = str(hit.get("store") or backend_name)
|
||||
extracted["hash"] = str(file_hash or "")
|
||||
|
||||
ext = extracted.get("ext")
|
||||
size_val = extracted.get("size")
|
||||
|
||||
display_row = {
|
||||
"title": str(title),
|
||||
"store": str(hit.get("store") or backend_name),
|
||||
"hash": str(file_hash or ""),
|
||||
"ext": str(ext or ""),
|
||||
"size": size_val,
|
||||
"url": original_url,
|
||||
"columns": [
|
||||
("Title", str(title)),
|
||||
("Store", str(hit.get("store") or backend_name)),
|
||||
("Hash", str(file_hash or "")),
|
||||
("Ext", str(ext or "")),
|
||||
("Size", size_val),
|
||||
("URL", original_url),
|
||||
],
|
||||
}
|
||||
@@ -1615,7 +1713,8 @@ class Download_Media(Cmdlet):
|
||||
debug("Bulk URL preflight: no matches")
|
||||
return True
|
||||
|
||||
table = ResultTable(f"URL already exists ({len(matched_urls)} url(s))")
|
||||
# This table is non-interactive and intentionally wide (we want URL + ext/size).
|
||||
table = ResultTable(f"URL already exists ({len(matched_urls)} url(s))", max_columns=10)
|
||||
table.set_no_choice(True)
|
||||
try:
|
||||
table.set_preserve_order(True)
|
||||
@@ -1777,7 +1876,10 @@ class Download_Media(Cmdlet):
|
||||
table = ResultTable()
|
||||
safe_url = str(url or "").strip()
|
||||
table.title = f'download-media -url "{safe_url}"' if safe_url else "download-media"
|
||||
table.set_source_command("download-media", [url])
|
||||
# Selection tables should expand '@N' into a runnable command.
|
||||
# For playlist-item rows we prefer the concrete per-item URL so the
|
||||
# expanded command targets a single video (not the whole playlist).
|
||||
table.set_source_command("download-media", [])
|
||||
try:
|
||||
table.set_preserve_order(True)
|
||||
except Exception:
|
||||
@@ -1803,6 +1905,9 @@ class Download_Media(Cmdlet):
|
||||
"detail": str(uploader or ""),
|
||||
"media_kind": "playlist-item",
|
||||
"playlist_index": idx,
|
||||
# Enable '@N' expansion into a concrete command.
|
||||
# Prefer selecting the resolved per-item URL when available.
|
||||
"_selection_args": (["-url", str(entry_url)] if entry_url else ["-url", str(url), "-item", str(idx)]),
|
||||
# Critical for normal @ selection piping: downstream cmdlets
|
||||
# (including download-media itself) look for url/target.
|
||||
"url": entry_url,
|
||||
|
||||
@@ -6,7 +6,6 @@ Playwright, marking them as temporary artifacts for cleanup.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import contextlib
|
||||
import hashlib
|
||||
import sys
|
||||
import time
|
||||
@@ -32,6 +31,22 @@ get_field = sh.get_field
|
||||
parse_cmdlet_args = sh.parse_cmdlet_args
|
||||
import pipeline as pipeline_context
|
||||
|
||||
|
||||
def _set_live_step(text: str) -> None:
|
||||
"""Best-effort update to the pipeline Live progress title (if enabled)."""
|
||||
try:
|
||||
ui = pipeline_context.get_live_progress() if hasattr(pipeline_context, "get_live_progress") else None
|
||||
except Exception:
|
||||
ui = None
|
||||
if ui is None:
|
||||
return
|
||||
try:
|
||||
setter = getattr(ui, "set_active_subtask_text", None)
|
||||
if callable(setter):
|
||||
setter(str(text or "").strip())
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# ============================================================================
|
||||
# CMDLET Metadata Declaration
|
||||
# ============================================================================
|
||||
@@ -65,7 +80,7 @@ USER_AGENT = (
|
||||
"Chrome/120.0.0.0 Safari/537.36"
|
||||
)
|
||||
|
||||
DEFAULT_VIEWPORT: dict[str, int] = {"width": 1280, "height": 1200}
|
||||
DEFAULT_VIEWPORT: dict[str, int] = {"width": 1920, "height": 1080}
|
||||
ARCHIVE_TIMEOUT = 30.0
|
||||
|
||||
# Configurable selectors for specific websites
|
||||
@@ -114,7 +129,7 @@ class ScreenshotOptions:
|
||||
output_path: Optional[Path] = None
|
||||
full_page: bool = True
|
||||
headless: bool = True
|
||||
wait_after_load: float = 2.0
|
||||
wait_after_load: float = 6.0
|
||||
wait_for_article: bool = False
|
||||
replace_video_posters: bool = True
|
||||
tag: Sequence[str] = ()
|
||||
@@ -156,13 +171,13 @@ def _slugify_url(url: str) -> str:
|
||||
def _normalise_format(fmt: Optional[str]) -> str:
|
||||
"""Normalize output format to valid values."""
|
||||
if not fmt:
|
||||
return "png"
|
||||
return "webp"
|
||||
value = fmt.strip().lower()
|
||||
if value in {"jpg", "jpeg"}:
|
||||
return "jpeg"
|
||||
if value in {"png", "pdf"}:
|
||||
if value in {"png", "pdf", "webp"}:
|
||||
return value
|
||||
return "png"
|
||||
return "webp"
|
||||
|
||||
|
||||
def _format_suffix(fmt: str) -> str:
|
||||
@@ -172,6 +187,15 @@ def _format_suffix(fmt: str) -> str:
|
||||
return f".{fmt}"
|
||||
|
||||
|
||||
def _convert_to_webp(
    source_path: Path,
    dest_path: Path,
    *,
    quality: int = 100,
    method: int = 6,
) -> None:
    """Convert an image file to WebP using Pillow.

    Args:
        source_path: Image file to read (any format Pillow can open).
        dest_path: Where the WebP file is written.
        quality: Pillow WebP ``quality`` setting, 0-100. The default of 100
            favours fidelity over file size.
        method: Pillow WebP ``method`` setting, 0-6. The default of 6 is the
            slowest encode with the best compression effort.

    Raises:
        Whatever Pillow raises when the source cannot be opened or the
        destination cannot be written; nothing is caught here.
    """
    # Imported lazily so Pillow is only needed when a conversion is requested.
    from PIL import Image

    with Image.open(source_path) as img:
        img.save(dest_path, format="WEBP", quality=quality, method=method)
|
||||
|
||||
|
||||
def _selectors_for_url(url: str) -> List[str]:
|
||||
"""Return a list of likely content selectors for known platforms."""
|
||||
u = url.lower()
|
||||
@@ -184,6 +208,19 @@ def _selectors_for_url(url: str) -> List[str]:
|
||||
return sels or ["article"]
|
||||
|
||||
|
||||
def _matched_site_selectors(url: str) -> List[str]:
    """Return SITE_SELECTORS for a matched domain; empty if no match.

    A domain matches when its key occurs anywhere in the lower-cased URL.
    Selectors of every matching domain are concatenated in the iteration
    order of ``SITE_SELECTORS``.

    Unlike `_selectors_for_url()`, this does not return a generic fallback.

    Args:
        url: URL to inspect; falsy values are treated as the empty string.

    Returns:
        A new list of selectors, possibly empty.
    """
    haystack = str(url or "").lower()
    return [
        selector
        for domain, selectors in SITE_SELECTORS.items()
        if domain in haystack
        for selector in selectors
    ]
|
||||
|
||||
|
||||
def _platform_preprocess(url: str, page: Any, warnings: List[str], timeout_ms: int = 10_000) -> None:
|
||||
"""Best-effort page tweaks for popular platforms before capture."""
|
||||
u = url.lower()
|
||||
@@ -322,6 +359,10 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str])
|
||||
"""Capture screenshot using Playwright."""
|
||||
debug(f"[_capture] Starting capture for {options.url} -> {destination}")
|
||||
try:
|
||||
# Two-phase Live progress:
|
||||
# 1) load + stabilize (ends right after the wait_after_load sleep)
|
||||
# 2) capture + save (and any post-processing)
|
||||
_set_live_step("screen-shot: loading")
|
||||
tool = options.playwright_tool or PlaywrightTool({})
|
||||
|
||||
# Ensure Chromium engine is used for the screen-shot cmdlet (force for consistency)
|
||||
@@ -329,7 +370,18 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str])
|
||||
current_browser = getattr(tool.defaults, "browser", "").lower() if getattr(tool, "defaults", None) is not None else ""
|
||||
if current_browser != "chromium":
|
||||
debug(f"[_capture] Overriding Playwright browser '{current_browser}' -> 'chromium' for screen-shot cmdlet")
|
||||
tool = PlaywrightTool({"tool": {"playwright": {"browser": "chromium"}}})
|
||||
base_cfg = {}
|
||||
try:
|
||||
base_cfg = dict(getattr(tool, "_config", {}) or {})
|
||||
except Exception:
|
||||
base_cfg = {}
|
||||
tool_block = dict(base_cfg.get("tool") or {}) if isinstance(base_cfg, dict) else {}
|
||||
pw_block = dict(tool_block.get("playwright") or {}) if isinstance(tool_block, dict) else {}
|
||||
pw_block["browser"] = "chromium"
|
||||
tool_block["playwright"] = pw_block
|
||||
if isinstance(base_cfg, dict):
|
||||
base_cfg["tool"] = tool_block
|
||||
tool = PlaywrightTool(base_cfg)
|
||||
except Exception:
|
||||
tool = PlaywrightTool({"tool": {"playwright": {"browser": "chromium"}}})
|
||||
|
||||
@@ -366,6 +418,9 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str])
|
||||
if options.wait_after_load > 0:
|
||||
debug(f"Waiting {options.wait_after_load}s for page stabilization...")
|
||||
time.sleep(min(10.0, max(0.0, options.wait_after_load)))
|
||||
|
||||
# Phase 2 begins here (per request).
|
||||
_set_live_step("screen-shot: capturing")
|
||||
if options.replace_video_posters:
|
||||
debug("Replacing video elements with posters...")
|
||||
page.evaluate(
|
||||
@@ -384,6 +439,7 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str])
|
||||
# Attempt platform-specific target capture if requested (and not PDF)
|
||||
element_captured = False
|
||||
if options.prefer_platform_target and format_name != "pdf":
|
||||
debug(f"[_capture] Target capture enabled")
|
||||
debug("Attempting platform-specific content capture...")
|
||||
try:
|
||||
_platform_preprocess(options.url, page, warnings)
|
||||
@@ -393,7 +449,7 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str])
|
||||
selectors = list(options.target_selectors or [])
|
||||
if not selectors:
|
||||
selectors = _selectors_for_url(options.url)
|
||||
|
||||
|
||||
debug(f"[_capture] Trying selectors: {selectors}")
|
||||
for sel in selectors:
|
||||
try:
|
||||
@@ -459,14 +515,36 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str])
|
||||
def _capture_screenshot(options: ScreenshotOptions) -> ScreenshotResult:
|
||||
"""Capture a screenshot for the given options."""
|
||||
debug(f"[_capture_screenshot] Preparing capture for {options.url}")
|
||||
requested_format = _normalise_format(options.output_format)
|
||||
destination = _prepare_output_path(options)
|
||||
warnings: List[str] = []
|
||||
_capture(options, destination, warnings)
|
||||
|
||||
# Playwright screenshots do not natively support WebP output.
|
||||
# Capture as PNG, then convert via Pillow.
|
||||
capture_path = destination
|
||||
if requested_format == "webp":
|
||||
capture_path = unique_path(destination.with_suffix(".png"))
|
||||
debug(f"[_capture_screenshot] Requested webp; capturing intermediate png -> {capture_path}")
|
||||
options.output_format = "png"
|
||||
_capture(options, capture_path, warnings)
|
||||
|
||||
if requested_format == "webp":
|
||||
debug(f"[_capture_screenshot] Converting png -> webp: {destination}")
|
||||
try:
|
||||
_convert_to_webp(capture_path, destination)
|
||||
try:
|
||||
capture_path.unlink(missing_ok=True)
|
||||
except Exception:
|
||||
pass
|
||||
except Exception as exc:
|
||||
warnings.append(f"webp conversion failed; keeping png: {exc}")
|
||||
destination = capture_path
|
||||
|
||||
# Build URL list from captured url and any archives
|
||||
url: List[str] = [options.url] if options.url else []
|
||||
archive_url: List[str] = []
|
||||
if options.archive and options.url:
|
||||
_set_live_step("screen-shot: archiving")
|
||||
debug(f"[_capture_screenshot] Archiving enabled for {options.url}")
|
||||
archives, archive_warnings = _archive_url(options.url, options.archive_timeout)
|
||||
archive_url.extend(archives)
|
||||
@@ -538,7 +616,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
except Exception:
|
||||
pass
|
||||
if not format_value:
|
||||
format_value = "png"
|
||||
format_value = "webp"
|
||||
storage_value = parsed.get("storage")
|
||||
selector_arg = parsed.get("selector")
|
||||
selectors = [selector_arg] if selector_arg else []
|
||||
@@ -549,27 +627,27 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
positional_url = [str(url_arg)] if url_arg else []
|
||||
|
||||
# ========================================================================
|
||||
# INPUT PROCESSING - Extract url from pipeline or command arguments
|
||||
# INPUT PROCESSING - Extract url from command args or pipeline
|
||||
# ========================================================================
|
||||
|
||||
piped_results = normalize_result_input(result)
|
||||
url_to_process: List[Tuple[str, Any]] = []
|
||||
|
||||
# Extract url from piped results
|
||||
if piped_results:
|
||||
for item in piped_results:
|
||||
url = (
|
||||
get_field(item, 'path')
|
||||
or get_field(item, 'url')
|
||||
or get_field(item, 'target')
|
||||
)
|
||||
|
||||
if url:
|
||||
url_to_process.append((str(url), item))
|
||||
|
||||
# Use positional arguments if no pipeline input
|
||||
if not url_to_process and positional_url:
|
||||
# If the user provided an explicit URL argument, prefer it.
|
||||
url_to_process: List[Tuple[str, Any]] = []
|
||||
if positional_url:
|
||||
url_to_process = [(u, None) for u in positional_url]
|
||||
else:
|
||||
piped_results = normalize_result_input(result)
|
||||
|
||||
# Extract url from piped results
|
||||
if piped_results:
|
||||
for item in piped_results:
|
||||
url = (
|
||||
get_field(item, 'path')
|
||||
or get_field(item, 'url')
|
||||
or get_field(item, 'target')
|
||||
)
|
||||
|
||||
if url:
|
||||
url_to_process.append((str(url), item))
|
||||
|
||||
if not url_to_process:
|
||||
log(f"No url to process for screen-shot cmdlet", file=sys.stderr)
|
||||
@@ -577,6 +655,32 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
|
||||
debug(f"[_run] url to process: {[u for u, _ in url_to_process]}")
|
||||
|
||||
# If the caller isn't running the shared pipeline Live progress UI (e.g. direct
|
||||
# cmdlet execution), start a minimal local pipeline progress panel so this cmdlet
|
||||
# still shows step-level progress.
|
||||
local_progress_ui = None
|
||||
try:
|
||||
existing_ui = pipeline_context.get_live_progress() if hasattr(pipeline_context, "get_live_progress") else None
|
||||
except Exception:
|
||||
existing_ui = None
|
||||
try:
|
||||
if existing_ui is None and bool(getattr(sys.stderr, "isatty", lambda: False)()):
|
||||
from models import PipelineLiveProgress
|
||||
|
||||
local_progress_ui = PipelineLiveProgress(["screen-shot"], enabled=True)
|
||||
local_progress_ui.start()
|
||||
try:
|
||||
if hasattr(pipeline_context, "set_live_progress"):
|
||||
pipeline_context.set_live_progress(local_progress_ui)
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
local_progress_ui.begin_pipe(0, total_items=len(url_to_process), items_preview=[u for u, _ in url_to_process])
|
||||
except Exception:
|
||||
pass
|
||||
except Exception:
|
||||
local_progress_ui = None
|
||||
|
||||
# ========================================================================
|
||||
# OUTPUT DIRECTORY RESOLUTION - Priority chain
|
||||
# ========================================================================
|
||||
@@ -621,7 +725,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
|
||||
format_name = _normalise_format(format_value)
|
||||
filtered_selectors = [str(s).strip() for s in selectors if str(s).strip()]
|
||||
target_selectors = filtered_selectors if filtered_selectors else None
|
||||
manual_target_selectors = filtered_selectors if filtered_selectors else None
|
||||
|
||||
all_emitted = []
|
||||
exit_code = 0
|
||||
@@ -664,6 +768,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
continue
|
||||
|
||||
try:
|
||||
_set_live_step("screen-shot: starting")
|
||||
# Create screenshot with provided options
|
||||
# Force the Playwright engine to Chromium for the screen-shot cmdlet
|
||||
# (this ensures consistent rendering and supports PDF output requirements).
|
||||
@@ -672,23 +777,49 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
tool_block = dict(config.get("tool") or {})
|
||||
pw_block = dict(tool_block.get("playwright") or {})
|
||||
pw_block["browser"] = "chromium"
|
||||
# Use Playwright-native UA/headers (matches bundled Chromium version).
|
||||
pw_block["user_agent"] = "native"
|
||||
pw_block["viewport_width"] = int(DEFAULT_VIEWPORT.get("width", 1920))
|
||||
pw_block["viewport_height"] = int(DEFAULT_VIEWPORT.get("height", 1080))
|
||||
tool_block["playwright"] = pw_block
|
||||
pw_local_cfg = dict(config)
|
||||
pw_local_cfg["tool"] = tool_block
|
||||
else:
|
||||
pw_local_cfg = {"tool": {"playwright": {"browser": "chromium"}}}
|
||||
pw_local_cfg = {
|
||||
"tool": {
|
||||
"playwright": {
|
||||
"browser": "chromium",
|
||||
"user_agent": "native",
|
||||
"viewport_width": int(DEFAULT_VIEWPORT.get("width", 1920)),
|
||||
"viewport_height": int(DEFAULT_VIEWPORT.get("height", 1080)),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
options = ScreenshotOptions(
|
||||
url=url,
|
||||
output_dir=screenshot_dir,
|
||||
output_format=format_name,
|
||||
archive=archive_enabled,
|
||||
target_selectors=target_selectors,
|
||||
target_selectors=None,
|
||||
prefer_platform_target=False,
|
||||
wait_for_article=False,
|
||||
full_page=True,
|
||||
playwright_tool=PlaywrightTool(pw_local_cfg),
|
||||
)
|
||||
|
||||
# Auto element capture for known sites (x.com/twitter/etc.).
|
||||
# - If the user provided --selector, treat that as an explicit target.
|
||||
# - Otherwise, if SITE_SELECTORS matches the URL, auto-capture the post/content element.
|
||||
auto_selectors = _matched_site_selectors(url)
|
||||
if manual_target_selectors:
|
||||
options.prefer_platform_target = True
|
||||
options.target_selectors = manual_target_selectors
|
||||
debug(f"[screen_shot] Using explicit selector(s): {manual_target_selectors}")
|
||||
elif auto_selectors:
|
||||
options.prefer_platform_target = True
|
||||
options.target_selectors = auto_selectors
|
||||
debug(f"[screen_shot] Auto selectors matched for url: {auto_selectors}")
|
||||
|
||||
screenshot_result = _capture_screenshot(options)
|
||||
|
||||
@@ -748,6 +879,13 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
# Emit the result so downstream cmdlet (like add-file) can use it
|
||||
pipeline_context.emit(pipe_obj)
|
||||
all_emitted.append(pipe_obj)
|
||||
|
||||
# If we created a local progress UI, advance it per completed item.
|
||||
if local_progress_ui is not None:
|
||||
try:
|
||||
local_progress_ui.on_emit(0, pipe_obj)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
except ScreenshotError as exc:
|
||||
log(f"Error taking screenshot of {url}: {exc}", file=sys.stderr)
|
||||
@@ -758,13 +896,31 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
traceback.print_exc(file=sys.stderr)
|
||||
exit_code = 1
|
||||
|
||||
try:
|
||||
if local_progress_ui is not None:
|
||||
try:
|
||||
local_progress_ui.finish_pipe(0, force_complete=True)
|
||||
except Exception:
|
||||
pass
|
||||
finally:
|
||||
if local_progress_ui is not None:
|
||||
try:
|
||||
local_progress_ui.stop()
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
if hasattr(pipeline_context, "set_live_progress"):
|
||||
pipeline_context.set_live_progress(None)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if not all_emitted:
|
||||
log(f"No screenshots were successfully captured", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
|
||||
# Log completion message (keep this as normal output)
|
||||
log(f"✓ Successfully captured {len(all_emitted)} screenshot(s)")
|
||||
|
||||
|
||||
return exit_code
|
||||
CMDLET = Cmdlet(
|
||||
name="screen-shot",
|
||||
@@ -773,7 +929,7 @@ CMDLET = Cmdlet(
|
||||
alias=["screenshot", "ss"],
|
||||
arg=[
|
||||
SharedArgs.URL,
|
||||
CmdletArg(name="format", type="string", description="Output format: png, jpeg, or pdf"),
|
||||
CmdletArg(name="format", type="string", description="Output format: webp, png, jpeg, or pdf"),
|
||||
CmdletArg(name="selector", type="string", description="CSS selector for element capture"),
|
||||
|
||||
],
|
||||
|
||||
Reference in New Issue
Block a user