This commit is contained in:
nose
2025-12-11 12:47:30 -08:00
parent 6b05dc5552
commit 65d12411a2
92 changed files with 17447 additions and 14308 deletions

View File

@@ -1,4 +1,4 @@
"""Screen-shot cmdlet for capturing screenshots of URLs in a pipeline.
"""Screen-shot cmdlet for capturing screenshots of url in a pipeline.
This cmdlet processes files through the pipeline and creates screenshots using
Playwright, marking them as temporary artifacts for cleanup.
@@ -23,7 +23,7 @@ from helper.http_client import HTTPClient
from helper.utils import ensure_directory, unique_path, unique_preserve_order
from . import register
from ._shared import Cmdlet, CmdletArg, SharedArgs, create_pipe_object_result, normalize_result_input
from ._shared import Cmdlet, CmdletArg, SharedArgs, create_pipe_object_result, normalize_result_input, should_show_help, get_field
import models
import pipeline as pipeline_context
@@ -113,8 +113,8 @@ class ScreenshotError(RuntimeError):
class ScreenshotOptions:
"""Options controlling screenshot capture and post-processing."""
url: str
output_dir: Path
url: Sequence[str] = ()
output_path: Optional[Path] = None
full_page: bool = True
headless: bool = True
@@ -124,7 +124,7 @@ class ScreenshotOptions:
tags: Sequence[str] = ()
archive: bool = False
archive_timeout: float = ARCHIVE_TIMEOUT
known_urls: Sequence[str] = ()
url: Sequence[str] = ()
output_format: Optional[str] = None
prefer_platform_target: bool = False
target_selectors: Optional[Sequence[str]] = None
@@ -136,10 +136,9 @@ class ScreenshotResult:
"""Details about the captured screenshot."""
path: Path
url: str
tags_applied: List[str]
archive_urls: List[str]
known_urls: List[str]
archive_url: List[str]
url: List[str]
warnings: List[str] = field(default_factory=list)
@@ -471,24 +470,24 @@ def _capture_screenshot(options: ScreenshotOptions) -> ScreenshotResult:
warnings: List[str] = []
_capture(options, destination, warnings)
known_urls = unique_preserve_order([options.url, *options.known_urls])
archive_urls: List[str] = []
# Build URL list from provided options.url (sequence) and deduplicate
url = unique_preserve_order(list(options.url))
archive_url: List[str] = []
if options.archive:
debug(f"[_capture_screenshot] Archiving enabled for {options.url}")
archives, archive_warnings = _archive_url(options.url, options.archive_timeout)
archive_urls.extend(archives)
archive_url.extend(archives)
warnings.extend(archive_warnings)
if archives:
known_urls = unique_preserve_order([*known_urls, *archives])
url = unique_preserve_order([*url, *archives])
applied_tags = unique_preserve_order(list(tag for tag in options.tags if tag.strip()))
return ScreenshotResult(
path=destination,
url=options.url,
tags_applied=applied_tags,
archive_urls=archive_urls,
known_urls=known_urls,
archive_url=archive_url,
url=url,
warnings=warnings,
)
@@ -498,10 +497,10 @@ def _capture_screenshot(options: ScreenshotOptions) -> ScreenshotResult:
# ============================================================================
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Take screenshots of URLs in the pipeline.
"""Take screenshots of url in the pipeline.
Accepts:
- Single result object (dict or PipeObject) with 'file_path' field
- Single result object (dict or PipeObject) with 'path' field
- List of result objects to screenshot each
- Direct URL as string
@@ -518,12 +517,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
debug(f"[_run] screen-shot invoked with args: {args}")
# Help check
try:
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
return 0
except Exception:
pass
if should_show_help(args):
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
return 0
# ========================================================================
# ARGUMENT PARSING
@@ -539,36 +535,36 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Positional URL argument (if provided)
url_arg = parsed.get("url")
positional_urls = [str(url_arg)] if url_arg else []
positional_url = [str(url_arg)] if url_arg else []
# ========================================================================
# INPUT PROCESSING - Extract URLs from pipeline or command arguments
# INPUT PROCESSING - Extract url from pipeline or command arguments
# ========================================================================
piped_results = normalize_result_input(result)
urls_to_process = []
url_to_process = []
# Extract URLs from piped results
# Extract url from piped results
if piped_results:
for item in piped_results:
url = None
if isinstance(item, dict):
url = item.get('file_path') or item.get('path') or item.get('url') or item.get('target')
else:
url = getattr(item, 'file_path', None) or getattr(item, 'path', None) or getattr(item, 'url', None) or getattr(item, 'target', None)
url = (
get_field(item, 'path')
or get_field(item, 'url')
or get_field(item, 'target')
)
if url:
urls_to_process.append(str(url))
url_to_process.append(str(url))
# Use positional arguments if no pipeline input
if not urls_to_process and positional_urls:
urls_to_process = positional_urls
if not url_to_process and positional_url:
url_to_process = positional_url
if not urls_to_process:
log(f"No URLs to process for screen-shot cmdlet", file=sys.stderr)
if not url_to_process:
log(f"No url to process for screen-shot cmdlet", file=sys.stderr)
return 1
debug(f"[_run] URLs to process: {urls_to_process}")
debug(f"[_run] url to process: {url_to_process}")
# ========================================================================
# OUTPUT DIRECTORY RESOLUTION - Priority chain
@@ -619,10 +615,10 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
all_emitted = []
exit_code = 0
# ========================================================================
# PROCESS URLs AND CAPTURE SCREENSHOTS
# PROCESS url AND CAPTURE SCREENSHOTS
# ========================================================================
for url in urls_to_process:
for url in url_to_process:
# Validate URL format
if not url.lower().startswith(("http://", "https://", "file://")):
log(f"[screen_shot] Skipping non-URL input: {url}", file=sys.stderr)
@@ -631,7 +627,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
try:
# Create screenshot with provided options
options = ScreenshotOptions(
url=url,
url=[url],
output_dir=screenshot_dir,
output_format=format_name,
archive=archive_enabled,
@@ -645,8 +641,8 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Log results and warnings
log(f"Screenshot captured to {screenshot_result.path}", flush=True)
if screenshot_result.archive_urls:
log(f"Archives: {', '.join(screenshot_result.archive_urls)}", flush=True)
if screenshot_result.archive_url:
log(f"Archives: {', '.join(screenshot_result.archive_url)}", flush=True)
for warning in screenshot_result.warnings:
log(f"Warning: {warning}", flush=True)
@@ -670,8 +666,8 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
parent_hash=hashlib.sha256(url.encode()).hexdigest(),
extra={
'source_url': url,
'archive_urls': screenshot_result.archive_urls,
'known_urls': screenshot_result.known_urls,
'archive_url': screenshot_result.archive_url,
'url': screenshot_result.url,
'target': str(screenshot_result.path), # Explicit target for add-file
}
)
@@ -701,16 +697,16 @@ CMDLET = Cmdlet(
name="screen-shot",
summary="Capture a screenshot of a URL or file and mark as temporary artifact",
usage="screen-shot <url> [options] or download-data <url> | screen-shot [options]",
aliases=["screenshot", "ss"],
args=[
alias=["screenshot", "ss"],
arg=[
CmdletArg(name="url", type="string", required=False, description="URL to screenshot (or from pipeline)"),
CmdletArg(name="format", type="string", description="Output format: png, jpeg, or pdf"),
CmdletArg(name="selector", type="string", description="CSS selector for element capture"),
SharedArgs.ARCHIVE, # Use shared archive argument
SharedArgs.STORAGE, # Use shared storage argument
SharedArgs.STORE, # Use shared storage argument
],
details=[
"Take screenshots of URLs with optional archiving and element targeting.",
detail=[
"Take screenshots of url with optional archiving and element targeting.",
"Screenshots are marked as temporary artifacts for cleanup by the cleanup cmdlet.",
"",
"Arguments:",