commit 9873280f0e
parent a03eb0d1be
Author: nose
Date:   2025-12-16 01:45:01 -08:00

36 changed files with 4911 additions and 1225 deletions


@@ -10,6 +10,7 @@ import contextlib
 import hashlib
 import sys
 import time
+from datetime import datetime
 import httpx
 from dataclasses import dataclass, field
 from pathlib import Path
@@ -547,7 +548,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
     # ========================================================================
     piped_results = normalize_result_input(result)

-    url_to_process = []
+    url_to_process: List[Tuple[str, Any]] = []

     # Extract url from piped results
     if piped_results:
@@ -559,17 +560,17 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
             )
             if url:
-                url_to_process.append(str(url))
+                url_to_process.append((str(url), item))

     # Use positional arguments if no pipeline input
     if not url_to_process and positional_url:
-        url_to_process = positional_url
+        url_to_process = [(u, None) for u in positional_url]

     if not url_to_process:
         log("No url to process for screen-shot cmdlet", file=sys.stderr)
         return 1

-    debug(f"[_run] url to process: {url_to_process}")
+    debug(f"[_run] url to process: {[u for u, _ in url_to_process]}")

     # ========================================================================
     # OUTPUT DIRECTORY RESOLUTION - Priority chain
     # ========================================================================
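
Review note: the change above switches url_to_process from a list of URL strings to (url, origin_item) tuples, so each capture can inherit metadata from the pipeline item that produced it; positional URLs pair with None because they have no upstream item. A minimal sketch of the new shape, assuming plain dict items where get_field(item, key) behaves like item.get(key):

    from typing import Any, List, Optional, Tuple

    def pair_urls(piped: List[dict], positional: List[str]) -> List[Tuple[str, Optional[Any]]]:
        # Pipeline items keep a reference to their origin; positional URLs do not.
        pairs = [(str(item["url"]), item) for item in piped if item.get("url")]
        if not pairs and positional:
            pairs = [(u, None) for u in positional]
        return pairs

    print(pair_urls([{"url": "https://example.com", "title": "Example"}], []))
    # [('https://example.com', {'url': 'https://example.com', 'title': 'Example'})]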
@@ -623,7 +624,35 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
     # PROCESS url AND CAPTURE SCREENSHOTS
     # ========================================================================

-    for url in url_to_process:
+    def _extract_item_tags(item: Any) -> List[str]:
+        if item is None:
+            return []
+        raw = get_field(item, 'tag')
+        if isinstance(raw, list):
+            return [str(t) for t in raw if t is not None and str(t).strip()]
+        if isinstance(raw, str) and raw.strip():
+            return [raw.strip()]
+        return []
+
+    def _extract_item_title(item: Any) -> str:
+        if item is None:
+            return ""
+        for key in ("title", "name", "filename"):
+            val = get_field(item, key)
+            if val is None:
+                continue
+            text = str(val).strip()
+            if text:
+                return text
+        return ""
+
+    def _clean_title(text: str) -> str:
+        value = (text or "").strip()
+        if value.lower().startswith("screenshot:"):
+            value = value.split(":", 1)[1].strip()
+        return value
+
+    for url, origin_item in url_to_process:
         # Validate URL format
         if not url.lower().startswith(("http://", "https://", "file://")):
             log(f"[screen_shot] Skipping non-URL input: {url}", file=sys.stderr)
@@ -660,15 +689,34 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
pass
# Create PipeObject result - marked as TEMP since derivative artifact
capture_date = ""
try:
capture_date = datetime.fromtimestamp(screenshot_result.path.stat().st_mtime).date().isoformat()
except Exception:
capture_date = datetime.now().date().isoformat()
upstream_title = _clean_title(_extract_item_title(origin_item))
display_title = upstream_title or url
upstream_tags = _extract_item_tags(origin_item)
filtered_upstream_tags = [
t for t in upstream_tags
if not str(t).strip().lower().startswith(("type:", "date:"))
]
merged_tags = unique_preserve_order(
["type:screenshot", f"date:{capture_date}"] + filtered_upstream_tags
)
pipe_obj = create_pipe_object_result(
source='screenshot',
identifier=Path(screenshot_result.path).stem,
file_path=str(screenshot_result.path),
cmdlet_name='screen-shot',
title=f"Screenshot: {Path(screenshot_result.path).name}",
title=display_title,
hash_value=screenshot_hash,
is_temp=True,
parent_hash=hashlib.sha256(url.encode()).hexdigest(),
tag=merged_tags,
extra={
'source_url': url,
'archive_url': screenshot_result.archive_url,
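
Review note: the merge above puts the fresh type:/date: tags first, strips any stale type:/date: tags inherited from the upstream item, and keeps the remaining upstream tags in order. unique_preserve_order is imported from the cmdlet helpers; a compatible stand-in for illustration (an assumption, not the project's implementation):

    from typing import Iterable, List

    def unique_preserve_order(items: Iterable[str]) -> List[str]:
        # First occurrence wins; later duplicates are dropped.
        seen: set = set()
        out: List[str] = []
        for item in items:
            if item not in seen:
                seen.add(item)
                out.append(item)
        return out

    upstream = ["type:web", "date:2024-01-01", "news", "politics"]
    kept = [t for t in upstream if not t.lower().startswith(("type:", "date:"))]
    print(unique_preserve_order(["type:screenshot", "date:2025-12-16"] + kept))
    # ['type:screenshot', 'date:2025-12-16', 'news', 'politics']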