Add YAPF style + ignore, and format tracked Python files

2025-12-29 18:42:02 -08:00
parent c019c00aed
commit 507946a3e4
108 changed files with 11664 additions and 6494 deletions
--- a/cmdlet/screen_shot.py
+++ b/cmdlet/screen_shot.py
@@ -36,7 +36,6 @@ import pipeline as pipeline_context
 # CMDLET Metadata Declaration
 # ============================================================================

-
 # ============================================================================
 # Playwright & Screenshot Dependencies
 # ============================================================================
@@ -44,13 +43,13 @@ import pipeline as pipeline_context
 from tool.playwright import HAS_PLAYWRIGHT, PlaywrightTimeoutError, PlaywrightTool

 try:
-    from config import resolve_output_dir
+    from SYS.config import resolve_output_dir
 except ImportError:
    try:
        _parent_dir = str(Path(__file__).parent.parent)
        if _parent_dir not in sys.path:
            sys.path.insert(0, _parent_dir)
-        from config import resolve_output_dir
+        from SYS.config import resolve_output_dir
    except ImportError:
        resolve_output_dir = None

@@ -64,7 +63,11 @@ USER_AGENT = (
    "Chrome/120.0.0.0 Safari/537.36"
 )

-DEFAULT_VIEWPORT: dict[str, int] = {"width": 1920, "height": 1080}
+DEFAULT_VIEWPORT: dict[str,
+                       int] = {
+                           "width": 1920,
+                           "height": 1080
+                       }
 ARCHIVE_TIMEOUT = 30.0

 # WebP has a hard maximum dimension per side.
@@ -72,35 +75,36 @@ ARCHIVE_TIMEOUT = 30.0
 WEBP_MAX_DIM = 16_383

 # Configurable selectors for specific websites
-SITE_SELECTORS: Dict[str, List[str]] = {
-    "twitter.com": [
-        "article[role='article']",
-        "div[data-testid='tweet']",
-        "div[data-testid='cellInnerDiv'] article",
-    ],
-    "x.com": [
-        "article[role='article']",
-        "div[data-testid='tweet']",
-        "div[data-testid='cellInnerDiv'] article",
-    ],
-    "instagram.com": [
-        "article[role='presentation']",
-        "article[role='article']",
-        "div[role='dialog'] article",
-        "section main article",
-    ],
-    "reddit.com": [
-        "shreddit-post",
-        "div[data-testid='post-container']",
-        "div[data-click-id='background']",
-        "article",
-    ],
-    "rumble.com": [
-        "rumble-player, iframe.rumble",
-        "div.video-item--main",
-        "main article",
-    ],
-}
+SITE_SELECTORS: Dict[str,
+                     List[str]] = {
+                         "twitter.com": [
+                             "article[role='article']",
+                             "div[data-testid='tweet']",
+                             "div[data-testid='cellInnerDiv'] article",
+                         ],
+                         "x.com": [
+                             "article[role='article']",
+                             "div[data-testid='tweet']",
+                             "div[data-testid='cellInnerDiv'] article",
+                         ],
+                         "instagram.com": [
+                             "article[role='presentation']",
+                             "article[role='article']",
+                             "div[role='dialog'] article",
+                             "section main article",
+                         ],
+                         "reddit.com": [
+                             "shreddit-post",
+                             "div[data-testid='post-container']",
+                             "div[data-click-id='background']",
+                             "article",
+                         ],
+                         "rumble.com": [
+                             "rumble-player, iframe.rumble",
+                             "div.video-item--main",
+                             "main article",
+                         ],
+                     }


 class ScreenshotError(RuntimeError):
@@ -172,9 +176,13 @@ def _tags_from_url(url: str) -> List[str]:
    try:
        parsed = urlsplit(u)
        host = (
-            str(getattr(parsed, "hostname", None) or getattr(parsed, "netloc", "") or "")
-            .strip()
-            .lower()
+            str(
+                getattr(parsed,
+                        "hostname",
+                        None) or getattr(parsed,
+                                         "netloc",
+                                         "") or ""
+            ).strip().lower()
        )
    except Exception:
        parsed = None
@@ -187,7 +195,7 @@ def _tags_from_url(url: str) -> List[str]:
        if ":" in host:
            host = host.split(":", 1)[0]
        if host.startswith("www."):
-            host = host[len("www.") :]
+            host = host[len("www."):]

    path = ""
    if parsed is not None:
@@ -230,7 +238,7 @@ def _title_from_url(url: str) -> str:
    """Return the normalized title derived from a URL's last path segment."""
    for t in _tags_from_url(url):
        if str(t).lower().startswith("title:"):
-            return str(t)[len("title:") :].strip()
+            return str(t)[len("title:"):].strip()
    return ""


@@ -239,9 +247,12 @@ def _normalise_format(fmt: Optional[str]) -> str:
    if not fmt:
        return "webp"
    value = fmt.strip().lower()
-    if value in {"jpg", "jpeg"}:
+    if value in {"jpg",
+                 "jpeg"}:
        return "jpeg"
-    if value in {"png", "pdf", "webp"}:
+    if value in {"png",
+                 "pdf",
+                 "webp"}:
        return value
    return "webp"

@@ -285,11 +296,12 @@ def _convert_to_webp(
    try:
        with Image.open(src_png) as im:
            did_downscale = False
-            save_kwargs: Dict[str, Any] = {
-                "format": "WEBP",
-                "quality": int(quality),
-                "method": int(method),
-            }
+            save_kwargs: Dict[str,
+                              Any] = {
+                                  "format": "WEBP",
+                                  "quality": int(quality),
+                                  "method": int(method),
+                              }

            # Preserve alpha when present; Pillow handles it for WEBP.
            # Normalize palette images to RGBA to avoid odd palette artifacts.
@@ -303,12 +315,9 @@ def _convert_to_webp(
            except Exception:
                w, h = 0, 0

-            if (
-                downscale_if_oversize
-                and isinstance(max_dim, int)
-                and max_dim > 0
-                and (w > max_dim or h > max_dim)
-            ):
+            if (downscale_if_oversize and isinstance(max_dim,
+                                                     int) and max_dim > 0
+                    and (w > max_dim or h > max_dim)):
                scale = 1.0
                try:
                    scale = min(float(max_dim) / float(w), float(max_dim) / float(h))
@@ -322,7 +331,13 @@ def _convert_to_webp(
                        f"[_convert_to_webp] Image exceeds WebP limit ({w}x{h}); downscaling -> {new_w}x{new_h}"
                    )
                    try:
-                        resample = getattr(getattr(Image, "Resampling", Image), "LANCZOS", None)
+                        resample = getattr(
+                            getattr(Image,
+                                    "Resampling",
+                                    Image),
+                            "LANCZOS",
+                            None
+                        )
                        if resample is None:
                            resample = getattr(Image, "LANCZOS", 1)
                        im = im.resize((new_w, new_h), resample=resample)
@@ -367,13 +382,20 @@ def _selectors_for_url(url: str) -> List[str]:


 def _platform_preprocess(
-    url: str, page: Any, warnings: List[str], timeout_ms: int = 10_000
+    url: str,
+    page: Any,
+    warnings: List[str],
+    timeout_ms: int = 10_000
 ) -> None:
    """Best-effort page tweaks for popular platforms before capture."""
    try:
        u = str(url or "").lower()

-        def _try_click_buttons(names: List[str], passes: int = 2, per_timeout: int = 700) -> int:
+        def _try_click_buttons(
+            names: List[str],
+            passes: int = 2,
+            per_timeout: int = 700
+        ) -> int:
            clicks = 0
            for _ in range(max(1, int(passes))):
                for name in names:
@@ -411,7 +433,9 @@ def _platform_preprocess(

 def _submit_wayback(url: str, timeout: float) -> Optional[str]:
    encoded = quote(url, safe="/:?=&")
-    with HTTPClient(headers={"User-Agent": USER_AGENT}) as client:
+    with HTTPClient(headers={
+            "User-Agent": USER_AGENT
+    }) as client:
        response = client.get(f"https://web.archive.org/save/{encoded}")
        content_location = response.headers.get("Content-Location")
        if content_location:
@@ -422,7 +446,9 @@ def _submit_wayback(url: str, timeout: float) -> Optional[str]:
 def _submit_archive_today(url: str, timeout: float) -> Optional[str]:
    """Submit URL to Archive.today."""
    encoded = quote(url, safe=":/?#[]@!$&'()*+,;=")
-    with HTTPClient(headers={"User-Agent": USER_AGENT}) as client:
+    with HTTPClient(headers={
+            "User-Agent": USER_AGENT
+    }) as client:
        response = client.get(f"https://archive.today/submit/?url={encoded}")
        response.raise_for_status()
        final = str(response.url)
@@ -434,7 +460,9 @@ def _submit_archive_today(url: str, timeout: float) -> Optional[str]:
 def _submit_archive_ph(url: str, timeout: float) -> Optional[str]:
    """Submit URL to Archive.ph."""
    encoded = quote(url, safe=":/?#[]@!$&'()*+,;=")
-    with HTTPClient(headers={"User-Agent": USER_AGENT}) as client:
+    with HTTPClient(headers={
+            "User-Agent": USER_AGENT
+    }) as client:
        response = client.get(f"https://archive.ph/submit/?url={encoded}")
        response.raise_for_status()
        final = str(response.url)
@@ -460,7 +488,9 @@ def _archive_url(url: str, timeout: float) -> Tuple[List[str], List[str]]:
                warnings.append(f"archive {label} rate limited (HTTP 429)")
                debug(f"{label}: Rate limited (HTTP 429)")
            else:
-                warnings.append(f"archive {label} failed: HTTP {exc.response.status_code}")
+                warnings.append(
+                    f"archive {label} failed: HTTP {exc.response.status_code}"
+                )
                debug(f"{label}: HTTP {exc.response.status_code}")
        except httpx.RequestError as exc:
            warnings.append(f"archive {label} failed: {exc}")
@@ -480,7 +510,9 @@ def _archive_url(url: str, timeout: float) -> Tuple[List[str], List[str]]:
 def _prepare_output_path(options: ScreenshotOptions) -> Path:
    """Prepare and validate output path for screenshot."""
    ensure_directory(options.output_dir)
-    explicit_format = _normalise_format(options.output_format) if options.output_format else None
+    explicit_format = _normalise_format(
+        options.output_format
+    ) if options.output_format else None
    inferred_format: Optional[str] = None
    if options.output_path is not None:
        path = options.output_path
@@ -506,7 +538,10 @@ def _prepare_output_path(options: ScreenshotOptions) -> Path:


 def _capture(
-    options: ScreenshotOptions, destination: Path, warnings: List[str], progress: PipelineProgress
+    options: ScreenshotOptions,
+    destination: Path,
+    warnings: List[str],
+    progress: PipelineProgress
 ) -> None:
    """Capture screenshot using Playwright."""
    debug(f"[_capture] Starting capture for {options.url} -> {destination}")
@@ -517,9 +552,11 @@ def _capture(
        # Ensure Chromium engine is used for the screen-shot cmdlet (force for consistency)
        try:
            current_browser = (
-                getattr(tool.defaults, "browser", "").lower()
-                if getattr(tool, "defaults", None) is not None
-                else ""
+                getattr(tool.defaults,
+                        "browser",
+                        "").lower() if getattr(tool,
+                                               "defaults",
+                                               None) is not None else ""
            )
            if current_browser != "chromium":
                debug(
@@ -527,12 +564,18 @@ def _capture(
                )
                base_cfg = {}
                try:
-                    base_cfg = dict(getattr(tool, "_config", {}) or {})
+                    base_cfg = dict(getattr(tool,
+                                            "_config",
+                                            {}) or {})
                except Exception:
                    base_cfg = {}
-                tool_block = dict(base_cfg.get("tool") or {}) if isinstance(base_cfg, dict) else {}
+                tool_block = dict(base_cfg.get("tool") or {}
+                                  ) if isinstance(base_cfg,
+                                                  dict) else {}
                pw_block = (
-                    dict(tool_block.get("playwright") or {}) if isinstance(tool_block, dict) else {}
+                    dict(tool_block.get("playwright") or {})
+                    if isinstance(tool_block,
+                                  dict) else {}
                )
                pw_block["browser"] = "chromium"
                tool_block["playwright"] = pw_block
@@ -540,7 +583,13 @@ def _capture(
                    base_cfg["tool"] = tool_block
                tool = PlaywrightTool(base_cfg)
        except Exception:
-            tool = PlaywrightTool({"tool": {"playwright": {"browser": "chromium"}}})
+            tool = PlaywrightTool({
+                "tool": {
+                    "playwright": {
+                        "browser": "chromium"
+                    }
+                }
+            })

        tool.debug_dump()

@@ -550,7 +599,9 @@ def _capture(
        debug(f"[_capture] Format: {format_name}, Headless: {headless}")

        if format_name == "pdf" and not options.headless:
-            warnings.append("pdf output requires headless Chromium; overriding headless mode")
+            warnings.append(
+                "pdf output requires headless Chromium; overriding headless mode"
+            )

        try:
            with tool.open_page(headless=headless) as page:
@@ -572,11 +623,15 @@ def _capture(
                        page.wait_for_selector("article", timeout=10_000)
                        debug("Article element found")
                    except PlaywrightTimeoutError:
-                        warnings.append("<article> selector not found; capturing fallback")
+                        warnings.append(
+                            "<article> selector not found; capturing fallback"
+                        )
                        debug("Article element not found; using fallback")

                if options.wait_after_load > 0:
-                    debug(f"Waiting {options.wait_after_load}s for page stabilization...")
+                    debug(
+                        f"Waiting {options.wait_after_load}s for page stabilization..."
+                    )
                    time.sleep(min(10.0, max(0.0, options.wait_after_load)))

                progress.step("loading stabilized")
@@ -617,7 +672,9 @@ def _capture(
                        try:
                            debug(f"Trying selector: {sel}")
                            el = page.wait_for_selector(
-                                sel, timeout=max(0, int(options.selector_timeout_ms))
+                                sel,
+                                timeout=max(0,
+                                            int(options.selector_timeout_ms))
                            )
                        except PlaywrightTimeoutError:
                            debug(f"Selector not found: {sel}")
@@ -639,7 +696,9 @@ def _capture(
                                debug("Element captured successfully")
                                break
                        except Exception as exc:
-                            warnings.append(f"element capture failed for '{sel}': {exc}")
+                            warnings.append(
+                                f"element capture failed for '{sel}': {exc}"
+                            )
                            debug(f"Failed to capture element: {exc}")
                # Fallback to default capture paths
                if element_captured:
@@ -653,7 +712,10 @@ def _capture(
                    progress.step("capturing saved")
                else:
                    debug(f"Capturing full page to {destination}...")
-                    screenshot_kwargs: Dict[str, Any] = {"path": str(destination)}
+                    screenshot_kwargs: Dict[str,
+                                            Any] = {
+                                                "path": str(destination)
+                                            }
                    if format_name == "jpeg":
                        screenshot_kwargs["type"] = "jpeg"
                        screenshot_kwargs["quality"] = 90
@@ -675,10 +737,8 @@ def _capture(
        except Exception as exc:
            debug(f"[_capture] Exception launching browser/page: {exc}")
            msg = str(exc).lower()
-            if any(
-                k in msg
-                for k in ["executable", "not found", "no such file", "cannot find", "install"]
-            ):
+            if any(k in msg for k in ["executable", "not found", "no such file",
+                                      "cannot find", "install"]):
                raise ScreenshotError(
                    "Chromium Playwright browser binaries not found. Install them: python ./scripts/bootstrap.py --playwright-only --browsers chromium"
                ) from exc
@@ -691,7 +751,10 @@ def _capture(
        raise ScreenshotError(f"Failed to capture screenshot: {exc}") from exc


-def _capture_screenshot(options: ScreenshotOptions, progress: PipelineProgress) -> ScreenshotResult:
+def _capture_screenshot(
+    options: ScreenshotOptions,
+    progress: PipelineProgress
+) -> ScreenshotResult:
    """Capture a screenshot for the given options."""
    debug(f"[_capture_screenshot] Preparing capture for {options.url}")
    requested_format = _normalise_format(options.output_format)
@@ -702,7 +765,8 @@ def _capture_screenshot(options: ScreenshotOptions, progress: PipelineProgress)
    will_convert = requested_format == "webp"
    will_archive = bool(options.archive and options.url)
    total_steps = (
-        9 + (1 if will_target else 0) + (1 if will_convert else 0) + (1 if will_archive else 0)
+        9 + (1 if will_target else 0) + (1 if will_convert else 0) +
+        (1 if will_archive else 0)
    )
    progress.begin_steps(total_steps)
    progress.step("loading starting")
@@ -712,7 +776,9 @@ def _capture_screenshot(options: ScreenshotOptions, progress: PipelineProgress)
    capture_path = destination
    if requested_format == "webp":
        capture_path = unique_path(destination.with_suffix(".png"))
-        debug(f"[_capture_screenshot] Requested webp; capturing intermediate png -> {capture_path}")
+        debug(
+            f"[_capture_screenshot] Requested webp; capturing intermediate png -> {capture_path}"
+        )
        options.output_format = "png"
    _capture(options, capture_path, warnings, progress)

@@ -808,7 +874,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
        #   [tool=playwright]
        #   format="pdf"
        try:
-            tool_cfg = config.get("tool", {}) if isinstance(config, dict) else {}
+            tool_cfg = config.get("tool",
+                                  {}) if isinstance(config,
+                                                    dict) else {}
            pw_cfg = tool_cfg.get("playwright") if isinstance(tool_cfg, dict) else None
            if isinstance(pw_cfg, dict):
                format_value = pw_cfg.get("format")
@@ -839,7 +907,11 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
        # Extract url from piped results
        if piped_results:
            for item in piped_results:
-                url = get_field(item, "path") or get_field(item, "url") or get_field(item, "target")
+                url = get_field(item,
+                                "path") or get_field(item,
+                                                     "url"
+                                                     ) or get_field(item,
+                                                                    "target")

                if url:
                    url_to_process.append((str(url), item))
@@ -910,6 +982,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:

    all_emitted = []
    exit_code = 0
+
    # ========================================================================
    # PROCESS url AND CAPTURE SCREENSHOTS
    # ========================================================================
@@ -970,8 +1043,11 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
                        "playwright": {
                            "browser": "chromium",
                            "user_agent": "native",
-                            "viewport_width": int(DEFAULT_VIEWPORT.get("width", 1920)),
-                            "viewport_height": int(DEFAULT_VIEWPORT.get("height", 1080)),
+                            "viewport_width": int(DEFAULT_VIEWPORT.get("width",
+                                                                       1920)),
+                            "viewport_height":
+                            int(DEFAULT_VIEWPORT.get("height",
+                                                     1080)),
                        }
                    }
                }
@@ -995,7 +1071,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
            if manual_target_selectors:
                options.prefer_platform_target = True
                options.target_selectors = manual_target_selectors
-                debug(f"[screen_shot] Using explicit selector(s): {manual_target_selectors}")
+                debug(
+                    f"[screen_shot] Using explicit selector(s): {manual_target_selectors}"
+                )
            elif auto_selectors:
                options.prefer_platform_target = True
                options.target_selectors = auto_selectors
@@ -1022,9 +1100,8 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
            capture_date = ""
            try:
                capture_date = (
-                    datetime.fromtimestamp(screenshot_result.path.stat().st_mtime)
-                    .date()
-                    .isoformat()
+                    datetime.fromtimestamp(screenshot_result.path.stat().st_mtime
+                                           ).date().isoformat()
                )
            except Exception:
                capture_date = datetime.now().date().isoformat()
@@ -1035,14 +1112,14 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:

            upstream_tags = _extract_item_tags(origin_item)
            filtered_upstream_tags = [
-                t
-                for t in upstream_tags
+                t for t in upstream_tags
                if not str(t).strip().lower().startswith(("type:", "date:"))
            ]

            url_tags = _tags_from_url(url)
            merged_tags = unique_preserve_order(
-                ["type:screenshot", f"date:{capture_date}"] + filtered_upstream_tags + url_tags
+                ["type:screenshot",
+                 f"date:{capture_date}"] + filtered_upstream_tags + url_tags
            )

            pipe_obj = create_pipe_object_result(
@@ -1097,13 +1174,20 @@ CMDLET = Cmdlet(
    name="screen-shot",
    summary="Capture a website screenshot",
    usage="screen-shot <url> [options]",
-    alias=["screenshot", "ss"],
+    alias=["screenshot",
+           "ss"],
    arg=[
        SharedArgs.URL,
        CmdletArg(
-            name="format", type="string", description="Output format: webp, png, jpeg, or pdf"
+            name="format",
+            type="string",
+            description="Output format: webp, png, jpeg, or pdf"
+        ),
+        CmdletArg(
+            name="selector",
+            type="string",
+            description="CSS selector for element capture"
        ),
-        CmdletArg(name="selector", type="string", description="CSS selector for element capture"),
        SharedArgs.PATH,
    ],
    detail=[