This commit is contained in:
2026-02-11 18:16:07 -08:00
parent cc715e1fef
commit 1d0de1118b
27 changed files with 1167 additions and 1075 deletions

37
SYS/field_access.py Normal file
View File

@@ -0,0 +1,37 @@
"""Lightweight helpers for accessing fields on mixed pipeline objects.
This intentionally avoids importing cmdlet modules so it can be used from
providers and core pipeline code without pulling in the full cmdlet stack.
"""
from __future__ import annotations
from typing import Any, Optional
def get_field(obj: Any, field: str, default: Optional[Any] = None) -> Any:
    """Look up ``field`` on a dict-like or attribute-style object.

    Resolution order:
      1. A list input is unwrapped to its first element (empty list -> default).
      2. Dicts answer via ``dict.get``.
      3. Otherwise the attribute of the same name is tried.
      4. Finally a PipeObject-style ``.extra`` dict is consulted.

    Returns ``default`` when nothing matches.
    """
    # Unwrap list inputs EAFP-style: an empty list has no first element.
    if isinstance(obj, list):
        try:
            obj = obj[0]
        except IndexError:
            return default

    # Plain mappings resolve directly.
    if isinstance(obj, dict):
        return obj.get(field, default)

    # Attribute lookup; None is treated as "not found" so the .extra
    # fallback below still gets a chance.
    found = getattr(obj, field, None)
    if found is not None:
        return found

    # PipeObject pattern: spill-over fields live in an `extra` dict.
    spillover = getattr(obj, "extra", None)
    return spillover.get(field, default) if isinstance(spillover, dict) else default

265
SYS/pipe_object.py Normal file
View File

@@ -0,0 +1,265 @@
from __future__ import annotations
from typing import Any, Dict, Optional
from SYS import models
def coerce_to_pipe_object(
    value: Any,
    default_path: Optional[str] = None,
) -> models.PipeObject:
    """Normalize any incoming result to a PipeObject for single-source-of-truth state.

    Uses hash+store canonical pattern.

    Accepts PipeObject (returned unchanged), dicts, objects exposing
    ``to_dict()`` or known attributes (e.g. ResultItem/UrlItem), plain
    strings (URL or filesystem path), or anything else (falls through to
    the bare fallback at the bottom).

    Args:
        value: The raw pipeline result to normalize.
        default_path: Path used by the fallback branch when ``value``
            carries no path of its own.

    Returns:
        A ``models.PipeObject`` with canonical hash/store identifiers;
        unrecognized dict keys are preserved in ``extra``.
    """
    # Debug: Print ResultItem details if coming from search_file.py
    try:
        from SYS.logger import is_debug_enabled, debug
        if (
            is_debug_enabled()
            and hasattr(value, "__class__")
            and value.__class__.__name__ == "ResultItem"
        ):
            debug("[ResultItem -> PipeObject conversion]")
            debug(f"  title={getattr(value, 'title', None)}")
            debug(f"  target={getattr(value, 'target', None)}")
            debug(f"  hash={getattr(value, 'hash', None)}")
            debug(f"  media_kind={getattr(value, 'media_kind', None)}")
            debug(f"  tag={getattr(value, 'tag', None)}")
            debug(f"  tag_summary={getattr(value, 'tag_summary', None)}")
            debug(f"  size_bytes={getattr(value, 'size_bytes', None)}")
            debug(f"  duration_seconds={getattr(value, 'duration_seconds', None)}")
            debug(f"  relationships={getattr(value, 'relationships', None)}")
            debug(f"  url={getattr(value, 'url', None)}")
            debug(
                f"  full_metadata keys={list(getattr(value, 'full_metadata', {}).keys()) if hasattr(value, 'full_metadata') and value.full_metadata else []}"
            )
    except Exception:
        # Best-effort debug logging only; never let it break coercion.
        pass
    # Already canonical: pass through untouched.
    if isinstance(value, models.PipeObject):
        return value
    # Keys consumed explicitly below; everything else lands in `extra`.
    known_keys = {
        "hash",
        "store",
        "tag",
        "title",
        "url",
        "source_url",
        "duration",
        "metadata",
        "warnings",
        "path",
        "relationships",
        "is_temp",
        "action",
        "parent_hash",
    }
    # Convert common object-like results into a dict so we can preserve fields like
    # hash/store/url when they come from result tables (e.g., get-url emits UrlItem).
    #
    # Priority:
    #   1) explicit to_dict()
    #   2) best-effort attribute extraction for known PipeObject-ish fields
    if hasattr(value, "to_dict"):
        value = value.to_dict()
    elif not isinstance(value, dict):
        try:
            obj_map: Dict[str, Any] = {}
            # Superset of known_keys: also probes aliases such as
            # "prov", "duration_seconds", "full_metadata", "target".
            for k in (
                "hash",
                "store",
                "provider",
                "prov",
                "tag",
                "title",
                "url",
                "source_url",
                "duration",
                "duration_seconds",
                "metadata",
                "full_metadata",
                "warnings",
                "path",
                "target",
                "relationships",
                "is_temp",
                "action",
                "parent_hash",
                "extra",
                "media_kind",
            ):
                if hasattr(value, k):
                    obj_map[k] = getattr(value, k)
            # Only adopt the mapping if we actually found something;
            # otherwise fall through to the bare-value handling below.
            if obj_map:
                value = obj_map
        except Exception:
            pass
    if isinstance(value, dict):
        # Extract hash and store (canonical identifiers)
        hash_val = value.get("hash")
        store_val = value.get("store") or "PATH"
        # "PATH" is the placeholder default; a store recorded inside the
        # nested `extra` dict (if any) takes precedence over it.
        if not store_val or store_val == "PATH":
            try:
                extra_store = value.get("extra", {}).get("store")
            except Exception:
                # `extra` may not be a dict — ignore and keep the default.
                extra_store = None
            if extra_store:
                store_val = extra_store
        # If no hash, try to compute from path or use placeholder
        if not hash_val:
            path_val = value.get("path")
            if path_val:
                try:
                    from pathlib import Path
                    from SYS.utils import sha256_file
                    hash_val = sha256_file(Path(path_val))
                except Exception:
                    # Unreadable/missing file: fall back to sentinel.
                    hash_val = "unknown"
            else:
                hash_val = "unknown"
        # Extract title from filename if not provided
        title_val = value.get("title")
        if not title_val:
            path_val = value.get("path")
            if path_val:
                try:
                    from pathlib import Path
                    # Filename without extension as a human-readable title.
                    title_val = Path(path_val).stem
                except Exception:
                    pass
        # Everything not consumed explicitly is preserved in `extra`.
        extra = {k: v for k, v in value.items() if k not in known_keys}
        # Extract URL: prefer direct url field, then url list
        # NOTE(review): normalize_urls presumably returns a list of URL
        # strings for any scalar/list input — confirm in SYS.metadata.
        from SYS.metadata import normalize_urls
        url_list = normalize_urls(value.get("url"))
        url_val = url_list[0] if url_list else None
        # Multiple URLs: first one becomes canonical, full list is kept
        # in extra["url"] so nothing is lost.
        if len(url_list) > 1:
            extra["url"] = url_list
        # Extract relationships
        rels = value.get("relationships") or {}
        # Canonical tag: accept list or single string
        tag_val: list[str] = []
        if "tag" in value:
            raw_tag = value["tag"]
            if isinstance(raw_tag, list):
                tag_val = [str(t) for t in raw_tag if t is not None]
            elif isinstance(raw_tag, str):
                tag_val = [raw_tag]
        # Consolidate path: prefer explicit path key, but NOT target if it's a URL
        path_val = value.get("path")
        # Only use target as path if it's not a URL (url should stay in url field)
        if not path_val and "target" in value:
            target = value["target"]
            if target and not (
                isinstance(target, str)
                and (target.startswith("http://") or target.startswith("https://"))
            ):
                path_val = target
        # If the path value is actually a URL, move it to url_val and clear path_val
        try:
            if isinstance(path_val, str) and (
                path_val.startswith("http://") or path_val.startswith("https://")
            ):
                # Prefer existing url_val if present, otherwise move path_val into url_val
                if not url_val:
                    url_val = path_val
                path_val = None
        except Exception:
            pass
        # Extract media_kind if available
        if "media_kind" in value:
            extra["media_kind"] = value["media_kind"]
        # Provider: first non-empty among several aliases; empty string
        # collapses to None after strip().
        pipe_obj = models.PipeObject(
            hash=hash_val,
            store=store_val,
            provider=str(
                value.get("provider")
                or value.get("prov")
                or value.get("source")
                or extra.get("provider")
                or extra.get("source")
                or ""
            ).strip()
            or None,
            tag=tag_val,
            title=title_val,
            url=url_val,
            source_url=value.get("source_url"),
            duration=value.get("duration") or value.get("duration_seconds"),
            metadata=value.get("metadata") or value.get("full_metadata") or {},
            warnings=list(value.get("warnings") or []),
            path=path_val,
            relationships=rels,
            is_temp=bool(value.get("is_temp", False)),
            action=value.get("action"),
            parent_hash=value.get("parent_hash"),
            extra=extra,
        )
        return pipe_obj
    # Fallback: build from path argument or bare value
    hash_val = "unknown"
    path_val = default_path or getattr(value, "path", None)
    url_val: Optional[str] = None
    title_val = None
    # If the raw value is a string, treat it as either a URL or a file path.
    # This is important for @-selection results that are plain URL strings.
    if isinstance(value, str):
        s = value.strip()
        if s.lower().startswith(("http://", "https://")):
            url_val = s
            path_val = None
        else:
            path_val = s
    if path_val and path_val != "unknown":
        try:
            from pathlib import Path
            from SYS.utils import sha256_file
            path_obj = Path(path_val)
            hash_val = sha256_file(path_obj)
            # Extract title from filename (without extension)
            title_val = path_obj.stem
        except Exception:
            # Hashing failure leaves the "unknown" sentinel in place.
            pass
    # When coming from a raw URL string, mark it explicitly as URL.
    # Otherwise treat it as a local path.
    store_val = "URL" if url_val else "PATH"
    pipe_obj = models.PipeObject(
        hash=hash_val,
        store=store_val,
        provider=None,
        path=str(path_val) if path_val and path_val != "unknown" else None,
        title=title_val,
        url=url_val,
        source_url=url_val,
        tag=[],
        extra={},
    )
    return pipe_obj

View File

@@ -1607,7 +1607,7 @@ class PipelineExecutor:
item = selected_items[0]
url = None
try:
from cmdlet._shared import get_field
from SYS.field_access import get_field
url = get_field(item, "url")
except Exception:
@@ -2043,7 +2043,7 @@ class PipelineExecutor:
return False, None
debug(f"@N: _maybe_run_class_selector returned False, continuing")
from cmdlet._shared import coerce_to_pipe_object
from SYS.pipe_object import coerce_to_pipe_object
filtered_pipe_objs = [coerce_to_pipe_object(item) for item in filtered]
piped_result = (
@@ -2467,7 +2467,7 @@ class PipelineExecutor:
last_items = None
if last_items:
from cmdlet._shared import coerce_to_pipe_object
from SYS.pipe_object import coerce_to_pipe_object
try:
pipe_items = [
@@ -2706,7 +2706,7 @@ class PipelineExecutor:
if (next_cmd in {"delete-tag",
"delete_tag"} and len(filtered) > 1
and all(_is_tag_row(x) for x in filtered)):
from cmdlet._shared import get_field
from SYS.field_access import get_field
tags: List[str] = []
first_hash = None
@@ -2739,7 +2739,7 @@ class PipelineExecutor:
piped_result = grouped
continue
from cmdlet._shared import coerce_to_pipe_object
from SYS.pipe_object import coerce_to_pipe_object
filtered_pipe_objs = [
coerce_to_pipe_object(item) for item in filtered

View File

@@ -8,7 +8,7 @@ import logging
from pathlib import Path
from typing import Optional, Dict, Any, List, Callable, Tuple
from datetime import datetime
from threading import Thread, Lock
from threading import Thread, Lock, Event
import time
from SYS.logger import log
@@ -273,6 +273,7 @@ class WorkerManager:
self.refresh_callbacks: List[Callable] = []
self.refresh_thread: Optional[Thread] = None
self._stop_refresh = False
self._refresh_stop_event = Event()
self._lock = Lock()
self.worker_handlers: Dict[str, WorkerLoggingHandler] = {}
self._worker_last_step: Dict[str, str] = {}
@@ -658,6 +659,7 @@ class WorkerManager:
f"[WorkerManager] Starting auto-refresh with {self.auto_refresh_interval}s interval"
)
self._stop_refresh = False
self._refresh_stop_event.clear()
self.refresh_thread = Thread(target=self._auto_refresh_loop, daemon=True)
self.refresh_thread.start()
@@ -665,6 +667,7 @@ class WorkerManager:
"""Stop the auto-refresh thread."""
logger.info("[WorkerManager] Stopping auto-refresh")
self._stop_refresh = True
self._refresh_stop_event.set()
if self.refresh_thread:
self.refresh_thread.join(timeout=5)
self.refresh_thread = None
@@ -679,7 +682,8 @@ class WorkerManager:
"""Main auto-refresh loop that periodically queries and notifies."""
try:
while not self._stop_refresh:
time.sleep(self.auto_refresh_interval)
if self._refresh_stop_event.wait(self.auto_refresh_interval):
break
# Check if there are active workers
active = self.get_active_workers()