AST

2025-11-25 20:09:33 -08:00
parent d75c644a82
commit bd69119996
80 changed files with 39615 additions and 0 deletions
--- a/TUI/pipeline_runner.py
+++ b/TUI/pipeline_runner.py
@@ -0,0 +1,356 @@
+"""Pipeline execution utilities for the Textual UI.
+
+This module mirrors the CLI pipeline behaviour while exposing a class-based
+interface that the TUI can call. It keeps all pipeline/cmdlet integration in
+one place so the interface layer stays focused on presentation.
+"""
+from __future__ import annotations
+
+import contextlib
+import io
+import shlex
+import uuid
+from dataclasses import dataclass, field
+import sys
+from pathlib import Path
+from typing import Any, Callable, Dict, List, Optional, Sequence
+
+BASE_DIR = Path(__file__).resolve().parent
+ROOT_DIR = BASE_DIR.parent
+for path in (ROOT_DIR, BASE_DIR):
+    str_path = str(path)
+    if str_path not in sys.path:
+        sys.path.insert(0, str_path)
+
+import pipeline as ctx
+from cmdlets import REGISTRY
+from config import get_local_storage_path, load_config
+from helper.worker_manager import WorkerManager
+
+try:  # Reuse the CLI selection parser instead of reimplementing it.
+    from CLI import _parse_selection_syntax
+except ImportError:  # pragma: no cover - fallback for atypical environments
+    _parse_selection_syntax = None  # type: ignore
+
+
+@dataclass(slots=True)
+class PipelineStageResult:
+    """Summary for a single pipeline stage."""
+
+    name: str
+    args: Sequence[str]
+    emitted: List[Any] = field(default_factory=list)
+    status: str = "pending"
+    error: Optional[str] = None
+
+
+@dataclass(slots=True)
+class PipelineRunResult:
+    """Aggregate result for a pipeline run."""
+
+    pipeline: str
+    success: bool
+    stages: List[PipelineStageResult] = field(default_factory=list)
+    emitted: List[Any] = field(default_factory=list)
+    stdout: str = ""
+    stderr: str = ""
+    error: Optional[str] = None
+
+    def to_summary(self) -> Dict[str, Any]:
+        """Provide a JSON-friendly representation for logging or UI."""
+        return {
+            "pipeline": self.pipeline,
+            "success": self.success,
+            "error": self.error,
+            "stages": [
+                {
+                    "name": stage.name,
+                    "status": stage.status,
+                    "error": stage.error,
+                    "emitted": len(stage.emitted),
+                }
+                for stage in self.stages
+            ],
+        }
+
+
+class PipelineExecutor:
+    """Thin wrapper over the cmdlet registry + pipeline context."""
+
+    def __init__(
+        self,
+        *,
+        config: Optional[Dict[str, Any]] = None,
+        worker_manager: Optional[WorkerManager] = None,
+    ) -> None:
+        self._config = config or load_config()
+        self._worker_manager = worker_manager
+        if self._worker_manager is None:
+            self._worker_manager = self._ensure_worker_manager()
+        if self._worker_manager:
+            self._config["_worker_manager"] = self._worker_manager
+
+    @property
+    def worker_manager(self) -> Optional[WorkerManager]:
+        return self._worker_manager
+
+    def run_pipeline(
+        self,
+        pipeline_text: str,
+        *,
+        on_log: Optional[Callable[[str], None]] = None,
+    ) -> PipelineRunResult:
+        """Execute a pipeline string and return structured results.
+
+        Args:
+            pipeline_text: Raw pipeline text entered by the user.
+            on_log: Optional callback that receives human-readable log lines.
+        """
+        normalized = pipeline_text.strip()
+        result = PipelineRunResult(pipeline=normalized, success=False)
+        if not normalized:
+            result.error = "Pipeline is empty"
+            return result
+
+        tokens = self._tokenize(normalized)
+        stages = self._split_stages(tokens)
+        if not stages:
+            result.error = "Pipeline contains no stages"
+            return result
+
+        ctx.reset()
+        ctx.set_current_command_text(normalized)
+
+        stdout_buffer = io.StringIO()
+        stderr_buffer = io.StringIO()
+        piped_result: Any = None
+        worker_session = self._start_worker_session(normalized)
+
+        try:
+            with contextlib.redirect_stdout(stdout_buffer), contextlib.redirect_stderr(
+                stderr_buffer
+            ):
+                for index, stage_tokens in enumerate(stages):
+                    stage = self._execute_stage(
+                        index=index,
+                        total=len(stages),
+                        stage_tokens=stage_tokens,
+                        piped_input=piped_result,
+                        on_log=on_log,
+                    )
+                    result.stages.append(stage)
+
+                    if stage.status != "completed":
+                        result.error = stage.error or f"Stage {stage.name} failed"
+                        return result
+
+                    if index == len(stages) - 1:
+                        result.emitted = stage.emitted
+                    else:
+                        piped_result = stage.emitted
+
+            result.success = True
+            return result
+        finally:
+            result.stdout = stdout_buffer.getvalue()
+            result.stderr = stderr_buffer.getvalue()
+            ctx.clear_current_command_text()
+            if worker_session is not None:
+                status = "completed" if result.success else "error"
+                worker_session.finish(status=status, message=result.error or "")
+
+    # ------------------------------------------------------------------
+    # Stage execution helpers
+    # ------------------------------------------------------------------
+    def _execute_stage(
+        self,
+        *,
+        index: int,
+        total: int,
+        stage_tokens: Sequence[str],
+        piped_input: Any,
+        on_log: Optional[Callable[[str], None]],
+    ) -> PipelineStageResult:
+        if not stage_tokens:
+            return PipelineStageResult(name="(empty)", args=[], status="skipped")
+
+        cmd_name = stage_tokens[0].replace("_", "-").lower()
+        stage_args = stage_tokens[1:]
+        stage = PipelineStageResult(name=cmd_name, args=stage_args)
+
+        if cmd_name.startswith("@"):
+            return self._apply_selection_stage(
+                token=cmd_name,
+                stage=stage,
+                piped_input=piped_input,
+                on_log=on_log,
+            )
+
+        cmd_fn = REGISTRY.get(cmd_name)
+        if not cmd_fn:
+            stage.status = "failed"
+            stage.error = f"Unknown command: {cmd_name}"
+            return stage
+
+        pipeline_ctx = ctx.PipelineStageContext(stage_index=index, total_stages=total)
+        ctx.set_stage_context(pipeline_ctx)
+        ctx.set_active(True)
+        ctx.set_last_stage(index == total - 1)
+
+        try:
+            return_code = cmd_fn(piped_input, list(stage_args), self._config)
+        except Exception as exc:  # pragma: no cover - surfaced in UI
+            stage.status = "failed"
+            stage.error = f"{type(exc).__name__}: {exc}"
+            if on_log:
+                on_log(stage.error)
+            return stage
+        finally:
+            ctx.set_stage_context(None)
+            ctx.set_active(False)
+
+        emitted = list(getattr(pipeline_ctx, "emits", []) or [])
+        stage.emitted = emitted
+
+        if return_code != 0:
+            stage.status = "failed"
+            stage.error = f"Exit code {return_code}"
+        else:
+            stage.status = "completed"
+            stage.error = None
+
+        worker_id = self._current_worker_id()
+        if self._worker_manager and worker_id:
+            label = f"[Stage {index + 1}/{total}] {cmd_name} {stage.status}"
+            self._worker_manager.log_step(worker_id, label)
+
+        ctx.set_last_result_table(None, emitted)
+        ctx.set_last_items(emitted)
+        return stage
+
+    def _apply_selection_stage(
+        self,
+        *,
+        token: str,
+        stage: PipelineStageResult,
+        piped_input: Any,
+        on_log: Optional[Callable[[str], None]],
+    ) -> PipelineStageResult:
+        selection = self._parse_selection(token)
+        items = piped_input or []
+        if not isinstance(items, list):
+            items = list(items if isinstance(items, Sequence) else [items])
+
+        if not items:
+            stage.status = "failed"
+            stage.error = "Selection requested but there is no upstream data"
+            return stage
+
+        if selection is None:
+            stage.emitted = list(items)
+        else:
+            zero_based = sorted(i - 1 for i in selection if i > 0)
+            stage.emitted = [items[i] for i in zero_based if 0 <= i < len(items)]
+
+        if not stage.emitted:
+            stage.status = "failed"
+            stage.error = "Selection matched no rows"
+            return stage
+
+        ctx.set_last_items(stage.emitted)
+        ctx.set_last_result_table(None, stage.emitted)
+        stage.status = "completed"
+        if on_log:
+            on_log(f"Selected {len(stage.emitted)} item(s) via {token}")
+        return stage
+
+    # ------------------------------------------------------------------
+    # Worker/session helpers
+    # ------------------------------------------------------------------
+    def _start_worker_session(self, pipeline_text: str) -> Optional[_WorkerSession]:
+        manager = self._ensure_worker_manager()
+        if manager is None:
+            return None
+
+        worker_id = f"tui_pipeline_{uuid.uuid4().hex[:8]}"
+        tracked = manager.track_worker(
+            worker_id,
+            worker_type="pipeline",
+            title="Pipeline run",
+            description=pipeline_text,
+            pipe=pipeline_text,
+        )
+        if not tracked:
+            return None
+
+        manager.log_step(worker_id, "Pipeline started")
+        self._config["_current_worker_id"] = worker_id
+        return _WorkerSession(manager=manager, worker_id=worker_id, config=self._config)
+
+    def _ensure_worker_manager(self) -> Optional[WorkerManager]:
+        if self._worker_manager:
+            return self._worker_manager
+        library_root = get_local_storage_path(self._config)
+        if not library_root:
+            return None
+        try:
+            self._worker_manager = WorkerManager(Path(library_root), auto_refresh_interval=0)
+            self._config["_worker_manager"] = self._worker_manager
+        except Exception:
+            self._worker_manager = None
+        return self._worker_manager
+
+    def _current_worker_id(self) -> Optional[str]:
+        worker_id = self._config.get("_current_worker_id")
+        return str(worker_id) if worker_id else None
+
+    # ------------------------------------------------------------------
+    # Parsing helpers
+    # ------------------------------------------------------------------
+    @staticmethod
+    def _tokenize(pipeline_text: str) -> List[str]:
+        try:
+            return shlex.split(pipeline_text)
+        except ValueError:
+            return pipeline_text.split()
+
+    @staticmethod
+    def _split_stages(tokens: Sequence[str]) -> List[List[str]]:
+        stages: List[List[str]] = []
+        current: List[str] = []
+        for token in tokens:
+            if token == "|":
+                if current:
+                    stages.append(current)
+                    current = []
+            else:
+                current.append(token)
+        if current:
+            stages.append(current)
+        return stages
+
+    @staticmethod
+    def _parse_selection(token: str) -> Optional[Sequence[int]]:
+        if _parse_selection_syntax:
+              parsed = _parse_selection_syntax(token)
+              if parsed:
+                 return sorted(parsed)
+        return None
+
+
+class _WorkerSession:
+    """Minimal worker session wrapper for the TUI executor."""
+
+    def __init__(self, *, manager: WorkerManager, worker_id: str, config: Optional[Dict[str, Any]] = None) -> None:
+        self._manager = manager
+        self.worker_id = worker_id
+        self._config = config
+
+    def finish(self, *, status: str, message: str) -> None:
+        try:
+            self._manager.finish_worker(self.worker_id, result=status, error_msg=message)
+            self._manager.log_step(self.worker_id, f"Pipeline {status}")
+        except Exception:
+            pass
+        if self._config and self._config.get("_current_worker_id") == self.worker_id:
+            self._config.pop("_current_worker_id", None)