"""Pipeline execution utilities for the Textual UI. This module mirrors the CLI pipeline behaviour while exposing a class-based interface that the TUI can call. It keeps all pipeline/cmdlet integration in one place so the interface layer stays focused on presentation. """ from __future__ import annotations import contextlib import io import shlex import uuid from dataclasses import dataclass, field import sys from pathlib import Path from typing import Any, Callable, Dict, List, Optional, Sequence BASE_DIR = Path(__file__).resolve().parent ROOT_DIR = BASE_DIR.parent for path in (ROOT_DIR, BASE_DIR): str_path = str(path) if str_path not in sys.path: sys.path.insert(0, str_path) import pipeline as ctx from cmdlets import REGISTRY from config import get_local_storage_path, load_config from helper.worker_manager import WorkerManager try: # Reuse the CLI selection parser instead of reimplementing it. from CLI import _parse_selection_syntax except ImportError: # pragma: no cover - fallback for atypical environments _parse_selection_syntax = None # type: ignore @dataclass(slots=True) class PipelineStageResult: """Summary for a single pipeline stage.""" name: str args: Sequence[str] emitted: List[Any] = field(default_factory=list) result_table: Optional[Any] = None # ResultTable object if available status: str = "pending" error: Optional[str] = None @dataclass(slots=True) class PipelineRunResult: """Aggregate result for a pipeline run.""" pipeline: str success: bool stages: List[PipelineStageResult] = field(default_factory=list) emitted: List[Any] = field(default_factory=list) result_table: Optional[Any] = None # Final ResultTable object if available stdout: str = "" stderr: str = "" error: Optional[str] = None def to_summary(self) -> Dict[str, Any]: """Provide a JSON-friendly representation for logging or UI.""" return { "pipeline": self.pipeline, "success": self.success, "error": self.error, "stages": [ { "name": stage.name, "status": stage.status, "error": stage.error, "emitted": len(stage.emitted), } for stage in self.stages ], } class PipelineExecutor: """Thin wrapper over the cmdlet registry + pipeline context.""" def __init__( self, *, config: Optional[Dict[str, Any]] = None, worker_manager: Optional[WorkerManager] = None, ) -> None: self._config = config or load_config() self._worker_manager = worker_manager if self._worker_manager is None: self._worker_manager = self._ensure_worker_manager() if self._worker_manager: self._config["_worker_manager"] = self._worker_manager @property def worker_manager(self) -> Optional[WorkerManager]: return self._worker_manager def run_pipeline( self, pipeline_text: str, *, on_log: Optional[Callable[[str], None]] = None, ) -> PipelineRunResult: """Execute a pipeline string and return structured results. Args: pipeline_text: Raw pipeline text entered by the user. on_log: Optional callback that receives human-readable log lines. 
""" normalized = pipeline_text.strip() result = PipelineRunResult(pipeline=normalized, success=False) if not normalized: result.error = "Pipeline is empty" return result tokens = self._tokenize(normalized) stages = self._split_stages(tokens) if not stages: result.error = "Pipeline contains no stages" return result ctx.reset() ctx.set_current_command_text(normalized) stdout_buffer = io.StringIO() stderr_buffer = io.StringIO() piped_result: Any = None worker_session = self._start_worker_session(normalized) try: with contextlib.redirect_stdout(stdout_buffer), contextlib.redirect_stderr( stderr_buffer ): for index, stage_tokens in enumerate(stages): stage = self._execute_stage( index=index, total=len(stages), stage_tokens=stage_tokens, piped_input=piped_result, on_log=on_log, ) result.stages.append(stage) if stage.status != "completed": result.error = stage.error or f"Stage {stage.name} failed" return result if index == len(stages) - 1: result.emitted = stage.emitted result.result_table = stage.result_table else: piped_result = stage.emitted result.success = True return result finally: result.stdout = stdout_buffer.getvalue() result.stderr = stderr_buffer.getvalue() ctx.clear_current_command_text() if worker_session is not None: status = "completed" if result.success else "error" worker_session.finish(status=status, message=result.error or "") # ------------------------------------------------------------------ # Stage execution helpers # ------------------------------------------------------------------ def _execute_stage( self, *, index: int, total: int, stage_tokens: Sequence[str], piped_input: Any, on_log: Optional[Callable[[str], None]], ) -> PipelineStageResult: if not stage_tokens: return PipelineStageResult(name="(empty)", args=[], status="skipped") cmd_name = stage_tokens[0].replace("_", "-").lower() stage_args = stage_tokens[1:] stage = PipelineStageResult(name=cmd_name, args=stage_args) if cmd_name.startswith("@"): return self._apply_selection_stage( token=cmd_name, stage=stage, piped_input=piped_input, on_log=on_log, ) cmd_fn = REGISTRY.get(cmd_name) if not cmd_fn: stage.status = "failed" stage.error = f"Unknown command: {cmd_name}" return stage pipeline_ctx = ctx.PipelineStageContext(stage_index=index, total_stages=total) ctx.set_stage_context(pipeline_ctx) ctx.set_active(True) ctx.set_last_stage(index == total - 1) try: return_code = cmd_fn(piped_input, list(stage_args), self._config) except Exception as exc: # pragma: no cover - surfaced in UI stage.status = "failed" stage.error = f"{type(exc).__name__}: {exc}" if on_log: on_log(stage.error) return stage finally: ctx.set_stage_context(None) ctx.set_active(False) emitted = list(getattr(pipeline_ctx, "emits", []) or []) stage.emitted = emitted # Capture the ResultTable if the cmdlet set one # Check display table first (overlay), then last result table stage.result_table = ctx.get_display_table() or ctx.get_last_result_table() if return_code != 0: stage.status = "failed" stage.error = f"Exit code {return_code}" else: stage.status = "completed" stage.error = None worker_id = self._current_worker_id() if self._worker_manager and worker_id: label = f"[Stage {index + 1}/{total}] {cmd_name} {stage.status}" self._worker_manager.log_step(worker_id, label) # Don't clear the table if we just captured it, but ensure items are set for next stage # If we have a table, we should probably keep it in ctx for history if needed # But for pipeline execution, we mainly care about passing items to next stage # ctx.set_last_result_table(None, emitted) 

    def _apply_selection_stage(
        self,
        *,
        token: str,
        stage: PipelineStageResult,
        piped_input: Any,
        on_log: Optional[Callable[[str], None]],
    ) -> PipelineStageResult:
        selection = self._parse_selection(token)
        items = piped_input or []
        if not isinstance(items, list):
            items = list(items if isinstance(items, Sequence) else [items])

        if not items:
            stage.status = "failed"
            stage.error = "Selection requested but there is no upstream data"
            return stage

        if selection is None:
            stage.emitted = list(items)
        else:
            zero_based = sorted(i - 1 for i in selection if i > 0)
            stage.emitted = [items[i] for i in zero_based if 0 <= i < len(items)]

        if not stage.emitted:
            stage.status = "failed"
            stage.error = "Selection matched no rows"
            return stage

        ctx.set_last_items(stage.emitted)
        ctx.set_last_result_table(None, stage.emitted)
        stage.status = "completed"
        if on_log:
            on_log(f"Selected {len(stage.emitted)} item(s) via {token}")
        return stage

    # ------------------------------------------------------------------
    # Worker/session helpers
    # ------------------------------------------------------------------
    def _start_worker_session(self, pipeline_text: str) -> Optional[_WorkerSession]:
        manager = self._ensure_worker_manager()
        if manager is None:
            return None

        worker_id = f"tui_pipeline_{uuid.uuid4().hex[:8]}"
        tracked = manager.track_worker(
            worker_id,
            worker_type="pipeline",
            title="Pipeline run",
            description=pipeline_text,
            pipe=pipeline_text,
        )
        if not tracked:
            return None

        manager.log_step(worker_id, "Pipeline started")
        self._config["_current_worker_id"] = worker_id
        return _WorkerSession(manager=manager, worker_id=worker_id, config=self._config)

    def _ensure_worker_manager(self) -> Optional[WorkerManager]:
        if self._worker_manager:
            return self._worker_manager
        library_root = get_local_storage_path(self._config)
        if not library_root:
            return None
        try:
            self._worker_manager = WorkerManager(Path(library_root), auto_refresh_interval=0)
            self._config["_worker_manager"] = self._worker_manager
        except Exception:
            self._worker_manager = None
        return self._worker_manager

    def _current_worker_id(self) -> Optional[str]:
        worker_id = self._config.get("_current_worker_id")
        return str(worker_id) if worker_id else None

    # ------------------------------------------------------------------
    # Parsing helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _tokenize(pipeline_text: str) -> List[str]:
        try:
            return shlex.split(pipeline_text)
        except ValueError:
            return pipeline_text.split()

    @staticmethod
    def _split_stages(tokens: Sequence[str]) -> List[List[str]]:
        stages: List[List[str]] = []
        current: List[str] = []
        for token in tokens:
            if token == "|":
                if current:
                    stages.append(current)
                    current = []
            else:
                current.append(token)
        if current:
            stages.append(current)
        return stages

    @staticmethod
    def _parse_selection(token: str) -> Optional[Sequence[int]]:
        if _parse_selection_syntax:
            parsed = _parse_selection_syntax(token)
            if parsed:
                return sorted(parsed)
        return None
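
# Illustrative note on how pipeline text is decomposed by the parsing helpers
# above (the cmdlet names are hypothetical examples, not guaranteed registry
# entries):
#
#     "search-file foo | @1-3 | add-tag bar"
#
#     _tokenize     -> ["search-file", "foo", "|", "@1-3", "|", "add-tag", "bar"]
#     _split_stages -> [["search-file", "foo"], ["@1-3"], ["add-tag", "bar"]]
#
# The first and last stages are dispatched through the cmdlet REGISTRY; the
# middle stage starts with "@", so _apply_selection_stage filters the rows
# emitted by the previous stage (1-based indices, parsed by the CLI's
# selection syntax when it is importable).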


class _WorkerSession:
    """Minimal worker session wrapper for the TUI executor."""

    def __init__(
        self,
        *,
        manager: WorkerManager,
        worker_id: str,
        config: Optional[Dict[str, Any]] = None,
    ) -> None:
        self._manager = manager
        self.worker_id = worker_id
        self._config = config

    def finish(self, *, status: str, message: str) -> None:
        try:
            self._manager.finish_worker(self.worker_id, result=status, error_msg=message)
            self._manager.log_step(self.worker_id, f"Pipeline {status}")
        except Exception:
            pass
        if self._config and self._config.get("_current_worker_id") == self.worker_id:
            self._config.pop("_current_worker_id", None)
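

# ------------------------------------------------------------------
# Illustrative usage (sketch)
# ------------------------------------------------------------------
# A minimal sketch of how a TUI layer (or a manual smoke test) might drive the
# executor. The pipeline text uses hypothetical cmdlet names; real names come
# from the cmdlet REGISTRY.
if __name__ == "__main__":  # pragma: no cover - manual smoke test only
    executor = PipelineExecutor()
    run = executor.run_pipeline("search-file foo | @1", on_log=print)
    # to_summary() returns a JSON-friendly dict that is easy to log or render.
    print(run.to_summary())
    if not run.success:
        print(run.error or run.stderr)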