"""Pipeline execution utilities for the Textual UI.

This module mirrors the CLI pipeline behaviour while exposing a class-based
interface that the TUI can call. It keeps all pipeline/cmdlet integration in
one place so the interface layer stays focused on presentation.
"""
|
|
|
|
|
from __future__ import annotations
|
|
|
|
|
|
|
|
|
|
import contextlib
|
|
|
|
|
import io
|
|
|
|
|
import shlex
|
|
|
|
|
import uuid
|
|
|
|
|
from dataclasses import dataclass, field
|
|
|
|
|
import sys
|
|
|
|
|
from pathlib import Path
|
|
|
|
|
from typing import Any, Callable, Dict, List, Optional, Sequence
|
|
|
|
|
|
|
|
|
|
# Make both the package directory and its parent importable so the module
# resolves its project-local imports when launched directly.
BASE_DIR = Path(__file__).resolve().parent
ROOT_DIR = BASE_DIR.parent
for path in (ROOT_DIR, BASE_DIR):
    str_path = str(path)
    if str_path in sys.path:
        continue
    sys.path.insert(0, str_path)
|
|
|
|
|
|
|
|
|
|
import pipeline as ctx
|
|
|
|
|
from cmdlets import REGISTRY
|
|
|
|
|
from config import get_local_storage_path, load_config
|
|
|
|
|
from helper.worker_manager import WorkerManager
|
|
|
|
|
|
|
|
|
|
try: # Reuse the CLI selection parser instead of reimplementing it.
|
|
|
|
|
from CLI import _parse_selection_syntax
|
|
|
|
|
except ImportError: # pragma: no cover - fallback for atypical environments
|
|
|
|
|
_parse_selection_syntax = None # type: ignore
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass(slots=True)
class PipelineStageResult:
    """Summary for a single pipeline stage."""

    # Normalized command name (underscores replaced by hyphens, lowercased)
    # or the raw "@..." token for selection stages.
    name: str
    # Tokens following the command name for this stage.
    args: Sequence[str]
    # Items the stage emitted; piped into the next stage, or surfaced as the
    # run's final output when this is the last stage.
    emitted: List[Any] = field(default_factory=list)
    result_table: Optional[Any] = None  # ResultTable object if available
    # Lifecycle marker: "pending", "completed", "failed", or "skipped".
    status: str = "pending"
    # Human-readable failure reason when status is not "completed".
    error: Optional[str] = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@dataclass(slots=True)
|
|
|
|
|
class PipelineRunResult:
|
|
|
|
|
"""Aggregate result for a pipeline run."""
|
|
|
|
|
|
|
|
|
|
pipeline: str
|
|
|
|
|
success: bool
|
|
|
|
|
stages: List[PipelineStageResult] = field(default_factory=list)
|
|
|
|
|
emitted: List[Any] = field(default_factory=list)
|
2025-11-27 10:59:01 -08:00
|
|
|
result_table: Optional[Any] = None # Final ResultTable object if available
|
2025-11-25 20:09:33 -08:00
|
|
|
stdout: str = ""
|
|
|
|
|
stderr: str = ""
|
|
|
|
|
error: Optional[str] = None
|
|
|
|
|
|
|
|
|
|
def to_summary(self) -> Dict[str, Any]:
|
|
|
|
|
"""Provide a JSON-friendly representation for logging or UI."""
|
|
|
|
|
return {
|
|
|
|
|
"pipeline": self.pipeline,
|
|
|
|
|
"success": self.success,
|
|
|
|
|
"error": self.error,
|
|
|
|
|
"stages": [
|
|
|
|
|
{
|
|
|
|
|
"name": stage.name,
|
|
|
|
|
"status": stage.status,
|
|
|
|
|
"error": stage.error,
|
|
|
|
|
"emitted": len(stage.emitted),
|
|
|
|
|
}
|
|
|
|
|
for stage in self.stages
|
|
|
|
|
],
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class PipelineExecutor:
    """Thin wrapper over the cmdlet registry + pipeline context."""

    def __init__(
        self,
        *,
        config: Optional[Dict[str, Any]] = None,
        worker_manager: Optional[WorkerManager] = None,
    ) -> None:
        # Fall back to the on-disk configuration when the caller supplies none.
        self._config = config or load_config()
        self._worker_manager = worker_manager
        if self._worker_manager is None:
            # Best-effort: stays None when no local storage path is configured.
            self._worker_manager = self._ensure_worker_manager()
        if self._worker_manager:
            # Cmdlets look the manager up through the shared config dict.
            self._config["_worker_manager"] = self._worker_manager

    @property
    def worker_manager(self) -> Optional[WorkerManager]:
        """Worker manager used for run tracking, or None when unavailable."""
        return self._worker_manager

    def run_pipeline(
        self,
        pipeline_text: str,
        *,
        on_log: Optional[Callable[[str], None]] = None,
    ) -> PipelineRunResult:
        """Execute a pipeline string and return structured results.

        Args:
            pipeline_text: Raw pipeline text entered by the user.
            on_log: Optional callback that receives human-readable log lines.
        """
        normalized = pipeline_text.strip()
        result = PipelineRunResult(pipeline=normalized, success=False)
        if not normalized:
            result.error = "Pipeline is empty"
            return result

        tokens = self._tokenize(normalized)
        stages = self._split_stages(tokens)
        if not stages:
            result.error = "Pipeline contains no stages"
            return result

        # Fresh global pipeline context for this run; cmdlets read/write it.
        ctx.reset()
        ctx.set_current_command_text(normalized)

        stdout_buffer = io.StringIO()
        stderr_buffer = io.StringIO()
        piped_result: Any = None
        worker_session = self._start_worker_session(normalized)

        try:
            # Capture everything stages print so the TUI can display it later.
            with contextlib.redirect_stdout(stdout_buffer), contextlib.redirect_stderr(
                stderr_buffer
            ):
                for index, stage_tokens in enumerate(stages):
                    stage = self._execute_stage(
                        index=index,
                        total=len(stages),
                        stage_tokens=stage_tokens,
                        piped_input=piped_result,
                        on_log=on_log,
                    )
                    result.stages.append(stage)

                    if stage.status != "completed":
                        # Abort on first failure; the finally block still
                        # records captured output and closes the session.
                        result.error = stage.error or f"Stage {stage.name} failed"
                        return result

                    if index == len(stages) - 1:
                        # Last stage: its emissions become the run's output.
                        result.emitted = stage.emitted
                        result.result_table = stage.result_table
                    else:
                        # Intermediate stage: feed emissions into the next one.
                        piped_result = stage.emitted

            result.success = True
            return result
        finally:
            result.stdout = stdout_buffer.getvalue()
            result.stderr = stderr_buffer.getvalue()
            ctx.clear_current_command_text()
            if worker_session is not None:
                status = "completed" if result.success else "error"
                worker_session.finish(status=status, message=result.error or "")

    # ------------------------------------------------------------------
    # Stage execution helpers
    # ------------------------------------------------------------------
    def _execute_stage(
        self,
        *,
        index: int,
        total: int,
        stage_tokens: Sequence[str],
        piped_input: Any,
        on_log: Optional[Callable[[str], None]],
    ) -> PipelineStageResult:
        """Run one stage: an "@" selection filter or a registered cmdlet."""
        if not stage_tokens:
            return PipelineStageResult(name="(empty)", args=[], status="skipped")

        # Registry keys are hyphenated lowercase; normalize the user's token.
        cmd_name = stage_tokens[0].replace("_", "-").lower()
        stage_args = stage_tokens[1:]
        stage = PipelineStageResult(name=cmd_name, args=stage_args)

        # "@..." tokens select rows from the previous stage's output.
        if cmd_name.startswith("@"):
            return self._apply_selection_stage(
                token=cmd_name,
                stage=stage,
                piped_input=piped_input,
                on_log=on_log,
            )

        cmd_fn = REGISTRY.get(cmd_name)
        if not cmd_fn:
            stage.status = "failed"
            stage.error = f"Unknown command: {cmd_name}"
            return stage

        # Per-stage context object through which the cmdlet emits items.
        pipeline_ctx = ctx.PipelineStageContext(stage_index=index, total_stages=total)
        ctx.set_stage_context(pipeline_ctx)
        ctx.set_active(True)
        ctx.set_last_stage(index == total - 1)

        try:
            return_code = cmd_fn(piped_input, list(stage_args), self._config)
        except Exception as exc:  # pragma: no cover - surfaced in UI
            stage.status = "failed"
            stage.error = f"{type(exc).__name__}: {exc}"
            if on_log:
                on_log(stage.error)
            return stage
        finally:
            # Always clear global stage state, even when the cmdlet raised.
            ctx.set_stage_context(None)
            ctx.set_active(False)

        emitted = list(getattr(pipeline_ctx, "emits", []) or [])
        stage.emitted = emitted

        # Capture the ResultTable if the cmdlet set one
        # Check display table first (overlay), then last result table
        stage.result_table = ctx.get_display_table() or ctx.get_last_result_table()

        # Cmdlets report success via a zero exit code, shell-style.
        if return_code != 0:
            stage.status = "failed"
            stage.error = f"Exit code {return_code}"
        else:
            stage.status = "completed"
            stage.error = None

        worker_id = self._current_worker_id()
        if self._worker_manager and worker_id:
            label = f"[Stage {index + 1}/{total}] {cmd_name} {stage.status}"
            self._worker_manager.log_step(worker_id, label)

        # Don't clear the table if we just captured it, but ensure items are set for next stage
        # If we have a table, we should probably keep it in ctx for history if needed
        # But for pipeline execution, we mainly care about passing items to next stage
        # ctx.set_last_result_table(None, emitted) <-- This was clearing it

        # Ensure items are available for next stage
        ctx.set_last_items(emitted)

        return stage

    def _apply_selection_stage(
        self,
        *,
        token: str,
        stage: PipelineStageResult,
        piped_input: Any,
        on_log: Optional[Callable[[str], None]],
    ) -> PipelineStageResult:
        """Filter upstream items by a 1-based "@" selection token."""
        selection = self._parse_selection(token)
        items = piped_input or []
        if not isinstance(items, list):
            # Coerce any upstream value into a list; non-sequence scalars
            # become a single-item list.
            items = list(items if isinstance(items, Sequence) else [items])

        if not items:
            stage.status = "failed"
            stage.error = "Selection requested but there is no upstream data"
            return stage

        if selection is None:
            # No parsed indices (bare/unparseable token): pass everything.
            stage.emitted = list(items)
        else:
            # Selection indices are 1-based; drop anything out of range.
            zero_based = sorted(i - 1 for i in selection if i > 0)
            stage.emitted = [items[i] for i in zero_based if 0 <= i < len(items)]

        if not stage.emitted:
            stage.status = "failed"
            stage.error = "Selection matched no rows"
            return stage

        # Record the narrowed item set so later stages / the UI see it.
        ctx.set_last_items(stage.emitted)
        ctx.set_last_result_table(None, stage.emitted)
        stage.status = "completed"
        if on_log:
            on_log(f"Selected {len(stage.emitted)} item(s) via {token}")
        return stage

    # ------------------------------------------------------------------
    # Worker/session helpers
    # ------------------------------------------------------------------
    def _start_worker_session(self, pipeline_text: str) -> Optional[_WorkerSession]:
        """Register this run with the worker manager; None when untracked."""
        manager = self._ensure_worker_manager()
        if manager is None:
            return None

        worker_id = f"tui_pipeline_{uuid.uuid4().hex[:8]}"
        tracked = manager.track_worker(
            worker_id,
            worker_type="pipeline",
            title="Pipeline run",
            description=pipeline_text,
            pipe=pipeline_text,
        )
        if not tracked:
            return None

        manager.log_step(worker_id, "Pipeline started")
        # Stages read this key to attribute their log steps to the run.
        self._config["_current_worker_id"] = worker_id
        return _WorkerSession(manager=manager, worker_id=worker_id, config=self._config)

    def _ensure_worker_manager(self) -> Optional[WorkerManager]:
        """Create (and cache) a WorkerManager rooted at the local storage path."""
        if self._worker_manager:
            return self._worker_manager
        library_root = get_local_storage_path(self._config)
        if not library_root:
            return None
        try:
            self._worker_manager = WorkerManager(Path(library_root), auto_refresh_interval=0)
            self._config["_worker_manager"] = self._worker_manager
        except Exception:
            # Tracking is optional; run the pipeline without it.
            self._worker_manager = None
        return self._worker_manager

    def _current_worker_id(self) -> Optional[str]:
        # Worker id stashed in the shared config by _start_worker_session.
        worker_id = self._config.get("_current_worker_id")
        return str(worker_id) if worker_id else None

    # ------------------------------------------------------------------
    # Parsing helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _tokenize(pipeline_text: str) -> List[str]:
        """Shell-style tokenization, degrading to a plain split on bad quoting."""
        try:
            return shlex.split(pipeline_text)
        except ValueError:
            return pipeline_text.split()

    @staticmethod
    def _split_stages(tokens: Sequence[str]) -> List[List[str]]:
        """Partition a token stream into stages at "|" tokens."""
        stages: List[List[str]] = []
        current: List[str] = []
        for token in tokens:
            if token == "|":
                # Empty segments (e.g. "a | | b") are silently dropped.
                if current:
                    stages.append(current)
                    current = []
            else:
                current.append(token)
        if current:
            stages.append(current)
        return stages

    @staticmethod
    def _parse_selection(token: str) -> Optional[Sequence[int]]:
        """Parse "@..." selection syntax; None means "select everything"."""
        if _parse_selection_syntax:
            parsed = _parse_selection_syntax(token)
            if parsed:
                return sorted(parsed)
        return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class _WorkerSession:
|
|
|
|
|
"""Minimal worker session wrapper for the TUI executor."""
|
|
|
|
|
|
|
|
|
|
def __init__(self, *, manager: WorkerManager, worker_id: str, config: Optional[Dict[str, Any]] = None) -> None:
|
|
|
|
|
self._manager = manager
|
|
|
|
|
self.worker_id = worker_id
|
|
|
|
|
self._config = config
|
|
|
|
|
|
|
|
|
|
def finish(self, *, status: str, message: str) -> None:
|
|
|
|
|
try:
|
|
|
|
|
self._manager.finish_worker(self.worker_id, result=status, error_msg=message)
|
|
|
|
|
self._manager.log_step(self.worker_id, f"Pipeline {status}")
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
|
|
|
|
if self._config and self._config.get("_current_worker_id") == self.worker_id:
|
|
|
|
|
self._config.pop("_current_worker_id", None)
|