Files
Medios-Macina/TUI/pipeline_runner.py
2025-11-27 10:59:01 -08:00

369 lines
13 KiB
Python

"""Pipeline execution utilities for the Textual UI.
This module mirrors the CLI pipeline behaviour while exposing a class-based
interface that the TUI can call. It keeps all pipeline/cmdlet integration in
one place so the interface layer stays focused on presentation.
"""
from __future__ import annotations
import contextlib
import io
import shlex
import uuid
from dataclasses import dataclass, field
import sys
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Sequence
# Directory holding this module, and its parent directory.
BASE_DIR = Path(__file__).resolve().parent
ROOT_DIR = BASE_DIR.parent

# Put both directories at the front of sys.path (presumably so the
# project-local imports that follow resolve when the TUI is launched
# directly -- confirm against the launcher). Each insert goes to index 0, so
# when neither path is present yet BASE_DIR ends up ahead of ROOT_DIR.
for path in (ROOT_DIR, BASE_DIR):
    str_path = str(path)
    if str_path in sys.path:
        continue
    sys.path.insert(0, str_path)
import pipeline as ctx
from cmdlets import REGISTRY
from config import get_local_storage_path, load_config
from helper.worker_manager import WorkerManager
try: # Reuse the CLI selection parser instead of reimplementing it.
from CLI import _parse_selection_syntax
except ImportError: # pragma: no cover - fallback for atypical environments
_parse_selection_syntax = None # type: ignore
@dataclass(slots=True)
class PipelineStageResult:
"""Summary for a single pipeline stage."""
name: str
args: Sequence[str]
emitted: List[Any] = field(default_factory=list)
result_table: Optional[Any] = None # ResultTable object if available
status: str = "pending"
error: Optional[str] = None
@dataclass(slots=True)
class PipelineRunResult:
"""Aggregate result for a pipeline run."""
pipeline: str
success: bool
stages: List[PipelineStageResult] = field(default_factory=list)
emitted: List[Any] = field(default_factory=list)
result_table: Optional[Any] = None # Final ResultTable object if available
stdout: str = ""
stderr: str = ""
error: Optional[str] = None
def to_summary(self) -> Dict[str, Any]:
"""Provide a JSON-friendly representation for logging or UI."""
return {
"pipeline": self.pipeline,
"success": self.success,
"error": self.error,
"stages": [
{
"name": stage.name,
"status": stage.status,
"error": stage.error,
"emitted": len(stage.emitted),
}
for stage in self.stages
],
}
class PipelineExecutor:
    """Thin wrapper over the cmdlet registry + pipeline context.

    Splits a pipeline string into ``|``-separated stages, runs each stage
    through the matching ``REGISTRY`` entry (or applies an ``@`` selection to
    the upstream items) and reports the outcome as a ``PipelineRunResult``.
    """

    def __init__(
        self,
        *,
        config: Optional[Dict[str, Any]] = None,
        worker_manager: Optional[WorkerManager] = None,
    ) -> None:
        """Create an executor.

        Args:
            config: Configuration mapping handed to every cmdlet. When it is
                omitted *or empty* (falsy), ``load_config()`` is used instead.
                The executor stores ``_worker_manager`` and
                ``_current_worker_id`` entries in this mapping.
            worker_manager: Manager to reuse. When omitted, one is created
                from the configured local storage path if possible.
        """
        self._config = config or load_config()
        self._worker_manager = worker_manager
        if self._worker_manager is None:
            self._worker_manager = self._ensure_worker_manager()
        if self._worker_manager:
            self._config["_worker_manager"] = self._worker_manager

    @property
    def worker_manager(self) -> Optional[WorkerManager]:
        """The worker manager in use, or ``None`` when none is available."""
        return self._worker_manager

    def run_pipeline(
        self,
        pipeline_text: str,
        *,
        on_log: Optional[Callable[[str], None]] = None,
    ) -> PipelineRunResult:
        """Execute a pipeline string and return structured results.

        Stages run in order; the items emitted by one stage become the piped
        input of the next. Execution stops at the first stage whose status is
        not ``"completed"``. stdout and stderr are captured for the duration
        of the run and attached to the result.

        Args:
            pipeline_text: Raw pipeline text entered by the user.
            on_log: Optional callback that receives human-readable log lines.

        Returns:
            A ``PipelineRunResult``; ``success`` is True only when every
            stage completed.
        """
        normalized = pipeline_text.strip()
        result = PipelineRunResult(pipeline=normalized, success=False)
        if not normalized:
            result.error = "Pipeline is empty"
            return result

        stages = self._split_stages(self._tokenize(normalized))
        if not stages:
            result.error = "Pipeline contains no stages"
            return result

        ctx.reset()
        ctx.set_current_command_text(normalized)
        stdout_buffer = io.StringIO()
        stderr_buffer = io.StringIO()
        piped_result: Any = None
        worker_session: Optional[_WorkerSession] = None
        try:
            # Started inside the try so that the cleanup below still clears
            # the command text if session start-up raises.
            worker_session = self._start_worker_session(normalized)
            with contextlib.redirect_stdout(stdout_buffer), contextlib.redirect_stderr(
                stderr_buffer
            ):
                last_index = len(stages) - 1
                for index, stage_tokens in enumerate(stages):
                    stage = self._execute_stage(
                        index=index,
                        total=len(stages),
                        stage_tokens=stage_tokens,
                        piped_input=piped_result,
                        on_log=on_log,
                    )
                    result.stages.append(stage)
                    if stage.status != "completed":
                        result.error = stage.error or f"Stage {stage.name} failed"
                        return result
                    if index == last_index:
                        result.emitted = stage.emitted
                        result.result_table = stage.result_table
                    else:
                        piped_result = stage.emitted
                result.success = True
                return result
        finally:
            # Runs on every exit path, including the early returns above.
            result.stdout = stdout_buffer.getvalue()
            result.stderr = stderr_buffer.getvalue()
            ctx.clear_current_command_text()
            if worker_session is not None:
                status = "completed" if result.success else "error"
                worker_session.finish(status=status, message=result.error or "")

    # ------------------------------------------------------------------
    # Stage execution helpers
    # ------------------------------------------------------------------
    def _execute_stage(
        self,
        *,
        index: int,
        total: int,
        stage_tokens: Sequence[str],
        piped_input: Any,
        on_log: Optional[Callable[[str], None]],
    ) -> PipelineStageResult:
        """Run a single stage and describe what happened.

        Args:
            index: Zero-based position of the stage in the pipeline.
            total: Number of stages in the pipeline.
            stage_tokens: Command name followed by its arguments.
            piped_input: Items emitted by the previous stage, if any.
            on_log: Optional callback for human-readable log lines.

        Returns:
            A ``PipelineStageResult`` whose status is ``"skipped"`` (no
            tokens), ``"failed"`` (unknown command, exception, or a return
            value other than ``0``) or ``"completed"``.
        """
        if not stage_tokens:
            return PipelineStageResult(name="(empty)", args=[], status="skipped")

        # Command names are matched case-insensitively, with "_" and "-"
        # treated as the same character.
        cmd_name = stage_tokens[0].replace("_", "-").lower()
        stage_args = stage_tokens[1:]
        stage = PipelineStageResult(name=cmd_name, args=stage_args)

        if cmd_name.startswith("@"):
            return self._apply_selection_stage(
                token=cmd_name,
                stage=stage,
                piped_input=piped_input,
                on_log=on_log,
            )

        cmd_fn = REGISTRY.get(cmd_name)
        if not cmd_fn:
            stage.status = "failed"
            stage.error = f"Unknown command: {cmd_name}"
            return stage

        pipeline_ctx = ctx.PipelineStageContext(stage_index=index, total_stages=total)
        ctx.set_stage_context(pipeline_ctx)
        ctx.set_active(True)
        ctx.set_last_stage(index == total - 1)
        try:
            return_code = cmd_fn(piped_input, list(stage_args), self._config)
        except Exception as exc:  # pragma: no cover - surfaced in UI
            stage.status = "failed"
            stage.error = f"{type(exc).__name__}: {exc}"
            if on_log:
                on_log(stage.error)
            return stage
        finally:
            ctx.set_stage_context(None)
            ctx.set_active(False)

        emitted = list(getattr(pipeline_ctx, "emits", []) or [])
        stage.emitted = emitted
        # Capture the ResultTable if the cmdlet set one: the display table
        # (overlay) wins over the last result table.
        stage.result_table = ctx.get_display_table() or ctx.get_last_result_table()

        # Only an explicit 0 counts as success; any other return value
        # (including None) marks the stage as failed.
        if return_code != 0:
            stage.status = "failed"
            stage.error = f"Exit code {return_code}"
        else:
            stage.status = "completed"
            stage.error = None

        worker_id = self._current_worker_id()
        if self._worker_manager and worker_id:
            label = f"[Stage {index + 1}/{total}] {cmd_name} {stage.status}"
            self._worker_manager.log_step(worker_id, label)

        # The captured table is deliberately left in ctx (resetting it via
        # ctx.set_last_result_table(None, emitted) used to clear it); only the
        # emitted items are refreshed so the next stage can pick them up.
        ctx.set_last_items(emitted)
        return stage

    def _apply_selection_stage(
        self,
        *,
        token: str,
        stage: PipelineStageResult,
        piped_input: Any,
        on_log: Optional[Callable[[str], None]],
    ) -> PipelineStageResult:
        """Apply an ``@`` selection token to the upstream items.

        Args:
            token: The selection token (starts with ``@``).
            stage: Stage record to fill in and return.
            piped_input: Items emitted by the previous stage.
            on_log: Optional callback for human-readable log lines.

        Returns:
            ``stage``, marked ``"failed"`` when there is no upstream data or
            the selection matches nothing, otherwise ``"completed"`` with the
            selected items in ``emitted``. When the token yields no explicit
            indices, every upstream item is passed through.
        """
        selection = self._parse_selection(token)
        items = self._coerce_items(piped_input)
        if not items:
            stage.status = "failed"
            stage.error = "Selection requested but there is no upstream data"
            return stage

        if selection is None:
            stage.emitted = list(items)
        else:
            # Selections are 1-based; non-positive and out-of-range entries
            # are ignored.
            zero_based = sorted(i - 1 for i in selection if i > 0)
            stage.emitted = [items[i] for i in zero_based if 0 <= i < len(items)]

        if not stage.emitted:
            stage.status = "failed"
            stage.error = "Selection matched no rows"
            return stage

        ctx.set_last_items(stage.emitted)
        ctx.set_last_result_table(None, stage.emitted)
        stage.status = "completed"
        if on_log:
            on_log(f"Selected {len(stage.emitted)} item(s) via {token}")
        return stage

    @staticmethod
    def _coerce_items(piped_input: Any) -> List[Any]:
        """Normalise piped input into a list of selectable rows.

        Falsy input yields an empty list, a list is returned as-is, other
        sequences are copied into a list, and anything else is wrapped as a
        single row. ``str``/``bytes`` values are sequences too, but they are
        treated as one row rather than being split into characters.
        """
        if not piped_input:
            return []
        if isinstance(piped_input, list):
            return piped_input
        if isinstance(piped_input, (str, bytes, bytearray)):
            return [piped_input]
        if isinstance(piped_input, Sequence):
            return list(piped_input)
        return [piped_input]

    # ------------------------------------------------------------------
    # Worker/session helpers
    # ------------------------------------------------------------------
    def _start_worker_session(self, pipeline_text: str) -> Optional[_WorkerSession]:
        """Register a worker for this run.

        Returns:
            A ``_WorkerSession`` for the new worker, or ``None`` when no
            manager is available or the manager declined to track it. On
            success the worker id is stored under ``_current_worker_id`` in
            the config.
        """
        manager = self._ensure_worker_manager()
        if manager is None:
            return None

        worker_id = f"tui_pipeline_{uuid.uuid4().hex[:8]}"
        tracked = manager.track_worker(
            worker_id,
            worker_type="pipeline",
            title="Pipeline run",
            description=pipeline_text,
            pipe=pipeline_text,
        )
        if not tracked:
            return None

        manager.log_step(worker_id, "Pipeline started")
        self._config["_current_worker_id"] = worker_id
        return _WorkerSession(manager=manager, worker_id=worker_id, config=self._config)

    def _ensure_worker_manager(self) -> Optional[WorkerManager]:
        """Return the worker manager, creating it on first use.

        Creation needs a local storage path from the config; without one, or
        when construction raises, ``None`` is returned.
        """
        if self._worker_manager:
            return self._worker_manager

        library_root = get_local_storage_path(self._config)
        if not library_root:
            return None

        try:
            self._worker_manager = WorkerManager(Path(library_root), auto_refresh_interval=0)
            self._config["_worker_manager"] = self._worker_manager
        except Exception:
            # Best-effort: the executor keeps working without worker tracking.
            self._worker_manager = None
        return self._worker_manager

    def _current_worker_id(self) -> Optional[str]:
        """Return the worker id stored in the config as a string, if any."""
        worker_id = self._config.get("_current_worker_id")
        return str(worker_id) if worker_id else None

    # ------------------------------------------------------------------
    # Parsing helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _tokenize(pipeline_text: str) -> List[str]:
        """Split pipeline text into tokens with shell-style quoting.

        Falls back to plain whitespace splitting when ``shlex`` rejects the
        text (for example on an unbalanced quote).
        """
        try:
            return shlex.split(pipeline_text)
        except ValueError:
            return pipeline_text.split()

    @staticmethod
    def _split_stages(tokens: Sequence[str]) -> List[List[str]]:
        """Group tokens into stages separated by stand-alone ``|`` tokens.

        Empty stages (leading, trailing or repeated ``|``) are dropped.
        """
        stages: List[List[str]] = []
        current: List[str] = []
        for token in tokens:
            if token == "|":
                if current:
                    stages.append(current)
                    current = []
            else:
                current.append(token)
        if current:
            stages.append(current)
        return stages

    @staticmethod
    def _parse_selection(token: str) -> Optional[Sequence[int]]:
        """Parse a selection token into sorted indices.

        Returns ``None`` when the CLI parser could not be imported or yields
        nothing for ``token``.
        """
        if _parse_selection_syntax:
            parsed = _parse_selection_syntax(token)
            if parsed:
                return sorted(parsed)
        return None
class _WorkerSession:
    """Minimal worker session wrapper for the TUI executor.

    Keeps the manager / worker-id pair of one pipeline run together so the
    run can be closed out with a single ``finish`` call.
    """

    def __init__(self, *, manager: WorkerManager, worker_id: str, config: Optional[Dict[str, Any]] = None) -> None:
        """Remember the manager, the worker id and the (optional) config."""
        self._manager = manager
        self.worker_id = worker_id
        self._config = config

    def finish(self, *, status: str, message: str) -> None:
        """Close the worker and drop its id from the config.

        Args:
            status: Final result reported to the manager.
            message: Error text reported to the manager.
        """
        # Best-effort bookkeeping: any failure while reporting is ignored.
        with contextlib.suppress(Exception):
            self._manager.finish_worker(self.worker_id, result=status, error_msg=message)
            self._manager.log_step(self.worker_id, f"Pipeline {status}")

        settings = self._config
        if not settings:
            return
        # Only remove the entry while it still points at this worker.
        if settings.get("_current_worker_id") == self.worker_id:
            settings.pop("_current_worker_id", None)