Medios-Macina/TUI/pipeline_runner.py

"""Pipeline execution utilities for the Textual UI.

This module mirrors the CLI pipeline behaviour while exposing a class-based
interface that the TUI can call. It keeps all pipeline/cmdlet integration in
one place so the interface layer stays focused on presentation.
"""
from __future__ import annotations

import contextlib
import io
import shlex
import uuid
from dataclasses import dataclass, field
import sys
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Sequence

# Locate this file's directory and its parent, then put both at the front of
# ``sys.path`` (skipping any that are already listed) - presumably so the
# project-local imports that follow resolve regardless of the launch directory.
# BASE_DIR is inserted last and therefore ends up ahead of ROOT_DIR.
BASE_DIR = Path(__file__).resolve().parent
ROOT_DIR = BASE_DIR.parent
for path in (ROOT_DIR, BASE_DIR):
    str_path = str(path)
    if str_path in sys.path:
        continue
    sys.path.insert(0, str_path)

import pipeline as ctx
from cmdlets import REGISTRY
from config import get_local_storage_path, load_config
from helper.worker_manager import WorkerManager

try:  # Reuse the CLI selection parser instead of reimplementing it.
    from CLI import _parse_selection_syntax
except ImportError:  # pragma: no cover - fallback for atypical environments
    _parse_selection_syntax = None  # type: ignore


@dataclass(slots=True)
class PipelineStageResult:
    """Summary for a single pipeline stage.

    One instance is produced for every stage of a pipeline run and appended
    to ``PipelineRunResult.stages`` in execution order.
    """

    # Command name as normalised by the executor (lower-cased, underscores
    # replaced by hyphens), or "(empty)" for a stage without tokens.
    name: str
    # Tokens that followed the command name in the stage.
    args: Sequence[str]
    # Items the stage emitted; the executor pipes them into the next stage.
    emitted: List[Any] = field(default_factory=list)
    result_table: Optional[Any] = None  # ResultTable object if available
    # "pending" until the executor sets "completed", "failed" or "skipped".
    status: str = "pending"
    # Human-readable failure reason; None unless the stage failed.
    error: Optional[str] = None


@dataclass(slots=True)
class PipelineRunResult:
    """Aggregate result for a pipeline run."""

    pipeline: str
    success: bool
    stages: List[PipelineStageResult] = field(default_factory=list)
    emitted: List[Any] = field(default_factory=list)
    result_table: Optional[Any] = None  # Final ResultTable object if available
    stdout: str = ""
    stderr: str = ""
    error: Optional[str] = None

    def to_summary(self) -> Dict[str, Any]:
        """Provide a JSON-friendly representation for logging or UI."""
        return {
            "pipeline": self.pipeline,
            "success": self.success,
            "error": self.error,
            "stages": [
                {
                    "name": stage.name,
                    "status": stage.status,
                    "error": stage.error,
                    "emitted": len(stage.emitted),
                }
                for stage in self.stages
            ],
        }


class PipelineExecutor:
    """Thin wrapper over the cmdlet registry + pipeline context."""

    def __init__(
        self,
        *,
        config: Optional[Dict[str, Any]] = None,
        worker_manager: Optional[WorkerManager] = None,
    ) -> None:
        self._config = config or load_config()
        self._worker_manager = worker_manager
        if self._worker_manager is None:
            self._worker_manager = self._ensure_worker_manager()
        if self._worker_manager:
            self._config["_worker_manager"] = self._worker_manager

    @property
    def worker_manager(self) -> Optional[WorkerManager]:
        """Return the worker manager in use, or ``None`` when there is none."""
        return self._worker_manager

    def run_pipeline(
        self,
        pipeline_text: str,
        *,
        on_log: Optional[Callable[[str], None]] = None,
    ) -> PipelineRunResult:
        """Execute a pipeline string and return structured results.

        Args:
            pipeline_text: Raw pipeline text entered by the user.
            on_log: Optional callback that receives human-readable log lines.
        """
        normalized = pipeline_text.strip()
        result = PipelineRunResult(pipeline=normalized, success=False)
        if not normalized:
            result.error = "Pipeline is empty"
            return result

        tokens = self._tokenize(normalized)
        stages = self._split_stages(tokens)
        if not stages:
            result.error = "Pipeline contains no stages"
            return result

        ctx.reset()
        ctx.set_current_command_text(normalized)

        stdout_buffer = io.StringIO()
        stderr_buffer = io.StringIO()
        piped_result: Any = None
        worker_session = self._start_worker_session(normalized)

        try:
            with contextlib.redirect_stdout(stdout_buffer), contextlib.redirect_stderr(
                stderr_buffer
            ):
                for index, stage_tokens in enumerate(stages):
                    stage = self._execute_stage(
                        index=index,
                        total=len(stages),
                        stage_tokens=stage_tokens,
                        piped_input=piped_result,
                        on_log=on_log,
                    )
                    result.stages.append(stage)

                    if stage.status != "completed":
                        result.error = stage.error or f"Stage {stage.name} failed"
                        return result

                    if index == len(stages) - 1:
                        result.emitted = stage.emitted
                        result.result_table = stage.result_table
                    else:
                        piped_result = stage.emitted

            result.success = True
            return result
        finally:
            result.stdout = stdout_buffer.getvalue()
            result.stderr = stderr_buffer.getvalue()
            ctx.clear_current_command_text()
            if worker_session is not None:
                status = "completed" if result.success else "error"
                worker_session.finish(status=status, message=result.error or "")

    # ------------------------------------------------------------------
    # Stage execution helpers
    # ------------------------------------------------------------------
    def _execute_stage(
        self,
        *,
        index: int,
        total: int,
        stage_tokens: Sequence[str],
        piped_input: Any,
        on_log: Optional[Callable[[str], None]],
    ) -> PipelineStageResult:
        if not stage_tokens:
            return PipelineStageResult(name="(empty)", args=[], status="skipped")

        cmd_name = stage_tokens[0].replace("_", "-").lower()
        stage_args = stage_tokens[1:]
        stage = PipelineStageResult(name=cmd_name, args=stage_args)

        if cmd_name.startswith("@"):
            return self._apply_selection_stage(
                token=cmd_name,
                stage=stage,
                piped_input=piped_input,
                on_log=on_log,
            )

        cmd_fn = REGISTRY.get(cmd_name)
        if not cmd_fn:
            stage.status = "failed"
            stage.error = f"Unknown command: {cmd_name}"
            return stage

        pipeline_ctx = ctx.PipelineStageContext(stage_index=index, total_stages=total)
        ctx.set_stage_context(pipeline_ctx)
        ctx.set_active(True)
        ctx.set_last_stage(index == total - 1)

        try:
            return_code = cmd_fn(piped_input, list(stage_args), self._config)
        except Exception as exc:  # pragma: no cover - surfaced in UI
            stage.status = "failed"
            stage.error = f"{type(exc).__name__}: {exc}"
            if on_log:
                on_log(stage.error)
            return stage
        finally:
            ctx.set_stage_context(None)
            ctx.set_active(False)

        emitted = list(getattr(pipeline_ctx, "emits", []) or [])
        stage.emitted = emitted

        # Capture the ResultTable if the cmdlet set one
        # Check display table first (overlay), then last result table
        stage.result_table = ctx.get_display_table() or ctx.get_last_result_table()

        if return_code != 0:
            stage.status = "failed"
            stage.error = f"Exit code {return_code}"
        else:
            stage.status = "completed"
            stage.error = None

        worker_id = self._current_worker_id()
        if self._worker_manager and worker_id:
            label = f"[Stage {index + 1}/{total}] {cmd_name} {stage.status}"
            self._worker_manager.log_step(worker_id, label)

        # Don't clear the table if we just captured it, but ensure items are set for next stage
        # If we have a table, we should probably keep it in ctx for history if needed
        # But for pipeline execution, we mainly care about passing items to next stage
        # ctx.set_last_result_table(None, emitted)  <-- This was clearing it

        # Ensure items are available for next stage
        ctx.set_last_items(emitted)
        return stage

    def _apply_selection_stage(
        self,
        *,
        token: str,
        stage: PipelineStageResult,
        piped_input: Any,
        on_log: Optional[Callable[[str], None]],
    ) -> PipelineStageResult:
        selection = self._parse_selection(token)
        items = piped_input or []
        if not isinstance(items, list):
            items = list(items if isinstance(items, Sequence) else [items])

        if not items:
            stage.status = "failed"
            stage.error = "Selection requested but there is no upstream data"
            return stage

        if selection is None:
            stage.emitted = list(items)
        else:
            zero_based = sorted(i - 1 for i in selection if i > 0)
            stage.emitted = [items[i] for i in zero_based if 0 <= i < len(items)]

        if not stage.emitted:
            stage.status = "failed"
            stage.error = "Selection matched no rows"
            return stage

        ctx.set_last_items(stage.emitted)
        ctx.set_last_result_table(None, stage.emitted)
        stage.status = "completed"
        if on_log:
            on_log(f"Selected {len(stage.emitted)} item(s) via {token}")
        return stage

    # ------------------------------------------------------------------
    # Worker/session helpers
    # ------------------------------------------------------------------
    def _start_worker_session(self, pipeline_text: str) -> Optional[_WorkerSession]:
        manager = self._ensure_worker_manager()
        if manager is None:
            return None

        worker_id = f"tui_pipeline_{uuid.uuid4().hex[:8]}"
        tracked = manager.track_worker(
            worker_id,
            worker_type="pipeline",
            title="Pipeline run",
            description=pipeline_text,
            pipe=pipeline_text,
        )
        if not tracked:
            return None

        manager.log_step(worker_id, "Pipeline started")
        self._config["_current_worker_id"] = worker_id
        return _WorkerSession(manager=manager, worker_id=worker_id, config=self._config)

    def _ensure_worker_manager(self) -> Optional[WorkerManager]:
        if self._worker_manager:
            return self._worker_manager
        library_root = get_local_storage_path(self._config)
        if not library_root:
            return None
        try:
            self._worker_manager = WorkerManager(Path(library_root), auto_refresh_interval=0)
            self._config["_worker_manager"] = self._worker_manager
        except Exception:
            self._worker_manager = None
        return self._worker_manager

    def _current_worker_id(self) -> Optional[str]:
        worker_id = self._config.get("_current_worker_id")
        return str(worker_id) if worker_id else None

    # ------------------------------------------------------------------
    # Parsing helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _tokenize(pipeline_text: str) -> List[str]:
        try:
            return shlex.split(pipeline_text)
        except ValueError:
            return pipeline_text.split()

    @staticmethod
    def _split_stages(tokens: Sequence[str]) -> List[List[str]]:
        stages: List[List[str]] = []
        current: List[str] = []
        for token in tokens:
            if token == "|":
                if current:
                    stages.append(current)
                    current = []
            else:
                current.append(token)
        if current:
            stages.append(current)
        return stages

    @staticmethod
    def _parse_selection(token: str) -> Optional[Sequence[int]]:
        """Parse a selection token with the CLI's selection parser.

        Returns the parsed values in ascending order, or ``None`` when the
        CLI parser could not be imported or yields nothing for ``token``.
        """
        if not _parse_selection_syntax:
            return None
        parsed = _parse_selection_syntax(token)
        return sorted(parsed) if parsed else None


class _WorkerSession:
    """Minimal worker session wrapper for the TUI executor."""

    def __init__(self, *, manager: WorkerManager, worker_id: str, config: Optional[Dict[str, Any]] = None) -> None:
        self._manager = manager
        self.worker_id = worker_id
        self._config = config

    def finish(self, *, status: str, message: str) -> None:
        try:
            self._manager.finish_worker(self.worker_id, result=status, error_msg=message)
            self._manager.log_step(self.worker_id, f"Pipeline {status}")
        except Exception:
            pass
        if self._config and self._config.get("_current_worker_id") == self.worker_id:
            self._config.pop("_current_worker_id", None)