This commit is contained in:
nose
2025-12-21 05:10:09 -08:00
parent 8ca5783970
commit 11a13edb84
15 changed files with 1712 additions and 213 deletions

603
models.py
View File

@@ -12,14 +12,20 @@ from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Protocol, TextIO
from rich.console import Console
from rich.console import ConsoleOptions
from rich.console import Group
from rich.live import Live
from rich.panel import Panel
from rich.progress import (
BarColumn,
DownloadColumn,
Progress,
SpinnerColumn,
TaskID,
TaskProgressColumn,
TextColumn,
TimeRemainingColumn,
TimeElapsedColumn,
TransferSpeedColumn,
)
@@ -403,14 +409,56 @@ class ProgressBar:
self._progress: Optional[Progress] = None
self._task_id: Optional[TaskID] = None
# Optional: when a PipelineLiveProgress is active, prefer rendering
# transfers inside it instead of creating a nested Rich Progress.
self._pipeline_ui: Any = None
self._pipeline_label: Optional[str] = None
def _ensure_started(self, *, label: str, total: Optional[int], file: Any = None) -> None:
if self._pipeline_ui is not None and self._pipeline_label:
# Pipeline-backed transfer task is already registered; update its total if needed.
try:
if total is not None and total > 0:
self._pipeline_ui.update_transfer(label=self._pipeline_label, completed=None, total=int(total))
except Exception:
pass
return
if self._progress is not None and self._task_id is not None:
if total is not None and total > 0:
self._progress.update(self._task_id, total=int(total))
return
# Prefer integrating with the pipeline Live UI to avoid nested Rich Live instances.
try:
import pipeline as pipeline_context
ui = pipeline_context.get_live_progress()
if ui is not None and hasattr(ui, "begin_transfer") and hasattr(ui, "update_transfer"):
self._pipeline_ui = ui
self._pipeline_label = str(label or "download")
try:
ui.begin_transfer(label=self._pipeline_label, total=int(total) if isinstance(total, int) and total > 0 else None)
except Exception:
# If pipeline integration fails, fall back to standalone progress.
self._pipeline_ui = None
self._pipeline_label = None
else:
return
except Exception:
pass
stream = file if file is not None else sys.stderr
console = Console(file=stream)
# Use shared stderr console when rendering to stderr (cooperates with PipelineLiveProgress).
if stream is sys.stderr:
try:
from rich_display import stderr_console
console = stderr_console()
except Exception:
console = Console(file=stream)
else:
console = Console(file=stream)
progress = Progress(
TextColumn("[progress.description]{task.description}"),
BarColumn(),
@@ -441,6 +489,17 @@ class ProgressBar:
if downloaded is None and total is None:
return
self._ensure_started(label=label, total=total, file=file)
if self._pipeline_ui is not None and self._pipeline_label:
try:
self._pipeline_ui.update_transfer(
label=self._pipeline_label,
completed=int(downloaded or 0) if downloaded is not None else None,
total=int(total) if isinstance(total, int) and total > 0 else None,
)
except Exception:
pass
return
if self._progress is None or self._task_id is None:
return
if total is not None and total > 0:
@@ -449,6 +508,15 @@ class ProgressBar:
self._progress.update(self._task_id, completed=int(downloaded or 0), refresh=True)
def finish(self) -> None:
if self._pipeline_ui is not None and self._pipeline_label:
try:
self._pipeline_ui.finish_transfer(label=self._pipeline_label)
except Exception:
pass
finally:
self._pipeline_ui = None
self._pipeline_label = None
return
if self._progress is None:
return
try:
@@ -562,28 +630,519 @@ class ProgressFileReader:
# ============================================================================
# Note: Pipeline functions and state variables moved to pipeline.py
def _pipeline_progress_item_label(value: Any, *, max_len: int = 72) -> str:
def _clip(text: str) -> str:
text = str(text or "").strip()
if not text:
return "(item)"
if len(text) <= max_len:
return text
return text[: max(0, max_len - 1)] + ""
try:
if isinstance(value, PipeObject):
if value.title:
return _clip(value.title)
if value.url:
return _clip(value.url)
if value.source_url:
return _clip(value.source_url)
if value.path:
return _clip(value.path)
if value.hash:
return _clip(value.hash)
if isinstance(value, dict):
for key in ("title", "url", "source_url", "path", "hash", "target"):
raw = value.get(key)
if raw is not None and str(raw).strip():
return _clip(str(raw))
return _clip(str(value))
except Exception:
return "(item)"
class PipelineLiveProgress:
"""Multi-level pipeline progress UI.
- Each pipeline step (pipe) is a persistent bar.
- Each per-item operation is shown as a transient sub-task (spinner).
Designed to render to stderr so pipelines remain clean.
"""
def __init__(self, pipe_labels: List[str], *, enabled: bool = True) -> None:
self._enabled = bool(enabled)
self._pipe_labels = [str(x) for x in (pipe_labels or [])]
self._console: Optional[Console] = None
self._live: Optional[Live] = None
self._overall: Optional[Progress] = None
self._pipe_progress: Optional[Progress] = None
self._subtasks: Optional[Progress] = None
self._transfers: Optional[Progress] = None
self._overall_task: Optional[TaskID] = None
self._pipe_tasks: List[TaskID] = []
self._transfer_tasks: Dict[str, TaskID] = {}
# Per-pipe state
self._pipe_totals: List[int] = [0 for _ in self._pipe_labels]
self._pipe_done: List[int] = [0 for _ in self._pipe_labels]
self._subtask_ids: List[List[TaskID]] = [[] for _ in self._pipe_labels]
self._subtask_active_index: List[int] = [0 for _ in self._pipe_labels]
# Title line state (active per-item context)
self._active_subtask_text: Optional[str] = None
def _title_text(self) -> str:
"""Compute the Pipeline panel title.
We keep per-pipe elapsed time on the pipe rows. The panel title is used
to show the currently active item (cmd + url/path) with a lightweight
spinner so the UI reads as "working on X".
"""
active = str(self._active_subtask_text or "").strip()
if not active:
return "Pipeline"
# Lightweight spinner frames (similar intent to Rich's simpleDots).
try:
import time
frames = [".", "..", "..."]
idx = int(time.monotonic() * 4) % len(frames)
prefix = frames[idx]
except Exception:
prefix = "..."
return f"{prefix} {active}"
def set_active_subtask_text(self, text: Optional[str]) -> None:
"""Update the Pipeline panel title to reflect the current in-item step.
This is intentionally lightweight: it does not affect pipe counters.
Cmdlets may call this to surface step-level progress for long-running
single-item work (e.g. Playwright page load -> capture -> convert).
"""
if not self._enabled:
return
try:
value = str(text or "").strip()
except Exception:
value = ""
self._active_subtask_text = value or None
def __rich_console__(self, console: "Console", options: "ConsoleOptions"):
"""Renderable hook used by Rich Live.
Using a dynamic renderable keeps the panel title up to date and animates
the spinner without needing manual Live.update() calls.
"""
pipe_progress = self._pipe_progress
transfers = self._transfers
overall = self._overall
if pipe_progress is None or transfers is None or overall is None:
# Not started (or stopped).
yield Panel("", title="Pipeline", expand=False)
return
yield Group(
Panel(Group(pipe_progress, transfers), title=self._title_text(), expand=False),
overall,
)
def _render_group(self) -> Group:
# Backward-compatible helper (some callers may still expect a Group).
pipe_progress = self._pipe_progress
transfers = self._transfers
overall = self._overall
assert pipe_progress is not None
assert transfers is not None
assert overall is not None
return Group(
Panel(Group(pipe_progress, transfers), title=self._title_text(), expand=False),
overall,
)
def start(self) -> None:
if not self._enabled:
return
if self._live is not None:
return
# IMPORTANT: use the shared stderr Console instance so that any
# `stderr_console().print(...)` calls from inside cmdlets (e.g. preflight
# tables/prompts in download-media) cooperate with Rich Live rendering.
# If we create a separate Console(file=sys.stderr), output will fight for
# terminal cursor control and appear "blocked"/truncated.
from rich_display import stderr_console
self._console = stderr_console()
# Persistent per-pipe bars.
self._pipe_progress = Progress(
TextColumn("{task.description}"),
TimeElapsedColumn(),
BarColumn(),
TaskProgressColumn(),
console=self._console,
transient=False,
)
# Transient, per-item spinner for the currently-active subtask.
self._subtasks = Progress(
TextColumn(" "),
SpinnerColumn("simpleDots"),
TextColumn("{task.description}"),
console=self._console,
transient=False,
)
# Byte-based transfer bars (download/upload) integrated into the Live view.
self._transfers = Progress(
TextColumn(" {task.description}"),
BarColumn(),
TaskProgressColumn(),
DownloadColumn(),
TransferSpeedColumn(),
TimeRemainingColumn(),
console=self._console,
transient=False,
)
self._overall = Progress(
TimeElapsedColumn(),
BarColumn(),
TextColumn("{task.description}"),
console=self._console,
transient=False,
)
# Create pipe tasks up-front so the user sees the pipe structure immediately.
self._pipe_tasks = []
for idx, label in enumerate(self._pipe_labels):
# Start timers only when the pipe actually begins.
task_id = self._pipe_progress.add_task(
f"{idx + 1}/{len(self._pipe_labels)} {label}",
total=1,
start=False,
)
self._pipe_progress.update(task_id, completed=0, total=1)
self._pipe_tasks.append(task_id)
self._overall_task = self._overall.add_task(
f"Pipeline: 0/{len(self._pipe_labels)} pipes completed",
total=max(1, len(self._pipe_labels)),
)
self._live = Live(self, console=self._console, refresh_per_second=10, transient=True)
self._live.start()
def pause(self) -> None:
"""Temporarily stop Live rendering without losing progress state."""
if self._live is None:
return
try:
self._live.stop()
finally:
self._live = None
def resume(self) -> None:
"""Resume Live rendering after pause()."""
if not self._enabled:
return
if self._live is not None:
return
if self._console is None or self._pipe_progress is None or self._subtasks is None or self._transfers is None or self._overall is None:
# Not initialized yet; start fresh.
self.start()
return
self._live = Live(self, console=self._console, refresh_per_second=10, transient=True)
self._live.start()
def stop(self) -> None:
# Safe to call whether Live is running or paused.
if self._live is not None:
try:
self._live.stop()
except Exception:
pass
self._live = None
self._console = None
self._overall = None
self._pipe_progress = None
self._subtasks = None
self._transfers = None
self._overall_task = None
self._pipe_tasks = []
self._transfer_tasks = {}
self._active_subtask_text = None
def begin_transfer(self, *, label: str, total: Optional[int] = None) -> None:
if not self._enabled:
return
if self._transfers is None:
return
key = str(label or "transfer")
if key in self._transfer_tasks:
# If it already exists, treat as an update to total.
try:
if total is not None and total > 0:
self._transfers.update(self._transfer_tasks[key], total=int(total))
except Exception:
pass
return
task_total = int(total) if isinstance(total, int) and total > 0 else None
try:
task_id = self._transfers.add_task(key, total=task_total)
self._transfer_tasks[key] = task_id
except Exception:
pass
def update_transfer(self, *, label: str, completed: Optional[int], total: Optional[int] = None) -> None:
if not self._enabled:
return
if self._transfers is None:
return
key = str(label or "transfer")
if key not in self._transfer_tasks:
self.begin_transfer(label=key, total=total)
task_id = self._transfer_tasks.get(key)
if task_id is None:
return
try:
kwargs: Dict[str, Any] = {}
if completed is not None:
kwargs["completed"] = int(completed)
if total is not None and total > 0:
kwargs["total"] = int(total)
self._transfers.update(task_id, refresh=True, **kwargs)
except Exception:
pass
def finish_transfer(self, *, label: str) -> None:
if self._transfers is None:
return
key = str(label or "transfer")
task_id = self._transfer_tasks.pop(key, None)
if task_id is None:
return
try:
self._transfers.remove_task(task_id)
except Exception:
pass
def _ensure_pipe(self, pipe_index: int) -> bool:
if not self._enabled:
return False
if self._pipe_progress is None or self._subtasks is None or self._overall is None:
return False
if pipe_index < 0 or pipe_index >= len(self._pipe_labels):
return False
return True
def begin_pipe(self, pipe_index: int, *, total_items: int, items_preview: Optional[List[Any]] = None) -> None:
if not self._ensure_pipe(pipe_index):
return
pipe_progress = self._pipe_progress
subtasks = self._subtasks
assert pipe_progress is not None
assert subtasks is not None
total_items = int(total_items) if isinstance(total_items, int) else 0
total_items = max(1, total_items)
self._pipe_totals[pipe_index] = total_items
self._pipe_done[pipe_index] = 0
self._subtask_active_index[pipe_index] = 0
self._subtask_ids[pipe_index] = []
pipe_task = self._pipe_tasks[pipe_index]
pipe_progress.update(pipe_task, completed=0, total=total_items)
# Start the per-pipe timer now that the pipe is actually running.
try:
pipe_progress.start_task(pipe_task)
except Exception:
pass
labels: List[str] = []
if isinstance(items_preview, list) and items_preview:
labels = [_pipeline_progress_item_label(x) for x in items_preview]
for i in range(total_items):
suffix = labels[i] if i < len(labels) else f"item {i + 1}/{total_items}"
# Use start=False so elapsed time starts when we explicitly start_task().
sub_id = subtasks.add_task(f"{self._pipe_labels[pipe_index]}: {suffix}", start=False)
subtasks.update(sub_id, visible=False)
self._subtask_ids[pipe_index].append(sub_id)
# Show the first subtask spinner.
if self._subtask_ids[pipe_index]:
first = self._subtask_ids[pipe_index][0]
subtasks.update(first, visible=True)
subtasks.start_task(first)
try:
t = subtasks.tasks[first]
self._active_subtask_text = str(getattr(t, "description", "") or "").strip() or None
except Exception:
self._active_subtask_text = None
def on_emit(self, pipe_index: int, emitted: Any) -> None:
if not self._ensure_pipe(pipe_index):
return
pipe_progress = self._pipe_progress
subtasks = self._subtasks
assert pipe_progress is not None
assert subtasks is not None
done = self._pipe_done[pipe_index]
total = self._pipe_totals[pipe_index]
active = self._subtask_active_index[pipe_index]
# If a stage emits more than expected, extend totals dynamically.
if done >= total:
total = done + 1
self._pipe_totals[pipe_index] = total
pipe_task = self._pipe_tasks[pipe_index]
pipe_progress.update(pipe_task, total=total)
# Add a placeholder subtask.
sub_id = subtasks.add_task(
f"{self._pipe_labels[pipe_index]}: {_pipeline_progress_item_label(emitted)}"
)
subtasks.stop_task(sub_id)
subtasks.update(sub_id, visible=False)
self._subtask_ids[pipe_index].append(sub_id)
# Complete & hide current active subtask.
if active < len(self._subtask_ids[pipe_index]):
current = self._subtask_ids[pipe_index][active]
try:
# If we didnt have a preview label, set it now.
subtasks.update(
current,
description=f"{self._pipe_labels[pipe_index]}: {_pipeline_progress_item_label(emitted)}",
)
except Exception:
pass
subtasks.stop_task(current)
subtasks.update(current, visible=False)
done += 1
self._pipe_done[pipe_index] = done
pipe_task = self._pipe_tasks[pipe_index]
pipe_progress.update(pipe_task, completed=done)
# Start next subtask spinner.
next_index = active + 1
self._subtask_active_index[pipe_index] = next_index
if next_index < len(self._subtask_ids[pipe_index]):
nxt = self._subtask_ids[pipe_index][next_index]
subtasks.update(nxt, visible=True)
subtasks.start_task(nxt)
try:
t = subtasks.tasks[nxt]
self._active_subtask_text = str(getattr(t, "description", "") or "").strip() or None
except Exception:
self._active_subtask_text = None
else:
self._active_subtask_text = None
def finish_pipe(self, pipe_index: int, *, force_complete: bool = True) -> None:
if not self._ensure_pipe(pipe_index):
return
pipe_progress = self._pipe_progress
subtasks = self._subtasks
overall = self._overall
assert pipe_progress is not None
assert subtasks is not None
assert overall is not None
total = self._pipe_totals[pipe_index]
done = self._pipe_done[pipe_index]
# Ensure the pipe bar finishes even if cmdlet didnt emit per item.
if force_complete and done < total:
pipe_task = self._pipe_tasks[pipe_index]
pipe_progress.update(pipe_task, completed=total)
self._pipe_done[pipe_index] = total
# Hide any remaining subtask spinners.
for sub_id in self._subtask_ids[pipe_index]:
try:
subtasks.stop_task(sub_id)
subtasks.update(sub_id, visible=False)
except Exception:
pass
# If we just finished the active pipe, clear the title context.
self._active_subtask_text = None
# Stop the per-pipe timer once the pipe is finished.
try:
pipe_task = self._pipe_tasks[pipe_index]
pipe_progress.stop_task(pipe_task)
except Exception:
pass
if self._overall_task is not None:
completed = 0
try:
completed = sum(1 for i in range(len(self._pipe_labels)) if self._pipe_done[i] >= max(1, self._pipe_totals[i]))
except Exception:
completed = 0
overall.update(
self._overall_task,
completed=min(completed, max(1, len(self._pipe_labels))),
description=f"Pipeline: {completed}/{len(self._pipe_labels)} pipes completed",
)
class PipelineStageContext:
"""Context information for the current pipeline stage."""
def __init__(self, stage_index: int, total_stages: int, worker_id: Optional[str] = None):
self.stage_index = stage_index
self.total_stages = total_stages
self.is_last_stage = (stage_index == total_stages - 1)
self.worker_id = worker_id
self.emits: List[Any] = []
def emit(self, obj: Any) -> None:
"""Emit an object to the next pipeline stage."""
self.emits.append(obj)
def get_current_command_text(self) -> str:
"""Get the current command text (for backward compatibility)."""
# This is maintained for backward compatibility with old code
# In a real implementation, this would come from the stage context
return ""
def __repr__(self) -> str:
return f"PipelineStageContext(stage={self.stage_index}/{self.total_stages}, is_last={self.is_last_stage}, worker_id={self.worker_id})"
"""Context information for the current pipeline stage."""
def __init__(
self,
stage_index: int,
total_stages: int,
worker_id: Optional[str] = None,
on_emit: Optional[Callable[[Any], None]] = None,
):
self.stage_index = stage_index
self.total_stages = total_stages
self.is_last_stage = (stage_index == total_stages - 1)
self.worker_id = worker_id
self._on_emit = on_emit
self.emits: List[Any] = []
def emit(self, obj: Any) -> None:
"""Emit an object to the next pipeline stage."""
self.emits.append(obj)
cb = getattr(self, "_on_emit", None)
if cb:
try:
cb(obj)
except Exception:
pass
def get_current_command_text(self) -> str:
"""Get the current command text (for backward compatibility)."""
# This is maintained for backward compatibility with old code
# In a real implementation, this would come from the stage context
return ""
def __repr__(self) -> str:
return (
f"PipelineStageContext(stage={self.stage_index}/{self.total_stages}, "
f"is_last={self.is_last_stage}, worker_id={self.worker_id})"
)
# ============================================================================