This commit is contained in:
2026-01-01 20:37:27 -08:00
parent f3c79609d8
commit deb05c0d44
35 changed files with 5030 additions and 4879 deletions

195
CLI.py
View File

@@ -17,6 +17,7 @@ import threading
import time
import uuid
from copy import deepcopy
from datetime import datetime
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Sequence, Set, TextIO, Tuple, cast
@@ -57,7 +58,6 @@ from SYS.logger import debug, set_debug
from SYS.worker_manager import WorkerManager
from SYS.cmdlet_catalog import (
ensure_registry_loaded,
get_cmdlet_arg_choices,
get_cmdlet_arg_flags,
get_cmdlet_metadata,
@@ -871,7 +871,7 @@ class CmdletCompleter(Completer):
) -> Set[str]:
"""Return logical argument names already used in this cmdlet stage.
Example: if the user has typed `download-media -url ...`, then `url`
Example: if the user has typed `download-file -url ...`, then `url`
is considered used and should not be suggested again (even as `--url`).
"""
arg_flags = CmdletIntrospection.cmdlet_args(cmd_name, config)
@@ -970,8 +970,9 @@ class CmdletCompleter(Completer):
)
if choices:
for choice in choices:
if choice.lower().startswith(current_token):
yield Completion(choice, start_position=-len(current_token))
yield Completion(choice, start_position=-len(current_token))
# Example: if the user has typed `download-file -url ...`, then `url`
# is considered used and should not be suggested again (even as `--url`).
return
arg_names = CmdletIntrospection.cmdlet_args(cmd_name, config)
@@ -1347,8 +1348,6 @@ class CmdletExecutor:
from SYS import pipeline as ctx
from cmdlet import REGISTRY
ensure_registry_loaded()
# REPL guard: stage-local selection tables should not leak across independent
# commands. @ selection can always re-seed from the last result table.
try:
@@ -1732,11 +1731,9 @@ class CmdletExecutor:
selectable_commands = {
"search-file",
"download-data",
"download-media",
"download-file",
"search_file",
"download_data",
"download_media",
"download_file",
".config",
".worker",
@@ -1924,14 +1921,14 @@ class PipelineExecutor:
return stages
@staticmethod
def _validate_download_media_relationship_order(stages: List[List[str]]) -> bool:
"""Guard against running add-relationship on unstored download-media results.
def _validate_download_file_relationship_order(stages: List[List[str]]) -> bool:
"""Guard against running add-relationship on unstored download-file results.
Intended UX:
download-media ... | add-file -store <store> | add-relationship
download-file ... | add-file -store <store> | add-relationship
Rationale:
download-media outputs items that may not yet have a stable store+hash.
download-file outputs items that may not yet have a stable store+hash.
add-relationship is designed to operate in store/hash mode.
"""
@@ -1944,14 +1941,14 @@ class PipelineExecutor:
continue
names.append(_norm(stage[0]))
dl_idxs = [i for i, n in enumerate(names) if n == "download-media"]
dl_idxs = [i for i, n in enumerate(names) if n == "download-file"]
rel_idxs = [i for i, n in enumerate(names) if n == "add-relationship"]
add_file_idxs = [i for i, n in enumerate(names) if n == "add-file"]
if not dl_idxs or not rel_idxs:
return True
# If download-media is upstream of add-relationship, require an add-file in between.
# If download-file is upstream of add-relationship, require an add-file in between.
for rel_i in rel_idxs:
dl_before = [d for d in dl_idxs if d < rel_i]
if not dl_before:
@@ -1959,9 +1956,9 @@ class PipelineExecutor:
dl_i = max(dl_before)
if not any(dl_i < a < rel_i for a in add_file_idxs):
print(
"Pipeline order error: when using download-media with add-relationship, "
"Pipeline order error: when using download-file with add-relationship, "
"add-relationship must come after add-file (so items are stored and have store+hash).\n"
"Example: download-media <...> | add-file -store <store> | add-relationship\n"
"Example: download-file <...> | add-file -store <store> | add-relationship\n"
)
return False
@@ -2238,6 +2235,37 @@ class PipelineExecutor:
return False
@staticmethod
def _summarize_stage_text(stage_tokens: Sequence[str], limit: int = 140) -> str:
    """Collapse one stage's tokens into a single-line summary.

    Args:
        stage_tokens: Tokens making up one pipeline stage. ``None`` entries
            are skipped; every other entry is converted with ``str()``.
            A ``None`` or empty sequence summarizes to ``""``.
        limit: Maximum length of the returned text.

    Returns:
        The tokens joined by single spaces, with every whitespace run
        collapsed to one space. Text longer than ``limit`` is cut and ends
        with ``"..."``; when ``limit`` is too small to hold the ellipsis
        the text is hard-truncated instead. Returns ``""`` when there is
        nothing to summarize.
    """
    ellipsis = "..."
    combined = " ".join(
        str(tok) for tok in (stage_tokens or ()) if tok is not None
    ).strip()
    if not combined:
        return ""
    normalized = re.sub(r"\s+", " ", combined)
    if len(normalized) <= limit:
        return normalized
    if limit <= len(ellipsis):
        # A zero/negative slice end would count from the right and produce a
        # result longer than `limit`; truncate hard instead.
        return normalized[:max(limit, 0)]
    # rstrip so the ellipsis never follows a dangling space.
    return normalized[:limit - len(ellipsis)].rstrip() + ellipsis
@staticmethod
def _log_pipeline_event(
    worker_manager: Any,
    worker_id: Optional[str],
    message: str,
) -> None:
    """Append a timestamped ``PIPELINE`` line to a worker's log channel.

    Logging is best-effort: a failure to write the event never propagates
    to the caller, but it is reported through ``debug`` instead of being
    dropped silently.

    Args:
        worker_manager: Object exposing
            ``append_stdout(worker_id, text, channel=...)``. Nothing is
            logged when it is falsy.
        worker_id: Identifier of the worker whose log receives the line.
            Nothing is logged when it is falsy.
        message: Event text. Nothing is logged when it is empty.
    """
    if not worker_manager or not worker_id or not message:
        return
    try:
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    except Exception:
        # Keep the event even if the clock cannot be formatted.
        timestamp = ""
    prefix = f"{timestamp} - " if timestamp else ""
    text = f"{prefix}PIPELINE - {message}"
    try:
        worker_manager.append_stdout(worker_id, text + "\n", channel="log")
    except Exception as exc:
        # Never let event logging break the pipeline, but leave a trace.
        debug(f"Pipeline event logging failed for worker {worker_id}: {exc}")
@staticmethod
def _maybe_open_url_selection(
current_table: Any,
@@ -2571,11 +2599,11 @@ class PipelineExecutor:
if not stages:
if table_type == "youtube":
print("Auto-running YouTube selection via download-media")
stages.append(["download-media"])
print("Auto-running YouTube selection via download-file")
stages.append(["download-file"])
elif table_type == "bandcamp":
print("Auto-running Bandcamp selection via download-media")
stages.append(["download-media"])
print("Auto-running Bandcamp selection via download-file")
stages.append(["download-file"])
elif table_type == "internetarchive":
print("Auto-loading Internet Archive item via download-file")
stages.append(["download-file"])
@@ -2594,32 +2622,24 @@ class PipelineExecutor:
first_cmd = stages[0][0] if stages and stages[0] else None
if table_type == "soulseek" and first_cmd not in (
"download-file",
"download-media",
"download_media",
".pipe",
):
debug("Auto-inserting download-file after Soulseek selection")
stages.insert(0, ["download-file"])
if table_type == "youtube" and first_cmd not in (
"download-media",
"download_media",
"download-file",
".pipe",
):
debug("Auto-inserting download-media after YouTube selection")
stages.insert(0, ["download-media"])
debug("Auto-inserting download-file after YouTube selection")
stages.insert(0, ["download-file"])
if table_type == "bandcamp" and first_cmd not in (
"download-media",
"download_media",
"download-file",
".pipe",
):
print("Auto-inserting download-media after Bandcamp selection")
stages.insert(0, ["download-media"])
print("Auto-inserting download-file after Bandcamp selection")
stages.insert(0, ["download-file"])
if table_type == "internetarchive" and first_cmd not in (
"download-file",
"download-media",
"download_media",
".pipe",
):
debug(
@@ -2628,16 +2648,12 @@ class PipelineExecutor:
stages.insert(0, ["download-file"])
if table_type == "podcastindex.episodes" and first_cmd not in (
"download-file",
"download-media",
"download_media",
".pipe",
):
print("Auto-inserting download-file after PodcastIndex episode selection")
stages.insert(0, ["download-file"])
if table_type == "libgen" and first_cmd not in (
"download-file",
"download-media",
"download_media",
".pipe",
):
print("Auto-inserting download-file after Libgen selection")
@@ -2814,6 +2830,12 @@ class PipelineExecutor:
pipeline_text=pipeline_text,
config=config
)
if pipeline_session and worker_manager:
self._log_pipeline_event(
worker_manager,
pipeline_session.worker_id,
f"Pipeline start: {pipeline_text or '(empty pipeline)'}",
)
raw_stage_texts = self._get_raw_stage_texts(ctx)
self._maybe_enable_background_notifier(
worker_manager,
@@ -2843,8 +2865,8 @@ class PipelineExecutor:
if initial_piped is not None:
piped_result = initial_piped
# REPL guard: prevent add-relationship before add-file for download-media pipelines.
if not self._validate_download_media_relationship_order(stages):
# REPL guard: prevent add-relationship before add-file for download-file pipelines.
if not self._validate_download_file_relationship_order(stages):
pipeline_status = "failed"
pipeline_error = "Invalid pipeline order"
return
@@ -3144,11 +3166,11 @@ class PipelineExecutor:
if filter_spec is None:
if stage_index + 1 >= len(stages):
if table_type == "youtube":
print("Auto-running YouTube selection via download-media")
stages.append(["download-media", *stage_args])
print("Auto-running YouTube selection via download-file")
stages.append(["download-file", *stage_args])
elif table_type == "bandcamp":
print("Auto-running Bandcamp selection via download-media")
stages.append(["download-media"])
print("Auto-running Bandcamp selection via download-file")
stages.append(["download-file"])
elif table_type == "internetarchive":
print("Auto-loading Internet Archive item via download-file")
stages.append(["download-file"])
@@ -3161,56 +3183,53 @@ class PipelineExecutor:
else:
if table_type == "soulseek" and next_cmd not in (
"download-file",
"download-media",
"download_media",
".pipe",
):
debug("Auto-inserting download-file after Soulseek selection")
stages.insert(stage_index + 1, ["download-file"])
if table_type == "youtube" and next_cmd not in (
"download-media",
"download_media",
"download-file",
".pipe",
):
debug("Auto-inserting download-media after YouTube selection")
stages.insert(stage_index + 1, ["download-media"])
debug("Auto-inserting download-file after YouTube selection")
stages.insert(stage_index + 1, ["download-file"])
if table_type == "bandcamp" and next_cmd not in (
"download-media",
"download_media",
"download-file",
".pipe",
):
print("Auto-inserting download-media after Bandcamp selection")
stages.insert(stage_index + 1, ["download-media"])
print("Auto-inserting download-file after Bandcamp selection")
stages.insert(stage_index + 1, ["download-file"])
if table_type == "internetarchive" and next_cmd not in (
"download-file",
"download-media",
"download_media",
".pipe",
):
debug("Auto-inserting download-file after Internet Archive selection")
stages.insert(stage_index + 1, ["download-file"])
if table_type == "podcastindex.episodes" and next_cmd not in (
"download-file",
"download-media",
"download_media",
".pipe",
):
print("Auto-inserting download-file after PodcastIndex episode selection")
stages.insert(stage_index + 1, ["download-file"])
if table_type == "libgen" and next_cmd not in (
"download-file",
"download-media",
"download_media",
".pipe",
):
print("Auto-inserting download-file after Libgen selection")
stages.insert(stage_index + 1, ["download-file"])
continue
ensure_registry_loaded()
cmd_fn = REGISTRY.get(cmd_name)
if not cmd_fn:
try:
mod = import_cmd_module(cmd_name)
data = getattr(mod, "CMDLET", None) if mod else None
if data and hasattr(data, "exec") and callable(getattr(data, "exec")):
run_fn = getattr(data, "exec")
REGISTRY[cmd_name] = run_fn
cmd_fn = run_fn
except Exception:
cmd_fn = None
if not cmd_fn:
print(f"Unknown command: {cmd_name}\n")
pipeline_status = "failed"
@@ -3226,6 +3245,14 @@ class PipelineExecutor:
)
stage_worker_id = stage_session.worker_id if stage_session else None
stage_summary = self._summarize_stage_text(stage_tokens)
if pipeline_session and worker_manager:
summary_text = stage_summary or cmd_name
self._log_pipeline_event(
worker_manager,
pipeline_session.worker_id,
f"Stage {stage_index + 1}/{len(stages)} start: {summary_text}",
)
# Estimate how many per-item tasks this pipe will run.
pipe_idx = pipe_index_by_stage.get(stage_index)
@@ -3433,7 +3460,7 @@ class PipelineExecutor:
else:
piped_result = None
# Some cmdlets (notably download-media format selection) populate a selectable
# Some cmdlets (notably download-file format selection) populate a selectable
# current-stage table without emitting pipeline items. In these cases, render
# the table and pause the pipeline so the user can pick @N.
stage_table = (
@@ -3458,21 +3485,18 @@ class PipelineExecutor:
except Exception:
stage_table_source = ""
if ((not stage_is_last) and (not emits) and cmd_name in {
"download-media",
"download_media",
"download-data",
"download_data",
"download-file",
"download-data",
"download_data",
} and stage_table is not None
and (stage_table_type in {
"ytdlp.formatlist",
"download-media",
"download_media",
"bandcamp",
"youtube",
} or stage_table_source in {"download-media",
"download_media"}
or stage_table_type in {"internetarchive.formats"}
or stage_table_source in {"download-file"})):
and (stage_table_type in {
"ytdlp.formatlist",
"download-file",
"bandcamp",
"youtube",
} or stage_table_source in {"download-file"}
or stage_table_type in {"internetarchive.formats"}
or stage_table_source in {"download-file"})):
try:
is_selectable = not bool(
getattr(stage_table,
@@ -3671,6 +3695,18 @@ class PipelineExecutor:
pipeline_error = f"{stage_label} error: {exc}"
return
finally:
if pipeline_session and worker_manager:
status_label = (
"completed" if stage_status == "completed" else "failed"
)
msg = f"{stage_label} {status_label}"
if stage_error and stage_status != "completed":
msg += f": {stage_error}"
self._log_pipeline_event(
worker_manager,
pipeline_session.worker_id,
msg,
)
if progress_ui is not None and pipe_idx is not None:
try:
progress_ui.finish_pipe(
@@ -3820,6 +3856,17 @@ class PipelineExecutor:
ctx.set_current_stage_table(None)
except Exception:
pass
if pipeline_session and worker_manager:
final_msg = f"Pipeline {pipeline_status}"
if pipeline_error:
final_msg += f": {pipeline_error}"
else:
final_msg += " (ok)"
self._log_pipeline_event(
worker_manager,
pipeline_session.worker_id,
final_msg,
)
if pipeline_session:
pipeline_session.close(
status=pipeline_status,