nose
2025-12-22 02:11:53 -08:00
parent d0b821b5dd
commit 16316bb3fd
20 changed files with 4218 additions and 2422 deletions


@@ -147,7 +147,11 @@ class HydrusNetwork:
         file_size = file_path.stat().st_size
         headers["Content-Type"] = spec.content_type or "application/octet-stream"
-        headers["Content-Length"] = str(file_size)
+        # Do not set Content-Length when streaming an iterator body.
+        # If the file size changes between stat() and read() (or the source is truncated),
+        # h11 will raise: "Too little data for declared Content-Length".
+        # Let httpx choose chunked transfer encoding for safety.
+        headers.pop("Content-Length", None)
         logger.debug(f"{self._log_prefix()} Uploading file {file_path.name} ({file_size} bytes)")

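Why this matters, as a minimal sketch (assuming httpx; the URL and client wiring are illustrative, not from this commit): with an iterator body, a Content-Length computed from an earlier stat() can disagree with the bytes actually read, and h11 aborts mid-request. Omitting the header makes httpx fall back to chunked transfer:

import httpx

def upload_streaming(path: str, url: str) -> httpx.Response:
    def body():
        with open(path, "rb") as f:
            while chunk := f.read(64 * 1024):
                yield chunk
    # No Content-Length header: httpx sends "Transfer-Encoding: chunked",
    # so a file that shrinks or grows mid-upload cannot trip h11's length check.
    headers = {"Content-Type": "application/octet-stream"}
    with httpx.Client() as client:
        return client.post(url, content=body(), headers=headers)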
CLI.py

@@ -1245,25 +1245,17 @@ class PipelineExecutor:
         stages.append(current)
         return stages

-    def execute_tokens(self, tokens: List[str]) -> None:
-        from cmdlet import REGISTRY
-        import pipeline as ctx
-        try:
-            try:
-                if hasattr(ctx, "clear_pipeline_stop"):
-                    ctx.clear_pipeline_stop()
-            except Exception:
-                pass
-            stages = self._split_stages(tokens)
-            if not stages:
-                print("Invalid pipeline syntax\n")
-                return
-            pending_tail = ctx.get_pending_pipeline_tail() if hasattr(ctx, "get_pending_pipeline_tail") else []
-            pending_source = ctx.get_pending_pipeline_source() if hasattr(ctx, "get_pending_pipeline_source") else None
+    @staticmethod
+    def _try_clear_pipeline_stop(ctx: Any) -> None:
+        try:
+            if hasattr(ctx, "clear_pipeline_stop"):
+                ctx.clear_pipeline_stop()
+        except Exception:
+            pass
+
+    @staticmethod
+    def _maybe_seed_current_stage_table(ctx: Any) -> None:
+        try:
             if hasattr(ctx, "get_current_stage_table") and not ctx.get_current_stage_table():
                 display_table = ctx.get_display_table() if hasattr(ctx, "get_display_table") else None
                 if display_table:
@@ -1272,188 +1264,512 @@ class PipelineExecutor:
                 last_table = ctx.get_last_result_table() if hasattr(ctx, "get_last_result_table") else None
                 if last_table:
                     ctx.set_current_stage_table(last_table)
-            current_source = (
-                ctx.get_current_stage_table_source_command() if hasattr(ctx, "get_current_stage_table_source_command") else None
-            )
-            effective_source = current_source or (
-                ctx.get_last_result_table_source_command() if hasattr(ctx, "get_last_result_table_source_command") else None
-            )
-            selection_only = len(stages) == 1 and stages[0] and stages[0][0].startswith("@")
-            if pending_tail and selection_only:
-                if (pending_source is None) or (effective_source and pending_source == effective_source):
-                    stages.extend(pending_tail)
-                    if hasattr(ctx, "clear_pending_pipeline_tail"):
-                        ctx.clear_pending_pipeline_tail()
-                elif hasattr(ctx, "clear_pending_pipeline_tail"):
-                    ctx.clear_pending_pipeline_tail()
-            config = self._config_loader.load()
-            if isinstance(config, dict):
-                # This executor is used by both the REPL and the `pipeline` subcommand.
-                # Quiet/background mode is helpful for detached/background runners, but
-                # it suppresses interactive UX (like the pipeline Live progress UI).
-                config["_quiet_background_output"] = bool(self._toolbar_output is None)
-
-            def _resolve_items_for_selection(table_obj, items_list):
-                return items_list if items_list else []
-
-            def _maybe_run_class_selector(selected_items: list, *, stage_is_last: bool) -> bool:
-                if not stage_is_last:
-                    return False
-                candidates: list[str] = []
-                seen: set[str] = set()
-
-                def _add(value) -> None:
-                    try:
-                        text = str(value or "").strip().lower()
-                    except Exception:
-                        return
-                    if not text or text in seen:
-                        return
-                    seen.add(text)
-                    candidates.append(text)
-
-                try:
-                    current_table = ctx.get_current_stage_table() or ctx.get_last_result_table()
-                    _add(current_table.table if current_table and hasattr(current_table, "table") else None)
-                except Exception:
-                    pass
-                else:
-                    for item in selected_items or []:
-                        if isinstance(item, dict):
-                            _add(item.get("provider"))
-                            _add(item.get("store"))
-                            _add(item.get("table"))
-                        else:
-                            _add(getattr(item, "provider", None))
-                            _add(getattr(item, "store", None))
-                            _add(getattr(item, "table", None))
-                try:
-                    from ProviderCore.registry import get_provider, is_known_provider_name
-                except Exception:
-                    get_provider = None  # type: ignore
-                    is_known_provider_name = None  # type: ignore
-                if get_provider is not None:
-                    for key in candidates:
-                        try:
-                            if is_known_provider_name is not None and (not is_known_provider_name(key)):
-                                continue
-                        except Exception:
-                            # If the predicate fails for any reason, fall back to legacy behavior.
-                            pass
-                        try:
-                            provider = get_provider(key, config)
-                        except Exception:
-                            continue
-                        selector = getattr(provider, "selector", None)
-                        if selector is None:
-                            continue
-                        try:
-                            handled = bool(selector(selected_items, ctx=ctx, stage_is_last=True))
-                        except Exception as exc:
-                            print(f"{key} selector failed: {exc}\n")
-                            return True
-                        if handled:
-                            return True
-                store_keys: list[str] = []
-                for item in selected_items or []:
-                    if isinstance(item, dict):
-                        v = item.get("store")
-                    else:
-                        v = getattr(item, "store", None)
-                    name = str(v or "").strip()
-                    if name:
-                        store_keys.append(name)
-                if store_keys:
-                    try:
-                        from Store.registry import Store as StoreRegistry
-
-                        store_registry = StoreRegistry(config, suppress_debug=True)
-                        _backend_names = list(store_registry.list_backends() or [])
-                        _backend_by_lower = {str(n).lower(): str(n) for n in _backend_names if str(n).strip()}
-                        for name in store_keys:
-                            resolved_name = name
-                            if not store_registry.is_available(resolved_name):
-                                resolved_name = _backend_by_lower.get(str(name).lower(), name)
-                            if not store_registry.is_available(resolved_name):
-                                continue
-                            backend = store_registry[resolved_name]
-                            selector = getattr(backend, "selector", None)
-                            if selector is None:
-                                continue
-                            handled = bool(selector(selected_items, ctx=ctx, stage_is_last=True))
-                            if handled:
-                                return True
-                    except Exception:
-                        pass
-                return False
-
-            first_stage_tokens = stages[0] if stages else []
-            first_stage_selection_indices: List[int] = []
-            first_stage_had_extra_args = False
-            first_stage_select_all = False
-            if first_stage_tokens:
-                new_first_stage: List[str] = []
-                for token in first_stage_tokens:
-                    if token.startswith("@"):  # selection
-                        selection = SelectionSyntax.parse(token)
-                        if selection is not None:
-                            first_stage_selection_indices = sorted([i - 1 for i in selection])
-                            continue
-                        if token == "@*":
-                            first_stage_select_all = True
-                            continue
-                    new_first_stage.append(token)
-                if new_first_stage:
-                    stages[0] = new_first_stage
-                    if first_stage_selection_indices or first_stage_select_all:
-                        first_stage_had_extra_args = True
-                elif first_stage_selection_indices or first_stage_select_all:
-                    stages.pop(0)
-            if first_stage_select_all:
-                last_items = ctx.get_last_result_items()
-                if last_items:
-                    first_stage_selection_indices = list(range(len(last_items)))
+        except Exception:
+            pass
+
+    @staticmethod
+    def _maybe_apply_pending_pipeline_tail(ctx: Any, stages: List[List[str]]) -> List[List[str]]:
+        try:
+            pending_tail = ctx.get_pending_pipeline_tail() if hasattr(ctx, "get_pending_pipeline_tail") else []
+            pending_source = ctx.get_pending_pipeline_source() if hasattr(ctx, "get_pending_pipeline_source") else None
+        except Exception:
+            pending_tail = []
+            pending_source = None
+        try:
+            current_source = (
+                ctx.get_current_stage_table_source_command()
+                if hasattr(ctx, "get_current_stage_table_source_command")
+                else None
+            )
+        except Exception:
+            current_source = None
+        try:
+            effective_source = current_source or (
+                ctx.get_last_result_table_source_command()
+                if hasattr(ctx, "get_last_result_table_source_command")
+                else None
+            )
+        except Exception:
+            effective_source = current_source
+        selection_only = bool(len(stages) == 1 and stages[0] and stages[0][0].startswith("@"))
+        if pending_tail and selection_only:
+            if (pending_source is None) or (effective_source and pending_source == effective_source):
+                stages = list(stages) + list(pending_tail)
+                try:
+                    if hasattr(ctx, "clear_pending_pipeline_tail"):
+                        ctx.clear_pending_pipeline_tail()
+                except Exception:
+                    pass
+            else:
+                try:
+                    if hasattr(ctx, "clear_pending_pipeline_tail"):
+                        ctx.clear_pending_pipeline_tail()
+                except Exception:
+                    pass
+        return stages
+
+    def _apply_quiet_background_flag(self, config: Any) -> Any:
+        if isinstance(config, dict):
+            # This executor is used by both the REPL and the `pipeline` subcommand.
+            # Quiet/background mode is helpful for detached/background runners, but
+            # it suppresses interactive UX (like the pipeline Live progress UI).
+            config["_quiet_background_output"] = bool(self._toolbar_output is None)
+        return config
+
+    @staticmethod
+    def _extract_first_stage_selection_tokens(stages: List[List[str]]) -> tuple[List[List[str]], List[int], bool, bool]:
+        first_stage_tokens = stages[0] if stages else []
+        first_stage_selection_indices: List[int] = []
+        first_stage_had_extra_args = False
+        first_stage_select_all = False
+        if first_stage_tokens:
+            new_first_stage: List[str] = []
+            for token in first_stage_tokens:
+                if token.startswith("@"):  # selection
+                    selection = SelectionSyntax.parse(token)
+                    if selection is not None:
+                        first_stage_selection_indices = sorted([i - 1 for i in selection])
+                        continue
+                    if token == "@*":
+                        first_stage_select_all = True
+                        continue
+                new_first_stage.append(token)
+            if new_first_stage:
+                stages = list(stages)
+                stages[0] = new_first_stage
+                if first_stage_selection_indices or first_stage_select_all:
+                    first_stage_had_extra_args = True
+            elif first_stage_selection_indices or first_stage_select_all:
+                stages = list(stages)
+                stages.pop(0)
+        return stages, first_stage_selection_indices, first_stage_had_extra_args, first_stage_select_all
+
+    @staticmethod
+    def _apply_select_all_if_requested(ctx: Any, indices: List[int], select_all: bool) -> List[int]:
+        if not select_all:
+            return indices
+        try:
+            last_items = ctx.get_last_result_items()
+        except Exception:
+            last_items = None
+        if last_items:
+            return list(range(len(last_items)))
+        return indices
+
+    @staticmethod
+    def _maybe_run_class_selector(ctx: Any, config: Any, selected_items: list, *, stage_is_last: bool) -> bool:
+        if not stage_is_last:
+            return False
+        candidates: list[str] = []
+        seen: set[str] = set()
+
+        def _add(value) -> None:
+            try:
+                text = str(value or "").strip().lower()
+            except Exception:
+                return
+            if not text or text in seen:
+                return
+            seen.add(text)
+            candidates.append(text)
+
+        try:
+            current_table = ctx.get_current_stage_table() or ctx.get_last_result_table()
+            _add(current_table.table if current_table and hasattr(current_table, "table") else None)
+        except Exception:
+            pass
+        for item in selected_items or []:
+            if isinstance(item, dict):
+                _add(item.get("provider"))
+                _add(item.get("store"))
+                _add(item.get("table"))
+            else:
+                _add(getattr(item, "provider", None))
+                _add(getattr(item, "store", None))
+                _add(getattr(item, "table", None))
+        try:
+            from ProviderCore.registry import get_provider, is_known_provider_name
+        except Exception:
+            get_provider = None  # type: ignore
+            is_known_provider_name = None  # type: ignore
+        if get_provider is not None:
+            for key in candidates:
+                try:
+                    if is_known_provider_name is not None and (not is_known_provider_name(key)):
+                        continue
+                except Exception:
+                    # If the predicate fails for any reason, fall back to legacy behavior.
+                    pass
+                try:
+                    provider = get_provider(key, config)
+                except Exception:
+                    continue
+                selector = getattr(provider, "selector", None)
+                if selector is None:
+                    continue
+                try:
+                    handled = bool(selector(selected_items, ctx=ctx, stage_is_last=True))
+                except Exception as exc:
+                    print(f"{key} selector failed: {exc}\n")
+                    return True
+                if handled:
+                    return True
+        store_keys: list[str] = []
+        for item in selected_items or []:
+            if isinstance(item, dict):
+                v = item.get("store")
+            else:
+                v = getattr(item, "store", None)
+            name = str(v or "").strip()
+            if name:
+                store_keys.append(name)
+        if store_keys:
+            try:
+                from Store.registry import Store as StoreRegistry
+
+                store_registry = StoreRegistry(config, suppress_debug=True)
+                _backend_names = list(store_registry.list_backends() or [])
+                _backend_by_lower = {str(n).lower(): str(n) for n in _backend_names if str(n).strip()}
+                for name in store_keys:
+                    resolved_name = name
+                    if not store_registry.is_available(resolved_name):
+                        resolved_name = _backend_by_lower.get(str(name).lower(), name)
+                    if not store_registry.is_available(resolved_name):
+                        continue
+                    backend = store_registry[resolved_name]
+                    selector = getattr(backend, "selector", None)
+                    if selector is None:
+                        continue
+                    handled = bool(selector(selected_items, ctx=ctx, stage_is_last=True))
+                    if handled:
+                        return True
+            except Exception:
+                pass
+        return False
+
+    def _maybe_enable_background_notifier(self, worker_manager: Any, config: Any, pipeline_session: Any) -> None:
+        if not (pipeline_session and worker_manager and isinstance(config, dict)):
+            return
+        session_worker_ids = config.get("_session_worker_ids")
+        if not session_worker_ids:
+            return
+        try:
+            output_fn = self._toolbar_output
+            quiet_mode = bool(config.get("_quiet_background_output"))
+            terminal_only = quiet_mode and not output_fn
+            kwargs: Dict[str, Any] = {
+                "session_worker_ids": session_worker_ids,
+                "only_terminal_updates": terminal_only,
+                "overlay_mode": bool(output_fn),
+            }
+            if output_fn:
+                kwargs["output"] = output_fn
+            ensure_background_notifier(worker_manager, **kwargs)
+        except Exception:
+            pass
+
+    @staticmethod
+    def _get_raw_stage_texts(ctx: Any) -> List[str]:
+        raw_stage_texts: List[str] = []
+        try:
+            if hasattr(ctx, "get_current_command_stages"):
+                raw_stage_texts = ctx.get_current_command_stages() or []
+        except Exception:
+            raw_stage_texts = []
+        return raw_stage_texts
+
+    def _maybe_apply_initial_selection(
+        self,
+        ctx: Any,
+        config: Any,
+        stages: List[List[str]],
+        *,
+        selection_indices: List[int],
+        first_stage_had_extra_args: bool,
+        worker_manager: Any,
+        pipeline_session: Any,
+    ) -> tuple[bool, Any]:
+        if not selection_indices:
+            return True, None
+        try:
+            if not ctx.get_current_stage_table_source_command():
+                display_table = ctx.get_display_table() if hasattr(ctx, "get_display_table") else None
+                table_for_stage = display_table or ctx.get_last_result_table()
+                if table_for_stage:
+                    ctx.set_current_stage_table(table_for_stage)
+        except Exception:
+            pass
+        source_cmd = None
+        source_args_raw = None
+        try:
+            source_cmd = ctx.get_current_stage_table_source_command()
+            source_args_raw = ctx.get_current_stage_table_source_args()
+        except Exception:
+            source_cmd = None
+            source_args_raw = None
+        if isinstance(source_args_raw, str):
+            source_args: List[str] = [source_args_raw]
+        elif isinstance(source_args_raw, list):
+            source_args = [str(x) for x in source_args_raw if x is not None]
+        else:
+            source_args = []
+        current_table = None
+        try:
+            current_table = ctx.get_current_stage_table()
+        except Exception:
+            current_table = None
+        table_type = current_table.table if current_table and hasattr(current_table, "table") else None
+        command_expanded = False
+        if table_type in {"youtube", "soulseek"}:
+            command_expanded = False
+        elif source_cmd == "search-file" and source_args and "youtube" in source_args:
+            command_expanded = False
+        else:
+            selected_row_args: List[str] = []
+            skip_pipe_expansion = source_cmd == ".pipe" and len(stages) > 0
+            if source_cmd and not skip_pipe_expansion:
+                for idx in selection_indices:
+                    row_args = ctx.get_current_stage_table_row_selection_args(idx)
+                    if row_args:
+                        selected_row_args.extend(row_args)
+                        break
+            if selected_row_args:
+                if isinstance(source_cmd, list):
+                    cmd_list: List[str] = [str(x) for x in source_cmd if x is not None]
+                elif isinstance(source_cmd, str):
+                    cmd_list = [source_cmd]
+                else:
+                    cmd_list = []
+                expanded_stage: List[str] = cmd_list + source_args + selected_row_args
+                if first_stage_had_extra_args and stages:
+                    expanded_stage += stages[0]
+                    stages[0] = expanded_stage
+                else:
+                    stages.insert(0, expanded_stage)
+                if pipeline_session and worker_manager:
+                    try:
+                        worker_manager.log_step(
+                            pipeline_session.worker_id,
+                            f"@N expansion: {source_cmd} + {' '.join(str(x) for x in selected_row_args)}",
+                        )
+                    except Exception:
+                        pass
+                selection_indices = []
+                command_expanded = True
+        if (not command_expanded) and selection_indices:
+            last_piped_items = None
+            try:
+                last_piped_items = ctx.get_last_result_items()
+            except Exception:
+                last_piped_items = None
+            stage_table = None
+            try:
+                stage_table = ctx.get_current_stage_table()
+            except Exception:
+                stage_table = None
+            if not stage_table and hasattr(ctx, "get_display_table"):
+                try:
+                    stage_table = ctx.get_display_table()
+                except Exception:
+                    stage_table = None
+            if not stage_table:
+                try:
+                    stage_table = ctx.get_last_result_table()
+                except Exception:
+                    stage_table = None
+            resolved_items = last_piped_items if last_piped_items else []
+            if last_piped_items:
+                filtered = [resolved_items[i] for i in selection_indices if 0 <= i < len(resolved_items)]
+                if not filtered:
+                    print("No items matched selection in pipeline\n")
+                    return False, None
+                if PipelineExecutor._maybe_run_class_selector(ctx, config, filtered, stage_is_last=(not stages)):
+                    return False, None
+
+                from cmdlet._shared import coerce_to_pipe_object
+
+                filtered_pipe_objs = [coerce_to_pipe_object(item) for item in filtered]
+                piped_result = filtered_pipe_objs if len(filtered_pipe_objs) > 1 else filtered_pipe_objs[0]
+                if pipeline_session and worker_manager:
+                    try:
+                        selection_parts = [f"@{i+1}" for i in selection_indices]
+                        worker_manager.log_step(
+                            pipeline_session.worker_id,
+                            f"Applied @N selection {' | '.join(selection_parts)}",
+                        )
+                    except Exception:
+                        pass
+                # Auto-insert downloader stages for provider tables.
+                try:
+                    current_table = ctx.get_current_stage_table() or ctx.get_last_result_table()
+                except Exception:
+                    current_table = None
+                table_type = current_table.table if current_table and hasattr(current_table, "table") else None
+                if not stages:
+                    if table_type == "youtube":
+                        print("Auto-running YouTube selection via download-media")
+                        stages.append(["download-media"])
+                    elif table_type == "bandcamp":
+                        print("Auto-running Bandcamp selection via download-media")
+                        stages.append(["download-media"])
+                    elif table_type in {"soulseek", "openlibrary", "libgen"}:
+                        print("Auto-piping selection to download-file")
+                        stages.append(["download-file"])
+                else:
+                    first_cmd = stages[0][0] if stages and stages[0] else None
+                    if table_type == "soulseek" and first_cmd not in (
+                        "download-file",
+                        "download-media",
+                        "download_media",
+                        ".pipe",
+                    ):
+                        debug("Auto-inserting download-file after Soulseek selection")
+                        stages.insert(0, ["download-file"])
+                    if table_type == "youtube" and first_cmd not in (
+                        "download-media",
+                        "download_media",
+                        "download-file",
+                        ".pipe",
+                    ):
+                        debug("Auto-inserting download-media after YouTube selection")
+                        stages.insert(0, ["download-media"])
+                    if table_type == "bandcamp" and first_cmd not in (
+                        "download-media",
+                        "download_media",
+                        "download-file",
+                        ".pipe",
+                    ):
+                        print("Auto-inserting download-media after Bandcamp selection")
+                        stages.insert(0, ["download-media"])
+                    if table_type == "libgen" and first_cmd not in (
+                        "download-file",
+                        "download-media",
+                        "download_media",
+                        ".pipe",
+                    ):
+                        print("Auto-inserting download-file after Libgen selection")
+                        stages.insert(0, ["download-file"])
+                return True, piped_result
+            else:
+                print("No previous results to select from\n")
+                return False, None
+        return True, None
+
+    @staticmethod
+    def _maybe_start_live_progress(config: Any, stages: List[List[str]]) -> tuple[Any, Dict[int, int]]:
+        progress_ui = None
+        pipe_index_by_stage: Dict[int, int] = {}
+        try:
+            quiet_mode = bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False
+        except Exception:
+            quiet_mode = False
+        try:
+            import sys as _sys
+
+            if (not quiet_mode) and bool(getattr(_sys.stderr, "isatty", lambda: False)()):
+                from models import PipelineLiveProgress
+
+                pipe_stage_indices: List[int] = []
+                pipe_labels: List[str] = []
+                for idx, stage_tokens in enumerate(stages):
+                    if not stage_tokens:
+                        continue
+                    name = str(stage_tokens[0]).replace("_", "-").lower()
+                    if name == "@" or name.startswith("@"):
+                        continue
+                    # `.pipe` (MPV) is an interactive launcher; disable pipeline Live progress
+                    # for it because it doesn't meaningfully "complete" (mpv may keep running)
+                    # and Live output interferes with MPV playlist UI.
+                    if name == ".pipe":
+                        continue
+                    # `.matrix` uses a two-phase picker (@N then .matrix -send). Pipeline Live
+                    # progress can linger across those phases and interfere with interactive output.
+                    if name == ".matrix":
+                        continue
+                    pipe_stage_indices.append(idx)
+                    pipe_labels.append(name)
+                if pipe_labels:
+                    progress_ui = PipelineLiveProgress(pipe_labels, enabled=True)
+                    progress_ui.start()
+                    try:
+                        import pipeline as _pipeline_ctx
+
+                        if hasattr(_pipeline_ctx, "set_live_progress"):
+                            _pipeline_ctx.set_live_progress(progress_ui)
+                    except Exception:
+                        pass
+                pipe_index_by_stage = {stage_idx: pipe_idx for pipe_idx, stage_idx in enumerate(pipe_stage_indices)}
+        except Exception:
+            progress_ui = None
+            pipe_index_by_stage = {}
+        return progress_ui, pipe_index_by_stage
+
+    def execute_tokens(self, tokens: List[str]) -> None:
+        from cmdlet import REGISTRY
+        import pipeline as ctx
+
+        try:
+            self._try_clear_pipeline_stop(ctx)
+            stages = self._split_stages(tokens)
+            if not stages:
+                print("Invalid pipeline syntax\n")
+                return
+            self._maybe_seed_current_stage_table(ctx)
+            stages = self._maybe_apply_pending_pipeline_tail(ctx, stages)
+            config = self._config_loader.load()
+            config = self._apply_quiet_background_flag(config)
+            stages, first_stage_selection_indices, first_stage_had_extra_args, first_stage_select_all = (
+                self._extract_first_stage_selection_tokens(stages)
+            )
+            first_stage_selection_indices = self._apply_select_all_if_requested(
+                ctx, first_stage_selection_indices, first_stage_select_all
+            )
             piped_result: Any = None
             worker_manager = WorkerManagerRegistry.ensure(config)
             pipeline_text = " | ".join(" ".join(stage) for stage in stages)
             pipeline_session = WorkerStages.begin_pipeline(worker_manager, pipeline_text=pipeline_text, config=config)
-            raw_stage_texts: List[str] = []
-            try:
-                if hasattr(ctx, "get_current_command_stages"):
-                    raw_stage_texts = ctx.get_current_command_stages() or []
-            except Exception:
-                raw_stage_texts = []
-            if pipeline_session and worker_manager and isinstance(config, dict):
-                session_worker_ids = config.get("_session_worker_ids")
-                if session_worker_ids:
-                    try:
-                        output_fn = self._toolbar_output
-                        quiet_mode = bool(config.get("_quiet_background_output"))
-                        terminal_only = quiet_mode and not output_fn
-                        kwargs: Dict[str, Any] = {
-                            "session_worker_ids": session_worker_ids,
-                            "only_terminal_updates": terminal_only,
-                            "overlay_mode": bool(output_fn),
-                        }
-                        if output_fn:
-                            kwargs["output"] = output_fn
-                        ensure_background_notifier(worker_manager, **kwargs)
-                    except Exception:
-                        pass
+            raw_stage_texts = self._get_raw_stage_texts(ctx)
+            self._maybe_enable_background_notifier(worker_manager, config, pipeline_session)
             pipeline_status = "completed"
             pipeline_error = ""
@@ -1462,201 +1778,24 @@ class PipelineExecutor:
             pipe_index_by_stage: Dict[int, int] = {}
             try:
-                if first_stage_selection_indices:
-                    if not ctx.get_current_stage_table_source_command():
-                        display_table = ctx.get_display_table() if hasattr(ctx, "get_display_table") else None
-                        table_for_stage = display_table or ctx.get_last_result_table()
-                        if table_for_stage:
-                            ctx.set_current_stage_table(table_for_stage)
-                    source_cmd = ctx.get_current_stage_table_source_command()
-                    source_args_raw = ctx.get_current_stage_table_source_args()
-                    if isinstance(source_args_raw, str):
-                        source_args: List[str] = [source_args_raw]
-                    elif isinstance(source_args_raw, list):
-                        source_args = [str(x) for x in source_args_raw if x is not None]
-                    else:
-                        source_args = []
-                    current_table = ctx.get_current_stage_table()
-                    table_type = current_table.table if current_table and hasattr(current_table, "table") else None
-                    command_expanded = False
-                    if table_type in {"youtube", "soulseek"}:
-                        command_expanded = False
-                    elif source_cmd == "search-file" and source_args and "youtube" in source_args:
-                        command_expanded = False
-                    else:
-                        selected_row_args: List[str] = []
-                        skip_pipe_expansion = source_cmd == ".pipe" and len(stages) > 0
-                        if source_cmd and not skip_pipe_expansion:
-                            for idx in first_stage_selection_indices:
-                                row_args = ctx.get_current_stage_table_row_selection_args(idx)
-                                if row_args:
-                                    selected_row_args.extend(row_args)
-                                    break
-                        if selected_row_args:
-                            if isinstance(source_cmd, list):
-                                cmd_list: List[str] = [str(x) for x in source_cmd if x is not None]
-                            elif isinstance(source_cmd, str):
-                                cmd_list = [source_cmd]
-                            else:
-                                cmd_list = []
-                            expanded_stage: List[str] = cmd_list + source_args + selected_row_args
-                            if first_stage_had_extra_args and stages:
-                                expanded_stage += stages[0]
-                                stages[0] = expanded_stage
-                            else:
-                                stages.insert(0, expanded_stage)
-                            if pipeline_session and worker_manager:
-                                try:
-                                    worker_manager.log_step(
-                                        pipeline_session.worker_id,
-                                        f"@N expansion: {source_cmd} + {' '.join(str(x) for x in selected_row_args)}",
-                                    )
-                                except Exception:
-                                    pass
-                            first_stage_selection_indices = []
-                            command_expanded = True
-                    if not command_expanded and first_stage_selection_indices:
-                        last_piped_items = ctx.get_last_result_items()
-                        stage_table = ctx.get_current_stage_table()
-                        if not stage_table and hasattr(ctx, "get_display_table"):
-                            stage_table = ctx.get_display_table()
-                        if not stage_table:
-                            stage_table = ctx.get_last_result_table()
-                        resolved_items = _resolve_items_for_selection(stage_table, last_piped_items)
-                        if last_piped_items:
-                            filtered = [
-                                resolved_items[i]
-                                for i in first_stage_selection_indices
-                                if 0 <= i < len(resolved_items)
-                            ]
-                            if not filtered:
-                                print("No items matched selection in pipeline\n")
-                                return
-                            if _maybe_run_class_selector(filtered, stage_is_last=(not stages)):
-                                return
-                            from cmdlet._shared import coerce_to_pipe_object
-                            filtered_pipe_objs = [coerce_to_pipe_object(item) for item in filtered]
-                            piped_result = filtered_pipe_objs if len(filtered_pipe_objs) > 1 else filtered_pipe_objs[0]
-                            if pipeline_session and worker_manager:
-                                try:
-                                    selection_parts = [f"@{i+1}" for i in first_stage_selection_indices]
-                                    worker_manager.log_step(
-                                        pipeline_session.worker_id,
-                                        f"Applied @N selection {' | '.join(selection_parts)}",
-                                    )
-                                except Exception:
-                                    pass
-                            # Auto-insert downloader stages for provider tables.
-                            current_table = ctx.get_current_stage_table() or ctx.get_last_result_table()
-                            table_type = current_table.table if current_table and hasattr(current_table, "table") else None
-                            if not stages:
-                                if table_type == "youtube":
-                                    print("Auto-running YouTube selection via download-media")
-                                    stages.append(["download-media"])
-                                elif table_type == "bandcamp":
-                                    print("Auto-running Bandcamp selection via download-media")
-                                    stages.append(["download-media"])
-                                elif table_type in {"soulseek", "openlibrary", "libgen"}:
-                                    print("Auto-piping selection to download-file")
-                                    stages.append(["download-file"])
-                            else:
-                                first_cmd = stages[0][0] if stages and stages[0] else None
-                                if table_type == "soulseek" and first_cmd not in (
-                                    "download-file",
-                                    "download-media",
-                                    "download_media",
-                                    ".pipe",
-                                ):
-                                    debug("Auto-inserting download-file after Soulseek selection")
-                                    stages.insert(0, ["download-file"])
-                                if table_type == "youtube" and first_cmd not in (
-                                    "download-media",
-                                    "download_media",
-                                    "download-file",
-                                    ".pipe",
-                                ):
-                                    debug("Auto-inserting download-media after YouTube selection")
-                                    stages.insert(0, ["download-media"])
-                                if table_type == "bandcamp" and first_cmd not in (
-                                    "download-media",
-                                    "download_media",
-                                    "download-file",
-                                    ".pipe",
-                                ):
-                                    print("Auto-inserting download-media after Bandcamp selection")
-                                    stages.insert(0, ["download-media"])
-                                if table_type == "libgen" and first_cmd not in (
-                                    "download-file",
-                                    "download-media",
-                                    "download_media",
-                                    ".pipe",
-                                ):
-                                    print("Auto-inserting download-file after Libgen selection")
-                                    stages.insert(0, ["download-file"])
-                        else:
-                            print("No previous results to select from\n")
-                            return
+                ok, initial_piped = self._maybe_apply_initial_selection(
+                    ctx,
+                    config,
+                    stages,
+                    selection_indices=first_stage_selection_indices,
+                    first_stage_had_extra_args=first_stage_had_extra_args,
+                    worker_manager=worker_manager,
+                    pipeline_session=pipeline_session,
+                )
+                if not ok:
+                    return
+                if initial_piped is not None:
+                    piped_result = initial_piped

                 # ------------------------------------------------------------------
                 # Multi-level pipeline progress (pipes = stages, tasks = items)
                 # ------------------------------------------------------------------
-                try:
-                    quiet_mode = bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False
-                except Exception:
-                    quiet_mode = False
-                try:
-                    import sys as _sys
-                    if (not quiet_mode) and bool(getattr(_sys.stderr, "isatty", lambda: False)()):
-                        from models import PipelineLiveProgress
-                        pipe_stage_indices: List[int] = []
-                        pipe_labels: List[str] = []
-                        for idx, tokens in enumerate(stages):
-                            if not tokens:
-                                continue
-                            name = str(tokens[0]).replace("_", "-").lower()
-                            if name == "@" or name.startswith("@"):
-                                continue
-                            # `.pipe` (MPV) is an interactive launcher; disable pipeline Live progress
-                            # for it because it doesn't meaningfully "complete" (mpv may keep running)
-                            # and Live output interferes with MPV playlist UI.
-                            if name == ".pipe":
-                                continue
-                            pipe_stage_indices.append(idx)
-                            pipe_labels.append(name)
-                        if pipe_labels:
-                            progress_ui = PipelineLiveProgress(pipe_labels, enabled=True)
-                            progress_ui.start()
-                            try:
-                                import pipeline as _pipeline_ctx
-                                if hasattr(_pipeline_ctx, "set_live_progress"):
-                                    _pipeline_ctx.set_live_progress(progress_ui)
-                            except Exception:
-                                pass
-                        pipe_index_by_stage = {stage_idx: pipe_idx for pipe_idx, stage_idx in enumerate(pipe_stage_indices)}
-                except Exception:
-                    progress_ui = None
-                    pipe_index_by_stage = {}
+                progress_ui, pipe_index_by_stage = self._maybe_start_live_progress(config, stages)

             for stage_index, stage_tokens in enumerate(stages):
                 if not stage_tokens:
@@ -1707,7 +1846,7 @@ class PipelineExecutor:
                 if not stage_table:
                     stage_table = ctx.get_last_result_table()
                 items_list = ctx.get_last_result_items() or []
-                resolved_items = _resolve_items_for_selection(stage_table, items_list)
+                resolved_items = items_list if items_list else []
                 filtered = [resolved_items[i] for i in selected_indices if 0 <= i < len(resolved_items)]
                 if not filtered:
                     print("No items matched selection\n")
@@ -1715,7 +1854,7 @@ class PipelineExecutor:
pipeline_error = "Empty selection" pipeline_error = "Empty selection"
return return
if _maybe_run_class_selector(filtered, stage_is_last=(stage_index + 1 >= len(stages))): if PipelineExecutor._maybe_run_class_selector(ctx, config, filtered, stage_is_last=(stage_index + 1 >= len(stages))):
return return
# Special case: selecting multiple tags from get-tag and piping into delete-tag # Special case: selecting multiple tags from get-tag and piping into delete-tag
@@ -1841,9 +1980,11 @@ class PipelineExecutor:
                 on_emit = None
                 if progress_ui is not None and pipe_idx is not None:
-                    def _on_emit(obj: Any, _idx: int = int(pipe_idx)) -> None:
+                    _ui = cast(Any, progress_ui)
+
+                    def _on_emit(obj: Any, _idx: int = int(pipe_idx), _progress=_ui) -> None:
                         try:
-                            progress_ui.on_emit(_idx, obj)
+                            _progress.on_emit(_idx, obj)
                         except Exception:
                             pass
                     on_emit = _on_emit
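The `_progress=_ui` default argument is the standard fix for Python's late-binding closures: a closure reads the enclosing variable at call time, so if `progress_ui` is later rebound (for example, cleared when the Live UI shuts down), an in-flight emit would hit the new value. Defaults are evaluated once, at def time. A self-contained illustration (not from this commit):

def make_handlers_buggy(uis):
    # Every lambda closes over the same `ui` variable; all call the last one.
    return [lambda: ui.refresh() for ui in uis]

def make_handlers_fixed(uis):
    # Each lambda captures its own value via a default argument.
    return [lambda _ui=ui: _ui.refresh() for ui in uis]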


@@ -23,6 +23,15 @@ except ImportError:
 class Libgen(Provider):
+    # Domains that should be routed to this provider when the user supplies a URL.
+    # (Used by ProviderCore.registry.match_provider_name_for_url)
+    URL_DOMAINS = (
+        "libgen.gl",
+        "libgen.li",
+        "libgen.is",
+        "libgen.rs",
+        "libgen.st",
+    )
+
     """Search provider for Library Genesis books."""

     def search(

View File

@@ -1,9 +1,11 @@
 from __future__ import annotations

 from abc import ABC, abstractmethod
-from typing import Any, Dict, List, Optional, Type
+from typing import Any, Dict, List, Optional, Type, cast

 import requests
 import sys
+import json
+import subprocess

 from SYS.logger import log, debug
@@ -13,6 +15,12 @@ except ImportError: # pragma: no cover - optional
     musicbrainzngs = None

+try:  # Optional dependency
+    import yt_dlp  # type: ignore
+except ImportError:  # pragma: no cover - optional
+    yt_dlp = None
+

 class MetadataProvider(ABC):
     """Base class for metadata providers (music, movies, books, etc.)."""
@@ -351,6 +359,157 @@ class MusicBrainzMetadataProvider(MetadataProvider):
         return tags

+
+class YtdlpMetadataProvider(MetadataProvider):
+    """Metadata provider that extracts tags from a supported URL using yt-dlp.
+
+    This does NOT download media; it only probes metadata.
+    """
+
+    @property
+    def name(self) -> str:  # type: ignore[override]
+        return "ytdlp"
+
+    def _extract_info(self, url: str) -> Optional[Dict[str, Any]]:
+        url = (url or "").strip()
+        if not url:
+            return None
+        # Prefer Python module when available.
+        if yt_dlp is not None:
+            try:
+                opts: Any = {
+                    "quiet": True,
+                    "no_warnings": True,
+                    "skip_download": True,
+                    "noprogress": True,
+                    "socket_timeout": 15,
+                    "retries": 1,
+                    "playlist_items": "1-10",
+                }
+                with yt_dlp.YoutubeDL(opts) as ydl:  # type: ignore[attr-defined]
+                    info = ydl.extract_info(url, download=False)
+                return cast(Dict[str, Any], info) if isinstance(info, dict) else None
+            except Exception:
+                pass
+        # Fallback to CLI.
+        try:
+            cmd = [
+                "yt-dlp",
+                "-J",
+                "--no-warnings",
+                "--skip-download",
+                "--playlist-items",
+                "1-10",
+                url,
+            ]
+            proc = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
+            if proc.returncode != 0:
+                return None
+            payload = (proc.stdout or "").strip()
+            if not payload:
+                return None
+            data = json.loads(payload)
+            return data if isinstance(data, dict) else None
+        except Exception:
+            return None
+
+    def search(self, query: str, limit: int = 10) -> List[Dict[str, Any]]:
+        url = (query or "").strip()
+        if not url.startswith(("http://", "https://")):
+            return []
+        info = self._extract_info(url)
+        if not isinstance(info, dict):
+            return []
+        upload_date = str(info.get("upload_date") or "")
+        release_date = str(info.get("release_date") or "")
+        year = (release_date or upload_date)[:4] if (release_date or upload_date) else ""
+        # Provide basic columns for the standard metadata selection table.
+        # NOTE: This is best-effort; many extractors don't provide artist/album.
+        artist = (
+            info.get("artist")
+            or info.get("uploader")
+            or info.get("channel")
+            or ""
+        )
+        album = info.get("album") or info.get("playlist_title") or ""
+        title = info.get("title") or ""
+        return [
+            {
+                "title": title,
+                "artist": str(artist or ""),
+                "album": str(album or ""),
+                "year": str(year or ""),
+                "provider": self.name,
+                "url": url,
+                "raw": info,
+            }
+        ]
+
+    def to_tags(self, item: Dict[str, Any]) -> List[str]:
+        raw = item.get("raw")
+        if not isinstance(raw, dict):
+            return super().to_tags(item)
+        tags: List[str] = []
+        try:
+            from metadata import extract_ytdlp_tags
+        except Exception:
+            extract_ytdlp_tags = None  # type: ignore[assignment]
+        if extract_ytdlp_tags:
+            try:
+                tags.extend(extract_ytdlp_tags(raw))
+            except Exception:
+                pass
+
+        # Subtitle availability tags
+        def _langs(value: Any) -> List[str]:
+            if not isinstance(value, dict):
+                return []
+            out: List[str] = []
+            for k in value.keys():
+                if isinstance(k, str) and k.strip():
+                    out.append(k.strip().lower())
+            return sorted(set(out))
+
+        # If this is a playlist container, subtitle/captions are usually per-entry.
+        info_for_subs: Dict[str, Any] = raw
+        entries = raw.get("entries")
+        if isinstance(entries, list) and entries:
+            first = entries[0]
+            if isinstance(first, dict):
+                info_for_subs = first
+        for lang in _langs(info_for_subs.get("subtitles")):
+            tags.append(f"subs:{lang}")
+        for lang in _langs(info_for_subs.get("automatic_captions")):
+            tags.append(f"subs_auto:{lang}")
+        # Always include source tag for parity with other providers.
+        tags.append(f"source:{self.name}")
+        # Dedup case-insensitively, preserve order.
+        seen = set()
+        out: List[str] = []
+        for t in tags:
+            if not isinstance(t, str):
+                continue
+            s = t.strip()
+            if not s:
+                continue
+            k = s.lower()
+            if k in seen:
+                continue
+            seen.add(k)
+            out.append(s)
+        return out
+

 # Registry ---------------------------------------------------------------

 _METADATA_PROVIDERS: Dict[str, Type[MetadataProvider]] = {
@@ -359,6 +518,7 @@ _METADATA_PROVIDERS: Dict[str, Type[MetadataProvider]] = {
"googlebooks": GoogleBooksMetadataProvider, "googlebooks": GoogleBooksMetadataProvider,
"google": GoogleBooksMetadataProvider, "google": GoogleBooksMetadataProvider,
"musicbrainz": MusicBrainzMetadataProvider, "musicbrainz": MusicBrainzMetadataProvider,
"ytdlp": YtdlpMetadataProvider,
} }
@@ -370,7 +530,7 @@ def list_metadata_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str
     availability: Dict[str, bool] = {}
     for name, cls in _METADATA_PROVIDERS.items():
         try:
-            provider = cls(config)
+            _ = cls(config)
             # Basic availability check: perform lightweight validation if defined
             availability[name] = True
         except Exception:
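A quick usage sketch of the new provider (the URL is illustrative, the constructor is assumed to accept a config dict as the registry's `cls(config)` call implies, and the exact tag strings depend on what the extractor returns):

provider = YtdlpMetadataProvider({})
rows = provider.search("https://www.youtube.com/watch?v=abc123")  # probes metadata, no download
if rows:
    tags = provider.to_tags(rows[0])
    # e.g. ["title:...", "subs:en", "subs_auto:en", "source:ytdlp"]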

View File

@@ -11,7 +11,8 @@ import sys
 import tempfile
 import time
 from pathlib import Path
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any, Callable, Dict, List, Optional, Tuple
+from urllib.parse import urlparse

 import requests
@@ -183,7 +184,44 @@ def _resolve_archive_id(session: requests.Session, edition_id: str, ia_candidate
return "" return ""
def _archive_id_from_url(url: str) -> str:
"""Best-effort extraction of an Archive.org item identifier from a URL."""
u = str(url or "").strip()
if not u:
return ""
try:
p = urlparse(u)
host = (p.hostname or "").lower().strip()
if not host.endswith("archive.org"):
return ""
parts = [x for x in (p.path or "").split("/") if x]
except Exception:
return ""
# Common patterns:
# - /details/<id>/...
# - /borrow/<id>
# - /download/<id>/...
if len(parts) >= 2 and parts[0].lower() in {"details", "borrow", "download", "stream"}:
return str(parts[1]).strip()
# Sometimes the identifier is the first segment.
if len(parts) >= 1:
first = str(parts[0]).strip()
if first and first.lower() not in {"account", "services", "search", "advancedsearch.php"}:
return first
return ""
class OpenLibrary(Provider): class OpenLibrary(Provider):
# Domains that should be routed to this provider when the user supplies a URL.
# (Used by ProviderCore.registry.match_provider_name_for_url)
URL_DOMAINS = (
"openlibrary.org",
"archive.org",
)
"""Search provider for OpenLibrary books + Archive.org direct/borrow download.""" """Search provider for OpenLibrary books + Archive.org direct/borrow download."""
def __init__(self, config: Optional[Dict[str, Any]] = None): def __init__(self, config: Optional[Dict[str, Any]] = None):
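Behavior of the new extractor on typical inputs (identifiers are illustrative):

_archive_id_from_url("https://archive.org/details/somebook/page/n5")  # -> "somebook"
_archive_id_from_url("https://archive.org/borrow/somebook")           # -> "somebook"
_archive_id_from_url("https://web.archive.org/details/somebook")      # -> "somebook" (subdomain matches the archive.org suffix check)
_archive_id_from_url("https://openlibrary.org/books/OL1M")            # -> "" (wrong host)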
@@ -311,6 +349,60 @@ class OpenLibrary(Provider):
             pass
         raise RuntimeError("Something went wrong when trying to return the book")

+    @staticmethod
+    def _archive_logout(session: requests.Session) -> None:
+        """Best-effort logout from archive.org.
+
+        Archive sessions are cookie-based; returning the loan is the critical step.
+        Logout is attempted for cleanliness but failures should not abort the workflow.
+        """
+        if session is None:
+            return
+        for url in (
+            "https://archive.org/account/logout",
+            "https://archive.org/account/logout.php",
+        ):
+            try:
+                resp = session.get(url, timeout=15, allow_redirects=True)
+                code = int(getattr(resp, "status_code", 0) or 0)
+                if code and code < 500:
+                    return
+            except Exception:
+                continue
+
+    @staticmethod
+    def _archive_is_lendable(book_id: str) -> tuple[bool, str]:
+        """Heuristic lendable check using Archive.org item metadata.
+
+        Some lendable items do not map cleanly to an OpenLibrary edition id.
+        In practice, Archive metadata collections often include markers like:
+          - inlibrary
+          - printdisabled
+        """
+        ident = str(book_id or "").strip()
+        if not ident:
+            return False, "no-archive-id"
+        try:
+            resp = requests.get(f"https://archive.org/metadata/{ident}", timeout=8)
+            resp.raise_for_status()
+            data = resp.json() if resp is not None else {}
+            meta = data.get("metadata", {}) if isinstance(data, dict) else {}
+            collection = meta.get("collection") if isinstance(meta, dict) else None
+            values: List[str] = []
+            if isinstance(collection, list):
+                values = [str(x).strip().lower() for x in collection if str(x).strip()]
+            elif isinstance(collection, str):
+                values = [collection.strip().lower()]
+            if any(v in {"inlibrary", "printdisabled", "lendinglibrary"} for v in values):
+                return True, "archive-collection"
+            return False, "archive-not-lendable"
+        except Exception:
+            return False, "archive-metadata-error"
+
     @staticmethod
     def _archive_get_book_infos(session: requests.Session, url: str) -> Tuple[str, List[str], Dict[str, Any]]:
         """Extract page links from Archive.org book reader."""
@@ -430,6 +522,7 @@ class OpenLibrary(Provider):
         links: List[str],
         scale: int,
         book_id: str,
+        progress_callback: Optional[Callable[[int, int], None]] = None,
     ) -> List[str]:
         links_scaled = [f"{link}&rotate=0&scale={scale}" for link in links]
         pages = len(links_scaled)
@@ -448,7 +541,20 @@ class OpenLibrary(Provider):
                     pages=pages,
                 )
             )
-        if tqdm:
+        if progress_callback is not None:
+            done = 0
+            total = len(tasks)
+            for fut in futures.as_completed(tasks):
+                try:
+                    _ = fut.result()
+                except Exception:
+                    pass
+                done += 1
+                try:
+                    progress_callback(done, total)
+                except Exception:
+                    pass
+        elif tqdm:
             for _ in tqdm(futures.as_completed(tasks), total=len(tasks)):  # type: ignore
                 pass
         else:
@@ -904,15 +1010,20 @@ class OpenLibrary(Provider):
         return results

-    def download(self, result: SearchResult, output_dir: Path) -> Optional[Path]:
+    def download(
+        self,
+        result: SearchResult,
+        output_dir: Path,
+        progress_callback: Optional[Callable[[str, int, Optional[int], str], None]] = None,
+    ) -> Optional[Path]:
         output_dir = Path(output_dir)
         output_dir.mkdir(parents=True, exist_ok=True)

         meta = result.full_metadata or {}
         edition_id = str(meta.get("openlibrary_id") or "").strip()
-        if not edition_id:
-            log("[openlibrary] Missing openlibrary_id; cannot download", file=sys.stderr)
-            return None
+        # Accept direct Archive.org URLs too (details/borrow/download) even when no OL edition id is known.
+        archive_id = str(meta.get("archive_id") or "").strip()

         ia_ids = meta.get("ia") or []
         if isinstance(ia_ids, str):
@@ -921,12 +1032,23 @@ class OpenLibrary(Provider):
             ia_ids = []
         ia_candidates = [str(x) for x in ia_ids if x]

-        archive_id = _resolve_archive_id(self._session, edition_id, ia_candidates)
+        if not archive_id:
+            archive_id = _first_str(ia_candidates) or ""
+        if not archive_id and edition_id:
+            archive_id = _resolve_archive_id(self._session, edition_id, ia_candidates)
+        if not archive_id:
+            # Try to extract identifier from the SearchResult path (URL).
+            archive_id = _archive_id_from_url(str(getattr(result, "path", "") or ""))
         if not archive_id:
             log("[openlibrary] No archive identifier available; cannot download", file=sys.stderr)
             return None

         safe_title = sanitize_filename(result.title)
+        if not safe_title or "http" in safe_title.lower():
+            safe_title = sanitize_filename(archive_id) or "archive"

         # 1) Direct download if available.
         try:
@@ -935,8 +1057,22 @@ class OpenLibrary(Provider):
can_direct, pdf_url = False, "" can_direct, pdf_url = False, ""
if can_direct and pdf_url: if can_direct and pdf_url:
try:
if progress_callback is not None:
progress_callback("step", 0, None, "direct download")
except Exception:
pass
out_path = unique_path(output_dir / f"{safe_title}.pdf") out_path = unique_path(output_dir / f"{safe_title}.pdf")
ok = download_file(pdf_url, out_path, session=self._session) ok = download_file(
pdf_url,
out_path,
session=self._session,
progress_callback=(
(lambda downloaded, total, label: progress_callback("bytes", downloaded, total, label))
if progress_callback is not None
else None
),
)
if ok: if ok:
return out_path return out_path
log("[openlibrary] Direct download failed", file=sys.stderr) log("[openlibrary] Direct download failed", file=sys.stderr)
@@ -949,65 +1085,131 @@ class OpenLibrary(Provider):
log("[openlibrary] Archive credentials missing; cannot borrow", file=sys.stderr) log("[openlibrary] Archive credentials missing; cannot borrow", file=sys.stderr)
return None return None
lendable, reason = _check_lendable(self._session, edition_id) lendable = True
reason = ""
if edition_id:
lendable, reason = _check_lendable(self._session, edition_id)
if not lendable:
# OpenLibrary API can be a false-negative; fall back to Archive metadata.
lendable2, reason2 = self._archive_is_lendable(archive_id)
if lendable2:
lendable, reason = True, reason2
else:
lendable, reason = self._archive_is_lendable(archive_id)
if not lendable: if not lendable:
log(f"[openlibrary] Not lendable: {reason}", file=sys.stderr) log(f"[openlibrary] Not lendable: {reason}", file=sys.stderr)
return None return None
session = self._archive_login(email, password) session = self._archive_login(email, password)
loaned = False
try: try:
session = self._archive_loan(session, archive_id, verbose=False)
except self.BookNotAvailableError:
log("[openlibrary] Book not available to borrow", file=sys.stderr)
return None
except Exception:
log("[openlibrary] Borrow failed", file=sys.stderr)
return None
urls = [f"https://archive.org/borrow/{archive_id}", f"https://archive.org/details/{archive_id}"]
title = safe_title
links: Optional[List[str]] = None
last_exc: Optional[Exception] = None
for u in urls:
try: try:
title_raw, links, _metadata = self._archive_get_book_infos(session, u) if progress_callback is not None:
if title_raw: progress_callback("step", 0, None, "login")
title = sanitize_filename(title_raw)
break
except Exception as exc:
last_exc = exc
continue
if not links:
log(f"[openlibrary] Failed to extract pages: {last_exc}", file=sys.stderr)
return None
temp_dir = tempfile.mkdtemp(prefix=f"{title}_", dir=str(output_dir))
try:
images = self._archive_download(session=session, n_threads=10, directory=temp_dir, links=links, scale=3, book_id=archive_id)
pdf_bytes = _image_paths_to_pdf_bytes(images)
if not pdf_bytes:
# Keep images folder for manual conversion.
log("[openlibrary] PDF conversion failed; keeping images folder", file=sys.stderr)
return Path(temp_dir)
pdf_path = unique_path(output_dir / f"{title}.pdf")
with open(pdf_path, "wb") as f:
f.write(pdf_bytes)
try:
shutil.rmtree(temp_dir)
except Exception: except Exception:
pass pass
return pdf_path
except Exception:
try: try:
shutil.rmtree(temp_dir) session = self._archive_loan(session, archive_id, verbose=False)
loaned = True
except self.BookNotAvailableError:
log("[openlibrary] Book not available to borrow", file=sys.stderr)
return None
except Exception:
log("[openlibrary] Borrow failed", file=sys.stderr)
return None
try:
if progress_callback is not None:
progress_callback("step", 0, None, "borrow")
except Exception:
pass
urls = [f"https://archive.org/borrow/{archive_id}", f"https://archive.org/details/{archive_id}"]
title = safe_title
links: Optional[List[str]] = None
last_exc: Optional[Exception] = None
for u in urls:
try:
title_raw, links, _metadata = self._archive_get_book_infos(session, u)
if title_raw:
title = sanitize_filename(title_raw)
break
except Exception as exc:
last_exc = exc
continue
if not links:
log(f"[openlibrary] Failed to extract pages: {last_exc}", file=sys.stderr)
return None
try:
if progress_callback is not None:
progress_callback("step", 0, None, "download pages")
except Exception:
pass
temp_dir = tempfile.mkdtemp(prefix=f"{title}_", dir=str(output_dir))
try:
images = self._archive_download(
session=session,
n_threads=10,
directory=temp_dir,
links=links,
scale=3,
book_id=archive_id,
progress_callback=(
(lambda done, total: progress_callback("pages", done, total, "pages"))
if progress_callback is not None
else None
),
)
pdf_bytes = _image_paths_to_pdf_bytes(images)
if not pdf_bytes:
# Keep images folder for manual conversion.
log("[openlibrary] PDF conversion failed; keeping images folder", file=sys.stderr)
return Path(temp_dir)
try:
if progress_callback is not None:
progress_callback("step", 0, None, "stitch pdf")
except Exception:
pass
pdf_path = unique_path(output_dir / f"{title}.pdf")
with open(pdf_path, "wb") as f:
f.write(pdf_bytes)
try:
shutil.rmtree(temp_dir)
except Exception:
pass
return pdf_path
except Exception:
try:
shutil.rmtree(temp_dir)
except Exception:
pass
raise
finally:
# Always return the loan after a successful borrow, even if download/stitch fails.
if loaned:
try:
if progress_callback is not None:
progress_callback("step", 0, None, "return book")
except Exception:
pass
try:
self._archive_return_loan(session, archive_id)
except Exception as exc:
log(f"[openlibrary] Warning: failed to return loan: {exc}", file=sys.stderr)
try:
self._archive_logout(session)
except Exception: except Exception:
pass pass
raise
except Exception as exc: except Exception as exc:
log(f"[openlibrary] Borrow workflow error: {exc}", file=sys.stderr) log(f"[openlibrary] Borrow workflow error: {exc}", file=sys.stderr)


@@ -1,7 +1,7 @@
 from __future__ import annotations

 from pathlib import Path
-from typing import Optional
+from typing import Callable, Optional

 import sys
 import requests
@@ -22,13 +22,20 @@ def sanitize_filename(name: str, *, max_len: int = 150) -> str:
     return cleaned[:max_len]

-def download_file(url: str, output_path: Path, *, session: Optional[requests.Session] = None, timeout_s: float = 30.0) -> bool:
+def download_file(
+    url: str,
+    output_path: Path,
+    *,
+    session: Optional[requests.Session] = None,
+    timeout_s: float = 30.0,
+    progress_callback: Optional[Callable[[int, Optional[int], str], None]] = None,
+) -> bool:
     output_path = Path(output_path)
     output_path.parent.mkdir(parents=True, exist_ok=True)
     s = session or requests.Session()
-    bar = ProgressBar()
+    bar = ProgressBar() if progress_callback is None else None
     downloaded = 0
     total = None
@@ -41,9 +48,14 @@ def download_file(url: str, output_path: Path, *, session: Optional[requests.Ses
         except Exception:
             total = None

+        label = str(output_path.name or "download")
         # Render once immediately so fast downloads still show something.
         try:
-            bar.update(downloaded=0, total=total, label=str(output_path.name or "download"), file=sys.stderr)
+            if progress_callback is not None:
+                progress_callback(0, total, label)
+            elif bar is not None:
+                bar.update(downloaded=0, total=total, label=label, file=sys.stderr)
         except Exception:
             pass
@@ -53,18 +65,23 @@ def download_file(url: str, output_path: Path, *, session: Optional[requests.Ses
                     f.write(chunk)
                     downloaded += len(chunk)
                     try:
-                        bar.update(downloaded=downloaded, total=total, label=str(output_path.name or "download"), file=sys.stderr)
+                        if progress_callback is not None:
+                            progress_callback(downloaded, total, label)
+                        elif bar is not None:
+                            bar.update(downloaded=downloaded, total=total, label=label, file=sys.stderr)
                     except Exception:
                         pass
         try:
-            bar.finish()
+            if bar is not None:
+                bar.finish()
         except Exception:
             pass
         return output_path.exists() and output_path.stat().st_size > 0
     except Exception:
         try:
-            bar.finish()
+            if bar is not None:
+                bar.finish()
         except Exception:
             pass
         try:
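Usage sketch for the new hook (URL and rendering are illustrative): passing a callback suppresses the built-in ProgressBar and routes byte counts to the caller, which is how the OpenLibrary provider forwards them to the pipeline Live UI.

def on_bytes(downloaded: int, total, label: str) -> None:
    pct = f"{100 * downloaded // total}%" if total else f"{downloaded}B"
    print(f"\r{label}: {pct}", end="", file=sys.stderr)

ok = download_file("https://example.org/book.pdf", Path("out/book.pdf"), progress_callback=on_bytes)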

View File

@@ -6,8 +6,9 @@ This module is the single source of truth for provider discovery.
 from __future__ import annotations

-from typing import Any, Dict, Optional, Type
+from typing import Any, Dict, Optional, Sequence, Type
 import sys
+from urllib.parse import urlparse

 from SYS.logger import log
@@ -141,6 +142,45 @@ def list_file_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bo
     return availability

+
+def match_provider_name_for_url(url: str) -> Optional[str]:
+    """Return a registered provider name that claims the URL's domain.
+
+    Providers can declare domains via a class attribute `URL_DOMAINS` (sequence of strings).
+    This matcher is intentionally cheap (no provider instantiation, no network).
+    """
+    try:
+        parsed = urlparse(str(url))
+        host = (parsed.hostname or "").strip().lower()
+    except Exception:
+        host = ""
+    if not host:
+        return None
+    for name, provider_class in _PROVIDERS.items():
+        domains = getattr(provider_class, "URL_DOMAINS", None)
+        if not isinstance(domains, (list, tuple)):
+            continue
+        for d in domains:
+            dom = str(d or "").strip().lower()
+            if not dom:
+                continue
+            if host == dom or host.endswith("." + dom):
+                return name
+    return None
+
+
+def get_provider_for_url(url: str, config: Optional[Dict[str, Any]] = None) -> Optional[Provider]:
+    """Instantiate and return the matching provider for a URL, if any."""
+    name = match_provider_name_for_url(url)
+    if not name:
+        return None
+    return get_provider(name, config)
+
+
 __all__ = [
     "SearchResult",
     "Provider",
@@ -152,5 +192,7 @@ __all__ = [
"list_search_providers", "list_search_providers",
"get_file_provider", "get_file_provider",
"list_file_providers", "list_file_providers",
"match_provider_name_for_url",
"get_provider_for_url",
"download_soulseek_file", "download_soulseek_file",
] ]
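Given the URL_DOMAINS declared on Libgen and OpenLibrary in this commit, the matcher resolves like this (the registry key names "libgen"/"openlibrary" are assumptions based on the class names):

match_provider_name_for_url("https://libgen.rs/search.php?req=...")  # -> "libgen"
match_provider_name_for_url("https://www.libgen.is/book/...")        # -> "libgen" (subdomain via the "." suffix rule)
match_provider_name_for_url("https://archive.org/details/item")      # -> "openlibrary"
match_provider_name_for_url("https://example.com/file.pdf")          # -> None (no provider claims it)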

View File

@@ -584,10 +584,15 @@ def _download_direct_file(
filename = filename.split("?")[0] filename = filename.split("?")[0]
# Try to get real filename from Content-Disposition header (HEAD request) # Try to get real filename from Content-Disposition header (HEAD request)
content_type = ""
try: try:
with HTTPClient(timeout=10.0) as client: with HTTPClient(timeout=10.0) as client:
response = client._request("HEAD", url, follow_redirects=True) response = client._request("HEAD", url, follow_redirects=True)
content_disposition = response.headers.get("content-disposition", "") content_disposition = response.headers.get("content-disposition", "")
try:
content_type = str(response.headers.get("content-type", "") or "").strip().lower()
except Exception:
content_type = ""
if content_disposition: if content_disposition:
# Extract filename from Content-Disposition header # Extract filename from Content-Disposition header
# Format: attachment; filename="filename.pdf" or filename=filename.pdf # Format: attachment; filename="filename.pdf" or filename=filename.pdf
@@ -620,9 +625,36 @@ def _download_direct_file(
             else:
                 filename = suggested

-    # Final fallback if we still don't have a good filename
-    if not filename or "." not in filename:
-        filename = "downloaded_file.bin"
+    # If we still don't have an extension, try to infer one from Content-Type.
+    # Never fall back to a generic `.bin` extension.
+    try:
+        has_ext = bool(filename and Path(str(filename)).suffix)
+    except Exception:
+        has_ext = False
+    if filename and (not has_ext):
+        ct = (content_type or "").split(";")[0].strip().lower()
+        ext_by_ct = {
+            "application/pdf": ".pdf",
+            "application/epub+zip": ".epub",
+            "application/x-mobipocket-ebook": ".mobi",
+            "image/jpeg": ".jpg",
+            "image/png": ".png",
+            "image/webp": ".webp",
+            "image/gif": ".gif",
+            "text/plain": ".txt",
+            "application/zip": ".zip",
+        }
+        if ct in ext_by_ct:
+            filename = f"{filename}{ext_by_ct[ct]}"
+        elif ct.startswith("text/html"):
+            # Guardrail: HTML landing pages should not be downloaded as opaque files.
+            raise DownloadError("URL appears to be an HTML page, not a direct file")
+
+    # Final guardrail: if filename is empty, refuse rather than inventing `download.bin`.
+    if not filename or not str(filename).strip():
+        raise DownloadError("Could not determine filename for URL (no Content-Disposition and no path filename)")

     file_path = _unique_path(output_dir / filename)
     progress_bar = ProgressBar()
@@ -684,9 +716,15 @@ def _download_direct_file(
# For direct file downloads, create minimal info dict without filename as title # For direct file downloads, create minimal info dict without filename as title
# This prevents creating duplicate title: tags when filename gets auto-generated # This prevents creating duplicate title: tags when filename gets auto-generated
# We'll add title back later only if we couldn't extract meaningful tags # We'll add title back later only if we couldn't extract meaningful tags
ext = ""
try:
ext = Path(str(filename)).suffix.lstrip(".")
except Exception:
ext = ""
info = { info = {
"id": filename.rsplit(".", 1)[0], "id": str(filename).rsplit(".", 1)[0] if "." in str(filename) else str(filename),
"ext": filename.rsplit(".", 1)[1] if "." in filename else "bin", "ext": ext,
"webpage_url": url, "webpage_url": url,
} }
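# Illustration of the inference above (values invented): a HEAD response with
# Content-Type "application/epub+zip" and a bare filename "mybook" yields
# "mybook.epub"; "text/html" raises DownloadError instead of saving a landing page;
# an empty filename now refuses rather than writing "downloaded_file.bin".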

SYS/pipeline_progress.py (new file, 218 lines)
View File

@@ -0,0 +1,218 @@
from __future__ import annotations
import sys
from contextlib import contextmanager
from typing import Any, Iterator, Optional, Sequence, Tuple
class PipelineProgress:
"""Small adapter around PipelineLiveProgress.
This centralizes the boilerplate used across cmdlets:
- locating the active Live UI (if any)
- resolving the current pipe_index from stage context
- step-based progress (begin_pipe_steps/advance_pipe_step)
- optional pipe percent/status updates
- optional byte transfer bars
- optional local Live panel when a cmdlet runs standalone
The class is intentionally defensive: all UI operations are best-effort.
"""
def __init__(self, pipeline_module: Any):
self._ctx = pipeline_module
self._local_ui: Optional[Any] = None
self._local_attached: bool = False
def ui_and_pipe_index(self) -> Tuple[Optional[Any], int]:
ui = None
try:
ui = self._ctx.get_live_progress() if hasattr(self._ctx, "get_live_progress") else None
except Exception:
ui = None
pipe_idx: int = 0
try:
stage_ctx = self._ctx.get_stage_context() if hasattr(self._ctx, "get_stage_context") else None
maybe_idx = getattr(stage_ctx, "pipe_index", None) if stage_ctx is not None else None
if isinstance(maybe_idx, int):
pipe_idx = int(maybe_idx)
except Exception:
pipe_idx = 0
return ui, pipe_idx
def begin_steps(self, total_steps: int) -> None:
ui, pipe_idx = self.ui_and_pipe_index()
if ui is None:
return
try:
begin = getattr(ui, "begin_pipe_steps", None)
if callable(begin):
begin(int(pipe_idx), total_steps=int(total_steps))
except Exception:
return
def step(self, text: str) -> None:
ui, pipe_idx = self.ui_and_pipe_index()
if ui is None:
return
try:
adv = getattr(ui, "advance_pipe_step", None)
if callable(adv):
adv(int(pipe_idx), str(text))
except Exception:
return
def set_percent(self, percent: int) -> None:
ui, pipe_idx = self.ui_and_pipe_index()
if ui is None:
return
try:
set_pct = getattr(ui, "set_pipe_percent", None)
if callable(set_pct):
set_pct(int(pipe_idx), int(percent))
except Exception:
return
def set_status(self, text: str) -> None:
ui, pipe_idx = self.ui_and_pipe_index()
if ui is None:
return
try:
setter = getattr(ui, "set_pipe_status_text", None)
if callable(setter):
setter(int(pipe_idx), str(text))
except Exception:
return
def clear_status(self) -> None:
ui, pipe_idx = self.ui_and_pipe_index()
if ui is None:
return
try:
clr = getattr(ui, "clear_pipe_status_text", None)
if callable(clr):
clr(int(pipe_idx))
except Exception:
return
def begin_transfer(self, *, label: str, total: Optional[int] = None) -> None:
ui, _ = self.ui_and_pipe_index()
if ui is None:
return
try:
fn = getattr(ui, "begin_transfer", None)
if callable(fn):
fn(label=str(label or "transfer"), total=total)
except Exception:
return
def update_transfer(self, *, label: str, completed: Optional[int], total: Optional[int] = None) -> None:
ui, _ = self.ui_and_pipe_index()
if ui is None:
return
try:
fn = getattr(ui, "update_transfer", None)
if callable(fn):
fn(label=str(label or "transfer"), completed=completed, total=total)
except Exception:
return
def finish_transfer(self, *, label: str) -> None:
ui, _ = self.ui_and_pipe_index()
if ui is None:
return
try:
fn = getattr(ui, "finish_transfer", None)
if callable(fn):
fn(label=str(label or "transfer"))
except Exception:
return
def on_emit(self, emitted: Any) -> None:
"""Advance local pipe progress after pipeline_context.emit().
The shared PipelineExecutor wires on_emit automatically for pipelines.
Standalone cmdlet runs do not, so cmdlets call this explicitly.
"""
if self._local_ui is None:
return
try:
self._local_ui.on_emit(0, emitted)
except Exception:
return
def ensure_local_ui(self, *, label: str, total_items: int, items_preview: Optional[Sequence[Any]] = None) -> bool:
"""Start a local PipelineLiveProgress panel if no shared UI exists."""
try:
existing = self._ctx.get_live_progress() if hasattr(self._ctx, "get_live_progress") else None
except Exception:
existing = None
if existing is not None:
return False
if not bool(getattr(sys.stderr, "isatty", lambda: False)()):
return False
try:
from models import PipelineLiveProgress
ui = PipelineLiveProgress([str(label or "pipeline")], enabled=True)
ui.start()
try:
if hasattr(self._ctx, "set_live_progress"):
self._ctx.set_live_progress(ui)
self._local_attached = True
except Exception:
self._local_attached = False
try:
ui.begin_pipe(0, total_items=max(1, int(total_items)), items_preview=list(items_preview or []))
except Exception:
pass
self._local_ui = ui
return True
except Exception:
self._local_ui = None
self._local_attached = False
return False
def close_local_ui(self, *, force_complete: bool = True) -> None:
if self._local_ui is None:
return
try:
try:
self._local_ui.finish_pipe(0, force_complete=bool(force_complete))
except Exception:
pass
try:
self._local_ui.stop()
except Exception:
pass
finally:
self._local_ui = None
try:
if self._local_attached and hasattr(self._ctx, "set_live_progress"):
self._ctx.set_live_progress(None)
except Exception:
pass
self._local_attached = False
@contextmanager
def local_ui_if_needed(
self,
*,
label: str,
total_items: int,
items_preview: Optional[Sequence[Any]] = None,
) -> Iterator["PipelineProgress"]:
created = self.ensure_local_ui(label=label, total_items=total_items, items_preview=items_preview)
try:
yield self
finally:
if created:
self.close_local_ui(force_complete=True)
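# Usage sketch (illustrative, not part of the module). A cmdlet running standalone
# gets a local Live panel only when no shared UI exists and stderr is a TTY:
#
#     import pipeline
#     progress = PipelineProgress(pipeline)
#     with progress.local_ui_if_needed(label="demo", total_items=1):
#         progress.begin_steps(2)
#         progress.step("fetching")      # advances the per-pipe step counter
#         progress.step("finalizing")    # every call is a no-op without a UI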

View File

@@ -1585,9 +1585,46 @@ def coerce_to_pipe_object(value: Any, default_path: Optional[str] = None) -> mod
        "warnings", "path", "relationships", "is_temp", "action", "parent_hash",
    }

-   # Convert ResultItem to dict to preserve all attributes
    # Convert common object-like results into a dict so we can preserve fields like
    # hash/store/url when they come from result tables (e.g., get-url emits UrlItem).
    #
    # Priority:
    #   1) explicit to_dict()
    #   2) best-effort attribute extraction for known PipeObject-ish fields
    if hasattr(value, 'to_dict'):
        value = value.to_dict()
    elif not isinstance(value, dict):
        try:
            obj_map: Dict[str, Any] = {}
            for k in (
                "hash",
                "store",
                "provider",
                "prov",
                "tag",
                "title",
                "url",
                "source_url",
                "duration",
                "duration_seconds",
                "metadata",
                "full_metadata",
                "warnings",
                "path",
                "target",
                "relationships",
                "is_temp",
                "action",
                "parent_hash",
                "extra",
                "media_kind",
            ):
                if hasattr(value, k):
                    obj_map[k] = getattr(value, k)
            if obj_map:
                value = obj_map
        except Exception:
            pass

    if isinstance(value, dict):
        # Extract hash and store (canonical identifiers)
@@ -1695,8 +1732,19 @@ def coerce_to_pipe_object(value: Any, default_path: Optional[str] = None) -> mod
    # Fallback: build from path argument or bare value
    hash_val = "unknown"
    path_val = default_path or getattr(value, "path", None)
    url_val: Optional[str] = None
    title_val = None

    # If the raw value is a string, treat it as either a URL or a file path.
    # This is important for @-selection results that are plain URL strings.
    if isinstance(value, str):
        s = value.strip()
        if s.lower().startswith(("http://", "https://")):
            url_val = s
            path_val = None
        else:
            path_val = s

    if path_val and path_val != "unknown":
        try:
            from SYS.utils import sha256_file
@@ -1708,8 +1756,9 @@ def coerce_to_pipe_object(value: Any, default_path: Optional[str] = None) -> mod
        except Exception:
            pass

-   # When coming from path argument, store should be "PATH" (file path, not a backend)
-   store_val = "PATH"
    # When coming from a raw URL string, mark it explicitly as URL.
    # Otherwise treat it as a local path.
    store_val = "URL" if url_val else "PATH"

    pipe_obj = models.PipeObject(
        hash=hash_val,
@@ -1717,6 +1766,8 @@ def coerce_to_pipe_object(value: Any, default_path: Optional[str] = None) -> mod
        provider=None,
        path=str(path_val) if path_val and path_val != "unknown" else None,
        title=title_val,
        url=url_val,
        source_url=url_val,
        tag=[],
        extra={},
    )
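# Illustration of the string-coercion rule above (values invented):
#   coerce_to_pipe_object("https://example.com/a.mp3")
#     -> PipeObject(store="URL", url="https://example.com/a.mp3", path=None)
#   coerce_to_pipe_object("C:/media/a.mp3")
#     -> PipeObject(store="PATH", path="C:/media/a.mp3"), hash computed when the file exists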

View File

@@ -12,6 +12,7 @@ import models
import pipeline as ctx
from API import HydrusNetwork as hydrus_wrapper
from SYS.logger import log, debug
from SYS.pipeline_progress import PipelineProgress
from SYS.utils_constant import ALL_SUPPORTED_EXTENSIONS
from Store import Store
from . import _shared as sh
@@ -73,6 +74,7 @@ class Add_File(Cmdlet):
    def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
        """Main execution entry point."""
        parsed = parse_cmdlet_args(args, self)
        progress = PipelineProgress(ctx)

        path_arg = parsed.get("path")
        location = parsed.get("store")
@@ -80,6 +82,35 @@
        provider_room = parsed.get("room")
        delete_after = parsed.get("delete", False)

        # Convenience: when piping a file into add-file, allow `-path <existing dir>`
        # to act as the destination export directory.
        # Example: screen-shot "https://..." | add-file -path "C:\Users\Admin\Desktop"
        if path_arg and not location and not provider_name:
            try:
                candidate_dir = Path(str(path_arg))
                if candidate_dir.exists() and candidate_dir.is_dir():
                    piped_items = result if isinstance(result, list) else [result]
                    has_local_source = False
                    for it in piped_items:
                        try:
                            po = coerce_to_pipe_object(it, None)
                            src = str(getattr(po, "path", "") or "").strip()
                            if not src:
                                continue
                            if src.lower().startswith(("http://", "https://", "magnet:", "torrent:")):
                                continue
                            if Path(src).is_file():
                                has_local_source = True
                                break
                        except Exception:
                            continue
                    if has_local_source:
                        debug(f"[add-file] Treating -path directory as destination: {candidate_dir}")
                        location = str(candidate_dir)
                        path_arg = None
            except Exception:
                pass

        stage_ctx = ctx.get_stage_context()
        is_last_stage = (stage_ctx is None) or bool(getattr(stage_ctx, "is_last_stage", False))
@@ -93,7 +124,7 @@
        is_storage_backend_location = False

        # Decide which items to process.
-       # - If user provided -path, treat this invocation as single-item.
        # - If user provided -path (and it was not reinterpreted as destination), treat this invocation as single-item.
        # - Otherwise, if piped input is a list, ingest each item.
        if path_arg:
            items_to_process: List[Any] = [result]
@@ -102,6 +133,17 @@
        else:
            items_to_process = [result]

        # Minimal step-based progress for single-item runs.
        # Many add-file flows don't emit intermediate items, so without steps the pipe can look "stuck".
        use_steps = False
        steps_started = False
        step2_done = False
        try:
            ui, _ = progress.ui_and_pipe_index()
            use_steps = (ui is not None) and (len(items_to_process) == 1)
        except Exception:
            use_steps = False

        debug(f"[add-file] INPUT result type={type(result).__name__}")
        if isinstance(result, list):
            debug(f"[add-file] INPUT result is list with {len(result)} items")
@@ -235,6 +277,14 @@
                    failures += 1
                    continue

                is_url_target = isinstance(media_path_or_url, str) and str(media_path_or_url).lower().startswith(
                    ("http://", "https://", "magnet:", "torrent:")
                )
                if use_steps and (not steps_started) and (not is_url_target):
                    progress.begin_steps(3)
                    progress.step("resolving source")
                    steps_started = True

                # Update pipe_obj with resolved path
                pipe_obj.path = str(media_path_or_url)
@@ -300,13 +350,34 @@
                            pass
                    temp_dir_to_cleanup = Path(tempfile.mkdtemp(prefix="medios_openlibrary_"))

                    # Wire OpenLibrary download progress into pipeline Live UI (no tqdm spam).
                    def _ol_progress(kind: str, completed: int, total: Optional[int], label: str) -> None:
                        try:
                            if kind == "pages" and total:
                                progress.set_status(f"downloading pages {completed}/{total}")
                                progress.set_percent(int(round((completed / max(1, total)) * 100.0)))
                            elif kind == "bytes" and total:
                                progress.set_status(f"downloading {label} {completed}/{total} bytes")
                                progress.set_percent(int(round((completed / max(1, total)) * 100.0)))
                            else:
                                progress.set_status("downloading")
                        except Exception:
                            return

                    try:
                        progress.set_percent(0)
                        progress.set_status("downloading openlibrary")
                    except Exception:
                        pass

                    sr = SearchResult(
                        table="openlibrary",
                        title=str(getattr(pipe_obj, "title", None) or "Unknown"),
                        path=str(media_path_or_url),
                        full_metadata=full_metadata if isinstance(full_metadata, dict) else {},
                    )
-                   downloaded = provider.download(sr, temp_dir_to_cleanup)
                    downloaded = provider.download(sr, temp_dir_to_cleanup, progress_callback=_ol_progress)
                    if downloaded is None:
                        log("[add-file] OpenLibrary download failed", file=sys.stderr)
                        failures += 1
@@ -325,6 +396,13 @@
                        pipe_obj.path = str(downloaded_path)
                        delete_after_item = True

                        # Use the adapter here: `pipe_idx` is not in scope at this point,
                        # and progress.* calls already no-op when no Live UI exists.
                        try:
                            progress.set_percent(100)
                            progress.set_status("downloaded")
                        except Exception:
                            pass

                # For non-provider URLs, or if still a URL after provider attempt, delegate to download-media.
                if isinstance(media_path_or_url, str) and media_path_or_url.lower().startswith(
                    ("http://", "https://", "magnet:", "torrent:")
@@ -562,6 +640,10 @@
                        failures += 1
                        continue

                if use_steps and steps_started and (not step2_done):
                    progress.step("writing destination")
                    step2_done = True

                if code == 0:
                    successes += 1
                else:
@@ -619,6 +701,9 @@
        except Exception:
            pass

        if use_steps and steps_started:
            progress.step("finalized")

        if successes > 0:
            return 0
        return 1
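# Shape of the progress_callback protocol wired to provider.download above
# (numbers invented): _ol_progress("pages", 3, 10, "") sets status
# "downloading pages 3/10" at 30%; _ol_progress("bytes", 512, 2048, "a.pdf")
# sets "downloading a.pdf 512/2048 bytes" at 25%.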

View File

@@ -34,6 +34,19 @@ class Add_Url(sh.Cmdlet):
        """Add URL to file via hash+store backend."""
        parsed = sh.parse_cmdlet_args(args, self)

        # Compatibility/piping fix:
        # `SharedArgs.QUERY` is positional in the shared parser, so `add-url <url>`
        # (and `@N | add-url <url>`) can mistakenly parse the URL into `query`.
        # If `url` is missing and `query` looks like an http(s) URL, treat it as `url`.
        try:
            if (not parsed.get("url")) and isinstance(parsed.get("query"), str):
                q = str(parsed.get("query") or "").strip()
                if q.startswith(("http://", "https://")):
                    parsed["url"] = q
                    parsed.pop("query", None)
        except Exception:
            pass

        query_hash = sh.parse_single_hash_query(parsed.get("query"))
        if parsed.get("query") and not query_hash:
            log("Error: -query must be of the form hash:<sha256>")

View File

@@ -29,7 +29,7 @@ class Delete_Url(Cmdlet):
        arg=[
            SharedArgs.QUERY,
            SharedArgs.STORE,
-           CmdletArg("url", required=True, description="URL to remove"),
            CmdletArg("url", required=False, description="URL to remove (optional when piping url rows)"),
        ],
        detail=[
            "- Removes URL association from file identified by hash+store",
@@ -69,22 +69,24 @@ class Delete_Url(Cmdlet):
            log("Error: No store name provided")
            return 1

-       if not url_arg:
-           log("Error: No URL provided")
-           return 1

        # Normalize hash (single-item mode)
        if not results and file_hash:
            file_hash = normalize_hash(file_hash)
            if not file_hash:
                log("Error: Invalid hash format")
                return 1

-       # Parse url (comma-separated)
-       urls = [u.strip() for u in str(url_arg).split(',') if u.strip()]
-       if not urls:
-           log("Error: No valid url provided")
-           return 1
        from metadata import normalize_urls

        def _urls_from_arg(raw: Any) -> List[str]:
            if raw is None:
                return []
            # Support comma-separated input for backwards compatibility
            if isinstance(raw, str) and "," in raw:
                return [u.strip() for u in raw.split(",") if u.strip()]
            return [u.strip() for u in normalize_urls(raw) if str(u).strip()]

        urls_from_cli = _urls_from_arg(url_arg)

        # Get backend and delete url
        try:
@@ -145,7 +147,17 @@ class Delete_Url(Cmdlet):
                    )
                    continue

-               batch.setdefault(store_text, []).append((normalized, list(urls)))
                # Determine which URLs to delete.
                # - If user passed an explicit <url>, apply it to all items.
                # - Otherwise, when piping url rows from get-url, delete the url(s) from each item.
                item_urls = list(urls_from_cli)
                if not item_urls:
                    item_urls = [u.strip() for u in normalize_urls(get_field(item, "url") or get_field(item, "source_url")) if str(u).strip()]
                if not item_urls:
                    ctx.print_if_visible("[delete-url] Warning: Item has no url field; skipping", file=sys.stderr)
                    continue

                batch.setdefault(store_text, []).append((normalized, item_urls))

            for store_text, pairs in batch.items():
                try:
@@ -168,24 +180,39 @@ class Delete_Url(Cmdlet):
                    for h, ulist in bulk_pairs:
                        backend.delete_url(h, ulist, config=config)

                deleted_count = 0
                for _h, ulist in bulk_pairs:
                    deleted_count += len(ulist or [])
                ctx.print_if_visible(
-                   f"✓ delete-url: {len(urls)} url(s) for {len(bulk_pairs)} item(s) in '{store_text}'",
                    f"✓ delete-url: {deleted_count} url(s) for {len(bulk_pairs)} item(s) in '{store_text}'",
                    file=sys.stderr,
                )

            for item in pass_through:
                existing = get_field(item, "url")
-               _set_item_url(item, _remove_urls(existing, list(urls)))
                # In batch mode we removed the union of requested urls for the file.
                # Using urls_from_cli (if present) matches the user's explicit intent; otherwise
                # remove the piped url row(s).
                remove_set = urls_from_cli
                if not remove_set:
                    remove_set = [u.strip() for u in normalize_urls(get_field(item, "url") or get_field(item, "source_url")) if str(u).strip()]
                _set_item_url(item, _remove_urls(existing, list(remove_set)))
                ctx.emit(item)

            return 0

        # Single-item mode
        if not urls_from_cli:
            urls_from_cli = [u.strip() for u in normalize_urls(get_field(result, "url") or get_field(result, "source_url")) if str(u).strip()]
        if not urls_from_cli:
            log("Error: No URL provided")
            return 1

        backend = storage[str(store_name)]
-       backend.delete_url(str(file_hash), urls, config=config)
-       ctx.print_if_visible(f"✓ delete-url: {len(urls)} url(s) removed", file=sys.stderr)
        backend.delete_url(str(file_hash), list(urls_from_cli), config=config)
        ctx.print_if_visible(f"✓ delete-url: {len(urls_from_cli)} url(s) removed", file=sys.stderr)
        if result is not None:
            existing = get_field(result, "url")
-           _set_item_url(result, _remove_urls(existing, list(urls)))
            _set_item_url(result, _remove_urls(existing, list(urls_from_cli)))
            ctx.emit(result)
        return 0
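# Hypothetical session showing the piped mode (hash and store names invented):
#   get-url -query hash:ab12cd -store local | @2 | delete-url
# removes the second listed URL row from the file without retyping the URL,
# while `delete-url <url>` still applies one explicit URL to every piped item.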

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -126,7 +126,7 @@ class Get_File(sh.Cmdlet):
                except Exception as exc:
                    log(f"Error opening browser: {exc}", file=sys.stderr)
            else:
-               log(f"Opened in browser: {source_path}", file=sys.stderr)
                debug(f"Opened in browser: {source_path}", file=sys.stderr)

        # Emit result for pipeline
        ctx.emit({

View File

@@ -47,6 +47,210 @@ except ImportError:
    extract_title = None
def _dedup_tags_preserve_order(tags: List[str]) -> List[str]:
"""Deduplicate tags case-insensitively while preserving order."""
out: List[str] = []
seen: set[str] = set()
for t in tags or []:
if not isinstance(t, str):
continue
s = t.strip()
if not s:
continue
key = s.lower()
if key in seen:
continue
seen.add(key)
out.append(s)
return out
def _extract_subtitle_tags(info: Dict[str, Any]) -> List[str]:
"""Extract subtitle availability tags from a yt-dlp info dict.
Produces multi-valued tags so languages can coexist:
- subs:<lang>
- subs_auto:<lang>
"""
def _langs(value: Any) -> List[str]:
if not isinstance(value, dict):
return []
langs: List[str] = []
for k in value.keys():
if not isinstance(k, str):
continue
lang = k.strip().lower()
if lang:
langs.append(lang)
return sorted(set(langs))
out: List[str] = []
for lang in _langs(info.get("subtitles")):
out.append(f"subs:{lang}")
for lang in _langs(info.get("automatic_captions")):
out.append(f"subs_auto:{lang}")
return out
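# Example of the tag shape produced above (trimmed yt-dlp info dict):
#   _extract_subtitle_tags({"subtitles": {"en": [], "de": []},
#                           "automatic_captions": {"en": []}})
#   -> ["subs:de", "subs:en", "subs_auto:en"]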
def _scrape_ytdlp_info(url: str) -> Optional[Dict[str, Any]]:
"""Fetch a yt-dlp info dict without downloading media."""
if not isinstance(url, str) or not url.strip():
return None
url = url.strip()
# Prefer the Python module when available (faster, avoids shell quoting issues).
try:
import yt_dlp # type: ignore
opts: Any = {
"quiet": True,
"no_warnings": True,
"skip_download": True,
"noprogress": True,
"socket_timeout": 15,
"retries": 1,
"playlist_items": "1-10",
}
with yt_dlp.YoutubeDL(opts) as ydl:
info = ydl.extract_info(url, download=False)
return info if isinstance(info, dict) else None
except Exception:
pass
# Fallback to yt-dlp CLI if the module isn't available.
try:
import json as json_module
cmd = [
"yt-dlp",
"-J",
"--no-warnings",
"--skip-download",
"--playlist-items",
"1-10",
url,
]
result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
if result.returncode != 0:
return None
payload = (result.stdout or "").strip()
if not payload:
return None
data = json_module.loads(payload)
return data if isinstance(data, dict) else None
except Exception:
return None
def _resolve_candidate_urls_for_item(
result: Any,
backend: Any,
file_hash: str,
config: Dict[str, Any],
) -> List[str]:
"""Get candidate URLs from backend and/or piped result."""
try:
from metadata import normalize_urls
except Exception:
normalize_urls = None # type: ignore[assignment]
urls: List[str] = []
# 1) Backend URL association (best source of truth)
try:
backend_urls = backend.get_url(file_hash, config=config)
if backend_urls:
if normalize_urls:
urls.extend(normalize_urls(backend_urls))
else:
urls.extend([str(u).strip() for u in backend_urls if isinstance(u, str) and str(u).strip()])
except Exception:
pass
# 2) Backend metadata url field
try:
meta = backend.get_metadata(file_hash, config=config)
if isinstance(meta, dict) and meta.get("url"):
if normalize_urls:
urls.extend(normalize_urls(meta.get("url")))
else:
raw = meta.get("url")
if isinstance(raw, list):
urls.extend([str(u).strip() for u in raw if isinstance(u, str) and str(u).strip()])
elif isinstance(raw, str) and raw.strip():
urls.append(raw.strip())
except Exception:
pass
# 3) Piped result fields
def _get(obj: Any, key: str, default: Any = None) -> Any:
if isinstance(obj, dict):
return obj.get(key, default)
return getattr(obj, key, default)
for key in ("url", "webpage_url", "source_url", "target"):
val = _get(result, key, None)
if not val:
continue
if normalize_urls:
urls.extend(normalize_urls(val))
continue
if isinstance(val, str) and val.strip():
urls.append(val.strip())
elif isinstance(val, list):
urls.extend([str(u).strip() for u in val if isinstance(u, str) and str(u).strip()])
meta_field = _get(result, "metadata", None)
if isinstance(meta_field, dict) and meta_field.get("url"):
val = meta_field.get("url")
if normalize_urls:
urls.extend(normalize_urls(val))
elif isinstance(val, list):
urls.extend([str(u).strip() for u in val if isinstance(u, str) and str(u).strip()])
elif isinstance(val, str) and val.strip():
urls.append(val.strip())
# Dedup
return _dedup_tags_preserve_order(urls)
def _pick_supported_ytdlp_url(urls: List[str]) -> Optional[str]:
"""Pick the first URL that looks supported by yt-dlp (best effort)."""
if not urls:
return None
def _is_hydrus_file_url(u: str) -> bool:
text = str(u or "").strip().lower()
if not text:
return False
# Hydrus-local file URLs are retrievable blobs, not original source pages.
# yt-dlp generally can't extract meaningful metadata from these.
return ("/get_files/file" in text) and ("hash=" in text)
http_urls: List[str] = []
for u in urls:
text = str(u or "").strip()
if text.lower().startswith(("http://", "https://")):
http_urls.append(text)
# Prefer non-Hydrus URLs for yt-dlp scraping.
candidates = [u for u in http_urls if not _is_hydrus_file_url(u)]
if not candidates:
return None
# Prefer a true support check when the Python module is available.
try:
from SYS.download import is_url_supported_by_ytdlp
for text in candidates:
try:
if is_url_supported_by_ytdlp(text):
return text
except Exception:
continue
except Exception:
pass
# Fallback: use the first non-Hydrus http(s) URL and let extraction decide.
return candidates[0] if candidates else None
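# Illustration (URLs invented): Hydrus blob URLs are filtered out, so
#   _pick_supported_ytdlp_url([
#       "http://127.0.0.1:45869/get_files/file?hash=ab12",
#       "https://www.youtube.com/watch?v=dQw4w9WgXcQ",
#   ])
# returns the YouTube URL (or the first non-Hydrus http(s) URL as fallback).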
_scrape_isbn_metadata = _ol_scrape_isbn_metadata  # type: ignore[assignment]
_scrape_openlibrary_metadata = _ol_scrape_openlibrary_metadata  # type: ignore[assignment]
@@ -853,7 +1057,12 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
    scrape_url = parsed_args.get("scrape")
    scrape_requested = scrape_flag_present or scrape_url is not None

-   if scrape_requested and (not scrape_url or str(scrape_url).strip() == ""):
    # Convenience: `-scrape` with no value defaults to `ytdlp` (store-backed URL scrape).
    if scrape_flag_present and (scrape_url is None or str(scrape_url).strip() == ""):
        scrape_url = "ytdlp"
        scrape_requested = True
    if scrape_requested and (scrape_url is None or str(scrape_url).strip() == ""):
        log("-scrape requires a URL or provider name", file=sys.stderr)
        return 1

@@ -861,6 +1070,123 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
    if scrape_requested and scrape_url:
        import json as json_module
if str(scrape_url).strip().lower() == "ytdlp":
# Scrape metadata from the selected item's URL via yt-dlp (no download),
# then OVERWRITE all existing tags (including title:).
#
# This mode requires a store-backed item (hash + store).
#
# NOTE: We intentionally do not reuse _scrape_url_metadata() here because it
# performs namespace deduplication that would collapse multi-valued tags.
file_hash = normalize_hash(hash_override) or normalize_hash(get_field(result, "hash", None))
store_name = get_field(result, "store", None)
subject_path = get_field(result, "path", None) or get_field(result, "target", None) or get_field(result, "filename", None)
item_title = get_field(result, "title", None) or get_field(result, "name", None) or get_field(result, "filename", None)
# Only run overwrite-apply when the item is store-backed.
# If this is a URL-only PipeObject, fall through to provider mode below.
if file_hash and store_name and str(file_hash).strip().lower() != "unknown" and str(store_name).strip().upper() not in {"PATH", "URL"}:
try:
from Store import Store
storage = Store(config)
backend = storage[str(store_name)]
except Exception as exc:
log(f"Failed to resolve store backend '{store_name}': {exc}", file=sys.stderr)
return 1
candidate_urls = _resolve_candidate_urls_for_item(result, backend, file_hash, config)
scrape_target = _pick_supported_ytdlp_url(candidate_urls)
if not scrape_target:
log(
"No yt-dlp-supported source URL found for this item (Hydrus /get_files/file URLs are ignored). ",
file=sys.stderr,
)
log(
"Add the original page URL to the file (e.g. via add-url), then retry get-tag -scrape.",
file=sys.stderr,
)
return 1
info = _scrape_ytdlp_info(scrape_target)
if not info:
log("yt-dlp could not extract metadata for this URL (unsupported or failed)", file=sys.stderr)
return 1
try:
from metadata import extract_ytdlp_tags
except Exception:
extract_ytdlp_tags = None # type: ignore[assignment]
# Prefer the top-level metadata, but if this is a playlist container, use
# the first entry for per-item fields like subtitles.
info_for_subs = info
entries = info.get("entries") if isinstance(info, dict) else None
if isinstance(entries, list) and entries:
first = entries[0]
if isinstance(first, dict):
info_for_subs = first
tags: List[str] = []
if extract_ytdlp_tags:
try:
tags.extend(extract_ytdlp_tags(info))
except Exception:
pass
# Subtitle availability tags
try:
tags.extend(_extract_subtitle_tags(info_for_subs if isinstance(info_for_subs, dict) else {}))
except Exception:
pass
# Ensure we actually have something to apply.
tags = _dedup_tags_preserve_order(tags)
if not tags:
log("No tags extracted from yt-dlp metadata", file=sys.stderr)
return 1
# Full overwrite: delete all existing tags, then add the new set.
try:
existing_tags, _src = backend.get_tag(file_hash, config=config)
except Exception:
existing_tags = []
try:
if existing_tags:
backend.delete_tag(file_hash, list(existing_tags), config=config)
except Exception as exc:
debug(f"[get_tag] ytdlp overwrite: delete_tag failed: {exc}")
try:
backend.add_tag(file_hash, list(tags), config=config)
except Exception as exc:
log(f"Failed to apply yt-dlp tags: {exc}", file=sys.stderr)
return 1
# Show updated tags
try:
updated_tags, _src = backend.get_tag(file_hash, config=config)
except Exception:
updated_tags = tags
if not updated_tags:
updated_tags = tags
_emit_tags_as_table(
tags_list=list(updated_tags),
file_hash=file_hash,
store=str(store_name),
service_name=None,
config=config,
item_title=str(item_title or "ytdlp"),
path=str(subject_path) if subject_path else None,
subject={
"hash": file_hash,
"store": str(store_name),
"path": str(subject_path) if subject_path else None,
"title": item_title,
"extra": {"applied_provider": "ytdlp", "scrape_url": scrape_target},
},
)
return 0
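# Hypothetical invocation of this overwrite mode:
#   @1 | get-tag -scrape ytdlp
# resolves the item's stored source URL, scrapes tags via yt-dlp, and replaces
# the existing tag set (including title:) on the backing store.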
        if scrape_url.startswith("http://") or scrape_url.startswith("https://"):
            # URL scraping (existing behavior)
            title, tags, formats, playlist_items = _scrape_url_metadata(scrape_url)
@@ -951,7 +1277,16 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
    else:
        combined_query = f"{title_hint} {artist_hint}"

-   query_hint = identifier_query or combined_query or title_hint
    # yt-dlp isn't a search provider; it requires a URL.
    url_hint: Optional[str] = None
    if provider.name == "ytdlp":
        raw_url = get_field(result, "url", None) or get_field(result, "source_url", None) or get_field(result, "target", None)
        if isinstance(raw_url, list) and raw_url:
            raw_url = raw_url[0]
        if isinstance(raw_url, str) and raw_url.strip().startswith(("http://", "https://")):
            url_hint = raw_url.strip()
    query_hint = url_hint or identifier_query or combined_query or title_hint

    if not query_hint:
        log("No title or identifier available to search for metadata", file=sys.stderr)
        return 1
@@ -967,6 +1302,27 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
    if not items:
        log("No metadata results found", file=sys.stderr)
        return 1
# For yt-dlp, emit tags directly (there is no meaningful multi-result selection step).
if provider.name == "ytdlp":
try:
tags = [str(t) for t in provider.to_tags(items[0]) if t is not None]
except Exception:
tags = []
if not tags:
log("No tags extracted from yt-dlp metadata", file=sys.stderr)
return 1
_emit_tags_as_table(
tags_list=list(tags),
file_hash=None,
store="url",
service_name=None,
config=config,
item_title=str(items[0].get("title") or "ytdlp"),
path=None,
subject={"provider": "ytdlp", "url": str(query_hint)},
)
return 0
    from result_table import ResultTable

    table = ResultTable(f"Metadata: {provider.name}")
@@ -1040,7 +1396,10 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
        return 0

    # Apply tags to the store backend (no sidecar writing here).
-   apply_tags = _filter_scraped_tags([str(t) for t in result_tags if t is not None])
    if str(result_provider).strip().lower() == "ytdlp":
        apply_tags = [str(t) for t in result_tags if t is not None]
    else:
        apply_tags = _filter_scraped_tags([str(t) for t in result_tags if t is not None])
    if not apply_tags:
        log("No applicable scraped tags to apply (title:/artist:/source: are skipped)", file=sys.stderr)
        return 0
@@ -1167,6 +1526,11 @@ try:
except Exception:
    _SCRAPE_CHOICES = ["itunes", "openlibrary", "googlebooks", "google", "musicbrainz"]

# Special scrape mode: pull tags from an item's URL via yt-dlp (no download)
if "ytdlp" not in _SCRAPE_CHOICES:
    _SCRAPE_CHOICES.append("ytdlp")
    _SCRAPE_CHOICES = sorted(_SCRAPE_CHOICES)

class Get_Tag(Cmdlet):
    """Class-based get-tag cmdlet with self-registration."""
@@ -1195,7 +1559,7 @@ class Get_Tag(Cmdlet):
            CmdletArg(
                name="-scrape",
                type="string",
-               description="Scrape metadata from URL or provider name (returns tags as JSON or table)",
                description="Scrape metadata from URL/provider, or use 'ytdlp' to scrape from the item's URL and overwrite tags",
                required=False,
                choices=_SCRAPE_CHOICES,
            )

View File

@@ -14,10 +14,11 @@ import httpx
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence, Tuple
-from urllib.parse import urlsplit, quote, urljoin
from urllib.parse import urlsplit, quote, urljoin, unquote

from SYS.logger import log, debug
from API.HTTP import HTTPClient
from SYS.pipeline_progress import PipelineProgress
from SYS.utils import ensure_directory, unique_path, unique_preserve_order
from . import _shared as sh
@@ -31,54 +32,6 @@ get_field = sh.get_field
parse_cmdlet_args = sh.parse_cmdlet_args

import pipeline as pipeline_context

-def _live_ui_and_pipe_index() -> tuple[Optional[Any], int]:
-    ui = None
-    try:
-        ui = pipeline_context.get_live_progress() if hasattr(pipeline_context, "get_live_progress") else None
-    except Exception:
-        ui = None
-    pipe_idx: int = 0
-    try:
-        stage_ctx = pipeline_context.get_stage_context() if hasattr(pipeline_context, "get_stage_context") else None
-        maybe_idx = getattr(stage_ctx, "pipe_index", None) if stage_ctx is not None else None
-        if isinstance(maybe_idx, int):
-            pipe_idx = int(maybe_idx)
-    except Exception:
-        pipe_idx = 0
-    return ui, pipe_idx
-
-def _begin_live_steps(total_steps: int) -> None:
-    """Declare the total number of steps for this cmdlet run (per-pipe)."""
-    ui, pipe_idx = _live_ui_and_pipe_index()
-    if ui is None:
-        return
-    try:
-        begin = getattr(ui, "begin_pipe_steps", None)
-        if callable(begin):
-            begin(int(pipe_idx), total_steps=int(total_steps))
-    except Exception:
-        return
-
-def _step(text: str) -> None:
-    """Emit a *new* step.
-
-    Each call increments the step counter and advances percent automatically.
-    """
-    ui, pipe_idx = _live_ui_and_pipe_index()
-    if ui is None:
-        return
-    try:
-        adv = getattr(ui, "advance_pipe_step", None)
-        if callable(adv):
-            adv(int(pipe_idx), str(text))
-    except Exception:
-        return

# ============================================================================
# CMDLET Metadata Declaration
# ============================================================================
@@ -115,6 +68,10 @@ USER_AGENT = (
DEFAULT_VIEWPORT: dict[str, int] = {"width": 1920, "height": 1080}
ARCHIVE_TIMEOUT = 30.0

# WebP has a hard maximum dimension per side.
# Pillow typically fails with: "encoding error 5: Image size exceeds WebP limit of 16383 pixels"
WEBP_MAX_DIM = 16_383

# Configurable selectors for specific websites
SITE_SELECTORS: Dict[str, List[str]] = {
    "twitter.com": [
@@ -200,6 +157,80 @@ def _slugify_url(url: str) -> str:
    return slug[:100]
def _tags_from_url(url: str) -> List[str]:
"""Derive simple tags from a URL.
- site:<domain> (strips leading www.)
- title:<slug> derived from the last path segment, with extension removed
and separators (-, _, %) normalized to spaces.
"""
u = str(url or "").strip()
if not u:
return []
parsed = None
try:
parsed = urlsplit(u)
host = str(getattr(parsed, "hostname", None) or getattr(parsed, "netloc", "") or "").strip().lower()
except Exception:
parsed = None
host = ""
if host:
# Drop credentials and port if present.
if "@" in host:
host = host.rsplit("@", 1)[-1]
if ":" in host:
host = host.split(":", 1)[0]
if host.startswith("www."):
host = host[len("www.") :]
path = ""
if parsed is not None:
try:
path = str(getattr(parsed, "path", "") or "")
except Exception:
path = ""
last = ""
if path:
try:
last = path.rsplit("/", 1)[-1]
except Exception:
last = ""
try:
last = unquote(last or "")
except Exception:
last = last or ""
if last and "." in last:
# Drop a single trailing extension (e.g. .html, .php).
last = last.rsplit(".", 1)[0]
for sep in ("_", "-", "%"):
if last and sep in last:
last = last.replace(sep, " ")
title = " ".join(str(last or "").split()).strip().lower()
tags: List[str] = []
if host:
tags.append(f"site:{host}")
if title:
tags.append(f"title:{title}")
return tags
def _title_from_url(url: str) -> str:
"""Return the normalized title derived from a URL's last path segment."""
for t in _tags_from_url(url):
if str(t).lower().startswith("title:"):
return str(t)[len("title:") :].strip()
return ""
def _normalise_format(fmt: Optional[str]) -> str:
    """Normalize output format to valid values."""
    if not fmt:
@@ -218,6 +249,89 @@ def _format_suffix(fmt: str) -> str:
        return ".jpg"
    return f".{fmt}"
def _convert_to_webp(
src_png: Path,
dst_webp: Path,
*,
quality: int = 90,
method: int = 6,
max_dim: int = WEBP_MAX_DIM,
downscale_if_oversize: bool = True,
) -> bool:
"""Convert a PNG screenshot to WebP via Pillow.
Playwright does not currently support emitting WebP directly.
"""
if not src_png or not Path(src_png).is_file():
raise ScreenshotError(f"Source image not found: {src_png}")
dst_webp = Path(dst_webp)
try:
dst_webp.parent.mkdir(parents=True, exist_ok=True)
except Exception:
pass
try:
from PIL import Image
except Exception as exc:
raise ScreenshotError(f"Pillow is required for webp conversion: {exc}") from exc
# Write atomically to avoid partial files if conversion is interrupted.
tmp_path = unique_path(dst_webp.with_suffix(".tmp.webp"))
try:
with Image.open(src_png) as im:
did_downscale = False
save_kwargs: Dict[str, Any] = {
"format": "WEBP",
"quality": int(quality),
"method": int(method),
}
# Preserve alpha when present; Pillow handles it for WEBP.
# Normalize palette images to RGBA to avoid odd palette artifacts.
if im.mode == "P":
im = im.convert("RGBA")
# WebP enforces a hard max dimension per side (16383px).
# When full-page captures are very tall, downscale proportionally to fit.
try:
w, h = im.size
except Exception:
w, h = 0, 0
if downscale_if_oversize and isinstance(max_dim, int) and max_dim > 0 and (w > max_dim or h > max_dim):
scale = 1.0
try:
scale = min(float(max_dim) / float(w), float(max_dim) / float(h))
except Exception:
scale = 1.0
if scale > 0.0 and scale < 1.0:
new_w = max(1, int(w * scale))
new_h = max(1, int(h * scale))
debug(
f"[_convert_to_webp] Image exceeds WebP limit ({w}x{h}); downscaling -> {new_w}x{new_h}"
)
try:
resample = getattr(getattr(Image, "Resampling", Image), "LANCZOS", None)
if resample is None:
resample = getattr(Image, "LANCZOS", 1)
im = im.resize((new_w, new_h), resample=resample)
did_downscale = True
except Exception as exc:
debug(f"[_convert_to_webp] Downscale failed; attempting direct WEBP save anyway: {exc}")
im.save(tmp_path, **save_kwargs)
tmp_path.replace(dst_webp)
return bool(did_downscale)
finally:
try:
tmp_path.unlink(missing_ok=True)
except Exception:
pass
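# Minimal call sketch (paths hypothetical):
#   downscaled = _convert_to_webp(Path("shot.png"), Path("shot.webp"), quality=90)
#   # downscaled is True only when a side exceeded WEBP_MAX_DIM and the image was
#   # proportionally resized; the caller then keeps the original PNG as well.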
def _matched_site_selectors(url: str) -> List[str]:
    """Return SITE_SELECTORS for a matched domain; empty if no match.
@@ -231,6 +345,16 @@ def _matched_site_selectors(url: str) -> List[str]:
    return sels

def _selectors_for_url(url: str) -> List[str]:
    """Return selectors to try for a URL.

    For now, prefer a minimal behavior: only return known SITE_SELECTORS.
    (The cmdlet already falls back to full-page capture when no selectors match.)
    """
    return _matched_site_selectors(url)

def _platform_preprocess(url: str, page: Any, warnings: List[str], timeout_ms: int = 10_000) -> None:
    """Best-effort page tweaks for popular platforms before capture."""
    try:
@@ -366,11 +490,11 @@ def _prepare_output_path(options: ScreenshotOptions) -> Path:
    return unique_path(path)

-def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str]) -> None:
def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str], progress: PipelineProgress) -> None:
    """Capture screenshot using Playwright."""
    debug(f"[_capture] Starting capture for {options.url} -> {destination}")
    try:
-       _step("loading launching browser")
        progress.step("loading launching browser")
        tool = options.playwright_tool or PlaywrightTool({})
        # Ensure Chromium engine is used for the screen-shot cmdlet (force for consistency)
@@ -405,16 +529,16 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str])
        try:
            with tool.open_page(headless=headless) as page:
-               _step("loading navigating")
                progress.step("loading navigating")
                debug(f"Navigating to {options.url}...")
                try:
                    tool.goto(page, options.url)
                    debug("Page loaded successfully")
-                   _step("loading page loaded")
                    progress.step("loading page loaded")
                except PlaywrightTimeoutError:
                    warnings.append("navigation timeout; capturing current page state")
                    debug("Navigation timeout; proceeding with current state")
-                   _step("loading navigation timeout")
                    progress.step("loading navigation timeout")

                # Skip article lookup by default (wait_for_article defaults to False)
                if options.wait_for_article:
@@ -430,9 +554,9 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str])
                    debug(f"Waiting {options.wait_after_load}s for page stabilization...")
                    time.sleep(min(10.0, max(0.0, options.wait_after_load)))

-               _step("loading stabilized")
                progress.step("loading stabilized")

-               _step("capturing preparing")
                progress.step("capturing preparing")
                if options.replace_video_posters:
                    debug("Replacing video elements with posters...")
                    page.evaluate(
@@ -453,7 +577,7 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str])
                if options.prefer_platform_target and format_name != "pdf":
                    debug(f"[_capture] Target capture enabled")
                    debug("Attempting platform-specific content capture...")
-                   _step("capturing locating target")
                    progress.step("capturing locating target")
                    try:
                        _platform_preprocess(options.url, page, warnings)
                    except Exception as e:
@@ -478,7 +602,7 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str])
                                    el.scroll_into_view_if_needed(timeout=1000)
                                except Exception:
                                    pass
-                               _step("capturing output")
                                progress.step("capturing output")
                                debug(f"Capturing element to {destination}...")
                                el.screenshot(path=str(destination), type=("jpeg" if format_name == "jpeg" else None))
                                element_captured = True
@@ -489,14 +613,14 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str])
                        debug(f"Failed to capture element: {exc}")
                        # Fallback to default capture paths

                if element_captured:
-                   _step("capturing saved")
                    progress.step("capturing saved")
                elif format_name == "pdf":
                    debug("Generating PDF...")
                    page.emulate_media(media="print")
-                   _step("capturing output")
                    progress.step("capturing output")
                    page.pdf(path=str(destination), print_background=True)
                    debug(f"PDF saved to {destination}")
-                   _step("capturing saved")
                    progress.step("capturing saved")
                else:
                    debug(f"Capturing full page to {destination}...")
                    screenshot_kwargs: Dict[str, Any] = {"path": str(destination)}
@@ -504,20 +628,20 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str])
                        screenshot_kwargs["type"] = "jpeg"
                        screenshot_kwargs["quality"] = 90
                    if options.full_page:
-                       _step("capturing output")
                        progress.step("capturing output")
                        page.screenshot(full_page=True, **screenshot_kwargs)
                    else:
                        article = page.query_selector("article")
                        if article is not None:
                            article_kwargs = dict(screenshot_kwargs)
                            article_kwargs.pop("full_page", None)
-                           _step("capturing output")
                            progress.step("capturing output")
                            article.screenshot(**article_kwargs)
                        else:
-                           _step("capturing output")
                            progress.step("capturing output")
                            page.screenshot(**screenshot_kwargs)
                debug(f"Screenshot saved to {destination}")
-               _step("capturing saved")
                progress.step("capturing saved")
    except Exception as exc:
        debug(f"[_capture] Exception launching browser/page: {exc}")
        msg = str(exc).lower()
@@ -532,7 +656,7 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str])
        raise ScreenshotError(f"Failed to capture screenshot: {exc}") from exc

-def _capture_screenshot(options: ScreenshotOptions) -> ScreenshotResult:
def _capture_screenshot(options: ScreenshotOptions, progress: PipelineProgress) -> ScreenshotResult:
    """Capture a screenshot for the given options."""
    debug(f"[_capture_screenshot] Preparing capture for {options.url}")
    requested_format = _normalise_format(options.output_format)
@@ -543,8 +667,8 @@ def _capture_screenshot(options: ScreenshotOptions) -> ScreenshotResult:
    will_convert = requested_format == "webp"
    will_archive = bool(options.archive and options.url)
    total_steps = 9 + (1 if will_target else 0) + (1 if will_convert else 0) + (1 if will_archive else 0)
-   _begin_live_steps(total_steps)
-   _step("loading starting")
    progress.begin_steps(total_steps)
    progress.step("loading starting")

    # Playwright screenshots do not natively support WebP output.
    # Capture as PNG, then convert via Pillow.
@@ -553,17 +677,22 @@ def _capture_screenshot(options: ScreenshotOptions) -> ScreenshotResult:
        capture_path = unique_path(destination.with_suffix(".png"))
        debug(f"[_capture_screenshot] Requested webp; capturing intermediate png -> {capture_path}")
        options.output_format = "png"

-   _capture(options, capture_path, warnings)
    _capture(options, capture_path, warnings, progress)

    if requested_format == "webp":
-       _step("capturing converting to webp")
        progress.step("capturing converting to webp")
        debug(f"[_capture_screenshot] Converting png -> webp: {destination}")
        try:
-           _convert_to_webp(capture_path, destination)
-           try:
-               capture_path.unlink(missing_ok=True)
-           except Exception:
-               pass
            did_downscale = _convert_to_webp(capture_path, destination)
            if did_downscale:
                warnings.append(
                    f"webp conversion used downscaling to fit {WEBP_MAX_DIM}px limit; keeping original png: {capture_path.name}"
                )
            else:
                try:
                    capture_path.unlink(missing_ok=True)
                except Exception:
                    pass
        except Exception as exc:
            warnings.append(f"webp conversion failed; keeping png: {exc}")
            destination = capture_path
@@ -572,7 +701,7 @@ def _capture_screenshot(options: ScreenshotOptions) -> ScreenshotResult:
    url: List[str] = [options.url] if options.url else []
    archive_url: List[str] = []
    if options.archive and options.url:
-       _step("capturing archiving")
        progress.step("capturing archiving")
        debug(f"[_capture_screenshot] Archiving enabled for {options.url}")
        archives, archive_warnings = _archive_url(options.url, options.archive_timeout)
        archive_url.extend(archives)
@@ -580,7 +709,7 @@ def _capture_screenshot(options: ScreenshotOptions) -> ScreenshotResult:
        if archives:
            url = unique_preserve_order([*url, *archives])

-   _step("capturing finalized")
    progress.step("capturing finalized")

    applied_tag = unique_preserve_order(list(tag for tag in options.tag if tag.strip()))
@@ -627,6 +756,8 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
        )
        return 1

    progress = PipelineProgress(pipeline_context)

    # ========================================================================
    # ARGUMENT PARSING
    # ========================================================================
@@ -685,32 +816,6 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
    debug(f"[_run] url to process: {[u for u, _ in url_to_process]}")

-   # If the caller isn't running the shared pipeline Live progress UI (e.g. direct
-   # cmdlet execution), start a minimal local pipeline progress panel so this cmdlet
-   # still shows step-level progress.
-   local_progress_ui = None
-   try:
-       existing_ui = pipeline_context.get_live_progress() if hasattr(pipeline_context, "get_live_progress") else None
-   except Exception:
-       existing_ui = None
-   try:
-       if existing_ui is None and bool(getattr(sys.stderr, "isatty", lambda: False)()):
-           from models import PipelineLiveProgress
-           local_progress_ui = PipelineLiveProgress(["screen-shot"], enabled=True)
-           local_progress_ui.start()
-           try:
-               if hasattr(pipeline_context, "set_live_progress"):
-                   pipeline_context.set_live_progress(local_progress_ui)
-           except Exception:
-               pass
-           try:
-               local_progress_ui.begin_pipe(0, total_items=len(url_to_process), items_preview=[u for u, _ in url_to_process])
-           except Exception:
-               pass
-   except Exception:
-       local_progress_ui = None

    # ========================================================================
    # OUTPUT DIRECTORY RESOLUTION - Priority chain
    # ========================================================================
@@ -749,6 +854,18 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
     ensure_directory(screenshot_dir)
 
+    # If the caller isn't running the shared pipeline Live progress UI (e.g. direct
+    # cmdlet execution), start a minimal local pipeline progress panel so this cmdlet
+    # still shows step-level progress.
+    try:
+        progress.ensure_local_ui(
+            label="screen-shot",
+            total_items=len(url_to_process),
+            items_preview=[u for u, _ in url_to_process],
+        )
+    except Exception:
+        pass
+
     # ========================================================================
     # PREPARE SCREENSHOT OPTIONS
     # ========================================================================
@@ -850,7 +967,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
                 options.target_selectors = auto_selectors
                 debug(f"[screen_shot] Auto selectors matched for url: {auto_selectors}")
 
-            screenshot_result = _capture_screenshot(options)
+            screenshot_result = _capture_screenshot(options, progress)
 
             # Log results and warnings
             debug(f"Screenshot captured to {screenshot_result.path}")
@@ -875,15 +992,18 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
             capture_date = datetime.now().date().isoformat()
             upstream_title = _clean_title(_extract_item_title(origin_item))
-            display_title = upstream_title or url
+            url_title = _title_from_url(url)
+            display_title = upstream_title or url_title or url
             upstream_tags = _extract_item_tags(origin_item)
             filtered_upstream_tags = [
                 t for t in upstream_tags
                 if not str(t).strip().lower().startswith(("type:", "date:"))
             ]
+            url_tags = _tags_from_url(url)
             merged_tags = unique_preserve_order(
-                ["type:screenshot", f"date:{capture_date}"] + filtered_upstream_tags
+                ["type:screenshot", f"date:{capture_date}"] + filtered_upstream_tags + url_tags
             )
 
             pipe_obj = create_pipe_object_result(
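Both helpers introduced in this hunk, `_title_from_url` and `_tags_from_url`, appear only by name in the diff. A hedged sketch of plausible stdlib-only implementations; the real ones may extract more (query parameters, per-site patterns), and the `site:` tag namespace here is an assumption:

from typing import List
from urllib.parse import urlsplit

def _title_from_url(url: str) -> str:
    """Fallback title: the last path segment, de-slugged ('my-page' -> 'my page')."""
    path = urlsplit(url).path.rstrip("/")
    segment = path.rsplit("/", 1)[-1] if path else ""
    return segment.replace("-", " ").replace("_", " ").strip()

def _tags_from_url(url: str) -> List[str]:
    """Derive cheap provenance tags, such as the source host, from the URL."""
    host = urlsplit(url).hostname or ""
    if host.startswith("www."):
        host = host[4:]
    return [f"site:{host}"] if host else []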
@@ -910,11 +1030,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
             all_emitted.append(pipe_obj)
 
             # If we created a local progress UI, advance it per completed item.
-            if local_progress_ui is not None:
-                try:
-                    local_progress_ui.on_emit(0, pipe_obj)
-                except Exception:
-                    pass
+            progress.on_emit(pipe_obj)
 
         except ScreenshotError as exc:
             log(f"Error taking screenshot of {url}: {exc}", file=sys.stderr)
@@ -925,23 +1041,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
             traceback.print_exc(file=sys.stderr)
             exit_code = 1
 
-    try:
-        if local_progress_ui is not None:
-            try:
-                local_progress_ui.finish_pipe(0, force_complete=True)
-            except Exception:
-                pass
-    finally:
-        if local_progress_ui is not None:
-            try:
-                local_progress_ui.stop()
-            except Exception:
-                pass
-            try:
-                if hasattr(pipeline_context, "set_live_progress"):
-                    pipeline_context.set_live_progress(None)
-            except Exception:
-                pass
+    progress.close_local_ui(force_complete=True)
 
     if not all_emitted:
         log(f"No screenshots were successfully captured", file=sys.stderr)
View File
@@ -336,6 +336,18 @@ def _resolve_upload_path(item: Any, config: Dict[str, Any]) -> Optional[str]:
 def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
     # Internal stage: send previously selected items to selected rooms.
     if any(str(a).lower() == "-send" for a in (args or [])):
+        # Ensure we don't re-print the rooms picker table on the send stage.
+        try:
+            if hasattr(ctx, "set_last_result_table_overlay"):
+                ctx.set_last_result_table_overlay(None, None, None)
+        except Exception:
+            pass
+        try:
+            if hasattr(ctx, "set_current_stage_table"):
+                ctx.set_current_stage_table(None)
+        except Exception:
+            pass
+
         rooms = _normalize_to_list(result)
         room_ids: List[str] = []
         for r in rooms:
@@ -430,7 +442,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
         log("No joined rooms found.", file=sys.stderr)
         return 0
 
-    table = ResultTable("Matrix Rooms")
+    table = ResultTable("Matrix Rooms (select with @N)")
     table.set_table("matrix")
     table.set_source_command(".matrix", [])
@@ -461,12 +473,6 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
     ctx.set_last_result_table_overlay(table, room_items)
     ctx.set_current_stage_table(table)
     ctx.set_pending_pipeline_tail([[".matrix", "-send"]], ".matrix")
-
-    print()
-    from rich_display import stdout_console
-    stdout_console().print(table)
-    print("\nSelect room(s) with @N (e.g. @1 or @1-3) to send the selected item(s)")
-
     return 0
 
 CMDLET = Cmdlet(
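With the inline printing removed, the picker table is rendered once by the REPL from the stored overlay, and `set_pending_pipeline_tail` is what turns a later `@N` selection into a resumed pipeline. The intended interaction, sketched with illustrative prompt text:

# > .matrix
#   ...the REPL renders the "Matrix Rooms (select with @N)" table once...
# > @1-3
#   the stored tail [[".matrix", "-send"]] runs with rooms 1-3 as piped input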
View File
@@ -1,6 +1,6 @@
 # Medios-Macina
-Medios-Macina is a CLI-first media ingestion and management toolkit focused on reliably downloading, tagging, and storing media (audio, video, images, and text) from a variety of providers and sources. It is designed around a compact, pipeable command language ("cmdlets") so complex workflows can be composed simply and repeatably.
+Medios-Macina is a CLI media manager and toolkit focused on downloading, tagging, and storing media (audio, video, images, and text) from a variety of providers and sources. It is designed around a compact, pipeable command language ("cmdlets") so complex workflows can be composed simply and repeatably.
 
 ## Highlights ✅
 - Flexible pipeline-based CLI: chain cmdlets with `|` and use saved selections with `@N`.