dfd
This commit is contained in:
@@ -147,7 +147,11 @@ class HydrusNetwork:
|
||||
|
||||
file_size = file_path.stat().st_size
|
||||
headers["Content-Type"] = spec.content_type or "application/octet-stream"
|
||||
headers["Content-Length"] = str(file_size)
|
||||
# Do not set Content-Length when streaming an iterator body.
|
||||
# If the file size changes between stat() and read() (or the source is truncated),
|
||||
# h11 will raise: "Too little data for declared Content-Length".
|
||||
# Let httpx choose chunked transfer encoding for safety.
|
||||
headers.pop("Content-Length", None)
|
||||
|
||||
logger.debug(f"{self._log_prefix()} Uploading file {file_path.name} ({file_size} bytes)")
|
||||
|
||||
|
||||
875
CLI.py
875
CLI.py
@@ -1245,25 +1245,17 @@ class PipelineExecutor:
|
||||
stages.append(current)
|
||||
return stages
|
||||
|
||||
def execute_tokens(self, tokens: List[str]) -> None:
|
||||
from cmdlet import REGISTRY
|
||||
import pipeline as ctx
|
||||
|
||||
@staticmethod
|
||||
def _try_clear_pipeline_stop(ctx: Any) -> None:
|
||||
try:
|
||||
try:
|
||||
if hasattr(ctx, "clear_pipeline_stop"):
|
||||
ctx.clear_pipeline_stop()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
stages = self._split_stages(tokens)
|
||||
if not stages:
|
||||
print("Invalid pipeline syntax\n")
|
||||
return
|
||||
|
||||
pending_tail = ctx.get_pending_pipeline_tail() if hasattr(ctx, "get_pending_pipeline_tail") else []
|
||||
pending_source = ctx.get_pending_pipeline_source() if hasattr(ctx, "get_pending_pipeline_source") else None
|
||||
if hasattr(ctx, "clear_pipeline_stop"):
|
||||
ctx.clear_pipeline_stop()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@staticmethod
|
||||
def _maybe_seed_current_stage_table(ctx: Any) -> None:
|
||||
try:
|
||||
if hasattr(ctx, "get_current_stage_table") and not ctx.get_current_stage_table():
|
||||
display_table = ctx.get_display_table() if hasattr(ctx, "get_display_table") else None
|
||||
if display_table:
|
||||
@@ -1272,188 +1264,512 @@ class PipelineExecutor:
|
||||
last_table = ctx.get_last_result_table() if hasattr(ctx, "get_last_result_table") else None
|
||||
if last_table:
|
||||
ctx.set_current_stage_table(last_table)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@staticmethod
|
||||
def _maybe_apply_pending_pipeline_tail(ctx: Any, stages: List[List[str]]) -> List[List[str]]:
|
||||
try:
|
||||
pending_tail = ctx.get_pending_pipeline_tail() if hasattr(ctx, "get_pending_pipeline_tail") else []
|
||||
pending_source = ctx.get_pending_pipeline_source() if hasattr(ctx, "get_pending_pipeline_source") else None
|
||||
except Exception:
|
||||
pending_tail = []
|
||||
pending_source = None
|
||||
|
||||
try:
|
||||
current_source = (
|
||||
ctx.get_current_stage_table_source_command() if hasattr(ctx, "get_current_stage_table_source_command") else None
|
||||
ctx.get_current_stage_table_source_command()
|
||||
if hasattr(ctx, "get_current_stage_table_source_command")
|
||||
else None
|
||||
)
|
||||
except Exception:
|
||||
current_source = None
|
||||
|
||||
try:
|
||||
effective_source = current_source or (
|
||||
ctx.get_last_result_table_source_command() if hasattr(ctx, "get_last_result_table_source_command") else None
|
||||
ctx.get_last_result_table_source_command()
|
||||
if hasattr(ctx, "get_last_result_table_source_command")
|
||||
else None
|
||||
)
|
||||
selection_only = len(stages) == 1 and stages[0] and stages[0][0].startswith("@")
|
||||
if pending_tail and selection_only:
|
||||
if (pending_source is None) or (effective_source and pending_source == effective_source):
|
||||
stages.extend(pending_tail)
|
||||
except Exception:
|
||||
effective_source = current_source
|
||||
|
||||
selection_only = bool(len(stages) == 1 and stages[0] and stages[0][0].startswith("@"))
|
||||
if pending_tail and selection_only:
|
||||
if (pending_source is None) or (effective_source and pending_source == effective_source):
|
||||
stages = list(stages) + list(pending_tail)
|
||||
try:
|
||||
if hasattr(ctx, "clear_pending_pipeline_tail"):
|
||||
ctx.clear_pending_pipeline_tail()
|
||||
elif hasattr(ctx, "clear_pending_pipeline_tail"):
|
||||
ctx.clear_pending_pipeline_tail()
|
||||
|
||||
config = self._config_loader.load()
|
||||
if isinstance(config, dict):
|
||||
# This executor is used by both the REPL and the `pipeline` subcommand.
|
||||
# Quiet/background mode is helpful for detached/background runners, but
|
||||
# it suppresses interactive UX (like the pipeline Live progress UI).
|
||||
config["_quiet_background_output"] = bool(self._toolbar_output is None)
|
||||
|
||||
def _resolve_items_for_selection(table_obj, items_list):
|
||||
return items_list if items_list else []
|
||||
|
||||
def _maybe_run_class_selector(selected_items: list, *, stage_is_last: bool) -> bool:
|
||||
if not stage_is_last:
|
||||
return False
|
||||
|
||||
candidates: list[str] = []
|
||||
seen: set[str] = set()
|
||||
|
||||
def _add(value) -> None:
|
||||
try:
|
||||
text = str(value or "").strip().lower()
|
||||
except Exception:
|
||||
return
|
||||
if not text or text in seen:
|
||||
return
|
||||
seen.add(text)
|
||||
candidates.append(text)
|
||||
|
||||
try:
|
||||
current_table = ctx.get_current_stage_table() or ctx.get_last_result_table()
|
||||
_add(current_table.table if current_table and hasattr(current_table, "table") else None)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
for item in selected_items or []:
|
||||
if isinstance(item, dict):
|
||||
_add(item.get("provider"))
|
||||
_add(item.get("store"))
|
||||
_add(item.get("table"))
|
||||
else:
|
||||
_add(getattr(item, "provider", None))
|
||||
_add(getattr(item, "store", None))
|
||||
_add(getattr(item, "table", None))
|
||||
|
||||
else:
|
||||
try:
|
||||
from ProviderCore.registry import get_provider, is_known_provider_name
|
||||
if hasattr(ctx, "clear_pending_pipeline_tail"):
|
||||
ctx.clear_pending_pipeline_tail()
|
||||
except Exception:
|
||||
get_provider = None # type: ignore
|
||||
is_known_provider_name = None # type: ignore
|
||||
pass
|
||||
return stages
|
||||
|
||||
if get_provider is not None:
|
||||
for key in candidates:
|
||||
try:
|
||||
if is_known_provider_name is not None and (not is_known_provider_name(key)):
|
||||
continue
|
||||
except Exception:
|
||||
# If the predicate fails for any reason, fall back to legacy behavior.
|
||||
pass
|
||||
try:
|
||||
provider = get_provider(key, config)
|
||||
except Exception:
|
||||
continue
|
||||
selector = getattr(provider, "selector", None)
|
||||
if selector is None:
|
||||
continue
|
||||
try:
|
||||
handled = bool(selector(selected_items, ctx=ctx, stage_is_last=True))
|
||||
except Exception as exc:
|
||||
print(f"{key} selector failed: {exc}\n")
|
||||
return True
|
||||
if handled:
|
||||
return True
|
||||
def _apply_quiet_background_flag(self, config: Any) -> Any:
|
||||
if isinstance(config, dict):
|
||||
# This executor is used by both the REPL and the `pipeline` subcommand.
|
||||
# Quiet/background mode is helpful for detached/background runners, but
|
||||
# it suppresses interactive UX (like the pipeline Live progress UI).
|
||||
config["_quiet_background_output"] = bool(self._toolbar_output is None)
|
||||
return config
|
||||
|
||||
store_keys: list[str] = []
|
||||
for item in selected_items or []:
|
||||
if isinstance(item, dict):
|
||||
v = item.get("store")
|
||||
else:
|
||||
v = getattr(item, "store", None)
|
||||
name = str(v or "").strip()
|
||||
if name:
|
||||
store_keys.append(name)
|
||||
@staticmethod
|
||||
def _extract_first_stage_selection_tokens(stages: List[List[str]]) -> tuple[List[List[str]], List[int], bool, bool]:
|
||||
first_stage_tokens = stages[0] if stages else []
|
||||
first_stage_selection_indices: List[int] = []
|
||||
first_stage_had_extra_args = False
|
||||
first_stage_select_all = False
|
||||
|
||||
if store_keys:
|
||||
if first_stage_tokens:
|
||||
new_first_stage: List[str] = []
|
||||
for token in first_stage_tokens:
|
||||
if token.startswith("@"): # selection
|
||||
selection = SelectionSyntax.parse(token)
|
||||
if selection is not None:
|
||||
first_stage_selection_indices = sorted([i - 1 for i in selection])
|
||||
continue
|
||||
if token == "@*":
|
||||
first_stage_select_all = True
|
||||
continue
|
||||
new_first_stage.append(token)
|
||||
|
||||
if new_first_stage:
|
||||
stages = list(stages)
|
||||
stages[0] = new_first_stage
|
||||
if first_stage_selection_indices or first_stage_select_all:
|
||||
first_stage_had_extra_args = True
|
||||
elif first_stage_selection_indices or first_stage_select_all:
|
||||
stages = list(stages)
|
||||
stages.pop(0)
|
||||
|
||||
return stages, first_stage_selection_indices, first_stage_had_extra_args, first_stage_select_all
|
||||
|
||||
@staticmethod
|
||||
def _apply_select_all_if_requested(ctx: Any, indices: List[int], select_all: bool) -> List[int]:
|
||||
if not select_all:
|
||||
return indices
|
||||
try:
|
||||
last_items = ctx.get_last_result_items()
|
||||
except Exception:
|
||||
last_items = None
|
||||
if last_items:
|
||||
return list(range(len(last_items)))
|
||||
return indices
|
||||
|
||||
@staticmethod
|
||||
def _maybe_run_class_selector(ctx: Any, config: Any, selected_items: list, *, stage_is_last: bool) -> bool:
|
||||
if not stage_is_last:
|
||||
return False
|
||||
|
||||
candidates: list[str] = []
|
||||
seen: set[str] = set()
|
||||
|
||||
def _add(value) -> None:
|
||||
try:
|
||||
text = str(value or "").strip().lower()
|
||||
except Exception:
|
||||
return
|
||||
if not text or text in seen:
|
||||
return
|
||||
seen.add(text)
|
||||
candidates.append(text)
|
||||
|
||||
try:
|
||||
current_table = ctx.get_current_stage_table() or ctx.get_last_result_table()
|
||||
_add(current_table.table if current_table and hasattr(current_table, "table") else None)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
for item in selected_items or []:
|
||||
if isinstance(item, dict):
|
||||
_add(item.get("provider"))
|
||||
_add(item.get("store"))
|
||||
_add(item.get("table"))
|
||||
else:
|
||||
_add(getattr(item, "provider", None))
|
||||
_add(getattr(item, "store", None))
|
||||
_add(getattr(item, "table", None))
|
||||
|
||||
try:
|
||||
from ProviderCore.registry import get_provider, is_known_provider_name
|
||||
except Exception:
|
||||
get_provider = None # type: ignore
|
||||
is_known_provider_name = None # type: ignore
|
||||
|
||||
if get_provider is not None:
|
||||
for key in candidates:
|
||||
try:
|
||||
if is_known_provider_name is not None and (not is_known_provider_name(key)):
|
||||
continue
|
||||
except Exception:
|
||||
# If the predicate fails for any reason, fall back to legacy behavior.
|
||||
pass
|
||||
try:
|
||||
provider = get_provider(key, config)
|
||||
except Exception:
|
||||
continue
|
||||
selector = getattr(provider, "selector", None)
|
||||
if selector is None:
|
||||
continue
|
||||
try:
|
||||
handled = bool(selector(selected_items, ctx=ctx, stage_is_last=True))
|
||||
except Exception as exc:
|
||||
print(f"{key} selector failed: {exc}\n")
|
||||
return True
|
||||
if handled:
|
||||
return True
|
||||
|
||||
store_keys: list[str] = []
|
||||
for item in selected_items or []:
|
||||
if isinstance(item, dict):
|
||||
v = item.get("store")
|
||||
else:
|
||||
v = getattr(item, "store", None)
|
||||
name = str(v or "").strip()
|
||||
if name:
|
||||
store_keys.append(name)
|
||||
|
||||
if store_keys:
|
||||
try:
|
||||
from Store.registry import Store as StoreRegistry
|
||||
|
||||
store_registry = StoreRegistry(config, suppress_debug=True)
|
||||
_backend_names = list(store_registry.list_backends() or [])
|
||||
_backend_by_lower = {str(n).lower(): str(n) for n in _backend_names if str(n).strip()}
|
||||
for name in store_keys:
|
||||
resolved_name = name
|
||||
if not store_registry.is_available(resolved_name):
|
||||
resolved_name = _backend_by_lower.get(str(name).lower(), name)
|
||||
if not store_registry.is_available(resolved_name):
|
||||
continue
|
||||
backend = store_registry[resolved_name]
|
||||
selector = getattr(backend, "selector", None)
|
||||
if selector is None:
|
||||
continue
|
||||
handled = bool(selector(selected_items, ctx=ctx, stage_is_last=True))
|
||||
if handled:
|
||||
return True
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return False
|
||||
|
||||
def _maybe_enable_background_notifier(self, worker_manager: Any, config: Any, pipeline_session: Any) -> None:
|
||||
if not (pipeline_session and worker_manager and isinstance(config, dict)):
|
||||
return
|
||||
|
||||
session_worker_ids = config.get("_session_worker_ids")
|
||||
if not session_worker_ids:
|
||||
return
|
||||
|
||||
try:
|
||||
output_fn = self._toolbar_output
|
||||
quiet_mode = bool(config.get("_quiet_background_output"))
|
||||
terminal_only = quiet_mode and not output_fn
|
||||
kwargs: Dict[str, Any] = {
|
||||
"session_worker_ids": session_worker_ids,
|
||||
"only_terminal_updates": terminal_only,
|
||||
"overlay_mode": bool(output_fn),
|
||||
}
|
||||
if output_fn:
|
||||
kwargs["output"] = output_fn
|
||||
ensure_background_notifier(worker_manager, **kwargs)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@staticmethod
|
||||
def _get_raw_stage_texts(ctx: Any) -> List[str]:
|
||||
raw_stage_texts: List[str] = []
|
||||
try:
|
||||
if hasattr(ctx, "get_current_command_stages"):
|
||||
raw_stage_texts = ctx.get_current_command_stages() or []
|
||||
except Exception:
|
||||
raw_stage_texts = []
|
||||
return raw_stage_texts
|
||||
|
||||
def _maybe_apply_initial_selection(
|
||||
self,
|
||||
ctx: Any,
|
||||
config: Any,
|
||||
stages: List[List[str]],
|
||||
*,
|
||||
selection_indices: List[int],
|
||||
first_stage_had_extra_args: bool,
|
||||
worker_manager: Any,
|
||||
pipeline_session: Any,
|
||||
) -> tuple[bool, Any]:
|
||||
if not selection_indices:
|
||||
return True, None
|
||||
|
||||
try:
|
||||
if not ctx.get_current_stage_table_source_command():
|
||||
display_table = ctx.get_display_table() if hasattr(ctx, "get_display_table") else None
|
||||
table_for_stage = display_table or ctx.get_last_result_table()
|
||||
if table_for_stage:
|
||||
ctx.set_current_stage_table(table_for_stage)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
source_cmd = None
|
||||
source_args_raw = None
|
||||
try:
|
||||
source_cmd = ctx.get_current_stage_table_source_command()
|
||||
source_args_raw = ctx.get_current_stage_table_source_args()
|
||||
except Exception:
|
||||
source_cmd = None
|
||||
source_args_raw = None
|
||||
|
||||
if isinstance(source_args_raw, str):
|
||||
source_args: List[str] = [source_args_raw]
|
||||
elif isinstance(source_args_raw, list):
|
||||
source_args = [str(x) for x in source_args_raw if x is not None]
|
||||
else:
|
||||
source_args = []
|
||||
|
||||
current_table = None
|
||||
try:
|
||||
current_table = ctx.get_current_stage_table()
|
||||
except Exception:
|
||||
current_table = None
|
||||
table_type = current_table.table if current_table and hasattr(current_table, "table") else None
|
||||
|
||||
command_expanded = False
|
||||
|
||||
if table_type in {"youtube", "soulseek"}:
|
||||
command_expanded = False
|
||||
elif source_cmd == "search-file" and source_args and "youtube" in source_args:
|
||||
command_expanded = False
|
||||
else:
|
||||
selected_row_args: List[str] = []
|
||||
skip_pipe_expansion = source_cmd == ".pipe" and len(stages) > 0
|
||||
if source_cmd and not skip_pipe_expansion:
|
||||
for idx in selection_indices:
|
||||
row_args = ctx.get_current_stage_table_row_selection_args(idx)
|
||||
if row_args:
|
||||
selected_row_args.extend(row_args)
|
||||
break
|
||||
|
||||
if selected_row_args:
|
||||
if isinstance(source_cmd, list):
|
||||
cmd_list: List[str] = [str(x) for x in source_cmd if x is not None]
|
||||
elif isinstance(source_cmd, str):
|
||||
cmd_list = [source_cmd]
|
||||
else:
|
||||
cmd_list = []
|
||||
|
||||
expanded_stage: List[str] = cmd_list + source_args + selected_row_args
|
||||
|
||||
if first_stage_had_extra_args and stages:
|
||||
expanded_stage += stages[0]
|
||||
stages[0] = expanded_stage
|
||||
else:
|
||||
stages.insert(0, expanded_stage)
|
||||
|
||||
if pipeline_session and worker_manager:
|
||||
try:
|
||||
from Store.registry import Store as StoreRegistry
|
||||
|
||||
store_registry = StoreRegistry(config, suppress_debug=True)
|
||||
_backend_names = list(store_registry.list_backends() or [])
|
||||
_backend_by_lower = {str(n).lower(): str(n) for n in _backend_names if str(n).strip()}
|
||||
for name in store_keys:
|
||||
resolved_name = name
|
||||
if not store_registry.is_available(resolved_name):
|
||||
resolved_name = _backend_by_lower.get(str(name).lower(), name)
|
||||
if not store_registry.is_available(resolved_name):
|
||||
continue
|
||||
backend = store_registry[resolved_name]
|
||||
selector = getattr(backend, "selector", None)
|
||||
if selector is None:
|
||||
continue
|
||||
handled = bool(selector(selected_items, ctx=ctx, stage_is_last=True))
|
||||
if handled:
|
||||
return True
|
||||
worker_manager.log_step(
|
||||
pipeline_session.worker_id,
|
||||
f"@N expansion: {source_cmd} + {' '.join(str(x) for x in selected_row_args)}",
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return False
|
||||
selection_indices = []
|
||||
command_expanded = True
|
||||
|
||||
first_stage_tokens = stages[0] if stages else []
|
||||
first_stage_selection_indices: List[int] = []
|
||||
first_stage_had_extra_args = False
|
||||
first_stage_select_all = False
|
||||
if (not command_expanded) and selection_indices:
|
||||
last_piped_items = None
|
||||
try:
|
||||
last_piped_items = ctx.get_last_result_items()
|
||||
except Exception:
|
||||
last_piped_items = None
|
||||
|
||||
if first_stage_tokens:
|
||||
new_first_stage: List[str] = []
|
||||
for token in first_stage_tokens:
|
||||
if token.startswith("@"): # selection
|
||||
selection = SelectionSyntax.parse(token)
|
||||
if selection is not None:
|
||||
first_stage_selection_indices = sorted([i - 1 for i in selection])
|
||||
continue
|
||||
if token == "@*":
|
||||
first_stage_select_all = True
|
||||
continue
|
||||
new_first_stage.append(token)
|
||||
stage_table = None
|
||||
try:
|
||||
stage_table = ctx.get_current_stage_table()
|
||||
except Exception:
|
||||
stage_table = None
|
||||
if not stage_table and hasattr(ctx, "get_display_table"):
|
||||
try:
|
||||
stage_table = ctx.get_display_table()
|
||||
except Exception:
|
||||
stage_table = None
|
||||
if not stage_table:
|
||||
try:
|
||||
stage_table = ctx.get_last_result_table()
|
||||
except Exception:
|
||||
stage_table = None
|
||||
|
||||
if new_first_stage:
|
||||
stages[0] = new_first_stage
|
||||
if first_stage_selection_indices or first_stage_select_all:
|
||||
first_stage_had_extra_args = True
|
||||
elif first_stage_selection_indices or first_stage_select_all:
|
||||
stages.pop(0)
|
||||
resolved_items = last_piped_items if last_piped_items else []
|
||||
if last_piped_items:
|
||||
filtered = [resolved_items[i] for i in selection_indices if 0 <= i < len(resolved_items)]
|
||||
if not filtered:
|
||||
print("No items matched selection in pipeline\n")
|
||||
return False, None
|
||||
|
||||
if first_stage_select_all:
|
||||
last_items = ctx.get_last_result_items()
|
||||
if last_items:
|
||||
first_stage_selection_indices = list(range(len(last_items)))
|
||||
if PipelineExecutor._maybe_run_class_selector(ctx, config, filtered, stage_is_last=(not stages)):
|
||||
return False, None
|
||||
|
||||
from cmdlet._shared import coerce_to_pipe_object
|
||||
|
||||
filtered_pipe_objs = [coerce_to_pipe_object(item) for item in filtered]
|
||||
piped_result = filtered_pipe_objs if len(filtered_pipe_objs) > 1 else filtered_pipe_objs[0]
|
||||
|
||||
if pipeline_session and worker_manager:
|
||||
try:
|
||||
selection_parts = [f"@{i+1}" for i in selection_indices]
|
||||
worker_manager.log_step(
|
||||
pipeline_session.worker_id,
|
||||
f"Applied @N selection {' | '.join(selection_parts)}",
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Auto-insert downloader stages for provider tables.
|
||||
try:
|
||||
current_table = ctx.get_current_stage_table() or ctx.get_last_result_table()
|
||||
except Exception:
|
||||
current_table = None
|
||||
table_type = current_table.table if current_table and hasattr(current_table, "table") else None
|
||||
|
||||
if not stages:
|
||||
if table_type == "youtube":
|
||||
print("Auto-running YouTube selection via download-media")
|
||||
stages.append(["download-media"])
|
||||
elif table_type == "bandcamp":
|
||||
print("Auto-running Bandcamp selection via download-media")
|
||||
stages.append(["download-media"])
|
||||
elif table_type in {"soulseek", "openlibrary", "libgen"}:
|
||||
print("Auto-piping selection to download-file")
|
||||
stages.append(["download-file"])
|
||||
else:
|
||||
first_cmd = stages[0][0] if stages and stages[0] else None
|
||||
if table_type == "soulseek" and first_cmd not in (
|
||||
"download-file",
|
||||
"download-media",
|
||||
"download_media",
|
||||
".pipe",
|
||||
):
|
||||
debug("Auto-inserting download-file after Soulseek selection")
|
||||
stages.insert(0, ["download-file"])
|
||||
if table_type == "youtube" and first_cmd not in (
|
||||
"download-media",
|
||||
"download_media",
|
||||
"download-file",
|
||||
".pipe",
|
||||
):
|
||||
debug("Auto-inserting download-media after YouTube selection")
|
||||
stages.insert(0, ["download-media"])
|
||||
if table_type == "bandcamp" and first_cmd not in (
|
||||
"download-media",
|
||||
"download_media",
|
||||
"download-file",
|
||||
".pipe",
|
||||
):
|
||||
print("Auto-inserting download-media after Bandcamp selection")
|
||||
stages.insert(0, ["download-media"])
|
||||
if table_type == "libgen" and first_cmd not in (
|
||||
"download-file",
|
||||
"download-media",
|
||||
"download_media",
|
||||
".pipe",
|
||||
):
|
||||
print("Auto-inserting download-file after Libgen selection")
|
||||
stages.insert(0, ["download-file"])
|
||||
|
||||
return True, piped_result
|
||||
else:
|
||||
print("No previous results to select from\n")
|
||||
return False, None
|
||||
|
||||
return True, None
|
||||
|
||||
@staticmethod
|
||||
def _maybe_start_live_progress(config: Any, stages: List[List[str]]) -> tuple[Any, Dict[int, int]]:
|
||||
progress_ui = None
|
||||
pipe_index_by_stage: Dict[int, int] = {}
|
||||
|
||||
try:
|
||||
quiet_mode = bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False
|
||||
except Exception:
|
||||
quiet_mode = False
|
||||
|
||||
try:
|
||||
import sys as _sys
|
||||
|
||||
if (not quiet_mode) and bool(getattr(_sys.stderr, "isatty", lambda: False)()):
|
||||
from models import PipelineLiveProgress
|
||||
|
||||
pipe_stage_indices: List[int] = []
|
||||
pipe_labels: List[str] = []
|
||||
for idx, stage_tokens in enumerate(stages):
|
||||
if not stage_tokens:
|
||||
continue
|
||||
name = str(stage_tokens[0]).replace("_", "-").lower()
|
||||
if name == "@" or name.startswith("@"):
|
||||
continue
|
||||
# `.pipe` (MPV) is an interactive launcher; disable pipeline Live progress
|
||||
# for it because it doesn't meaningfully "complete" (mpv may keep running)
|
||||
# and Live output interferes with MPV playlist UI.
|
||||
if name == ".pipe":
|
||||
continue
|
||||
# `.matrix` uses a two-phase picker (@N then .matrix -send). Pipeline Live
|
||||
# progress can linger across those phases and interfere with interactive output.
|
||||
if name == ".matrix":
|
||||
continue
|
||||
pipe_stage_indices.append(idx)
|
||||
pipe_labels.append(name)
|
||||
|
||||
if pipe_labels:
|
||||
progress_ui = PipelineLiveProgress(pipe_labels, enabled=True)
|
||||
progress_ui.start()
|
||||
try:
|
||||
import pipeline as _pipeline_ctx
|
||||
if hasattr(_pipeline_ctx, "set_live_progress"):
|
||||
_pipeline_ctx.set_live_progress(progress_ui)
|
||||
except Exception:
|
||||
pass
|
||||
pipe_index_by_stage = {stage_idx: pipe_idx for pipe_idx, stage_idx in enumerate(pipe_stage_indices)}
|
||||
except Exception:
|
||||
progress_ui = None
|
||||
pipe_index_by_stage = {}
|
||||
|
||||
return progress_ui, pipe_index_by_stage
|
||||
|
||||
def execute_tokens(self, tokens: List[str]) -> None:
|
||||
from cmdlet import REGISTRY
|
||||
import pipeline as ctx
|
||||
|
||||
try:
|
||||
self._try_clear_pipeline_stop(ctx)
|
||||
stages = self._split_stages(tokens)
|
||||
if not stages:
|
||||
print("Invalid pipeline syntax\n")
|
||||
return
|
||||
self._maybe_seed_current_stage_table(ctx)
|
||||
stages = self._maybe_apply_pending_pipeline_tail(ctx, stages)
|
||||
config = self._config_loader.load()
|
||||
config = self._apply_quiet_background_flag(config)
|
||||
|
||||
stages, first_stage_selection_indices, first_stage_had_extra_args, first_stage_select_all = (
|
||||
self._extract_first_stage_selection_tokens(stages)
|
||||
)
|
||||
first_stage_selection_indices = self._apply_select_all_if_requested(
|
||||
ctx, first_stage_selection_indices, first_stage_select_all
|
||||
)
|
||||
|
||||
piped_result: Any = None
|
||||
worker_manager = WorkerManagerRegistry.ensure(config)
|
||||
pipeline_text = " | ".join(" ".join(stage) for stage in stages)
|
||||
pipeline_session = WorkerStages.begin_pipeline(worker_manager, pipeline_text=pipeline_text, config=config)
|
||||
|
||||
raw_stage_texts: List[str] = []
|
||||
try:
|
||||
if hasattr(ctx, "get_current_command_stages"):
|
||||
raw_stage_texts = ctx.get_current_command_stages() or []
|
||||
except Exception:
|
||||
raw_stage_texts = []
|
||||
|
||||
if pipeline_session and worker_manager and isinstance(config, dict):
|
||||
session_worker_ids = config.get("_session_worker_ids")
|
||||
if session_worker_ids:
|
||||
try:
|
||||
output_fn = self._toolbar_output
|
||||
quiet_mode = bool(config.get("_quiet_background_output"))
|
||||
terminal_only = quiet_mode and not output_fn
|
||||
kwargs: Dict[str, Any] = {
|
||||
"session_worker_ids": session_worker_ids,
|
||||
"only_terminal_updates": terminal_only,
|
||||
"overlay_mode": bool(output_fn),
|
||||
}
|
||||
if output_fn:
|
||||
kwargs["output"] = output_fn
|
||||
ensure_background_notifier(worker_manager, **kwargs)
|
||||
except Exception:
|
||||
pass
|
||||
raw_stage_texts = self._get_raw_stage_texts(ctx)
|
||||
self._maybe_enable_background_notifier(worker_manager, config, pipeline_session)
|
||||
|
||||
pipeline_status = "completed"
|
||||
pipeline_error = ""
|
||||
@@ -1462,201 +1778,24 @@ class PipelineExecutor:
|
||||
pipe_index_by_stage: Dict[int, int] = {}
|
||||
|
||||
try:
|
||||
if first_stage_selection_indices:
|
||||
if not ctx.get_current_stage_table_source_command():
|
||||
display_table = ctx.get_display_table() if hasattr(ctx, "get_display_table") else None
|
||||
table_for_stage = display_table or ctx.get_last_result_table()
|
||||
if table_for_stage:
|
||||
ctx.set_current_stage_table(table_for_stage)
|
||||
|
||||
source_cmd = ctx.get_current_stage_table_source_command()
|
||||
source_args_raw = ctx.get_current_stage_table_source_args()
|
||||
if isinstance(source_args_raw, str):
|
||||
source_args: List[str] = [source_args_raw]
|
||||
elif isinstance(source_args_raw, list):
|
||||
source_args = [str(x) for x in source_args_raw if x is not None]
|
||||
else:
|
||||
source_args = []
|
||||
|
||||
current_table = ctx.get_current_stage_table()
|
||||
table_type = current_table.table if current_table and hasattr(current_table, "table") else None
|
||||
|
||||
command_expanded = False
|
||||
|
||||
if table_type in {"youtube", "soulseek"}:
|
||||
command_expanded = False
|
||||
elif source_cmd == "search-file" and source_args and "youtube" in source_args:
|
||||
command_expanded = False
|
||||
else:
|
||||
selected_row_args: List[str] = []
|
||||
skip_pipe_expansion = source_cmd == ".pipe" and len(stages) > 0
|
||||
if source_cmd and not skip_pipe_expansion:
|
||||
for idx in first_stage_selection_indices:
|
||||
row_args = ctx.get_current_stage_table_row_selection_args(idx)
|
||||
if row_args:
|
||||
selected_row_args.extend(row_args)
|
||||
break
|
||||
|
||||
if selected_row_args:
|
||||
if isinstance(source_cmd, list):
|
||||
cmd_list: List[str] = [str(x) for x in source_cmd if x is not None]
|
||||
elif isinstance(source_cmd, str):
|
||||
cmd_list = [source_cmd]
|
||||
else:
|
||||
cmd_list = []
|
||||
|
||||
expanded_stage: List[str] = cmd_list + source_args + selected_row_args
|
||||
|
||||
if first_stage_had_extra_args and stages:
|
||||
expanded_stage += stages[0]
|
||||
stages[0] = expanded_stage
|
||||
else:
|
||||
stages.insert(0, expanded_stage)
|
||||
|
||||
if pipeline_session and worker_manager:
|
||||
try:
|
||||
worker_manager.log_step(
|
||||
pipeline_session.worker_id,
|
||||
f"@N expansion: {source_cmd} + {' '.join(str(x) for x in selected_row_args)}",
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
first_stage_selection_indices = []
|
||||
command_expanded = True
|
||||
|
||||
if not command_expanded and first_stage_selection_indices:
|
||||
last_piped_items = ctx.get_last_result_items()
|
||||
stage_table = ctx.get_current_stage_table()
|
||||
if not stage_table and hasattr(ctx, "get_display_table"):
|
||||
stage_table = ctx.get_display_table()
|
||||
if not stage_table:
|
||||
stage_table = ctx.get_last_result_table()
|
||||
|
||||
resolved_items = _resolve_items_for_selection(stage_table, last_piped_items)
|
||||
if last_piped_items:
|
||||
filtered = [
|
||||
resolved_items[i]
|
||||
for i in first_stage_selection_indices
|
||||
if 0 <= i < len(resolved_items)
|
||||
]
|
||||
if not filtered:
|
||||
print("No items matched selection in pipeline\n")
|
||||
return
|
||||
|
||||
if _maybe_run_class_selector(filtered, stage_is_last=(not stages)):
|
||||
return
|
||||
|
||||
from cmdlet._shared import coerce_to_pipe_object
|
||||
|
||||
filtered_pipe_objs = [coerce_to_pipe_object(item) for item in filtered]
|
||||
piped_result = filtered_pipe_objs if len(filtered_pipe_objs) > 1 else filtered_pipe_objs[0]
|
||||
|
||||
if pipeline_session and worker_manager:
|
||||
try:
|
||||
selection_parts = [f"@{i+1}" for i in first_stage_selection_indices]
|
||||
worker_manager.log_step(
|
||||
pipeline_session.worker_id,
|
||||
f"Applied @N selection {' | '.join(selection_parts)}",
|
||||
)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Auto-insert downloader stages for provider tables.
|
||||
current_table = ctx.get_current_stage_table() or ctx.get_last_result_table()
|
||||
table_type = current_table.table if current_table and hasattr(current_table, "table") else None
|
||||
|
||||
if not stages:
|
||||
if table_type == "youtube":
|
||||
print("Auto-running YouTube selection via download-media")
|
||||
stages.append(["download-media"])
|
||||
elif table_type == "bandcamp":
|
||||
print("Auto-running Bandcamp selection via download-media")
|
||||
stages.append(["download-media"])
|
||||
elif table_type in {"soulseek", "openlibrary", "libgen"}:
|
||||
print("Auto-piping selection to download-file")
|
||||
stages.append(["download-file"])
|
||||
else:
|
||||
first_cmd = stages[0][0] if stages and stages[0] else None
|
||||
if table_type == "soulseek" and first_cmd not in (
|
||||
"download-file",
|
||||
"download-media",
|
||||
"download_media",
|
||||
".pipe",
|
||||
):
|
||||
debug("Auto-inserting download-file after Soulseek selection")
|
||||
stages.insert(0, ["download-file"])
|
||||
if table_type == "youtube" and first_cmd not in (
|
||||
"download-media",
|
||||
"download_media",
|
||||
"download-file",
|
||||
".pipe",
|
||||
):
|
||||
debug("Auto-inserting download-media after YouTube selection")
|
||||
stages.insert(0, ["download-media"])
|
||||
if table_type == "bandcamp" and first_cmd not in (
|
||||
"download-media",
|
||||
"download_media",
|
||||
"download-file",
|
||||
".pipe",
|
||||
):
|
||||
print("Auto-inserting download-media after Bandcamp selection")
|
||||
stages.insert(0, ["download-media"])
|
||||
if table_type == "libgen" and first_cmd not in (
|
||||
"download-file",
|
||||
"download-media",
|
||||
"download_media",
|
||||
".pipe",
|
||||
):
|
||||
print("Auto-inserting download-file after Libgen selection")
|
||||
stages.insert(0, ["download-file"])
|
||||
else:
|
||||
print("No previous results to select from\n")
|
||||
return
|
||||
ok, initial_piped = self._maybe_apply_initial_selection(
|
||||
ctx,
|
||||
config,
|
||||
stages,
|
||||
selection_indices=first_stage_selection_indices,
|
||||
first_stage_had_extra_args=first_stage_had_extra_args,
|
||||
worker_manager=worker_manager,
|
||||
pipeline_session=pipeline_session,
|
||||
)
|
||||
if not ok:
|
||||
return
|
||||
if initial_piped is not None:
|
||||
piped_result = initial_piped
|
||||
|
||||
# ------------------------------------------------------------------
|
||||
# Multi-level pipeline progress (pipes = stages, tasks = items)
|
||||
# ------------------------------------------------------------------
|
||||
try:
|
||||
quiet_mode = bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False
|
||||
except Exception:
|
||||
quiet_mode = False
|
||||
|
||||
try:
|
||||
import sys as _sys
|
||||
|
||||
if (not quiet_mode) and bool(getattr(_sys.stderr, "isatty", lambda: False)()):
|
||||
from models import PipelineLiveProgress
|
||||
|
||||
pipe_stage_indices: List[int] = []
|
||||
pipe_labels: List[str] = []
|
||||
for idx, tokens in enumerate(stages):
|
||||
if not tokens:
|
||||
continue
|
||||
name = str(tokens[0]).replace("_", "-").lower()
|
||||
if name == "@" or name.startswith("@"):
|
||||
continue
|
||||
# `.pipe` (MPV) is an interactive launcher; disable pipeline Live progress
|
||||
# for it because it doesn't meaningfully "complete" (mpv may keep running)
|
||||
# and Live output interferes with MPV playlist UI.
|
||||
if name == ".pipe":
|
||||
continue
|
||||
pipe_stage_indices.append(idx)
|
||||
pipe_labels.append(name)
|
||||
|
||||
if pipe_labels:
|
||||
progress_ui = PipelineLiveProgress(pipe_labels, enabled=True)
|
||||
progress_ui.start()
|
||||
try:
|
||||
import pipeline as _pipeline_ctx
|
||||
if hasattr(_pipeline_ctx, "set_live_progress"):
|
||||
_pipeline_ctx.set_live_progress(progress_ui)
|
||||
except Exception:
|
||||
pass
|
||||
pipe_index_by_stage = {stage_idx: pipe_idx for pipe_idx, stage_idx in enumerate(pipe_stage_indices)}
|
||||
except Exception:
|
||||
progress_ui = None
|
||||
pipe_index_by_stage = {}
|
||||
progress_ui, pipe_index_by_stage = self._maybe_start_live_progress(config, stages)
|
||||
|
||||
for stage_index, stage_tokens in enumerate(stages):
|
||||
if not stage_tokens:
|
||||
@@ -1707,7 +1846,7 @@ class PipelineExecutor:
|
||||
if not stage_table:
|
||||
stage_table = ctx.get_last_result_table()
|
||||
items_list = ctx.get_last_result_items() or []
|
||||
resolved_items = _resolve_items_for_selection(stage_table, items_list)
|
||||
resolved_items = items_list if items_list else []
|
||||
filtered = [resolved_items[i] for i in selected_indices if 0 <= i < len(resolved_items)]
|
||||
if not filtered:
|
||||
print("No items matched selection\n")
|
||||
@@ -1715,7 +1854,7 @@ class PipelineExecutor:
|
||||
pipeline_error = "Empty selection"
|
||||
return
|
||||
|
||||
if _maybe_run_class_selector(filtered, stage_is_last=(stage_index + 1 >= len(stages))):
|
||||
if PipelineExecutor._maybe_run_class_selector(ctx, config, filtered, stage_is_last=(stage_index + 1 >= len(stages))):
|
||||
return
|
||||
|
||||
# Special case: selecting multiple tags from get-tag and piping into delete-tag
|
||||
@@ -1841,9 +1980,11 @@ class PipelineExecutor:
|
||||
|
||||
on_emit = None
|
||||
if progress_ui is not None and pipe_idx is not None:
|
||||
def _on_emit(obj: Any, _idx: int = int(pipe_idx)) -> None:
|
||||
_ui = cast(Any, progress_ui)
|
||||
|
||||
def _on_emit(obj: Any, _idx: int = int(pipe_idx), _progress=_ui) -> None:
|
||||
try:
|
||||
progress_ui.on_emit(_idx, obj)
|
||||
_progress.on_emit(_idx, obj)
|
||||
except Exception:
|
||||
pass
|
||||
on_emit = _on_emit
|
||||
|
||||
@@ -23,6 +23,15 @@ except ImportError:
|
||||
|
||||
|
||||
class Libgen(Provider):
|
||||
# Domains that should be routed to this provider when the user supplies a URL.
|
||||
# (Used by ProviderCore.registry.match_provider_name_for_url)
|
||||
URL_DOMAINS = (
|
||||
"libgen.gl",
|
||||
"libgen.li",
|
||||
"libgen.is",
|
||||
"libgen.rs",
|
||||
"libgen.st",
|
||||
)
|
||||
"""Search provider for Library Genesis books."""
|
||||
|
||||
def search(
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Any, Dict, List, Optional, Type
|
||||
from typing import Any, Dict, List, Optional, Type, cast
|
||||
import requests
|
||||
import sys
|
||||
import json
|
||||
import subprocess
|
||||
|
||||
from SYS.logger import log, debug
|
||||
|
||||
@@ -13,6 +15,12 @@ except ImportError: # pragma: no cover - optional
|
||||
musicbrainzngs = None
|
||||
|
||||
|
||||
try: # Optional dependency
|
||||
import yt_dlp # type: ignore
|
||||
except ImportError: # pragma: no cover - optional
|
||||
yt_dlp = None
|
||||
|
||||
|
||||
class MetadataProvider(ABC):
|
||||
"""Base class for metadata providers (music, movies, books, etc.)."""
|
||||
|
||||
@@ -351,6 +359,157 @@ class MusicBrainzMetadataProvider(MetadataProvider):
|
||||
return tags
|
||||
|
||||
|
||||
class YtdlpMetadataProvider(MetadataProvider):
|
||||
"""Metadata provider that extracts tags from a supported URL using yt-dlp.
|
||||
|
||||
This does NOT download media; it only probes metadata.
|
||||
"""
|
||||
|
||||
@property
|
||||
def name(self) -> str: # type: ignore[override]
|
||||
return "ytdlp"
|
||||
|
||||
def _extract_info(self, url: str) -> Optional[Dict[str, Any]]:
|
||||
url = (url or "").strip()
|
||||
if not url:
|
||||
return None
|
||||
|
||||
# Prefer Python module when available.
|
||||
if yt_dlp is not None:
|
||||
try:
|
||||
opts: Any = {
|
||||
"quiet": True,
|
||||
"no_warnings": True,
|
||||
"skip_download": True,
|
||||
"noprogress": True,
|
||||
"socket_timeout": 15,
|
||||
"retries": 1,
|
||||
"playlist_items": "1-10",
|
||||
}
|
||||
with yt_dlp.YoutubeDL(opts) as ydl: # type: ignore[attr-defined]
|
||||
info = ydl.extract_info(url, download=False)
|
||||
return cast(Dict[str, Any], info) if isinstance(info, dict) else None
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Fallback to CLI.
|
||||
try:
|
||||
cmd = [
|
||||
"yt-dlp",
|
||||
"-J",
|
||||
"--no-warnings",
|
||||
"--skip-download",
|
||||
"--playlist-items",
|
||||
"1-10",
|
||||
url,
|
||||
]
|
||||
proc = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
|
||||
if proc.returncode != 0:
|
||||
return None
|
||||
payload = (proc.stdout or "").strip()
|
||||
if not payload:
|
||||
return None
|
||||
data = json.loads(payload)
|
||||
return data if isinstance(data, dict) else None
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
def search(self, query: str, limit: int = 10) -> List[Dict[str, Any]]:
|
||||
url = (query or "").strip()
|
||||
if not url.startswith(("http://", "https://")):
|
||||
return []
|
||||
|
||||
info = self._extract_info(url)
|
||||
if not isinstance(info, dict):
|
||||
return []
|
||||
|
||||
upload_date = str(info.get("upload_date") or "")
|
||||
release_date = str(info.get("release_date") or "")
|
||||
year = (release_date or upload_date)[:4] if (release_date or upload_date) else ""
|
||||
|
||||
# Provide basic columns for the standard metadata selection table.
|
||||
# NOTE: This is best-effort; many extractors don't provide artist/album.
|
||||
artist = (
|
||||
info.get("artist")
|
||||
or info.get("uploader")
|
||||
or info.get("channel")
|
||||
or ""
|
||||
)
|
||||
album = info.get("album") or info.get("playlist_title") or ""
|
||||
title = info.get("title") or ""
|
||||
|
||||
return [
|
||||
{
|
||||
"title": title,
|
||||
"artist": str(artist or ""),
|
||||
"album": str(album or ""),
|
||||
"year": str(year or ""),
|
||||
"provider": self.name,
|
||||
"url": url,
|
||||
"raw": info,
|
||||
}
|
||||
]
|
||||
|
||||
def to_tags(self, item: Dict[str, Any]) -> List[str]:
|
||||
raw = item.get("raw")
|
||||
if not isinstance(raw, dict):
|
||||
return super().to_tags(item)
|
||||
|
||||
tags: List[str] = []
|
||||
try:
|
||||
from metadata import extract_ytdlp_tags
|
||||
except Exception:
|
||||
extract_ytdlp_tags = None # type: ignore[assignment]
|
||||
|
||||
if extract_ytdlp_tags:
|
||||
try:
|
||||
tags.extend(extract_ytdlp_tags(raw))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Subtitle availability tags
|
||||
def _langs(value: Any) -> List[str]:
|
||||
if not isinstance(value, dict):
|
||||
return []
|
||||
out: List[str] = []
|
||||
for k in value.keys():
|
||||
if isinstance(k, str) and k.strip():
|
||||
out.append(k.strip().lower())
|
||||
return sorted(set(out))
|
||||
|
||||
# If this is a playlist container, subtitle/captions are usually per-entry.
|
||||
info_for_subs: Dict[str, Any] = raw
|
||||
entries = raw.get("entries")
|
||||
if isinstance(entries, list) and entries:
|
||||
first = entries[0]
|
||||
if isinstance(first, dict):
|
||||
info_for_subs = first
|
||||
|
||||
for lang in _langs(info_for_subs.get("subtitles")):
|
||||
tags.append(f"subs:{lang}")
|
||||
for lang in _langs(info_for_subs.get("automatic_captions")):
|
||||
tags.append(f"subs_auto:{lang}")
|
||||
|
||||
# Always include source tag for parity with other providers.
|
||||
tags.append(f"source:{self.name}")
|
||||
|
||||
# Dedup case-insensitively, preserve order.
|
||||
seen = set()
|
||||
out: List[str] = []
|
||||
for t in tags:
|
||||
if not isinstance(t, str):
|
||||
continue
|
||||
s = t.strip()
|
||||
if not s:
|
||||
continue
|
||||
k = s.lower()
|
||||
if k in seen:
|
||||
continue
|
||||
seen.add(k)
|
||||
out.append(s)
|
||||
return out
|
||||
|
||||
|
||||
# Registry ---------------------------------------------------------------
|
||||
|
||||
_METADATA_PROVIDERS: Dict[str, Type[MetadataProvider]] = {
|
||||
@@ -359,6 +518,7 @@ _METADATA_PROVIDERS: Dict[str, Type[MetadataProvider]] = {
|
||||
"googlebooks": GoogleBooksMetadataProvider,
|
||||
"google": GoogleBooksMetadataProvider,
|
||||
"musicbrainz": MusicBrainzMetadataProvider,
|
||||
"ytdlp": YtdlpMetadataProvider,
|
||||
}
|
||||
|
||||
|
||||
@@ -370,7 +530,7 @@ def list_metadata_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str
|
||||
availability: Dict[str, bool] = {}
|
||||
for name, cls in _METADATA_PROVIDERS.items():
|
||||
try:
|
||||
provider = cls(config)
|
||||
_ = cls(config)
|
||||
# Basic availability check: perform lightweight validation if defined
|
||||
availability[name] = True
|
||||
except Exception:
|
||||
|
||||
@@ -11,7 +11,8 @@ import sys
|
||||
import tempfile
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
from typing import Any, Callable, Dict, List, Optional, Tuple
|
||||
from urllib.parse import urlparse
|
||||
|
||||
import requests
|
||||
|
||||
@@ -183,7 +184,44 @@ def _resolve_archive_id(session: requests.Session, edition_id: str, ia_candidate
|
||||
return ""
|
||||
|
||||
|
||||
def _archive_id_from_url(url: str) -> str:
|
||||
"""Best-effort extraction of an Archive.org item identifier from a URL."""
|
||||
|
||||
u = str(url or "").strip()
|
||||
if not u:
|
||||
return ""
|
||||
try:
|
||||
p = urlparse(u)
|
||||
host = (p.hostname or "").lower().strip()
|
||||
if not host.endswith("archive.org"):
|
||||
return ""
|
||||
parts = [x for x in (p.path or "").split("/") if x]
|
||||
except Exception:
|
||||
return ""
|
||||
|
||||
# Common patterns:
|
||||
# - /details/<id>/...
|
||||
# - /borrow/<id>
|
||||
# - /download/<id>/...
|
||||
if len(parts) >= 2 and parts[0].lower() in {"details", "borrow", "download", "stream"}:
|
||||
return str(parts[1]).strip()
|
||||
|
||||
# Sometimes the identifier is the first segment.
|
||||
if len(parts) >= 1:
|
||||
first = str(parts[0]).strip()
|
||||
if first and first.lower() not in {"account", "services", "search", "advancedsearch.php"}:
|
||||
return first
|
||||
|
||||
return ""
|
||||
|
||||
|
||||
class OpenLibrary(Provider):
|
||||
# Domains that should be routed to this provider when the user supplies a URL.
|
||||
# (Used by ProviderCore.registry.match_provider_name_for_url)
|
||||
URL_DOMAINS = (
|
||||
"openlibrary.org",
|
||||
"archive.org",
|
||||
)
|
||||
"""Search provider for OpenLibrary books + Archive.org direct/borrow download."""
|
||||
|
||||
def __init__(self, config: Optional[Dict[str, Any]] = None):
|
||||
@@ -311,6 +349,60 @@ class OpenLibrary(Provider):
|
||||
pass
|
||||
raise RuntimeError("Something went wrong when trying to return the book")
|
||||
|
||||
@staticmethod
|
||||
def _archive_logout(session: requests.Session) -> None:
|
||||
"""Best-effort logout from archive.org.
|
||||
|
||||
Archive sessions are cookie-based; returning the loan is the critical step.
|
||||
Logout is attempted for cleanliness but failures should not abort the workflow.
|
||||
"""
|
||||
|
||||
if session is None:
|
||||
return
|
||||
for url in (
|
||||
"https://archive.org/account/logout",
|
||||
"https://archive.org/account/logout.php",
|
||||
):
|
||||
try:
|
||||
resp = session.get(url, timeout=15, allow_redirects=True)
|
||||
code = int(getattr(resp, "status_code", 0) or 0)
|
||||
if code and code < 500:
|
||||
return
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
@staticmethod
|
||||
def _archive_is_lendable(book_id: str) -> tuple[bool, str]:
|
||||
"""Heuristic lendable check using Archive.org item metadata.
|
||||
|
||||
Some lendable items do not map cleanly to an OpenLibrary edition id.
|
||||
In practice, Archive metadata collections often include markers like:
|
||||
- inlibrary
|
||||
- printdisabled
|
||||
"""
|
||||
|
||||
ident = str(book_id or "").strip()
|
||||
if not ident:
|
||||
return False, "no-archive-id"
|
||||
try:
|
||||
resp = requests.get(f"https://archive.org/metadata/{ident}", timeout=8)
|
||||
resp.raise_for_status()
|
||||
data = resp.json() if resp is not None else {}
|
||||
meta = data.get("metadata", {}) if isinstance(data, dict) else {}
|
||||
collection = meta.get("collection") if isinstance(meta, dict) else None
|
||||
|
||||
values: List[str] = []
|
||||
if isinstance(collection, list):
|
||||
values = [str(x).strip().lower() for x in collection if str(x).strip()]
|
||||
elif isinstance(collection, str):
|
||||
values = [collection.strip().lower()]
|
||||
|
||||
if any(v in {"inlibrary", "printdisabled", "lendinglibrary"} for v in values):
|
||||
return True, "archive-collection"
|
||||
return False, "archive-not-lendable"
|
||||
except Exception:
|
||||
return False, "archive-metadata-error"
|
||||
|
||||
@staticmethod
|
||||
def _archive_get_book_infos(session: requests.Session, url: str) -> Tuple[str, List[str], Dict[str, Any]]:
|
||||
"""Extract page links from Archive.org book reader."""
|
||||
@@ -430,6 +522,7 @@ class OpenLibrary(Provider):
|
||||
links: List[str],
|
||||
scale: int,
|
||||
book_id: str,
|
||||
progress_callback: Optional[Callable[[int, int], None]] = None,
|
||||
) -> List[str]:
|
||||
links_scaled = [f"{link}&rotate=0&scale={scale}" for link in links]
|
||||
pages = len(links_scaled)
|
||||
@@ -448,7 +541,20 @@ class OpenLibrary(Provider):
|
||||
pages=pages,
|
||||
)
|
||||
)
|
||||
if tqdm:
|
||||
if progress_callback is not None:
|
||||
done = 0
|
||||
total = len(tasks)
|
||||
for fut in futures.as_completed(tasks):
|
||||
try:
|
||||
_ = fut.result()
|
||||
except Exception:
|
||||
pass
|
||||
done += 1
|
||||
try:
|
||||
progress_callback(done, total)
|
||||
except Exception:
|
||||
pass
|
||||
elif tqdm:
|
||||
for _ in tqdm(futures.as_completed(tasks), total=len(tasks)): # type: ignore
|
||||
pass
|
||||
else:
|
||||
@@ -904,15 +1010,20 @@ class OpenLibrary(Provider):
|
||||
|
||||
return results
|
||||
|
||||
def download(self, result: SearchResult, output_dir: Path) -> Optional[Path]:
|
||||
def download(
|
||||
self,
|
||||
result: SearchResult,
|
||||
output_dir: Path,
|
||||
progress_callback: Optional[Callable[[str, int, Optional[int], str], None]] = None,
|
||||
) -> Optional[Path]:
|
||||
output_dir = Path(output_dir)
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
meta = result.full_metadata or {}
|
||||
edition_id = str(meta.get("openlibrary_id") or "").strip()
|
||||
if not edition_id:
|
||||
log("[openlibrary] Missing openlibrary_id; cannot download", file=sys.stderr)
|
||||
return None
|
||||
|
||||
# Accept direct Archive.org URLs too (details/borrow/download) even when no OL edition id is known.
|
||||
archive_id = str(meta.get("archive_id") or "").strip()
|
||||
|
||||
ia_ids = meta.get("ia") or []
|
||||
if isinstance(ia_ids, str):
|
||||
@@ -921,12 +1032,23 @@ class OpenLibrary(Provider):
|
||||
ia_ids = []
|
||||
ia_candidates = [str(x) for x in ia_ids if x]
|
||||
|
||||
archive_id = _resolve_archive_id(self._session, edition_id, ia_candidates)
|
||||
if not archive_id:
|
||||
archive_id = _first_str(ia_candidates) or ""
|
||||
|
||||
if not archive_id and edition_id:
|
||||
archive_id = _resolve_archive_id(self._session, edition_id, ia_candidates)
|
||||
|
||||
if not archive_id:
|
||||
# Try to extract identifier from the SearchResult path (URL).
|
||||
archive_id = _archive_id_from_url(str(getattr(result, "path", "") or ""))
|
||||
|
||||
if not archive_id:
|
||||
log("[openlibrary] No archive identifier available; cannot download", file=sys.stderr)
|
||||
return None
|
||||
|
||||
safe_title = sanitize_filename(result.title)
|
||||
if not safe_title or "http" in safe_title.lower():
|
||||
safe_title = sanitize_filename(archive_id) or "archive"
|
||||
|
||||
# 1) Direct download if available.
|
||||
try:
|
||||
@@ -935,8 +1057,22 @@ class OpenLibrary(Provider):
|
||||
can_direct, pdf_url = False, ""
|
||||
|
||||
if can_direct and pdf_url:
|
||||
try:
|
||||
if progress_callback is not None:
|
||||
progress_callback("step", 0, None, "direct download")
|
||||
except Exception:
|
||||
pass
|
||||
out_path = unique_path(output_dir / f"{safe_title}.pdf")
|
||||
ok = download_file(pdf_url, out_path, session=self._session)
|
||||
ok = download_file(
|
||||
pdf_url,
|
||||
out_path,
|
||||
session=self._session,
|
||||
progress_callback=(
|
||||
(lambda downloaded, total, label: progress_callback("bytes", downloaded, total, label))
|
||||
if progress_callback is not None
|
||||
else None
|
||||
),
|
||||
)
|
||||
if ok:
|
||||
return out_path
|
||||
log("[openlibrary] Direct download failed", file=sys.stderr)
|
||||
@@ -949,65 +1085,131 @@ class OpenLibrary(Provider):
|
||||
log("[openlibrary] Archive credentials missing; cannot borrow", file=sys.stderr)
|
||||
return None
|
||||
|
||||
lendable, reason = _check_lendable(self._session, edition_id)
|
||||
lendable = True
|
||||
reason = ""
|
||||
if edition_id:
|
||||
lendable, reason = _check_lendable(self._session, edition_id)
|
||||
if not lendable:
|
||||
# OpenLibrary API can be a false-negative; fall back to Archive metadata.
|
||||
lendable2, reason2 = self._archive_is_lendable(archive_id)
|
||||
if lendable2:
|
||||
lendable, reason = True, reason2
|
||||
else:
|
||||
lendable, reason = self._archive_is_lendable(archive_id)
|
||||
|
||||
if not lendable:
|
||||
log(f"[openlibrary] Not lendable: {reason}", file=sys.stderr)
|
||||
return None
|
||||
|
||||
session = self._archive_login(email, password)
|
||||
loaned = False
|
||||
try:
|
||||
session = self._archive_loan(session, archive_id, verbose=False)
|
||||
except self.BookNotAvailableError:
|
||||
log("[openlibrary] Book not available to borrow", file=sys.stderr)
|
||||
return None
|
||||
except Exception:
|
||||
log("[openlibrary] Borrow failed", file=sys.stderr)
|
||||
return None
|
||||
|
||||
urls = [f"https://archive.org/borrow/{archive_id}", f"https://archive.org/details/{archive_id}"]
|
||||
title = safe_title
|
||||
links: Optional[List[str]] = None
|
||||
last_exc: Optional[Exception] = None
|
||||
for u in urls:
|
||||
try:
|
||||
title_raw, links, _metadata = self._archive_get_book_infos(session, u)
|
||||
if title_raw:
|
||||
title = sanitize_filename(title_raw)
|
||||
break
|
||||
except Exception as exc:
|
||||
last_exc = exc
|
||||
continue
|
||||
|
||||
if not links:
|
||||
log(f"[openlibrary] Failed to extract pages: {last_exc}", file=sys.stderr)
|
||||
return None
|
||||
|
||||
temp_dir = tempfile.mkdtemp(prefix=f"{title}_", dir=str(output_dir))
|
||||
try:
|
||||
images = self._archive_download(session=session, n_threads=10, directory=temp_dir, links=links, scale=3, book_id=archive_id)
|
||||
|
||||
pdf_bytes = _image_paths_to_pdf_bytes(images)
|
||||
if not pdf_bytes:
|
||||
# Keep images folder for manual conversion.
|
||||
log("[openlibrary] PDF conversion failed; keeping images folder", file=sys.stderr)
|
||||
return Path(temp_dir)
|
||||
|
||||
pdf_path = unique_path(output_dir / f"{title}.pdf")
|
||||
with open(pdf_path, "wb") as f:
|
||||
f.write(pdf_bytes)
|
||||
|
||||
try:
|
||||
shutil.rmtree(temp_dir)
|
||||
if progress_callback is not None:
|
||||
progress_callback("step", 0, None, "login")
|
||||
except Exception:
|
||||
pass
|
||||
return pdf_path
|
||||
|
||||
except Exception:
|
||||
try:
|
||||
shutil.rmtree(temp_dir)
|
||||
session = self._archive_loan(session, archive_id, verbose=False)
|
||||
loaned = True
|
||||
except self.BookNotAvailableError:
|
||||
log("[openlibrary] Book not available to borrow", file=sys.stderr)
|
||||
return None
|
||||
except Exception:
|
||||
log("[openlibrary] Borrow failed", file=sys.stderr)
|
||||
return None
|
||||
|
||||
try:
|
||||
if progress_callback is not None:
|
||||
progress_callback("step", 0, None, "borrow")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
urls = [f"https://archive.org/borrow/{archive_id}", f"https://archive.org/details/{archive_id}"]
|
||||
title = safe_title
|
||||
links: Optional[List[str]] = None
|
||||
last_exc: Optional[Exception] = None
|
||||
for u in urls:
|
||||
try:
|
||||
title_raw, links, _metadata = self._archive_get_book_infos(session, u)
|
||||
if title_raw:
|
||||
title = sanitize_filename(title_raw)
|
||||
break
|
||||
except Exception as exc:
|
||||
last_exc = exc
|
||||
continue
|
||||
|
||||
if not links:
|
||||
log(f"[openlibrary] Failed to extract pages: {last_exc}", file=sys.stderr)
|
||||
return None
|
||||
|
||||
try:
|
||||
if progress_callback is not None:
|
||||
progress_callback("step", 0, None, "download pages")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
temp_dir = tempfile.mkdtemp(prefix=f"{title}_", dir=str(output_dir))
|
||||
try:
|
||||
images = self._archive_download(
|
||||
session=session,
|
||||
n_threads=10,
|
||||
directory=temp_dir,
|
||||
links=links,
|
||||
scale=3,
|
||||
book_id=archive_id,
|
||||
progress_callback=(
|
||||
(lambda done, total: progress_callback("pages", done, total, "pages"))
|
||||
if progress_callback is not None
|
||||
else None
|
||||
),
|
||||
)
|
||||
|
||||
pdf_bytes = _image_paths_to_pdf_bytes(images)
|
||||
if not pdf_bytes:
|
||||
# Keep images folder for manual conversion.
|
||||
log("[openlibrary] PDF conversion failed; keeping images folder", file=sys.stderr)
|
||||
return Path(temp_dir)
|
||||
|
||||
try:
|
||||
if progress_callback is not None:
|
||||
progress_callback("step", 0, None, "stitch pdf")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
pdf_path = unique_path(output_dir / f"{title}.pdf")
|
||||
with open(pdf_path, "wb") as f:
|
||||
f.write(pdf_bytes)
|
||||
|
||||
try:
|
||||
shutil.rmtree(temp_dir)
|
||||
except Exception:
|
||||
pass
|
||||
return pdf_path
|
||||
|
||||
except Exception:
|
||||
try:
|
||||
shutil.rmtree(temp_dir)
|
||||
except Exception:
|
||||
pass
|
||||
raise
|
||||
finally:
|
||||
# Always return the loan after a successful borrow, even if download/stitch fails.
|
||||
if loaned:
|
||||
try:
|
||||
if progress_callback is not None:
|
||||
progress_callback("step", 0, None, "return book")
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
self._archive_return_loan(session, archive_id)
|
||||
except Exception as exc:
|
||||
log(f"[openlibrary] Warning: failed to return loan: {exc}", file=sys.stderr)
|
||||
try:
|
||||
self._archive_logout(session)
|
||||
except Exception:
|
||||
pass
|
||||
raise
|
||||
|
||||
except Exception as exc:
|
||||
log(f"[openlibrary] Borrow workflow error: {exc}", file=sys.stderr)
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
from typing import Callable, Optional
|
||||
import sys
|
||||
|
||||
import requests
|
||||
@@ -22,13 +22,20 @@ def sanitize_filename(name: str, *, max_len: int = 150) -> str:
|
||||
return cleaned[:max_len]
|
||||
|
||||
|
||||
def download_file(url: str, output_path: Path, *, session: Optional[requests.Session] = None, timeout_s: float = 30.0) -> bool:
|
||||
def download_file(
|
||||
url: str,
|
||||
output_path: Path,
|
||||
*,
|
||||
session: Optional[requests.Session] = None,
|
||||
timeout_s: float = 30.0,
|
||||
progress_callback: Optional[Callable[[int, Optional[int], str], None]] = None,
|
||||
) -> bool:
|
||||
output_path = Path(output_path)
|
||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
s = session or requests.Session()
|
||||
|
||||
bar = ProgressBar()
|
||||
bar = ProgressBar() if progress_callback is None else None
|
||||
downloaded = 0
|
||||
total = None
|
||||
|
||||
@@ -41,9 +48,14 @@ def download_file(url: str, output_path: Path, *, session: Optional[requests.Ses
|
||||
except Exception:
|
||||
total = None
|
||||
|
||||
label = str(output_path.name or "download")
|
||||
|
||||
# Render once immediately so fast downloads still show something.
|
||||
try:
|
||||
bar.update(downloaded=0, total=total, label=str(output_path.name or "download"), file=sys.stderr)
|
||||
if progress_callback is not None:
|
||||
progress_callback(0, total, label)
|
||||
elif bar is not None:
|
||||
bar.update(downloaded=0, total=total, label=label, file=sys.stderr)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -53,18 +65,23 @@ def download_file(url: str, output_path: Path, *, session: Optional[requests.Ses
|
||||
f.write(chunk)
|
||||
downloaded += len(chunk)
|
||||
try:
|
||||
bar.update(downloaded=downloaded, total=total, label=str(output_path.name or "download"), file=sys.stderr)
|
||||
if progress_callback is not None:
|
||||
progress_callback(downloaded, total, label)
|
||||
elif bar is not None:
|
||||
bar.update(downloaded=downloaded, total=total, label=label, file=sys.stderr)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
bar.finish()
|
||||
if bar is not None:
|
||||
bar.finish()
|
||||
except Exception:
|
||||
pass
|
||||
return output_path.exists() and output_path.stat().st_size > 0
|
||||
except Exception:
|
||||
try:
|
||||
bar.finish()
|
||||
if bar is not None:
|
||||
bar.finish()
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
|
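With the reworked signature, download_file only falls back to its local ProgressBar when no progress_callback is supplied; the callback receives (downloaded_bytes, total_or_None, label) once before the first chunk and after every chunk written. A minimal caller sketch; the importing module and the print-based callback are assumptions, not part of this hunk.

from pathlib import Path
from typing import Optional

def report(done: int, total: Optional[int], label: str) -> None:
    # Called before the first chunk and after each chunk is written.
    if total:
        print(f"{label}: {done}/{total} bytes")
    else:
        print(f"{label}: {done} bytes")

# Illustrative call; the module exposing download_file is not named in this hunk.
# ok = download_file("https://example.org/sample.pdf", Path("sample.pdf"),
#                    progress_callback=report)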
||||
@@ -6,8 +6,9 @@ This module is the single source of truth for provider discovery.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, Optional, Type
|
||||
from typing import Any, Dict, Optional, Sequence, Type
|
||||
import sys
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from SYS.logger import log
|
||||
|
||||
@@ -141,6 +142,45 @@ def list_file_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bo
|
||||
return availability
|
||||
|
||||
|
||||
def match_provider_name_for_url(url: str) -> Optional[str]:
|
||||
"""Return a registered provider name that claims the URL's domain.
|
||||
|
||||
Providers can declare domains via a class attribute `URL_DOMAINS` (sequence of strings).
|
||||
This matcher is intentionally cheap (no provider instantiation, no network).
|
||||
"""
|
||||
|
||||
try:
|
||||
parsed = urlparse(str(url))
|
||||
host = (parsed.hostname or "").strip().lower()
|
||||
except Exception:
|
||||
host = ""
|
||||
|
||||
if not host:
|
||||
return None
|
||||
|
||||
for name, provider_class in _PROVIDERS.items():
|
||||
domains = getattr(provider_class, "URL_DOMAINS", None)
|
||||
if not isinstance(domains, (list, tuple)):
|
||||
continue
|
||||
for d in domains:
|
||||
dom = str(d or "").strip().lower()
|
||||
if not dom:
|
||||
continue
|
||||
if host == dom or host.endswith("." + dom):
|
||||
return name
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def get_provider_for_url(url: str, config: Optional[Dict[str, Any]] = None) -> Optional[Provider]:
|
||||
"""Instantiate and return the matching provider for a URL, if any."""
|
||||
|
||||
name = match_provider_name_for_url(url)
|
||||
if not name:
|
||||
return None
|
||||
return get_provider(name, config)
|
||||
|
||||
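As the docstring above says, match_provider_name_for_url only inspects each registered class's URL_DOMAINS attribute and compares hostnames, so a provider opts in with a plain class attribute. A sketch under the assumption that registration places the class in _PROVIDERS; ExampleProvider and its domain are illustrative.

class ExampleProvider:
    """Illustrative only; a real provider also implements the Provider interface."""
    URL_DOMAINS = ("example.org",)

# Assuming _PROVIDERS maps "example" -> ExampleProvider:
#   match_provider_name_for_url("https://files.example.org/item/1")  -> "example"
#     ("files.example.org" ends with ".example.org")
#   match_provider_name_for_url("https://example.com/item/1")        -> None
#   get_provider_for_url(url, config) then instantiates via get_provider(name, config).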
|
||||
__all__ = [
|
||||
"SearchResult",
|
||||
"Provider",
|
||||
@@ -152,5 +192,7 @@ __all__ = [
|
||||
"list_search_providers",
|
||||
"get_file_provider",
|
||||
"list_file_providers",
|
||||
"match_provider_name_for_url",
|
||||
"get_provider_for_url",
|
||||
"download_soulseek_file",
|
||||
]
|
||||
|
||||
@@ -584,10 +584,15 @@ def _download_direct_file(
|
||||
filename = filename.split("?")[0]
|
||||
|
||||
# Try to get real filename from Content-Disposition header (HEAD request)
|
||||
content_type = ""
|
||||
try:
|
||||
with HTTPClient(timeout=10.0) as client:
|
||||
response = client._request("HEAD", url, follow_redirects=True)
|
||||
content_disposition = response.headers.get("content-disposition", "")
|
||||
try:
|
||||
content_type = str(response.headers.get("content-type", "") or "").strip().lower()
|
||||
except Exception:
|
||||
content_type = ""
|
||||
if content_disposition:
|
||||
# Extract filename from Content-Disposition header
|
||||
# Format: attachment; filename="filename.pdf" or filename=filename.pdf
|
||||
@@ -620,9 +625,36 @@ def _download_direct_file(
|
||||
else:
|
||||
filename = suggested
|
||||
|
||||
# Final fallback if we still don't have a good filename
|
||||
if not filename or "." not in filename:
|
||||
filename = "downloaded_file.bin"
|
||||
# If we still don't have an extension, try to infer one from Content-Type.
|
||||
# Never fall back to a generic `.bin` extension.
|
||||
try:
|
||||
has_ext = bool(filename and Path(str(filename)).suffix)
|
||||
except Exception:
|
||||
has_ext = False
|
||||
|
||||
if filename and (not has_ext):
|
||||
ct = (content_type or "").split(";")[0].strip().lower()
|
||||
ext_by_ct = {
|
||||
"application/pdf": ".pdf",
|
||||
"application/epub+zip": ".epub",
|
||||
"application/x-mobipocket-ebook": ".mobi",
|
||||
"image/jpeg": ".jpg",
|
||||
"image/png": ".png",
|
||||
"image/webp": ".webp",
|
||||
"image/gif": ".gif",
|
||||
"text/plain": ".txt",
|
||||
"application/zip": ".zip",
|
||||
}
|
||||
|
||||
if ct in ext_by_ct:
|
||||
filename = f"{filename}{ext_by_ct[ct]}"
|
||||
elif ct.startswith("text/html"):
|
||||
# Guardrail: HTML landing pages should not be downloaded as opaque files.
|
||||
raise DownloadError("URL appears to be an HTML page, not a direct file")
|
||||
|
||||
# Final guardrail: if filename is empty, refuse rather than inventing `download.bin`.
|
||||
if not filename or not str(filename).strip():
|
||||
raise DownloadError("Could not determine filename for URL (no Content-Disposition and no path filename)")
|
||||
|
||||
file_path = _unique_path(output_dir / filename)
|
||||
progress_bar = ProgressBar()
|
||||
@@ -684,9 +716,15 @@ def _download_direct_file(
|
||||
# For direct file downloads, create minimal info dict without filename as title
|
||||
# This prevents creating duplicate title: tags when filename gets auto-generated
|
||||
# We'll add title back later only if we couldn't extract meaningful tags
|
||||
ext = ""
|
||||
try:
|
||||
ext = Path(str(filename)).suffix.lstrip(".")
|
||||
except Exception:
|
||||
ext = ""
|
||||
|
||||
info = {
|
||||
"id": filename.rsplit(".", 1)[0],
|
||||
"ext": filename.rsplit(".", 1)[1] if "." in filename else "bin",
|
||||
"id": str(filename).rsplit(".", 1)[0] if "." in str(filename) else str(filename),
|
||||
"ext": ext,
|
||||
"webpage_url": url,
|
||||
}
|
||||
|
||||
|
||||
218
SYS/pipeline_progress.py
Normal file
@@ -0,0 +1,218 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
from contextlib import contextmanager
|
||||
from typing import Any, Iterator, Optional, Sequence, Tuple
|
||||
|
||||
|
||||
class PipelineProgress:
|
||||
"""Small adapter around PipelineLiveProgress.
|
||||
|
||||
This centralizes the boilerplate used across cmdlets:
|
||||
- locating the active Live UI (if any)
|
||||
- resolving the current pipe_index from stage context
|
||||
- step-based progress (begin_pipe_steps/advance_pipe_step)
|
||||
- optional pipe percent/status updates
|
||||
- optional byte transfer bars
|
||||
- optional local Live panel when a cmdlet runs standalone
|
||||
|
||||
The class is intentionally defensive: all UI operations are best-effort.
|
||||
"""
|
||||
|
||||
def __init__(self, pipeline_module: Any):
|
||||
self._ctx = pipeline_module
|
||||
self._local_ui: Optional[Any] = None
|
||||
self._local_attached: bool = False
|
||||
|
||||
def ui_and_pipe_index(self) -> Tuple[Optional[Any], int]:
|
||||
ui = None
|
||||
try:
|
||||
ui = self._ctx.get_live_progress() if hasattr(self._ctx, "get_live_progress") else None
|
||||
except Exception:
|
||||
ui = None
|
||||
|
||||
pipe_idx: int = 0
|
||||
try:
|
||||
stage_ctx = self._ctx.get_stage_context() if hasattr(self._ctx, "get_stage_context") else None
|
||||
maybe_idx = getattr(stage_ctx, "pipe_index", None) if stage_ctx is not None else None
|
||||
if isinstance(maybe_idx, int):
|
||||
pipe_idx = int(maybe_idx)
|
||||
except Exception:
|
||||
pipe_idx = 0
|
||||
|
||||
return ui, pipe_idx
|
||||
|
||||
def begin_steps(self, total_steps: int) -> None:
|
||||
ui, pipe_idx = self.ui_and_pipe_index()
|
||||
if ui is None:
|
||||
return
|
||||
try:
|
||||
begin = getattr(ui, "begin_pipe_steps", None)
|
||||
if callable(begin):
|
||||
begin(int(pipe_idx), total_steps=int(total_steps))
|
||||
except Exception:
|
||||
return
|
||||
|
||||
def step(self, text: str) -> None:
|
||||
ui, pipe_idx = self.ui_and_pipe_index()
|
||||
if ui is None:
|
||||
return
|
||||
try:
|
||||
adv = getattr(ui, "advance_pipe_step", None)
|
||||
if callable(adv):
|
||||
adv(int(pipe_idx), str(text))
|
||||
except Exception:
|
||||
return
|
||||
|
||||
def set_percent(self, percent: int) -> None:
|
||||
ui, pipe_idx = self.ui_and_pipe_index()
|
||||
if ui is None:
|
||||
return
|
||||
try:
|
||||
set_pct = getattr(ui, "set_pipe_percent", None)
|
||||
if callable(set_pct):
|
||||
set_pct(int(pipe_idx), int(percent))
|
||||
except Exception:
|
||||
return
|
||||
|
||||
def set_status(self, text: str) -> None:
|
||||
ui, pipe_idx = self.ui_and_pipe_index()
|
||||
if ui is None:
|
||||
return
|
||||
try:
|
||||
setter = getattr(ui, "set_pipe_status_text", None)
|
||||
if callable(setter):
|
||||
setter(int(pipe_idx), str(text))
|
||||
except Exception:
|
||||
return
|
||||
|
||||
def clear_status(self) -> None:
|
||||
ui, pipe_idx = self.ui_and_pipe_index()
|
||||
if ui is None:
|
||||
return
|
||||
try:
|
||||
clr = getattr(ui, "clear_pipe_status_text", None)
|
||||
if callable(clr):
|
||||
clr(int(pipe_idx))
|
||||
except Exception:
|
||||
return
|
||||
|
||||
def begin_transfer(self, *, label: str, total: Optional[int] = None) -> None:
|
||||
ui, _ = self.ui_and_pipe_index()
|
||||
if ui is None:
|
||||
return
|
||||
try:
|
||||
fn = getattr(ui, "begin_transfer", None)
|
||||
if callable(fn):
|
||||
fn(label=str(label or "transfer"), total=total)
|
||||
except Exception:
|
||||
return
|
||||
|
||||
def update_transfer(self, *, label: str, completed: Optional[int], total: Optional[int] = None) -> None:
|
||||
ui, _ = self.ui_and_pipe_index()
|
||||
if ui is None:
|
||||
return
|
||||
try:
|
||||
fn = getattr(ui, "update_transfer", None)
|
||||
if callable(fn):
|
||||
fn(label=str(label or "transfer"), completed=completed, total=total)
|
||||
except Exception:
|
||||
return
|
||||
|
||||
def finish_transfer(self, *, label: str) -> None:
|
||||
ui, _ = self.ui_and_pipe_index()
|
||||
if ui is None:
|
||||
return
|
||||
try:
|
||||
fn = getattr(ui, "finish_transfer", None)
|
||||
if callable(fn):
|
||||
fn(label=str(label or "transfer"))
|
||||
except Exception:
|
||||
return
|
||||
|
||||
def on_emit(self, emitted: Any) -> None:
|
||||
"""Advance local pipe progress after pipeline_context.emit().
|
||||
|
||||
The shared PipelineExecutor wires on_emit automatically for pipelines.
|
||||
Standalone cmdlet runs do not, so cmdlets call this explicitly.
|
||||
"""
|
||||
|
||||
if self._local_ui is None:
|
||||
return
|
||||
try:
|
||||
self._local_ui.on_emit(0, emitted)
|
||||
except Exception:
|
||||
return
|
||||
|
||||
def ensure_local_ui(self, *, label: str, total_items: int, items_preview: Optional[Sequence[Any]] = None) -> bool:
|
||||
"""Start a local PipelineLiveProgress panel if no shared UI exists."""
|
||||
|
||||
try:
|
||||
existing = self._ctx.get_live_progress() if hasattr(self._ctx, "get_live_progress") else None
|
||||
except Exception:
|
||||
existing = None
|
||||
|
||||
if existing is not None:
|
||||
return False
|
||||
if not bool(getattr(sys.stderr, "isatty", lambda: False)()):
|
||||
return False
|
||||
|
||||
try:
|
||||
from models import PipelineLiveProgress
|
||||
|
||||
ui = PipelineLiveProgress([str(label or "pipeline")], enabled=True)
|
||||
ui.start()
|
||||
try:
|
||||
if hasattr(self._ctx, "set_live_progress"):
|
||||
self._ctx.set_live_progress(ui)
|
||||
self._local_attached = True
|
||||
except Exception:
|
||||
self._local_attached = False
|
||||
|
||||
try:
|
||||
ui.begin_pipe(0, total_items=max(1, int(total_items)), items_preview=list(items_preview or []))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
self._local_ui = ui
|
||||
return True
|
||||
except Exception:
|
||||
self._local_ui = None
|
||||
self._local_attached = False
|
||||
return False
|
||||
|
||||
def close_local_ui(self, *, force_complete: bool = True) -> None:
|
||||
if self._local_ui is None:
|
||||
return
|
||||
try:
|
||||
try:
|
||||
self._local_ui.finish_pipe(0, force_complete=bool(force_complete))
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
self._local_ui.stop()
|
||||
except Exception:
|
||||
pass
|
||||
finally:
|
||||
self._local_ui = None
|
||||
try:
|
||||
if self._local_attached and hasattr(self._ctx, "set_live_progress"):
|
||||
self._ctx.set_live_progress(None)
|
||||
except Exception:
|
||||
pass
|
||||
self._local_attached = False
|
||||
|
||||
@contextmanager
|
||||
def local_ui_if_needed(
|
||||
self,
|
||||
*,
|
||||
label: str,
|
||||
total_items: int,
|
||||
items_preview: Optional[Sequence[Any]] = None,
|
||||
) -> Iterator["PipelineProgress"]:
|
||||
created = self.ensure_local_ui(label=label, total_items=total_items, items_preview=items_preview)
|
||||
try:
|
||||
yield self
|
||||
finally:
|
||||
if created:
|
||||
self.close_local_ui(force_complete=True)
|
||||
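A rough sketch of how a cmdlet is expected to drive this adapter, using only the methods defined above; the three-step split and labels mirror the add-file changes later in this commit, and the loop body is illustrative.

import pipeline as ctx
from SYS.pipeline_progress import PipelineProgress

def run_example(items):
    progress = PipelineProgress(ctx)
    # Opens a local Live panel only when no shared pipeline UI exists and stderr is a TTY.
    with progress.local_ui_if_needed(label="example", total_items=len(items)):
        progress.begin_steps(3)
        progress.step("resolving source")
        for item in items:
            # ... per-item work ...
            progress.on_emit(item)  # advances the local pipe after an emit
        progress.step("writing destination")
        progress.step("finalized")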
@@ -1585,9 +1585,46 @@ def coerce_to_pipe_object(value: Any, default_path: Optional[str] = None) -> mod
|
||||
"warnings", "path", "relationships", "is_temp", "action", "parent_hash",
|
||||
}
|
||||
|
||||
# Convert ResultItem to dict to preserve all attributes
|
||||
# Convert common object-like results into a dict so we can preserve fields like
|
||||
# hash/store/url when they come from result tables (e.g., get-url emits UrlItem).
|
||||
#
|
||||
# Priority:
|
||||
# 1) explicit to_dict()
|
||||
# 2) best-effort attribute extraction for known PipeObject-ish fields
|
||||
if hasattr(value, 'to_dict'):
|
||||
value = value.to_dict()
|
||||
elif not isinstance(value, dict):
|
||||
try:
|
||||
obj_map: Dict[str, Any] = {}
|
||||
for k in (
|
||||
"hash",
|
||||
"store",
|
||||
"provider",
|
||||
"prov",
|
||||
"tag",
|
||||
"title",
|
||||
"url",
|
||||
"source_url",
|
||||
"duration",
|
||||
"duration_seconds",
|
||||
"metadata",
|
||||
"full_metadata",
|
||||
"warnings",
|
||||
"path",
|
||||
"target",
|
||||
"relationships",
|
||||
"is_temp",
|
||||
"action",
|
||||
"parent_hash",
|
||||
"extra",
|
||||
"media_kind",
|
||||
):
|
||||
if hasattr(value, k):
|
||||
obj_map[k] = getattr(value, k)
|
||||
if obj_map:
|
||||
value = obj_map
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if isinstance(value, dict):
|
||||
# Extract hash and store (canonical identifiers)
|
||||
@@ -1695,8 +1732,19 @@ def coerce_to_pipe_object(value: Any, default_path: Optional[str] = None) -> mod
|
||||
# Fallback: build from path argument or bare value
|
||||
hash_val = "unknown"
|
||||
path_val = default_path or getattr(value, "path", None)
|
||||
url_val: Optional[str] = None
|
||||
title_val = None
|
||||
|
||||
# If the raw value is a string, treat it as either a URL or a file path.
|
||||
# This is important for @-selection results that are plain URL strings.
|
||||
if isinstance(value, str):
|
||||
s = value.strip()
|
||||
if s.lower().startswith(("http://", "https://")):
|
||||
url_val = s
|
||||
path_val = None
|
||||
else:
|
||||
path_val = s
|
||||
|
||||
if path_val and path_val != "unknown":
|
||||
try:
|
||||
from SYS.utils import sha256_file
|
||||
@@ -1708,8 +1756,9 @@ def coerce_to_pipe_object(value: Any, default_path: Optional[str] = None) -> mod
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# When coming from path argument, store should be "PATH" (file path, not a backend)
|
||||
store_val = "PATH"
|
||||
# When coming from a raw URL string, mark it explicitly as URL.
|
||||
# Otherwise treat it as a local path.
|
||||
store_val = "URL" if url_val else "PATH"
|
||||
|
||||
pipe_obj = models.PipeObject(
|
||||
hash=hash_val,
|
||||
@@ -1717,6 +1766,8 @@ def coerce_to_pipe_object(value: Any, default_path: Optional[str] = None) -> mod
|
||||
provider=None,
|
||||
path=str(path_val) if path_val and path_val != "unknown" else None,
|
||||
title=title_val,
|
||||
url=url_val,
|
||||
source_url=url_val,
|
||||
tag=[],
|
||||
extra={},
|
||||
)
|
||||
|
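Expected outcomes of the new string fallback, assuming the PipeObject constructor shown in this hunk; the inputs are illustrative and remaining fields are elided.

# coerce_to_pipe_object("https://example.org/page")
#   -> PipeObject(hash="unknown", store="URL",
#                 url="https://example.org/page", source_url="https://example.org/page",
#                 path=None, ...)
#
# coerce_to_pipe_object("C:/media/clip.mp4")
#   -> PipeObject(store="PATH", path="C:/media/clip.mp4",
#                 hash=<sha256 of the file when it exists, else "unknown">,
#                 url=None, ...)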
||||
@@ -12,6 +12,7 @@ import models
|
||||
import pipeline as ctx
|
||||
from API import HydrusNetwork as hydrus_wrapper
|
||||
from SYS.logger import log, debug
|
||||
from SYS.pipeline_progress import PipelineProgress
|
||||
from SYS.utils_constant import ALL_SUPPORTED_EXTENSIONS
|
||||
from Store import Store
|
||||
from . import _shared as sh
|
||||
@@ -73,6 +74,7 @@ class Add_File(Cmdlet):
|
||||
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
"""Main execution entry point."""
|
||||
parsed = parse_cmdlet_args(args, self)
|
||||
progress = PipelineProgress(ctx)
|
||||
|
||||
path_arg = parsed.get("path")
|
||||
location = parsed.get("store")
|
||||
@@ -80,6 +82,35 @@ class Add_File(Cmdlet):
|
||||
provider_room = parsed.get("room")
|
||||
delete_after = parsed.get("delete", False)
|
||||
|
||||
# Convenience: when piping a file into add-file, allow `-path <existing dir>`
|
||||
# to act as the destination export directory.
|
||||
# Example: screen-shot "https://..." | add-file -path "C:\Users\Admin\Desktop"
|
||||
if path_arg and not location and not provider_name:
|
||||
try:
|
||||
candidate_dir = Path(str(path_arg))
|
||||
if candidate_dir.exists() and candidate_dir.is_dir():
|
||||
piped_items = result if isinstance(result, list) else [result]
|
||||
has_local_source = False
|
||||
for it in piped_items:
|
||||
try:
|
||||
po = coerce_to_pipe_object(it, None)
|
||||
src = str(getattr(po, "path", "") or "").strip()
|
||||
if not src:
|
||||
continue
|
||||
if src.lower().startswith(("http://", "https://", "magnet:", "torrent:")):
|
||||
continue
|
||||
if Path(src).is_file():
|
||||
has_local_source = True
|
||||
break
|
||||
except Exception:
|
||||
continue
|
||||
if has_local_source:
|
||||
debug(f"[add-file] Treating -path directory as destination: {candidate_dir}")
|
||||
location = str(candidate_dir)
|
||||
path_arg = None
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
stage_ctx = ctx.get_stage_context()
|
||||
is_last_stage = (stage_ctx is None) or bool(getattr(stage_ctx, "is_last_stage", False))
|
||||
|
||||
@@ -93,7 +124,7 @@ class Add_File(Cmdlet):
|
||||
is_storage_backend_location = False
|
||||
|
||||
# Decide which items to process.
|
||||
# - If user provided -path, treat this invocation as single-item.
|
||||
# - If user provided -path (and it was not reinterpreted as destination), treat this invocation as single-item.
|
||||
# - Otherwise, if piped input is a list, ingest each item.
|
||||
if path_arg:
|
||||
items_to_process: List[Any] = [result]
|
||||
@@ -102,6 +133,17 @@ class Add_File(Cmdlet):
|
||||
else:
|
||||
items_to_process = [result]
|
||||
|
||||
# Minimal step-based progress for single-item runs.
|
||||
# Many add-file flows don't emit intermediate items, so without steps the pipe can look "stuck".
|
||||
use_steps = False
|
||||
steps_started = False
|
||||
step2_done = False
|
||||
try:
|
||||
ui, _ = progress.ui_and_pipe_index()
|
||||
use_steps = (ui is not None) and (len(items_to_process) == 1)
|
||||
except Exception:
|
||||
use_steps = False
|
||||
|
||||
debug(f"[add-file] INPUT result type={type(result).__name__}")
|
||||
if isinstance(result, list):
|
||||
debug(f"[add-file] INPUT result is list with {len(result)} items")
|
||||
@@ -235,6 +277,14 @@ class Add_File(Cmdlet):
|
||||
failures += 1
|
||||
continue
|
||||
|
||||
is_url_target = isinstance(media_path_or_url, str) and str(media_path_or_url).lower().startswith(
|
||||
("http://", "https://", "magnet:", "torrent:")
|
||||
)
|
||||
if use_steps and (not steps_started) and (not is_url_target):
|
||||
progress.begin_steps(3)
|
||||
progress.step("resolving source")
|
||||
steps_started = True
|
||||
|
||||
# Update pipe_obj with resolved path
|
||||
pipe_obj.path = str(media_path_or_url)
|
||||
|
||||
@@ -300,13 +350,34 @@ class Add_File(Cmdlet):
|
||||
pass
|
||||
|
||||
temp_dir_to_cleanup = Path(tempfile.mkdtemp(prefix="medios_openlibrary_"))
|
||||
|
||||
# Wire OpenLibrary download progress into pipeline Live UI (no tqdm spam).
|
||||
def _ol_progress(kind: str, completed: int, total: Optional[int], label: str) -> None:
|
||||
try:
|
||||
if kind == "pages" and total:
|
||||
progress.set_status(f"downloading pages {completed}/{total}")
|
||||
progress.set_percent(int(round((completed / max(1, total)) * 100.0)))
|
||||
elif kind == "bytes" and total:
|
||||
progress.set_status(f"downloading {label} {completed}/{total} bytes")
|
||||
progress.set_percent(int(round((completed / max(1, total)) * 100.0)))
|
||||
else:
|
||||
progress.set_status("downloading")
|
||||
except Exception:
|
||||
return
|
||||
|
||||
try:
|
||||
progress.set_percent(0)
|
||||
progress.set_status("downloading openlibrary")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
sr = SearchResult(
|
||||
table="openlibrary",
|
||||
title=str(getattr(pipe_obj, "title", None) or "Unknown"),
|
||||
path=str(media_path_or_url),
|
||||
full_metadata=full_metadata if isinstance(full_metadata, dict) else {},
|
||||
)
|
||||
downloaded = provider.download(sr, temp_dir_to_cleanup)
|
||||
downloaded = provider.download(sr, temp_dir_to_cleanup, progress_callback=_ol_progress)
|
||||
if downloaded is None:
|
||||
log("[add-file] OpenLibrary download failed", file=sys.stderr)
|
||||
failures += 1
|
||||
@@ -325,6 +396,13 @@ class Add_File(Cmdlet):
|
||||
pipe_obj.path = str(downloaded_path)
|
||||
delete_after_item = True
|
||||
|
||||
try:
|
||||
if ui is not None:
|
||||
ui.set_pipe_percent(int(pipe_idx), 100)
|
||||
ui.set_pipe_status_text(int(pipe_idx), "downloaded")
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# For non-provider URLs, or if still a URL after provider attempt, delegate to download-media.
|
||||
if isinstance(media_path_or_url, str) and media_path_or_url.lower().startswith(
|
||||
("http://", "https://", "magnet:", "torrent:")
|
||||
@@ -562,6 +640,10 @@ class Add_File(Cmdlet):
|
||||
failures += 1
|
||||
continue
|
||||
|
||||
if use_steps and steps_started and (not step2_done):
|
||||
progress.step("writing destination")
|
||||
step2_done = True
|
||||
|
||||
if code == 0:
|
||||
successes += 1
|
||||
else:
|
||||
@@ -619,6 +701,9 @@ class Add_File(Cmdlet):
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
if use_steps and steps_started:
|
||||
progress.step("finalized")
|
||||
|
||||
if successes > 0:
|
||||
return 0
|
||||
return 1
|
||||
|
||||
@@ -34,6 +34,19 @@ class Add_Url(sh.Cmdlet):
|
||||
"""Add URL to file via hash+store backend."""
|
||||
parsed = sh.parse_cmdlet_args(args, self)
|
||||
|
||||
# Compatibility/piping fix:
|
||||
# `SharedArgs.QUERY` is positional in the shared parser, so `add-url <url>`
|
||||
# (and `@N | add-url <url>`) can mistakenly parse the URL into `query`.
|
||||
# If `url` is missing and `query` looks like an http(s) URL, treat it as `url`.
|
||||
try:
|
||||
if (not parsed.get("url")) and isinstance(parsed.get("query"), str):
|
||||
q = str(parsed.get("query") or "").strip()
|
||||
if q.startswith(("http://", "https://")):
|
||||
parsed["url"] = q
|
||||
parsed.pop("query", None)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
query_hash = sh.parse_single_hash_query(parsed.get("query"))
|
||||
if parsed.get("query") and not query_hash:
|
||||
log("Error: -query must be of the form hash:<sha256>")
|
||||
|
||||
@@ -29,7 +29,7 @@ class Delete_Url(Cmdlet):
|
||||
arg=[
|
||||
SharedArgs.QUERY,
|
||||
SharedArgs.STORE,
|
||||
CmdletArg("url", required=True, description="URL to remove"),
|
||||
CmdletArg("url", required=False, description="URL to remove (optional when piping url rows)"),
|
||||
],
|
||||
detail=[
|
||||
"- Removes URL association from file identified by hash+store",
|
||||
@@ -69,22 +69,24 @@ class Delete_Url(Cmdlet):
|
||||
log("Error: No store name provided")
|
||||
return 1
|
||||
|
||||
if not url_arg:
|
||||
log("Error: No URL provided")
|
||||
return 1
|
||||
|
||||
# Normalize hash (single-item mode)
|
||||
if not results and file_hash:
|
||||
file_hash = normalize_hash(file_hash)
|
||||
if not file_hash:
|
||||
log("Error: Invalid hash format")
|
||||
return 1
|
||||
|
||||
# Parse url (comma-separated)
|
||||
urls = [u.strip() for u in str(url_arg).split(',') if u.strip()]
|
||||
if not urls:
|
||||
log("Error: No valid url provided")
|
||||
return 1
|
||||
|
||||
from metadata import normalize_urls
|
||||
|
||||
def _urls_from_arg(raw: Any) -> List[str]:
|
||||
if raw is None:
|
||||
return []
|
||||
# Support comma-separated input for backwards compatibility
|
||||
if isinstance(raw, str) and "," in raw:
|
||||
return [u.strip() for u in raw.split(",") if u.strip()]
|
||||
return [u.strip() for u in normalize_urls(raw) if str(u).strip()]
|
||||
|
||||
urls_from_cli = _urls_from_arg(url_arg)
|
||||
|
||||
# Get backend and delete url
|
||||
try:
|
||||
@@ -145,7 +147,17 @@ class Delete_Url(Cmdlet):
|
||||
)
|
||||
continue
|
||||
|
||||
batch.setdefault(store_text, []).append((normalized, list(urls)))
|
||||
# Determine which URLs to delete.
|
||||
# - If user passed an explicit <url>, apply it to all items.
|
||||
# - Otherwise, when piping url rows from get-url, delete the url(s) from each item.
|
||||
item_urls = list(urls_from_cli)
|
||||
if not item_urls:
|
||||
item_urls = [u.strip() for u in normalize_urls(get_field(item, "url") or get_field(item, "source_url")) if str(u).strip()]
|
||||
if not item_urls:
|
||||
ctx.print_if_visible("[delete-url] Warning: Item has no url field; skipping", file=sys.stderr)
|
||||
continue
|
||||
|
||||
batch.setdefault(store_text, []).append((normalized, item_urls))
|
||||
|
||||
for store_text, pairs in batch.items():
|
||||
try:
|
||||
@@ -168,24 +180,39 @@ class Delete_Url(Cmdlet):
|
||||
for h, ulist in bulk_pairs:
|
||||
backend.delete_url(h, ulist, config=config)
|
||||
|
||||
deleted_count = 0
|
||||
for _h, ulist in bulk_pairs:
|
||||
deleted_count += len(ulist or [])
|
||||
ctx.print_if_visible(
|
||||
f"✓ delete-url: {len(urls)} url(s) for {len(bulk_pairs)} item(s) in '{store_text}'",
|
||||
f"✓ delete-url: {deleted_count} url(s) for {len(bulk_pairs)} item(s) in '{store_text}'",
|
||||
file=sys.stderr,
|
||||
)
|
||||
|
||||
for item in pass_through:
|
||||
existing = get_field(item, "url")
|
||||
_set_item_url(item, _remove_urls(existing, list(urls)))
|
||||
# In batch mode we removed the union of requested urls for the file.
|
||||
# Using urls_from_cli (if present) matches the user's explicit intent; otherwise
|
||||
# remove the piped url row(s).
|
||||
remove_set = urls_from_cli
|
||||
if not remove_set:
|
||||
remove_set = [u.strip() for u in normalize_urls(get_field(item, "url") or get_field(item, "source_url")) if str(u).strip()]
|
||||
_set_item_url(item, _remove_urls(existing, list(remove_set)))
|
||||
ctx.emit(item)
|
||||
return 0
|
||||
|
||||
# Single-item mode
|
||||
if not urls_from_cli:
|
||||
urls_from_cli = [u.strip() for u in normalize_urls(get_field(result, "url") or get_field(result, "source_url")) if str(u).strip()]
|
||||
if not urls_from_cli:
|
||||
log("Error: No URL provided")
|
||||
return 1
|
||||
|
||||
backend = storage[str(store_name)]
|
||||
backend.delete_url(str(file_hash), urls, config=config)
|
||||
ctx.print_if_visible(f"✓ delete-url: {len(urls)} url(s) removed", file=sys.stderr)
|
||||
backend.delete_url(str(file_hash), list(urls_from_cli), config=config)
|
||||
ctx.print_if_visible(f"✓ delete-url: {len(urls_from_cli)} url(s) removed", file=sys.stderr)
|
||||
if result is not None:
|
||||
existing = get_field(result, "url")
|
||||
_set_item_url(result, _remove_urls(existing, list(urls)))
|
||||
_set_item_url(result, _remove_urls(existing, list(urls_from_cli)))
|
||||
ctx.emit(result)
|
||||
return 0
|
||||
|
||||
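With <url> now optional, two invocation styles follow from the changes above; the selection indices and URL are illustrative and reuse the @N selection syntax documented elsewhere in this commit.

# Explicit URL, applied to every selected item:
#   @1 | delete-url "https://example.org/old-mirror"
# No URL argument: each piped url row (e.g. from get-url) supplies the url(s) to remove:
#   @2 | delete-url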
|
||||
File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -126,7 +126,7 @@ class Get_File(sh.Cmdlet):
|
||||
except Exception as exc:
|
||||
log(f"Error opening browser: {exc}", file=sys.stderr)
|
||||
else:
|
||||
log(f"Opened in browser: {source_path}", file=sys.stderr)
|
||||
debug(f"Opened in browser: {source_path}", file=sys.stderr)
|
||||
|
||||
# Emit result for pipeline
|
||||
ctx.emit({
|
||||
|
||||
@@ -47,6 +47,210 @@ except ImportError:
|
||||
extract_title = None
|
||||
|
||||
|
||||
def _dedup_tags_preserve_order(tags: List[str]) -> List[str]:
|
||||
"""Deduplicate tags case-insensitively while preserving order."""
|
||||
out: List[str] = []
|
||||
seen: set[str] = set()
|
||||
for t in tags or []:
|
||||
if not isinstance(t, str):
|
||||
continue
|
||||
s = t.strip()
|
||||
if not s:
|
||||
continue
|
||||
key = s.lower()
|
||||
if key in seen:
|
||||
continue
|
||||
seen.add(key)
|
||||
out.append(s)
|
||||
return out
|
||||
|
||||
|
||||
def _extract_subtitle_tags(info: Dict[str, Any]) -> List[str]:
|
||||
"""Extract subtitle availability tags from a yt-dlp info dict.
|
||||
|
||||
Produces multi-valued tags so languages can coexist:
|
||||
- subs:<lang>
|
||||
- subs_auto:<lang>
|
||||
"""
|
||||
def _langs(value: Any) -> List[str]:
|
||||
if not isinstance(value, dict):
|
||||
return []
|
||||
langs: List[str] = []
|
||||
for k in value.keys():
|
||||
if not isinstance(k, str):
|
||||
continue
|
||||
lang = k.strip().lower()
|
||||
if lang:
|
||||
langs.append(lang)
|
||||
return sorted(set(langs))
|
||||
|
||||
out: List[str] = []
|
||||
for lang in _langs(info.get("subtitles")):
|
||||
out.append(f"subs:{lang}")
|
||||
for lang in _langs(info.get("automatic_captions")):
|
||||
out.append(f"subs_auto:{lang}")
|
||||
return out
|
||||
|
||||
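A small worked example of the tag shapes produced by _extract_subtitle_tags; the info dict and language codes are made up for illustration.

info = {
    "subtitles": {"en": [...], "de": [...]},
    "automatic_captions": {"en": [...]},
}
# _extract_subtitle_tags(info) -> ["subs:de", "subs:en", "subs_auto:en"]
# (languages are lower-cased and sorted; both namespaces can coexist)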
|
||||
def _scrape_ytdlp_info(url: str) -> Optional[Dict[str, Any]]:
|
||||
"""Fetch a yt-dlp info dict without downloading media."""
|
||||
if not isinstance(url, str) or not url.strip():
|
||||
return None
|
||||
url = url.strip()
|
||||
|
||||
# Prefer the Python module when available (faster, avoids shell quoting issues).
|
||||
try:
|
||||
import yt_dlp # type: ignore
|
||||
opts: Any = {
|
||||
"quiet": True,
|
||||
"no_warnings": True,
|
||||
"skip_download": True,
|
||||
"noprogress": True,
|
||||
"socket_timeout": 15,
|
||||
"retries": 1,
|
||||
"playlist_items": "1-10",
|
||||
}
|
||||
with yt_dlp.YoutubeDL(opts) as ydl:
|
||||
info = ydl.extract_info(url, download=False)
|
||||
return info if isinstance(info, dict) else None
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Fallback to yt-dlp CLI if the module isn't available.
|
||||
try:
|
||||
import json as json_module
|
||||
cmd = [
|
||||
"yt-dlp",
|
||||
"-J",
|
||||
"--no-warnings",
|
||||
"--skip-download",
|
||||
"--playlist-items",
|
||||
"1-10",
|
||||
url,
|
||||
]
|
||||
result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
|
||||
if result.returncode != 0:
|
||||
return None
|
||||
payload = (result.stdout or "").strip()
|
||||
if not payload:
|
||||
return None
|
||||
data = json_module.loads(payload)
|
||||
return data if isinstance(data, dict) else None
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def _resolve_candidate_urls_for_item(
|
||||
result: Any,
|
||||
backend: Any,
|
||||
file_hash: str,
|
||||
config: Dict[str, Any],
|
||||
) -> List[str]:
|
||||
"""Get candidate URLs from backend and/or piped result."""
|
||||
try:
|
||||
from metadata import normalize_urls
|
||||
except Exception:
|
||||
normalize_urls = None # type: ignore[assignment]
|
||||
|
||||
urls: List[str] = []
|
||||
# 1) Backend URL association (best source of truth)
|
||||
try:
|
||||
backend_urls = backend.get_url(file_hash, config=config)
|
||||
if backend_urls:
|
||||
if normalize_urls:
|
||||
urls.extend(normalize_urls(backend_urls))
|
||||
else:
|
||||
urls.extend([str(u).strip() for u in backend_urls if isinstance(u, str) and str(u).strip()])
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# 2) Backend metadata url field
|
||||
try:
|
||||
meta = backend.get_metadata(file_hash, config=config)
|
||||
if isinstance(meta, dict) and meta.get("url"):
|
||||
if normalize_urls:
|
||||
urls.extend(normalize_urls(meta.get("url")))
|
||||
else:
|
||||
raw = meta.get("url")
|
||||
if isinstance(raw, list):
|
||||
urls.extend([str(u).strip() for u in raw if isinstance(u, str) and str(u).strip()])
|
||||
elif isinstance(raw, str) and raw.strip():
|
||||
urls.append(raw.strip())
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# 3) Piped result fields
|
||||
def _get(obj: Any, key: str, default: Any = None) -> Any:
|
||||
if isinstance(obj, dict):
|
||||
return obj.get(key, default)
|
||||
return getattr(obj, key, default)
|
||||
|
||||
for key in ("url", "webpage_url", "source_url", "target"):
|
||||
val = _get(result, key, None)
|
||||
if not val:
|
||||
continue
|
||||
if normalize_urls:
|
||||
urls.extend(normalize_urls(val))
|
||||
continue
|
||||
if isinstance(val, str) and val.strip():
|
||||
urls.append(val.strip())
|
||||
elif isinstance(val, list):
|
||||
urls.extend([str(u).strip() for u in val if isinstance(u, str) and str(u).strip()])
|
||||
|
||||
meta_field = _get(result, "metadata", None)
|
||||
if isinstance(meta_field, dict) and meta_field.get("url"):
|
||||
val = meta_field.get("url")
|
||||
if normalize_urls:
|
||||
urls.extend(normalize_urls(val))
|
||||
elif isinstance(val, list):
|
||||
urls.extend([str(u).strip() for u in val if isinstance(u, str) and str(u).strip()])
|
||||
elif isinstance(val, str) and val.strip():
|
||||
urls.append(val.strip())
|
||||
|
||||
# Dedup
|
||||
return _dedup_tags_preserve_order(urls)
|
||||
|
||||
|
||||
def _pick_supported_ytdlp_url(urls: List[str]) -> Optional[str]:
|
||||
"""Pick the first URL that looks supported by yt-dlp (best effort)."""
|
||||
if not urls:
|
||||
return None
|
||||
|
||||
def _is_hydrus_file_url(u: str) -> bool:
|
||||
text = str(u or "").strip().lower()
|
||||
if not text:
|
||||
return False
|
||||
# Hydrus-local file URLs are retrievable blobs, not original source pages.
|
||||
# yt-dlp generally can't extract meaningful metadata from these.
|
||||
return ("/get_files/file" in text) and ("hash=" in text)
|
||||
|
||||
http_urls: List[str] = []
|
||||
for u in urls:
|
||||
text = str(u or "").strip()
|
||||
if text.lower().startswith(("http://", "https://")):
|
||||
http_urls.append(text)
|
||||
|
||||
# Prefer non-Hydrus URLs for yt-dlp scraping.
|
||||
candidates = [u for u in http_urls if not _is_hydrus_file_url(u)]
|
||||
if not candidates:
|
||||
return None
|
||||
|
||||
# Prefer a true support check when the Python module is available.
|
||||
try:
|
||||
from SYS.download import is_url_supported_by_ytdlp
|
||||
for text in candidates:
|
||||
try:
|
||||
if is_url_supported_by_ytdlp(text):
|
||||
return text
|
||||
except Exception:
|
||||
continue
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Fallback: use the first non-Hydrus http(s) URL and let extraction decide.
|
||||
return candidates[0] if candidates else None
|
||||
|
||||
|
||||
_scrape_isbn_metadata = _ol_scrape_isbn_metadata # type: ignore[assignment]
|
||||
_scrape_openlibrary_metadata = _ol_scrape_openlibrary_metadata # type: ignore[assignment]
|
||||
|
||||
@@ -853,7 +1057,12 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
scrape_url = parsed_args.get("scrape")
|
||||
scrape_requested = scrape_flag_present or scrape_url is not None
|
||||
|
||||
if scrape_requested and (not scrape_url or str(scrape_url).strip() == ""):
|
||||
# Convenience: `-scrape` with no value defaults to `ytdlp` (store-backed URL scrape).
|
||||
if scrape_flag_present and (scrape_url is None or str(scrape_url).strip() == ""):
|
||||
scrape_url = "ytdlp"
|
||||
scrape_requested = True
|
||||
|
||||
if scrape_requested and (scrape_url is None or str(scrape_url).strip() == ""):
|
||||
log("-scrape requires a URL or provider name", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
@@ -861,6 +1070,123 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
if scrape_requested and scrape_url:
|
||||
import json as json_module
|
||||
|
||||
if str(scrape_url).strip().lower() == "ytdlp":
|
||||
# Scrape metadata from the selected item's URL via yt-dlp (no download),
|
||||
# then OVERWRITE all existing tags (including title:).
|
||||
#
|
||||
# This mode requires a store-backed item (hash + store).
|
||||
#
|
||||
# NOTE: We intentionally do not reuse _scrape_url_metadata() here because it
|
||||
# performs namespace deduplication that would collapse multi-valued tags.
|
||||
file_hash = normalize_hash(hash_override) or normalize_hash(get_field(result, "hash", None))
|
||||
store_name = get_field(result, "store", None)
|
||||
subject_path = get_field(result, "path", None) or get_field(result, "target", None) or get_field(result, "filename", None)
|
||||
item_title = get_field(result, "title", None) or get_field(result, "name", None) or get_field(result, "filename", None)
|
||||
|
||||
# Only run overwrite-apply when the item is store-backed.
|
||||
# If this is a URL-only PipeObject, fall through to provider mode below.
|
||||
if file_hash and store_name and str(file_hash).strip().lower() != "unknown" and str(store_name).strip().upper() not in {"PATH", "URL"}:
|
||||
try:
|
||||
from Store import Store
|
||||
storage = Store(config)
|
||||
backend = storage[str(store_name)]
|
||||
except Exception as exc:
|
||||
log(f"Failed to resolve store backend '{store_name}': {exc}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
candidate_urls = _resolve_candidate_urls_for_item(result, backend, file_hash, config)
|
||||
scrape_target = _pick_supported_ytdlp_url(candidate_urls)
|
||||
if not scrape_target:
|
||||
log(
|
||||
"No yt-dlp-supported source URL found for this item (Hydrus /get_files/file URLs are ignored). ",
|
||||
file=sys.stderr,
|
||||
)
|
||||
log(
|
||||
"Add the original page URL to the file (e.g. via add-url), then retry get-tag -scrape.",
|
||||
file=sys.stderr,
|
||||
)
|
||||
return 1
|
||||
|
||||
info = _scrape_ytdlp_info(scrape_target)
|
||||
if not info:
|
||||
log("yt-dlp could not extract metadata for this URL (unsupported or failed)", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
try:
|
||||
from metadata import extract_ytdlp_tags
|
||||
except Exception:
|
||||
extract_ytdlp_tags = None # type: ignore[assignment]
|
||||
|
||||
# Prefer the top-level metadata, but if this is a playlist container, use
|
||||
# the first entry for per-item fields like subtitles.
|
||||
info_for_subs = info
|
||||
entries = info.get("entries") if isinstance(info, dict) else None
|
||||
if isinstance(entries, list) and entries:
|
||||
first = entries[0]
|
||||
if isinstance(first, dict):
|
||||
info_for_subs = first
|
||||
|
||||
tags: List[str] = []
|
||||
if extract_ytdlp_tags:
|
||||
try:
|
||||
tags.extend(extract_ytdlp_tags(info))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Subtitle availability tags
|
||||
try:
|
||||
tags.extend(_extract_subtitle_tags(info_for_subs if isinstance(info_for_subs, dict) else {}))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Ensure we actually have something to apply.
|
||||
tags = _dedup_tags_preserve_order(tags)
|
||||
if not tags:
|
||||
log("No tags extracted from yt-dlp metadata", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Full overwrite: delete all existing tags, then add the new set.
|
||||
try:
|
||||
existing_tags, _src = backend.get_tag(file_hash, config=config)
|
||||
except Exception:
|
||||
existing_tags = []
|
||||
try:
|
||||
if existing_tags:
|
||||
backend.delete_tag(file_hash, list(existing_tags), config=config)
|
||||
except Exception as exc:
|
||||
debug(f"[get_tag] ytdlp overwrite: delete_tag failed: {exc}")
|
||||
try:
|
||||
backend.add_tag(file_hash, list(tags), config=config)
|
||||
except Exception as exc:
|
||||
log(f"Failed to apply yt-dlp tags: {exc}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Show updated tags
|
||||
try:
|
||||
updated_tags, _src = backend.get_tag(file_hash, config=config)
|
||||
except Exception:
|
||||
updated_tags = tags
|
||||
if not updated_tags:
|
||||
updated_tags = tags
|
||||
|
||||
_emit_tags_as_table(
|
||||
tags_list=list(updated_tags),
|
||||
file_hash=file_hash,
|
||||
store=str(store_name),
|
||||
service_name=None,
|
||||
config=config,
|
||||
item_title=str(item_title or "ytdlp"),
|
||||
path=str(subject_path) if subject_path else None,
|
||||
subject={
|
||||
"hash": file_hash,
|
||||
"store": str(store_name),
|
||||
"path": str(subject_path) if subject_path else None,
|
||||
"title": item_title,
|
||||
"extra": {"applied_provider": "ytdlp", "scrape_url": scrape_target},
|
||||
},
|
||||
)
|
||||
return 0
|
||||
|
||||
if scrape_url.startswith("http://") or scrape_url.startswith("https://"):
|
||||
# URL scraping (existing behavior)
|
||||
title, tags, formats, playlist_items = _scrape_url_metadata(scrape_url)
|
||||
@@ -951,7 +1277,16 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
else:
|
||||
combined_query = f"{title_hint} {artist_hint}"
|
||||
|
||||
query_hint = identifier_query or combined_query or title_hint
|
||||
# yt-dlp isn't a search provider; it requires a URL.
|
||||
url_hint: Optional[str] = None
|
||||
if provider.name == "ytdlp":
|
||||
raw_url = get_field(result, "url", None) or get_field(result, "source_url", None) or get_field(result, "target", None)
|
||||
if isinstance(raw_url, list) and raw_url:
|
||||
raw_url = raw_url[0]
|
||||
if isinstance(raw_url, str) and raw_url.strip().startswith(("http://", "https://")):
|
||||
url_hint = raw_url.strip()
|
||||
|
||||
query_hint = url_hint or identifier_query or combined_query or title_hint
|
||||
if not query_hint:
|
||||
log("No title or identifier available to search for metadata", file=sys.stderr)
|
||||
return 1
|
||||
@@ -967,6 +1302,27 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
if not items:
|
||||
log("No metadata results found", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# For yt-dlp, emit tags directly (there is no meaningful multi-result selection step).
|
||||
if provider.name == "ytdlp":
|
||||
try:
|
||||
tags = [str(t) for t in provider.to_tags(items[0]) if t is not None]
|
||||
except Exception:
|
||||
tags = []
|
||||
if not tags:
|
||||
log("No tags extracted from yt-dlp metadata", file=sys.stderr)
|
||||
return 1
|
||||
_emit_tags_as_table(
|
||||
tags_list=list(tags),
|
||||
file_hash=None,
|
||||
store="url",
|
||||
service_name=None,
|
||||
config=config,
|
||||
item_title=str(items[0].get("title") or "ytdlp"),
|
||||
path=None,
|
||||
subject={"provider": "ytdlp", "url": str(query_hint)},
|
||||
)
|
||||
return 0
|
||||
|
||||
from result_table import ResultTable
|
||||
table = ResultTable(f"Metadata: {provider.name}")
|
||||
@@ -1040,7 +1396,10 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
return 0
|
||||
|
||||
# Apply tags to the store backend (no sidecar writing here).
|
||||
apply_tags = _filter_scraped_tags([str(t) for t in result_tags if t is not None])
|
||||
if str(result_provider).strip().lower() == "ytdlp":
|
||||
apply_tags = [str(t) for t in result_tags if t is not None]
|
||||
else:
|
||||
apply_tags = _filter_scraped_tags([str(t) for t in result_tags if t is not None])
|
||||
if not apply_tags:
|
||||
log("No applicable scraped tags to apply (title:/artist:/source: are skipped)", file=sys.stderr)
|
||||
return 0
|
||||
@@ -1167,6 +1526,11 @@ try:
|
||||
except Exception:
|
||||
_SCRAPE_CHOICES = ["itunes", "openlibrary", "googlebooks", "google", "musicbrainz"]
|
||||
|
||||
# Special scrape mode: pull tags from an item's URL via yt-dlp (no download)
|
||||
if "ytdlp" not in _SCRAPE_CHOICES:
|
||||
_SCRAPE_CHOICES.append("ytdlp")
|
||||
_SCRAPE_CHOICES = sorted(_SCRAPE_CHOICES)
|
||||
|
||||
|
||||
class Get_Tag(Cmdlet):
|
||||
"""Class-based get-tag cmdlet with self-registration."""
|
||||
@@ -1195,7 +1559,7 @@ class Get_Tag(Cmdlet):
|
||||
CmdletArg(
|
||||
name="-scrape",
|
||||
type="string",
|
||||
description="Scrape metadata from URL or provider name (returns tags as JSON or table)",
|
||||
description="Scrape metadata from URL/provider, or use 'ytdlp' to scrape from the item's URL and overwrite tags",
|
||||
required=False,
|
||||
choices=_SCRAPE_CHOICES,
|
||||
)
|
||||
|
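A minimal usage sketch of the new ytdlp scrape mode, assuming a store-backed selection; the index and URL are illustrative.

# Store-backed item: scrape its associated page URL via yt-dlp and overwrite all tags:
#   @1 | get-tag -scrape ytdlp
# If only Hydrus /get_files/file URLs are associated, add the original page URL first:
#   @1 | add-url "https://example.org/watch/abc"
#   @1 | get-tag -scrape ytdlp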
||||
@@ -14,10 +14,11 @@ import httpx
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Sequence, Tuple
|
||||
from urllib.parse import urlsplit, quote, urljoin
|
||||
from urllib.parse import urlsplit, quote, urljoin, unquote
|
||||
|
||||
from SYS.logger import log, debug
|
||||
from API.HTTP import HTTPClient
|
||||
from SYS.pipeline_progress import PipelineProgress
|
||||
from SYS.utils import ensure_directory, unique_path, unique_preserve_order
|
||||
from . import _shared as sh
|
||||
|
||||
@@ -31,54 +32,6 @@ get_field = sh.get_field
|
||||
parse_cmdlet_args = sh.parse_cmdlet_args
|
||||
import pipeline as pipeline_context
|
||||
|
||||
|
||||
def _live_ui_and_pipe_index() -> tuple[Optional[Any], int]:
|
||||
ui = None
|
||||
try:
|
||||
ui = pipeline_context.get_live_progress() if hasattr(pipeline_context, "get_live_progress") else None
|
||||
except Exception:
|
||||
ui = None
|
||||
|
||||
pipe_idx: int = 0
|
||||
try:
|
||||
stage_ctx = pipeline_context.get_stage_context() if hasattr(pipeline_context, "get_stage_context") else None
|
||||
maybe_idx = getattr(stage_ctx, "pipe_index", None) if stage_ctx is not None else None
|
||||
if isinstance(maybe_idx, int):
|
||||
pipe_idx = int(maybe_idx)
|
||||
except Exception:
|
||||
pipe_idx = 0
|
||||
|
||||
return ui, pipe_idx
|
||||
|
||||
|
||||
def _begin_live_steps(total_steps: int) -> None:
|
||||
"""Declare the total number of steps for this cmdlet run (per-pipe)."""
|
||||
ui, pipe_idx = _live_ui_and_pipe_index()
|
||||
if ui is None:
|
||||
return
|
||||
try:
|
||||
begin = getattr(ui, "begin_pipe_steps", None)
|
||||
if callable(begin):
|
||||
begin(int(pipe_idx), total_steps=int(total_steps))
|
||||
except Exception:
|
||||
return
|
||||
|
||||
|
||||
def _step(text: str) -> None:
|
||||
"""Emit a *new* step.
|
||||
|
||||
Each call increments the step counter and advances percent automatically.
|
||||
"""
|
||||
ui, pipe_idx = _live_ui_and_pipe_index()
|
||||
if ui is None:
|
||||
return
|
||||
try:
|
||||
adv = getattr(ui, "advance_pipe_step", None)
|
||||
if callable(adv):
|
||||
adv(int(pipe_idx), str(text))
|
||||
except Exception:
|
||||
return
|
||||
|
||||
# ============================================================================
|
||||
# CMDLET Metadata Declaration
|
||||
# ============================================================================
|
||||
@@ -115,6 +68,10 @@ USER_AGENT = (
|
||||
DEFAULT_VIEWPORT: dict[str, int] = {"width": 1920, "height": 1080}
|
||||
ARCHIVE_TIMEOUT = 30.0
|
||||
|
||||
# WebP has a hard maximum dimension per side.
|
||||
# Pillow typically fails with: "encoding error 5: Image size exceeds WebP limit of 16383 pixels"
|
||||
WEBP_MAX_DIM = 16_383
|
||||
|
||||
# Configurable selectors for specific websites
|
||||
SITE_SELECTORS: Dict[str, List[str]] = {
|
||||
"twitter.com": [
|
||||
@@ -200,6 +157,80 @@ def _slugify_url(url: str) -> str:
|
||||
return slug[:100]
|
||||
|
||||
|
||||
def _tags_from_url(url: str) -> List[str]:
|
||||
"""Derive simple tags from a URL.
|
||||
|
||||
- site:<domain> (strips leading www.)
|
||||
- title:<slug> derived from the last path segment, with extension removed
|
||||
and separators (-, _, %) normalized to spaces.
|
||||
"""
|
||||
|
||||
u = str(url or "").strip()
|
||||
if not u:
|
||||
return []
|
||||
|
||||
parsed = None
|
||||
try:
|
||||
parsed = urlsplit(u)
|
||||
host = str(getattr(parsed, "hostname", None) or getattr(parsed, "netloc", "") or "").strip().lower()
|
||||
except Exception:
|
||||
parsed = None
|
||||
host = ""
|
||||
|
||||
if host:
|
||||
# Drop credentials and port if present.
|
||||
if "@" in host:
|
||||
host = host.rsplit("@", 1)[-1]
|
||||
if ":" in host:
|
||||
host = host.split(":", 1)[0]
|
||||
if host.startswith("www."):
|
||||
host = host[len("www.") :]
|
||||
|
||||
path = ""
|
||||
if parsed is not None:
|
||||
try:
|
||||
path = str(getattr(parsed, "path", "") or "")
|
||||
except Exception:
|
||||
path = ""
|
||||
|
||||
last = ""
|
||||
if path:
|
||||
try:
|
||||
last = path.rsplit("/", 1)[-1]
|
||||
except Exception:
|
||||
last = ""
|
||||
|
||||
try:
|
||||
last = unquote(last or "")
|
||||
except Exception:
|
||||
last = last or ""
|
||||
|
||||
if last and "." in last:
|
||||
# Drop a single trailing extension (e.g. .html, .php).
|
||||
last = last.rsplit(".", 1)[0]
|
||||
|
||||
for sep in ("_", "-", "%"):
|
||||
if last and sep in last:
|
||||
last = last.replace(sep, " ")
|
||||
|
||||
title = " ".join(str(last or "").split()).strip().lower()
|
||||
|
||||
tags: List[str] = []
|
||||
if host:
|
||||
tags.append(f"site:{host}")
|
||||
if title:
|
||||
tags.append(f"title:{title}")
|
||||
return tags
|
||||
|
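A worked example of the site:/title: derivation described in the docstring above; the URL is illustrative.

# _tags_from_url("https://www.example.org/blog/My_First-Post.html")
#   -> ["site:example.org", "title:my first post"]
# _title_from_url (defined just below) returns only "my first post".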
||||
|
||||
def _title_from_url(url: str) -> str:
|
||||
"""Return the normalized title derived from a URL's last path segment."""
|
||||
for t in _tags_from_url(url):
|
||||
if str(t).lower().startswith("title:"):
|
||||
return str(t)[len("title:") :].strip()
|
||||
return ""
|
||||
|
||||
|
||||
def _normalise_format(fmt: Optional[str]) -> str:
|
||||
"""Normalize output format to valid values."""
|
||||
if not fmt:
|
||||
@@ -218,6 +249,89 @@ def _format_suffix(fmt: str) -> str:
|
||||
return ".jpg"
|
||||
return f".{fmt}"
|
||||
|
||||
|
||||
def _convert_to_webp(
|
||||
src_png: Path,
|
||||
dst_webp: Path,
|
||||
*,
|
||||
quality: int = 90,
|
||||
method: int = 6,
|
||||
max_dim: int = WEBP_MAX_DIM,
|
||||
downscale_if_oversize: bool = True,
|
||||
) -> bool:
|
||||
"""Convert a PNG screenshot to WebP via Pillow.
|
||||
|
||||
Playwright does not currently support emitting WebP directly.
|
||||
"""
|
||||
if not src_png or not Path(src_png).is_file():
|
||||
raise ScreenshotError(f"Source image not found: {src_png}")
|
||||
|
||||
dst_webp = Path(dst_webp)
|
||||
try:
|
||||
dst_webp.parent.mkdir(parents=True, exist_ok=True)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
from PIL import Image
|
||||
except Exception as exc:
|
||||
raise ScreenshotError(f"Pillow is required for webp conversion: {exc}") from exc
|
||||
|
||||
# Write atomically to avoid partial files if conversion is interrupted.
|
||||
tmp_path = unique_path(dst_webp.with_suffix(".tmp.webp"))
|
||||
try:
|
||||
with Image.open(src_png) as im:
|
||||
did_downscale = False
|
||||
save_kwargs: Dict[str, Any] = {
|
||||
"format": "WEBP",
|
||||
"quality": int(quality),
|
||||
"method": int(method),
|
||||
}
|
||||
|
||||
# Preserve alpha when present; Pillow handles it for WEBP.
|
||||
# Normalize palette images to RGBA to avoid odd palette artifacts.
|
||||
if im.mode == "P":
|
||||
im = im.convert("RGBA")
|
||||
|
||||
# WebP enforces a hard max dimension per side (16383px).
|
||||
# When full-page captures are very tall, downscale proportionally to fit.
|
||||
try:
|
||||
w, h = im.size
|
||||
except Exception:
|
||||
w, h = 0, 0
|
||||
|
||||
if downscale_if_oversize and isinstance(max_dim, int) and max_dim > 0 and (w > max_dim or h > max_dim):
|
||||
scale = 1.0
|
||||
try:
|
||||
scale = min(float(max_dim) / float(w), float(max_dim) / float(h))
|
||||
except Exception:
|
||||
scale = 1.0
|
||||
|
||||
if scale > 0.0 and scale < 1.0:
|
||||
new_w = max(1, int(w * scale))
|
||||
new_h = max(1, int(h * scale))
|
||||
debug(
|
||||
f"[_convert_to_webp] Image exceeds WebP limit ({w}x{h}); downscaling -> {new_w}x{new_h}"
|
||||
)
|
||||
try:
|
||||
resample = getattr(getattr(Image, "Resampling", Image), "LANCZOS", None)
|
||||
if resample is None:
|
||||
resample = getattr(Image, "LANCZOS", 1)
|
||||
im = im.resize((new_w, new_h), resample=resample)
|
||||
did_downscale = True
|
||||
except Exception as exc:
|
||||
debug(f"[_convert_to_webp] Downscale failed; attempting direct WEBP save anyway: {exc}")
|
||||
|
||||
im.save(tmp_path, **save_kwargs)
|
||||
|
||||
tmp_path.replace(dst_webp)
|
||||
return bool(did_downscale)
|
||||
finally:
|
||||
try:
|
||||
tmp_path.unlink(missing_ok=True)
|
||||
except Exception:
|
||||
pass
|
||||
|
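A worked example of the oversize handling in _convert_to_webp; the dimensions are illustrative.

# A 1280 x 40_000 px full-page capture exceeds WEBP_MAX_DIM (16_383) on its long side:
#   scale = min(16383 / 1280, 16383 / 40000) ~= 0.4096
#   the image is resized to roughly 524 x 16383 before the WEBP save,
#   and _convert_to_webp returns True to signal the downscale.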
||||
def _matched_site_selectors(url: str) -> List[str]:
|
||||
"""Return SITE_SELECTORS for a matched domain; empty if no match.
|
||||
|
||||
@@ -231,6 +345,16 @@ def _matched_site_selectors(url: str) -> List[str]:
|
||||
return sels
|
||||
|
||||
|
||||
def _selectors_for_url(url: str) -> List[str]:
|
||||
"""Return selectors to try for a URL.
|
||||
|
||||
For now, prefer a minimal behavior: only return known SITE_SELECTORS.
|
||||
(The cmdlet already falls back to full-page capture when no selectors match.)
|
||||
"""
|
||||
|
||||
return _matched_site_selectors(url)
|
||||
|
||||
|
||||
def _platform_preprocess(url: str, page: Any, warnings: List[str], timeout_ms: int = 10_000) -> None:
|
||||
"""Best-effort page tweaks for popular platforms before capture."""
|
||||
try:
|
||||
@@ -366,11 +490,11 @@ def _prepare_output_path(options: ScreenshotOptions) -> Path:
|
||||
return unique_path(path)
|
||||
|
||||
|
||||
def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str]) -> None:
|
||||
def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str], progress: PipelineProgress) -> None:
|
||||
"""Capture screenshot using Playwright."""
|
||||
debug(f"[_capture] Starting capture for {options.url} -> {destination}")
|
||||
try:
|
||||
_step("loading launching browser")
|
||||
progress.step("loading launching browser")
|
||||
tool = options.playwright_tool or PlaywrightTool({})
|
||||
|
||||
# Ensure Chromium engine is used for the screen-shot cmdlet (force for consistency)
|
||||
@@ -405,16 +529,16 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str])
|
||||
|
||||
try:
|
||||
with tool.open_page(headless=headless) as page:
|
||||
_step("loading navigating")
|
||||
progress.step("loading navigating")
|
||||
debug(f"Navigating to {options.url}...")
|
||||
try:
|
||||
tool.goto(page, options.url)
|
||||
debug("Page loaded successfully")
|
||||
_step("loading page loaded")
|
||||
progress.step("loading page loaded")
|
||||
except PlaywrightTimeoutError:
|
||||
warnings.append("navigation timeout; capturing current page state")
|
||||
debug("Navigation timeout; proceeding with current state")
|
||||
_step("loading navigation timeout")
|
||||
progress.step("loading navigation timeout")
|
||||
|
||||
# Skip article lookup by default (wait_for_article defaults to False)
|
||||
if options.wait_for_article:
|
||||
@@ -430,9 +554,9 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str])
|
||||
debug(f"Waiting {options.wait_after_load}s for page stabilization...")
|
||||
time.sleep(min(10.0, max(0.0, options.wait_after_load)))
|
||||
|
||||
_step("loading stabilized")
|
||||
progress.step("loading stabilized")
|
||||
|
||||
_step("capturing preparing")
|
||||
progress.step("capturing preparing")
|
||||
if options.replace_video_posters:
|
||||
debug("Replacing video elements with posters...")
|
||||
page.evaluate(
|
||||
@@ -453,7 +577,7 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str])
             if options.prefer_platform_target and format_name != "pdf":
                 debug(f"[_capture] Target capture enabled")
                 debug("Attempting platform-specific content capture...")
-                _step("capturing locating target")
+                progress.step("capturing locating target")
                 try:
                     _platform_preprocess(options.url, page, warnings)
                 except Exception as e:
@@ -478,7 +602,7 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str])
                             el.scroll_into_view_if_needed(timeout=1000)
                         except Exception:
                             pass
-                        _step("capturing output")
+                        progress.step("capturing output")
                         debug(f"Capturing element to {destination}...")
                         el.screenshot(path=str(destination), type=("jpeg" if format_name == "jpeg" else None))
                         element_captured = True
@@ -489,14 +613,14 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str])
                     debug(f"Failed to capture element: {exc}")
                     # Fallback to default capture paths
             if element_captured:
-                _step("capturing saved")
+                progress.step("capturing saved")
             elif format_name == "pdf":
                 debug("Generating PDF...")
                 page.emulate_media(media="print")
-                _step("capturing output")
+                progress.step("capturing output")
                 page.pdf(path=str(destination), print_background=True)
                 debug(f"PDF saved to {destination}")
-                _step("capturing saved")
+                progress.step("capturing saved")
             else:
                 debug(f"Capturing full page to {destination}...")
                 screenshot_kwargs: Dict[str, Any] = {"path": str(destination)}
@@ -504,20 +628,20 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str])
                     screenshot_kwargs["type"] = "jpeg"
                     screenshot_kwargs["quality"] = 90
                 if options.full_page:
-                    _step("capturing output")
+                    progress.step("capturing output")
                     page.screenshot(full_page=True, **screenshot_kwargs)
                 else:
                     article = page.query_selector("article")
                     if article is not None:
                         article_kwargs = dict(screenshot_kwargs)
                         article_kwargs.pop("full_page", None)
-                        _step("capturing output")
+                        progress.step("capturing output")
                         article.screenshot(**article_kwargs)
                     else:
-                        _step("capturing output")
+                        progress.step("capturing output")
                         page.screenshot(**screenshot_kwargs)
                 debug(f"Screenshot saved to {destination}")
-                _step("capturing saved")
+                progress.step("capturing saved")
     except Exception as exc:
         debug(f"[_capture] Exception launching browser/page: {exc}")
         msg = str(exc).lower()
@@ -532,7 +656,7 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str])
         raise ScreenshotError(f"Failed to capture screenshot: {exc}") from exc


-def _capture_screenshot(options: ScreenshotOptions) -> ScreenshotResult:
+def _capture_screenshot(options: ScreenshotOptions, progress: PipelineProgress) -> ScreenshotResult:
     """Capture a screenshot for the given options."""
     debug(f"[_capture_screenshot] Preparing capture for {options.url}")
     requested_format = _normalise_format(options.output_format)
@@ -543,8 +667,8 @@ def _capture_screenshot(options: ScreenshotOptions) -> ScreenshotResult:
     will_convert = requested_format == "webp"
     will_archive = bool(options.archive and options.url)
     total_steps = 9 + (1 if will_target else 0) + (1 if will_convert else 0) + (1 if will_archive else 0)
-    _begin_live_steps(total_steps)
-    _step("loading starting")
+    progress.begin_steps(total_steps)
+    progress.step("loading starting")

     # Playwright screenshots do not natively support WebP output.
     # Capture as PNG, then convert via Pillow.
@@ -553,17 +677,22 @@ def _capture_screenshot(options: ScreenshotOptions) -> ScreenshotResult:
         capture_path = unique_path(destination.with_suffix(".png"))
         debug(f"[_capture_screenshot] Requested webp; capturing intermediate png -> {capture_path}")
         options.output_format = "png"
-    _capture(options, capture_path, warnings)
+    _capture(options, capture_path, warnings, progress)

     if requested_format == "webp":
-        _step("capturing converting to webp")
+        progress.step("capturing converting to webp")
         debug(f"[_capture_screenshot] Converting png -> webp: {destination}")
         try:
-            _convert_to_webp(capture_path, destination)
-            try:
-                capture_path.unlink(missing_ok=True)
-            except Exception:
-                pass
+            did_downscale = _convert_to_webp(capture_path, destination)
+            if did_downscale:
+                warnings.append(
+                    f"webp conversion used downscaling to fit {WEBP_MAX_DIM}px limit; keeping original png: {capture_path.name}"
+                )
+            else:
+                try:
+                    capture_path.unlink(missing_ok=True)
+                except Exception:
+                    pass
         except Exception as exc:
             warnings.append(f"webp conversion failed; keeping png: {exc}")
             destination = capture_path
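The hunk above changes `_convert_to_webp` to report whether it had to downscale the image to satisfy WebP's per-side dimension limit; the helper's body is not part of this hunk. Below is a minimal sketch of such a helper, assuming Pillow is available; the `WEBP_MAX_DIM` value, quality settings, and resampling choice here are assumptions, not the project's actual implementation.

```python
# Minimal sketch (not the project's actual helper): convert a PNG to WebP,
# downscaling only when a side exceeds WEBP_MAX_DIM, and report whether it did.
from pathlib import Path
from PIL import Image  # assumption: Pillow is installed

WEBP_MAX_DIM = 16383  # WebP's hard per-side pixel limit


def _convert_to_webp_sketch(src_png: Path, dst_webp: Path) -> bool:
    """Return True if the image had to be downscaled to fit the WebP limit."""
    with Image.open(src_png) as im:
        width, height = im.size
        scale = min(1.0, WEBP_MAX_DIM / max(width, height))
        did_downscale = scale < 1.0
        if did_downscale:
            im = im.resize((int(width * scale), int(height * scale)), Image.LANCZOS)
        im.save(dst_webp, format="WEBP", quality=90, method=6)
    return did_downscale
```

Returning the flag lets the caller keep the original PNG (as the hunk does) whenever lossy downscaling was required.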
@@ -572,7 +701,7 @@ def _capture_screenshot(options: ScreenshotOptions) -> ScreenshotResult:
     url: List[str] = [options.url] if options.url else []
     archive_url: List[str] = []
     if options.archive and options.url:
-        _step("capturing archiving")
+        progress.step("capturing archiving")
         debug(f"[_capture_screenshot] Archiving enabled for {options.url}")
         archives, archive_warnings = _archive_url(options.url, options.archive_timeout)
         archive_url.extend(archives)
@@ -580,7 +709,7 @@ def _capture_screenshot(options: ScreenshotOptions) -> ScreenshotResult:
         if archives:
             url = unique_preserve_order([*url, *archives])

-    _step("capturing finalized")
+    progress.step("capturing finalized")

     applied_tag = unique_preserve_order(list(tag for tag in options.tag if tag.strip()))

@@ -627,6 +756,8 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
         )
         return 1

+    progress = PipelineProgress(pipeline_context)
+
     # ========================================================================
     # ARGUMENT PARSING
     # ========================================================================
@@ -685,32 +816,6 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:

     debug(f"[_run] url to process: {[u for u, _ in url_to_process]}")

-    # If the caller isn't running the shared pipeline Live progress UI (e.g. direct
-    # cmdlet execution), start a minimal local pipeline progress panel so this cmdlet
-    # still shows step-level progress.
-    local_progress_ui = None
-    try:
-        existing_ui = pipeline_context.get_live_progress() if hasattr(pipeline_context, "get_live_progress") else None
-    except Exception:
-        existing_ui = None
-    try:
-        if existing_ui is None and bool(getattr(sys.stderr, "isatty", lambda: False)()):
-            from models import PipelineLiveProgress
-
-            local_progress_ui = PipelineLiveProgress(["screen-shot"], enabled=True)
-            local_progress_ui.start()
-            try:
-                if hasattr(pipeline_context, "set_live_progress"):
-                    pipeline_context.set_live_progress(local_progress_ui)
-            except Exception:
-                pass
-            try:
-                local_progress_ui.begin_pipe(0, total_items=len(url_to_process), items_preview=[u for u, _ in url_to_process])
-            except Exception:
-                pass
-    except Exception:
-        local_progress_ui = None
-
     # ========================================================================
     # OUTPUT DIRECTORY RESOLUTION - Priority chain
     # ========================================================================
@@ -749,6 +854,18 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:

     ensure_directory(screenshot_dir)

+    # If the caller isn't running the shared pipeline Live progress UI (e.g. direct
+    # cmdlet execution), start a minimal local pipeline progress panel so this cmdlet
+    # still shows step-level progress.
+    try:
+        progress.ensure_local_ui(
+            label="screen-shot",
+            total_items=len(url_to_process),
+            items_preview=[u for u, _ in url_to_process],
+        )
+    except Exception:
+        pass
+
     # ========================================================================
     # PREPARE SCREENSHOT OPTIONS
     # ========================================================================
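This commit folds the hand-rolled `local_progress_ui` setup (deleted two hunks above) and the module-level `_step`/`_begin_live_steps` helpers into a `PipelineProgress` wrapper built from `pipeline_context`. The wrapper's implementation is not shown in this diff; the sketch below reconstructs only the local-UI half of its interface from the code the commit removes, so method bodies and names beyond the call sites seen here are assumptions.

```python
# Sketch only: plausible ensure_local_ui()/close_local_ui() behavior for the new
# PipelineProgress wrapper, reconstructed from the block this commit deletes.
import sys
from typing import Any, List, Optional


class PipelineProgressLocalUiSketch:
    def __init__(self, pipeline_context: Any) -> None:
        self._ctx = pipeline_context
        self._local_ui: Optional[Any] = None

    def ensure_local_ui(self, label: str, total_items: int, items_preview: List[str]) -> None:
        # Only start a local panel when no shared pipeline UI exists and stderr is a TTY.
        existing = self._ctx.get_live_progress() if hasattr(self._ctx, "get_live_progress") else None
        if existing is not None or not getattr(sys.stderr, "isatty", lambda: False)():
            return
        from models import PipelineLiveProgress

        self._local_ui = PipelineLiveProgress([label], enabled=True)
        self._local_ui.start()
        if hasattr(self._ctx, "set_live_progress"):
            self._ctx.set_live_progress(self._local_ui)
        self._local_ui.begin_pipe(0, total_items=total_items, items_preview=items_preview)

    def close_local_ui(self, force_complete: bool = True) -> None:
        # Mirror the removed cleanup: finish the pipe, stop the panel, detach it.
        if self._local_ui is None:
            return
        try:
            self._local_ui.finish_pipe(0, force_complete=force_complete)
        finally:
            self._local_ui.stop()
            if hasattr(self._ctx, "set_live_progress"):
                self._ctx.set_live_progress(None)
            self._local_ui = None
```

Centralizing this in one object lets every call site stay a single line (`progress.step(...)`, `progress.on_emit(...)`) regardless of whether the shared or a local UI is active.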
@@ -850,7 +967,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
                 options.target_selectors = auto_selectors
                 debug(f"[screen_shot] Auto selectors matched for url: {auto_selectors}")

-            screenshot_result = _capture_screenshot(options)
+            screenshot_result = _capture_screenshot(options, progress)

             # Log results and warnings
             debug(f"Screenshot captured to {screenshot_result.path}")
@@ -875,15 +992,18 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
             capture_date = datetime.now().date().isoformat()

             upstream_title = _clean_title(_extract_item_title(origin_item))
-            display_title = upstream_title or url
+            url_title = _title_from_url(url)
+            display_title = upstream_title or url_title or url

             upstream_tags = _extract_item_tags(origin_item)
             filtered_upstream_tags = [
                 t for t in upstream_tags
                 if not str(t).strip().lower().startswith(("type:", "date:"))
             ]

+            url_tags = _tags_from_url(url)
             merged_tags = unique_preserve_order(
-                ["type:screenshot", f"date:{capture_date}"] + filtered_upstream_tags
+                ["type:screenshot", f"date:{capture_date}"] + filtered_upstream_tags + url_tags
             )

             pipe_obj = create_pipe_object_result(
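The new `_title_from_url` and `_tags_from_url` helpers are called above but defined elsewhere in the commit. A hypothetical sketch of what such URL-derived fallbacks commonly do is below; the exact tag scheme (e.g. a `site:` namespace) and title rules are assumptions for illustration, not the project's actual behavior.

```python
# Hypothetical sketch of URL-derived title/tag fallbacks; the real helpers may differ.
from urllib.parse import urlsplit


def _title_from_url_sketch(url: str) -> str:
    """Derive a readable title from the last URL path segment (may be empty)."""
    segment = urlsplit(url).path.rstrip("/").rsplit("/", 1)[-1]
    return segment.replace("-", " ").replace("_", " ").strip()


def _tags_from_url_sketch(url: str) -> list[str]:
    """Derive simple provenance tags from the URL, e.g. the source domain."""
    host = (urlsplit(url).hostname or "").removeprefix("www.")
    return [f"site:{host}"] if host else []
```

With fallbacks like these, a screenshot of a bare URL still gets a human-readable title and at least one searchable tag even when no upstream item metadata is available.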
@@ -910,11 +1030,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
             all_emitted.append(pipe_obj)

             # If we created a local progress UI, advance it per completed item.
-            if local_progress_ui is not None:
-                try:
-                    local_progress_ui.on_emit(0, pipe_obj)
-                except Exception:
-                    pass
+            progress.on_emit(pipe_obj)

         except ScreenshotError as exc:
             log(f"Error taking screenshot of {url}: {exc}", file=sys.stderr)
@@ -925,23 +1041,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
             traceback.print_exc(file=sys.stderr)
             exit_code = 1

-    try:
-        if local_progress_ui is not None:
-            try:
-                local_progress_ui.finish_pipe(0, force_complete=True)
-            except Exception:
-                pass
-    finally:
-        if local_progress_ui is not None:
-            try:
-                local_progress_ui.stop()
-            except Exception:
-                pass
-            try:
-                if hasattr(pipeline_context, "set_live_progress"):
-                    pipeline_context.set_live_progress(None)
-            except Exception:
-                pass
+    progress.close_local_ui(force_complete=True)

     if not all_emitted:
         log(f"No screenshots were successfully captured", file=sys.stderr)

@@ -336,6 +336,18 @@ def _resolve_upload_path(item: Any, config: Dict[str, Any]) -> Optional[str]:
 def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
     # Internal stage: send previously selected items to selected rooms.
     if any(str(a).lower() == "-send" for a in (args or [])):
+        # Ensure we don't re-print the rooms picker table on the send stage.
+        try:
+            if hasattr(ctx, "set_last_result_table_overlay"):
+                ctx.set_last_result_table_overlay(None, None, None)
+        except Exception:
+            pass
+        try:
+            if hasattr(ctx, "set_current_stage_table"):
+                ctx.set_current_stage_table(None)
+        except Exception:
+            pass
+
         rooms = _normalize_to_list(result)
         room_ids: List[str] = []
         for r in rooms:
@@ -430,7 +442,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
         log("No joined rooms found.", file=sys.stderr)
         return 0

-    table = ResultTable("Matrix Rooms")
+    table = ResultTable("Matrix Rooms (select with @N)")
     table.set_table("matrix")
     table.set_source_command(".matrix", [])

@@ -461,12 +473,6 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
     ctx.set_last_result_table_overlay(table, room_items)
     ctx.set_current_stage_table(table)
     ctx.set_pending_pipeline_tail([[".matrix", "-send"]], ".matrix")
-
-    print()
-    from rich_display import stdout_console
-
-    stdout_console().print(table)
-    print("\nSelect room(s) with @N (e.g. @1 or @1-3) to send the selected item(s)")
     return 0

 CMDLET = Cmdlet(
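Taken together, the `.matrix` changes split sending into two stages: listing rooms registers the picker table plus a pending pipeline tail of `[".matrix", "-send"]`, and a later bare `@N` selection re-invokes the cmdlet in its `-send` stage with the chosen rooms. An illustrative session is sketched below; the exact output and selection syntax beyond `@N`/`@1-3` (which appear in this diff) are approximations.

```
.matrix          # lists joined rooms in the "Matrix Rooms (select with @N)" picker table
@1               # a bare selection runs the queued ".matrix -send" stage against room 1
@1-3             # ranges also work, per the picker hint
```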
@@ -1,6 +1,6 @@
 # Medios-Macina

-Medios-Macina is a CLI-first media ingestion and management toolkit focused on reliably downloading, tagging, and storing media (audio, video, images, and text) from a variety of providers and sources. It is designed around a compact, pipeable command language ("cmdlets") so complex workflows can be composed simply and repeatably.
+Medios-Macina is a CLI media manager and toolkit focused on downloading, tagging, and storing media (audio, video, images, and text) from a variety of providers and sources. It is designed around a compact, pipeable command language ("cmdlets") so complex workflows can be composed simply and repeatably.

 ## Highlights ✅
 - Flexible pipeline-based CLI: chain cmdlets with `|` and use saved selections with `@N`.
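To make the Highlights bullet concrete, here is an illustrative pipeline using only cmdlets that appear in this commit (`screen-shot`, `.matrix`); any other flags or cmdlet names are not taken from the repository.

```
screen-shot https://example.com/article | .matrix   # capture a page, then pick Matrix rooms
@1                                                   # send the captured screenshot to room 1
```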