This commit is contained in:
nose
2025-12-20 23:57:44 -08:00
parent b75faa49a2
commit 8ca5783970
39 changed files with 4294 additions and 1722 deletions

View File

@@ -229,6 +229,13 @@ class HTTPClient:
response.raise_for_status()
total_bytes = int(response.headers.get("content-length", 0))
bytes_downloaded = 0
# Render progress immediately (even if the transfer is very fast)
if progress_callback:
try:
progress_callback(0, total_bytes)
except Exception:
pass
with open(path, "wb") as f:
for chunk in response.iter_bytes(chunk_size):
@@ -237,6 +244,13 @@ class HTTPClient:
bytes_downloaded += len(chunk)
if progress_callback:
progress_callback(bytes_downloaded, total_bytes)
# Ensure a final callback is emitted.
if progress_callback:
try:
progress_callback(bytes_downloaded, total_bytes)
except Exception:
pass
return path
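
A minimal sketch of a callback compatible with the (bytes_downloaded, total_bytes) contract above; the name print_progress and the stderr rendering are illustrative, not part of this codebase:

import sys

def print_progress(bytes_downloaded: int, total_bytes: int) -> None:
    # total_bytes is 0 when the server sent no content-length header.
    if total_bytes > 0:
        pct = bytes_downloaded / total_bytes * 100.0
        sys.stderr.write(f"\r{pct:5.1f}% ({bytes_downloaded}/{total_bytes} bytes)")
    else:
        sys.stderr.write(f"\r{bytes_downloaded} bytes")
    sys.stderr.flush()

# With the hunk above, this callback now fires at 0 bytes immediately and once
# more after the final chunk, so even very fast transfers render something.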

View File

@@ -152,55 +152,24 @@ class HydrusNetwork:
logger.debug(f"{self._log_prefix()} Uploading file {file_path.name} ({file_size} bytes)")
# Stream upload body with a stderr progress bar (pipeline-safe).
try:
from models import ProgressBar
except Exception:
ProgressBar = None # type: ignore[assignment]
from models import ProgressBar
bar = ProgressBar() if ProgressBar is not None else None
bar = ProgressBar()
label = f"{self._log_prefix().strip('[]')} upload"
start_t = time.time()
last_render_t = [start_t]
last_log_t = [start_t]
sent = [0]
tty = bool(getattr(sys.stderr, "isatty", lambda: False)())
def _render_progress(final: bool = False) -> None:
if bar is None:
return
if file_size <= 0:
return
now = time.time()
if not final and (now - float(last_render_t[0])) < 0.25:
return
last_render_t[0] = now
elapsed = max(0.001, now - start_t)
speed = float(sent[0]) / elapsed
eta_s = (float(file_size) - float(sent[0])) / speed if speed > 0 else 0.0
minutes, seconds = divmod(int(max(0.0, eta_s)), 60)
hours, minutes = divmod(minutes, 60)
eta_str = f"{hours:02d}:{minutes:02d}:{seconds:02d}"
speed_str = bar.format_bytes(speed) + "/s"
line = bar.format_progress(
percent_str=None,
downloaded=int(sent[0]),
total=int(file_size),
speed_str=speed_str,
eta_str=eta_str,
)
try:
if tty:
sys.stderr.write("\r" + f"[{label}] " + line + " ")
sys.stderr.flush()
else:
# Non-interactive: keep it quiet-ish.
if final or (now - float(last_log_t[0])) >= 2.0:
log(f"[{label}] {line}", file=sys.stderr)
last_log_t[0] = now
except Exception:
pass
bar.update(downloaded=int(sent[0]), total=int(file_size), label=str(label), file=sys.stderr)
if final:
bar.finish()
def file_gen():
try:
@@ -214,12 +183,6 @@ class HydrusNetwork:
yield chunk
finally:
_render_progress(final=True)
if tty:
try:
sys.stderr.write("\n")
sys.stderr.flush()
except Exception:
pass
response = client.request(
spec.method,

View File

@@ -258,6 +258,7 @@ class API_folder_store:
cursor.execute("CREATE INDEX IF NOT EXISTS idx_files_path ON files(file_path)")
cursor.execute("CREATE INDEX IF NOT EXISTS idx_tags_hash ON tags(hash)")
cursor.execute("CREATE INDEX IF NOT EXISTS idx_tags_tag ON tags(tag)")
cursor.execute("CREATE INDEX IF NOT EXISTS idx_metadata_ext ON metadata(ext)")
cursor.execute("CREATE INDEX IF NOT EXISTS idx_worker_id ON worker(worker_id)")
cursor.execute("CREATE INDEX IF NOT EXISTS idx_worker_status ON worker(status)")
cursor.execute("CREATE INDEX IF NOT EXISTS idx_worker_type ON worker(worker_type)")
@@ -1858,6 +1859,73 @@ class DatabaseAPI:
)
return {row[0] for row in cursor.fetchall()}
def get_file_hashes_by_ext(self, ext_value: str, limit: Optional[int] = None) -> Set[str]:
"""Get hashes of files whose metadata ext matches the given extension.
Matches case-insensitively and ignores any leading '.' in stored ext.
Supports glob wildcards '*' and '?' in the query.
"""
ext_clean = str(ext_value or "").strip().lower().lstrip(".")
has_glob = ("*" in ext_clean) or ("?" in ext_clean)
ext_clean = "".join(ch for ch in ext_clean if ch.isalnum())
if not ext_clean and not has_glob:
return set()
cursor = self.get_cursor()
if has_glob:
pattern = str(ext_value or "").strip().lower().lstrip(".")
pattern = pattern.replace("%", "\\%").replace("_", "\\_")
pattern = pattern.replace("*", "%").replace("?", "_")
cursor.execute(
"""
SELECT DISTINCT f.hash
FROM files f
JOIN metadata m ON f.hash = m.hash
WHERE LOWER(LTRIM(COALESCE(m.ext, ''), '.')) LIKE ? ESCAPE '\\'
LIMIT ?
""",
(pattern, limit or 10000),
)
else:
cursor.execute(
"""
SELECT DISTINCT f.hash
FROM files f
JOIN metadata m ON f.hash = m.hash
WHERE LOWER(LTRIM(COALESCE(m.ext, ''), '.')) = ?
LIMIT ?
""",
(ext_clean, limit or 10000),
)
return {row[0] for row in cursor.fetchall()}
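
For reference, a standalone sketch of the glob-to-LIKE translation used above (glob_to_like is a hypothetical name):

def glob_to_like(pattern: str) -> str:
    p = str(pattern or "").strip().lower().lstrip(".")
    p = p.replace("%", "\\%").replace("_", "\\_")  # escape LIKE metacharacters
    return p.replace("*", "%").replace("?", "_")   # map glob wildcards to LIKE

assert glob_to_like("j*") == "j%"      # jpg, jpeg, ...
assert glob_to_like("fl?c") == "fl_c"  # flac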
def get_files_by_ext(self, ext_value: str, limit: Optional[int] = None) -> List[tuple]:
"""Get files whose metadata ext matches the given extension.
Returns (hash, file_path, size, ext) tuples.
"""
ext_clean = str(ext_value or "").strip().lower().lstrip(".")
ext_clean = "".join(ch for ch in ext_clean if ch.isalnum())
if not ext_clean:
return []
cursor = self.get_cursor()
cursor.execute(
"""
SELECT f.hash, f.file_path,
COALESCE(m.size, 0) as size,
COALESCE(m.ext, '') as ext
FROM files f
JOIN metadata m ON f.hash = m.hash
WHERE LOWER(LTRIM(COALESCE(m.ext, ''), '.')) = ?
ORDER BY f.file_path
LIMIT ?
""",
(ext_clean, limit or 10000),
)
return cursor.fetchall()
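
Expected call shape for the two lookups above, assuming the `with DatabaseAPI(...)` usage seen later in this commit (arguments illustrative):

# with DatabaseAPI(store_root) as api:
#     hashes = api.get_file_hashes_by_ext("j*")  # glob: jpg, jpeg, ...
#     for file_hash, file_path, size, ext in api.get_files_by_ext("png", limit=100):
#         ...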
def get_files_with_any_url(self, limit: Optional[int] = None) -> List[tuple]:
"""Get files that have any non-empty URL metadata.

CLI.py (345 changed lines)
View File

@@ -28,6 +28,26 @@ from prompt_toolkit.document import Document
from prompt_toolkit.lexers import Lexer
from prompt_toolkit.styles import Style
from rich_display import stderr_console, stdout_console
def _install_rich_traceback(*, show_locals: bool = False) -> None:
"""Install Rich traceback handler as the default excepthook.
This keeps uncaught exceptions readable in the terminal.
"""
try:
from rich.traceback import install as rich_traceback_install
rich_traceback_install(show_locals=bool(show_locals))
except Exception:
# Fall back to the standard Python traceback if Rich isn't available.
return
# Default to Rich tracebacks for the whole process.
_install_rich_traceback(show_locals=False)
from SYS.background_notifier import ensure_background_notifier
from SYS.logger import debug, set_debug
from SYS.worker_manager import WorkerManager
@@ -530,6 +550,32 @@ class CmdletCompleter(Completer):
self._config_loader = config_loader
self.cmdlet_names = CmdletIntrospection.cmdlet_names()
@staticmethod
def _used_arg_logicals(cmd_name: str, stage_tokens: List[str]) -> Set[str]:
"""Return logical argument names already used in this cmdlet stage.
Example: if the user has typed `download-media -url ...`, then `url`
is considered used and should not be suggested again (even as `--url`).
"""
arg_flags = CmdletIntrospection.cmdlet_args(cmd_name)
allowed = {a.lstrip("-").strip().lower() for a in arg_flags if a}
if not allowed:
return set()
used: Set[str] = set()
for tok in stage_tokens[1:]:
if not tok or not tok.startswith("-"):
continue
if tok in {"-", "--"}:
continue
# Handle common `-arg=value` form.
raw = tok.split("=", 1)[0]
logical = raw.lstrip("-").strip().lower()
if logical and logical in allowed:
used.add(logical)
return used
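
A self-contained sketch of the dedupe logic above, with illustrative flag names:

def used_logicals(stage_tokens, allowed):
    used = set()
    for tok in stage_tokens[1:]:
        if not tok or not tok.startswith("-") or tok in {"-", "--"}:
            continue
        # Handle the common `-arg=value` form, then strip dashes.
        logical = tok.split("=", 1)[0].lstrip("-").strip().lower()
        if logical in allowed:
            used.add(logical)
    return used

assert used_logicals(["download-media", "-url=https://x", "--audio"],
                     {"url", "audio", "quality"}) == {"url", "audio"}
# Only -quality would still be offered as a completion.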
def get_completions(self, document: Document, complete_event): # type: ignore[override]
text = document.text_before_cursor
tokens = text.split()
@@ -600,6 +646,7 @@ class CmdletCompleter(Completer):
return
arg_names = CmdletIntrospection.cmdlet_args(cmd_name)
used_logicals = self._used_arg_logicals(cmd_name, stage_tokens)
logical_seen: Set[str] = set()
for arg in arg_names:
arg_low = arg.lower()
@@ -607,6 +654,8 @@ class CmdletCompleter(Completer):
if prefer_single_dash and arg_low.startswith("--"):
continue
logical = arg.lstrip("-").lower()
if logical in used_logicals:
continue
if prefer_single_dash and logical in logical_seen:
continue
if arg_low.startswith(current_token):
@@ -751,26 +800,32 @@ class CmdletHelp:
def show_cmdlet_list() -> None:
try:
metadata = list_cmdlet_metadata() or {}
print("\nAvailable cmdlet:")
from rich.box import SIMPLE
from rich.panel import Panel
from rich.table import Table as RichTable
table = RichTable(show_header=True, header_style="bold", box=SIMPLE, expand=True)
table.add_column("Cmdlet", no_wrap=True)
table.add_column("Aliases")
table.add_column("Args")
table.add_column("Summary")
for cmd_name in sorted(metadata.keys()):
info = metadata[cmd_name]
aliases = info.get("aliases", [])
args = info.get("args", [])
summary = info.get("summary") or ""
alias_str = ", ".join([str(a) for a in (aliases or []) if str(a).strip()])
arg_names = [a.get("name") for a in (args or []) if isinstance(a, dict) and a.get("name")]
args_str = ", ".join([str(a) for a in arg_names if str(a).strip()])
table.add_row(str(cmd_name), alias_str, args_str, str(summary))
display = f" cmd:{cmd_name}"
if aliases:
display += f" alias:{', '.join(aliases)}"
if args:
arg_names = [a.get("name") for a in args if a.get("name")]
if arg_names:
display += f" args:{', '.join(arg_names)}"
summary = info.get("summary")
if summary:
display += f" - {summary}"
print(display)
print()
stdout_console().print(Panel(table, title="Cmdlets", expand=False))
except Exception as exc:
print(f"Error: {exc}\n")
from rich.panel import Panel
from rich.text import Text
stderr_console().print(Panel(Text(f"Error: {exc}"), title="Error", expand=False))
@staticmethod
def show_cmdlet_help(cmd_name: str) -> None:
@@ -787,7 +842,10 @@ class CmdletHelp:
def _print_metadata(cmd_name: str, data: Any) -> None:
d = data.to_dict() if hasattr(data, "to_dict") else data
if not isinstance(d, dict):
print(f"Invalid metadata for {cmd_name}\n")
from rich.panel import Panel
from rich.text import Text
stderr_console().print(Panel(Text(f"Invalid metadata for {cmd_name}"), title="Error", expand=False))
return
name = d.get("name", cmd_name)
@@ -797,45 +855,48 @@ class CmdletHelp:
args = d.get("args", [])
details = d.get("details", [])
print("\nNAME")
print(f" {name}")
from rich.box import SIMPLE
from rich.console import Group
from rich.panel import Panel
from rich.table import Table as RichTable
from rich.text import Text
print("\nSYNOPSIS")
print(f" {usage or name}")
header = Text.assemble((str(name), "bold"))
synopsis = Text(str(usage or name))
stdout_console().print(Panel(Group(header, synopsis), title="Help", expand=False))
if summary or description:
print("\nDESCRIPTION")
desc_bits: List[Text] = []
if summary:
print(f" {summary}")
desc_bits.append(Text(str(summary)))
if description:
print(f" {description}")
desc_bits.append(Text(str(description)))
stdout_console().print(Panel(Group(*desc_bits), title="Description", expand=False))
if args and isinstance(args, list):
print("\nPARAMETERS")
param_table = RichTable(show_header=True, header_style="bold", box=SIMPLE, expand=True)
param_table.add_column("Arg", no_wrap=True)
param_table.add_column("Type", no_wrap=True)
param_table.add_column("Required", no_wrap=True)
param_table.add_column("Description")
for arg in args:
if isinstance(arg, dict):
name_str = arg.get("name", "?")
typ = arg.get("type", "string")
required = arg.get("required", False)
required = bool(arg.get("required", False))
desc = arg.get("description", "")
else:
name_str = getattr(arg, "name", "?")
typ = getattr(arg, "type", "string")
required = getattr(arg, "required", False)
required = bool(getattr(arg, "required", False))
desc = getattr(arg, "description", "")
req_marker = "[required]" if required else "[optional]"
print(f" -{name_str} <{typ}>")
if desc:
print(f" {desc}")
print(f" {req_marker}")
print()
param_table.add_row(f"-{name_str}", str(typ), "yes" if required else "no", str(desc or ""))
stdout_console().print(Panel(param_table, title="Parameters", expand=False))
if details:
print("REMARKS")
for detail in details:
print(f" {detail}")
print()
stdout_console().print(Panel(Group(*[Text(str(x)) for x in details]), title="Remarks", expand=False))
class CmdletExecutor:
@@ -1044,6 +1105,26 @@ class CmdletExecutor:
ctx.set_last_selection(selected_indices)
try:
try:
if hasattr(ctx, "set_current_cmdlet_name"):
ctx.set_current_cmdlet_name(cmd_name)
except Exception:
pass
try:
if hasattr(ctx, "set_current_stage_text"):
raw_stage = ""
try:
raw_stage = ctx.get_current_command_text("") if hasattr(ctx, "get_current_command_text") else ""
except Exception:
raw_stage = ""
if raw_stage:
ctx.set_current_stage_text(raw_stage)
else:
ctx.set_current_stage_text(" ".join([cmd_name, *filtered_args]).strip() or cmd_name)
except Exception:
pass
ret_code = cmd_fn(result, filtered_args, config)
if getattr(pipeline_ctx, "emits", None):
@@ -1113,8 +1194,8 @@ class CmdletExecutor:
else:
ctx.set_last_result_items_only(emits)
print()
print(table.format_plain())
stdout_console().print()
stdout_console().print(table)
if ret_code != 0:
stage_status = "failed"
@@ -1125,6 +1206,16 @@ class CmdletExecutor:
stage_error = f"{type(exc).__name__}: {exc}"
print(f"[error] {type(exc).__name__}: {exc}\n")
finally:
try:
if hasattr(ctx, "clear_current_cmdlet_name"):
ctx.clear_current_cmdlet_name()
except Exception:
pass
try:
if hasattr(ctx, "clear_current_stage_text"):
ctx.clear_current_stage_text()
except Exception:
pass
ctx.clear_last_selection()
if stage_session:
stage_session.close(status=stage_status, error_msg=stage_error)
@@ -1322,6 +1413,13 @@ class PipelineExecutor:
pipeline_text = " | ".join(" ".join(stage) for stage in stages)
pipeline_session = WorkerStages.begin_pipeline(worker_manager, pipeline_text=pipeline_text, config=config)
raw_stage_texts: List[str] = []
try:
if hasattr(ctx, "get_current_command_stages"):
raw_stage_texts = ctx.get_current_command_stages() or []
except Exception:
raw_stage_texts = []
if pipeline_session and worker_manager and isinstance(config, dict):
session_worker_ids = config.get("_session_worker_ids")
if session_worker_ids:
@@ -1452,6 +1550,9 @@ class PipelineExecutor:
if table_type == "youtube":
print("Auto-running YouTube selection via download-media")
stages.append(["download-media"])
elif table_type == "bandcamp":
print("Auto-running Bandcamp selection via download-media")
stages.append(["download-media"])
elif table_type in {"soulseek", "openlibrary", "libgen"}:
print("Auto-piping selection to download-file")
stages.append(["download-file"])
@@ -1473,6 +1574,14 @@ class PipelineExecutor:
):
print("Auto-inserting download-media after YouTube selection")
stages.insert(0, ["download-media"])
if table_type == "bandcamp" and first_cmd not in (
"download-media",
"download_media",
"download-file",
".pipe",
):
print("Auto-inserting download-media after Bandcamp selection")
stages.insert(0, ["download-media"])
if table_type == "libgen" and first_cmd not in (
"download-file",
"download-media",
@@ -1645,6 +1754,32 @@ class PipelineExecutor:
except Exception:
pass
try:
if hasattr(ctx, "set_current_cmdlet_name"):
ctx.set_current_cmdlet_name(cmd_name)
except Exception:
pass
try:
if hasattr(ctx, "set_current_stage_text"):
stage_text = ""
if raw_stage_texts and stage_index < len(raw_stage_texts):
candidate = str(raw_stage_texts[stage_index] or "").strip()
if candidate:
try:
cand_tokens = shlex.split(candidate)
except Exception:
cand_tokens = candidate.split()
if cand_tokens:
first = str(cand_tokens[0]).replace("_", "-").lower()
if first == cmd_name:
stage_text = candidate
if not stage_text:
stage_text = " ".join(stage_tokens).strip()
ctx.set_current_stage_text(stage_text)
except Exception:
pass
ret_code = cmd_fn(piped_result, list(stage_args), config)
stage_is_last = stage_index + 1 >= len(stages)
@@ -1676,7 +1811,6 @@ class PipelineExecutor:
and (not emits)
and cmd_name in {"download-media", "download_media"}
and stage_table is not None
and hasattr(stage_table, "format_plain")
and stage_table_type in {"ytdlp.formatlist", "download-media", "download_media"}
):
try:
@@ -1691,8 +1825,8 @@ class PipelineExecutor:
already_rendered = False
if not already_rendered:
print()
print(stage_table.format_plain())
stdout_console().print()
stdout_console().print(stage_table)
try:
remaining = stages[stage_index + 1 :]
@@ -1719,15 +1853,15 @@ class PipelineExecutor:
if final_table is None:
final_table = stage_table
if final_table is not None and hasattr(final_table, "format_plain"):
if final_table is not None:
try:
already_rendered = bool(getattr(final_table, "_rendered_by_cmdlet", False))
except Exception:
already_rendered = False
if not already_rendered:
print()
print(final_table.format_plain())
stdout_console().print()
stdout_console().print(final_table)
# Fallback: if a cmdlet emitted results but did not provide a table,
# render a standard ResultTable so last-stage pipelines still show output.
@@ -1739,8 +1873,8 @@ class PipelineExecutor:
table = ResultTable(table_title)
for item in emits:
table.add_result(item)
print()
print(table.format_plain())
stdout_console().print()
stdout_console().print(table)
if isinstance(ret_code, int) and ret_code != 0:
stage_status = "failed"
@@ -1757,6 +1891,16 @@ class PipelineExecutor:
pipeline_error = f"{stage_label} error: {exc}"
return
finally:
try:
if hasattr(ctx, "clear_current_cmdlet_name"):
ctx.clear_current_cmdlet_name()
except Exception:
pass
try:
if hasattr(ctx, "clear_current_stage_text"):
ctx.clear_current_stage_text()
except Exception:
pass
if stage_session:
stage_session.close(status=stage_status, error_msg=stage_error)
elif pipeline_session and worker_manager:
@@ -1774,8 +1918,8 @@ class PipelineExecutor:
for item in items:
table.add_result(item)
ctx.set_last_result_items_only(items)
print()
print(table.format_plain())
stdout_console().print()
stdout_console().print(table)
except Exception as exc:
pipeline_status = "failed"
pipeline_error = str(exc)
@@ -1786,7 +1930,20 @@ class PipelineExecutor:
except Exception as exc:
print(f"[error] Failed to execute pipeline: {exc}\n")
Welcome = """
# MEDIOS-MACINA
Rich can do a pretty *decent* job of rendering markdown.
1. This is a list item
2. This is another list item
"""
from rich.markdown import Markdown
from rich.console import Console
console = Console()
md = Markdown(Welcome)
console.print(md)
class MedeiaCLI:
"""Main CLI application object."""
@@ -1892,25 +2049,20 @@ class MedeiaCLI:
return app
def run(self) -> None:
# Ensure Rich tracebacks are active even when invoking subcommands.
try:
config = self._config_loader.load()
debug_enabled = bool(config.get("debug", False)) if isinstance(config, dict) else False
except Exception:
debug_enabled = False
set_debug(debug_enabled)
_install_rich_traceback(show_locals=debug_enabled)
self.build_app()()
def run_repl(self) -> None:
banner = r"""
Medeia-Macina
=====================
|123456789|ABCDEFGHI|
|246813579|JKLMNOPQR|
|369369369|STUVWXYZ0|
|483726159|ABCDEFGHI|
|=========+=========|
|516273849|JKLMNOPQR|
|639639639|STUVWXYZ0|
|753186429|ABCDEFGHI|
|876543219|JKLMNOPQR|
|999999999|STUVWXYZ0|
=====================
"""
print(banner)
# (Startup banner is optional; keep the REPL quiet by default.)
prompt_text = "🜂🜄🜁🜃|"
@@ -1918,6 +2070,11 @@ class MedeiaCLI:
"*********<IGNITIO>*********<NOUSEMPEH>*********<RUGRAPOG>*********<OMEGHAU>*********"
)
startup_table.set_no_choice(True).set_preserve_order(True)
startup_table.set_value_case("upper")
def _upper(value: Any) -> str:
text = "" if value is None else str(value)
return text.upper()
def _add_startup_check(
status: str,
@@ -1929,12 +2086,12 @@ class MedeiaCLI:
detail: str = "",
) -> None:
row = startup_table.add_row()
row.add_column("Status", status)
row.add_column("Name", name)
row.add_column("Provider", provider or "")
row.add_column("Store", store or "")
row.add_column("Files", "" if files is None else str(files))
row.add_column("Detail", detail or "")
row.add_column("STATUS", _upper(status))
row.add_column("NAME", _upper(name))
row.add_column("PROVIDER", _upper(provider or ""))
row.add_column("STORE", _upper(store or ""))
row.add_column("FILES", "" if files is None else str(files))
row.add_column("DETAIL", _upper(detail or ""))
def _has_store_subtype(cfg: dict, subtype: str) -> bool:
store_cfg = cfg.get("store")
@@ -1967,8 +2124,8 @@ class MedeiaCLI:
config = self._config_loader.load()
debug_enabled = bool(config.get("debug", False))
set_debug(debug_enabled)
if debug_enabled:
debug("✓ Debug logging enabled")
_install_rich_traceback(show_locals=debug_enabled)
_add_startup_check("ENABLED" if debug_enabled else "DISABLED", "DEBUGGING")
try:
try:
@@ -2226,8 +2383,8 @@ class MedeiaCLI:
_add_startup_check("ERROR", "Cookies", detail=str(exc))
if startup_table.rows:
print()
print(startup_table.format_plain())
stdout_console().print()
stdout_console().print(startup_table)
except Exception as exc:
if debug_enabled:
debug(f"⚠ Could not check service availability: {exc}")
@@ -2349,9 +2506,9 @@ class MedeiaCLI:
if last_table is None:
last_table = ctx.get_last_result_table()
if last_table:
print()
stdout_console().print()
ctx.set_current_stage_table(last_table)
print(last_table.format_plain())
stdout_console().print(last_table)
else:
items = ctx.get_last_result_items()
if items:
@@ -2370,10 +2527,44 @@ class MedeiaCLI:
last_table = ctx.get_display_table() if hasattr(ctx, "get_display_table") else None
if last_table is None:
last_table = ctx.get_last_result_table()
# Auto-refresh search-store tables when navigating back,
# so row payloads (titles/tags) reflect latest store state.
try:
src_cmd = getattr(last_table, "source_command", None) if last_table else None
if isinstance(src_cmd, str) and src_cmd.lower().replace("_", "-") == "search-store":
src_args = getattr(last_table, "source_args", None) if last_table else None
base_args = list(src_args) if isinstance(src_args, list) else []
cleaned_args = [
str(a)
for a in base_args
if str(a).strip().lower() not in {"--refresh", "-refresh"}
]
if hasattr(ctx, "set_current_command_text"):
try:
title_text = getattr(last_table, "title", None) if last_table else None
if isinstance(title_text, str) and title_text.strip():
ctx.set_current_command_text(title_text.strip())
else:
ctx.set_current_command_text(" ".join(["search-store", *cleaned_args]).strip())
except Exception:
pass
try:
self._cmdlet_executor.execute("search-store", cleaned_args + ["--refresh"])
finally:
if hasattr(ctx, "clear_current_command_text"):
try:
ctx.clear_current_command_text()
except Exception:
pass
continue
except Exception as exc:
print(f"Error refreshing search-store table: {exc}", file=sys.stderr)
if last_table:
print()
stdout_console().print()
ctx.set_current_stage_table(last_table)
print(last_table.format_plain())
stdout_console().print(last_table)
else:
items = ctx.get_last_result_items()
if items:

View File

@@ -1,6 +1,7 @@
from __future__ import annotations
import sys
from urllib.parse import urlparse
from typing import Any, Dict, List, Optional
from ProviderCore.base import Provider, SearchResult
@@ -15,6 +16,204 @@ except ImportError: # pragma: no cover
class Bandcamp(Provider):
"""Search provider for Bandcamp."""
@staticmethod
def _base_url(raw_url: str) -> str:
"""Normalize a Bandcamp URL down to scheme://netloc."""
text = str(raw_url or "").strip()
if not text:
return ""
try:
parsed = urlparse(text)
if not parsed.scheme or not parsed.netloc:
return text
return f"{parsed.scheme}://{parsed.netloc}"
except Exception:
return text
@classmethod
def _discography_url(cls, raw_url: str) -> str:
base = cls._base_url(raw_url)
if not base:
return ""
# Bandcamp discography lives under /music.
return base.rstrip("/") + "/music"
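
A sketch of the normalization these two helpers perform (example URLs are illustrative):

from urllib.parse import urlparse

def base_url(raw: str) -> str:
    p = urlparse(str(raw or "").strip())
    return f"{p.scheme}://{p.netloc}" if p.scheme and p.netloc else str(raw or "").strip()

assert base_url("https://artist.bandcamp.com/album/foo?x=1") == "https://artist.bandcamp.com"
# _discography_url then appends /music: "https://artist.bandcamp.com/music"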
def _scrape_artist_page(self, page: Any, artist_url: str, limit: int = 50) -> List[SearchResult]:
"""Scrape an artist page for albums/tracks (discography)."""
base = self._base_url(artist_url)
discography_url = self._discography_url(artist_url)
if not base or not discography_url:
return []
debug(f"[bandcamp] Scraping artist page: {discography_url}")
page.goto(discography_url)
page.wait_for_load_state("domcontentloaded")
results: List[SearchResult] = []
cards = page.query_selector_all("li.music-grid-item") or []
if not cards:
# Fallback selector
cards = page.query_selector_all(".music-grid-item") or []
for item in cards[:limit]:
try:
link = item.query_selector("a")
if not link:
continue
href = link.get_attribute("href") or ""
href = str(href).strip()
if not href:
continue
if href.startswith("/"):
target = base.rstrip("/") + href
elif href.startswith("http://") or href.startswith("https://"):
target = href
else:
target = base.rstrip("/") + "/" + href
title_node = item.query_selector("p.title") or item.query_selector(".title")
title = (title_node.inner_text().strip() if title_node else "")
if title:
title = " ".join(title.split())
if not title:
title = target.rsplit("/", 1)[-1]
kind = "album" if "/album/" in target else ("track" if "/track/" in target else "item")
results.append(
SearchResult(
table="bandcamp",
title=title,
path=target,
detail="",
annotations=[kind],
media_kind="audio",
columns=[
("Title", title),
("Type", kind),
("Url", target),
],
full_metadata={
"type": kind,
"url": target,
"artist_url": base,
},
)
)
except Exception as exc:
debug(f"[bandcamp] Error parsing artist item: {exc}")
return results
def selector(self, selected_items: List[Any], *, ctx: Any, stage_is_last: bool = True, **_kwargs: Any) -> bool:
"""Handle Bandcamp `@N` selection.
If the selected item is an ARTIST result, selecting it auto-expands into
a discography table by scraping the artist URL.
"""
if not stage_is_last:
return False
if sync_playwright is None:
return False
# Only handle artist selections.
chosen: List[Dict[str, Any]] = []
for item in selected_items or []:
payload: Dict[str, Any] = {}
if isinstance(item, dict):
payload = item
else:
try:
if hasattr(item, "to_dict"):
payload = item.to_dict() # type: ignore[assignment]
except Exception:
payload = {}
if not payload:
try:
payload = {
"title": getattr(item, "title", None),
"url": getattr(item, "url", None),
"path": getattr(item, "path", None),
"metadata": getattr(item, "metadata", None),
"extra": getattr(item, "extra", None),
}
except Exception:
payload = {}
meta = payload.get("metadata") or payload.get("full_metadata") or {}
if not isinstance(meta, dict):
meta = {}
extra = payload.get("extra")
if isinstance(extra, dict):
meta = {**meta, **extra}
type_val = str(meta.get("type") or "").strip().lower()
if type_val != "artist":
continue
title = str(payload.get("title") or "").strip()
url_val = str(payload.get("url") or payload.get("path") or meta.get("url") or "").strip()
base = self._base_url(url_val)
if not base:
continue
chosen.append({"title": title, "url": base, "location": str(meta.get("artist") or "").strip()})
if not chosen:
return False
# Build a new table from artist discography.
try:
from result_table import ResultTable
from rich_display import stdout_console
except Exception:
return False
artist_title = chosen[0].get("title") or "artist"
artist_url = chosen[0].get("url") or ""
try:
with sync_playwright() as p:
browser = p.chromium.launch(headless=True)
page = browser.new_page()
discography = self._scrape_artist_page(page, artist_url, limit=50)
browser.close()
except Exception as exc:
print(f"bandcamp artist lookup failed: {exc}\n")
return True
table = ResultTable(f"Bandcamp: artist:{artist_title}").set_preserve_order(True)
table.set_table("bandcamp")
try:
table.set_value_case("lower")
except Exception:
pass
results_payload: List[Dict[str, Any]] = []
for r in discography:
table.add_result(r)
try:
results_payload.append(r.to_dict())
except Exception:
results_payload.append({"table": "bandcamp", "title": getattr(r, "title", ""), "path": getattr(r, "path", "")})
try:
ctx.set_last_result_table(table, results_payload)
ctx.set_current_stage_table(table)
except Exception:
pass
try:
stdout_console().print()
stdout_console().print(table)
except Exception:
pass
return True
def search(
self,
query: str,
@@ -73,6 +272,7 @@ class Bandcamp(Provider):
title = link.inner_text().strip()
target_url = link.get_attribute("href")
base_url = self._base_url(str(target_url or ""))
subhead = item.query_selector(".subhead")
artist = subhead.inner_text().strip() if subhead else "Unknown"
@@ -89,13 +289,15 @@ class Bandcamp(Provider):
annotations=[media_type],
media_kind="audio",
columns=[
("Name", title),
("Artist", artist),
("Title", title),
("Location", artist),
("Type", media_type),
("Url", base_url or str(target_url or "")),
],
full_metadata={
"artist": artist,
"type": media_type,
"url": base_url or str(target_url or ""),
},
)
)

View File

@@ -175,42 +175,11 @@ class Libgen(Provider):
elapsed = max(0.001, now - start_time)
speed = downloaded / elapsed
eta_seconds = 0.0
if total and total > 0 and speed > 0:
eta_seconds = max(0.0, float(total - downloaded) / float(speed))
minutes, seconds = divmod(int(eta_seconds), 60)
hours, minutes = divmod(minutes, 60)
eta_str = f"{hours:02d}:{minutes:02d}:{seconds:02d}" if total else "?:?:?"
speed_str = progress_bar.format_bytes(speed) + "/s"
percent_str = None
if total and total > 0:
percent = (downloaded / total) * 100.0
percent_str = f"{percent:.1f}%"
line = progress_bar.format_progress(
percent_str=percent_str,
downloaded=downloaded,
total=total,
speed_str=speed_str,
eta_str=eta_str,
)
# Prefix with filename for clarity when downloading multiple items.
if label:
line = f"{label} {line}"
if getattr(sys.stderr, "isatty", lambda: True)():
sys.stderr.write("\r" + line + " ")
sys.stderr.flush()
progress_bar.update(downloaded=downloaded, total=total, label=str(label or "download"), file=sys.stderr)
last_progress_time[0] = now
ok, final_path = download_from_mirror(target, out_path, progress_callback=progress_callback)
# Clear the in-place progress line.
if getattr(sys.stderr, "isatty", lambda: True)():
sys.stderr.write("\r" + (" " * 180) + "\r")
sys.stderr.write("\n")
sys.stderr.flush()
progress_bar.finish()
if ok and final_path:
return Path(final_path)
return None
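
Every progress hunk in this commit funnels into the same `models.ProgressBar` surface. A sketch of the interface those call sites assume (the real class lives in models.py and is not part of this diff):

import sys
from typing import Optional

class ProgressBarSketch:
    """Assumed interface, inferred from the call sites in this commit."""

    def update(self, *, downloaded: int, total: Optional[int] = None,
               label: str = "", file=sys.stderr) -> None:
        # Re-render one progress line for `label`; total=None means the
        # size is unknown (no percentage or ETA can be shown).
        ...

    def finish(self) -> None:
        # Finalize the line (newline / cursor cleanup); safe to call in
        # error paths, as the except branches above do.
        ...

    def format_bytes(self, n: float) -> str:
        # Human-readable size, e.g. 1536.0 -> "1.50 KB".
        ...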

View File

@@ -584,48 +584,19 @@ async def download_soulseek_file(
log(f"[soulseek] Download timeout after {timeout}s", file=sys.stderr)
bytes_done = int(getattr(transfer, "bytes_transfered", 0) or 0)
state_val = getattr(getattr(transfer, "state", None), "VALUE", None)
try:
if getattr(sys.stderr, "isatty", lambda: False)():
sys.stderr.write("\r" + (" " * 140) + "\r")
sys.stderr.flush()
except Exception:
pass
progress_bar.finish()
return None, state_val, bytes_done, elapsed
bytes_done = int(getattr(transfer, "bytes_transfered", 0) or 0)
total_bytes = int(getattr(transfer, "filesize", 0) or 0)
now = time.time()
if now - last_progress_time >= 0.5:
percent = (bytes_done / total_bytes) * 100.0 if total_bytes > 0 else 0.0
speed = bytes_done / elapsed if elapsed > 0 else 0.0
eta_str: Optional[str] = None
if total_bytes > 0 and speed > 0:
try:
eta_seconds = max(0.0, float(total_bytes - bytes_done) / float(speed))
minutes, seconds = divmod(int(eta_seconds), 60)
hours, minutes = divmod(minutes, 60)
eta_str = f"{hours:02d}:{minutes:02d}:{seconds:02d}"
except Exception:
eta_str = None
speed_str = progress_bar.format_bytes(speed) + "/s"
progress_line = progress_bar.format_progress(
percent_str=f"{percent:.1f}%",
downloaded=bytes_done,
total=total_bytes,
speed_str=speed_str,
eta_str=eta_str,
)
progress_bar.update(
downloaded=bytes_done,
total=total_bytes if total_bytes > 0 else None,
label="download",
file=sys.stderr,
)
try:
if getattr(sys.stderr, "isatty", lambda: False)():
sys.stderr.write("\r" + progress_line + " ")
sys.stderr.flush()
else:
log(progress_line, file=sys.stderr)
except Exception:
pass
last_progress_time = now
await asyncio.sleep(1)
@@ -635,12 +606,7 @@ async def download_soulseek_file(
final_elapsed = time.time() - start_time
# Clear in-place progress bar.
try:
if getattr(sys.stderr, "isatty", lambda: False)():
sys.stderr.write("\r" + (" " * 140) + "\r")
sys.stderr.flush()
except Exception:
pass
progress_bar.finish()
# If a file was written, treat it as success even if state is odd.
try:

View File

@@ -467,27 +467,16 @@ class Telegram(Provider):
pass
# Progress callback: prints to stderr so it doesn't interfere with pipeline stdout.
from models import ProgressBar
progress_bar = ProgressBar()
last_print = {"t": 0.0}
def _progress(current: int, total: int) -> None:
try:
now = time.monotonic()
# Throttle to avoid spamming.
if now - float(last_print.get("t", 0.0)) < 0.25 and current < total:
return
last_print["t"] = now
pct = ""
try:
if total and total > 0:
pct = f" {min(100.0, (current / total) * 100.0):5.1f}%"
except Exception:
pct = ""
line = f"[telegram] Downloading{pct} ({_format_bytes(current)}/{_format_bytes(total)})"
sys.stderr.write("\r" + line)
sys.stderr.flush()
except Exception:
now = time.monotonic()
# Throttle to avoid spamming.
if now - float(last_print.get("t", 0.0)) < 0.25 and current < total:
return
last_print["t"] = now
progress_bar.update(downloaded=int(current), total=int(total), label="telegram", file=sys.stderr)
part_kb = self._resolve_part_size_kb(file_size)
try:
@@ -502,11 +491,7 @@ class Telegram(Provider):
except TypeError:
# Older/newer Telethon versions may not accept part_size_kb on download_media.
downloaded = _resolve(client.download_media(message, file=str(output_dir), progress_callback=_progress))
try:
sys.stderr.write("\n")
sys.stderr.flush()
except Exception:
pass
progress_bar.finish()
if not downloaded:
raise Exception("Telegram download returned no file")
downloaded_path = Path(str(downloaded))

View File

@@ -2,9 +2,12 @@ from __future__ import annotations
from pathlib import Path
from typing import Optional
import sys
import requests
from models import ProgressBar
def sanitize_filename(name: str, *, max_len: int = 150) -> str:
text = str(name or "").strip()
@@ -25,15 +28,45 @@ def download_file(url: str, output_path: Path, *, session: Optional[requests.Ses
s = session or requests.Session()
bar = ProgressBar()
downloaded = 0
total = None
try:
with s.get(url, stream=True, timeout=timeout_s) as resp:
resp.raise_for_status()
try:
total_val = int(resp.headers.get("content-length") or 0)
total = total_val if total_val > 0 else None
except Exception:
total = None
# Render once immediately so fast downloads still show something.
try:
bar.update(downloaded=0, total=total, label=str(output_path.name or "download"), file=sys.stderr)
except Exception:
pass
with open(output_path, "wb") as f:
for chunk in resp.iter_content(chunk_size=1024 * 256):
if chunk:
f.write(chunk)
downloaded += len(chunk)
try:
bar.update(downloaded=downloaded, total=total, label=str(output_path.name or "download"), file=sys.stderr)
except Exception:
pass
try:
bar.finish()
except Exception:
pass
return output_path.exists() and output_path.stat().st_size > 0
except Exception:
try:
bar.finish()
except Exception:
pass
try:
if output_path.exists():
output_path.unlink()
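
Expected call shape for the helper above (paths illustrative; on failure the except branch presumably unlinks any partial file before returning):

# from pathlib import Path
# ok = download_file("https://example.com/file.bin", Path("/tmp/file.bin"))
# if not ok:
#     ...  # partial file was removed by the cleanup branch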

View File

@@ -44,6 +44,7 @@ except ImportError:
extract_ytdlp_tags = None
_EXTRACTOR_CACHE: List[Any] | None = None
_YTDLP_PROGRESS = ProgressBar()
def _ensure_yt_dlp_ready() -> None:
@@ -58,14 +59,16 @@ def _progress_callback(status: Dict[str, Any]) -> None:
"""Simple progress callback using logger."""
event = status.get("status")
if event == "downloading":
percent = status.get("_percent_str", "?")
speed = status.get("_speed_str", "?")
eta = status.get("_eta_str", "?")
sys.stdout.write(f"\r[download] {percent} at {speed} ETA {eta} ")
sys.stdout.flush()
downloaded = status.get("downloaded_bytes")
total = status.get("total_bytes") or status.get("total_bytes_estimate")
_YTDLP_PROGRESS.update(
downloaded=int(downloaded or 0),
total=int(total) if total else None,
label="download",
file=sys.stderr,
)
elif event == "finished":
sys.stdout.write("\r" + " " * 70 + "\r")
sys.stdout.flush()
_YTDLP_PROGRESS.finish()
debug(f"✓ Download finished: {status.get('filename')}")
elif event in ("postprocessing", "processing"):
debug(f"Post-processing: {status.get('postprocessor')}")
@@ -632,13 +635,17 @@ def _download_direct_file(
downloaded_bytes = [0]
total_bytes = [0]
last_progress_time = [start_time]
rendered_once = [False]
def progress_callback(bytes_downloaded: int, content_length: int) -> None:
downloaded_bytes[0] = bytes_downloaded
total_bytes[0] = content_length
now = time.time()
if now - last_progress_time[0] < 0.5:
is_final = bool(content_length > 0 and bytes_downloaded >= content_length)
# Always render the first and final updates; otherwise throttle to twice a second.
if rendered_once[0] and not is_final and (now - last_progress_time[0]) < 0.5:
return
elapsed = now - start_time
@@ -654,26 +661,14 @@ def _download_direct_file(
except Exception:
eta_str = None
speed_str = progress_bar.format_bytes(speed) + "/s"
progress_line = progress_bar.format_progress(
percent_str=f"{percent:.1f}%",
downloaded=bytes_downloaded,
total=content_length,
speed_str=speed_str,
eta_str=eta_str,
)
progress_bar.update(
downloaded=bytes_downloaded,
total=content_length if content_length > 0 else None,
label=str(filename or "download"),
file=sys.stderr,
)
if not quiet:
try:
if getattr(sys.stderr, "isatty", lambda: False)():
sys.stderr.write("\r" + progress_line + " ")
sys.stderr.flush()
else:
# Non-interactive: print occasional progress lines.
log(progress_line, file=sys.stderr)
except Exception:
pass
rendered_once[0] = True
last_progress_time[0] = now
@@ -681,14 +676,7 @@ def _download_direct_file(
client.download(url, str(file_path), progress_callback=progress_callback)
elapsed = time.time() - start_time
# Clear in-place progress bar.
if not quiet:
try:
if getattr(sys.stderr, "isatty", lambda: False)():
sys.stderr.write("\r" + (" " * 140) + "\r")
sys.stderr.flush()
except Exception:
pass
progress_bar.finish()
avg_speed_str = progress_bar.format_bytes(downloaded_bytes[0] / elapsed if elapsed > 0 else 0) + "/s"
if not quiet:
debug(f"✓ Downloaded in {elapsed:.1f}s at {avg_speed_str}")
@@ -742,6 +730,10 @@ def _download_direct_file(
)
except (httpx.HTTPError, httpx.RequestError) as exc:
try:
progress_bar.finish()
except Exception:
pass
log(f"Download error: {exc}", file=sys.stderr)
if debug_logger is not None:
debug_logger.write_record(
@@ -750,6 +742,10 @@ def _download_direct_file(
)
raise DownloadError(f"Failed to download {url}: {exc}") from exc
except Exception as exc:
try:
progress_bar.finish()
except Exception:
pass
log(f"Error downloading file: {exc}", file=sys.stderr)
if debug_logger is not None:
debug_logger.write_record(

View File

@@ -5,6 +5,8 @@ import inspect
import threading
from pathlib import Path
from rich_display import console_for
_DEBUG_ENABLED = False
_thread_local = threading.local()
@@ -56,6 +58,80 @@ def debug(*args, **kwargs) -> None:
# Use the same logic as log()
log(*args, **kwargs)
def debug_inspect(
obj,
*,
title: str | None = None,
file=None,
methods: bool = False,
docs: bool = False,
private: bool = False,
dunder: bool = False,
sort: bool = True,
all: bool = False,
value: bool = True,
) -> None:
"""Rich-inspect an object when debug logging is enabled.
Uses the same stream / quiet-mode behavior as `debug()` and prepends a
`[file.function]` prefix when debug is enabled.
"""
if not _DEBUG_ENABLED:
return
# Mirror debug() quiet-mode guard.
try:
stderr_name = getattr(sys.stderr, "name", "")
if "nul" in str(stderr_name).lower() or "/dev/null" in str(stderr_name):
return
except Exception:
pass
# Resolve destination stream.
stream = get_thread_stream()
if stream is not None:
file = stream
elif file is None:
file = sys.stderr
# Compute caller prefix (same as log()).
prefix = None
frame = inspect.currentframe()
if frame is not None and frame.f_back is not None:
caller_frame = frame.f_back
try:
file_name = Path(caller_frame.f_code.co_filename).stem
func_name = caller_frame.f_code.co_name
prefix = f"[{file_name}.{func_name}]"
finally:
del caller_frame
if frame is not None:
del frame
# Render.
from rich import inspect as rich_inspect
console = console_for(file)
# If the caller provides a title, treat it as authoritative.
# Only fall back to the automatic [file.func] prefix when no title is supplied.
effective_title = title
if not effective_title and prefix:
effective_title = prefix
rich_inspect(
obj,
console=console,
title=effective_title,
methods=methods,
docs=docs,
private=private,
dunder=dunder,
sort=sort,
all=all,
value=value,
)
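
Typical call, assuming debug logging was enabled first (otherwise debug_inspect is a no-op; `response` is an illustrative object):

# from SYS.logger import set_debug, debug_inspect
# set_debug(True)
# debug_inspect(response, title="HTTP response", methods=True)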
def log(*args, **kwargs) -> None:
"""Print with automatic file.function prefix.
@@ -71,12 +147,18 @@ def log(*args, **kwargs) -> None:
# Get the calling frame
frame = inspect.currentframe()
if frame is None:
print(*args, **kwargs)
file = kwargs.pop("file", sys.stdout)
sep = kwargs.pop("sep", " ")
end = kwargs.pop("end", "\n")
console_for(file).print(*args, sep=sep, end=end)
return
caller_frame = frame.f_back
if caller_frame is None:
print(*args, **kwargs)
file = kwargs.pop("file", sys.stdout)
sep = kwargs.pop("sep", " ")
end = kwargs.pop("end", "\n")
console_for(file).print(*args, sep=sep, end=end)
return
try:
@@ -93,12 +175,15 @@ def log(*args, **kwargs) -> None:
# Set default to stdout if not specified
elif 'file' not in kwargs:
kwargs['file'] = sys.stdout
file = kwargs.pop("file", sys.stdout)
sep = kwargs.pop("sep", " ")
end = kwargs.pop("end", "\n")
if add_prefix:
prefix = f"[{file_name}.{func_name}]"
print(prefix, *args, **kwargs)
console_for(file).print(prefix, *args, sep=sep, end=end)
else:
print(*args, **kwargs)
console_for(file).print(*args, sep=sep, end=end)
finally:
del frame
del caller_frame

View File

@@ -1,102 +1,22 @@
#!/usr/bin/env python3
"""Text-based progress bar utilities for consistent display across all downloads."""
"""Rich-only progress helpers.
These functions preserve the legacy call signatures used around the codebase,
but all rendering is performed via Rich (no ASCII progress bars).
"""
from __future__ import annotations
import sys
from SYS.logger import log
from models import ProgressBar
def format_progress_bar(current: int, total: int, width: int = 40, label: str = "") -> str:
"""Create a text-based progress bar.
Args:
current: Current progress (bytes/items)
total: Total to complete (bytes/items)
width: Width of the bar in characters (default 40)
label: Optional label prefix
Returns:
Formatted progress bar string
Examples:
format_progress_bar(50, 100)
# Returns: "[████████████████░░░░░░░░░░░░░░░░░░░░] 50.0%"
format_progress_bar(256*1024*1024, 1024*1024*1024, label="download.zip")
# Returns: "download.zip: [████████░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░] 25.0%"
"""
if total <= 0:
percentage = 0
filled = 0
else:
percentage = (current / total) * 100
filled = int((current / total) * width)
bar = "" * filled + "" * (width - filled)
pct_str = f"{percentage:.1f}%"
if label:
result = f"{label}: [{bar}] {pct_str}"
else:
result = f"[{bar}] {pct_str}"
return result
def format_size(bytes_val: float) -> str:
"""Format bytes to human-readable size."""
for unit in ['B', 'KB', 'MB', 'GB', 'TB']:
if bytes_val < 1024:
return f"{bytes_val:.2f} {unit}"
bytes_val /= 1024
return f"{bytes_val:.2f} PB"
def format_download_status(filename: str, current: int, total: int, speed: float = 0) -> str:
"""Format download status with progress bar and details."""
bar = format_progress_bar(current, total, width=30)
size_current = format_size(current)
size_total = format_size(total)
if speed > 0:
speed_str = f" @ {format_size(speed)}/s"
else:
speed_str = ""
return f"{bar} ({size_current} / {size_total}{speed_str})"
_BAR = ProgressBar()
def print_progress(filename: str, current: int, total: int, speed: float = 0, end: str = "\r") -> None:
"""Print download progress to stderr (doesn't interfere with piped output)."""
status = format_download_status(filename, current, total, speed)
print(status, file=sys.stderr, end=end, flush=True)
_BAR.update(downloaded=int(current), total=int(total) if total else None, label=str(filename or "progress"), file=sys.stderr)
def print_final_progress(filename: str, total: int, elapsed: float) -> None:
"""Print final progress line (100%) with time elapsed."""
bar = format_progress_bar(total, total, width=30)
size_str = format_size(total)
if elapsed < 60:
time_str = f"{elapsed:.1f}s"
elif elapsed < 3600:
minutes = elapsed / 60
time_str = f"{minutes:.1f}m"
else:
hours = elapsed / 3600
time_str = f"{hours:.2f}h"
print(f"{bar} ({size_str}) - {time_str}", file=sys.stderr, flush=True)
if __name__ == "__main__":
import time
log("Progress Bar Demo:", file=sys.stderr)
for i in range(101):
print_progress("demo.bin", i * 10 * 1024 * 1024, 1024 * 1024 * 1024)
time.sleep(0.02)
print_final_progress("demo.bin", 1024 * 1024 * 1024, 2.0)
log()
_BAR.finish()
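
Legacy call sites keep working unchanged; a sketch of what the shim now does:

# print_progress("demo.bin", 512 * 1024, 10 * 1024 * 1024)
# -> _BAR.update(downloaded=524288, total=10485760, label="demo.bin", file=sys.stderr)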

View File

@@ -124,7 +124,7 @@ def create_tags_sidecar(file_path: Path, tags: set) -> None:
try:
with open(tags_path, 'w', encoding='utf-8') as f:
for tag in sorted(tags):
f.write(f"{tag}\n")
f.write(f"{str(tag).strip().lower()}\n")
except Exception as e:
raise RuntimeError(f"Failed to create tags sidecar {tags_path}: {e}") from e

View File

@@ -452,7 +452,44 @@ class Folder(Store):
query = query.lower()
query_lower = query # Ensure query_lower is defined for all code paths
match_all = query == "*"
def _normalize_ext_filter(value: str) -> str:
v = str(value or "").strip().lower().lstrip('.')
v = "".join(ch for ch in v if ch.isalnum())
return v
def _extract_system_filetype_ext(text: str) -> Optional[str]:
# Match: system:filetype = png (the '=' is optional; spacing is flexible).
m = re.search(r"\bsystem:filetype\s*(?:=\s*)?([^\s,]+)", text)
if not m:
return None
return _normalize_ext_filter(m.group(1)) or None
# Support `ext:<value>` and Hydrus-style `system:filetype = <value>` anywhere
# in the query (space or comma separated).
ext_filter: Optional[str] = None
try:
sys_ext = _extract_system_filetype_ext(query_lower)
if sys_ext:
ext_filter = sys_ext
query_lower = re.sub(r"\s*\bsystem:filetype\s*(?:=\s*)?[^\s,]+", " ", query_lower)
query_lower = re.sub(r"\s{2,}", " ", query_lower).strip().strip(',')
query = query_lower
m = re.search(r"\bext:([^\s,]+)", query_lower)
if not m:
m = re.search(r"\bextension:([^\s,]+)", query_lower)
if m:
ext_filter = _normalize_ext_filter(m.group(1)) or None
query_lower = re.sub(r"\s*\b(?:ext|extension):[^\s,]+", " ", query_lower)
query_lower = re.sub(r"\s{2,}", " ", query_lower).strip().strip(',')
query = query_lower
except Exception:
ext_filter = None
match_all = query == "*" or (not query and bool(ext_filter))
results = []
search_dir = Path(self._location).expanduser()
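
Worked examples of the filter extraction above (values illustrative):

#   "ext:png cats"                -> ext_filter="png", query_lower="cats"
#   "system:filetype = png cats"  -> ext_filter="png", query_lower="cats"
#   "ext:.FLAC"                   -> ext_filter="flac", query_lower="" (treated as match_all)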
@@ -518,6 +555,41 @@ class Folder(Store):
try:
with DatabaseAPI(search_dir) as api:
ext_hashes: set[str] | None = None
if ext_filter:
# Fetch a bounded set of hashes to intersect with other filters.
ext_fetch_limit = (limit or 45) * 50
ext_hashes = api.get_file_hashes_by_ext(ext_filter, limit=ext_fetch_limit)
# ext-only search: query is empty (or coerced to match_all above).
if ext_filter and (not query_lower or query_lower == "*"):
rows = api.get_files_by_ext(ext_filter, limit)
for file_hash, file_path_str, size_bytes, ext in rows:
if not file_path_str:
continue
file_path = Path(file_path_str)
if not file_path.exists():
continue
if size_bytes is None:
try:
size_bytes = file_path.stat().st_size
except OSError:
size_bytes = None
tags = api.get_tags_for_file(file_hash)
entry = _create_entry(file_path, tags, size_bytes, file_hash)
try:
db_ext = str(ext or "").strip().lstrip('.')
if db_ext:
entry["ext"] = db_ext
except Exception:
pass
results.append(entry)
if limit is not None and len(results) >= limit:
return results
backend_label = str(getattr(self, "_name", "") or getattr(self, "NAME", "") or "folder")
debug(f"[folder:{backend_label}] {len(results)} result(s)")
return results
if tokens and len(tokens) > 1:
url_fetch_limit = (limit or 45) * 50
@@ -546,6 +618,22 @@ class Folder(Store):
return api.get_file_hashes_with_any_url(limit=url_fetch_limit)
return api.get_file_hashes_by_url_like(_url_like_pattern(pattern), limit=url_fetch_limit)
if namespace == 'system':
# Hydrus-compatible query: system:filetype = png
m_ft = re.match(r"^filetype\s*(?:=\s*)?(.+)$", pattern)
if m_ft:
normalized_ext = _normalize_ext_filter(m_ft.group(1))
if not normalized_ext:
return set()
return api.get_file_hashes_by_ext(normalized_ext, limit=url_fetch_limit)
return set()
if namespace in {'ext', 'extension'}:
normalized_ext = _normalize_ext_filter(pattern)
if not normalized_ext:
return set()
return api.get_file_hashes_by_ext(normalized_ext, limit=url_fetch_limit)
if namespace == 'store':
if pattern not in {'local', 'file', 'filesystem'}:
return set()
@@ -579,6 +667,11 @@ class Folder(Store):
if not matching_hashes:
return results
if ext_hashes is not None:
matching_hashes = (matching_hashes or set()) & ext_hashes
if not matching_hashes:
return results
if not matching_hashes:
return results
@@ -596,6 +689,12 @@ class Folder(Store):
size_bytes = None
tags = api.get_tags_for_file(file_hash)
entry = _create_entry(file_path, tags, size_bytes, file_hash)
try:
db_ext = str(ext or "").strip().lstrip('.')
if db_ext:
entry["ext"] = db_ext
except Exception:
pass
results.append(entry)
if limit is not None and len(results) >= limit:
return results
@@ -631,6 +730,12 @@ class Folder(Store):
size_bytes = None
tags = api.get_tags_for_file(file_hash)
entry = _create_entry(file_path, tags, size_bytes, file_hash)
try:
db_ext = str(ext or "").strip().lstrip('.')
if db_ext:
entry["ext"] = db_ext
except Exception:
pass
results.append(entry)
if limit is not None and len(results) >= limit:
return results
@@ -658,6 +763,67 @@ class Folder(Store):
if limit is not None and len(results) >= limit:
return results
return results
if namespace == "system":
# Hydrus-compatible query: system:filetype = png
m_ft = re.match(r"^filetype\s*(?:=\s*)?(.+)$", pattern)
if m_ft:
normalized_ext = _normalize_ext_filter(m_ft.group(1))
if not normalized_ext:
return results
rows = api.get_files_by_ext(normalized_ext, limit)
for file_hash, file_path_str, size_bytes, ext in rows:
if not file_path_str:
continue
file_path = Path(file_path_str)
if not file_path.exists():
continue
if size_bytes is None:
try:
size_bytes = file_path.stat().st_size
except OSError:
size_bytes = None
tags = api.get_tags_for_file(file_hash)
entry = _create_entry(file_path, tags, size_bytes, file_hash)
try:
db_ext = str(ext or "").strip().lstrip('.')
if db_ext:
entry["ext"] = db_ext
except Exception:
pass
results.append(entry)
if limit is not None and len(results) >= limit:
return results
return results
if namespace in {"ext", "extension"}:
normalized_ext = _normalize_ext_filter(pattern)
if not normalized_ext:
return results
rows = api.get_files_by_ext(normalized_ext, limit)
for file_hash, file_path_str, size_bytes, ext in rows:
if not file_path_str:
continue
file_path = Path(file_path_str)
if not file_path.exists():
continue
if size_bytes is None:
try:
size_bytes = file_path.stat().st_size
except OSError:
size_bytes = None
tags = api.get_tags_for_file(file_hash)
entry = _create_entry(file_path, tags, size_bytes, file_hash)
try:
db_ext = str(ext or "").strip().lstrip('.')
if db_ext:
entry["ext"] = db_ext
except Exception:
pass
results.append(entry)
if limit is not None and len(results) >= limit:
return results
return results
query_pattern = f"{namespace}:%"
rows = api.get_files_by_namespace_pattern(query_pattern, limit)
@@ -674,12 +840,20 @@ class Folder(Store):
if tag_lower.startswith(f"{namespace}:"):
value = tag_lower[len(namespace)+1:]
if fnmatch(value, pattern):
if ext_hashes is not None and file_hash not in ext_hashes:
break
file_path = Path(file_path_str)
if file_path.exists():
if size_bytes is None:
size_bytes = file_path.stat().st_size
all_tags = api.get_tags_for_file(file_hash)
entry = _create_entry(file_path, all_tags, size_bytes, file_hash)
try:
db_ext = str(ext or "").strip().lstrip('.')
if db_ext:
entry["ext"] = db_ext
except Exception:
pass
results.append(entry)
else:
debug(f"File missing on disk: {file_path}")
@@ -703,6 +877,8 @@ class Folder(Store):
for file_hash, file_path_str, size_bytes, ext in term_rows:
if not file_path_str:
continue
if ext_hashes is not None and file_hash not in ext_hashes:
continue
entry = hits.get(file_hash)
if entry:
entry["count"] += 1
@@ -746,6 +922,8 @@ class Folder(Store):
rows = api.get_all_files(limit)
for file_hash, file_path_str, size_bytes, ext in rows:
if file_path_str:
if ext_hashes is not None and file_hash not in ext_hashes:
continue
file_path = Path(file_path_str)
if file_path.exists():
if size_bytes is None:
@@ -753,6 +931,12 @@ class Folder(Store):
tags = api.get_tags_for_file(file_hash)
entry = _create_entry(file_path, tags, size_bytes, file_hash)
try:
db_ext = str(ext or "").strip().lstrip('.')
if db_ext:
entry["ext"] = db_ext
except Exception:
pass
results.append(entry)
backend_label = str(getattr(self, "_name", "") or getattr(self, "NAME", "") or "folder")
@@ -896,7 +1080,7 @@ class Folder(Store):
if db_tags:
# Return actual store name instead of generic "local_db"
store_name = self._name if self._name else "local"
return list(db_tags), store_name
return [str(t).strip().lower() for t in db_tags if isinstance(t, str) and t.strip()], store_name
except Exception as exc:
debug(f"Local DB lookup failed: {exc}")
return [], "unknown"
@@ -917,22 +1101,30 @@ class Folder(Store):
try:
with API_folder_store(Path(self._location)) as db:
# Get existing tags
existing_tags = list(db.get_tags(hash) or [])
original_tags_lower = {t.lower() for t in existing_tags}
# Merge new tags, handling namespace overwrites
for new_tag in tag:
if ':' in new_tag:
namespace = new_tag.split(':', 1)[0]
# Remove existing tags in same namespace
existing_tags = [t for t in existing_tags if not t.startswith(namespace + ':')]
# Add new tag if not already present (case-insensitive check)
if new_tag.lower() not in original_tags_lower:
existing_tags.append(new_tag)
# Save merged tags
db.add_tags_to_hash(hash, existing_tags)
existing_tags = [t for t in (db.get_tags(hash) or []) if isinstance(t, str) and t.strip()]
from metadata import compute_namespaced_tag_overwrite
_to_remove, _to_add, merged = compute_namespaced_tag_overwrite(existing_tags, tag or [])
if not _to_remove and not _to_add:
return True
# Folder DB tag table is case-sensitive and add_tags_to_hash() is additive.
# To enforce lowercase-only tags and namespace overwrites, rewrite the full tag set.
cursor = db.connection.cursor()
cursor.execute("DELETE FROM tags WHERE hash = ?", (hash,))
for t in merged:
t = str(t).strip().lower()
if t:
cursor.execute(
"INSERT OR IGNORE INTO tags (hash, tag) VALUES (?, ?)",
(hash, t),
)
db.connection.commit()
try:
db._update_metadata_modified_time(hash)
except Exception:
pass
return True
except Exception as exc:
debug(f"Local DB add_tags failed: {exc}")
@@ -949,7 +1141,10 @@ class Folder(Store):
if self._location:
try:
with API_folder_store(Path(self._location)) as db:
db.remove_tags_from_hash(file_hash, list(tags))
tag_list = [str(t).strip().lower() for t in (tags or []) if isinstance(t, str) and str(t).strip()]
if not tag_list:
return True
db.remove_tags_from_hash(file_hash, tag_list)
return True
except Exception as exc:
debug(f"Local DB remove_tags failed: {exc}")
@@ -1006,6 +1201,130 @@ class Folder(Store):
debug(f"add_url failed for local file: {exc}")
return False
def add_url_bulk(self, items: List[tuple[str, List[str]]], **kwargs: Any) -> bool:
"""Add known urls to many local files in one DB session.
This is a performance optimization used by cmdlets that receive many PipeObjects.
"""
from API.folder import API_folder_store
try:
if not self._location:
return False
# Normalize + coalesce duplicates per hash.
try:
from metadata import normalize_urls
except Exception:
normalize_urls = None # type: ignore
merged_by_hash: Dict[str, List[str]] = {}
for file_identifier, url_list in (items or []):
file_hash = str(file_identifier or "").strip().lower()
if not file_hash:
continue
incoming: List[str]
if normalize_urls is not None:
try:
incoming = normalize_urls(url_list)
except Exception:
incoming = [str(u).strip() for u in (url_list or []) if str(u).strip()]
else:
incoming = [str(u).strip() for u in (url_list or []) if str(u).strip()]
if not incoming:
continue
existing = merged_by_hash.get(file_hash) or []
for u in incoming:
if u and u not in existing:
existing.append(u)
merged_by_hash[file_hash] = existing
if not merged_by_hash:
return True
import json
with API_folder_store(Path(self._location)) as db:
conn = getattr(db, "connection", None)
if conn is None:
return False
cursor = conn.cursor()
# Ensure metadata rows exist (may be needed for older entries).
for file_hash in merged_by_hash.keys():
try:
cursor.execute("INSERT OR IGNORE INTO metadata (hash) VALUES (?)", (file_hash,))
except Exception:
continue
# Load existing urls for all hashes in chunks.
existing_urls_by_hash: Dict[str, List[str]] = {h: [] for h in merged_by_hash.keys()}
hashes = list(merged_by_hash.keys())
chunk_size = 400
for i in range(0, len(hashes), chunk_size):
chunk = hashes[i : i + chunk_size]
if not chunk:
continue
placeholders = ",".join(["?"] * len(chunk))
try:
cursor.execute(f"SELECT hash, url FROM metadata WHERE hash IN ({placeholders})", chunk)
rows = cursor.fetchall() or []
except Exception:
rows = []
for row in rows:
try:
row_hash = str(row[0]).strip().lower()
except Exception:
continue
raw_urls = None
try:
raw_urls = row[1]
except Exception:
raw_urls = None
parsed_urls: List[str] = []
if raw_urls:
try:
parsed = json.loads(raw_urls)
if normalize_urls is not None:
parsed_urls = normalize_urls(parsed)
else:
if isinstance(parsed, list):
parsed_urls = [str(u).strip() for u in parsed if str(u).strip()]
except Exception:
parsed_urls = []
existing_urls_by_hash[row_hash] = parsed_urls
# Compute updates and write in one commit.
updates: List[tuple[str, str]] = []
for file_hash, incoming_urls in merged_by_hash.items():
existing_urls = existing_urls_by_hash.get(file_hash) or []
final = list(existing_urls)
for u in incoming_urls:
if u and u not in final:
final.append(u)
if final != existing_urls:
try:
updates.append((json.dumps(final), file_hash))
except Exception:
continue
if updates:
cursor.executemany(
"UPDATE metadata SET url = ?, time_modified = CURRENT_TIMESTAMP, updated_at = CURRENT_TIMESTAMP WHERE hash = ?",
updates,
)
conn.commit()
return True
except Exception as exc:
debug(f"add_url_bulk failed for local file: {exc}")
return False
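The chunked SELECTs above (chunk_size = 400) keep each statement safely under SQLite's bound-parameter cap (999 in older builds). A standalone sketch of the pattern, with illustrative names:

import sqlite3
from typing import Iterator, Sequence

def select_in_chunks(
    conn: sqlite3.Connection,
    sql_template: str,  # e.g. "SELECT hash, url FROM metadata WHERE hash IN ({})"
    keys: Sequence[str],
    chunk_size: int = 400,  # conservative, well under SQLite's parameter limit
) -> Iterator[tuple]:
    """Run an IN(...) query in parameter-safe chunks, yielding every row."""
    for i in range(0, len(keys), chunk_size):
        chunk = list(keys[i : i + chunk_size])
        placeholders = ",".join("?" * len(chunk))
        yield from conn.execute(sql_template.format(placeholders), chunk)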
def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
"""Delete known url from a local file by hash."""
from API.folder import API_folder_store
@@ -1031,6 +1350,119 @@ class Folder(Store):
debug(f"delete_url failed for local file: {exc}")
return False
def delete_url_bulk(self, items: List[tuple[str, List[str]]], **kwargs: Any) -> bool:
"""Delete known urls from many local files in one DB session."""
from API.folder import API_folder_store
try:
if not self._location:
return False
try:
from metadata import normalize_urls
except Exception:
normalize_urls = None # type: ignore
remove_by_hash: Dict[str, set[str]] = {}
for file_identifier, url_list in (items or []):
file_hash = str(file_identifier or "").strip().lower()
if not file_hash:
continue
incoming: List[str]
if normalize_urls is not None:
try:
incoming = normalize_urls(url_list)
except Exception:
incoming = [str(u).strip() for u in (url_list or []) if str(u).strip()]
else:
incoming = [str(u).strip() for u in (url_list or []) if str(u).strip()]
remove = {u for u in incoming if u}
if not remove:
continue
remove_by_hash.setdefault(file_hash, set()).update(remove)
if not remove_by_hash:
return True
import json
with API_folder_store(Path(self._location)) as db:
conn = getattr(db, "connection", None)
if conn is None:
return False
cursor = conn.cursor()
# Ensure metadata rows exist.
for file_hash in remove_by_hash.keys():
try:
cursor.execute("INSERT OR IGNORE INTO metadata (hash) VALUES (?)", (file_hash,))
except Exception:
continue
# Load existing urls for hashes in chunks.
existing_urls_by_hash: Dict[str, List[str]] = {h: [] for h in remove_by_hash.keys()}
hashes = list(remove_by_hash.keys())
chunk_size = 400
for i in range(0, len(hashes), chunk_size):
chunk = hashes[i : i + chunk_size]
if not chunk:
continue
placeholders = ",".join(["?"] * len(chunk))
try:
cursor.execute(f"SELECT hash, url FROM metadata WHERE hash IN ({placeholders})", chunk)
rows = cursor.fetchall() or []
except Exception:
rows = []
for row in rows:
try:
row_hash = str(row[0]).strip().lower()
except Exception:
continue
raw_urls = None
try:
raw_urls = row[1]
except Exception:
raw_urls = None
parsed_urls: List[str] = []
if raw_urls:
try:
parsed = json.loads(raw_urls)
if normalize_urls is not None:
parsed_urls = normalize_urls(parsed)
else:
if isinstance(parsed, list):
parsed_urls = [str(u).strip() for u in parsed if str(u).strip()]
except Exception:
parsed_urls = []
existing_urls_by_hash[row_hash] = parsed_urls
# Apply removals + write updates.
updates: List[tuple[str, str]] = []
for file_hash, remove_set in remove_by_hash.items():
existing_urls = existing_urls_by_hash.get(file_hash) or []
new_urls = [u for u in existing_urls if u not in remove_set]
if new_urls != existing_urls:
try:
updates.append((json.dumps(new_urls), file_hash))
except Exception:
continue
if updates:
cursor.executemany(
"UPDATE metadata SET url = ?, time_modified = CURRENT_TIMESTAMP, updated_at = CURRENT_TIMESTAMP WHERE hash = ?",
updates,
)
conn.commit()
return True
except Exception as exc:
debug(f"delete_url_bulk failed for local file: {exc}")
return False
def get_note(self, file_identifier: str, **kwargs: Any) -> Dict[str, str]:
"""Get notes for a local file by hash."""
from API.folder import API_folder_store
@@ -1077,6 +1509,94 @@ class Folder(Store):
debug(f"set_note failed for local file: {exc}")
return False
def set_note_bulk(self, items: List[tuple[str, str, str]], **kwargs: Any) -> bool:
"""Set notes for many local files in one DB session.
Preserves existing semantics by only setting notes for hashes that still
map to a file path that exists on disk.
"""
from API.folder import API_folder_store
try:
if not self._location:
return False
# Normalize input.
normalized: List[tuple[str, str, str]] = []
for file_identifier, name, text in (items or []):
file_hash = str(file_identifier or "").strip().lower()
note_name = str(name or "").strip()
note_text = str(text or "")
if not file_hash or not _normalize_hash(file_hash) or not note_name:
continue
normalized.append((file_hash, note_name, note_text))
if not normalized:
return True
with API_folder_store(Path(self._location)) as db:
conn = getattr(db, "connection", None)
if conn is None:
return False
cursor = conn.cursor()
# Look up file paths for hashes in chunks (to verify existence).
wanted_hashes = sorted({h for (h, _n, _t) in normalized})
hash_to_path: Dict[str, str] = {}
chunk_size = 400
for i in range(0, len(wanted_hashes), chunk_size):
chunk = wanted_hashes[i : i + chunk_size]
if not chunk:
continue
placeholders = ",".join(["?"] * len(chunk))
try:
cursor.execute(f"SELECT hash, file_path FROM files WHERE hash IN ({placeholders})", chunk)
rows = cursor.fetchall() or []
except Exception:
rows = []
for row in rows:
try:
h = str(row[0]).strip().lower()
p = str(row[1]).strip()
except Exception:
continue
if h and p:
hash_to_path[h] = p
# Only write notes for hashes whose backing file still exists on disk.
inserts: List[tuple[str, str, str]] = []
for h, note_name, note_text in normalized:
p = hash_to_path.get(h)
if not p:
continue
try:
if not Path(p).exists():
continue
except Exception:
continue
inserts.append((h, note_name, note_text))
if not inserts:
return False
# Prefer upsert when supported, else fall back to INSERT OR REPLACE.
try:
cursor.executemany(
"INSERT INTO notes (hash, name, note) VALUES (?, ?, ?) "
"ON CONFLICT(hash, name) DO UPDATE SET note = excluded.note, updated_at = CURRENT_TIMESTAMP",
inserts,
)
except Exception:
cursor.executemany(
"INSERT OR REPLACE INTO notes (hash, name, note) VALUES (?, ?, ?)",
inserts,
)
conn.commit()
return True
except Exception as exc:
debug(f"set_note_bulk failed for local file: {exc}")
return False
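The upsert-with-fallback above is worth a note: ON CONFLICT ... DO UPDATE needs SQLite 3.24+ and a matching UNIQUE constraint on (hash, name), while INSERT OR REPLACE deletes the conflicting row and re-inserts it, resetting any columns not listed. A toy demonstration with a stand-in schema (the real notes table is assumed to carry the same uniqueness):

import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute("CREATE TABLE notes (hash TEXT, name TEXT, note TEXT, UNIQUE(hash, name))")
rows = [("abc", "sub", "v1"), ("abc", "sub", "v2")]  # second row updates the first
try:
    conn.executemany(
        "INSERT INTO notes (hash, name, note) VALUES (?, ?, ?) "
        "ON CONFLICT(hash, name) DO UPDATE SET note = excluded.note",
        rows,
    )
except sqlite3.OperationalError:
    # Older SQLite: OR REPLACE drops the conflicting row and re-inserts it.
    conn.executemany("INSERT OR REPLACE INTO notes (hash, name, note) VALUES (?, ?, ?)", rows)
assert conn.execute("SELECT note FROM notes WHERE hash = 'abc' AND name = 'sub'").fetchone()[0] == "v2"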
def delete_note(self, file_identifier: str, name: str, **kwargs: Any) -> bool:
"""Delete a named note for a local file by hash."""
from API.folder import API_folder_store

View File

@@ -217,10 +217,13 @@ class HydrusNetwork(Store):
# Add title to tags if provided and not already present
if title:
title_tag = f"title:{title}"
title_tag = f"title:{title}".strip().lower()
if not any(str(candidate).lower().startswith("title:") for candidate in tag_list):
tag_list = [title_tag] + list(tag_list)
# Hydrus tags are lowercase-only; normalize here for consistency.
tag_list = [str(t).strip().lower() for t in (tag_list or []) if isinstance(t, str) and str(t).strip()]
try:
# Compute file hash
file_hash = sha256_file(file_path)
@@ -445,6 +448,36 @@ class HydrusNetwork(Store):
query_lower = query.lower().strip()
# Support `ext:<value>` anywhere in the query. We filter results by the
# Hydrus metadata extension field.
def _normalize_ext_filter(value: str) -> str:
v = str(value or "").strip().lower().lstrip('.')
v = "".join(ch for ch in v if ch.isalnum())
return v
ext_filter: str | None = None
ext_only: bool = False
try:
m = re.search(r"\bext:([^\s,]+)", query_lower)
if not m:
m = re.search(r"\bextension:([^\s,]+)", query_lower)
if m:
ext_filter = _normalize_ext_filter(m.group(1)) or None
query_lower = re.sub(r"\s*\b(?:ext|extension):[^\s,]+", " ", query_lower)
query_lower = re.sub(r"\s{2,}", " ", query_lower).strip().strip(',')
query = query_lower
if ext_filter and not query_lower:
query = "*"
query_lower = "*"
ext_only = True
except Exception:
ext_filter = None
ext_only = False
# Split into meaningful terms for AND logic.
# Avoid punctuation tokens like '-' that would make matching brittle.
search_terms = [t for t in re.findall(r"[a-z0-9]+", query_lower) if t]
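# Worked example of the ext: filter above: "tolstoy ext:.PDF" leaves
# query_lower "tolstoy" with ext_filter "pdf", while a bare "ext:mp3"
# rewrites the query to "*" and sets ext_only for the fast path below.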
# Special case: url:* and url:<value>
metadata_list: list[dict[str, Any]] | None = None
if ":" in query_lower and not query_lower.startswith(":"):
@@ -508,54 +541,268 @@ class HydrusNetwork(Store):
metadata_list = _iter_url_filtered_metadata(pattern, want_any=False, fetch_limit=int(limit) if limit else 100)
# Parse the query into tags
# Handle both simple tags and complex queries
# "*" means "match all" - use system:everything tag in Hydrus
# If query has explicit namespace, use it as a tag search.
# If query is free-form, search BOTH:
# - title:*term* (title: is the only namespace searched implicitly)
# - *term* (freeform tags; we will filter out other namespace matches client-side)
tags: list[str] = []
freeform_union_search: bool = False
title_predicates: list[str] = []
freeform_predicates: list[str] = []
if query.strip() == "*":
# Use system:everything to match all files in Hydrus
tags = ["system:everything"]
elif ':' in query_lower:
tags = [query_lower]
else:
# If query doesn't have a namespace (no ':'), search all files and filter by title/tags
# If query has explicit namespace, use it as a tag search
if ':' not in query_lower:
# No namespace provided: search all files, then filter by title/tags containing the query
tags = ["system:everything"]
freeform_union_search = True
if search_terms:
# Hydrus supports wildcard matching primarily as a prefix (e.g., tag*).
# Use per-term prefix matching for both title: and freeform tags.
title_predicates = [f"title:{term}*" for term in search_terms]
freeform_predicates = [f"{term}*" for term in search_terms]
else:
# User provided explicit namespace (e.g., "creator:john" or "system:has_audio")
# Use it as a tag search
tags = [query_lower]
if not tags:
debug(f"{prefix} 0 result(s)")
return []
# If we can't extract alnum terms, fall back to the raw query text.
title_predicates = [f"title:{query_lower}*"]
freeform_predicates = [f"{query_lower}*"]
# Search files with the tags (unless url: search already produced metadata)
results = []
# Split by comma or space for AND logic
search_terms = set(query_lower.replace(',', ' ').split()) # For substring matching
def _extract_search_ids(payload: Any) -> tuple[list[int], list[str]]:
if not isinstance(payload, dict):
return [], []
raw_ids = payload.get("file_ids", [])
raw_hashes = payload.get("hashes", [])
ids_out: list[int] = []
hashes_out: list[str] = []
if isinstance(raw_ids, list):
for item in raw_ids:
try:
ids_out.append(int(item))
except (TypeError, ValueError):
continue
if isinstance(raw_hashes, list):
hashes_out = [str(h).strip() for h in raw_hashes if isinstance(h, str) and str(h).strip()]
return ids_out, hashes_out
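# e.g. _extract_search_ids({"file_ids": [1, "2", "x"], "hashes": ["ab", 3]})
# returns ([1, 2], ["ab"]); non-numeric ids and non-string hashes are dropped.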
if metadata_list is None:
search_result = client.search_files(
tags=tags,
return_hashes=True,
return_file_ids=True
)
file_ids: list[int] = []
hashes: list[str] = []
file_ids = search_result.get("file_ids", []) if isinstance(search_result, dict) else []
hashes = search_result.get("hashes", []) if isinstance(search_result, dict) else []
if freeform_union_search:
if not title_predicates and not freeform_predicates:
debug(f"{prefix} 0 result(s)")
return []
payloads: list[Any] = []
try:
payloads.append(
client.search_files(
tags=title_predicates,
return_hashes=True,
return_file_ids=True,
)
)
except Exception:
pass
try:
payloads.append(
client.search_files(
tags=freeform_predicates,
return_hashes=True,
return_file_ids=True,
)
)
except Exception:
pass
id_set: set[int] = set()
hash_set: set[str] = set()
for payload in payloads:
ids_part, hashes_part = _extract_search_ids(payload)
for fid in ids_part:
id_set.add(fid)
for hh in hashes_part:
hash_set.add(hh)
file_ids = list(id_set)
hashes = list(hash_set)
else:
if not tags:
debug(f"{prefix} 0 result(s)")
return []
search_result = client.search_files(
tags=tags,
return_hashes=True,
return_file_ids=True
)
file_ids, hashes = _extract_search_ids(search_result)
# Fast path: ext-only search. Avoid fetching metadata for an unbounded
# system:everything result set; fetch in chunks until we have enough.
if ext_only and ext_filter:
results: list[dict[str, Any]] = []
if not file_ids and not hashes:
debug(f"{prefix} 0 result(s)")
return []
# Prefer file_ids if available.
if file_ids:
chunk_size = 200
for start in range(0, len(file_ids), chunk_size):
if len(results) >= limit:
break
chunk = file_ids[start : start + chunk_size]
try:
payload = client.fetch_file_metadata(
file_ids=chunk,
include_service_keys_to_tags=True,
include_file_url=False,
include_duration=True,
include_size=True,
include_mime=True,
)
except Exception:
continue
metas = payload.get("metadata", []) if isinstance(payload, dict) else []
if not isinstance(metas, list):
continue
for meta in metas:
if len(results) >= limit:
break
if not isinstance(meta, dict):
continue
mime_type = meta.get("mime")
ext = str(meta.get("ext") or "").strip().lstrip('.')
if not ext and mime_type:
for category in mime_maps.values():
for _ext_key, info in category.items():
if mime_type in info.get("mimes", []):
ext = str(info.get("ext", "")).strip().lstrip('.')
break
if ext:
break
if _normalize_ext_filter(ext) != ext_filter:
continue
file_id = meta.get("file_id")
hash_hex = meta.get("hash")
size = meta.get("size", 0)
tags_set = meta.get("tags", {})
all_tags: list[str] = []
title = f"Hydrus File {file_id}"
if isinstance(tags_set, dict):
def _collect(tag_list: Any) -> None:
nonlocal title
if not isinstance(tag_list, list):
return
for tag in tag_list:
tag_text = str(tag) if tag else ""
if not tag_text:
continue
tag_l = tag_text.strip().lower()
if not tag_l:
continue
all_tags.append(tag_l)
if tag_l.startswith("title:") and title == f"Hydrus File {file_id}":
title = tag_l.split(":", 1)[1].strip()
for _service_name, service_tags in tags_set.items():
if not isinstance(service_tags, dict):
continue
storage_tags = service_tags.get("storage_tags", {})
if isinstance(storage_tags, dict):
for tag_list in storage_tags.values():
_collect(tag_list)
display_tags = service_tags.get("display_tags", [])
_collect(display_tags)
file_url = f"{self.URL.rstrip('/')}/get_files/file?hash={hash_hex}"
results.append(
{
"hash": hash_hex,
"url": file_url,
"name": title,
"title": title,
"size": size,
"size_bytes": size,
"store": self.NAME,
"tag": all_tags,
"file_id": file_id,
"mime": mime_type,
"ext": ext,
}
)
debug(f"{prefix} {len(results)} result(s)")
return results[:limit]
# If we only got hashes, fall back to the normal flow below.
if not file_ids and not hashes:
debug(f"{prefix} 0 result(s)")
return []
if file_ids:
metadata = client.fetch_file_metadata(file_ids=file_ids)
metadata = client.fetch_file_metadata(
file_ids=file_ids,
include_service_keys_to_tags=True,
include_file_url=False,
include_duration=True,
include_size=True,
include_mime=True,
)
metadata_list = metadata.get("metadata", [])
elif hashes:
metadata = client.fetch_file_metadata(hashes=hashes)
metadata = client.fetch_file_metadata(
hashes=hashes,
include_service_keys_to_tags=True,
include_file_url=False,
include_duration=True,
include_size=True,
include_mime=True,
)
metadata_list = metadata.get("metadata", [])
else:
metadata_list = []
# If our free-text searches produce nothing (or nothing survived downstream filtering), fall back to scanning.
if (not metadata_list) and (query_lower != "*") and (":" not in query_lower):
try:
search_result = client.search_files(
tags=["system:everything"],
return_hashes=True,
return_file_ids=True,
)
file_ids, hashes = _extract_search_ids(search_result)
if file_ids:
metadata = client.fetch_file_metadata(
file_ids=file_ids,
include_service_keys_to_tags=True,
include_file_url=False,
include_duration=True,
include_size=True,
include_mime=True,
)
metadata_list = metadata.get("metadata", [])
elif hashes:
metadata = client.fetch_file_metadata(
hashes=hashes,
include_service_keys_to_tags=True,
include_file_url=False,
include_duration=True,
include_size=True,
include_mime=True,
)
metadata_list = metadata.get("metadata", [])
except Exception:
pass
if not isinstance(metadata_list, list):
metadata_list = []
@@ -585,10 +832,13 @@ class HydrusNetwork(Store):
tag_text = str(tag) if tag else ""
if not tag_text:
continue
all_tags.append(tag_text)
all_tags_str += " " + tag_text.lower()
if tag_text.lower().startswith("title:") and title == f"Hydrus File {file_id}":
title = tag_text.split(":", 1)[1].strip()
tag_l = tag_text.strip().lower()
if not tag_l:
continue
all_tags.append(tag_l)
all_tags_str += " " + tag_l
if tag_l.startswith("title:") and title == f"Hydrus File {file_id}":
title = tag_l.split(":", 1)[1].strip()
for _service_name, service_tags in tags_set.items():
if not isinstance(service_tags, dict):
@@ -641,20 +891,15 @@ class HydrusNetwork(Store):
"ext": ext,
})
else:
# Free-form search: check if search terms match the title or tags
# Match if ALL search terms are found in title or tags (AND logic)
# AND use whole word matching
# Combine title and tags for searching
searchable_text = (title + " " + all_tags_str).lower()
# Free-form search: check if search terms match title or FREEFORM tags.
# Do NOT implicitly match other namespace tags (except title:).
freeform_tags = [t for t in all_tags if isinstance(t, str) and t and (":" not in t)]
searchable_text = (title + " " + " ".join(freeform_tags)).lower()
match = True
if query_lower != "*":
if query_lower != "*" and search_terms:
for term in search_terms:
# Regex for whole word: \bterm\b
# Escape term to handle special chars
pattern = r'\b' + re.escape(term) + r'\b'
if not re.search(pattern, searchable_text):
if term not in searchable_text:
match = False
break
@@ -675,6 +920,17 @@ class HydrusNetwork(Store):
})
debug(f"{prefix} {len(results)} result(s)")
if ext_filter:
wanted = ext_filter
filtered: list[dict[str, Any]] = []
for item in results:
try:
if _normalize_ext_filter(str(item.get("ext") or "")) == wanted:
filtered.append(item)
except Exception:
continue
results = filtered
return results[:limit]
except Exception as exc:
@@ -903,8 +1159,8 @@ class HydrusNetwork(Store):
# Extract tags from metadata
tags = self._extract_tags_from_hydrus_meta(meta, service_key, service_name)
return tags, "hydrus"
return [str(t).strip().lower() for t in tags if isinstance(t, str) and t.strip()], "hydrus"
except Exception as exc:
debug(f"{self._log_prefix()} get_tags failed: {exc}")
@@ -924,12 +1180,38 @@ class HydrusNetwork(Store):
debug(f"{self._log_prefix()} add_tag: invalid file hash '{file_identifier}'")
return False
service_name = kwargs.get("service_name") or "my tags"
# Ensure tags is a list
tag_list = list(tags) if isinstance(tags, (list, tuple)) else [str(tags)]
if not tag_list:
return False
client.add_tag(file_hash, tag_list, service_name)
return True
incoming_tags = [str(t).strip().lower() for t in (tags or []) if isinstance(t, str) and str(t).strip()]
if not incoming_tags:
return True
try:
existing_tags, _src = self.get_tag(file_hash)
except Exception:
existing_tags = []
from metadata import compute_namespaced_tag_overwrite
tags_to_remove, tags_to_add, _merged = compute_namespaced_tag_overwrite(existing_tags, incoming_tags)
if not tags_to_add and not tags_to_remove:
return True
did_any = False
if tags_to_remove:
try:
client.delete_tag(file_hash, tags_to_remove, service_name)
did_any = True
except Exception as exc:
debug(f"{self._log_prefix()} add_tag: delete_tag failed: {exc}")
if tags_to_add:
try:
client.add_tag(file_hash, tags_to_add, service_name)
did_any = True
except Exception as exc:
debug(f"{self._log_prefix()} add_tag: add_tag failed: {exc}")
return did_any
except Exception as exc:
debug(f"{self._log_prefix()} add_tag failed: {exc}")
return False
@@ -948,7 +1230,8 @@ class HydrusNetwork(Store):
debug(f"{self._log_prefix()} delete_tag: invalid file hash '{file_identifier}'")
return False
service_name = kwargs.get("service_name") or "my tags"
tag_list = list(tags) if isinstance(tags, (list, tuple)) else [str(tags)]
raw_list = list(tags) if isinstance(tags, (list, tuple)) else [str(tags)]
tag_list = [str(t).strip().lower() for t in raw_list if isinstance(t, str) and str(t).strip()]
if not tag_list:
return False
client.delete_tag(file_hash, tag_list, service_name)
@@ -1014,6 +1297,38 @@ class HydrusNetwork(Store):
debug(f"{self._log_prefix()} add_url failed: {exc}")
return False
def add_url_bulk(self, items: List[tuple[str, List[str]]], **kwargs: Any) -> bool:
"""Bulk associate urls with Hydrus files.
This is a best-effort convenience wrapper used by cmdlets to batch url associations.
Hydrus' client API is still called per (hash,url) pair, but this consolidates the
cmdlet-level control flow so url association can be deferred until the end.
"""
try:
client = self._client
if client is None:
debug(f"{self._log_prefix()} add_url_bulk: client unavailable")
return False
any_success = False
for file_identifier, urls in (items or []):
h = str(file_identifier or "").strip().lower()
if len(h) != 64:
continue
for u in (urls or []):
s = str(u or "").strip()
if not s:
continue
try:
client.associate_url(h, s)
any_success = True
except Exception:
continue
return any_success
except Exception as exc:
debug(f"{self._log_prefix()} add_url_bulk failed: {exc}")
return False
def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
"""Delete one or more url from a Hydrus file.
"""

View File

@@ -50,6 +50,51 @@ class Store(ABC):
def add_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
raise NotImplementedError
def add_url_bulk(self, items: List[Tuple[str, List[str]]], **kwargs: Any) -> bool:
"""Optional bulk url association.
Backends may override this to batch writes (single transaction / request).
Default behavior is to call add_url() per file.
"""
changed_any = False
for file_identifier, urls in (items or []):
try:
ok = self.add_url(file_identifier, urls, **kwargs)
changed_any = changed_any or bool(ok)
except Exception:
continue
return changed_any
def delete_url_bulk(self, items: List[Tuple[str, List[str]]], **kwargs: Any) -> bool:
"""Optional bulk url deletion.
Backends may override this to batch writes (single transaction / request).
Default behavior is to call delete_url() per file.
"""
changed_any = False
for file_identifier, urls in (items or []):
try:
ok = self.delete_url(file_identifier, urls, **kwargs)
changed_any = changed_any or bool(ok)
except Exception:
continue
return changed_any
def set_note_bulk(self, items: List[Tuple[str, str, str]], **kwargs: Any) -> bool:
"""Optional bulk note set.
Backends may override this to batch writes (single transaction / request).
Default behavior is to call set_note() per file.
"""
changed_any = False
for file_identifier, name, text in (items or []):
try:
ok = self.set_note(file_identifier, name, text, **kwargs)
changed_any = changed_any or bool(ok)
except Exception:
continue
return changed_any
@abstractmethod
def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
raise NotImplementedError
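For contrast with the per-item defaults above, a backend that actually batches could override the hook along these lines (a self-contained sketch; InMemoryBackend is illustrative, not a real Store subclass):

from typing import Any, Dict, List, Tuple

class InMemoryBackend:
    """Toy backend showing the bulk entry point the base class falls back from."""

    def __init__(self) -> None:
        self._urls: Dict[str, List[str]] = {}

    def add_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
        bucket = self._urls.setdefault(file_identifier, [])
        changed = False
        for u in url:
            if u not in bucket:
                bucket.append(u)
                changed = True
        return changed

    def add_url_bulk(self, items: List[Tuple[str, List[str]]], **kwargs: Any) -> bool:
        # A real backend would wrap this in one transaction / request; the list
        # comprehension (not a generator) ensures every write happens.
        return any([self.add_url(h, urls) for h, urls in items])

backend = InMemoryBackend()
assert backend.add_url_bulk([("a" * 64, ["https://example.com/x"])])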

View File

@@ -109,6 +109,7 @@ class Add_File(Cmdlet):
collected_payloads: List[Dict[str, Any]] = []
pending_relationship_pairs: Dict[str, set[tuple[str, str]]] = {}
pending_url_associations: Dict[str, List[tuple[str, List[str]]]] = {}
successes = 0
failures = 0
@@ -118,6 +119,110 @@ class Add_File(Cmdlet):
want_final_search_store = bool(is_last_stage) and bool(is_storage_backend_location) and bool(location)
auto_search_store_after_add = False
# When ingesting multiple items into a backend store, defer URL association and
# apply it once at the end (bulk) to avoid per-item URL API calls.
defer_url_association = bool(is_storage_backend_location) and bool(location) and len(items_to_process) > 1
# If we are going to persist results (-store / -provider) and the piped input contains
# URL download targets (e.g. playlist rows), preflight URL duplicates once up-front.
# IMPORTANT: Do not treat a *source URL* on an already-local file (e.g. a screenshot)
# as a download target; that would trigger yt-dlp preflights for non-yt-dlp URLs.
skip_url_downloads: set[str] = set()
download_mode_hint: Optional[str] = None
forced_ytdl_format: Optional[str] = None
if (provider_name or location) and isinstance(items_to_process, list) and items_to_process:
url_candidates: List[str] = []
for it in items_to_process:
try:
po_probe = coerce_to_pipe_object(it, path_arg)
except Exception:
continue
# If the piped item already points at a local file, we are *ingesting* it,
# not downloading it. Skip URL-preflight and yt-dlp probing for those.
try:
po_path = getattr(po_probe, "path", None)
po_path_s = str(po_path or "").strip()
if po_path_s and not po_path_s.lower().startswith(("http://", "https://", "magnet:", "torrent:")):
continue
except Exception:
pass
try:
for u in (self._get_url(it, po_probe) or []):
s = str(u or "").strip()
if not s:
continue
if s.lower().startswith(("http://", "https://", "magnet:", "torrent:")):
url_candidates.append(s)
except Exception:
continue
# Only meaningful when targeting a registered backend store.
if url_candidates and is_storage_backend_location and location:
# De-dupe in-order to keep logs stable.
seen: set[str] = set()
unique_urls: List[str] = []
for u in url_candidates:
if u in seen:
continue
seen.add(u)
unique_urls.append(u)
try:
skip_url_downloads = self._preflight_url_duplicates_bulk(unique_urls, config)
except Exception:
skip_url_downloads = set()
# Batch-level format preflight:
# - If the sample URL only has one available format, force it for the batch.
# - If the sample URL appears audio-only (no video codecs), prefer audio mode.
try:
from cmdlet.download_media import is_url_supported_by_ytdlp, list_formats
from tool.ytdlp import YtDlpTool
sample_url = unique_urls[0] if unique_urls else None
if sample_url and is_url_supported_by_ytdlp(str(sample_url)):
cf = None
try:
cookie_path = YtDlpTool(config).resolve_cookiefile()
if cookie_path is not None and cookie_path.is_file():
cf = str(cookie_path)
except Exception:
cf = None
fmts = list_formats(
str(sample_url),
no_playlist=False,
playlist_items=None,
cookiefile=cf,
)
if isinstance(fmts, list) and fmts:
has_video = False
try:
for f in fmts:
if not isinstance(f, dict):
continue
vcodec = str(f.get("vcodec", "none") or "none").strip().lower()
if vcodec and vcodec != "none":
has_video = True
break
except Exception:
has_video = False
download_mode_hint = "video" if has_video else "audio"
if len(fmts) == 1 and isinstance(fmts[0], dict):
fid = str(fmts[0].get("format_id") or "").strip()
if fid:
forced_ytdl_format = fid
except Exception:
    pass
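# A standalone equivalent of this probe, using yt-dlp directly rather than the
# repo-internal list_formats helper (assumption: the helper surfaces the same
# per-format "vcodec" fields that yt-dlp itself produces):
#   import yt_dlp
#   with yt_dlp.YoutubeDL({"quiet": True, "noplaylist": True}) as ydl:
#       info = ydl.extract_info(sample_url, download=False)
#   has_video = any(
#       str(f.get("vcodec") or "none").lower() != "none"
#       for f in (info.get("formats") or [])
#   )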
processed_url_items: set[str] = set()
for item in items_to_process:
pipe_obj = coerce_to_pipe_object(item, path_arg)
@@ -244,7 +349,148 @@ class Add_File(Cmdlet):
if isinstance(media_path_or_url, str) and media_path_or_url.lower().startswith(
("http://", "https://", "magnet:", "torrent:")
):
code = self._delegate_to_download_data(item, media_path_or_url, location, provider_name, args, config)
# If the user provided a destination (-store / -provider), download here and then
# continue normal add-file logic so the downloaded file is actually ingested.
url_str = str(media_path_or_url)
if (provider_name or location):
# Avoid re-processing the same URL multiple times in a batch.
if url_str in processed_url_items:
successes += 1
continue
processed_url_items.add(url_str)
# If bulk preflight found this URL already stored, skip downloading.
if url_str in skip_url_downloads:
log(f"Skipping download (already stored): {url_str}", file=sys.stderr)
successes += 1
continue
downloaded_pipe_dicts = self._download_streaming_url_as_pipe_objects(
url_str,
config,
mode_hint=download_mode_hint,
ytdl_format_hint=forced_ytdl_format,
)
if not downloaded_pipe_dicts:
failures += 1
continue
# Merge original tags/notes/relationships into each downloaded item and ingest.
for dl_item in downloaded_pipe_dicts:
try:
if isinstance(dl_item, dict):
# Merge tags
base_tags = list(getattr(pipe_obj, "tag", None) or [])
if base_tags:
dl_tags = list(dl_item.get("tag") or [])
dl_item["tag"] = merge_sequences(dl_tags, base_tags, case_sensitive=False)
# Carry notes/relationships forward when present on the original.
base_notes = getattr(pipe_obj, "notes", None)
if base_notes and ("notes" not in dl_item):
dl_item["notes"] = base_notes
base_rels = getattr(pipe_obj, "relationships", None)
if base_rels and ("relationships" not in dl_item):
dl_item["relationships"] = base_rels
except Exception:
pass
dl_pipe_obj = coerce_to_pipe_object(dl_item, None)
try:
dl_media_path = Path(str(getattr(dl_pipe_obj, "path", "") or ""))
except Exception:
dl_media_path = None
if dl_media_path is None or not self._validate_source(dl_media_path):
failures += 1
continue
if provider_name:
if str(provider_name).strip().lower() == "matrix":
room_id = None
if provider_room:
room_id = str(provider_room).strip()
if not room_id:
try:
matrix_conf = config.get("provider", {}).get("matrix", {}) if isinstance(config, dict) else {}
room_id = str(matrix_conf.get("room_id") or "").strip() or None
except Exception:
room_id = None
if not room_id:
pending = [
{
"path": str(dl_media_path),
"pipe_obj": dl_pipe_obj,
"delete_after": bool(delete_after_item),
}
]
return self._matrix_prompt_room_selection(pending, config, list(args))
code = self._handle_matrix_upload(
dl_media_path,
dl_pipe_obj,
config,
delete_after_item,
room_id=room_id,
)
else:
code = self._handle_provider_upload(
dl_media_path,
provider_name,
dl_pipe_obj,
config,
delete_after_item,
)
if code == 0:
successes += 1
else:
failures += 1
continue
if location:
try:
store = Store(config)
backends = store.list_backends()
if location in backends:
code = self._handle_storage_backend(
dl_item,
dl_media_path,
location,
dl_pipe_obj,
config,
delete_after_item,
collect_payloads=collected_payloads,
collect_relationship_pairs=pending_relationship_pairs,
defer_url_association=defer_url_association,
pending_url_associations=pending_url_associations,
suppress_last_stage_overlay=want_final_search_store,
auto_search_store=auto_search_store_after_add,
)
else:
code = self._handle_local_export(
dl_media_path,
location,
dl_pipe_obj,
config,
delete_after_item,
)
except Exception as exc:
debug(f"[add-file] ERROR: Failed to resolve location: {exc}")
log(f"Invalid location: {location}", file=sys.stderr)
failures += 1
continue
if code == 0:
successes += 1
else:
failures += 1
continue
# Finished processing all downloaded items for this URL.
continue
# No destination specified: keep legacy behavior (download-media only).
code = self._delegate_to_download_data(item, url_str, location, provider_name, args, config)
if code == 0:
successes += 1
else:
@@ -303,6 +549,8 @@ class Add_File(Cmdlet):
delete_after_item,
collect_payloads=collected_payloads,
collect_relationship_pairs=pending_relationship_pairs,
defer_url_association=defer_url_association,
pending_url_associations=pending_url_associations,
suppress_last_stage_overlay=want_final_search_store,
auto_search_store=auto_search_store_after_add,
)
@@ -329,6 +577,13 @@ class Add_File(Cmdlet):
except Exception:
pass
# Apply deferred url associations (bulk) before showing the final store table.
if pending_url_associations:
try:
Add_File._apply_pending_url_associations(pending_url_associations, config)
except Exception:
pass
# Always end add-file -store (when last stage) by showing the canonical store table.
# This keeps output consistent and ensures @N selection works for multi-item ingests.
if want_final_search_store and collected_payloads:
@@ -383,7 +638,7 @@ class Add_File(Cmdlet):
query = "hash:" + ",".join(hashes)
args = ["-store", str(store), query]
log(f"[add-file] Refresh: search-store -store {store} \"{query}\"", file=sys.stderr)
debug(f"[add-file] Refresh: search-store -store {store} \"{query}\"")
# Run search-store under a temporary stage context so its ctx.emit() calls
# don't interfere with the outer add-file pipeline stage.
@@ -1440,6 +1695,292 @@ class Add_File(Cmdlet):
return 0
@staticmethod
def _preflight_url_duplicates_bulk(urls: Sequence[str], config: Dict[str, Any]) -> set[str]:
"""Return a set of URLs that appear to already exist in any searchable backend.
This is a best-effort check used to avoid re-downloading already-stored media when
a batch of URL items is piped into add-file.
"""
skip: set[str] = set()
try:
storage = Store(config)
backend_names = list(storage.list_searchable_backends() or [])
except Exception:
return skip
for raw in urls:
u = str(raw or "").strip()
if not u:
continue
for backend_name in backend_names:
try:
if str(backend_name).strip().lower() == "temp":
continue
except Exception:
pass
try:
backend = storage[backend_name]
except Exception:
continue
try:
hits = backend.search(f"url:{u}", limit=1) or []
except Exception:
hits = []
if hits:
skip.add(u)
break
return skip
@staticmethod
def _download_streaming_url_as_pipe_objects(
url: str,
config: Dict[str, Any],
*,
mode_hint: Optional[str] = None,
ytdl_format_hint: Optional[str] = None,
) -> List[Dict[str, Any]]:
"""Download a yt-dlp-supported URL and return PipeObject-style dict(s).
This does not rely on pipeline stage context and is used so add-file can ingest
URL selections directly (download -> add to store/provider) in one invocation.
"""
url_str = str(url or "").strip()
if not url_str:
return []
try:
from cmdlet.download_media import (
CMDLET as dl_cmdlet,
_download_with_timeout,
is_url_supported_by_ytdlp,
list_formats,
_format_chapters_note,
_best_subtitle_sidecar,
_read_text_file,
)
from models import DownloadOptions
from tool.ytdlp import YtDlpTool
except Exception:
return []
if not is_url_supported_by_ytdlp(url_str):
return []
try:
from config import resolve_output_dir
out_dir = resolve_output_dir(config)
if out_dir is None:
return []
except Exception:
return []
cookies_path = None
try:
cookie_candidate = YtDlpTool(config).resolve_cookiefile()
if cookie_candidate is not None and cookie_candidate.is_file():
cookies_path = cookie_candidate
except Exception:
cookies_path = None
quiet_download = False
try:
quiet_download = bool((config or {}).get("_quiet_background_output"))
except Exception:
quiet_download = False
# Decide download mode.
# Default to video unless we have a hint or the URL appears to be audio-only.
mode = str(mode_hint or "").strip().lower() if mode_hint else ""
if mode not in {"audio", "video"}:
mode = "video"
# Best-effort: infer the mode from this URL's available formats (probed once).
try:
cf = str(cookies_path) if cookies_path is not None and cookies_path.is_file() else None
fmts_probe = list_formats(url_str, no_playlist=False, playlist_items=None, cookiefile=cf)
if isinstance(fmts_probe, list) and fmts_probe:
has_video = False
for f in fmts_probe:
if not isinstance(f, dict):
continue
vcodec = str(f.get("vcodec", "none") or "none").strip().lower()
if vcodec and vcodec != "none":
has_video = True
break
mode = "video" if has_video else "audio"
except Exception:
mode = "video"
# Pick a safe initial format selector.
# Important: yt-dlp defaults like "251/140" are YouTube-specific and break Bandcamp.
fmt_hint = str(ytdl_format_hint).strip() if ytdl_format_hint else ""
if fmt_hint:
chosen_format: Optional[str] = fmt_hint
else:
chosen_format = None
if mode == "audio":
# Generic audio selector that works across extractors.
chosen_format = "bestaudio/best"
opts = DownloadOptions(
url=url_str,
mode=mode,
output_dir=Path(out_dir),
cookies_path=cookies_path,
ytdl_format=chosen_format,
quiet=quiet_download,
embed_chapters=True,
write_sub=True,
)
# Download with a small amount of resilience for format errors.
try:
result_obj = _download_with_timeout(opts, timeout_seconds=300)
except Exception as exc:
msg = str(exc)
# If a format is invalid/unsupported, try:
# - if only one format exists, retry with that id
# - else for audio-only sources, retry with bestaudio/best
try:
format_error = "Requested format is not available" in msg
except Exception:
format_error = False
if format_error:
try:
cf = str(cookies_path) if cookies_path is not None and cookies_path.is_file() else None
fmts = list_formats(url_str, no_playlist=False, playlist_items=None, cookiefile=cf)
if isinstance(fmts, list) and len(fmts) == 1 and isinstance(fmts[0], dict):
fid = str(fmts[0].get("format_id") or "").strip()
if fid:
opts = DownloadOptions(
url=url_str,
mode=mode,
output_dir=Path(out_dir),
cookies_path=cookies_path,
ytdl_format=fid,
quiet=quiet_download,
embed_chapters=True,
write_sub=True,
)
result_obj = _download_with_timeout(opts, timeout_seconds=300)
# proceed
else:
raise
elif mode == "audio" and (not chosen_format or chosen_format != "bestaudio/best"):
opts = DownloadOptions(
url=url_str,
mode=mode,
output_dir=Path(out_dir),
cookies_path=cookies_path,
ytdl_format="bestaudio/best",
quiet=quiet_download,
embed_chapters=True,
write_sub=True,
)
result_obj = _download_with_timeout(opts, timeout_seconds=300)
else:
raise
except Exception as exc2:
log(f"[add-file] Download failed for {url_str}: {exc2}", file=sys.stderr)
return []
else:
log(f"[add-file] Download failed for {url_str}: {exc}", file=sys.stderr)
return []
results: List[Any]
if isinstance(result_obj, list):
results = list(result_obj)
else:
paths = getattr(result_obj, "paths", None)
if isinstance(paths, list) and paths:
# Section downloads: create one result per file.
from models import DownloadMediaResult
results = []
for p in paths:
try:
p_path = Path(p)
except Exception:
continue
if not p_path.exists() or p_path.is_dir():
continue
try:
hv = sha256_file(p_path)
except Exception:
hv = None
try:
results.append(
DownloadMediaResult(
path=p_path,
info=getattr(result_obj, "info", {}) or {},
tag=list(getattr(result_obj, "tag", []) or []),
source_url=getattr(result_obj, "source_url", None) or url_str,
hash_value=hv,
)
)
except Exception:
continue
else:
results = [result_obj]
out: List[Dict[str, Any]] = []
for downloaded in results:
try:
po = dl_cmdlet._build_pipe_object(downloaded, url_str, opts)
# Attach chapter timestamps note (best-effort).
try:
info = downloaded.info if isinstance(getattr(downloaded, "info", None), dict) else {}
except Exception:
info = {}
try:
chapters_text = _format_chapters_note(info)
except Exception:
chapters_text = None
if chapters_text:
notes = po.get("notes")
if not isinstance(notes, dict):
notes = {}
notes.setdefault("chapters", chapters_text)
po["notes"] = notes
# Capture subtitle sidecar into notes and remove it so add-file won't ingest it later.
try:
media_path = Path(str(po.get("path") or ""))
except Exception:
media_path = None
if media_path is not None and media_path.exists() and media_path.is_file():
try:
sub_path = _best_subtitle_sidecar(media_path)
except Exception:
sub_path = None
if sub_path is not None:
sub_text = _read_text_file(sub_path)
if sub_text:
notes = po.get("notes")
if not isinstance(notes, dict):
notes = {}
notes["sub"] = sub_text
po["notes"] = notes
try:
sub_path.unlink()
except Exception:
pass
# Mark as temp artifact from download-media so add-file can auto-delete after ingest.
po["action"] = "cmdlet:download-media"
po["is_temp"] = True
out.append(po)
except Exception:
continue
return out
@staticmethod
def _download_soulseek_file(
result: Any,
@@ -1640,7 +2181,9 @@ class Add_File(Cmdlet):
ctx.set_current_stage_table(table)
print()
print(table.format_plain())
from rich_display import stdout_console
stdout_console().print(table)
print("\nSelect room(s) with @N (e.g. @1 or @1-3) to upload the selected item(s)")
return 0
@@ -1710,6 +2253,8 @@ class Add_File(Cmdlet):
*,
collect_payloads: Optional[List[Dict[str, Any]]] = None,
collect_relationship_pairs: Optional[Dict[str, set[tuple[str, str]]]] = None,
defer_url_association: bool = False,
pending_url_associations: Optional[Dict[str, List[tuple[str, List[str]]]]] = None,
suppress_last_stage_overlay: bool = False,
auto_search_store: bool = True,
) -> int:
@@ -1822,7 +2367,7 @@ class Add_File(Cmdlet):
media_path,
title=title,
tag=tags,
url=url
url=[] if (defer_url_association and url) else url
)
##log(f"✓ File added to '{backend_name}': {file_identifier}", file=sys.stderr)
@@ -1859,10 +2404,16 @@ class Add_File(Cmdlet):
# If we have url(s), ensure they get associated with the destination file.
# This mirrors `add-url` behavior but avoids emitting extra pipeline noise.
if url:
try:
backend.add_url(resolved_hash, list(url))
except Exception:
pass
if defer_url_association and pending_url_associations is not None:
try:
pending_url_associations.setdefault(str(backend_name), []).append((str(resolved_hash), list(url)))
except Exception:
pass
else:
try:
backend.add_url(resolved_hash, list(url))
except Exception:
pass
# If a subtitle note was provided upstream (e.g., download-media writes notes.sub),
# persist it automatically like add-note would.
@@ -1965,6 +2516,68 @@ class Add_File(Cmdlet):
# --- Helpers ---
@staticmethod
def _apply_pending_url_associations(pending: Dict[str, List[tuple[str, List[str]]]], config: Dict[str, Any]) -> None:
"""Apply deferred URL associations in bulk, grouped per backend."""
try:
store = Store(config)
except Exception:
return
for backend_name, pairs in (pending or {}).items():
if not pairs:
continue
try:
backend = store[backend_name]
except Exception:
continue
# Merge URLs per hash and de-duplicate.
merged: Dict[str, List[str]] = {}
for file_hash, urls in pairs:
h = str(file_hash or "").strip().lower()
if len(h) != 64:
continue
url_list: List[str] = []
try:
for u in (urls or []):
s = str(u or "").strip()
if s:
url_list.append(s)
except Exception:
url_list = []
if not url_list:
continue
bucket = merged.setdefault(h, [])
seen = set(bucket)
for u in url_list:
if u in seen:
continue
seen.add(u)
bucket.append(u)
items: List[tuple[str, List[str]]] = [(h, u) for h, u in merged.items() if u]
if not items:
continue
bulk = getattr(backend, "add_url_bulk", None)
if callable(bulk):
try:
bulk(items)
continue
except Exception:
pass
single = getattr(backend, "add_url", None)
if callable(single):
for h, u in items:
try:
single(h, u)
except Exception:
continue
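The pending plan consumed here maps backend name to (hash, urls) pairs; its shape, with illustrative values:

pending = {
    "folder-main": [  # backend name -> list of (hash, urls) pairs
        ("a" * 64, ["https://example.com/track/1"]),
        ("a" * 64, ["https://example.com/track/1", "https://example.com/alt"]),
    ],
}
# After merging, the backend receives one item per hash:
# ("aaa...a", ["https://example.com/track/1", "https://example.com/alt"])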
@staticmethod
def _load_sidecar_bundle(
media_path: Path,

View File

@@ -1,7 +1,7 @@
from __future__ import annotations
from pathlib import Path
from typing import Any, Dict, Optional, Sequence
from typing import Any, Dict, List, Optional, Sequence, Tuple
import sys
from SYS.logger import log
@@ -103,6 +103,9 @@ class Add_Note(Cmdlet):
store_registry = Store(config)
updated = 0
# Batch write plan: store -> [(hash, name, text), ...]
note_ops: Dict[str, List[Tuple[str, str, str]]] = {}
# Optional global fallback for note text from pipeline values.
# Allows patterns like: ... | add-note sub
pipeline_default_text = None
@@ -177,20 +180,43 @@ class Add_Note(Cmdlet):
log(f"[add_note] Error: Unknown store '{store_name}': {exc}", file=sys.stderr)
return 1
ok = False
try:
ok = bool(backend.set_note(resolved_hash, note_name, item_note_text, config=config))
except Exception as exc:
log(f"[add_note] Error: Failed to set note: {exc}", file=sys.stderr)
ok = False
if ok:
updated += 1
# Queue for bulk write per store. We still emit items immediately;
# the pipeline only advances after this cmdlet returns.
note_ops.setdefault(store_name, []).append((resolved_hash, note_name, item_note_text))
updated += 1
ctx.emit(res)
# Execute bulk writes per store.
wrote_any = False
for store_name, ops in note_ops.items():
if not ops:
continue
try:
backend = store_registry[store_name]
except Exception:
continue
bulk_fn = getattr(backend, "set_note_bulk", None)
if callable(bulk_fn):
try:
ok = bool(bulk_fn(list(ops), config=config))
wrote_any = True  # a non-raising bulk call counts as a write
ctx.print_if_visible(f"✓ add-note: {len(ops)} item(s) in '{store_name}'", file=sys.stderr)
continue
except Exception as exc:
log(f"[add_note] Warning: bulk set_note failed for '{store_name}': {exc}; falling back", file=sys.stderr)
# Fallback: per-item writes
for file_hash, name, text in ops:
try:
ok = bool(backend.set_note(file_hash, name, text, config=config))
wrote_any = wrote_any or ok
except Exception:
continue
log(f"[add_note] Updated {updated} item(s)", file=sys.stderr)
return 0 if updated > 0 else 1
return 0 if updated > 0 else 1
CMDLET = Add_Note()

View File

@@ -520,45 +520,13 @@ class Add_Tag(Cmdlet):
if new_tag.lower() not in existing_lower:
item_tag_to_add.append(new_tag)
# Namespace replacement: delete old namespace:* when adding namespace:value
removed_namespace_tag: list[str] = []
for new_tag in item_tag_to_add:
if not isinstance(new_tag, str) or ":" not in new_tag:
continue
ns = new_tag.split(":", 1)[0].strip()
if not ns:
continue
ns_prefix = ns.lower() + ":"
for t in existing_tag_list:
if t.lower().startswith(ns_prefix) and t.lower() != new_tag.lower():
removed_namespace_tag.append(t)
removed_namespace_tag = sorted({t for t in removed_namespace_tag})
actual_tag_to_add = [t for t in item_tag_to_add if isinstance(t, str) and t.lower() not in existing_lower]
changed = False
if removed_namespace_tag:
try:
ok_del = backend.delete_tag(resolved_hash, removed_namespace_tag, config=config)
if ok_del:
changed = True
except Exception as exc:
log(f"[add_tag] Warning: Failed deleting namespace tag: {exc}", file=sys.stderr)
if actual_tag_to_add:
try:
ok_add = backend.add_tag(resolved_hash, actual_tag_to_add, config=config)
if ok_add:
changed = True
else:
log("[add_tag] Warning: Store rejected tag update", file=sys.stderr)
except Exception as exc:
log(f"[add_tag] Warning: Failed adding tag: {exc}", file=sys.stderr)
if changed:
total_added += len(actual_tag_to_add)
total_modified += 1
try:
ok_add = backend.add_tag(resolved_hash, item_tag_to_add, config=config)
if not ok_add:
log("[add_tag] Warning: Store rejected tag update", file=sys.stderr)
except Exception as exc:
log(f"[add_tag] Warning: Failed adding tag: {exc}", file=sys.stderr)
try:
refreshed_tag, _src2 = backend.get_tag(resolved_hash, config=config)
@@ -566,6 +534,14 @@ class Add_Tag(Cmdlet):
except Exception:
refreshed_list = existing_tag_list
# Decide whether anything actually changed (case-sensitive so title casing updates count).
if set(refreshed_list) != set(existing_tag_list):
changed = True
before_lower = {t.lower() for t in existing_tag_list}
after_lower = {t.lower() for t in refreshed_list}
total_added += len(after_lower - before_lower)
total_modified += 1
# Update the result's tag using canonical field
if isinstance(res, models.PipeObject):
res.tag = refreshed_list
@@ -575,7 +551,7 @@ class Add_Tag(Cmdlet):
final_title = _extract_title_tag(refreshed_list)
_apply_title_to_result(res, final_title)
if final_title and (not original_title or final_title.lower() != original_title.lower()):
if final_title and (not original_title or final_title != original_title):
_refresh_result_table_title(final_title, resolved_hash, str(store_name), raw_path)
if changed:

View File

@@ -1,6 +1,6 @@
from __future__ import annotations
from typing import Any, Dict, Sequence
from typing import Any, Dict, List, Optional, Sequence, Tuple
import sys
import pipeline as ctx
@@ -39,28 +39,37 @@ class Add_Url(sh.Cmdlet):
log("Error: -query must be of the form hash:<sha256>")
return 1
# Bulk input is common in pipelines; treat a list of PipeObjects as a batch.
results: List[Any] = result if isinstance(result, list) else ([result] if result is not None else [])
if query_hash and len(results) > 1:
log("Error: -query hash:<sha256> cannot be used with multiple piped items")
return 1
# Extract hash and store from result or args
file_hash = query_hash or sh.get_field(result, "hash")
store_name = parsed.get("store") or sh.get_field(result, "store")
file_hash = query_hash or (sh.get_field(result, "hash") if result is not None else None)
store_name = parsed.get("store") or (sh.get_field(result, "store") if result is not None else None)
url_arg = parsed.get("url")
if not file_hash:
log("Error: No file hash provided (pipe an item or use -query \"hash:<sha256>\")")
return 1
if not store_name:
log("Error: No store name provided")
return 1
# If we have multiple piped items, we will resolve hash/store per item below.
if not results:
if not file_hash:
log("Error: No file hash provided (pipe an item or use -query \"hash:<sha256>\")")
return 1
if not store_name:
log("Error: No store name provided")
return 1
if not url_arg:
log("Error: No URL provided")
return 1
# Normalize hash
file_hash = sh.normalize_hash(file_hash)
if not file_hash:
log("Error: Invalid hash format")
return 1
# Normalize hash (single-item mode)
if not results and file_hash:
file_hash = sh.normalize_hash(file_hash)
if not file_hash:
log("Error: Invalid hash format")
return 1
# Parse url (comma-separated)
urls = [u.strip() for u in str(url_arg).split(',') if u.strip()]
@@ -71,12 +80,118 @@ class Add_Url(sh.Cmdlet):
# Get backend and add url
try:
storage = Store(config)
backend = storage[store_name]
backend.add_url(file_hash, urls)
for u in urls:
ctx.emit(f"Added URL: {u}")
def _merge_urls(existing: Any, incoming: List[str]) -> List[str]:
out: List[str] = []
try:
if isinstance(existing, str):
out.extend([p.strip() for p in existing.split(",") if p.strip()])
elif isinstance(existing, (list, tuple)):
out.extend([str(u).strip() for u in existing if str(u).strip()])
except Exception:
out = []
for u in incoming:
if u and u not in out:
out.append(u)
return out
def _set_item_url(item: Any, merged: List[str]) -> None:
try:
if isinstance(item, dict):
if len(merged) == 1:
item["url"] = merged[0]
else:
item["url"] = list(merged)
return
# PipeObject-like
if hasattr(item, "url"):
if len(merged) == 1:
setattr(item, "url", merged[0])
else:
setattr(item, "url", list(merged))
except Exception:
return
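# Example of the merge semantics (existing may be a comma string or a list):
#   _merge_urls("https://a, https://b", ["https://b", "https://c"])
#   returns ["https://a", "https://b", "https://c"]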
# Build batches per store.
store_override = parsed.get("store")
batch: Dict[str, List[Tuple[str, List[str]]]] = {}
pass_through: List[Any] = []
if results:
for item in results:
pass_through.append(item)
raw_hash = query_hash or sh.get_field(item, "hash")
raw_store = store_override or sh.get_field(item, "store")
if not raw_hash or not raw_store:
ctx.print_if_visible("[add-url] Warning: Item missing hash/store; skipping", file=sys.stderr)
continue
normalized = sh.normalize_hash(raw_hash)
if not normalized:
ctx.print_if_visible("[add-url] Warning: Item has invalid hash; skipping", file=sys.stderr)
continue
store_text = str(raw_store).strip()
if not store_text:
ctx.print_if_visible("[add-url] Warning: Item has empty store; skipping", file=sys.stderr)
continue
# Validate backend exists (skip PATH/unknown).
if not storage.is_available(store_text):
ctx.print_if_visible(
f"[add-url] Warning: Store '{store_text}' not configured; skipping", file=sys.stderr
)
continue
batch.setdefault(store_text, []).append((normalized, list(urls)))
# Execute per-store batches.
for store_text, pairs in batch.items():
try:
backend = storage[store_text]
except Exception:
continue
# Coalesce duplicates per hash before passing to backend.
merged: Dict[str, List[str]] = {}
for h, ulist in pairs:
merged.setdefault(h, [])
for u in (ulist or []):
if u and u not in merged[h]:
merged[h].append(u)
bulk_pairs = list(merged.items())
bulk_fn = getattr(backend, "add_url_bulk", None)
if callable(bulk_fn):
bulk_fn(bulk_pairs, config=config)
else:
for h, ulist in bulk_pairs:
backend.add_url(h, ulist, config=config)
ctx.print_if_visible(
f"✓ add-url: {len(urls)} url(s) for {len(bulk_pairs)} item(s) in '{store_text}'",
file=sys.stderr,
)
# Pass items through unchanged (but update url field for convenience).
for item in pass_through:
existing = sh.get_field(item, "url")
merged = _merge_urls(existing, list(urls))
_set_item_url(item, merged)
ctx.emit(item)
return 0
# Single-item mode
backend = storage[str(store_name)]
backend.add_url(str(file_hash), urls, config=config)
ctx.print_if_visible(f"✓ add-url: {len(urls)} url(s) added", file=sys.stderr)
if result is not None:
existing = sh.get_field(result, "url")
merged = _merge_urls(existing, list(urls))
_set_item_url(result, merged)
ctx.emit(result)
return 0
except KeyError:

View File

@@ -1,16 +1,19 @@
"""Delete-file cmdlet: Delete files from local storage and/or Hydrus."""
from __future__ import annotations
from typing import Any, Dict, Sequence
from typing import Any, Dict, List, Sequence
import sys
from pathlib import Path
from SYS.logger import debug, log
from SYS.utils import format_bytes
from Store.Folder import Folder
from Store import Store
from . import _shared as sh
from API import HydrusNetwork as hydrus_wrapper
import pipeline as ctx
from result_table import ResultTable, _format_size
from rich_display import stdout_console
class Delete_File(sh.Cmdlet):
@@ -38,9 +41,20 @@ class Delete_File(sh.Cmdlet):
)
self.register()
def _process_single_item(self, item: Any, override_hash: str | None, conserve: str | None,
lib_root: str | None, reason: str, config: Dict[str, Any]) -> bool:
"""Process deletion for a single item."""
def _process_single_item(
self,
item: Any,
override_hash: str | None,
conserve: str | None,
lib_root: str | None,
reason: str,
config: Dict[str, Any],
) -> List[Dict[str, Any]]:
"""Process deletion for a single item.
Returns display rows (for the final Rich table). Returning an empty list
indicates no delete occurred.
"""
# Handle item as either dict or object
if isinstance(item, dict):
hash_hex_raw = item.get("hash_hex") or item.get("hash")
@@ -50,6 +64,44 @@ class Delete_File(sh.Cmdlet):
hash_hex_raw = sh.get_field(item, "hash_hex") or sh.get_field(item, "hash")
target = sh.get_field(item, "target") or sh.get_field(item, "file_path") or sh.get_field(item, "path")
title_val = sh.get_field(item, "title") or sh.get_field(item, "name")
def _get_ext_from_item() -> str:
try:
if isinstance(item, dict):
ext_val = item.get("ext")
if ext_val:
return str(ext_val)
extra = item.get("extra")
if isinstance(extra, dict) and extra.get("ext"):
return str(extra.get("ext"))
else:
ext_val = sh.get_field(item, "ext")
if ext_val:
return str(ext_val)
extra = sh.get_field(item, "extra")
if isinstance(extra, dict) and extra.get("ext"):
return str(extra.get("ext"))
except Exception:
pass
# Fallback: infer from target path or title if it looks like a filename
try:
if isinstance(target, str) and target:
suffix = Path(target).suffix
if suffix:
return suffix.lstrip(".")
except Exception:
pass
try:
if title_val:
suffix = Path(str(title_val)).suffix
if suffix:
return suffix.lstrip(".")
except Exception:
pass
return ""
store = None
if isinstance(item, dict):
@@ -70,9 +122,16 @@ class Delete_File(sh.Cmdlet):
local_deleted = False
local_target = isinstance(target, str) and target.strip() and not str(target).lower().startswith(("http://", "https://"))
deleted_rows: List[Dict[str, Any]] = []
if conserve != "local" and local_target:
path = Path(str(target))
size_bytes: int | None = None
try:
if path.exists() and path.is_file():
size_bytes = int(path.stat().st_size)
except Exception:
size_bytes = None
# If lib_root is provided and this is from a folder store, use the Folder class
if lib_root:
@@ -80,8 +139,15 @@ class Delete_File(sh.Cmdlet):
folder = Folder(Path(lib_root), name=store or "local")
if folder.delete_file(str(path)):
local_deleted = True
ctx.emit(f"Removed file: {path.name}")
log(f"Deleted: {path.name}", file=sys.stderr)
deleted_rows.append(
{
"title": str(title_val).strip() if title_val else path.name,
"store": store_label,
"hash": hash_hex or sh.normalize_hash(path.stem) or "",
"size_bytes": size_bytes,
"ext": _get_ext_from_item() or path.suffix.lstrip("."),
}
)
except Exception as exc:
debug(f"Folder.delete_file failed: {exc}", file=sys.stderr)
# Fallback to manual deletion
@@ -89,8 +155,15 @@ class Delete_File(sh.Cmdlet):
if path.exists() and path.is_file():
path.unlink()
local_deleted = True
ctx.emit(f"Removed local file: {path}")
log(f"Deleted: {path.name}", file=sys.stderr)
deleted_rows.append(
{
"title": str(title_val).strip() if title_val else path.name,
"store": store_label,
"hash": hash_hex or sh.normalize_hash(path.stem) or "",
"size_bytes": size_bytes,
"ext": _get_ext_from_item() or path.suffix.lstrip("."),
}
)
except Exception as exc:
log(f"Local delete failed: {exc}", file=sys.stderr)
else:
@@ -99,8 +172,15 @@ class Delete_File(sh.Cmdlet):
if path.exists() and path.is_file():
path.unlink()
local_deleted = True
ctx.emit(f"Removed local file: {path}")
log(f"Deleted: {path.name}", file=sys.stderr)
deleted_rows.append(
{
"title": str(title_val).strip() if title_val else path.name,
"store": store_label,
"hash": hash_hex or sh.normalize_hash(path.stem) or "",
"size_bytes": size_bytes,
"ext": _get_ext_from_item() or path.suffix.lstrip("."),
}
)
except Exception as exc:
log(f"Local delete failed: {exc}", file=sys.stderr)
@@ -168,26 +248,32 @@ class Delete_File(sh.Cmdlet):
except Exception:
# If it's not in Hydrus (e.g. 404 or similar), that's fine
if not local_deleted:
return False
return []
if hydrus_deleted and hash_hex:
title_str = str(title_val).strip() if title_val else ""
if reason:
    if title_str:
        ctx.emit(f"{hydrus_prefix} Deleted title:{title_str} hash:{hash_hex} (reason: {reason}).")
    else:
        ctx.emit(f"{hydrus_prefix} Deleted hash:{hash_hex} (reason: {reason}).")
else:
    if title_str:
        ctx.emit(f"{hydrus_prefix} Deleted title:{title_str} hash:{hash_hex}.")
    else:
        ctx.emit(f"{hydrus_prefix} Deleted hash:{hash_hex}.")
size_hint = None
try:
    if isinstance(item, dict):
        size_hint = item.get("size_bytes") or item.get("size")
    else:
        size_hint = sh.get_field(item, "size_bytes") or sh.get_field(item, "size")
except Exception:
    size_hint = None
deleted_rows.append(
{
"title": str(title_val).strip() if title_val else "",
"store": store_label,
"hash": hash_hex,
"size_bytes": size_hint,
"ext": _get_ext_from_item(),
}
)
if hydrus_deleted or local_deleted:
return True
return deleted_rows
log("Selected result has neither Hydrus hash nor local file target")
return False
return []
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Execute delete-file command."""
@@ -257,15 +343,34 @@ class Delete_File(sh.Cmdlet):
return 1
success_count = 0
deleted_rows: List[Dict[str, Any]] = []
for item in items:
if self._process_single_item(item, override_hash, conserve, lib_root, reason, config):
rows = self._process_single_item(item, override_hash, conserve, lib_root, reason, config)
if rows:
success_count += 1
deleted_rows.extend(rows)
if success_count > 0:
# Clear cached tables/items so deleted entries are not redisplayed
if deleted_rows:
table = ResultTable("Deleted")
table.set_no_choice(True).set_preserve_order(True)
for row in deleted_rows:
result_row = table.add_row()
result_row.add_column("Title", row.get("title", ""))
result_row.add_column("Store", row.get("store", ""))
result_row.add_column("Hash", row.get("hash", ""))
result_row.add_column("Size", _format_size(row.get("size_bytes"), integer_only=False))
result_row.add_column("Ext", row.get("ext", ""))
# Display-only: print directly and do not affect selection/history.
try:
stdout_console().print()
stdout_console().print(table)
setattr(table, "_rendered_by_cmdlet", True)
except Exception:
pass
# Ensure no stale overlay/selection carries forward.
try:
ctx.set_last_result_table_overlay(None, None, None)
ctx.set_last_result_table(None, [])
ctx.set_last_result_items_only([])
ctx.set_current_stage_table(None)
except Exception:
    pass
View File

@@ -1,6 +1,6 @@
from __future__ import annotations
from typing import Any, Dict, Sequence
from typing import Any, Dict, List, Optional, Sequence, Tuple
import sys
import pipeline as ctx
@@ -48,28 +48,37 @@ class Delete_Url(Cmdlet):
log("Error: -query must be of the form hash:<sha256>")
return 1
# Bulk input is common in pipelines; treat a list of PipeObjects as a batch.
results: List[Any] = result if isinstance(result, list) else ([result] if result is not None else [])
if query_hash and len(results) > 1:
log("Error: -query hash:<sha256> cannot be used with multiple piped items")
return 1
# Extract hash and store from result or args
file_hash = query_hash or get_field(result, "hash")
store_name = parsed.get("store") or get_field(result, "store")
file_hash = query_hash or (get_field(result, "hash") if result is not None else None)
store_name = parsed.get("store") or (get_field(result, "store") if result is not None else None)
url_arg = parsed.get("url")
if not file_hash:
log("Error: No file hash provided (pipe an item or use -query \"hash:<sha256>\")")
return 1
if not store_name:
log("Error: No store name provided")
return 1
# If we have multiple piped items, we will resolve hash/store per item below.
if not results:
if not file_hash:
log("Error: No file hash provided (pipe an item or use -query \"hash:<sha256>\")")
return 1
if not store_name:
log("Error: No store name provided")
return 1
if not url_arg:
log("Error: No URL provided")
return 1
# Normalize hash
file_hash = normalize_hash(file_hash)
if not file_hash:
log("Error: Invalid hash format")
return 1
# Normalize hash (single-item mode)
if not results and file_hash:
file_hash = normalize_hash(file_hash)
if not file_hash:
log("Error: Invalid hash format")
return 1
# Parse url (comma-separated)
urls = [u.strip() for u in str(url_arg).split(',') if u.strip()]
@@ -80,12 +89,104 @@ class Delete_Url(Cmdlet):
# Get backend and delete url
try:
storage = Store(config)
backend = storage[store_name]
backend.delete_url(file_hash, urls)
for u in urls:
ctx.emit(f"Deleted URL: {u}")
def _remove_urls(existing: Any, remove: List[str]) -> Any:
# Preserve prior shape: keep str when 1 url, list when multiple.
current: List[str] = []
try:
if isinstance(existing, str):
current = [p.strip() for p in existing.split(",") if p.strip()]
elif isinstance(existing, (list, tuple)):
current = [str(u).strip() for u in existing if str(u).strip()]
except Exception:
current = []
remove_set = {u for u in (remove or []) if u}
new_urls = [u for u in current if u not in remove_set]
if len(new_urls) == 1:
return new_urls[0]
return new_urls
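# Shape rule sketch (illustrative values):
#     _remove_urls("https://a, https://b", ["https://b"]) -> "https://a"      (one left: str)
#     _remove_urls(["https://a", "https://b", "https://c"], ["https://c"])
#     -> ["https://a", "https://b"]                                           (several left: list)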
def _set_item_url(item: Any, merged: Any) -> None:
try:
if isinstance(item, dict):
item["url"] = merged
return
if hasattr(item, "url"):
setattr(item, "url", merged)
except Exception:
return
store_override = parsed.get("store")
batch: Dict[str, List[Tuple[str, List[str]]]] = {}
pass_through: List[Any] = []
if results:
for item in results:
pass_through.append(item)
raw_hash = query_hash or get_field(item, "hash")
raw_store = store_override or get_field(item, "store")
if not raw_hash or not raw_store:
ctx.print_if_visible("[delete-url] Warning: Item missing hash/store; skipping", file=sys.stderr)
continue
normalized = normalize_hash(raw_hash)
if not normalized:
ctx.print_if_visible("[delete-url] Warning: Item has invalid hash; skipping", file=sys.stderr)
continue
store_text = str(raw_store).strip()
if not store_text:
ctx.print_if_visible("[delete-url] Warning: Item has empty store; skipping", file=sys.stderr)
continue
if not storage.is_available(store_text):
ctx.print_if_visible(
f"[delete-url] Warning: Store '{store_text}' not configured; skipping", file=sys.stderr
)
continue
batch.setdefault(store_text, []).append((normalized, list(urls)))
for store_text, pairs in batch.items():
try:
backend = storage[store_text]
except Exception:
continue
merged: Dict[str, List[str]] = {}
for h, ulist in pairs:
merged.setdefault(h, [])
for u in (ulist or []):
if u and u not in merged[h]:
merged[h].append(u)
bulk_pairs = [(h, merged[h]) for h in merged.keys()]
bulk_fn = getattr(backend, "delete_url_bulk", None)
if callable(bulk_fn):
bulk_fn(bulk_pairs, config=config)
else:
for h, ulist in bulk_pairs:
backend.delete_url(h, ulist, config=config)
ctx.print_if_visible(
f"✓ delete-url: {len(urls)} url(s) for {len(bulk_pairs)} item(s) in '{store_text}'",
file=sys.stderr,
)
for item in pass_through:
existing = get_field(item, "url")
_set_item_url(item, _remove_urls(existing, list(urls)))
ctx.emit(item)
return 0
# Single-item mode
backend = storage[str(store_name)]
backend.delete_url(str(file_hash), urls, config=config)
ctx.print_if_visible(f"✓ delete-url: {len(urls)} url(s) removed", file=sys.stderr)
if result is not None:
existing = get_field(result, "url")
_set_item_url(result, _remove_urls(existing, list(urls)))
ctx.emit(result)
return 0
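# Illustrative invocations (hypothetical selections):
#     @1 | delete-url -url "https://example.com/x"      (single piped item)
#     @* | delete-url -url "https://example.com/x"      (batch: grouped per store)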
except KeyError:

File diff suppressed because it is too large

View File

@@ -6,7 +6,14 @@ import os
import sys
import shutil
import subprocess
import tempfile
import threading
import time
import http.server
from urllib.parse import quote
import webbrowser
from urllib.parse import urljoin
from urllib.request import pathname2url
import pipeline as ctx
from . import _shared as sh
@@ -56,7 +63,7 @@ class Get_File(sh.Cmdlet):
output_path = parsed.get("path")
output_name = parsed.get("name")
debug(f"[get-file] file_hash={file_hash[:12] if file_hash else None}... store_name={store_name}")
debug(f"[get-file] file_hash={file_hash} store_name={store_name}")
if not file_hash:
log("Error: No file hash provided (pipe an item or use -query \"hash:<sha256>\")")
@@ -83,7 +90,7 @@ class Get_File(sh.Cmdlet):
debug(f"[get-file] Getting metadata for hash...")
metadata = backend.get_metadata(file_hash)
if not metadata:
log(f"Error: File metadata not found for hash {file_hash[:12]}...")
log(f"Error: File metadata not found for hash {file_hash}")
return 1
debug(f"[get-file] Metadata retrieved: title={metadata.get('title')}, ext={metadata.get('ext')}")
@@ -104,7 +111,7 @@ class Get_File(sh.Cmdlet):
return text
return ""
debug(f"[get-file] Calling backend.get_file({file_hash[:12]}...)")
debug(f"[get-file] Calling backend.get_file({file_hash})")
# Get file from backend (may return Path or URL string depending on backend)
source_path = backend.get_file(file_hash)
@@ -135,7 +142,7 @@ class Get_File(sh.Cmdlet):
source_path = Path(source_path)
if not source_path or not source_path.exists():
log(f"Error: Backend could not retrieve file for hash {file_hash[:12]}...")
log(f"Error: Backend could not retrieve file for hash {file_hash}")
return 1
# Folder store UX: without -path, just open the file in the default app.
@@ -202,6 +209,18 @@ class Get_File(sh.Cmdlet):
def _open_file_default(self, path: Path) -> None:
"""Open a local file in the OS default application."""
try:
suffix = str(path.suffix or "").lower()
if sys.platform.startswith("win"):
# On Windows, file associations for common media types can point at
# editors (Paint/VS Code). Prefer opening a localhost URL.
if self._open_local_file_in_browser_via_http(path):
return
if suffix in {".png", ".jpg", ".jpeg", ".gif", ".webp", ".bmp", ".tif", ".tiff", ".svg"}:
# Use default web browser for images.
if self._open_image_in_default_browser(path):
return
if sys.platform.startswith("win"):
os.startfile(str(path)) # type: ignore[attr-defined]
return
@@ -211,6 +230,122 @@ class Get_File(sh.Cmdlet):
subprocess.Popen(["xdg-open", str(path)], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
except Exception as exc:
log(f"Error opening file: {exc}", file=sys.stderr)
def _open_local_file_in_browser_via_http(self, file_path: Path) -> bool:
"""Serve a single local file via localhost HTTP and open in browser.
This avoids Windows file-association issues (e.g., PNG -> Paint, HTML -> VS Code).
The server is bound to 127.0.0.1 on an ephemeral port and is shut down after
a timeout.
"""
try:
resolved = file_path.resolve()
directory = resolved.parent
filename = resolved.name
except Exception:
return False
class OneFileHandler(http.server.SimpleHTTPRequestHandler):
def __init__(self, *handler_args, **handler_kwargs):
super().__init__(*handler_args, directory=str(directory), **handler_kwargs)
def log_message(self, format: str, *args) -> None: # noqa: A003
# Keep normal output clean.
return
def do_GET(self) -> None: # noqa: N802
if self.path in {"/", ""}:
self.path = "/" + filename
return super().do_GET()
if self.path == "/" + filename or self.path == "/" + quote(filename):
return super().do_GET()
self.send_error(404)
def do_HEAD(self) -> None: # noqa: N802
if self.path in {"/", ""}:
self.path = "/" + filename
return super().do_HEAD()
if self.path == "/" + filename or self.path == "/" + quote(filename):
return super().do_HEAD()
self.send_error(404)
try:
httpd = http.server.ThreadingHTTPServer(("127.0.0.1", 0), OneFileHandler)
except Exception:
return False
port = httpd.server_address[1]
url = f"http://127.0.0.1:{port}/{quote(filename)}"
# Run server in the background.
server_thread = threading.Thread(target=httpd.serve_forever, kwargs={"poll_interval": 0.2}, daemon=True)
server_thread.start()
# Auto-shutdown after a timeout to avoid lingering servers.
def shutdown_later() -> None:
time.sleep(10 * 60)
try:
httpd.shutdown()
except Exception:
pass
try:
httpd.server_close()
except Exception:
pass
threading.Thread(target=shutdown_later, daemon=True).start()
try:
debug(f"[get-file] Opening via localhost: {url}")
return bool(webbrowser.open(url))
except Exception:
return False
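# Design note: binding to port 0 lets the OS pick a free ephemeral port;
# ThreadingHTTPServer answers the browser's GET off the main thread, and the
# daemon shutdown thread reclaims the socket after ~10 minutes.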
def _open_image_in_default_browser(self, image_path: Path) -> bool:
"""Open an image file in the user's default web browser.
We intentionally avoid opening the image path directly on Windows because
file associations may point to editors/viewers (e.g., Paint). Instead we
generate a tiny HTML wrapper and open that (HTML is typically associated
with the default browser).
"""
try:
resolved = image_path.resolve()
image_url = urljoin("file:", pathname2url(str(resolved)))
except Exception:
return False
# Create a stable wrapper filename to reduce temp-file spam.
wrapper_path = Path(tempfile.gettempdir()) / f"medeia-open-image-{resolved.stem}.html"
try:
wrapper_path.write_text(
"\n".join(
[
"<!doctype html>",
"<meta charset=\"utf-8\">",
f"<title>{resolved.name}</title>",
"<style>html,body{margin:0;padding:0;background:#000}img{display:block;max-width:100vw;max-height:100vh;margin:auto}</style>",
f"<img src=\"{image_url}\" alt=\"{resolved.name}\">",
]
),
encoding="utf-8",
)
except Exception:
return False
# Prefer localhost server when possible (reliable on Windows).
if self._open_local_file_in_browser_via_http(image_path):
return True
wrapper_url = wrapper_path.as_uri()
try:
return bool(webbrowser.open(wrapper_url))
except Exception:
return False
def _sanitize_filename(self, name: str) -> str:
"""Sanitize filename by removing invalid characters."""

View File

@@ -450,7 +450,9 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
table.set_row_selection_args(i, ["-store", str(item['store']), "-query", f"hash:{item['hash']}"])
ctx.set_last_result_table(table, pipeline_results)
print(table)
from rich_display import stdout_console
stdout_console().print(table)
return 0

View File

@@ -112,6 +112,107 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
item = files_to_merge[0]
ctx.emit(item)
return 0
def _resolve_existing_path(item: Dict[str, Any]) -> Optional[Path]:
raw_path = get_pipe_object_path(item)
target_path: Optional[Path] = None
if isinstance(raw_path, Path):
target_path = raw_path
elif isinstance(raw_path, str) and raw_path.strip():
candidate = Path(raw_path).expanduser()
if candidate.exists():
target_path = candidate
if target_path and target_path.exists():
return target_path
return None
def _extract_url(item: Dict[str, Any]) -> Optional[str]:
u = get_field(item, "url") or get_field(item, "target")
if isinstance(u, str):
s = u.strip()
if s.lower().startswith(("http://", "https://")):
return s
return None
# If the user piped URL-only playlist selections (no local paths yet), download first.
# This keeps the pipeline order intuitive:
# @* | merge-file | add-file -store ...
urls_to_download: List[str] = []
for it in files_to_merge:
if _resolve_existing_path(it) is not None:
continue
u = _extract_url(it)
if u:
urls_to_download.append(u)
if urls_to_download and len(urls_to_download) >= 2:
try:
# Compute a batch hint (audio vs video + single-format id) once.
mode_hint: Optional[str] = None
forced_format: Optional[str] = None
try:
from cmdlet.download_media import list_formats
from tool.ytdlp import YtDlpTool
sample_url = urls_to_download[0]
cookiefile = None
try:
cookie_path = YtDlpTool(config).resolve_cookiefile()
if cookie_path is not None and cookie_path.is_file():
cookiefile = str(cookie_path)
except Exception:
cookiefile = None
fmts = list_formats(sample_url, no_playlist=False, playlist_items=None, cookiefile=cookiefile)
if isinstance(fmts, list) and fmts:
has_video = False
for f in fmts:
if not isinstance(f, dict):
continue
vcodec = str(f.get("vcodec", "none") or "none").strip().lower()
if vcodec and vcodec != "none":
has_video = True
break
mode_hint = "video" if has_video else "audio"
if len(fmts) == 1 and isinstance(fmts[0], dict):
fid = str(fmts[0].get("format_id") or "").strip()
if fid:
forced_format = fid
except Exception:
mode_hint = None
forced_format = None
from cmdlet.add_file import Add_File
expanded: List[Dict[str, Any]] = []
downloaded_any = False
for it in files_to_merge:
if _resolve_existing_path(it) is not None:
expanded.append(it)
continue
u = _extract_url(it)
if not u:
expanded.append(it)
continue
downloaded = Add_File._download_streaming_url_as_pipe_objects(
u,
config,
mode_hint=mode_hint,
ytdl_format_hint=forced_format,
)
if downloaded:
expanded.extend(downloaded)
downloaded_any = True
else:
expanded.append(it)
if downloaded_any:
files_to_merge = expanded
except Exception:
# If downloads fail, we fall back to the existing path-based merge behavior.
pass
# Extract file paths and metadata from result objects
source_files: List[Path] = []
@@ -120,14 +221,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
source_tags: List[str] = [] # tags read from .tag sidecars
source_item_tag_lists: List[List[str]] = [] # tags carried in-memory on piped items
for item in files_to_merge:
raw_path = get_pipe_object_path(item)
target_path = None
if isinstance(raw_path, Path):
target_path = raw_path
elif isinstance(raw_path, str) and raw_path.strip():
candidate = Path(raw_path).expanduser()
if candidate.exists():
target_path = candidate
target_path = _resolve_existing_path(item)
if target_path and target_path.exists():
source_files.append(target_path)

View File

@@ -266,27 +266,27 @@ def _archive_url(url: str, timeout: float) -> Tuple[List[str], List[str]]:
(_submit_archive_ph, "archive.ph"),
):
try:
log(f"Archiving to {label}...", flush=True)
debug(f"Archiving to {label}...")
archived = submitter(url, timeout)
except httpx.HTTPStatusError as exc:
if exc.response.status_code == 429:
warnings.append(f"archive {label} rate limited (HTTP 429)")
log(f"{label}: Rate limited (HTTP 429)", flush=True)
debug(f"{label}: Rate limited (HTTP 429)")
else:
warnings.append(f"archive {label} failed: HTTP {exc.response.status_code}")
log(f"{label}: HTTP {exc.response.status_code}", flush=True)
debug(f"{label}: HTTP {exc.response.status_code}")
except httpx.RequestError as exc:
warnings.append(f"archive {label} failed: {exc}")
log(f"{label}: Connection error: {exc}", flush=True)
debug(f"{label}: Connection error: {exc}")
except Exception as exc:
warnings.append(f"archive {label} failed: {exc}")
log(f"{label}: {exc}", flush=True)
debug(f"{label}: {exc}")
else:
if archived:
archives.append(archived)
log(f"{label}: Success - {archived}", flush=True)
debug(f"{label}: Success - {archived}")
else:
log(f"{label}: No archive link returned", flush=True)
debug(f"{label}: No archive link returned")
return archives, warnings
@@ -335,7 +335,7 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str])
tool.debug_dump()
log("Launching browser...", flush=True)
debug("Launching browser...")
format_name = _normalise_format(options.output_format)
headless = options.headless or format_name == "pdf"
debug(f"[_capture] Format: {format_name}, Headless: {headless}")
@@ -345,29 +345,29 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str])
try:
with tool.open_page(headless=headless) as page:
log(f"Navigating to {options.url}...", flush=True)
debug(f"Navigating to {options.url}...")
try:
tool.goto(page, options.url)
log("Page loaded successfully", flush=True)
debug("Page loaded successfully")
except PlaywrightTimeoutError:
warnings.append("navigation timeout; capturing current page state")
log("Navigation timeout; proceeding with current state", flush=True)
debug("Navigation timeout; proceeding with current state")
# Skip article lookup by default (wait_for_article defaults to False)
if options.wait_for_article:
try:
log("Waiting for article element...", flush=True)
debug("Waiting for article element...")
page.wait_for_selector("article", timeout=10_000)
log("Article element found", flush=True)
debug("Article element found")
except PlaywrightTimeoutError:
warnings.append("<article> selector not found; capturing fallback")
log("Article element not found; using fallback", flush=True)
debug("Article element not found; using fallback")
if options.wait_after_load > 0:
log(f"Waiting {options.wait_after_load}s for page stabilization...", flush=True)
debug(f"Waiting {options.wait_after_load}s for page stabilization...")
time.sleep(min(10.0, max(0.0, options.wait_after_load)))
if options.replace_video_posters:
log("Replacing video elements with posters...", flush=True)
debug("Replacing video elements with posters...")
page.evaluate(
"""
document.querySelectorAll('video').forEach(v => {
@@ -384,7 +384,7 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str])
# Attempt platform-specific target capture if requested (and not PDF)
element_captured = False
if options.prefer_platform_target and format_name != "pdf":
log("Attempting platform-specific content capture...", flush=True)
debug("Attempting platform-specific content capture...")
try:
_platform_preprocess(options.url, page, warnings)
except Exception as e:
@@ -397,36 +397,36 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str])
debug(f"[_capture] Trying selectors: {selectors}")
for sel in selectors:
try:
log(f"Trying selector: {sel}", flush=True)
debug(f"Trying selector: {sel}")
el = page.wait_for_selector(sel, timeout=max(0, int(options.selector_timeout_ms)))
except PlaywrightTimeoutError:
log(f"Selector not found: {sel}", flush=True)
debug(f"Selector not found: {sel}")
continue
try:
if el is not None:
log(f"Found element with selector: {sel}", flush=True)
debug(f"Found element with selector: {sel}")
try:
el.scroll_into_view_if_needed(timeout=1000)
except Exception:
pass
log(f"Capturing element to {destination}...", flush=True)
debug(f"Capturing element to {destination}...")
el.screenshot(path=str(destination), type=("jpeg" if format_name == "jpeg" else None))
element_captured = True
log("Element captured successfully", flush=True)
debug("Element captured successfully")
break
except Exception as exc:
warnings.append(f"element capture failed for '{sel}': {exc}")
log(f"Failed to capture element: {exc}", flush=True)
debug(f"Failed to capture element: {exc}")
# Fallback to default capture paths
if element_captured:
pass
elif format_name == "pdf":
log("Generating PDF...", flush=True)
debug("Generating PDF...")
page.emulate_media(media="print")
page.pdf(path=str(destination), print_background=True)
log(f"PDF saved to {destination}", flush=True)
debug(f"PDF saved to {destination}")
else:
log(f"Capturing full page to {destination}...", flush=True)
debug(f"Capturing full page to {destination}...")
screenshot_kwargs: Dict[str, Any] = {"path": str(destination)}
if format_name == "jpeg":
screenshot_kwargs["type"] = "jpeg"
@@ -441,7 +441,7 @@ def _capture(options: ScreenshotOptions, destination: Path, warnings: List[str])
article.screenshot(**article_kwargs)
else:
page.screenshot(**screenshot_kwargs)
log(f"Screenshot saved to {destination}", flush=True)
debug(f"Screenshot saved to {destination}")
except Exception as exc:
debug(f"[_capture] Exception launching browser/page: {exc}")
msg = str(exc).lower()
@@ -587,7 +587,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
if storage_value:
try:
screenshot_dir = SharedArgs.resolve_storage(storage_value)
log(f"[screen_shot] Using --storage {storage_value}: {screenshot_dir}", flush=True)
debug(f"[screen_shot] Using --storage {storage_value}: {screenshot_dir}")
except ValueError as e:
log(str(e), file=sys.stderr)
return 1
@@ -596,7 +596,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
if screenshot_dir is None and resolve_output_dir is not None:
try:
screenshot_dir = resolve_output_dir(config)
log(f"[screen_shot] Using config resolver: {screenshot_dir}", flush=True)
debug(f"[screen_shot] Using config resolver: {screenshot_dir}")
except Exception:
pass
@@ -604,14 +604,14 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
if screenshot_dir is None and config and config.get("outfile"):
try:
screenshot_dir = Path(config["outfile"]).expanduser()
log(f"[screen_shot] Using config outfile: {screenshot_dir}", flush=True)
debug(f"[screen_shot] Using config outfile: {screenshot_dir}")
except Exception:
pass
# Default: User's Videos directory
if screenshot_dir is None:
screenshot_dir = Path.home() / "Videos"
log(f"[screen_shot] Using default directory: {screenshot_dir}", flush=True)
debug(f"[screen_shot] Using default directory: {screenshot_dir}")
ensure_directory(screenshot_dir)
@@ -693,11 +693,11 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
screenshot_result = _capture_screenshot(options)
# Log results and warnings
log(f"Screenshot captured to {screenshot_result.path}", flush=True)
debug(f"Screenshot captured to {screenshot_result.path}")
if screenshot_result.archive_url:
log(f"Archives: {', '.join(screenshot_result.archive_url)}", flush=True)
debug(f"Archives: {', '.join(screenshot_result.archive_url)}")
for warning in screenshot_result.warnings:
log(f"Warning: {warning}", flush=True)
debug(f"Warning: {warning}")
# Compute hash of screenshot file
screenshot_hash = None
@@ -762,8 +762,8 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
log(f"No screenshots were successfully captured", file=sys.stderr)
return 1
# Log completion message
log(f"✓ Successfully captured {len(all_emitted)} screenshot(s)", flush=True)
# Log completion message (keep this as normal output)
log(f"✓ Successfully captured {len(all_emitted)} screenshot(s)")
return exit_code
CMDLET = Cmdlet(

View File

@@ -45,6 +45,8 @@ class Search_Store(Cmdlet):
"Search across storage backends: Folder stores and Hydrus instances",
"Use -store to search a specific backend by name",
"URL search: url:* (any URL) or url:<value> (URL substring)",
"Extension search: ext:<value> (e.g., ext:png)",
"Hydrus-style extension: system:filetype = png",
"Results include hash for downstream commands (get-file, add-tag, etc.)",
"Examples:",
"search-store -query foo # Search all storage backends",
@@ -53,6 +55,8 @@ class Search_Store(Cmdlet):
"search-store -query 'hash:deadbeef...' # Search by SHA256 hash",
"search-store -query 'url:*' # Files that have any URL",
"search-store -query 'url:youtube.com' # Files whose URL contains substring",
"search-store -query 'ext:png' # Files whose metadata ext is png",
"search-store -query 'system:filetype = png' # Hydrus: native; Folder: maps to metadata.ext",
],
exec=self.run,
)
@@ -107,6 +111,35 @@ class Search_Store(Cmdlet):
args_list = [str(arg) for arg in (args or [])]
refresh_mode = any(str(a).strip().lower() in {"--refresh", "-refresh"} for a in args_list)
def _format_command_title(command: str, raw_args: List[str]) -> str:
def _quote(value: str) -> str:
text = str(value)
if not text:
return '""'
needs_quotes = any(ch.isspace() for ch in text) or '"' in text
if not needs_quotes:
return text
return '"' + text.replace('"', '\\"') + '"'
cleaned = [
str(a)
for a in (raw_args or [])
if str(a).strip().lower() not in {"--refresh", "-refresh"}
]
if not cleaned:
return command
return " ".join([command, *[_quote(a) for a in cleaned]])
raw_title = None
try:
raw_title = ctx.get_current_stage_text("") if hasattr(ctx, "get_current_stage_text") else None
except Exception:
raw_title = None
command_title = (str(raw_title).strip() if raw_title else "") or _format_command_title("search-store", list(args_list))
# Build dynamic flag variants from cmdlet arg definitions.
# This avoids hardcoding flag spellings in parsing loops.
flag_registry = self.build_flag_registry()
@@ -188,11 +221,7 @@ class Search_Store(Cmdlet):
importlib.reload(result_table)
from result_table import ResultTable
table_title = f"Search: {query}"
if storage_backend:
table_title += f" [{storage_backend}]"
table = ResultTable(table_title)
table = ResultTable(command_title)
try:
table.set_source_command("search-store", list(args_list))
except Exception:
@@ -326,26 +355,23 @@ class Search_Store(Cmdlet):
ctx.emit(payload)
if found_any:
# Title should reflect the command, query, and only stores present in the table.
store_counts: "OrderedDict[str, int]" = OrderedDict()
for row_item in results_list:
store_val = str(row_item.get("store") or "").strip()
if not store_val:
continue
if store_val not in store_counts:
store_counts[store_val] = 0
store_counts[store_val] += 1
counts_part = " ".join(f"{name}:{count}" for name, count in store_counts.items() if count > 0)
base_title = f"search-store: {query}".strip()
table.title = f"{base_title} | {counts_part}" if counts_part else base_title
ctx.set_last_result_table(table, results_list)
table.title = command_title
if refresh_mode:
ctx.set_last_result_table_preserve_history(table, results_list)
else:
ctx.set_last_result_table(table, results_list)
db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2))
db.update_worker_status(worker_id, 'completed')
return 0
log("No results found", file=sys.stderr)
if refresh_mode:
try:
table.title = command_title
ctx.set_last_result_table_preserve_history(table, [])
except Exception:
pass
db.append_worker_stdout(worker_id, json.dumps([], indent=2))
db.update_worker_status(worker_id, 'completed')
return 0
@@ -413,24 +439,21 @@ class Search_Store(Cmdlet):
results_list.append(normalized)
ctx.emit(normalized)
# Title should reflect the command, query, and only stores present in the table.
store_counts: "OrderedDict[str, int]" = OrderedDict()
for row_item in results_list:
store_val = str(row_item.get("store") or "").strip()
if not store_val:
continue
if store_val not in store_counts:
store_counts[store_val] = 0
store_counts[store_val] += 1
counts_part = " ".join(f"{name}:{count}" for name, count in store_counts.items() if count > 0)
base_title = f"search-store: {query}".strip()
table.title = f"{base_title} | {counts_part}" if counts_part else base_title
ctx.set_last_result_table(table, results_list)
table.title = command_title
if refresh_mode:
ctx.set_last_result_table_preserve_history(table, results_list)
else:
ctx.set_last_result_table(table, results_list)
db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2))
else:
log("No results found", file=sys.stderr)
if refresh_mode:
try:
table.title = command_title
ctx.set_last_result_table_preserve_history(table, [])
except Exception:
pass
db.append_worker_stdout(worker_id, json.dumps([], indent=2))
db.update_worker_status(worker_id, 'completed')

View File

@@ -48,7 +48,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
table.set_source_command(".adjective")
ctx.set_last_result_table_overlay(table, list(data.keys()))
ctx.set_current_stage_table(table)
print(table)
from rich_display import stdout_console
stdout_console().print(table)
return 0
# We have args. First arg is likely category.
@@ -129,7 +131,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
table.set_source_command(".adjective")
ctx.set_last_result_table_overlay(table, tags)
ctx.set_current_stage_table(table)
print(table)
from rich_display import stdout_console
stdout_console().print(table)
return 0

View File

@@ -62,7 +62,9 @@ def _render_list(metadata: Dict[str, Dict[str, Any]], filter_text: Optional[str]
ctx.set_last_result_table(table, items)
ctx.set_current_stage_table(table)
print(table)
from rich_display import stdout_console
stdout_console().print(table)
def _render_detail(meta: Dict[str, Any], args: Sequence[str]) -> None:
@@ -130,7 +132,9 @@ def _render_detail(meta: Dict[str, Any], args: Sequence[str]) -> None:
ctx.set_last_result_table_overlay(table, [meta])
ctx.set_current_stage_table(table)
print(table)
from rich_display import stdout_console
stdout_console().print(table)
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:

View File

@@ -463,7 +463,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
ctx.set_pending_pipeline_tail([[".matrix", "-send"]], ".matrix")
print()
print(table.format_plain())
from rich_display import stdout_console
stdout_console().print(table)
print("\nSelect room(s) with @N (e.g. @1 or @1-3) to send the selected item(s)")
return 0

View File

@@ -1196,7 +1196,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# In pipeline mode, the CLI renders current-stage tables; printing here duplicates output.
suppress_direct_print = bool(isinstance(config, dict) and config.get("_quiet_background_output"))
if not suppress_direct_print:
print(table)
from rich_display import stdout_console
stdout_console().print(table)
return 0
# Everything below was originally outside a try block; keep it inside so `start_opts` is in scope.
@@ -1514,7 +1516,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# In pipeline mode, the CLI renders current-stage tables; printing here duplicates output.
suppress_direct_print = bool(isinstance(config, dict) and config.get("_quiet_background_output"))
if not suppress_direct_print:
print(table)
from rich_display import stdout_console
stdout_console().print(table)
return 0
finally:

View File

@@ -1515,7 +1515,7 @@ def _normalise_string_list(values: Optional[Iterable[Any]]) -> List[str]:
for value in values:
if value is None:
continue
text = str(value).strip()
text = str(value).strip().lower()
if not text:
continue
if text in seen:
@@ -1569,7 +1569,7 @@ def _read_sidecar_metadata(sidecar_path: Path) -> tuple[Optional[str], List[str]
urls.append(url_clean)
else:
# Everything else is a tag (including relationship: lines)
tags.append(line)
tags.append(line.lower())
return hash_value, tags, urls
@@ -1644,11 +1644,12 @@ def write_tags(media_path: Path, tags: Iterable[str], url: Iterable[str], hash_v
# Prepare tags lines and convert to list if needed (tags only)
tag_list = list(tags) if not isinstance(tags, list) else tags
tag_list = [str(tag).strip().lower() for tag in tag_list if str(tag).strip()]
# If database provided, insert directly and skip sidecar
if db is not None:
try:
db_tags = [str(tag).strip() for tag in tag_list if str(tag).strip()]
db_tags = [str(tag).strip().lower() for tag in tag_list if str(tag).strip()]
if db_tags:
db.add_tags(media_path, db_tags)
@@ -1675,7 +1676,7 @@ def write_tags(media_path: Path, tags: Iterable[str], url: Iterable[str], hash_v
# Write via consolidated function
try:
lines = []
lines.extend(str(tag).strip() for tag in tag_list if str(tag).strip())
lines.extend(str(tag).strip().lower() for tag in tag_list if str(tag).strip())
if lines:
sidecar.write_text("\n".join(lines) + "\n", encoding="utf-8")
@@ -2263,7 +2264,7 @@ def read_tags_from_file(file_path: Path) -> List[str]:
continue
# Normalize the tag
normalized = value_normalize(line)
normalized = value_normalize(line).lower()
if normalized and normalized not in seen:
seen.add(normalized)
tags.append(normalized)
@@ -2443,7 +2444,7 @@ def write_tags_to_file(
# Add tags
if tags:
content_lines.extend(tags)
content_lines.extend([str(t).strip().lower() for t in tags if str(t).strip()])
# Write to file
mode = 'a' if (append and file_path.exists()) else 'w'
@@ -2969,6 +2970,86 @@ def normalize_tags(tags: List[Any]) -> List[str]:
return sort_tags(normalized)
def compute_namespaced_tag_overwrite(
existing_tags: Sequence[Any],
incoming_tags: Sequence[Any],
) -> Tuple[List[str], List[str], List[str]]:
"""Compute a tag mutation with namespace overwrite semantics.
Rules:
- Incoming namespaced tags ("ns:value") overwrite any existing tags in that namespace.
- Overwrite is based on namespace match (case-insensitive).
- Additions are deduped case-insensitively against kept existing tags and within the incoming list.
- If an existing tag matches an incoming tag exactly, it is kept (no remove/add).
Returns:
(tags_to_remove, tags_to_add, merged_tags)
Notes:
This is intentionally store-agnostic: stores decide how to persist/apply
the returned mutation (DB merge write, Hydrus delete/add, etc.).
"""
def _clean(values: Sequence[Any]) -> List[str]:
out: List[str] = []
for v in values or []:
if not isinstance(v, str):
continue
t = v.strip()
if t:
out.append(t.lower())
return out
def _ns_of(tag: str) -> str:
if ":" not in tag:
return ""
return tag.split(":", 1)[0].strip().lower()
existing = _clean(existing_tags)
incoming = _clean(incoming_tags)
if not incoming:
return [], [], existing
namespaces_to_replace: Set[str] = set()
for t in incoming:
ns = _ns_of(t)
if ns:
namespaces_to_replace.add(ns)
kept_existing: List[str] = []
kept_existing_lower: Set[str] = set()
tags_to_remove: List[str] = []
for t in existing:
ns = _ns_of(t)
if ns and ns in namespaces_to_replace:
# If it matches exactly, keep it; otherwise remove it.
if t in incoming:
kept_existing.append(t)
kept_existing_lower.add(t.lower())
else:
# If incoming has the same tag value but different casing, treat as replace.
tags_to_remove.append(t)
continue
kept_existing.append(t)
kept_existing_lower.add(t.lower())
tags_to_add: List[str] = []
added_lower: Set[str] = set()
for t in incoming:
tl = t.lower()
if tl in kept_existing_lower:
continue
if tl in added_lower:
continue
tags_to_add.append(t)
added_lower.add(tl)
merged = kept_existing + tags_to_add
return tags_to_remove, tags_to_add, merged
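# Worked example (illustrative tags):
#
#     existing = ["creator:alice", "creator:bob", "rating:safe", "blue sky"]
#     incoming = ["creator:carol", "blue sky"]
#     compute_namespaced_tag_overwrite(existing, incoming)
#     -> (["creator:alice", "creator:bob"],   # "creator" namespace overwritten
#         ["creator:carol"],                  # "blue sky" already kept, not re-added
#         ["rating:safe", "blue sky", "creator:carol"])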
def merge_tag_lists(*tag_lists: List[str]) -> List[str]:
"""
Merge multiple tag lists, removing duplicates.

427
models.py
View File

@@ -3,14 +3,25 @@
import datetime
import hashlib
import json
import math
import os
import shutil
import sys
import time
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Protocol, TextIO, Tuple
from typing import Any, Callable, Dict, List, Optional, Protocol, TextIO
from rich.console import Console
from rich.progress import (
BarColumn,
DownloadColumn,
Progress,
TaskID,
TaskProgressColumn,
TextColumn,
TimeRemainingColumn,
TransferSpeedColumn,
)
@dataclass(slots=True)
@@ -75,154 +86,49 @@ class PipeObject:
return self.relationships.copy() if self.relationships else {}
def debug_table(self) -> None:
"""Print a formatted debug table showing PipeObject state.
Only prints when debug logging is enabled. Useful for tracking
object state throughout the pipeline.
"""
"""Rich-inspect the PipeObject when debug logging is enabled."""
try:
from SYS.logger import is_debug_enabled, debug
import shutil
if not is_debug_enabled():
return
from SYS.logger import is_debug_enabled, debug_inspect
except Exception:
return
# Prepare display values
hash_display = str(self.hash or "N/A")
store_display = str(self.store or "N/A")
provider_display = str(self.provider or "N/A")
title_display = str(self.title or "N/A")
tag_display = ", ".join(self.tag[:3]) if self.tag else "[]"
if len(self.tag) > 3:
tag_display += f" (+{len(self.tag) - 3} more)"
file_path_display = str(self.path or "N/A")
url_display: Any = self.url or "N/A"
if isinstance(url_display, (list, tuple, set)):
parts = [str(x) for x in url_display if x]
url_display = ", ".join(parts) if parts else "N/A"
else:
url_display = str(url_display)
relationships_display = "N/A"
if self.relationships:
rel_parts = []
for key, val in self.relationships.items():
if isinstance(val, list):
rel_parts.append(f"{key}({len(val)})")
else:
rel_parts.append(key)
relationships_display = ", ".join(rel_parts)
warnings_display = f"{len(self.warnings)} warning(s)" if self.warnings else "none"
if not is_debug_enabled():
return
def _fit(text: str, max_len: int) -> str:
if max_len <= 0:
return ""
if len(text) <= max_len:
return text
if max_len <= 3:
return text[:max_len]
return text[: max_len - 3] + "..."
# Compute box width from terminal size, but never allow overflow.
# Prefer a stable, human-friendly title:
# "1 - download-media", "2 - download-media", ...
# The index is preserved when possible via `pipe_index` in the PipeObject's extra.
idx = None
try:
term_cols = int(getattr(shutil.get_terminal_size((120, 20)), "columns", 120))
if isinstance(self.extra, dict):
idx = self.extra.get("pipe_index")
except Exception:
term_cols = 120
box_inner_max = max(60, term_cols - 3) # line length = box_inner + 3
idx = None
rows = [
("Hash", hash_display),
("Store", store_display),
("Provider", provider_display),
("Title", title_display),
("Tag", tag_display),
("URL", str(url_display)),
("File Path", file_path_display),
("Relationships", relationships_display),
("Warnings", warnings_display),
]
label_width = max(len(k) for k, _ in rows)
cmdlet_name = "PipeObject"
try:
import pipeline as ctx
current = ctx.get_current_cmdlet_name("") if hasattr(ctx, "get_current_cmdlet_name") else ""
if current:
cmdlet_name = current
else:
action = str(self.action or "").strip()
if action.lower().startswith("cmdlet:"):
cmdlet_name = action.split(":", 1)[1].strip() or cmdlet_name
elif action:
cmdlet_name = action
except Exception:
cmdlet_name = "PipeObject"
# Estimate a good inner width from current content, capped to terminal.
base_contents = [f"{k:<{label_width}} : {v}" for k, v in rows]
desired_inner = max([len("PipeObject Debug Info"), *[len(x) for x in base_contents], 60])
box_inner = min(desired_inner, box_inner_max)
title_text = cmdlet_name
try:
if idx is not None and str(idx).strip():
title_text = f"{idx} - {cmdlet_name}"
except Exception:
title_text = cmdlet_name
def _line(content: str) -> str:
return f"{_fit(content, box_inner):<{box_inner}}"
# Print table
debug("" + ("" * (box_inner + 1)) + "")
debug(_line("PipeObject Debug Info"))
debug("" + ("" * (box_inner + 1)) + "")
for key, val in rows:
content = f"{key:<{label_width}} : {val}"
debug(_line(content))
# Show extra keys as individual rows
if self.extra:
debug("" + ("" * (box_inner + 1)) + "")
debug(_line("Extra Fields:"))
for key, val in self.extra.items():
# Format value for display
if isinstance(val, (list, set)):
val_display = f"{type(val).__name__}({len(val)})"
elif isinstance(val, dict):
val_display = f"dict({len(val)})"
elif isinstance(val, (int, float)):
val_display = str(val)
else:
val_str = str(val)
val_display = val_str if len(val_str) <= 40 else val_str[:37] + "..."
# Truncate key if needed
key_display = str(key)
key_display = key_display if len(key_display) <= 15 else key_display[:12] + "..."
content = f" {key_display:<15}: {val_display}"
debug(_line(content))
# If we have structured provider metadata, expand it for debugging.
full_md = self.extra.get("full_metadata")
if isinstance(full_md, dict) and full_md:
debug("" + ("" * (box_inner + 1)) + "")
debug(_line("full_metadata:"))
for md_key in sorted(full_md.keys(), key=lambda x: str(x)):
md_val = full_md.get(md_key)
if isinstance(md_val, (str, int, float)) or md_val is None or isinstance(md_val, bool):
md_display = str(md_val)
elif isinstance(md_val, list):
if len(md_val) <= 6 and all(isinstance(x, (str, int, float, bool)) or x is None for x in md_val):
md_display = "[" + ", ".join(str(x) for x in md_val) + "]"
else:
md_display = f"list({len(md_val)})"
elif isinstance(md_val, dict):
# Avoid dumping huge nested dicts (like raw provider docs).
keys = list(md_val.keys())
preview = ",".join(str(k) for k in keys[:6])
md_display = f"dict({len(keys)})[{preview}{',...' if len(keys) > 6 else ''}]"
else:
md_str = str(md_val)
md_display = md_str if len(md_str) <= 40 else md_str[:37] + "..."
md_key_display = str(md_key)
md_key_display = md_key_display if len(md_key_display) <= 15 else md_key_display[:12] + "..."
content = f" {md_key_display:<15}: {md_display}"
debug(_line(content))
if self.action:
debug("├─────────────────────────────────────────────────────────────┤")
action_display = self.action[:48]
debug(f"│ Action : {action_display:<48}")
if self.parent_hash:
if not self.action:
debug("├─────────────────────────────────────────────────────────────┤")
parent_display = self.parent_hash[:12] + "..." if len(self.parent_hash) > 12 else self.parent_hash
debug(f"│ Parent Hash : {parent_display:<48}")
debug("└─────────────────────────────────────────────────────────────┘")
# Color the title (requested: yellow instead of Rich's default blue-ish title).
debug_inspect(self, title=f"[yellow]{title_text}[/yellow]")
def to_dict(self) -> Dict[str, Any]:
"""Serialize to dictionary, excluding None and empty values."""
@@ -482,18 +388,76 @@ def _sanitise_for_json(value: Any, *, max_depth: int = 8, _seen: Optional[set[in
return repr(value)
class ProgressBar:
"""Formats download progress with visual bar, speed, ETA, and file size."""
"""Rich progress helper for byte-based transfers.
Opinionated: requires `rich` and always renders via Rich.
"""
def __init__(self, width: Optional[int] = None):
"""Initialize progress bar with optional custom width.
Args:
width: Terminal width, defaults to auto-detect.
"""
"""Initialize progress bar with optional custom width."""
if width is None:
width = shutil.get_terminal_size((80, 20))[0]
self.width = max(40, width) # Minimum 40 chars for readability
self._console: Optional[Console] = None
self._progress: Optional[Progress] = None
self._task_id: Optional[TaskID] = None
def _ensure_started(self, *, label: str, total: Optional[int], file: Any = None) -> None:
if self._progress is not None and self._task_id is not None:
if total is not None and total > 0:
self._progress.update(self._task_id, total=int(total))
return
stream = file if file is not None else sys.stderr
console = Console(file=stream)
progress = Progress(
TextColumn("[progress.description]{task.description}"),
BarColumn(),
TaskProgressColumn(),
DownloadColumn(),
TransferSpeedColumn(),
TimeRemainingColumn(),
console=console,
transient=True,
)
progress.start()
task_total = int(total) if isinstance(total, int) and total > 0 else None
task_id: TaskID = progress.add_task(str(label or "download"), total=task_total)
self._console = console
self._progress = progress
self._task_id = task_id
def update(
self,
*,
downloaded: Optional[int],
total: Optional[int],
label: str = "download",
file: Any = None,
) -> None:
if downloaded is None and total is None:
return
self._ensure_started(label=label, total=total, file=file)
if self._progress is None or self._task_id is None:
return
if total is not None and total > 0:
self._progress.update(self._task_id, completed=int(downloaded or 0), total=int(total), refresh=True)
else:
self._progress.update(self._task_id, completed=int(downloaded or 0), refresh=True)
def finish(self) -> None:
if self._progress is None:
return
try:
self._progress.stop()
finally:
self._console = None
self._progress = None
self._task_id = None
def format_bytes(self, bytes_val: Optional[float]) -> str:
"""Format bytes to human-readable size.
@@ -513,152 +477,7 @@ class ProgressBar:
return f"{bytes_val:.1f} PB"
def format_speed(self, speed_str: Optional[str]) -> str:
"""Format download speed.
Args:
speed_str: Speed string from yt-dlp (e.g., "1.23MiB/s").
Returns:
Formatted speed string or "?.? KB/s".
"""
if not speed_str or speed_str.strip() == "":
return "?.? KB/s"
return speed_str.strip()
def format_eta(self, eta_str: Optional[str]) -> str:
"""Format estimated time remaining.
Args:
eta_str: ETA string from yt-dlp (e.g., "00:12:34").
Returns:
Formatted ETA string or "?:?:?".
"""
if not eta_str or eta_str.strip() == "":
return "?:?:?"
return eta_str.strip()
def format_percent(self, percent_str: Optional[str]) -> float:
"""Extract percent as float.
Args:
percent_str: Percent string from yt-dlp (e.g., "45.2%").
Returns:
Float 0-100 or 0 if invalid.
"""
if not percent_str:
return 0.0
try:
return float(percent_str.replace("%", "").strip())
except ValueError:
return 0.0
def build_bar(self, percent: float, width: int = 30) -> str:
"""Build ASCII progress bar.
Args:
percent: Completion percentage (0-100).
width: Bar width in characters.
Returns:
Progress bar string (e.g., "[████████░░░░░░░░░░░░░░░░░░]").
"""
percent = max(0, min(100, percent)) # Clamp to 0-100
filled = int(percent * width / 100)
empty = width - filled
# Use box-drawing characters for nice appearance
bar = "" * filled + "" * empty
return f"[{bar}]"
def format_progress(
self,
percent_str: Optional[str] = None,
downloaded: Optional[int] = None,
total: Optional[int] = None,
speed_str: Optional[str] = None,
eta_str: Optional[str] = None,
) -> str:
"""Format complete progress line.
Args:
percent_str: Percent string (e.g., "45.2%").
downloaded: Downloaded bytes.
total: Total bytes.
speed_str: Speed string (e.g., "1.23MiB/s").
eta_str: ETA string (e.g., "00:12:34").
Returns:
Formatted progress string.
"""
percent = self.format_percent(percent_str)
# Some callers (e.g. yt-dlp hooks) may not provide a stable percent string.
# When we have byte counts, derive percent from them so the bar advances.
if (not percent_str or percent == 0.0) and downloaded is not None and total is not None and total > 0:
try:
percent = (float(downloaded) / float(total)) * 100.0
except Exception:
percent = percent
bar = self.build_bar(percent)
# Format sizes
if downloaded is not None and total is not None and total > 0:
size_str = f"{self.format_bytes(downloaded)} / {self.format_bytes(total)}"
elif total is not None and total > 0:
size_str = f"/ {self.format_bytes(total)}"
elif downloaded is not None and downloaded > 0:
size_str = f"{self.format_bytes(downloaded)} downloaded"
else:
size_str = ""
speed = self.format_speed(speed_str)
eta = self.format_eta(eta_str)
# Build complete line
# Format: [████░░░░] 45.2% | 125.5 MB / 278.3 MB | 1.23 MB/s | ETA 00:12:34
parts = [
bar,
f"{percent:5.1f}%",
]
if size_str:
parts.append(f"| {size_str}")
parts.append(f"| {speed}")
parts.append(f"| ETA {eta}")
return " ".join(parts)
def format_summary(
self,
total: Optional[int] = None,
speed_str: Optional[str] = None,
elapsed_str: Optional[str] = None,
) -> str:
"""Format completion summary.
Args:
total: Total bytes downloaded.
speed_str: Average speed.
elapsed_str: Total time elapsed.
Returns:
Summary string.
"""
parts = ["✓ Download complete"]
if total is not None and total > 0:
parts.append(f"| {self.format_bytes(total)}")
if speed_str:
parts.append(f"| {speed_str.strip()}")
if elapsed_str:
parts.append(f"| {elapsed_str.strip()}")
return " ".join(parts)
# NOTE: rich.Progress handles the visual formatting; format_bytes remains as a general utility.
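# Minimal usage sketch (hypothetical names; any byte-counting loop works):
#
#     bar = ProgressBar()
#     for chunk in stream:                      # e.g. an HTTP byte iterator
#         out.write(chunk)
#         done += len(chunk)
#         bar.update(downloaded=done, total=total_bytes, label="download")
#     bar.finish()                              # stop the live display, reset state
#
# The first update() lazily starts the Rich Progress on stderr; finish() is safe
# to call even if update() was never reached.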
class ProgressFileReader:
@@ -687,34 +506,14 @@ class ProgressFileReader:
now = time.time()
if now - self._last < self._min_interval_s:
return
elapsed = max(0.001, now - self._start)
speed = float(self._read) / elapsed
eta_s = (float(self._total) - float(self._read)) / speed if speed > 0 else 0.0
minutes, seconds = divmod(int(max(0.0, eta_s)), 60)
hours, minutes = divmod(minutes, 60)
eta_str = f"{hours:02d}:{minutes:02d}:{seconds:02d}"
speed_str = self._bar.format_bytes(speed) + "/s"
percent = (float(self._read) / float(self._total)) * 100.0 if self._total > 0 else 0.0
line = self._bar.format_progress(
percent_str=f"{percent:.1f}%",
downloaded=int(self._read),
total=int(self._total),
speed_str=speed_str,
eta_str=eta_str,
)
sys.stderr.write("\r" + f"[{self._label}] " + line + " ")
sys.stderr.flush()
self._bar.update(downloaded=int(self._read), total=int(self._total), label=str(self._label or "upload"), file=sys.stderr)
self._last = now
def _finish(self) -> None:
if self._done:
return
self._done = True
sys.stderr.write("\r" + (" " * 180) + "\r")
sys.stderr.write("\n")
sys.stderr.flush()
self._bar.finish()
def read(self, size: int = -1) -> Any:
chunk = self._f.read(size)

View File

@@ -19,6 +19,7 @@ PowerShell-like piping model:
from __future__ import annotations
import sys
import shlex
from typing import Any, Dict, List, Optional, Sequence
from models import PipelineStageContext
@@ -76,6 +77,13 @@ _PIPELINE_LAST_SELECTION: List[int] = []
# Track the currently executing command/pipeline string for worker attribution
_PIPELINE_COMMAND_TEXT: str = ""
# Track the currently executing cmdlet name so debug helpers can label objects
# with the active stage (e.g., "1 - add-file").
_CURRENT_CMDLET_NAME: str = ""
# Track the currently executing stage text (best-effort, quotes preserved).
_CURRENT_STAGE_TEXT: str = ""
# Shared scratchpad for cmdlet/funacts to stash structured data between stages
_PIPELINE_VALUES: Dict[str, Any] = {}
_PIPELINE_MISSING = object()
@@ -367,6 +375,93 @@ def clear_current_command_text() -> None:
_PIPELINE_COMMAND_TEXT = ""
def split_pipeline_text(pipeline_text: str) -> List[str]:
"""Split a pipeline string on unquoted '|' characters.
Preserves original quoting/spacing within each returned stage segment.
"""
text = str(pipeline_text or "")
if not text:
return []
stages: List[str] = []
buf: List[str] = []
quote: Optional[str] = None
escape = False
for ch in text:
if escape:
buf.append(ch)
escape = False
continue
if ch == "\\" and quote is not None:
buf.append(ch)
escape = True
continue
if ch in ("\"", "'"):
if quote is None:
quote = ch
elif quote == ch:
quote = None
buf.append(ch)
continue
if ch == "|" and quote is None:
stages.append("".join(buf).strip())
buf = []
continue
buf.append(ch)
tail = "".join(buf).strip()
if tail:
stages.append(tail)
return [s for s in stages if s]
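# Illustrative split (quotes guard the pipe character):
#
#     split_pipeline_text('search-store -query "a | b" | get-file')
#     -> ['search-store -query "a | b"', 'get-file']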
def get_current_command_stages() -> List[str]:
"""Return the raw stage segments for the current command text."""
return split_pipeline_text(get_current_command_text(""))
def set_current_stage_text(stage_text: Optional[str]) -> None:
"""Record the raw stage text currently being executed."""
global _CURRENT_STAGE_TEXT
_CURRENT_STAGE_TEXT = str(stage_text or "").strip()
def get_current_stage_text(default: str = "") -> str:
"""Return the raw stage text currently being executed."""
text = _CURRENT_STAGE_TEXT.strip()
return text if text else default
def clear_current_stage_text() -> None:
"""Clear the cached stage text after a stage completes."""
global _CURRENT_STAGE_TEXT
_CURRENT_STAGE_TEXT = ""
def set_current_cmdlet_name(cmdlet_name: Optional[str]) -> None:
"""Record the currently executing cmdlet name (stage-local)."""
global _CURRENT_CMDLET_NAME
_CURRENT_CMDLET_NAME = str(cmdlet_name or "").strip()
def get_current_cmdlet_name(default: str = "") -> str:
"""Return the currently executing cmdlet name (stage-local)."""
text = _CURRENT_CMDLET_NAME.strip()
return text if text else default
def clear_current_cmdlet_name() -> None:
"""Clear the cached cmdlet name after a stage completes."""
global _CURRENT_CMDLET_NAME
_CURRENT_CMDLET_NAME = ""
def set_search_query(query: Optional[str]) -> None:
"""Set the last search query for refresh purposes."""
global _LAST_SEARCH_QUERY

View File

@@ -1,5 +1,6 @@
# Core CLI and TUI frameworks
typer>=0.9.0
rich>=13.7.0
prompt-toolkit>=3.0.0
textual>=0.30.0

View File

@@ -12,11 +12,18 @@ Features:
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Any, Dict, List, Optional, Callable
from typing import Any, Dict, List, Optional, Callable, Set
from pathlib import Path
import json
import shutil
from rich.box import SIMPLE
from rich.console import Group
from rich.panel import Panel
from rich.prompt import Prompt
from rich.table import Table as RichTable
from rich.text import Text
# Optional Textual imports - graceful fallback if not available
try:
from textual.widgets import Tree
@@ -26,7 +33,7 @@ except ImportError:
def _sanitize_cell_text(value: Any) -> str:
"""Coerce to a single-line, tab-free string suitable for ASCII tables."""
"""Coerce to a single-line, tab-free string suitable for terminal display."""
if value is None:
return ""
text = str(value)
@@ -136,10 +143,15 @@ class ResultRow:
def add_column(self, name: str, value: Any) -> None:
"""Add a column to this row."""
# Normalize column header names.
normalized_name = str(name or "").strip()
if normalized_name.lower() == "name":
normalized_name = "Title"
str_value = _sanitize_cell_text(value)
# Normalize extension columns globally and cap to 5 characters
if str(name).strip().lower() == "ext":
if normalized_name.lower() == "ext":
str_value = str_value.strip().lstrip(".")
for idx, ch in enumerate(str_value):
if not ch.isalnum():
@@ -147,7 +159,7 @@ class ResultRow:
break
str_value = str_value[:5]
self.columns.append(ResultColumn(name, str_value))
self.columns.append(ResultColumn(normalized_name, str_value))
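# e.g. (illustrative): add_column("Name", "x") files under the "Title" header;
# add_column("ext", ".tar.gz") stores "tar" (leading dot stripped, cut at the
# first non-alphanumeric character, capped at 5 chars).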
def get_column(self, name: str) -> Optional[str]:
"""Get column value by name."""
@@ -195,6 +207,30 @@ class ResultTable:
preserve_order: When True, skip automatic sorting so row order matches source
"""
self.title = title
try:
import pipeline as ctx
cmdlet_name = ""
try:
cmdlet_name = ctx.get_current_cmdlet_name("") if hasattr(ctx, "get_current_cmdlet_name") else ""
except Exception:
cmdlet_name = ""
stage_text = ""
try:
stage_text = ctx.get_current_stage_text("") if hasattr(ctx, "get_current_stage_text") else ""
except Exception:
stage_text = ""
if cmdlet_name and stage_text:
normalized_cmd = str(cmdlet_name).replace("_", "-").strip().lower()
normalized_title = str(self.title or "").strip().lower()
normalized_stage = str(stage_text).strip()
if normalized_stage and normalized_stage.lower().startswith(normalized_cmd):
if (not normalized_title) or normalized_title.replace("_", "-").startswith(normalized_cmd):
self.title = normalized_stage
except Exception:
pass
self.title_width = title_width
self.max_columns = max_columns if max_columns is not None else 5 # Default 5 for cleaner display
self.rows: List[ResultRow] = []
@@ -214,6 +250,26 @@ class ResultTable:
self.table: Optional[str] = None
"""Table type (e.g., 'youtube', 'soulseek') for context-aware selection logic."""
self.value_case: str = "lower"
"""Display-only value casing: 'lower' (default), 'upper', or 'preserve'."""
def set_value_case(self, value_case: str) -> "ResultTable":
"""Configure display-only casing for rendered cell values."""
case = str(value_case or "").strip().lower()
if case not in {"lower", "upper", "preserve"}:
case = "lower"
self.value_case = case
return self
def _apply_value_case(self, text: str) -> str:
if not text:
return ""
if self.value_case == "upper":
return text.upper()
if self.value_case == "preserve":
return text
return text.lower()
def set_table(self, table: str) -> "ResultTable":
"""Set the table type for context-aware selection logic."""
self.table = table
@@ -459,7 +515,7 @@ class ResultTable:
# Size (for files)
if hasattr(result, 'size_bytes') and result.size_bytes:
row.add_column("Size (Mb)", _format_size(result.size_bytes, integer_only=True))
row.add_column("Size", _format_size(result.size_bytes, integer_only=False))
# Annotations
if hasattr(result, 'annotations') and result.annotations:
@@ -505,9 +561,9 @@ class ResultTable:
elif getattr(item, 'store', None):
row.add_column("Storage", str(getattr(item, 'store')))
# Size (for files) - integer MB only
# Size (for files)
if hasattr(item, 'size_bytes') and item.size_bytes:
row.add_column("Size (Mb)", _format_size(item.size_bytes, integer_only=True))
row.add_column("Size", _format_size(item.size_bytes, integer_only=False))
def _add_tag_item(self, row: ResultRow, item: Any) -> None:
"""Extract and add TagItem fields to row (compact tag display).
@@ -575,9 +631,9 @@ class ResultTable:
Priority field groups (first match per group):
- title | name | filename
- ext
- size | size_bytes
- store | table | source
- size | size_bytes
- ext
"""
# Helper to determine if a field should be hidden from display
def is_hidden_field(field_name: Any) -> bool:
@@ -670,9 +726,9 @@ class ResultTable:
# Explicitly set which columns to display in order
priority_groups = [
('title', ['title', 'name', 'filename']),
('ext', ['ext']),
('size', ['size', 'size_bytes']),
('store', ['store', 'table', 'source']),
('size', ['size', 'size_bytes']),
('ext', ['ext']),
]
# Add priority field groups first - use first match in each group
@@ -681,9 +737,9 @@ class ResultTable:
break
for field in field_options:
if field in visible_data and field not in added_fields:
# Special handling for size fields - format as MB integer
# Special handling for size fields - format with unit and decimals
if field in ['size', 'size_bytes']:
value_str = _format_size(visible_data[field], integer_only=True)
value_str = _format_size(visible_data[field], integer_only=False)
else:
value_str = format_value(visible_data[field])
@@ -694,7 +750,7 @@ class ResultTable:
if field in ['store', 'table', 'source']:
col_name = "Store"
elif field in ['size', 'size_bytes']:
col_name = "Size (Mb)"
col_name = "Size"
elif field in ['title', 'name', 'filename']:
col_name = "Title"
else:
@@ -727,115 +783,56 @@ class ResultTable:
row.add_column(key.replace('_', ' ').title(), value_str)
def format_plain(self) -> str:
"""Format table as plain text with aligned columns and row numbers.
Returns:
Formatted table string
"""
def to_rich(self):
"""Return a Rich renderable representing this table."""
if not self.rows:
return "No results"
empty = Text("No results")
return Panel(empty, title=self.title) if self.title else empty
# Cap rendering to terminal width so long tables don't hard-wrap and
# visually break the border/shape.
term_width = shutil.get_terminal_size(fallback=(120, 24)).columns
if not term_width or term_width <= 0:
term_width = 120
# Calculate column widths
col_widths: Dict[str, int] = {}
col_names: List[str] = []
seen: Set[str] = set()
for row in self.rows:
for col in row.columns:
col_name = col.name
value_width = len(col.value)
if col_name.lower() == "ext":
value_width = min(value_width, 5)
col_widths[col_name] = max(
col_widths.get(col_name, 0),
len(col.name),
value_width
)
# Calculate row number column width (skip if no-choice)
num_width = 0 if self.no_choice else len(str(len(self.rows))) + 1
if col.name not in seen:
seen.add(col.name)
col_names.append(col.name)
# Preserve column order
column_names = list(col_widths.keys())
table = RichTable(
show_header=True,
header_style="bold",
box=SIMPLE,
expand=True,
show_lines=False,
)
def capped_width(name: str) -> int:
if not self.no_choice:
table.add_column("#", justify="right", no_wrap=True)
# Render headers in uppercase, but keep original column keys for lookup.
header_by_key: Dict[str, str] = {name: str(name).upper() for name in col_names}
for name in col_names:
header = header_by_key.get(name, str(name).upper())
if name.lower() == "ext":
cap = 5
table.add_column(header, no_wrap=True)
else:
# Single-column tables (e.g., get-tag) can use more horizontal space,
# but still must stay within the terminal to avoid hard wrapping.
if len(column_names) == 1:
# Keep room for side walls and optional row-number column.
cap = max(30, min(240, term_width - 6))
else:
cap = 90
return min(col_widths[name], cap)
table.add_column(header)
widths = ([] if self.no_choice else [num_width]) + [capped_width(name) for name in column_names]
base_inner_width = sum(widths) + (len(widths) - 1) * 3 # account for " | " separators
for row_idx, row in enumerate(self.rows, 1):
cells: List[str] = []
if not self.no_choice:
cells.append(str(row_idx))
for name in col_names:
val = row.get_column(name) or ""
cells.append(self._apply_value_case(_sanitize_cell_text(val)))
table.add_row(*cells)
# Compute final table width (with side walls) to accommodate headers/titles
table_width = base_inner_width + 2 # side walls
if self.title:
table_width = max(table_width, len(self.title) + 2)
if self.header_lines:
table_width = max(table_width, max(len(line) for line in self.header_lines) + 2)
if self.title or self.header_lines:
header_bits = [Text(line) for line in (self.header_lines or [])]
renderable = Group(*header_bits, table) if header_bits else table
return Panel(renderable, title=self.title) if self.title else renderable
# Ensure final render doesn't exceed terminal width (minus 1 safety column).
safe_term_width = max(20, term_width - 1)
table_width = min(table_width, safe_term_width)
def wrap(text: str) -> str:
"""Wrap content with side walls and pad to table width."""
if len(text) > table_width - 2:
text = text[: table_width - 5] + "..." # keep walls intact
return "|" + text.ljust(table_width - 2) + "|"
lines = []
# Title block
if self.title:
lines.append("|" + "=" * (table_width - 2) + "|")
safe_title = _sanitize_cell_text(self.title)
lines.append(wrap(safe_title.ljust(table_width - 2)))
lines.append("|" + "=" * (table_width - 2) + "|")
# Optional header metadata lines
for meta in self.header_lines:
safe_meta = _sanitize_cell_text(meta)
lines.append(wrap(safe_meta))
# Add header with # column
header_parts = [] if self.no_choice else ["#".ljust(num_width)]
separator_parts = [] if self.no_choice else ["-" * num_width]
for col_name in column_names:
width = capped_width(col_name)
header_parts.append(col_name.ljust(width))
separator_parts.append("-" * width)
lines.append(wrap(" | ".join(header_parts)))
lines.append(wrap("-+-".join(separator_parts)))
# Add rows with row numbers
for row_num, row in enumerate(self.rows, 1):
row_parts = [] if self.no_choice else [str(row_num).ljust(num_width)]
for col_name in column_names:
width = capped_width(col_name)
col_value = row.get_column(col_name) or ""
col_value = _sanitize_cell_text(col_value)
if len(col_value) > width:
col_value = col_value[: width - 3] + "..."
row_parts.append(col_value.ljust(width))
lines.append(wrap(" | ".join(row_parts)))
# Bottom border to close the rectangle
lines.append("|" + "=" * (table_width - 2) + "|")
return "\n".join(lines)
return table
def format_compact(self) -> str:
"""Format table in compact form (one line per row).
@@ -880,8 +877,16 @@ class ResultTable:
}
def __str__(self) -> str:
"""String representation (plain text format)."""
return self.format_plain()
"""String representation.
Rich is the primary rendering path. This keeps accidental `print(table)`
usage from emitting ASCII box-drawn tables.
"""
label = self.title or "ResultTable"
return f"{label} ({len(self.rows)} rows)"
def __rich__(self):
return self.to_rich()
def __repr__(self) -> str:
"""Developer representation."""
@@ -921,20 +926,24 @@ class ResultTable:
If accept_args=True: Dict with "indices" and "args" keys, or None if cancelled
"""
if self.no_choice:
print(f"\n{self}")
print("Selection is disabled for this table.")
from rich_display import stdout_console
stdout_console().print(self)
stdout_console().print(Panel(Text("Selection is disabled for this table.")))
return None
# Display the table
print(f"\n{self}")
from rich_display import stdout_console
stdout_console().print(self)
# Get user input
while True:
try:
if accept_args:
choice = input(f"\n{prompt} (e.g., '5' or '2 -storage hydrus' or 'q' to quit): ").strip()
choice = Prompt.ask(f"{prompt} (e.g., '5' or '2 -storage hydrus' or 'q' to quit)").strip()
else:
choice = input(f"\n{prompt} (e.g., '5' or '3-5' or '1,3,5' or 'q' to quit): ").strip()
choice = Prompt.ask(f"{prompt} (e.g., '5' or '3-5' or '1,3,5' or 'q' to quit)").strip()
if choice.lower() == 'q':
return None
@@ -944,18 +953,18 @@ class ResultTable:
result = self._parse_selection_with_args(choice)
if result is not None:
return result
print(f"Invalid format. Use: selection (5 or 3-5 or 1,3,5) optionally followed by flags (e.g., '5 -storage hydrus').")
stdout_console().print(Panel(Text("Invalid format. Use: selection (5 or 3-5 or 1,3,5) optionally followed by flags (e.g., '5 -storage hydrus').")))
else:
# Parse just the selection
selected_indices = self._parse_selection(choice)
if selected_indices is not None:
return selected_indices
print(f"Invalid format. Use: single (5), range (3-5), list (1,3,5), combined (1-3,7,9-11), or 'q' to quit.")
stdout_console().print(Panel(Text("Invalid format. Use: single (5), range (3-5), list (1,3,5), combined (1-3,7,9-11), or 'q' to quit.")))
except (ValueError, EOFError):
if accept_args:
print(f"Invalid format. Use: selection (5 or 3-5 or 1,3,5) optionally followed by flags (e.g., '5 -storage hydrus').")
stdout_console().print(Panel(Text("Invalid format. Use: selection (5 or 3-5 or 1,3,5) optionally followed by flags (e.g., '5 -storage hydrus').")))
else:
print(f"Invalid format. Use: single (5), range (3-5), list (1,3,5), combined (1-3,7,9-11), or 'q' to quit.")
stdout_console().print(Panel(Text("Invalid format. Use: single (5), range (3-5), list (1,3,5), combined (1-3,7,9-11), or 'q' to quit.")))
def _parse_selection(self, selection_str: str) -> Optional[List[int]]:
"""Parse user selection string into list of 0-based indices.
@@ -1317,10 +1326,10 @@ def _format_size(size: Any, integer_only: bool = False) -> str:
Args:
size: Size in bytes or already formatted string
integer_only: If True, show MB as integer only (e.g., "250 MB" not "250.5 MB")
integer_only: If True, show MB as an integer (e.g., "250 MB")
Returns:
Formatted size string (e.g., "250 MB", "1.5 MB" or "250 MB" if integer_only=True)
Formatted size string with units (e.g., "3.53 MB", "0.57 MB", "1.2 GB")
"""
if isinstance(size, str):
return size if size else ""
@@ -1329,23 +1338,22 @@ def _format_size(size: Any, integer_only: bool = False) -> str:
bytes_val = int(size)
if bytes_val < 0:
return ""
if integer_only:
# For table display: always show as integer MB if >= 1MB
mb_val = int(bytes_val / (1024 * 1024))
if mb_val > 0:
return str(mb_val)
kb_val = int(bytes_val / 1024)
if kb_val > 0:
return str(kb_val)
return str(bytes_val)
# Keep display consistent with the CLI expectation: show MB with unit
# (including values under 1 MB as fractional MB), and show GB for very
# large sizes.
if bytes_val >= 1024**3:
value = bytes_val / (1024**3)
unit = "GB"
else:
# For descriptions: show with one decimal place
for unit, divisor in [("GB", 1024**3), ("MB", 1024**2), ("KB", 1024)]:
if bytes_val >= divisor:
return f"{bytes_val / divisor:.1f} {unit}"
return f"{bytes_val} B"
value = bytes_val / (1024**2)
unit = "MB"
if integer_only:
return f"{int(round(value))} {unit}"
num = f"{value:.2f}".rstrip("0").rstrip(".")
return f"{num} {unit}"
except (ValueError, TypeError):
return ""

39
rich_display.py Normal file
View File

@@ -0,0 +1,39 @@
"""Central Rich output helpers.
Opinionated: `rich` is a required dependency.
This module centralizes Console instances so tables/panels render consistently and
so callers can choose stdout vs stderr explicitly (important for pipeline-safe
output).
"""
from __future__ import annotations
import sys
from typing import Any, TextIO
from rich.console import Console
_STDOUT_CONSOLE = Console(file=sys.stdout)
_STDERR_CONSOLE = Console(file=sys.stderr)
def stdout_console() -> Console:
return _STDOUT_CONSOLE
def stderr_console() -> Console:
return _STDERR_CONSOLE
def console_for(file: TextIO | None) -> Console:
if file is None or file is sys.stdout:
return _STDOUT_CONSOLE
if file is sys.stderr:
return _STDERR_CONSOLE
return Console(file=file)
def rprint(renderable: Any = "", *, file: TextIO | None = None) -> None:
console_for(file).print(renderable)
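
Intended call pattern, as a sketch: result tables print to the shared stdout console while progress and diagnostics go to stderr, keeping piped stdout machine-readable:

import sys

from rich.panel import Panel

from rich_display import rprint, stderr_console

rprint(Panel("results"))                # defaults to the shared stdout console
rprint("note", file=sys.stderr)         # reuses the shared stderr console
stderr_console().print("working...")    # pipeline-safe status output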

View File

@@ -1,10 +1,14 @@
from __future__ import annotations
import os
from dataclasses import dataclass
from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence
from SYS.logger import debug
from SYS.utils import ensure_directory
from models import DownloadOptions
def _get_nested(config: Dict[str, Any], *path: str) -> Any:
@@ -140,6 +144,124 @@ class YtDlpTool:
return self.defaults.audio_format
return self.defaults.video_format
def build_ytdlp_options(self, opts: DownloadOptions) -> Dict[str, Any]:
"""Translate DownloadOptions into yt-dlp API options."""
ensure_directory(opts.output_dir)
outtmpl = str((opts.output_dir / "%(title)s.%(ext)s").resolve())
base_options: Dict[str, Any] = {
"outtmpl": outtmpl,
"quiet": True,
"no_warnings": True,
"noprogress": True,
"socket_timeout": 30,
"retries": 10,
"fragment_retries": 10,
"http_chunk_size": 10_485_760,
"restrictfilenames": True,
}
try:
repo_root = Path(__file__).resolve().parents[1]
bundled_ffmpeg_dir = repo_root / "MPV" / "ffmpeg" / "bin"
if bundled_ffmpeg_dir.exists():
base_options.setdefault("ffmpeg_location", str(bundled_ffmpeg_dir))
except Exception:
pass
try:
if os.name == "nt":
base_options.setdefault("file_access_retries", 40)
except Exception:
pass
if opts.cookies_path and opts.cookies_path.is_file():
base_options["cookiefile"] = str(opts.cookies_path)
else:
cookiefile = self.resolve_cookiefile()
if cookiefile is not None and cookiefile.is_file():
base_options["cookiefile"] = str(cookiefile)
if opts.no_playlist:
base_options["noplaylist"] = True
fmt = opts.ytdl_format or self.default_format(opts.mode)
base_options["format"] = fmt
if opts.mode == "audio":
base_options["postprocessors"] = [{"key": "FFmpegExtractAudio"}]
else:
format_sort = self.defaults.format_sort or [
"res:4320",
"res:2880",
"res:2160",
"res:1440",
"res:1080",
"res:720",
"res",
]
base_options["format_sort"] = format_sort
if getattr(opts, "embed_chapters", False):
pps = base_options.get("postprocessors")
if not isinstance(pps, list):
pps = []
already_has_metadata = any(
isinstance(pp, dict) and str(pp.get("key") or "") == "FFmpegMetadata" for pp in pps
)
if not already_has_metadata:
pps.append(
{
"key": "FFmpegMetadata",
"add_metadata": True,
"add_chapters": True,
"add_infojson": "if_exists",
}
)
base_options["postprocessors"] = pps
if opts.mode != "audio":
base_options.setdefault("merge_output_format", "mkv")
if getattr(opts, "write_sub", False):
base_options["writesubtitles"] = True
base_options["writeautomaticsub"] = True
base_options["subtitlesformat"] = "vtt"
if opts.clip_sections:
sections: List[str] = []
def _secs_to_hms(seconds: float) -> str:
total = max(0, int(seconds))
minutes, secs = divmod(total, 60)
hours, minutes = divmod(minutes, 60)
return f"{hours:02d}:{minutes:02d}:{secs:02d}"
for section_range in str(opts.clip_sections).split(","):
section_range = section_range.strip()
if not section_range:
continue
try:
start_s_raw, end_s_raw = section_range.split("-", 1)
start_s = float(start_s_raw.strip())
end_s = float(end_s_raw.strip())
if start_s >= end_s:
continue
sections.append(f"*{_secs_to_hms(start_s)}-{_secs_to_hms(end_s)}")
except (ValueError, AttributeError):
continue
if sections:
base_options["download_sections"] = sections
debug(f"Download sections configured: {', '.join(sections)}")
if opts.playlist_items:
base_options["playlist_items"] = opts.playlist_items
if not opts.quiet:
debug(f"yt-dlp: mode={opts.mode}, format={base_options.get('format')}")
return base_options
def build_yt_dlp_cli_args(
self,
*,
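
For reference, the clip-section strings built above follow yt-dlp's `--download-sections` CLI syntax (`*HH:MM:SS-HH:MM:SS`); yt-dlp's Python API expresses ranges via `download_ranges` (e.g. `yt_dlp.utils.download_range_func`), so these options presumably get rendered to CLI arguments by `build_yt_dlp_cli_args`. A standalone sketch of just the string construction (illustrative names, same parsing rules as above):

def to_sections(spec: str) -> list[str]:
    """Mirror of the clip-section parsing in build_ytdlp_options."""
    def hms(seconds: float) -> str:
        total = max(0, int(seconds))
        minutes, secs = divmod(total, 60)
        hours, minutes = divmod(minutes, 60)
        return f"{hours:02d}:{minutes:02d}:{secs:02d}"

    sections: list[str] = []
    for part in spec.split(","):
        part = part.strip()
        if not part:
            continue
        try:
            start_raw, end_raw = part.split("-", 1)
            start, end = float(start_raw), float(end_raw)
        except ValueError:
            continue
        if start < end:                       # empty/inverted ranges are skipped
            sections.append(f"*{hms(start)}-{hms(end)}")
    return sections

assert to_sections("5-65, 90-125.5") == ["*00:00:05-00:01:05", "*00:01:30-00:02:05"]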