This commit is contained in:
nose
2025-12-23 16:36:39 -08:00
parent 16316bb3fd
commit 8bf04c6b71
25 changed files with 3165 additions and 234 deletions

View File

@@ -4,6 +4,7 @@
from __future__ import annotations
import json
import shutil
import sys
from collections.abc import Iterable as IterableABC
@@ -1275,6 +1276,233 @@ def get_pipe_object_path(pipe_object: Any) -> Optional[str]:
return None
def _extract_flag_value(args: Sequence[str], *flags: str) -> Optional[str]:
    """Look up the value that follows the first occurrence of any of ``flags``.

    Flag matching ignores case and surrounding whitespace. Only the first
    matching token is considered: if it is the last token, or the token after
    it is blank or starts with ``-`` (i.e. looks like another flag), the
    result is ``None`` and later occurrences are not inspected.

    Deliberately independent of any cmdlet spec so CLI and pipeline callers
    can share it.

    Args:
        args: Raw argument tokens.
        flags: Accepted spellings of the flag (e.g. ``"-path"``, ``"--path"``).

    Returns:
        The token following the flag, exactly as given (not stripped), or
        ``None`` when no usable value is present.
    """
    if not args:
        return None

    wanted = set()
    for flag in flags:
        name = str(flag).strip()
        if name:
            wanted.add(name.lower())
    if not wanted:
        return None

    try:
        tokens = [str(a) for a in args]
    except Exception:
        tokens = list(args)  # type: ignore[list-item]

    last_index = len(tokens) - 1
    for pos, token in enumerate(tokens):
        if str(token).strip().lower() not in wanted:
            continue
        if pos >= last_index:
            # Flag is the final token: nothing left to use as a value.
            return None
        value = str(tokens[pos + 1])
        # A blank token or another flag does not count as a value.
        if not value.strip() or value.startswith("-"):
            return None
        return value
    return None
def _unique_destination_path(dest: Path, *, max_attempts: int = 9_999) -> Path:
    """Return ``dest`` or a sibling path that does not exist yet.

    If ``dest`` does not exist it is returned unchanged. Otherwise candidates
    named ``"<stem> (N)<suffix>"`` in the same directory are probed for
    N = 1..``max_attempts`` and the first one that does not exist is returned.
    If an existence check raises, the path being checked is returned as-is
    (best effort, matching the behaviour for ``dest`` itself).

    When every numbered candidate is taken, a candidate tagged with a random
    hex string is returned rather than the colliding ``dest``, so a subsequent
    move does not silently overwrite an existing file.

    Args:
        dest: Desired destination path.
        max_attempts: Highest ``N`` to try before using the random fallback.

    Returns:
        A path in ``dest.parent``.
    """
    try:
        if not dest.exists():
            return dest
    except Exception:
        return dest

    parent, stem, suffix = dest.parent, dest.stem, dest.suffix
    for n in range(1, max(int(max_attempts), 0) + 1):
        candidate = parent / f"{stem} ({n}){suffix}"
        try:
            if not candidate.exists():
                return candidate
        except Exception:
            return candidate

    # All numbered names are in use: fall back to a random tag instead of
    # handing back a path that is known to collide.
    import uuid

    return parent / f"{stem} ({uuid.uuid4().hex[:8]}){suffix}"
def apply_output_path_from_pipeobjects(
    *,
    cmd_name: str,
    args: Sequence[str],
    emits: Sequence[Any],
) -> List[Any]:
    """If the user supplied `-path`, move emitted temp/PATH files there.

    This enables a dynamic pattern:
    - Any cmdlet can include `SharedArgs.PATH`.
    - If it emits a file-backed PipeObject (`path` exists on disk) and the item is
      a temp/PATH artifact, then `-path <dest>` will save it to that location.

    Rules:
    - Only affects items whose `action` is `cmdlet:<name>` for the current cmdlet
      (underscores in `cmd_name` are treated as dashes).
    - Only affects items that look like local artifacts (`is_temp` True or `store` == PATH).
    - The destination is a directory when it ends with a path separator or
      already exists as a directory. With several artifacts, a destination
      that is not such a directory but has a file suffix means "its parent
      directory"; with one artifact it is used as the target file path.
    - A file that is already at its destination is left untouched; other
      name collisions are resolved with `_unique_destination_path`.
    - Updates the emitted object's `path` (and `target` when it points at the same file).

    Args:
        cmd_name: Name of the cmdlet that produced `emits`.
        args: Raw argument tokens of that cmdlet invocation.
        emits: Items emitted by the cmdlet.

    Returns:
        A new list holding the same item objects; moved items are updated in place.
        Failures are logged to stderr and leave the affected item unchanged.
    """
    items = list(emits or [])

    dest_raw = _extract_flag_value(args, "-path", "--path")
    if not dest_raw:
        return items

    cmd_norm = str(cmd_name or "").replace("_", "-").strip().lower()
    if not cmd_norm:
        return items

    dest_text = str(dest_raw)
    # A trailing separator is an explicit "this is a directory" hint.
    dest_hint_dir = dest_text.endswith(("/", "\\"))
    try:
        dest_path = Path(dest_text).expanduser()
    except Exception:
        return items

    def _same_file(left: Path, right: Path) -> bool:
        """Best-effort check that two paths resolve to the same location."""
        try:
            return left.resolve() == right.resolve()
        except Exception:
            return False

    # Identify which emitted items are actually file artifacts produced by this cmdlet.
    artifacts: List[Any] = []  # (index into items, source Path) pairs
    for idx, item in enumerate(items):
        action = str(get_field(item, "action", "") or "").strip().lower()
        if not action.startswith("cmdlet:"):
            continue
        if action.split(":", 1)[-1].strip().lower() != cmd_norm:
            continue

        store = str(get_field(item, "store", "") or "").strip().lower()
        is_temp = bool(get_field(item, "is_temp", False))
        if not (is_temp or store == "path"):
            continue

        src_str = get_pipe_object_path(item)
        if not src_str:
            continue
        try:
            src = Path(str(src_str)).expanduser()
            if not src.exists() or not src.is_file():
                continue
        except Exception:
            continue

        artifacts.append((idx, src))

    if not artifacts:
        return items

    try:
        dest_is_dir = dest_hint_dir or dest_path.is_dir()
    except Exception:
        dest_is_dir = dest_hint_dir

    if len(artifacts) > 1:
        # Multiple artifacts always land in a directory. Honour an explicit
        # directory (trailing separator / existing dir) even when its name
        # contains a dot; otherwise a suffix means "use the parent directory".
        if dest_is_dir or not dest_path.suffix:
            dest_dir = dest_path
        else:
            dest_dir = dest_path.parent

        try:
            dest_dir.mkdir(parents=True, exist_ok=True)
        except Exception as exc:
            log(f"Failed to create destination directory: {dest_dir} ({exc})", file=sys.stderr)
            return items

        for idx, src in artifacts:
            final = dest_dir / src.name
            # Check for "already in place" BEFORE uniquifying; afterwards the
            # candidate can never equal the (existing) source.
            if _same_file(src, final):
                continue
            final = _unique_destination_path(final)
            try:
                shutil.move(str(src), str(final))
            except Exception as exc:
                log(f"Failed to save output to {final}: {exc}", file=sys.stderr)
                continue
            _apply_saved_path_update(items[idx], old_path=str(src), new_path=str(final))

        return items

    # Single artifact: destination can be a directory or a concrete file path.
    idx, src = artifacts[0]
    final = dest_path / src.name if dest_is_dir else dest_path

    try:
        final.parent.mkdir(parents=True, exist_ok=True)
    except Exception as exc:
        log(f"Failed to create destination directory: {final.parent} ({exc})", file=sys.stderr)
        return items

    if _same_file(src, final):
        # The file is already where the user asked for it; nothing to move.
        return items

    final = _unique_destination_path(final)
    try:
        shutil.move(str(src), str(final))
    except Exception as exc:
        log(f"Failed to save output to {final}: {exc}", file=sys.stderr)
        return items

    _apply_saved_path_update(items[idx], old_path=str(src), new_path=str(final))
    return items
def _apply_saved_path_update(item: Any, *, old_path: str, new_path: str) -> None:
    """Update a PipeObject-like item after its backing file has moved.

    Every ``path`` / ``target`` field (top-level and inside an ``extra`` dict)
    whose string form equals ``old_path`` is replaced with ``new_path``.
    Dicts are updated by key, other objects by attribute. Values are compared
    via ``str()`` in both cases, so a ``pathlib.Path`` attribute matches too.
    All updates are best effort: a field that cannot be read or written is
    skipped silently.

    Args:
        item: Dict or object to update in place.
        old_path: Previous file location.
        new_path: New file location (stored as ``str``).
    """
    old_str = str(old_path)
    new_str = str(new_path)
    if not old_str:
        # An empty "old" value would match unset fields; nothing sensible to do.
        return

    def _matches(value: Any) -> bool:
        return value is not None and str(value) == old_str

    def _rewrite_mapping(mapping: dict, keys: tuple) -> None:
        for key in keys:
            try:
                if _matches(mapping.get(key)):
                    mapping[key] = new_str
            except Exception:
                pass

    if isinstance(item, dict):
        _rewrite_mapping(item, ("path", "target"))
        extra = item.get("extra")
        if isinstance(extra, dict):
            _rewrite_mapping(extra, ("target", "path"))
        return

    # models.PipeObject or PipeObject-ish
    for attr in ("path", "target"):
        try:
            if _matches(getattr(item, attr, None)):
                setattr(item, attr, new_str)
        except Exception:
            pass

    try:
        extra = getattr(item, "extra", None)
    except Exception:
        extra = None
    if isinstance(extra, dict):
        _rewrite_mapping(extra, ("target", "path"))
def get_pipe_object_hash(pipe_object: Any) -> Optional[str]:
"""Extract file hash from PipeObject, dict, or pipeline-friendly object."""
if pipe_object is None:

View File

@@ -123,6 +123,70 @@ class Delete_File(sh.Cmdlet):
local_deleted = False
local_target = isinstance(target, str) and target.strip() and not str(target).lower().startswith(("http://", "https://"))
deleted_rows: List[Dict[str, Any]] = []
# If this item references a configured non-Hydrus store backend, prefer deleting
# via the backend API. This supports store items where `path`/`target` is the hash.
if conserve != "local" and store and (not is_hydrus_store):
try:
registry = Store(config)
if registry.is_available(str(store)):
backend = registry[str(store)]
# Prefer hash when available.
hash_candidate = sh.normalize_hash(hash_hex_raw) if hash_hex_raw else None
if not hash_candidate and isinstance(target, str):
hash_candidate = sh.normalize_hash(target)
resolved_path = None
try:
if hash_candidate and hasattr(backend, "get_file"):
resolved_path = backend.get_file(hash_candidate)
except Exception:
resolved_path = None
identifier = hash_candidate or (str(target).strip() if isinstance(target, str) else "")
if identifier:
deleter = getattr(backend, "delete_file", None)
if callable(deleter) and bool(deleter(identifier)):
local_deleted = True
size_bytes: int | None = None
try:
if resolved_path is not None and isinstance(resolved_path, Path) and resolved_path.exists():
size_bytes = int(resolved_path.stat().st_size)
except Exception:
size_bytes = None
deleted_rows.append(
{
"title": str(title_val).strip() if title_val else (resolved_path.name if resolved_path else identifier),
"store": store_label,
"hash": hash_candidate or (hash_hex or ""),
"size_bytes": size_bytes,
"ext": _get_ext_from_item() or (resolved_path.suffix.lstrip(".") if resolved_path else ""),
}
)
# Best-effort remove sidecars if we know the resolved path.
try:
if resolved_path is not None and isinstance(resolved_path, Path):
for sidecar in (
resolved_path.with_suffix(".tag"),
resolved_path.with_suffix(".metadata"),
resolved_path.with_suffix(".notes"),
):
try:
if sidecar.exists() and sidecar.is_file():
sidecar.unlink()
except Exception:
pass
except Exception:
pass
# Skip legacy local-path deletion below.
local_target = False
except Exception:
pass
if conserve != "local" and local_target:
path = Path(str(target))

View File

@@ -108,6 +108,21 @@ def _set_pipe_percent(percent: int) -> None:
return
def _print_table_suspended(table: Any) -> None:
    """Print ``table`` on the stderr console inside a "suspend" context.

    If ``pipeline_context`` exposes a callable ``suspend_live_progress`` and
    calling it returns a non-None value, that value is used as the context
    manager around the print (presumably pausing the Live progress display -
    confirm against pipeline_context). If the hook is missing, returns None,
    or raises, the table is printed under a no-op context instead.
    """
    cm: AbstractContextManager[Any] = nullcontext()

    suspend = getattr(pipeline_context, "suspend_live_progress", None)
    if callable(suspend):
        try:
            candidate = suspend()
        except Exception:
            candidate = None
        if candidate is not None:
            cm = candidate  # type: ignore[assignment]

    with cm:
        get_stderr_console().print(table)
# Minimal inlined helpers from helper/download.py (is_url_supported_by_ytdlp, list_formats)
try:
import yt_dlp # type: ignore
@@ -1231,6 +1246,18 @@ class Download_Media(Cmdlet):
return sh.parse_single_hash_query(f"hash:{hash_candidate}")
# Backwards-compatible: treat a non-keyed query as a hash query.
# If the query uses keyed specs (e.g. format:, item:, clip:), do NOT attempt
# to interpret the whole string as a hash.
try:
has_non_hash_keys = bool(
query_keyed
and isinstance(query_keyed, dict)
and any(k for k in query_keyed.keys() if str(k).strip().lower() != "hash")
)
except Exception:
has_non_hash_keys = False
if has_non_hash_keys:
return None
return sh.parse_single_hash_query(str(query_spec)) if query_spec else None
except Exception:
return None
@@ -1315,6 +1342,117 @@ class Download_Media(Cmdlet):
formats_cache[key] = fmts
return fmts
def _is_browseable_format(self, fmt: Any) -> bool:
    """Decide whether a format entry belongs in the format table.

    A format is kept only when it is a dict with a non-empty ``format_id``,
    its ``ext`` is not ``mhtml``/``json``, its ``format_note`` does not mention
    "storyboard", its id does not start with ``sb``, and at least one of
    ``vcodec`` / ``acodec`` is something other than the string ``"none"``
    (a missing codec key counts as ``"none"``).
    """
    if not isinstance(fmt, dict):
        return False

    fmt_id = str(fmt.get("format_id") or "").strip()
    extension = str(fmt.get("ext") or "").strip().lower()
    note = str(fmt.get("format_note") or "").lower()

    excluded = (
        not fmt_id
        or extension in {"mhtml", "json"}
        or "storyboard" in note
        or fmt_id.lower().startswith("sb")
    )
    if excluded:
        return False

    # Keep anything with at least one stream.
    has_video = str(fmt.get("vcodec", "none")) != "none"
    has_audio = str(fmt.get("acodec", "none")) != "none"
    return has_video or has_audio
def _format_id_for_query_index(
    self,
    query_format: str,
    url: str,
    formats_cache: Dict[str, Optional[List[Dict[str, Any]]]],
    ytdlp_tool: "YtDlpTool",
) -> Optional[str]:
    """Resolve a numeric 'format:N' query into an actual yt-dlp format selector.

    Acceptable forms: '7', '#7', ' 7 ', ' #7 ' (surrounding whitespace allowed).
    The 1-based index is applied to the formats returned by
    ``self._list_formats_cached`` after filtering with
    ``self._is_browseable_format``; if that filter removes everything, the
    unfiltered list is used instead.

    Args:
        query_format: Value of the ``format:`` query key.
        url: URL whose formats are listed.
        formats_cache: Cache passed through to ``_list_formats_cached``.
        ytdlp_tool: Tool instance passed through to ``_list_formats_cached``.

    Returns:
        ``None`` when ``query_format`` is not a numeric index; otherwise the
        chosen ``format_id``, with ``+ba`` appended when the format has a
        video codec but no audio codec.

    Raises:
        ValueError: The index is out of range, formats cannot be listed, or
            the selected entry has no usable ``format_id``.
    """
    import re

    # Capture the digits directly so leading whitespace before '#' cannot
    # leave the '#' in the string handed to int().
    match = re.match(r"^\s*#?(\d+)\s*$", str(query_format)) if query_format else None
    if match is None:
        return None

    try:
        idx = int(match.group(1))
    except Exception as exc:
        raise ValueError(f"Invalid format index: {query_format}") from exc

    fmts = self._list_formats_cached(
        url,
        playlist_items_value=None,
        formats_cache=formats_cache,
        ytdlp_tool=ytdlp_tool,
    )
    if not fmts:
        raise ValueError("Unable to list formats for the URL; cannot resolve numeric format index")

    candidate_formats = [f for f in fmts if self._is_browseable_format(f)]
    filtered_formats = candidate_formats if candidate_formats else list(fmts)

    if not filtered_formats:
        raise ValueError("No formats available for selection")

    if idx <= 0 or idx > len(filtered_formats):
        raise ValueError(f"Format index {idx} out of range (1..{len(filtered_formats)})")

    chosen = filtered_formats[idx - 1]
    # The unfiltered fallback list may contain non-dict entries; report those
    # through the documented ValueError rather than an AttributeError.
    if not isinstance(chosen, dict):
        raise ValueError("Selected format has no format_id")

    selection_format_id = str(chosen.get("format_id") or "").strip()
    if not selection_format_id:
        raise ValueError("Selected format has no format_id")

    vcodec = str(chosen.get("vcodec", "none"))
    acodec = str(chosen.get("acodec", "none"))
    if vcodec != "none" and acodec == "none":
        # Video-only format: pair it with the best audio stream.
        selection_format_id = f"{selection_format_id}+ba"

    return selection_format_id
@staticmethod
def _format_selector_for_query_height(query_format: str) -> Optional[str]:
    """Turn a height query such as '720p' into a yt-dlp -f selector.

    The value is trimmed and lower-cased, and must then be exactly 2-5 digits
    followed by 'p'. Plain numbers are deliberately not accepted so they stay
    available for numeric format IDs and index selection.

    Example: '640p' -> 'bv*[height<=640]+ba'

    Returns:
        ``'bv*[height<=N]+ba'`` for a matching value, otherwise ``None``
        (including when ``query_format`` is ``None``).

    Raises:
        ValueError: The digits parse to a height of zero or less.
    """
    import re

    if query_format is None:
        return None

    normalized = str(query_format).strip().lower()
    matched = re.match(r"^(\d{2,5})p$", normalized)
    if matched is None:
        return None

    try:
        cap = int(matched.group(1))
    except Exception:
        return None

    if cap <= 0:
        raise ValueError(f"Invalid height selection: {query_format}")

    return f"bv*[height<={cap}]+ba"
@staticmethod
def _canonicalize_url_for_storage(*, requested_url: str, ytdlp_tool: YtDlpTool, playlist_items: Optional[str]) -> str:
# Prefer yt-dlp's canonical webpage URL (e.g. strips timestamps/redirects).
@@ -1778,11 +1916,22 @@ class Download_Media(Cmdlet):
except Exception:
pass
get_stderr_console().print(table)
_print_table_suspended(table)
setattr(table, "_rendered_by_cmdlet", True)
if not Confirm.ask("Continue anyway?", default=False, console=get_stderr_console()):
return False
suspend = getattr(pipeline_context, "suspend_live_progress", None)
cm: AbstractContextManager[Any] = nullcontext()
if callable(suspend):
try:
maybe_cm = suspend()
if maybe_cm is not None:
cm = maybe_cm # type: ignore[assignment]
except Exception:
cm = nullcontext()
with cm:
if not Confirm.ask("Continue anyway?", default=False, console=get_stderr_console()):
return False
return True
def _maybe_show_playlist_table(self, *, url: str, ytdlp_tool: YtDlpTool) -> bool:
@@ -1869,7 +2018,7 @@ class Download_Media(Cmdlet):
pipeline_context.set_current_stage_table(table)
pipeline_context.set_last_result_table(table, results_list)
get_stderr_console().print(table)
_print_table_suspended(table)
setattr(table, "_rendered_by_cmdlet", True)
return True
@@ -2048,7 +2197,7 @@ class Download_Media(Cmdlet):
table.add_result(format_dict)
try:
get_stderr_console().print(table)
_print_table_suspended(table)
setattr(table, "_rendered_by_cmdlet", True)
except Exception:
pass
@@ -2326,7 +2475,7 @@ class Download_Media(Cmdlet):
pipeline_context.set_last_result_table(table, results_list)
try:
get_stderr_console().print(table)
_print_table_suspended(table)
setattr(table, "_rendered_by_cmdlet", True)
except Exception:
pass
@@ -2499,6 +2648,7 @@ class Download_Media(Cmdlet):
# -query "hash:<sha256>"
# -query "clip:1m-1m15s,2m1s-2m11s"
# -query "hash:<sha256>,clip:1m-1m15s,item:2-3"
# -query "format:audio,item:1-3" (audio-only + playlist selection)
query_keyed = self._parse_query_keyed_spec(str(query_spec) if query_spec is not None else None)
# Optional: allow an explicit hash via -query "hash:<sha256>".
@@ -2512,7 +2662,27 @@ class Download_Media(Cmdlet):
embed_chapters = True
write_sub = True
mode = "audio" if parsed.get("audio") else "video"
# QueryArgs:
# - format:audio => audio-only (highest quality audio)
# - format:<ytdlp-format> => equivalent to -format <ytdlp-format>
query_format: Optional[str] = None
try:
fmt_values = query_keyed.get("format", []) if isinstance(query_keyed, dict) else []
fmt_candidate = fmt_values[-1] if fmt_values else None
if fmt_candidate is not None:
query_format = str(fmt_candidate).strip()
except Exception:
query_format = None
query_wants_audio = False
if query_format:
try:
query_wants_audio = str(query_format).strip().lower() == "audio"
except Exception:
query_wants_audio = False
# Explicit CLI flag wins; else query format:audio can select audio mode.
mode = "audio" if (parsed.get("audio") or query_wants_audio) else "video"
clip_ranges, clip_invalid, clip_values = self._parse_clip_ranges_and_apply_items(
clip_spec=str(clip_spec) if clip_spec is not None else None,
@@ -2534,19 +2704,84 @@ class Download_Media(Cmdlet):
storage, hydrus_available = self._init_storage(config if isinstance(config, dict) else {})
# Check if we need to show format selection
formats_cache: Dict[str, Optional[List[Dict[str, Any]]]] = {}
playlist_items = str(parsed.get("item")) if parsed.get("item") else None
ytdl_format = parsed.get("format")
# If user didn't pass -format, allow -query "format:<...>" to provide it.
# Supported query forms:
# - format:audio => audio-only mode (handled above)
# - format:720p => pick best video <= 720p and always include audio
# - format:<ytdlp -f> => treated as a raw yt-dlp selector (non-numeric)
# - format:<N> => treated as a 1-based index into the shown format list (resolved below)
if not ytdl_format and query_format and not query_wants_audio:
try:
height_selector = self._format_selector_for_query_height(query_format)
except ValueError as e:
log(f"Error parsing format selection: {e}", file=sys.stderr)
return 1
if height_selector:
ytdl_format = height_selector
else:
import re
# Preserve numeric index selection and avoid ambiguity with numeric format IDs.
if not re.match(r"^\s*#?\d+\s*$", str(query_format)):
ytdl_format = query_format
playlist_selection_handled = False
# Playlist/multi-entry detection: if the URL has multiple items and the user didn't
# specify -item or -format, show a normal selectable table and return.
if len(supported_url) == 1 and not playlist_items and not ytdl_format:
candidate_url = supported_url[0]
if self._maybe_show_playlist_table(url=candidate_url, ytdlp_tool=ytdlp_tool):
playlist_selection_handled = True
# Let the user pick items using the normal REPL prompt:
# @* | download-media ...
return 0
# Support numeric index selection via -query "format:<N>" where N is 1-based index
# into the filtered format list (e.g., -query "format:7" selects the 7th listed format).
# This allows non-interactive invocation from shells (PowerShell treats '@' specially).
if query_format and not query_wants_audio:
try:
idx_fmt = self._format_id_for_query_index(query_format, candidate_url, formats_cache, ytdlp_tool)
except ValueError as e:
log(f"Error parsing format selection: {e}", file=sys.stderr)
return 1
if idx_fmt:
debug(f"Resolved numeric format selection '{query_format}' -> {idx_fmt}")
ytdl_format = idx_fmt
if not ytdl_format:
if self._maybe_show_playlist_table(url=candidate_url, ytdlp_tool=ytdlp_tool):
playlist_selection_handled = True
# Let the user pick items using the normal REPL prompt:
# @* | download-media ...
# If we printed a format table, give a quick hint for non-interactive selection.
try:
last_table = pipeline_context.get_last_result_table() if hasattr(pipeline_context, "get_last_result_table") else None
if hasattr(last_table, "rows") and getattr(last_table, "rows", None):
# Build user-friendly examples using the base command we already constructed
sample_index = 1
sample_fmt_id = None
try:
sample_row = last_table.rows[0]
sample_fmt_id = sample_row._full_metadata.get("item_selector") if getattr(sample_row, "_full_metadata", None) else None
except Exception:
sample_fmt_id = None
try:
# Use single quotes inside the outer quotes so PowerShell doesn't interpret the pipe character
sample_pipeline = base_cmd.replace(f'"{candidate_url}"', f"'{candidate_url}'")
hint = (
"To select non-interactively, re-run with an explicit format: "
"e.g. mm \"{pipeline} -format {fmt} | add-file -store <store>\" or "
"mm \"{pipeline} -query 'format:{index}' | add-file -store <store>\""
).format(pipeline=sample_pipeline, fmt=sample_fmt_id or "<format_id>", index=sample_index)
log(hint, file=sys.stderr)
except Exception:
pass
except Exception:
pass
return 0
# Bulk preflight for playlist selections (per-entry URLs): check all URLs once before downloading.
skip_per_url_preflight = False
@@ -2563,7 +2798,7 @@ class Download_Media(Cmdlet):
# Playlist-level format preflight: if the batch has only one available format,
# discover it once and force it for every item. This avoids per-item failures
# and per-item --list-formats calls (e.g. Bandcamp albums).
formats_cache: Dict[str, Optional[List[Dict[str, Any]]]] = {}
forced_single_format_id: Optional[str] = None
forced_single_format_for_batch = False

View File

@@ -122,6 +122,8 @@ class Get_Note(Cmdlet):
for k in sorted(notes.keys(), key=lambda x: str(x).lower()):
v = notes.get(k)
raw_text = str(v or "")
# Keep payload small for IPC/pipes.
raw_text = raw_text[:999]
preview = " ".join(raw_text.replace("\r", "").split("\n"))
ctx.emit(
{

View File

@@ -401,7 +401,13 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
log(f"Hydrus relationships fetch failed: {exc}", file=sys.stderr)
if not found_relationships:
log("No relationships found.")
try:
from rich.panel import Panel
from rich_display import stdout_console
title = source_title or (hash_hex[:16] + "..." if hash_hex else "Item")
stdout_console().print(Panel(f"{title} has no relationships", title="Relationships"))
except Exception:
log("No relationships found.")
return 0
# Display results

View File

@@ -1060,6 +1060,7 @@ CMDLET = Cmdlet(
SharedArgs.URL,
CmdletArg(name="format", type="string", description="Output format: webp, png, jpeg, or pdf"),
CmdletArg(name="selector", type="string", description="CSS selector for element capture"),
SharedArgs.PATH
],
detail=[

View File

@@ -387,16 +387,10 @@ class Search_Store(Cmdlet):
results = target_backend.search(query, limit=limit)
debug(f"[search-store] '{backend_to_search}' -> {len(results or [])} result(s)")
else:
from API.HydrusNetwork import is_hydrus_available
hydrus_available = is_hydrus_available(config or {})
from Store.HydrusNetwork import HydrusNetwork
all_results = []
for backend_name in storage.list_searchable_backends():
try:
backend = storage[backend_name]
if isinstance(backend, HydrusNetwork) and not hydrus_available:
continue
searched_backends.append(backend_name)
debug(f"[search-store] Searching '{backend_name}'")

View File

@@ -1,102 +1,202 @@
"""Trim a media file using ffmpeg."""
from __future__ import annotations
from typing import Any, Dict, Sequence, List, Optional
from typing import Any, Dict, Sequence, Optional
from pathlib import Path
import sys
import json
import subprocess
import shutil
import re
import time
from urllib.parse import urlparse
from SYS.logger import log, debug
from SYS.utils import sha256_file
from . import _shared as sh
from Store import Store
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
parse_cmdlet_args = sh.parse_cmdlet_args
normalize_result_input = sh.normalize_result_input
extract_tag_from_result = sh.extract_tag_from_result
extract_title_from_result = sh.extract_title_from_result
extract_url_from_result = sh.extract_url_from_result
get_field = sh.get_field
import pipeline as ctx
CMDLET = Cmdlet(
name="trim-file",
summary="Trim a media file using ffmpeg.",
usage="trim-file [-path <path>] -range <start-end> [-delete]",
usage="trim-file [-path <path>] [-input <path-or-url>] -range <start-end> [-outdir <dir>] [-delete]",
arg=[
CmdletArg("-path", description="Path to the file (optional if piped)."),
CmdletArg("-range", required=True, description="Time range to trim (e.g. '3:45-3:55' or '00:03:45-00:03:55')."),
CmdletArg("-input", description="Override input media source (path or URL). Useful when piping store metadata but trimming from an mpv stream URL."),
CmdletArg("-range", required=True, description="Time range to trim (e.g. '3:45-3:55', '00:03:45-00:03:55', or '1h3m-1h10m30s')."),
CmdletArg("-outdir", description="Output directory for the clip (defaults to source folder for local files; otherwise uses config temp/videos)."),
CmdletArg("-delete", type="flag", description="Delete the original file after trimming."),
],
detail=[
"Creates a new file with 'clip_' prefix in the filename/title.",
"Creates a new file with 'clip_' prefix in the filename.",
"Adds the trim range to the title as: [1h3m-1h3m10s] - <title>.",
"Inherits tag values from the source file.",
"Adds a relationship to the source file (if hash is available).",
"Output can be piped to add-file.",
]
)
def _format_hms(total_seconds: float) -> str:
    """Render a duration as compact h/m/s text without colons.

    The input is rounded to whole seconds; values that cannot be converted,
    or that are negative, are treated as zero. Units whose amount is zero are
    omitted (e.g. 3605 -> "1h5s"), and a zero duration yields "0s".
    """
    try:
        whole = int(round(float(total_seconds)))
    except Exception:
        whole = 0
    whole = max(whole, 0)

    hours, remainder = divmod(whole, 3600)
    minutes, seconds = divmod(remainder, 60)

    pieces = [
        f"{amount}{unit}"
        for amount, unit in ((hours, "h"), (minutes, "m"), (seconds, "s"))
        if amount > 0
    ]
    # Ensure we always output something.
    return "".join(pieces) or "0s"
def _is_url(value: str) -> bool:
    """Report whether ``value`` parses with both a scheme and a network location.

    Anything ``urlparse`` cannot handle is treated as "not a URL".
    """
    try:
        parsed = urlparse(str(value))
    except Exception:
        return False
    return bool(parsed.scheme) and bool(parsed.netloc)
def _parse_time(time_str: str) -> float:
    """Convert time string into seconds.

    Supports:
    - HH:MM:SS(.sss)
    - MM:SS(.sss)
    - SS(.sss)
    - 1h3m53s (also 1h3m, 3m53s, 53s), case-insensitive

    Args:
        time_str: Time text; surrounding whitespace is ignored.

    Returns:
        The time in seconds.

    Raises:
        ValueError: The text is empty, has more than three colon-separated
            fields, or a field is not a finite, non-negative number.
    """
    import math

    raw = str(time_str or '').strip()
    if not raw:
        raise ValueError("Empty time")

    # h/m/s format (case-insensitive)
    hms = re.fullmatch(
        r"(?i)\s*(?:(?P<h>\d+(?:\.\d+)?)h)?(?:(?P<m>\d+(?:\.\d+)?)m)?(?:(?P<s>\d+(?:\.\d+)?)s)?\s*",
        raw,
    )
    if hms and (hms.group('h') or hms.group('m') or hms.group('s')):
        hours = float(hms.group('h') or 0)
        minutes = float(hms.group('m') or 0)
        seconds = float(hms.group('s') or 0)
        return float(hours * 3600 + minutes * 60 + seconds)

    # Colon-separated
    parts = [p.strip() for p in raw.split(':')]
    if not 1 <= len(parts) <= 3:
        raise ValueError(f"Invalid time format: {time_str}")

    try:
        values = [float(p) for p in parts]
    except ValueError:
        raise ValueError(f"Invalid time format: {time_str}") from None

    # float() also accepts "nan", "inf" and signed values; none of those is a
    # usable position in a media file.
    if any((not math.isfinite(v)) or v < 0 for v in values):
        raise ValueError(f"Invalid time format: {time_str}")

    # Left-pad to (hours, minutes, seconds).
    hours, minutes, seconds = [0.0] * (3 - len(values)) + values
    return hours * 3600 + minutes * 60 + seconds
def _sanitize_filename(name: str, *, max_len: int = 140) -> str:
    """Make ``name`` safe to use as a file name component.

    Characters that Windows forbids in file names (``<>:"/\\|?*`` and control
    characters) become ``_``, whitespace runs collapse to a single space, the
    result is cut to ``max_len`` characters, and trailing spaces and dots are
    removed. Trimming happens after truncation so the cut itself cannot leave
    a trailing dot or space behind.

    Args:
        name: Proposed name; ``None`` is treated as empty.
        max_len: Maximum length of the result.

    Returns:
        The cleaned name, or ``'clip'`` when nothing usable remains.
    """
    cleaned = str(name or '').strip()
    # Windows-forbidden characters: <>:"/\\|?* plus control chars
    cleaned = re.sub('[<>:"/\\\\|?*\\x00-\\x1F]', '_', cleaned)
    cleaned = re.sub(r"\s+", " ", cleaned).strip()
    if len(cleaned) > max_len:
        cleaned = cleaned[:max_len]
    cleaned = cleaned.rstrip(' .')
    return cleaned or 'clip'
def _extract_store_name(item: Any) -> Optional[str]:
    """Return the item's ``store`` field as a trimmed string.

    Yields ``None`` when the field is missing, blank, or cannot be read.
    """
    try:
        name = str(get_field(item, "store") or "").strip()
    except Exception:
        return None
    return name or None
def _persist_alt_relationship(*, config: Dict[str, Any], store_name: str, alt_hash: str, king_hash: str) -> None:
    """Record a one-way "alt" relationship from ``alt_hash`` to ``king_hash``.

    The backend is looked up by name in ``Store(config)``. Nothing happens if
    the lookup fails, if either hash is not 64 characters long after trimming
    and lower-casing, or if both hashes are equal.

    Two backend shapes are handled, in this order:
    - a backend whose class is named ``Folder`` with a callable ``location``:
      the relationship is written through ``API_folder_store`` at that root;
    - otherwise (or if the folder write raised), a backend exposing
      ``_client.set_relationship`` (presumably Hydrus - confirm).

    All failures are swallowed; the function never raises for backend errors.
    """
    try:
        backend: Any = Store(config)[str(store_name)]
    except Exception:
        return

    alt_norm, king_norm = (str(h or "").strip().lower() for h in (alt_hash, king_hash))
    if alt_norm == king_norm or len(alt_norm) != 64 or len(king_norm) != 64:
        return

    # Folder-backed local DB
    try:
        if type(backend).__name__ == "Folder":
            locate = getattr(backend, "location", None)
            if callable(locate):
                from API.folder import API_folder_store
                from pathlib import Path

                root = Path(str(locate())).expanduser()
                with API_folder_store(root) as db:
                    db.set_relationship_by_hash(alt_norm, king_norm, "alt", bidirectional=False)
                return
    except Exception:
        # Fall through and try the client-based path below.
        pass

    # Hydrus-like backend
    try:
        client = getattr(backend, "_client", None)
        if client is None or not hasattr(client, "set_relationship"):
            return
        client.set_relationship(alt_norm, king_norm, "alt")
    except Exception:
        return
def _trim_media(input_source: str, output_path: Path, start_seconds: float, duration_seconds: float) -> bool:
"""Trim media using ffmpeg.
input_source may be a local path or a URL.
"""
ffmpeg_path = shutil.which('ffmpeg')
if not ffmpeg_path:
log("ffmpeg not found in PATH", file=sys.stderr)
return False
# Calculate duration to avoid seeking issues if possible, or just use -to
# Using -ss before -i is faster (input seeking) but might be less accurate.
# Using -ss after -i is slower (output seeking) but accurate.
# For trimming, accuracy is usually preferred, but for long files input seeking is better.
# We'll use input seeking (-ss before -i) and -to.
cmd = [
ffmpeg_path, '-y',
'-ss', start_time,
'-i', str(input_path),
'-to', end_time,
'-c', 'copy', # Stream copy for speed and quality preservation
'-map_metadata', '0', # Copy metadata
str(output_path)
]
# If stream copy fails (e.g. cutting not on keyframe), we might need re-encoding.
# But let's try copy first as it's standard for "trimming" without quality loss.
# Note: -to with input seeking (-ss before -i) resets timestamp, so -to refers to duration?
# No, -to refers to position in output if used after -ss?
# Actually, if -ss is before -i, the timestamps are reset to 0.
# So -to should be (end - start).
# Alternatively, use -t (duration).
try:
s = _parse_time(start_time)
e = _parse_time(end_time)
duration = e - s
if duration <= 0:
log(f"Invalid range: start {start_time} >= end {end_time}", file=sys.stderr)
if duration_seconds <= 0:
log(f"Invalid range: duration <= 0 ({duration_seconds})", file=sys.stderr)
return False
cmd = [
ffmpeg_path, '-y',
'-ss', start_time,
'-i', str(input_path),
'-t', str(duration),
'-ss', str(float(start_seconds)),
'-i', str(input_source),
'-t', str(float(duration_seconds)),
'-c', 'copy',
'-map_metadata', '0',
str(output_path)
str(output_path),
]
debug(f"Running ffmpeg: {' '.join(cmd)}")
@@ -121,10 +221,27 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
log("Error: -range argument required (format: start-end)", file=sys.stderr)
return 1
start_str, end_str = range_arg.split('-', 1)
start_str, end_str = [s.strip() for s in range_arg.split('-', 1)]
if not start_str or not end_str:
log("Error: -range must be start-end", file=sys.stderr)
return 1
try:
start_seconds = _parse_time(start_str)
end_seconds = _parse_time(end_str)
except Exception as exc:
log(f"Error parsing -range: {exc}", file=sys.stderr)
return 1
duration_seconds = end_seconds - start_seconds
if duration_seconds <= 0:
log(f"Invalid range: start {start_str} >= end {end_str}", file=sys.stderr)
return 1
delete_original = parsed.get("delete", False)
path_arg = parsed.get("path")
input_override = parsed.get("input")
outdir_arg = parsed.get("outdir")
# Collect inputs
inputs = normalize_result_input(result)
@@ -140,8 +257,10 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
success_count = 0
for item in inputs:
store_name = _extract_store_name(item)
# Resolve file path
file_path = None
file_path: Optional[str] = None
if isinstance(item, dict):
file_path = item.get("path") or item.get("target")
elif hasattr(item, "path"):
@@ -149,22 +268,84 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
elif isinstance(item, str):
file_path = item
if not file_path:
if not file_path and not input_override:
continue
media_source = str(input_override or file_path)
is_url = _is_url(media_source)
path_obj: Optional[Path] = None
if not is_url:
try:
path_obj = Path(str(media_source))
except Exception:
path_obj = None
if not path_obj or not path_obj.exists():
log(f"File not found: {media_source}", file=sys.stderr)
continue
path_obj = Path(file_path)
if not path_obj.exists():
log(f"File not found: {file_path}", file=sys.stderr)
continue
# Determine output path
# Prepend clip_ to filename
new_filename = f"clip_{path_obj.name}"
output_path = path_obj.parent / new_filename
# Determine output directory
output_dir: Path
if outdir_arg:
output_dir = Path(str(outdir_arg)).expanduser()
elif store_name:
from config import resolve_output_dir
output_dir = resolve_output_dir(config or {})
elif path_obj is not None:
output_dir = path_obj.parent
else:
from config import resolve_output_dir
output_dir = resolve_output_dir(config or {})
try:
output_dir.mkdir(parents=True, exist_ok=True)
except Exception:
pass
# Determine output filename
output_ext = ''
if path_obj is not None:
output_ext = path_obj.suffix
base_name = path_obj.stem
else:
# Prefer title from metadata if present
title = extract_title_from_result(item)
if title:
base_name = _sanitize_filename(str(title))
else:
base_name = time.strftime('%Y%m%d-%H%M%S')
if base_name.lower().startswith('clip_'):
base_name = base_name[5:] or base_name
try:
p = urlparse(str(media_source))
last = (p.path or '').split('/')[-1]
if last and '.' in last:
output_ext = '.' + last.split('.')[-1]
except Exception:
pass
if not output_ext or len(output_ext) > 8:
output_ext = '.mkv'
new_filename = f"clip_{base_name}{output_ext}"
output_path = output_dir / new_filename
# Avoid clobbering existing files
if output_path.exists():
stem = output_path.stem
suffix = output_path.suffix
for i in range(1, 1000):
candidate = output_dir / f"{stem}_{i}{suffix}"
if not candidate.exists():
output_path = candidate
break
# Trim
log(f"Trimming {path_obj.name} ({start_str} to {end_str})...", file=sys.stderr)
if _trim_media(path_obj, output_path, start_str, end_str):
source_label = (path_obj.name if path_obj is not None else str(media_source))
log(f"Trimming {source_label} ({start_str} to {end_str})...", file=sys.stderr)
if _trim_media(str(media_source), output_path, start_seconds, duration_seconds):
log(f"Created clip: {output_path}", file=sys.stderr)
success_count += 1
@@ -178,98 +359,104 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
source_hash = item.hash
if not source_hash:
try:
source_hash = sha256_file(path_obj)
except Exception:
pass
if path_obj is not None:
try:
source_hash = sha256_file(path_obj)
except Exception:
pass
# 2. Get tag values
tags = extract_tag_from_result(item)
# Do not inherit tags from the source (per UX request).
new_tags: list[str] = []
# Copy URL(s) when present.
urls: list[str] = []
try:
urls = extract_url_from_result(item) or []
except Exception:
urls = []
try:
src_u = get_field(item, "source_url")
if isinstance(src_u, str) and src_u.strip():
if src_u.strip() not in urls:
urls.append(src_u.strip())
except Exception:
pass
# 3. Get title and modify it
title = extract_title_from_result(item)
if not title:
title = path_obj.stem
title = path_obj.stem if path_obj is not None else base_name
range_hms = f"{_format_hms(start_seconds)}-{_format_hms(end_seconds)}"
new_title = f"[{range_hms}] - {title}"
new_title = f"clip_{title}"
# Update title tag if present
new_tags = []
has_title_tag = False
for t in tags:
if t.lower().startswith("title:"):
new_tags.append(f"title:{new_title}")
has_title_tag = True
else:
new_tags.append(t)
if not has_title_tag:
new_tags.append(f"title:{new_title}")
# 4. Calculate clip hash and update original file's relationships
# 4. Calculate clip hash
clip_hash = None
try:
clip_hash = sha256_file(output_path)
except Exception:
pass
if source_hash and clip_hash:
# Update original file in local DB if possible
# If this was a store item, ingest the clip into the same store.
stored_store: Optional[str] = None
stored_hash: Optional[str] = None
stored_path: Optional[str] = None
if store_name:
try:
from config import get_local_storage_path
from API.folder import API_folder_store
storage_path = get_local_storage_path(config)
if storage_path:
with API_folder_store(storage_path) as db:
# Get original file metadata
# We need to find the original file by hash or path
# Try path first
orig_meta = db.get_metadata(path_obj)
if not orig_meta and source_hash:
# Try by hash
orig_path_resolved = db.search_hash(source_hash)
if orig_path_resolved:
orig_meta = db.get_metadata(orig_path_resolved)
if orig_meta:
# Update relationships
rels = orig_meta.get("relationships", {})
if not isinstance(rels, dict):
rels = {}
# Add clip as "derivative" (since original is the source)
if "derivative" not in rels:
rels["derivative"] = []
if clip_hash not in rels["derivative"]:
rels["derivative"].append(clip_hash)
# Save back to DB
# We need to preserve other metadata
orig_meta["relationships"] = rels
# Ensure hash is set in metadata if we have it
if source_hash and not orig_meta.get("hash"):
orig_meta["hash"] = source_hash
# We need the path to save
save_path = Path(orig_meta.get("path") or path_obj)
db.save_metadata(save_path, orig_meta)
log(f"Updated relationship for original file: {save_path.name}", file=sys.stderr)
except Exception as e:
log(f"Failed to update original file relationships: {e}", file=sys.stderr)
store = Store(config)
if store.is_available(store_name):
backend = store[str(store_name)]
move_flag = type(backend).__name__ == "Folder"
stored_hash = backend.add_file(
Path(str(output_path)),
title=new_title,
tag=new_tags,
url=urls,
move=move_flag,
)
stored_store = store_name
# Best-effort resolve stored path for folder backends.
try:
if type(backend).__name__ == "Folder" and hasattr(backend, "get_file"):
p = backend.get_file(str(stored_hash))
if isinstance(p, Path):
stored_path = str(p)
elif isinstance(p, str) and p:
stored_path = p
except Exception:
stored_path = None
except Exception as exc:
log(f"Failed to add clip to store '{store_name}': {exc}", file=sys.stderr)
# If we stored it, persist relationship alt -> king in that store.
if stored_store and stored_hash and source_hash:
_persist_alt_relationship(
config=config,
store_name=stored_store,
alt_hash=stored_hash,
king_hash=str(source_hash),
)
if stored_hash:
clip_hash = stored_hash
# 5. Construct result
result_dict = {
"path": str(output_path),
"path": stored_path or str(output_path),
"title": new_title,
"tag": new_tags,
"url": urls,
"media_kind": "video", # Assumption, or derive
"hash": clip_hash, # Pass calculated hash
"store": stored_store,
"relationships": {
# The source is the KING of this clip
"king": [source_hash] if source_hash else []
# Clip is an ALT of the source; store semantics are directional alt -> king.
# Provide both keys so downstream (e.g. add-file) can persist relationships.
"king": [source_hash] if source_hash else [],
"alt": [clip_hash] if (source_hash and clip_hash) else [],
}
}
@@ -279,15 +466,17 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Delete original if requested
if delete_original:
try:
path_obj.unlink()
log(f"Deleted original file: {path_obj}", file=sys.stderr)
if path_obj is not None:
path_obj.unlink()
log(f"Deleted original file: {path_obj}", file=sys.stderr)
# Also try to delete sidecars?
# Maybe leave that to user or cleanup cmdlet
except Exception as e:
log(f"Failed to delete original: {e}", file=sys.stderr)
else:
log(f"Failed to trim {path_obj.name}", file=sys.stderr)
failed_label = (path_obj.name if path_obj is not None else str(media_source))
log(f"Failed to trim {failed_label}", file=sys.stderr)
return 0 if success_count > 0 else 1