Files
Medios-Macina/cmdlet/trim_file.py

545 lines
17 KiB
Python
Raw Normal View History

2025-12-01 01:10:16 -08:00
"""Trim a media file using ffmpeg."""
2025-12-29 17:05:03 -08:00
2025-12-01 01:10:16 -08:00
from __future__ import annotations
2025-12-23 16:36:39 -08:00
from typing import Any, Dict, Sequence, Optional
2025-12-01 01:10:16 -08:00
from pathlib import Path
import sys
import subprocess
import shutil
import re
2025-12-23 16:36:39 -08:00
import time
from urllib.parse import urlparse
2025-12-01 01:10:16 -08:00
2025-12-11 19:04:02 -08:00
from SYS.logger import log, debug
from SYS.utils import sha256_file
2025-12-16 23:23:43 -08:00
from . import _shared as sh
2025-12-23 16:36:39 -08:00
from Store import Store
2025-12-16 23:23:43 -08:00
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
parse_cmdlet_args = sh.parse_cmdlet_args
normalize_result_input = sh.normalize_result_input
extract_title_from_result = sh.extract_title_from_result
2025-12-23 16:36:39 -08:00
extract_url_from_result = sh.extract_url_from_result
get_field = sh.get_field
2025-12-01 01:10:16 -08:00
import pipeline as ctx
# Declarative definition of the ``trim-file`` cmdlet: its name, usage string,
# accepted arguments and descriptive detail lines. The implementation
# (``_run``) is attached to this object at the bottom of the module.
CMDLET = Cmdlet(
    name="trim-file",
    summary="Trim a media file using ffmpeg.",
    usage=
    "trim-file [-path <path>] [-input <path-or-url>] -range <start-end> [-outdir <dir>] [-delete]",
    arg=[
        CmdletArg("-path",
                  description="Path to the file (optional if piped)."),
        CmdletArg(
            "-input",
            description=
            "Override input media source (path or URL). Useful when piping store metadata but trimming from an mpv stream URL.",
        ),
        CmdletArg(
            "-range",
            required=True,
            description=
            "Time range to trim (e.g. '3:45-3:55', '00:03:45-00:03:55', or '1h3m-1h10m30s').",
        ),
        CmdletArg(
            "-outdir",
            description=
            "Output directory for the clip (defaults to source folder for local files; otherwise uses config temp/videos).",
        ),
        CmdletArg(
            "-delete",
            type="flag",
            description="Delete the original file after trimming."
        ),
    ],
    detail=[
        "Creates a new file with 'clip_' prefix in the filename.",
        "Adds the trim range to the title as: [1h3m-1h3m10s] - <title>.",
        # The implementation emits an empty tag list and copies the source
        # URL(s) instead, so the help text must not promise tag inheritance.
        "Does not inherit tag values from the source file; source URL(s) are copied.",
        "Adds a relationship to the source file (if hash is available).",
        "Output can be piped to add-file.",
    ],
)
2025-12-23 16:36:39 -08:00
def _format_hms(total_seconds: float) -> str:
"""Format seconds as compact h/m/s (no colons), e.g. 1h3m10s, 3m5s, 2s."""
try:
total = int(round(float(total_seconds)))
except Exception:
total = 0
if total < 0:
total = 0
hours = total // 3600
minutes = (total % 3600) // 60
seconds = total % 60
parts: list[str] = []
if hours > 0:
parts.append(f"{hours}h")
if minutes > 0:
parts.append(f"{minutes}m")
if seconds > 0:
parts.append(f"{seconds}s")
# Ensure we always output something.
if not parts:
return "0s"
return "".join(parts)
2025-12-29 17:05:03 -08:00
2025-12-23 16:36:39 -08:00
def _is_url(value: str) -> bool:
try:
p = urlparse(str(value))
return bool(p.scheme and p.netloc)
except Exception:
return False
2025-12-01 01:10:16 -08:00
def _parse_time(time_str: str) -> float:
2025-12-23 16:36:39 -08:00
"""Convert time string into seconds.
Supports:
- HH:MM:SS(.sss)
- MM:SS(.sss)
- SS(.sss)
- 1h3m53s (also 1h3m, 3m53s, 53s)
"""
2025-12-29 17:05:03 -08:00
raw = str(time_str or "").strip()
2025-12-23 16:36:39 -08:00
if not raw:
raise ValueError("Empty time")
# h/m/s format (case-insensitive)
hms = re.fullmatch(
r"(?i)\s*(?:(?P<h>\d+(?:\.\d+)?)h)?(?:(?P<m>\d+(?:\.\d+)?)m)?(?:(?P<s>\d+(?:\.\d+)?)s)?\s*",
raw,
)
2025-12-29 17:05:03 -08:00
if hms and (hms.group("h") or hms.group("m") or hms.group("s")):
hours = float(hms.group("h") or 0)
minutes = float(hms.group("m") or 0)
seconds = float(hms.group("s") or 0)
2025-12-23 16:36:39 -08:00
total = hours * 3600 + minutes * 60 + seconds
return float(total)
# Colon-separated
2025-12-29 17:05:03 -08:00
parts = [p.strip() for p in raw.split(":")]
2025-12-01 01:10:16 -08:00
if len(parts) == 3:
return float(parts[0]) * 3600 + float(parts[1]) * 60 + float(parts[2])
2025-12-23 16:36:39 -08:00
if len(parts) == 2:
2025-12-01 01:10:16 -08:00
return float(parts[0]) * 60 + float(parts[1])
2025-12-23 16:36:39 -08:00
if len(parts) == 1:
2025-12-01 01:10:16 -08:00
return float(parts[0])
2025-12-23 16:36:39 -08:00
raise ValueError(f"Invalid time format: {time_str}")
def _sanitize_filename(name: str, *, max_len: int = 140) -> str:
2025-12-29 17:05:03 -08:00
name = str(name or "").strip()
2025-12-23 16:36:39 -08:00
if not name:
2025-12-29 17:05:03 -08:00
return "clip"
2025-12-23 16:36:39 -08:00
# Windows-forbidden characters: <>:"/\\|?* plus control chars
2025-12-29 17:05:03 -08:00
name = re.sub('[<>:"/\\\\|?*\\x00-\\x1F]', "_", name)
2025-12-23 16:36:39 -08:00
name = re.sub(r"\s+", " ", name).strip()
2025-12-29 17:05:03 -08:00
name = name.rstrip(".")
2025-12-23 16:36:39 -08:00
if not name:
2025-12-29 17:05:03 -08:00
return "clip"
2025-12-23 16:36:39 -08:00
if len(name) > max_len:
name = name[:max_len].rstrip()
return name
def _extract_store_name(item: Any) -> Optional[str]:
    """Return the stripped ``store`` field of *item*, or None.

    None is returned when the field is missing, empty or whitespace-only, or
    when reading it raises.
    """
    try:
        name = str(get_field(item, "store") or "").strip()
    except Exception:
        return None
    return name or None
2025-12-29 17:05:03 -08:00
def _persist_alt_relationship(
*,
config: Dict[str,
Any],
store_name: str,
alt_hash: str,
king_hash: str
2025-12-29 17:05:03 -08:00
) -> None:
2025-12-23 16:36:39 -08:00
"""Persist directional alt -> king relationship in the given backend."""
try:
store = Store(config)
backend: Any = store[str(store_name)]
except Exception:
return
alt_norm = str(alt_hash or "").strip().lower()
king_norm = str(king_hash or "").strip().lower()
if len(alt_norm) != 64 or len(king_norm) != 64 or alt_norm == king_norm:
return
# Folder-backed local DB
try:
if (type(backend).__name__ == "Folder" and hasattr(backend,
"location")
and callable(getattr(backend,
"location"))):
2025-12-23 16:36:39 -08:00
from API.folder import API_folder_store
from pathlib import Path
root = Path(str(backend.location())).expanduser()
with API_folder_store(root) as db:
db.set_relationship_by_hash(
alt_norm,
king_norm,
"alt",
bidirectional=False
)
2025-12-23 16:36:39 -08:00
return
except Exception:
pass
# Hydrus-like backend
try:
client = getattr(backend, "_client", None)
if client is not None and hasattr(client, "set_relationship"):
client.set_relationship(alt_norm, king_norm, "alt")
except Exception:
return
2025-12-29 17:05:03 -08:00
def _trim_media(
    input_source: str,
    output_path: Path,
    start_seconds: float,
    duration_seconds: float,
) -> bool:
    """Trim media using ffmpeg.

    Args:
        input_source: Local path or URL handed to ffmpeg as ``-i``.
        output_path: File to write; an existing file is overwritten (``-y``).
        start_seconds: Offset of the clip start, passed as ``-ss``.
        duration_seconds: Length of the clip, passed as ``-t``; must be > 0.

    Returns:
        True when ffmpeg exits with status 0. False when ffmpeg is not on
        PATH, the duration is not positive, ffmpeg reports an error, or
        anything raises while building or running the command.
    """
    ffmpeg_path = shutil.which("ffmpeg")
    if not ffmpeg_path:
        log("ffmpeg not found in PATH", file=sys.stderr)
        return False

    try:
        if duration_seconds <= 0:
            log(f"Invalid range: duration <= 0 ({duration_seconds})", file=sys.stderr)
            return False

        cmd = [
            ffmpeg_path,
            "-y",
            # -ss is placed before -i, so it applies to the input.
            "-ss",
            str(float(start_seconds)),
            "-i",
            str(input_source),
            "-t",
            str(float(duration_seconds)),
            # Stream copy: no re-encoding.
            "-c",
            "copy",
            "-map_metadata",
            "0",
            str(output_path),
        ]

        debug(f"Running ffmpeg: {' '.join(cmd)}")
        result = subprocess.run(
            cmd,
            # Do not let ffmpeg read (and swallow) the caller's stdin.
            stdin=subprocess.DEVNULL,
            capture_output=True,
            text=True,
            # ffmpeg's diagnostics may contain bytes that are invalid in the
            # locale encoding; a decode error must not turn a successful trim
            # into a reported failure.
            errors="replace",
        )

        if result.returncode != 0:
            log(f"ffmpeg error: {result.stderr}", file=sys.stderr)
            return False

        return True
    except Exception as e:
        log(f"Error parsing time or running ffmpeg: {e}", file=sys.stderr)
        return False
2025-12-29 17:05:03 -08:00
2025-12-01 01:10:16 -08:00
def _item_file_path(item: Any) -> Optional[str]:
    """Return the path-like value carried by a pipeline item, if any."""
    if isinstance(item, dict):
        return item.get("path") or item.get("target")
    if hasattr(item, "path"):
        return item.path
    if isinstance(item, str):
        return item
    return None


def _resolve_clip_output_dir(
    outdir_arg: Any,
    store_name: Optional[str],
    path_obj: Optional[Path],
    config: Dict[str, Any],
) -> Path:
    """Pick the clip directory: -outdir, else source folder, else config dir."""
    if outdir_arg:
        return Path(str(outdir_arg)).expanduser()
    # Store items and URL sources never write next to the source file.
    if path_obj is not None and not store_name:
        return path_obj.parent
    from SYS.config import resolve_output_dir

    return resolve_output_dir(config or {})


def _clip_name_parts(
    item: Any,
    path_obj: Optional[Path],
    media_source: str,
) -> tuple[str, str]:
    """Return ``(base_name, extension)`` for the clip file name."""
    # NOTE(review): the reviewed copy of this file had lost its indentation.
    # The nesting below (prefix stripping and URL extension sniffing only for
    # non-local sources; the ".mkv" fallback for both) is reconstructed from
    # the statement order - confirm against the repository.
    output_ext = ""
    if path_obj is not None:
        output_ext = path_obj.suffix
        base_name = path_obj.stem
    else:
        # Prefer title from metadata if present
        title = extract_title_from_result(item)
        if title:
            base_name = _sanitize_filename(str(title))
        else:
            base_name = time.strftime("%Y%m%d-%H%M%S")

        # Avoid "clip_clip_..." once the prefix is added again.
        if base_name.lower().startswith("clip_"):
            base_name = base_name[5:] or base_name

        try:
            last = (urlparse(str(media_source)).path or "").split("/")[-1]
            if last and "." in last:
                output_ext = "." + last.split(".")[-1]
        except Exception:
            pass

    if not output_ext or len(output_ext) > 8:
        output_ext = ".mkv"
    return base_name, output_ext


def _unique_clip_path(output_dir: Path, filename: str) -> Path:
    """Return a path in *output_dir* that does not exist yet."""
    output_path = output_dir / filename
    if not output_path.exists():
        return output_path

    stem = output_path.stem
    suffix = output_path.suffix
    for i in range(1, 1000):
        candidate = output_dir / f"{stem}_{i}{suffix}"
        if not candidate.exists():
            return candidate
    # Every numbered name is taken: use a timestamp rather than letting
    # ffmpeg's -y overwrite an existing clip.
    return output_dir / f"{stem}_{int(time.time() * 1000)}{suffix}"


def _source_hash_for(item: Any, path_obj: Optional[Path]) -> Any:
    """Return the item's ``hash``, else the SHA-256 of the local source."""
    source_hash = None
    if isinstance(item, dict):
        source_hash = item.get("hash")
    elif hasattr(item, "hash"):
        source_hash = item.hash

    if not source_hash and path_obj is not None:
        try:
            source_hash = sha256_file(path_obj)
        except Exception:
            pass
    return source_hash


def _collect_clip_urls(item: Any) -> list[str]:
    """Gather the item's URL(s), adding ``source_url`` when it is new."""
    urls: list[str] = []
    try:
        urls = extract_url_from_result(item) or []
    except Exception:
        urls = []
    try:
        src_u = get_field(item, "source_url")
        if isinstance(src_u, str) and src_u.strip():
            if src_u.strip() not in urls:
                urls.append(src_u.strip())
    except Exception:
        pass
    return urls


def _ingest_clip(
    config: Dict[str, Any],
    store_name: str,
    output_path: Path,
    title: str,
    tags: list[str],
    urls: list[str],
) -> tuple[Optional[str], Optional[str], Optional[str]]:
    """Add the clip to *store_name*; return ``(store, hash, path)``.

    Each element stays None when the corresponding step did not happen.
    Failures are logged and never raised.
    """
    stored_store: Optional[str] = None
    stored_hash: Optional[str] = None
    stored_path: Optional[str] = None
    try:
        store = Store(config)
        if store.is_available(store_name):
            backend = store[str(store_name)]
            is_folder = type(backend).__name__ == "Folder"
            stored_hash = backend.add_file(
                Path(str(output_path)),
                title=title,
                tag=tags,
                url=urls,
                move=is_folder,
            )
            stored_store = store_name
            # Best-effort resolve stored path for folder backends.
            try:
                if is_folder and hasattr(backend, "get_file"):
                    p = backend.get_file(str(stored_hash))
                    if isinstance(p, Path):
                        stored_path = str(p)
                    elif isinstance(p, str) and p:
                        stored_path = p
            except Exception:
                stored_path = None
    except Exception as exc:
        log(
            f"Failed to add clip to store '{store_name}': {exc}",
            file=sys.stderr
        )
    return stored_store, stored_hash, stored_path


def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
    """Trim every input media item to the requested ``-range``.

    Inputs are the piped *result* plus the optional ``-path`` argument;
    ``-input`` overrides the media source used for each of them. For every
    item a ``clip_``-prefixed file is written, optionally added to the item's
    store, and a result dict describing the clip is emitted to the pipeline.

    Args:
        result: Piped pipeline input (normalized via ``normalize_result_input``).
        args: Raw cmdlet arguments.
        config: Application configuration.

    Returns:
        0 when at least one clip was created, otherwise 1 (including invalid
        arguments and missing inputs).
    """
    # Parse arguments
    parsed = parse_cmdlet_args(args, CMDLET)

    range_arg = parsed.get("range")
    if not range_arg or "-" not in range_arg:
        log("Error: -range argument required (format: start-end)", file=sys.stderr)
        return 1

    start_str, end_str = [s.strip() for s in range_arg.split("-", 1)]
    if not start_str or not end_str:
        log("Error: -range must be start-end", file=sys.stderr)
        return 1
    try:
        start_seconds = _parse_time(start_str)
        end_seconds = _parse_time(end_str)
    except Exception as exc:
        log(f"Error parsing -range: {exc}", file=sys.stderr)
        return 1
    duration_seconds = end_seconds - start_seconds
    # ``not (x > 0)`` also rejects NaN, which ``x <= 0`` would let through.
    if not duration_seconds > 0:
        log(f"Invalid range: start {start_str} >= end {end_str}", file=sys.stderr)
        return 1

    delete_original = parsed.get("delete", False)
    path_arg = parsed.get("path")
    input_override = parsed.get("input")
    outdir_arg = parsed.get("outdir")

    # Collect inputs; an explicit -path is processed like one more item.
    inputs = normalize_result_input(result)
    if path_arg:
        inputs.append({
            "path": path_arg
        })

    if not inputs:
        log("No input files provided.", file=sys.stderr)
        return 1

    success_count = 0

    for item in inputs:
        store_name = _extract_store_name(item)

        # Resolve the media source for this item.
        file_path = _item_file_path(item)
        if not file_path and not input_override:
            continue
        media_source = str(input_override or file_path)

        # Local sources must exist; URLs are handed to ffmpeg as-is.
        path_obj: Optional[Path] = None
        if not _is_url(media_source):
            try:
                path_obj = Path(str(media_source))
            except Exception:
                path_obj = None
            if not path_obj or not path_obj.exists():
                log(f"File not found: {media_source}", file=sys.stderr)
                continue

        # Determine output directory
        output_dir = _resolve_clip_output_dir(outdir_arg, store_name, path_obj, config)
        try:
            output_dir.mkdir(parents=True, exist_ok=True)
        except Exception as exc:
            # Still attempt the trim; ffmpeg reports the definitive error.
            log(
                f"Failed to create output directory {output_dir}: {exc}",
                file=sys.stderr
            )

        # Determine output filename, avoiding existing files.
        base_name, output_ext = _clip_name_parts(item, path_obj, media_source)
        output_path = _unique_clip_path(output_dir, f"clip_{base_name}{output_ext}")

        # Trim
        source_label = path_obj.name if path_obj is not None else str(media_source)
        log(f"Trimming {source_label} ({start_str} to {end_str})...", file=sys.stderr)
        if not _trim_media(str(media_source), output_path, start_seconds, duration_seconds):
            log(f"Failed to trim {source_label}", file=sys.stderr)
            continue

        log(f"Created clip: {output_path}", file=sys.stderr)
        success_count += 1

        # 1. Get source hash for relationship
        source_hash = _source_hash_for(item, path_obj)

        # 2. Tags are deliberately not inherited from the source (per UX
        #    request); URL(s) are copied when present.
        new_tags: list[str] = []
        urls = _collect_clip_urls(item)

        # 3. Get title and prefix it with the trim range.
        title = extract_title_from_result(item)
        if not title:
            title = path_obj.stem if path_obj is not None else base_name
        range_hms = f"{_format_hms(start_seconds)}-{_format_hms(end_seconds)}"
        new_title = f"[{range_hms}] - {title}"

        # 4. Calculate clip hash (before ingest, which may move the file).
        clip_hash = None
        try:
            clip_hash = sha256_file(output_path)
        except Exception:
            pass

        # If this was a store item, ingest the clip into the same store.
        stored_store: Optional[str] = None
        stored_hash: Optional[str] = None
        stored_path: Optional[str] = None
        if store_name:
            stored_store, stored_hash, stored_path = _ingest_clip(
                config, store_name, output_path, new_title, new_tags, urls
            )

        # If we stored it, persist relationship alt -> king in that store.
        if stored_store and stored_hash and source_hash:
            _persist_alt_relationship(
                config=config,
                store_name=stored_store,
                alt_hash=stored_hash,
                king_hash=str(source_hash),
            )
        if stored_hash:
            clip_hash = stored_hash

        # 5. Construct result
        result_dict = {
            "path": stored_path or str(output_path),
            "title": new_title,
            "tag": new_tags,
            "url": urls,
            "media_kind": "video",  # Assumption, or derive
            "hash": clip_hash,  # Pass calculated hash
            "store": stored_store,
            "relationships": {
                # Clip is an ALT of the source; store semantics are directional alt -> king.
                # Provide both keys so downstream (e.g. add-file) can persist relationships.
                "king": [source_hash] if source_hash else [],
                "alt": [clip_hash] if (source_hash and clip_hash) else [],
            },
        }

        # Emit result
        ctx.emit(result_dict)

        # Delete original if requested (local sources only).
        if delete_original:
            try:
                if path_obj is not None:
                    path_obj.unlink()
                    log(f"Deleted original file: {path_obj}", file=sys.stderr)
                    # Sidecar files are left alone on purpose.
            except Exception as e:
                log(f"Failed to delete original: {e}", file=sys.stderr)

    return 0 if success_count > 0 else 1
2025-12-12 21:55:38 -08:00
# Register cmdlet (no legacy decorator): attach ``_run`` as ``CMDLET.exec``
# and call ``register()`` when this module is imported.
CMDLET.exec = _run
CMDLET.register()