"""Merge multiple files into a single output file."""

from __future__ import annotations

from typing import Any, Dict, Optional, Sequence, List
from pathlib import Path
import sys

from SYS.logger import log

import subprocess as _subprocess
import shutil as _shutil
import re as _re

from config import resolve_output_dir

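# Typical pipeline usage (see CMDLET.detail at the bottom of this module):
#   search-file query | [1,2,3] | merge-file
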
from ._shared import (
    Cmdlet,
    CmdletArg,
    create_pipe_object_result,
    get_field,
    get_pipe_object_hash,
    get_pipe_object_path,
    normalize_result_input,
    parse_cmdlet_args,
    should_show_help,
)

import pipeline as ctx

try:
    from PyPDF2 import PdfWriter, PdfReader

    HAS_PYPDF2 = True
except ImportError:
    HAS_PYPDF2 = False
    PdfWriter = None
    PdfReader = None

try:
    from metadata import (
        read_tags_from_file,
        dedup_tags_by_namespace,
    )

    HAS_METADATA_API = True
except ImportError:
    HAS_METADATA_API = False

    # Fallback no-op stubs so the cmdlet still loads without the metadata API.
    def read_tags_from_file(file_path: Path) -> List[str]:
        return []

    def write_tags_to_file(
        file_path: Path,
        tags: List[str],
        source_hashes: Optional[List[str]] = None,
        url: Optional[List[str]] = None,
        append: bool = False,
    ) -> bool:
        return False

    def dedup_tags_by_namespace(tags: List[str]) -> List[str]:
        return tags

    def write_metadata(*_args: Any, **_kwargs: Any) -> None:
        return None


def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
    """Merge multiple files into one."""
    # Parse help
    if should_show_help(args):
        log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
        return 0

    # Parse arguments
    parsed = parse_cmdlet_args(args, CMDLET)
    delete_after = parsed.get("delete", False)

    output_override: Optional[Path] = None
    output_arg = parsed.get("output")
    if output_arg:
        try:
            output_override = Path(str(output_arg)).expanduser()
        except Exception:
            output_override = None

    format_spec = parsed.get("format")
    if format_spec:
        format_spec = str(format_spec).lower().strip()

    # Collect files from piped results.
    # normalize_result_input handles both single items and lists.
    files_to_merge: List[Dict[str, Any]] = normalize_result_input(result)

    if not files_to_merge:
        log("No files provided to merge", file=sys.stderr)
        return 1

    if len(files_to_merge) < 2:
        # Only one file - pass it through unchanged
        # (a merge only happens when multiple files are collected).
        item = files_to_merge[0]
        ctx.emit(item)
        return 0

    # Extract file paths and metadata from result objects
    source_files: List[Path] = []
    source_hashes: List[str] = []
    source_url: List[str] = []
    source_tags: List[str] = []  # Tags collected from source sidecar files

    for item in files_to_merge:
        raw_path = get_pipe_object_path(item)
        target_path = None
        if isinstance(raw_path, Path):
            target_path = raw_path
        elif isinstance(raw_path, str) and raw_path.strip():
            candidate = Path(raw_path).expanduser()
            if candidate.exists():
                target_path = candidate

        if target_path and target_path.exists():
            source_files.append(target_path)

            # Track tags from the .tag sidecar for this source (if present)
            tags_file = target_path.with_suffix(target_path.suffix + '.tag')
            if tags_file.exists() and HAS_METADATA_API:
                try:
                    source_tags.extend(read_tags_from_file(tags_file) or [])
                except Exception:
                    pass

            # Extract hash if available in item (as fallback)
            hash_value = get_pipe_object_hash(item)
            if hash_value and hash_value not in source_hashes:
                source_hashes.append(str(hash_value))

            # Extract known url if available
            url = get_field(item, 'url', [])
            if isinstance(url, str):
                source_url.append(url)
            elif isinstance(url, list):
                source_url.extend(url)
        else:
            title = get_field(item, 'title', 'unknown') or get_field(item, 'id', 'unknown')
            log(f"Warning: Could not locate file for item: {title}", file=sys.stderr)

    if len(source_files) < 2:
        log("At least 2 valid files required to merge", file=sys.stderr)
        return 1

    # Detect file types
    file_types = set()
    for f in source_files:
        suffix = f.suffix.lower()
        if suffix in {'.mp3', '.flac', '.wav', '.m4a', '.aac', '.ogg', '.opus', '.mka'}:
            file_types.add('audio')
        elif suffix in {'.mp4', '.mkv', '.webm', '.mov', '.avi', '.flv', '.mpg', '.mpeg', '.ts', '.m4v', '.wmv'}:
            file_types.add('video')
        elif suffix == '.pdf':
            file_types.add('pdf')
        elif suffix in {'.txt', '.srt', '.vtt', '.md', '.log'}:
            file_types.add('text')
        else:
            file_types.add('other')

    if len(file_types) > 1 and 'other' not in file_types:
        log(f"Mixed file types detected: {', '.join(sorted(file_types))}", file=sys.stderr)
        log("Can only merge files of the same type", file=sys.stderr)
        return 1

    file_kind = list(file_types)[0] if file_types else 'other'

    # Determine output format
    output_format = format_spec or 'auto'
    if output_format == 'auto':
        if file_kind == 'audio':
            output_format = 'mka'  # Default audio container - supports chapters and stream copy
        elif file_kind == 'video':
            output_format = 'mp4'  # Default video container
        elif file_kind == 'pdf':
            output_format = 'pdf'
        else:
            output_format = 'txt'

    # Determine output path
    if output_override:
        if output_override.is_dir():
            base_title = get_field(files_to_merge[0], 'title', 'merged')
            base_name = _sanitize_name(str(base_title or 'merged'))
            output_path = output_override / f"{base_name} (merged).{_ext_for_format(output_format)}"
        else:
            output_path = output_override
    else:
        first_file = source_files[0]
        try:
            base_dir = resolve_output_dir(config)
        except Exception:
            base_dir = first_file.parent
        output_path = Path(base_dir) / f"{first_file.stem} (merged).{_ext_for_format(output_format)}"

    # Ensure output directory exists
    output_path.parent.mkdir(parents=True, exist_ok=True)

    # Perform merge based on file type
    if file_kind == 'audio':
        success = _merge_audio(source_files, output_path, output_format)
    elif file_kind == 'video':
        success = _merge_video(source_files, output_path, output_format)
    elif file_kind == 'pdf':
        success = _merge_pdf(source_files, output_path)
    elif file_kind == 'text':
        success = _merge_text(source_files, output_path)
    else:
        log(f"Unsupported file type: {file_kind}", file=sys.stderr)
        return 1

    if not success:
        log("Merge failed", file=sys.stderr)
        return 1

    log(f"Merged {len(source_files)} files into: {output_path}", file=sys.stderr)

    merged_tags: List[str] = [f"title:{output_path.stem}"]

    # Merge tags from source files into the emitted PipeObject only.
    # Sidecar files (.tag/.metadata) are written only during explicit
    # filesystem export (add-file to a path).
    if source_tags and HAS_METADATA_API:
        merged_source_tags = dedup_tags_by_namespace(source_tags)
        merged_tags.extend(merged_source_tags)
        log(f"Merged {len(merged_source_tags)} unique tags from source files", file=sys.stderr)
    elif source_tags:
        merged_tags.extend(list(dict.fromkeys(source_tags)))  # Preserve order, remove duplicates

    # Emit a PipeObject-compatible dict so the merged file can be piped to the next command
    try:
        from SYS.utils import sha256_file

        merged_hash = sha256_file(output_path)
        merged_item = create_pipe_object_result(
            source="local",
            identifier=output_path.name,
            file_path=str(output_path),
            cmdlet_name="merge-file",
            title=output_path.stem,
            hash_value=merged_hash,
            tag=merged_tags,
            url=source_url,
            media_kind=file_kind,
            store="PATH",
        )
        # Clear previous results to ensure only the merged file is passed down
        ctx.clear_last_result()
        ctx.emit(merged_item)
    except Exception as e:
        log(f"Warning: Could not emit pipeline item: {e}", file=sys.stderr)
        # Still emit a string representation for feedback
        ctx.emit(f"Merged: {output_path}")

    # Cleanup
    # - Delete source files only when -delete is set.
    if delete_after:
        for f in source_files:
            try:
                # Delete sidecar tags for the source (if any)
                tag_file = f.with_suffix(f.suffix + '.tag')
                if tag_file.exists():
                    try:
                        tag_file.unlink()
                        log(f"Deleted: {tag_file.name}", file=sys.stderr)
                    except Exception as e:
                        log(f"Warning: Could not delete {tag_file.name}: {e}", file=sys.stderr)
            except Exception:
                pass

            try:
                if f.exists():
                    f.unlink()
                    log(f"Deleted: {f.name}", file=sys.stderr)
            except Exception as e:
                log(f"Warning: Could not delete {f.name}: {e}", file=sys.stderr)

    return 0


def _sanitize_name(text: str) -> str:
    """Sanitize filename."""
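    # For example, "Book: Part 1/2" becomes "Book Part 1 2" -- disallowed
    # characters turn into spaces, and runs of whitespace are collapsed.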
    allowed = []
    for ch in text:
        allowed.append(ch if (ch.isalnum() or ch in {"-", "_", " ", "."}) else " ")
    return (" ".join("".join(allowed).split()) or "merged").strip()


def _ext_for_format(fmt: str) -> str:
    """Get file extension for format."""
    format_map = {
        'mp3': 'mp3',
        'm4a': 'm4a',
        'm4b': 'm4b',
        'aac': 'aac',
        'opus': 'opus',
        'mka': 'mka',  # Matroska Audio - excellent chapter support (recommended)
        'mkv': 'mkv',
        'mp4': 'mp4',
        'webm': 'webm',
        'pdf': 'pdf',
        'txt': 'txt',
        'auto': 'mka',  # Default - MKA for chapters
    }
    return format_map.get(fmt.lower(), 'mka')


def _merge_audio(files: List[Path], output: Path, output_format: str) -> bool:
    """Merge audio files with chapters based on file boundaries."""
    import logging
    logger = logging.getLogger(__name__)

    ffmpeg_path = _shutil.which('ffmpeg')
    if not ffmpeg_path:
        log("ffmpeg not found in PATH", file=sys.stderr)
        return False

    try:
        # Step 1: Get the duration of each file to calculate chapter timestamps
        chapters = []
        current_time_ms = 0

        log(f"Analyzing {len(files)} files for chapter information...", file=sys.stderr)
        logger.info("[merge-file] Analyzing files for chapters")

        for file_path in files:
            # Get duration using ffprobe
            try:
                ffprobe_cmd = [
                    'ffprobe', '-v', 'error', '-show_entries',
                    'format=duration', '-print_format',
                    'default=noprint_wrappers=1:nokey=1', str(file_path)
                ]
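                # With these flags ffprobe prints just the duration in seconds
                # as a bare float on stdout, e.g. "1234.567890".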
                probe_result = _subprocess.run(ffprobe_cmd, capture_output=True, text=True, timeout=10)
                if probe_result.returncode == 0 and probe_result.stdout.strip():
                    try:
                        duration_sec = float(probe_result.stdout.strip())
                    except ValueError:
                        logger.warning(f"[merge-file] Could not parse duration from ffprobe output: {probe_result.stdout}")
                        duration_sec = 0
                else:
                    logger.warning(f"[merge-file] ffprobe failed for {file_path.name}: {probe_result.stderr}")
                    duration_sec = 0
            except Exception as e:
                logger.warning(f"[merge-file] Could not get duration for {file_path.name}: {e}")
                duration_sec = 0

            # Create chapter entry - use a title: tag from metadata if available
            title = file_path.stem  # Default to filename without extension
            if HAS_METADATA_API:
                try:
                    # Try to read tags from the .tag sidecar file
                    tags_file = file_path.with_suffix(file_path.suffix + '.tag')
                    if tags_file.exists():
                        tags = read_tags_from_file(tags_file)
                        if tags:
                            # Look for a title: tag
                            for tag in tags:
                                if isinstance(tag, str) and tag.lower().startswith('title:'):
                                    # Extract the title value after the colon
                                    title = tag.split(':', 1)[1].strip()
                                    break
                except Exception as e:
                    # Fall back to the filename
                    logger.debug(f"[merge-file] Could not read metadata for {file_path.name}: {e}")

            # Convert elapsed milliseconds to HH:MM:SS.mmm format
            hours = int(current_time_ms // 3600000)
            minutes = int((current_time_ms % 3600000) // 60000)
            seconds = int((current_time_ms % 60000) // 1000)
            millis = int(current_time_ms % 1000)
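            # For example, current_time_ms = 3723456 formats as "01:02:03.456".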

            chapters.append({
                'time_ms': current_time_ms,
                'time_str': f"{hours:02d}:{minutes:02d}:{seconds:02d}.{millis:03d}",
                'title': title,
                'duration_sec': duration_sec
            })

            logger.info(f"[merge-file] Chapter: {title} @ {chapters[-1]['time_str']} (duration: {duration_sec:.2f}s)")
            current_time_ms += int(duration_sec * 1000)

        # If these came from a playlist/album, titles often look like
        # "Book Name - Chapter". If *all* titles share the same
        # "Book Name" prefix, strip it.
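        # e.g. ["Moby Dick - Chapter 01", "Moby Dick - Chapter 02"]
        #   -> ["Chapter 01", "Chapter 02"]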
        if len(chapters) >= 2:
            split_re = _re.compile(r"^(?P<prefix>.+?)\s+-\s+(?P<chapter>.+)$")
            prefixes: List[str] = []
            stripped_titles: List[str] = []
            all_match = True
            for ch in chapters:
                raw_title = str(ch.get('title') or '').strip()
                m = split_re.match(raw_title)
                if not m:
                    all_match = False
                    break
                prefix = m.group('prefix').strip()
                chapter_title = m.group('chapter').strip()
                if not prefix or not chapter_title:
                    all_match = False
                    break
                prefixes.append(prefix.casefold())
                stripped_titles.append(chapter_title)

            if all_match and prefixes and len(set(prefixes)) == 1:
                for idx, ch in enumerate(chapters):
                    ch['title'] = stripped_titles[idx]
                logger.info(f"[merge-file] Stripped common title prefix for chapters: {prefixes[0]}")

        # Step 2: Create concat demuxer file
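        # The list uses ffmpeg's concat-demuxer syntax, one entry per line:
        #   file '/path/to/01 intro.mp3'
        #   file '/path/to/02 body.mp3'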
        concat_file = output.parent / f".concat_{output.stem}.txt"
        concat_lines = []
        for f in files:
            # Escape single quotes in the path ('  becomes  '\'')
            safe_path = str(f).replace("'", "'\\''")
            concat_lines.append(f"file '{safe_path}'")

        concat_file.write_text('\n'.join(concat_lines), encoding='utf-8')

        # Step 3: Create FFmpeg metadata file with chapters
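        # The resulting FFMETADATA file looks like:
        #   ;FFMETADATA1
        #   [CHAPTER]
        #   TIMEBASE=1/1000
        #   START=0
        #   END=245000
        #   title=Chapter 01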
        metadata_file = output.parent / f".metadata_{output.stem}.txt"
        metadata_lines = [';FFMETADATA1']

        for i, chapter in enumerate(chapters):
            # FFMetadata format for chapters (note: [CHAPTER], not [CHAPTER01])
            metadata_lines.append('[CHAPTER]')
            metadata_lines.append('TIMEBASE=1/1000')
            metadata_lines.append(f'START={chapter["time_ms"]}')
            # End time is the start of the next chapter, or the end of the file
            if i < len(chapters) - 1:
                metadata_lines.append(f'END={chapters[i + 1]["time_ms"]}')
            else:
                metadata_lines.append(f'END={current_time_ms}')
            metadata_lines.append(f'title={chapter["title"]}')

        metadata_file.write_text('\n'.join(metadata_lines), encoding='utf-8')
        log(f"Created chapters metadata file with {len(chapters)} chapters", file=sys.stderr)
        logger.info(f"[merge-file] Created {len(chapters)} chapters")

        # Step 4: Build FFmpeg command to merge the inputs.
        # Strategy: merge the audio first, then add metadata in a separate pass.
        cmd = [ffmpeg_path, '-y', '-f', 'concat', '-safe', '0', '-i', str(concat_file)]
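        # Note: '-safe 0' lets the concat demuxer accept absolute paths in the
        # list file, which it would otherwise reject as "unsafe".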

        # Use all available threads
        cmd.extend(['-threads', '0'])

        # Audio codec selection
        if output_format == 'mp3':
            cmd.extend(['-c:a', 'libmp3lame', '-q:a', '2'])
        elif output_format in {'m4a', 'm4b'}:
            # Stream copy would be much faster, but the inputs may use
            # different codecs, so re-encoding to AAC is the safer choice.
            cmd.extend(['-c:a', 'aac', '-b:a', '256k'])
        elif output_format == 'aac':
            cmd.extend(['-c:a', 'aac', '-b:a', '192k'])
        elif output_format == 'opus':
            cmd.extend(['-c:a', 'libopus', '-b:a', '128k'])
        elif output_format == 'mka':
            # MKA is a container that accepts many codecs, so stream copy
            # (no re-encode) is the fastest default.
            cmd.extend(['-c:a', 'copy'])
        else:
            cmd.extend(['-c:a', 'copy'])  # Copy without re-encoding

        # Add the output file
        cmd.append(str(output))

        log(f"Merging {len(files)} audio files to {output_format}...", file=sys.stderr)
        logger.info(f"[merge-file] Running ffmpeg merge: {' '.join(cmd)}")

        # Run ffmpeg with progress monitoring
        try:
            from SYS.progress import print_progress, print_final_progress
            import re

            process = _subprocess.Popen(
                cmd,
                stdout=_subprocess.PIPE,
                stderr=_subprocess.PIPE,
                text=True,
                encoding='utf-8',
                errors='replace'
            )

            # Monitor progress
            duration_re = re.compile(r"time=(\d{2}):(\d{2}):(\d{2})\.(\d{2})")
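            # ffmpeg's stderr progress lines look roughly like:
            #   size=    2048kB time=00:01:23.45 bitrate= 192.0kbits/s speed=30x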
            total_duration_sec = current_time_ms / 1000.0

            while True:
                # Read stderr line by line (ffmpeg writes progress to stderr)
                if process.stderr:
                    line = process.stderr.readline()
                    if not line and process.poll() is not None:
                        break

                    if line:
                        # Parse time=HH:MM:SS.cc (centiseconds)
                        match = duration_re.search(line)
                        if match and total_duration_sec > 0:
                            h, m, s, cs = map(int, match.groups())
                            current_sec = h * 3600 + m * 60 + s + cs / 100.0

                            # Show percentage only for now; speed/bitrate could
                            # be parsed from the same line if needed.
                            print_progress(
                                output.name,
                                int(current_sec * 1000),  # Use ms as "bytes" for the progress bar
                                int(total_duration_sec * 1000),
                                speed=0
                            )
                else:
                    break

            # Wait for completion
            stdout, stderr = process.communicate()

            if process.returncode != 0:
                log(f"FFmpeg error: {stderr}", file=sys.stderr)
                raise _subprocess.CalledProcessError(process.returncode, cmd, output=stdout, stderr=stderr)

            print_final_progress(output.name, int(total_duration_sec * 1000), 0)

        except Exception as e:
            logger.exception(f"[merge-file] ffmpeg process error: {e}")
            raise

        log("Merge successful, adding chapters metadata...", file=sys.stderr)

        # Step 5: Embed chapters into the container (MKA, MP4/M4A, or note the limitation)
        if output_format == 'mka' or output.suffix.lower() == '.mka':
            # Matroska has native chapter support.
            # Re-mux the file with chapters embedded (copy streams, no re-encode).
            log("Embedding chapters into Matroska container...", file=sys.stderr)
            logger.info("[merge-file] Adding chapters to MKA file")

            temp_output = output.parent / f".temp_{output.stem}.mka"

            # Use mkvmerge if available (best for MKA chapters), otherwise fall back to ffmpeg
            mkvmerge_path = _shutil.which('mkvmerge')

            if mkvmerge_path:
                # mkvmerge does not read FFMETADATA; its --chapters option expects
                # the simple (OGM-style) or XML chapter format, so write a simple
                # chapters file from the chapter list:
                #   CHAPTER01=00:00:00.000
                #   CHAPTER01NAME=Chapter 01
                simple_chapters_file = output.parent / f".chapters_{output.stem}.txt"
                simple_lines = []
                for idx, ch in enumerate(chapters, start=1):
                    simple_lines.append(f"CHAPTER{idx:02d}={ch['time_str']}")
                    simple_lines.append(f"CHAPTER{idx:02d}NAME={ch['title']}")
                simple_chapters_file.write_text('\n'.join(simple_lines), encoding='utf-8')

                log("Using mkvmerge for optimal chapter embedding...", file=sys.stderr)
                cmd2 = [
                    mkvmerge_path, '-o', str(temp_output),
                    '--chapters', str(simple_chapters_file),
                    str(output)
                ]
            else:
                # Fall back to ffmpeg, which detects the FFMETADATA file directly
                log("Using ffmpeg for chapter embedding (install mkvtoolnix for better results)...", file=sys.stderr)
                cmd2 = [
                    ffmpeg_path, '-y',
                    '-i', str(output),          # Input: merged audio
                    '-i', str(metadata_file),   # Input: FFMetadata file
                    '-c:a', 'copy',             # Copy audio without re-encoding
                    '-threads', '0',            # Use all threads
                    '-map', '0',                # Map all streams from the first input
                    '-map_chapters', '1',       # Map chapters from the FFMetadata input
                    str(temp_output)
                ]

            logger.info(f"[merge-file] Running chapter embedding: {' '.join(cmd2)}")

            try:
                # Run chapter embedding silently (progress handled by worker thread)
                _subprocess.run(
                    cmd2,
                    capture_output=True,
                    text=True,
                    stdin=_subprocess.DEVNULL,
                    timeout=600,
                    check=False
                )

                # Replace the original with the temp file if successful
                if temp_output.exists() and temp_output.stat().st_size > 0:
                    try:
                        if output.exists():
                            output.unlink()
                        _shutil.move(str(temp_output), str(output))
                        log("✓ Chapters successfully embedded!", file=sys.stderr)
                        logger.info("[merge-file] Chapters embedded successfully")
                    except Exception as e:
                        logger.warning(f"[merge-file] Could not replace file: {e}")
                        log("Warning: Could not embed chapters, using merge without chapters", file=sys.stderr)
                        try:
                            temp_output.unlink()
                        except Exception:
                            pass
                else:
                    logger.warning("[merge-file] Chapter embedding did not create output")
            except Exception as e:
                logger.exception(f"[merge-file] Chapter embedding failed: {e}")
                log("Warning: Chapter embedding failed, using merge without chapters", file=sys.stderr)
        elif output_format in {'m4a', 'm4b'} or output.suffix.lower() in {'.m4a', '.m4b', '.mp4'}:
            # MP4/M4A has native chapter support
            log("Embedding chapters into MP4 container...", file=sys.stderr)
            logger.info("[merge-file] Adding chapters to M4A/MP4 file")

            temp_output = output.parent / f".temp_{output.stem}{output.suffix}"

            # ffmpeg embeds MP4 chapters via -map_metadata and -map_chapters
            log("Using ffmpeg for MP4 chapter embedding...", file=sys.stderr)
            cmd2 = [
                ffmpeg_path, '-y',
                '-i', str(output),          # Input: merged audio
                '-i', str(metadata_file),   # Input: FFMetadata file
                '-c:a', 'copy',             # Copy audio without re-encoding
                '-threads', '0',            # Use all threads
                '-map', '0',                # Map all streams from the first input
                '-map_metadata', '1',       # Map global metadata from the FFMetadata input
                '-map_chapters', '1',       # Map chapters from the FFMetadata input
                str(temp_output)
            ]

            logger.info(f"[merge-file] Running MP4 chapter embedding: {' '.join(cmd2)}")

            try:
                # Run MP4 chapter embedding silently (progress handled by worker thread)
                _subprocess.run(
                    cmd2,
                    capture_output=True,
                    text=True,
                    stdin=_subprocess.DEVNULL,
                    timeout=600,
                    check=False
                )

                # Replace the original with the temp file if successful
                if temp_output.exists() and temp_output.stat().st_size > 0:
                    try:
                        if output.exists():
                            output.unlink()
                        _shutil.move(str(temp_output), str(output))
                        log("✓ Chapters successfully embedded in MP4!", file=sys.stderr)
                        logger.info("[merge-file] MP4 chapters embedded successfully")
                    except Exception as e:
                        logger.warning(f"[merge-file] Could not replace file: {e}")
                        log("Warning: Could not embed chapters, using merge without chapters", file=sys.stderr)
                        try:
                            temp_output.unlink()
                        except Exception:
                            pass
                else:
                    logger.warning("[merge-file] MP4 chapter embedding did not create output")
            except Exception as e:
                logger.exception(f"[merge-file] MP4 chapter embedding failed: {e}")
                log("Warning: MP4 chapter embedding failed, using merge without chapters", file=sys.stderr)
        else:
            # Other formats would need external tools for chapter support
            logger.info(f"[merge-file] Format {output_format} does not have native chapter support")
            log("Note: For chapter support, use MKA or M4A format", file=sys.stderr)

        # Clean up temp files
        try:
            concat_file.unlink()
        except Exception:
            pass
        try:
            metadata_file.unlink()
        except Exception:
            pass
        try:
            # Remove the mkvmerge simple-chapters file, if one was created
            (output.parent / f".chapters_{output.stem}.txt").unlink()
        except Exception:
            pass

        return True

    except Exception as e:
        log(f"Audio merge error: {e}", file=sys.stderr)
        logger.error(f"[merge-file] Audio merge error: {e}", exc_info=True)
        return False


def _merge_video(files: List[Path], output: Path, output_format: str) -> bool:
    """Merge video files."""
    ffmpeg_path = _shutil.which('ffmpeg')
    if not ffmpeg_path:
        log("ffmpeg not found in PATH", file=sys.stderr)
        return False

    try:
        # Create the concat demuxer file
        concat_file = output.parent / f".concat_{output.stem}.txt"
        concat_lines = []
        for f in files:
            safe_path = str(f).replace("'", "'\\''")
            concat_lines.append(f"file '{safe_path}'")

        concat_file.write_text('\n'.join(concat_lines), encoding='utf-8')

        # Build the FFmpeg command for the video merge
        cmd = [ffmpeg_path, '-y', '-f', 'concat', '-safe', '0', '-i', str(concat_file)]

        # Video codec selection
        if output_format == 'mp4':
            cmd.extend(['-c:v', 'libx265', '-preset', 'fast', '-tag:v', 'hvc1', '-c:a', 'aac', '-b:a', '192k'])
        elif output_format == 'mkv':
            cmd.extend(['-c:v', 'libx265', '-preset', 'fast', '-c:a', 'aac', '-b:a', '192k'])
        else:
            cmd.extend(['-c', 'copy'])  # Copy without re-encoding
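            # Note: with stream copy, the concat demuxer assumes all inputs
            # share the same codecs and parameters; mismatched inputs can
            # produce a broken output file.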

        cmd.append(str(output))

        log(f"Merging {len(files)} video files...", file=sys.stderr)
        result = _subprocess.run(cmd, capture_output=True, text=True)

        # Clean up the concat file
        try:
            concat_file.unlink()
        except Exception:
            pass

        if result.returncode != 0:
            stderr = (result.stderr or '').strip()
            log(f"FFmpeg error: {stderr}", file=sys.stderr)
            return False

        return True

    except Exception as e:
        log(f"Video merge error: {e}", file=sys.stderr)
        return False


def _merge_text(files: List[Path], output: Path) -> bool:
    """Merge text files."""
    try:
        with open(output, 'w', encoding='utf-8') as outf:
            for i, f in enumerate(files):
                if i > 0:
                    outf.write('\n---\n')  # Separator between files
                try:
                    content = f.read_text(encoding='utf-8', errors='replace')
                    outf.write(content)
                except Exception as e:
                    log(f"Warning reading {f.name}: {e}", file=sys.stderr)

        return True

    except Exception as e:
        log(f"Text merge error: {e}", file=sys.stderr)
        return False


def _merge_pdf(files: List[Path], output: Path) -> bool:
    """Merge PDF files."""
    if (not HAS_PYPDF2) or (PdfWriter is None) or (PdfReader is None):
        log("PyPDF2 is required for PDF merging. Install with: pip install PyPDF2", file=sys.stderr)
        return False

    try:
        writer = PdfWriter()

        for f in files:
            try:
                reader = PdfReader(f)
                for page in reader.pages:
                    writer.add_page(page)
                log(f"Added {len(reader.pages)} pages from {f.name}", file=sys.stderr)
            except Exception as e:
                log(f"Error reading PDF {f.name}: {e}", file=sys.stderr)
                return False

        with open(output, 'wb') as outf:
            writer.write(outf)

        return True

    except Exception as e:
        log(f"PDF merge error: {e}", file=sys.stderr)
        return False


CMDLET = Cmdlet(
    name="merge-file",
    summary="Merge multiple files into a single output file. Supports audio, video, PDF, and text merging with optional cleanup.",
    usage="merge-file [-delete] [-output <path>] [-format <auto|mka|m4a|m4b|mp3|aac|opus|mp4|mkv|pdf|txt>]",
    arg=[
        CmdletArg("-delete", type="flag", description="Delete source files after successful merge."),
        CmdletArg("-output", description="Override output file path."),
        CmdletArg("-format", description="Output format (auto/mka/m4a/m4b/mp3/aac/opus/mp4/mkv/pdf/txt). Default: auto-detect from first file."),
    ],
    detail=[
        "- Pipe multiple files: search-file query | [1,2,3] | merge-file",
        "- Audio files merge with minimal quality loss using specified codec.",
        "- Video files merge into MP4 or MKV containers.",
        "- PDF files merge into a single PDF document.",
        "- Text/document files are concatenated.",
        "- Output name derived from first file with ' (merged)' suffix.",
        "- -delete flag removes all source files after successful merge.",
    ],
)

CMDLET.exec = _run
CMDLET.register()