This commit is contained in:
nose
2025-11-25 20:09:33 -08:00
parent d75c644a82
commit bd69119996
80 changed files with 39615 additions and 0 deletions

916
cmdlets/merge_file.py Normal file
View File

@@ -0,0 +1,916 @@
"""Merge multiple files into a single output file."""
from __future__ import annotations
from typing import Any, Dict, Optional, Sequence, List
from pathlib import Path
import json
import sys
from helper.logger import log
from helper.download import download_media
from models import DownloadOptions
from config import resolve_output_dir
import subprocess as _subprocess
import shutil as _shutil
from ._shared import parse_cmdlet_args
try:
from PyPDF2 import PdfWriter, PdfReader
HAS_PYPDF2 = True
except ImportError:
HAS_PYPDF2 = False
PdfWriter = None
PdfReader = None
try:
from metadata import (
read_tags_from_file,
write_tags_to_file,
dedup_tags_by_namespace,
merge_multiple_tag_lists,
write_tags,
write_metadata
)
HAS_METADATA_API = True
except ImportError:
HAS_METADATA_API = False
from . import register
from ._shared import (
Cmdlet,
CmdletArg,
normalize_result_input,
get_pipe_object_path,
get_pipe_object_hash,
)
import models
import pipeline as ctx
def _get_item_value(item: Any, key: str, default: Any = None) -> Any:
"""Helper to read either dict keys or attributes."""
if isinstance(item, dict):
return item.get(key, default)
return getattr(item, key, default)
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
    """Merge multiple piped files into one output file.

    Gathers file paths from the piped result objects (downloading playlist
    items on demand), verifies all inputs share one media kind, merges them
    with the matching backend (ffmpeg, PyPDF2, or plain concatenation),
    writes a merged .tags sidecar, emits a pipeline item for the merged
    file, and optionally deletes the sources.

    Returns 0 on success (or single-item pass-through), 1 on failure.
    """
    # Handle help flags before any argument parsing.
    try:
        if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args):
            log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
            return 0
    except Exception:
        pass
    # Parse arguments
    parsed = parse_cmdlet_args(args, CMDLET)
    delete_after = parsed.get("delete", False)
    output_override: Optional[Path] = None
    output_arg = parsed.get("output")
    if output_arg:
        try:
            output_override = Path(str(output_arg)).expanduser()
        except Exception:
            output_override = None
    format_spec = parsed.get("format")
    if format_spec:
        format_spec = str(format_spec).lower().strip()
    # Collect files from piped results.
    # normalize_result_input handles both single items and lists.
    files_to_merge: List[Dict[str, Any]] = normalize_result_input(result)
    if not files_to_merge:
        log("No files provided to merge", file=sys.stderr)
        return 1
    if len(files_to_merge) < 2:
        # Only 1 file - pass it through unchanged
        # (merge only happens when multiple files are collected)
        item = files_to_merge[0]
        ctx.emit(item)
        return 0
    # Extract file paths and metadata from result objects
    source_files: List[Path] = []
    source_tags_files: List[Path] = []
    source_hashes: List[str] = []
    source_urls: List[str] = []
    source_tags: List[str] = []  # tags collected from source sidecars
    source_relationships: List[str] = []  # relationships collected from source sidecars
    for item in files_to_merge:
        raw_path = get_pipe_object_path(item)
        target_path: Optional[Path] = None
        if isinstance(raw_path, Path):
            target_path = raw_path
        elif isinstance(raw_path, str) and raw_path.strip():
            candidate = Path(raw_path).expanduser()
            if candidate.exists():
                target_path = candidate
        # Playlist entries carry no local file yet; download on demand.
        if not target_path and isinstance(item, dict) and item.get('__action', '').startswith('playlist-item:'):
            try:
                playlist_url = item.get('__file_path')
                item_idx = int(item['__action'].split(':')[1])
                log(f"Downloading playlist item #{item_idx} from {playlist_url}...", flush=True)
                output_dir = resolve_output_dir(config)
                opts = DownloadOptions(
                    url=playlist_url,
                    output_dir=output_dir,
                    playlist_items=str(item_idx),
                    mode="audio" if format_spec == "m4b" else "auto"  # Infer mode if possible
                )
                res = download_media(opts)
                if res and res.path and res.path.exists():
                    target_path = res.path
                    log(f"✓ Downloaded: {target_path.name}", flush=True)
            except Exception as e:
                log(f"Failed to download playlist item: {e}", file=sys.stderr)
        if target_path and target_path.exists():
            source_files.append(target_path)
            # Track the .tags file for this source
            tags_file = target_path.with_suffix(target_path.suffix + '.tags')
            if tags_file.exists():
                source_tags_files.append(tags_file)
                # Harvest hash, tags, urls, and relationships from the sidecar
                try:
                    tags_content = tags_file.read_text(encoding='utf-8')
                    for line in tags_content.split('\n'):
                        line = line.strip()
                        if not line:
                            continue
                        if line.startswith('hash:'):
                            hash_value = line[5:].strip()
                            if hash_value:
                                source_hashes.append(hash_value)
                        elif line.startswith('known_url:') or line.startswith('url:'):
                            # Extract URLs from tags file
                            url_value = line.split(':', 1)[1].strip() if ':' in line else ''
                            if url_value and url_value not in source_urls:
                                source_urls.append(url_value)
                        elif line.startswith('relationship:'):
                            # Extract relationships from tags file
                            rel_value = line.split(':', 1)[1].strip() if ':' in line else ''
                            if rel_value and rel_value not in source_relationships:
                                source_relationships.append(rel_value)
                        else:
                            # Collect actual tags (not metadata like hash: or known_url:)
                            source_tags.append(line)
                except Exception:
                    pass
            # Extract hash if available in item (as fallback)
            hash_value = get_pipe_object_hash(item)
            if hash_value and hash_value not in source_hashes:
                source_hashes.append(str(hash_value))
            # Extract known URLs if available
            known_urls = _get_item_value(item, 'known_urls', [])
            if isinstance(known_urls, str):
                source_urls.append(known_urls)
            elif isinstance(known_urls, list):
                source_urls.extend(known_urls)
        else:
            title = _get_item_value(item, 'title', 'unknown') or _get_item_value(item, 'id', 'unknown')
            log(f"Warning: Could not locate file for item: {title}", file=sys.stderr)
    if len(source_files) < 2:
        log("At least 2 valid files required to merge", file=sys.stderr)
        return 1
    # Detect file types (all inputs must resolve to the same kind)
    file_types = set()
    for f in source_files:
        suffix = f.suffix.lower()
        if suffix in {'.mp3', '.flac', '.wav', '.m4a', '.aac', '.ogg', '.opus', '.mka'}:
            file_types.add('audio')
        elif suffix in {'.mp4', '.mkv', '.webm', '.mov', '.avi', '.flv', '.mpg', '.mpeg', '.ts', '.m4v', '.wmv'}:
            file_types.add('video')
        elif suffix in {'.pdf'}:
            file_types.add('pdf')
        elif suffix in {'.txt', '.srt', '.vtt', '.md', '.log'}:
            file_types.add('text')
        else:
            file_types.add('other')
    if len(file_types) > 1 and 'other' not in file_types:
        log(f"Mixed file types detected: {', '.join(sorted(file_types))}", file=sys.stderr)
        log(f"Can only merge files of the same type", file=sys.stderr)
        return 1
    file_kind = list(file_types)[0] if file_types else 'other'
    # Determine output format
    output_format = format_spec or 'auto'
    if output_format == 'auto':
        if file_kind == 'audio':
            output_format = 'mka'  # Default audio codec - mka supports chapters and stream copy
        elif file_kind == 'video':
            output_format = 'mp4'  # Default video codec
        elif file_kind == 'pdf':
            output_format = 'pdf'
        else:
            output_format = 'txt'
    # Determine output path
    if output_override:
        if output_override.is_dir():
            # BUG FIX: items are dicts (or attribute objects); getattr on a
            # dict always returned the default, so use _get_item_value.
            base_name = _sanitize_name(str(_get_item_value(files_to_merge[0], 'title', 'merged') or 'merged'))
            output_path = output_override / f"{base_name} (merged).{_ext_for_format(output_format)}"
        else:
            output_path = output_override
    else:
        first_file = source_files[0]
        output_path = first_file.parent / f"{first_file.stem} (merged).{_ext_for_format(output_format)}"
    # Ensure output directory exists
    output_path.parent.mkdir(parents=True, exist_ok=True)
    # Perform merge based on file type
    if file_kind == 'audio':
        success = _merge_audio(source_files, output_path, output_format)
    elif file_kind == 'video':
        success = _merge_video(source_files, output_path, output_format)
    elif file_kind == 'pdf':
        success = _merge_pdf(source_files, output_path)
    elif file_kind == 'text':
        success = _merge_text(source_files, output_path)
    else:
        log(f"Unsupported file type: {file_kind}", file=sys.stderr)
        return 1
    if not success:
        log("Merge failed", file=sys.stderr)
        return 1
    log(f"Merged {len(source_files)} files into: {output_path}", file=sys.stderr)
    # Create .tags sidecar file for the merged output using unified API.
    # merged_tags is initialised outside the try so the emit step below can
    # never hit a NameError when sidecar creation fails early.
    tags_path = output_path.with_suffix(output_path.suffix + '.tags')
    merged_tags: List[str] = [f"title:{output_path.stem}"]
    try:
        # Merge tags from source files using metadata API
        if source_tags and HAS_METADATA_API:
            # Use dedup function to normalize and deduplicate
            merged_source_tags = dedup_tags_by_namespace(source_tags)
            merged_tags.extend(merged_source_tags)
            log(f"Merged {len(merged_source_tags)} unique tags from source files", file=sys.stderr)
        elif source_tags:
            # Fallback: simple deduplication if metadata API unavailable
            merged_tags.extend(list(dict.fromkeys(source_tags)))  # Preserve order, remove duplicates
        # Write merged tags to sidecar file
        if HAS_METADATA_API and write_tags_to_file:
            # Use unified API for file writing
            source_hashes_list = source_hashes if source_hashes else None
            source_urls_list = source_urls if source_urls else None
            write_tags_to_file(tags_path, merged_tags, source_hashes_list, source_urls_list)
        else:
            # Fallback: manual file writing
            tags_lines = []
            # Add hash first (if available)
            if source_hashes:
                tags_lines.append(f"hash:{source_hashes[0]}")
            # Add regular tags
            tags_lines.extend(merged_tags)
            # Add known URLs
            if source_urls:
                for url in source_urls:
                    tags_lines.append(f"known_url:{url}")
            # Add relationships (if available)
            if source_relationships:
                for rel in source_relationships:
                    tags_lines.append(f"relationship:{rel}")
            with open(tags_path, 'w', encoding='utf-8') as f:
                f.write('\n'.join(tags_lines) + '\n')
        log(f"Created sidecar: {tags_path.name}", file=sys.stderr)
        # Also create .metadata file using centralized function.
        # BUG FIX: write_metadata only exists when the metadata API imported,
        # so guard on HAS_METADATA_API instead of relying on a swallowed
        # NameError producing a misleading warning.
        if HAS_METADATA_API:
            try:
                write_metadata(output_path, source_hashes[0] if source_hashes else None, source_urls, source_relationships)
                log(f"Created metadata: {output_path.name}.metadata", file=sys.stderr)
            except Exception as e:
                log(f"Warning: Could not create metadata file: {e}", file=sys.stderr)
    except Exception as e:
        log(f"Warning: Could not create sidecar: {e}", file=sys.stderr)
    # Emit PipelineItem so the merged file can be piped to next command
    try:
        # Try to import PipelineItem from downlow module
        try:
            from downlow import PipelineItem
        except ImportError:
            # Fallback: create a simple object with the required attributes
            class SimpleItem:
                def __init__(self, target, title, media_kind, tags=None, known_urls=None):
                    self.target = target
                    self.title = title
                    self.media_kind = media_kind
                    self.tags = tags or []
                    self.known_urls = known_urls or []
            PipelineItem = SimpleItem
        merged_item = PipelineItem(
            target=str(output_path),
            title=output_path.stem,
            media_kind=file_kind,
            tags=merged_tags,  # Include merged tags
            known_urls=source_urls  # Include known URLs
        )
        ctx.emit(merged_item)
    except Exception as e:
        log(f"Warning: Could not emit pipeline item: {e}", file=sys.stderr)
        # Still emit a string representation for feedback
        ctx.emit(f"Merged: {output_path}")
    # Delete source files if requested
    if delete_after:
        # First delete all .tags files
        for tags_file in source_tags_files:
            try:
                tags_file.unlink()
                log(f"Deleted: {tags_file.name}", file=sys.stderr)
            except Exception as e:
                log(f"Warning: Could not delete {tags_file.name}: {e}", file=sys.stderr)
        # Then delete all source files
        for f in source_files:
            try:
                f.unlink()
                log(f"Deleted: {f.name}", file=sys.stderr)
            except Exception as e:
                log(f"Warning: Could not delete {f.name}: {e}", file=sys.stderr)
    return 0
def _sanitize_name(text: str) -> str:
"""Sanitize filename."""
allowed = []
for ch in text:
allowed.append(ch if (ch.isalnum() or ch in {"-", "_", " ", "."}) else " ")
return (" ".join("".join(allowed).split()) or "merged").strip()
def _ext_for_format(fmt: str) -> str:
"""Get file extension for format."""
format_map = {
'mp3': 'mp3',
'm4a': 'm4a',
'aac': 'aac',
'opus': 'opus',
'mka': 'mka', # Matroska Audio - EXCELLENT chapter support (recommended)
'mkv': 'mkv',
'mp4': 'mp4',
'webm': 'webm',
'pdf': 'pdf',
'txt': 'txt',
'auto': 'mka', # Default - MKA for chapters
}
return format_map.get(fmt.lower(), 'mka')
def _add_chapters_to_m4a(file_path: Path, chapters: List[Dict]) -> bool:
"""Add chapters to an M4A file using mutagen.
Args:
file_path: Path to M4A file
chapters: List of chapter dicts with 'title', 'start_ms', 'end_ms'
Returns:
True if successful, False otherwise
"""
import logging
logger = logging.getLogger(__name__)
if not chapters:
return True
try:
from mutagen.mp4 import MP4, Atom
from mutagen.mp4._util import Atom as MP4Atom
except ImportError:
logger.warning("[merge-file] mutagen not available for chapter writing")
return False
try:
# Load the MP4 file
audio = MP4(str(file_path))
# Build the chapter atom
# MP4 chapters are stored in a 'chap' atom with specific structure
chapter_data = b''
for i, chapter in enumerate(chapters, 1):
# Each chapter entry: 10-byte header + title
title = chapter.get('title', f'Chapter {i}').encode('utf-8')
start_time_ms = int(chapter.get('start_ms', 0))
# Chapter atom format for M4A:
# (uint32: size)(uint32: 'chap')(uint8: reserved)(uint24: atom type) + more...
# This is complex, so we'll use a simpler atom approach
pass
# Unfortunately, mutagen doesn't have built-in chapter writing for MP4
# Chapter writing requires low-level atom manipulation
# For now, we'll just return and note this limitation
logger.info("[merge-file] MP4 chapter writing via mutagen not fully supported")
return False
except Exception as e:
logger.warning(f"[merge-file] Error writing chapters: {e}")
return False
def _merge_audio(files: List[Path], output: Path, output_format: str) -> bool:
    """Merge audio files with chapters based on file boundaries.

    Pipeline:
      1. ffprobe each input to learn its duration (chapter boundaries).
      2. Concatenate all inputs with ffmpeg's concat demuxer, choosing a
         codec per *output_format* (stream copy for mka/unknown formats).
      3. Write an FFMETADATA1 chapter file and embed it into the merged
         container — mkvmerge (preferred) or ffmpeg for MKA, ffmpeg for
         M4A/MP4; other formats get no embedded chapters.

    Chapter titles come from each source's .tags sidecar 'title:' entry when
    the metadata API is available, falling back to the file stem. Temporary
    concat/metadata files are removed before returning.

    Args:
        files: Ordered list of audio files to concatenate.
        output: Destination path for the merged file.
        output_format: Target format keyword ('mka', 'm4a', 'mp3', ...).

    Returns:
        True if the merge succeeded (with or without embedded chapters),
        False otherwise.
    """
    import logging
    logger = logging.getLogger(__name__)
    ffmpeg_path = _shutil.which('ffmpeg')
    if not ffmpeg_path:
        log("ffmpeg not found in PATH", file=sys.stderr)
        return False
    try:
        # Step 1: Get duration of each file to calculate chapter timestamps
        chapters = []
        current_time_ms = 0
        log(f"Analyzing {len(files)} files for chapter information...", file=sys.stderr)
        logger.info(f"[merge-file] Analyzing files for chapters")
        for file_path in files:
            # Get duration using ffprobe
            try:
                ffprobe_cmd = [
                    'ffprobe', '-v', 'error', '-show_entries',
                    'format=duration', '-print_format',
                    'default=noprint_wrappers=1:nokey=1', str(file_path)
                ]
                probe_result = _subprocess.run(ffprobe_cmd, capture_output=True, text=True, timeout=10)
                if probe_result.returncode == 0 and probe_result.stdout.strip():
                    try:
                        duration_sec = float(probe_result.stdout.strip())
                    except ValueError:
                        logger.warning(f"[merge-file] Could not parse duration from ffprobe output: {probe_result.stdout}")
                        duration_sec = 0
                else:
                    logger.warning(f"[merge-file] ffprobe failed for {file_path.name}: {probe_result.stderr}")
                    duration_sec = 0
            except Exception as e:
                # A failed probe degrades gracefully: the chapter still exists
                # but contributes zero duration to the running offset.
                logger.warning(f"[merge-file] Could not get duration for {file_path.name}: {e}")
                duration_sec = 0
            # Create chapter entry - use title: tag from metadata if available
            title = file_path.stem # Default to filename without extension
            if HAS_METADATA_API:
                try:
                    # Try to read tags from .tags sidecar file
                    tags_file = file_path.with_suffix(file_path.suffix + '.tags')
                    if tags_file.exists():
                        tags = read_tags_from_file(tags_file)
                        if tags:
                            # Look for title: tag
                            for tag in tags:
                                if isinstance(tag, str) and tag.lower().startswith('title:'):
                                    # Extract the title value after the colon
                                    title = tag.split(':', 1)[1].strip()
                                    break
                except Exception as e:
                    logger.debug(f"[merge-file] Could not read metadata for {file_path.name}: {e}")
                    pass # Fall back to filename
            # Convert seconds to HH:MM:SS.mmm format
            hours = int(current_time_ms // 3600000)
            minutes = int((current_time_ms % 3600000) // 60000)
            seconds = int((current_time_ms % 60000) // 1000)
            millis = int(current_time_ms % 1000)
            chapters.append({
                'time_ms': current_time_ms,
                'time_str': f"{hours:02d}:{minutes:02d}:{seconds:02d}.{millis:03d}",
                'title': title,
                'duration_sec': duration_sec
            })
            logger.info(f"[merge-file] Chapter: {title} @ {chapters[-1]['time_str']} (duration: {duration_sec:.2f}s)")
            current_time_ms += int(duration_sec * 1000)
        # Step 2: Create concat demuxer file
        concat_file = output.parent / f".concat_{output.stem}.txt"
        concat_lines = []
        for f in files:
            # Escape quotes in path
            safe_path = str(f).replace("'", "'\\''")
            concat_lines.append(f"file '{safe_path}'")
        concat_file.write_text('\n'.join(concat_lines), encoding='utf-8')
        # Step 3: Create FFmpeg metadata file with chapters
        metadata_file = output.parent / f".metadata_{output.stem}.txt"
        metadata_lines = [';FFMETADATA1']
        for i, chapter in enumerate(chapters):
            # FFMetadata format for chapters (note: [CHAPTER] not [CHAPTER01])
            metadata_lines.append('[CHAPTER]')
            metadata_lines.append('TIMEBASE=1/1000')
            metadata_lines.append(f'START={chapter["time_ms"]}')
            # Calculate end time (start of next chapter or end of file)
            if i < len(chapters) - 1:
                metadata_lines.append(f'END={chapters[i+1]["time_ms"]}')
            else:
                metadata_lines.append(f'END={current_time_ms}')
            metadata_lines.append(f'title={chapter["title"]}')
        metadata_file.write_text('\n'.join(metadata_lines), encoding='utf-8')
        log(f"Created chapters metadata file with {len(chapters)} chapters", file=sys.stderr)
        logger.info(f"[merge-file] Created {len(chapters)} chapters")
        # Step 4: Build FFmpeg command to merge and embed chapters
        # Strategy: First merge audio, then add metadata in separate pass
        cmd = [ffmpeg_path, '-y', '-f', 'concat', '-safe', '0', '-i', str(concat_file)]
        # Add threading options for speed
        cmd.extend(['-threads', '0']) # Use all available threads
        # Audio codec selection for first input
        if output_format == 'mp3':
            cmd.extend(['-c:a', 'libmp3lame', '-q:a', '2'])
        elif output_format == 'm4a':
            # Use copy if possible (much faster), otherwise re-encode
            # Check if inputs are already AAC/M4A to avoid re-encoding
            # For now, default to copy if format matches, otherwise re-encode
            # But since we are merging potentially different codecs, re-encoding is safer
            # To speed up re-encoding, we can use a faster preset or hardware accel if available
            cmd.extend(['-c:a', 'aac', '-b:a', '256k']) # M4A with better quality
        elif output_format == 'aac':
            cmd.extend(['-c:a', 'aac', '-b:a', '192k'])
        elif output_format == 'opus':
            cmd.extend(['-c:a', 'libopus', '-b:a', '128k'])
        elif output_format == 'mka':
            # FLAC is fast to encode but large. Copy is fastest if inputs are compatible.
            # If we want speed, copy is best. If we want compatibility, re-encode.
            # Let's try copy first if inputs are same format, but that's hard to detect here.
            # Defaulting to copy for MKA as it's a container that supports many codecs
            cmd.extend(['-c:a', 'copy'])
        else:
            cmd.extend(['-c:a', 'copy']) # Copy without re-encoding
        # Add the output file
        cmd.append(str(output))
        log(f"Merging {len(files)} audio files to {output_format}...", file=sys.stderr)
        logger.info(f"[merge-file] Running ffmpeg merge: {' '.join(cmd)}")
        # Run ffmpeg with progress monitoring
        try:
            from helper.progress import print_progress, print_final_progress
            import re
            process = _subprocess.Popen(
                cmd,
                stdout=_subprocess.PIPE,
                stderr=_subprocess.PIPE,
                text=True,
                encoding='utf-8',
                errors='replace'
            )
            # Monitor progress
            duration_re = re.compile(r"time=(\d{2}):(\d{2}):(\d{2})\.(\d{2})")
            total_duration_sec = current_time_ms / 1000.0
            while True:
                # Read stderr line by line (ffmpeg writes progress to stderr)
                if process.stderr:
                    line = process.stderr.readline()
                    if not line and process.poll() is not None:
                        break
                    if line:
                        # Parse time=HH:MM:SS.mm
                        match = duration_re.search(line)
                        if match and total_duration_sec > 0:
                            h, m, s, cs = map(int, match.groups())
                            current_sec = h * 3600 + m * 60 + s + cs / 100.0
                            # Calculate speed/bitrate if available (optional)
                            # For now just show percentage
                            print_progress(
                                output.name,
                                int(current_sec * 1000), # Use ms as "bytes" for progress bar
                                int(total_duration_sec * 1000),
                                speed=0
                            )
                else:
                    break
            # Wait for completion
            stdout, stderr = process.communicate()
            if process.returncode != 0:
                log(f"FFmpeg error: {stderr}", file=sys.stderr)
                raise _subprocess.CalledProcessError(process.returncode, cmd, output=stdout, stderr=stderr)
            print_final_progress(output.name, int(total_duration_sec * 1000), 0)
        except Exception as e:
            logger.exception(f"[merge-file] ffmpeg process error: {e}")
            raise
        log(f"Merge successful, adding chapters metadata...", file=sys.stderr)
        # Step 5: Embed chapters into container (MKA, MP4/M4A, or note limitation)
        if output_format == 'mka' or output.suffix.lower() == '.mka':
            # MKA/MKV format has native chapter support via FFMetadata
            # Re-mux the file with chapters embedded (copy streams, no re-encode)
            log(f"Embedding chapters into Matroska container...", file=sys.stderr)
            logger.info(f"[merge-file] Adding chapters to MKA file via FFMetadata")
            temp_output = output.parent / f".temp_{output.stem}.mka"
            # Use mkvmerge if available (best for MKA chapters), otherwise fall back to ffmpeg
            mkvmerge_path = _shutil.which('mkvmerge')
            if mkvmerge_path:
                # mkvmerge is the best tool for embedding chapters in Matroska files
                log(f"Using mkvmerge for optimal chapter embedding...", file=sys.stderr)
                cmd2 = [
                    mkvmerge_path, '-o', str(temp_output),
                    '--chapters', str(metadata_file),
                    str(output)
                ]
            else:
                # Fallback to ffmpeg with proper chapter embedding for Matroska
                log(f"Using ffmpeg for chapter embedding (install mkvtoolnix for better quality)...", file=sys.stderr)
                # For Matroska files, the metadata must be provided via -f ffmetadata input
                cmd2 = [
                    ffmpeg_path, '-y',
                    '-i', str(output), # Input: merged audio
                    '-i', str(metadata_file), # Input: FFMetadata file
                    '-c:a', 'copy', # Copy audio without re-encoding
                    '-threads', '0', # Use all threads
                    '-map', '0', # Map all from first input
                    '-map_chapters', '1', # Map CHAPTERS from second input (FFMetadata)
                    str(temp_output) # Output
                ]
            logger.info(f"[merge-file] Running chapter embedding: {' '.join(cmd2)}")
            try:
                # Run chapter embedding silently (progress handled by worker thread)
                _subprocess.run(
                    cmd2,
                    capture_output=True,
                    text=True,
                    stdin=_subprocess.DEVNULL,
                    timeout=600,
                    check=False
                )
                # Replace original with temp if successful
                if temp_output.exists() and temp_output.stat().st_size > 0:
                    try:
                        import shutil
                        if output.exists():
                            output.unlink()
                        shutil.move(str(temp_output), str(output))
                        log(f"✓ Chapters successfully embedded!", file=sys.stderr)
                        logger.info(f"[merge-file] Chapters embedded successfully")
                    except Exception as e:
                        logger.warning(f"[merge-file] Could not replace file: {e}")
                        log(f"Warning: Could not embed chapters, using merge without chapters", file=sys.stderr)
                        try:
                            temp_output.unlink()
                        except Exception:
                            pass
                else:
                    logger.warning(f"[merge-file] Chapter embedding did not create output")
            except Exception as e:
                logger.exception(f"[merge-file] Chapter embedding failed: {e}")
                log(f"Warning: Chapter embedding failed, using merge without chapters", file=sys.stderr)
        elif output_format == 'm4a' or output.suffix.lower() in ['.m4a', '.mp4']:
            # MP4/M4A format has native chapter support via iTunes metadata atoms
            log(f"Embedding chapters into MP4 container...", file=sys.stderr)
            logger.info(f"[merge-file] Adding chapters to M4A/MP4 file via iTunes metadata")
            temp_output = output.parent / f".temp_{output.stem}{output.suffix}"
            # ffmpeg embeds chapters in MP4 using -map_metadata and -map_chapters
            log(f"Using ffmpeg for MP4 chapter embedding...", file=sys.stderr)
            cmd2 = [
                ffmpeg_path, '-y',
                '-i', str(output), # Input: merged audio
                '-i', str(metadata_file), # Input: FFMetadata file
                '-c:a', 'copy', # Copy audio without re-encoding
                '-threads', '0', # Use all threads
                '-map', '0', # Map all from first input
                '-map_metadata', '1', # Map metadata from second input (FFMetadata)
                '-map_chapters', '1', # Map CHAPTERS from second input (FFMetadata)
                str(temp_output) # Output
            ]
            logger.info(f"[merge-file] Running MP4 chapter embedding: {' '.join(cmd2)}")
            try:
                # Run MP4 chapter embedding silently (progress handled by worker thread)
                _subprocess.run(
                    cmd2,
                    capture_output=True,
                    text=True,
                    stdin=_subprocess.DEVNULL,
                    timeout=600,
                    check=False
                )
                # Replace original with temp if successful
                if temp_output.exists() and temp_output.stat().st_size > 0:
                    try:
                        import shutil
                        if output.exists():
                            output.unlink()
                        shutil.move(str(temp_output), str(output))
                        log(f"✓ Chapters successfully embedded in MP4!", file=sys.stderr)
                        logger.info(f"[merge-file] MP4 chapters embedded successfully")
                    except Exception as e:
                        logger.warning(f"[merge-file] Could not replace file: {e}")
                        log(f"Warning: Could not embed chapters, using merge without chapters", file=sys.stderr)
                        try:
                            temp_output.unlink()
                        except Exception:
                            pass
                else:
                    logger.warning(f"[merge-file] MP4 chapter embedding did not create output")
            except Exception as e:
                logger.exception(f"[merge-file] MP4 chapter embedding failed: {e}")
                log(f"Warning: MP4 chapter embedding failed, using merge without chapters", file=sys.stderr)
        else:
            # For other formats, chapters would require external tools
            logger.info(f"[merge-file] Format {output_format} does not have native chapter support")
            log(f"Note: For chapter support, use MKA or M4A format", file=sys.stderr)
        # Clean up temp files
        try:
            concat_file.unlink()
        except Exception:
            pass
        try:
            metadata_file.unlink()
        except Exception:
            pass
        return True
    except Exception as e:
        log(f"Audio merge error: {e}", file=sys.stderr)
        logger.error(f"[merge-file] Audio merge error: {e}", exc_info=True)
        return False
def _merge_video(files: List[Path], output: Path, output_format: str) -> bool:
    """Concatenate video files into *output* via ffmpeg's concat demuxer.

    Re-encodes to H.265/AAC for 'mp4' and 'mkv' targets; stream-copies for
    anything else. Returns True on success, False on any failure.
    """
    ffmpeg_path = _shutil.which('ffmpeg')
    if not ffmpeg_path:
        log("ffmpeg not found in PATH", file=sys.stderr)
        return False
    try:
        # Write the concat demuxer list next to the output file.
        concat_file = output.parent / f".concat_{output.stem}.txt"
        entries = []
        for src in files:
            escaped = str(src).replace("'", "'\\''")  # shell-style quote escape
            entries.append(f"file '{escaped}'")
        concat_file.write_text('\n'.join(entries), encoding='utf-8')
        cmd = [ffmpeg_path, '-y', '-f', 'concat', '-safe', '0', '-i', str(concat_file)]
        # Pick codecs for the requested container.
        if output_format == 'mp4':
            cmd += ['-c:v', 'libx265', '-preset', 'fast', '-tag:v', 'hvc1', '-c:a', 'aac', '-b:a', '192k']
        elif output_format == 'mkv':
            cmd += ['-c:v', 'libx265', '-preset', 'fast', '-c:a', 'aac', '-b:a', '192k']
        else:
            cmd += ['-c', 'copy']  # stream copy, no re-encode
        cmd.append(str(output))
        log(f"Merging {len(files)} video files...", file=sys.stderr)
        proc = _subprocess.run(cmd, capture_output=True, text=True)
        # Remove the temporary concat list regardless of the outcome.
        try:
            concat_file.unlink()
        except Exception:
            pass
        if proc.returncode != 0:
            stderr = (proc.stderr or '').strip()
            log(f"FFmpeg error: {stderr}", file=sys.stderr)
            return False
        return True
    except Exception as e:
        log(f"Video merge error: {e}", file=sys.stderr)
        return False
def _merge_text(files: List[Path], output: Path) -> bool:
"""Merge text files."""
try:
with open(output, 'w', encoding='utf-8') as outf:
for i, f in enumerate(files):
if i > 0:
outf.write('\n---\n') # Separator between files
try:
content = f.read_text(encoding='utf-8', errors='replace')
outf.write(content)
except Exception as e:
log(f"Warning reading {f.name}: {e}", file=sys.stderr)
return True
except Exception as e:
log(f"Text merge error: {e}", file=sys.stderr)
return False
def _merge_pdf(files: List[Path], output: Path) -> bool:
    """Merge PDF files into a single document using PyPDF2.

    Appends every page of every input, in order, to *output*.

    Returns:
        False when PyPDF2 is unavailable, any input cannot be read, or the
        output cannot be written; True otherwise.
    """
    # Single availability gate; the original duplicated this check with an
    # unreachable second branch inside the try block.
    if not HAS_PYPDF2:
        log("PyPDF2 is required for PDF merging. Install with: pip install PyPDF2", file=sys.stderr)
        return False
    try:
        writer = PdfWriter()
        for src in files:
            try:
                reader = PdfReader(src)
                for page in reader.pages:
                    writer.add_page(page)
                log(f"Added {len(reader.pages)} pages from {src.name}", file=sys.stderr)
            except Exception as e:
                # One unreadable input aborts the whole merge.
                log(f"Error reading PDF {src.name}: {e}", file=sys.stderr)
                return False
        with open(output, 'wb') as outf:
            writer.write(outf)
        return True
    except Exception as e:
        log(f"PDF merge error: {e}", file=sys.stderr)
        return False
# Cmdlet registration metadata: printed as JSON for help flags and consumed
# by parse_cmdlet_args in _run. The format list now matches what the merge
# code actually supports (m4a/mka/webm were implemented but undocumented;
# mka is the audio default).
CMDLET = Cmdlet(
    name="merge-file",
    summary="Merge multiple files into a single output file. Supports audio, video, PDF, and text merging with optional cleanup.",
    usage="merge-file [-delete] [-output <path>] [-format <auto|mp3|m4a|aac|opus|mka|mp4|mkv|webm|pdf|txt>]",
    args=[
        CmdletArg("-delete", type="flag", description="Delete source files after successful merge."),
        CmdletArg("-output", description="Override output file path."),
        CmdletArg("-format", description="Output format (auto/mp3/m4a/aac/opus/mka/mp4/mkv/webm/pdf/txt). Default: auto-detect from first file."),
    ],
    details=[
        "- Pipe multiple files: search-file query | [1,2,3] | merge-file",
        "- Audio files merge with minimal quality loss using specified codec.",
        "- Video files merge into MP4 or MKV containers.",
        "- PDF files merge into a single PDF document.",
        "- Text/document files are concatenated.",
        "- Output name derived from first file with ' (merged)' suffix.",
        "- -delete flag removes all source files after successful merge.",
    ],
)