"""Merge multiple files into a single output file.""" from __future__ import annotations from typing import Any, Dict, Optional, Sequence, List from pathlib import Path import json import sys from helper.logger import log from helper.download import download_media from models import DownloadOptions from config import resolve_output_dir import subprocess as _subprocess import shutil as _shutil from ._shared import parse_cmdlet_args try: from PyPDF2 import PdfWriter, PdfReader HAS_PYPDF2 = True except ImportError: HAS_PYPDF2 = False PdfWriter = None PdfReader = None try: from metadata import ( read_tags_from_file, write_tags_to_file, dedup_tags_by_namespace, merge_multiple_tag_lists, write_tags, write_metadata ) HAS_METADATA_API = True except ImportError: HAS_METADATA_API = False from . import register from ._shared import ( Cmdlet, CmdletArg, normalize_result_input, get_pipe_object_path, get_pipe_object_hash, ) import models import pipeline as ctx def _get_item_value(item: Any, key: str, default: Any = None) -> Any: """Helper to read either dict keys or attributes.""" if isinstance(item, dict): return item.get(key, default) return getattr(item, key, default) def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: """Merge multiple files into one.""" # Parse help try: if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in args): log(json.dumps(CMDLET, ensure_ascii=False, indent=2)) return 0 except Exception: pass # Parse arguments parsed = parse_cmdlet_args(args, CMDLET) delete_after = parsed.get("delete", False) output_override: Optional[Path] = None output_arg = parsed.get("output") if output_arg: try: output_override = Path(str(output_arg)).expanduser() except Exception: output_override = None format_spec = parsed.get("format") if format_spec: format_spec = str(format_spec).lower().strip() # Collect files from piped results # Use normalize_result_input to handle both single items and lists files_to_merge: List[Dict[str, Any]] = normalize_result_input(result) if not files_to_merge: log("No files provided to merge", file=sys.stderr) return 1 if len(files_to_merge) < 2: # Only 1 file - pass it through unchanged # (merge only happens when multiple files are collected) item = files_to_merge[0] ctx.emit(item) return 0 # Extract file paths and metadata from result objects source_files: List[Path] = [] source_tags_files: List[Path] = [] source_hashes: List[str] = [] source_urls: List[str] = [] source_tags: List[str] = [] # NEW: collect tags from source files source_relationships: List[str] = [] # NEW: collect relationships from source files for item in files_to_merge: raw_path = get_pipe_object_path(item) target_path = None if isinstance(raw_path, Path): target_path = raw_path elif isinstance(raw_path, str) and raw_path.strip(): candidate = Path(raw_path).expanduser() if candidate.exists(): target_path = candidate # Check for playlist item that needs downloading if not target_path and isinstance(item, dict) and item.get('__action', '').startswith('playlist-item:'): try: playlist_url = item.get('__file_path') item_idx = int(item['__action'].split(':')[1]) log(f"Downloading playlist item #{item_idx} from {playlist_url}...", flush=True) output_dir = resolve_output_dir(config) opts = DownloadOptions( url=playlist_url, output_dir=output_dir, playlist_items=str(item_idx), mode="audio" if format_spec == "m4b" else "auto" # Infer mode if possible ) res = download_media(opts) if res and res.path and res.path.exists(): target_path = res.path log(f"✓ Downloaded: {target_path.name}", flush=True) except Exception as e: log(f"Failed to download playlist item: {e}", file=sys.stderr) if target_path and target_path.exists(): source_files.append(target_path) # Track the .tags file for this source tags_file = target_path.with_suffix(target_path.suffix + '.tags') if tags_file.exists(): source_tags_files.append(tags_file) # Try to read hash, tags, urls, and relationships from .tags sidecar file try: tags_content = tags_file.read_text(encoding='utf-8') for line in tags_content.split('\n'): line = line.strip() if not line: continue if line.startswith('hash:'): hash_value = line[5:].strip() if hash_value: source_hashes.append(hash_value) elif line.startswith('known_url:') or line.startswith('url:'): # Extract URLs from tags file url_value = line.split(':', 1)[1].strip() if ':' in line else '' if url_value and url_value not in source_urls: source_urls.append(url_value) elif line.startswith('relationship:'): # Extract relationships from tags file rel_value = line.split(':', 1)[1].strip() if ':' in line else '' if rel_value and rel_value not in source_relationships: source_relationships.append(rel_value) else: # Collect actual tags (not metadata like hash: or known_url:) source_tags.append(line) except Exception: pass # Extract hash if available in item (as fallback) hash_value = get_pipe_object_hash(item) if hash_value and hash_value not in source_hashes: source_hashes.append(str(hash_value)) # Extract known URLs if available known_urls = _get_item_value(item, 'known_urls', []) if isinstance(known_urls, str): source_urls.append(known_urls) elif isinstance(known_urls, list): source_urls.extend(known_urls) else: title = _get_item_value(item, 'title', 'unknown') or _get_item_value(item, 'id', 'unknown') log(f"Warning: Could not locate file for item: {title}", file=sys.stderr) if len(source_files) < 2: log("At least 2 valid files required to merge", file=sys.stderr) return 1 # Detect file types file_types = set() for f in source_files: suffix = f.suffix.lower() if suffix in {'.mp3', '.flac', '.wav', '.m4a', '.aac', '.ogg', '.opus', '.mka'}: file_types.add('audio') elif suffix in {'.mp4', '.mkv', '.webm', '.mov', '.avi', '.flv', '.mpg', '.mpeg', '.ts', '.m4v', '.wmv'}: file_types.add('video') elif suffix in {'.pdf'}: file_types.add('pdf') elif suffix in {'.txt', '.srt', '.vtt', '.md', '.log'}: file_types.add('text') else: file_types.add('other') if len(file_types) > 1 and 'other' not in file_types: log(f"Mixed file types detected: {', '.join(sorted(file_types))}", file=sys.stderr) log(f"Can only merge files of the same type", file=sys.stderr) return 1 file_kind = list(file_types)[0] if file_types else 'other' # Determine output format output_format = format_spec or 'auto' if output_format == 'auto': if file_kind == 'audio': output_format = 'mka' # Default audio codec - mka supports chapters and stream copy elif file_kind == 'video': output_format = 'mp4' # Default video codec elif file_kind == 'pdf': output_format = 'pdf' else: output_format = 'txt' # Determine output path if output_override: if output_override.is_dir(): base_name = _sanitize_name(getattr(files_to_merge[0], 'title', 'merged')) output_path = output_override / f"{base_name} (merged).{_ext_for_format(output_format)}" else: output_path = output_override else: first_file = source_files[0] output_path = first_file.parent / f"{first_file.stem} (merged).{_ext_for_format(output_format)}" # Ensure output directory exists output_path.parent.mkdir(parents=True, exist_ok=True) # Perform merge based on file type if file_kind == 'audio': success = _merge_audio(source_files, output_path, output_format) elif file_kind == 'video': success = _merge_video(source_files, output_path, output_format) elif file_kind == 'pdf': success = _merge_pdf(source_files, output_path) elif file_kind == 'text': success = _merge_text(source_files, output_path) else: log(f"Unsupported file type: {file_kind}", file=sys.stderr) return 1 if not success: log("Merge failed", file=sys.stderr) return 1 log(f"Merged {len(source_files)} files into: {output_path}", file=sys.stderr) # Create .tags sidecar file for the merged output using unified API tags_path = output_path.with_suffix(output_path.suffix + '.tags') try: # Start with title tag merged_tags = [f"title:{output_path.stem}"] # Merge tags from source files using metadata API if source_tags and HAS_METADATA_API: # Use dedup function to normalize and deduplicate merged_source_tags = dedup_tags_by_namespace(source_tags) merged_tags.extend(merged_source_tags) log(f"Merged {len(merged_source_tags)} unique tags from source files", file=sys.stderr) elif source_tags: # Fallback: simple deduplication if metadata API unavailable merged_tags.extend(list(dict.fromkeys(source_tags))) # Preserve order, remove duplicates # Write merged tags to sidecar file if HAS_METADATA_API and write_tags_to_file: # Use unified API for file writing source_hashes_list = source_hashes if source_hashes else None source_urls_list = source_urls if source_urls else None write_tags_to_file(tags_path, merged_tags, source_hashes_list, source_urls_list) else: # Fallback: manual file writing tags_lines = [] # Add hash first (if available) if source_hashes: tags_lines.append(f"hash:{source_hashes[0]}") # Add regular tags tags_lines.extend(merged_tags) # Add known URLs if source_urls: for url in source_urls: tags_lines.append(f"known_url:{url}") # Add relationships (if available) if source_relationships: for rel in source_relationships: tags_lines.append(f"relationship:{rel}") with open(tags_path, 'w', encoding='utf-8') as f: f.write('\n'.join(tags_lines) + '\n') log(f"Created sidecar: {tags_path.name}", file=sys.stderr) # Also create .metadata file using centralized function try: write_metadata(output_path, source_hashes[0] if source_hashes else None, source_urls, source_relationships) log(f"Created metadata: {output_path.name}.metadata", file=sys.stderr) except Exception as e: log(f"Warning: Could not create metadata file: {e}", file=sys.stderr) except Exception as e: log(f"Warning: Could not create sidecar: {e}", file=sys.stderr) # Emit PipelineItem so the merged file can be piped to next command try: # Try to import PipelineItem from downlow module try: from downlow import PipelineItem except ImportError: # Fallback: create a simple object with the required attributes class SimpleItem: def __init__(self, target, title, media_kind, tags=None, known_urls=None): self.target = target self.title = title self.media_kind = media_kind self.tags = tags or [] self.known_urls = known_urls or [] PipelineItem = SimpleItem merged_item = PipelineItem( target=str(output_path), title=output_path.stem, media_kind=file_kind, tags=merged_tags, # Include merged tags known_urls=source_urls # Include known URLs ) ctx.emit(merged_item) except Exception as e: log(f"Warning: Could not emit pipeline item: {e}", file=sys.stderr) # Still emit a string representation for feedback ctx.emit(f"Merged: {output_path}") # Delete source files if requested if delete_after: # First delete all .tags files for tags_file in source_tags_files: try: tags_file.unlink() log(f"Deleted: {tags_file.name}", file=sys.stderr) except Exception as e: log(f"Warning: Could not delete {tags_file.name}: {e}", file=sys.stderr) # Then delete all source files for f in source_files: try: f.unlink() log(f"Deleted: {f.name}", file=sys.stderr) except Exception as e: log(f"Warning: Could not delete {f.name}: {e}", file=sys.stderr) return 0 def _sanitize_name(text: str) -> str: """Sanitize filename.""" allowed = [] for ch in text: allowed.append(ch if (ch.isalnum() or ch in {"-", "_", " ", "."}) else " ") return (" ".join("".join(allowed).split()) or "merged").strip() def _ext_for_format(fmt: str) -> str: """Get file extension for format.""" format_map = { 'mp3': 'mp3', 'm4a': 'm4a', 'aac': 'aac', 'opus': 'opus', 'mka': 'mka', # Matroska Audio - EXCELLENT chapter support (recommended) 'mkv': 'mkv', 'mp4': 'mp4', 'webm': 'webm', 'pdf': 'pdf', 'txt': 'txt', 'auto': 'mka', # Default - MKA for chapters } return format_map.get(fmt.lower(), 'mka') def _add_chapters_to_m4a(file_path: Path, chapters: List[Dict]) -> bool: """Add chapters to an M4A file using mutagen. Args: file_path: Path to M4A file chapters: List of chapter dicts with 'title', 'start_ms', 'end_ms' Returns: True if successful, False otherwise """ import logging logger = logging.getLogger(__name__) if not chapters: return True try: from mutagen.mp4 import MP4, Atom from mutagen.mp4._util import Atom as MP4Atom except ImportError: logger.warning("[merge-file] mutagen not available for chapter writing") return False try: # Load the MP4 file audio = MP4(str(file_path)) # Build the chapter atom # MP4 chapters are stored in a 'chap' atom with specific structure chapter_data = b'' for i, chapter in enumerate(chapters, 1): # Each chapter entry: 10-byte header + title title = chapter.get('title', f'Chapter {i}').encode('utf-8') start_time_ms = int(chapter.get('start_ms', 0)) # Chapter atom format for M4A: # (uint32: size)(uint32: 'chap')(uint8: reserved)(uint24: atom type) + more... # This is complex, so we'll use a simpler atom approach pass # Unfortunately, mutagen doesn't have built-in chapter writing for MP4 # Chapter writing requires low-level atom manipulation # For now, we'll just return and note this limitation logger.info("[merge-file] MP4 chapter writing via mutagen not fully supported") return False except Exception as e: logger.warning(f"[merge-file] Error writing chapters: {e}") return False def _merge_audio(files: List[Path], output: Path, output_format: str) -> bool: """Merge audio files with chapters based on file boundaries.""" import logging logger = logging.getLogger(__name__) ffmpeg_path = _shutil.which('ffmpeg') if not ffmpeg_path: log("ffmpeg not found in PATH", file=sys.stderr) return False try: # Step 1: Get duration of each file to calculate chapter timestamps chapters = [] current_time_ms = 0 log(f"Analyzing {len(files)} files for chapter information...", file=sys.stderr) logger.info(f"[merge-file] Analyzing files for chapters") for file_path in files: # Get duration using ffprobe try: ffprobe_cmd = [ 'ffprobe', '-v', 'error', '-show_entries', 'format=duration', '-print_format', 'default=noprint_wrappers=1:nokey=1', str(file_path) ] probe_result = _subprocess.run(ffprobe_cmd, capture_output=True, text=True, timeout=10) if probe_result.returncode == 0 and probe_result.stdout.strip(): try: duration_sec = float(probe_result.stdout.strip()) except ValueError: logger.warning(f"[merge-file] Could not parse duration from ffprobe output: {probe_result.stdout}") duration_sec = 0 else: logger.warning(f"[merge-file] ffprobe failed for {file_path.name}: {probe_result.stderr}") duration_sec = 0 except Exception as e: logger.warning(f"[merge-file] Could not get duration for {file_path.name}: {e}") duration_sec = 0 # Create chapter entry - use title: tag from metadata if available title = file_path.stem # Default to filename without extension if HAS_METADATA_API: try: # Try to read tags from .tags sidecar file tags_file = file_path.with_suffix(file_path.suffix + '.tags') if tags_file.exists(): tags = read_tags_from_file(tags_file) if tags: # Look for title: tag for tag in tags: if isinstance(tag, str) and tag.lower().startswith('title:'): # Extract the title value after the colon title = tag.split(':', 1)[1].strip() break except Exception as e: logger.debug(f"[merge-file] Could not read metadata for {file_path.name}: {e}") pass # Fall back to filename # Convert seconds to HH:MM:SS.mmm format hours = int(current_time_ms // 3600000) minutes = int((current_time_ms % 3600000) // 60000) seconds = int((current_time_ms % 60000) // 1000) millis = int(current_time_ms % 1000) chapters.append({ 'time_ms': current_time_ms, 'time_str': f"{hours:02d}:{minutes:02d}:{seconds:02d}.{millis:03d}", 'title': title, 'duration_sec': duration_sec }) logger.info(f"[merge-file] Chapter: {title} @ {chapters[-1]['time_str']} (duration: {duration_sec:.2f}s)") current_time_ms += int(duration_sec * 1000) # Step 2: Create concat demuxer file concat_file = output.parent / f".concat_{output.stem}.txt" concat_lines = [] for f in files: # Escape quotes in path safe_path = str(f).replace("'", "'\\''") concat_lines.append(f"file '{safe_path}'") concat_file.write_text('\n'.join(concat_lines), encoding='utf-8') # Step 3: Create FFmpeg metadata file with chapters metadata_file = output.parent / f".metadata_{output.stem}.txt" metadata_lines = [';FFMETADATA1'] for i, chapter in enumerate(chapters): # FFMetadata format for chapters (note: [CHAPTER] not [CHAPTER01]) metadata_lines.append('[CHAPTER]') metadata_lines.append('TIMEBASE=1/1000') metadata_lines.append(f'START={chapter["time_ms"]}') # Calculate end time (start of next chapter or end of file) if i < len(chapters) - 1: metadata_lines.append(f'END={chapters[i+1]["time_ms"]}') else: metadata_lines.append(f'END={current_time_ms}') metadata_lines.append(f'title={chapter["title"]}') metadata_file.write_text('\n'.join(metadata_lines), encoding='utf-8') log(f"Created chapters metadata file with {len(chapters)} chapters", file=sys.stderr) logger.info(f"[merge-file] Created {len(chapters)} chapters") # Step 4: Build FFmpeg command to merge and embed chapters # Strategy: First merge audio, then add metadata in separate pass cmd = [ffmpeg_path, '-y', '-f', 'concat', '-safe', '0', '-i', str(concat_file)] # Add threading options for speed cmd.extend(['-threads', '0']) # Use all available threads # Audio codec selection for first input if output_format == 'mp3': cmd.extend(['-c:a', 'libmp3lame', '-q:a', '2']) elif output_format == 'm4a': # Use copy if possible (much faster), otherwise re-encode # Check if inputs are already AAC/M4A to avoid re-encoding # For now, default to copy if format matches, otherwise re-encode # But since we are merging potentially different codecs, re-encoding is safer # To speed up re-encoding, we can use a faster preset or hardware accel if available cmd.extend(['-c:a', 'aac', '-b:a', '256k']) # M4A with better quality elif output_format == 'aac': cmd.extend(['-c:a', 'aac', '-b:a', '192k']) elif output_format == 'opus': cmd.extend(['-c:a', 'libopus', '-b:a', '128k']) elif output_format == 'mka': # FLAC is fast to encode but large. Copy is fastest if inputs are compatible. # If we want speed, copy is best. If we want compatibility, re-encode. # Let's try copy first if inputs are same format, but that's hard to detect here. # Defaulting to copy for MKA as it's a container that supports many codecs cmd.extend(['-c:a', 'copy']) else: cmd.extend(['-c:a', 'copy']) # Copy without re-encoding # Add the output file cmd.append(str(output)) log(f"Merging {len(files)} audio files to {output_format}...", file=sys.stderr) logger.info(f"[merge-file] Running ffmpeg merge: {' '.join(cmd)}") # Run ffmpeg with progress monitoring try: from helper.progress import print_progress, print_final_progress import re process = _subprocess.Popen( cmd, stdout=_subprocess.PIPE, stderr=_subprocess.PIPE, text=True, encoding='utf-8', errors='replace' ) # Monitor progress duration_re = re.compile(r"time=(\d{2}):(\d{2}):(\d{2})\.(\d{2})") total_duration_sec = current_time_ms / 1000.0 while True: # Read stderr line by line (ffmpeg writes progress to stderr) if process.stderr: line = process.stderr.readline() if not line and process.poll() is not None: break if line: # Parse time=HH:MM:SS.mm match = duration_re.search(line) if match and total_duration_sec > 0: h, m, s, cs = map(int, match.groups()) current_sec = h * 3600 + m * 60 + s + cs / 100.0 # Calculate speed/bitrate if available (optional) # For now just show percentage print_progress( output.name, int(current_sec * 1000), # Use ms as "bytes" for progress bar int(total_duration_sec * 1000), speed=0 ) else: break # Wait for completion stdout, stderr = process.communicate() if process.returncode != 0: log(f"FFmpeg error: {stderr}", file=sys.stderr) raise _subprocess.CalledProcessError(process.returncode, cmd, output=stdout, stderr=stderr) print_final_progress(output.name, int(total_duration_sec * 1000), 0) except Exception as e: logger.exception(f"[merge-file] ffmpeg process error: {e}") raise log(f"Merge successful, adding chapters metadata...", file=sys.stderr) # Step 5: Embed chapters into container (MKA, MP4/M4A, or note limitation) if output_format == 'mka' or output.suffix.lower() == '.mka': # MKA/MKV format has native chapter support via FFMetadata # Re-mux the file with chapters embedded (copy streams, no re-encode) log(f"Embedding chapters into Matroska container...", file=sys.stderr) logger.info(f"[merge-file] Adding chapters to MKA file via FFMetadata") temp_output = output.parent / f".temp_{output.stem}.mka" # Use mkvmerge if available (best for MKA chapters), otherwise fall back to ffmpeg mkvmerge_path = _shutil.which('mkvmerge') if mkvmerge_path: # mkvmerge is the best tool for embedding chapters in Matroska files log(f"Using mkvmerge for optimal chapter embedding...", file=sys.stderr) cmd2 = [ mkvmerge_path, '-o', str(temp_output), '--chapters', str(metadata_file), str(output) ] else: # Fallback to ffmpeg with proper chapter embedding for Matroska log(f"Using ffmpeg for chapter embedding (install mkvtoolnix for better quality)...", file=sys.stderr) # For Matroska files, the metadata must be provided via -f ffmetadata input cmd2 = [ ffmpeg_path, '-y', '-i', str(output), # Input: merged audio '-i', str(metadata_file), # Input: FFMetadata file '-c:a', 'copy', # Copy audio without re-encoding '-threads', '0', # Use all threads '-map', '0', # Map all from first input '-map_chapters', '1', # Map CHAPTERS from second input (FFMetadata) str(temp_output) # Output ] logger.info(f"[merge-file] Running chapter embedding: {' '.join(cmd2)}") try: # Run chapter embedding silently (progress handled by worker thread) _subprocess.run( cmd2, capture_output=True, text=True, stdin=_subprocess.DEVNULL, timeout=600, check=False ) # Replace original with temp if successful if temp_output.exists() and temp_output.stat().st_size > 0: try: import shutil if output.exists(): output.unlink() shutil.move(str(temp_output), str(output)) log(f"✓ Chapters successfully embedded!", file=sys.stderr) logger.info(f"[merge-file] Chapters embedded successfully") except Exception as e: logger.warning(f"[merge-file] Could not replace file: {e}") log(f"Warning: Could not embed chapters, using merge without chapters", file=sys.stderr) try: temp_output.unlink() except Exception: pass else: logger.warning(f"[merge-file] Chapter embedding did not create output") except Exception as e: logger.exception(f"[merge-file] Chapter embedding failed: {e}") log(f"Warning: Chapter embedding failed, using merge without chapters", file=sys.stderr) elif output_format == 'm4a' or output.suffix.lower() in ['.m4a', '.mp4']: # MP4/M4A format has native chapter support via iTunes metadata atoms log(f"Embedding chapters into MP4 container...", file=sys.stderr) logger.info(f"[merge-file] Adding chapters to M4A/MP4 file via iTunes metadata") temp_output = output.parent / f".temp_{output.stem}{output.suffix}" # ffmpeg embeds chapters in MP4 using -map_metadata and -map_chapters log(f"Using ffmpeg for MP4 chapter embedding...", file=sys.stderr) cmd2 = [ ffmpeg_path, '-y', '-i', str(output), # Input: merged audio '-i', str(metadata_file), # Input: FFMetadata file '-c:a', 'copy', # Copy audio without re-encoding '-threads', '0', # Use all threads '-map', '0', # Map all from first input '-map_metadata', '1', # Map metadata from second input (FFMetadata) '-map_chapters', '1', # Map CHAPTERS from second input (FFMetadata) str(temp_output) # Output ] logger.info(f"[merge-file] Running MP4 chapter embedding: {' '.join(cmd2)}") try: # Run MP4 chapter embedding silently (progress handled by worker thread) _subprocess.run( cmd2, capture_output=True, text=True, stdin=_subprocess.DEVNULL, timeout=600, check=False ) # Replace original with temp if successful if temp_output.exists() and temp_output.stat().st_size > 0: try: import shutil if output.exists(): output.unlink() shutil.move(str(temp_output), str(output)) log(f"✓ Chapters successfully embedded in MP4!", file=sys.stderr) logger.info(f"[merge-file] MP4 chapters embedded successfully") except Exception as e: logger.warning(f"[merge-file] Could not replace file: {e}") log(f"Warning: Could not embed chapters, using merge without chapters", file=sys.stderr) try: temp_output.unlink() except Exception: pass else: logger.warning(f"[merge-file] MP4 chapter embedding did not create output") except Exception as e: logger.exception(f"[merge-file] MP4 chapter embedding failed: {e}") log(f"Warning: MP4 chapter embedding failed, using merge without chapters", file=sys.stderr) else: # For other formats, chapters would require external tools logger.info(f"[merge-file] Format {output_format} does not have native chapter support") log(f"Note: For chapter support, use MKA or M4A format", file=sys.stderr) # Clean up temp files try: concat_file.unlink() except Exception: pass try: metadata_file.unlink() except Exception: pass return True except Exception as e: log(f"Audio merge error: {e}", file=sys.stderr) logger.error(f"[merge-file] Audio merge error: {e}", exc_info=True) return False def _merge_video(files: List[Path], output: Path, output_format: str) -> bool: """Merge video files.""" ffmpeg_path = _shutil.which('ffmpeg') if not ffmpeg_path: log("ffmpeg not found in PATH", file=sys.stderr) return False try: # Create concat demuxer file concat_file = output.parent / f".concat_{output.stem}.txt" concat_lines = [] for f in files: safe_path = str(f).replace("'", "'\\''") concat_lines.append(f"file '{safe_path}'") concat_file.write_text('\n'.join(concat_lines), encoding='utf-8') # Build FFmpeg command for video merge cmd = [ffmpeg_path, '-y', '-f', 'concat', '-safe', '0', '-i', str(concat_file)] # Video codec selection if output_format == 'mp4': cmd.extend(['-c:v', 'libx265', '-preset', 'fast', '-tag:v', 'hvc1', '-c:a', 'aac', '-b:a', '192k']) elif output_format == 'mkv': cmd.extend(['-c:v', 'libx265', '-preset', 'fast', '-c:a', 'aac', '-b:a', '192k']) else: cmd.extend(['-c', 'copy']) # Copy without re-encoding cmd.append(str(output)) log(f"Merging {len(files)} video files...", file=sys.stderr) result = _subprocess.run(cmd, capture_output=True, text=True) # Clean up concat file try: concat_file.unlink() except Exception: pass if result.returncode != 0: stderr = (result.stderr or '').strip() log(f"FFmpeg error: {stderr}", file=sys.stderr) return False return True except Exception as e: log(f"Video merge error: {e}", file=sys.stderr) return False def _merge_text(files: List[Path], output: Path) -> bool: """Merge text files.""" try: with open(output, 'w', encoding='utf-8') as outf: for i, f in enumerate(files): if i > 0: outf.write('\n---\n') # Separator between files try: content = f.read_text(encoding='utf-8', errors='replace') outf.write(content) except Exception as e: log(f"Warning reading {f.name}: {e}", file=sys.stderr) return True except Exception as e: log(f"Text merge error: {e}", file=sys.stderr) return False def _merge_pdf(files: List[Path], output: Path) -> bool: """Merge PDF files.""" if not HAS_PYPDF2: log("PyPDF2 is required for PDF merging. Install with: pip install PyPDF2", file=sys.stderr) return False try: if HAS_PYPDF2: writer = PdfWriter() else: log("PyPDF2 is required for PDF merging. Install with: pip install PyPDF2", file=sys.stderr) return False for f in files: try: reader = PdfReader(f) for page in reader.pages: writer.add_page(page) log(f"Added {len(reader.pages)} pages from {f.name}", file=sys.stderr) except Exception as e: log(f"Error reading PDF {f.name}: {e}", file=sys.stderr) return False with open(output, 'wb') as outf: writer.write(outf) return True except Exception as e: log(f"PDF merge error: {e}", file=sys.stderr) return False CMDLET = Cmdlet( name="merge-file", summary="Merge multiple files into a single output file. Supports audio, video, PDF, and text merging with optional cleanup.", usage="merge-file [-delete] [-output ] [-format ]", args=[ CmdletArg("-delete", type="flag", description="Delete source files after successful merge."), CmdletArg("-output", description="Override output file path."), CmdletArg("-format", description="Output format (auto/mp3/aac/opus/mp4/mkv/pdf/txt). Default: auto-detect from first file."), ], details=[ "- Pipe multiple files: search-file query | [1,2,3] | merge-file", "- Audio files merge with minimal quality loss using specified codec.", "- Video files merge into MP4 or MKV containers.", "- PDF files merge into a single PDF document.", "- Text/document files are concatenated.", "- Output name derived from first file with ' (merged)' suffix.", "- -delete flag removes all source files after successful merge.", ], )