This commit is contained in:
nose
2025-12-11 23:21:45 -08:00
parent 16d8a763cd
commit e2ffcab030
44 changed files with 3558 additions and 1793 deletions

View File

@@ -33,6 +33,13 @@ try:
except ImportError: # pragma: no cover
sha256_file = None # type: ignore[assignment]
try: # Optional metadata helper for audio files
import mutagen # type: ignore
except ImportError: # pragma: no cover - best effort
mutagen = None # type: ignore
from SYS.utils import sanitize_metadata_value, unique_preserve_order
try:
from helpers.hydrus import HydrusClient, HydrusRequestError, HydrusRequestSpec # type: ignore
except ImportError: # pragma: no cover
@@ -50,6 +57,223 @@ else: # pragma: no cover
_CURRENT_RELATIONSHIP_TRACKER = FileRelationshipTracker()
def prepare_ffmpeg_metadata(payload: Optional[Dict[str, Any]]) -> Dict[str, str]:
    """Derive ffmpeg/mutagen metadata tags from a generic metadata payload.
    This is not Hydrus-specific; it is used by exporters/converters.

    Args:
        payload: Generic metadata dict; any non-dict input yields an empty
            result. Recognised keys: "title", "artist", "album", "year",
            "comment", and "tag" (a list of plain or "namespace:value" tag
            strings).

    Returns:
        Mapping of ffmpeg metadata field names ("title", "artist", "album",
        "date", "genre", "keywords", "comment", "description") to sanitized,
        length-limited string values.
    """
    if not isinstance(payload, dict):
        return {}
    metadata: Dict[str, str] = {}

    def set_field(key: str, raw: Any, limit: int = 2000) -> None:
        # Sanitize, truncate to `limit` characters, and store; values that
        # sanitize to empty are silently dropped.
        sanitized = sanitize_metadata_value(raw)
        if not sanitized:
            return
        if len(sanitized) > limit:
            sanitized = sanitized[:limit]
        metadata[key] = sanitized

    # Direct payload fields take priority over values mined from tags below.
    set_field("title", payload.get("title"))
    set_field("artist", payload.get("artist"), 512)
    set_field("album", payload.get("album"), 512)
    set_field("date", payload.get("year"), 20)
    comment = payload.get("comment")
    tags_value = payload.get("tag")
    tag_strings: List[str] = []
    artists_from_tags: List[str] = []
    albums_from_tags: List[str] = []
    genres_from_tags: List[str] = []
    if isinstance(tags_value, list):
        for raw_tag in tags_value:
            if raw_tag is None:
                continue
            if not isinstance(raw_tag, str):
                raw_tag = str(raw_tag)
            tag = raw_tag.strip()
            if not tag:
                continue
            tag_strings.append(tag)
            # Split "namespace:value" tags and route known namespaces into
            # dedicated metadata fields.
            namespace, sep, value = tag.partition(":")
            if sep and value:
                ns = namespace.strip().lower()
                value = value.strip()
                if ns in {"artist", "creator", "author", "performer"}:
                    artists_from_tags.append(value)
                elif ns in {"album", "series", "collection", "group"}:
                    albums_from_tags.append(value)
                elif ns in {"genre", "rating"}:
                    genres_from_tags.append(value)
                elif ns in {"comment", "description"} and not comment:
                    # First comment-like tag wins, and only when the payload
                    # did not already carry a comment.
                    comment = value
                elif ns in {"year", "date"} and not payload.get("year"):
                    set_field("date", value, 20)
            else:
                # NOTE(review): source view lost indentation; this else is
                # read as pairing with "if sep and value" so namespace-less
                # tags become genre entries — confirm against the original.
                genres_from_tags.append(tag)
    # Tag-derived values only fill fields the payload did not set directly.
    if "artist" not in metadata and artists_from_tags:
        set_field("artist", ", ".join(unique_preserve_order(artists_from_tags)[:3]), 512)
    if "album" not in metadata and albums_from_tags:
        set_field("album", unique_preserve_order(albums_from_tags)[0], 512)
    if genres_from_tags:
        set_field("genre", ", ".join(unique_preserve_order(genres_from_tags)[:5]), 256)
    if tag_strings:
        # Cap keywords at the first 50 tags; reuse them as a fallback comment.
        joined_tags = ", ".join(tag_strings[:50])
        set_field("keywords", joined_tags, 2000)
        if not comment:
            comment = joined_tags
    if comment:
        set_field("comment", comment, 2000)
        set_field("description", comment, 2000)
    return metadata
def apply_mutagen_metadata(path: Path, metadata: Dict[str, str], fmt: str) -> None:
    """Best-effort metadata writing for audio containers.

    Only the generic "audio" export format is tagged; everything else is a
    no-op, as is a missing ``mutagen`` dependency or an empty metadata dict.
    Load/save failures are logged and swallowed.
    """
    if fmt != "audio" or not metadata or mutagen is None:
        return
    try:
        container = mutagen.File(path, easy=True)  # type: ignore[attr-defined]
    except Exception as exc:  # pragma: no cover - best effort only
        log(f"mutagen load failed: {exc}", file=sys.stderr)
        return
    if container is None:
        # Unrecognised container type.
        return
    # Map our metadata keys onto easy-tag names; "description" collapses
    # into "comment".
    targets = {
        "title": "title",
        "artist": "artist",
        "album": "album",
        "genre": "genre",
        "comment": "comment",
        "description": "comment",
        "date": "date",
    }
    wrote_any = False
    for src_key, dst_key in targets.items():
        text = metadata.get(src_key)
        if not text:
            continue
        try:
            container[dst_key] = [text]
            wrote_any = True
        except Exception:  # pragma: no cover
            continue
    if wrote_any:
        try:
            container.save()
        except Exception as exc:  # pragma: no cover
            log(f"mutagen save failed: {exc}", file=sys.stderr)
def build_ffmpeg_command(
    ffmpeg_path: str,
    input_path: Path,
    output_path: Path,
    fmt: str,
    max_width: int,
    metadata: Optional[Dict[str, str]] = None,
) -> List[str]:
    """Build an ffmpeg command line for common export formats.

    Args:
        ffmpeg_path: Path to the ffmpeg executable.
        input_path: Source media file.
        output_path: Destination file.
        fmt: Target format — "mp4", "webm", an audio format ("mp3", "flac",
            "wav", "aac", "m4a", "ogg", "opus", or the legacy "audio" alias
            for mp3), or "copy" to add no transcode options.
        max_width: Width cap for video formats; values <= 0 disable scaling.
        metadata: Optional key/value pairs emitted as ``-metadata key=value``.

    Returns:
        The full argv list, ready for subprocess execution.

    Raises:
        ValueError: If ``fmt`` is not one of the recognised formats.
    """
    # Audio-only formats: (codec arguments, container name passed to -f).
    audio_formats: Dict[str, tuple] = {
        "mp3": (["-c:a", "libmp3lame", "-b:a", "192k"], "mp3"),
        "flac": (["-c:a", "flac"], "flac"),
        "wav": (["-c:a", "pcm_s16le"], "wav"),
        "aac": (["-c:a", "aac", "-b:a", "192k"], "adts"),
        "m4a": (["-c:a", "aac", "-b:a", "192k"], "ipod"),
        "ogg": (["-c:a", "libvorbis", "-b:a", "192k"], "ogg"),
        "opus": (["-c:a", "libopus", "-b:a", "192k"], "opus"),
        # Legacy format name for mp3.
        "audio": (["-c:a", "libmp3lame", "-b:a", "192k"], "mp3"),
    }

    argv: List[str] = [ffmpeg_path, "-y", "-i", str(input_path)]
    if fmt in {"mp4", "webm"} and max_width and max_width > 0:
        # Downscale to at most max_width, preserving aspect; -2 keeps the
        # height even as many encoders require.
        argv += ["-vf", f"scale='min({max_width},iw)':-2"]
    for key, value in (metadata or {}).items():
        argv += ["-metadata", f"{key}={value}"]

    if fmt == "mp4":
        argv += [
            "-c:v", "libx265",
            "-preset", "medium",
            "-crf", "26",
            "-tag:v", "hvc1",
            "-pix_fmt", "yuv420p",
            "-c:a", "aac",
            "-b:a", "192k",
            "-movflags", "+faststart",
        ]
    elif fmt == "webm":
        argv += [
            "-c:v", "libvpx-vp9",
            "-b:v", "0",
            "-crf", "32",
            "-c:a", "libopus",
            "-b:a", "160k",
            "-f", "webm",
        ]
    elif fmt in audio_formats:
        codec_args, container = audio_formats[fmt]
        argv += ["-vn", *codec_args, "-f", container]
    elif fmt != "copy":
        raise ValueError(f"Unsupported format: {fmt}")

    argv.append(str(output_path))
    return argv
def field(obj: Any, name: str, value: Any = None) -> Any:
"""Get or set a field on dict or object.
@@ -131,9 +355,9 @@ def value_normalize(value: str) -> str:
def import_pending_sidecars(db_root: Path, db: Any) -> None:
"""Import pending sidecars (.tag/.tags/.metadata/.notes) into the database."""
"""Import pending sidecars (.tag/.metadata/.notes) into the database."""
try:
sidecar_patterns = ['**/*.tag', '**/*.tags', '**/*.metadata', '**/*.notes']
sidecar_patterns = ['**/*.tag', '**/*.metadata', '**/*.notes']
for pattern in sidecar_patterns:
for sidecar_path in db_root.glob(pattern):
@@ -174,7 +398,7 @@ def import_pending_sidecars(db_root: Path, db: Any) -> None:
if not file_id:
continue
if sidecar_path.suffix in {'.tag', '.tags'}:
if sidecar_path.suffix == '.tag':
try:
content = sidecar_path.read_text(encoding='utf-8')
except Exception:
@@ -395,7 +619,7 @@ def imdb_tag(imdb_id: str) -> Dict[str, object]:
break
if cast_names:
_extend_tags(tags, "cast", cast_names)
return {"source": "imdb", "id": canonical_id, "tags": tags}
return {"source": "imdb", "id": canonical_id, "tag": tags}
def fetch_musicbrainz_tags(mbid: str, entity: str) -> Dict[str, object]:
if not musicbrainzngs:
raise RuntimeError("musicbrainzngs package is not available")
@@ -451,7 +675,7 @@ def fetch_musicbrainz_tags(mbid: str, entity: str) -> Dict[str, object]:
for genre in genre_list:
if isinstance(genre, dict) and genre.get("name"):
_add_tag(tags, "genre", genre["name"])
return {"source": "musicbrainz", "id": mbid, "tags": tags, "entity": entity}
return {"source": "musicbrainz", "id": mbid, "tag": tags, "entity": entity}
def fetch_openlibrary_tags(ol_id: str) -> Dict[str, object]:
@@ -461,7 +685,7 @@ def fetch_openlibrary_tags(ol_id: str) -> Dict[str, object]:
ol_id: OpenLibrary ID (e.g., 'OL123456M' for a book)
Returns:
Dictionary with 'tags' key containing list of extracted tags
Dictionary with 'tag' key containing list of extracted tags
"""
import urllib.request
@@ -573,7 +797,7 @@ def fetch_openlibrary_tags(ol_id: str) -> Dict[str, object]:
description = description.get("value")
_add_tag(tags, "summary", description)
return {"source": "openlibrary", "id": ol_id, "tags": tags}
return {"source": "openlibrary", "id": ol_id, "tag": tags}
def _append_unique(target: List[str], seen: Set[str], value: Optional[str]) -> None:
@@ -1328,25 +1552,16 @@ def _normalise_string_list(values: Optional[Iterable[Any]]) -> List[str]:
def _derive_sidecar_path(media_path: Path) -> Path:
"""Return preferred sidecar path (.tag), falling back to legacy .tags if it exists.
Keeps backward compatibility by preferring existing .tags, but new writes use .tag.
"""
"""Return sidecar path (.tag)."""
try:
preferred = media_path.parent / (media_path.name + '.tag')
legacy = media_path.parent / (media_path.name + '.tags')
except ValueError:
preferred = media_path.with_name(media_path.name + '.tag')
legacy = media_path.with_name(media_path.name + '.tags')
# Prefer legacy if it already exists to avoid duplicate sidecars
if legacy.exists():
return legacy
return preferred
def _read_sidecar_metadata(sidecar_path: Path) -> tuple[Optional[str], List[str], List[str]]:
"""Read hash, tags, and url from .tags sidecar file.
"""Read hash, tags, and url from sidecar file.
Consolidated with read_tags_from_file - this extracts extra metadata (hash, url).
"""
@@ -1389,7 +1604,7 @@ def _read_sidecar_metadata(sidecar_path: Path) -> tuple[Optional[str], List[str]
def rename(file_path: Path, tags: Iterable[str]) -> Optional[Path]:
"""Rename a file based on title: tag in the tags list.
If a title: tag is present, renames the file and any .tags/.metadata sidecars.
If a title: tag is present, renames the file and any .tag/.metadata sidecars.
Args:
file_path: Path to the file to potentially rename
@@ -1432,10 +1647,10 @@ def rename(file_path: Path, tags: Iterable[str]) -> Optional[Path]:
file_path.rename(new_path)
debug(f"Renamed file: {old_name}{new_name}", file=sys.stderr)
# Rename the .tags sidecar if it exists
old_tags_path = file_path.parent / (old_name + '.tags')
# Rename the .tag sidecar if it exists
old_tags_path = file_path.parent / (old_name + '.tag')
if old_tags_path.exists():
new_tags_path = file_path.parent / (new_name + '.tags')
new_tags_path = file_path.parent / (new_name + '.tag')
if new_tags_path.exists():
try:
new_tags_path.unlink()
@@ -1508,14 +1723,6 @@ def write_tags(media_path: Path, tags: Iterable[str], url: Iterable[str], hash_v
if lines:
sidecar.write_text("\n".join(lines) + "\n", encoding="utf-8")
debug(f"Tags: {sidecar}")
# Clean up legacy files
for legacy_path in [media_path.with_name(media_path.name + '.tags'),
media_path.with_name(media_path.name + '.tags.txt')]:
if legacy_path.exists() and legacy_path != sidecar:
try:
legacy_path.unlink()
except OSError:
pass
else:
try:
sidecar.unlink()
@@ -1691,7 +1898,7 @@ def _locate_sidecar_by_hash(hash_value: str, roots: Iterable[Path]) -> Optional[
continue
if not root_path.exists() or not root_path.is_dir():
continue
for pattern in ('*.tags', '*.tags.txt'):
for pattern in ('*.tag',):
try:
iterator = root_path.rglob(pattern)
except OSError:
@@ -1711,80 +1918,35 @@ def _locate_sidecar_by_hash(hash_value: str, roots: Iterable[Path]) -> Optional[
def sync_sidecar(payload: Dict[str, Any]) -> Dict[str, Any]:
path_value = payload.get('path')
sidecar_path: Optional[Path] = None
media_path: Optional[Path] = None
if path_value:
candidate = Path(str(path_value)).expanduser()
if candidate.suffix.lower() in {'.tags', '.tags.txt'}:
sidecar_path = candidate
else:
media_path = candidate
hash_input = payload.get('hash')
hash_value = None
if hash_input:
hash_value = _normalize_hash(hash_input)
tags = _normalise_string_list(payload.get('tags'))
url = _normalise_string_list(payload.get('url'))
if media_path is not None:
sidecar_path = _derive_sidecar_path(media_path)
search_roots = _collect_search_roots(payload)
if sidecar_path is None and hash_value:
located = _locate_sidecar_by_hash(hash_value, search_roots)
if located is not None:
sidecar_path = located
if sidecar_path is None:
if media_path is not None:
sidecar_path = _derive_sidecar_path(media_path)
elif hash_value:
return {
'error': 'not_found',
'hash': hash_value,
'tags': tags,
'url': url,
}
else:
raise ValueError('path or hash is required to synchronise sidecar')
existing_hash, existing_tags, existing_known = _read_sidecar_metadata(sidecar_path)
if not tags:
tags = existing_tags
if not url:
url = existing_known
hash_line = hash_value or existing_hash
title_value: Optional[str] = None
for tag in tags:
if isinstance(tag, str):
if tag.lower().startswith('title:'):
title_value = tag.split(':', 1)[1].strip() if ':' in tag else ''
if title_value == '':
title_value = None
break
lines: List[str] = []
if hash_line:
lines.append(f'hash:{hash_line}')
lines.extend(tags)
lines.extend(f'url:{url}' for url in url)
sidecar_path.parent.mkdir(parents=True, exist_ok=True)
if lines:
sidecar_path.write_text('\n'.join(lines) + '\n', encoding='utf-8')
if not path_value:
raise ValueError('path is required to synchronise sidecar')
candidate = Path(str(path_value)).expanduser()
if candidate.suffix.lower() == '.tag':
sidecar_path = candidate
else:
try:
sidecar_path.unlink()
except FileNotFoundError:
pass
sidecar_path = _derive_sidecar_path(candidate)
tags = _normalise_string_list(payload.get('tag'))
if not tags and sidecar_path.exists():
tags = read_tags_from_file(sidecar_path)
sidecar_path.parent.mkdir(parents=True, exist_ok=True)
if tags:
sidecar_path.write_text('\n'.join(tags) + '\n', encoding='utf-8')
return {
'path': str(sidecar_path),
'hash': hash_line,
'tags': [],
'url': [],
'deleted': True,
'title': title_value,
'tag': tags,
}
try:
sidecar_path.unlink()
except FileNotFoundError:
pass
return {
'path': str(sidecar_path),
'hash': hash_line,
'tags': tags,
'url': url,
'title': title_value,
'tag': [],
'deleted': True,
}
@@ -1901,16 +2063,16 @@ def apply_tag_mutation(payload: Dict[str, Any], operation: str = 'add') -> Dict[
result['updated'] = True
return result
else: # local
tags = _clean_existing_tags(payload.get('tags'))
tag = _clean_existing_tags(payload.get('tag'))
if operation == 'add':
new_tag = _normalize_tag(payload.get('new_tag'))
if not new_tag:
raise ValueError('new_tag is required')
added = new_tag not in tags
added = new_tag not in tag
if added:
tags.append(new_tag)
return {'tags': tags, 'added': added}
tag.append(new_tag)
return {'tag': tag, 'added': added}
else: # update
old_tag = _normalize_tag(payload.get('old_tag'))
@@ -1920,17 +2082,17 @@ def apply_tag_mutation(payload: Dict[str, Any], operation: str = 'add') -> Dict[
remaining = []
removed_count = 0
for tag in tags:
if tag == old_tag:
for item in tag:
if item == old_tag:
removed_count += 1
else:
remaining.append(tag)
remaining.append(item)
if new_tag and removed_count > 0:
remaining.extend([new_tag] * removed_count)
updated = removed_count > 0 or (bool(new_tag) and new_tag not in tags)
return {'tags': remaining, 'updated': updated, 'removed_count': removed_count}
updated = removed_count > 0 or (bool(new_tag) and new_tag not in tag)
return {'tag': remaining, 'updated': updated, 'removed_count': removed_count}
def extract_ytdlp_tags(entry: Dict[str, Any]) -> List[str]:
@@ -2181,13 +2343,13 @@ def merge_multiple_tag_lists(
def read_tags_from_file(file_path: Path) -> List[str]:
"""Read and normalize tags from .tags sidecar file.
"""Read and normalize tags from .tag sidecar file.
This is the UNIFIED API for reading .tags files across all cmdlets.
This is the UNIFIED API for reading .tag files across all cmdlets.
Handles normalization, deduplication, and format validation.
Args:
file_path: Path to .tags sidecar file
file_path: Path to .tag sidecar file
Returns:
List of normalized tag strings
@@ -2196,7 +2358,7 @@ def read_tags_from_file(file_path: Path) -> List[str]:
FileNotFoundError: If file doesn't exist
Example:
>>> tags = read_tags_from_file(Path('file.txt.tags'))
>>> tags = read_tags_from_file(Path('file.txt.tag'))
>>> debug(tags)
['artist:Beatles', 'album:Abbey Road']
"""
@@ -2386,13 +2548,13 @@ def write_tags_to_file(
url: Optional[List[str]] = None,
append: bool = False
) -> bool:
"""Write tags to .tags sidecar file.
"""Write tags to .tag sidecar file.
This is the UNIFIED API for writing .tags files across all cmdlets.
This is the UNIFIED API for writing .tag files across all cmdlets.
Uses consistent format and handles file creation/overwriting.
Args:
file_path: Path to .tags file (will be created if doesn't exist)
file_path: Path to .tag file (will be created if doesn't exist)
tags: List of tags to write
source_hashes: Optional source file hashes (written as source:hash1,hash2)
url: Optional known url (each written on separate line as url:url)
@@ -2406,7 +2568,7 @@ def write_tags_to_file(
Example:
>>> tags = ['artist:Beatles', 'album:Abbey Road']
>>> write_tags_to_file(Path('file.txt.tags'), tags)
>>> write_tags_to_file(Path('file.txt.tag'), tags)
True
"""
file_path = Path(file_path)
@@ -2448,7 +2610,7 @@ def normalize_tags_from_source(
Universal function to normalize tags from different sources:
- yt-dlp entry dicts
- Raw tag lists
- .tags file content strings
- .tag file content strings
- Metadata dictionaries
Args:
@@ -2575,12 +2737,12 @@ def expand_metadata_tag(payload: Dict[str, Any]) -> Dict[str, Any]:
else:
data = fetch_musicbrainz_tags(request['id'], request['entity'])
except Exception as exc: # pragma: no cover - network/service errors
return {'tags': tags, 'error': str(exc)}
return {'tag': tags, 'error': str(exc)}
# Add tags from fetched data (no namespace, just unique append)
for tag in (data.get('tags') or []):
for tag in (data.get('tag') or []):
_append_unique(tags, seen, tag)
result = {
'tags': tags,
'tag': tags,
'source': request['source'],
'id': request['id'],
}
@@ -2597,7 +2759,7 @@ def build_remote_bundle(metadata: Optional[Dict[str, Any]], existing: Optional[S
_append_unique(tags, seen, tag)
# Add tags from various sources
for tag in (metadata.get("tags") or []):
for tag in (metadata.get("tag") or []):
_append_unique(tags, seen, tag)
for tag in (metadata.get("categories") or []):
_append_unique(tags, seen, tag)
@@ -2632,7 +2794,7 @@ def build_remote_bundle(metadata: Optional[Dict[str, Any]], existing: Optional[S
source_url = context.get("source_url") or metadata.get("original_url") or metadata.get("webpage_url") or metadata.get("url")
clean_title = value_normalize(str(title_value)) if title_value is not None else None
result = {
"tags": tags,
"tag": tags,
"title": clean_title,
"source_url": _sanitize_url(source_url),
"duration": _coerce_duration(metadata),
@@ -2747,9 +2909,9 @@ def hydrus_fetch_url(payload: Optional[str] = typer.Option(None, "--payload", he
debug(json.dumps(error_payload, ensure_ascii=False), flush=True)
raise typer.Exit(code=1)
@app.command(name="sync-sidecar", help="Synchronise .tags sidecar with supplied data")
@app.command(name="sync-sidecar", help="Synchronise .tag sidecar with supplied data")
def sync_sidecar_cmd(payload: Optional[str] = typer.Option(None, "--payload", help="JSON payload; reads stdin if omitted")):
"""Synchronise .tags sidecar with supplied data."""
"""Synchronise .tag sidecar with supplied data."""
try:
payload_data = _load_payload(payload)
result = sync_sidecar(payload_data)