This commit is contained in:
nose
2025-12-11 23:21:45 -08:00
parent 16d8a763cd
commit e2ffcab030
44 changed files with 3558 additions and 1793 deletions

View File

@@ -33,6 +33,13 @@ try:
except ImportError: # pragma: no cover
sha256_file = None # type: ignore[assignment]
try: # Optional metadata helper for audio files
import mutagen # type: ignore
except ImportError: # pragma: no cover - best effort
mutagen = None # type: ignore
from SYS.utils import sanitize_metadata_value, unique_preserve_order
try:
from helpers.hydrus import HydrusClient, HydrusRequestError, HydrusRequestSpec # type: ignore
except ImportError: # pragma: no cover
@@ -50,6 +57,223 @@ else: # pragma: no cover
_CURRENT_RELATIONSHIP_TRACKER = FileRelationshipTracker()
def prepare_ffmpeg_metadata(payload: Optional[Dict[str, Any]]) -> Dict[str, str]:
    """Derive ffmpeg/mutagen metadata tags from a generic metadata payload.
    This is not Hydrus-specific; it is used by exporters/converters.

    Args:
        payload: Generic metadata dict; any non-dict input yields an empty
            result. Recognised keys: "title", "artist", "album", "year",
            "comment", and "tag" (a list of plain or "namespace:value" tag
            strings).

    Returns:
        Mapping of ffmpeg metadata field names ("title", "artist", "album",
        "date", "genre", "keywords", "comment", "description") to sanitized,
        length-limited string values.
    """
    if not isinstance(payload, dict):
        return {}
    metadata: Dict[str, str] = {}

    def set_field(key: str, raw: Any, limit: int = 2000) -> None:
        # Sanitize, truncate to `limit` characters, and store; values that
        # sanitize to empty are silently dropped.
        sanitized = sanitize_metadata_value(raw)
        if not sanitized:
            return
        if len(sanitized) > limit:
            sanitized = sanitized[:limit]
        metadata[key] = sanitized

    # Direct payload fields take priority over values mined from tags below.
    set_field("title", payload.get("title"))
    set_field("artist", payload.get("artist"), 512)
    set_field("album", payload.get("album"), 512)
    set_field("date", payload.get("year"), 20)
    comment = payload.get("comment")
    tags_value = payload.get("tag")
    tag_strings: List[str] = []
    artists_from_tags: List[str] = []
    albums_from_tags: List[str] = []
    genres_from_tags: List[str] = []
    if isinstance(tags_value, list):
        for raw_tag in tags_value:
            if raw_tag is None:
                continue
            if not isinstance(raw_tag, str):
                raw_tag = str(raw_tag)
            tag = raw_tag.strip()
            if not tag:
                continue
            tag_strings.append(tag)
            # Split "namespace:value" tags and route known namespaces into
            # dedicated metadata fields.
            namespace, sep, value = tag.partition(":")
            if sep and value:
                ns = namespace.strip().lower()
                value = value.strip()
                if ns in {"artist", "creator", "author", "performer"}:
                    artists_from_tags.append(value)
                elif ns in {"album", "series", "collection", "group"}:
                    albums_from_tags.append(value)
                elif ns in {"genre", "rating"}:
                    genres_from_tags.append(value)
                elif ns in {"comment", "description"} and not comment:
                    # First comment-like tag wins, and only when the payload
                    # did not already carry a comment.
                    comment = value
                elif ns in {"year", "date"} and not payload.get("year"):
                    set_field("date", value, 20)
            else:
                # NOTE(review): source view lost indentation; this else is
                # read as pairing with "if sep and value" so namespace-less
                # tags become genre entries — confirm against the original.
                genres_from_tags.append(tag)
    # Tag-derived values only fill fields the payload did not set directly.
    if "artist" not in metadata and artists_from_tags:
        set_field("artist", ", ".join(unique_preserve_order(artists_from_tags)[:3]), 512)
    if "album" not in metadata and albums_from_tags:
        set_field("album", unique_preserve_order(albums_from_tags)[0], 512)
    if genres_from_tags:
        set_field("genre", ", ".join(unique_preserve_order(genres_from_tags)[:5]), 256)
    if tag_strings:
        # Cap keywords at the first 50 tags; reuse them as a fallback comment.
        joined_tags = ", ".join(tag_strings[:50])
        set_field("keywords", joined_tags, 2000)
        if not comment:
            comment = joined_tags
    if comment:
        set_field("comment", comment, 2000)
        set_field("description", comment, 2000)
    return metadata
def apply_mutagen_metadata(path: Path, metadata: Dict[str, str], fmt: str) -> None:
    """Best-effort metadata writing for audio containers.

    Only the generic "audio" export format is tagged; everything else is a
    no-op, as is a missing ``mutagen`` dependency or an empty metadata dict.
    Load/save failures are logged and swallowed.
    """
    if fmt != "audio" or not metadata or mutagen is None:
        return
    try:
        container = mutagen.File(path, easy=True)  # type: ignore[attr-defined]
    except Exception as exc:  # pragma: no cover - best effort only
        log(f"mutagen load failed: {exc}", file=sys.stderr)
        return
    if container is None:
        # Unrecognised container type.
        return
    # Map our metadata keys onto easy-tag names; "description" collapses
    # into "comment".
    targets = {
        "title": "title",
        "artist": "artist",
        "album": "album",
        "genre": "genre",
        "comment": "comment",
        "description": "comment",
        "date": "date",
    }
    wrote_any = False
    for src_key, dst_key in targets.items():
        text = metadata.get(src_key)
        if not text:
            continue
        try:
            container[dst_key] = [text]
            wrote_any = True
        except Exception:  # pragma: no cover
            continue
    if wrote_any:
        try:
            container.save()
        except Exception as exc:  # pragma: no cover
            log(f"mutagen save failed: {exc}", file=sys.stderr)
def build_ffmpeg_command(
    ffmpeg_path: str,
    input_path: Path,
    output_path: Path,
    fmt: str,
    max_width: int,
    metadata: Optional[Dict[str, str]] = None,
) -> List[str]:
    """Build an ffmpeg command line for common export formats.

    Args:
        ffmpeg_path: Path to the ffmpeg executable.
        input_path: Source media file.
        output_path: Destination file.
        fmt: Target format — "mp4", "webm", an audio format ("mp3", "flac",
            "wav", "aac", "m4a", "ogg", "opus", or the legacy "audio" alias
            for mp3), or "copy" to add no transcode options.
        max_width: Width cap for video formats; values <= 0 disable scaling.
        metadata: Optional key/value pairs emitted as ``-metadata key=value``.

    Returns:
        The full argv list, ready for subprocess execution.

    Raises:
        ValueError: If ``fmt`` is not one of the recognised formats.
    """
    # Audio-only formats: (codec arguments, container name passed to -f).
    audio_formats: Dict[str, tuple] = {
        "mp3": (["-c:a", "libmp3lame", "-b:a", "192k"], "mp3"),
        "flac": (["-c:a", "flac"], "flac"),
        "wav": (["-c:a", "pcm_s16le"], "wav"),
        "aac": (["-c:a", "aac", "-b:a", "192k"], "adts"),
        "m4a": (["-c:a", "aac", "-b:a", "192k"], "ipod"),
        "ogg": (["-c:a", "libvorbis", "-b:a", "192k"], "ogg"),
        "opus": (["-c:a", "libopus", "-b:a", "192k"], "opus"),
        # Legacy format name for mp3.
        "audio": (["-c:a", "libmp3lame", "-b:a", "192k"], "mp3"),
    }

    argv: List[str] = [ffmpeg_path, "-y", "-i", str(input_path)]
    if fmt in {"mp4", "webm"} and max_width and max_width > 0:
        # Downscale to at most max_width, preserving aspect; -2 keeps the
        # height even as many encoders require.
        argv += ["-vf", f"scale='min({max_width},iw)':-2"]
    for key, value in (metadata or {}).items():
        argv += ["-metadata", f"{key}={value}"]

    if fmt == "mp4":
        argv += [
            "-c:v", "libx265",
            "-preset", "medium",
            "-crf", "26",
            "-tag:v", "hvc1",
            "-pix_fmt", "yuv420p",
            "-c:a", "aac",
            "-b:a", "192k",
            "-movflags", "+faststart",
        ]
    elif fmt == "webm":
        argv += [
            "-c:v", "libvpx-vp9",
            "-b:v", "0",
            "-crf", "32",
            "-c:a", "libopus",
            "-b:a", "160k",
            "-f", "webm",
        ]
    elif fmt in audio_formats:
        codec_args, container = audio_formats[fmt]
        argv += ["-vn", *codec_args, "-f", container]
    elif fmt != "copy":
        raise ValueError(f"Unsupported format: {fmt}")

    argv.append(str(output_path))
    return argv
def field(obj: Any, name: str, value: Any = None) -> Any:
"""Get or set a field on dict or object.
@@ -131,9 +355,9 @@ def value_normalize(value: str) -> str:
def import_pending_sidecars(db_root: Path, db: Any) -> None:
"""Import pending sidecars (.tag/.tags/.metadata/.notes) into the database."""
"""Import pending sidecars (.tag/.metadata/.notes) into the database."""
try:
sidecar_patterns = ['**/*.tag', '**/*.tags', '**/*.metadata', '**/*.notes']
sidecar_patterns = ['**/*.tag', '**/*.metadata', '**/*.notes']
for pattern in sidecar_patterns:
for sidecar_path in db_root.glob(pattern):
@@ -174,7 +398,7 @@ def import_pending_sidecars(db_root: Path, db: Any) -> None:
if not file_id:
continue
if sidecar_path.suffix in {'.tag', '.tags'}:
if sidecar_path.suffix == '.tag':
try:
content = sidecar_path.read_text(encoding='utf-8')
except Exception:
@@ -395,7 +619,7 @@ def imdb_tag(imdb_id: str) -> Dict[str, object]:
break
if cast_names:
_extend_tags(tags, "cast", cast_names)
return {"source": "imdb", "id": canonical_id, "tags": tags}
return {"source": "imdb", "id": canonical_id, "tag": tags}
def fetch_musicbrainz_tags(mbid: str, entity: str) -> Dict[str, object]:
if not musicbrainzngs:
raise RuntimeError("musicbrainzngs package is not available")
@@ -451,7 +675,7 @@ def fetch_musicbrainz_tags(mbid: str, entity: str) -> Dict[str, object]:
for genre in genre_list:
if isinstance(genre, dict) and genre.get("name"):
_add_tag(tags, "genre", genre["name"])
return {"source": "musicbrainz", "id": mbid, "tags": tags, "entity": entity}
return {"source": "musicbrainz", "id": mbid, "tag": tags, "entity": entity}
def fetch_openlibrary_tags(ol_id: str) -> Dict[str, object]:
@@ -461,7 +685,7 @@ def fetch_openlibrary_tags(ol_id: str) -> Dict[str, object]:
ol_id: OpenLibrary ID (e.g., 'OL123456M' for a book)
Returns:
Dictionary with 'tags' key containing list of extracted tags
Dictionary with 'tag' key containing list of extracted tags
"""
import urllib.request
@@ -573,7 +797,7 @@ def fetch_openlibrary_tags(ol_id: str) -> Dict[str, object]:
description = description.get("value")
_add_tag(tags, "summary", description)
return {"source": "openlibrary", "id": ol_id, "tags": tags}
return {"source": "openlibrary", "id": ol_id, "tag": tags}
def _append_unique(target: List[str], seen: Set[str], value: Optional[str]) -> None:
@@ -1328,25 +1552,16 @@ def _normalise_string_list(values: Optional[Iterable[Any]]) -> List[str]:
def _derive_sidecar_path(media_path: Path) -> Path:
"""Return preferred sidecar path (.tag), falling back to legacy .tags if it exists.
Keeps backward compatibility by preferring existing .tags, but new writes use .tag.
"""
"""Return sidecar path (.tag)."""
try:
preferred = media_path.parent / (media_path.name + '.tag')
legacy = media_path.parent / (media_path.name + '.tags')
except ValueError:
preferred = media_path.with_name(media_path.name + '.tag')
legacy = media_path.with_name(media_path.name + '.tags')
# Prefer legacy if it already exists to avoid duplicate sidecars
if legacy.exists():
return legacy
return preferred
def _read_sidecar_metadata(sidecar_path: Path) -> tuple[Optional[str], List[str], List[str]]:
"""Read hash, tags, and url from .tags sidecar file.
"""Read hash, tags, and url from sidecar file.
Consolidated with read_tags_from_file - this extracts extra metadata (hash, url).
"""
@@ -1389,7 +1604,7 @@ def _read_sidecar_metadata(sidecar_path: Path) -> tuple[Optional[str], List[str]
def rename(file_path: Path, tags: Iterable[str]) -> Optional[Path]:
"""Rename a file based on title: tag in the tags list.
If a title: tag is present, renames the file and any .tags/.metadata sidecars.
If a title: tag is present, renames the file and any .tag/.metadata sidecars.
Args:
file_path: Path to the file to potentially rename
@@ -1432,10 +1647,10 @@ def rename(file_path: Path, tags: Iterable[str]) -> Optional[Path]:
file_path.rename(new_path)
debug(f"Renamed file: {old_name}{new_name}", file=sys.stderr)
# Rename the .tags sidecar if it exists
old_tags_path = file_path.parent / (old_name + '.tags')
# Rename the .tag sidecar if it exists
old_tags_path = file_path.parent / (old_name + '.tag')
if old_tags_path.exists():
new_tags_path = file_path.parent / (new_name + '.tags')
new_tags_path = file_path.parent / (new_name + '.tag')
if new_tags_path.exists():
try:
new_tags_path.unlink()
@@ -1508,14 +1723,6 @@ def write_tags(media_path: Path, tags: Iterable[str], url: Iterable[str], hash_v
if lines:
sidecar.write_text("\n".join(lines) + "\n", encoding="utf-8")
debug(f"Tags: {sidecar}")
# Clean up legacy files
for legacy_path in [media_path.with_name(media_path.name + '.tags'),
media_path.with_name(media_path.name + '.tags.txt')]:
if legacy_path.exists() and legacy_path != sidecar:
try:
legacy_path.unlink()
except OSError:
pass
else:
try:
sidecar.unlink()
@@ -1691,7 +1898,7 @@ def _locate_sidecar_by_hash(hash_value: str, roots: Iterable[Path]) -> Optional[
continue
if not root_path.exists() or not root_path.is_dir():
continue
for pattern in ('*.tags', '*.tags.txt'):
for pattern in ('*.tag',):
try:
iterator = root_path.rglob(pattern)
except OSError:
@@ -1711,80 +1918,35 @@ def _locate_sidecar_by_hash(hash_value: str, roots: Iterable[Path]) -> Optional[
def sync_sidecar(payload: Dict[str, Any]) -> Dict[str, Any]:
path_value = payload.get('path')
sidecar_path: Optional[Path] = None
media_path: Optional[Path] = None
if path_value:
candidate = Path(str(path_value)).expanduser()
if candidate.suffix.lower() in {'.tags', '.tags.txt'}:
sidecar_path = candidate
else:
media_path = candidate
hash_input = payload.get('hash')
hash_value = None
if hash_input:
hash_value = _normalize_hash(hash_input)
tags = _normalise_string_list(payload.get('tags'))
url = _normalise_string_list(payload.get('url'))
if media_path is not None:
sidecar_path = _derive_sidecar_path(media_path)
search_roots = _collect_search_roots(payload)
if sidecar_path is None and hash_value:
located = _locate_sidecar_by_hash(hash_value, search_roots)
if located is not None:
sidecar_path = located
if sidecar_path is None:
if media_path is not None:
sidecar_path = _derive_sidecar_path(media_path)
elif hash_value:
return {
'error': 'not_found',
'hash': hash_value,
'tags': tags,
'url': url,
}
else:
raise ValueError('path or hash is required to synchronise sidecar')
existing_hash, existing_tags, existing_known = _read_sidecar_metadata(sidecar_path)
if not tags:
tags = existing_tags
if not url:
url = existing_known
hash_line = hash_value or existing_hash
title_value: Optional[str] = None
for tag in tags:
if isinstance(tag, str):
if tag.lower().startswith('title:'):
title_value = tag.split(':', 1)[1].strip() if ':' in tag else ''
if title_value == '':
title_value = None
break
lines: List[str] = []
if hash_line:
lines.append(f'hash:{hash_line}')
lines.extend(tags)
lines.extend(f'url:{url}' for url in url)
sidecar_path.parent.mkdir(parents=True, exist_ok=True)
if lines:
sidecar_path.write_text('\n'.join(lines) + '\n', encoding='utf-8')
if not path_value:
raise ValueError('path is required to synchronise sidecar')
candidate = Path(str(path_value)).expanduser()
if candidate.suffix.lower() == '.tag':
sidecar_path = candidate
else:
try:
sidecar_path.unlink()
except FileNotFoundError:
pass
sidecar_path = _derive_sidecar_path(candidate)
tags = _normalise_string_list(payload.get('tag'))
if not tags and sidecar_path.exists():
tags = read_tags_from_file(sidecar_path)
sidecar_path.parent.mkdir(parents=True, exist_ok=True)
if tags:
sidecar_path.write_text('\n'.join(tags) + '\n', encoding='utf-8')
return {
'path': str(sidecar_path),
'hash': hash_line,
'tags': [],
'url': [],
'deleted': True,
'title': title_value,
'tag': tags,
}
try:
sidecar_path.unlink()
except FileNotFoundError:
pass
return {
'path': str(sidecar_path),
'hash': hash_line,
'tags': tags,
'url': url,
'title': title_value,
'tag': [],
'deleted': True,
}
@@ -1901,16 +2063,16 @@ def apply_tag_mutation(payload: Dict[str, Any], operation: str = 'add') -> Dict[
result['updated'] = True
return result
else: # local
tags = _clean_existing_tags(payload.get('tags'))
tag = _clean_existing_tags(payload.get('tag'))
if operation == 'add':
new_tag = _normalize_tag(payload.get('new_tag'))
if not new_tag:
raise ValueError('new_tag is required')
added = new_tag not in tags
added = new_tag not in tag
if added:
tags.append(new_tag)
return {'tags': tags, 'added': added}
tag.append(new_tag)
return {'tag': tag, 'added': added}
else: # update
old_tag = _normalize_tag(payload.get('old_tag'))
@@ -1920,17 +2082,17 @@ def apply_tag_mutation(payload: Dict[str, Any], operation: str = 'add') -> Dict[
remaining = []
removed_count = 0
for tag in tags:
if tag == old_tag:
for item in tag:
if item == old_tag:
removed_count += 1
else:
remaining.append(tag)
remaining.append(item)
if new_tag and removed_count > 0:
remaining.extend([new_tag] * removed_count)
updated = removed_count > 0 or (bool(new_tag) and new_tag not in tags)
return {'tags': remaining, 'updated': updated, 'removed_count': removed_count}
updated = removed_count > 0 or (bool(new_tag) and new_tag not in tag)
return {'tag': remaining, 'updated': updated, 'removed_count': removed_count}
def extract_ytdlp_tags(entry: Dict[str, Any]) -> List[str]:
@@ -2181,13 +2343,13 @@ def merge_multiple_tag_lists(
def read_tags_from_file(file_path: Path) -> List[str]:
"""Read and normalize tags from .tags sidecar file.
"""Read and normalize tags from .tag sidecar file.
This is the UNIFIED API for reading .tags files across all cmdlets.
This is the UNIFIED API for reading .tag files across all cmdlets.
Handles normalization, deduplication, and format validation.
Args:
file_path: Path to .tags sidecar file
file_path: Path to .tag sidecar file
Returns:
List of normalized tag strings
@@ -2196,7 +2358,7 @@ def read_tags_from_file(file_path: Path) -> List[str]:
FileNotFoundError: If file doesn't exist
Example:
>>> tags = read_tags_from_file(Path('file.txt.tags'))
>>> tags = read_tags_from_file(Path('file.txt.tag'))
>>> debug(tags)
['artist:Beatles', 'album:Abbey Road']
"""
@@ -2386,13 +2548,13 @@ def write_tags_to_file(
url: Optional[List[str]] = None,
append: bool = False
) -> bool:
"""Write tags to .tags sidecar file.
"""Write tags to .tag sidecar file.
This is the UNIFIED API for writing .tags files across all cmdlets.
This is the UNIFIED API for writing .tag files across all cmdlets.
Uses consistent format and handles file creation/overwriting.
Args:
file_path: Path to .tags file (will be created if doesn't exist)
file_path: Path to .tag file (will be created if doesn't exist)
tags: List of tags to write
source_hashes: Optional source file hashes (written as source:hash1,hash2)
url: Optional known url (each written on separate line as url:url)
@@ -2406,7 +2568,7 @@ def write_tags_to_file(
Example:
>>> tags = ['artist:Beatles', 'album:Abbey Road']
>>> write_tags_to_file(Path('file.txt.tags'), tags)
>>> write_tags_to_file(Path('file.txt.tag'), tags)
True
"""
file_path = Path(file_path)
@@ -2448,7 +2610,7 @@ def normalize_tags_from_source(
Universal function to normalize tags from different sources:
- yt-dlp entry dicts
- Raw tag lists
- .tags file content strings
- .tag file content strings
- Metadata dictionaries
Args:
@@ -2575,12 +2737,12 @@ def expand_metadata_tag(payload: Dict[str, Any]) -> Dict[str, Any]:
else:
data = fetch_musicbrainz_tags(request['id'], request['entity'])
except Exception as exc: # pragma: no cover - network/service errors
return {'tags': tags, 'error': str(exc)}
return {'tag': tags, 'error': str(exc)}
# Add tags from fetched data (no namespace, just unique append)
for tag in (data.get('tags') or []):
for tag in (data.get('tag') or []):
_append_unique(tags, seen, tag)
result = {
'tags': tags,
'tag': tags,
'source': request['source'],
'id': request['id'],
}
@@ -2597,7 +2759,7 @@ def build_remote_bundle(metadata: Optional[Dict[str, Any]], existing: Optional[S
_append_unique(tags, seen, tag)
# Add tags from various sources
for tag in (metadata.get("tags") or []):
for tag in (metadata.get("tag") or []):
_append_unique(tags, seen, tag)
for tag in (metadata.get("categories") or []):
_append_unique(tags, seen, tag)
@@ -2632,7 +2794,7 @@ def build_remote_bundle(metadata: Optional[Dict[str, Any]], existing: Optional[S
source_url = context.get("source_url") or metadata.get("original_url") or metadata.get("webpage_url") or metadata.get("url")
clean_title = value_normalize(str(title_value)) if title_value is not None else None
result = {
"tags": tags,
"tag": tags,
"title": clean_title,
"source_url": _sanitize_url(source_url),
"duration": _coerce_duration(metadata),
@@ -2747,9 +2909,9 @@ def hydrus_fetch_url(payload: Optional[str] = typer.Option(None, "--payload", he
debug(json.dumps(error_payload, ensure_ascii=False), flush=True)
raise typer.Exit(code=1)
@app.command(name="sync-sidecar", help="Synchronise .tags sidecar with supplied data")
@app.command(name="sync-sidecar", help="Synchronise .tag sidecar with supplied data")
def sync_sidecar_cmd(payload: Optional[str] = typer.Option(None, "--payload", help="JSON payload; reads stdin if omitted")):
"""Synchronise .tags sidecar with supplied data."""
"""Synchronise .tag sidecar with supplied data."""
try:
payload_data = _load_payload(payload)
result = sync_sidecar(payload_data)