This commit is contained in:
nose
2025-12-11 23:21:45 -08:00
parent 16d8a763cd
commit e2ffcab030
44 changed files with 3558 additions and 1793 deletions

View File

@@ -2,6 +2,7 @@
from __future__ import annotations
import base64
import http.client
import json
import os
import re
@@ -21,18 +22,11 @@ import httpx
logger = logging.getLogger(__name__)
try: # Optional metadata helper for audio files
import mutagen # type: ignore
except ImportError: # pragma: no cover - best effort
mutagen = None # type: ignore
from SYS.utils import (
decode_cbor,
jsonify,
ensure_directory,
sanitize_metadata_value,
unique_path,
unique_preserve_order,
)
from .HTTP import HTTPClient
@@ -70,7 +64,7 @@ class HydrusRequestSpec:
@dataclass(slots=True)
class HydrusClient:
class HydrusNetwork:
"""Thin wrapper around the Hydrus Client API."""
url: str
@@ -311,10 +305,10 @@ class HydrusClient:
spec = HydrusRequestSpec("POST", endpoint, data=data, file_path=file_path, content_type=content_type)
return cast(dict[str, Any], self._perform_request(spec))
def _ensure_hashes(self, hashes: Union[str, Iterable[str]]) -> list[str]:
if isinstance(hashes, str):
return [hashes]
return list(hashes)
def _ensure_hashes(self, hash: Union[str, Iterable[str]]) -> list[str]:
if isinstance(hash, str):
return [hash]
return list(hash)
def _append_access_key(self, url: str) -> str:
if not self.access_key:
@@ -330,12 +324,12 @@ class HydrusClient:
def add_file(self, file_path: Path) -> dict[str, Any]:
    """Upload the file at *file_path* to Hydrus via /add_files/add_file."""
    return self._post("/add_files/add_file", file_path=file_path)
def add_tags(self, file_hashes: Union[str, Iterable[str]], tags: Iterable[str], service_name: str) -> dict[str, Any]:
hashes = self._ensure_hashes(file_hashes)
body = {"hashes": hashes, "service_names_to_tags": {service_name: list(tags)}}
def add_tag(self, hash: Union[str, Iterable[str]], tags: Iterable[str], service_name: str) -> dict[str, Any]:
    """Attach *tags* to the given file hash(es) on a tag service addressed by name."""
    payload = {
        "hashes": self._ensure_hashes(hash),
        "service_names_to_tags": {service_name: list(tags)},
    }
    return self._post("/add_tags/add_tags", data=payload)
def delete_tags(
def delete_tag(
self,
file_hashes: Union[str, Iterable[str]],
tags: Iterable[str],
@@ -350,9 +344,9 @@ class HydrusClient:
}
return self._post("/add_tags/add_tags", data=body)
def add_tags_by_key(self, file_hashes: Union[str, Iterable[str]], tags: Iterable[str], service_key: str) -> dict[str, Any]:
hashes = self._ensure_hashes(file_hashes)
body = {"hashes": hashes, "service_keys_to_tags": {service_key: list(tags)}}
def add_tags_by_key(self, hash: Union[str, Iterable[str]], tags: Iterable[str], service_key: str) -> dict[str, Any]:
    """Attach *tags* to the given file hash(es), addressing the tag service by key."""
    payload = {
        "hashes": self._ensure_hashes(hash),
        "service_keys_to_tags": {service_key: list(tags)},
    }
    return self._post("/add_tags/add_tags", data=payload)
def delete_tags_by_key(
@@ -727,199 +721,9 @@ def hydrus_request(args, parser) -> int:
else:
log(json.dumps({'value': json_ready}, ensure_ascii=False))
return 0 if 200 <= status < 400 else 1
def prepare_ffmpeg_metadata(payload: Optional[dict[str, Any]]) -> dict[str, str]:
    """Build an ffmpeg ``-metadata`` key/value map from a download payload.

    Pulls title/artist/album/year straight from *payload*, then mines the
    ``tags`` list for namespaced tags (``artist:...``, ``album:...``, etc.)
    to fill in anything still missing.  All values pass through
    ``sanitize_metadata_value`` and are truncated to a per-field limit.

    Args:
        payload: Parsed metadata dict; any non-dict input yields ``{}``.

    Returns:
        Mapping of ffmpeg metadata field name to sanitized string value.
    """
    if not isinstance(payload, dict):
        return {}
    metadata: dict[str, str] = {}

    def set_field(key: str, raw: Any, limit: int = 2000) -> None:
        # Sanitize, enforce the per-field length cap, and drop empty values.
        sanitized = sanitize_metadata_value(raw)
        if not sanitized:
            return
        if len(sanitized) > limit:
            sanitized = sanitized[:limit]
        metadata[key] = sanitized

    set_field('title', payload.get('title'))
    set_field('artist', payload.get('artist'), 512)
    set_field('album', payload.get('album'), 512)
    set_field('date', payload.get('year'), 20)
    comment = payload.get('comment')
    tags_value = payload.get('tags')
    tag_strings: list[str] = []
    artists_from_tags: list[str] = []
    albums_from_tags: list[str] = []
    genres_from_tags: list[str] = []
    if isinstance(tags_value, list):
        for raw_tag in tags_value:
            if raw_tag is None:
                continue
            if not isinstance(raw_tag, str):
                raw_tag = str(raw_tag)
            tag = raw_tag.strip()
            if not tag:
                continue
            tag_strings.append(tag)
            # Split "namespace:value"; sep is '' when there is no colon.
            namespace, sep, value = tag.partition(':')
            if sep and value:
                ns = namespace.strip().lower()
                value = value.strip()
                if ns in {'artist', 'creator', 'author', 'performer'}:
                    artists_from_tags.append(value)
                elif ns in {'album', 'series', 'collection', 'group'}:
                    albums_from_tags.append(value)
                elif ns in {'genre', 'rating'}:
                    genres_from_tags.append(value)
                elif ns in {'comment', 'description'} and not comment:
                    # Only the first comment-like tag wins.
                    comment = value
                elif ns in {'year', 'date'} and not payload.get('year'):
                    # Tag-supplied date only when the payload had no year.
                    set_field('date', value, 20)
            else:
                # NOTE(review): indentation was reconstructed from a
                # whitespace-stripped copy — this else is read as pairing with
                # "if sep and value", i.e. un-namespaced tags are treated as
                # genres; confirm against the original source.
                genres_from_tags.append(tag)
    # Tag-derived values only fill fields the payload left empty.
    if 'artist' not in metadata and artists_from_tags:
        set_field('artist', ', '.join(unique_preserve_order(artists_from_tags)[:3]), 512)
    if 'album' not in metadata and albums_from_tags:
        set_field('album', unique_preserve_order(albums_from_tags)[0], 512)
    if genres_from_tags:
        set_field('genre', ', '.join(unique_preserve_order(genres_from_tags)[:5]), 256)
    if tag_strings:
        joined_tags = ', '.join(tag_strings[:50])
        set_field('keywords', joined_tags, 2000)
        if not comment:
            comment = joined_tags
    if comment:
        # 'description' deliberately mirrors 'comment' for player compatibility.
        set_field('comment', comment, 2000)
        set_field('description', comment, 2000)
    return metadata
def apply_mutagen_metadata(path: Path, metadata: dict[str, str], fmt: str) -> None:
    """Best-effort: write *metadata* into an audio file's tags via mutagen.

    Silently returns when the format is not the legacy 'audio' target, when
    there is nothing to write, or when the optional mutagen dependency is
    unavailable.  Load/save failures are logged to stderr, never raised.
    """
    if fmt != 'audio' or not metadata:
        return
    if mutagen is None:  # optional dependency not installed
        return
    try:
        audio = mutagen.File(path, easy=True)  # type: ignore[attr-defined]
    except Exception as exc:  # pragma: no cover - best effort only
        log(f"mutagen load failed: {exc}", file=sys.stderr)
        return
    if audio is None:  # unrecognized file type
        return
    # Our metadata keys -> easy-tag names; 'description' aliases 'comment'.
    field_map = {
        'title': 'title',
        'artist': 'artist',
        'album': 'album',
        'genre': 'genre',
        'comment': 'comment',
        'description': 'comment',
        'date': 'date',
    }
    wrote_any = False
    for source_key, target_key in field_map.items():
        field_value = metadata.get(source_key)
        if not field_value:
            continue
        try:
            audio[target_key] = [field_value]
            wrote_any = True
        except Exception:  # pragma: no cover - best effort only
            continue
    if not wrote_any:
        return
    try:
        audio.save()
    except Exception as exc:  # pragma: no cover - best effort only
        log(f"mutagen save failed: {exc}", file=sys.stderr)
def build_ffmpeg_command(ffmpeg_path: str, input_path: Path, output_path: Path, fmt: str, max_width: int, metadata: Optional[dict[str, str]] = None) -> list[str]:
    """Assemble the ffmpeg argv that transcodes *input_path* into *fmt*.

    Args:
        ffmpeg_path: Path to the ffmpeg executable.
        input_path: Source media file.
        output_path: Destination file.
        fmt: Target format ('mp4', 'webm', audio formats, legacy 'audio',
            or 'copy' for no codec arguments).
        max_width: Cap on video width; <= 0 disables scaling.
        metadata: Optional key/value pairs emitted as ``-metadata`` flags.

    Returns:
        Full command list, ready for subprocess execution.

    Raises:
        ValueError: If *fmt* is not a supported format name.
    """
    cmd: list[str] = [ffmpeg_path, '-y', '-i', str(input_path)]
    # Downscale only — min(max_width, iw) never upscales; -2 keeps the
    # height even as required by most encoders.
    if fmt in {'mp4', 'webm'} and max_width and max_width > 0:
        cmd += ['-vf', f"scale='min({max_width},iw)':-2"]
    if metadata:
        for key, value in metadata.items():
            cmd += ['-metadata', f'{key}={value}']
    # Audio-only targets: fmt -> (codec flags, ffmpeg container name).
    audio_presets = {
        'mp3': (['-c:a', 'libmp3lame', '-b:a', '192k'], 'mp3'),
        'flac': (['-c:a', 'flac'], 'flac'),
        'wav': (['-c:a', 'pcm_s16le'], 'wav'),
        'aac': (['-c:a', 'aac', '-b:a', '192k'], 'adts'),
        'm4a': (['-c:a', 'aac', '-b:a', '192k'], 'ipod'),
        'ogg': (['-c:a', 'libvorbis', '-b:a', '192k'], 'ogg'),
        'opus': (['-c:a', 'libopus', '-b:a', '192k'], 'opus'),
        # Legacy format name for mp3.
        'audio': (['-c:a', 'libmp3lame', '-b:a', '192k'], 'mp3'),
    }
    if fmt == 'mp4':
        # H.265 in an MP4 container; hvc1 tag + faststart for web playback.
        cmd += [
            '-c:v', 'libx265',
            '-preset', 'medium',
            '-crf', '26',
            '-tag:v', 'hvc1',
            '-pix_fmt', 'yuv420p',
            '-c:a', 'aac',
            '-b:a', '192k',
            '-movflags', '+faststart',
        ]
    elif fmt == 'webm':
        cmd += [
            '-c:v', 'libvpx-vp9',
            '-b:v', '0',
            '-crf', '32',
            '-c:a', 'libopus',
            '-b:a', '160k',
            '-f', 'webm',
        ]
    elif fmt in audio_presets:
        codec_flags, container = audio_presets[fmt]
        cmd += ['-vn', *codec_flags, '-f', container]
    elif fmt != 'copy':
        raise ValueError(f'Unsupported format: {fmt}')
    cmd.append(str(output_path))
    return cmd
def hydrus_export(args, _parser) -> int:
from metadata import apply_mutagen_metadata, build_ffmpeg_command, prepare_ffmpeg_metadata
output_path: Path = args.output
original_suffix = output_path.suffix
target_dir = output_path.parent
@@ -1064,7 +868,7 @@ def hydrus_export(args, _parser) -> int:
file_hash = getattr(args, 'file_hash', None) or _extract_hash(args.file_url)
if hydrus_url and file_hash:
try:
client = HydrusClient(url=hydrus_url, access_key=args.access_key, timeout=args.timeout)
client = HydrusNetwork(url=hydrus_url, access_key=args.access_key, timeout=args.timeout)
meta_response = client.fetch_file_metadata(hashes=[file_hash], include_mime=True)
entries = meta_response.get('metadata') if isinstance(meta_response, dict) else None
if isinstance(entries, list) and entries:
@@ -1387,7 +1191,7 @@ def is_hydrus_available(config: dict[str, Any]) -> bool:
return available
def get_client(config: dict[str, Any]) -> HydrusClient:
def get_client(config: dict[str, Any]) -> HydrusNetwork:
"""Create and return a Hydrus client with session key authentication.
Reuses cached client instance to preserve session keys across requests.
@@ -1440,7 +1244,7 @@ def get_client(config: dict[str, Any]) -> HydrusClient:
del _hydrus_client_cache[cache_key]
# Create new client
client = HydrusClient(hydrus_url, access_key, timeout)
client = HydrusNetwork(hydrus_url, access_key, timeout)
# Acquire session key for secure authentication
try:
@@ -1474,7 +1278,7 @@ def get_tag_service_name(config: dict[str, Any]) -> str:
return "my tags"
def get_tag_service_key(client: HydrusClient, fallback_name: str = "my tags") -> Optional[str]:
def get_tag_service_key(client: HydrusNetwork, fallback_name: str = "my tags") -> Optional[str]:
"""Get the service key for a named tag service.
Queries the Hydrus client's services and finds the service key matching

View File

@@ -3,7 +3,7 @@
This module provides:
- SQLite database management for local file metadata caching
- Library scanning and database initialization
- Sidecar file migration from old .tags/.metadata files to database
- Sidecar file migration from .tag/.metadata files to database
- Optimized search functionality using database indices
- Worker task tracking for background operations
"""
@@ -68,7 +68,7 @@ def read_sidecar(sidecar_path: Path) -> Tuple[Optional[str], List[str], List[str
Delegates to metadata._read_sidecar_metadata for centralized handling.
Args:
sidecar_path: Path to .tags sidecar file
sidecar_path: Path to .tag sidecar file
Returns:
Tuple of (hash_value, tags_list, url_list)
@@ -90,7 +90,7 @@ def write_sidecar(media_path: Path, tags: List[str], url: List[str],
Delegates to metadata.write_tags for centralized handling.
Args:
media_path: Path to the media file (sidecar created as media_path.tags)
media_path: Path to the media file (sidecar created as media_path.tag)
tags: List of tag strings
url: List of known URL strings
hash_value: Optional SHA256 hash to include
@@ -129,7 +129,7 @@ def find_sidecar(media_path: Path) -> Optional[Path]:
return None
try:
# Check for new format: filename.ext.tags
# Check for new format: filename.ext.tag
sidecar_path = _derive_sidecar_path(media_path)
if sidecar_path.exists():
return sidecar_path
@@ -1861,8 +1861,7 @@ class LocalLibraryInitializer:
sidecar_map: Dict[Path, Dict[str, List[Path]]] = {}
patterns = [
("*.tag", "tags"),
("*.tags", "tags"),
("*.tag", "tag"),
("*.metadata", "metadata"),
("*.notes", "notes"),
]
@@ -1877,14 +1876,14 @@ class LocalLibraryInitializer:
if not base.exists():
continue
bucket = sidecar_map.setdefault(base, {"tags": [], "metadata": [], "notes": []})
bucket = sidecar_map.setdefault(base, {"tag": [], "metadata": [], "notes": []})
bucket[key].append(sidecar)
return sidecar_map
def _read_tag_sidecars(self, sidecars: Dict[str, List[Path]]) -> List[str]:
tags: List[str] = []
for tag_path in sidecars.get("tags", []):
for tag_path in sidecars.get("tag", []):
try:
content = tag_path.read_text(encoding="utf-8")
except OSError:
@@ -1972,7 +1971,7 @@ class LocalLibraryInitializer:
def _rename_sidecars(self, old_base: Path, new_base: Path, sidecars: Dict[str, List[Path]]) -> None:
"""Rename sidecars to follow the new hashed filename."""
mappings = [
(sidecars.get("tags", []), ".tag"),
(sidecars.get("tag", []), ".tag"),
(sidecars.get("metadata", []), ".metadata"),
(sidecars.get("notes", []), ".notes"),
]
@@ -2006,7 +2005,7 @@ class LocalLibraryInitializer:
def _cleanup_orphaned_sidecars(self) -> None:
"""Remove sidecars for non-existent files."""
try:
patterns = ["*.tag", "*.tags", "*.metadata", "*.notes"]
patterns = ["*.tag", "*.metadata", "*.notes"]
for pattern in patterns:
for sidecar_path in self.library_root.rglob(pattern):
@@ -2022,13 +2021,13 @@ class LocalLibraryInitializer:
def migrate_tags_to_db(library_root: Path, db: API_folder_store) -> int:
"""Migrate .tags files to the database."""
"""Migrate .tag files to the database."""
migrated_count = 0
try:
for tags_file in library_root.rglob("*.tags"):
for tags_file in library_root.rglob("*.tag"):
try:
base_path = Path(str(tags_file)[:-len('.tags')])
base_path = tags_file.with_suffix("")
tags_text = tags_file.read_text(encoding='utf-8')
tags = [line.strip() for line in tags_text.splitlines() if line.strip()]
@@ -2043,7 +2042,7 @@ def migrate_tags_to_db(library_root: Path, db: API_folder_store) -> int:
except Exception as e:
logger.warning(f"Failed to migrate {tags_file}: {e}")
logger.info(f"Migrated {migrated_count} .tags files to database")
logger.info(f"Migrated {migrated_count} .tag files to database")
return migrated_count
except Exception as e:
logger.error(f"Error during tags migration: {e}", exc_info=True)