diff --git a/API/HydrusNetwork.py b/API/HydrusNetwork.py index 0e631e3..233e971 100644 --- a/API/HydrusNetwork.py +++ b/API/HydrusNetwork.py @@ -2,6 +2,7 @@ from __future__ import annotations import base64 +import http.client import json import os import re @@ -21,18 +22,11 @@ import httpx logger = logging.getLogger(__name__) -try: # Optional metadata helper for audio files - import mutagen # type: ignore -except ImportError: # pragma: no cover - best effort - mutagen = None # type: ignore - from SYS.utils import ( decode_cbor, jsonify, ensure_directory, - sanitize_metadata_value, unique_path, - unique_preserve_order, ) from .HTTP import HTTPClient @@ -70,7 +64,7 @@ class HydrusRequestSpec: @dataclass(slots=True) -class HydrusClient: +class HydrusNetwork: """Thin wrapper around the Hydrus Client API.""" url: str @@ -311,10 +305,10 @@ class HydrusClient: spec = HydrusRequestSpec("POST", endpoint, data=data, file_path=file_path, content_type=content_type) return cast(dict[str, Any], self._perform_request(spec)) - def _ensure_hashes(self, hashes: Union[str, Iterable[str]]) -> list[str]: - if isinstance(hashes, str): - return [hashes] - return list(hashes) + def _ensure_hashes(self, hash: Union[str, Iterable[str]]) -> list[str]: + if isinstance(hash, str): + return [hash] + return list(hash) def _append_access_key(self, url: str) -> str: if not self.access_key: @@ -330,12 +324,12 @@ class HydrusClient: def add_file(self, file_path: Path) -> dict[str, Any]: return self._post("/add_files/add_file", file_path=file_path) - def add_tags(self, file_hashes: Union[str, Iterable[str]], tags: Iterable[str], service_name: str) -> dict[str, Any]: - hashes = self._ensure_hashes(file_hashes) - body = {"hashes": hashes, "service_names_to_tags": {service_name: list(tags)}} + def add_tag(self, hash: Union[str, Iterable[str]], tags: Iterable[str], service_name: str) -> dict[str, Any]: + hash = self._ensure_hashes(hash) + body = {"hashes": hash, "service_names_to_tags": {service_name: list(tags)}} return self._post("/add_tags/add_tags", data=body) - def delete_tags( + def delete_tag( self, file_hashes: Union[str, Iterable[str]], tags: Iterable[str], @@ -350,9 +344,9 @@ class HydrusClient: } return self._post("/add_tags/add_tags", data=body) - def add_tags_by_key(self, file_hashes: Union[str, Iterable[str]], tags: Iterable[str], service_key: str) -> dict[str, Any]: - hashes = self._ensure_hashes(file_hashes) - body = {"hashes": hashes, "service_keys_to_tags": {service_key: list(tags)}} + def add_tags_by_key(self, hash: Union[str, Iterable[str]], tags: Iterable[str], service_key: str) -> dict[str, Any]: + hash = self._ensure_hashes(hash) + body = {"hashes": hash, "service_keys_to_tags": {service_key: list(tags)}} return self._post("/add_tags/add_tags", data=body) def delete_tags_by_key( @@ -727,199 +721,9 @@ def hydrus_request(args, parser) -> int: else: log(json.dumps({'value': json_ready}, ensure_ascii=False)) return 0 if 200 <= status < 400 else 1 -def prepare_ffmpeg_metadata(payload: Optional[dict[str, Any]]) -> dict[str, str]: - if not isinstance(payload, dict): - return {} - metadata: dict[str, str] = {} - def set_field(key: str, raw: Any, limit: int = 2000) -> None: - sanitized = sanitize_metadata_value(raw) - if not sanitized: - return - if len(sanitized) > limit: - sanitized = sanitized[:limit] - metadata[key] = sanitized - set_field('title', payload.get('title')) - set_field('artist', payload.get('artist'), 512) - set_field('album', payload.get('album'), 512) - set_field('date', payload.get('year'), 20) 
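Aside: with the class rename above, callers now construct HydrusNetwork rather than HydrusClient, and bulk tagging goes through add_tag. A minimal usage sketch using only the constructor form and methods visible in this diff; the URL, access key, hash, and tag values are hypothetical placeholders, not values from the patch:

    from pathlib import Path
    from API.HydrusNetwork import HydrusNetwork

    # Hypothetical connection details; keyword form matches get_client() in this file's diff.
    client = HydrusNetwork(url="http://127.0.0.1:45869", access_key="0123abcd", timeout=60.0)
    client.add_file(Path("song.flac"))                        # POST /add_files/add_file
    file_hash = "0f" * 32                                     # placeholder SHA-256 of the uploaded file
    client.add_tag(file_hash, ["title:song", "artist:unknown"], "my tags")
    meta = client.fetch_file_metadata(hashes=[file_hash], include_mime=True)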
- comment = payload.get('comment') - tags_value = payload.get('tags') - tag_strings: list[str] = [] - artists_from_tags: list[str] = [] - albums_from_tags: list[str] = [] - genres_from_tags: list[str] = [] - if isinstance(tags_value, list): - for raw_tag in tags_value: - if raw_tag is None: - continue - if not isinstance(raw_tag, str): - raw_tag = str(raw_tag) - tag = raw_tag.strip() - if not tag: - continue - tag_strings.append(tag) - namespace, sep, value = tag.partition(':') - if sep and value: - ns = namespace.strip().lower() - value = value.strip() - if ns in {'artist', 'creator', 'author', 'performer'}: - artists_from_tags.append(value) - elif ns in {'album', 'series', 'collection', 'group'}: - albums_from_tags.append(value) - elif ns in {'genre', 'rating'}: - genres_from_tags.append(value) - elif ns in {'comment', 'description'} and not comment: - comment = value - elif ns in {'year', 'date'} and not payload.get('year'): - set_field('date', value, 20) - else: - genres_from_tags.append(tag) - if 'artist' not in metadata and artists_from_tags: - set_field('artist', ', '.join(unique_preserve_order(artists_from_tags)[:3]), 512) - if 'album' not in metadata and albums_from_tags: - set_field('album', unique_preserve_order(albums_from_tags)[0], 512) - if genres_from_tags: - set_field('genre', ', '.join(unique_preserve_order(genres_from_tags)[:5]), 256) - if tag_strings: - joined_tags = ', '.join(tag_strings[:50]) - set_field('keywords', joined_tags, 2000) - if not comment: - comment = joined_tags - if comment: - set_field('comment', comment, 2000) - set_field('description', comment, 2000) - return metadata -def apply_mutagen_metadata(path: Path, metadata: dict[str, str], fmt: str) -> None: - if fmt != 'audio': - return - if not metadata: - return - if mutagen is None: - return - try: - audio = mutagen.File(path, easy=True) # type: ignore[attr-defined] - except Exception as exc: # pragma: no cover - best effort only - log(f"mutagen load failed: {exc}", file=sys.stderr) - return - if audio is None: - return - field_map = { - 'title': 'title', - 'artist': 'artist', - 'album': 'album', - 'genre': 'genre', - 'comment': 'comment', - 'description': 'comment', - 'date': 'date', - } - changed = False - for source_key, target_key in field_map.items(): - value = metadata.get(source_key) - if not value: - continue - try: - audio[target_key] = [value] - changed = True - except Exception: # pragma: no cover - best effort only - continue - if not changed: - return - try: - audio.save() - except Exception as exc: # pragma: no cover - best effort only - log(f"mutagen save failed: {exc}", file=sys.stderr) -def build_ffmpeg_command(ffmpeg_path: str, input_path: Path, output_path: Path, fmt: str, max_width: int, metadata: Optional[dict[str, str]] = None) -> list[str]: - cmd = [ffmpeg_path, '-y', '-i', str(input_path)] - if fmt in {'mp4', 'webm'} and max_width and max_width > 0: - cmd.extend(['-vf', f"scale='min({max_width},iw)':-2"]) - if metadata: - for key, value in metadata.items(): - cmd.extend(['-metadata', f'{key}={value}']) - - # Video formats - if fmt == 'mp4': - cmd.extend([ - '-c:v', 'libx265', - '-preset', 'medium', - '-crf', '26', - '-tag:v', 'hvc1', - '-pix_fmt', 'yuv420p', - '-c:a', 'aac', - '-b:a', '192k', - '-movflags', '+faststart', - ]) - elif fmt == 'webm': - cmd.extend([ - '-c:v', 'libvpx-vp9', - '-b:v', '0', - '-crf', '32', - '-c:a', 'libopus', - '-b:a', '160k', - ]) - cmd.extend(['-f', 'webm']) - - # Audio formats - elif fmt == 'mp3': - cmd.extend([ - '-vn', - '-c:a', 'libmp3lame', - 
'-b:a', '192k', - ]) - cmd.extend(['-f', 'mp3']) - elif fmt == 'flac': - cmd.extend([ - '-vn', - '-c:a', 'flac', - ]) - cmd.extend(['-f', 'flac']) - elif fmt == 'wav': - cmd.extend([ - '-vn', - '-c:a', 'pcm_s16le', - ]) - cmd.extend(['-f', 'wav']) - elif fmt == 'aac': - cmd.extend([ - '-vn', - '-c:a', 'aac', - '-b:a', '192k', - ]) - cmd.extend(['-f', 'adts']) - elif fmt == 'm4a': - cmd.extend([ - '-vn', - '-c:a', 'aac', - '-b:a', '192k', - ]) - cmd.extend(['-f', 'ipod']) - elif fmt == 'ogg': - cmd.extend([ - '-vn', - '-c:a', 'libvorbis', - '-b:a', '192k', - ]) - cmd.extend(['-f', 'ogg']) - elif fmt == 'opus': - cmd.extend([ - '-vn', - '-c:a', 'libopus', - '-b:a', '192k', - ]) - cmd.extend(['-f', 'opus']) - elif fmt == 'audio': - # Legacy format name for mp3 - cmd.extend([ - '-vn', - '-c:a', 'libmp3lame', - '-b:a', '192k', - ]) - cmd.extend(['-f', 'mp3']) - elif fmt != 'copy': - raise ValueError(f'Unsupported format: {fmt}') - - cmd.append(str(output_path)) - return cmd def hydrus_export(args, _parser) -> int: + from metadata import apply_mutagen_metadata, build_ffmpeg_command, prepare_ffmpeg_metadata + output_path: Path = args.output original_suffix = output_path.suffix target_dir = output_path.parent @@ -1064,7 +868,7 @@ def hydrus_export(args, _parser) -> int: file_hash = getattr(args, 'file_hash', None) or _extract_hash(args.file_url) if hydrus_url and file_hash: try: - client = HydrusClient(url=hydrus_url, access_key=args.access_key, timeout=args.timeout) + client = HydrusNetwork(url=hydrus_url, access_key=args.access_key, timeout=args.timeout) meta_response = client.fetch_file_metadata(hashes=[file_hash], include_mime=True) entries = meta_response.get('metadata') if isinstance(meta_response, dict) else None if isinstance(entries, list) and entries: @@ -1387,7 +1191,7 @@ def is_hydrus_available(config: dict[str, Any]) -> bool: return available -def get_client(config: dict[str, Any]) -> HydrusClient: +def get_client(config: dict[str, Any]) -> HydrusNetwork: """Create and return a Hydrus client with session key authentication. Reuses cached client instance to preserve session keys across requests. @@ -1440,7 +1244,7 @@ def get_client(config: dict[str, Any]) -> HydrusClient: del _hydrus_client_cache[cache_key] # Create new client - client = HydrusClient(hydrus_url, access_key, timeout) + client = HydrusNetwork(hydrus_url, access_key, timeout) # Acquire session key for secure authentication try: @@ -1474,7 +1278,7 @@ def get_tag_service_name(config: dict[str, Any]) -> str: return "my tags" -def get_tag_service_key(client: HydrusClient, fallback_name: str = "my tags") -> Optional[str]: +def get_tag_service_key(client: HydrusNetwork, fallback_name: str = "my tags") -> Optional[str]: """Get the service key for a named tag service. Queries the Hydrus client's services and finds the service key matching diff --git a/API/folder.py b/API/folder.py index 34d1f7b..ed84b61 100644 --- a/API/folder.py +++ b/API/folder.py @@ -3,7 +3,7 @@ This module provides: - SQLite database management for local file metadata caching - Library scanning and database initialization -- Sidecar file migration from old .tags/.metadata files to database +- Sidecar file migration from .tag/.metadata files to database - Optimized search functionality using database indices - Worker task tracking for background operations """ @@ -68,7 +68,7 @@ def read_sidecar(sidecar_path: Path) -> Tuple[Optional[str], List[str], List[str Delegates to metadata._read_sidecar_metadata for centralized handling. 
Args: - sidecar_path: Path to .tags sidecar file + sidecar_path: Path to .tag sidecar file Returns: Tuple of (hash_value, tags_list, url_list) @@ -90,7 +90,7 @@ def write_sidecar(media_path: Path, tags: List[str], url: List[str], Delegates to metadata.write_tags for centralized handling. Args: - media_path: Path to the media file (sidecar created as media_path.tags) + media_path: Path to the media file (sidecar created as media_path.tag) tags: List of tag strings url: List of known URL strings hash_value: Optional SHA256 hash to include @@ -129,7 +129,7 @@ def find_sidecar(media_path: Path) -> Optional[Path]: return None try: - # Check for new format: filename.ext.tags + # Check for new format: filename.ext.tag sidecar_path = _derive_sidecar_path(media_path) if sidecar_path.exists(): return sidecar_path @@ -1861,8 +1861,7 @@ class LocalLibraryInitializer: sidecar_map: Dict[Path, Dict[str, List[Path]]] = {} patterns = [ - ("*.tag", "tags"), - ("*.tags", "tags"), + ("*.tag", "tag"), ("*.metadata", "metadata"), ("*.notes", "notes"), ] @@ -1877,14 +1876,14 @@ class LocalLibraryInitializer: if not base.exists(): continue - bucket = sidecar_map.setdefault(base, {"tags": [], "metadata": [], "notes": []}) + bucket = sidecar_map.setdefault(base, {"tag": [], "metadata": [], "notes": []}) bucket[key].append(sidecar) return sidecar_map def _read_tag_sidecars(self, sidecars: Dict[str, List[Path]]) -> List[str]: tags: List[str] = [] - for tag_path in sidecars.get("tags", []): + for tag_path in sidecars.get("tag", []): try: content = tag_path.read_text(encoding="utf-8") except OSError: @@ -1972,7 +1971,7 @@ class LocalLibraryInitializer: def _rename_sidecars(self, old_base: Path, new_base: Path, sidecars: Dict[str, List[Path]]) -> None: """Rename sidecars to follow the new hashed filename.""" mappings = [ - (sidecars.get("tags", []), ".tag"), + (sidecars.get("tag", []), ".tag"), (sidecars.get("metadata", []), ".metadata"), (sidecars.get("notes", []), ".notes"), ] @@ -2006,7 +2005,7 @@ class LocalLibraryInitializer: def _cleanup_orphaned_sidecars(self) -> None: """Remove sidecars for non-existent files.""" try: - patterns = ["*.tag", "*.tags", "*.metadata", "*.notes"] + patterns = ["*.tag", "*.metadata", "*.notes"] for pattern in patterns: for sidecar_path in self.library_root.rglob(pattern): @@ -2022,13 +2021,13 @@ class LocalLibraryInitializer: def migrate_tags_to_db(library_root: Path, db: API_folder_store) -> int: - """Migrate .tags files to the database.""" + """Migrate .tag files to the database.""" migrated_count = 0 try: - for tags_file in library_root.rglob("*.tags"): + for tags_file in library_root.rglob("*.tag"): try: - base_path = Path(str(tags_file)[:-len('.tags')]) + base_path = tags_file.with_suffix("") tags_text = tags_file.read_text(encoding='utf-8') tags = [line.strip() for line in tags_text.splitlines() if line.strip()] @@ -2043,7 +2042,7 @@ def migrate_tags_to_db(library_root: Path, db: API_folder_store) -> int: except Exception as e: logger.warning(f"Failed to migrate {tags_file}: {e}") - logger.info(f"Migrated {migrated_count} .tags files to database") + logger.info(f"Migrated {migrated_count} .tag files to database") return migrated_count except Exception as e: logger.error(f"Error during tags migration: {e}", exc_info=True) diff --git a/CLI.py b/CLI.py index 9f37bde..a6025ce 100644 --- a/CLI.py +++ b/CLI.py @@ -227,8 +227,8 @@ def _get_table_title_for_command(cmd_name: str, emitted_items: Optional[List[Any 'get_tag': 'Tags', 'get-file': 'Results', 'get_file': 'Results', - 
'add-tag': 'Results', - 'add_tag': 'Results', + 'add-tags': 'Results', + 'add_tags': 'Results', 'delete-tag': 'Results', 'delete_tag': 'Results', 'add-url': 'Results', @@ -1362,12 +1362,20 @@ def _execute_pipeline(tokens: list): print(f"Auto-piping YouTube selection to .pipe") stages.append(['.pipe']) elif table_type == 'soulseek': - print(f"Auto-piping Soulseek selection to download-provider") - stages.append(['download-provider']) + print(f"Auto-piping Soulseek selection to download-file") + stages.append(['download-file']) elif source_cmd == 'search-file' and source_args and 'youtube' in source_args: # Legacy check print(f"Auto-piping YouTube selection to .pipe") stages.append(['.pipe']) + else: + # If the user is piping a provider selection into additional stages (e.g. add-file), + # automatically insert the appropriate download stage so @N is "logical". + # This prevents add-file from receiving an unreachable provider path like "share\...". + first_cmd = stages[0][0] if stages and stages[0] else None + if table_type == 'soulseek' and first_cmd not in ('download-file', 'download-media', 'download_media', '.pipe'): + print(f"Auto-inserting download-file after Soulseek selection") + stages.insert(0, ['download-file']) else: print(f"No items matched selection in pipeline\n") diff --git a/Provider/_base.py b/Provider/_base.py index 9541121..1803df5 100644 --- a/Provider/_base.py +++ b/Provider/_base.py @@ -18,7 +18,7 @@ class SearchResult: annotations: List[str] = field(default_factory=list) # Tags: ["120MB", "flac", "ready"] media_kind: str = "other" # Type: "book", "audio", "video", "game", "magnet" size_bytes: Optional[int] = None - tags: set[str] = field(default_factory=set) # Searchable tags + tag: set[str] = field(default_factory=set) # Searchable tag values columns: List[Tuple[str, str]] = field(default_factory=list) # Display columns full_metadata: Dict[str, Any] = field(default_factory=dict) # Extra metadata @@ -33,7 +33,7 @@ class SearchResult: "annotations": self.annotations, "media_kind": self.media_kind, "size_bytes": self.size_bytes, - "tags": list(self.tags), + "tag": list(self.tag), "columns": list(self.columns), "full_metadata": self.full_metadata, } diff --git a/Provider/soulseek.py b/Provider/soulseek.py index 92933da..a757b62 100644 --- a/Provider/soulseek.py +++ b/Provider/soulseek.py @@ -1,6 +1,10 @@ from __future__ import annotations import asyncio +import contextlib +import io +import logging +import os import re import sys import time @@ -11,6 +15,143 @@ from Provider._base import SearchProvider, SearchResult from SYS.logger import log, debug +_SOULSEEK_NOISE_SUBSTRINGS = ( + "search reply ticket does not match any search request", + "failed to receive transfer ticket on file connection", + "aioslsk.exceptions.ConnectionReadError", +) + + +def _configure_aioslsk_logging() -> None: + """Reduce aioslsk internal log noise. + + Some aioslsk components emit non-fatal warnings/errors during high churn + (search + download + disconnect). We keep our own debug output, but push + aioslsk to ERROR and stop propagation so it doesn't spam the CLI. + """ + for name in ( + "aioslsk", + "aioslsk.network", + "aioslsk.search", + "aioslsk.transfer", + "aioslsk.transfer.manager", + ): + logger = logging.getLogger(name) + logger.setLevel(logging.ERROR) + logger.propagate = False + + +class _LineFilterStream(io.TextIOBase): + """A minimal stream wrapper that filters known noisy lines. + + It also suppresses entire traceback blocks when they contain known non-fatal + aioslsk noise (e.g. 
ConnectionReadError during peer init). + """ + + def __init__(self, underlying: Any, suppress_substrings: tuple[str, ...]): + super().__init__() + self._underlying = underlying + self._suppress = suppress_substrings + self._buf = "" + self._in_tb = False + self._tb_lines: list[str] = [] + self._tb_suppress = False + + def writable(self) -> bool: # pragma: no cover + return True + + def _should_suppress_line(self, line: str) -> bool: + return any(sub in line for sub in self._suppress) + + def _flush_tb(self) -> None: + if not self._tb_lines: + return + if not self._tb_suppress: + for l in self._tb_lines: + try: + self._underlying.write(l + "\n") + except Exception: + pass + self._tb_lines = [] + self._tb_suppress = False + self._in_tb = False + + def write(self, s: str) -> int: + self._buf += str(s) + while "\n" in self._buf: + line, self._buf = self._buf.split("\n", 1) + self._handle_line(line) + return len(s) + + def _handle_line(self, line: str) -> None: + # Start capturing tracebacks so we can suppress the whole block if it matches. + if not self._in_tb and line.startswith("Traceback (most recent call last):"): + self._in_tb = True + self._tb_lines = [line] + self._tb_suppress = False + return + + if self._in_tb: + self._tb_lines.append(line) + if self._should_suppress_line(line): + self._tb_suppress = True + # End traceback block on blank line. + if line.strip() == "": + self._flush_tb() + return + + # Non-traceback line + if self._should_suppress_line(line): + return + try: + self._underlying.write(line + "\n") + except Exception: + pass + + def flush(self) -> None: + # Flush any pending traceback block. + if self._in_tb: + # If the traceback ends without a trailing blank line, decide here. + self._flush_tb() + if self._buf: + line = self._buf + self._buf = "" + if not self._should_suppress_line(line): + try: + self._underlying.write(line) + except Exception: + pass + try: + self._underlying.flush() + except Exception: + pass + + +@contextlib.contextmanager +def _suppress_aioslsk_noise() -> Any: + """Temporarily suppress known aioslsk noise printed to stdout/stderr. + + Opt out by setting DOWNLOW_SOULSEEK_VERBOSE=1. 
+ """ + if os.environ.get("DOWNLOW_SOULSEEK_VERBOSE"): + yield + return + + _configure_aioslsk_logging() + old_out, old_err = sys.stdout, sys.stderr + sys.stdout = _LineFilterStream(old_out, _SOULSEEK_NOISE_SUBSTRINGS) + sys.stderr = _LineFilterStream(old_err, _SOULSEEK_NOISE_SUBSTRINGS) + try: + yield + finally: + try: + sys.stdout.flush() + sys.stderr.flush() + except Exception: + pass + sys.stdout, sys.stderr = old_out, old_err + + class Soulseek(SearchProvider): """Search provider for Soulseek P2P network.""" @@ -90,7 +231,6 @@ class Soulseek(SearchProvider): async def perform_search(self, query: str, timeout: float = 9.0, limit: int = 50) -> List[Dict[str, Any]]: """Perform async Soulseek search.""" - import os from aioslsk.client import SoulSeekClient from aioslsk.settings import CredentialsSettings, Settings @@ -99,25 +239,37 @@ class Soulseek(SearchProvider): settings = Settings(credentials=CredentialsSettings(username=self.USERNAME, password=self.PASSWORD)) client = SoulSeekClient(settings) - try: - await client.start() - await client.login() - except Exception as exc: - log(f"[soulseek] Login failed: {type(exc).__name__}: {exc}", file=sys.stderr) - return [] - - try: - search_request = await client.searches.search(query) - await self._collect_results(search_request, timeout=timeout) - return self._flatten_results(search_request)[:limit] - except Exception as exc: - log(f"[soulseek] Search error: {type(exc).__name__}: {exc}", file=sys.stderr) - return [] - finally: + with _suppress_aioslsk_noise(): try: - await client.stop() - except Exception: - pass + await client.start() + await client.login() + except Exception as exc: + log(f"[soulseek] Login failed: {type(exc).__name__}: {exc}", file=sys.stderr) + return [] + + try: + search_request = await client.searches.search(query) + await self._collect_results(search_request, timeout=timeout) + return self._flatten_results(search_request)[:limit] + except Exception as exc: + log(f"[soulseek] Search error: {type(exc).__name__}: {exc}", file=sys.stderr) + return [] + finally: + # Best-effort: try to cancel/close the search request before stopping + # the client to reduce stray reply spam. 
+ try: + if "search_request" in locals() and search_request is not None: + cancel = getattr(search_request, "cancel", None) + if callable(cancel): + maybe = cancel() + if asyncio.iscoroutine(maybe): + await maybe + except Exception: + pass + try: + await client.stop() + except Exception: + pass def _flatten_results(self, search_request: Any) -> List[dict]: flat: List[dict] = [] @@ -322,55 +474,56 @@ async def download_soulseek_file( settings = Settings(credentials=CredentialsSettings(username=Soulseek.USERNAME, password=Soulseek.PASSWORD)) client = SoulSeekClient(settings) - try: - await client.start() - await client.login() - debug(f"[soulseek] Logged in as {Soulseek.USERNAME}") + with _suppress_aioslsk_noise(): + try: + await client.start() + await client.login() + debug(f"[soulseek] Logged in as {Soulseek.USERNAME}") - debug(f"[soulseek] Requesting download from {username}: {filename}") + debug(f"[soulseek] Requesting download from {username}: {filename}") - transfer = await client.transfers.add(Transfer(username, filename, TransferDirection.DOWNLOAD)) - transfer.local_path = str(output_path) - await client.transfers.queue(transfer) + transfer = await client.transfers.add(Transfer(username, filename, TransferDirection.DOWNLOAD)) + transfer.local_path = str(output_path) + await client.transfers.queue(transfer) - start_time = time.time() - last_log_time = 0.0 - while not transfer.is_finalized(): - if time.time() - start_time > timeout: - log(f"[soulseek] Download timeout after {timeout}s", file=sys.stderr) + start_time = time.time() + last_log_time = 0.0 + while not transfer.is_finalized(): + if time.time() - start_time > timeout: + log(f"[soulseek] Download timeout after {timeout}s", file=sys.stderr) + return None + + if time.time() - last_log_time >= 5.0 and transfer.bytes_transfered > 0: + progress = (transfer.bytes_transfered / transfer.filesize * 100) if transfer.filesize else 0 + debug( + f"[soulseek] Progress: {progress:.1f}% " + f"({transfer.bytes_transfered}/{transfer.filesize})" + ) + last_log_time = time.time() + + await asyncio.sleep(1) + + if transfer.state.VALUE == TransferState.COMPLETE and transfer.local_path: + downloaded_path = Path(transfer.local_path) + if downloaded_path.exists(): + debug(f"[soulseek] Download complete: {downloaded_path}") + return downloaded_path + + log(f"[soulseek] Transfer completed but file missing: {downloaded_path}", file=sys.stderr) return None - if time.time() - last_log_time >= 5.0 and transfer.bytes_transfered > 0: - progress = (transfer.bytes_transfered / transfer.filesize * 100) if transfer.filesize else 0 - debug( - f"[soulseek] Progress: {progress:.1f}% " - f"({transfer.bytes_transfered}/{transfer.filesize})" - ) - last_log_time = time.time() - - await asyncio.sleep(1) - - if transfer.state.VALUE == TransferState.COMPLETE and transfer.local_path: - downloaded_path = Path(transfer.local_path) - if downloaded_path.exists(): - debug(f"[soulseek] Download complete: {downloaded_path}") - return downloaded_path - - log(f"[soulseek] Transfer completed but file missing: {downloaded_path}", file=sys.stderr) + log( + f"[soulseek] Download failed: state={transfer.state.VALUE} " + f"bytes={transfer.bytes_transfered}/{transfer.filesize}", + file=sys.stderr, + ) return None - log( - f"[soulseek] Download failed: state={transfer.state.VALUE} " - f"bytes={transfer.bytes_transfered}/{transfer.filesize}", - file=sys.stderr, - ) - return None - - finally: - try: - await client.stop() - except Exception: - pass + finally: + try: + await client.stop() + 
except Exception: + pass except ImportError: log("[soulseek] aioslsk not installed. Install with: pip install aioslsk", file=sys.stderr) diff --git a/SYS/utils.py b/SYS/utils.py index b676afe..17db8d3 100644 --- a/SYS/utils.py +++ b/SYS/utils.py @@ -111,7 +111,7 @@ def create_metadata_sidecar(file_path: Path, metadata: dict) -> None: raise RuntimeError(f"Failed to write metadata sidecar {metadata_path}: {exc}") from exc def create_tags_sidecar(file_path: Path, tags: set) -> None: - """Create a .tags sidecar file with tags (one per line). + """Create a .tag sidecar file with tags (one per line). Args: file_path: Path to the exported file @@ -120,7 +120,7 @@ def create_tags_sidecar(file_path: Path, tags: set) -> None: if not tags: return - tags_path = file_path.with_suffix(file_path.suffix + '.tags') + tags_path = file_path.with_suffix(file_path.suffix + '.tag') try: with open(tags_path, 'w', encoding='utf-8') as f: for tag in sorted(tags): diff --git a/Store/Folder.py b/Store/Folder.py index 3a09376..723723c 100644 --- a/Store/Folder.py +++ b/Store/Folder.py @@ -11,7 +11,7 @@ from typing import Any, Dict, List, Optional, Tuple from SYS.logger import debug, log from SYS.utils import sha256_file -from Store._base import StoreBackend +from Store._base import Store def _normalize_hash(value: Any) -> Optional[str]: @@ -30,7 +30,7 @@ def _resolve_file_hash(db_hash: Optional[str], file_path: Path) -> Optional[str] return _normalize_hash(file_path.stem) -class Folder(StoreBackend): +class Folder(Store): """""" # Track which locations have already been migrated to avoid repeated migrations _migrated_locations = set() @@ -243,7 +243,7 @@ class Folder(StoreBackend): Args: file_path: Path to the file to add move: If True, move file instead of copy (default: False) - tags: Optional list of tags to add + tag: Optional list of tag values to add url: Optional list of url to associate with the file title: Optional title (will be added as 'title:value' tag) @@ -251,15 +251,15 @@ class Folder(StoreBackend): File hash (SHA256 hex string) as identifier """ move_file = bool(kwargs.get("move")) - tags = kwargs.get("tags", []) + tag_list = kwargs.get("tag", []) url = kwargs.get("url", []) title = kwargs.get("title") # Extract title from tags if not explicitly provided if not title: - for tag in tags: - if isinstance(tag, str) and tag.lower().startswith("title:"): - title = tag.split(":", 1)[1].strip() + for candidate in tag_list: + if isinstance(candidate, str) and candidate.lower().startswith("title:"): + title = candidate.split(":", 1)[1].strip() break # Fallback to filename if no title @@ -268,8 +268,8 @@ class Folder(StoreBackend): # Ensure title is in tags title_tag = f"title:{title}" - if not any(str(tag).lower().startswith("title:") for tag in tags): - tags = [title_tag] + list(tags) + if not any(str(candidate).lower().startswith("title:") for candidate in tag_list): + tag_list = [title_tag] + list(tag_list) try: file_hash = sha256_file(file_path) @@ -290,8 +290,8 @@ class Folder(StoreBackend): file=sys.stderr, ) # Still add tags and url if provided - if tags: - self.add_tag(file_hash, tags) + if tag_list: + self.add_tag(file_hash, tag_list) if url: self.add_url(file_hash, url) return file_hash @@ -316,8 +316,8 @@ class Folder(StoreBackend): }) # Add tags if provided - if tags: - self.add_tag(file_hash, tags) + if tag_list: + self.add_tag(file_hash, tag_list) # Add url if provided if url: @@ -330,7 +330,7 @@ class Folder(StoreBackend): log(f"❌ Local storage failed: {exc}", file=sys.stderr) raise - def 
search_store(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]: + def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]: """Search local database for files by title tag or filename.""" from fnmatch import fnmatch from API.folder import DatabaseAPI @@ -685,9 +685,6 @@ class Folder(StoreBackend): log(f"❌ Local search failed: {exc}", file=sys.stderr) raise - def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]: - """Alias for search_file to match the interface expected by FileStorage.""" - return self.search_store(query, **kwargs) def _resolve_library_root(self, file_path: Path, config: Dict[str, Any]) -> Optional[Path]: """Return the library root containing medios-macina.db. diff --git a/Store/HydrusNetwork.py b/Store/HydrusNetwork.py index 2f47694..8d5e3ac 100644 --- a/Store/HydrusNetwork.py +++ b/Store/HydrusNetwork.py @@ -8,10 +8,10 @@ from typing import Any, Dict, List, Optional, Tuple from SYS.logger import debug, log from SYS.utils_constant import mime_maps -from Store._base import StoreBackend +from Store._base import Store -class HydrusNetwork(StoreBackend): +class HydrusNetwork(Store): """File storage backend for Hydrus client. Each instance represents a specific Hydrus client connection. @@ -26,7 +26,7 @@ class HydrusNetwork(StoreBackend): api_key: Hydrus Client API access key url: Hydrus client URL (e.g., 'http://192.168.1.230:45869') """ - from API.HydrusNetwork import HydrusClient + from API.HydrusNetwork import HydrusNetwork as HydrusClient self._instance_name = instance_name self._api_key = api_key @@ -45,7 +45,7 @@ class HydrusNetwork(StoreBackend): Args: file_path: Path to the file to upload - tags: Optional list of tags to add + tag: Optional list of tag values to add url: Optional list of url to associate with the file title: Optional title (will be added as 'title:value' tag) @@ -57,15 +57,15 @@ class HydrusNetwork(StoreBackend): """ from SYS.utils import sha256_file - tags = kwargs.get("tags", []) + tag_list = kwargs.get("tag", []) url = kwargs.get("url", []) title = kwargs.get("title") # Add title to tags if provided and not already present if title: title_tag = f"title:{title}" - if not any(str(tag).lower().startswith("title:") for tag in tags): - tags = [title_tag] + list(tags) + if not any(str(candidate).lower().startswith("title:") for candidate in tag_list): + tag_list = [title_tag] + list(tag_list) try: # Compute file hash @@ -113,7 +113,7 @@ class HydrusNetwork(StoreBackend): log(f"Hydrus: {file_hash}", file=sys.stderr) # Add tags if provided (both for new and existing files) - if tags: + if tag_list: try: # Use default tag service service_name = "my tags" @@ -121,8 +121,8 @@ class HydrusNetwork(StoreBackend): service_name = "my tags" try: - debug(f"Adding {len(tags)} tag(s) to Hydrus: {tags}") - client.add_tags(file_hash, tags, service_name) + debug(f"Adding {len(tag_list)} tag(s) to Hydrus: {tag_list}") + client.add_tag(file_hash, tag_list, service_name) log(f"Tags added via '{service_name}'", file=sys.stderr) except Exception as exc: log(f"⚠️ Failed to add tags: {exc}", file=sys.stderr) @@ -144,7 +144,7 @@ class HydrusNetwork(StoreBackend): log(f"❌ Hydrus upload failed: {exc}", file=sys.stderr) raise - def search_store(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]: + def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]: """Search Hydrus database for files matching query. 
Args: @@ -290,7 +290,7 @@ class HydrusNetwork(StoreBackend): "size": size, "size_bytes": size, "store": self._instance_name, - "tags": all_tags, + "tag": all_tags, "file_id": file_id, "mime": mime_type, "ext": ext, @@ -323,7 +323,7 @@ class HydrusNetwork(StoreBackend): "size": size, "size_bytes": size, "store": self._instance_name, - "tags": all_tags, + "tag": all_tags, "file_id": file_id, "mime": mime_type, "ext": ext, @@ -488,7 +488,7 @@ class HydrusNetwork(StoreBackend): tag_list = list(tags) if isinstance(tags, (list, tuple)) else [str(tags)] if not tag_list: return False - client.add_tags(file_identifier, tag_list, service_name) + client.add_tag(file_identifier, tag_list, service_name) return True except Exception as exc: debug(f"Hydrus add_tag failed: {exc}") @@ -506,7 +506,7 @@ class HydrusNetwork(StoreBackend): tag_list = list(tags) if isinstance(tags, (list, tuple)) else [str(tags)] if not tag_list: return False - client.delete_tags(file_identifier, tag_list, service_name) + client.delete_tag(file_identifier, tag_list, service_name) return True except Exception as exc: debug(f"Hydrus delete_tag failed: {exc}") diff --git a/Store/__init__.py b/Store/__init__.py index 8f9438a..77b76f9 100644 --- a/Store/__init__.py +++ b/Store/__init__.py @@ -1,7 +1,7 @@ -from Store._base import StoreBackend +from Store._base import Store as BaseStore from Store.registry import Store __all__ = [ - "StoreBackend", "Store", + "BaseStore", ] diff --git a/Store/_base.py b/Store/_base.py index 6b8bcf0..9db16fb 100644 --- a/Store/_base.py +++ b/Store/_base.py @@ -10,7 +10,7 @@ from pathlib import Path from typing import Any, Dict, List, Optional, Tuple -class StoreBackend(ABC): +class Store(ABC): @abstractmethod def add_file(self, file_path: Path, **kwargs: Any) -> str: raise NotImplementedError @@ -19,7 +19,7 @@ class StoreBackend(ABC): def name(self) -> str: raise NotImplementedError - def search_store(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]: + def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]: raise NotImplementedError(f"{self.name()} backend does not support searching") @abstractmethod diff --git a/Store/registry.py b/Store/registry.py index f6ff96b..e1e6771 100644 --- a/Store/registry.py +++ b/Store/registry.py @@ -25,7 +25,7 @@ from typing import Any, Dict, Optional from SYS.logger import debug -from Store._base import StoreBackend +from Store._base import Store as BaseStore from Store.Folder import Folder from Store.HydrusNetwork import HydrusNetwork @@ -34,7 +34,7 @@ class Store: def __init__(self, config: Optional[Dict[str, Any]] = None, suppress_debug: bool = False) -> None: self._config = config or {} self._suppress_debug = suppress_debug - self._backends: Dict[str, StoreBackend] = {} + self._backends: Dict[str, BaseStore] = {} self._load_backends() def _load_backends(self) -> None: @@ -86,11 +86,11 @@ class Store: def list_searchable_backends(self) -> list[str]: searchable: list[str] = [] for name, backend in self._backends.items(): - if type(backend).search_store is not StoreBackend.search_store: + if type(backend).search is not BaseStore.search: searchable.append(name) return sorted(searchable) - def __getitem__(self, backend_name: str) -> StoreBackend: + def __getitem__(self, backend_name: str) -> BaseStore: if backend_name not in self._backends: raise KeyError(f"Unknown store backend: {backend_name}. 
Available: {list(self._backends.keys())}") return self._backends[backend_name] diff --git a/TUI/menu_actions.py b/TUI/menu_actions.py index a6aff1d..c58c101 100644 --- a/TUI/menu_actions.py +++ b/TUI/menu_actions.py @@ -30,12 +30,12 @@ PIPELINE_PRESETS: List[PipelinePreset] = [ PipelinePreset( label="Download → Merge → Local", description="Use download-data with playlist auto-selection, merge the pieces, tag, then import into local storage.", - pipeline='download-data "" | merge-file | add-tag | add-file -storage local', + pipeline='download-data "" | merge-file | add-tags -store local | add-file -storage local', ), PipelinePreset( label="Download → Hydrus", description="Fetch media, auto-tag, and push directly into Hydrus.", - pipeline='download-data "" | merge-file | add-tag | add-file -storage hydrus', + pipeline='download-data "" | merge-file | add-tags -store hydrus | add-file -storage hydrus', ), PipelinePreset( label="Search Local Library", diff --git a/TUI/modalscreen/download.py b/TUI/modalscreen/download.py index 181b68f..2fa824d 100644 --- a/TUI/modalscreen/download.py +++ b/TUI/modalscreen/download.py @@ -781,9 +781,9 @@ class DownloadModal(ModalScreen): # Stage 3: Add tags (now after merge, if merge happened) # If merge succeeded, result_obj now points to merged file if tags and (download_succeeded or not download_enabled): - add_tags_cmdlet = get_cmdlet("add-tag") + add_tags_cmdlet = get_cmdlet("add-tags") if add_tags_cmdlet: - logger.info(f"Executing add-tag stage with {len(tags)} tags") + logger.info(f"Executing add-tags stage with {len(tags)} tags") logger.info(f" Tags: {tags}") logger.info(f" Source: {source}") logger.info(f" Result path: {result_obj.path}") @@ -791,10 +791,10 @@ class DownloadModal(ModalScreen): # Log step to worker if worker: - worker.log_step(f"Starting add-tag stage with {len(tags)} tags...") + worker.log_step(f"Starting add-tags stage with {len(tags)} tags...") - # Build add-tag arguments: tag1 tag2 tag3 --source - tag_args = [str(t) for t in tags] + ["--source", str(source)] + # Build add-tags arguments. add-tags requires a store; for downloads, default to local sidecar tagging. 
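+                # For illustration (hypothetical values): with tags=["artist:unknown", "title:demo"] and source="C:/dl/demo.mp3", the next line produces ["-store", "local", "artist:unknown", "title:demo", "--source", "C:/dl/demo.mp3"].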
+ tag_args = ["-store", "local"] + [str(t) for t in tags] + ["--source", str(source)] logger.info(f" Tag args: {tag_args}") logger.info(f" Result object attributes: target={getattr(result_obj, 'target', 'MISSING')}, path={getattr(result_obj, 'path', 'MISSING')}, hash_hex={getattr(result_obj, 'hash_hex', 'MISSING')}") @@ -814,12 +814,12 @@ class DownloadModal(ModalScreen): # Log the tag output so it gets captured by WorkerLoggingHandler if stdout_text: - logger.info(f"[add-tag output]\n{stdout_text}") + logger.info(f"[add-tags output]\n{stdout_text}") if stderr_text: - logger.info(f"[add-tag stderr]\n{stderr_text}") + logger.info(f"[add-tags stderr]\n{stderr_text}") if returncode != 0: - logger.error(f"add-tag stage failed with code {returncode}") + logger.error(f"add-tags stage failed with code {returncode}") logger.error(f" stdout: {stdout_text}") logger.error(f" stderr: {stderr_text}") self.app.call_from_thread( @@ -833,16 +833,16 @@ class DownloadModal(ModalScreen): return else: if stdout_text: - logger.debug(f"add-tag stdout: {stdout_text}") + logger.debug(f"add-tags stdout: {stdout_text}") if stderr_text: - logger.debug(f"add-tag stderr: {stderr_text}") - logger.info("add-tag stage completed successfully") + logger.debug(f"add-tags stderr: {stderr_text}") + logger.info("add-tags stage completed successfully") # Log step to worker if worker: worker.log_step(f"Successfully added {len(tags)} tags") except Exception as e: - logger.error(f"add-tag execution error: {e}", exc_info=True) + logger.error(f"add-tags execution error: {e}", exc_info=True) self.app.call_from_thread( self.app.notify, f"Error adding tags: {e}", @@ -852,10 +852,10 @@ class DownloadModal(ModalScreen): self.app.call_from_thread(self._hide_progress) return else: - logger.error("add-tag cmdlet not found") + logger.error("add-tags cmdlet not found") else: if tags and download_enabled and not download_succeeded: - skip_msg = "⚠️ Skipping add-tag stage because download failed" + skip_msg = "⚠️ Skipping add-tags stage because download failed" logger.info(skip_msg) if worker: worker.append_stdout(f"\n{skip_msg}\n") @@ -1249,8 +1249,9 @@ class DownloadModal(ModalScreen): stdout_buf = io.StringIO() stderr_buf = io.StringIO() + tag_args = ["-store", "local"] + [str(t) for t in tags] with redirect_stdout(stdout_buf), redirect_stderr(stderr_buf): - tag_returncode = tag_cmdlet(result_obj, tags, self.config) + tag_returncode = tag_cmdlet(result_obj, tag_args, self.config) if tag_returncode != 0: logger.warning(f"Tag stage returned code {tag_returncode}") diff --git a/TUI/modalscreen/export.py b/TUI/modalscreen/export.py index fcd0ebe..abe9c49 100644 --- a/TUI/modalscreen/export.py +++ b/TUI/modalscreen/export.py @@ -171,14 +171,14 @@ class ExportModal(ModalScreen): with Container(id="export-container"): yield Static("Export File with Metadata", id="export-title") - # Row 1: Three columns (Tags, Metadata, Export-To Options) + # Row 1: Three columns (Tag, Metadata, Export-To Options) self.tags_textarea = TextArea( text=self._format_tags(), id="tags-area", read_only=False, ) yield self.tags_textarea - self.tags_textarea.border_title = "Tags" + self.tags_textarea.border_title = "Tag" # Metadata display instead of files tree self.metadata_display = Static( diff --git a/TUI/tui.py b/TUI/tui.py index 59fef72..60acda8 100644 --- a/TUI/tui.py +++ b/TUI/tui.py @@ -83,7 +83,7 @@ class PipelineHubApp(App): with Container(id="app-shell"): with Horizontal(id="command-pane"): self.command_input = Input( - placeholder='download-data "" | merge-file 
| add-tag | add-file -storage local', + placeholder='download-data "" | merge-file | add-tags -store local | add-file -storage local', id="pipeline-input", ) yield self.command_input diff --git a/cmdlets/__init__.py b/cmdlets/__init__.py index 551f1a5..400f037 100644 --- a/cmdlets/__init__.py +++ b/cmdlets/__init__.py @@ -14,7 +14,7 @@ def register(names: Iterable[str]): """Decorator to register a function under one or more command names. Usage: - @register(["add-tag", "add-tags"]) + @register(["add-tags"]) def _run(result, args, config) -> int: ... """ def _wrap(fn: Cmdlet) -> Cmdlet: diff --git a/cmdlets/_shared.py b/cmdlets/_shared.py index 3efbc5f..896ccfe 100644 --- a/cmdlets/_shared.py +++ b/cmdlets/_shared.py @@ -1,7 +1,4 @@ -"""Shared utilities for cmdlets and funacts. - -This module provides common utility functions for working with hashes, tags, -relationship data, and other frequently-needed operations. +""" """ from __future__ import annotations @@ -192,7 +189,7 @@ class SharedArgs: DELETE_FLAG = CmdletArg( "delete", type="flag", - description="Delete the file and its .tags after successful operation." + description="Delete the file and its .tag after successful operation." ) # Metadata arguments @@ -1092,7 +1089,7 @@ def create_pipe_object_result( hash_value: Optional[str] = None, is_temp: bool = False, parent_hash: Optional[str] = None, - tags: Optional[List[str]] = None, + tag: Optional[List[str]] = None, **extra: Any ) -> Dict[str, Any]: """Create a PipeObject-compatible result dict for pipeline chaining. @@ -1109,7 +1106,7 @@ def create_pipe_object_result( hash_value: SHA-256 hash of file (for integrity) is_temp: If True, this is a temporary/intermediate artifact parent_hash: Hash of the parent file in the chain (for provenance) - tags: List of tags to apply + tag: List of tag values to apply **extra: Additional fields Returns: @@ -1130,8 +1127,8 @@ def create_pipe_object_result( result['is_temp'] = True if parent_hash: result['parent_hash'] = parent_hash - if tags: - result['tags'] = tags + if tag: + result['tag'] = tag # Canonical store field: use source for compatibility try: @@ -1350,33 +1347,46 @@ def collapse_namespace_tags(tags: Optional[Iterable[Any]], namespace: str, prefe return result -def extract_tags_from_result(result: Any) -> list[str]: - tags: list[str] = [] - if isinstance(result, models.PipeObject): - tags.extend(result.tags or []) - tags.extend(result.extra.get('tags', [])) - elif hasattr(result, 'tags'): - # Handle objects with tags attribute (e.g. SearchResult) - val = getattr(result, 'tags') - if isinstance(val, (list, set, tuple)): - tags.extend(val) - elif isinstance(val, str): - tags.append(val) - - if isinstance(result, dict): - raw_tags = result.get('tags') - if isinstance(raw_tags, list): - tags.extend(raw_tags) - elif isinstance(raw_tags, str): - tags.append(raw_tags) - extra = result.get('extra') - if isinstance(extra, dict): - extra_tags = extra.get('tags') - if isinstance(extra_tags, list): - tags.extend(extra_tags) - elif isinstance(extra_tags, str): - tags.append(extra_tags) - return merge_sequences(tags, case_sensitive=True) +def collapse_namespace_tag(tags: Optional[Iterable[Any]], namespace: str, prefer: str = "last") -> list[str]: + """Singular alias for collapse_namespace_tags. + + Some cmdlets prefer the singular name; keep behavior centralized. 
+ """ + return collapse_namespace_tags(tags, namespace, prefer=prefer) + + +def extract_tag_from_result(result: Any) -> list[str]: + tag: list[str] = [] + if isinstance(result, models.PipeObject): + tag.extend(result.tag or []) + if isinstance(result.extra, dict): + extra_tag = result.extra.get('tag') + if isinstance(extra_tag, list): + tag.extend(extra_tag) + elif isinstance(extra_tag, str): + tag.append(extra_tag) + elif hasattr(result, 'tag'): + # Handle objects with tag attribute (e.g. SearchResult) + val = getattr(result, 'tag') + if isinstance(val, (list, set, tuple)): + tag.extend(val) + elif isinstance(val, str): + tag.append(val) + + if isinstance(result, dict): + raw_tag = result.get('tag') + if isinstance(raw_tag, list): + tag.extend(raw_tag) + elif isinstance(raw_tag, str): + tag.append(raw_tag) + extra = result.get('extra') + if isinstance(extra, dict): + extra_tag = extra.get('tag') + if isinstance(extra_tag, list): + tag.extend(extra_tag) + elif isinstance(extra_tag, str): + tag.append(extra_tag) + return merge_sequences(tag, case_sensitive=True) def extract_title_from_result(result: Any) -> Optional[str]: @@ -1469,7 +1479,7 @@ def coerce_to_pipe_object(value: Any, default_path: Optional[str] = None) -> mod debug(f" target={getattr(value, 'target', None)}") debug(f" hash={getattr(value, 'hash', None)}") debug(f" media_kind={getattr(value, 'media_kind', None)}") - debug(f" tags={getattr(value, 'tags', None)}") + debug(f" tag={getattr(value, 'tag', None)}") debug(f" tag_summary={getattr(value, 'tag_summary', None)}") debug(f" size_bytes={getattr(value, 'size_bytes', None)}") debug(f" duration_seconds={getattr(value, 'duration_seconds', None)}") @@ -1483,7 +1493,7 @@ def coerce_to_pipe_object(value: Any, default_path: Optional[str] = None) -> mod return value known_keys = { - "hash", "store", "tags", "title", "url", "source_url", "duration", "metadata", + "hash", "store", "tag", "title", "url", "source_url", "duration", "metadata", "warnings", "path", "relationships", "is_temp", "action", "parent_hash", } @@ -1542,18 +1552,14 @@ def coerce_to_pipe_object(value: Any, default_path: Optional[str] = None) -> mod # Extract relationships rels = value.get("relationships") or {} - # Consolidate tags: prefer tags_set over tags, tag_summary - tags_val = [] - if "tags_set" in value and value["tags_set"]: - tags_val = list(value["tags_set"]) - elif "tags" in value and isinstance(value["tags"], (list, set)): - tags_val = list(value["tags"]) - elif "tag" in value: - # Single tag string or list - if isinstance(value["tag"], list): - tags_val = value["tag"] # Already a list - else: - tags_val = [value["tag"]] # Wrap single string in list + # Canonical tag: accept list or single string + tag_val: list[str] = [] + if "tag" in value: + raw_tag = value["tag"] + if isinstance(raw_tag, list): + tag_val = [str(t) for t in raw_tag if t is not None] + elif isinstance(raw_tag, str): + tag_val = [raw_tag] # Consolidate path: prefer explicit path key, but NOT target if it's a URL path_val = value.get("path") @@ -1580,7 +1586,7 @@ def coerce_to_pipe_object(value: Any, default_path: Optional[str] = None) -> mod pipe_obj = models.PipeObject( hash=hash_val, store=store_val, - tags=tags_val, + tag=tag_val, title=title_val, url=url_val, source_url=value.get("source_url"), @@ -1624,7 +1630,7 @@ def coerce_to_pipe_object(value: Any, default_path: Optional[str] = None) -> mod store=store_val, path=str(path_val) if path_val and path_val != "unknown" else None, title=title_val, - tags=[], + tag=[], extra={}, ) diff 
--git a/cmdlets/add_file.py b/cmdlets/add_file.py index 19bc487..de88ca6 100644 --- a/cmdlets/add_file.py +++ b/cmdlets/add_file.py @@ -12,10 +12,10 @@ from SYS.logger import log, debug from Store import Store from ._shared import ( Cmdlet, CmdletArg, parse_cmdlet_args, SharedArgs, - extract_tags_from_result, extract_title_from_result, extract_url_from_result, + extract_tag_from_result, extract_title_from_result, extract_url_from_result, merge_sequences, extract_relationships, extract_duration, coerce_to_pipe_object ) -from ._shared import collapse_namespace_tags +from ._shared import collapse_namespace_tag from API.folder import read_sidecar, find_sidecar, write_sidecar, API_folder_store from SYS.utils import sha256_file, unique_path from metadata import write_metadata @@ -419,14 +419,14 @@ class Add_File(Cmdlet): hash_value: str, store: str, path: Optional[str], - tags: List[str], + tag: List[str], title: Optional[str], extra_updates: Optional[Dict[str, Any]] = None, ) -> None: pipe_obj.hash = hash_value pipe_obj.store = store pipe_obj.path = path - pipe_obj.tags = tags + pipe_obj.tag = tag if title: pipe_obj.title = title if isinstance(pipe_obj.extra, dict): @@ -452,10 +452,10 @@ class Add_File(Cmdlet): Prepare tags, url, and title for the file. Returns (tags, url, preferred_title, file_hash) """ - tags_from_result = list(pipe_obj.tags or []) + tags_from_result = list(pipe_obj.tag or []) if not tags_from_result: try: - tags_from_result = list(extract_tags_from_result(result) or []) + tags_from_result = list(extract_tag_from_result(result) or []) except Exception: tags_from_result = [] @@ -488,7 +488,7 @@ class Add_File(Cmdlet): return tag tags_from_result_no_title = [t for t in tags_from_result if not str(t).strip().lower().startswith("title:")] - sidecar_tags = collapse_namespace_tags([normalize_title_tag(t) for t in sidecar_tags], "title", prefer="last") + sidecar_tags = collapse_namespace_tag([normalize_title_tag(t) for t in sidecar_tags], "title", prefer="last") sidecar_tags_filtered = [t for t in sidecar_tags if not str(t).strip().lower().startswith("title:")] merged_tags = merge_sequences(tags_from_result_no_title, sidecar_tags_filtered, case_sensitive=True) @@ -501,7 +501,7 @@ class Add_File(Cmdlet): file_hash = Add_File._resolve_file_hash(result, media_path, pipe_obj, sidecar_hash) # Persist back to PipeObject - pipe_obj.tags = merged_tags + pipe_obj.tag = merged_tags if preferred_title and not pipe_obj.title: pipe_obj.title = preferred_title if file_hash and not pipe_obj.hash: @@ -591,7 +591,7 @@ class Add_File(Cmdlet): hash_value=f_hash or "unknown", store="local", path=str(target_path), - tags=tags, + tag=tags, title=chosen_title, extra_updates=extra_updates, ) @@ -729,7 +729,7 @@ class Add_File(Cmdlet): hash_value=f_hash or "unknown", store=provider_name or "provider", path=file_path, - tags=pipe_obj.tags, + tag=pipe_obj.tag, title=pipe_obj.title or (media_path.name if media_path else None), extra_updates=extra_updates, ) @@ -782,7 +782,7 @@ class Add_File(Cmdlet): hash_value=file_identifier if len(file_identifier) == 64 else f_hash or "unknown", store=backend_name, path=stored_path, - tags=tags, + tag=tags, title=title or pipe_obj.title or media_path.name, extra_updates={ "url": url, @@ -907,8 +907,6 @@ class Add_File(Cmdlet): possible_sidecars = [ source_path.with_suffix(source_path.suffix + ".json"), source_path.with_name(source_path.name + ".tag"), - source_path.with_name(source_path.name + ".tags"), - source_path.with_name(source_path.name + ".tags.txt"), 
source_path.with_name(source_path.name + ".metadata"), source_path.with_name(source_path.name + ".notes"), ] @@ -944,8 +942,6 @@ class Add_File(Cmdlet): media_path.parent / (media_path.name + '.metadata'), media_path.parent / (media_path.name + '.notes'), media_path.parent / (media_path.name + '.tag'), - media_path.parent / (media_path.name + '.tags'), - media_path.parent / (media_path.name + '.tags.txt'), ] for target in targets: try: diff --git a/cmdlets/add_tag.py b/cmdlets/add_tag.py index fb64a76..f0381a7 100644 --- a/cmdlets/add_tag.py +++ b/cmdlets/add_tag.py @@ -9,10 +9,172 @@ from SYS.logger import log import models import pipeline as ctx from ._shared import normalize_result_input, filter_results_by_temp -from API import HydrusNetwork as hydrus_wrapper -from API.folder import write_sidecar, API_folder_store -from ._shared import Cmdlet, CmdletArg, SharedArgs, normalize_hash, parse_tag_arguments, expand_tag_groups, parse_cmdlet_args, collapse_namespace_tags, should_show_help, get_field -from config import get_local_storage_path +from ._shared import ( + Cmdlet, + CmdletArg, + SharedArgs, + normalize_hash, + parse_tag_arguments, + expand_tag_groups, + parse_cmdlet_args, + collapse_namespace_tag, + should_show_help, + get_field, +) +from Store import Store +from SYS.utils import sha256_file + + +def _extract_title_tag(tags: List[str]) -> Optional[str]: + """Return the value of the first title: tag if present.""" + for t in tags: + if t.lower().startswith("title:"): + value = t.split(":", 1)[1].strip() + return value or None + return None + + +def _apply_title_to_result(res: Any, title_value: Optional[str]) -> None: + """Update result object/dict title fields and columns in-place.""" + if not title_value: + return + if isinstance(res, models.PipeObject): + res.title = title_value + # Update columns if present (Title column assumed index 0) + columns = getattr(res, "columns", None) + if isinstance(columns, list) and columns: + label, *_ = columns[0] + if str(label).lower() == "title": + columns[0] = (label, title_value) + elif isinstance(res, dict): + res["title"] = title_value + cols = res.get("columns") + if isinstance(cols, list): + updated = [] + changed = False + for col in cols: + if isinstance(col, tuple) and len(col) == 2: + label, _val = col + if str(label).lower() == "title": + updated.append((label, title_value)) + changed = True + else: + updated.append(col) + else: + updated.append(col) + if changed: + res["columns"] = updated + + +def _matches_target(item: Any, target_hash: Optional[str], target_path: Optional[str]) -> bool: + """Determine whether a result item refers to the given hash/path target (canonical fields only).""" + + def norm(val: Any) -> Optional[str]: + return str(val).lower() if val is not None else None + + target_hash_l = target_hash.lower() if target_hash else None + target_path_l = target_path.lower() if target_path else None + + if isinstance(item, dict): + hashes = [norm(item.get("hash"))] + paths = [norm(item.get("path"))] + else: + hashes = [norm(get_field(item, "hash"))] + paths = [norm(get_field(item, "path"))] + + if target_hash_l and target_hash_l in hashes: + return True + if target_path_l and target_path_l in paths: + return True + return False + + +def _update_item_title_fields(item: Any, new_title: str) -> None: + """Mutate an item to reflect a new title in plain fields and columns.""" + if isinstance(item, models.PipeObject): + item.title = new_title + columns = getattr(item, "columns", None) + if isinstance(columns, list) and columns: + 
label, *_ = columns[0] + if str(label).lower() == "title": + columns[0] = (label, new_title) + elif isinstance(item, dict): + item["title"] = new_title + cols = item.get("columns") + if isinstance(cols, list): + updated_cols = [] + changed = False + for col in cols: + if isinstance(col, tuple) and len(col) == 2: + label, _val = col + if str(label).lower() == "title": + updated_cols.append((label, new_title)) + changed = True + else: + updated_cols.append(col) + else: + updated_cols.append(col) + if changed: + item["columns"] = updated_cols + + +def _refresh_result_table_title(new_title: str, target_hash: Optional[str], target_path: Optional[str]) -> None: + """Refresh the cached result table with an updated title and redisplay it.""" + try: + last_table = ctx.get_last_result_table() + items = ctx.get_last_result_items() + if not last_table or not items: + return + + updated_items = [] + match_found = False + for item in items: + try: + if _matches_target(item, target_hash, target_path): + _update_item_title_fields(item, new_title) + match_found = True + except Exception: + pass + updated_items.append(item) + if not match_found: + return + + new_table = last_table.copy_with_title(getattr(last_table, "title", "")) + + for item in updated_items: + new_table.add_result(item) + + # Keep the underlying history intact; update only the overlay so @.. can + # clear the overlay then continue back to prior tables (e.g., the search list). + ctx.set_last_result_table_overlay(new_table, updated_items) + except Exception: + pass + + +def _refresh_tag_view(res: Any, target_hash: Optional[str], store_name: Optional[str], target_path: Optional[str], config: Dict[str, Any]) -> None: + """Refresh tag display via get-tag. Prefer current subject; fall back to direct hash refresh.""" + try: + from cmdlets import get_tag as get_tag_cmd # type: ignore + except Exception: + return + + if not target_hash or not store_name: + return + + refresh_args: List[str] = ["-hash", target_hash, "-store", store_name] + + try: + subject = ctx.get_last_result_subject() + if subject and _matches_target(subject, target_hash, target_path): + get_tag_cmd._run(subject, refresh_args, config) + return + except Exception: + pass + + try: + get_tag_cmd._run(res, refresh_args, config) + except Exception: + pass @@ -22,23 +184,23 @@ class Add_Tag(Cmdlet): def __init__(self) -> None: super().__init__( name="add-tag", - summary="Add a tag to a Hydrus file or write it to a local .tags sidecar.", - usage="add-tag [-hash ] [-store ] [-duplicate ] [-list [,...]] [--all] [,...]", + summary="Add tag to a file in a store.", + usage="add-tag -store [-hash ] [-duplicate ] [-list [,...]] [--all] [,...]", arg=[ SharedArgs.HASH, SharedArgs.STORE, CmdletArg("-duplicate", type="string", description="Copy existing tag values to new namespaces. Formats: title:album,artist (explicit) or title,album,artist (inferred)"), CmdletArg("-list", type="string", description="Load predefined tag lists from adjective.json. Comma-separated list names (e.g., -list philosophy,occult)."), - CmdletArg("--all", type="flag", description="Include temporary files in tagging (by default, only tags non-temporary files)."), - CmdletArg("tags", type="string", required=False, description="One or more tags to add. Comma- or space-separated. Can also use {list_name} syntax. 
If omitted, uses tags from pipeline payload.", variadic=True), + CmdletArg("--all", type="flag", description="Include temporary files in tagging (by default, only tag non-temporary files)."), + CmdletArg("tag", type="string", required=False, description="One or more tag to add. Comma- or space-separated. Can also use {list_name} syntax. If omitted, uses tag from pipeline payload.", variadic=True), ], detail=[ - "- By default, only tags non-temporary files (from pipelines). Use --all to tag everything.", - "- Without -hash and when the selection is a local file, tags are written to .tags.", - "- With a Hydrus hash, tags are sent to the 'my tags' service.", - "- Multiple tags can be comma-separated or space-separated.", + "- By default, only tag non-temporary files (from pipelines). Use --all to tag everything.", + "- Requires a store backend: use -store or pipe items that include store.", + "- If -hash is not provided, uses the piped item's hash (or derives from its path when possible).", + "- Multiple tag can be comma-separated or space-separated.", "- Use -list to include predefined tag lists from adjective.json: -list philosophy,occult", - "- Tags can also reference lists with curly braces: add-tag {philosophy} \"other:tag\"", + "- tag can also reference lists with curly braces: add-tag {philosophy} \"other:tag\"", "- Use -duplicate to copy EXISTING tag values to new namespaces:", " Explicit format: -duplicate title:album,artist (copies title: to album: and artist:)", " Inferred format: -duplicate title,album,artist (first is source, rest are targets)", @@ -50,180 +212,20 @@ class Add_Tag(Cmdlet): ) self.register() - @staticmethod - def _extract_title_tag(tags: List[str]) -> Optional[str]: - """Return the value of the first title: tag if present.""" - for tag in tags: - if isinstance(tag, str) and tag.lower().startswith("title:"): - value = tag.split(":", 1)[1].strip() - if value: - return value - return None - - @staticmethod - def _apply_title_to_result(res: Any, title_value: Optional[str]) -> None: - """Update result object/dict title fields and columns in-place.""" - if not title_value: - return - if isinstance(res, models.PipeObject): - res.title = title_value - if hasattr(res, "columns") and isinstance(res.columns, list) and res.columns: - label, *_ = res.columns[0] - if str(label).lower() == "title": - res.columns[0] = (res.columns[0][0], title_value) - elif isinstance(res, dict): - res["title"] = title_value - cols = res.get("columns") - if isinstance(cols, list): - updated = [] - changed = False - for col in cols: - if isinstance(col, tuple) and len(col) == 2: - label, val = col - if str(label).lower() == "title": - updated.append((label, title_value)) - changed = True - else: - updated.append(col) - else: - updated.append(col) - if changed: - res["columns"] = updated - - @staticmethod - def _matches_target(item: Any, file_hash: Optional[str], path: Optional[str]) -> bool: - """Determine whether a result item refers to the given hash/path target.""" - file_hash_l = file_hash.lower() if file_hash else None - path_l = path.lower() if path else None - - def norm(val: Any) -> Optional[str]: - return str(val).lower() if val is not None else None - - hash_fields = ["hash"] - path_fields = ["path", "target"] - - if isinstance(item, dict): - hashes = [norm(item.get(field)) for field in hash_fields] - paths = [norm(item.get(field)) for field in path_fields] - else: - hashes = [norm(get_field(item, field)) for field in hash_fields] - paths = [norm(get_field(item, field)) for field in 
path_fields] - - if file_hash_l and file_hash_l in hashes: - return True - if path_l and path_l in paths: - return True - return False - - @staticmethod - def _update_item_title_fields(item: Any, new_title: str) -> None: - """Mutate an item to reflect a new title in plain fields and columns.""" - if isinstance(item, models.PipeObject): - item.title = new_title - if hasattr(item, "columns") and isinstance(item.columns, list) and item.columns: - label, *_ = item.columns[0] - if str(label).lower() == "title": - item.columns[0] = (label, new_title) - elif isinstance(item, dict): - item["title"] = new_title - cols = item.get("columns") - if isinstance(cols, list): - updated_cols = [] - changed = False - for col in cols: - if isinstance(col, tuple) and len(col) == 2: - label, val = col - if str(label).lower() == "title": - updated_cols.append((label, new_title)) - changed = True - else: - updated_cols.append(col) - else: - updated_cols.append(col) - if changed: - item["columns"] = updated_cols - - def _refresh_result_table_title(self, new_title: str, file_hash: Optional[str], path: Optional[str]) -> None: - """Refresh the cached result table with an updated title and redisplay it.""" - try: - last_table = ctx.get_last_result_table() - items = ctx.get_last_result_items() - if not last_table or not items: - return - - updated_items = [] - match_found = False - for item in items: - try: - if self._matches_target(item, file_hash, path): - self._update_item_title_fields(item, new_title) - match_found = True - except Exception: - pass - updated_items.append(item) - if not match_found: - return - - from result_table import ResultTable # Local import to avoid circular dependency - - new_table = last_table.copy_with_title(getattr(last_table, "title", "")) - - for item in updated_items: - new_table.add_result(item) - - ctx.set_last_result_table_overlay(new_table, updated_items) - except Exception: - pass - - def _refresh_tags_view(self, res: Any, file_hash: Optional[str], path: Optional[str], config: Dict[str, Any]) -> None: - """Refresh tag display via get-tag. 
Prefer current subject; fall back to direct hash refresh.""" - try: - from cmdlets import get_tag as get_tag_cmd # type: ignore - except Exception: - return - - target_hash = file_hash - refresh_args: List[str] = [] - if target_hash: - refresh_args = ["-hash", target_hash] - - try: - subject = ctx.get_last_result_subject() - if subject and self._matches_target(subject, file_hash, path): - get_tag_cmd._run(subject, refresh_args, config) - return - except Exception: - pass - - if target_hash: - try: - get_tag_cmd._run(res, refresh_args, config) - except Exception: - pass - def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: - """Add a tag to a file with smart filtering for pipeline results.""" + """Add tag to a file with smart filtering for pipeline results.""" if should_show_help(args): log(f"Cmdlet: {self.name}\nSummary: {self.summary}\nUsage: {self.usage}") return 0 + # Parse arguments parsed = parse_cmdlet_args(args, self) # Check for --all flag include_temp = parsed.get("all", False) - - # Get explicit -hash and -store overrides from CLI - hash_override = normalize_hash(parsed.get("hash")) - store_override = parsed.get("store") # Normalize input to list results = normalize_result_input(result) - - # If no piped results but we have -hash flag, create a minimal synthetic result - if not results and hash_override: - results = [{"hash": hash_override, "is_temp": False}] - if store_override: - results[0]["store"] = store_override # Filter by temp status (unless --all is set) if not include_temp: @@ -233,34 +235,35 @@ class Add_Tag(Cmdlet): log("No valid files to tag (all results were temporary; use --all to include temporary files)", file=sys.stderr) return 1 - # Get tags from arguments (or fallback to pipeline payload) - raw_tags = parsed.get("tags", []) - if isinstance(raw_tags, str): - raw_tags = [raw_tags] + # Get tag from arguments (or fallback to pipeline payload) + raw_tag = parsed.get("tag", []) + if isinstance(raw_tag, str): + raw_tag = [raw_tag] - # Fallback: if no tags provided explicitly, try to pull from first result payload - if not raw_tags and results: + # Fallback: if no tag provided explicitly, try to pull from first result payload + if not raw_tag and results: first = results[0] - payload_tags = None + payload_tag = None + # Try multiple tag lookup strategies in order tag_lookups = [ - lambda x: x.extra.get("tags") if isinstance(x, models.PipeObject) and isinstance(x.extra, dict) else None, - lambda x: x.get("tags") if isinstance(x, dict) else None, - lambda x: x.get("extra", {}).get("tags") if isinstance(x, dict) and isinstance(x.get("extra"), dict) else None, - lambda x: getattr(x, "tags", None), + lambda x: getattr(x, "tag", None), + lambda x: x.get("tag") if isinstance(x, dict) else None, ] + for lookup in tag_lookups: try: - payload_tags = lookup(first) - if payload_tags: + payload_tag = lookup(first) + if payload_tag: break except (AttributeError, TypeError, KeyError): continue - if payload_tags: - if isinstance(payload_tags, str): - raw_tags = [payload_tags] - elif isinstance(payload_tags, list): - raw_tags = payload_tags + + if payload_tag: + if isinstance(payload_tag, str): + raw_tag = [payload_tag] + elif isinstance(payload_tag, list): + raw_tag = payload_tag # Handle -list argument (convert to {list} syntax) list_arg = parsed.get("list") @@ -268,222 +271,184 @@ class Add_Tag(Cmdlet): for l in list_arg.split(','): l = l.strip() if l: - raw_tags.append(f"{{{l}}}") + raw_tag.append(f"{{{l}}}") - # Parse and expand tags - tags_to_add = 
parse_tag_arguments(raw_tags) - tags_to_add = expand_tag_groups(tags_to_add) + # Parse and expand tag + tag_to_add = parse_tag_arguments(raw_tag) + tag_to_add = expand_tag_groups(tag_to_add) # Allow hash override via namespaced token (e.g., "hash:abcdef...") extracted_hash = None - filtered_tags: List[str] = [] - for tag in tags_to_add: + filtered_tag: List[str] = [] + for tag in tag_to_add: if isinstance(tag, str) and tag.lower().startswith("hash:"): _, _, hash_val = tag.partition(":") if hash_val: extracted_hash = normalize_hash(hash_val.strip()) continue - filtered_tags.append(tag) - tags_to_add = filtered_tags + filtered_tag.append(tag) + tag_to_add = filtered_tag - if not tags_to_add: - log("No tags provided to add", file=sys.stderr) + if not tag_to_add: + log("No tag provided to add", file=sys.stderr) return 1 - def _find_library_root(path_obj: Path) -> Optional[Path]: - candidates = [] - cfg_root = get_local_storage_path(config) if config else None - if cfg_root: - try: - candidates.append(Path(cfg_root).expanduser()) - except Exception: - pass - try: - for candidate in candidates: - if (candidate / "medios-macina.db").exists(): - return candidate - for parent in [path_obj] + list(path_obj.parents): - if (parent / "medios-macina.db").exists(): - return parent - except Exception: - pass - return None - - # Get other flags + # Get other flags (hash override can come from -hash or hash: token) + hash_override = normalize_hash(parsed.get("hash")) or extracted_hash duplicate_arg = parsed.get("duplicate") - if not tags_to_add and not duplicate_arg: - # Write sidecar files with the tags that are already in the result dicts - sidecar_count = 0 - for res in results: - # Handle both dict and PipeObject formats - file_path = None - tags = [] - file_hash = "" - # Use canonical field access with get_field for both dict and objects - file_path = get_field(res, "path") - # Try tags from top-level 'tags' or from 'extra.tags' - tags = get_field(res, "tags") or (get_field(res, "extra") or {}).get("tags", []) - file_hash = get_field(res, "hash") or "" - if not file_path: - log(f"[add_tag] Warning: Result has no path, skipping", file=sys.stderr) - ctx.emit(res) - continue - if tags: - # Write sidecar file for this file with its tags - try: - sidecar_path = write_sidecar(Path(file_path), tags, [], file_hash) - log(f"[add_tag] Wrote {len(tags)} tag(s) to sidecar: {sidecar_path}", file=sys.stderr) - sidecar_count += 1 - except Exception as e: - log(f"[add_tag] Warning: Failed to write sidecar for {file_path}: {e}", file=sys.stderr) - ctx.emit(res) - if sidecar_count > 0: - log(f"[add_tag] Wrote {sidecar_count} sidecar file(s) with embedded tags", file=sys.stderr) - else: - log(f"[add_tag] No tags to write - passed {len(results)} result(s) through unchanged", file=sys.stderr) - return 0 - - # Main loop: process results with tags to add - total_new_tags = 0 + # tag ARE provided - apply them to each store-backed result + total_added = 0 total_modified = 0 + + store_override = parsed.get("store") + for res in results: - # Extract file info from result - file_path = None - existing_tags = [] - file_hash = "" - storage_source = None - - # Use canonical getters for fields from both dicts and PipeObject - file_path = get_field(res, "path") - existing_tags = get_field(res, "tags") or [] - if not existing_tags: - existing_tags = (get_field(res, "extra", {}) or {}).get("tags") or [] - file_hash = get_field(res, "hash") or "" - store_name = store_override or get_field(res, "store") - - original_tags_lower = 
{str(t).lower() for t in existing_tags if isinstance(t, str)} - original_title = self._extract_title_tag(list(existing_tags)) - - # Apply CLI overrides if provided - if hash_override and not file_hash: - file_hash = hash_override + store_name: Optional[str] + raw_hash: Optional[str] + raw_path: Optional[str] + + if isinstance(res, models.PipeObject): + store_name = store_override or res.store + raw_hash = res.hash + raw_path = res.path + elif isinstance(res, dict): + store_name = store_override or res.get("store") + raw_hash = res.get("hash") + raw_path = res.get("path") + else: + ctx.emit(res) + continue + if not store_name: - log("[add_tag] Missing store (use -store or pipe a result with store)", file=sys.stderr) + log("[add_tag] Error: Missing -store and item has no store field", file=sys.stderr) + return 1 + + resolved_hash = normalize_hash(hash_override) if hash_override else normalize_hash(raw_hash) + if not resolved_hash and raw_path: + try: + p = Path(str(raw_path)) + stem = p.stem + if len(stem) == 64 and all(c in "0123456789abcdef" for c in stem.lower()): + resolved_hash = stem.lower() + elif p.exists() and p.is_file(): + resolved_hash = sha256_file(p) + except Exception: + resolved_hash = None + + if not resolved_hash: + log("[add_tag] Warning: Item missing usable hash (and could not derive from path); skipping", file=sys.stderr) ctx.emit(res) continue - - # Check if we have sufficient identifier (file_path OR file_hash) - if not file_path and not file_hash: - log(f"[add_tag] Warning: Result has neither path nor hash available, skipping", file=sys.stderr) - ctx.emit(res) - continue - # Handle -duplicate logic (copy existing tags to new namespaces) + + try: + backend = Store(config)[str(store_name)] + except Exception as exc: + log(f"[add_tag] Error: Unknown store '{store_name}': {exc}", file=sys.stderr) + return 1 + + try: + existing_tag, _src = backend.get_tag(resolved_hash, config=config) + except Exception: + existing_tag = [] + + existing_tag_list = [t for t in (existing_tag or []) if isinstance(t, str)] + existing_lower = {t.lower() for t in existing_tag_list} + original_title = _extract_title_tag(existing_tag_list) + + # Per-item tag list (do not mutate shared list) + item_tag_to_add = list(tag_to_add) + item_tag_to_add = collapse_namespace_tag(item_tag_to_add, "title", prefer="last") + + # Handle -duplicate logic (copy existing tag to new namespaces) if duplicate_arg: - # Parse duplicate format: source:target1,target2 or source,target1,target2 - parts = duplicate_arg.split(':') + parts = str(duplicate_arg).split(':') source_ns = "" - targets = [] + targets: list[str] = [] + if len(parts) > 1: - # Explicit format: source:target1,target2 source_ns = parts[0] - targets = parts[1].split(',') + targets = [t.strip() for t in parts[1].split(',') if t.strip()] else: - # Inferred format: source,target1,target2 - parts = duplicate_arg.split(',') - if len(parts) > 1: - source_ns = parts[0] - targets = parts[1:] + parts2 = str(duplicate_arg).split(',') + if len(parts2) > 1: + source_ns = parts2[0] + targets = [t.strip() for t in parts2[1:] if t.strip()] + if source_ns and targets: - # Find tags in source namespace - source_tags = [t for t in existing_tags if t.startswith(source_ns + ':')] - for t in source_tags: - value = t.split(':', 1)[1] + source_prefix = source_ns.lower() + ":" + for t in existing_tag_list: + if not t.lower().startswith(source_prefix): + continue + value = t.split(":", 1)[1] for target_ns in targets: new_tag = f"{target_ns}:{value}" - if new_tag not in 
existing_tags and new_tag not in tags_to_add: - tags_to_add.append(new_tag) - - # Initialize tag mutation tracking local variables - removed_tags = [] - new_tags_added = [] - final_tags = list(existing_tags) if existing_tags else [] + if new_tag.lower() not in existing_lower: + item_tag_to_add.append(new_tag) - # Resolve hash from path if needed - if not file_hash and file_path: - try: - from SYS.utils import sha256_file - file_hash = sha256_file(Path(file_path)) - except Exception: - file_hash = "" - - if not file_hash: - log("[add_tag] Warning: No hash available, skipping", file=sys.stderr) - ctx.emit(res) - continue - - # Route tag updates through the configured store backend - try: - storage = Store(config) - backend = storage[store_name] - - # For namespaced tags, compute old tags in same namespace to remove - removed_tags = [] - for new_tag in tags_to_add: - if ':' in new_tag: - namespace = new_tag.split(':', 1)[0] - to_remove = [t for t in existing_tags if t.startswith(namespace + ':') and t.lower() != new_tag.lower()] - removed_tags.extend(to_remove) - - ok = backend.add_tag(file_hash, tags_to_add, config=config) - if removed_tags: - unique_removed = sorted(set(removed_tags)) - backend.delete_tag(file_hash, unique_removed, config=config) - - if not ok: - log(f"[add_tag] Warning: Failed to add tags via store '{store_name}'", file=sys.stderr) - ctx.emit(res) + # Namespace replacement: delete old namespace:* when adding namespace:value + removed_namespace_tag: list[str] = [] + for new_tag in item_tag_to_add: + if not isinstance(new_tag, str) or ":" not in new_tag: continue + ns = new_tag.split(":", 1)[0].strip() + if not ns: + continue + ns_prefix = ns.lower() + ":" + for t in existing_tag_list: + if t.lower().startswith(ns_prefix) and t.lower() != new_tag.lower(): + removed_namespace_tag.append(t) - refreshed_tags, _ = backend.get_tag(file_hash, config=config) - refreshed_tags = list(refreshed_tags or []) - final_tags = refreshed_tags - new_tags_added = [t for t in refreshed_tags if t.lower() not in original_tags_lower] + removed_namespace_tag = sorted({t for t in removed_namespace_tag}) - # Update result tags for downstream cmdlets/UI - if isinstance(res, models.PipeObject): - res.tags = refreshed_tags - if isinstance(res.extra, dict): - res.extra['tags'] = refreshed_tags - elif isinstance(res, dict): - res['tags'] = refreshed_tags + actual_tag_to_add = [t for t in item_tag_to_add if isinstance(t, str) and t.lower() not in existing_lower] - # Update title if changed - title_value = self._extract_title_tag(refreshed_tags) - self._apply_title_to_result(res, title_value) + changed = False + if removed_namespace_tag: + try: + backend.delete_tag(resolved_hash, removed_namespace_tag, config=config) + changed = True + except Exception as exc: + log(f"[add_tag] Warning: Failed deleting namespace tag: {exc}", file=sys.stderr) - total_new_tags += len(new_tags_added) - if new_tags_added: - total_modified += 1 - except KeyError: - log(f"[add_tag] Store '{store_name}' not configured", file=sys.stderr) - ctx.emit(res) - continue - except Exception as e: - log(f"[add_tag] Warning: Backend error for store '{store_name}': {e}", file=sys.stderr) - ctx.emit(res) - continue + if actual_tag_to_add: + try: + backend.add_tag(resolved_hash, actual_tag_to_add, config=config) + changed = True + except Exception as exc: + log(f"[add_tag] Warning: Failed adding tag: {exc}", file=sys.stderr) + + if changed: + total_added += len(actual_tag_to_add) + total_modified += 1 + + try: + refreshed_tag, _src2 = 
backend.get_tag(resolved_hash, config=config) + refreshed_list = [t for t in (refreshed_tag or []) if isinstance(t, str)] + except Exception: + refreshed_list = existing_tag_list + + # Update the result's tag using canonical field + if isinstance(res, models.PipeObject): + res.tag = refreshed_list + elif isinstance(res, dict): + res["tag"] = refreshed_list + + final_title = _extract_title_tag(refreshed_list) + _apply_title_to_result(res, final_title) - # If title changed, refresh the cached result table so the display reflects the new name - final_title = self._extract_title_tag(final_tags) if final_title and (not original_title or final_title.lower() != original_title.lower()): - self._refresh_result_table_title(final_title, file_hash, file_path) - # If tags changed, refresh tag view via get-tag - if new_tags_added or removed_tags: - self._refresh_tags_view(res, file_hash, file_path, config) - # Emit the modified result + _refresh_result_table_title(final_title, resolved_hash, raw_path) + + if changed: + _refresh_tag_view(res, resolved_hash, str(store_name), raw_path, config) + ctx.emit(res) - log(f"[add_tag] Added {total_new_tags} new tag(s) across {len(results)} item(s); modified {total_modified} item(s)", file=sys.stderr) + + log( + f"[add_tag] Added {total_added} new tag(s) across {len(results)} item(s); modified {total_modified} item(s)", + file=sys.stderr, + ) return 0 diff --git a/cmdlets/add_tags.py b/cmdlets/add_tags.py deleted file mode 100644 index 24e96fb..0000000 --- a/cmdlets/add_tags.py +++ /dev/null @@ -1,456 +0,0 @@ -from __future__ import annotations - -from typing import Any, Dict, List, Sequence, Optional -from pathlib import Path -import sys - -from SYS.logger import log - -import models -import pipeline as ctx -from ._shared import normalize_result_input, filter_results_by_temp -from ._shared import ( - Cmdlet, - CmdletArg, - SharedArgs, - normalize_hash, - parse_tag_arguments, - expand_tag_groups, - parse_cmdlet_args, - collapse_namespace_tags, - should_show_help, - get_field, -) -from Store import Store -from SYS.utils import sha256_file - - -def _extract_title_tag(tags: List[str]) -> Optional[str]: - """Return the value of the first title: tag if present.""" - for tag in tags: - if isinstance(tag, str) and tag.lower().startswith("title:"): - value = tag.split(":", 1)[1].strip() - if value: - return value - return None - - -def _apply_title_to_result(res: Any, title_value: Optional[str]) -> None: - """Update result object/dict title fields and columns in-place.""" - if not title_value: - return - if isinstance(res, models.PipeObject): - res.title = title_value - # Update columns if present (Title column assumed index 0) - if hasattr(res, "columns") and isinstance(res.columns, list) and res.columns: - label, *_ = res.columns[0] - if str(label).lower() == "title": - res.columns[0] = (res.columns[0][0], title_value) - elif isinstance(res, dict): - res["title"] = title_value - cols = res.get("columns") - if isinstance(cols, list): - updated = [] - changed = False - for col in cols: - if isinstance(col, tuple) and len(col) == 2: - label, val = col - if str(label).lower() == "title": - updated.append((label, title_value)) - changed = True - else: - updated.append(col) - else: - updated.append(col) - if changed: - res["columns"] = updated - - -def _matches_target(item: Any, target_hash: Optional[str], target_path: Optional[str]) -> bool: - """Determine whether a result item refers to the given hash/path target (canonical fields only).""" - - def norm(val: Any) -> 
Optional[str]: - return str(val).lower() if val is not None else None - - target_hash_l = target_hash.lower() if target_hash else None - target_path_l = target_path.lower() if target_path else None - - if isinstance(item, dict): - hashes = [norm(item.get("hash"))] - paths = [norm(item.get("path"))] - else: - hashes = [norm(get_field(item, "hash"))] - paths = [norm(get_field(item, "path"))] - - if target_hash_l and target_hash_l in hashes: - return True - if target_path_l and target_path_l in paths: - return True - return False - - -def _update_item_title_fields(item: Any, new_title: str) -> None: - """Mutate an item to reflect a new title in plain fields and columns.""" - if isinstance(item, models.PipeObject): - item.title = new_title - if hasattr(item, "columns") and isinstance(item.columns, list) and item.columns: - label, *_ = item.columns[0] - if str(label).lower() == "title": - item.columns[0] = (label, new_title) - elif isinstance(item, dict): - item["title"] = new_title - cols = item.get("columns") - if isinstance(cols, list): - updated_cols = [] - changed = False - for col in cols: - if isinstance(col, tuple) and len(col) == 2: - label, val = col - if str(label).lower() == "title": - updated_cols.append((label, new_title)) - changed = True - else: - updated_cols.append(col) - else: - updated_cols.append(col) - if changed: - item["columns"] = updated_cols - - -def _refresh_result_table_title(new_title: str, target_hash: Optional[str], target_path: Optional[str]) -> None: - """Refresh the cached result table with an updated title and redisplay it.""" - try: - last_table = ctx.get_last_result_table() - items = ctx.get_last_result_items() - if not last_table or not items: - return - - updated_items = [] - match_found = False - for item in items: - try: - if _matches_target(item, target_hash, target_path): - _update_item_title_fields(item, new_title) - match_found = True - except Exception: - pass - updated_items.append(item) - if not match_found: - return - - from result_table import ResultTable # Local import to avoid circular dependency - - new_table = last_table.copy_with_title(getattr(last_table, "title", "")) - - for item in updated_items: - new_table.add_result(item) - - # Keep the underlying history intact; update only the overlay so @.. can - # clear the overlay then continue back to prior tables (e.g., the search list). - ctx.set_last_result_table_overlay(new_table, updated_items) - except Exception: - pass - - -def _refresh_tags_view(res: Any, target_hash: Optional[str], store_name: Optional[str], target_path: Optional[str], config: Dict[str, Any]) -> None: - """Refresh tag display via get-tag. 
Prefer current subject; fall back to direct hash refresh.""" - try: - from cmdlets import get_tag as get_tag_cmd # type: ignore - except Exception: - return - - if not target_hash or not store_name: - return - - refresh_args: List[str] = ["-hash", target_hash, "-store", store_name] - - try: - subject = ctx.get_last_result_subject() - if subject and _matches_target(subject, target_hash, target_path): - get_tag_cmd._run(subject, refresh_args, config) - return - except Exception: - pass - - try: - get_tag_cmd._run(res, refresh_args, config) - except Exception: - pass - - - -class Add_Tag(Cmdlet): - """Class-based add-tags cmdlet with Cmdlet metadata inheritance.""" - - def __init__(self) -> None: - super().__init__( - name="add-tags", - summary="Add tags to a file in a store.", - usage="add-tags -store [-hash ] [-duplicate ] [-list [,...]] [--all] [,...]", - arg=[ - SharedArgs.HASH, - SharedArgs.STORE, - CmdletArg("-duplicate", type="string", description="Copy existing tag values to new namespaces. Formats: title:album,artist (explicit) or title,album,artist (inferred)"), - CmdletArg("-list", type="string", description="Load predefined tag lists from adjective.json. Comma-separated list names (e.g., -list philosophy,occult)."), - CmdletArg("--all", type="flag", description="Include temporary files in tagging (by default, only tags non-temporary files)."), - CmdletArg("tags", type="string", required=False, description="One or more tags to add. Comma- or space-separated. Can also use {list_name} syntax. If omitted, uses tags from pipeline payload.", variadic=True), - ], - detail=[ - "- By default, only tags non-temporary files (from pipelines). Use --all to tag everything.", - "- Requires a store backend: use -store or pipe items that include store.", - "- If -hash is not provided, uses the piped item's hash (or derives from its path when possible).", - "- Multiple tags can be comma-separated or space-separated.", - "- Use -list to include predefined tag lists from adjective.json: -list philosophy,occult", - "- Tags can also reference lists with curly braces: add-tags {philosophy} \"other:tag\"", - "- Use -duplicate to copy EXISTING tag values to new namespaces:", - " Explicit format: -duplicate title:album,artist (copies title: to album: and artist:)", - " Inferred format: -duplicate title,album,artist (first is source, rest are targets)", - "- The source namespace must already exist in the file being tagged.", - "- Target namespaces that already have a value are skipped (not overwritten).", - "- You can also pass the target hash as a tag token: hash:. 
This overrides -hash and is removed from the tag list.", - ], - exec=self.run, - ) - self.register() - - def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: - """Add tags to a file with smart filtering for pipeline results.""" - if should_show_help(args): - log(f"Cmdlet: {self.name}\nSummary: {self.summary}\nUsage: {self.usage}") - return 0 - - # Parse arguments - parsed = parse_cmdlet_args(args, self) - - # Check for --all flag - include_temp = parsed.get("all", False) - - # Normalize input to list - results = normalize_result_input(result) - - # Filter by temp status (unless --all is set) - if not include_temp: - results = filter_results_by_temp(results, include_temp=False) - - if not results: - log("No valid files to tag (all results were temporary; use --all to include temporary files)", file=sys.stderr) - return 1 - - # Get tags from arguments (or fallback to pipeline payload) - raw_tags = parsed.get("tags", []) - if isinstance(raw_tags, str): - raw_tags = [raw_tags] - - # Fallback: if no tags provided explicitly, try to pull from first result payload - if not raw_tags and results: - first = results[0] - payload_tags = None - - # Try multiple tag lookup strategies in order - tag_lookups = [ - lambda x: getattr(x, "tags", None), - lambda x: x.get("tags") if isinstance(x, dict) else None, - ] - - for lookup in tag_lookups: - try: - payload_tags = lookup(first) - if payload_tags: - break - except (AttributeError, TypeError, KeyError): - continue - - if payload_tags: - if isinstance(payload_tags, str): - raw_tags = [payload_tags] - elif isinstance(payload_tags, list): - raw_tags = payload_tags - - # Handle -list argument (convert to {list} syntax) - list_arg = parsed.get("list") - if list_arg: - for l in list_arg.split(','): - l = l.strip() - if l: - raw_tags.append(f"{{{l}}}") - - # Parse and expand tags - tags_to_add = parse_tag_arguments(raw_tags) - tags_to_add = expand_tag_groups(tags_to_add) - - # Allow hash override via namespaced token (e.g., "hash:abcdef...") - extracted_hash = None - filtered_tags: List[str] = [] - for tag in tags_to_add: - if isinstance(tag, str) and tag.lower().startswith("hash:"): - _, _, hash_val = tag.partition(":") - if hash_val: - extracted_hash = normalize_hash(hash_val.strip()) - continue - filtered_tags.append(tag) - tags_to_add = filtered_tags - - if not tags_to_add: - log("No tags provided to add", file=sys.stderr) - return 1 - - # Get other flags (hash override can come from -hash or hash: token) - hash_override = normalize_hash(parsed.get("hash")) or extracted_hash - duplicate_arg = parsed.get("duplicate") - - # Tags ARE provided - apply them to each store-backed result - total_added = 0 - total_modified = 0 - - store_override = parsed.get("store") - - for res in results: - store_name: Optional[str] - raw_hash: Optional[str] - raw_path: Optional[str] - - if isinstance(res, models.PipeObject): - store_name = store_override or res.store - raw_hash = res.hash - raw_path = res.path - elif isinstance(res, dict): - store_name = store_override or res.get("store") - raw_hash = res.get("hash") - raw_path = res.get("path") - else: - ctx.emit(res) - continue - - if not store_name: - log("[add_tags] Error: Missing -store and item has no store field", file=sys.stderr) - return 1 - - resolved_hash = normalize_hash(hash_override) if hash_override else normalize_hash(raw_hash) - if not resolved_hash and raw_path: - try: - p = Path(str(raw_path)) - stem = p.stem - if len(stem) == 64 and all(c in "0123456789abcdef" for c in stem.lower()): 
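# --- Illustrative sketch (editor's example, not part of the diff) -------------
# Both the new add-tag cmdlet and the deleted add-tags cmdlet resolve a file
# hash in the same order: explicit hash field, then a 64-character hex filename
# stem, then hashing the file on disk. `sha256_file` is the helper imported from
# SYS.utils in the code above; `resolve_item_hash` is a hypothetical name used
# only for this sketch.
from pathlib import Path
from typing import Optional

from SYS.utils import sha256_file


def resolve_item_hash(raw_hash: Optional[str], raw_path: Optional[str]) -> Optional[str]:
    if raw_hash:
        return raw_hash.lower()                  # explicit hash wins
    if raw_path:
        p = Path(raw_path)
        stem = p.stem
        if len(stem) == 64 and all(c in "0123456789abcdef" for c in stem.lower()):
            return stem.lower()                  # filename already encodes the SHA-256
        if p.exists() and p.is_file():
            return sha256_file(p)                # fall back to hashing the bytes on disk
    return None
# -------------------------------------------------------------------------------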
- resolved_hash = stem.lower() - elif p.exists() and p.is_file(): - resolved_hash = sha256_file(p) - except Exception: - resolved_hash = None - - if not resolved_hash: - log("[add_tags] Warning: Item missing usable hash (and could not derive from path); skipping", file=sys.stderr) - ctx.emit(res) - continue - - try: - backend = Store(config)[str(store_name)] - except Exception as exc: - log(f"[add_tags] Error: Unknown store '{store_name}': {exc}", file=sys.stderr) - return 1 - - try: - existing_tags, _src = backend.get_tag(resolved_hash, config=config) - except Exception: - existing_tags = [] - - existing_tags_list = [t for t in (existing_tags or []) if isinstance(t, str)] - existing_lower = {t.lower() for t in existing_tags_list} - original_title = _extract_title_tag(existing_tags_list) - - # Per-item tag list (do not mutate shared list) - item_tags_to_add = list(tags_to_add) - item_tags_to_add = collapse_namespace_tags(item_tags_to_add, "title", prefer="last") - - # Handle -duplicate logic (copy existing tags to new namespaces) - if duplicate_arg: - parts = str(duplicate_arg).split(':') - source_ns = "" - targets: list[str] = [] - - if len(parts) > 1: - source_ns = parts[0] - targets = [t.strip() for t in parts[1].split(',') if t.strip()] - else: - parts2 = str(duplicate_arg).split(',') - if len(parts2) > 1: - source_ns = parts2[0] - targets = [t.strip() for t in parts2[1:] if t.strip()] - - if source_ns and targets: - source_prefix = source_ns.lower() + ":" - for t in existing_tags_list: - if not t.lower().startswith(source_prefix): - continue - value = t.split(":", 1)[1] - for target_ns in targets: - new_tag = f"{target_ns}:{value}" - if new_tag.lower() not in existing_lower: - item_tags_to_add.append(new_tag) - - # Namespace replacement: delete old namespace:* when adding namespace:value - removed_namespace_tags: list[str] = [] - for new_tag in item_tags_to_add: - if not isinstance(new_tag, str) or ":" not in new_tag: - continue - ns = new_tag.split(":", 1)[0].strip() - if not ns: - continue - ns_prefix = ns.lower() + ":" - for t in existing_tags_list: - if t.lower().startswith(ns_prefix) and t.lower() != new_tag.lower(): - removed_namespace_tags.append(t) - - removed_namespace_tags = sorted({t for t in removed_namespace_tags}) - - actual_tags_to_add = [t for t in item_tags_to_add if isinstance(t, str) and t.lower() not in existing_lower] - - changed = False - if removed_namespace_tags: - try: - backend.delete_tag(resolved_hash, removed_namespace_tags, config=config) - changed = True - except Exception as exc: - log(f"[add_tags] Warning: Failed deleting namespace tags: {exc}", file=sys.stderr) - - if actual_tags_to_add: - try: - backend.add_tag(resolved_hash, actual_tags_to_add, config=config) - changed = True - except Exception as exc: - log(f"[add_tags] Warning: Failed adding tags: {exc}", file=sys.stderr) - - if changed: - total_added += len(actual_tags_to_add) - total_modified += 1 - - try: - refreshed_tags, _src2 = backend.get_tag(resolved_hash, config=config) - refreshed_list = [t for t in (refreshed_tags or []) if isinstance(t, str)] - except Exception: - refreshed_list = existing_tags_list - - # Update the result's tags using canonical field - if isinstance(res, models.PipeObject): - res.tags = refreshed_list - elif isinstance(res, dict): - res["tags"] = refreshed_list - - final_title = _extract_title_tag(refreshed_list) - _apply_title_to_result(res, final_title) - - if final_title and (not original_title or final_title.lower() != original_title.lower()): - 
_refresh_result_table_title(final_title, resolved_hash, raw_path) - - if changed: - _refresh_tags_view(res, resolved_hash, str(store_name), raw_path, config) - - ctx.emit(res) - - log( - f"[add_tags] Added {total_added} new tag(s) across {len(results)} item(s); modified {total_modified} item(s)", - file=sys.stderr, - ) - return 0 - - -CMDLET = Add_Tag() \ No newline at end of file diff --git a/cmdlets/catalog.py b/cmdlets/catalog.py index 46290f2..84a66ef 100644 --- a/cmdlets/catalog.py +++ b/cmdlets/catalog.py @@ -103,11 +103,11 @@ def get_cmdlet_metadata(cmd_name: str) -> Optional[Dict[str, Any]]: base = {} name = getattr(data, "name", base.get("name", cmd_name)) or cmd_name - aliases = getattr(data, "aliases", base.get("aliases", [])) or [] + aliases = getattr(data, "alias", base.get("alias", [])) or [] usage = getattr(data, "usage", base.get("usage", "")) summary = getattr(data, "summary", base.get("summary", "")) - details = getattr(data, "details", base.get("details", [])) or [] - args_list = getattr(data, "args", base.get("args", [])) or [] + details = getattr(data, "detail", base.get("detail", [])) or [] + args_list = getattr(data, "arg", base.get("arg", [])) or [] args = [_normalize_arg(arg) for arg in args_list] return { diff --git a/cmdlets/cleanup.py b/cmdlets/cleanup.py index bcc684d..8574d58 100644 --- a/cmdlets/cleanup.py +++ b/cmdlets/cleanup.py @@ -33,7 +33,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: - Emits only non-temporary results Typical pipeline usage: - download-data url | screen-shot | add-tag "tag" --all | cleanup + download-data url | screen-shot | add-tag -store local "tag" --all | cleanup """ # Help @@ -67,7 +67,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: deleted_count += 1 # Clean up any associated sidecar files - for ext in ['.tags', '.metadata']: + for ext in ['.tag', '.metadata']: sidecar = path_obj.parent / (path_obj.name + ext) if sidecar.exists(): try: @@ -98,9 +98,9 @@ CMDLET = Cmdlet( detail=[ "- Accepts pipeline results that may contain temporary files (screenshots, intermediate artifacts)", "- Deletes files marked with is_temp=True from disk", - "- Also cleans up associated sidecar files (.tags, .metadata)", + "- Also cleans up associated sidecar files (.tag, .metadata)", "- Emits only non-temporary results for further processing", - "- Typical usage at end of pipeline: ... | add-tag \"tag\" --all | cleanup", + "- Typical usage at end of pipeline: ... 
| add-tag -store local \"tag\" --all | cleanup", "- Exit code 0 if cleanup successful, 1 if no results to process", ], ) diff --git a/cmdlets/delete_file.py b/cmdlets/delete_file.py index df31369..203c5dc 100644 --- a/cmdlets/delete_file.py +++ b/cmdlets/delete_file.py @@ -100,8 +100,11 @@ class Delete_File(Cmdlet): log(f"Local delete failed: {exc}", file=sys.stderr) # Remove common sidecars regardless of file removal success - for sidecar in (path.with_suffix(".tags"), path.with_suffix(".tags.txt"), - path.with_suffix(".metadata"), path.with_suffix(".notes")): + for sidecar in ( + path.with_suffix(".tag"), + path.with_suffix(".metadata"), + path.with_suffix(".notes"), + ): try: if sidecar.exists() and sidecar.is_file(): sidecar.unlink() diff --git a/cmdlets/delete_tag.py b/cmdlets/delete_tag.py index edaef54..32a09e4 100644 --- a/cmdlets/delete_tag.py +++ b/cmdlets/delete_tag.py @@ -302,7 +302,7 @@ def _process_deletion(tags: list[str], file_hash: str | None, path: str | None, del_title_set = {t.lower() for t in title_tags} remaining_titles = [t for t in current_titles if t.lower() not in del_title_set] if current_titles and not remaining_titles: - log("Cannot delete the last title: tag. Add a replacement title first (add-tag \"title:new title\").", file=sys.stderr) + log("Cannot delete the last title: tag. Add a replacement title first (add-tags \"title:new title\").", file=sys.stderr) return False try: diff --git a/cmdlets/download_file.py b/cmdlets/download_file.py index 4de636a..cfba1db 100644 --- a/cmdlets/download_file.py +++ b/cmdlets/download_file.py @@ -1,12 +1,10 @@ -"""Download files directly via HTTP (non-yt-dlp url). +"""Generic file downloader. -Focused cmdlet for direct file downloads from: -- PDFs, images, documents -- url not supported by yt-dlp -- LibGen sources -- Direct file links +Supports: +- Direct HTTP file URLs (PDFs, images, documents; non-yt-dlp) +- Piped provider items (uses provider.download when available) -No streaming site logic - pure HTTP download with retries. +No streaming site logic; use download-media for yt-dlp/streaming. 
""" from __future__ import annotations @@ -17,10 +15,17 @@ from typing import Any, Dict, List, Optional, Sequence from SYS.download import DownloadError, _download_direct_file from SYS.logger import log, debug -from models import DownloadOptions import pipeline as pipeline_context -from ._shared import Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, register_url_with_local_library, coerce_to_pipe_object +from ._shared import ( + Cmdlet, + CmdletArg, + SharedArgs, + parse_cmdlet_args, + register_url_with_local_library, + coerce_to_pipe_object, + get_field, +) class Download_File(Cmdlet): @@ -30,14 +35,13 @@ class Download_File(Cmdlet): """Initialize download-file cmdlet.""" super().__init__( name="download-file", - summary="Download files directly via HTTP (PDFs, images, documents)", - usage="download-file [options] or search-file | download-file [options]", + summary="Download files via HTTP or provider handlers", + usage="download-file [options] OR @N | download-file [options]", alias=["dl-file", "download-http"], arg=[ - CmdletArg(name="url", type="string", required=False, description="URL to download (direct file links)", variadic=True), - CmdletArg(name="-url", type="string", description="URL to download (alias for positional argument)", variadic=True), - CmdletArg(name="output", type="string", alias="o", description="Output filename (auto-detected if not specified)"), - SharedArgs.URL + CmdletArg(name="output", type="string", alias="o", description="Output directory (overrides defaults)"), + SharedArgs.URL, + ], detail=["Download files directly via HTTP without yt-dlp processing.", "For streaming sites, use download-media."], exec=self.run, @@ -60,13 +64,21 @@ class Download_File(Cmdlet): # Parse arguments parsed = parse_cmdlet_args(args, self) - # Extract options + # Extract explicit URL args (if any) raw_url = parsed.get("url", []) if isinstance(raw_url, str): raw_url = [raw_url] + # If no URL args were provided, fall back to piped results (provider items) + piped_items: List[Any] = [] if not raw_url: - log("No url to download", file=sys.stderr) + if isinstance(result, list): + piped_items = result + elif result: + piped_items = [result] + + if not raw_url and not piped_items: + log("No url or piped items to download", file=sys.stderr) return 1 # Get output directory @@ -76,27 +88,78 @@ class Download_File(Cmdlet): debug(f"Output directory: {final_output_dir}") - # Download each URL + # Download each URL and/or provider item downloaded_count = 0 quiet_mode = bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False - custom_output = parsed.get("output") + # Provider lookup is optional; keep import local to avoid overhead if unused + get_search_provider = None + SearchResult = None + try: + from Provider.registry import get_search_provider as _get_search_provider, SearchResult as _SearchResult + + get_search_provider = _get_search_provider + SearchResult = _SearchResult + except Exception: + get_search_provider = None + SearchResult = None + + def _emit_local_file(downloaded_path: Path, source: Optional[str], title_hint: Optional[str], tags_hint: Optional[List[str]], media_kind_hint: Optional[str], full_metadata: Optional[Dict[str, Any]]) -> None: + title_val = (title_hint or downloaded_path.stem or "Unknown").strip() or downloaded_path.stem + hash_value = self._compute_file_hash(downloaded_path) + tag: List[str] = [] + if tags_hint: + tag.extend([str(t) for t in tags_hint if t]) + if not any(str(t).lower().startswith("title:") for t in tag): + 
tag.insert(0, f"title:{title_val}") + + payload: Dict[str, Any] = { + "path": str(downloaded_path), + "hash": hash_value, + "title": title_val, + "action": "cmdlet:download-file", + "download_mode": "file", + "store": "local", + "media_kind": media_kind_hint or "file", + "tag": tag, + } + if full_metadata: + payload["full_metadata"] = full_metadata + if source and str(source).startswith("http"): + payload["url"] = source + elif source: + payload["source_url"] = source + + pipeline_context.emit(payload) + + # Automatically register url with local library + if payload.get("url"): + pipe_obj = coerce_to_pipe_object(payload) + register_url_with_local_library(pipe_obj, config) + + # 1) Explicit URL downloads for url in raw_url: try: - debug(f"Processing: {url}") + debug(f"Processing URL: {url}") - # Direct HTTP download result_obj = _download_direct_file(url, final_output_dir, quiet=quiet_mode) - debug(f"Download completed, building pipe object...") - pipe_obj_dict = self._build_pipe_object(result_obj, url, final_output_dir) - debug(f"Emitting result to pipeline...") - pipeline_context.emit(pipe_obj_dict) - - # Automatically register url with local library - if pipe_obj_dict.get("url"): - pipe_obj = coerce_to_pipe_object(pipe_obj_dict) - register_url_with_local_library(pipe_obj, config) - + file_path = None + if hasattr(result_obj, "path"): + file_path = getattr(result_obj, "path") + elif isinstance(result_obj, dict): + file_path = result_obj.get("path") + if not file_path: + file_path = str(result_obj) + downloaded_path = Path(str(file_path)) + + _emit_local_file( + downloaded_path=downloaded_path, + source=url, + title_hint=downloaded_path.stem, + tags_hint=[f"title:{downloaded_path.stem}"], + media_kind_hint="file", + full_metadata=None, + ) downloaded_count += 1 debug("✓ Downloaded and emitted") @@ -105,6 +168,72 @@ class Download_File(Cmdlet): except Exception as e: log(f"Error processing {url}: {e}", file=sys.stderr) + # 2) Provider item downloads (piped results) + for item in piped_items: + try: + table = get_field(item, "table") + title = get_field(item, "title") + target = get_field(item, "path") or get_field(item, "url") + media_kind = get_field(item, "media_kind") + tags_val = get_field(item, "tag") + tags_list: Optional[List[str]] + if isinstance(tags_val, list): + tags_list = [str(t) for t in tags_val if t] + else: + tags_list = None + + full_metadata = get_field(item, "full_metadata") + if (not full_metadata) and isinstance(item, dict) and isinstance(item.get("extra"), dict): + extra_md = item["extra"].get("full_metadata") + if isinstance(extra_md, dict): + full_metadata = extra_md + + # If this looks like a provider item and providers are available, prefer provider.download() + downloaded_path: Optional[Path] = None + if table and get_search_provider and SearchResult: + provider = get_search_provider(str(table), config) + if provider is not None: + sr = SearchResult( + table=str(table), + title=str(title or "Unknown"), + path=str(target or ""), + full_metadata=full_metadata if isinstance(full_metadata, dict) else {}, + ) + debug(f"[download-file] Downloading provider item via {table}: {sr.title}") + downloaded_path = provider.download(sr, final_output_dir) + + # Fallback: if we have a direct HTTP URL, download it directly + if downloaded_path is None and isinstance(target, str) and target.startswith("http"): + debug(f"[download-file] Provider item looks like direct URL, downloading: {target}") + result_obj = _download_direct_file(target, final_output_dir, quiet=quiet_mode) + 
file_path = None + if hasattr(result_obj, "path"): + file_path = getattr(result_obj, "path") + elif isinstance(result_obj, dict): + file_path = result_obj.get("path") + if not file_path: + file_path = str(result_obj) + downloaded_path = Path(str(file_path)) + + if downloaded_path is None: + log(f"Cannot download item (no provider handler / unsupported target): {title or target}", file=sys.stderr) + continue + + _emit_local_file( + downloaded_path=downloaded_path, + source=str(target) if target else None, + title_hint=str(title) if title else downloaded_path.stem, + tags_hint=tags_list, + media_kind_hint=str(media_kind) if media_kind else None, + full_metadata=full_metadata if isinstance(full_metadata, dict) else None, + ) + downloaded_count += 1 + + except DownloadError as e: + log(f"Download failed: {e}", file=sys.stderr) + except Exception as e: + log(f"Error downloading item: {e}", file=sys.stderr) + if downloaded_count > 0: debug(f"✓ Successfully processed {downloaded_count} file(s)") return 0 @@ -118,6 +247,16 @@ class Download_File(Cmdlet): def _resolve_output_dir(self, parsed: Dict[str, Any], config: Dict[str, Any]) -> Optional[Path]: """Resolve the output directory from storage location or config.""" + output_dir_arg = parsed.get("output") + if output_dir_arg: + try: + out_path = Path(str(output_dir_arg)).expanduser() + out_path.mkdir(parents=True, exist_ok=True) + return out_path + except Exception as e: + log(f"Cannot use output directory {output_dir_arg}: {e}", file=sys.stderr) + return None + storage_location = parsed.get("storage") # Priority 1: --storage flag @@ -148,40 +287,6 @@ class Download_File(Cmdlet): return final_output_dir - def _build_pipe_object(self, download_result: Any, url: str, output_dir: Path) -> Dict[str, Any]: - """Create a PipeObject-compatible dict from a download result.""" - # Try to get file path from result - file_path = None - if hasattr(download_result, 'path'): - file_path = download_result.path - elif isinstance(download_result, dict) and 'path' in download_result: - file_path = download_result['path'] - - if not file_path: - # Fallback: assume result is the path itself - file_path = str(download_result) - - media_path = Path(file_path) - hash_value = self._compute_file_hash(media_path) - title = media_path.stem - - # Build tags with title for searchability - tags = [f"title:{title}"] - - # Canonical pipeline payload (no legacy aliases) - return { - "path": str(media_path), - "hash": hash_value, - "title": title, - "file_title": title, - "action": "cmdlet:download-file", - "download_mode": "file", - "url": url or (download_result.get('url') if isinstance(download_result, dict) else None), - "store": "local", - "media_kind": "file", - "tags": tags, - } - def _compute_file_hash(self, filepath: Path) -> str: """Compute SHA256 hash of a file.""" import hashlib diff --git a/cmdlets/download_media.py b/cmdlets/download_media.py index ed9cae8..90a289d 100644 --- a/cmdlets/download_media.py +++ b/cmdlets/download_media.py @@ -1391,11 +1391,11 @@ class Download_Media(Cmdlet): media_path = Path(download_result.path) hash_value = download_result.hash_value or self._compute_file_hash(media_path) title = info.get("title") or media_path.stem - tags = list(download_result.tags or []) + tag = list(download_result.tag or []) # Add title tag for searchability - if title and f"title:{title}" not in tags: - tags.insert(0, f"title:{title}") + if title and f"title:{title}" not in tag: + tag.insert(0, f"title:{title}") # Build a single canonical URL field; prefer 
yt-dlp provided webpage_url or info.url, # but fall back to the original requested URL. If multiple unique urls are available, @@ -1424,7 +1424,7 @@ class Download_Media(Cmdlet): "hash": hash_value, "title": title, "url": final_url, - "tags": tags, + "tag": tag, "action": "cmdlet:download-media", # download_mode removed (deprecated), keep media_kind "store": getattr(opts, "storage_name", None) or getattr(opts, "storage_location", None) or "PATH", diff --git a/cmdlets/download_provider.py b/cmdlets/download_provider.py deleted file mode 100644 index 21db6c5..0000000 --- a/cmdlets/download_provider.py +++ /dev/null @@ -1,157 +0,0 @@ -"""download-provider cmdlet: Download items from external providers.""" -from __future__ import annotations - -from typing import Any, Dict, Sequence, List, Optional -from pathlib import Path -import sys -import json - -from SYS.logger import log, debug -from Provider.registry import get_search_provider, SearchResult -from SYS.utils import unique_path - -from ._shared import Cmdlet, CmdletArg, should_show_help, get_field, coerce_to_pipe_object -import pipeline as ctx - -# Optional dependencies -try: - from config import get_local_storage_path, resolve_output_dir -except Exception: # pragma: no cover - get_local_storage_path = None # type: ignore - resolve_output_dir = None # type: ignore - -class Download_Provider(Cmdlet): - """Download items from external providers.""" - - def __init__(self): - super().__init__( - name="download-provider", - summary="Download items from external providers (soulseek, libgen, etc).", - usage="download-provider [item] [-output DIR]", - arg=[ - CmdletArg("output", type="string", alias="o", description="Output directory"), - ], - detail=[ - "Download items from external providers.", - "Usually called automatically by @N selection on provider results.", - "Can be used manually by piping a provider result item.", - ], - exec=self.run - ) - self.register() - - def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: - """Execute download-provider cmdlet.""" - if should_show_help(args): - ctx.emit(self.__dict__) - return 0 - - # Parse arguments - output_dir_arg = None - i = 0 - while i < len(args): - arg = args[i] - if arg in ("-output", "--output", "-o") and i + 1 < len(args): - output_dir_arg = args[i+1] - i += 2 - else: - i += 1 - - # Determine output directory - if output_dir_arg: - output_dir = Path(output_dir_arg) - elif resolve_output_dir: - output_dir = resolve_output_dir(config) - else: - output_dir = Path("./downloads") - - output_dir.mkdir(parents=True, exist_ok=True) - - # Process input result - items = [] - if isinstance(result, list): - items = result - elif result: - items = [result] - - if not items: - log("No items to download", file=sys.stderr) - return 1 - - success_count = 0 - - for item in items: - try: - # Extract provider info - table = get_field(item, "table") - if not table: - log(f"Skipping item without provider info: {item}", file=sys.stderr) - continue - - provider = get_search_provider(table, config) - if not provider: - log(f"Provider '{table}' not available for download", file=sys.stderr) - continue - - # Reconstruct SearchResult if needed - # The provider.download method expects a SearchResult object or compatible dict - if isinstance(item, dict): - # Ensure full_metadata is present - if "full_metadata" not in item and "extra" in item: - item["full_metadata"] = item["extra"].get("full_metadata", {}) - - search_result = SearchResult( - table=table, - title=item.get("title", 
"Unknown"), - path=item.get("path", ""), - full_metadata=item.get("full_metadata", {}) - ) - else: - # Assume it's an object with attributes (like PipeObject) - full_metadata = getattr(item, "full_metadata", {}) - # Check extra dict if full_metadata is missing/empty - if not full_metadata and hasattr(item, "extra") and isinstance(item.extra, dict): - full_metadata = item.extra.get("full_metadata", {}) - # Fallback: if full_metadata key isn't there, maybe the extra dict IS the metadata - if not full_metadata and "username" in item.extra: - full_metadata = item.extra - - search_result = SearchResult( - table=table, - title=getattr(item, "title", "Unknown"), - path=getattr(item, "path", ""), - full_metadata=full_metadata - ) - - debug(f"[download-provider] Downloading '{search_result.title}' via {table}...") - downloaded_path = provider.download(search_result, output_dir) - - if downloaded_path: - debug(f"[download-provider] Download successful: {downloaded_path}") - - # Create PipeObject for the downloaded file - pipe_obj = coerce_to_pipe_object({ - "path": str(downloaded_path), - "title": search_result.title, - "table": "local", # Now it's a local file - "media_kind": getattr(item, "media_kind", "other"), - "tags": getattr(item, "tags", []), - "full_metadata": search_result.full_metadata - }) - - ctx.emit(pipe_obj) - success_count += 1 - else: - log(f"Download failed for '{search_result.title}'", file=sys.stderr) - - except Exception as e: - log(f"Error downloading item: {e}", file=sys.stderr) - import traceback - debug(traceback.format_exc()) - - if success_count > 0: - return 0 - return 1 - -# Register cmdlet instance -Download_Provider_Instance = Download_Provider() diff --git a/cmdlets/get_tag.py b/cmdlets/get_tag.py index 5ebd24c..fc95a69 100644 --- a/cmdlets/get_tag.py +++ b/cmdlets/get_tag.py @@ -2,7 +2,7 @@ This cmdlet retrieves tags for a selected result, supporting both: - Hydrus Network (for files with hash) -- Local sidecar files (.tags) +- Local sidecar files (.tag) In interactive mode: navigate with numbers, add/delete tags In pipeline mode: display tags as read-only table, emit as structured JSON @@ -89,9 +89,9 @@ def _emit_tags_as_table( from result_table import ResultTable # Create ResultTable with just tag column (no title) - table_title = "Tags" + table_title = "Tag" if item_title: - table_title = f"Tags: {item_title}" + table_title = f"Tag: {item_title}" if file_hash: table_title += f" [{file_hash[:8]}]" @@ -195,19 +195,19 @@ def _rename_file_if_title_tag(media: Optional[Path], tags_added: List[str]) -> b return False # Build sidecar paths BEFORE renaming the file - old_sidecar = Path(str(file_path) + '.tags') - new_sidecar = Path(str(new_file_path) + '.tags') + old_sidecar = Path(str(file_path) + '.tag') + new_sidecar = Path(str(new_file_path) + '.tag') # Rename file try: file_path.rename(new_file_path) log(f"Renamed file: {old_name} → {new_name}") - # Rename .tags sidecar if it exists + # Rename .tag sidecar if it exists if old_sidecar.exists(): try: old_sidecar.rename(new_sidecar) - log(f"Renamed sidecar: {old_name}.tags → {new_name}.tags") + log(f"Renamed sidecar: {old_name}.tag → {new_name}.tag") except Exception as e: log(f"Failed to rename sidecar: {e}", file=sys.stderr) @@ -232,7 +232,7 @@ def _apply_result_updates_from_tags(result: Any, tag_list: List[str]) -> None: def _handle_title_rename(old_path: Path, tags_list: List[str]) -> Optional[Path]: - """If a title: tag is present, rename the file and its .tags sidecar to match. 
+ """If a title: tag is present, rename the file and its .tag sidecar to match. Returns the new path if renamed, otherwise returns None. """ @@ -267,10 +267,10 @@ def _handle_title_rename(old_path: Path, tags_list: List[str]) -> Optional[Path] old_path.rename(new_path) log(f"Renamed file: {old_name} → {new_name}", file=sys.stderr) - # Rename the .tags sidecar if it exists - old_tags_path = old_path.parent / (old_name + '.tags') + # Rename the .tag sidecar if it exists + old_tags_path = old_path.parent / (old_name + '.tag') if old_tags_path.exists(): - new_tags_path = old_path.parent / (new_name + '.tags') + new_tags_path = old_path.parent / (new_name + '.tag') if new_tags_path.exists(): log(f"Warning: Target sidecar already exists: {new_tags_path.name}", file=sys.stderr) else: @@ -368,14 +368,12 @@ def _write_sidecar(p: Path, media: Path, tag_list: List[str], url: List[str], ha return media + def _emit_tag_payload(source: str, tags_list: List[str], *, hash_value: Optional[str], extra: Optional[Dict[str, Any]] = None, store_label: Optional[str] = None) -> int: - """Emit tags as structured payload to pipeline. - - Also emits individual tag objects to _PIPELINE_LAST_ITEMS so they can be selected by index. - """ + """Emit tag values as structured payload to pipeline.""" payload: Dict[str, Any] = { "source": source, - "tags": list(tags_list), + "tag": list(tags_list), "count": len(tags_list), } if hash_value: @@ -388,11 +386,9 @@ def _emit_tag_payload(source: str, tags_list: List[str], *, hash_value: Optional if store_label: label = store_label elif ctx.get_stage_context() is not None: - label = "tags" + label = "tag" if label: ctx.store_value(label, payload) - if ctx.get_stage_context() is not None and label.lower() != "tags": - ctx.store_value("tags", payload) # Emit individual TagItem objects so they can be selected by bare index # When in pipeline, emit individual TagItem objects @@ -1065,7 +1061,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: return 1 output = { "title": title, - "tags": tags, + "tag": tags, "formats": [(label, fmt_id) for label, fmt_id in formats], "playlist_items": playlist_items, } @@ -1080,7 +1076,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: # Prefer identifier tags (ISBN/OLID/etc.) 
when available; fallback to title/filename identifier_tags: List[str] = [] - result_tags = get_field(result, "tags", None) + result_tags = get_field(result, "tag", None) if isinstance(result_tags, list): identifier_tags = [str(t) for t in result_tags if isinstance(t, (str, bytes))] @@ -1160,7 +1156,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: row.add_column("Album", item.get("album", "")) row.add_column("Year", item.get("year", "")) payload = { - "tags": tags, + "tag": tags, "provider": provider.name, "title": item.get("title"), "artist": item.get("artist"), @@ -1169,7 +1165,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: "hash": hash_for_payload, "store": store_for_payload, "extra": { - "tags": tags, + "tag": tags, "provider": provider.name, }, } @@ -1236,13 +1232,13 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: # Build a subject payload representing the file whose tags are being shown subject_store = get_field(result, "store", None) or store_name subject_payload: Dict[str, Any] = { - "tags": list(current), + "tag": list(current), "title": item_title, "name": item_title, "store": subject_store, "service_name": service_name, "extra": { - "tags": list(current), + "tag": list(current), }, } if file_hash: @@ -1288,9 +1284,9 @@ class Get_Tag(Cmdlet): """Initialize get-tag cmdlet.""" super().__init__( name="get-tag", - summary="Get tags from Hydrus or local sidecar metadata", + summary="Get tag values from Hydrus or local sidecar metadata", usage="get-tag [-hash ] [--store ] [--emit] [-scrape ]", - alias=["tags"], + alias=[], arg=[ SharedArgs.HASH, CmdletArg( diff --git a/cmdlets/merge_file.py b/cmdlets/merge_file.py index a33a7e5..9046c66 100644 --- a/cmdlets/merge_file.py +++ b/cmdlets/merge_file.py @@ -12,7 +12,7 @@ from models import DownloadOptions from config import resolve_output_dir import subprocess as _subprocess import shutil as _shutil -from ._shared import parse_cmdlet_args +from ._shared import create_pipe_object_result, parse_cmdlet_args try: from PyPDF2 import PdfWriter, PdfReader @@ -136,35 +136,12 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: if target_path and target_path.exists(): source_files.append(target_path) - # Track the .tags file for this source - tags_file = target_path.with_suffix(target_path.suffix + '.tags') + # Track the .tag file for this source + tags_file = target_path.with_suffix(target_path.suffix + '.tag') if tags_file.exists(): source_tags_files.append(tags_file) - - # Try to read hash, tags, url, and relationships from .tags sidecar file try: - tags_content = tags_file.read_text(encoding='utf-8') - for line in tags_content.split('\n'): - line = line.strip() - if not line: - continue - if line.startswith('hash:'): - hash_value = line[5:].strip() - if hash_value: - source_hashes.append(hash_value) - elif line.startswith('url:') or line.startswith('url:'): - # Extract url from tags file - url_value = line.split(':', 1)[1].strip() if ':' in line else '' - if url_value and url_value not in source_url: - source_url.append(url_value) - elif line.startswith('relationship:'): - # Extract relationships from tags file - rel_value = line.split(':', 1)[1].strip() if ':' in line else '' - if rel_value and rel_value not in source_relationships: - source_relationships.append(rel_value) - else: - # Collect actual tags (not metadata like hash: or url:) - source_tags.append(line) + source_tags.extend(read_tags_from_file(tags_file) if 
HAS_METADATA_API else []) except Exception: pass @@ -254,8 +231,8 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: log(f"Merged {len(source_files)} files into: {output_path}", file=sys.stderr) - # Create .tags sidecar file for the merged output using unified API - tags_path = output_path.with_suffix(output_path.suffix + '.tags') + # Create .tag sidecar file for the merged output using unified API + tags_path = output_path.with_suffix(output_path.suffix + '.tag') try: # Start with title tag merged_tags = [f"title:{output_path.stem}"] @@ -312,29 +289,20 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: except Exception as e: log(f"Warning: Could not create sidecar: {e}", file=sys.stderr) - # Emit PipelineItem so the merged file can be piped to next command + # Emit a PipeObject-compatible dict so the merged file can be piped to next command try: - # Try to import PipelineItem from downlow module - try: - from downlow import PipelineItem - except ImportError: - # Fallback: create a simple object with the required attributes - class SimpleItem: - def __init__(self, target, title, media_kind, tags=None, url=None): - self.target = target - self.title = title - self.media_kind = media_kind - self.tags = tags or [] - self.url = url or [] - self.store = "local" - PipelineItem = SimpleItem - - merged_item = PipelineItem( - target=str(output_path), + from SYS.utils import sha256_file + merged_hash = sha256_file(output_path) + merged_item = create_pipe_object_result( + source="local", + identifier=output_path.name, + file_path=str(output_path), + cmdlet_name="merge-file", title=output_path.stem, + hash_value=merged_hash, + tag=merged_tags, + url=source_url, media_kind=file_kind, - tags=merged_tags, # Include merged tags - url=source_url # Include known url ) # Clear previous results to ensure only the merged file is passed down ctx.clear_last_result() @@ -348,7 +316,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: # Always delete source files if they were downloaded playlist items (temp files) # We can detect this if they are in the temp download directory or if we tracked them if delete_after or True: # Force delete for now as merge consumes them - # First delete all .tags files + # First delete all .tag files for tags_file in source_tags_files: try: tags_file.unlink() @@ -490,8 +458,8 @@ def _merge_audio(files: List[Path], output: Path, output_format: str) -> bool: title = file_path.stem # Default to filename without extension if HAS_METADATA_API: try: - # Try to read tags from .tags sidecar file - tags_file = file_path.with_suffix(file_path.suffix + '.tags') + # Try to read tags from .tag sidecar file + tags_file = file_path.with_suffix(file_path.suffix + '.tag') if tags_file.exists(): tags = read_tags_from_file(tags_file) if tags: diff --git a/cmdlets/output_json.py b/cmdlets/output_json.py deleted file mode 100644 index 32b85ab..0000000 --- a/cmdlets/output_json.py +++ /dev/null @@ -1,14 +0,0 @@ -from typing import Any, Dict, Sequence -import json -from ._shared import Cmdlet - -def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: - """Output the current pipeline result as JSON.""" - print(json.dumps(result, indent=2, default=str)) - return 0 - -CMDLET = Cmdlet( - name="output-json", - summary="Output the current pipeline result as JSON.", - usage="... 
| output-json", -) diff --git a/cmdlets/screen_shot.py b/cmdlets/screen_shot.py index ee31f53..8306f80 100644 --- a/cmdlets/screen_shot.py +++ b/cmdlets/screen_shot.py @@ -121,7 +121,7 @@ class ScreenshotOptions: wait_after_load: float = 2.0 wait_for_article: bool = False replace_video_posters: bool = True - tags: Sequence[str] = () + tag: Sequence[str] = () archive: bool = False archive_timeout: float = ARCHIVE_TIMEOUT url: Sequence[str] = () @@ -136,7 +136,7 @@ class ScreenshotResult: """Details about the captured screenshot.""" path: Path - tags_applied: List[str] + tag_applied: List[str] archive_url: List[str] url: List[str] warnings: List[str] = field(default_factory=list) @@ -481,11 +481,11 @@ def _capture_screenshot(options: ScreenshotOptions) -> ScreenshotResult: if archives: url = unique_preserve_order([*url, *archives]) - applied_tags = unique_preserve_order(list(tag for tag in options.tags if tag.strip())) + applied_tag = unique_preserve_order(list(tag for tag in options.tag if tag.strip())) return ScreenshotResult( path=destination, - tags_applied=applied_tags, + tag_applied=applied_tag, archive_url=archive_url, url=url, warnings=warnings, diff --git a/cmdlets/search_store.py b/cmdlets/search_store.py index 9b1646d..7f1cf64 100644 --- a/cmdlets/search_store.py +++ b/cmdlets/search_store.py @@ -27,9 +27,9 @@ except Exception: # pragma: no cover resolve_output_dir = None # type: ignore try: - from API.HydrusNetwork import HydrusClient, HydrusRequestError + from API.HydrusNetwork import HydrusNetwork, HydrusRequestError except ImportError: # pragma: no cover - HydrusClient = None # type: ignore + HydrusNetwork = None # type: ignore HydrusRequestError = RuntimeError # type: ignore try: @@ -47,7 +47,7 @@ class SearchRecord: path: str size_bytes: int | None = None duration_seconds: str | None = None - tags: str | None = None + tag: str | None = None hash: str | None = None def as_dict(self) -> dict[str, str]: @@ -56,8 +56,8 @@ class SearchRecord: payload["size"] = str(self.size_bytes) if self.duration_seconds: payload["duration"] = self.duration_seconds - if self.tags: - payload["tags"] = self.tags + if self.tag: + payload["tag"] = self.tag if self.hash: payload["hash"] = self.hash return payload @@ -233,16 +233,17 @@ class Search_Store(Cmdlet): from Store import Store storage = Store(config=config or {}) + from Store._base import Store as BaseStore backend_to_search = storage_backend or None if backend_to_search: searched_backends.append(backend_to_search) target_backend = storage[backend_to_search] - if not callable(getattr(target_backend, 'search_file', None)): + if type(target_backend).search is BaseStore.search: log(f"Backend '{backend_to_search}' does not support searching", file=sys.stderr) db.update_worker_status(worker_id, 'error') return 1 - results = target_backend.search_store(query, limit=limit) + results = target_backend.search(query, limit=limit) else: from API.HydrusNetwork import is_hydrus_available hydrus_available = is_hydrus_available(config or {}) @@ -256,7 +257,7 @@ class Search_Store(Cmdlet): continue searched_backends.append(backend_name) - backend_results = backend.search_store(query, limit=limit - len(all_results)) + backend_results = backend.search(query, limit=limit - len(all_results)) if backend_results: all_results.extend(backend_results) if len(all_results) >= limit: diff --git a/cmdlets/trim_file.py b/cmdlets/trim_file.py index 909b164..c60d560 100644 --- a/cmdlets/trim_file.py +++ b/cmdlets/trim_file.py @@ -17,7 +17,7 @@ from ._shared import ( 
CmdletArg, parse_cmdlet_args, normalize_result_input, - extract_tags_from_result, + extract_tag_from_result, extract_title_from_result ) import pipeline as ctx @@ -33,7 +33,7 @@ CMDLET = Cmdlet( ], detail=[ "Creates a new file with 'clip_' prefix in the filename/title.", - "Inherits tags from the source file.", + "Inherits tag values from the source file.", "Adds a relationship to the source file (if hash is available).", "Output can be piped to add-file.", ] @@ -185,8 +185,8 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: except Exception: pass - # 2. Get tags - tags = extract_tags_from_result(item) + # 2. Get tag values + tags = extract_tag_from_result(item) # 3. Get title and modify it title = extract_title_from_result(item) @@ -266,7 +266,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: result_dict = { "path": str(output_path), "title": new_title, - "tags": new_tags, + "tag": new_tags, "media_kind": "video", # Assumption, or derive "hash": clip_hash, # Pass calculated hash "relationships": { diff --git a/cmdnats/__init__.py b/cmdnats/__init__.py index 48e7688..f7c78a8 100644 --- a/cmdnats/__init__.py +++ b/cmdnats/__init__.py @@ -15,9 +15,17 @@ def _register_cmdlet_object(cmdlet_obj, registry: Dict[str, CmdletFn]) -> None: if hasattr(cmdlet_obj, "name") and cmdlet_obj.name: registry[cmdlet_obj.name.replace("_", "-").lower()] = run_fn + # Cmdlet uses 'alias' (List[str]). Some older objects may use 'aliases'. + aliases = [] + if hasattr(cmdlet_obj, "alias") and getattr(cmdlet_obj, "alias"): + aliases.extend(getattr(cmdlet_obj, "alias") or []) if hasattr(cmdlet_obj, "aliases") and getattr(cmdlet_obj, "aliases"): - for alias in cmdlet_obj.aliases: - registry[alias.replace("_", "-").lower()] = run_fn + aliases.extend(getattr(cmdlet_obj, "aliases") or []) + + for alias in aliases: + if not alias: + continue + registry[alias.replace("_", "-").lower()] = run_fn def register_native_commands(registry: Dict[str, CmdletFn]) -> None: diff --git a/cmdlets/manage_config.py b/cmdnats/config.py similarity index 98% rename from cmdlets/manage_config.py rename to cmdnats/config.py index 11184ab..ff7c168 100644 --- a/cmdlets/manage_config.py +++ b/cmdnats/config.py @@ -1,5 +1,6 @@ from typing import List, Dict, Any -from ._shared import Cmdlet, CmdletArg + +from cmdlets._shared import Cmdlet, CmdletArg from config import load_config, save_config CMDLET = Cmdlet( diff --git a/cmdnats/help.py b/cmdnats/help.py index 340158b..72a692e 100644 --- a/cmdnats/help.py +++ b/cmdnats/help.py @@ -181,3 +181,5 @@ CMDLET = Cmdlet( ), ], ) + +CMDLET.exec = _run diff --git a/cmdnats/pipe.py b/cmdnats/pipe.py index 87b5f02..e7f568f 100644 --- a/cmdnats/pipe.py +++ b/cmdnats/pipe.py @@ -585,14 +585,16 @@ def _queue_items(items: List[Any], clear_first: bool = False, config: Optional[D # Treat any http(s) target as yt-dlp candidate. If the Python yt-dlp # module is available we also check more deeply, but default to True # so MPV can use its ytdl hooks for remote streaming sites. + is_hydrus_target = _is_hydrus_path(str(target), hydrus_url) try: - is_ytdlp = target.startswith("http") or is_url_supported_by_ytdlp(target) + # Hydrus direct file URLs should not be treated as yt-dlp targets. 
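+                # _is_hydrus_path checks the target against hydrus_url, so direct Hydrus
+                # file links bypass the ytdl hooks and (below) still get the memory:// M3U
+                # wrapper that carries the title to MPV.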
+ is_ytdlp = (not is_hydrus_target) and (target.startswith("http") or is_url_supported_by_ytdlp(target)) except Exception: - is_ytdlp = target.startswith("http") + is_ytdlp = (not is_hydrus_target) and target.startswith("http") # Use memory:// M3U hack to pass title to MPV # Skip for yt-dlp url to ensure proper handling - if title and not is_ytdlp: + if title and (is_hydrus_target or not is_ytdlp): # Sanitize title for M3U (remove newlines) safe_title = title.replace('\n', ' ').replace('\r', '') m3u_content = f"#EXTM3U\n#EXTINF:-1,{safe_title}\n{target}" diff --git a/helper/search_provider.py b/helper/search_provider.py new file mode 100644 index 0000000..20ab24b --- /dev/null +++ b/helper/search_provider.py @@ -0,0 +1,2215 @@ +""" +SearchProvider: Unified interface for different search backends. + +This module defines a base class and registry for search providers that can be +used by search-file and other search-related cmdlets to handle different sources: +- Local file storage (LocalStorageBackend) +- Hydrus database +- AllDebrid magnets (search-debrid) +- Library Genesis / OpenLibrary books (search-libgen) +- Soulseek P2P network (search-soulseek) +- IMDB movies (future) +- Other sources + +Usage: + from helper.search_provider import SearchProvider, get_provider + + provider = get_provider("libgen") + results = provider.search("python programming", limit=10) + + for result in results: + print(result["title"], result["target"], result["annotations"]) +""" + +from __future__ import annotations + +from abc import ABC, abstractmethod +from typing import Any, Dict, List, Optional, Sequence, Tuple +from dataclasses import dataclass +from pathlib import Path +import sys + +try: + from playwright.sync_api import sync_playwright + PLAYWRIGHT_AVAILABLE = True +except ImportError: + PLAYWRIGHT_AVAILABLE = False +import subprocess +import json +import shutil +from SYS.logger import log, debug + + +from SYS.logger import log, debug + + +@dataclass +class SearchResult: + """Unified search result format across all providers.""" + + # Required fields + origin: str # Provider name: "libgen", "soulseek", "debrid", "local", "hydrus", etc. + title: str # Display title/filename + target: str # Unique identifier or download target (URL, path, magnet hash, etc.) + + # Optional fields + detail: str = "" # Additional details (size, status, format, etc.) + annotations: List[str] = None # Tags/annotations: ["ready", "120MB", "mp3", etc.] + media_kind: str = "other" # Type: "book", "audio", "video", "file", "magnet", etc. + size_bytes: Optional[int] = None # File size in bytes + tag: Optional[set[str]] = None # Searchable tag values + full_metadata: Optional[Dict[str, Any]] = None # Extra metadata (author, year, etc.) + columns: List[Tuple[str, str]] = None # Display columns: [("Header", "value"), ...] for result table + + def __post_init__(self): + """Ensure mutable defaults are properly initialized.""" + if self.annotations is None: + self.annotations = [] + if self.tag is None: + self.tag = set() + if self.full_metadata is None: + self.full_metadata = {} + if self.columns is None: + self.columns = [] + + def to_dict(self) -> Dict[str, Any]: + """Convert to dictionary for JSON serialization. + + Note: full_metadata is excluded from dict to keep response size small + until the result is actually selected/used. This speeds up initial + search result display and piping. 
+ """ + data = { + "origin": self.origin, + "title": self.title, + "target": self.target, + "detail": self.detail, + "annotations": self.annotations, + "media_kind": self.media_kind, + "size_bytes": self.size_bytes, + "tag": list(self.tag) if self.tag else [], + } + if self.columns: + data["columns"] = list(self.columns) + # Note: full_metadata is NOT included in dict to keep payload small + return data + + +class Provider(ABC): + """Abstract base class for search providers.""" + + # Provider-specific field definitions: list of (api_field_name, display_column_name, formatter_func) + # Override in subclasses to define which fields to request and how to display them + # Example: [("title", "Title", None), ("author_name", "Author(s)", lambda x: ", ".join(x) if isinstance(x, list) else x)] + RESULT_FIELDS: List[Tuple[str, str, Optional[Any]]] = [] + + def __init__(self, config: Dict[str, Any] = None): + """ + Initialize provider with optional configuration. + + Args: + config: Configuration dictionary (global config dict) + """ + self.config = config or {} + self.name = self.__class__.__name__.replace("Provider", "").lower() + + @abstractmethod + def search( + self, + query: str, + limit: int = 50, + filters: Optional[Dict[str, Any]] = None, + **kwargs + ) -> List[SearchResult]: + """ + Search for items matching the query. + + Args: + query: Search query string. Special value "*" means "match all" + limit: Maximum number of results to return + filters: Optional filtering criteria (type, size, status, etc.) + **kwargs: Provider-specific arguments + + Returns: + List of SearchResult objects + """ + pass + + @abstractmethod + def get_result_args(self) -> List[str]: + """ + Get command-line arguments from a search result to pass to downstream cmdlets. + + Example: For libgen, returns ["-url", result.target] + For soulseek, returns ["-id", result.target] + For local, returns ["-path", result.target] + + Returns: + List of arguments to append to cmdlet invocation + """ + pass + + def parse_args(self, args: Sequence[str]) -> Tuple[str, Dict[str, Any]]: + """ + Parse provider-specific command-line arguments. + + Args: + args: Sequence of command-line arguments + + Returns: + Tuple of (query, filters_dict) + """ + # Default implementation: first arg is query, rest are filters + query = args[0] if args else "" + filters = {} + return query, filters + + def validate(self) -> bool: + """ + Validate that provider is properly configured and ready to use. + + Returns: + True if provider is available, False otherwise + """ + return True + + def get_columns_format(self) -> List[str]: + """ + Define which columns this provider displays in result table. + + Returns: + List of column names to display. + Each provider can override to customize result table appearance. + Examples: ["Title", "Author", "Year"] for books + ["Title", "Duration", "Format"] for media + ["Title", "Size", "Status"] for files + + Default: Empty list (uses traditional detail/origin/media_kind/target) + """ + return [col_name for _, col_name, _ in self.RESULT_FIELDS] if self.RESULT_FIELDS else [] + + def get_api_fields_string(self) -> str: + """ + Generate comma-separated API fields string from RESULT_FIELDS. 
+
+        Returns:
+            Comma-separated string of API field names to request
+            Example: "title,author_name,first_publish_year,isbn,key"
+        """
+        if not self.RESULT_FIELDS:
+            return ""
+        return ",".join(field_name for field_name, _, _ in self.RESULT_FIELDS)
+
+    def build_columns_from_doc(self, doc: Dict[str, Any], idx: int = None) -> List[Tuple[str, str]]:
+        """
+        Dynamically build columns from a result document using RESULT_FIELDS definition.
+
+        Args:
+            doc: API response document (dict with field values)
+            idx: Optional index/number for the result (typically added as first column)
+
+        Returns:
+            List of (header, value) tuples ready for SearchResult.columns
+        """
+        columns = []
+
+        # Add index as first column if provided
+        if idx is not None:
+            columns.append(("#", str(idx)))
+
+        # Process each field definition
+        for api_field_name, display_col_name, formatter_func in self.RESULT_FIELDS:
+            value = doc.get(api_field_name, "")
+
+            # Apply formatter if defined
+            if formatter_func and value:
+                value = formatter_func(value)
+
+            # Convert to string and add to columns
+            value_str = str(value) if value else "Unknown"
+            columns.append((display_col_name, value_str))
+
+        return columns
+
+    def build_result(self, origin: str, title: str, target: str, detail: str = "",
+                     annotations: Optional[List[str]] = None, media_kind: str = "other",
+                     columns: Optional[List[Tuple[str, str]]] = None,
+                     full_metadata: Optional[Dict[str, Any]] = None,
+                     size_bytes: Optional[int] = None) -> SearchResult:
+        """
+        Build a SearchResult with consistent column/annotation handling.
+
+        Consolidates the common pattern across all providers of extracting fields,
+        building columns, and creating SearchResult. Reduces per-provider duplication.
+
+        Args:
+            origin: Provider name (e.g. "libgen", "openlibrary")
+            title: Display title
+            target: Download target/URL/path
+            detail: Secondary description line
+            annotations: Tags/metadata labels
+            media_kind: Type (book, audio, video, file, magnet, etc.)
+            columns: Pre-built column list, or auto-built from RESULT_FIELDS
+            full_metadata: Additional metadata for later retrieval
+            size_bytes: File size in bytes
+
+        Returns:
+            SearchResult ready to display
+        """
+        return SearchResult(
+            origin=origin,
+            title=title,
+            target=target,
+            detail=detail,
+            annotations=annotations or [],
+            media_kind=media_kind,
+            columns=columns or [],
+            full_metadata=full_metadata or {},
+            size_bytes=size_bytes
+        )
+
+
+class Libgen(Provider):
+    """Search provider for Library Genesis books."""
+
+    RESULT_FIELDS: List[Tuple[str, str, Optional[Any]]] = []  # columns built manually
+
+    def __init__(self, config: Dict[str, Any] = None):
+        super().__init__(config)
+        self.name = "libgen"
+
+    def search(
+        self,
+        query: str,
+        limit: int = 50,
+        filters: Optional[Dict[str, Any]] = None,
+        **kwargs
+    ) -> List[SearchResult]:
+        """Search Library Genesis for books.
+
+        Supports dynamic query format:
+        - isbn:0557677203
+        - author:"Albert Pike"
+        - title:"Book Title"
+        - Combination: isbn:0557677203 author:"Albert Pike" free text
+
+        Priority: ISBN is the authoritative key for searching.
+ """ + filters = filters or {} + + try: + from helper.unified_book_downloader import UnifiedBookDownloader + from helper.query_parser import parse_query, get_field, get_free_text + + debug(f"[libgen] Starting search for: {query}") + + # Parse the query to extract structured fields + parsed = parse_query(query) + isbn = get_field(parsed, 'isbn') + author = get_field(parsed, 'author') + title = get_field(parsed, 'title') + free_text = get_free_text(parsed) + + # Build the search query for libgen + # Priority: isbn (authoritative key) > title > author > free_text + if isbn: + search_query = isbn + elif title: + search_query = title + elif author: + search_query = author + else: + search_query = free_text or query + + debug(f"[libgen] Built search query: {search_query}") + + downloader = UnifiedBookDownloader(config=self.config) + search_fn = getattr(downloader, "search_libgen", None) + + if not callable(search_fn): + log("[libgen] Searcher unavailable", file=sys.stderr) + return [] + + debug(f"[libgen] Calling search_libgen with query: {search_query}") + books = search_fn(search_query, limit=limit) + debug(f"[libgen] Got {len(books) if books else 0} results from search_libgen") + + search_results = [] + for idx, book in enumerate(books, 1): + title = book.get("title", "Unknown") + author = book.get("author", "Unknown") + year = book.get("year", "Unknown") + pages = book.get("pages") or book.get("pages_str") or "" + extension = book.get("extension", "") or book.get("ext", "") + filesize = book.get("filesize_str", "Unknown") + isbn = book.get("isbn", "") + mirror_url = book.get("mirror_url", "") + + # Columns: Title, Author, Pages, Ext + columns = [ + ("Title", title), + ("Author", author), + ("Pages", str(pages)), + ("Ext", str(extension)), + ] + + # Build detail with author and year + detail = f"By: {author}" + if year and year != "Unknown": + detail += f" ({year})" + + annotations = [f"{filesize}"] + if isbn: + annotations.append(f"ISBN: {isbn}") + + # Store full book data without mirrors in metadata to avoid serialization overhead + search_results.append(self.build_result( + origin="libgen", + title=title, + target=mirror_url or f"libgen:{book.get('id', '')}", + detail=detail, + annotations=annotations, + media_kind="book", + columns=columns, + full_metadata={ + "number": idx, + "author": author, + "year": year, + "isbn": isbn, + "filesize": filesize, + # Exclude mirrors dict from metadata to reduce serialization overhead + # Mirrors can be re-fetched if the result is selected + "book_id": book.get("book_id", ""), + "md5": book.get("md5", ""), + }, + )) + + debug(f"[libgen] Returning {len(search_results)} formatted results") + return search_results + + except Exception as e: + log(f"[libgen] Search error: {e}", file=sys.stderr) + import traceback + log(traceback.format_exc(), file=sys.stderr) + return [] + + def get_result_args(self) -> List[str]: + """LibGen results use -url for download or -mirror for selection.""" + return ["-url"] + + def validate(self) -> bool: + """Check if LibGen downloader is available.""" + try: + from helper.unified_book_downloader import UnifiedBookDownloader + return True + except Exception: + return False + + +class SoulSeek(Provider): + """Search provider for Soulseek P2P network.""" + + # Allowed music file extensions + MUSIC_EXTENSIONS = { + '.flac', '.mp3', '.m4a', '.aac', '.ogg', '.opus', + '.wav', '.alac', '.wma', '.ape', '.aiff', '.dsf', + '.dff', '.wv', '.tta', '.tak', '.ac3', '.dts' + } + + # Display columns for search results + RESULT_FIELDS = [ + 
("track_num", "Track", None), + ("title", "Title", None), + ("artist", "Artist", lambda x: (str(x)[:32] + '...') if x and len(str(x)) > 35 else x), + ("album", "Album", lambda x: (str(x)[:32] + '...') if x and len(str(x)) > 35 else x), + ("size", "Size", lambda x: f"{int(int(x)/1024/1024)} MB" if x else ""), + ] + + # Soulseek config + USERNAME = "asjhkjljhkjfdsd334" + PASSWORD = "khhhg" + DOWNLOAD_DIR = "./downloads" + MAX_WAIT_TRANSFER = 1200 + + def __init__(self, config: Dict[str, Any] = None): + super().__init__(config) + self.name = "soulseek" + + async def perform_search( + self, + query: str, + timeout: float = 9.0, + limit: int = 50 + ) -> List[Dict[str, Any]]: + """Perform async Soulseek search and return flattened results.""" + import asyncio + import os + import re + import time + from aioslsk.client import SoulSeekClient + from aioslsk.settings import Settings, CredentialsSettings + + os.makedirs(self.DOWNLOAD_DIR, exist_ok=True) + + settings = Settings(credentials=CredentialsSettings(username=self.USERNAME, password=self.PASSWORD)) + client = SoulSeekClient(settings) + + try: + await client.start() + await client.login() + except Exception as e: + log(f"[soulseek] Login failed: {type(e).__name__}: {e}", file=sys.stderr) + return [] + + try: + search_request = await client.searches.search(query) + await self._collect_search_results(client, search_request, timeout=timeout) + flat = self._flatten_search_results(search_request)[:limit] + return flat + except Exception as e: + log(f"[soulseek] Search error: {type(e).__name__}: {e}", file=sys.stderr) + return [] + finally: + try: + await client.stop() + except Exception: + pass + + def _flatten_search_results(self, search_request) -> List[dict]: + """Extract files from SearchRequest.results.""" + flat: List[dict] = [] + for result in search_request.results: + username = getattr(result, "username", "?") + + for file_data in getattr(result, "shared_items", []): + flat.append({ + "file": file_data, + "username": username, + "filename": getattr(file_data, "filename", "?"), + "size": getattr(file_data, "filesize", 0), + }) + + for file_data in getattr(result, "locked_results", []): + flat.append({ + "file": file_data, + "username": username, + "filename": getattr(file_data, "filename", "?"), + "size": getattr(file_data, "filesize", 0), + }) + + return flat + + async def _collect_search_results(self, client, search_request, timeout: float = 75.0) -> None: + """Collect search results by waiting.""" + import asyncio + import time + debug(f"[soulseek] Collecting results for {timeout}s...") + end = time.time() + timeout + last_count = 0 + while time.time() < end: + current_count = len(search_request.results) + if current_count > last_count: + debug(f"[soulseek] Got {current_count} result(s) so far...") + last_count = current_count + await asyncio.sleep(0.5) + + async def download_file( + self, + username: str, + filename: str, + file_size: int, + target_dir: Optional[str] = None + ) -> bool: + """Download a file from Soulseek to a specific directory.""" + import asyncio + import os + import time + from aioslsk.client import SoulSeekClient + from aioslsk.settings import Settings, CredentialsSettings + from aioslsk.events import TransferProgressEvent + from tqdm import tqdm + + download_dir = target_dir if target_dir else self.DOWNLOAD_DIR + os.makedirs(download_dir, exist_ok=True) + + settings = Settings(credentials=CredentialsSettings(username=self.USERNAME, password=self.PASSWORD)) + settings.shares.download = download_dir + client = 
SoulSeekClient(settings) + + try: + await client.start() + await client.login() + + debug(f"[soulseek] Starting: {filename} from {username}") + + transfer = await client.transfers.download(username, filename) + if transfer is None: + log("[soulseek] Failed: transfer object is None") + return False + + success = await self._wait_for_transfer(client, transfer, file_size=file_size, max_wait=self.MAX_WAIT_TRANSFER) + + return success + + except Exception as e: + log(f"[soulseek] Download error: {type(e).__name__}: {e}", file=sys.stderr) + return False + + finally: + try: + await client.stop() + except Exception: + pass + + async def _wait_for_transfer(self, client, transfer_obj: Any, file_size: Any = None, max_wait: float = 1200) -> bool: + """Wait for transfer finish using event listeners with TQDM progress bar. + + Returns: + True if transfer completed successfully, False if failed or timed out. + """ + import asyncio + import time + from aioslsk.events import TransferProgressEvent + from tqdm import tqdm + + if transfer_obj is None: + log("[soulseek] No transfer object returned") + return False + + transfer_finished = False + transfer_success = False + pbar = None + total_size = file_size + last_speed_time = time.time() + last_speed = 0 + + async def on_progress(event): + nonlocal last_speed_time, last_speed, transfer_finished, transfer_success, pbar, total_size + if not hasattr(event, 'updates') or not event.updates: + return + + for transfer, _, curr_snapshot in event.updates: + if (transfer.username == transfer_obj.username and transfer.remote_path == transfer_obj.remote_path): + bytes_xfer = getattr(curr_snapshot, 'bytes_transfered', 0) + state_name = curr_snapshot.state.name if hasattr(curr_snapshot, 'state') else "?" + speed = getattr(curr_snapshot, 'speed', 0) + + if total_size is None and hasattr(transfer, 'file_attributes'): + try: + size = getattr(transfer, 'file_size', None) or getattr(transfer, 'size', None) + if size: + total_size = size + except Exception: + pass + + if pbar is None: + total = total_size if total_size else 100 * 1024 * 1024 + pbar = tqdm(total=total, unit='B', unit_scale=True, desc='[transfer]') + + if pbar: + pbar.n = bytes_xfer + if speed > 0: + pbar.set_postfix({"speed": f"{speed/1024:.1f} KB/s", "state": state_name}) + pbar.refresh() + + if state_name in ('FINISHED', 'COMPLETE'): + if pbar: + pbar.close() + debug(f"[soulseek] Transfer {state_name.lower()}") + transfer_finished = True + transfer_success = True + return + elif state_name in ('ABORTED', 'FAILED', 'PAUSED'): + if pbar: + pbar.close() + debug(f"[soulseek] Transfer {state_name.lower()}") + transfer_finished = True + transfer_success = False + return + + if total_size and bytes_xfer >= total_size: + if pbar: + pbar.close() + debug(f"[soulseek] Transfer complete ({bytes_xfer / 1024 / 1024:.1f} MB)") + transfer_finished = True + transfer_success = True + return + + if speed == 0 and bytes_xfer > 0: + now = time.time() + if now - last_speed_time > 3: + if pbar: + pbar.close() + debug(f"[soulseek] Transfer complete ({bytes_xfer / 1024 / 1024:.1f} MB)") + transfer_finished = True + transfer_success = True + return + else: + last_speed_time = time.time() + + last_speed = speed + + client.events.register(TransferProgressEvent, on_progress) + end = time.time() + max_wait + + while time.time() < end: + if transfer_finished: + break + await asyncio.sleep(0.5) + + client.events.unregister(TransferProgressEvent, on_progress) + + if pbar: + pbar.close() + + if not transfer_finished: + log(f"[soulseek] Timed 
out after {max_wait}s; transfer may still be in progress") + return False + else: + return transfer_success + + def search( + self, + query: str, + limit: int = 50, + filters: Optional[Dict[str, Any]] = None, + **kwargs + ) -> List[SearchResult]: + """Search Soulseek P2P network (synchronous wrapper).""" + import asyncio + import re + + filters = filters or {} + + try: + # Run async search + flat_results = asyncio.run(self.perform_search(query, timeout=9.0, limit=limit)) + + if not flat_results: + return [] + + # Filter to music files only + music_results = [] + for item in flat_results: + filename = item['filename'] + if '.' in filename: + ext = '.' + filename.rsplit('.', 1)[-1].lower() + else: + ext = '' + + if ext in self.MUSIC_EXTENSIONS: + music_results.append(item) + + if not music_results: + return [] + + # Extract metadata for all results + enriched_results = [] + for item in music_results: + filename = item['filename'] + + # Extract extension + if '.' in filename: + _, ext = filename.rsplit('.', 1) + ext = '.' + ext.lower() + else: + ext = '' + + # Get display filename + if '\\' in filename: + display_name = filename.rsplit('\\', 1)[-1] + elif '/' in filename: + display_name = filename.rsplit('/', 1)[-1] + else: + display_name = filename + + # Extract path hierarchy for artist/album + path_parts = filename.replace('\\', '/').split('/') + artist = '' + album = '' + + if len(path_parts) >= 3: + artist = path_parts[-3] + album = path_parts[-2] + if ' - ' in album and re.match(r'^\d{4}', album): + album = album.split(' - ', 1)[1] + elif len(path_parts) == 2: + artist = path_parts[-2] + + # Extract track number and title + base_name = display_name.rsplit('.', 1)[0] if '.' in display_name else display_name + track_num = '' + title = base_name + filename_artist = '' + + # First, extract track number if present (e.g., "30 Stumfol - Prisoner" -> track=30, rest="Stumfol - Prisoner") + match = re.match(r'^(\d{1,3})\s*[\.\-]?\s+(.+)$', base_name) + if match: + track_num = match.group(1) + remainder = match.group(2) + + # Now parse "Artist - Title" from the remainder + # If there's a " - " separator, split on it + if ' - ' in remainder: + parts = remainder.split(' - ', 1) + filename_artist = parts[0].strip() + title = parts[1].strip() + else: + # No artist-title separator, use the whole remainder as title + title = remainder + else: + # No track number, check if there's "Artist - Title" format + if ' - ' in base_name: + parts = base_name.split(' - ', 1) + filename_artist = parts[0].strip() + title = parts[1].strip() + + # Use filename_artist if extracted, otherwise fall back to path artist + if filename_artist: + artist = filename_artist + + enriched_results.append({ + **item, + 'artist': artist, + 'album': album, + 'title': title, + 'track_num': track_num, + 'ext': ext + }) + + # Apply filters if specified + if filters: + artist_filter = filters.get('artist', '').lower() if filters.get('artist') else '' + album_filter = filters.get('album', '').lower() if filters.get('album') else '' + track_filter = filters.get('track', '').lower() if filters.get('track') else '' + + if artist_filter or album_filter or track_filter: + filtered_results = [] + for item in enriched_results: + if artist_filter and artist_filter not in (item['artist'] or '').lower(): + continue + if album_filter and album_filter not in (item['album'] or '').lower(): + continue + if track_filter and track_filter not in (item['title'] or '').lower(): + continue + filtered_results.append(item) + + enriched_results = filtered_results 
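+
+            # Filter matching above is a case-insensitive substring test, and every key
+            # that is supplied must match. Illustrative call (values are made up):
+            #   provider.search("ok computer", filters={"artist": "radiohead", "track": "karma"})
+            # keeps only results whose parsed artist contains "radiohead" AND whose
+            # title contains "karma".
+            # The path parsing above assumes an Artist/Album/Track share layout; e.g. a
+            # made-up path "Radiohead\\2007 - In Rainbows\\03 Radiohead - Nude.flac" yields
+            # artist="Radiohead", album="In Rainbows", track_num="03", title="Nude".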
+ + # Sort: .flac first, then others + enriched_results.sort(key=lambda item: (item['ext'].lower() != '.flac', -item['size'])) + + # Convert to SearchResult format + search_results = [] + for idx, item in enumerate(enriched_results, 1): + artist_display = item['artist'] if item['artist'] else "(no artist)" + album_display = item['album'] if item['album'] else "(no album)" + size_mb = int(round(item['size'] / 1024 / 1024)) + + if item['track_num']: + track_title = f"[{item['track_num']}] {item['title']}" + else: + track_title = item['title'] or "(untitled)" + + # Build columns from enriched metadata + columns = self.build_columns_from_doc(item, idx=idx) + + search_results.append(self.build_result( + origin="soulseek", + title=track_title, + target=item['filename'], + detail=f"Artist: {artist_display} | Album: {album_display}", + annotations=[f"{size_mb} MB", item['ext']], + media_kind="audio", + size_bytes=item['size'], + columns=columns, + full_metadata={ + "artist": item['artist'], + "album": item['album'], + "track_num": item['track_num'], + "username": item['username'], + "filename": item['filename'], + "ext": item['ext'], + }, + )) + + return search_results + + except Exception as e: + log(f"Soulseek search error: {e}", file=sys.stderr) + return [] + + def get_result_args(self) -> List[str]: + """Soulseek results use filename/path for results.""" + return ["-path"] + + def validate(self) -> bool: + """Check if Soulseek client is available.""" + try: + import aioslsk # type: ignore + return True + except ImportError: + return False + + +class Debrid(Provider): + """Search provider for AllDebrid magnets.""" + + # Status code mappings + STATUS_MAP = { + 0: "In Queue", + 1: "Downloading", + 2: "Compressing", + 3: "Uploading", + 4: "Ready", + 5: "Upload Failed", + 6: "Unpack Error", + 7: "Not Downloaded", + 8: "File Too Big", + 9: "Internal Error", + 10: "Download Timeout", + 11: "Deleted", + 12: "Processing Failed", + 13: "Processing Failed", + 14: "Tracker Error", + 15: "No Peers" + } + + def __init__(self, config: Dict[str, Any] = None): + super().__init__(config) + self.name = "debrid" + self._magnet_files_cache = {} + + def _format_size(self, bytes_val: float) -> str: + """Format bytes to human readable size.""" + for unit in ['B', 'KB', 'MB', 'GB', 'TB']: + if bytes_val < 1024: + return f"{bytes_val:.2f} {unit}" + bytes_val /= 1024 + return f"{bytes_val:.2f} PB" + + def _get_status_display(self, status_code: int) -> str: + """Get human-readable status for AllDebrid status codes.""" + return self.STATUS_MAP.get(status_code, f"Unknown ({status_code})") + + def _should_filter_magnet(self, status_code: int, status_text: str) -> bool: + """Check if magnet should be filtered out (expired/deleted).""" + # Filter expired/deleted entries + return status_code in (5, 6, 7, 8, 11, 12, 13, 14) + + def _fuzzy_match(self, text: str, pattern: str) -> bool: + """Check if pattern fuzzy-matches text (case-insensitive, substring matching).""" + return pattern.lower() in text.lower() + + def search( + self, + query: str, + limit: int = 50, + filters: Optional[Dict[str, Any]] = None, + **kwargs + ) -> List[SearchResult]: + """Search AllDebrid magnets with optional status and name filtering. 
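+
+        Example (illustrative values): search("ubuntu", filters={"status": "ready"})
+        returns only magnets whose statusCode is 4 ("Ready").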
+ + Args: + query: Search query (magnet filename or '*' for all) + limit: Max results to return + filters: Optional dict with 'status' filter ('all', 'active', 'ready', 'error') + + Returns: + List of SearchResult objects + """ + filters = filters or {} + + try: + from API.alldebrid import AllDebridClient + from config import get_debrid_api_key + + api_key = get_debrid_api_key(self.config) + + if not api_key: + log("[debrid] API key not configured", file=sys.stderr) + return [] + + client = AllDebridClient(api_key) + + # Parse status filter + status_filter_param = filters.get('status', 'all').lower() if filters.get('status') else 'all' + + # Get magnets with optional status filter + response = client._request("magnet/status", {}) + + if response.get("status") != "success": + log(f"[debrid] API error: {response.get('error', 'Unknown')}", file=sys.stderr) + return [] + + magnets = response.get("data", {}).get("magnets", []) + + # Handle both list and dict formats + if isinstance(magnets, dict): + magnets = list(magnets.values()) + + # Filter by status if specified + if status_filter_param == 'active': + magnets = [m for m in magnets if m.get('statusCode', -1) in (0, 1, 2, 3)] + elif status_filter_param == 'ready': + magnets = [m for m in magnets if m.get('statusCode', -1) == 4] + elif status_filter_param == 'error': + magnets = [m for m in magnets if m.get('statusCode', -1) in (5, 6, 8, 9, 10, 12, 13, 14, 15)] + # 'all' includes everything + + # Filter by query (fuzzy match on filename) + results = [] + count = 0 + for magnet in magnets: + if count >= limit: + break + + filename = magnet.get("filename", "") + status_code = magnet.get("statusCode", -1) + status_text = magnet.get("status", "Unknown") + + # Skip expired/deleted unless 'all' filter + if status_filter_param != 'all' and self._should_filter_magnet(status_code, status_text): + continue + + # Apply query filter (skip if doesn't match) + if query and query != "*" and not self._fuzzy_match(filename, query): + continue + + magnet_id = magnet.get("id") + size = magnet.get("size", 0) + downloaded = magnet.get("downloaded", 0) + progress = (downloaded / size * 100) if size > 0 else 0 + + # Get status emoji + if status_code == 4: + status_emoji = "✓" + elif status_code < 4: + status_emoji = "⧗" + else: + status_emoji = "✗" + + annotations = [self._get_status_display(status_code)] + if size > 0: + annotations.append(self._format_size(size)) + if progress > 0 and progress < 100: + annotations.append(f"{progress:.1f}%") + + results.append(self.build_result( + origin="debrid", + title=filename or "Unknown", + target=str(magnet_id), + detail=f"{status_emoji} {self._get_status_display(status_code)} | {self._format_size(size)}", + annotations=annotations, + media_kind="magnet", + size_bytes=size, + full_metadata={ + "magnet_id": magnet_id, + "status_code": status_code, + "status_text": status_text, + "progress": progress, + "downloaded": downloaded, + "seeders": magnet.get("seeders", 0), + "download_speed": magnet.get("downloadSpeed", 0), + }, + )) + + count += 1 + + # Cache metadata for ready magnets + if results: + self._cache_ready_magnet_metadata(client, [r for r in results if r.full_metadata.get('status_code') == 4]) + + return results + + except Exception as e: + log(f"Debrid search error: {e}", file=sys.stderr) + return [] + + def _cache_ready_magnet_metadata(self, client, results: List[SearchResult]) -> None: + """Cache file metadata for ready magnets.""" + if not results: + return + + try: + ready_ids = [r.full_metadata.get('magnet_id') 
for r in results if r.full_metadata.get('status_code') == 4] + if ready_ids: + self._magnet_files_cache = client.magnet_links(ready_ids) + log(f"[debrid] Cached metadata for {len(self._magnet_files_cache)} ready magnet(s)", file=sys.stderr) + except Exception as e: + log(f"[debrid] Warning: Could not cache magnet metadata: {e}", file=sys.stderr) + + def get_magnet_metadata(self, magnet_id: int) -> Optional[Dict[str, Any]]: + """Get cached metadata for a magnet.""" + return self._magnet_files_cache.get(str(magnet_id)) + + def get_result_args(self) -> List[str]: + """Debrid results use magnet ID for download.""" + return ["-id"] + + def validate(self) -> bool: + """Check if AllDebrid is configured.""" + from config import get_debrid_api_key + return bool(get_debrid_api_key(self.config)) + + +class OpenLibrary(Provider): + """Search provider for OpenLibrary.""" + + # Define fields to request from API and how to display them + RESULT_FIELDS: List[Tuple[str, str, Optional[Any]]] = [] # columns built manually + + def __init__(self, config: Dict[str, Any] = None): + super().__init__(config) + self.name = "openlibrary" + + def _derive_status(self, doc: Dict[str, Any]) -> tuple[str, Optional[str]]: + """Determine availability label and archive identifier.""" + ebook_access = str(doc.get("ebook_access", "") or "").strip().lower() + has_fulltext = bool(doc.get("has_fulltext")) + ia_entries = doc.get("ia") + archive_id = "" + if isinstance(ia_entries, list): + for entry in ia_entries: + if isinstance(entry, str) and entry.strip(): + archive_id = entry.strip() + break + elif isinstance(ia_entries, str) and ia_entries.strip(): + archive_id = ia_entries.strip() + elif isinstance(doc.get("ocaid"), str) and doc["ocaid"].strip(): + archive_id = doc["ocaid"].strip() + + available = False + if ebook_access in {"borrowable", "public", "full"}: + available = True + elif has_fulltext: + available = True + elif archive_id: + available = True + + status = "download" if available else "?Libgen" + return status, archive_id or None + + def search( + self, + query: str, + limit: int = 50, + filters: Optional[Dict[str, Any]] = None, + **kwargs + ) -> List[SearchResult]: + """Search OpenLibrary for books. + + Smart search that detects ISBN, OCLC, OpenLibrary ID, and falls back to title search. + """ + filters = filters or {} + + try: + import requests + + query_clean = query.strip() + search_url = "https://openlibrary.org/search.json" + + # Try to detect query type (ISBN, OCLC, OL ID, or title) + if query_clean.isdigit() and len(query_clean) in (10, 13): + # ISBN search + url = f"https://openlibrary.org/isbn/{query_clean}.json" + response = requests.get(url, timeout=9) + if response.status_code == 200: + book_data = response.json() + return [self._format_isbn_result(book_data, query_clean)] + elif response.status_code == 404: + return [] + + # Default to title/general search + requested_fields = [ + "title", + "author_name", + "first_publish_year", + "number_of_pages_median", + "isbn", + "oclc_numbers", + "lccn", + "language", + "key", + "edition_key", + "ebook_access", + "ia", + "has_fulltext", + ] + params = { + "q": query_clean, + "limit": limit, + "fields": ",".join(requested_fields), + } + + response = requests.get(search_url, params=params, timeout=9) + response.raise_for_status() + data = response.json() + + search_results = [] + for idx, doc in enumerate(data.get("docs", []), 1): + # Prefer edition_key (books/OLxxxM). Fallback to work key. 
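+                # (Edition OLIDs end in "M", work keys in "W"; the /books/<OLID> URL built
+                # below is most specific when an edition key is available.)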
+ edition_keys = doc.get("edition_key") or [] + olid = "" + if isinstance(edition_keys, list) and edition_keys: + olid = str(edition_keys[0]).strip() + if not olid: + olid = doc.get("key", "").split("/")[-1] + + # Determine status/availability + status, archive_id = self._derive_status(doc) + doc["status"] = status + + # Extract additional metadata + title = doc.get("title", "Unknown") + authors = doc.get("author_name", ["Unknown"]) + year = doc.get("first_publish_year", "") + isbn_list = doc.get("isbn", []) + isbn = isbn_list[0] if isbn_list else "" + oclc_list = doc.get("oclc_numbers", []) + oclc = oclc_list[0] if oclc_list else "" + lccn_list = doc.get("lccn", []) + lccn = lccn_list[0] if lccn_list else "" + pages = doc.get("number_of_pages_median", "") + languages = doc.get("language", []) + language = languages[0] if languages else "" + + author_str = ", ".join(authors) if authors else "Unknown" + + # Format status for display + ebook_access_raw = str(doc.get("ebook_access", "") or "").strip().lower() + status_display = "" + if ebook_access_raw == "borrowable": + status_display = "📚 Borrowable" + elif ebook_access_raw == "public": + status_display = "🌐 Public" + elif ebook_access_raw == "full": + status_display = "✓ Full" + elif doc.get("has_fulltext"): + status_display = "📄 Fulltext" + else: + status_display = "❌ No" + + # Columns: Title, Author, Pages, Borrowable + columns = [ + ("Title", title), + ("Author", author_str), + ("Pages", str(pages or "")), + ("Borrowable", status_display), + ] + + # Build detail with author and year + detail = f"By: {author_str}" + if year: + detail += f" ({year})" + + # Build annotations with additional info + annotations = [] + if pages: + annotations.append(f"{pages} pages") + if isbn: + annotations.append(f"ISBN: {isbn}") + + search_results.append(self.build_result( + origin="openlibrary", + title=title, + target=f"https://openlibrary.org/books/{olid}", + detail=detail, + annotations=annotations, + media_kind="book", + columns=columns, + full_metadata={ + "number": idx, + "authors": authors, + "year": year, + "isbn": isbn, + "oclc": oclc, + "lccn": lccn, + "pages": pages, + "language": language, + "olid": olid, + "ebook_access": doc.get("ebook_access", ""), + "status": status, + "archive_id": archive_id, + }, + )) + + # Sort results: borrowable ones first, then not borrowable, then unknown + def sort_key(result): + status = (result.full_metadata.get("status") or "").strip().lower() + if status == "download": + return (0, result.title) + elif status.startswith("?libgen"): + return (1, result.title) + else: + return (2, result.title) + + search_results.sort(key=sort_key) + + # Rebuild number field after sorting + for new_idx, result in enumerate(search_results, 1): + result.full_metadata["number"] = new_idx + # Update the # column in columns + if result.columns and result.columns[0][0] == "#": + result.columns[0] = ("#", str(new_idx)) + + return search_results + + except Exception as e: + log(f"OpenLibrary search error: {e}", file=sys.stderr) + return [] + + def _format_isbn_result(self, book_data: Dict[str, Any], isbn: str) -> SearchResult: + """Format a book result from ISBN endpoint.""" + # Get title from book data + title = book_data.get("title", "Unknown") + + # Get authors + author_list = [] + for author_key in book_data.get("authors", []): + if isinstance(author_key, dict): + author_list.append(author_key.get("name", "")) + elif isinstance(author_key, str): + author_list.append(author_key) + + author_str = ", ".join(filter(None, author_list)) 
if author_list else "Unknown" + + # Extract other metadata + year = book_data.get("first_publish_year", "") + publishers = book_data.get("publishers", []) + publisher = publishers[0].get("name", "") if publishers and isinstance(publishers[0], dict) else "" + pages = book_data.get("number_of_pages", "") + languages = book_data.get("languages", []) + language = languages[0].get("key", "").replace("/languages/", "") if languages else "" + olid = book_data.get("key", "").split("/")[-1] if book_data.get("key") else "" + + # Build doc for column rendering + doc = { + "title": title, + "author_name": author_list, + "first_publish_year": year, + "ebook_access": book_data.get("ebook_access", ""), + "has_fulltext": bool(book_data.get("ocaid")), + "ia": [book_data.get("ocaid")] if book_data.get("ocaid") else [], + "ocaid": book_data.get("ocaid", ""), + } + status, archive_id = self._derive_status(doc) + doc["status"] = status + + # Build detail + detail = f"By: {author_str}" + if year: + detail += f" ({year})" + + # Build annotations + annotations = [] + if pages: + annotations.append(f"{pages} pages") + annotations.append(f"ISBN: {isbn}") + + # Build columns using shared helper for consistency + columns = self.build_columns_from_doc(doc, idx=1) + + return SearchResult( + origin="openlibrary", + title=title, + target=f"https://openlibrary.org/books/{olid}", + detail=detail, + annotations=annotations, + media_kind="book", + columns=columns, + full_metadata={ + "number": 1, + "authors": author_list, + "year": year, + "isbn": isbn, + "oclc": "", + "lccn": "", + "pages": pages, + "language": language, + "olid": olid, + "publisher": publisher, + "ebook_access": doc.get("ebook_access", ""), + "status": status, + "archive_id": archive_id, + }, + ) + + def get_result_args(self) -> List[str]: + """OpenLibrary results are info/links only.""" + return ["-info"] + + def validate(self) -> bool: + """OpenLibrary is always available (no auth needed).""" + return True + + +class GogGames(Provider): + """Search provider for GOG Games.""" + + def __init__(self, config: Dict[str, Any] = None): + super().__init__(config) + self.name = "gog" + self.base_url = "https://gog-games.to" + self.headers = { + "Referer": "https://gog-games.to/", + "Origin": "https://gog-games.to", + "X-Requested-With": "XMLHttpRequest" + } + + def _request(self, client, endpoint: str, is_json: bool = True) -> Any: + """Helper for API requests.""" + url = f"{self.base_url}/api/web/{endpoint}" + try: + response = client.get(url, headers=self.headers) + if response.status_code == 200: + return response.json() if is_json else response.text + elif response.status_code == 404: + return None + else: + log(f"[gog] API request failed: {response.status_code} for {endpoint}", file=sys.stderr) + return None + except Exception as e: + log(f"[gog] Request error: {e}", file=sys.stderr) + return None + + def get_all_games(self, client) -> List[Dict[str, Any]]: + """Fetch all games from the API.""" + return self._request(client, "all-games") or [] + + def get_game_details(self, client, slug: str) -> Optional[Dict[str, Any]]: + """Fetch details for a specific game.""" + return self._request(client, f"query-game/{slug}") + + def get_game_md5(self, client, slug: str) -> Optional[str]: + """Fetch MD5 checksums for a game.""" + return self._request(client, f"download-md5/{slug}", is_json=False) + + def search( + self, + query: str, + limit: int = 50, + filters: Optional[Dict[str, Any]] = None, + **kwargs + ) -> List[SearchResult]: + """Search GOG Games.""" + from 
API.HTTP import HTTPClient + + results = [] + query_norm = query.strip().lower() + + with HTTPClient() as client: + # 1. Fetch all games to perform fuzzy search + all_games = self.get_all_games(client) + + matches = [] + if all_games: + for game in all_games: + if (query_norm in game.get("title", "").lower() or + query_norm in game.get("slug", "").lower()): + matches.append(game) + + # 2. Fallback: If no matches and query looks like a slug, try direct lookup + if not matches and "_" in query_norm: + details = self.get_game_details(client, query_norm) + if details and "game_info" in details: + matches.append(details["game_info"]) + + for game in matches[:limit]: + slug = game.get("slug") + title = game.get("title", slug) + infohash = game.get("infohash") + gog_url = game.get("gog_url", "") + + # Note: 'all-games' endpoint doesn't provide file size. + # We set size to 0 to avoid N+1 requests. + + if infohash: + magnet_link = f"magnet:?xt=urn:btih:{infohash}&dn={slug}" + results.append(self.build_result( + origin="gog", + title=title, + target=magnet_link, + media_kind="magnet", + detail="Magnet Link", + annotations=["Magnet"], + full_metadata=game + )) + else: + results.append(self.build_result( + origin="gog", + title=title, + target=gog_url, + media_kind="game", + detail="No magnet available", + annotations=["No Magnet"], + full_metadata=game + )) + + return results + + def get_result_args(self) -> List[str]: + """GOG results are URLs.""" + return ["-url"] + + def validate(self) -> bool: + """GOG Games is a public website.""" + return True + + +class YouTube(Provider): + """ + Search provider for YouTube using yt-dlp. + """ + + RESULT_FIELDS = [ + ("title", "Title", None), + ("uploader", "Uploader", None), + ("duration_string", "Duration", None), + ("view_count", "Views", lambda x: f"{x:,}" if x else ""), + ] + + def search(self, query: str, limit: int = 10, filters: Optional[Dict[str, Any]] = None, **kwargs) -> List[SearchResult]: + """ + Search YouTube using yt-dlp. + + Args: + query: Search query + limit: Maximum number of results + filters: Optional filtering criteria (ignored for now) + + Returns: + List of SearchResult objects + """ + # Check if yt-dlp is available + ytdlp_path = shutil.which("yt-dlp") + if not ytdlp_path: + log("yt-dlp not found in PATH", file=sys.stderr) + return [] + + # Construct command + # ytsearchN:query searches for N results + search_query = f"ytsearch{limit}:{query}" + + cmd = [ + ytdlp_path, + "--dump-json", + "--flat-playlist", # Don't resolve video details fully, faster + "--no-warnings", + search_query + ] + + try: + # Run yt-dlp + # We need to capture stdout. 
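+            # Illustrative shape of one stdout line from `--dump-json --flat-playlist`
+            # (exact keys vary by yt-dlp version and may be null; the loop below only
+            # relies on the keys it .get()s):
+            #   {"id": "abc123", "title": "Example", "uploader": "Channel", "duration": 212, "view_count": 1234}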
yt-dlp outputs one JSON object per line for search results + process = subprocess.run( + cmd, + capture_output=True, + text=True, + encoding="utf-8", + errors="replace" + ) + + if process.returncode != 0: + log(f"yt-dlp search failed: {process.stderr}", file=sys.stderr) + return [] + + results = [] + for line in process.stdout.splitlines(): + if not line.strip(): + continue + + try: + data = json.loads(line) + + # Extract fields + title = data.get("title", "Unknown Title") + url = data.get("url") + if not url: + # Sometimes flat-playlist gives 'id', construct URL + video_id = data.get("id") + if video_id: + url = f"https://www.youtube.com/watch?v={video_id}" + else: + continue + + uploader = data.get("uploader", "Unknown Uploader") + duration = data.get("duration") # seconds + view_count = data.get("view_count") + + # Format duration + duration_str = "" + if duration: + try: + m, s = divmod(int(duration), 60) + h, m = divmod(m, 60) + if h > 0: + duration_str = f"{h}:{m:02d}:{s:02d}" + else: + duration_str = f"{m}:{s:02d}" + except (ValueError, TypeError): + pass + + # Create annotations + annotations = [] + if duration_str: + annotations.append(duration_str) + if view_count: + # Simple format for views + try: + vc = int(view_count) + if vc >= 1000000: + views_str = f"{vc/1000000:.1f}M views" + elif vc >= 1000: + views_str = f"{vc/1000:.1f}K views" + else: + views_str = f"{vc} views" + annotations.append(views_str) + except (ValueError, TypeError): + pass + + annotations.append("youtube") + + # Create result + result = self.build_result( + origin="youtube", + title=title, + target=url, + detail=f"by {uploader}", + annotations=annotations, + media_kind="video", + full_metadata=data, + columns=[ + ("Title", title), + ("Uploader", uploader), + ("Duration", duration_str), + ("Views", str(view_count) if view_count else "") + ] + ) + results.append(result) + + except json.JSONDecodeError: + continue + + return results + + except Exception as e: + log(f"Error running yt-dlp: {e}", file=sys.stderr) + return [] + + def get_result_args(self) -> List[str]: + """YouTube results are URLs.""" + return ["-url"] + + def validate(self) -> bool: + """Check if yt-dlp is installed.""" + return shutil.which("yt-dlp") is not None + + +class BandCamp(Provider): + """ + Search provider for Bandcamp using Playwright scraper. + """ + RESULT_FIELDS = [ + ("name", "Name", None), + ("artist", "Artist/Loc", None), + ("type", "Type", None) + ] + + def search( + self, + query: str, + limit: int = 50, + filters: Optional[Dict[str, Any]] = None, + **kwargs + ) -> List[SearchResult]: + if not PLAYWRIGHT_AVAILABLE: + print("Playwright library not available. 
Please install it (pip install playwright).") + return [] + + results = [] + try: + with sync_playwright() as p: + # Launch browser (headless) + browser = p.chromium.launch(headless=True) + page = browser.new_page() + + # Check if query is a URL (Artist/Album Scraping Mode) + if query.startswith("http://") or query.startswith("https://"): + return self._scrape_url(page, query, limit) + + # Search Mode + # Parse query for prefixes + search_type = "t" # Default to track + clean_query = query + + if "artist:" in query.lower(): + search_type = "b" + clean_query = query.lower().replace("artist:", "").strip() + elif "album:" in query.lower(): + search_type = "a" + clean_query = query.lower().replace("album:", "").strip() + elif "track:" in query.lower(): + search_type = "t" + clean_query = query.lower().replace("track:", "").strip() + elif "label:" in query.lower(): + search_type = "b" + clean_query = query.lower().replace("label:", "").strip() + + # Filters override prefix + if filters: + ftype = filters.get("type", "").lower() + if ftype in ["album", "albums"]: + search_type = "a" + elif ftype in ["artist", "artists", "label", "labels"]: + search_type = "b" + elif ftype in ["track", "tracks"]: + search_type = "t" + + # Construct URL with item_type + url = f"https://bandcamp.com/search?q={clean_query}&item_type={search_type}" + debug(f"[Bandcamp] Navigating to search URL: {url}") + page.goto(url) + page.wait_for_load_state("domcontentloaded") + + # Wait for results + try: + # Wait for the search results to appear in the DOM + page.wait_for_selector(".searchresult", timeout=10000) + except Exception as e: + # No results found or timeout + log(f"Bandcamp search timeout or no results: {e}") + browser.close() + return [] + + # Extract items + items = page.query_selector_all(".searchresult") + debug(f"[Bandcamp] Found {len(items)} results") + + for item in items: + if len(results) >= limit: + break + + try: + # Extract data + heading_el = item.query_selector(".heading a") + if not heading_el: + debug("[Bandcamp] Skipping item: No heading found") + continue + + name = heading_el.inner_text().strip() + item_url = heading_el.get_attribute("href") + # Clean URL (remove query params) + if item_url and "?" 
in item_url: + item_url = item_url.split("?")[0] + + item_type_el = item.query_selector(".itemtype") + item_type = item_type_el.inner_text().strip() if item_type_el else "Unknown" + + subhead_el = item.query_selector(".subhead") + subhead = subhead_el.inner_text().strip() if subhead_el else "" + + art_el = item.query_selector(".art img") + img = art_el.get_attribute("src") if art_el else None + + # Map to metadata + metadata = { + "name": name, + "type": item_type, + "url": item_url, + "img": img, + "subhead": subhead + } + + # Refine metadata based on type + artist_or_loc = subhead + if "ALBUM" in item_type.upper(): + artist_or_loc = subhead.replace("by ", "").strip() + metadata["artist"] = artist_or_loc + elif "ARTIST" in item_type.upper() or "LABEL" in item_type.upper(): + metadata["location"] = subhead + elif "TRACK" in item_type.upper(): + artist_or_loc = subhead.replace("by ", "").strip() + metadata["artist"] = artist_or_loc + + columns = [ + ("Name", name), + ("Artist/Loc", artist_or_loc), + ("Type", item_type) + ] + + results.append(self.build_result( + origin="bandcamp", + title=name, + target=item_url, + full_metadata=metadata, + columns=columns + )) + except Exception as e: + # Skip malformed items + debug(f"[Bandcamp] Error parsing item: {e}") + continue + + browser.close() + + except Exception as e: + log(f"Bandcamp search error: {e}") + return [] + + return results + + def _scrape_url(self, page, url: str, limit: int) -> List[SearchResult]: + """Scrape a Bandcamp artist or album page.""" + debug(f"[Bandcamp] Scraping URL: {url}") + + # If it's an artist page, try to go to /music to see all + if ".bandcamp.com" in url and "/music" not in url and "/album/" not in url and "/track/" not in url: + # Check if it's likely an artist root + url = url.rstrip("/") + "/music" + debug(f"[Bandcamp] Adjusted to music page: {url}") + + page.goto(url) + page.wait_for_load_state("domcontentloaded") + + results = [] + + # Check for grid items (Artist page /music) + grid_items = page.query_selector_all(".music-grid-item") + if grid_items: + debug(f"[Bandcamp] Found {len(grid_items)} grid items") + + # Try to get global artist name from page metadata/header as fallback + page_artist = "" + try: + og_site_name = page.query_selector('meta[property="og:site_name"]') + if og_site_name: + page_artist = og_site_name.get_attribute("content") or "" + + if not page_artist: + band_name = page.query_selector('#band-name-location .title') + if band_name: + page_artist = band_name.inner_text().strip() + except Exception: + pass + + for item in grid_items: + if len(results) >= limit: + break + try: + title_el = item.query_selector(".title") + # Sanitize title to remove newlines which break the table + title = title_el.inner_text().strip().replace("\n", " ").replace("\r", "") if title_el else "Unknown" + # Remove extra spaces + title = " ".join(title.split()) + + link_el = item.query_selector("a") + href = link_el.get_attribute("href") if link_el else "" + if href and not href.startswith("http"): + # Relative link, construct full URL + base = url.split("/music")[0] + href = base + href + + artist_el = item.query_selector(".artist") + artist = artist_el.inner_text().replace("by ", "").strip() if artist_el else "" + + # Use page artist if item artist is missing + if not artist and page_artist: + artist = page_artist + + # Sanitize artist + artist = artist.replace("\n", " ").replace("\r", "") + artist = " ".join(artist.split()) + + columns = [ + ("Name", title), + ("Artist", artist), + ("Type", "Album/Track") + ] 
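+                    # Grid items expose no duration or item-type info, so the Type column is the
+                    # generic "Album/Track" label and full_metadata only carries the resolved artist.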
+ + results.append(self.build_result( + origin="bandcamp", + title=title, + target=href, + full_metadata={"artist": artist}, + columns=columns + )) + except Exception as e: + debug(f"[Bandcamp] Error parsing grid item: {e}") + continue + return results + + # Check for track list (Album page) + track_rows = page.query_selector_all(".track_row_view") + if track_rows: + debug(f"[Bandcamp] Found {len(track_rows)} track rows") + # Get Album Artist + artist_el = page.query_selector("#name-section h3 span a") + album_artist = artist_el.inner_text().strip() if artist_el else "Unknown" + + for row in track_rows: + if len(results) >= limit: + break + try: + title_el = row.query_selector(".track-title") + # Sanitize title + title = title_el.inner_text().strip().replace("\n", " ").replace("\r", "") if title_el else "Unknown" + title = " ".join(title.split()) + + # Track link + link_el = row.query_selector(".title a") + href = link_el.get_attribute("href") if link_el else "" + if href and not href.startswith("http"): + base = url.split(".com")[0] + ".com" + href = base + href + + duration_el = row.query_selector(".time") + duration = duration_el.inner_text().strip() if duration_el else "" + + columns = [ + ("Name", title), + ("Artist", album_artist), + ("Duration", duration) + ] + + results.append(self.build_result( + origin="bandcamp", + title=title, + target=href, + full_metadata={"artist": album_artist, "duration": duration}, + columns=columns + )) + except Exception as e: + debug(f"[Bandcamp] Error parsing track row: {e}") + continue + return results + + debug("[Bandcamp] No recognizable items found on page") + return [] + + def get_result_args(self) -> List[str]: + return ["-url"] + + +# Provider registry +_PROVIDERS = { + "bandcamp": BandCamp, + "libgen": Libgen, + "soulseek": SoulSeek, + "debrid": Debrid, + "openlibrary": OpenLibrary, + "gog": GogGames, + "youtube": YouTube, +} + + +def get_provider(name: str, config: Optional[Dict[str, Any]] = None) -> Optional[Provider]: + """ + Get a search provider by name. + + Args: + name: Provider name (case-insensitive): "local", "libgen", "soulseek", "debrid", "openlibrary" + config: Optional configuration dictionary + + Returns: + SearchProvider instance or None if not found + """ + provider_class = _PROVIDERS.get(name.lower()) + + if provider_class is None: + log(f"Unknown search provider: {name}", file=sys.stderr) + return None + + try: + provider = provider_class(config) + if not provider.validate(): + log(f"Provider '{name}' is not properly configured or available", file=sys.stderr) + return None + return provider + + except Exception as e: + log(f"Error initializing provider '{name}': {e}", file=sys.stderr) + return None + + +def list_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bool]: + """ + List all available providers and whether they're available. + + Args: + config: Optional configuration dictionary + + Returns: + Dictionary mapping provider names to availability (True/False) + """ + availability = {} + for name, provider_class in _PROVIDERS.items(): + try: + provider = provider_class(config) + availability[name] = provider.validate() + except Exception: + availability[name] = False + return availability + + +def register_provider(name: str, provider_class: type) -> None: + """ + Register a new search provider. 
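+
+    Example (illustrative; ``MyProvider`` is a hypothetical Provider subclass):
+        >>> register_provider("myprov", MyProvider)
+        >>> provider = get_provider("myprov", config)
+        >>> results = provider.search("some query", limit=10)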
+ + Args: + name: Provider name (lowercase) + provider_class: Class that inherits from SearchProvider + """ + _PROVIDERS[name.lower()] = provider_class + + +class FileProvider(ABC): + """Abstract base class for file hosting providers.""" + + def __init__(self, config: Optional[Dict[str, Any]] = None): + self.config = config or {} + self.name = self.__class__.__name__.replace("FileProvider", "").lower() + + @abstractmethod + def upload(self, file_path: str, **kwargs: Any) -> str: + """Upload a file and return the URL.""" + pass + + def validate(self) -> bool: + """Check if provider is available/configured.""" + return True + + +class ZeroXZeroFileProvider(FileProvider): + """File provider for 0x0.st.""" + + def __init__(self, config: Optional[Dict[str, Any]] = None): + super().__init__(config) + self.name = "0x0" + self.base_url = "https://0x0.st" + + def upload(self, file_path: str, **kwargs: Any) -> str: + """Upload file to 0x0.st.""" + from API.HTTP import HTTPClient + import os + + if not os.path.exists(file_path): + raise FileNotFoundError(f"File not found: {file_path}") + + try: + # 0x0.st expects 'file' field in multipart/form-data + # Use a custom User-Agent to avoid 403 Forbidden + headers = {"User-Agent": "Medeia-Macina/1.0"} + with HTTPClient(headers=headers) as client: + with open(file_path, 'rb') as f: + files = {'file': f} + response = client.post(self.base_url, files=files) + + if response.status_code == 200: + return response.text.strip() + else: + raise Exception(f"Upload failed: {response.status_code} - {response.text}") + + except Exception as e: + log(f"[0x0] Upload error: {e}", file=sys.stderr) + raise + + def validate(self) -> bool: + return True + + +class MatrixFileProvider(FileProvider): + """File provider for Matrix (Element) chat rooms.""" + + def __init__(self, config: Optional[Dict[str, Any]] = None): + super().__init__(config) + self.name = "matrix" + + def validate(self) -> bool: + """Check if Matrix is configured.""" + if not self.config: return False + matrix_conf = self.config.get('storage', {}).get('matrix', {}) + return bool(matrix_conf.get('homeserver') and matrix_conf.get('room_id') and (matrix_conf.get('access_token') or matrix_conf.get('password'))) + + def upload(self, file_path: str, **kwargs: Any) -> str: + """Upload file to Matrix room.""" + import requests + import mimetypes + from pathlib import Path + import json + + debug(f"[Matrix] Starting upload for: {file_path}") + debug(f"[Matrix] kwargs: {kwargs}") + + path = Path(file_path) + if not path.exists(): + raise FileNotFoundError(f"File not found: {file_path}") + + matrix_conf = self.config.get('storage', {}).get('matrix', {}) + homeserver = matrix_conf.get('homeserver') + access_token = matrix_conf.get('access_token') + room_id = matrix_conf.get('room_id') + + if not homeserver.startswith('http'): + homeserver = f"https://{homeserver}" + + # 1. 
Upload Media + # Use v3 API + upload_url = f"{homeserver}/_matrix/media/v3/upload" + headers = { + "Authorization": f"Bearer {access_token}", + "Content-Type": "application/octet-stream" + } + + mime_type, _ = mimetypes.guess_type(path) + if mime_type: + headers["Content-Type"] = mime_type + + filename = path.name + + debug(f"[Matrix] Uploading media to {upload_url} with mime_type: {mime_type}") + + with open(path, 'rb') as f: + resp = requests.post(upload_url, headers=headers, data=f, params={"filename": filename}) + + if resp.status_code != 200: + raise Exception(f"Matrix upload failed: {resp.text}") + + content_uri = resp.json().get('content_uri') + if not content_uri: + raise Exception("No content_uri returned from Matrix upload") + + debug(f"[Matrix] Media uploaded, content_uri: {content_uri}") + + # 2. Send Message + # Use v3 API + send_url = f"{homeserver}/_matrix/client/v3/rooms/{room_id}/send/m.room.message" + + # Determine msgtype with better fallback for audio + msgtype = "m.file" + ext = path.suffix.lower() + + # Explicit check for common audio extensions to force m.audio + # This prevents audio files being treated as generic files or video + AUDIO_EXTS = {'.mp3', '.flac', '.wav', '.m4a', '.aac', '.ogg', '.opus', '.wma', '.mka', '.alac'} + VIDEO_EXTS = {'.mp4', '.mkv', '.webm', '.mov', '.avi', '.flv', '.mpg', '.mpeg', '.ts', '.m4v', '.wmv'} + IMAGE_EXTS = {'.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.tiff'} + + if ext in AUDIO_EXTS: + msgtype = "m.audio" + elif ext in VIDEO_EXTS: + msgtype = "m.video" + elif ext in IMAGE_EXTS: + msgtype = "m.image" + elif mime_type: + if mime_type.startswith("audio/"): msgtype = "m.audio" + elif mime_type.startswith("video/"): msgtype = "m.video" + elif mime_type.startswith("image/"): msgtype = "m.image" + + debug(f"[Matrix] Determined msgtype: {msgtype} (ext: {ext}, mime: {mime_type})") + + info = { + "mimetype": mime_type, + "size": path.stat().st_size + } + + # Try to get duration for audio/video + if msgtype in ("m.audio", "m.video"): + try: + # Try mutagen first (lightweight) + # Use dynamic import to avoid top-level dependency if not installed + # Note: mutagen.File is available at package level at runtime but type checkers might miss it + import mutagen # type: ignore + m = mutagen.File(str(path)) # type: ignore + if m and m.info and hasattr(m.info, 'length'): + duration_ms = int(m.info.length * 1000) + info['duration'] = duration_ms + debug(f"[Matrix] Extracted duration: {duration_ms}ms") + except Exception as e: + debug(f"[Matrix] Failed to extract duration: {e}") + + payload = { + "msgtype": msgtype, + "body": filename, + "url": content_uri, + "info": info + } + + debug(f"[Matrix] Sending message payload: {json.dumps(payload, indent=2)}") + + resp = requests.post(send_url, headers=headers, json=payload) + if resp.status_code != 200: + raise Exception(f"Matrix send message failed: {resp.text}") + + event_id = resp.json().get('event_id') + return f"https://matrix.to/#/{room_id}/{event_id}" + + +# File provider registry +_FILE_PROVIDERS = { + "0x0": ZeroXZeroFileProvider, + "matrix": MatrixFileProvider, +} + + +def get_file_provider(name: str, config: Optional[Dict[str, Any]] = None) -> Optional[FileProvider]: + """ + Get a file hosting provider by name. 
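+
+    Example (illustrative):
+        >>> fp = get_file_provider("0x0", config)
+        >>> url = fp.upload("/path/to/image.png")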
+ + Args: + name: Provider name (case-insensitive): "0x0" + config: Optional configuration dictionary + + Returns: + FileProvider instance or None if not found + """ + provider_class = _FILE_PROVIDERS.get(name.lower()) + + if provider_class is None: + log(f"Unknown file provider: {name}", file=sys.stderr) + return None + + try: + provider = provider_class(config) + if not provider.validate(): + log(f"File provider '{name}' is not properly configured or available", file=sys.stderr) + return None + return provider + + except Exception as e: + log(f"Error initializing file provider '{name}': {e}", file=sys.stderr) + return None + + +def list_file_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bool]: + """ + List all available file hosting providers and whether they're available. + + Args: + config: Optional configuration dictionary + + Returns: + Dictionary mapping provider names to availability (True/False) + """ + availability = {} + for name, provider_class in _FILE_PROVIDERS.items(): + try: + provider = provider_class(config) + availability[name] = provider.validate() + except Exception: + availability[name] = False + return availability + + +def register_file_provider(name: str, provider_class: type) -> None: + """ + Register a new file hosting provider. + + Args: + name: Provider name (lowercase) + provider_class: Class that inherits from FileProvider + """ + _FILE_PROVIDERS[name.lower()] = provider_class + + + + diff --git a/metadata.py b/metadata.py index 59190de..603603a 100644 --- a/metadata.py +++ b/metadata.py @@ -33,6 +33,13 @@ try: except ImportError: # pragma: no cover sha256_file = None # type: ignore[assignment] +try: # Optional metadata helper for audio files + import mutagen # type: ignore +except ImportError: # pragma: no cover - best effort + mutagen = None # type: ignore + +from SYS.utils import sanitize_metadata_value, unique_preserve_order + try: from helpers.hydrus import HydrusClient, HydrusRequestError, HydrusRequestSpec # type: ignore except ImportError: # pragma: no cover @@ -50,6 +57,223 @@ else: # pragma: no cover _CURRENT_RELATIONSHIP_TRACKER = FileRelationshipTracker() +def prepare_ffmpeg_metadata(payload: Optional[Dict[str, Any]]) -> Dict[str, str]: + """Derive ffmpeg/mutagen metadata tags from a generic metadata payload. + + This is not Hydrus-specific; it is used by exporters/converters. 
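+
+    Example (illustrative; assumes ``sanitize_metadata_value`` returns simple strings unchanged):
+        prepare_ffmpeg_metadata({"title": "Song", "tag": ["artist:Foo", "genre:ambient"]})
+        # -> {"title": "Song", "artist": "Foo", "genre": "ambient",
+        #     "keywords": "artist:Foo, genre:ambient",
+        #     "comment": "artist:Foo, genre:ambient", "description": "artist:Foo, genre:ambient"}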
+ """ + if not isinstance(payload, dict): + return {} + + metadata: Dict[str, str] = {} + + def set_field(key: str, raw: Any, limit: int = 2000) -> None: + sanitized = sanitize_metadata_value(raw) + if not sanitized: + return + if len(sanitized) > limit: + sanitized = sanitized[:limit] + metadata[key] = sanitized + + set_field("title", payload.get("title")) + set_field("artist", payload.get("artist"), 512) + set_field("album", payload.get("album"), 512) + set_field("date", payload.get("year"), 20) + + comment = payload.get("comment") + tags_value = payload.get("tag") + + tag_strings: List[str] = [] + artists_from_tags: List[str] = [] + albums_from_tags: List[str] = [] + genres_from_tags: List[str] = [] + + if isinstance(tags_value, list): + for raw_tag in tags_value: + if raw_tag is None: + continue + if not isinstance(raw_tag, str): + raw_tag = str(raw_tag) + tag = raw_tag.strip() + if not tag: + continue + + tag_strings.append(tag) + namespace, sep, value = tag.partition(":") + if sep and value: + ns = namespace.strip().lower() + value = value.strip() + if ns in {"artist", "creator", "author", "performer"}: + artists_from_tags.append(value) + elif ns in {"album", "series", "collection", "group"}: + albums_from_tags.append(value) + elif ns in {"genre", "rating"}: + genres_from_tags.append(value) + elif ns in {"comment", "description"} and not comment: + comment = value + elif ns in {"year", "date"} and not payload.get("year"): + set_field("date", value, 20) + else: + genres_from_tags.append(tag) + + if "artist" not in metadata and artists_from_tags: + set_field("artist", ", ".join(unique_preserve_order(artists_from_tags)[:3]), 512) + if "album" not in metadata and albums_from_tags: + set_field("album", unique_preserve_order(albums_from_tags)[0], 512) + if genres_from_tags: + set_field("genre", ", ".join(unique_preserve_order(genres_from_tags)[:5]), 256) + + if tag_strings: + joined_tags = ", ".join(tag_strings[:50]) + set_field("keywords", joined_tags, 2000) + if not comment: + comment = joined_tags + + if comment: + set_field("comment", comment, 2000) + set_field("description", comment, 2000) + + return metadata + + +def apply_mutagen_metadata(path: Path, metadata: Dict[str, str], fmt: str) -> None: + """Best-effort metadata writing for audio containers.""" + if fmt != "audio": + return + if not metadata: + return + if mutagen is None: + return + + try: + audio = mutagen.File(path, easy=True) # type: ignore[attr-defined] + except Exception as exc: # pragma: no cover - best effort only + log(f"mutagen load failed: {exc}", file=sys.stderr) + return + + if audio is None: + return + + field_map = { + "title": "title", + "artist": "artist", + "album": "album", + "genre": "genre", + "comment": "comment", + "description": "comment", + "date": "date", + } + + changed = False + for source_key, target_key in field_map.items(): + value = metadata.get(source_key) + if not value: + continue + try: + audio[target_key] = [value] + changed = True + except Exception: # pragma: no cover + continue + + if not changed: + return + + try: + audio.save() + except Exception as exc: # pragma: no cover + log(f"mutagen save failed: {exc}", file=sys.stderr) + + +def build_ffmpeg_command( + ffmpeg_path: str, + input_path: Path, + output_path: Path, + fmt: str, + max_width: int, + metadata: Optional[Dict[str, str]] = None, +) -> List[str]: + """Build an ffmpeg command line for common export formats.""" + cmd: List[str] = [ffmpeg_path, "-y", "-i", str(input_path)] + + if fmt in {"mp4", "webm"} and max_width and 
max_width > 0: + cmd.extend(["-vf", f"scale='min({max_width},iw)':-2"]) + + if metadata: + for key, value in metadata.items(): + cmd.extend(["-metadata", f"{key}={value}"]) + + # Video formats + if fmt == "mp4": + cmd.extend( + [ + "-c:v", + "libx265", + "-preset", + "medium", + "-crf", + "26", + "-tag:v", + "hvc1", + "-pix_fmt", + "yuv420p", + "-c:a", + "aac", + "-b:a", + "192k", + "-movflags", + "+faststart", + ] + ) + elif fmt == "webm": + cmd.extend( + [ + "-c:v", + "libvpx-vp9", + "-b:v", + "0", + "-crf", + "32", + "-c:a", + "libopus", + "-b:a", + "160k", + ] + ) + cmd.extend(["-f", "webm"]) + + # Audio formats + elif fmt == "mp3": + cmd.extend(["-vn", "-c:a", "libmp3lame", "-b:a", "192k"]) + cmd.extend(["-f", "mp3"]) + elif fmt == "flac": + cmd.extend(["-vn", "-c:a", "flac"]) + cmd.extend(["-f", "flac"]) + elif fmt == "wav": + cmd.extend(["-vn", "-c:a", "pcm_s16le"]) + cmd.extend(["-f", "wav"]) + elif fmt == "aac": + cmd.extend(["-vn", "-c:a", "aac", "-b:a", "192k"]) + cmd.extend(["-f", "adts"]) + elif fmt == "m4a": + cmd.extend(["-vn", "-c:a", "aac", "-b:a", "192k"]) + cmd.extend(["-f", "ipod"]) + elif fmt == "ogg": + cmd.extend(["-vn", "-c:a", "libvorbis", "-b:a", "192k"]) + cmd.extend(["-f", "ogg"]) + elif fmt == "opus": + cmd.extend(["-vn", "-c:a", "libopus", "-b:a", "192k"]) + cmd.extend(["-f", "opus"]) + elif fmt == "audio": + # Legacy format name for mp3 + cmd.extend(["-vn", "-c:a", "libmp3lame", "-b:a", "192k"]) + cmd.extend(["-f", "mp3"]) + elif fmt != "copy": + raise ValueError(f"Unsupported format: {fmt}") + + cmd.append(str(output_path)) + return cmd + + def field(obj: Any, name: str, value: Any = None) -> Any: """Get or set a field on dict or object. @@ -131,9 +355,9 @@ def value_normalize(value: str) -> str: def import_pending_sidecars(db_root: Path, db: Any) -> None: - """Import pending sidecars (.tag/.tags/.metadata/.notes) into the database.""" + """Import pending sidecars (.tag/.metadata/.notes) into the database.""" try: - sidecar_patterns = ['**/*.tag', '**/*.tags', '**/*.metadata', '**/*.notes'] + sidecar_patterns = ['**/*.tag', '**/*.metadata', '**/*.notes'] for pattern in sidecar_patterns: for sidecar_path in db_root.glob(pattern): @@ -174,7 +398,7 @@ def import_pending_sidecars(db_root: Path, db: Any) -> None: if not file_id: continue - if sidecar_path.suffix in {'.tag', '.tags'}: + if sidecar_path.suffix == '.tag': try: content = sidecar_path.read_text(encoding='utf-8') except Exception: @@ -395,7 +619,7 @@ def imdb_tag(imdb_id: str) -> Dict[str, object]: break if cast_names: _extend_tags(tags, "cast", cast_names) - return {"source": "imdb", "id": canonical_id, "tags": tags} + return {"source": "imdb", "id": canonical_id, "tag": tags} def fetch_musicbrainz_tags(mbid: str, entity: str) -> Dict[str, object]: if not musicbrainzngs: raise RuntimeError("musicbrainzngs package is not available") @@ -451,7 +675,7 @@ def fetch_musicbrainz_tags(mbid: str, entity: str) -> Dict[str, object]: for genre in genre_list: if isinstance(genre, dict) and genre.get("name"): _add_tag(tags, "genre", genre["name"]) - return {"source": "musicbrainz", "id": mbid, "tags": tags, "entity": entity} + return {"source": "musicbrainz", "id": mbid, "tag": tags, "entity": entity} def fetch_openlibrary_tags(ol_id: str) -> Dict[str, object]: @@ -461,7 +685,7 @@ def fetch_openlibrary_tags(ol_id: str) -> Dict[str, object]: ol_id: OpenLibrary ID (e.g., 'OL123456M' for a book) Returns: - Dictionary with 'tags' key containing list of extracted tags + Dictionary with 'tag' key containing list of 
extracted tags """ import urllib.request @@ -573,7 +797,7 @@ def fetch_openlibrary_tags(ol_id: str) -> Dict[str, object]: description = description.get("value") _add_tag(tags, "summary", description) - return {"source": "openlibrary", "id": ol_id, "tags": tags} + return {"source": "openlibrary", "id": ol_id, "tag": tags} def _append_unique(target: List[str], seen: Set[str], value: Optional[str]) -> None: @@ -1328,25 +1552,16 @@ def _normalise_string_list(values: Optional[Iterable[Any]]) -> List[str]: def _derive_sidecar_path(media_path: Path) -> Path: - """Return preferred sidecar path (.tag), falling back to legacy .tags if it exists. - - Keeps backward compatibility by preferring existing .tags, but new writes use .tag. - """ + """Return sidecar path (.tag).""" try: preferred = media_path.parent / (media_path.name + '.tag') - legacy = media_path.parent / (media_path.name + '.tags') except ValueError: preferred = media_path.with_name(media_path.name + '.tag') - legacy = media_path.with_name(media_path.name + '.tags') - - # Prefer legacy if it already exists to avoid duplicate sidecars - if legacy.exists(): - return legacy return preferred def _read_sidecar_metadata(sidecar_path: Path) -> tuple[Optional[str], List[str], List[str]]: - """Read hash, tags, and url from .tags sidecar file. + """Read hash, tags, and url from sidecar file. Consolidated with read_tags_from_file - this extracts extra metadata (hash, url). """ @@ -1389,7 +1604,7 @@ def _read_sidecar_metadata(sidecar_path: Path) -> tuple[Optional[str], List[str] def rename(file_path: Path, tags: Iterable[str]) -> Optional[Path]: """Rename a file based on title: tag in the tags list. - If a title: tag is present, renames the file and any .tags/.metadata sidecars. + If a title: tag is present, renames the file and any .tag/.metadata sidecars. 
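+
+    Example (illustrative; the exact new name depends on how the title value is sanitised):
+        >>> rename(Path("clip123.mp3"), ["title:My Song", "artist:Foo"])
+        # renames clip123.mp3 (and clip123.mp3.tag / clip123.mp3.metadata, if present)
+        # and returns the new Path, or None when no title: tag is found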
Args: file_path: Path to the file to potentially rename @@ -1432,10 +1647,10 @@ def rename(file_path: Path, tags: Iterable[str]) -> Optional[Path]: file_path.rename(new_path) debug(f"Renamed file: {old_name} → {new_name}", file=sys.stderr) - # Rename the .tags sidecar if it exists - old_tags_path = file_path.parent / (old_name + '.tags') + # Rename the .tag sidecar if it exists + old_tags_path = file_path.parent / (old_name + '.tag') if old_tags_path.exists(): - new_tags_path = file_path.parent / (new_name + '.tags') + new_tags_path = file_path.parent / (new_name + '.tag') if new_tags_path.exists(): try: new_tags_path.unlink() @@ -1508,14 +1723,6 @@ def write_tags(media_path: Path, tags: Iterable[str], url: Iterable[str], hash_v if lines: sidecar.write_text("\n".join(lines) + "\n", encoding="utf-8") debug(f"Tags: {sidecar}") - # Clean up legacy files - for legacy_path in [media_path.with_name(media_path.name + '.tags'), - media_path.with_name(media_path.name + '.tags.txt')]: - if legacy_path.exists() and legacy_path != sidecar: - try: - legacy_path.unlink() - except OSError: - pass else: try: sidecar.unlink() @@ -1691,7 +1898,7 @@ def _locate_sidecar_by_hash(hash_value: str, roots: Iterable[Path]) -> Optional[ continue if not root_path.exists() or not root_path.is_dir(): continue - for pattern in ('*.tags', '*.tags.txt'): + for pattern in ('*.tag',): try: iterator = root_path.rglob(pattern) except OSError: @@ -1711,80 +1918,35 @@ def _locate_sidecar_by_hash(hash_value: str, roots: Iterable[Path]) -> Optional[ def sync_sidecar(payload: Dict[str, Any]) -> Dict[str, Any]: path_value = payload.get('path') - sidecar_path: Optional[Path] = None - media_path: Optional[Path] = None - if path_value: - candidate = Path(str(path_value)).expanduser() - if candidate.suffix.lower() in {'.tags', '.tags.txt'}: - sidecar_path = candidate - else: - media_path = candidate - hash_input = payload.get('hash') - hash_value = None - if hash_input: - hash_value = _normalize_hash(hash_input) - tags = _normalise_string_list(payload.get('tags')) - url = _normalise_string_list(payload.get('url')) - if media_path is not None: - sidecar_path = _derive_sidecar_path(media_path) - search_roots = _collect_search_roots(payload) - if sidecar_path is None and hash_value: - located = _locate_sidecar_by_hash(hash_value, search_roots) - if located is not None: - sidecar_path = located - if sidecar_path is None: - if media_path is not None: - sidecar_path = _derive_sidecar_path(media_path) - elif hash_value: - return { - 'error': 'not_found', - 'hash': hash_value, - 'tags': tags, - 'url': url, - } - else: - raise ValueError('path or hash is required to synchronise sidecar') - existing_hash, existing_tags, existing_known = _read_sidecar_metadata(sidecar_path) - if not tags: - tags = existing_tags - if not url: - url = existing_known - hash_line = hash_value or existing_hash - title_value: Optional[str] = None - for tag in tags: - if isinstance(tag, str): - if tag.lower().startswith('title:'): - title_value = tag.split(':', 1)[1].strip() if ':' in tag else '' - if title_value == '': - title_value = None - break - lines: List[str] = [] - if hash_line: - lines.append(f'hash:{hash_line}') - lines.extend(tags) - lines.extend(f'url:{url}' for url in url) - sidecar_path.parent.mkdir(parents=True, exist_ok=True) - if lines: - sidecar_path.write_text('\n'.join(lines) + '\n', encoding='utf-8') + if not path_value: + raise ValueError('path is required to synchronise sidecar') + + candidate = Path(str(path_value)).expanduser() + if 
candidate.suffix.lower() == '.tag': + sidecar_path = candidate else: - try: - sidecar_path.unlink() - except FileNotFoundError: - pass + sidecar_path = _derive_sidecar_path(candidate) + + tags = _normalise_string_list(payload.get('tag')) + if not tags and sidecar_path.exists(): + tags = read_tags_from_file(sidecar_path) + + sidecar_path.parent.mkdir(parents=True, exist_ok=True) + if tags: + sidecar_path.write_text('\n'.join(tags) + '\n', encoding='utf-8') return { 'path': str(sidecar_path), - 'hash': hash_line, - 'tags': [], - 'url': [], - 'deleted': True, - 'title': title_value, + 'tag': tags, } + + try: + sidecar_path.unlink() + except FileNotFoundError: + pass return { 'path': str(sidecar_path), - 'hash': hash_line, - 'tags': tags, - 'url': url, - 'title': title_value, + 'tag': [], + 'deleted': True, } @@ -1901,16 +2063,16 @@ def apply_tag_mutation(payload: Dict[str, Any], operation: str = 'add') -> Dict[ result['updated'] = True return result else: # local - tags = _clean_existing_tags(payload.get('tags')) + tag = _clean_existing_tags(payload.get('tag')) if operation == 'add': new_tag = _normalize_tag(payload.get('new_tag')) if not new_tag: raise ValueError('new_tag is required') - added = new_tag not in tags + added = new_tag not in tag if added: - tags.append(new_tag) - return {'tags': tags, 'added': added} + tag.append(new_tag) + return {'tag': tag, 'added': added} else: # update old_tag = _normalize_tag(payload.get('old_tag')) @@ -1920,17 +2082,17 @@ def apply_tag_mutation(payload: Dict[str, Any], operation: str = 'add') -> Dict[ remaining = [] removed_count = 0 - for tag in tags: - if tag == old_tag: + for item in tag: + if item == old_tag: removed_count += 1 else: - remaining.append(tag) + remaining.append(item) if new_tag and removed_count > 0: remaining.extend([new_tag] * removed_count) - updated = removed_count > 0 or (bool(new_tag) and new_tag not in tags) - return {'tags': remaining, 'updated': updated, 'removed_count': removed_count} + updated = removed_count > 0 or (bool(new_tag) and new_tag not in tag) + return {'tag': remaining, 'updated': updated, 'removed_count': removed_count} def extract_ytdlp_tags(entry: Dict[str, Any]) -> List[str]: @@ -2181,13 +2343,13 @@ def merge_multiple_tag_lists( def read_tags_from_file(file_path: Path) -> List[str]: - """Read and normalize tags from .tags sidecar file. + """Read and normalize tags from .tag sidecar file. - This is the UNIFIED API for reading .tags files across all cmdlets. + This is the UNIFIED API for reading .tag files across all cmdlets. Handles normalization, deduplication, and format validation. Args: - file_path: Path to .tags sidecar file + file_path: Path to .tag sidecar file Returns: List of normalized tag strings @@ -2196,7 +2358,7 @@ def read_tags_from_file(file_path: Path) -> List[str]: FileNotFoundError: If file doesn't exist Example: - >>> tags = read_tags_from_file(Path('file.txt.tags')) + >>> tags = read_tags_from_file(Path('file.txt.tag')) >>> debug(tags) ['artist:Beatles', 'album:Abbey Road'] """ @@ -2386,13 +2548,13 @@ def write_tags_to_file( url: Optional[List[str]] = None, append: bool = False ) -> bool: - """Write tags to .tags sidecar file. + """Write tags to .tag sidecar file. - This is the UNIFIED API for writing .tags files across all cmdlets. + This is the UNIFIED API for writing .tag files across all cmdlets. Uses consistent format and handles file creation/overwriting. 
Args: - file_path: Path to .tags file (will be created if doesn't exist) + file_path: Path to .tag file (will be created if doesn't exist) tags: List of tags to write source_hashes: Optional source file hashes (written as source:hash1,hash2) url: Optional known url (each written on separate line as url:url) @@ -2406,7 +2568,7 @@ def write_tags_to_file( Example: >>> tags = ['artist:Beatles', 'album:Abbey Road'] - >>> write_tags_to_file(Path('file.txt.tags'), tags) + >>> write_tags_to_file(Path('file.txt.tag'), tags) True """ file_path = Path(file_path) @@ -2448,7 +2610,7 @@ def normalize_tags_from_source( Universal function to normalize tags from different sources: - yt-dlp entry dicts - Raw tag lists - - .tags file content strings + - .tag file content strings - Metadata dictionaries Args: @@ -2575,12 +2737,12 @@ def expand_metadata_tag(payload: Dict[str, Any]) -> Dict[str, Any]: else: data = fetch_musicbrainz_tags(request['id'], request['entity']) except Exception as exc: # pragma: no cover - network/service errors - return {'tags': tags, 'error': str(exc)} + return {'tag': tags, 'error': str(exc)} # Add tags from fetched data (no namespace, just unique append) - for tag in (data.get('tags') or []): + for tag in (data.get('tag') or []): _append_unique(tags, seen, tag) result = { - 'tags': tags, + 'tag': tags, 'source': request['source'], 'id': request['id'], } @@ -2597,7 +2759,7 @@ def build_remote_bundle(metadata: Optional[Dict[str, Any]], existing: Optional[S _append_unique(tags, seen, tag) # Add tags from various sources - for tag in (metadata.get("tags") or []): + for tag in (metadata.get("tag") or []): _append_unique(tags, seen, tag) for tag in (metadata.get("categories") or []): _append_unique(tags, seen, tag) @@ -2632,7 +2794,7 @@ def build_remote_bundle(metadata: Optional[Dict[str, Any]], existing: Optional[S source_url = context.get("source_url") or metadata.get("original_url") or metadata.get("webpage_url") or metadata.get("url") clean_title = value_normalize(str(title_value)) if title_value is not None else None result = { - "tags": tags, + "tag": tags, "title": clean_title, "source_url": _sanitize_url(source_url), "duration": _coerce_duration(metadata), @@ -2747,9 +2909,9 @@ def hydrus_fetch_url(payload: Optional[str] = typer.Option(None, "--payload", he debug(json.dumps(error_payload, ensure_ascii=False), flush=True) raise typer.Exit(code=1) -@app.command(name="sync-sidecar", help="Synchronise .tags sidecar with supplied data") +@app.command(name="sync-sidecar", help="Synchronise .tag sidecar with supplied data") def sync_sidecar_cmd(payload: Optional[str] = typer.Option(None, "--payload", help="JSON payload; reads stdin if omitted")): - """Synchronise .tags sidecar with supplied data.""" + """Synchronise .tag sidecar with supplied data.""" try: payload_data = _load_payload(payload) result = sync_sidecar(payload_data) diff --git a/models.py b/models.py index 3fb3b5d..dfc11ea 100644 --- a/models.py +++ b/models.py @@ -14,7 +14,7 @@ from typing import Any, Callable, Dict, List, Optional, Protocol, TextIO, Tuple @dataclass(slots=True) class PipeObject: - """Unified pipeline object for tracking files, metadata, tags, and relationships through the pipeline. + """Unified pipeline object for tracking files, metadata, tag values, and relationships through the pipeline. This is the single source of truth for all result data in the pipeline. Uses the hash+store canonical pattern for file identification. 
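A minimal sketch of what the rename means for callers (assuming only the constructor arguments shown in this dataclass; the remaining fields keep their defaults):

    obj = PipeObject(hash="a" * 64, store="local", tag=["artist:Beatles", "album:Abbey Road"])
    obj.tag.append("genre:rock")   # was obj.tags before this change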
@@ -22,7 +22,7 @@ class PipeObject: Attributes: hash: SHA-256 hash of the file (canonical identifier) store: Storage backend name (e.g., 'default', 'hydrus', 'test', 'home') - tags: List of extracted or assigned tags + tag: List of extracted or assigned tag values title: Human-readable title if applicable source_url: URL where the object came from duration: Duration in seconds if applicable @@ -37,7 +37,7 @@ class PipeObject: """ hash: str store: str - tags: List[str] = field(default_factory=list) + tag: List[str] = field(default_factory=list) title: Optional[str] = None url: Optional[str] = None source_url: Optional[str] = None @@ -90,9 +90,9 @@ class PipeObject: hash_display = self.hash or "N/A" store_display = self.store or "N/A" title_display = self.title or "N/A" - tags_display = ", ".join(self.tags[:3]) if self.tags else "[]" - if len(self.tags) > 3: - tags_display += f" (+{len(self.tags) - 3} more)" + tag_display = ", ".join(self.tag[:3]) if self.tag else "[]" + if len(self.tag) > 3: + tag_display += f" (+{len(self.tag) - 3} more)" file_path_display = self.path or "N/A" if file_path_display != "N/A" and len(file_path_display) > 50: file_path_display = "..." + file_path_display[-47:] @@ -120,7 +120,7 @@ class PipeObject: debug(f"│ Hash : {hash_display:<48}│") debug(f"│ Store : {store_display:<48}│") debug(f"│ Title : {title_display:<48}│") - debug(f"│ Tags : {tags_display:<48}│") + debug(f"│ Tag : {tag_display:<48}│") debug(f"│ URL : {url_display:<48}│") debug(f"│ File Path : {file_path_display:<48}│") debug(f"│ Relationships: {relationships_display:<47}│") @@ -164,8 +164,8 @@ class PipeObject: "store": self.store, } - if self.tags: - data["tags"] = self.tags + if self.tag: + data["tag"] = self.tag if self.title: data["title"] = self.title if self.url: @@ -298,7 +298,7 @@ class DownloadMediaResult: """Result of a successful media download.""" path: Path info: Dict[str, Any] - tags: List[str] + tag: List[str] source_url: Optional[str] hash_value: Optional[str] = None paths: Optional[List[Path]] = None # For multiple files (e.g., section downloads) @@ -677,7 +677,7 @@ class TUIResultCard: subtitle: Optional[str] = None metadata: Optional[Dict[str, str]] = None media_kind: Optional[str] = None - tags: Optional[List[str]] = None + tag: Optional[List[str]] = None file_hash: Optional[str] = None file_size: Optional[str] = None duration: Optional[str] = None @@ -686,8 +686,8 @@ class TUIResultCard: """Initialize default values.""" if self.metadata is None: self.metadata = {} - if self.tags is None: - self.tags = [] + if self.tag is None: + self.tag = [] @dataclass diff --git a/pipeline.py b/pipeline.py index 8d3be86..249c0c4 100644 --- a/pipeline.py +++ b/pipeline.py @@ -197,7 +197,7 @@ def store_value(key: str, value: Any) -> None: def load_value(key: str, default: Any = None) -> Any: """Retrieve a value stored by an earlier pipeline stage. - Supports dotted path notation for nested access (e.g., "metadata.tags" or "items.0"). + Supports dotted path notation for nested access (e.g., "metadata.tag" or "items.0"). Args: key: Variable name or dotted path (e.g., "my_var", "metadata.title", "list.0") @@ -447,7 +447,7 @@ def set_last_result_table(result_table: Optional[Any], items: Optional[List[Any] Also maintains a history stack for @.. navigation (restore previous result table). Only selectable commands (search-file, download-data) should call this to create history. - For action commands (delete-tag, add-tag, etc), use set_last_result_table_preserve_history() instead. 
+ For action commands (delete-tag, add-tags, etc), use set_last_result_table_preserve_history() instead. Args: result_table: The ResultTable object that was displayed (or None) @@ -524,7 +524,7 @@ def set_last_result_table_overlay(result_table: Optional[Any], items: Optional[L def set_last_result_table_preserve_history(result_table: Optional[Any], items: Optional[List[Any]] = None, subject: Optional[Any] = None) -> None: """Update the last result table WITHOUT adding to history. - Used for action commands (delete-tag, add-tag, etc.) that modify data but shouldn't + Used for action commands (delete-tag, add-tags, etc.) that modify data but shouldn't create history entries. This allows @.. to navigate search results, not undo stacks. Args: @@ -543,7 +543,7 @@ def set_last_result_items_only(items: Optional[List[Any]]) -> None: """Store items for @N selection WITHOUT affecting history or saved search data. Used for display-only commands (get-tag, get-url, etc.) and action commands - (delete-tag, add-tag, etc.) that emit results but shouldn't affect history. + (delete-tag, add-tags, etc.) that emit results but shouldn't affect history. These items are available for @1, @2, etc. selection in the next command, but are NOT saved to history. This preserves search context for @.. navigation. diff --git a/result_table.py b/result_table.py index 1858e7a..9fd107a 100644 --- a/result_table.py +++ b/result_table.py @@ -79,7 +79,7 @@ class TUIResultCard: subtitle: Optional[str] = None metadata: Optional[Dict[str, str]] = None media_kind: Optional[str] = None - tags: Optional[List[str]] = None + tag: Optional[List[str]] = None file_hash: Optional[str] = None file_size: Optional[str] = None duration: Optional[str] = None @@ -88,8 +88,8 @@ class TUIResultCard: """Initialize default values.""" if self.metadata is None: self.metadata = {} - if self.tags is None: - self.tags = [] + if self.tag is None: + self.tag = [] @dataclass @@ -164,7 +164,7 @@ class ResultTable: >>> row = result_table.add_row() >>> row.add_column("File", "document.pdf") >>> row.add_column("Size", "2.5 MB") - >>> row.add_column("Tags", "pdf, document") + >>> row.add_column("Tag", "pdf, document") >>> print(result_table) """ @@ -425,12 +425,12 @@ class ResultTable: if hasattr(result, 'media_kind') and result.media_kind: row.add_column("Type", result.media_kind) - # Tags summary + # Tag summary if hasattr(result, 'tag_summary') and result.tag_summary: - tags_str = str(result.tag_summary) - if len(tags_str) > 60: - tags_str = tags_str[:57] + "..." - row.add_column("Tags", tags_str) + tag_str = str(result.tag_summary) + if len(tag_str) > 60: + tag_str = tag_str[:57] + "..." + row.add_column("Tag", tag_str) # Duration (for media) if hasattr(result, 'duration_seconds') and result.duration_seconds: @@ -494,7 +494,7 @@ class ResultTable: """Extract and add TagItem fields to row (compact tag display). Shows the Tag column with the tag name and Source column to identify - which storage backend the tags come from (Hydrus, local, etc.). + which storage backend the tag values come from (Hydrus, local, etc.). All data preserved in TagItem for piping and operations. Use @1 to select a tag, @{1,3,5} to select multiple. """ @@ -505,7 +505,7 @@ class ResultTable: tag_name = tag_name[:57] + "..." row.add_column("Tag", tag_name) - # Source/Store (where the tags come from) + # Source/Store (where the tag values come from) if hasattr(item, 'source') and item.source: row.add_column("Store", item.source) @@ -527,12 +527,12 @@ class ResultTable: file_str = "..." 
+ file_str[-57:] row.add_column("Path", file_str) - # Tags - if hasattr(obj, 'tags') and obj.tags: - tags_str = ", ".join(obj.tags[:3]) # First 3 tags - if len(obj.tags) > 3: - tags_str += f", +{len(obj.tags) - 3} more" - row.add_column("Tags", tags_str) + # Tag + if hasattr(obj, 'tag') and obj.tag: + tag_str = ", ".join(obj.tag[:3]) # First 3 tag values + if len(obj.tag) > 3: + tag_str += f", +{len(obj.tag) - 3} more" + row.add_column("Tag", tag_str) # Duration if hasattr(obj, 'duration') and obj.duration: @@ -560,7 +560,7 @@ class ResultTable: - type | media_kind | kind - target | path | url - hash | hash_hex | file_hash - - tags | tag_summary + - tag | tag_summary - detail | description """ # Helper to determine if a field should be hidden from display @@ -568,7 +568,7 @@ class ResultTable: # Hide internal/metadata fields hidden_fields = { '__', 'id', 'action', 'parent_id', 'is_temp', 'path', 'extra', - 'target', 'hash', 'hash_hex', 'file_hash', 'tags', 'tag_summary', 'name' + 'target', 'hash', 'hash_hex', 'file_hash', 'tag', 'tag_summary', 'name' } if isinstance(field_name, str): if field_name.startswith('__'): @@ -1220,12 +1220,12 @@ class ResultTable: title = col.value metadata[col.name] = col.value - # Extract tags if present - tags = [] - if "tags" in metadata: - tags_val = metadata["tags"] - if tags_val: - tags = [t.strip() for t in tags_val.split(",")][:5] + # Extract tag values if present + tag = [] + if "Tag" in metadata: + tag_val = metadata["Tag"] + if tag_val: + tag = [t.strip() for t in tag_val.split(",")][:5] # Try to find useful metadata fields subtitle = metadata.get("Artist", metadata.get("Author", "")) @@ -1239,7 +1239,7 @@ class ResultTable: subtitle=subtitle, metadata=metadata, media_kind=media_kind, - tags=tags, + tag=tag, file_hash=file_hash or None, file_size=file_size or None, duration=duration or None diff --git a/scripts/remote_storage_server.py b/scripts/remote_storage_server.py index 8d5ba20..426856a 100644 --- a/scripts/remote_storage_server.py +++ b/scripts/remote_storage_server.py @@ -222,7 +222,7 @@ def create_app(): "path": str(file_path), "size": file_path.stat().st_size, "metadata": metadata, - "tags": tags + "tag": tags }), 200 except Exception as e: logger.error(f"Get metadata error: {e}", exc_info=True) @@ -238,7 +238,7 @@ def create_app(): data = request.get_json() or {} file_path_str = data.get('path') - tags = data.get('tags', []) + tags = data.get('tag', []) url = data.get('url', []) if not file_path_str: @@ -289,7 +289,7 @@ def create_app(): return jsonify({"error": "File not found"}), 404 tags = db.get_tags(file_path) - return jsonify({"hash": file_hash, "tags": tags}), 200 + return jsonify({"hash": file_hash, "tag": tags}), 200 except Exception as e: logger.error(f"Get tags error: {e}", exc_info=True) return jsonify({"error": f"Failed: {str(e)}"}), 500 @@ -302,11 +302,11 @@ def create_app(): from API.folder import API_folder_store data = request.get_json() or {} - tags = data.get('tags', []) + tags = data.get('tag', []) mode = data.get('mode', 'add') if not tags: - return jsonify({"error": "Tags required"}), 400 + return jsonify({"error": "Tag required"}), 400 try: with API_folder_store(STORAGE_PATH) as db: @@ -318,7 +318,7 @@ def create_app(): db.remove_tags(file_path, db.get_tags(file_path)) db.add_tags(file_path, tags) - return jsonify({"hash": file_hash, "tags_added": len(tags), "mode": mode}), 200 + return jsonify({"hash": file_hash, "tag_added": len(tags), "mode": mode}), 200 except Exception as e: logger.error(f"Add tags error: {e}", 
exc_info=True) return jsonify({"error": f"Failed: {str(e)}"}), 500 @@ -330,7 +330,7 @@ def create_app(): """Remove tags from a file.""" from API.folder import API_folder_store - tags_str = request.args.get('tags', '') + tags_str = request.args.get('tag', '') try: with API_folder_store(STORAGE_PATH) as db: diff --git a/search_file.py b/search_file.py index 797d0a2..81c0f0c 100644 --- a/search_file.py +++ b/search_file.py @@ -1,4 +1,4 @@ -"""Search-file cmdlet: Search for files by query, tags, size, type, duration, etc.""" +"""Search-file cmdlet: Search for files by query, tag, size, type, duration, etc.""" from __future__ import annotations from typing import Any, Dict, Sequence, List, Optional, Tuple @@ -43,9 +43,9 @@ except Exception: # pragma: no cover resolve_output_dir = None # type: ignore try: - from API.HydrusNetwork import HydrusClient, HydrusRequestError + from API.HydrusNetwork import HydrusNetwork, HydrusRequestError except ImportError: # pragma: no cover - HydrusClient = None # type: ignore + HydrusNetwork = None # type: ignore HydrusRequestError = RuntimeError # type: ignore try: @@ -63,7 +63,7 @@ class SearchRecord: path: str size_bytes: int | None = None duration_seconds: str | None = None - tags: str | None = None + tag: str | None = None hash: str | None = None def as_dict(self) -> dict[str, str]: @@ -72,8 +72,8 @@ class SearchRecord: payload["size"] = str(self.size_bytes) if self.duration_seconds: payload["duration"] = self.duration_seconds - if self.tags: - payload["tags"] = self.tags + if self.tag: + payload["tag"] = self.tag if self.hash: payload["hash"] = self.hash return payload @@ -93,7 +93,7 @@ class ResultItem: duration_seconds: Optional[float] = None size_bytes: Optional[int] = None full_metadata: Optional[Dict[str, Any]] = None - tags: Optional[set[str]] = field(default_factory=set) + tag: Optional[set[str]] = field(default_factory=set) relationships: Optional[List[str]] = field(default_factory=list) known_urls: Optional[List[str]] = field(default_factory=list) @@ -128,9 +128,9 @@ class ResultItem: if self.hash: payload["hash"] = self.hash if self.tag_summary: - payload["tags"] = self.tag_summary - if self.tags: - payload["tags_set"] = list(self.tags) + payload["tag_summary"] = self.tag_summary + if self.tag: + payload["tag"] = list(self.tag) if self.relationships: payload["relationships"] = self.relationships if self.known_urls: @@ -411,7 +411,7 @@ class Search_File(Cmdlet): return 1 searched_backends.append(backend_to_search) target_backend = storage[backend_to_search] - results = target_backend.search_store(query, limit=limit) + results = target_backend.search(query, limit=limit) else: from API.HydrusNetwork import is_hydrus_available hydrus_available = is_hydrus_available(config or {}) @@ -422,7 +422,7 @@ class Search_File(Cmdlet): continue searched_backends.append(backend_name) try: - backend_results = storage[backend_name].search_store(query, limit=limit - len(all_results)) + backend_results = storage[backend_name].search(query, limit=limit - len(all_results)) if backend_results: all_results.extend(backend_results) if len(all_results) >= limit: