diff --git a/helper/http_client.py b/API/HTTP.py similarity index 100% rename from helper/http_client.py rename to API/HTTP.py diff --git a/helper/hydrus.py b/API/HydrusNetwork.py similarity index 99% rename from helper/hydrus.py rename to API/HydrusNetwork.py index 016d6ad..0e631e3 100644 --- a/helper/hydrus.py +++ b/API/HydrusNetwork.py @@ -10,7 +10,7 @@ import subprocess import sys import time -from helper.logger import log, debug +from SYS.logger import log, debug import tempfile import logging from dataclasses import dataclass, field @@ -26,7 +26,7 @@ try: # Optional metadata helper for audio files except ImportError: # pragma: no cover - best effort mutagen = None # type: ignore -from .utils import ( +from SYS.utils import ( decode_cbor, jsonify, ensure_directory, @@ -34,7 +34,7 @@ from .utils import ( unique_path, unique_preserve_order, ) -from .http_client import HTTPClient +from .HTTP import HTTPClient class HydrusRequestError(RuntimeError): @@ -1526,7 +1526,7 @@ CHUNK_SIZE = 1024 * 1024 # 1 MiB def download_hydrus_file(file_url: str, headers: dict[str, str], destination: Path, timeout: float) -> int: """Download *file_url* into *destination* returning the byte count with progress bar.""" - from .progress import print_progress, print_final_progress + from SYS.progress import print_progress, print_final_progress downloaded = 0 start_time = time.time() diff --git a/helper/alldebrid.py b/API/alldebrid.py similarity index 99% rename from helper/alldebrid.py rename to API/alldebrid.py index 7a80885..3f91b16 100644 --- a/helper/alldebrid.py +++ b/API/alldebrid.py @@ -9,13 +9,13 @@ from __future__ import annotations import json import sys -from helper.logger import log, debug +from SYS.logger import log, debug import time import logging from pathlib import Path from typing import Any, Dict, Optional, Set, List, Sequence from urllib.parse import urlencode, urlparse -from .http_client import HTTPClient +from .HTTP import HTTPClient logger = logging.getLogger(__name__) diff --git a/helper/archive_client.py b/API/archive_client.py similarity index 99% rename from helper/archive_client.py rename to API/archive_client.py index d67e415..3278e80 100644 --- a/helper/archive_client.py +++ b/API/archive_client.py @@ -23,7 +23,7 @@ from typing import Any, Dict, List, Optional, Sequence, Tuple import requests -from helper.logger import log, debug +from SYS.logger import log, debug try: from Crypto.Cipher import AES # type: ignore diff --git a/helper/folder_store.py b/API/folder.py similarity index 99% rename from helper/folder_store.py rename to API/folder.py index 787eddc..34d1f7b 100644 --- a/helper/folder_store.py +++ b/API/folder.py @@ -19,7 +19,7 @@ from datetime import datetime from pathlib import Path from typing import Optional, Dict, Any, List, Tuple, Set -from .utils import sha256_file +from SYS.utils import sha256_file logger = logging.getLogger(__name__) WORKER_LOG_MAX_ENTRIES = 99 @@ -143,7 +143,7 @@ def has_sidecar(media_path: Path) -> bool: """Check if a media file has a sidecar.""" return find_sidecar(media_path) is not None -class FolderDB: +class API_folder_store: """SQLite database for caching local library metadata.""" DB_NAME = "medios-macina.db" @@ -434,7 +434,7 @@ class FolderDB: # Populate type column from ext if not already populated if 'type' in existing_columns and 'ext' in existing_columns: try: - from helper.utils_constant import get_type_from_ext + from SYS.utils_constant import get_type_from_ext cursor.execute("SELECT hash, ext FROM metadata WHERE type IS NULL OR type = 
''") rows = cursor.fetchall() for file_hash, ext in rows: @@ -581,7 +581,7 @@ class FolderDB: file_type = metadata.get('type') ext = metadata.get('ext') if not file_type and ext: - from helper.utils_constant import get_type_from_ext + from SYS.utils_constant import get_type_from_ext file_type = get_type_from_ext(str(ext)) cursor.execute(""" @@ -635,7 +635,7 @@ class FolderDB: file_type = metadata.get('type') ext = metadata.get('ext') if not file_type and ext: - from helper.utils_constant import get_type_from_ext + from SYS.utils_constant import get_type_from_ext file_type = get_type_from_ext(str(ext)) cursor.execute(""" @@ -1535,7 +1535,7 @@ class DatabaseAPI: def __init__(self, search_dir: Path): self.search_dir = search_dir - self.db = FolderDB(search_dir) + self.db = API_folder_store(search_dir) def __enter__(self): self.db.__enter__() @@ -1737,7 +1737,7 @@ class LocalLibraryInitializer: def __init__(self, library_root: Path): """Initialize the database scanner.""" self.library_root = Path(library_root) - self.db = FolderDB(library_root) + self.db = API_folder_store(library_root) self.stats = { 'files_scanned': 0, 'files_new': 0, 'files_existing': 0, 'sidecars_imported': 0, 'sidecars_deleted': 0, @@ -2021,7 +2021,7 @@ class LocalLibraryInitializer: logger.error(f"Error cleaning up orphaned sidecars: {e}", exc_info=True) -def migrate_tags_to_db(library_root: Path, db: FolderDB) -> int: +def migrate_tags_to_db(library_root: Path, db: API_folder_store) -> int: """Migrate .tags files to the database.""" migrated_count = 0 @@ -2050,7 +2050,7 @@ def migrate_tags_to_db(library_root: Path, db: FolderDB) -> int: return migrated_count -def migrate_metadata_to_db(library_root: Path, db: FolderDB) -> int: +def migrate_metadata_to_db(library_root: Path, db: API_folder_store) -> int: """Migrate .metadata files to the database.""" migrated_count = 0 @@ -2088,13 +2088,13 @@ def _parse_metadata_file(content: str) -> Dict[str, Any]: return {} -def migrate_all(library_root: Path, db: Optional[FolderDB] = None) -> Dict[str, int]: +def migrate_all(library_root: Path, db: Optional[API_folder_store] = None) -> Dict[str, int]: """Migrate all sidecar files to database.""" should_close = db is None try: if db is None: - db = FolderDB(library_root) + db = API_folder_store(library_root) return { 'tags': migrate_tags_to_db(library_root, db), @@ -2115,11 +2115,11 @@ class LocalLibrarySearchOptimizer: def __init__(self, library_root: Path): """Initialize the search optimizer.""" self.library_root = Path(library_root) - self.db: Optional[FolderDB] = None + self.db: Optional[API_folder_store] = None def __enter__(self): """Context manager entry.""" - self.db = FolderDB(self.library_root) + self.db = API_folder_store(self.library_root) return self def __exit__(self, exc_type, exc_val, exc_tb): diff --git a/CLI.py b/CLI.py index da03ae6..9f37bde 100644 --- a/CLI.py +++ b/CLI.py @@ -15,7 +15,7 @@ from typing import Any, Dict, List, Optional, Sequence, Set, TextIO, TYPE_CHECKI import time import threading -from helper.logger import debug +from SYS.logger import debug try: import typer @@ -48,17 +48,17 @@ except ImportError: # pragma: no cover - optional dependency try: - from helper.worker_manager import WorkerManager + from SYS.worker_manager import WorkerManager except ImportError: # pragma: no cover - optional dependency WorkerManager = None # type: ignore try: - from helper.background_notifier import ensure_background_notifier + from SYS.background_notifier import ensure_background_notifier except ImportError: # 
pragma: no cover - optional dependency ensure_background_notifier = lambda *_, **__: None # type: ignore if TYPE_CHECKING: # pragma: no cover - typing helper - from helper.worker_manager import WorkerManager as WorkerManagerType + from SYS.worker_manager import WorkerManager as WorkerManagerType else: WorkerManagerType = Any @@ -68,7 +68,7 @@ from typing import Callable from config import get_local_storage_path, load_config -from helper.cmdlet_catalog import ( +from cmdlets.catalog import ( import_cmd_module as _catalog_import_cmd_module, list_cmdlet_metadata as _catalog_list_cmdlet_metadata, list_cmdlet_names as _catalog_list_cmdlet_names, @@ -507,8 +507,8 @@ def _get_arg_choices(cmd_name: str, arg_name: str) -> List[str]: # Support both "storage" and "store" argument names if normalized_arg in ("storage", "store"): try: - from helper.store import FileStorage - storage = FileStorage(_load_cli_config(), suppress_debug=True) + from Store import Store + storage = Store(_load_cli_config(), suppress_debug=True) backends = storage.list_backends() if backends: return backends @@ -518,15 +518,15 @@ def _get_arg_choices(cmd_name: str, arg_name: str) -> List[str]: # Dynamic search providers if normalized_arg == "provider": try: - from helper.provider import list_providers - providers = list_providers(_load_cli_config()) + from Provider.registry import list_search_providers + providers = list_search_providers(_load_cli_config()) available = [name for name, is_ready in providers.items() if is_ready] provider_choices = sorted(available) if available else sorted(providers.keys()) except Exception: provider_choices = [] try: - from helper.metadata_search import list_metadata_providers + from Provider.metadata_provider import list_metadata_providers meta_providers = list_metadata_providers(_load_cli_config()) meta_available = [n for n, ready in meta_providers.items() if ready] meta_choices = sorted(meta_available) if meta_available else sorted(meta_providers.keys()) @@ -539,7 +539,7 @@ def _get_arg_choices(cmd_name: str, arg_name: str) -> List[str]: if normalized_arg == "scrape": try: - from helper.metadata_search import list_metadata_providers + from Provider.metadata_provider import list_metadata_providers meta_providers = list_metadata_providers(_load_cli_config()) if meta_providers: return sorted(meta_providers.keys()) @@ -687,7 +687,7 @@ def _create_cmdlet_cli(): # Initialize debug logging if enabled if config: - from helper.logger import set_debug + from SYS.logger import set_debug debug_enabled = config.get("debug", False) set_debug(debug_enabled) @@ -772,14 +772,14 @@ def _create_cmdlet_cli(): try: if config: - from helper.logger import set_debug, debug + from SYS.logger import set_debug, debug debug_enabled = config.get("debug", False) set_debug(debug_enabled) if debug_enabled: debug("✓ Debug logging enabled") try: - from helper.hydrus import get_client + from API.HydrusNetwork import get_client # get_client(config) # Pre-acquire and cache session key # debug("✓ Hydrus session key acquired") except RuntimeError: @@ -859,7 +859,7 @@ def _create_cmdlet_cli(): except Exception as e: if config: - from helper.logger import debug # local import to avoid failing when debug disabled + from SYS.logger import debug # local import to avoid failing when debug disabled debug(f"⚠ Could not check service availability: {e}") except Exception: pass # Silently ignore if config loading fails @@ -1263,16 +1263,24 @@ def _execute_pipeline(tokens: list): if table_for_stage: 
ctx.set_current_stage_table(table_for_stage) - # Special check for YouTube search results BEFORE command expansion - # If we are selecting from a YouTube search, we want to force auto-piping to .pipe + # Special check for table-specific behavior BEFORE command expansion + # If we are selecting from a YouTube or Soulseek search, we want to force auto-piping to .pipe # instead of trying to expand to a command (which search-file doesn't support well for re-execution) source_cmd = ctx.get_current_stage_table_source_command() source_args = ctx.get_current_stage_table_source_args() - if source_cmd == 'search-file' and source_args and 'youtube' in source_args: + # Check table property + current_table = ctx.get_current_stage_table() + table_type = current_table.table if current_table and hasattr(current_table, 'table') else None + + # Logic based on table type + if table_type == 'youtube' or table_type == 'soulseek': # Force fallback to item-based selection so we can auto-pipe command_expanded = False # Skip the command expansion block below + elif source_cmd == 'search-file' and source_args and 'youtube' in source_args: + # Legacy check for youtube + command_expanded = False else: # Try command-based expansion first if we have source command info command_expanded = False @@ -1335,16 +1343,29 @@ def _execute_pipeline(tokens: list): log_msg = f"Applied @N selection {' | '.join(selection_parts)}" worker_manager.log_step(pipeline_session.worker_id, log_msg) if pipeline_session and worker_manager else None - # Special case for youtube search results in fallback mode: auto-pipe to .pipe + # Special case for table-specific auto-piping # This handles the case where @N is the ONLY stage (e.g. user typed "@1") # In this case, stages is [['@1']], but we are in the fallback block because command_expanded is False - # We need to check if the source was youtube search + + # Check table type + current_table = ctx.get_current_stage_table() + if not current_table: + current_table = ctx.get_last_result_table() + + table_type = current_table.table if current_table and hasattr(current_table, 'table') else None + source_cmd = ctx.get_last_result_table_source_command() source_args = ctx.get_last_result_table_source_args() - if source_cmd == 'search-file' and source_args and 'youtube' in source_args: - # Only auto-pipe if no other stages follow (stages is empty because we popped the selection) - if not stages: + if not stages: + if table_type == 'youtube': + print(f"Auto-piping YouTube selection to .pipe") + stages.append(['.pipe']) + elif table_type == 'soulseek': + print(f"Auto-piping Soulseek selection to download-provider") + stages.append(['download-provider']) + elif source_cmd == 'search-file' and source_args and 'youtube' in source_args: + # Legacy check print(f"Auto-piping YouTube selection to .pipe") stages.append(['.pipe']) @@ -1606,8 +1627,30 @@ def _execute_pipeline(tokens: list): else: if cmd_name in selectable_commands: table = ResultTable(table_title) + + # Detect table type from items + first_table = None + consistent = True + for emitted in pipeline_ctx.emits: table.add_result(emitted) + + # Check for table property + item_table = None + if isinstance(emitted, dict): + item_table = emitted.get('table') + else: + item_table = getattr(emitted, 'table', None) + + if item_table: + if first_table is None: + first_table = item_table + elif first_table != item_table: + consistent = False + + if consistent and first_table: + table.set_table(first_table) + table.set_source_command(cmd_name, stage_args) 
ctx.set_last_result_table(table, pipeline_ctx.emits) elif cmd_name in display_only_commands: @@ -1772,7 +1815,7 @@ def _execute_cmdlet(cmd_name: str, args: list): # Ensure native commands (cmdnats) are loaded try: - from helper.cmdlet_catalog import ensure_registry_loaded as _ensure_registry_loaded + from cmdlets.catalog import ensure_registry_loaded as _ensure_registry_loaded _ensure_registry_loaded() except Exception: pass @@ -1781,7 +1824,7 @@ def _execute_cmdlet(cmd_name: str, args: list): cmd_fn = REGISTRY.get(cmd_name) if not cmd_fn: # Attempt lazy import of the module and retry - from helper.cmdlet_catalog import import_cmd_module as _catalog_import + from cmdlets.catalog import import_cmd_module as _catalog_import try: mod = _catalog_import(cmd_name) data = getattr(mod, "CMDLET", None) if mod else None diff --git a/LUA/main.lua b/MPV/LUA/main.lua similarity index 100% rename from LUA/main.lua rename to MPV/LUA/main.lua diff --git a/MPV/__init__.py b/MPV/__init__.py new file mode 100644 index 0000000..dd2f442 --- /dev/null +++ b/MPV/__init__.py @@ -0,0 +1,5 @@ +from MPV.mpv_ipc import MPV + +__all__ = [ + "MPV", +] diff --git a/helper/mpv_ipc.py b/MPV/mpv_ipc.py similarity index 51% rename from helper/mpv_ipc.py rename to MPV/mpv_ipc.py index 5c73a10..4e8a899 100644 --- a/helper/mpv_ipc.py +++ b/MPV/mpv_ipc.py @@ -11,16 +11,17 @@ import json import os import platform import socket +import subprocess import time as _time from pathlib import Path -from typing import Any, Dict, Optional, List +from typing import Any, Dict, Optional, List, BinaryIO, cast -from helper.logger import debug +from SYS.logger import debug # Fixed pipe name for persistent MPV connection across all Python sessions FIXED_IPC_PIPE_NAME = "mpv-medeia-macina" -MPV_LUA_SCRIPT_PATH = str(Path(__file__).resolve().parent.parent / "LUA" / "main.lua") +MPV_LUA_SCRIPT_PATH = str(Path(__file__).resolve().parent / "LUA" / "main.lua") class MPVIPCError(Exception): @@ -28,6 +29,172 @@ class MPVIPCError(Exception): pass +class MPV: + """High-level MPV controller for this app. + + Responsibilities: + - Own the IPC pipe/socket path + - Start MPV with the bundled Lua script + - Query playlist and currently playing item via IPC + + This class intentionally stays "dumb": it does not implement app logic. + App behavior is driven by cmdlets (e.g. `.pipe`) and the bundled Lua script. 
+ """ + + def __init__( + self, + ipc_path: Optional[str] = None, + lua_script_path: Optional[str | Path] = None, + timeout: float = 5.0, + ) -> None: + self.timeout = timeout + self.ipc_path = ipc_path or get_ipc_pipe_path() + + if lua_script_path is None: + lua_script_path = MPV_LUA_SCRIPT_PATH + lua_path = Path(str(lua_script_path)).resolve() + self.lua_script_path = str(lua_path) + + def client(self) -> "MPVIPCClient": + return MPVIPCClient(socket_path=self.ipc_path, timeout=self.timeout) + + def is_running(self) -> bool: + client = self.client() + try: + ok = client.connect() + return bool(ok) + finally: + client.disconnect() + + def send(self, command: Dict[str, Any] | List[Any], silent: bool = False) -> Optional[Dict[str, Any]]: + client = self.client() + try: + if not client.connect(): + return None + return client.send_command(command) + except Exception as exc: + if not silent: + debug(f"MPV IPC error: {exc}") + return None + finally: + client.disconnect() + + def get_property(self, name: str, default: Any = None) -> Any: + resp = self.send({"command": ["get_property", name]}) + if resp and resp.get("error") == "success": + return resp.get("data", default) + return default + + def set_property(self, name: str, value: Any) -> bool: + resp = self.send({"command": ["set_property", name, value]}) + return bool(resp and resp.get("error") == "success") + + def get_playlist(self, silent: bool = False) -> Optional[List[Dict[str, Any]]]: + resp = self.send({"command": ["get_property", "playlist"], "request_id": 100}, silent=silent) + if resp is None: + return None + if resp.get("error") == "success": + data = resp.get("data", []) + return data if isinstance(data, list) else [] + return [] + + def get_now_playing(self) -> Optional[Dict[str, Any]]: + if not self.is_running(): + return None + + playlist = self.get_playlist(silent=True) or [] + pos = self.get_property("playlist-pos", None) + path = self.get_property("path", None) + title = self.get_property("media-title", None) + + effective_path = _unwrap_memory_target(path) if isinstance(path, str) else path + + current_item: Optional[Dict[str, Any]] = None + if isinstance(pos, int) and 0 <= pos < len(playlist): + item = playlist[pos] + current_item = item if isinstance(item, dict) else None + else: + for item in playlist: + if isinstance(item, dict) and item.get("current") is True: + current_item = item + break + + return { + "path": effective_path, + "title": title, + "playlist_pos": pos, + "playlist_item": current_item, + } + + def ensure_lua_loaded(self) -> None: + try: + script_path = self.lua_script_path + if not script_path or not os.path.exists(script_path): + return + # Safe to call repeatedly; mpv will reload the script. 
+ self.send({"command": ["load-script", script_path], "request_id": 12}, silent=True) + except Exception: + return + + def wait_for_ipc(self, retries: int = 20, delay_seconds: float = 0.2) -> bool: + for _ in range(max(1, retries)): + client = self.client() + try: + if client.connect(): + return True + finally: + client.disconnect() + _time.sleep(delay_seconds) + return False + + def kill_existing_windows(self) -> None: + if platform.system() != "Windows": + return + try: + subprocess.run( + ["taskkill", "/IM", "mpv.exe", "/F"], + stdin=subprocess.DEVNULL, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + timeout=2, + ) + except Exception: + return + + def start( + self, + *, + extra_args: Optional[List[str]] = None, + ytdl_raw_options: Optional[str] = None, + http_header_fields: Optional[str] = None, + detached: bool = True, + ) -> None: + cmd: List[str] = [ + "mpv", + f"--input-ipc-server={self.ipc_path}", + "--idle=yes", + "--force-window=yes", + ] + + # Always load the bundled Lua script at startup. + if self.lua_script_path and os.path.exists(self.lua_script_path): + cmd.append(f"--script={self.lua_script_path}") + + if ytdl_raw_options: + cmd.append(f"--ytdl-raw-options={ytdl_raw_options}") + if http_header_fields: + cmd.append(f"--http-header-fields={http_header_fields}") + if extra_args: + cmd.extend([str(a) for a in extra_args if a]) + + kwargs: Dict[str, Any] = {} + if detached and platform.system() == "Windows": + kwargs["creationflags"] = 0x00000008 # DETACHED_PROCESS + + debug("Starting MPV") + subprocess.Popen(cmd, stdin=subprocess.DEVNULL, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, **kwargs) + + def get_ipc_pipe_path() -> str: """Get the fixed IPC pipe/socket path for persistent MPV connection. @@ -59,36 +226,6 @@ def _unwrap_memory_target(text: Optional[str]) -> Optional[str]: return text -def _normalize_target(text: Optional[str]) -> Optional[str]: - """Normalize playlist targets for deduping across raw/memory:// wrappers.""" - if not text: - return None - real = _unwrap_memory_target(text) - if not real: - return None - real = real.strip() - if not real: - return None - - lower = real.lower() - # Hydrus bare hash - if len(lower) == 64 and all(ch in "0123456789abcdef" for ch in lower): - return lower - - # Hydrus file URL with hash query - try: - parsed = __import__("urllib.parse").parse.urlparse(real) - qs = __import__("urllib.parse").parse.parse_qs(parsed.query) - hash_qs = qs.get("hash", [None])[0] - if hash_qs and len(hash_qs) == 64 and all(ch in "0123456789abcdef" for ch in hash_qs.lower()): - return hash_qs.lower() - except Exception: - pass - - # Normalize paths/url for comparison - return lower.replace('\\', '\\') - - class MPVIPCClient: """Client for communicating with mpv via IPC socket/pipe. 
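Reviewer note on the new MPV controller added in the hunk above: it folds the behaviour of the module-level send_to_mpv()/get_mpv_client() helpers (removed later in this file's diff) into one class. A minimal usage sketch, assuming mpv is on PATH and the bundled Lua script exists at MPV/LUA/main.lua; the stream URL is hypothetical and only methods defined in this patch are called:

    from MPV import MPV

    player = MPV()                  # defaults to the fixed IPC pipe path
    if not player.is_running():
        player.start()              # spawns mpv with --input-ipc-server and --script
        player.wait_for_ipc()       # poll until the pipe/socket accepts connections
    player.ensure_lua_loaded()      # safe to repeat; mpv reloads LUA/main.lua
    player.send({"command": ["loadfile", "https://example.com/stream", "append-play"]})
    now = player.get_now_playing()  # dict with "path", "title", "playlist_pos", "playlist_item"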
@@ -105,7 +242,7 @@ class MPVIPCClient: """ self.timeout = timeout self.socket_path = socket_path or get_ipc_pipe_path() - self.sock = None + self.sock: socket.socket | BinaryIO | None = None self.is_windows = platform.system() == "Windows" def connect(self) -> bool: @@ -129,8 +266,13 @@ class MPVIPCClient: if not os.path.exists(self.socket_path): debug(f"IPC socket not found: {self.socket_path}") return False - - self.sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) + + af_unix = getattr(socket, "AF_UNIX", None) + if af_unix is None: + debug("IPC AF_UNIX is not available on this platform") + return False + + self.sock = socket.socket(af_unix, socket.SOCK_STREAM) self.sock.settimeout(self.timeout) self.sock.connect(self.socket_path) return True @@ -154,6 +296,7 @@ class MPVIPCClient: try: # Format command as JSON (mpv IPC protocol) + request: Dict[str, Any] if isinstance(command_data, list): request = {"command": command_data} else: @@ -166,15 +309,17 @@ class MPVIPCClient: payload = json.dumps(request) + "\n" # Debug: log the command being sent - from helper.logger import debug as _debug + from SYS.logger import debug as _debug _debug(f"[IPC] Sending: {payload.strip()}") # Send command if self.is_windows: - self.sock.write(payload.encode('utf-8')) - self.sock.flush() + pipe = cast(BinaryIO, self.sock) + pipe.write(payload.encode("utf-8")) + pipe.flush() else: - self.sock.sendall(payload.encode('utf-8')) + sock_obj = cast(socket.socket, self.sock) + sock_obj.sendall(payload.encode("utf-8")) # Receive response # We need to read lines until we find the one with matching request_id @@ -184,14 +329,16 @@ class MPVIPCClient: response_data = b"" if self.is_windows: try: - response_data = self.sock.readline() + pipe = cast(BinaryIO, self.sock) + response_data = pipe.readline() except (OSError, IOError): return None else: try: # This is simplistic for Unix socket (might not get full line) # But for now assuming MPV sends line-buffered JSON - chunk = self.sock.recv(4096) + sock_obj = cast(socket.socket, self.sock) + chunk = sock_obj.recv(4096) if not chunk: break response_data = chunk @@ -209,7 +356,7 @@ class MPVIPCClient: resp = json.loads(line) # Debug: log responses - from helper.logger import debug as _debug + from SYS.logger import debug as _debug _debug(f"[IPC] Received: {line}") # Check if this is the response to our request @@ -258,147 +405,3 @@ class MPVIPCClient: """Context manager exit.""" self.disconnect() - -def send_to_mpv(file_url: str, title: str, headers: Optional[Dict[str, str]] = None, - append: bool = True) -> bool: - """Send a file to be played in the existing MPV instance via IPC. - - This attempts to send to an existing MPV instance. If it fails, the calling - code should start a new MPV instance with the IPC pipe. - - Args: - file_url: URL or path to file to play - title: Display title for the file - headers: Optional HTTP headers (dict) - append: If True, append to playlist; if False, replace - - Returns: - True if successfully sent to existing MPV, False if pipe unavailable. 
- """ - # Try to connect using the robust client - client = get_mpv_client() - if not client: - return False - - try: - # Command 0: Subscribe to log messages so MPV console errors surface in REPL - _subscribe_log_messages(client) - - # Command 1: Ensure our Lua helper is loaded for in-window controls - _ensure_lua_script_loaded(client) - - # Command 2: Set headers if provided - if headers: - header_str = ",".join([f"{k}: {v}" for k, v in headers.items()]) - cmd_headers = { - "command": ["set_property", "http-header-fields", header_str], - "request_id": 0 - } - client.send_command(cmd_headers) - - # Deduplicate: if target already exists in playlist, just play it - normalized_new = _normalize_target(file_url) - existing_index = None - existing_title = None - if normalized_new: - playlist_resp = client.send_command({"command": ["get_property", "playlist"], "request_id": 98}) - if playlist_resp and playlist_resp.get("error") == "success": - for idx, item in enumerate(playlist_resp.get("data", []) or []): - for key in ("playlist-path", "filename"): - norm_existing = _normalize_target(item.get(key)) if isinstance(item, dict) else None - if norm_existing and norm_existing == normalized_new: - existing_index = idx - existing_title = item.get("title") if isinstance(item, dict) else None - break - if existing_index is not None: - break - - if existing_index is not None and append: - play_cmd = {"command": ["playlist-play-index", existing_index], "request_id": 99} - play_resp = client.send_command(play_cmd) - if play_resp and play_resp.get("error") == "success": - client.send_command({"command": ["set_property", "pause", False], "request_id": 100}) - safe_title = (title or existing_title or "").replace("\n", " ").replace("\r", " ").strip() - if safe_title: - client.send_command({"command": ["set_property", "force-media-title", safe_title], "request_id": 101}) - debug(f"Already in playlist, playing existing entry: {safe_title or file_url}") - return True - - # Command 2: Load file and inject title via memory:// wrapper so playlist shows friendly names immediately - target = file_url - load_mode = "append-play" if append else "replace" - safe_title = (title or "").replace("\n", " ").replace("\r", " ").strip() - target_to_send = target - if safe_title and not str(target).startswith("memory://"): - m3u_content = f"#EXTM3U\n#EXTINF:-1,{safe_title}\n{target}" - target_to_send = f"memory://{m3u_content}" - - cmd_load = { - "command": ["loadfile", target_to_send, load_mode], - "request_id": 1 - } - - resp = client.send_command(cmd_load) - if not resp or resp.get('error') != 'success': - debug(f"MPV loadfile failed: {resp}") - return False - - # Command 3: Set title (metadata for display) - still useful for window title - if safe_title: - cmd_title = { - "command": ["set_property", "force-media-title", safe_title], - "request_id": 2 - } - client.send_command(cmd_title) - - debug(f"Sent to existing MPV: {safe_title or title}") - return True - - except Exception as e: - debug(f"Error in send_to_mpv: {e}") - return False - finally: - client.disconnect() - - - -def get_mpv_client(socket_path: Optional[str] = None) -> Optional[MPVIPCClient]: - """Get an MPV IPC client, attempting to connect. - - Args: - socket_path: Custom socket path (uses default if None) - - Returns: - Connected MPVIPCClient or None if connection fails. 
- """ - client = MPVIPCClient(socket_path=socket_path) - if client.connect(): - return client - return None - - -def _subscribe_log_messages(client: MPVIPCClient) -> None: - """Ask MPV to emit log messages over IPC so we can surface console errors.""" - try: - client.send_command({"command": ["request_log_messages", "warn"], "request_id": 11}) - except Exception as exc: - debug(f"Failed to subscribe to MPV logs: {exc}") - - -def _ensure_lua_script_loaded(client: MPVIPCClient) -> None: - """Load the bundled MPV Lua script to enable in-window controls. - - Safe to call repeatedly; mpv will simply reload the script if already present. - """ - try: - script_path = MPV_LUA_SCRIPT_PATH - if not script_path or not os.path.exists(script_path): - return - resp = client.send_command({"command": ["load-script", script_path], "request_id": 12}) - if resp and resp.get("error") == "success": - debug(f"Loaded MPV Lua script: {script_path}") - else: - debug(f"MPV Lua load response: {resp}") - except Exception as exc: - debug(f"Failed to load MPV Lua script: {exc}") - diff --git a/Provider/__init__.py b/Provider/__init__.py new file mode 100644 index 0000000..31d441a --- /dev/null +++ b/Provider/__init__.py @@ -0,0 +1,5 @@ +"""Provider plugin modules. + +Concrete provider implementations live in this package. +The public entrypoint/registry is Provider.registry. +""" diff --git a/Provider/_base.py b/Provider/_base.py new file mode 100644 index 0000000..9541121 --- /dev/null +++ b/Provider/_base.py @@ -0,0 +1,84 @@ +from __future__ import annotations + +from abc import ABC, abstractmethod +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple + + +@dataclass +class SearchResult: + """Unified search result format across all search providers.""" + + table: str # Provider name: "libgen", "soulseek", "bandcamp", "youtube", etc. 
+ title: str # Display title/filename + path: str # Download target (URL, path, magnet, identifier) + + detail: str = "" # Additional description + annotations: List[str] = field(default_factory=list) # Tags: ["120MB", "flac", "ready"] + media_kind: str = "other" # Type: "book", "audio", "video", "game", "magnet" + size_bytes: Optional[int] = None + tags: set[str] = field(default_factory=set) # Searchable tags + columns: List[Tuple[str, str]] = field(default_factory=list) # Display columns + full_metadata: Dict[str, Any] = field(default_factory=dict) # Extra metadata + + def to_dict(self) -> Dict[str, Any]: + """Convert to dictionary for pipeline processing.""" + + return { + "table": self.table, + "title": self.title, + "path": self.path, + "detail": self.detail, + "annotations": self.annotations, + "media_kind": self.media_kind, + "size_bytes": self.size_bytes, + "tags": list(self.tags), + "columns": list(self.columns), + "full_metadata": self.full_metadata, + } + + +class SearchProvider(ABC): + """Base class for search providers.""" + + def __init__(self, config: Optional[Dict[str, Any]] = None): + self.config = config or {} + self.name = self.__class__.__name__.lower() + + @abstractmethod + def search( + self, + query: str, + limit: int = 50, + filters: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> List[SearchResult]: + """Search for items matching the query.""" + + def download(self, result: SearchResult, output_dir: Path) -> Optional[Path]: + """Download an item from a search result.""" + + return None + + def validate(self) -> bool: + """Check if provider is available and properly configured.""" + + return True + + +class FileProvider(ABC): + """Base class for file upload providers.""" + + def __init__(self, config: Optional[Dict[str, Any]] = None): + self.config = config or {} + self.name = self.__class__.__name__.lower() + + @abstractmethod + def upload(self, file_path: str, **kwargs: Any) -> str: + """Upload a file and return the URL.""" + + def validate(self) -> bool: + """Check if provider is available/configured.""" + + return True diff --git a/Provider/bandcamp.py b/Provider/bandcamp.py new file mode 100644 index 0000000..52f7def --- /dev/null +++ b/Provider/bandcamp.py @@ -0,0 +1,109 @@ +from __future__ import annotations + +import sys +from typing import Any, Dict, List, Optional + +from Provider._base import SearchProvider, SearchResult +from SYS.logger import log, debug + +try: + from playwright.sync_api import sync_playwright +except ImportError: # pragma: no cover + sync_playwright = None + + +class Bandcamp(SearchProvider): + """Search provider for Bandcamp.""" + + def search( + self, + query: str, + limit: int = 50, + filters: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> List[SearchResult]: + if sync_playwright is None: + log( + "[bandcamp] Playwright not available. 
Install with: pip install playwright", + file=sys.stderr, + ) + return [] + + try: + with sync_playwright() as p: + browser = p.chromium.launch(headless=True) + page = browser.new_page() + + if query.strip().lower().startswith("artist:"): + artist_name = query[7:].strip().strip('"') + search_url = f"https://bandcamp.com/search?q={artist_name}&item_type=b" + else: + search_url = f"https://bandcamp.com/search?q={query}&item_type=a" + + results = self._scrape_url(page, search_url, limit) + + browser.close() + return results + + except Exception as exc: + log(f"[bandcamp] Search error: {exc}", file=sys.stderr) + return [] + + def _scrape_url(self, page: Any, url: str, limit: int) -> List[SearchResult]: + debug(f"[bandcamp] Scraping: {url}") + + page.goto(url) + page.wait_for_load_state("domcontentloaded") + + results: List[SearchResult] = [] + + search_results = page.query_selector_all(".searchresult") + if not search_results: + return results + + for item in search_results[:limit]: + try: + heading = item.query_selector(".heading") + if not heading: + continue + + link = heading.query_selector("a") + if not link: + continue + + title = link.inner_text().strip() + target_url = link.get_attribute("href") + + subhead = item.query_selector(".subhead") + artist = subhead.inner_text().strip() if subhead else "Unknown" + + itemtype = item.query_selector(".itemtype") + media_type = itemtype.inner_text().strip() if itemtype else "album" + + results.append( + SearchResult( + table="bandcamp", + title=title, + path=target_url, + detail=f"By: {artist}", + annotations=[media_type], + media_kind="audio", + columns=[ + ("Name", title), + ("Artist", artist), + ("Type", media_type), + ], + full_metadata={ + "artist": artist, + "type": media_type, + }, + ) + ) + + except Exception as exc: + debug(f"[bandcamp] Error parsing result: {exc}") + + return results + + def validate(self) -> bool: + return sync_playwright is not None diff --git a/Provider/libgen.py b/Provider/libgen.py new file mode 100644 index 0000000..d261e7c --- /dev/null +++ b/Provider/libgen.py @@ -0,0 +1,98 @@ +from __future__ import annotations + +import sys +from typing import Any, Dict, List, Optional + +from Provider._base import SearchProvider, SearchResult +from SYS.logger import log + + +class Libgen(SearchProvider): + """Search provider for Library Genesis books.""" + + def search( + self, + query: str, + limit: int = 50, + filters: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> List[SearchResult]: + filters = filters or {} + + try: + from Provider.unified_book_downloader import UnifiedBookDownloader + from Provider.query_parser import parse_query, get_field, get_free_text + + parsed = parse_query(query) + isbn = get_field(parsed, "isbn") + author = get_field(parsed, "author") + title = get_field(parsed, "title") + free_text = get_free_text(parsed) + + search_query = isbn or title or author or free_text or query + + downloader = UnifiedBookDownloader(config=self.config) + books = downloader.search_libgen(search_query, limit=limit) + + results: List[SearchResult] = [] + for idx, book in enumerate(books, 1): + title = book.get("title", "Unknown") + author = book.get("author", "Unknown") + year = book.get("year", "Unknown") + pages = book.get("pages") or book.get("pages_str") or "" + extension = book.get("extension", "") or book.get("ext", "") + filesize = book.get("filesize_str", "Unknown") + isbn = book.get("isbn", "") + mirror_url = book.get("mirror_url", "") + + columns = [ + ("Title", title), + ("Author", author), + ("Pages", 
str(pages)), + ("Ext", str(extension)), + ] + + detail = f"By: {author}" + if year and year != "Unknown": + detail += f" ({year})" + + annotations = [f"{filesize}"] + if isbn: + annotations.append(f"ISBN: {isbn}") + + results.append( + SearchResult( + table="libgen", + title=title, + path=mirror_url or f"libgen:{book.get('id', '')}", + detail=detail, + annotations=annotations, + media_kind="book", + columns=columns, + full_metadata={ + "number": idx, + "author": author, + "year": year, + "isbn": isbn, + "filesize": filesize, + "pages": pages, + "extension": extension, + "book_id": book.get("book_id", ""), + "md5": book.get("md5", ""), + }, + ) + ) + + return results + + except Exception as exc: + log(f"[libgen] Search error: {exc}", file=sys.stderr) + return [] + + def validate(self) -> bool: + try: + from Provider.unified_book_downloader import UnifiedBookDownloader # noqa: F401 + + return True + except Exception: + return False diff --git a/helper/libgen_service.py b/Provider/libgen_service.py similarity index 100% rename from helper/libgen_service.py rename to Provider/libgen_service.py diff --git a/Provider/matrix.py b/Provider/matrix.py new file mode 100644 index 0000000..eb77e45 --- /dev/null +++ b/Provider/matrix.py @@ -0,0 +1,94 @@ +from __future__ import annotations + +import mimetypes +from pathlib import Path +from typing import Any + +import requests + +from Provider._base import FileProvider + + +class Matrix(FileProvider): + """File provider for Matrix (Element) chat rooms.""" + + def validate(self) -> bool: + if not self.config: + return False + matrix_conf = self.config.get("storage", {}).get("matrix", {}) + return bool( + matrix_conf.get("homeserver") + and matrix_conf.get("room_id") + and (matrix_conf.get("access_token") or matrix_conf.get("password")) + ) + + def upload(self, file_path: str, **kwargs: Any) -> str: + path = Path(file_path) + if not path.exists(): + raise FileNotFoundError(f"File not found: {file_path}") + + matrix_conf = self.config.get("storage", {}).get("matrix", {}) + homeserver = matrix_conf.get("homeserver") + access_token = matrix_conf.get("access_token") + room_id = matrix_conf.get("room_id") + + if not homeserver: + raise Exception("Matrix homeserver missing") + if not access_token: + raise Exception("Matrix access_token missing") + if not room_id: + raise Exception("Matrix room_id missing") + + if not homeserver.startswith("http"): + homeserver = f"https://{homeserver}" + + # Upload media + upload_url = f"{homeserver}/_matrix/media/v3/upload" + headers = { + "Authorization": f"Bearer {access_token}", + "Content-Type": "application/octet-stream", + } + + mime_type, _ = mimetypes.guess_type(path) + if mime_type: + headers["Content-Type"] = mime_type + + filename = path.name + + with open(path, "rb") as handle: + resp = requests.post(upload_url, headers=headers, data=handle, params={"filename": filename}) + + if resp.status_code != 200: + raise Exception(f"Matrix upload failed: {resp.text}") + + content_uri = resp.json().get("content_uri") + if not content_uri: + raise Exception("No content_uri returned") + + # Send message + send_url = f"{homeserver}/_matrix/client/v3/rooms/{room_id}/send/m.room.message" + + # Determine message type + msgtype = "m.file" + ext = path.suffix.lower() + + audio_exts = {".mp3", ".flac", ".wav", ".m4a", ".aac", ".ogg", ".opus", ".wma", ".mka", ".alac"} + video_exts = {".mp4", ".mkv", ".webm", ".mov", ".avi", ".flv", ".mpg", ".mpeg", ".ts", ".m4v", ".wmv"} + image_exts = {".jpg", ".jpeg", ".png", ".gif", ".webp", ".bmp", 
".tiff"} + + if ext in audio_exts: + msgtype = "m.audio" + elif ext in video_exts: + msgtype = "m.video" + elif ext in image_exts: + msgtype = "m.image" + + info = {"mimetype": mime_type, "size": path.stat().st_size} + payload = {"msgtype": msgtype, "body": filename, "url": content_uri, "info": info} + + resp = requests.post(send_url, headers=headers, json=payload) + if resp.status_code != 200: + raise Exception(f"Matrix send message failed: {resp.text}") + + event_id = resp.json().get("event_id") + return f"https://matrix.to/#/{room_id}/{event_id}" diff --git a/helper/metadata_search.py b/Provider/metadata_provider.py similarity index 99% rename from helper/metadata_search.py rename to Provider/metadata_provider.py index 26b1237..00052ac 100644 --- a/helper/metadata_search.py +++ b/Provider/metadata_provider.py @@ -5,7 +5,7 @@ from typing import Any, Dict, List, Optional, Type import requests import sys -from helper.logger import log, debug +from SYS.logger import log, debug try: # Optional dependency import musicbrainzngs # type: ignore diff --git a/helper/query_parser.py b/Provider/query_parser.py similarity index 100% rename from helper/query_parser.py rename to Provider/query_parser.py diff --git a/Provider/registry.py b/Provider/registry.py new file mode 100644 index 0000000..f957c89 --- /dev/null +++ b/Provider/registry.py @@ -0,0 +1,110 @@ +"""Provider registry. + +Concrete provider implementations live in the `Provider/` package. +This module is the single source of truth for provider discovery. +""" + +from __future__ import annotations + +from typing import Any, Dict, Optional, Type +import sys + +from SYS.logger import log + +from Provider._base import FileProvider, SearchProvider, SearchResult +from Provider.bandcamp import Bandcamp +from Provider.libgen import Libgen +from Provider.matrix import Matrix +from Provider.soulseek import Soulseek, download_soulseek_file +from Provider.youtube import YouTube +from Provider.zeroxzero import ZeroXZero + + +_SEARCH_PROVIDERS: Dict[str, Type[SearchProvider]] = { + "libgen": Libgen, + "soulseek": Soulseek, + "bandcamp": Bandcamp, + "youtube": YouTube, +} + + +def get_search_provider(name: str, config: Optional[Dict[str, Any]] = None) -> Optional[SearchProvider]: + """Get a search provider by name.""" + + provider_class = _SEARCH_PROVIDERS.get((name or "").lower()) + if provider_class is None: + log(f"[provider] Unknown search provider: {name}", file=sys.stderr) + return None + + try: + provider = provider_class(config) + if not provider.validate(): + log(f"[provider] Provider '{name}' is not available", file=sys.stderr) + return None + return provider + except Exception as exc: + log(f"[provider] Error initializing '{name}': {exc}", file=sys.stderr) + return None + + +def list_search_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bool]: + """List all search providers and their availability.""" + + availability: Dict[str, bool] = {} + for name, provider_class in _SEARCH_PROVIDERS.items(): + try: + provider = provider_class(config) + availability[name] = provider.validate() + except Exception: + availability[name] = False + return availability + + +_FILE_PROVIDERS: Dict[str, Type[FileProvider]] = { + "0x0": ZeroXZero, + "matrix": Matrix, +} + + +def get_file_provider(name: str, config: Optional[Dict[str, Any]] = None) -> Optional[FileProvider]: + """Get a file provider by name.""" + + provider_class = _FILE_PROVIDERS.get((name or "").lower()) + if provider_class is None: + log(f"[provider] Unknown file provider: {name}", 
file=sys.stderr) + return None + + try: + provider = provider_class(config) + if not provider.validate(): + log(f"[provider] File provider '{name}' is not available", file=sys.stderr) + return None + return provider + except Exception as exc: + log(f"[provider] Error initializing file provider '{name}': {exc}", file=sys.stderr) + return None + + +def list_file_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bool]: + """List all file providers and their availability.""" + + availability: Dict[str, bool] = {} + for name, provider_class in _FILE_PROVIDERS.items(): + try: + provider = provider_class(config) + availability[name] = provider.validate() + except Exception: + availability[name] = False + return availability + + +__all__ = [ + "SearchResult", + "SearchProvider", + "FileProvider", + "get_search_provider", + "list_search_providers", + "get_file_provider", + "list_file_providers", + "download_soulseek_file", +] diff --git a/Provider/soulseek.py b/Provider/soulseek.py new file mode 100644 index 0000000..92933da --- /dev/null +++ b/Provider/soulseek.py @@ -0,0 +1,380 @@ +from __future__ import annotations + +import asyncio +import re +import sys +import time +from pathlib import Path +from typing import Any, Dict, List, Optional + +from Provider._base import SearchProvider, SearchResult +from SYS.logger import log, debug + + +class Soulseek(SearchProvider): + """Search provider for Soulseek P2P network.""" + + MUSIC_EXTENSIONS = { + ".flac", + ".mp3", + ".m4a", + ".aac", + ".ogg", + ".opus", + ".wav", + ".alac", + ".wma", + ".ape", + ".aiff", + ".dsf", + ".dff", + ".wv", + ".tta", + ".tak", + ".ac3", + ".dts", + } + + # NOTE: These defaults preserve existing behavior. + USERNAME = "asjhkjljhkjfdsd334" + PASSWORD = "khhhg" + DOWNLOAD_DIR = "./downloads" + MAX_WAIT_TRANSFER = 1200 + + def download(self, result: SearchResult, output_dir: Path) -> Optional[Path]: + """Download file from Soulseek.""" + + try: + full_metadata = result.full_metadata or {} + username = full_metadata.get("username") + filename = full_metadata.get("filename") or result.path + + if not username or not filename: + log(f"[soulseek] Missing metadata for download: {result.title}", file=sys.stderr) + return None + + # This cmdlet stack is synchronous; use asyncio.run for clarity. + return asyncio.run( + download_soulseek_file( + username=username, + filename=filename, + output_dir=output_dir, + timeout=self.MAX_WAIT_TRANSFER, + ) + ) + + except RuntimeError: + # If we're already inside an event loop (e.g., TUI), fall back to a + # dedicated loop in this thread. 
+ loop = asyncio.new_event_loop() + try: + asyncio.set_event_loop(loop) + return loop.run_until_complete( + download_soulseek_file( + username=username, + filename=filename, + output_dir=output_dir, + timeout=self.MAX_WAIT_TRANSFER, + ) + ) + finally: + try: + loop.close() + except Exception: + pass + + except Exception as exc: + log(f"[soulseek] Download error: {exc}", file=sys.stderr) + return None + + async def perform_search(self, query: str, timeout: float = 9.0, limit: int = 50) -> List[Dict[str, Any]]: + """Perform async Soulseek search.""" + + import os + from aioslsk.client import SoulSeekClient + from aioslsk.settings import CredentialsSettings, Settings + + os.makedirs(self.DOWNLOAD_DIR, exist_ok=True) + + settings = Settings(credentials=CredentialsSettings(username=self.USERNAME, password=self.PASSWORD)) + client = SoulSeekClient(settings) + + try: + await client.start() + await client.login() + except Exception as exc: + log(f"[soulseek] Login failed: {type(exc).__name__}: {exc}", file=sys.stderr) + return [] + + try: + search_request = await client.searches.search(query) + await self._collect_results(search_request, timeout=timeout) + return self._flatten_results(search_request)[:limit] + except Exception as exc: + log(f"[soulseek] Search error: {type(exc).__name__}: {exc}", file=sys.stderr) + return [] + finally: + try: + await client.stop() + except Exception: + pass + + def _flatten_results(self, search_request: Any) -> List[dict]: + flat: List[dict] = [] + for result in getattr(search_request, "results", []): + username = getattr(result, "username", "?") + + for file_data in getattr(result, "shared_items", []): + flat.append( + { + "file": file_data, + "username": username, + "filename": getattr(file_data, "filename", "?"), + "size": getattr(file_data, "filesize", 0), + } + ) + + for file_data in getattr(result, "locked_results", []): + flat.append( + { + "file": file_data, + "username": username, + "filename": getattr(file_data, "filename", "?"), + "size": getattr(file_data, "filesize", 0), + } + ) + + return flat + + async def _collect_results(self, search_request: Any, timeout: float = 75.0) -> None: + end = time.time() + timeout + last_count = 0 + while time.time() < end: + current_count = len(getattr(search_request, "results", [])) + if current_count > last_count: + debug(f"[soulseek] Got {current_count} result(s)...") + last_count = current_count + await asyncio.sleep(0.5) + + def search( + self, + query: str, + limit: int = 50, + filters: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> List[SearchResult]: + filters = filters or {} + + try: + flat_results = asyncio.run(self.perform_search(query, timeout=9.0, limit=limit)) + if not flat_results: + return [] + + music_results: List[dict] = [] + for item in flat_results: + filename = item["filename"] + ext = ("." + filename.rsplit(".", 1)[-1].lower()) if "." in filename else "" + if ext in self.MUSIC_EXTENSIONS: + music_results.append(item) + + if not music_results: + return [] + + enriched_results: List[dict] = [] + for item in music_results: + filename = item["filename"] + ext = ("." + filename.rsplit(".", 1)[-1].lower()) if "." in filename else "" + + display_name = filename.replace("\\", "/").split("/")[-1] + path_parts = filename.replace("\\", "/").split("/") + artist = path_parts[-3] if len(path_parts) >= 3 else "" + album = path_parts[-2] if len(path_parts) >= 3 else (path_parts[-2] if len(path_parts) == 2 else "") + + base_name = display_name.rsplit(".", 1)[0] if "." 
in display_name else display_name + track_num = "" + title = base_name + filename_artist = "" + + match = re.match(r"^(\d{1,3})\s*[\.\-]?\s+(.+)$", base_name) + if match: + track_num = match.group(1) + rest = match.group(2) + if " - " in rest: + filename_artist, title = rest.split(" - ", 1) + else: + title = rest + + if filename_artist: + artist = filename_artist + + enriched_results.append( + { + **item, + "artist": artist, + "album": album, + "title": title, + "track_num": track_num, + "ext": ext, + } + ) + + if filters: + artist_filter = (filters.get("artist", "") or "").lower() + album_filter = (filters.get("album", "") or "").lower() + track_filter = (filters.get("track", "") or "").lower() + + if artist_filter or album_filter or track_filter: + filtered: List[dict] = [] + for item in enriched_results: + if artist_filter and artist_filter not in item["artist"].lower(): + continue + if album_filter and album_filter not in item["album"].lower(): + continue + if track_filter and track_filter not in item["title"].lower(): + continue + filtered.append(item) + enriched_results = filtered + + enriched_results.sort(key=lambda item: (item["ext"].lower() != ".flac", -item["size"])) + + results: List[SearchResult] = [] + for item in enriched_results: + artist_display = item["artist"] if item["artist"] else "(no artist)" + album_display = item["album"] if item["album"] else "(no album)" + size_mb = int(item["size"] / 1024 / 1024) + + columns = [ + ("Track", item["track_num"] or "?"), + ("Title", item["title"][:40]), + ("Artist", artist_display[:32]), + ("Album", album_display[:32]), + ("Size", f"{size_mb} MB"), + ] + + results.append( + SearchResult( + table="soulseek", + title=item["title"], + path=item["filename"], + detail=f"{artist_display} - {album_display}", + annotations=[f"{size_mb} MB", item["ext"].lstrip(".").upper()], + media_kind="audio", + size_bytes=item["size"], + columns=columns, + full_metadata={ + "username": item["username"], + "filename": item["filename"], + "artist": item["artist"], + "album": item["album"], + "track_num": item["track_num"], + "ext": item["ext"], + }, + ) + ) + + return results + + except Exception as exc: + log(f"[soulseek] Search error: {exc}", file=sys.stderr) + return [] + + def validate(self) -> bool: + try: + from aioslsk.client import SoulSeekClient # noqa: F401 + + return True + except ImportError: + return False + + +async def download_soulseek_file( + username: str, + filename: str, + output_dir: Path = Path("./downloads"), + timeout: int = 1200, +) -> Optional[Path]: + """Download a file from a Soulseek peer.""" + + try: + from aioslsk.client import SoulSeekClient + from aioslsk.settings import CredentialsSettings, Settings + from aioslsk.transfer.model import Transfer, TransferDirection + from aioslsk.transfer.state import TransferState + + output_dir = Path(output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + + local_filename = filename.replace("\\", "/").split("/")[-1] + output_user_dir = output_dir / username + output_user_dir.mkdir(parents=True, exist_ok=True) + output_path = (output_user_dir / local_filename) + + if output_path.exists(): + base = output_path.stem + ext = output_path.suffix + counter = 1 + while output_path.exists(): + output_path = output_user_dir / f"{base}_{counter}{ext}" + counter += 1 + + output_path = output_path.resolve() + + settings = Settings(credentials=CredentialsSettings(username=Soulseek.USERNAME, password=Soulseek.PASSWORD)) + client = SoulSeekClient(settings) + + try: + await client.start() + await 
client.login() + debug(f"[soulseek] Logged in as {Soulseek.USERNAME}") + + debug(f"[soulseek] Requesting download from {username}: {filename}") + + transfer = await client.transfers.add(Transfer(username, filename, TransferDirection.DOWNLOAD)) + transfer.local_path = str(output_path) + await client.transfers.queue(transfer) + + start_time = time.time() + last_log_time = 0.0 + while not transfer.is_finalized(): + if time.time() - start_time > timeout: + log(f"[soulseek] Download timeout after {timeout}s", file=sys.stderr) + return None + + if time.time() - last_log_time >= 5.0 and transfer.bytes_transfered > 0: + progress = (transfer.bytes_transfered / transfer.filesize * 100) if transfer.filesize else 0 + debug( + f"[soulseek] Progress: {progress:.1f}% " + f"({transfer.bytes_transfered}/{transfer.filesize})" + ) + last_log_time = time.time() + + await asyncio.sleep(1) + + if transfer.state.VALUE == TransferState.COMPLETE and transfer.local_path: + downloaded_path = Path(transfer.local_path) + if downloaded_path.exists(): + debug(f"[soulseek] Download complete: {downloaded_path}") + return downloaded_path + + log(f"[soulseek] Transfer completed but file missing: {downloaded_path}", file=sys.stderr) + return None + + log( + f"[soulseek] Download failed: state={transfer.state.VALUE} " + f"bytes={transfer.bytes_transfered}/{transfer.filesize}", + file=sys.stderr, + ) + return None + + finally: + try: + await client.stop() + except Exception: + pass + + except ImportError: + log("[soulseek] aioslsk not installed. Install with: pip install aioslsk", file=sys.stderr) + return None + except Exception as exc: + log(f"[soulseek] Download failed: {type(exc).__name__}: {exc}", file=sys.stderr) + return None diff --git a/helper/unified_book_downloader.py b/Provider/unified_book_downloader.py similarity index 98% rename from helper/unified_book_downloader.py rename to Provider/unified_book_downloader.py index c4e5dad..ae4c298 100644 --- a/helper/unified_book_downloader.py +++ b/Provider/unified_book_downloader.py @@ -15,7 +15,7 @@ import requests from typing import Optional, Dict, Any, Tuple, List, Callable, cast from pathlib import Path -from helper.logger import debug +from SYS.logger import debug logger = logging.getLogger(__name__) @@ -40,7 +40,7 @@ class UnifiedBookDownloader: def _init_downloaders(self) -> None: """Initialize downloader functions from their modules.""" try: - from helper.archive_client import ( + from API.archive_client import ( check_direct_download, get_openlibrary_by_isbn, loan @@ -56,7 +56,7 @@ class UnifiedBookDownloader: self.loan_func = None try: - from helper.libgen_service import ( + from Provider.libgen_service import ( DEFAULT_LIMIT as _LIBGEN_DEFAULT_LIMIT, download_from_mirror as _libgen_download, search_libgen as _libgen_search, @@ -179,7 +179,7 @@ class UnifiedBookDownloader: def _has_archive_credentials(self) -> bool: """Check if Archive.org credentials are available.""" try: - from helper.archive_client import credential_openlibrary + from API.archive_client import credential_openlibrary email, password = credential_openlibrary(self.config) return bool(email and password) except Exception: @@ -352,7 +352,7 @@ class UnifiedBookDownloader: img2pdf merges pages into searchable PDF """ try: - from helper.archive_client import credential_openlibrary + from API.archive_client import credential_openlibrary book_id = method.get('book_id', '') @@ -562,7 +562,7 @@ class UnifiedBookDownloader: Returns tuple of (success: bool, filepath/message: str) """ try: - from 
helper.archive_client import login, loan, get_book_infos, download + from API.archive_client import login, loan, get_book_infos, download import tempfile import shutil diff --git a/Provider/youtube.py b/Provider/youtube.py new file mode 100644 index 0000000..b05a564 --- /dev/null +++ b/Provider/youtube.py @@ -0,0 +1,94 @@ +from __future__ import annotations + +import json +import shutil +import subprocess +import sys +from typing import Any, Dict, List, Optional + +from Provider._base import SearchProvider, SearchResult +from SYS.logger import log + + +class YouTube(SearchProvider): + """Search provider for YouTube using yt-dlp.""" + + def search( + self, + query: str, + limit: int = 10, + filters: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> List[SearchResult]: + ytdlp_path = shutil.which("yt-dlp") + if not ytdlp_path: + log("[youtube] yt-dlp not found in PATH", file=sys.stderr) + return [] + + search_query = f"ytsearch{limit}:{query}" + cmd = [ytdlp_path, "--dump-json", "--flat-playlist", "--no-warnings", search_query] + + try: + process = subprocess.run( + cmd, + capture_output=True, + text=True, + encoding="utf-8", + errors="replace", + ) + + if process.returncode != 0: + log(f"[youtube] yt-dlp failed: {process.stderr}", file=sys.stderr) + return [] + + results: List[SearchResult] = [] + for line in process.stdout.splitlines(): + if not line.strip(): + continue + + try: + video_data = json.loads(line) + except json.JSONDecodeError: + continue + + title = video_data.get("title", "Unknown") + video_id = video_data.get("id", "") + url = video_data.get("url") or f"https://youtube.com/watch?v={video_id}" + uploader = video_data.get("uploader", "Unknown") + duration = video_data.get("duration", 0) + view_count = video_data.get("view_count", 0) + + duration_str = f"{int(duration // 60)}:{int(duration % 60):02d}" if duration else "" + views_str = f"{view_count:,}" if view_count else "" + + results.append( + SearchResult( + table="youtube", + title=title, + path=url, + detail=f"By: {uploader}", + annotations=[duration_str, f"{views_str} views"], + media_kind="video", + columns=[ + ("Title", title), + ("Uploader", uploader), + ("Duration", duration_str), + ("Views", views_str), + ], + full_metadata={ + "video_id": video_id, + "uploader": uploader, + "duration": duration, + "view_count": view_count, + }, + ) + ) + + return results + + except Exception as exc: + log(f"[youtube] Error: {exc}", file=sys.stderr) + return [] + + def validate(self) -> bool: + return shutil.which("yt-dlp") is not None diff --git a/Provider/zeroxzero.py b/Provider/zeroxzero.py new file mode 100644 index 0000000..0a835ec --- /dev/null +++ b/Provider/zeroxzero.py @@ -0,0 +1,36 @@ +from __future__ import annotations + +import os +import sys +from typing import Any + +from Provider._base import FileProvider +from SYS.logger import log + + +class ZeroXZero(FileProvider): + """File provider for 0x0.st.""" + + def upload(self, file_path: str, **kwargs: Any) -> str: + from API.HTTP import HTTPClient + + if not os.path.exists(file_path): + raise FileNotFoundError(f"File not found: {file_path}") + + try: + headers = {"User-Agent": "Medeia-Macina/1.0"} + with HTTPClient(headers=headers) as client: + with open(file_path, "rb") as handle: + response = client.post("https://0x0.st", files={"file": handle}) + + if response.status_code == 200: + return response.text.strip() + + raise Exception(f"Upload failed: {response.status_code} - {response.text}") + + except Exception as exc: + log(f"[0x0] Upload error: {exc}", 
file=sys.stderr) + raise + + def validate(self) -> bool: + return True diff --git a/helper/background_notifier.py b/SYS/background_notifier.py similarity index 99% rename from helper/background_notifier.py rename to SYS/background_notifier.py index 1eb90dd..dd2938f 100644 --- a/helper/background_notifier.py +++ b/SYS/background_notifier.py @@ -10,7 +10,7 @@ from __future__ import annotations from typing import Any, Callable, Dict, Optional, Set -from helper.logger import log, debug +from SYS.logger import log, debug class BackgroundNotifier: diff --git a/helper/download.py b/SYS/download.py similarity index 99% rename from helper/download.py rename to SYS/download.py index 4d19ed5..d827347 100644 --- a/helper/download.py +++ b/SYS/download.py @@ -24,9 +24,9 @@ from urllib.parse import urljoin, urlparse import httpx -from helper.logger import log, debug -from .utils import ensure_directory, sha256_file -from .http_client import HTTPClient +from SYS.logger import log, debug +from SYS.utils import ensure_directory, sha256_file +from API.HTTP import HTTPClient from models import DownloadError, DownloadOptions, DownloadMediaResult, DebugLogger, ProgressBar try: diff --git a/helper/file_server.py b/SYS/file_server.py similarity index 100% rename from helper/file_server.py rename to SYS/file_server.py diff --git a/helper/logger.py b/SYS/logger.py similarity index 100% rename from helper/logger.py rename to SYS/logger.py diff --git a/helper/progress.py b/SYS/progress.py similarity index 65% rename from helper/progress.py rename to SYS/progress.py index ce9e6a0..8ce3c32 100644 --- a/helper/progress.py +++ b/SYS/progress.py @@ -3,25 +3,25 @@ import sys -from helper.logger import log, debug +from SYS.logger import log, debug def format_progress_bar(current: int, total: int, width: int = 40, label: str = "") -> str: """Create a text-based progress bar. - + Args: current: Current progress (bytes/items) total: Total to complete (bytes/items) width: Width of the bar in characters (default 40) label: Optional label prefix - + Returns: Formatted progress bar string - + Examples: format_progress_bar(50, 100) # Returns: "[████████████████░░░░░░░░░░░░░░░░░░░░] 50.0%" - + format_progress_bar(256*1024*1024, 1024*1024*1024, label="download.zip") # Returns: "download.zip: [████████░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░] 25.0%" """ @@ -31,30 +31,20 @@ def format_progress_bar(current: int, total: int, width: int = 40, label: str = else: percentage = (current / total) * 100 filled = int((current / total) * width) - - # Create bar: filled blocks + empty blocks + bar = "█" * filled + "░" * (width - filled) - - # Format percentage pct_str = f"{percentage:.1f}%" - - # Build result + if label: result = f"{label}: [{bar}] {pct_str}" else: result = f"[{bar}] {pct_str}" - + return result def format_size(bytes_val: float) -> str: - """Format bytes to human-readable size. - - Examples: - format_size(1024) -> "1.00 KB" - format_size(1024*1024) -> "1.00 MB" - format_size(1024*1024*1024) -> "1.00 GB" - """ + """Format bytes to human-readable size.""" for unit in ['B', 'KB', 'MB', 'GB', 'TB']: if bytes_val < 1024: return f"{bytes_val:.2f} {unit}" @@ -63,59 +53,30 @@ def format_size(bytes_val: float) -> str: def format_download_status(filename: str, current: int, total: int, speed: float = 0) -> str: - """Format download status with progress bar and details. 
- - Args: - filename: Name of file being downloaded - current: Current bytes downloaded - total: Total file size - speed: Download speed in bytes/sec - - Returns: - Formatted status line - - Examples: - format_download_status("movie.mkv", 512*1024*1024, 2*1024*1024*1024, 10*1024*1024) - # Returns: "movie.mkv: [████████████░░░░░░░░░░░░░░░░░░░░░░░░░░] 25.0% (512.00 MB / 2.00 GB @ 10.00 MB/s)" - """ + """Format download status with progress bar and details.""" bar = format_progress_bar(current, total, width=30) size_current = format_size(current) size_total = format_size(total) - + if speed > 0: speed_str = f" @ {format_size(speed)}/s" else: speed_str = "" - + return f"{bar} ({size_current} / {size_total}{speed_str})" def print_progress(filename: str, current: int, total: int, speed: float = 0, end: str = "\r") -> None: - """Print download progress to stderr (doesn't interfere with piped output). - - Args: - filename: File being downloaded - current: Current bytes - total: Total bytes - speed: Speed in bytes/sec - end: Line ending (default "\r" for overwriting, use "\n" for final) - """ + """Print download progress to stderr (doesn't interfere with piped output).""" status = format_download_status(filename, current, total, speed) debug(status, end=end, flush=True) def print_final_progress(filename: str, total: int, elapsed: float) -> None: - """Print final progress line (100%) with time elapsed. - - Args: - filename: File that was downloaded - total: Total size - elapsed: Time elapsed in seconds - """ + """Print final progress line (100%) with time elapsed.""" bar = format_progress_bar(total, total, width=30) size_str = format_size(total) - - # Format elapsed time + if elapsed < 60: time_str = f"{elapsed:.1f}s" elif elapsed < 3600: @@ -124,20 +85,18 @@ def print_final_progress(filename: str, total: int, elapsed: float) -> None: else: hours = elapsed / 3600 time_str = f"{hours:.2f}h" - + debug(f"{bar} ({size_str}) - {time_str}") if __name__ == "__main__": - # Demo import time - + log("Progress Bar Demo:", file=sys.stderr) - - # Demo 1: Simple progress + for i in range(101): print_progress("demo.bin", i * 10 * 1024 * 1024, 1024 * 1024 * 1024) time.sleep(0.02) - + print_final_progress("demo.bin", 1024 * 1024 * 1024, 2.0) log() diff --git a/helper/tasks.py b/SYS/tasks.py similarity index 99% rename from helper/tasks.py rename to SYS/tasks.py index c5fd61c..8a49945 100644 --- a/helper/tasks.py +++ b/SYS/tasks.py @@ -7,7 +7,7 @@ import socket import subprocess import sys -from helper.logger import log +from SYS.logger import log import threading import time from typing import IO, Iterable diff --git a/helper/utils.py b/SYS/utils.py similarity index 99% rename from helper/utils.py rename to SYS/utils.py index f7689fd..b676afe 100644 --- a/helper/utils.py +++ b/SYS/utils.py @@ -14,7 +14,7 @@ from dataclasses import dataclass, field from fnmatch import fnmatch from urllib.parse import urlparse -import helper.utils_constant +import SYS.utils_constant try: import cbor2 @@ -90,7 +90,7 @@ def create_metadata_sidecar(file_path: Path, metadata: dict) -> None: metadata['hash'] = sha256_file(file_path) metadata['size'] = Path(file_path).stat().st_size format_found = False - for mime_type, ext_map in helper.utils_constant.mime_maps.items(): + for mime_type, ext_map in SYS.utils_constant.mime_maps.items(): for key, info in ext_map.items(): if info.get("ext") == file_ext: metadata['type'] = mime_type diff --git a/helper/utils_constant.py b/SYS/utils_constant.py similarity index 97% rename from 
helper/utils_constant.py rename to SYS/utils_constant.py index cd106a5..f319f28 100644 --- a/helper/utils_constant.py +++ b/SYS/utils_constant.py @@ -81,22 +81,20 @@ mime_maps = { def get_type_from_ext(ext: str) -> str: """Determine the type (e.g., 'image', 'video', 'audio') from file extension. - + Args: ext: File extension (with or without leading dot, e.g., 'jpg' or '.jpg') - + Returns: Type string (e.g., 'image', 'video', 'audio') or 'other' if unknown """ if not ext: return 'other' - - # Normalize: remove leading dot and convert to lowercase + ext_clean = ext.lstrip('.').lower() - - # Search through mime_maps to find matching type + for type_name, extensions_dict in mime_maps.items(): if ext_clean in extensions_dict: return type_name - + return 'other' diff --git a/helper/worker_manager.py b/SYS/worker_manager.py similarity index 99% rename from helper/worker_manager.py rename to SYS/worker_manager.py index ab7f908..6549374 100644 --- a/helper/worker_manager.py +++ b/SYS/worker_manager.py @@ -11,8 +11,8 @@ from datetime import datetime from threading import Thread, Lock import time -from .folder_store import FolderDB -from helper.logger import log +from ..API.folder import API_folder_store +from SYS.logger import log logger = logging.getLogger(__name__) @@ -140,7 +140,7 @@ class Worker: class WorkerLoggingHandler(logging.StreamHandler): """Custom logging handler that captures logs for a worker.""" - def __init__(self, worker_id: str, db: FolderDB, + def __init__(self, worker_id: str, db: API_folder_store, manager: Optional['WorkerManager'] = None, buffer_size: int = 50): """Initialize the handler. @@ -235,7 +235,7 @@ class WorkerManager: auto_refresh_interval: Seconds between auto-refresh checks (0 = disabled) """ self.library_root = Path(library_root) - self.db = FolderDB(library_root) + self.db = API_folder_store(library_root) self.auto_refresh_interval = auto_refresh_interval self.refresh_callbacks: List[Callable] = [] self.refresh_thread: Optional[Thread] = None diff --git a/Store/Folder.py b/Store/Folder.py new file mode 100644 index 0000000..3a09376 --- /dev/null +++ b/Store/Folder.py @@ -0,0 +1,977 @@ +from __future__ import annotations + +import json +import re +import shutil +import sys +from fnmatch import translate +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple + +from SYS.logger import debug, log +from SYS.utils import sha256_file + +from Store._base import StoreBackend + + +def _normalize_hash(value: Any) -> Optional[str]: + candidate = str(value or '').strip().lower() + if len(candidate) != 64: + return None + if any(ch not in '0123456789abcdef' for ch in candidate): + return None + return candidate + + +def _resolve_file_hash(db_hash: Optional[str], file_path: Path) -> Optional[str]: + normalized = _normalize_hash(db_hash) if db_hash else None + if normalized: + return normalized + return _normalize_hash(file_path.stem) + + +class Folder(StoreBackend): + """""" + # Track which locations have already been migrated to avoid repeated migrations + _migrated_locations = set() + + def __init__(self, location: Optional[str] = None, name: Optional[str] = None) -> None: + self._location = location + self._name = name + + if self._location: + try: + from API.folder import API_folder_store + from pathlib import Path + location_path = Path(self._location).expanduser() + + # Use context manager to ensure connection is properly closed + with API_folder_store(location_path) as db: + if db.connection: + db.connection.commit() + + # Call migration and 
discovery at startup + Folder.migrate_location(self._location) + except Exception as exc: + debug(f"Failed to initialize database for '{name}': {exc}") + + @classmethod + def migrate_location(cls, location: Optional[str]) -> None: + """Migrate a location to hash-based storage (one-time operation, call explicitly at startup).""" + if not location: + return + + from pathlib import Path + location_path = Path(location).expanduser() + location_str = str(location_path) + + # Only migrate once per location + if location_str in cls._migrated_locations: + return + + cls._migrated_locations.add(location_str) + + # Create a temporary instance just to call the migration + temp_instance = cls(location=location) + temp_instance._migrate_to_hash_storage(location_path) + + def _migrate_to_hash_storage(self, location_path: Path) -> None: + """Migrate existing files from filename-based to hash-based storage. + + Checks for sidecars (.metadata, .tag) and imports them before renaming. + Also ensures all files have a title: tag. + """ + from API.folder import API_folder_store, read_sidecar, write_sidecar, find_sidecar + + try: + with API_folder_store(location_path) as db: + cursor = db.connection.cursor() + + # First pass: migrate filename-based files and add title tags + # Scan all files in the storage directory + for file_path in sorted(location_path.iterdir()): + if not file_path.is_file(): + continue + + # Skip database files and sidecars + if file_path.suffix in ('.db', '.metadata', '.tag', '-shm', '-wal'): + continue + # Also skip if the file ends with -shm or -wal (SQLite journal files) + if file_path.name.endswith(('-shm', '-wal')): + continue + + # Check if filename is already a hash (without extension) + if len(file_path.stem) == 64 and all(c in '0123456789abcdef' for c in file_path.stem.lower()): + continue # Already migrated, will process in second pass + + try: + # Compute file hash + file_hash = sha256_file(file_path) + # Preserve extension in the hash-based filename + file_ext = file_path.suffix # e.g., '.mp4' + hash_filename = file_hash + file_ext if file_ext else file_hash + hash_path = location_path / hash_filename + + # Check for sidecars and import them + sidecar_path = find_sidecar(file_path) + tags_to_add = [] + url_to_add = [] + has_title_tag = False + + if sidecar_path and sidecar_path.exists(): + try: + _, tags, url = read_sidecar(sidecar_path) + if tags: + tags_to_add = list(tags) + # Check if title tag exists + has_title_tag = any(t.lower().startswith('title:') for t in tags_to_add) + if url: + url_to_add = list(url) + debug(f"Found sidecar for {file_path.name}: {len(tags_to_add)} tags, {len(url_to_add)} url", file=sys.stderr) + # Delete the sidecar after importing + sidecar_path.unlink() + except Exception as exc: + debug(f"Failed to read sidecar for {file_path.name}: {exc}", file=sys.stderr) + + # Ensure there's a title tag (use original filename if not present) + if not has_title_tag: + tags_to_add.append(f"title:{file_path.name}") + + # Rename file to hash if needed + if hash_path != file_path and not hash_path.exists(): + debug(f"Migrating: {file_path.name} -> {hash_filename}", file=sys.stderr) + file_path.rename(hash_path) + + # Create or update database entry + db.get_or_create_file_entry(hash_path) + + # Save extension metadata + ext_clean = file_ext.lstrip('.') if file_ext else '' + db.save_metadata(hash_path, { + 'hash': file_hash, + 'ext': ext_clean, + 'size': hash_path.stat().st_size + }) + + # Add all tags (including title tag) + if tags_to_add: + 
db.save_tags(hash_path, tags_to_add) + debug(f"Added {len(tags_to_add)} tags to {file_hash}", file=sys.stderr) + + # Note: url would need a separate table if you want to store them + # For now, we're just noting them in debug + if url_to_add: + debug(f"Imported {len(url_to_add)} url for {file_hash}: {url_to_add}", file=sys.stderr) + + except Exception as exc: + debug(f"Failed to migrate file {file_path.name}: {exc}", file=sys.stderr) + + # Second pass: ensure all files in database have a title: tag + db.connection.commit() + cursor.execute(''' + SELECT f.hash, f.file_path + FROM files f + WHERE NOT EXISTS ( + SELECT 1 FROM tags t WHERE t.hash = f.hash AND LOWER(t.tag) LIKE 'title:%' + ) + ''') + files_without_title = cursor.fetchall() + + for file_hash, file_path_str in files_without_title: + try: + file_path = Path(file_path_str) + if file_path.exists(): + # Use the filename as the title + title_tag = f"title:{file_path.name}" + db.save_tags(file_path, [title_tag]) + debug(f"Added title tag to {file_path.name}", file=sys.stderr) + except Exception as exc: + debug(f"Failed to add title tag to file {file_path_str}: {exc}", file=sys.stderr) + + db.connection.commit() + + # Third pass: discover files on disk that aren't in the database yet + # These are hash-named files that were added after initial indexing + cursor.execute('SELECT LOWER(hash) FROM files') + db_hashes = {row[0] for row in cursor.fetchall()} + + discovered = 0 + for file_path in sorted(location_path.rglob("*")): + if file_path.is_file(): + # Check if file name (without extension) is a 64-char hex hash + name_without_ext = file_path.stem + if len(name_without_ext) == 64 and all(c in '0123456789abcdef' for c in name_without_ext.lower()): + file_hash = name_without_ext.lower() + + # Skip if already in DB + if file_hash in db_hashes: + continue + + try: + # Add file to DB (creates entry and auto-adds title: tag) + db.get_or_create_file_entry(file_path) + + # Save extension metadata + file_ext = file_path.suffix + ext_clean = file_ext.lstrip('.') if file_ext else '' + db.save_metadata(file_path, { + 'hash': file_hash, + 'ext': ext_clean, + 'size': file_path.stat().st_size + }) + + discovered += 1 + except Exception as e: + debug(f"Failed to discover file {file_path.name}: {e}", file=sys.stderr) + + if discovered > 0: + debug(f"Discovered and indexed {discovered} undiscovered files in {location_path.name}", file=sys.stderr) + db.connection.commit() + except Exception as exc: + debug(f"Migration to hash storage failed: {exc}", file=sys.stderr) + + + def location(self) -> str: + return self._location + + def name(self) -> str: + return self._name + + def add_file(self, file_path: Path, **kwargs: Any) -> str: + """Add file to local folder storage with full metadata support. 
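+
+        Illustrative sketch only (the store name, file, and tag values below are
+        assumptions, not taken from any real configuration):
+
+            folder = Folder(location="~/media", name="default")
+            file_hash = folder.add_file(
+                Path("song.flac"),
+                tags=["artist:someone", "title:Song"],
+                url=["https://example.org/song"],
+                move=False,
+            )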
+ + Args: + file_path: Path to the file to add + move: If True, move file instead of copy (default: False) + tags: Optional list of tags to add + url: Optional list of url to associate with the file + title: Optional title (will be added as 'title:value' tag) + + Returns: + File hash (SHA256 hex string) as identifier + """ + move_file = bool(kwargs.get("move")) + tags = kwargs.get("tags", []) + url = kwargs.get("url", []) + title = kwargs.get("title") + + # Extract title from tags if not explicitly provided + if not title: + for tag in tags: + if isinstance(tag, str) and tag.lower().startswith("title:"): + title = tag.split(":", 1)[1].strip() + break + + # Fallback to filename if no title + if not title: + title = file_path.name + + # Ensure title is in tags + title_tag = f"title:{title}" + if not any(str(tag).lower().startswith("title:") for tag in tags): + tags = [title_tag] + list(tags) + + try: + file_hash = sha256_file(file_path) + debug(f"File hash: {file_hash}", file=sys.stderr) + + # Preserve extension in the stored filename + file_ext = file_path.suffix # e.g., '.mp4' + save_filename = file_hash + file_ext if file_ext else file_hash + save_file = Path(self._location) / save_filename + + # Check if file already exists + from API.folder import API_folder_store + with API_folder_store(Path(self._location)) as db: + existing_path = db.search_hash(file_hash) + if existing_path and existing_path.exists(): + log( + f"✓ File already in local storage: {existing_path}", + file=sys.stderr, + ) + # Still add tags and url if provided + if tags: + self.add_tag(file_hash, tags) + if url: + self.add_url(file_hash, url) + return file_hash + + # Move or copy file + if move_file: + shutil.move(str(file_path), str(save_file)) + debug(f"Local move: {save_file}", file=sys.stderr) + else: + shutil.copy2(str(file_path), str(save_file)) + debug(f"Local copy: {save_file}", file=sys.stderr) + + # Save to database + with API_folder_store(Path(self._location)) as db: + db.get_or_create_file_entry(save_file) + # Save metadata including extension + ext_clean = file_ext.lstrip('.') if file_ext else '' + db.save_metadata(save_file, { + 'hash': file_hash, + 'ext': ext_clean, + 'size': file_path.stat().st_size + }) + + # Add tags if provided + if tags: + self.add_tag(file_hash, tags) + + # Add url if provided + if url: + self.add_url(file_hash, url) + + log(f"✓ Added to local storage: {save_file.name}", file=sys.stderr) + return file_hash + + except Exception as exc: + log(f"❌ Local storage failed: {exc}", file=sys.stderr) + raise + + def search_store(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]: + """Search local database for files by title tag or filename.""" + from fnmatch import fnmatch + from API.folder import DatabaseAPI + + limit = kwargs.get("limit") + try: + limit = int(limit) if limit is not None else None + except (TypeError, ValueError): + limit = None + if isinstance(limit, int) and limit <= 0: + limit = None + + query = query.lower() + query_lower = query # Ensure query_lower is defined for all code paths + match_all = query == "*" + results = [] + search_dir = Path(self._location).expanduser() + + tokens = [t.strip() for t in query.split(',') if t.strip()] + + if not match_all and len(tokens) == 1 and _normalize_hash(query): + debug("Hash queries require 'hash:' prefix for local search") + return results + + if not match_all and _normalize_hash(query): + debug("Hash queries require 'hash:' prefix for local search") + return results + + def _create_entry(file_path: Path, tags: list[str], 
size_bytes: int | None, db_hash: Optional[str]) -> dict[str, Any]: + path_str = str(file_path) + # Get title from tags if available, otherwise use hash as fallback + title = next((t.split(':', 1)[1] for t in tags if t.lower().startswith('title:')), None) + if not title: + # Fallback to hash if no title tag exists + hash_value = _resolve_file_hash(db_hash, file_path) + title = hash_value if hash_value else file_path.stem + + # Extract extension from file path + ext = file_path.suffix.lstrip('.') + if not ext: + # Fallback: try to extract from title (original filename might be in title) + title_path = Path(title) + ext = title_path.suffix.lstrip('.') + + # Build clean entry with only necessary fields + hash_value = _resolve_file_hash(db_hash, file_path) + entry = { + "title": title, + "ext": ext, + "path": path_str, + "target": path_str, + "store": self._name, + "size": size_bytes, + "hash": hash_value, + "tag": tags, + } + return entry + + try: + if not search_dir.exists(): + debug(f"Search directory does not exist: {search_dir}") + return results + + try: + with DatabaseAPI(search_dir) as api: + if tokens and len(tokens) > 1: + def _like_pattern(term: str) -> str: + return term.replace('*', '%').replace('?', '_') + + def _ids_for_token(token: str) -> set[int]: + token = token.strip() + if not token: + return set() + + if ':' in token and not token.startswith(':'): + namespace, pattern = token.split(':', 1) + namespace = namespace.strip().lower() + pattern = pattern.strip().lower() + + if namespace == 'hash': + normalized_hash = _normalize_hash(pattern) + if not normalized_hash: + return set() + h = api.get_file_hash_by_hash(normalized_hash) + return {h} if h else set() + + if namespace == 'store': + if pattern not in {'local', 'file', 'filesystem'}: + return set() + return api.get_all_file_hashes() + + query_pattern = f"{namespace}:%" + tag_rows = api.get_file_hashes_by_tag_pattern(query_pattern) + matched: set[str] = set() + for file_hash, tag_val in tag_rows: + if not tag_val: + continue + tag_lower = str(tag_val).lower() + if not tag_lower.startswith(f"{namespace}:"): + continue + value = tag_lower[len(namespace)+1:] + if fnmatch(value, pattern): + matched.add(file_hash) + return matched + + term = token.lower() + like_pattern = f"%{_like_pattern(term)}%" + hashes = api.get_file_hashes_by_path_pattern(like_pattern) + hashes.update(api.get_file_hashes_by_tag_substring(like_pattern)) + return hashes + + try: + matching_hashes: set[str] | None = None + for token in tokens: + hashes = _ids_for_token(token) + matching_hashes = hashes if matching_hashes is None else matching_hashes & hashes + if not matching_hashes: + return results + + if not matching_hashes: + return results + + rows = api.get_file_metadata(matching_hashes, limit) + for file_hash, file_path_str, size_bytes, ext in rows: + if not file_path_str: + continue + file_path = Path(file_path_str) + if not file_path.exists(): + continue + if size_bytes is None: + try: + size_bytes = file_path.stat().st_size + except OSError: + size_bytes = None + tags = api.get_tags_for_file(file_hash) + entry = _create_entry(file_path, tags, size_bytes, file_hash) + results.append(entry) + if limit is not None and len(results) >= limit: + return results + return results + except Exception as exc: + log(f"⚠️ AND search failed: {exc}", file=sys.stderr) + debug(f"AND search exception details: {exc}") + return [] + + if ":" in query and not query.startswith(":"): + namespace, pattern = query.split(":", 1) + namespace = namespace.strip().lower() + 
pattern = pattern.strip().lower() + debug(f"Performing namespace search: {namespace}:{pattern}") + + if namespace == "hash": + normalized_hash = _normalize_hash(pattern) + if not normalized_hash: + return results + h = api.get_file_hash_by_hash(normalized_hash) + hashes = {h} if h else set() + rows = api.get_file_metadata(hashes, limit) + for file_hash, file_path_str, size_bytes, ext in rows: + if not file_path_str: + continue + file_path = Path(file_path_str) + if not file_path.exists(): + continue + if size_bytes is None: + try: + size_bytes = file_path.stat().st_size + except OSError: + size_bytes = None + tags = api.get_tags_for_file(file_hash) + entry = _create_entry(file_path, tags, size_bytes, file_hash) + results.append(entry) + if limit is not None and len(results) >= limit: + return results + return results + + query_pattern = f"{namespace}:%" + rows = api.get_files_by_namespace_pattern(query_pattern, limit) + debug(f"Found {len(rows)} potential matches in DB") + + for file_hash, file_path_str, size_bytes, ext in rows: + if not file_path_str: + continue + + tags = api.get_tags_by_namespace_and_file(file_hash, query_pattern) + + for tag in tags: + tag_lower = tag.lower() + if tag_lower.startswith(f"{namespace}:"): + value = tag_lower[len(namespace)+1:] + if fnmatch(value, pattern): + file_path = Path(file_path_str) + if file_path.exists(): + if size_bytes is None: + size_bytes = file_path.stat().st_size + all_tags = api.get_tags_for_file(file_hash) + entry = _create_entry(file_path, all_tags, size_bytes, file_hash) + results.append(entry) + else: + debug(f"File missing on disk: {file_path}") + break + + if limit is not None and len(results) >= limit: + return results + elif not match_all: + terms = [t.strip() for t in query_lower.replace(',', ' ').split() if t.strip()] + if not terms: + terms = [query_lower] + + debug(f"Performing filename/tag search for terms: {terms}") + + fetch_limit = (limit or 45) * 50 + + conditions = ["LOWER(f.file_path) LIKE ?" for _ in terms] + params = [f"%{t}%" for t in terms] + + rows = api.get_files_by_multiple_path_conditions(conditions, params, fetch_limit) + debug(f"Found {len(rows)} filename matches in DB (before whole-word filter)") + + word_regex = None + if len(terms) == 1: + term = terms[0] + has_wildcard = '*' in term or '?' 
in term + + if has_wildcard: + try: + from fnmatch import translate + word_regex = re.compile(translate(term), re.IGNORECASE) + except Exception: + word_regex = None + else: + try: + pattern = r'(?= limit: + return results + + if terms: + title_hits: dict[str, dict[str, Any]] = {} + for term in terms: + title_pattern = f"title:%{term}%" + title_rows = api.get_files_by_title_tag_pattern(title_pattern, fetch_limit) + for file_hash, file_path_str, size_bytes, ext in title_rows: + if not file_path_str: + continue + entry = title_hits.get(file_hash) + if entry: + entry["count"] += 1 + if size_bytes is not None: + entry["size"] = size_bytes + else: + title_hits[file_hash] = { + "path": file_path_str, + "size": size_bytes, + "hash": file_hash, + "count": 1, + } + + if title_hits: + required = len(terms) + for file_hash, info in title_hits.items(): + if info.get("count") != required: + continue + file_path_str = info.get("path") + if not file_path_str or file_path_str in seen_files: + continue + file_path = Path(file_path_str) + if not file_path.exists(): + continue + seen_files.add(file_path_str) + + size_bytes = info.get("size") + if size_bytes is None: + try: + size_bytes = file_path.stat().st_size + except OSError: + size_bytes = None + + tags = api.get_tags_for_file(file_hash) + entry = _create_entry(file_path, tags, size_bytes, info.get("hash")) + results.append(entry) + if limit is not None and len(results) >= limit: + return results + + query_pattern = f"%{query_lower}%" + tag_rows = api.get_files_by_simple_tag_pattern(query_pattern, limit) + + for file_hash, file_path_str, size_bytes, ext in tag_rows: + if not file_path_str or file_path_str in seen_files: + continue + seen_files.add(file_path_str) + + file_path = Path(file_path_str) + if file_path.exists(): + if size_bytes is None: + size_bytes = file_path.stat().st_size + + tags = api.get_tags_for_file(file_hash) + entry = _create_entry(file_path, tags, size_bytes, file_hash) + results.append(entry) + + if limit is not None and len(results) >= limit: + return results + + else: + rows = api.get_all_files(limit) + for file_hash, file_path_str, size_bytes, ext in rows: + if file_path_str: + file_path = Path(file_path_str) + if file_path.exists(): + if size_bytes is None: + size_bytes = file_path.stat().st_size + + tags = api.get_tags_for_file(file_hash) + entry = _create_entry(file_path, tags, size_bytes, file_hash) + results.append(entry) + + if results: + debug(f"Returning {len(results)} results from DB") + else: + debug("No results found in DB") + return results + + except Exception as e: + log(f"⚠️ Database search failed: {e}", file=sys.stderr) + debug(f"DB search exception details: {e}") + return [] + + except Exception as exc: + log(f"❌ Local search failed: {exc}", file=sys.stderr) + raise + + def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]: + """Alias for search_file to match the interface expected by FileStorage.""" + return self.search_store(query, **kwargs) + + def _resolve_library_root(self, file_path: Path, config: Dict[str, Any]) -> Optional[Path]: + """Return the library root containing medios-macina.db. 
+ + Prefer the store's configured location, then config override, then walk parents + of the file path to find a directory with medios-macina.db.""" + candidates: list[Path] = [] + if self._location: + candidates.append(Path(self._location).expanduser()) + cfg_root = get_local_storage_path(config) if config else None + if cfg_root: + candidates.append(Path(cfg_root).expanduser()) + + for root in candidates: + db_path = root / "medios-macina.db" + if db_path.exists(): + return root + + try: + for parent in [file_path] + list(file_path.parents): + db_path = parent / "medios-macina.db" + if db_path.exists(): + return parent + except Exception: + pass + return None + + def get_file(self, file_hash: str, **kwargs: Any) -> Optional[Path]: + """Retrieve file by hash, returning path to the file. + + Args: + file_hash: SHA256 hash of the file (64-char hex string) + + Returns: + Path to the file or None if not found + """ + try: + # Normalize the hash + normalized_hash = _normalize_hash(file_hash) + if not normalized_hash: + return None + + search_dir = Path(self._location).expanduser() + from API.folder import API_folder_store + + with API_folder_store(search_dir) as db: + # Search for file by hash + file_path = db.search_hash(normalized_hash) + + if file_path and file_path.exists(): + return file_path + + return None + + except Exception as exc: + debug(f"Failed to get file for hash {file_hash}: {exc}") + return None + + def get_metadata(self, file_hash: str) -> Optional[Dict[str, Any]]: + """Get metadata for a file from the database by hash. + + Args: + file_hash: SHA256 hash of the file (64-char hex string) + + Returns: + Dict with metadata fields (ext, size, hash, duration, etc.) or None if not found + """ + try: + # Normalize the hash + normalized_hash = _normalize_hash(file_hash) + if not normalized_hash: + return None + + search_dir = Path(self._location).expanduser() + from API.folder import DatabaseAPI + + with DatabaseAPI(search_dir) as api: + # Get file hash + file_hash_result = api.get_file_hash_by_hash(normalized_hash) + if not file_hash_result: + return None + + # Query metadata directly from database + cursor = api.get_cursor() + cursor.execute(""" + SELECT * FROM metadata WHERE hash = ? + """, (file_hash_result,)) + + row = cursor.fetchone() + if not row: + return None + + metadata = dict(row) + + # Canonicalize metadata keys (no legacy aliases) + if "file_path" in metadata and "path" not in metadata: + metadata["path"] = metadata.get("file_path") + metadata.pop("file_path", None) + + # Parse JSON fields + for field in ['url', 'relationships']: + if metadata.get(field): + try: + metadata[field] = json.loads(metadata[field]) + except (json.JSONDecodeError, TypeError): + metadata[field] = [] if field == 'url' else [] + + return metadata + except Exception as exc: + debug(f"Failed to get metadata for hash {file_hash}: {exc}") + return None + + def get_tag(self, file_identifier: str, **kwargs: Any) -> Tuple[List[str], str]: + """Get tags for a local file by hash. 
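+
+        Illustrative sketch (the hash and returned values are hypothetical):
+
+            tags, store_name = folder.get_tag("0f3a...9c")
+            # e.g. tags == ["title:Song", "artist:someone"], store_name == "default"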
+ + Returns: + Tuple of (tags_list, store_name) where store_name is the actual store name + """ + from API.folder import API_folder_store + try: + file_hash = file_identifier + if self._location: + try: + with API_folder_store(Path(self._location)) as db: + db_tags = db.get_tags(file_hash) + if db_tags: + # Return actual store name instead of generic "local_db" + store_name = self._name if self._name else "local" + return list(db_tags), store_name + except Exception as exc: + debug(f"Local DB lookup failed: {exc}") + return [], "unknown" + except Exception as exc: + debug(f"get_tags failed for local file: {exc}") + return [], "unknown" + + def add_tag(self, hash: str, tag: List[str], **kwargs: Any) -> bool: + """Add tags to a local file by hash (via API_folder_store). + + Handles namespace collapsing: when adding namespace:value, removes existing namespace:* tags. + Returns True if tags were successfully added. + """ + from API.folder import API_folder_store + try: + if not self._location: + return False + + try: + with API_folder_store(Path(self._location)) as db: + # Get existing tags + existing_tags = list(db.get_tags(hash) or []) + original_tags_lower = {t.lower() for t in existing_tags} + + # Merge new tags, handling namespace overwrites + for new_tag in tag: + if ':' in new_tag: + namespace = new_tag.split(':', 1)[0] + # Remove existing tags in same namespace + existing_tags = [t for t in existing_tags if not t.startswith(namespace + ':')] + # Add new tag if not already present (case-insensitive check) + if new_tag.lower() not in original_tags_lower: + existing_tags.append(new_tag) + + # Save merged tags + db.add_tags_to_hash(hash, existing_tags) + return True + except Exception as exc: + debug(f"Local DB add_tags failed: {exc}") + return False + except Exception as exc: + debug(f"add_tag failed for local file: {exc}") + return False + + def delete_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool: + """Remove tags from a local file by hash.""" + from API.folder import API_folder_store + try: + file_hash = file_identifier + if self._location: + try: + with API_folder_store(Path(self._location)) as db: + db.remove_tags_from_hash(file_hash, list(tags)) + return True + except Exception as exc: + debug(f"Local DB remove_tags failed: {exc}") + return False + except Exception as exc: + debug(f"delete_tag failed for local file: {exc}") + return False + + def get_url(self, file_identifier: str, **kwargs: Any) -> List[str]: + """Get known url for a local file by hash.""" + from API.folder import API_folder_store + try: + file_hash = file_identifier + if self._location: + try: + with API_folder_store(Path(self._location)) as db: + meta = db.get_metadata(file_hash) or {} + return list(meta.get("url") or []) + except Exception as exc: + debug(f"Local DB get_metadata failed: {exc}") + return [] + except Exception as exc: + debug(f"get_url failed for local file: {exc}") + return [] + + def add_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool: + """Add known url to a local file by hash.""" + from API.folder import API_folder_store + try: + file_hash = file_identifier + if self._location: + try: + with API_folder_store(Path(self._location)) as db: + meta = db.get_metadata(file_hash) or {} + existing_urls = list(meta.get("url") or []) + changed = False + for u in list(url or []): + if not u: + continue + if u not in existing_urls: + existing_urls.append(u) + changed = True + if changed: + db.update_metadata_by_hash(file_hash, {"url": existing_urls}) + 
return True + except Exception as exc: + debug(f"Local DB add_url failed: {exc}") + return False + except Exception as exc: + debug(f"add_url failed for local file: {exc}") + return False + + def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool: + """Delete known url from a local file by hash.""" + from API.folder import API_folder_store + try: + file_hash = file_identifier + if self._location: + try: + with API_folder_store(Path(self._location)) as db: + meta = db.get_metadata(file_hash) or {} + existing_urls = list(meta.get("url") or []) + remove_set = {u for u in (url or []) if u} + if not remove_set: + return False + new_urls = [u for u in existing_urls if u not in remove_set] + if new_urls != existing_urls: + db.update_metadata_by_hash(file_hash, {"url": new_urls}) + return True + except Exception as exc: + debug(f"Local DB delete_url failed: {exc}") + return False + except Exception as exc: + debug(f"delete_url failed for local file: {exc}") + return False + + def delete_file(self, file_identifier: str, **kwargs: Any) -> bool: + """Delete a file from the folder store. + + Args: + file_identifier: The file path (as string) or hash of the file to delete + **kwargs: Optional parameters + + Returns: + True if deletion succeeded, False otherwise + """ + from API.folder import API_folder_store + try: + file_path = Path(file_identifier) + + # Delete from database + with API_folder_store(Path(self._location)) as db: + db.delete_file(file_path) + + # Delete the actual file from disk + if file_path.exists(): + file_path.unlink() + debug(f"Deleted file: {file_path}") + return True + else: + debug(f"File not found on disk: {file_path}") + return True # Already gone + except Exception as exc: + debug(f"delete_file failed: {exc}") + return False diff --git a/Store/HydrusNetwork.py b/Store/HydrusNetwork.py new file mode 100644 index 0000000..2f47694 --- /dev/null +++ b/Store/HydrusNetwork.py @@ -0,0 +1,597 @@ +from __future__ import annotations + +import re +import sys +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple + +from SYS.logger import debug, log +from SYS.utils_constant import mime_maps + +from Store._base import StoreBackend + + +class HydrusNetwork(StoreBackend): + """File storage backend for Hydrus client. + + Each instance represents a specific Hydrus client connection. + Maintains its own HydrusClient with session key. + """ + + def __init__(self, instance_name: str, api_key: str, url: str) -> None: + """Initialize Hydrus storage backend. + + Args: + instance_name: Name of this Hydrus instance (e.g., 'home', 'work') + api_key: Hydrus Client API access key + url: Hydrus client URL (e.g., 'http://192.168.1.230:45869') + """ + from API.HydrusNetwork import HydrusClient + + self._instance_name = instance_name + self._api_key = api_key + self._url = url + # Create persistent client with session key for this instance + self._client = HydrusClient(url=url, access_key=api_key) + + def name(self) -> str: + return self._instance_name + + def get_name(self) -> str: + return self._instance_name + + def add_file(self, file_path: Path, **kwargs: Any) -> str: + """Upload file to Hydrus with full metadata support. 
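+
+        Illustrative sketch (the URL, access key, and tag values are assumptions):
+
+            store = HydrusNetwork("home", api_key="abc123", url="http://127.0.0.1:45869")
+            file_hash = store.add_file(
+                Path("clip.mp4"),
+                tags=["title:Clip"],
+                url=["https://example.org/source"],
+            )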
+ + Args: + file_path: Path to the file to upload + tags: Optional list of tags to add + url: Optional list of url to associate with the file + title: Optional title (will be added as 'title:value' tag) + + Returns: + File hash from Hydrus + + Raises: + Exception: If upload fails + """ + from SYS.utils import sha256_file + + tags = kwargs.get("tags", []) + url = kwargs.get("url", []) + title = kwargs.get("title") + + # Add title to tags if provided and not already present + if title: + title_tag = f"title:{title}" + if not any(str(tag).lower().startswith("title:") for tag in tags): + tags = [title_tag] + list(tags) + + try: + # Compute file hash + file_hash = sha256_file(file_path) + debug(f"File hash: {file_hash}") + + # Use persistent client with session key + client = self._client + if client is None: + raise Exception("Hydrus client unavailable") + + # Check if file already exists in Hydrus + file_exists = False + try: + metadata = client.fetch_file_metadata(hashes=[file_hash]) + if metadata and isinstance(metadata, dict): + files = metadata.get("file_metadata", []) + if files: + file_exists = True + log( + f"ℹ️ Duplicate detected - file already in Hydrus with hash: {file_hash}", + file=sys.stderr, + ) + except Exception: + pass + + # Upload file if not already present + if not file_exists: + log(f"Uploading to Hydrus: {file_path.name}", file=sys.stderr) + response = client.add_file(file_path) + + # Extract hash from response + hydrus_hash: Optional[str] = None + if isinstance(response, dict): + hydrus_hash = response.get("hash") or response.get("file_hash") + if not hydrus_hash: + hashes = response.get("hashes") + if isinstance(hashes, list) and hashes: + hydrus_hash = hashes[0] + + if not hydrus_hash: + raise Exception(f"Hydrus response missing file hash: {response}") + + file_hash = hydrus_hash + log(f"Hydrus: {file_hash}", file=sys.stderr) + + # Add tags if provided (both for new and existing files) + if tags: + try: + # Use default tag service + service_name = "my tags" + except Exception: + service_name = "my tags" + + try: + debug(f"Adding {len(tags)} tag(s) to Hydrus: {tags}") + client.add_tags(file_hash, tags, service_name) + log(f"Tags added via '{service_name}'", file=sys.stderr) + except Exception as exc: + log(f"⚠️ Failed to add tags: {exc}", file=sys.stderr) + + # Associate url if provided (both for new and existing files) + if url: + log(f"Associating {len(url)} URL(s) with file", file=sys.stderr) + for url in url: + if url: + try: + client.associate_url(file_hash, str(url)) + debug(f"Associated URL: {url}") + except Exception as exc: + log(f"⚠️ Failed to associate URL {url}: {exc}", file=sys.stderr) + + return file_hash + + except Exception as exc: + log(f"❌ Hydrus upload failed: {exc}", file=sys.stderr) + raise + + def search_store(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]: + """Search Hydrus database for files matching query. + + Args: + query: Search query (tags, filenames, hashes, etc.) 
+ limit: Maximum number of results to return (default: 100) + + Returns: + List of dicts with 'name', 'hash', 'size', 'tags' fields + + Example: + results = storage["hydrus"].search("artist:john_doe music") + results = storage["hydrus"].search("Simple Man") + """ + limit = kwargs.get("limit", 100) + + try: + client = self._client + if client is None: + raise Exception("Hydrus client unavailable") + + debug(f"Searching Hydrus for: {query}") + + # Parse the query into tags + # Handle both simple tags and complex queries + # "*" means "match all" - use system:everything tag in Hydrus + if query.strip() == "*": + # Use system:everything to match all files in Hydrus + tags = ["system:everything"] + else: + query_lower = query.lower().strip() + # If query doesn't have a namespace (no ':'), search all files and filter by title/tags + # If query has explicit namespace, use it as a tag search + if ':' not in query_lower: + # No namespace provided: search all files, then filter by title/tags containing the query + tags = ["system:everything"] + else: + # User provided explicit namespace (e.g., "creator:john" or "system:has_audio") + # Use it as a tag search + tags = [query_lower] + + if not tags: + debug(f"Found 0 result(s)") + return [] + + # Search files with the tags + search_result = client.search_files( + tags=tags, + return_hashes=True, + return_file_ids=True + ) + + # Extract file IDs from search result + file_ids = search_result.get("file_ids", []) + hashes = search_result.get("hashes", []) + + if not file_ids and not hashes: + debug(f"Found 0 result(s)") + return [] + + # Fetch metadata for the found files + results = [] + query_lower = query.lower().strip() + # Split by comma or space for AND logic + search_terms = set(query_lower.replace(',', ' ').split()) # For substring matching + + if file_ids: + metadata = client.fetch_file_metadata(file_ids=file_ids) + metadata_list = metadata.get("metadata", []) + + for meta in metadata_list: + if len(results) >= limit: + break + + file_id = meta.get("file_id") + hash_hex = meta.get("hash") + size = meta.get("size", 0) + + # Get tags for this file and extract title + tags_set = meta.get("tags", {}) + all_tags = [] + title = f"Hydrus File {file_id}" # Default fallback + all_tags_str = "" # For substring matching + + # debug(f"[HydrusBackend.search] Processing file_id={file_id}, tags type={type(tags_set)}") + + if isinstance(tags_set, dict): + # Collect both storage_tags and display_tags to capture siblings/parents and ensure title: is seen + def _collect(tag_list: Any) -> None: + nonlocal title, all_tags_str + if not isinstance(tag_list, list): + return + for tag in tag_list: + tag_text = str(tag) if tag else "" + if not tag_text: + continue + all_tags.append(tag_text) + all_tags_str += " " + tag_text.lower() + if tag_text.lower().startswith("title:") and title == f"Hydrus File {file_id}": + title = tag_text.split(":", 1)[1].strip() + + for _service_name, service_tags in tags_set.items(): + if not isinstance(service_tags, dict): + continue + + storage_tags = service_tags.get("storage_tags", {}) + if isinstance(storage_tags, dict): + for tag_list in storage_tags.values(): + _collect(tag_list) + + display_tags = service_tags.get("display_tags", []) + _collect(display_tags) + + # Also consider top-level flattened tags payload if provided (Hydrus API sometimes includes it) + top_level_tags = meta.get("tags_flat", []) or meta.get("tags", []) + _collect(top_level_tags) + + # Resolve extension from MIME type + mime_type = meta.get("mime") + ext = "" + if 
mime_type: + for category in mime_maps.values(): + for _ext_key, info in category.items(): + if mime_type in info.get("mimes", []): + ext = info.get("ext", "").lstrip('.') + break + if ext: + break + + # Filter results based on query type + # If user provided explicit namespace (has ':'), don't do substring filtering + # Just include what the tag search returned + has_namespace = ':' in query_lower + + if has_namespace: + # Explicit namespace search - already filtered by Hydrus tag search + # Include this result as-is + file_url = f"{self._url.rstrip('/')}/get_files/file?hash={hash_hex}" + results.append({ + "hash": hash_hex, + "url": file_url, + "name": title, + "title": title, + "size": size, + "size_bytes": size, + "store": self._instance_name, + "tags": all_tags, + "file_id": file_id, + "mime": mime_type, + "ext": ext, + }) + else: + # Free-form search: check if search terms match the title or tags + # Match if ALL search terms are found in title or tags (AND logic) + # AND use whole word matching + + # Combine title and tags for searching + searchable_text = (title + " " + all_tags_str).lower() + + match = True + if query_lower != "*": + for term in search_terms: + # Regex for whole word: \bterm\b + # Escape term to handle special chars + pattern = r'\b' + re.escape(term) + r'\b' + if not re.search(pattern, searchable_text): + match = False + break + + if match: + file_url = f"{self._url.rstrip('/')}/get_files/file?hash={hash_hex}" + results.append({ + "hash": hash_hex, + "url": file_url, + "name": title, + "title": title, + "size": size, + "size_bytes": size, + "store": self._instance_name, + "tags": all_tags, + "file_id": file_id, + "mime": mime_type, + "ext": ext, + }) + + debug(f"Found {len(results)} result(s)") + return results[:limit] + + except Exception as exc: + log(f"❌ Hydrus search failed: {exc}", file=sys.stderr) + import traceback + traceback.print_exc(file=sys.stderr) + raise + + def get_file(self, file_hash: str, **kwargs: Any) -> Path | str | None: + """Open file in browser via Hydrus client API URL.""" + import webbrowser + + debug(f"[HydrusNetwork.get_file] Starting for hash: {file_hash[:12]}...") + + # Build browser URL with access key + base_url = self._client.url.rstrip('/') + access_key = self._client.access_key + browser_url = f"{base_url}/get_files/file?hash={file_hash}&Hydrus-Client-API-Access-Key={access_key}" + debug(f"[HydrusNetwork.get_file] Opening URL: {browser_url}") + + # Open in default browser + webbrowser.open(browser_url) + debug(f"[HydrusNetwork.get_file] Browser opened successfully") + + # Return the URL string instead of downloading + debug(f"[HydrusNetwork.get_file] Returning URL: {browser_url}") + return browser_url + + def get_metadata(self, file_hash: str, **kwargs: Any) -> Optional[Dict[str, Any]]: + """Get metadata for a file from Hydrus by hash. 
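+
+        Illustrative sketch of the returned shape (the hash and values are
+        hypothetical; the keys match what this method builds):
+
+            meta = store.get_metadata("0f3a...9c")
+            # -> {"hash": "0f3a...9c", "title": "Clip", "ext": "mp4",
+            #     "size": 1048576, "mime": "video/mp4"}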
+ + Args: + file_hash: SHA256 hash of the file (64-char hex string) + + Returns: + Dict with metadata fields or None if not found + """ + try: + client = self._client + if not client: + debug("get_metadata: Hydrus client unavailable") + return None + + # Fetch file metadata + payload = client.fetch_file_metadata(hashes=[file_hash], include_service_keys_to_tags=True) + + if not payload or not payload.get("metadata"): + return None + + meta = payload["metadata"][0] + + # Extract title from tags + title = f"Hydrus_{file_hash[:12]}" + tags_payload = meta.get("tags", {}) + if isinstance(tags_payload, dict): + for service_data in tags_payload.values(): + if isinstance(service_data, dict): + display_tags = service_data.get("display_tags", {}) + if isinstance(display_tags, dict): + current_tags = display_tags.get("0", []) + if isinstance(current_tags, list): + for tag in current_tags: + if str(tag).lower().startswith("title:"): + title = tag.split(":", 1)[1].strip() + break + if title != f"Hydrus_{file_hash[:12]}": + break + + # Determine extension from mime type + mime_type = meta.get("mime", "") + ext = "" + if mime_type: + from SYS.utils_constant import mime_maps + for _category, extensions in mime_maps.items(): + for extension, mime in extensions.items(): + if mime == mime_type: + ext = extension.lstrip(".") + break + if ext: + break + + return { + "hash": file_hash, + "title": title, + "ext": ext, + "size": meta.get("size", 0), + "mime": mime_type, + } + + except Exception as exc: + debug(f"Failed to get metadata from Hydrus: {exc}") + return None + + def get_tag(self, file_identifier: str, **kwargs: Any) -> Tuple[List[str], str]: + """Get tags for a file from Hydrus by hash. + + Args: + file_identifier: File hash (SHA256 hex string) + **kwargs: Optional service_name parameter + + Returns: + Tuple of (tags_list, source_description) + where source is always "hydrus" + """ + try: + from API import HydrusNetwork as hydrus_wrapper + + file_hash = str(file_identifier) + + # Get Hydrus client and service info + client = self._client + if not client: + debug("get_tags: Hydrus client unavailable") + return [], "unknown" + + # Fetch file metadata + payload = client.fetch_file_metadata( + hashes=[file_hash], + include_service_keys_to_tags=True, + include_file_url=False + ) + + items = payload.get("metadata") if isinstance(payload, dict) else None + if not isinstance(items, list) or not items: + debug(f"get_tags: No metadata returned for hash {file_hash}") + return [], "unknown" + + meta = items[0] if isinstance(items[0], dict) else None + if not isinstance(meta, dict) or meta.get("file_id") is None: + debug(f"get_tags: Invalid metadata for hash {file_hash}") + return [], "unknown" + + # Extract tags using service name + service_name = "my tags" + service_key = hydrus_wrapper.get_tag_service_key(client, service_name) + + # Extract tags from metadata + tags = self._extract_tags_from_hydrus_meta(meta, service_key, service_name) + + return tags, "hydrus" + + except Exception as exc: + debug(f"get_tags failed for Hydrus file: {exc}") + return [], "unknown" + + def add_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool: + """Add tags to a Hydrus file. 
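+
+        Accepts an optional service_name kwarg (defaults to "my tags").
+
+        Illustrative sketch (hash and tag values are hypothetical):
+
+            store.add_tag("0f3a...9c", ["creator:someone"], service_name="my tags")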
+ """ + try: + client = self._client + if client is None: + debug("add_tag: Hydrus client unavailable") + return False + service_name = kwargs.get("service_name") or "my tags" + # Ensure tags is a list + tag_list = list(tags) if isinstance(tags, (list, tuple)) else [str(tags)] + if not tag_list: + return False + client.add_tags(file_identifier, tag_list, service_name) + return True + except Exception as exc: + debug(f"Hydrus add_tag failed: {exc}") + return False + + def delete_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool: + """Delete tags from a Hydrus file. + """ + try: + client = self._client + if client is None: + debug("delete_tag: Hydrus client unavailable") + return False + service_name = kwargs.get("service_name") or "my tags" + tag_list = list(tags) if isinstance(tags, (list, tuple)) else [str(tags)] + if not tag_list: + return False + client.delete_tags(file_identifier, tag_list, service_name) + return True + except Exception as exc: + debug(f"Hydrus delete_tag failed: {exc}") + return False + + def get_url(self, file_identifier: str, **kwargs: Any) -> List[str]: + """Get known url for a Hydrus file. + """ + try: + client = self._client + if client is None: + debug("get_url: Hydrus client unavailable") + return [] + payload = client.fetch_file_metadata(hashes=[str(file_identifier)], include_file_url=True) + items = payload.get("metadata") if isinstance(payload, dict) else None + if not isinstance(items, list) or not items: + return [] + meta = items[0] + url = meta.get("url") or [] + return list(url) + except Exception as exc: + debug(f"Hydrus get_url failed: {exc}") + return [] + + def add_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool: + """Associate one or more url with a Hydrus file. + """ + try: + client = self._client + if client is None: + debug("add_url: Hydrus client unavailable") + return False + for u in url: + client.associate_url(file_identifier, u) + return True + except Exception as exc: + debug(f"Hydrus add_url failed: {exc}") + return False + + def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool: + """Delete one or more url from a Hydrus file. + """ + try: + client = self._client + if client is None: + debug("delete_url: Hydrus client unavailable") + return False + for u in url: + client.delete_url(file_identifier, u) + return True + except Exception as exc: + debug(f"Hydrus delete_url failed: {exc}") + return False + + @staticmethod + def _extract_tags_from_hydrus_meta( + meta: Dict[str, Any], + service_key: Optional[str], + service_name: str + ) -> List[str]: + """Extract current tags from Hydrus metadata dict. + + Prefers display_tags (includes siblings/parents, excludes deleted). + Falls back to storage_tags status '0' (current). 
+ """ + tags_payload = meta.get("tags") + if not isinstance(tags_payload, dict): + return [] + + svc_data = None + if service_key: + svc_data = tags_payload.get(service_key) + if not isinstance(svc_data, dict): + return [] + + # Prefer display_tags (Hydrus computes siblings/parents) + display = svc_data.get("display_tags") + if isinstance(display, list) and display: + return [str(t) for t in display if isinstance(t, (str, bytes)) and str(t).strip()] + + # Fallback to storage_tags status '0' (current) + storage = svc_data.get("storage_tags") + if isinstance(storage, dict): + current_list = storage.get("0") or storage.get(0) + if isinstance(current_list, list): + return [str(t) for t in current_list if isinstance(t, (str, bytes)) and str(t).strip()] + + return [] diff --git a/Store/__init__.py b/Store/__init__.py new file mode 100644 index 0000000..8f9438a --- /dev/null +++ b/Store/__init__.py @@ -0,0 +1,7 @@ +from Store._base import StoreBackend +from Store.registry import Store + +__all__ = [ + "StoreBackend", + "Store", +] diff --git a/Store/_base.py b/Store/_base.py new file mode 100644 index 0000000..6b8bcf0 --- /dev/null +++ b/Store/_base.py @@ -0,0 +1,55 @@ +"""Store backend base types. + +Concrete store implementations live in the `Store/` package. +""" + +from __future__ import annotations + +from abc import ABC, abstractmethod +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple + + +class StoreBackend(ABC): + @abstractmethod + def add_file(self, file_path: Path, **kwargs: Any) -> str: + raise NotImplementedError + + @abstractmethod + def name(self) -> str: + raise NotImplementedError + + def search_store(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]: + raise NotImplementedError(f"{self.name()} backend does not support searching") + + @abstractmethod + def get_file(self, file_hash: str, **kwargs: Any) -> Path | str | None: + raise NotImplementedError + + @abstractmethod + def get_metadata(self, file_hash: str, **kwargs: Any) -> Optional[Dict[str, Any]]: + raise NotImplementedError + + @abstractmethod + def get_tag(self, file_identifier: str, **kwargs: Any) -> Tuple[List[str], str]: + raise NotImplementedError + + @abstractmethod + def add_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool: + raise NotImplementedError + + @abstractmethod + def delete_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool: + raise NotImplementedError + + @abstractmethod + def get_url(self, file_identifier: str, **kwargs: Any) -> List[str]: + raise NotImplementedError + + @abstractmethod + def add_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool: + raise NotImplementedError + + @abstractmethod + def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool: + raise NotImplementedError diff --git a/Store/registry.py b/Store/registry.py new file mode 100644 index 0000000..f6ff96b --- /dev/null +++ b/Store/registry.py @@ -0,0 +1,99 @@ +"""Store registry. + +Concrete store implementations live in the `Store/` package. +This module is the single source of truth for store discovery. 
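+
+Backends are keyed by the instance name used in the config below (e.g. "default",
+"home"); a minimal lookup, assuming such a name is configured, looks like
+`Store(config)["default"].add_file(path)` and returns a hash or path identifier.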
+ +Config schema (canonical): + +{ + "store": { + "folder": { + "default": {"path": "C:/Media"}, + "test": {"path": "C:/Temp"} + }, + "hydrusnetwork": { + "home": {"Hydrus-Client-API-Access-Key": "...", "url": "http://..."} + } + } +} +""" + +from __future__ import annotations + +from pathlib import Path +from typing import Any, Dict, Optional + +from SYS.logger import debug + +from Store._base import StoreBackend +from Store.Folder import Folder +from Store.HydrusNetwork import HydrusNetwork + + +class Store: + def __init__(self, config: Optional[Dict[str, Any]] = None, suppress_debug: bool = False) -> None: + self._config = config or {} + self._suppress_debug = suppress_debug + self._backends: Dict[str, StoreBackend] = {} + self._load_backends() + + def _load_backends(self) -> None: + store_cfg = self._config.get("store") + if not isinstance(store_cfg, dict): + store_cfg = {} + + folder_cfg = store_cfg.get("folder") + if isinstance(folder_cfg, dict): + for name, value in folder_cfg.items(): + path_val: Optional[str] + if isinstance(value, dict): + path_val = value.get("path") + elif isinstance(value, (str, bytes)): + path_val = str(value) + else: + path_val = None + + if not path_val: + continue + + location = str(Path(str(path_val)).expanduser()) + self._backends[str(name)] = Folder(location=location, name=str(name)) + + hydrus_cfg = store_cfg.get("hydrusnetwork") + if isinstance(hydrus_cfg, dict): + for instance_name, instance_config in hydrus_cfg.items(): + if not isinstance(instance_config, dict): + continue + + api_key = instance_config.get("Hydrus-Client-API-Access-Key") + url = instance_config.get("url") + if not api_key or not url: + continue + + try: + self._backends[str(instance_name)] = HydrusNetwork( + instance_name=str(instance_name), + api_key=str(api_key), + url=str(url), + ) + except Exception as exc: + if not self._suppress_debug: + debug(f"[Store] Failed to register Hydrus instance '{instance_name}': {exc}") + + def list_backends(self) -> list[str]: + return sorted(self._backends.keys()) + + def list_searchable_backends(self) -> list[str]: + searchable: list[str] = [] + for name, backend in self._backends.items(): + if type(backend).search_store is not StoreBackend.search_store: + searchable.append(name) + return sorted(searchable) + + def __getitem__(self, backend_name: str) -> StoreBackend: + if backend_name not in self._backends: + raise KeyError(f"Unknown store backend: {backend_name}. 
Available: {list(self._backends.keys())}") + return self._backends[backend_name] + + def is_available(self, backend_name: str) -> bool: + return backend_name in self._backends diff --git a/TUI/modalscreen/download.py b/TUI/modalscreen/download.py index ca2fe86..181b68f 100644 --- a/TUI/modalscreen/download.py +++ b/TUI/modalscreen/download.py @@ -18,7 +18,7 @@ from typing import Optional, Callable, Any from pathlib import Path import sys -from helper.logger import log +from SYS.logger import log import json # Add parent directory to path for imports @@ -327,7 +327,7 @@ class DownloadModal(ModalScreen): else: # Fallback if helper not available import uuid - from helper.worker_manager import Worker + from SYS.worker_manager import Worker worker_id = f"dl_{uuid.uuid4().hex[:8]}" worker = Worker(worker_id, "download", f"Download: {url[:50]}", f"Tags: {', '.join(tags) if tags else 'None'}", None) @@ -688,7 +688,6 @@ class DownloadModal(ModalScreen): 'target': str(filepath), 'path': str(filepath), 'media_kind': 'audio', - 'hash_hex': None, 'hash': None, 'url': [], 'title': filepath_obj.stem diff --git a/TUI/modalscreen/export.py b/TUI/modalscreen/export.py index 104482a..fcd0ebe 100644 --- a/TUI/modalscreen/export.py +++ b/TUI/modalscreen/export.py @@ -15,7 +15,7 @@ from datetime import datetime # Add parent directory to path for imports sys.path.insert(0, str(Path(__file__).parent.parent)) -from helper.utils import format_metadata_value +from SYS.utils import format_metadata_value from config import load_config logger = logging.getLogger(__name__) @@ -69,7 +69,7 @@ class ExportModal(ModalScreen): """ ext_lower = ext.lower() if ext else '' - from helper.utils_constant import mime_maps + from SYS.utils_constant import mime_maps found_type = "unknown" @@ -404,9 +404,9 @@ class ExportModal(ModalScreen): metadata = self.result_data.get('metadata', {}) # Extract file source info from result_data (passed by hub-ui) - file_hash = self.result_data.get('hash') or self.result_data.get('file_hash') - file_url = self.result_data.get('url') or self.result_data.get('file_url') - file_path = self.result_data.get('path') or self.result_data.get('file_path') # For local files + file_hash = self.result_data.get('hash') + file_url = self.result_data.get('url') + file_path = self.result_data.get('path') source = self.result_data.get('source', 'unknown') # Prepare export data @@ -420,11 +420,8 @@ class ExportModal(ModalScreen): 'metadata': metadata, 'original_data': self.result_data, 'hash': file_hash, - 'file_hash': file_hash, 'url': file_url, - 'file_url': file_url, 'path': file_path, - 'file_path': file_path, # Pass file path for local files 'source': source, } diff --git a/TUI/modalscreen/search.py b/TUI/modalscreen/search.py index a180e11..d293e5b 100644 --- a/TUI/modalscreen/search.py +++ b/TUI/modalscreen/search.py @@ -16,7 +16,7 @@ import asyncio sys.path.insert(0, str(Path(__file__).parent.parent)) from config import load_config from result_table import ResultTable -from helper.provider import get_provider +from Provider.registry import get_search_provider logger = logging.getLogger(__name__) @@ -158,7 +158,7 @@ class SearchModal(ModalScreen): self.current_worker.log_step(f"Connecting to {source}...") try: - provider = get_provider(source) + provider = get_search_provider(source) if not provider: logger.error(f"[search-modal] Provider not available: {source}") if self.current_worker: @@ -282,7 +282,7 @@ class SearchModal(ModalScreen): # Handle both SearchResult objects and dicts if hasattr(result, 
'full_metadata'): metadata = result.full_metadata or {} - source = result.origin + source = result.table title = result.title else: # Handle dict (legacy or from to_dict) @@ -293,7 +293,7 @@ class SearchModal(ModalScreen): else: metadata = result - source = result.get('origin', result.get('source', '')) + source = result.get('table', '') title = result.get('title', '') # Format tags based on result source @@ -332,7 +332,7 @@ class SearchModal(ModalScreen): async def _download_book(self, result: Any) -> None: """Download a book from OpenLibrary using unified downloader.""" try: - from helper.unified_book_downloader import UnifiedBookDownloader + from Provider.unified_book_downloader import UnifiedBookDownloader from config import load_config # Convert SearchResult to dict if needed diff --git a/TUI/pipeline_runner.py b/TUI/pipeline_runner.py index c38b5ea..aa67cca 100644 --- a/TUI/pipeline_runner.py +++ b/TUI/pipeline_runner.py @@ -25,7 +25,7 @@ for path in (ROOT_DIR, BASE_DIR): import pipeline as ctx from cmdlets import REGISTRY from config import get_local_storage_path, load_config -from helper.worker_manager import WorkerManager +from SYS.worker_manager import WorkerManager try: # Reuse the CLI selection parser instead of reimplementing it. from CLI import _parse_selection_syntax diff --git a/cmdlets/__init__.py b/cmdlets/__init__.py index 6898ec3..551f1a5 100644 --- a/cmdlets/__init__.py +++ b/cmdlets/__init__.py @@ -72,6 +72,6 @@ for _root_mod in ("select_cmdlet",): # Also import helper modules that register cmdlets try: - import helper.alldebrid as _alldebrid + import API.alldebrid as _alldebrid except Exception: pass diff --git a/cmdlets/_shared.py b/cmdlets/_shared.py index e22fc31..3efbc5f 100644 --- a/cmdlets/_shared.py +++ b/cmdlets/_shared.py @@ -11,7 +11,7 @@ import sys import inspect from collections.abc import Iterable as IterableABC -from helper.logger import log, debug +from SYS.logger import log, debug from pathlib import Path from typing import Any, Dict, Iterable, List, Optional, Sequence, Set from dataclasses import dataclass, field @@ -149,7 +149,7 @@ class SharedArgs: @staticmethod def get_store_choices(config: Optional[Dict[str, Any]] = None) -> List[str]: - """Get list of available storage backend names from FileStorage. + """Get list of available store backend names. This method dynamically discovers all configured storage backends instead of using a static list. 
Should be called when building @@ -162,13 +162,10 @@ class SharedArgs: List of backend names (e.g., ['default', 'test', 'home', 'work']) Example: - # In a cmdlet that needs dynamic choices - from helper.store import FileStorage - storage = FileStorage(config) SharedArgs.STORE.choices = SharedArgs.get_store_choices(config) """ try: - from helper.store import FileStorage + from Store import Store # If no config provided, try to load it if config is None: @@ -178,8 +175,8 @@ class SharedArgs: except Exception: return [] - file_storage = FileStorage(config) - return file_storage.list_backends() + store = Store(config) + return store.list_backends() except Exception: # Fallback to empty list if FileStorage isn't available return [] @@ -609,7 +606,7 @@ def normalize_hash(hash_hex: Optional[str]) -> Optional[str]: return text.lower() if text else None -def get_hash_for_operation(override_hash: Optional[str], result: Any, field_name: str = "hash_hex") -> Optional[str]: +def get_hash_for_operation(override_hash: Optional[str], result: Any, field_name: str = "hash") -> Optional[str]: """Get normalized hash from override or result object, consolidating common pattern. Eliminates repeated pattern: normalize_hash(override) if override else normalize_hash(get_field(result, ...)) @@ -617,15 +614,14 @@ def get_hash_for_operation(override_hash: Optional[str], result: Any, field_name Args: override_hash: Hash passed as command argument (takes precedence) result: Object containing hash field (fallback) - field_name: Name of hash field in result object (default: "hash_hex") + field_name: Name of hash field in result object (default: "hash") Returns: Normalized hash string, or None if neither override nor result provides valid hash """ if override_hash: return normalize_hash(override_hash) - # Try multiple field names for robustness - hash_value = get_field(result, field_name) or getattr(result, field_name, None) or getattr(result, "hash", None) or result.get("file_hash") if isinstance(result, dict) else None + hash_value = get_field(result, field_name) or getattr(result, field_name, None) or getattr(result, "hash", None) return normalize_hash(hash_value) @@ -645,8 +641,8 @@ def fetch_hydrus_metadata(config: Any, hash_hex: str, **kwargs) -> tuple[Optiona - metadata_dict: Dict from Hydrus (first item in metadata list) or None if unavailable - error_code: 0 on success, 1 on any error (suitable for returning from cmdlet execute()) """ - from helper import hydrus - hydrus_wrapper = hydrus + from API import HydrusNetwork + hydrus_wrapper = HydrusNetwork try: client = hydrus_wrapper.get_client(config) @@ -670,24 +666,6 @@ def fetch_hydrus_metadata(config: Any, hash_hex: str, **kwargs) -> tuple[Optiona return meta, 0 -def get_origin(obj: Any, default: Optional[str] = None) -> Optional[str]: - """Extract origin field with fallback to store/source field, consolidating common pattern. - - Supports both dict and object access patterns. - - Args: - obj: Object (dict or dataclass) with 'store', 'origin', or 'source' field - default: Default value if none of the fields are found - - Returns: - Store/origin/source string, or default if none exist - """ - if isinstance(obj, dict): - return obj.get("store") or obj.get("origin") or obj.get("source") or default - else: - return getattr(obj, "store", None) or getattr(obj, "origin", None) or getattr(obj, "source", None) or default - - def get_field(obj: Any, field: str, default: Optional[Any] = None) -> Any: """Extract a field from either a dict or object with fallback default. 
@@ -706,56 +684,19 @@ def get_field(obj: Any, field: str, default: Optional[Any] = None) -> Any: Examples: get_field(result, "hash") # From dict or object - get_field(result, "origin", "unknown") # With default + get_field(result, "table", "unknown") # With default """ # Handle lists by accessing the first element if isinstance(obj, list) and obj: obj = obj[0] if isinstance(obj, dict): - # Direct lookup first - val = obj.get(field, default) - if val is not None: - return val - # Fallback aliases for common fields - if field == "path": - for alt in ("file_path", "target", "filepath", "file"): - v = obj.get(alt) - if v: - return v - if field == "hash": - for alt in ("file_hash", "hash_hex"): - v = obj.get(alt) - if v: - return v - if field == "store": - for alt in ("storage", "storage_source", "origin"): - v = obj.get(alt) - if v: - return v - return default + return obj.get(field, default) else: # Try direct attribute access first value = getattr(obj, field, None) if value is not None: return value - - # Attribute fallback aliases for common fields - if field == "path": - for alt in ("file_path", "target", "filepath", "file", "url"): - v = getattr(obj, alt, None) - if v: - return v - if field == "hash": - for alt in ("file_hash", "hash_hex"): - v = getattr(obj, alt, None) - if v: - return v - if field == "store": - for alt in ("storage", "storage_source", "origin"): - v = getattr(obj, alt, None) - if v: - return v # For PipeObjects, also check the extra field if hasattr(obj, 'extra') and isinstance(obj.extra, dict): @@ -1148,7 +1089,7 @@ def create_pipe_object_result( file_path: str, cmdlet_name: str, title: Optional[str] = None, - file_hash: Optional[str] = None, + hash_value: Optional[str] = None, is_temp: bool = False, parent_hash: Optional[str] = None, tags: Optional[List[str]] = None, @@ -1165,7 +1106,7 @@ def create_pipe_object_result( file_path: Path to the file cmdlet_name: Name of the cmdlet that created this (e.g., 'download-data', 'screen-shot') title: Human-readable title - file_hash: SHA-256 hash of file (for integrity) + hash_value: SHA-256 hash of file (for integrity) is_temp: If True, this is a temporary/intermediate artifact parent_hash: Hash of the parent file in the chain (for provenance) tags: List of tags to apply @@ -1183,13 +1124,12 @@ def create_pipe_object_result( if title: result['title'] = title - if file_hash: - result['file_hash'] = file_hash - result['hash'] = file_hash + if hash_value: + result['hash'] = hash_value if is_temp: result['is_temp'] = True if parent_hash: - result['parent_id'] = parent_hash # parent_id is the parent's file_hash + result['parent_hash'] = parent_hash if tags: result['tags'] = tags @@ -1219,17 +1159,17 @@ def mark_as_temp(pipe_object: Dict[str, Any]) -> Dict[str, Any]: return pipe_object -def set_parent_id(pipe_object: Dict[str, Any], parent_hash: str) -> Dict[str, Any]: - """Set the parent_id for provenance tracking. +def set_parent_hash(pipe_object: Dict[str, Any], parent_hash: str) -> Dict[str, Any]: + """Set the parent_hash for provenance tracking. 
Args: pipe_object: Result dict parent_hash: Parent file's hash Returns: - Modified dict with parent_id set to the hash + Modified dict with parent_hash set to the hash """ - pipe_object['parent_id'] = parent_hash + pipe_object['parent_hash'] = parent_hash return pipe_object @@ -1254,13 +1194,13 @@ def get_pipe_object_hash(pipe_object: Any) -> Optional[str]: """Extract file hash from PipeObject, dict, or pipeline-friendly object.""" if pipe_object is None: return None - for attr in ('file_hash', 'hash_hex', 'hash'): + for attr in ('hash',): if hasattr(pipe_object, attr): value = getattr(pipe_object, attr) if value: return value if isinstance(pipe_object, dict): - for key in ('file_hash', 'hash_hex', 'hash'): + for key in ('hash',): value = pipe_object.get(key) if value: return value @@ -1522,13 +1462,12 @@ def coerce_to_pipe_object(value: Any, default_path: Optional[str] = None) -> mod """ # Debug: Print ResultItem details if coming from search_file.py try: - from helper.logger import is_debug_enabled, debug + from SYS.logger import is_debug_enabled, debug if is_debug_enabled() and hasattr(value, '__class__') and value.__class__.__name__ == 'ResultItem': debug("[ResultItem -> PipeObject conversion]") - debug(f" origin={getattr(value, 'origin', None)}") debug(f" title={getattr(value, 'title', None)}") debug(f" target={getattr(value, 'target', None)}") - debug(f" hash_hex={getattr(value, 'hash_hex', None)}") + debug(f" hash={getattr(value, 'hash', None)}") debug(f" media_kind={getattr(value, 'media_kind', None)}") debug(f" tags={getattr(value, 'tags', None)}") debug(f" tag_summary={getattr(value, 'tag_summary', None)}") @@ -1554,14 +1493,11 @@ def coerce_to_pipe_object(value: Any, default_path: Optional[str] = None) -> mod if isinstance(value, dict): # Extract hash and store (canonical identifiers) - hash_val = value.get("hash") or value.get("file_hash") - # Recognize multiple possible store naming conventions (store, origin, storage, storage_source) - store_val = value.get("store") or value.get("origin") or value.get("storage") or value.get("storage_source") or "PATH" - # If the store value is embedded under extra, also detect it - if not store_val or store_val in ("local", "PATH"): - extra_store = None + hash_val = value.get("hash") + store_val = value.get("store") or "PATH" + if not store_val or store_val == "PATH": try: - extra_store = value.get("extra", {}).get("store") or value.get("extra", {}).get("storage") or value.get("extra", {}).get("storage_source") + extra_store = value.get("extra", {}).get("store") except Exception: extra_store = None if extra_store: @@ -1572,7 +1508,7 @@ def coerce_to_pipe_object(value: Any, default_path: Optional[str] = None) -> mod path_val = value.get("path") if path_val: try: - from helper.utils import sha256_file + from SYS.utils import sha256_file from pathlib import Path hash_val = sha256_file(Path(path_val)) except Exception: @@ -1655,7 +1591,7 @@ def coerce_to_pipe_object(value: Any, default_path: Optional[str] = None) -> mod relationships=rels, is_temp=bool(value.get("is_temp", False)), action=value.get("action"), - parent_hash=value.get("parent_hash") or value.get("parent_id"), + parent_hash=value.get("parent_hash"), extra=extra, ) @@ -1671,7 +1607,7 @@ def coerce_to_pipe_object(value: Any, default_path: Optional[str] = None) -> mod if path_val and path_val != "unknown": try: - from helper.utils import sha256_file + from SYS.utils import sha256_file from pathlib import Path path_obj = Path(path_val) hash_val = sha256_file(path_obj) @@ -1714,7 
+1650,7 @@ def register_url_with_local_library(pipe_obj: models.PipeObject, config: Dict[st try: from config import get_local_storage_path - from helper.folder_store import FolderDB + from API.folder import API_folder_store file_path = get_field(pipe_obj, "path") url_field = get_field(pipe_obj, "url", []) @@ -1735,7 +1671,7 @@ def register_url_with_local_library(pipe_obj: models.PipeObject, config: Dict[st if not storage_path: return False - with FolderDB(storage_path) as db: + with API_folder_store(storage_path) as db: file_hash = db.get_file_hash(path_obj) if not file_hash: return False diff --git a/cmdlets/add_file.py b/cmdlets/add_file.py index 10c3cab..19bc487 100644 --- a/cmdlets/add_file.py +++ b/cmdlets/add_file.py @@ -7,26 +7,22 @@ import shutil import models import pipeline as ctx -from helper import hydrus as hydrus_wrapper -from helper.logger import log, debug -from helper.store import FileStorage +from API import HydrusNetwork as hydrus_wrapper +from SYS.logger import log, debug +from Store import Store from ._shared import ( Cmdlet, CmdletArg, parse_cmdlet_args, SharedArgs, extract_tags_from_result, extract_title_from_result, extract_url_from_result, - merge_sequences, extract_relationships, extract_duration, get_origin, coerce_to_pipe_object + merge_sequences, extract_relationships, extract_duration, coerce_to_pipe_object ) from ._shared import collapse_namespace_tags -from helper.folder_store import read_sidecar, find_sidecar, write_sidecar, FolderDB -from helper.utils import sha256_file, unique_path +from API.folder import read_sidecar, find_sidecar, write_sidecar, API_folder_store +from SYS.utils import sha256_file, unique_path from metadata import write_metadata # Use official Hydrus supported filetypes from hydrus_wrapper SUPPORTED_MEDIA_EXTENSIONS = hydrus_wrapper.ALL_SUPPORTED_EXTENSIONS -# Initialize file storage system -storage = FileStorage() - - class Add_File(Cmdlet): """Add file into the DB""" @@ -96,8 +92,11 @@ class Add_File(Cmdlet): media_path_or_url, file_hash = self._resolve_source(result, path_arg, pipe_obj, config) debug(f"[add-file] RESOLVED source: path={media_path_or_url}, hash={file_hash[:12] if file_hash else 'N/A'}...") if not media_path_or_url: - debug(f"[add-file] ERROR: Could not resolve source file/URL") - return 1 + debug(f"[add-file] ERROR: Could not resolve source file/URL") + return 1 + + # Update pipe_obj with resolved path + pipe_obj.path = str(media_path_or_url) if isinstance(media_path_or_url, (str, Path)) else str(media_path_or_url) # Check if it's a URL before validating as file if isinstance(media_path_or_url, str) and media_path_or_url.lower().startswith(("http://", "https://", "magnet:", "torrent:")): @@ -116,15 +115,15 @@ class Add_File(Cmdlet): debug(f"[add-file] DECISION POINT: provider={provider_name}, location={location}") debug(f" media_path={media_path}, exists={media_path.exists()}") - # Execute transfer based on destination (using class-based FileStorage system) + # Execute transfer based on destination (using Store registry) if provider_name: debug(f"[add-file] ROUTE: file provider upload") return self._handle_provider_upload(media_path, provider_name, pipe_obj, config, delete_after) elif location: - # Check if location is a registered backend name using FileStorage + # Check if location is a registered backend name try: - storage = FileStorage(config) - backends = storage.list_backends() + store = Store(config) + backends = store.list_backends() if location in backends: debug(f"[add-file] ROUTE: storage backend 
'{location}'") @@ -165,15 +164,19 @@ class Add_File(Cmdlet): debug(f"[add-file] Using hash+store from result: hash={result_hash[:12]}..., store={result_store}") # Use get_file to retrieve from the specific store try: - from helper.store import FileStorage - storage = FileStorage(config) - if result_store in storage.list_backends(): - backend = storage[result_store] + store = Store(config) + if result_store in store.list_backends(): + backend = store[result_store] media_path = backend.get_file(result_hash) - if media_path and media_path.exists(): + if isinstance(media_path, Path) and media_path.exists(): pipe_obj.path = str(media_path) debug(f"[add-file] Retrieved file from {result_store}: {media_path}") return media_path, result_hash + + if isinstance(media_path, str) and media_path.lower().startswith(("http://", "https://")): + pipe_obj.path = media_path + debug(f"[add-file] Retrieved URL from {result_store}: {media_path}") + return media_path, result_hash except Exception as exc: debug(f"[add-file] Failed to retrieve via hash+store: {exc}") @@ -385,20 +388,6 @@ class Add_File(Cmdlet): url = list(extract_url_from_result(result) or []) return url - @staticmethod - def _get_origin(result: Any, pipe_obj: models.PipeObject) -> Optional[str]: - try: - if isinstance(pipe_obj.extra, dict): - origin = get_origin(pipe_obj.extra) - if origin: - return origin - except Exception: - pass - - if isinstance(result, dict): - return get_origin(result) - return None - @staticmethod def _get_relationships(result: Any, pipe_obj: models.PipeObject) -> Optional[Dict[str, Any]]: try: @@ -427,16 +416,16 @@ class Add_File(Cmdlet): def _update_pipe_object_destination( pipe_obj: models.PipeObject, *, - hash: str, + hash_value: str, store: str, - file_path: str, + path: Optional[str], tags: List[str], title: Optional[str], extra_updates: Optional[Dict[str, Any]] = None, ) -> None: - pipe_obj.hash = hash + pipe_obj.hash = hash_value pipe_obj.store = store - pipe_obj.path = file_path + pipe_obj.path = path pipe_obj.tags = tags if title: pipe_obj.title = title @@ -485,9 +474,9 @@ class Add_File(Cmdlet): if preferred_title: preferred_title = preferred_title.replace("_", " ").strip() - result_origin = Add_File._get_origin(result, pipe_obj) + store = getattr(pipe_obj, "store", None) _, sidecar_hash, sidecar_tags, sidecar_url = Add_File._load_sidecar_bundle( - media_path, result_origin, config + media_path, store, config ) def normalize_title_tag(tag: str) -> str: @@ -589,7 +578,6 @@ class Add_File(Cmdlet): # Update PipeObject and emit extra_updates = { - "storage_source": "local", "url": url, "export_path": str(destination_root), } @@ -600,9 +588,9 @@ class Add_File(Cmdlet): Add_File._update_pipe_object_destination( pipe_obj, - hash=f_hash or "unknown", + hash_value=f_hash or "unknown", store="local", - file_path=str(target_path), + path=str(target_path), tags=tags, title=chosen_title, extra_updates=extra_updates, @@ -615,6 +603,78 @@ class Add_File(Cmdlet): return 0 + @staticmethod + def _download_soulseek_file( + result: Any, + config: Dict[str, Any] + ) -> Optional[Path]: + """ + Download a file from Soulseek peer. + + Extracts username and filename from soulseek result metadata and initiates download. 
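+
+        Args:
+            result: Search result (dict or object) carrying soulseek full_metadata
+                with "username" and "filename" keys.
+            config: App config; "output_dir" selects the download directory
+                (defaults to ./downloads).
+
+        Returns:
+            Path to the downloaded file, or None on failure.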
+        """
+        try:
+            import asyncio
+            from Provider.registry import download_soulseek_file
+            from pathlib import Path
+
+            # Extract metadata from result
+            full_metadata = {}
+            if isinstance(result, dict):
+                full_metadata = result.get("full_metadata", {})
+            elif hasattr(result, "extra") and isinstance(result.extra, dict) and "full_metadata" in result.extra:
+                full_metadata = result.extra.get("full_metadata", {})
+            elif hasattr(result, "full_metadata"):
+                # Direct attribute access (fallback)
+                val = getattr(result, "full_metadata", {})
+                if isinstance(val, dict):
+                    full_metadata = val
+
+            username = full_metadata.get("username")
+            filename = full_metadata.get("filename")
+
+            if not username or not filename:
+                debug(f"[add-file] ERROR: Missing soulseek metadata (username={username}, filename={filename}) from result (type={type(result).__name__})")
+                if hasattr(result, "extra"):
+                    debug(f"[add-file] Result extra keys: {list(result.extra.keys())}")
+                return None
+
+            debug(f"[add-file] Starting soulseek download: {username} -> {filename}")
+
+            # Determine output directory (prefer downloads folder in config)
+            output_dir = Path(config.get("output_dir", "./downloads")) if isinstance(config.get("output_dir"), str) else Path("./downloads")
+            output_dir.mkdir(parents=True, exist_ok=True)
+
+            # Run async download in event loop
+            try:
+                loop = asyncio.get_event_loop()
+                if loop.is_closed():
+                    loop = asyncio.new_event_loop()
+                    asyncio.set_event_loop(loop)
+            except RuntimeError:
+                loop = asyncio.new_event_loop()
+                asyncio.set_event_loop(loop)
+
+            downloaded_path = loop.run_until_complete(
+                download_soulseek_file(
+                    username=username,
+                    filename=filename,
+                    output_dir=output_dir,
+                    timeout=1200  # 20 minutes
+                )
+            )
+
+            return downloaded_path
+
+        except Exception as e:
+            log(f"[add-file] Soulseek download error: {type(e).__name__}: {e}", file=sys.stderr)
+            debug(f"[add-file] Soulseek download traceback: {e}")
+            return None
+
+    @staticmethod
+    def _handle_provider_upload(
+        media_path: Path,
+        provider_name: str,
+        config: Dict[str, Any],
+        pipe_obj: models.PipeObject,
+        delete_after: bool,
+    ) -> int:
+        """Handle uploading to a file provider (e.g.
0x0).""" - from helper.provider import get_file_provider + from Provider.registry import get_file_provider log(f"Uploading via {provider_name}: {media_path.name}", file=sys.stderr) @@ -666,9 +726,9 @@ class Add_File(Cmdlet): file_path = pipe_obj.path or (str(media_path) if media_path else None) or "" Add_File._update_pipe_object_destination( pipe_obj, - hash=f_hash or "unknown", + hash_value=f_hash or "unknown", store=provider_name or "provider", - file_path=file_path, + path=file_path, tags=pipe_obj.tags, title=pipe_obj.title or (media_path.name if media_path else None), extra_updates=extra_updates, @@ -687,14 +747,11 @@ class Add_File(Cmdlet): delete_after: bool, ) -> int: """Handle uploading to a registered storage backend (e.g., 'test' folder store, 'hydrus', etc.).""" - from config import load_config - log(f"Adding file to storage backend '{backend_name}': {media_path.name}", file=sys.stderr) try: - cfg = load_config() - storage = FileStorage(cfg) - backend = storage[backend_name] + store = Store(config) + backend = store[backend_name] # Prepare metadata from pipe_obj and sidecars tags, url, title, f_hash = Add_File._prepare_metadata(None, media_path, pipe_obj, config) @@ -708,24 +765,26 @@ class Add_File(Cmdlet): url=url ) log(f"✓ File added to '{backend_name}': {file_identifier}", file=sys.stderr) - - # Update pipe object with result - # For backends that return paths, file_path = identifier - # For backends that return hashes, file_path = "backend:hash" - file_path_str = str(file_identifier) - if len(file_identifier) == 64 and all(c in '0123456789abcdef' for c in file_identifier.lower()): - # It's a hash - use backend:hash format - file_path_str = f"{backend_name}:{file_identifier}" + + stored_path: Optional[str] = None + try: + maybe_path = backend.get_file(file_identifier) + if isinstance(maybe_path, Path): + stored_path = str(maybe_path) + elif isinstance(maybe_path, str) and maybe_path: + # Some backends may return a browser URL + stored_path = maybe_path + except Exception: + stored_path = None Add_File._update_pipe_object_destination( pipe_obj, - hash=file_identifier if len(file_identifier) == 64 else f_hash or "unknown", + hash_value=file_identifier if len(file_identifier) == 64 else f_hash or "unknown", store=backend_name, - file_path=file_path_str, + path=stored_path, tags=tags, title=title or pipe_obj.title or media_path.name, extra_updates={ - "storage_source": backend_name, "url": url, }, ) @@ -745,16 +804,16 @@ class Add_File(Cmdlet): @staticmethod def _load_sidecar_bundle( media_path: Path, - origin: Optional[str], + store: Optional[str], config: Dict[str, Any], ) -> Tuple[Optional[Path], Optional[str], List[str], List[str]]: """Load sidecar metadata.""" - if origin and origin.lower() == "local": + if store and store.lower() == "local": try: from config import get_local_storage_path db_root = get_local_storage_path(config) if db_root: - with FolderDB(Path(db_root)) as db: + with API_folder_store(Path(db_root)) as db: file_hash = db.get_file_hash(media_path) if file_hash: tags = db.get_tags(file_hash) or [] @@ -837,7 +896,7 @@ class Add_File(Cmdlet): except OSError: payload['size'] = None - with FolderDB(library_root) as db: + with API_folder_store(library_root) as db: try: db.save_file_info(dest_path, payload, tags) except Exception as exc: diff --git a/cmdlets/add_note.py b/cmdlets/add_note.py index fbc87f0..805f7a3 100644 --- a/cmdlets/add_note.py +++ b/cmdlets/add_note.py @@ -6,9 +6,9 @@ import json from . 
import register import models import pipeline as ctx -from helper import hydrus as hydrus_wrapper +from API import HydrusNetwork as hydrus_wrapper from ._shared import Cmdlet, CmdletArg, normalize_hash, should_show_help -from helper.logger import log +from SYS.logger import log CMDLET = Cmdlet( name="add-note", diff --git a/cmdlets/add_relationship.py b/cmdlets/add_relationship.py index 935b135..f0f4703 100644 --- a/cmdlets/add_relationship.py +++ b/cmdlets/add_relationship.py @@ -8,14 +8,14 @@ import re from pathlib import Path import sys -from helper.logger import log +from SYS.logger import log from . import register import models import pipeline as ctx -from helper import hydrus as hydrus_wrapper +from API import HydrusNetwork as hydrus_wrapper from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args, normalize_result_input, should_show_help, get_field -from helper.folder_store import read_sidecar, find_sidecar +from API.folder import read_sidecar, find_sidecar CMDLET = Cmdlet( @@ -228,7 +228,7 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int: items_to_process = [{"file_path": arg_path}] # Import local storage utilities - from helper.folder_store import LocalLibrarySearchOptimizer + from API.folder import LocalLibrarySearchOptimizer from config import get_local_storage_path local_storage_path = get_local_storage_path(config) if config else None diff --git a/cmdlets/add_tag.py b/cmdlets/add_tag.py index 6daac33..fb64a76 100644 --- a/cmdlets/add_tag.py +++ b/cmdlets/add_tag.py @@ -4,13 +4,13 @@ from typing import Any, Dict, List, Sequence, Optional from pathlib import Path import sys -from helper.logger import log +from SYS.logger import log import models import pipeline as ctx from ._shared import normalize_result_input, filter_results_by_temp -from helper import hydrus as hydrus_wrapper -from helper.folder_store import write_sidecar, FolderDB +from API import HydrusNetwork as hydrus_wrapper +from API.folder import write_sidecar, API_folder_store from ._shared import Cmdlet, CmdletArg, SharedArgs, normalize_hash, parse_tag_arguments, expand_tag_groups, parse_cmdlet_args, collapse_namespace_tags, should_show_help, get_field from config import get_local_storage_path @@ -91,17 +91,16 @@ class Add_Tag(Cmdlet): res["columns"] = updated @staticmethod - def _matches_target(item: Any, hydrus_hash: Optional[str], file_hash: Optional[str], file_path: Optional[str]) -> bool: + def _matches_target(item: Any, file_hash: Optional[str], path: Optional[str]) -> bool: """Determine whether a result item refers to the given hash/path target.""" - hydrus_hash_l = hydrus_hash.lower() if hydrus_hash else None file_hash_l = file_hash.lower() if file_hash else None - file_path_l = file_path.lower() if file_path else None + path_l = path.lower() if path else None def norm(val: Any) -> Optional[str]: return str(val).lower() if val is not None else None - hash_fields = ["hydrus_hash", "hash", "hash_hex", "file_hash"] - path_fields = ["path", "file_path", "target"] + hash_fields = ["hash"] + path_fields = ["path", "target"] if isinstance(item, dict): hashes = [norm(item.get(field)) for field in hash_fields] @@ -110,11 +109,9 @@ class Add_Tag(Cmdlet): hashes = [norm(get_field(item, field)) for field in hash_fields] paths = [norm(get_field(item, field)) for field in path_fields] - if hydrus_hash_l and hydrus_hash_l in hashes: - return True if file_hash_l and file_hash_l in hashes: return True - if file_path_l and file_path_l in paths: + if path_l and path_l in paths: return True return 
False @@ -146,7 +143,7 @@ class Add_Tag(Cmdlet): if changed: item["columns"] = updated_cols - def _refresh_result_table_title(self, new_title: str, hydrus_hash: Optional[str], file_hash: Optional[str], file_path: Optional[str]) -> None: + def _refresh_result_table_title(self, new_title: str, file_hash: Optional[str], path: Optional[str]) -> None: """Refresh the cached result table with an updated title and redisplay it.""" try: last_table = ctx.get_last_result_table() @@ -158,7 +155,7 @@ class Add_Tag(Cmdlet): match_found = False for item in items: try: - if self._matches_target(item, hydrus_hash, file_hash, file_path): + if self._matches_target(item, file_hash, path): self._update_item_title_fields(item, new_title) match_found = True except Exception: @@ -178,21 +175,21 @@ class Add_Tag(Cmdlet): except Exception: pass - def _refresh_tags_view(self, res: Any, hydrus_hash: Optional[str], file_hash: Optional[str], file_path: Optional[str], config: Dict[str, Any]) -> None: + def _refresh_tags_view(self, res: Any, file_hash: Optional[str], path: Optional[str], config: Dict[str, Any]) -> None: """Refresh tag display via get-tag. Prefer current subject; fall back to direct hash refresh.""" try: from cmdlets import get_tag as get_tag_cmd # type: ignore except Exception: return - target_hash = hydrus_hash or file_hash + target_hash = file_hash refresh_args: List[str] = [] if target_hash: - refresh_args = ["-hash", target_hash, "-store", target_hash] + refresh_args = ["-hash", target_hash] try: subject = ctx.get_last_result_subject() - if subject and self._matches_target(subject, hydrus_hash, file_hash, file_path): + if subject and self._matches_target(subject, file_hash, path): get_tag_cmd._run(subject, refresh_args, config) return except Exception: @@ -217,7 +214,7 @@ class Add_Tag(Cmdlet): # Get explicit -hash and -store overrides from CLI hash_override = normalize_hash(parsed.get("hash")) - store_override = parsed.get("store") or parsed.get("storage") + store_override = parsed.get("store") # Normalize input to list results = normalize_result_input(result) @@ -327,7 +324,7 @@ class Add_Tag(Cmdlet): file_path = get_field(res, "path") # Try tags from top-level 'tags' or from 'extra.tags' tags = get_field(res, "tags") or (get_field(res, "extra") or {}).get("tags", []) - file_hash = get_field(res, "hash") or get_field(res, "file_hash") or get_field(res, "hash_hex") or "" + file_hash = get_field(res, "hash") or "" if not file_path: log(f"[add_tag] Warning: Result has no path, skipping", file=sys.stderr) ctx.emit(res) @@ -362,16 +359,8 @@ class Add_Tag(Cmdlet): existing_tags = get_field(res, "tags") or [] if not existing_tags: existing_tags = (get_field(res, "extra", {}) or {}).get("tags") or [] - file_hash = get_field(res, "hash") or get_field(res, "file_hash") or get_field(res, "hash_hex") or "" - storage_source = get_field(res, "store") or get_field(res, "storage") or get_field(res, "storage_source") or get_field(res, "origin") - hydrus_hash = get_field(res, "hydrus_hash") or file_hash - - # Infer storage source from result if not found - if not storage_source: - if file_path: - storage_source = 'local' - elif file_hash and file_hash != "unknown": - storage_source = 'hydrus' + file_hash = get_field(res, "hash") or "" + store_name = store_override or get_field(res, "store") original_tags_lower = {str(t).lower() for t in existing_tags if isinstance(t, str)} original_title = self._extract_title_tag(list(existing_tags)) @@ -379,8 +368,10 @@ class Add_Tag(Cmdlet): # Apply CLI overrides if provided if 
hash_override and not file_hash: file_hash = hash_override - if store_override and not storage_source: - storage_source = store_override + if not store_name: + log("[add_tag] Missing store (use -store or pipe a result with store)", file=sys.stderr) + ctx.emit(res) + continue # Check if we have sufficient identifier (file_path OR file_hash) if not file_path and not file_hash: @@ -418,146 +409,78 @@ class Add_Tag(Cmdlet): new_tags_added = [] final_tags = list(existing_tags) if existing_tags else [] - # Determine where to add tags: Hydrus or Folder storage - if storage_source and storage_source.lower() == 'hydrus': - # Add tags to Hydrus using the API - target_hash = file_hash - if target_hash: - try: - hydrus_client = hydrus_wrapper.get_client(config) - service_name = hydrus_wrapper.get_tag_service_name(config) - - # For namespaced tags, remove old tags in same namespace - removed_tags = [] - for new_tag in tags_to_add: - if ':' in new_tag: - namespace = new_tag.split(':', 1)[0] - to_remove = [t for t in existing_tags if t.startswith(namespace + ':') and t.lower() != new_tag.lower()] - removed_tags.extend(to_remove) - - # Add new tags - if tags_to_add: - log(f"[add_tag] Adding {len(tags_to_add)} tag(s) to Hydrus file: {target_hash}", file=sys.stderr) - hydrus_client.add_tags(target_hash, tags_to_add, service_name) - - # Delete replaced namespace tags - if removed_tags: - unique_removed = sorted(set(removed_tags)) - hydrus_client.delete_tags(target_hash, unique_removed, service_name) - - if tags_to_add or removed_tags: - total_new_tags += len(tags_to_add) - total_modified += 1 - log(f"[add_tag] ✓ Added {len(tags_to_add)} tag(s) to Hydrus", file=sys.stderr) - # Refresh final tag list from the backend for accurate display - try: - from helper.store import FileStorage - storage = FileStorage(config) - if storage and storage_source in storage.list_backends(): - backend = storage[storage_source] - refreshed_tags, _ = backend.get_tag(target_hash) - if refreshed_tags is not None: - final_tags = refreshed_tags - new_tags_added = [t for t in refreshed_tags if t.lower() not in original_tags_lower] - # Update result tags for downstream cmdlets/UI - if isinstance(res, models.PipeObject): - res.tags = refreshed_tags - if isinstance(res.extra, dict): - res.extra['tags'] = refreshed_tags - elif isinstance(res, dict): - res['tags'] = refreshed_tags - except Exception: - # Ignore failures - this is best-effort for refreshing tag state - pass - except Exception as e: - log(f"[add_tag] Warning: Failed to add tags to Hydrus: {e}", file=sys.stderr) - else: - log(f"[add_tag] Warning: No hash available for Hydrus file, skipping", file=sys.stderr) - elif storage_source: - # For any Folder-based storage (local, test, default, etc.), delegate to backend - # If storage_source is not a registered backend, fallback to writing a sidecar - from helper.store import FileStorage - storage = FileStorage(config) + # Resolve hash from path if needed + if not file_hash and file_path: try: - if storage and storage_source in storage.list_backends(): - backend = storage[storage_source] - if file_hash and backend.add_tag(file_hash, tags_to_add): - # Refresh tags from backend to get merged result - refreshed_tags, _ = backend.get_tag(file_hash) - if refreshed_tags: - # Update result tags - if isinstance(res, models.PipeObject): - res.tags = refreshed_tags - # Also keep as extra for compatibility - if isinstance(res.extra, dict): - res.extra['tags'] = refreshed_tags - elif isinstance(res, dict): - res['tags'] = refreshed_tags + from 
SYS.utils import sha256_file + file_hash = sha256_file(Path(file_path)) + except Exception: + file_hash = "" - # Update title if changed - title_value = self._extract_title_tag(refreshed_tags) - self._apply_title_to_result(res, title_value) - - # Compute stats - new_tags_added = [t for t in refreshed_tags if t.lower() not in original_tags_lower] - total_new_tags += len(new_tags_added) - if new_tags_added: - total_modified += 1 - - log(f"[add_tag] Added {len(new_tags_added)} new tag(s); {len(refreshed_tags)} total tag(s) stored in {storage_source}", file=sys.stderr) - final_tags = refreshed_tags - else: - log(f"[add_tag] Warning: Failed to add tags to {storage_source}", file=sys.stderr) - else: - # Not a registered backend - fallback to sidecar if we have a path - if file_path: - try: - sidecar_path = write_sidecar(Path(file_path), tags_to_add, [], file_hash) - log(f"[add_tag] Wrote {len(tags_to_add)} tag(s) to sidecar: {sidecar_path}", file=sys.stderr) - total_new_tags += len(tags_to_add) - total_modified += 1 - # Update res tags - if isinstance(res, models.PipeObject): - res.tags = (res.tags or []) + tags_to_add - if isinstance(res.extra, dict): - res.extra['tags'] = res.tags - elif isinstance(res, dict): - res['tags'] = list(set((res.get('tags') or []) + tags_to_add)) - except Exception as exc: - log(f"[add_tag] Warning: Failed to write sidecar for {file_path}: {exc}", file=sys.stderr) - else: - log(f"[add_tag] Warning: Storage backend '{storage_source}' not found in config", file=sys.stderr) - except KeyError: - # storage[storage_source] raised KeyError - treat as absent backend - if file_path: - try: - sidecar_path = write_sidecar(Path(file_path), tags_to_add, [], file_hash) - log(f"[add_tag] Wrote {len(tags_to_add)} tag(s) to sidecar: {sidecar_path}", file=sys.stderr) - total_new_tags += len(tags_to_add) - total_modified += 1 - # Update res tags for downstream - if isinstance(res, models.PipeObject): - res.tags = (res.tags or []) + tags_to_add - if isinstance(res.extra, dict): - res.extra['tags'] = res.tags - elif isinstance(res, dict): - res['tags'] = list(set((res.get('tags') or []) + tags_to_add)) - except Exception as exc: - log(f"[add_tag] Warning: Failed to write sidecar for {file_path}: {exc}", file=sys.stderr) - else: - log(f"[add_tag] Warning: Storage backend '{storage_source}' not found in config", file=sys.stderr) - else: - # For other storage types or unknown sources, avoid writing sidecars to reduce clutter - # (local/hydrus are handled above). 
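+                # Tag updates below are keyed by content hash, so skip items
+                # where no hash could be resolved from the result or its path.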
+ if not file_hash: + log("[add_tag] Warning: No hash available, skipping", file=sys.stderr) ctx.emit(res) continue + + # Route tag updates through the configured store backend + try: + storage = Store(config) + backend = storage[store_name] + + # For namespaced tags, compute old tags in same namespace to remove + removed_tags = [] + for new_tag in tags_to_add: + if ':' in new_tag: + namespace = new_tag.split(':', 1)[0] + to_remove = [t for t in existing_tags if t.startswith(namespace + ':') and t.lower() != new_tag.lower()] + removed_tags.extend(to_remove) + + ok = backend.add_tag(file_hash, tags_to_add, config=config) + if removed_tags: + unique_removed = sorted(set(removed_tags)) + backend.delete_tag(file_hash, unique_removed, config=config) + + if not ok: + log(f"[add_tag] Warning: Failed to add tags via store '{store_name}'", file=sys.stderr) + ctx.emit(res) + continue + + refreshed_tags, _ = backend.get_tag(file_hash, config=config) + refreshed_tags = list(refreshed_tags or []) + final_tags = refreshed_tags + new_tags_added = [t for t in refreshed_tags if t.lower() not in original_tags_lower] + + # Update result tags for downstream cmdlets/UI + if isinstance(res, models.PipeObject): + res.tags = refreshed_tags + if isinstance(res.extra, dict): + res.extra['tags'] = refreshed_tags + elif isinstance(res, dict): + res['tags'] = refreshed_tags + + # Update title if changed + title_value = self._extract_title_tag(refreshed_tags) + self._apply_title_to_result(res, title_value) + + total_new_tags += len(new_tags_added) + if new_tags_added: + total_modified += 1 + except KeyError: + log(f"[add_tag] Store '{store_name}' not configured", file=sys.stderr) + ctx.emit(res) + continue + except Exception as e: + log(f"[add_tag] Warning: Backend error for store '{store_name}': {e}", file=sys.stderr) + ctx.emit(res) + continue + # If title changed, refresh the cached result table so the display reflects the new name final_title = self._extract_title_tag(final_tags) if final_title and (not original_title or final_title.lower() != original_title.lower()): - self._refresh_result_table_title(final_title, hydrus_hash or file_hash, file_hash, file_path) - # If tags changed, refresh tag view via get-tag (prefer current subject; fall back to hash refresh) + self._refresh_result_table_title(final_title, file_hash, file_path) + # If tags changed, refresh tag view via get-tag if new_tags_added or removed_tags: - self._refresh_tags_view(res, hydrus_hash, file_hash, file_path, config) + self._refresh_tags_view(res, file_hash, file_path, config) # Emit the modified result ctx.emit(res) log(f"[add_tag] Added {total_new_tags} new tag(s) across {len(results)} item(s); modified {total_modified} item(s)", file=sys.stderr) diff --git a/cmdlets/add_tags.py b/cmdlets/add_tags.py index 3cba2f9..24e96fb 100644 --- a/cmdlets/add_tags.py +++ b/cmdlets/add_tags.py @@ -4,16 +4,25 @@ from typing import Any, Dict, List, Sequence, Optional from pathlib import Path import sys -from helper.logger import log +from SYS.logger import log import models import pipeline as ctx from ._shared import normalize_result_input, filter_results_by_temp -from helper import hydrus as hydrus_wrapper -from helper.folder_store import read_sidecar, write_sidecar, find_sidecar, has_sidecar, FolderDB -from metadata import rename -from ._shared import Cmdlet, CmdletArg, SharedArgs, normalize_hash, parse_tag_arguments, expand_tag_groups, parse_cmdlet_args, collapse_namespace_tags, should_show_help, get_field -from config import get_local_storage_path +from 
._shared import ( + Cmdlet, + CmdletArg, + SharedArgs, + normalize_hash, + parse_tag_arguments, + expand_tag_groups, + parse_cmdlet_args, + collapse_namespace_tags, + should_show_help, + get_field, +) +from Store import Store +from SYS.utils import sha256_file def _extract_title_tag(tags: List[str]) -> Optional[str]: @@ -57,31 +66,25 @@ def _apply_title_to_result(res: Any, title_value: Optional[str]) -> None: res["columns"] = updated -def _matches_target(item: Any, hydrus_hash: Optional[str], file_hash: Optional[str], file_path: Optional[str]) -> bool: - """Determine whether a result item refers to the given hash/path target.""" - hydrus_hash_l = hydrus_hash.lower() if hydrus_hash else None - file_hash_l = file_hash.lower() if file_hash else None - file_path_l = file_path.lower() if file_path else None +def _matches_target(item: Any, target_hash: Optional[str], target_path: Optional[str]) -> bool: + """Determine whether a result item refers to the given hash/path target (canonical fields only).""" def norm(val: Any) -> Optional[str]: return str(val).lower() if val is not None else None - # Define field names to check for hashes and paths - hash_fields = ["hydrus_hash", "hash", "hash_hex", "file_hash"] - path_fields = ["path", "file_path", "target"] + target_hash_l = target_hash.lower() if target_hash else None + target_path_l = target_path.lower() if target_path else None if isinstance(item, dict): - hashes = [norm(item.get(field)) for field in hash_fields] - paths = [norm(item.get(field)) for field in path_fields] + hashes = [norm(item.get("hash"))] + paths = [norm(item.get("path"))] else: - hashes = [norm(get_field(item, field)) for field in hash_fields] - paths = [norm(get_field(item, field)) for field in path_fields] + hashes = [norm(get_field(item, "hash"))] + paths = [norm(get_field(item, "path"))] - if hydrus_hash_l and hydrus_hash_l in hashes: + if target_hash_l and target_hash_l in hashes: return True - if file_hash_l and file_hash_l in hashes: - return True - if file_path_l and file_path_l in paths: + if target_path_l and target_path_l in paths: return True return False @@ -114,7 +117,7 @@ def _update_item_title_fields(item: Any, new_title: str) -> None: item["columns"] = updated_cols -def _refresh_result_table_title(new_title: str, hydrus_hash: Optional[str], file_hash: Optional[str], file_path: Optional[str]) -> None: +def _refresh_result_table_title(new_title: str, target_hash: Optional[str], target_path: Optional[str]) -> None: """Refresh the cached result table with an updated title and redisplay it.""" try: last_table = ctx.get_last_result_table() @@ -126,7 +129,7 @@ def _refresh_result_table_title(new_title: str, hydrus_hash: Optional[str], file match_found = False for item in items: try: - if _matches_target(item, hydrus_hash, file_hash, file_path): + if _matches_target(item, target_hash, target_path): _update_item_title_fields(item, new_title) match_found = True except Exception: @@ -149,31 +152,30 @@ def _refresh_result_table_title(new_title: str, hydrus_hash: Optional[str], file pass -def _refresh_tags_view(res: Any, hydrus_hash: Optional[str], file_hash: Optional[str], file_path: Optional[str], config: Dict[str, Any]) -> None: +def _refresh_tags_view(res: Any, target_hash: Optional[str], store_name: Optional[str], target_path: Optional[str], config: Dict[str, Any]) -> None: """Refresh tag display via get-tag. 
Prefer current subject; fall back to direct hash refresh.""" try: from cmdlets import get_tag as get_tag_cmd # type: ignore except Exception: return - target_hash = hydrus_hash or file_hash - refresh_args: List[str] = [] - if target_hash: - refresh_args = ["-hash", target_hash, "-store", target_hash] + if not target_hash or not store_name: + return + + refresh_args: List[str] = ["-hash", target_hash, "-store", store_name] try: subject = ctx.get_last_result_subject() - if subject and _matches_target(subject, hydrus_hash, file_hash, file_path): + if subject and _matches_target(subject, target_hash, target_path): get_tag_cmd._run(subject, refresh_args, config) return except Exception: pass - if target_hash: - try: - get_tag_cmd._run(res, refresh_args, config) - except Exception: - pass + try: + get_tag_cmd._run(res, refresh_args, config) + except Exception: + pass @@ -183,10 +185,11 @@ class Add_Tag(Cmdlet): def __init__(self) -> None: super().__init__( name="add-tags", - summary="Add tags to a Hydrus file or write them to a local .tags sidecar.", - usage="add-tags [-hash ] [-duplicate ] [-list [,...]] [--all] [,...]", + summary="Add tags to a file in a store.", + usage="add-tags -store [-hash ] [-duplicate ] [-list [,...]] [--all] [,...]", arg=[ SharedArgs.HASH, + SharedArgs.STORE, CmdletArg("-duplicate", type="string", description="Copy existing tag values to new namespaces. Formats: title:album,artist (explicit) or title,album,artist (inferred)"), CmdletArg("-list", type="string", description="Load predefined tag lists from adjective.json. Comma-separated list names (e.g., -list philosophy,occult)."), CmdletArg("--all", type="flag", description="Include temporary files in tagging (by default, only tags non-temporary files)."), @@ -194,11 +197,11 @@ class Add_Tag(Cmdlet): ], detail=[ "- By default, only tags non-temporary files (from pipelines). 
Use --all to tag everything.", - "- Without -hash and when the selection is a local file, tags are written to .tags.", - "- With a Hydrus hash, tags are sent to the 'my tags' service.", + "- Requires a store backend: use -store or pipe items that include store.", + "- If -hash is not provided, uses the piped item's hash (or derives from its path when possible).", "- Multiple tags can be comma-separated or space-separated.", "- Use -list to include predefined tag lists from adjective.json: -list philosophy,occult", - "- Tags can also reference lists with curly braces: add-tag {philosophy} \"other:tag\"", + "- Tags can also reference lists with curly braces: add-tags {philosophy} \"other:tag\"", "- Use -duplicate to copy EXISTING tag values to new namespaces:", " Explicit format: -duplicate title:album,artist (copies title: to album: and artist:)", " Inferred format: -duplicate title,album,artist (first is source, rest are targets)", @@ -245,10 +248,8 @@ class Add_Tag(Cmdlet): # Try multiple tag lookup strategies in order tag_lookups = [ - lambda x: x.extra.get("tags") if isinstance(x, models.PipeObject) and isinstance(x.extra, dict) else None, - lambda x: x.get("tags") if isinstance(x, dict) else None, - lambda x: x.get("extra", {}).get("tags") if isinstance(x, dict) and isinstance(x.get("extra"), dict) else None, lambda x: getattr(x, "tags", None), + lambda x: x.get("tags") if isinstance(x, dict) else None, ] for lookup in tag_lookups: @@ -297,288 +298,158 @@ class Add_Tag(Cmdlet): hash_override = normalize_hash(parsed.get("hash")) or extracted_hash duplicate_arg = parsed.get("duplicate") - # If no tags provided (and no list), write sidecar files with embedded tags - # Note: Since 'tags' is required=False in the cmdlet arg, this block can be reached via CLI - # when no tag arguments are provided. 
- if not tags_to_add and not duplicate_arg: - # Write sidecar files with the tags that are already in the result dicts - sidecar_count = 0 - for res in results: - # Handle both dict and PipeObject formats - file_path = None - tags = [] - file_hash = "" - - if isinstance(res, models.PipeObject): - file_path = res.file_path - tags = res.extra.get('tags', []) - file_hash = res.hash or "" - elif isinstance(res, dict): - file_path = res.get('file_path') - # Try multiple tag locations in order - tag_sources = [lambda: res.get('tags', []), lambda: res.get('extra', {}).get('tags', [])] - for source in tag_sources: - tags = source() - if tags: - break - file_hash = res.get('hash', "") - - if not file_path: - log(f"[add_tags] Warning: Result has no file_path, skipping", file=sys.stderr) - ctx.emit(res) - continue - - if tags: - # Write sidecar file for this file with its tags - try: - sidecar_path = write_sidecar(Path(file_path), tags, [], file_hash) - log(f"[add_tags] Wrote {len(tags)} tag(s) to sidecar: {sidecar_path}", file=sys.stderr) - sidecar_count += 1 - except Exception as e: - log(f"[add_tags] Warning: Failed to write sidecar for {file_path}: {e}", file=sys.stderr) - - ctx.emit(res) - - if sidecar_count > 0: - log(f"[add_tags] Wrote {sidecar_count} sidecar file(s) with embedded tags", file=sys.stderr) - else: - log(f"[add_tags] No tags to write - passed {len(results)} result(s) through unchanged", file=sys.stderr) - return 0 - - # Tags ARE provided - append them to each result and write sidecar files or add to Hydrus - sidecar_count = 0 - total_new_tags = 0 + # Tags ARE provided - apply them to each store-backed result + total_added = 0 total_modified = 0 - for res in results: - # Handle both dict and PipeObject formats - file_path = None - existing_tags = [] - file_hash = "" - storage_source = None - hydrus_hash = None - # Define field name aliases to check - path_field_names = ['file_path', 'path'] - source_field_names = ['storage_source', 'source', 'origin'] - hash_field_names = ['hydrus_hash', 'hash', 'hash_hex'] + store_override = parsed.get("store") + + for res in results: + store_name: Optional[str] + raw_hash: Optional[str] + raw_path: Optional[str] if isinstance(res, models.PipeObject): - file_path = res.file_path - existing_tags = res.extra.get('tags', []) - file_hash = res.file_hash or "" - for field in source_field_names: - storage_source = res.extra.get(field) - if storage_source: - break - hydrus_hash = res.extra.get('hydrus_hash') + store_name = store_override or res.store + raw_hash = res.hash + raw_path = res.path elif isinstance(res, dict): - # Try path field names in order - for field in path_field_names: - file_path = res.get(field) - if file_path: - break - - # Try tag locations in order - tag_sources = [lambda: res.get('tags', []), lambda: res.get('extra', {}).get('tags', [])] - for source in tag_sources: - existing_tags = source() - if existing_tags: - break - - file_hash = res.get('file_hash', "") - - # Try source field names in order (top-level then extra) - for field in source_field_names: - storage_source = res.get(field) - if storage_source: - break - if not storage_source and 'extra' in res: - for field in source_field_names: - storage_source = res.get('extra', {}).get(field) - if storage_source: - break - - # Try hash field names in order (top-level then extra) - for field in hash_field_names: - hydrus_hash = res.get(field) - if hydrus_hash: - break - if not hydrus_hash and 'extra' in res: - for field in hash_field_names: - hydrus_hash = res.get('extra', 
{}).get(field) - if hydrus_hash: - break - - if not hydrus_hash and file_hash: - hydrus_hash = file_hash - if not storage_source and hydrus_hash and not file_path: - storage_source = 'hydrus' - # If we have a file path but no storage source, assume local to avoid sidecar spam - if not storage_source and file_path: - storage_source = 'local' + store_name = store_override or res.get("store") + raw_hash = res.get("hash") + raw_path = res.get("path") else: ctx.emit(res) continue - original_tags_lower = {str(t).lower() for t in existing_tags if isinstance(t, str)} - original_tags_snapshot = list(existing_tags) - original_title = _extract_title_tag(original_tags_snapshot) - removed_tags: List[str] = [] + if not store_name: + log("[add_tags] Error: Missing -store and item has no store field", file=sys.stderr) + return 1 - # Apply hash override if provided - if hash_override: - hydrus_hash = hash_override - # If we have a hash override, we treat it as a Hydrus target - storage_source = "hydrus" + resolved_hash = normalize_hash(hash_override) if hash_override else normalize_hash(raw_hash) + if not resolved_hash and raw_path: + try: + p = Path(str(raw_path)) + stem = p.stem + if len(stem) == 64 and all(c in "0123456789abcdef" for c in stem.lower()): + resolved_hash = stem.lower() + elif p.exists() and p.is_file(): + resolved_hash = sha256_file(p) + except Exception: + resolved_hash = None - if not file_path and not hydrus_hash: - log(f"[add_tags] Warning: Result has neither file_path nor hash available, skipping", file=sys.stderr) + if not resolved_hash: + log("[add_tags] Warning: Item missing usable hash (and could not derive from path); skipping", file=sys.stderr) ctx.emit(res) continue + try: + backend = Store(config)[str(store_name)] + except Exception as exc: + log(f"[add_tags] Error: Unknown store '{store_name}': {exc}", file=sys.stderr) + return 1 + + try: + existing_tags, _src = backend.get_tag(resolved_hash, config=config) + except Exception: + existing_tags = [] + + existing_tags_list = [t for t in (existing_tags or []) if isinstance(t, str)] + existing_lower = {t.lower() for t in existing_tags_list} + original_title = _extract_title_tag(existing_tags_list) + + # Per-item tag list (do not mutate shared list) + item_tags_to_add = list(tags_to_add) + item_tags_to_add = collapse_namespace_tags(item_tags_to_add, "title", prefer="last") + # Handle -duplicate logic (copy existing tags to new namespaces) if duplicate_arg: - # Parse duplicate format: source:target1,target2 or source,target1,target2 - parts = duplicate_arg.split(':') + parts = str(duplicate_arg).split(':') source_ns = "" - targets = [] + targets: list[str] = [] if len(parts) > 1: - # Explicit format: source:target1,target2 source_ns = parts[0] - targets = parts[1].split(',') + targets = [t.strip() for t in parts[1].split(',') if t.strip()] else: - # Inferred format: source,target1,target2 - parts = duplicate_arg.split(',') - if len(parts) > 1: - source_ns = parts[0] - targets = parts[1:] + parts2 = str(duplicate_arg).split(',') + if len(parts2) > 1: + source_ns = parts2[0] + targets = [t.strip() for t in parts2[1:] if t.strip()] if source_ns and targets: - # Find tags in source namespace - source_tags = [t for t in existing_tags if t.startswith(source_ns + ':')] - for t in source_tags: - value = t.split(':', 1)[1] + source_prefix = source_ns.lower() + ":" + for t in existing_tags_list: + if not t.lower().startswith(source_prefix): + continue + value = t.split(":", 1)[1] for target_ns in targets: new_tag = f"{target_ns}:{value}" - 
if new_tag not in existing_tags and new_tag not in tags_to_add: - tags_to_add.append(new_tag) + if new_tag.lower() not in existing_lower: + item_tags_to_add.append(new_tag) - # Merge new tags with existing tags, handling namespace overwrites - # When adding a tag like "namespace:value", remove any existing "namespace:*" tags - for new_tag in tags_to_add: - # Check if this is a namespaced tag (format: "namespace:value") - if ':' in new_tag: - namespace = new_tag.split(':', 1)[0] - # Track removals for Hydrus: delete old tags in same namespace (except identical) - to_remove = [t for t in existing_tags if t.startswith(namespace + ':') and t.lower() != new_tag.lower()] - removed_tags.extend(to_remove) - # Remove any existing tags with the same namespace - existing_tags = [t for t in existing_tags if not (t.startswith(namespace + ':'))] + # Namespace replacement: delete old namespace:* when adding namespace:value + removed_namespace_tags: list[str] = [] + for new_tag in item_tags_to_add: + if not isinstance(new_tag, str) or ":" not in new_tag: + continue + ns = new_tag.split(":", 1)[0].strip() + if not ns: + continue + ns_prefix = ns.lower() + ":" + for t in existing_tags_list: + if t.lower().startswith(ns_prefix) and t.lower() != new_tag.lower(): + removed_namespace_tags.append(t) - # Add the new tag if not already present - if new_tag not in existing_tags: - existing_tags.append(new_tag) + removed_namespace_tags = sorted({t for t in removed_namespace_tags}) - # Ensure only one tag per namespace (e.g., single title:) with latest preferred - existing_tags = collapse_namespace_tags(existing_tags, "title", prefer="last") + actual_tags_to_add = [t for t in item_tags_to_add if isinstance(t, str) and t.lower() not in existing_lower] - # Compute new tags relative to original - new_tags_added = [t for t in existing_tags if isinstance(t, str) and t.lower() not in original_tags_lower] - total_new_tags += len(new_tags_added) + changed = False + if removed_namespace_tags: + try: + backend.delete_tag(resolved_hash, removed_namespace_tags, config=config) + changed = True + except Exception as exc: + log(f"[add_tags] Warning: Failed deleting namespace tags: {exc}", file=sys.stderr) - # Update the result's tags + if actual_tags_to_add: + try: + backend.add_tag(resolved_hash, actual_tags_to_add, config=config) + changed = True + except Exception as exc: + log(f"[add_tags] Warning: Failed adding tags: {exc}", file=sys.stderr) + + if changed: + total_added += len(actual_tags_to_add) + total_modified += 1 + + try: + refreshed_tags, _src2 = backend.get_tag(resolved_hash, config=config) + refreshed_list = [t for t in (refreshed_tags or []) if isinstance(t, str)] + except Exception: + refreshed_list = existing_tags_list + + # Update the result's tags using canonical field if isinstance(res, models.PipeObject): - res.extra['tags'] = existing_tags + res.tags = refreshed_list elif isinstance(res, dict): - res['tags'] = existing_tags + res["tags"] = refreshed_list - # If a title: tag was added, update the in-memory title and columns so downstream display reflects it immediately - title_value = _extract_title_tag(existing_tags) - _apply_title_to_result(res, title_value) + final_title = _extract_title_tag(refreshed_list) + _apply_title_to_result(res, final_title) - final_tags = existing_tags - - # Determine where to add tags: Hydrus, local DB, or sidecar - if storage_source and storage_source.lower() == 'hydrus': - # Add tags to Hydrus using the API - target_hash = hydrus_hash or file_hash - if target_hash: - try: - 
tags_to_send = [t for t in existing_tags if isinstance(t, str) and t.lower() not in original_tags_lower] - hydrus_client = hydrus_wrapper.get_client(config) - service_name = hydrus_wrapper.get_tag_service_name(config) - if tags_to_send: - log(f"[add_tags] Adding {len(tags_to_send)} new tag(s) to Hydrus file: {target_hash}", file=sys.stderr) - hydrus_client.add_tags(target_hash, tags_to_send, service_name) - else: - log(f"[add_tags] No new tags to add for Hydrus file: {target_hash}", file=sys.stderr) - # Delete old namespace tags we replaced (e.g., previous title:) - if removed_tags: - unique_removed = sorted(set(removed_tags)) - hydrus_client.delete_tags(target_hash, unique_removed, service_name) - if tags_to_send: - log(f"[add_tags] ✓ Tags added to Hydrus", file=sys.stderr) - elif removed_tags: - log(f"[add_tags] ✓ Removed {len(unique_removed)} tag(s) from Hydrus", file=sys.stderr) - sidecar_count += 1 - if tags_to_send or removed_tags: - total_modified += 1 - except Exception as e: - log(f"[add_tags] Warning: Failed to add tags to Hydrus: {e}", file=sys.stderr) - else: - log(f"[add_tags] Warning: No hash available for Hydrus file, skipping", file=sys.stderr) - elif storage_source and storage_source.lower() == 'local': - # For local storage, save directly to DB (no sidecar needed) - if file_path: - library_root = get_local_storage_path(config) - if library_root: - try: - path_obj = Path(file_path) - with FolderDB(library_root) as db: - db.save_tags(path_obj, existing_tags) - # Reload tags to reflect DB state (preserves auto-title logic) - file_hash = db.get_file_hash(path_obj) - refreshed_tags = db.get_tags(file_hash) if file_hash else existing_tags - # Recompute title from refreshed tags for accurate display - refreshed_title = _extract_title_tag(refreshed_tags) - if refreshed_title: - _apply_title_to_result(res, refreshed_title) - res_tags = refreshed_tags or existing_tags - if isinstance(res, models.PipeObject): - res.extra['tags'] = res_tags - elif isinstance(res, dict): - res['tags'] = res_tags - log(f"[add_tags] Added {len(new_tags_added)} new tag(s); {len(res_tags)} total tag(s) stored locally", file=sys.stderr) - sidecar_count += 1 - if new_tags_added or removed_tags: - total_modified += 1 - final_tags = res_tags - except Exception as e: - log(f"[add_tags] Warning: Failed to save tags to local DB: {e}", file=sys.stderr) - else: - log(f"[add_tags] Warning: No library root configured for local storage, skipping", file=sys.stderr) - else: - log(f"[add_tags] Warning: No file path for local storage, skipping", file=sys.stderr) - else: - # For other storage types or unknown sources, avoid writing sidecars to reduce clutter - # (local/hydrus are handled above). 
- ctx.emit(res) - continue - - # If title changed, refresh the cached result table so the display reflects the new name - final_title = _extract_title_tag(final_tags) if final_title and (not original_title or final_title.lower() != original_title.lower()): - _refresh_result_table_title(final_title, hydrus_hash or file_hash, file_hash, file_path) + _refresh_result_table_title(final_title, resolved_hash, raw_path) - # If tags changed, refresh tag view via get-tag (prefer current subject; fall back to hash refresh) - if new_tags_added or removed_tags: - _refresh_tags_view(res, hydrus_hash, file_hash, file_path, config) + if changed: + _refresh_tags_view(res, resolved_hash, str(store_name), raw_path, config) - # Emit the modified result ctx.emit(res) - log(f"[add_tags] Added {total_new_tags} new tag(s) across {len(results)} item(s); modified {total_modified} item(s)", file=sys.stderr) + log( + f"[add_tags] Added {total_added} new tag(s) across {len(results)} item(s); modified {total_modified} item(s)", + file=sys.stderr, + ) return 0 diff --git a/cmdlets/add_url.py b/cmdlets/add_url.py index a48a517..2254d6e 100644 --- a/cmdlets/add_url.py +++ b/cmdlets/add_url.py @@ -6,8 +6,8 @@ import sys from . import register import pipeline as ctx from ._shared import Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, get_field, normalize_hash -from helper.logger import log -from helper.store import FileStorage +from SYS.logger import log +from Store import Store class Add_Url(Cmdlet): @@ -54,19 +54,19 @@ class Add_Url(Cmdlet): return 1 # Parse url (comma-separated) - url = [u.strip() for u in str(url_arg).split(',') if u.strip()] - if not url: + urls = [u.strip() for u in str(url_arg).split(',') if u.strip()] + if not urls: log("Error: No valid url provided") return 1 # Get backend and add url try: - storage = FileStorage(config) + storage = Store(config) backend = storage[store_name] - - for url in url: - backend.add_url(file_hash, url) - ctx.emit(f"Added URL: {url}") + + backend.add_url(file_hash, urls) + for u in urls: + ctx.emit(f"Added URL: {u}") return 0 diff --git a/helper/cmdlet_catalog.py b/cmdlets/catalog.py similarity index 99% rename from helper/cmdlet_catalog.py rename to cmdlets/catalog.py index c45e759..46290f2 100644 --- a/helper/cmdlet_catalog.py +++ b/cmdlets/catalog.py @@ -81,7 +81,6 @@ def get_cmdlet_metadata(cmd_name: str) -> Optional[Dict[str, Any]]: mod = import_cmd_module(normalized) data = getattr(mod, "CMDLET", None) if mod else None - # Fallback: resolve via registered function's module (covers aliases) if data is None: try: reg_fn = (REGISTRY or {}).get(cmd_name.replace('_', '-').lower()) diff --git a/cmdlets/check_file_status.py b/cmdlets/check_file_status.py index 46c975f..9328436 100644 --- a/cmdlets/check_file_status.py +++ b/cmdlets/check_file_status.py @@ -4,10 +4,10 @@ from typing import Any, Dict, Sequence import json import sys -from helper.logger import log +from SYS.logger import log from . import register -from helper import hydrus as hydrus_wrapper +from API import HydrusNetwork as hydrus_wrapper from ._shared import Cmdlet, CmdletArg, SharedArgs, normalize_hash, should_show_help diff --git a/cmdlets/cleanup.py b/cmdlets/cleanup.py index 43ba924..bcc684d 100644 --- a/cmdlets/cleanup.py +++ b/cmdlets/cleanup.py @@ -11,7 +11,7 @@ from pathlib import Path import sys import json -from helper.logger import log +from SYS.logger import log from . 
import register from ._shared import Cmdlet, CmdletArg, get_pipe_object_path, normalize_result_input, filter_results_by_temp, should_show_help diff --git a/cmdlets/delete_file.py b/cmdlets/delete_file.py index 012eae9..df31369 100644 --- a/cmdlets/delete_file.py +++ b/cmdlets/delete_file.py @@ -5,10 +5,10 @@ from typing import Any, Dict, Sequence import sys from pathlib import Path -from helper.logger import debug, log -from helper.store import Folder -from ._shared import Cmdlet, CmdletArg, normalize_hash, looks_like_hash, get_origin, get_field, should_show_help -from helper import hydrus as hydrus_wrapper +from SYS.logger import debug, log +from Store.Folder import Folder +from ._shared import Cmdlet, CmdletArg, normalize_hash, looks_like_hash, get_field, should_show_help +from API import HydrusNetwork as hydrus_wrapper import pipeline as ctx @@ -48,17 +48,17 @@ class Delete_File(Cmdlet): hash_hex_raw = get_field(item, "hash_hex") or get_field(item, "hash") target = get_field(item, "target") or get_field(item, "file_path") or get_field(item, "path") - origin = get_origin(item) - - # Also check the store field explicitly from PipeObject store = None if isinstance(item, dict): store = item.get("store") else: store = get_field(item, "store") + + store_lower = str(store).lower() if store else "" + is_hydrus_store = bool(store_lower) and ("hydrus" in store_lower or store_lower in {"home", "work"}) # For Hydrus files, the target IS the hash - if origin and origin.lower() == "hydrus" and not hash_hex_raw: + if is_hydrus_store and not hash_hex_raw: hash_hex_raw = target hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(hash_hex_raw) @@ -72,7 +72,7 @@ class Delete_File(Cmdlet): # If lib_root is provided and this is from a folder store, use the Folder class if lib_root: try: - folder = Folder(Path(lib_root), name=origin or "local") + folder = Folder(Path(lib_root), name=store or "local") if folder.delete_file(str(path)): local_deleted = True ctx.emit(f"Removed file: {path.name}") @@ -109,17 +109,7 @@ class Delete_File(Cmdlet): pass hydrus_deleted = False - # Only attempt Hydrus deletion if store is explicitly Hydrus-related - # Check both origin and store fields to determine if this is a Hydrus file - - should_try_hydrus = False - - # Check if store indicates this is a Hydrus backend - if store and ("hydrus" in store.lower() or store.lower() == "home" or store.lower() == "work"): - should_try_hydrus = True - # Fallback to origin check if store not available - elif origin and origin.lower() == "hydrus": - should_try_hydrus = True + should_try_hydrus = is_hydrus_store # If conserve is set to hydrus, definitely don't delete if conserve == "hydrus": diff --git a/cmdlets/delete_note.py b/cmdlets/delete_note.py index f7346cb..c0ef7a6 100644 --- a/cmdlets/delete_note.py +++ b/cmdlets/delete_note.py @@ -4,9 +4,9 @@ from typing import Any, Dict, Sequence import json import pipeline as ctx -from helper import hydrus as hydrus_wrapper +from API import HydrusNetwork as hydrus_wrapper from ._shared import Cmdlet, CmdletArg, normalize_hash, get_hash_for_operation, fetch_hydrus_metadata, should_show_help, get_field -from helper.logger import log +from SYS.logger import log CMDLET = Cmdlet( name="delete-note", diff --git a/cmdlets/delete_relationship.py b/cmdlets/delete_relationship.py index 3ff270b..fb93078 100644 --- a/cmdlets/delete_relationship.py +++ b/cmdlets/delete_relationship.py @@ -7,11 +7,11 @@ import json from pathlib import Path import sys -from helper.logger import log 
+from SYS.logger import log import pipeline as ctx from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args, normalize_result_input, get_field -from helper.folder_store import LocalLibrarySearchOptimizer +from API.folder import LocalLibrarySearchOptimizer from config import get_local_storage_path diff --git a/cmdlets/delete_tag.py b/cmdlets/delete_tag.py index 50c6c11..edaef54 100644 --- a/cmdlets/delete_tag.py +++ b/cmdlets/delete_tag.py @@ -8,12 +8,12 @@ import sys from . import register import models import pipeline as ctx -from helper import hydrus as hydrus_wrapper -from ._shared import Cmdlet, CmdletArg, SharedArgs, normalize_hash, parse_tag_arguments, fetch_hydrus_metadata, should_show_help, get_field -from helper.logger import debug, log +from ._shared import Cmdlet, CmdletArg, SharedArgs, normalize_hash, parse_tag_arguments, should_show_help, get_field +from SYS.logger import debug, log +from Store import Store -def _refresh_tag_view_if_current(hash_hex: str | None, file_path: str | None, config: Dict[str, Any]) -> None: +def _refresh_tag_view_if_current(file_hash: str | None, store_name: str | None, path: str | None, config: Dict[str, Any]) -> None: """If the current subject matches the target, refresh tags via get-tag.""" try: from cmdlets import get_tag as get_tag_cmd # type: ignore @@ -28,17 +28,17 @@ def _refresh_tag_view_if_current(hash_hex: str | None, file_path: str | None, co def norm(val: Any) -> str: return str(val).lower() - target_hash = norm(hash_hex) if hash_hex else None - target_path = norm(file_path) if file_path else None + target_hash = norm(file_hash) if file_hash else None + target_path = norm(path) if path else None subj_hashes: list[str] = [] subj_paths: list[str] = [] if isinstance(subject, dict): - subj_hashes = [norm(v) for v in [subject.get("hydrus_hash"), subject.get("hash"), subject.get("hash_hex"), subject.get("file_hash")] if v] - subj_paths = [norm(v) for v in [subject.get("file_path"), subject.get("path"), subject.get("target")] if v] + subj_hashes = [norm(v) for v in [subject.get("hash")] if v] + subj_paths = [norm(v) for v in [subject.get("path"), subject.get("target")] if v] else: - subj_hashes = [norm(get_field(subject, f)) for f in ("hydrus_hash", "hash", "hash_hex", "file_hash") if get_field(subject, f)] - subj_paths = [norm(get_field(subject, f)) for f in ("file_path", "path", "target") if get_field(subject, f)] + subj_hashes = [norm(get_field(subject, f)) for f in ("hash",) if get_field(subject, f)] + subj_paths = [norm(get_field(subject, f)) for f in ("path", "target") if get_field(subject, f)] is_match = False if target_hash and target_hash in subj_hashes: @@ -49,20 +49,20 @@ def _refresh_tag_view_if_current(hash_hex: str | None, file_path: str | None, co return refresh_args: list[str] = [] - if hash_hex: - refresh_args.extend(["-hash", hash_hex]) + if file_hash: + refresh_args.extend(["-hash", file_hash]) get_tag_cmd._run(subject, refresh_args, config) except Exception: pass CMDLET = Cmdlet( - name="delete-tags", - summary="Remove tags from a Hydrus file.", - usage="del-tags [-hash ] [,...]", - alias=["del-tag", "del-tags", "delete-tag"], + name="delete-tag", + summary="Remove tags from a file in a store.", + usage="delete-tag -store [-hash ] [,...]", arg=[ SharedArgs.HASH, + SharedArgs.STORE, CmdletArg("[,...]", required=True, description="One or more tags to remove. 
Comma- or space-separated."), ], detail=[ @@ -71,7 +71,7 @@ CMDLET = Cmdlet( ], ) -@register(["del-tag", "del-tags", "delete-tag", "delete-tags"]) # Still needed for backward compatibility +@register(["delete-tag"]) def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: # Help if should_show_help(args): @@ -94,6 +94,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: # Parse -hash override and collect tags from remaining args override_hash: str | None = None + override_store: str | None = None rest: list[str] = [] i = 0 while i < len(args): @@ -103,6 +104,10 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: override_hash = str(args[i + 1]).strip() i += 2 continue + if low in {"-store", "--store", "store"} and i + 1 < len(args): + override_store = str(args[i + 1]).strip() + i += 2 + continue rest.append(a) i += 1 @@ -110,7 +115,8 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: # @5 or @{2,5,8} to delete tags from ResultTable by index tags_from_at_syntax = [] hash_from_at_syntax = None - file_path_from_at_syntax = None + path_from_at_syntax = None + store_from_at_syntax = None if rest and str(rest[0]).startswith("@"): selector_arg = str(rest[0]) @@ -142,9 +148,11 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: tags_from_at_syntax.append(tag_name) # Also get hash from first item for consistency if not hash_from_at_syntax: - hash_from_at_syntax = get_field(item, 'hash_hex') - if not file_path_from_at_syntax: - file_path_from_at_syntax = get_field(item, 'file_path') + hash_from_at_syntax = get_field(item, 'hash') + if not path_from_at_syntax: + path_from_at_syntax = get_field(item, 'path') + if not store_from_at_syntax: + store_from_at_syntax = get_field(item, 'store') if not tags_from_at_syntax: log(f"No tags found at indices: {indices}") @@ -201,10 +209,11 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: # This preserves the existing logic for @ selection. tags = tags_from_at_syntax - hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(hash_from_at_syntax) - file_path = file_path_from_at_syntax + file_hash = normalize_hash(override_hash) if override_hash else normalize_hash(hash_from_at_syntax) + path = path_from_at_syntax + store_name = override_store or store_from_at_syntax - if _process_deletion(tags, hash_hex, file_path, config): + if _process_deletion(tags, file_hash, path, store_name, config): success_count += 1 else: @@ -216,13 +225,12 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: for item in items_to_process: tags_to_delete = [] - item_hash = normalize_hash(override_hash) if override_hash else normalize_hash(get_field(item, "hash_hex")) + item_hash = normalize_hash(override_hash) if override_hash else normalize_hash(get_field(item, "hash")) item_path = ( get_field(item, "path") - or get_field(item, "file_path") or get_field(item, "target") ) - item_source = get_field(item, "source") + item_store = override_store or get_field(item, "store") if hasattr(item, '__class__') and item.__class__.__name__ == 'TagItem': # It's a TagItem @@ -248,66 +256,43 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: # but inside the loop we might have mixed items? Unlikely. 
continue - if tags_to_delete and (item_hash or item_path): - if _process_deletion(tags_to_delete, item_hash, item_path, config, source=item_source): + if tags_to_delete: + if _process_deletion(tags_to_delete, item_hash, item_path, item_store, config): success_count += 1 if success_count > 0: return 0 return 1 -def _process_deletion(tags: list[str], hash_hex: str | None, file_path: str | None, config: Dict[str, Any], source: str | None = None) -> bool: +def _process_deletion(tags: list[str], file_hash: str | None, path: str | None, store_name: str | None, config: Dict[str, Any]) -> bool: """Helper to execute the deletion logic for a single target.""" if not tags: return False + if not store_name: + log("Store is required (use -store or pipe a result with store)", file=sys.stderr) + return False + + resolved_hash = normalize_hash(file_hash) if file_hash else None + if not resolved_hash and path: + try: + from SYS.utils import sha256_file + resolved_hash = sha256_file(Path(path)) + except Exception: + resolved_hash = None + + if not resolved_hash: + log("Item does not include a usable hash (and hash could not be derived from path)", file=sys.stderr) + return False + def _fetch_existing_tags() -> list[str]: - existing: list[str] = [] - # Prefer local DB when we have a path and not explicitly hydrus - if file_path and (source == "local" or (source != "hydrus" and not hash_hex)): - try: - from helper.folder_store import FolderDB - from config import get_local_storage_path - path_obj = Path(file_path) - local_root = get_local_storage_path(config) or path_obj.parent - with FolderDB(local_root) as db: - file_hash = db.get_file_hash(path_obj) - existing = db.get_tags(file_hash) if file_hash else [] - except Exception: - existing = [] - elif hash_hex: - meta, _ = fetch_hydrus_metadata( - config, hash_hex, - include_service_keys_to_tags=True, - include_file_url=False, - ) - if isinstance(meta, dict): - tags_payload = meta.get("tags") - if isinstance(tags_payload, dict): - seen: set[str] = set() - for svc_data in tags_payload.values(): - if not isinstance(svc_data, dict): - continue - display = svc_data.get("display_tags") - if isinstance(display, list): - for t in display: - if isinstance(t, (str, bytes)): - val = str(t).strip() - if val and val not in seen: - seen.add(val) - existing.append(val) - storage = svc_data.get("storage_tags") - if isinstance(storage, dict): - current_list = storage.get("0") or storage.get(0) - if isinstance(current_list, list): - for t in current_list: - if isinstance(t, (str, bytes)): - val = str(t).strip() - if val and val not in seen: - seen.add(val) - existing.append(val) - return existing + try: + backend = Store(config)[store_name] + existing, _src = backend.get_tag(resolved_hash, config=config) + return list(existing or []) + except Exception: + return [] # Safety: only block if this deletion would remove the final title tag title_tags = [t for t in tags if isinstance(t, str) and t.lower().startswith("title:")] @@ -320,61 +305,17 @@ def _process_deletion(tags: list[str], hash_hex: str | None, file_path: str | No log("Cannot delete the last title: tag. 
Add a replacement title first (add-tag \"title:new title\").", file=sys.stderr) return False - if not hash_hex and not file_path: - log("Item does not include a hash or file path") - return False - - # Handle local file tag deletion - if file_path and (source == "local" or (not hash_hex and source != "hydrus")): - try: - from helper.folder_store import FolderDB - from pathlib import Path - - path_obj = Path(file_path) - if not path_obj.exists(): - log(f"File not found: {file_path}") - return False - - # Try to get local storage path from config - from config import get_local_storage_path - local_root = get_local_storage_path(config) - - if not local_root: - # Fallback: assume file is in a library root or use its parent - local_root = path_obj.parent - - with FolderDB(local_root) as db: - db.remove_tags(path_obj, tags) - debug(f"Removed {len(tags)} tag(s) from {path_obj.name} (local)") - _refresh_tag_view_if_current(hash_hex, file_path, config) - return True - - except Exception as exc: - log(f"Failed to remove local tags: {exc}") - return False - - # Hydrus deletion logic - if not hash_hex: - return False - try: - service_name = hydrus_wrapper.get_tag_service_name(config) - client = hydrus_wrapper.get_client(config) - - if client is None: - log("Hydrus client unavailable") - return False - - debug(f"Sending deletion request: hash={hash_hex}, tags={tags}, service={service_name}") - client.delete_tags(hash_hex, tags, service_name) - - preview = hash_hex[:12] + ('…' if len(hash_hex) > 12 else '') - debug(f"Removed {len(tags)} tag(s) from {preview} via '{service_name}'.") - _refresh_tag_view_if_current(hash_hex, None, config) - return True - + backend = Store(config)[store_name] + ok = backend.delete_tag(resolved_hash, list(tags), config=config) + if ok: + preview = resolved_hash[:12] + ('…' if len(resolved_hash) > 12 else '') + debug(f"Removed {len(tags)} tag(s) from {preview} via store '{store_name}'.") + _refresh_tag_view_if_current(resolved_hash, store_name, path, config) + return True + return False except Exception as exc: - log(f"Hydrus del-tag failed: {exc}") + log(f"del-tag failed: {exc}") return False diff --git a/cmdlets/delete_url.py b/cmdlets/delete_url.py index 751b233..479ac2b 100644 --- a/cmdlets/delete_url.py +++ b/cmdlets/delete_url.py @@ -6,8 +6,8 @@ import sys from . 
import register import pipeline as ctx from ._shared import Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, get_field, normalize_hash -from helper.logger import log -from helper.store import FileStorage +from SYS.logger import log +from Store import Store class Delete_Url(Cmdlet): @@ -54,19 +54,19 @@ class Delete_Url(Cmdlet): return 1 # Parse url (comma-separated) - url = [u.strip() for u in str(url_arg).split(',') if u.strip()] - if not url: + urls = [u.strip() for u in str(url_arg).split(',') if u.strip()] + if not urls: log("Error: No valid url provided") return 1 # Get backend and delete url try: - storage = FileStorage(config) + storage = Store(config) backend = storage[store_name] - - for url in url: - backend.delete_url(file_hash, url) - ctx.emit(f"Deleted URL: {url}") + + backend.delete_url(file_hash, urls) + for u in urls: + ctx.emit(f"Deleted URL: {u}") return 0 diff --git a/cmdlets/download_file.py b/cmdlets/download_file.py index 663ebfd..4de636a 100644 --- a/cmdlets/download_file.py +++ b/cmdlets/download_file.py @@ -15,8 +15,8 @@ import sys from pathlib import Path from typing import Any, Dict, List, Optional, Sequence -from helper.download import DownloadError, _download_direct_file -from helper.logger import log, debug +from SYS.download import DownloadError, _download_direct_file +from SYS.logger import log, debug from models import DownloadOptions import pipeline as pipeline_context @@ -168,19 +168,16 @@ class Download_File(Cmdlet): # Build tags with title for searchability tags = [f"title:{title}"] - # Prefer canonical fields while keeping legacy keys for compatibility + # Canonical pipeline payload (no legacy aliases) return { "path": str(media_path), "hash": hash_value, - "file_hash": hash_value, "title": title, "file_title": title, "action": "cmdlet:download-file", "download_mode": "file", "url": url or (download_result.get('url') if isinstance(download_result, dict) else None), - "url": [url] if url else [], "store": "local", - "storage_source": "downloads", "media_kind": "file", "tags": tags, } diff --git a/cmdlets/download_media.py b/cmdlets/download_media.py index c736650..ed9cae8 100644 --- a/cmdlets/download_media.py +++ b/cmdlets/download_media.py @@ -29,9 +29,9 @@ from typing import Any, Dict, Iterator, List, Optional import httpx -from helper.logger import log, debug -from helper.utils import ensure_directory, sha256_file -from helper.http_client import HTTPClient +from SYS.logger import log, debug +from SYS.utils import ensure_directory, sha256_file +from API.HTTP import HTTPClient from models import DownloadError, DownloadOptions, DownloadMediaResult, DebugLogger, ProgressBar import pipeline as pipeline_context from result_table import ResultTable @@ -1199,7 +1199,7 @@ class Download_Media(Cmdlet): # Build format dict for emission and table format_dict = { - "origin": "download-media", + "table": "download-media", "title": f"Format {format_id}", "url": url, "target": url, diff --git a/cmdlets/download_provider.py b/cmdlets/download_provider.py new file mode 100644 index 0000000..21db6c5 --- /dev/null +++ b/cmdlets/download_provider.py @@ -0,0 +1,157 @@ +"""download-provider cmdlet: Download items from external providers.""" +from __future__ import annotations + +from typing import Any, Dict, Sequence, List, Optional +from pathlib import Path +import sys +import json + +from SYS.logger import log, debug +from Provider.registry import get_search_provider, SearchResult +from SYS.utils import unique_path + +from ._shared import Cmdlet, CmdletArg, 
should_show_help, get_field, coerce_to_pipe_object +import pipeline as ctx + +# Optional dependencies +try: + from config import get_local_storage_path, resolve_output_dir +except Exception: # pragma: no cover + get_local_storage_path = None # type: ignore + resolve_output_dir = None # type: ignore + +class Download_Provider(Cmdlet): + """Download items from external providers.""" + + def __init__(self): + super().__init__( + name="download-provider", + summary="Download items from external providers (soulseek, libgen, etc).", + usage="download-provider [item] [-output DIR]", + arg=[ + CmdletArg("output", type="string", alias="o", description="Output directory"), + ], + detail=[ + "Download items from external providers.", + "Usually called automatically by @N selection on provider results.", + "Can be used manually by piping a provider result item.", + ], + exec=self.run + ) + self.register() + + def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: + """Execute download-provider cmdlet.""" + if should_show_help(args): + ctx.emit(self.__dict__) + return 0 + + # Parse arguments + output_dir_arg = None + i = 0 + while i < len(args): + arg = args[i] + if arg in ("-output", "--output", "-o") and i + 1 < len(args): + output_dir_arg = args[i+1] + i += 2 + else: + i += 1 + + # Determine output directory + if output_dir_arg: + output_dir = Path(output_dir_arg) + elif resolve_output_dir: + output_dir = resolve_output_dir(config) + else: + output_dir = Path("./downloads") + + output_dir.mkdir(parents=True, exist_ok=True) + + # Process input result + items = [] + if isinstance(result, list): + items = result + elif result: + items = [result] + + if not items: + log("No items to download", file=sys.stderr) + return 1 + + success_count = 0 + + for item in items: + try: + # Extract provider info + table = get_field(item, "table") + if not table: + log(f"Skipping item without provider info: {item}", file=sys.stderr) + continue + + provider = get_search_provider(table, config) + if not provider: + log(f"Provider '{table}' not available for download", file=sys.stderr) + continue + + # Reconstruct SearchResult if needed + # The provider.download method expects a SearchResult object or compatible dict + if isinstance(item, dict): + # Ensure full_metadata is present + if "full_metadata" not in item and "extra" in item: + item["full_metadata"] = item["extra"].get("full_metadata", {}) + + search_result = SearchResult( + table=table, + title=item.get("title", "Unknown"), + path=item.get("path", ""), + full_metadata=item.get("full_metadata", {}) + ) + else: + # Assume it's an object with attributes (like PipeObject) + full_metadata = getattr(item, "full_metadata", {}) + # Check extra dict if full_metadata is missing/empty + if not full_metadata and hasattr(item, "extra") and isinstance(item.extra, dict): + full_metadata = item.extra.get("full_metadata", {}) + # Fallback: if full_metadata key isn't there, maybe the extra dict IS the metadata + if not full_metadata and "username" in item.extra: + full_metadata = item.extra + + search_result = SearchResult( + table=table, + title=getattr(item, "title", "Unknown"), + path=getattr(item, "path", ""), + full_metadata=full_metadata + ) + + debug(f"[download-provider] Downloading '{search_result.title}' via {table}...") + downloaded_path = provider.download(search_result, output_dir) + + if downloaded_path: + debug(f"[download-provider] Download successful: {downloaded_path}") + + # Create PipeObject for the downloaded file + pipe_obj = 
coerce_to_pipe_object({ + "path": str(downloaded_path), + "title": search_result.title, + "table": "local", # Now it's a local file + "media_kind": getattr(item, "media_kind", "other"), + "tags": getattr(item, "tags", []), + "full_metadata": search_result.full_metadata + }) + + ctx.emit(pipe_obj) + success_count += 1 + else: + log(f"Download failed for '{search_result.title}'", file=sys.stderr) + + except Exception as e: + log(f"Error downloading item: {e}", file=sys.stderr) + import traceback + debug(traceback.format_exc()) + + if success_count > 0: + return 0 + return 1 + +# Register cmdlet instance +Download_Provider_Instance = Download_Provider() diff --git a/cmdlets/download_torrent.py b/cmdlets/download_torrent.py index 2293d29..85baa1a 100644 --- a/cmdlets/download_torrent.py +++ b/cmdlets/download_torrent.py @@ -14,7 +14,7 @@ import threading from pathlib import Path from typing import Any, Dict, Optional, Sequence -from helper.logger import log +from SYS.logger import log from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args class Download_Torrent(Cmdlet): @@ -66,7 +66,7 @@ class Download_Torrent(Cmdlet): worker_manager: Optional[Any] = None, ) -> None: try: - from helper.alldebrid import AllDebridClient + from API.alldebrid import AllDebridClient client = AllDebridClient(api_key) log(f"[Worker {worker_id}] Submitting magnet to AllDebrid...") magnet_info = client.magnet_add(magnet_url) diff --git a/cmdlets/get_file.py b/cmdlets/get_file.py index f36f01a..a1de5e0 100644 --- a/cmdlets/get_file.py +++ b/cmdlets/get_file.py @@ -8,8 +8,8 @@ import shutil from . import register import pipeline as ctx from ._shared import Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, get_field, normalize_hash -from helper.logger import log, debug -from helper.store import FileStorage +from SYS.logger import log, debug +from Store import Store from config import resolve_output_dir @@ -68,8 +68,8 @@ class Get_File(Cmdlet): debug(f"[get-file] Getting storage backend: {store_name}") # Get storage backend - storage = FileStorage(config) - backend = storage[store_name] + store = Store(config) + backend = store[store_name] debug(f"[get-file] Backend retrieved: {type(backend).__name__}") # Get file metadata to determine name and extension diff --git a/cmdlets/get_file.py.backup b/cmdlets/get_file.py.backup deleted file mode 100644 index f2c8cfe..0000000 --- a/cmdlets/get_file.py.backup +++ /dev/null @@ -1,1708 +0,0 @@ -from __future__ import annotations - -from typing import Any, Callable, Dict, List, Optional, Sequence -from pathlib import Path -import shutil as _shutil -import subprocess as _subprocess -import json -import sys -import platform - -import threading - -from helper.logger import log, debug -import uuid as _uuid -import time as _time - -from helper.progress import print_progress, print_final_progress -from helper.http_client import HTTPClient -from helper.mpv_ipc import get_ipc_pipe_path, send_to_mpv, MPV_LUA_SCRIPT_PATH -import fnmatch as _fnmatch - -from . 
import register -import models -import pipeline as ctx -from helper import hydrus as hydrus_wrapper -from ._shared import Cmdlet, CmdletArg, normalize_hash, looks_like_hash, create_pipe_object_result, get_origin, get_field, should_show_help -from config import resolve_output_dir, get_hydrus_url, get_hydrus_access_key -from helper.alldebrid import AllDebridClient - -DEFAULT_DEBRID_WAIT_TIMEOUT = 600 -DEBRID_WORKER_PREFIX = "debrid_" - - - - - -def _is_alldebrid_pipe_data(line: str) -> bool: - """Check if line is AllDebrid pipe format: ID|filename|size|...""" - parts = line.strip().split('|') - if len(parts) < 5: - return False - try: - # Check if first part is magnet ID (integer) - magnet_id = int(parts[0]) - # Check if 3rd part (size) is integer - size = int(parts[2]) - # Check if 4th part (status_code) is integer - status_code = int(parts[3]) - return magnet_id > 0 and size >= 0 and status_code in {0, 1, 2, 3, 4} - except (ValueError, IndexError): - return False - - -def _handle_alldebrid_pipe(config: Dict[str, Any], args: Sequence[str]) -> int: - """Handle AllDebrid magnet downloads from piped stdin.""" - # Parse arguments - out_path = None - file_filter = None - i = 0 - while i < len(args): - if args[i].lower() in {"-path", "--path", "path"} and i + 1 < len(args): - out_path = Path(args[i + 1]).expanduser() - i += 2 - elif args[i].lower() in {"-file", "--file", "file"} and i + 1 < len(args): - file_filter = args[i + 1] - i += 2 - else: - i += 1 - - if not out_path: - log("✗ -path required for AllDebrid downloads", file=sys.stderr) - return 1 - - # Read magnet IDs from stdin - magnets = [] - try: - for line in sys.stdin: - line = line.strip() - if line and _is_alldebrid_pipe_data(line): - parts = line.split('|') - magnet_id = int(parts[0]) - magnets.append(magnet_id) - except Exception as e: - log(f"✗ Error reading stdin: {e}", file=sys.stderr) - return 1 - - if not magnets: - log("✗ No valid magnet IDs in pipe", file=sys.stderr) - return 1 - - return _queue_alldebrid_worker( - config=config, - output_dir=out_path, - magnet_ids=magnets, - title=f"AllDebrid pipe ({len(magnets)} magnet{'s' if len(magnets) != 1 else ''})", - file_filter=file_filter, - ) - - -def _extract_files_from_magnet(magnet_info: Dict[str, Any], filter_pattern: Optional[str] = None) -> list: - """Extract files from magnet file tree, optionally filtering by pattern.""" - files = [] - - def traverse(items: Any, prefix: str = "") -> None: - if not isinstance(items, list): - return - for item in items: - if not isinstance(item, dict): - continue - name = item.get('n', '') - link = item.get('l', '') - size = item.get('s', 0) - entries = item.get('e', []) - - # File - if link: - full_path = f"{prefix}/{name}" if prefix else name - if filter_pattern is None or _fnmatch.fnmatch(name.lower(), filter_pattern.lower()): - files.append({'name': name, 'path': full_path, 'size': size, 'link': link}) - - # Folder - if entries: - full_path = f"{prefix}/{name}" if prefix else name - traverse(entries, full_path) - - items = magnet_info.get('files', []) - traverse(items) - return files - - -def _download_file_from_alldebrid(url: str, output_path: Path, filename: str, file_size: int) -> bool: - """Download a single file from AllDebrid with progress bar.""" - output_path.parent.mkdir(parents=True, exist_ok=True) - - try: - downloaded = 0 - chunk_size = 1024 * 1024 - start_time = _time.time() - last_update = start_time - - with HTTPClient(timeout=30.0, headers={'User-Agent': 'downlow/1.0'}) as client: - response = client.get(url) - 
response.raise_for_status() - with open(output_path, 'wb', buffering=1024*1024) as f: - for chunk in response.iter_bytes(chunk_size): - if not chunk: - break - f.write(chunk) - downloaded += len(chunk) - - # Update progress every 0.5 seconds to avoid spam - now = _time.time() - if now - last_update >= 0.5 or downloaded == file_size: - elapsed = now - start_time - speed = downloaded / elapsed if elapsed > 0 else 0 - print_progress(filename, downloaded, file_size, speed) - last_update = now - - # Print final progress line - elapsed = _time.time() - start_time - print_final_progress(filename, file_size, elapsed) - log(f"✓ {filename} downloaded", file=sys.stderr) - - return True - except Exception as e: - log(f"\n[get-file] ✗ Download error: {e}", file=sys.stderr) - return False - - -def _queue_alldebrid_worker( - config: Dict[str, Any], - output_dir: Path, - magnet_ids: Sequence[int], - title: str, - file_filter: Optional[str] = None, - wait_timeout: int = DEFAULT_DEBRID_WAIT_TIMEOUT, -): - """Spawn a background worker to download AllDebrid magnets.""" - from config import get_debrid_api_key - - if not magnet_ids: - log("✗ No magnet IDs provided for AllDebrid download", file=sys.stderr) - return 1 - - api_key = get_debrid_api_key(config) - if not api_key: - log("✗ AllDebrid API key not configured", file=sys.stderr) - return 1 - - worker_id = f"{DEBRID_WORKER_PREFIX}{_uuid.uuid4().hex[:8]}" - worker_manager = config.get('_worker_manager') - if worker_manager: - try: - worker_manager.track_worker( - worker_id, - worker_type="download_debrid", - title=title, - description=f"AllDebrid download for {title}", - pipe=ctx.get_current_command_text(), - ) - except Exception as exc: - debug(f"⚠ Failed to register AllDebrid worker: {exc}") - worker_manager = None - - thread = threading.Thread( - target=_run_alldebrid_download_worker, - args=( - worker_id, - api_key, - output_dir, - list(magnet_ids), - file_filter, - title, - worker_manager, - wait_timeout, - ), - daemon=False, - name=f"AllDebridWorker_{worker_id}" - ) - thread.start() - - ctx.emit({ - 'worker_id': worker_id, - 'worker_type': 'download_debrid', - 'status': 'running', - 'message': f"{title} (queued)", - }) - - log(f"🌀 AllDebrid download queued (worker {worker_id})", file=sys.stderr) - return 0 - - -def _run_alldebrid_download_worker( - worker_id: str, - api_key: str, - output_dir: Path, - magnet_ids: List[int], - file_filter: Optional[str], - title: str, - worker_manager: Optional[Any], - wait_timeout: int, -): - """Worker entrypoint that polls AllDebrid and downloads magnet files.""" - def log_progress(message: str) -> None: - safe = f"[Worker {worker_id}] {message}" - debug(safe) - if worker_manager: - try: - worker_manager.log_step(worker_id, message) - except Exception: - pass - - try: - client = AllDebridClient(api_key) - except Exception as exc: - log_progress(f"✗ Failed to initialize AllDebrid client: {exc}") - if worker_manager: - try: - worker_manager.finish_worker(worker_id, "failed", str(exc)) - except Exception: - pass - return - - output_dir.mkdir(parents=True, exist_ok=True) - total_downloaded = 0 - total_failed = 0 - - for magnet_id in magnet_ids: - log_progress(f"⧗ Processing magnet {magnet_id}") - try: - status_info = client.magnet_status(magnet_id) - except Exception as exc: - log_progress(f"✗ Failed to query magnet {magnet_id}: {exc}") - total_failed += 1 - continue - - try: - ready_status = _wait_for_magnet_ready(client, magnet_id, log_progress, wait_timeout) - except Exception as exc: - log_progress(f"✗ Magnet 
{magnet_id} did not become ready: {exc}") - total_failed += 1 - continue - - try: - magnet_info = client.magnet_status(magnet_id, include_files=True) - except Exception as exc: - log_progress(f"✗ Failed to list files for magnet {magnet_id}: {exc}") - total_failed += 1 - continue - - files_list = _extract_files_from_magnet(magnet_info, file_filter) - if not files_list: - log_progress(f"⊘ Magnet {magnet_id} has no files") - total_failed += 1 - continue - - for file_info in files_list: - name = file_info.get('name', 'unknown') - log_progress(f"⇓ Downloading {name}") - link = file_info.get('link') - if not link: - log_progress(f"✗ Missing link for {name}") - total_failed += 1 - continue - - try: - direct_url = client.unlock_link(link) - except Exception as exc: - log_progress(f"✗ Failed to unlock {name}: {exc}") - total_failed += 1 - continue - - output_file = output_dir / name - if _download_file_from_alldebrid(direct_url, output_file, name, file_info.get('size', 0)): - total_downloaded += 1 - else: - total_failed += 1 - - if total_downloaded or total_failed: - summary = f"{total_downloaded} file(s) downloaded, {total_failed} failed" - else: - summary = "No files were processed" - - log(f"✓ AllDebrid worker {worker_id}: {summary}", file=sys.stderr) - if worker_manager: - status = "success" if total_downloaded > 0 else "failed" - try: - worker_manager.finish_worker(worker_id, status, summary if status == "failed" else "") - except Exception: - pass - - -def _wait_for_magnet_ready( - client: AllDebridClient, - magnet_id: int, - log_progress: Callable[[str], None], - wait_timeout: int, -) -> Dict[str, Any]: - elapsed = 0 - last_report = -5 - while elapsed < wait_timeout: - try: - status = client.magnet_status(magnet_id) - except Exception as exc: - log_progress(f"⚠ Live status check failed: {exc}") - _time.sleep(2) - elapsed += 2 - continue - - status_code = int(status.get('statusCode', -1)) - if status_code == 4: - return status - if status_code >= 5: - raise RuntimeError(status.get('status', f"Failed code {status_code}")) - if elapsed - last_report >= 5: - downloaded = status.get('downloaded', 0) - size = status.get('size', 0) - percent = (downloaded / size * 100) if size else 0 - log_progress(f"⧗ {status.get('status', 'processing')} — {percent:.1f}%") - last_report = elapsed - _time.sleep(2) - elapsed += 2 - raise TimeoutError(f"Magnet {magnet_id} not ready after {wait_timeout}s") - - -def _is_playable_in_mpv(file_path_or_ext: str, mime_type: Optional[str] = None) -> bool: - """Check if file can be played in MPV based on extension or mime type.""" - from helper.utils_constant import mime_maps - - # Check mime type first if provided - if mime_type: - mime_lower = mime_type.lower() - # Simple prefix check for common media types - if any(mime_lower.startswith(prefix) for prefix in ['video/', 'audio/', 'image/']): - return True - - # Extract extension - if file_path_or_ext.startswith('.'): - ext = file_path_or_ext.lower() - else: - ext = Path(file_path_or_ext).suffix.lower() - - if not ext: - return False - - # Check if extension is in playable categories - playable_categories = ['video', 'audio', 'image', 'image_sequence'] - - for category in playable_categories: - if category in mime_maps: - for key, info in mime_maps[category].items(): - if info.get('ext', '').lower() == ext: - return True - return False - - -def _play_in_mpv(file_url: str, file_title: str, is_stream: bool = False, headers: Optional[Dict[str, str]] = None) -> bool: - """Play file in MPV using centralized IPC pipe, creating new 
instance if needed. - - Returns True on success, False on error. - """ - try: - # First try to send to existing MPV instance - if send_to_mpv(file_url, file_title, headers): - debug(f"Added to MPV: {file_title}") - return True - - # No existing MPV or pipe unavailable - start new instance - ipc_pipe = get_ipc_pipe_path() - debug(f"[get-file] Starting new MPV instance (pipe: {ipc_pipe})", file=sys.stderr) - - # Build command - start MPV without a file initially, just with IPC server and our Lua helper - cmd = ['mpv', f'--input-ipc-server={ipc_pipe}'] - try: - if MPV_LUA_SCRIPT_PATH and Path(MPV_LUA_SCRIPT_PATH).exists(): - cmd.append(f"--scripts-append={MPV_LUA_SCRIPT_PATH}") - except Exception: - pass - - if headers: - # Format headers for command line - # --http-header-fields="Header1: Val1,Header2: Val2" - header_str = ",".join([f"{k}: {v}" for k, v in headers.items()]) - cmd.append(f'--http-header-fields={header_str}') - - # Add --idle flag so MPV stays running and waits for playlist commands - cmd.append('--idle') - - # Detach process to prevent freezing parent CLI - kwargs = {} - if platform.system() == 'Windows': - kwargs['creationflags'] = 0x00000008 # DETACHED_PROCESS - - _subprocess.Popen(cmd, stdin=_subprocess.DEVNULL, stdout=_subprocess.DEVNULL, stderr=_subprocess.DEVNULL, **kwargs) - - debug(f"[get-file] Started MPV instance (IPC: {ipc_pipe})", file=sys.stderr) - - # Give MPV time to start and open IPC pipe - # Windows needs more time than Unix - wait_time = 1.0 if platform.system() == 'Windows' else 0.5 - debug(f"[get-file] Waiting {wait_time}s for MPV to initialize IPC...", file=sys.stderr) - _time.sleep(wait_time) - - # Try up to 3 times to send the file via IPC - for attempt in range(3): - debug(f"[get-file] Sending file via IPC (attempt {attempt + 1}/3)", file=sys.stderr) - if send_to_mpv(file_url, file_title, headers): - debug(f"{'Streaming' if is_stream else 'Playing'} in MPV: {file_title}") - debug(f"[get-file] Added to new MPV instance (IPC: {ipc_pipe})", file=sys.stderr) - return True - - if attempt < 2: - # Wait before retrying - _time.sleep(0.3) - - # IPC send failed after all retries - log("Error: Could not send file to MPV via IPC after startup", file=sys.stderr) - return False - - except FileNotFoundError: - log("Error: MPV not found. Install mpv to play media files", file=sys.stderr) - return False - except Exception as e: - log(f"Error launching MPV: {e}", file=sys.stderr) - return False - - -# Backward-compatible alias for modules expecting the old IPC helper name. 
-def _get_fixed_ipc_pipe() -> str: - """Return the shared MPV IPC pipe path (compat shim).""" - return get_ipc_pipe_path() - - -def _handle_search_result(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: - """Handle a file from search-file results using FileStorage backend.""" - try: - from helper.store import FileStorage - - # Helper to get field from both dict and object - # Extract file information from ResultItem - storage_name = get_origin(result) - file_hash = get_field(result, 'hash_hex', None) - # Also check for file_hash field (from add-file and other cmdlets) - if not file_hash: - file_hash = get_field(result, 'file_hash', None) - file_title = get_field(result, 'title', 'file') - mime_type = get_field(result, 'mime', None) - file_path = get_field(result, 'target', None) - # Also check for 'file_path' field (from add-file and other cmdlets) - if not file_path: - file_path = get_field(result, 'file_path', None) - # Also check for 'path' field (from search-file and other cmdlets) - if not file_path: - file_path = get_field(result, 'path', None) - - full_metadata = get_field(result, 'full_metadata', {}) - magnet_id = full_metadata.get('magnet_id') if isinstance(full_metadata, dict) else None - - if not storage_name: - log("Error: No storage backend specified in result", file=sys.stderr) - return 1 - - debug(f"[get-file] Retrieving file from storage: {storage_name}", file=sys.stderr) - - # Handle different storage backends - if storage_name.lower() == 'hydrus': - return _handle_hydrus_file(file_hash, file_title, config, args, mime_type=mime_type) - elif storage_name.lower() == 'local': - return _handle_local_file(file_path, file_title, config, args, file_hash=file_hash) - elif storage_name.lower() == 'download': - # Downloads are local files - return _handle_local_file(file_path, file_title, config, args, file_hash=file_hash) - elif storage_name.lower() == 'debrid': - # Extract magnet_id from result (search-file stores it in full_metadata or as custom attribute) - if not magnet_id: - magnet_id = get_field(result, 'magnet_id', None) - if not magnet_id: - log("Error: No magnet ID in debrid result", file=sys.stderr) - return 1 - return _handle_debrid_file(magnet_id, file_title, config, args) - elif storage_name.lower() in {'bandcamp', 'youtube'}: - # Handle Bandcamp/YouTube via yt-dlp - url = get_field(result, 'target', None) - if not url: - # Try to find URL in other fields - url = get_field(result, 'url', None) - - if not url: - log(f"Error: No URL found for {storage_name} result", file=sys.stderr) - return 1 - - return _handle_ytdlp_download(url, file_title, config, args) - else: - log(f"Unknown storage backend: {storage_name}", file=sys.stderr) - return 1 - - except Exception as e: - log(f"Error processing search result: {e}", file=sys.stderr) - import traceback - traceback.print_exc(file=sys.stderr) - return 1 - - -def _handle_hydrus_file(file_hash: Optional[str], file_title: str, config: Dict[str, Any], args: Sequence[str], mime_type: Optional[str] = None) -> int: - """Handle file from Hydrus - auto-play in MPV if media file, otherwise open web URL.""" - if not file_hash: - log("Error: No file hash provided", file=sys.stderr) - return 1 - - try: - hydrus_url = get_hydrus_url(config) - access_key = get_hydrus_access_key(config) - - if not hydrus_url or not access_key: - log("Error: Hydrus not configured", file=sys.stderr) - return 1 - - # Check if it's a playable media file based on filename or mime type - is_media = _is_playable_in_mpv(file_title) - if not is_media 
and mime_type: - # Check mime type if filename check failed - if any(m in mime_type.lower() for m in ['video/', 'audio/', 'image/']): - is_media = True - - force_mpv = any(str(a).lower() in {'-mpv', '--mpv', 'mpv'} for a in args) - force_browser = any(str(a).lower() in {'-web', '--web', 'web', '-browser', '--browser'} for a in args) - - # Check MPV availability - from hydrus_health_check import check_mpv_availability - mpv_available, _ = check_mpv_availability() - - # Construct url for streaming/viewing - # For streaming, we use headers for auth, so we don't put the key in the URL - stream_url = f"{hydrus_url}/get_files/file?hash={file_hash}" - # For browser, we still need the key in the URL - web_url = f"{hydrus_url}/get_files/file?hash={file_hash}&Hydrus-Client-API-Access-Key={access_key}" - - headers = { - "Hydrus-Client-API-Access-Key": access_key - } - - if force_browser: - # User explicitly wants browser - ipc_pipe = get_ipc_pipe_path() - result_dict = create_pipe_object_result( - source='hydrus', - identifier=file_hash, - file_path=web_url, - cmdlet_name='get-file', - title=file_title, - file_hash=file_hash, - extra={ - 'ipc': ipc_pipe, - 'action_type': 'browser', - 'web_url': web_url, - 'hydrus_url': hydrus_url, - 'access_key': access_key - } - ) - ctx.emit(result_dict) - try: - import webbrowser - webbrowser.open(web_url) - debug(f"[get-file] Opened in browser: {file_title}", file=sys.stderr) - except Exception: - pass - return 0 - elif force_mpv or (is_media and mpv_available): - # Auto-play in MPV for media files (if available), or user requested it - if _play_in_mpv(stream_url, file_title, is_stream=True, headers=headers): - # Show unified MPV playlist view (reuse cmdnats.pipe display) - try: - from cmdnats import pipe as mpv_pipe - mpv_pipe._run(None, [], config) - except Exception: - pass - return 0 - else: - # Fall back to browser - try: - import webbrowser - webbrowser.open(web_url) - debug(f"[get-file] Opened in browser instead", file=sys.stderr) - except Exception: - pass - return 0 - else: - # Not media, open in browser - ipc_pipe = get_ipc_pipe_path() - result_dict = create_pipe_object_result( - source='hydrus', - identifier=file_hash, - file_path=web_url, - cmdlet_name='get-file', - title=file_title, - file_hash=file_hash, - extra={ - 'ipc': ipc_pipe, - 'action_type': 'browser', - 'web_url': web_url, - 'hydrus_url': hydrus_url, - 'access_key': access_key - } - ) - ctx.emit(result_dict) - try: - import webbrowser - webbrowser.open(web_url) - debug(f"[get-file] Opened in browser: {file_title}", file=sys.stderr) - except Exception: - pass - return 0 - - except Exception as e: - log(f"Error handling Hydrus file: {e}", file=sys.stderr) - import traceback - traceback.print_exc(file=sys.stderr) - return 1 - - -def _handle_local_file(file_path: Optional[str], file_title: str, config: Dict[str, Any], args: Sequence[str], file_hash: Optional[str] = None) -> int: - """Handle file from local storage - auto-play in MPV if media, otherwise open with default app.""" - if not file_path: - log("Error: No file path provided", file=sys.stderr) - return 1 - - try: - source = Path(file_path) - if not source.exists(): - # Try to resolve by hash if the path looks like a hash - resolved_local = False - if looks_like_hash(str(file_path)): - try: - from config import get_local_storage_path - from helper.folder_store import FolderDB - storage_path = get_local_storage_path(config) - if storage_path: - with FolderDB(storage_path) as db: - resolved_path = db.search_hash(str(file_path)) - if 
resolved_path and resolved_path.exists(): - source = resolved_path - file_path = str(resolved_path) - resolved_local = True - # Also set file_hash since we know it - file_hash = str(file_path) - except Exception: - pass - - if not resolved_local: - log(f"Error: File not found: {file_path}", file=sys.stderr) - return 1 - - # Check for explicit user flags - force_mpv = any(str(a).lower() in {'-mpv', '--mpv', 'mpv'} for a in args) - force_default = any(str(a).lower() in {'-open', '--open', 'open'} for a in args) - - # Check if it's a playable media file - is_media = _is_playable_in_mpv(str(source)) - - # Check MPV availability - from hydrus_health_check import check_mpv_availability - mpv_available, _ = check_mpv_availability() - - if force_default: - # User explicitly wants default application - import subprocess as sp - import platform - import os - try: - if platform.system() == 'Darwin': # macOS - sp.run(['open', file_path]) - elif platform.system() == 'Windows': - os.startfile(file_path) - else: # Linux - sp.run(['xdg-open', file_path]) - ctx.emit(f"Opened: {file_title}") - debug(f"[get-file] Opened {file_title} with default app", file=sys.stderr) - return 0 - except Exception as e: - log(f"Error opening file: {e}", file=sys.stderr) - return 1 - elif force_mpv or (is_media and mpv_available): - # Auto-play in MPV for media files (if available), or user requested it - if _play_in_mpv(file_path, file_title, is_stream=False): - # Show unified MPV playlist view (reuse cmdnats.pipe display) - try: - from cmdnats import pipe as mpv_pipe - mpv_pipe._run(None, [], config) - except Exception: - pass - return 0 - else: - # Fall back to default application - try: - import os - import platform - if platform.system() == 'Darwin': # macOS - _subprocess.run(['open', file_path]) - elif platform.system() == 'Windows': - os.startfile(file_path) - else: # Linux - _subprocess.run(['xdg-open', file_path]) - debug(f"[get-file] Opened with default app instead", file=sys.stderr) - except Exception: - pass - return 0 - else: - # Not media - open with default application - import subprocess as sp - import platform - import os - try: - if platform.system() == 'Darwin': # macOS - sp.run(['open', file_path]) - elif platform.system() == 'Windows': - # Use os.startfile for more reliable Windows handling - os.startfile(file_path) - else: # Linux - sp.run(['xdg-open', file_path]) - print(f"Opened: {file_title}") - debug(f"[get-file] Opened {file_title} with default app", file=sys.stderr) - - # Emit result for downstream processing - result_dict = create_pipe_object_result( - source='local', - identifier=str(Path(file_path).stem) if file_path else 'unknown', - file_path=file_path, - cmdlet_name='get-file', - title=file_title, - file_hash=file_hash, - extra={'action_type': 'opened'} - ) - ctx.emit(result_dict) - return 0 - except Exception as e: - log(f"Error opening file with default app: {e}", file=sys.stderr) - return 1 - - except Exception as e: - log(f"Error handling local file: {e}", file=sys.stderr) - return 1 - - -def _handle_debrid_file(magnet_id: int, magnet_title: str, config: Dict[str, Any], args: Sequence[str]) -> int: - """Handle magnet file from AllDebrid storage - download to local path.""" - # Parse output path argument - out_path = None - i = 0 - args_list = [str(a) for a in args] - while i < len(args_list): - if args_list[i].lower() in {"-path", "--path", "path"} and i + 1 < len(args_list): - out_path = Path(args_list[i + 1]).expanduser() - i += 2 - else: - i += 1 - - if not out_path: - log("✗ -Path 
required for debrid downloads", file=sys.stderr) - return 1 - - # Ensure output directory exists - try: - out_path.mkdir(parents=True, exist_ok=True) - except Exception as e: - log(f"✗ Error creating output directory: {e}", file=sys.stderr) - return 1 - - return _queue_alldebrid_worker( - config=config, - output_dir=out_path, - magnet_ids=[magnet_id], - title=magnet_title or f"magnet {magnet_id}", - ) - - -@register(["get-file"]) # primary name -def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: - # Help: if any help token is present, print CMDLET JSON and exit - if should_show_help(args): - log(json.dumps(CMDLET, ensure_ascii=False, indent=2)) - return 0 - - # Check if result is a list (from @N selection) and extract the first item - actual_result = result - if isinstance(result, list) and len(result) > 0: - actual_result = result[0] - - # Check if this is a FileStorage search result (has origin field indicating a backend) - # This handles both dict and ResultItem objects - origin = get_origin(actual_result) - if origin and origin.lower() in {'hydrus', 'local', 'debrid', 'alldebrid', 'bandcamp', 'youtube'}: - # This is a search result with explicit origin - handle it via _handle_search_result - return _handle_search_result(actual_result, args, config) - - # Handle ResultItem from search-file via @N selection - # The result can be either: - # 1. A single ResultItem (direct call) - # 2. A list of ResultItems (from @N selection in CLI) - result_item = None - if result and hasattr(result, '__class__'): - if result.__class__.__name__ == 'ResultItem': - result_item = result - elif isinstance(result, list) and len(result) > 0: - # @N selection creates a list, extract the first item if it's a ResultItem - if hasattr(result[0], '__class__') and result[0].__class__.__name__ == 'ResultItem': - result_item = result[0] - - if result_item: - return _handle_search_result(result_item, args, config) - - # Handle PipeObject results from previous get-file call (for chaining) - if result and isinstance(result, dict) and result.get('action', '').startswith('cmdlet:get-file'): - # This is from a previous get-file result - just pass it through - # Don't treat it as a new file to play, just emit for pipeline chaining - ctx.emit(result) - return 0 - - # Check for AllDebrid pipe input (from search-debrid) - # Try to read first line from stdin to detect format - first_line = None - try: - # Try to read one line without blocking - if hasattr(sys.stdin, 'readable') and sys.stdin.readable(): - first_line = sys.stdin.readline().strip() - except Exception: - pass - - if first_line and _is_alldebrid_pipe_data(first_line): - # This is AllDebrid pipe data - handle it separately - # Put the line back by creating a chain with the rest of stdin - import io - try: - remaining_stdin = sys.stdin.read() - except: - remaining_stdin = "" - sys.stdin = io.StringIO(first_line + '\n' + remaining_stdin) - return _handle_alldebrid_pipe(config, args) - elif first_line: - # Not AllDebrid data, put it back for normal processing - import io - try: - remaining_stdin = sys.stdin.read() - except: - remaining_stdin = "" - sys.stdin = io.StringIO(first_line + '\n' + remaining_stdin) - - # Helpers - def _sanitize_name(text: str) -> str: - allowed = [] - for ch in text: - allowed.append(ch if (ch.isalnum() or ch in {"-", "_", " ", "."}) else " ") - return (" ".join("".join(allowed).split()) or "export").strip() - - def _ffprobe_duration_seconds(path: Path) -> Optional[float]: - ffprobe_path = _shutil.which('ffprobe') - if 
not ffprobe_path: - return None - try: - res = _subprocess.run( - [ffprobe_path, '-v', 'error', '-show_entries', 'format=duration', '-of', 'default=noprint_wrappers=1:nokey=1', str(path)], - stdout=_subprocess.PIPE, - stderr=_subprocess.PIPE, - check=True, - text=True, - ) - out = (res.stdout or '').strip() - if not out: - return None - value = float(out) - return value if value > 0 else None - except Exception: - return None - - def _parse_args(tokens: Sequence[str]) -> tuple[Optional[Path], Optional[str], Optional[str], Optional[str], bool]: - out_override: Optional[Path] = None - size_spec: Optional[str] = None - convert_spec: Optional[str] = None - hash_spec: Optional[str] = None - export_metadata: bool = False - i = 0 - while i < len(tokens): - t = tokens[i] - low = t.lower() - if low in {"-path", "--path", "path"} and i + 1 < len(tokens): - try: - out_override = Path(tokens[i + 1]).expanduser() - except Exception: - out_override = None - i += 2 - continue - if low in {"size", "-size", "--size"} and i + 1 < len(tokens): - size_spec = tokens[i + 1] - i += 2 - continue - if low in {"convert", "-convert", "--convert"} and i + 1 < len(tokens): - convert_spec = tokens[i + 1] - i += 2 - continue - if low in {"-hash", "--hash", "hash"} and i + 1 < len(tokens): - hash_spec = tokens[i + 1] - i += 2 - continue - if low in {"-metadata", "--metadata", "metadata"}: - export_metadata = True - i += 1 - continue - i += 1 - return out_override, size_spec, convert_spec, hash_spec, export_metadata - - def _compute_target_bytes(size_spec: Optional[str], source_bytes: int) -> Optional[int]: - if not size_spec: - return None - text = str(size_spec).strip().lower() - if not text: - return None - if text.endswith('%'): - try: - pct = float(text[:-1]) - except ValueError: - return None - pct = max(0.0, min(100.0, pct)) - target = int(round(source_bytes * (pct / 100.0))) - else: - val = text - if val.endswith('mb'): - val = val[:-2] - elif val.endswith('m'): - val = val[:-1] - try: - mb = float(val) - except ValueError: - return None - target = int(round(mb * 1024 * 1024)) - min_bytes = 1 * 1024 * 1024 - if target <= 0: - target = min_bytes - return min(target, source_bytes) - - def _guess_kind_from_suffix(path: Path) -> str: - sfx = path.suffix.lower() - if sfx in {'.mp4', '.mkv', '.webm', '.mov', '.avi', '.flv', '.mpg', '.mpeg', '.ts', '.m4v', '.wmv'}: - return 'video' - if sfx in {'.mp3', '.flac', '.wav', '.m4a', '.aac', '.ogg', '.opus', '.mka'}: - return 'audio' - return 'other' - - def _extract_metadata_from_tags(tags_payload: Dict[str, Any], file_hash: str, input_kind: str = '') -> Dict[str, str]: - """Extract common metadata fields from Hydrus tags. - - Returns a dict mapping FFmpeg metadata keys to values. - Supports: title, artist, album, track, date, genre, etc. 
- - For audio files, applies sensible defaults: - - If no album, uses title as album - - If no track, defaults to 1 - - album_artist is set to artist value - """ - metadata = {} - - # Map of common tag namespaces to FFmpeg metadata keys - tag_map = { - 'title': 'title', - 'artist': 'artist', - 'album': 'album', - 'track': 'track', - 'track_number': 'track', - 'date': 'date', - 'year': 'date', - 'genre': 'genre', - 'composer': 'composer', - 'comment': 'comment', - } - - if not tags_payload or 'metadata' not in tags_payload or not tags_payload['metadata']: - return metadata - - entry = tags_payload['metadata'][0] - if 'tags' not in entry or not isinstance(entry['tags'], dict): - return metadata - - tags_dict = entry['tags'] - - # Extract metadata from tags - for _service_key, service_data in tags_dict.items(): - if not isinstance(service_data, dict): - continue - - display_tags = service_data.get('display_tags', {}) - if not isinstance(display_tags, dict): - continue - - current_tags = display_tags.get('0', []) - if not isinstance(current_tags, list): - continue - - for tag in current_tags: - tag_str = str(tag).strip() - if ':' in tag_str: - namespace, value = tag_str.split(':', 1) - namespace = namespace.lower().strip() - value = value.strip() - if namespace in tag_map and value: - ffmpeg_key = tag_map[namespace] - # Use first occurrence - if ffmpeg_key not in metadata: - metadata[ffmpeg_key] = value - - # Apply sensible defaults for audio files - if input_kind == 'audio': - # If no album, use title as album - if 'album' not in metadata and 'title' in metadata: - metadata['album'] = metadata['title'] - # If no track, default to 1 - if 'track' not in metadata: - metadata['track'] = '1' - # If no album_artist, use artist - if 'artist' in metadata: - metadata['album_artist'] = metadata['artist'] - - return metadata - - out_override, size_spec, convert_spec, hash_spec, export_metadata = _parse_args(args) - default_dir = resolve_output_dir(config) - - media_kind = (get_field(result, 'media_kind', '') or '').lower() - - _chk = [] - if out_override: - _chk.append(f"Path={out_override}") - if size_spec: - _chk.append(f"Size={size_spec}") - if convert_spec: - _chk.append(f"Convert={convert_spec}") - # Prefer explicit -hash over result hash for logging - file_hash_for_log = None - if hash_spec and looks_like_hash(hash_spec): - file_hash_for_log = normalize_hash(hash_spec) - else: - hash_value = get_field(result, 'hash_hex', None) - file_hash_for_log = normalize_hash(hash_value) if hash_value else None - if _chk or file_hash_for_log: - msg = "get-file: " + ", ".join(_chk) if _chk else "get-file" - if file_hash_for_log: - msg = f"{msg} (Hash={file_hash_for_log})" - ctx.emit(msg) - - base_name = _sanitize_name(get_field(result, 'title', None) or '') - if not base_name: - target_attr = get_field(result, 'target', None) - if isinstance(target_attr, str) and target_attr and not target_attr.startswith(('http://', 'https://')): - base_name = _sanitize_name(Path(target_attr).stem) - else: - base_name = 'export' - - # Accept multiple path-ish fields so @ selection from MPV playlist rows or ad-hoc dicts still resolve. 
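For illustration, a minimal standalone sketch of the tag-to-FFmpeg metadata mapping performed by _extract_metadata_from_tags above. It assumes a flat list of "namespace:value" strings rather than the real Hydrus metadata payload, and the sample tags are invented.

# Standalone sketch of the namespace-tag -> FFmpeg metadata mapping above.
# Assumes a flat list of "namespace:value" strings; the real helper walks the
# Hydrus metadata payload instead.
TAG_MAP = {
    "title": "title", "artist": "artist", "album": "album",
    "track": "track", "track_number": "track",
    "date": "date", "year": "date",
    "genre": "genre", "composer": "composer", "comment": "comment",
}

def tags_to_ffmpeg_metadata(tags: list[str], input_kind: str = "") -> dict[str, str]:
    metadata: dict[str, str] = {}
    for tag in tags:
        if ":" not in tag:
            continue
        namespace, value = tag.split(":", 1)
        key = TAG_MAP.get(namespace.strip().lower())
        if key and value.strip() and key not in metadata:
            metadata[key] = value.strip()
    if input_kind == "audio":
        # Same defaults as above: album falls back to title, track defaults to 1,
        # album_artist mirrors artist.
        if "album" not in metadata and "title" in metadata:
            metadata["album"] = metadata["title"]
        metadata.setdefault("track", "1")
        if "artist" in metadata:
            metadata.setdefault("album_artist", metadata["artist"])
    return metadata

# Invented sample input:
print(tags_to_ffmpeg_metadata(["title:Demo Song", "artist:Some Band"], "audio"))
# {'title': 'Demo Song', 'artist': 'Some Band', 'album': 'Demo Song',
#  'track': '1', 'album_artist': 'Some Band'}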
- local_target = ( - get_field(result, 'target', None) - or get_field(result, 'path', None) - or get_field(result, 'file_path', None) - or get_field(result, 'filename', None) - ) - is_url = isinstance(local_target, str) and local_target.startswith(('http://', 'https://')) - # Establish file hash (prefer -hash override when provided and valid) - if hash_spec and looks_like_hash(hash_spec): - file_hash = normalize_hash(hash_spec) - else: - file_hash = normalize_hash(get_field(result, 'hash_hex', None)) if get_field(result, 'hash_hex', None) else None - - source_path: Optional[Path] = None - source_size: Optional[int] = None - duration_sec: Optional[float] = None - tags_payload: Dict[str, Any] = {} - url_payload: Dict[str, Any] = {} - cleanup_source: bool = False - - if isinstance(local_target, str) and not is_url and not (hash_spec and file_hash): - p = Path(local_target) - if not p.exists(): - # Check if it's a hash and try to resolve locally - resolved_local = False - if looks_like_hash(local_target): - try: - from config import get_local_storage_path - from helper.folder_store import FolderDB - storage_path = get_local_storage_path(config) - if storage_path: - with FolderDB(storage_path) as db: - resolved_path = db.search_hash(local_target) - if resolved_path and resolved_path.exists(): - p = resolved_path - resolved_local = True - # Also set file_hash since we know it - file_hash = local_target - except Exception: - pass - - if not resolved_local: - log(f"File missing: {p}") - return 1 - - source_path = p - try: - source_size = p.stat().st_size - except OSError: - source_size = None - duration_sec = _ffprobe_duration_seconds(p) - if file_hash is None: - for sc in (p.with_suffix('.tags'), p.with_suffix('.tags.txt')): - try: - if sc.exists(): - text = sc.read_text(encoding='utf-8', errors='ignore') - for line in text.splitlines(): - ls = line.strip().lower() - if ls.startswith('hash:'): - candidate = line.split(':', 1)[1].strip() if ':' in line else '' - if looks_like_hash(candidate): - file_hash = candidate.lower() - break - except OSError: - pass - elif file_hash: - # Try local resolution first if origin is local or just in case - resolved_local = False - try: - from config import get_local_storage_path - from helper.folder_store import FolderDB - storage_path = get_local_storage_path(config) - if storage_path: - with FolderDB(storage_path) as db: - resolved_path = db.search_hash(file_hash) - if resolved_path and resolved_path.exists(): - source_path = resolved_path - resolved_local = True - try: - source_size = source_path.stat().st_size - except OSError: - source_size = None - duration_sec = _ffprobe_duration_seconds(source_path) - except Exception: - pass - - if not resolved_local: - try: - client = hydrus_wrapper.get_client(config) - except Exception as exc: - log(f"Hydrus client unavailable: {exc}") - return 1 - - if client is None: - log("Hydrus client unavailable") - return 1 - - # Fetch metadata and tags (needed for both -metadata flag and audio tagging) - # Fetch tags - try: - tags_payload = client.fetch_file_metadata(hashes=[file_hash], include_service_keys_to_tags=True) - except Exception: - tags_payload = {} - - # Fetch url - try: - url_payload = client.fetch_file_metadata(hashes=[file_hash], include_file_url=True) - except Exception: - url_payload = {} - - # Extract title from metadata if base_name is still 'export' - if base_name == 'export' and tags_payload: - try: - file_metadata = tags_payload.get('file_metadata', []) - if file_metadata and isinstance(file_metadata, 
list) and len(file_metadata) > 0: - meta = file_metadata[0] - if isinstance(meta, dict): - tags_dict = meta.get('tags', {}) - if isinstance(tags_dict, dict): - # Look for title in storage tags - for service in tags_dict.values(): - if isinstance(service, dict): - storage = service.get('storage_tags', {}) - if isinstance(storage, dict): - for tag_list in storage.values(): - if isinstance(tag_list, list): - for tag in tag_list: - if isinstance(tag, str) and tag.lower().startswith('title:'): - title_val = tag.split(':', 1)[1].strip() - if title_val: - base_name = _sanitize_name(title_val) - break - if base_name != 'export': - break - if base_name != 'export': - break - except Exception: - pass - - # Normal file export (happens regardless of -metadata flag) - try: - from helper.hydrus import hydrus_export as _hydrus_export - except Exception: - _hydrus_export = None # type: ignore - if _hydrus_export is None: - log("Hydrus export helper unavailable") - return 1 - download_dir = out_override if (out_override and out_override.is_dir()) else default_dir - try: - download_dir.mkdir(parents=True, exist_ok=True) - except Exception: - # If mkdir fails, fall back to default_dir - download_dir = default_dir - - # Verify the directory is writable; if not, fall back to default - try: - test_file = download_dir / f".downlow_write_test_{_uuid.uuid4().hex[:8]}" - test_file.touch() - test_file.unlink() - except (OSError, PermissionError): - # Directory is not writable, use default_dir instead - download_dir = default_dir - try: - download_dir.mkdir(parents=True, exist_ok=True) - except Exception: - pass - token = (_uuid.uuid4().hex[:8]) - provisional_stem = f"{base_name}.dlhx_{token}" - provisional = download_dir / f"{provisional_stem}.bin" - class _Args: - pass - args_obj = _Args() - setattr(args_obj, 'output', provisional) - setattr(args_obj, 'format', 'copy') - setattr(args_obj, 'tmp_dir', str(download_dir)) - setattr(args_obj, 'metadata_json', None) - setattr(args_obj, 'hydrus_url', get_hydrus_url(config, "home") or "http://localhost:45869") - setattr(args_obj, 'access_key', get_hydrus_access_key(config, "home") or "") - setattr(args_obj, 'timeout', float(config.get('HydrusNetwork_Request_Timeout') or 60.0)) - try: - file_url = client.file_url(file_hash) - except Exception: - file_url = None - setattr(args_obj, 'file_url', file_url) - setattr(args_obj, 'file_hash', file_hash) - import io as _io, contextlib as _contextlib - _buf = _io.StringIO() - status = 1 - with _contextlib.redirect_stdout(_buf): - status = _hydrus_export(args_obj, None) - if status != 0: - stderr_text = _buf.getvalue().strip() - if stderr_text: - log(stderr_text) - return status - json_text = _buf.getvalue().strip().splitlines()[-1] if _buf.getvalue() else '' - final_from_json: Optional[Path] = None - try: - payload = json.loads(json_text) if json_text else None - if isinstance(payload, dict): - outp = payload.get('output') - if isinstance(outp, str) and outp: - final_from_json = Path(outp) - except Exception: - final_from_json = None - if final_from_json and final_from_json.exists(): - source_path = final_from_json - else: - candidates = [p for p in provisional.parent.glob(provisional_stem + '*') if p.exists() and p.is_file()] - non_provisional = [p for p in candidates if p.suffix.lower() not in {'.bin', '.hydrus'}] - pick_from = non_provisional if non_provisional else candidates - if pick_from: - try: - source_path = max(pick_from, key=lambda p: p.stat().st_mtime) - except Exception: - source_path = pick_from[0] - else: - 
source_path = provisional - candidates = [p for p in provisional.parent.glob(provisional_stem + '*') if p.exists() and p.is_file()] - non_provisional = [p for p in candidates if p.suffix.lower() not in {'.bin', '.hydrus'}] - pick_from = non_provisional if non_provisional else candidates - if pick_from: - try: - source_path = max(pick_from, key=lambda p: p.stat().st_mtime) - except Exception: - source_path = pick_from[0] - else: - source_path = provisional - try: - source_size = source_size or (source_path.stat().st_size if source_path.exists() else None) - except OSError: - source_size = source_size - if duration_sec is None: - duration_sec = _ffprobe_duration_seconds(source_path) - cleanup_source = True - else: - log("Selected result is neither a local file nor a Hydrus record") - return 1 - - convert = (str(convert_spec or '').strip().lower()) - if convert not in {'', 'copy', 'mp4', 'webm', 'audio', 'mp3', 'opus'}: - log(f"Unsupported Convert value: {convert_spec}") - return 1 - if not convert: - convert = 'copy' - input_kind = media_kind or _guess_kind_from_suffix(source_path) - if input_kind == 'audio' and convert in {'mp4', 'webm'}: - log("Cannot convert audio to video") - return 1 - - def _ext_for_convert(conv: str, src: Path) -> str: - if conv == 'mp4': - return '.mp4' - if conv == 'webm': - return '.webm' - if conv in {'audio', 'mp3'}: - return '.mp3' - if conv == 'opus': - return '.opus' - return src.suffix or '' - - auto_named = True - if out_override is not None and out_override.exists() and out_override.is_dir(): - dest_dir = out_override - dest_ext = _ext_for_convert(convert, source_path) - dest_path = dest_dir / f"{base_name}{dest_ext}" - else: - dest_dir = default_dir - dest_ext = _ext_for_convert(convert, source_path) - if out_override and not out_override.exists() and not str(out_override).endswith(('/', '\\')): - dest_path = out_override - auto_named = False - else: - dest_path = (dest_dir / f"{base_name}{dest_ext}") - - if source_size is None: - try: - source_size = source_path.stat().st_size - except OSError: - source_size = None - if source_size is None: - log("Unable to determine source size for sizing logic; proceeding without Size targeting") - target_bytes = None - else: - target_bytes = _compute_target_bytes(size_spec, int(source_size)) - if target_bytes and (source_size or 0): - try: - from ..downlow import _fmt_bytes as _fmt_bytes_helper - except ImportError: - try: - from downlow import _fmt_bytes as _fmt_bytes_helper # type: ignore - except ImportError: - _fmt_bytes_helper = lambda x: f"{x} bytes" # type: ignore - except Exception: - _fmt_bytes_helper = lambda x: f"{x} bytes" # type: ignore - ctx.emit(f"Resizing target: {_fmt_bytes_helper(source_size)} -> {_fmt_bytes_helper(target_bytes)}") - - cleanup_source = locals().get('cleanup_source', False) - if convert == 'copy' and (not target_bytes or target_bytes >= (source_size or 0)): - # Simple copy without FFmpeg processing - # Only skip this if we need to write metadata (then FFmpeg handles it) - if not (export_metadata or (tags_payload and tags_payload.get('metadata'))): - try: - dest_path.parent.mkdir(parents=True, exist_ok=True) - final_dest = _unique_path(dest_path) - _shutil.copy2(source_path, final_dest) - ctx.emit(f"Exported to {final_dest}") - log(f"Exported: {final_dest}", file=sys.stderr) - if cleanup_source: - try: - if source_path.exists() and source_path != final_dest: - source_path.unlink() - except OSError: - pass - - return 0 - except Exception as exc: - log(f"Copy failed: {exc}") - return 1 - 
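A worked sketch of the Size targeting handled by _compute_target_bytes above, assuming a hypothetical 100 MiB source; it mirrors the percent and megabyte parsing, the 1 MiB fallback, and the source-size ceiling, but omits the error handling.

# Standalone mirror of the size-spec handling above ("50%" or "34MB" style values).
def compute_target_bytes(size_spec: str, source_bytes: int) -> int:
    text = str(size_spec).strip().lower()
    if text.endswith("%"):
        pct = max(0.0, min(100.0, float(text[:-1])))
        target = int(round(source_bytes * (pct / 100.0)))
    else:
        mb = float(text.rstrip("b").rstrip("m"))  # accepts "34", "34m", "34mb"
        target = int(round(mb * 1024 * 1024))
    if target <= 0:
        target = 1 * 1024 * 1024                  # 1 MiB fallback, as above
    return min(target, source_bytes)              # never larger than the source

source = 100 * 1024 * 1024                        # hypothetical 100 MiB source
print(compute_target_bytes("50%", source))        # 52428800 (50 MiB)
print(compute_target_bytes("34MB", source))       # 35651584 (34 MiB)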
else: - # Metadata exists, so we need to go through FFmpeg to embed and write sidecar - # Fall through to FFmpeg section below - pass - - convert_effective = convert - if convert == 'copy' and target_bytes and (source_size or 0) > target_bytes: - if input_kind == 'video': - convert_effective = 'mp4' - elif input_kind == 'audio': - convert_effective = 'copy' - else: - convert_effective = convert - - ffmpeg_path = _shutil.which('ffmpeg') - if not ffmpeg_path: - log("ffmpeg executable not found in PATH") - return 1 - - # Extract metadata from tags to embed in file - file_metadata = _extract_metadata_from_tags(tags_payload, file_hash or '', input_kind) - if file_metadata: - metadata_msg = ', '.join(f'{k}={v}' for k, v in file_metadata.items()) - ctx.emit(f"[metadata] Embedding: {metadata_msg}") - ctx.print_if_visible(f"[get-file] Embedding metadata: {metadata_msg}", file=sys.stderr) - else: - ctx.print_if_visible(f"[get-file] No metadata tags found to embed", file=sys.stderr) - - cmd: list[str] = [ffmpeg_path, '-y', '-i', str(source_path)] - - # Add metadata flags to FFmpeg command - for key, value in file_metadata.items(): - cmd.extend(['-metadata', f'{key}={value}']) - - conv = convert_effective - if conv in {'mp4', 'webm', 'copy'}: - video_bitrate: Optional[int] = None - audio_bitrate: int = 128_000 - if target_bytes and duration_sec and duration_sec > 0: - total_bps = max(1, int((target_bytes * 8) / duration_sec)) - if total_bps <= audio_bitrate + 50_000: - if input_kind == 'video': - video_bitrate = max(50_000, total_bps - audio_bitrate) - else: - video_bitrate = None - else: - video_bitrate = total_bps - audio_bitrate - if conv == 'webm': - cmd += ['-c:v', 'libvpx-vp9'] - if video_bitrate: - cmd += ['-b:v', str(video_bitrate)] - else: - cmd += ['-b:v', '0', '-crf', '32'] - cmd += ['-c:a', 'libopus', '-b:a', '160k'] - elif conv == 'mp4' or (conv == 'copy' and input_kind == 'video'): - cmd += ['-c:v', 'libx265', '-preset', 'medium', '-tag:v', 'hvc1', '-pix_fmt', 'yuv420p'] - if video_bitrate: - cmd += ['-b:v', str(video_bitrate)] - else: - cmd += ['-crf', '26'] - cmd += ['-c:a', 'aac', '-b:a', '192k'] - if conv == 'mp4' or (conv == 'copy' and input_kind == 'video'): - cmd += ['-movflags', '+faststart'] - if convert_spec and conv != 'copy': - ctx.emit(f"Converting video -> {conv} (duration={duration_sec or 'unknown'}s)") - else: - if target_bytes and duration_sec and duration_sec > 0: - total_bps = max(1, int((target_bytes * 8) / duration_sec)) - abr = max(32_000, min(320_000, total_bps)) - else: - abr = 192_000 - if conv in {'audio', 'mp3'}: - cmd += ['-vn', '-c:a', 'libmp3lame', '-b:a', str(abr)] - elif conv == 'opus': - cmd += ['-vn', '-c:a', 'libopus', '-b:a', str(abr)] - else: - ext = (source_path.suffix.lower() if source_path else '') - if ext in {'.mp3'}: - cmd += ['-vn', '-c:a', 'libmp3lame', '-b:a', str(abr)] - elif ext in {'.opus', '.ogg'}: - cmd += ['-vn', '-c:a', 'libopus', '-b:a', str(abr)] - elif ext in {'.m4a', '.aac'}: - cmd += ['-vn', '-c:a', 'aac', '-b:a', str(abr)] - else: - cmd += ['-vn', '-c:a', 'libmp3lame', '-b:a', str(abr)] - if convert_spec and conv != 'copy': - ctx.emit(f"Converting audio -> {conv}") - - if conv in {'audio','mp3'}: - desired_ext = '.mp3' - elif conv == 'opus': - desired_ext = '.opus' - elif conv == 'webm': - desired_ext = '.webm' - elif conv == 'mp4': - desired_ext = '.mp4' - else: - desired_ext = source_path.suffix - if (not dest_path.suffix) or auto_named or (dest_path.suffix.lower() in {'.hydrus', '.bin'}): - dest_path = 
dest_path.with_suffix(desired_ext) - - suffix_parts: list[str] = [] - def _size_label(raw: Optional[str], tb: Optional[int]) -> Optional[str]: - if not raw: - return None - text = str(raw).strip() - if text.endswith('%'): - return text - if not tb: - return None - mb = int(round(tb / (1024*1024))) - return f"{mb}Mb" - label = _size_label(size_spec, locals().get('target_bytes')) - if label: - suffix_parts.append(label) - if convert_spec and convert.lower() != 'copy': - label_map = {'mp4':'MP4','webm':'WEBM','audio':'AUDIO','mp3':'MP3','opus':'OPUS'} - suffix_parts.append(label_map.get(convert.lower(), convert.upper())) - if suffix_parts and auto_named: - _aug = f"{base_name} (" + ",".join(suffix_parts) + ")" - dest_path = dest_path.with_name(_aug + dest_path.suffix) - - try: - dest_path.parent.mkdir(parents=True, exist_ok=True) - final_dest = _unique_path(dest_path) - cmd.append(str(final_dest)) - completed = _subprocess.run(cmd, stdout=_subprocess.PIPE, stderr=_subprocess.PIPE, text=True) - if completed.returncode != 0: - stderr = (completed.stderr or '').strip() - log(f"ffmpeg failed ({completed.returncode}): {stderr}") - return 1 - ctx.emit(f"Exported to {final_dest}") - log(f"Exported: {final_dest}", file=sys.stderr) - - # Always write the .tags sidecar with metadata (hash, tags, url) - # This ensures metadata is preserved even if FFmpeg embedding didn't work - try: - metadata_lines = [] - - # Add hash - if file_hash: - metadata_lines.append(f"hash:{file_hash}") - - # Extract tags from metadata payload using correct structure - tags_set = set() - if 'metadata' in tags_payload and tags_payload['metadata']: - entry = tags_payload['metadata'][0] - if 'tags' in entry and isinstance(entry['tags'], dict): - for _service_key, service_data in entry['tags'].items(): - if isinstance(service_data, dict): - display_tags = service_data.get('display_tags', {}) - if isinstance(display_tags, dict): - current_tags = display_tags.get('0', []) - if isinstance(current_tags, list): - tags_set.update(current_tags) - - # Add tags (sorted, no prefix) - for tag in sorted(tags_set): - metadata_lines.append(tag) - - # Extract and add url - if 'metadata' in url_payload and url_payload['metadata']: - entry = url_payload['metadata'][0] - if 'url' in entry and isinstance(entry['url'], list): - for url in entry['url']: - metadata_lines.append(f"url:{url}") - - # Write sidecar if we have any metadata - if metadata_lines: - sidecar_path = final_dest.parent / f"{final_dest.name}.tags" - sidecar_path.write_text('\n'.join(metadata_lines), encoding='utf-8') - ctx.emit(f"Sidecar: {sidecar_path.name}") - log(f"Tags file: {sidecar_path}", file=sys.stderr) - except Exception as exc: - log(f"Warning: Could not write metadata sidecar: {exc}", file=sys.stderr) - - if cleanup_source: - try: - if source_path.exists() and source_path != final_dest: - source_path.unlink() - except OSError: - pass - return 0 - except Exception as exc: - log(f"Export failed: {exc}") - return 1 - - -def _unique_path(p: Path) -> Path: - if not p.exists(): - return p - stem = p.stem - suffix = p.suffix - parent = p.parent - for i in range(1, 1000): - candidate = parent / f"{stem} ({i}){suffix}" - if not candidate.exists(): - return candidate - return p - - -def _handle_ytdlp_download(url: str, title: str, config: Dict[str, Any], args: Sequence[str]) -> int: - """Handle download/streaming of URL using yt-dlp.""" - if not url: - log("Error: No URL provided", file=sys.stderr) - return 1 - - # Check for -storage local - args_list = list(map(str, args)) - 
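The bitrate targeting in the ffmpeg branch above is easier to follow with numbers; the following sketch repeats the same arithmetic for an invented 34 MiB target over a 600-second video with the 128 kbps audio reservation used above.

# Sketch of the Size-targeting arithmetic used when re-encoding video above.
target_bytes = 34 * 1024 * 1024        # hypothetical target: 34 MiB
duration_sec = 600.0                   # hypothetical duration: 10 minutes
audio_bitrate = 128_000                # audio reservation, as above

total_bps = max(1, int((target_bytes * 8) / duration_sec))
if total_bps <= audio_bitrate + 50_000:
    # Budget too tight: keep at least 50 kbps of video.
    video_bitrate = max(50_000, total_bps - audio_bitrate)
else:
    video_bitrate = total_bps - audio_bitrate

print(total_bps, video_bitrate)        # 475354 347354  (~347 kbps left for video)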
storage_mode = None - if '-storage' in args_list: - try: - idx = args_list.index('-storage') - if idx + 1 < len(args_list): - storage_mode = args_list[idx + 1].lower() - except ValueError: - pass - - force_local = (storage_mode == 'local') - - if not force_local: - # Default: Stream to MPV - if _play_in_mpv(url, title, is_stream=True): - try: - from cmdnats import pipe as mpv_pipe - mpv_pipe._run(None, [], config) - except Exception: - pass - return 0 - else: - # Fallback to browser - try: - import webbrowser - webbrowser.open(url) - debug(f"[get-file] Opened in browser: {title}", file=sys.stderr) - return 0 - except Exception: - pass - return 1 - - # Download mode - try: - import yt_dlp - except ImportError: - log("Error: yt-dlp not installed. Please install it to download.", file=sys.stderr) - return 1 - - log(f"Downloading {title}...", file=sys.stderr) - - # Determine output directory - download_dir = resolve_output_dir(config) - try: - download_dir.mkdir(parents=True, exist_ok=True) - except Exception: - pass - - # Configure yt-dlp - ydl_opts = { - 'outtmpl': str(download_dir / '%(title)s.%(ext)s'), - 'quiet': False, - 'no_warnings': True, - # Use best audio/video - 'format': 'best', - } - - try: - with yt_dlp.YoutubeDL(ydl_opts) as ydl: - ydl.download([url]) - log(f"Downloaded to: {download_dir}", file=sys.stderr) - return 0 - except Exception as e: - log(f"Error downloading: {e}", file=sys.stderr) - return 1 - - -CMDLET = Cmdlet( - name="get-file", - summary="Export files: from Hydrus database OR from AllDebrid magnets via pipe. Auto-detects source and handles accordingly.", - usage="get-file [-Path ] [Size <50%|34MB>] [Convert ] [-metadata] [-file ]", - arg=[ - CmdletArg("Path", description="Output directory for files."), - CmdletArg("Size", description="Target size (Hydrus only): 50% or 34MB."), - CmdletArg("Convert", description="Convert format (Hydrus only): mp4, webm, audio, mp3, opus."), - CmdletArg("metadata", type="flag", description="Export metadata to .tags file (Hydrus only)."), - CmdletArg("file", description="Filter files by pattern (AllDebrid only)."), - ], - detail=[ - "Hydrus mode: exports media with optional size/format conversion", - "AllDebrid mode: downloads files from piped magnet IDs from search-debrid", - "Auto-detects pipe format and routes to correct handler", - "Magnet pipe format: ID|filename|size|statusCode|status|progress|...", - ], - -) \ No newline at end of file diff --git a/cmdlets/get_metadata.py b/cmdlets/get_metadata.py index 603e3c7..c550eeb 100644 --- a/cmdlets/get_metadata.py +++ b/cmdlets/get_metadata.py @@ -4,7 +4,7 @@ from typing import Any, Dict, Sequence, Optional import json import sys -from helper.logger import log +from SYS.logger import log from pathlib import Path from ._shared import Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, get_field @@ -69,7 +69,7 @@ class Get_Metadata(Cmdlet): return "" @staticmethod - def _build_table_row(title: str, origin: str, path: str, mime: str, size_bytes: Optional[int], + def _build_table_row(title: str, store: str, path: str, mime: str, size_bytes: Optional[int], dur_seconds: Optional[int], imported_ts: Optional[int], url: list[str], hash_value: Optional[str], pages: Optional[int] = None) -> Dict[str, Any]: """Build a table row dict with metadata fields.""" @@ -97,13 +97,13 @@ class Get_Metadata(Cmdlet): ("Size(MB)", str(size_mb) if size_mb is not None else ""), (duration_label, duration_value), ("Imported", imported_label), - ("Store", origin or ""), + ("Store", store or ""), ] return { "title": 
title or path, "path": path, - "origin": origin, + "store": store, "mime": mime, "size_bytes": size_bytes, "duration_seconds": dur_int, @@ -143,8 +143,8 @@ class Get_Metadata(Cmdlet): parsed = parse_cmdlet_args(args, self) # Get hash and store from parsed args or result - file_hash = parsed.get("hash") or get_field(result, "hash") or get_field(result, "file_hash") or get_field(result, "hash_hex") - storage_source = parsed.get("store") or get_field(result, "store") or get_field(result, "storage") or get_field(result, "origin") + file_hash = parsed.get("hash") or get_field(result, "hash") + storage_source = parsed.get("store") or get_field(result, "store") if not file_hash: log("No hash available - use -hash to specify", file=sys.stderr) @@ -156,8 +156,8 @@ class Get_Metadata(Cmdlet): # Use storage backend to get metadata try: - from helper.store import FileStorage - storage = FileStorage(config) + from Store import Store + storage = Store(config) backend = storage[storage_source] # Get metadata from backend @@ -200,8 +200,8 @@ class Get_Metadata(Cmdlet): # Build display row row = self._build_table_row( title=title, - origin=storage_source, - path=metadata.get("file_path", ""), + store=storage_source, + path=metadata.get("path", ""), mime=mime_type, size_bytes=file_size, dur_seconds=duration_seconds, diff --git a/cmdlets/get_note.py b/cmdlets/get_note.py index f0402ed..5e37f4b 100644 --- a/cmdlets/get_note.py +++ b/cmdlets/get_note.py @@ -6,9 +6,9 @@ import json from . import register import models import pipeline as ctx -from helper import hydrus as hydrus_wrapper +from API import HydrusNetwork as hydrus_wrapper from ._shared import Cmdlet, CmdletArg, SharedArgs, normalize_hash, get_hash_for_operation, fetch_hydrus_metadata, get_field, should_show_help -from helper.logger import log +from SYS.logger import log CMDLET = Cmdlet( name="get-note", diff --git a/cmdlets/get_relationship.py b/cmdlets/get_relationship.py index b0dcdb8..176ee35 100644 --- a/cmdlets/get_relationship.py +++ b/cmdlets/get_relationship.py @@ -5,13 +5,13 @@ import json import sys from pathlib import Path -from helper.logger import log +from SYS.logger import log import models import pipeline as ctx -from helper import hydrus as hydrus_wrapper +from API import HydrusNetwork as hydrus_wrapper from ._shared import Cmdlet, CmdletArg, SharedArgs, normalize_hash, fmt_bytes, get_hash_for_operation, fetch_hydrus_metadata, should_show_help -from helper.folder_store import FolderDB +from API.folder import API_folder_store from config import get_local_storage_path from result_table import ResultTable @@ -53,7 +53,7 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int: result = result[0] # Initialize results collection - found_relationships = [] # List of dicts: {hash, type, title, path, origin} + found_relationships = [] # List of dicts: {hash, type, title, path, store} source_title = "Unknown" def _add_relationship(entry: Dict[str, Any]) -> None: @@ -89,7 +89,7 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int: storage_path = get_local_storage_path(config) print(f"[DEBUG] Storage path: {storage_path}", file=sys.stderr) if storage_path: - with FolderDB(storage_path) as db: + with API_folder_store(storage_path) as db: file_hash = db.get_file_hash(path_obj) metadata = db.get_metadata(file_hash) if file_hash else None print(f"[DEBUG] Metadata found: {metadata is not None}", file=sys.stderr) @@ -130,7 +130,7 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> 
int: "type": entry_type, "title": title, "path": path, - "origin": "local" + "store": "local" }) # RECURSIVE LOOKUP: If this is an "alt" relationship (meaning we're an alt pointing to a king), @@ -169,7 +169,7 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int: "type": "king" if rel_type.lower() == "alt" else rel_type, "title": parent_title, "path": str(path), - "origin": "local" + "store": "local" }) else: # If already in results, ensure it's marked as king if appropriate @@ -223,7 +223,7 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int: "type": f"alt" if child_type == "alt" else f"sibling ({child_type})", "title": child_title, "path": str(child_path_obj), - "origin": "local" + "store": "local" }) else: print(f"[DEBUG] ⚠️ Parent has no relationships metadata", file=sys.stderr) @@ -261,7 +261,7 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int: "type": f"alt" if child_type == "alt" else f"sibling ({child_type})", "title": child_title, "path": child_path, - "origin": "local" + "store": "local" }) except Exception as e: @@ -299,7 +299,7 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int: # "type": f"reverse-{rev_type}", # e.g. reverse-alt # "title": rev_title, # "path": rev_path, - # "origin": "local" + # "store": "local" # }) except Exception as e: @@ -352,7 +352,7 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int: "type": rel_name, "title": rel_hash, # Can't resolve title easily without another API call "path": None, - "origin": "hydrus" + "store": "hydrus" }) except Exception as exc: # Only log error if we didn't find local relationships either @@ -390,7 +390,7 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int: row.add_column("Type", item['type'].title()) row.add_column("Title", item['title']) # row.add_column("Hash", item['hash'][:16] + "...") # User requested removal - row.add_column("Origin", item['origin']) + row.add_column("Store", item['store']) # Create result object for pipeline res_obj = { @@ -398,7 +398,7 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int: "hash": item['hash'], "file_hash": item['hash'], "relationship_type": item['type'], - "origin": item['origin'] + "store": item['store'] } if item['path']: res_obj["path"] = item['path'] diff --git a/cmdlets/get_tag.py b/cmdlets/get_tag.py index 3b42ad0..5ebd24c 100644 --- a/cmdlets/get_tag.py +++ b/cmdlets/get_tag.py @@ -1,7 +1,7 @@ """Get tags from Hydrus or local sidecar metadata. 
This cmdlet retrieves tags for a selected result, supporting both: -- Hydrus Network (for files with hash_hex) +- Hydrus Network (for files with hash) - Local sidecar files (.tags) In interactive mode: navigate with numbers, add/delete tags @@ -12,15 +12,15 @@ from __future__ import annotations import sys -from helper.logger import log, debug -from helper.metadata_search import get_metadata_provider, list_metadata_providers +from SYS.logger import log, debug +from Provider.metadata_provider import get_metadata_provider, list_metadata_providers import subprocess from pathlib import Path from typing import Any, Dict, List, Optional, Sequence, Tuple import pipeline as ctx -from helper import hydrus -from helper.folder_store import read_sidecar, write_sidecar, find_sidecar, FolderDB +from API import HydrusNetwork +from API.folder import read_sidecar, write_sidecar, find_sidecar, API_folder_store from ._shared import normalize_hash, looks_like_hash, Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, get_field from config import get_local_storage_path @@ -47,15 +47,14 @@ class TagItem: """ tag_name: str tag_index: int # 1-based index for user reference - hash_hex: Optional[str] = None - source: str = "hydrus" + hash: Optional[str] = None + store: str = "hydrus" service_name: Optional[str] = None - file_path: Optional[str] = None + path: Optional[str] = None def __post_init__(self): # Make ResultTable happy by adding standard fields # NOTE: Don't set 'title' - we want only the tag column in ResultTable - self.origin = self.source self.detail = f"Tag #{self.tag_index}" self.target = self.tag_name self.media_kind = "tag" @@ -65,20 +64,21 @@ class TagItem: return { "tag_name": self.tag_name, "tag_index": self.tag_index, - "hash_hex": self.hash_hex, - "source": self.source, + "hash": self.hash, + "store": self.store, + "path": self.path, "service_name": self.service_name, } def _emit_tags_as_table( tags_list: List[str], - hash_hex: Optional[str], - source: str = "hydrus", + file_hash: Optional[str], + store: str = "hydrus", service_name: Optional[str] = None, config: Dict[str, Any] = None, item_title: Optional[str] = None, - file_path: Optional[str] = None, + path: Optional[str] = None, subject: Optional[Any] = None, ) -> None: """Emit tags as TagItem objects and display via ResultTable. 
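To make the field renames above concrete (hash_hex -> hash, source -> store, file_path -> path), here is a standalone, slimmed-down copy of the new TagItem shape and the dict it now serializes to; the sample values are invented and the ResultTable-related __post_init__ fields are omitted.

# Standalone copy of the renamed TagItem fields, for illustration only.
from dataclasses import dataclass
from typing import Any, Dict, Optional

@dataclass
class TagItem:
    tag_name: str
    tag_index: int                      # 1-based index for user reference
    hash: Optional[str] = None          # was hash_hex
    store: str = "hydrus"               # was source
    service_name: Optional[str] = None
    path: Optional[str] = None          # was file_path

    def to_dict(self) -> Dict[str, Any]:
        return {
            "tag_name": self.tag_name,
            "tag_index": self.tag_index,
            "hash": self.hash,
            "store": self.store,
            "path": self.path,
            "service_name": self.service_name,
        }

item = TagItem("creator:example", 1, hash="a" * 64, store="local")
print(item.to_dict()["store"])          # local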
@@ -92,8 +92,8 @@ def _emit_tags_as_table( table_title = "Tags" if item_title: table_title = f"Tags: {item_title}" - if hash_hex: - table_title += f" [{hash_hex[:8]}]" + if file_hash: + table_title += f" [{file_hash[:8]}]" table = ResultTable(table_title, max_columns=1) table.set_source_command("get-tag", []) @@ -104,10 +104,10 @@ def _emit_tags_as_table( tag_item = TagItem( tag_name=tag_name, tag_index=idx, - hash_hex=hash_hex, - source=source, + hash=file_hash, + store=store, service_name=service_name, - file_path=file_path, + path=path, ) tag_items.append(tag_item) table.add_result(tag_item) @@ -401,8 +401,8 @@ def _emit_tag_payload(source: str, tags_list: List[str], *, hash_value: Optional tag_item = TagItem( tag_name=tag_name, tag_index=idx, - hash_hex=hash_value, - source=source, + hash=hash_value, + store=source, service_name=None ) ctx.emit(tag_item) @@ -698,7 +698,7 @@ def _scrape_isbn_metadata(isbn: str) -> List[str]: """Scrape metadata for an ISBN using Open Library API.""" new_tags = [] try: - from ..helper.http_client import HTTPClient + from ..API.HTTP import HTTPClient import json as json_module isbn_clean = isbn.replace('-', '').strip() @@ -822,7 +822,7 @@ def _scrape_openlibrary_metadata(olid: str) -> List[str]: """ new_tags = [] try: - from ..helper.http_client import HTTPClient + from ..API.HTTP import HTTPClient import json as json_module # Format: OL9674499M or just 9674499M @@ -995,7 +995,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: get-tag -scrape Options: - -hash : Override hash to use instead of result's hash_hex + -hash : Override hash to use instead of result's hash --store : Store result to this key for pipeline --emit: Emit result without interactive prompt (quiet mode) -scrape : Scrape metadata from URL or provider name (itunes, openlibrary, googlebooks) @@ -1150,7 +1150,8 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: table = ResultTable(f"Metadata: {provider.name}") table.set_source_command("get-tag", []) selection_payload = [] - hash_for_payload = normalize_hash(hash_override) or normalize_hash(get_field(result, "hash_hex", None)) + hash_for_payload = normalize_hash(hash_override) or normalize_hash(get_field(result, "hash", None)) + store_for_payload = get_field(result, "store", None) for idx, item in enumerate(items): tags = provider.to_tags(item) row = table.add_row() @@ -1165,13 +1166,12 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: "artist": item.get("artist"), "album": item.get("album"), "year": item.get("year"), + "hash": hash_for_payload, + "store": store_for_payload, "extra": { "tags": tags, "provider": provider.name, - "hydrus_hash": hash_for_payload, - "storage_source": get_field(result, "source", None) or get_field(result, "origin", None), }, - "file_hash": hash_for_payload, } selection_payload.append(payload) table.set_row_selection_args(idx, [str(idx + 1)]) @@ -1192,30 +1192,29 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: if isinstance(result, list) and len(result) > 0: result = result[0] - hash_from_result = normalize_hash(get_field(result, "hash_hex", None)) - hash_hex = hash_override or hash_from_result + hash_from_result = normalize_hash(get_field(result, "hash", None)) + file_hash = hash_override or hash_from_result # Only use emit mode if explicitly requested with --emit flag, not just because we're in a pipeline # This allows interactive REPL to work even in pipelines emit_mode = emit_requested or 
bool(store_key) store_label = (store_key.strip() if store_key and store_key.strip() else None) # Get hash and store from result - file_hash = hash_hex - storage_source = get_field(result, "store") or get_field(result, "storage") or get_field(result, "origin") + store_name = get_field(result, "store") if not file_hash: log("No hash available in result", file=sys.stderr) return 1 - if not storage_source: - log("No storage backend specified in result", file=sys.stderr) + if not store_name: + log("No store specified in result", file=sys.stderr) return 1 # Get tags using storage backend try: - from helper.store import FileStorage - storage = FileStorage(config) - backend = storage[storage_source] + from Store import Store + storage = Store(config) + backend = storage[store_name] current, source = backend.get_tag(file_hash, config=config) if not current: @@ -1224,7 +1223,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: service_name = "" except KeyError: - log(f"Storage backend '{storage_source}' not found", file=sys.stderr) + log(f"Store '{store_name}' not found", file=sys.stderr) return 1 except Exception as exc: log(f"Failed to get tags: {exc}", file=sys.stderr) @@ -1235,48 +1234,42 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: item_title = get_field(result, "title", None) or get_field(result, "name", None) or get_field(result, "filename", None) # Build a subject payload representing the file whose tags are being shown - subject_origin = get_field(result, "origin", None) or get_field(result, "source", None) or source + subject_store = get_field(result, "store", None) or store_name subject_payload: Dict[str, Any] = { "tags": list(current), "title": item_title, "name": item_title, - "origin": subject_origin, - "source": subject_origin, - "storage_source": subject_origin, + "store": subject_store, "service_name": service_name, "extra": { "tags": list(current), - "storage_source": subject_origin, - "hydrus_hash": hash_hex, }, } - if hash_hex: - subject_payload.update({ - "hash": hash_hex, - "hash_hex": hash_hex, - "file_hash": hash_hex, - "hydrus_hash": hash_hex, - }) + if file_hash: + subject_payload["hash"] = file_hash if local_path: try: path_text = str(local_path) subject_payload.update({ - "file_path": path_text, "path": path_text, - "target": path_text, }) - subject_payload["extra"]["file_path"] = path_text except Exception: pass - if source == "hydrus": - _emit_tags_as_table(current, hash_hex=hash_hex, source="hydrus", service_name=service_name, config=config, item_title=item_title, subject=subject_payload) - else: - _emit_tags_as_table(current, hash_hex=hash_hex, source="local", service_name=None, config=config, item_title=item_title, file_path=str(local_path) if local_path else None, subject=subject_payload) + _emit_tags_as_table( + current, + file_hash=file_hash, + store=subject_store, + service_name=service_name if source == "hydrus" else None, + config=config, + item_title=item_title, + path=str(local_path) if local_path else None, + subject=subject_payload, + ) # If emit requested or store key provided, emit payload if emit_mode: - _emit_tag_payload(source, current, hash_value=hash_hex, store_label=store_label) + _emit_tag_payload(source, current, hash_value=file_hash, store_label=store_label) return 0 @@ -1341,22 +1334,22 @@ class Get_Tag(Cmdlet): # Get hash and store from parsed args or result hash_override = parsed.get("hash") - file_hash = hash_override or get_field(result, "hash") or get_field(result, "file_hash") or 
get_field(result, "hash_hex") - storage_source = parsed.get("store") or get_field(result, "store") or get_field(result, "storage") or get_field(result, "origin") + file_hash = normalize_hash(hash_override) or normalize_hash(get_field(result, "hash")) + store_name = parsed.get("store") or get_field(result, "store") if not file_hash: log("No hash available in result", file=sys.stderr) return 1 - if not storage_source: - log("No storage backend specified in result", file=sys.stderr) + if not store_name: + log("No store specified in result", file=sys.stderr) return 1 # Get tags using storage backend try: - from helper.store import FileStorage - storage_obj = FileStorage(config) - backend = storage_obj[storage_source] + from Store import Store + storage_obj = Store(config) + backend = storage_obj[store_name] current, source = backend.get_tag(file_hash, config=config) if not current: @@ -1367,18 +1360,18 @@ class Get_Tag(Cmdlet): item_title = get_field(result, "title") or file_hash[:16] _emit_tags_as_table( tags_list=current, - hash_hex=file_hash, - source=source, + file_hash=file_hash, + store=store_name, service_name="", config=config, item_title=item_title, - file_path=None, + path=None, subject=result, ) return 0 except KeyError: - log(f"Storage backend '{storage_source}' not found", file=sys.stderr) + log(f"Store '{store_name}' not found", file=sys.stderr) return 1 except Exception as exc: log(f"Failed to get tags: {exc}", file=sys.stderr) diff --git a/cmdlets/get_tag.py.orig b/cmdlets/get_tag.py.orig deleted file mode 100644 index a49b6f5..0000000 --- a/cmdlets/get_tag.py.orig +++ /dev/null @@ -1,1415 +0,0 @@ -"""Get tags from Hydrus or local sidecar metadata. - -This cmdlet retrieves tags for a selected result, supporting both: -- Hydrus Network (for files with hash_hex) -- Local sidecar files (.tags) - -In interactive mode: navigate with numbers, add/delete tags -In pipeline mode: display tags as read-only table, emit as structured JSON -""" - -from __future__ import annotations - -import sys - -from helper.logger import log, debug -from helper.metadata_search import get_metadata_provider, list_metadata_providers -import subprocess -from pathlib import Path -from typing import Any, Dict, List, Optional, Sequence, Tuple - -import pipeline as ctx -from helper import hydrus -from helper.local_library import read_sidecar, write_sidecar, find_sidecar, LocalLibraryDB -from ._shared import normalize_hash, looks_like_hash, Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args -from config import get_local_storage_path - - -try: - from metadata import extract_title -except ImportError: - extract_title = None - - - - - -# Tag item for ResultTable display and piping -from dataclasses import dataclass - -@dataclass -class TagItem: - """Tag item for display in ResultTable and piping to other cmdlets. 
- - Allows tags to be selected and piped like: - - delete-tag @{3,4,9} (delete tags at indices 3, 4, 9) - - add-tag @"namespace:value" (add this tag) - """ - tag_name: str - tag_index: int # 1-based index for user reference - hash_hex: Optional[str] = None - source: str = "hydrus" - service_name: Optional[str] = None - file_path: Optional[str] = None - - def __post_init__(self): - # Make ResultTable happy by adding standard fields - # NOTE: Don't set 'title' - we want only the tag column in ResultTable - self.origin = self.source - self.detail = f"Tag #{self.tag_index}" - self.target = self.tag_name - self.media_kind = "tag" - - def to_dict(self) -> Dict[str, Any]: - """Convert to dict for JSON serialization.""" - return { - "tag_name": self.tag_name, - "tag_index": self.tag_index, - "hash_hex": self.hash_hex, - "source": self.source, - "service_name": self.service_name, - } - - -def _extract_my_tags_from_hydrus_meta(meta: Dict[str, Any], service_key: Optional[str], service_name: str) -> List[str]: - """Extract current tags from Hydrus metadata dict. - - Prefers display_tags (includes siblings/parents, excludes deleted). - Falls back to storage_tags status '0' (current). - """ - tags_payload = meta.get("tags") - if not isinstance(tags_payload, dict): - return [] - svc_data = None - if service_key: - svc_data = tags_payload.get(service_key) - if not isinstance(svc_data, dict): - return [] - # Prefer display_tags (Hydrus computes siblings/parents) - display = svc_data.get("display_tags") - if isinstance(display, list) and display: - return [str(t) for t in display if isinstance(t, (str, bytes)) and str(t).strip()] - # Fallback to storage_tags status '0' (current) - storage = svc_data.get("storage_tags") - if isinstance(storage, dict): - current_list = storage.get("0") or storage.get(0) - if isinstance(current_list, list): - return [str(t) for t in current_list if isinstance(t, (str, bytes)) and str(t).strip()] - return [] - - -def _emit_tags_as_table( - tags_list: List[str], - hash_hex: Optional[str], - source: str = "hydrus", - service_name: Optional[str] = None, - config: Dict[str, Any] = None, - item_title: Optional[str] = None, - file_path: Optional[str] = None, - subject: Optional[Any] = None, -) -> None: - """Emit tags as TagItem objects and display via ResultTable. - - This replaces _print_tag_list to make tags pipe-able. - Stores the table in ctx._LAST_RESULT_TABLE for downstream @ selection. - """ - from result_table import ResultTable - - # Create ResultTable with just tag column (no title) - table_title = "Tags" - if item_title: - table_title = f"Tags: {item_title}" - if hash_hex: - table_title += f" [{hash_hex[:8]}]" - - table = ResultTable(table_title, max_columns=1) - table.set_source_command("get-tag", []) - - # Create TagItem for each tag - tag_items = [] - for idx, tag_name in enumerate(tags_list, start=1): - tag_item = TagItem( - tag_name=tag_name, - tag_index=idx, - hash_hex=hash_hex, - source=source, - service_name=service_name, - file_path=file_path, - ) - tag_items.append(tag_item) - table.add_result(tag_item) - # Also emit to pipeline for downstream processing - ctx.emit(tag_item) - - # Store the table and items in history so @.. 
works to go back - # Use overlay mode so it doesn't push the previous search to history stack - # This makes get-tag behave like a transient view - try: - ctx.set_last_result_table_overlay(table, tag_items, subject) - except AttributeError: - ctx.set_last_result_table(table, tag_items, subject) - # Note: CLI will handle displaying the table via ResultTable formatting -def _summarize_tags(tags_list: List[str], limit: int = 8) -> str: - """Create a summary of tags for display.""" - shown = [t for t in tags_list[:limit] if t] - summary = ", ".join(shown) - remaining = max(0, len(tags_list) - len(shown)) - if remaining > 0: - summary = f"{summary} (+{remaining} more)" if summary else f"(+{remaining} more)" - if len(summary) > 200: - summary = summary[:197] + "..." - return summary - - -def _extract_title_from(tags_list: List[str]) -> Optional[str]: - """Extract title from tags list.""" - if extract_title: - try: - return extract_title(tags_list) - except Exception: - pass - for t in tags_list: - if isinstance(t, str) and t.lower().startswith("title:"): - val = t.split(":", 1)[1].strip() - if val: - return val - return None - - -def _rename_file_if_title_tag(media: Optional[Path], tags_added: List[str]) -> bool: - """Rename a local file if title: tag was added. - - Returns True if file was renamed, False otherwise. - """ - if not media or not tags_added: - return False - - # Check if any of the added tags is a title: tag - title_value = None - for tag in tags_added: - if isinstance(tag, str): - lower_tag = tag.lower() - if lower_tag.startswith("title:"): - title_value = tag.split(":", 1)[1].strip() - break - - if not title_value: - return False - - try: - # Get current file path - file_path = media - if not file_path.exists(): - return False - - # Parse file path - dir_path = file_path.parent - old_name = file_path.name - - # Get file extension - suffix = file_path.suffix or '' - - # Sanitize title for use as filename - import re - safe_title = re.sub(r'[<>:"/\\|?*]', '', title_value).strip() - if not safe_title: - return False - - new_name = safe_title + suffix - new_file_path = dir_path / new_name - - if new_file_path == file_path: - return False - - # Build sidecar paths BEFORE renaming the file - old_sidecar = Path(str(file_path) + '.tags') - new_sidecar = Path(str(new_file_path) + '.tags') - - # Rename file - try: - file_path.rename(new_file_path) - log(f"Renamed file: {old_name} → {new_name}") - - # Rename .tags sidecar if it exists - if old_sidecar.exists(): - try: - old_sidecar.rename(new_sidecar) - log(f"Renamed sidecar: {old_name}.tags → {new_name}.tags") - except Exception as e: - log(f"Failed to rename sidecar: {e}", file=sys.stderr) - - return True - except Exception as e: - log(f"Failed to rename file: {e}", file=sys.stderr) - return False - except Exception as e: - log(f"Error during file rename: {e}", file=sys.stderr) - return False - - -def _apply_result_updates_from_tags(result: Any, tag_list: List[str]) -> None: - """Update result object with title and tag summary from tags.""" - try: - new_title = _extract_title_from(tag_list) - if new_title: - setattr(result, "title", new_title) - setattr(result, "tag_summary", _summarize_tags(tag_list)) - except Exception: - pass - - -def _handle_title_rename(old_path: Path, tags_list: List[str]) -> Optional[Path]: - """If a title: tag is present, rename the file and its .tags sidecar to match. - - Returns the new path if renamed, otherwise returns None. 
- """ - # Extract title from tags - new_title = None - for tag in tags_list: - if isinstance(tag, str) and tag.lower().startswith('title:'): - new_title = tag.split(':', 1)[1].strip() - break - - if not new_title or not old_path.exists(): - return None - - try: - # Build new filename with same extension - old_name = old_path.name - old_suffix = old_path.suffix - - # Create new filename: title + extension - new_name = f"{new_title}{old_suffix}" - new_path = old_path.parent / new_name - - # Don't rename if already the same name - if new_path == old_path: - return None - - # Rename the main file - if new_path.exists(): - log(f"Warning: Target filename already exists: {new_name}", file=sys.stderr) - return None - - old_path.rename(new_path) - log(f"Renamed file: {old_name} → {new_name}", file=sys.stderr) - - # Rename the .tags sidecar if it exists - old_tags_path = old_path.parent / (old_name + '.tags') - if old_tags_path.exists(): - new_tags_path = old_path.parent / (new_name + '.tags') - if new_tags_path.exists(): - log(f"Warning: Target sidecar already exists: {new_tags_path.name}", file=sys.stderr) - else: - old_tags_path.rename(new_tags_path) - log(f"Renamed sidecar: {old_tags_path.name} → {new_tags_path.name}", file=sys.stderr) - - return new_path - except Exception as exc: - log(f"Warning: Failed to rename file: {exc}", file=sys.stderr) - return None - - - -def _read_sidecar_fallback(p: Path) -> tuple[Optional[str], List[str], List[str]]: - """Fallback sidecar reader if metadata module unavailable. - - Format: - - Lines with "hash:" prefix: file hash - - Lines with "url:" or "url:" prefix: url - - Lines with "relationship:" prefix: ignored (internal relationships) - - Lines with "key:", "namespace:value" format: treated as namespace tags - - Plain lines without colons: freeform tags - - Excluded namespaces (treated as metadata, not tags): hash, url, url, relationship - """ - try: - raw = p.read_text(encoding="utf-8", errors="ignore") - except OSError: - return None, [], [] - t: List[str] = [] - u: List[str] = [] - h: Optional[str] = None - - # Namespaces to exclude from tags - excluded_namespaces = {"hash", "url", "url", "relationship"} - - for line in raw.splitlines(): - s = line.strip() - if not s: - continue - low = s.lower() - - # Check if this is a hash line - if low.startswith("hash:"): - h = s.split(":", 1)[1].strip() if ":" in s else h - # Check if this is a URL line - elif low.startswith("url:") or low.startswith("url:"): - val = s.split(":", 1)[1].strip() if ":" in s else "" - if val: - u.append(val) - # Check if this is an excluded namespace - elif ":" in s: - namespace = s.split(":", 1)[0].strip().lower() - if namespace not in excluded_namespaces: - # Include as namespace tag (e.g., "title: The Freemasons") - t.append(s) - else: - # Plain text without colon = freeform tag - t.append(s) - - return h, t, u - - -def _write_sidecar(p: Path, media: Path, tag_list: List[str], url: List[str], hash_in_sidecar: Optional[str]) -> Path: - """Write tags to sidecar file and handle title-based renaming. - - Returns the new media path if renamed, otherwise returns the original media path. 
- """ - success = write_sidecar(media, tag_list, url, hash_in_sidecar) - if success: - _apply_result_updates_from_tags(None, tag_list) - # Check if we should rename the file based on title tag - new_media = _handle_title_rename(media, tag_list) - if new_media: - return new_media - return media - - # Fallback writer - ordered = [s for s in tag_list if s and s.strip()] - lines = [] - if hash_in_sidecar: - lines.append(f"hash:{hash_in_sidecar}") - lines.extend(ordered) - for u in url: - lines.append(f"url:{u}") - try: - p.write_text("\n".join(lines) + "\n", encoding="utf-8") - # Check if we should rename the file based on title tag - new_media = _handle_title_rename(media, tag_list) - if new_media: - return new_media - return media - except OSError as exc: - log(f"Failed to write sidecar: {exc}", file=sys.stderr) - return media - - -def _emit_tag_payload(source: str, tags_list: List[str], *, hash_value: Optional[str], extra: Optional[Dict[str, Any]] = None, store_label: Optional[str] = None) -> int: - """Emit tags as structured payload to pipeline. - - Also emits individual tag objects to _PIPELINE_LAST_ITEMS so they can be selected by index. - """ - payload: Dict[str, Any] = { - "source": source, - "tags": list(tags_list), - "count": len(tags_list), - } - if hash_value: - payload["hash"] = hash_value - if extra: - for key, value in extra.items(): - if value is not None: - payload[key] = value - label = None - if store_label: - label = store_label - elif ctx._PIPE_ACTIVE: - label = "tags" - if label: - ctx.store_value(label, payload) - if ctx._PIPE_ACTIVE and label.lower() != "tags": - ctx.store_value("tags", payload) - - # Emit individual TagItem objects so they can be selected by bare index - # When in pipeline, emit individual TagItem objects - if ctx._PIPE_ACTIVE: - for idx, tag_name in enumerate(tags_list, start=1): - tag_item = TagItem( - tag_name=tag_name, - tag_index=idx, - hash_hex=hash_value, - source=source, - service_name=None - ) - ctx.emit(tag_item) - else: - # When not in pipeline, just emit the payload - ctx.emit(payload) - - return 0 - - - -def _extract_scrapable_identifiers(tags_list: List[str]) -> Dict[str, str]: - """Extract scrapable identifiers from tags.""" - identifiers = {} - scrapable_prefixes = { - 'openlibrary', 'isbn', 'isbn_10', 'isbn_13', - 'musicbrainz', 'musicbrainzalbum', 'imdb', 'tmdb', 'tvdb' - } - - for tag in tags_list: - if not isinstance(tag, str) or ':' not in tag: - continue - - parts = tag.split(':', 1) - if len(parts) != 2: - continue - - key_raw = parts[0].strip().lower() - key = key_raw.replace('-', '_') - if key == 'isbn10': - key = 'isbn_10' - elif key == 'isbn13': - key = 'isbn_13' - value = parts[1].strip() - - # Normalize ISBN values by removing hyphens for API friendliness - if key.startswith('isbn'): - value = value.replace('-', '') - - if key in scrapable_prefixes and value: - identifiers[key] = value - - return identifiers - - -def _extract_tag_value(tags_list: List[str], namespace: str) -> Optional[str]: - """Get first tag value for a namespace (e.g., artist:, title:).""" - ns = namespace.lower() - for tag in tags_list: - if not isinstance(tag, str) or ':' not in tag: - continue - prefix, _, value = tag.partition(':') - if prefix.strip().lower() != ns: - continue - candidate = value.strip() - if candidate: - return candidate - return None - - -def _scrape_url_metadata(url: str) -> Tuple[Optional[str], List[str], List[Tuple[str, str]], List[Dict[str, Any]]]: - """Scrape metadata from a URL using yt-dlp. 
- - Returns: - (title, tags, formats, playlist_items) tuple where: - - title: Video/content title - - tags: List of extracted tags (both namespaced and freeform) - - formats: List of (display_label, format_id) tuples - - playlist_items: List of playlist entry dicts (empty if not a playlist) - """ - try: - import json as json_module - - try: - from metadata import extract_ytdlp_tags - except ImportError: - extract_ytdlp_tags = None - - # Build yt-dlp command with playlist support - # IMPORTANT: Do NOT use --flat-playlist! It strips metadata like artist, album, uploader, genre - # Without it, yt-dlp gives us full metadata in an 'entries' array within a single JSON object - # This ensures we get album-level metadata from sources like BandCamp, YouTube Music, etc. - cmd = [ - "yt-dlp", - "-j", # Output JSON - "--no-warnings", - "--playlist-items", "1-10", # Get first 10 items if it's a playlist (provides entries) - "-f", "best", - url - ] - - result = subprocess.run(cmd, capture_output=True, text=True, timeout=30) - - if result.returncode != 0: - log(f"yt-dlp error: {result.stderr}", file=sys.stderr) - return None, [], [], [] - - # Parse JSON output - WITHOUT --flat-playlist, we get ONE JSON object with 'entries' array - # This gives us full metadata instead of flat format - lines = result.stdout.strip().split('\n') - if not lines or not lines[0]: - log("yt-dlp returned empty output", file=sys.stderr) - return None, [], [], [] - - # Parse the single JSON object - try: - data = json_module.loads(lines[0]) - except json_module.JSONDecodeError as e: - log(f"Failed to parse yt-dlp JSON: {e}", file=sys.stderr) - return None, [], [], [] - - # Extract title - use the main title - title = data.get('title', 'Unknown') - - # Determine if this is a playlist/album (has entries array) - # is_playlist = 'entries' in data and isinstance(data.get('entries'), list) - - # Extract tags and playlist items - tags = [] - playlist_items = [] - - # IMPORTANT: Extract album/playlist-level tags FIRST (before processing entries) - # This ensures we get metadata about the collection, not just individual tracks - if extract_ytdlp_tags: - album_tags = extract_ytdlp_tags(data) - tags.extend(album_tags) - - # Case 1: Entries are nested in the main object (standard playlist structure) - if 'entries' in data and isinstance(data.get('entries'), list): - entries = data['entries'] - # Build playlist items with title and duration - for idx, entry in enumerate(entries, 1): - if isinstance(entry, dict): - item_title = entry.get('title', entry.get('id', f'Track {idx}')) - item_duration = entry.get('duration', 0) - playlist_items.append({ - 'index': idx, - 'id': entry.get('id', f'track_{idx}'), - 'title': item_title, - 'duration': item_duration, - 'url': entry.get('url') or entry.get('webpage_url', ''), - }) - - # Extract tags from each entry and merge (but don't duplicate album-level tags) - # Only merge entry tags that are multi-value prefixes (not single-value like title:, artist:, etc.) 
- if extract_ytdlp_tags: - entry_tags = extract_ytdlp_tags(entry) - - # Single-value namespaces that should not be duplicated from entries - single_value_namespaces = {'title', 'artist', 'album', 'creator', 'channel', 'release_date', 'upload_date', 'license', 'location'} - - for tag in entry_tags: - # Extract the namespace (part before the colon) - tag_namespace = tag.split(':', 1)[0].lower() if ':' in tag else None - - # Skip if this namespace already exists in tags (from album level) - if tag_namespace and tag_namespace in single_value_namespaces: - # Check if any tag with this namespace already exists in tags - already_has_namespace = any( - t.split(':', 1)[0].lower() == tag_namespace - for t in tags if ':' in t - ) - if already_has_namespace: - continue # Skip this tag, keep the album-level one - - if tag not in tags: # Avoid exact duplicates - tags.append(tag) - - # Case 2: Playlist detected by playlist_count field (BandCamp albums, etc.) - # These need a separate call with --flat-playlist to get the actual entries - elif (data.get('playlist_count') or 0) > 0 and 'entries' not in data: - try: - # Make a second call with --flat-playlist to get the actual tracks - flat_cmd = [ - "yt-dlp", - "-j", - "--no-warnings", - "--flat-playlist", - "-f", "best", - url - ] - flat_result = subprocess.run(flat_cmd, capture_output=True, text=True, timeout=30) - if flat_result.returncode == 0: - flat_lines = flat_result.stdout.strip().split('\n') - # With --flat-playlist, each line is a separate track JSON object - # (not nested in a playlist container), so process ALL lines - for idx, line in enumerate(flat_lines, 1): - if line.strip().startswith('{'): - try: - entry = json_module.loads(line) - item_title = entry.get('title', entry.get('id', f'Track {idx}')) - item_duration = entry.get('duration', 0) - playlist_items.append({ - 'index': idx, - 'id': entry.get('id', f'track_{idx}'), - 'title': item_title, - 'duration': item_duration, - 'url': entry.get('url') or entry.get('webpage_url', ''), - }) - except json_module.JSONDecodeError: - pass - except Exception as e: - pass # Silently ignore if we can't get playlist entries - - - # Fallback: if still no tags detected, get from first item - if not tags and extract_ytdlp_tags: - tags = extract_ytdlp_tags(data) - - # Extract formats from the main data object - formats = [] - if 'formats' in data: - formats = _extract_url_formats(data.get('formats', [])) - - # Deduplicate tags by namespace to prevent duplicate title:, artist:, etc. - try: - from metadata import dedup_tags_by_namespace as _dedup - if _dedup: - tags = _dedup(tags, keep_first=True) - except Exception: - pass # If dedup fails, return tags as-is - - return title, tags, formats, playlist_items - - except subprocess.TimeoutExpired: - log("yt-dlp timeout (>30s)", file=sys.stderr) - return None, [], [], [] - except Exception as e: - log(f"URL scraping error: {e}", file=sys.stderr) - return None, [], [], [] - - -def _extract_url_formats(formats: list) -> List[Tuple[str, str]]: - """Extract best formats from yt-dlp formats list. - - Returns list of (display_label, format_id) tuples. 
- """ - try: - video_formats = {} # {resolution: format_data} - audio_formats = {} # {quality_label: format_data} - - for fmt in formats: - vcodec = fmt.get('vcodec', 'none') - acodec = fmt.get('acodec', 'none') - height = fmt.get('height') - ext = fmt.get('ext', 'unknown') - format_id = fmt.get('format_id', '') - tbr = fmt.get('tbr', 0) - abr = fmt.get('abr', 0) - - # Video format - if vcodec and vcodec != 'none' and height: - if height < 480: - continue - res_key = f"{height}p" - if res_key not in video_formats or tbr > video_formats[res_key].get('tbr', 0): - video_formats[res_key] = { - 'label': f"{height}p ({ext})", - 'format_id': format_id, - 'tbr': tbr, - } - - # Audio-only format - elif acodec and acodec != 'none' and (not vcodec or vcodec == 'none'): - audio_key = f"audio_{abr}" - if audio_key not in audio_formats or abr > audio_formats[audio_key].get('abr', 0): - audio_formats[audio_key] = { - 'label': f"audio ({ext})", - 'format_id': format_id, - 'abr': abr, - } - - result = [] - - # Add video formats in descending resolution order - for res in sorted(video_formats.keys(), key=lambda x: int(x.replace('p', '')), reverse=True): - fmt = video_formats[res] - result.append((fmt['label'], fmt['format_id'])) - - # Add best audio format - if audio_formats: - best_audio = max(audio_formats.values(), key=lambda x: x.get('abr', 0)) - result.append((best_audio['label'], best_audio['format_id'])) - - return result - - except Exception as e: - log(f"Error extracting formats: {e}", file=sys.stderr) - return [] - - -def _scrape_isbn_metadata(isbn: str) -> List[str]: - """Scrape metadata for an ISBN using Open Library API.""" - new_tags = [] - try: - from ..helper.http_client import HTTPClient - import json as json_module - - isbn_clean = isbn.replace('-', '').strip() - url = f"https://openlibrary.org/api/books?bibkeys=ISBN:{isbn_clean}&jscmd=data&format=json" - - try: - with HTTPClient() as client: - response = client.get(url) - response.raise_for_status() - data = json_module.loads(response.content.decode('utf-8')) - except Exception as e: - log(f"Failed to fetch ISBN metadata: {e}", file=sys.stderr) - return [] - - if not data: - log(f"No ISBN metadata found for: {isbn}") - return [] - - book_data = next(iter(data.values()), None) - if not book_data: - return [] - - if 'title' in book_data: - new_tags.append(f"title:{book_data['title']}") - - if 'authors' in book_data and isinstance(book_data['authors'], list): - for author in book_data['authors'][:3]: - if 'name' in author: - new_tags.append(f"author:{author['name']}") - - if 'publish_date' in book_data: - new_tags.append(f"publish_date:{book_data['publish_date']}") - - if 'publishers' in book_data and isinstance(book_data['publishers'], list): - for pub in book_data['publishers'][:1]: - if 'name' in pub: - new_tags.append(f"publisher:{pub['name']}") - - if 'description' in book_data: - desc = book_data['description'] - if isinstance(desc, dict) and 'value' in desc: - desc = desc['value'] - if desc: - desc_str = str(desc).strip() - # Include description if available (limit to 200 chars to keep it manageable) - if len(desc_str) > 0: - new_tags.append(f"description:{desc_str[:200]}") - - if 'number_of_pages' in book_data: - page_count = book_data['number_of_pages'] - if page_count and isinstance(page_count, int) and page_count > 0: - new_tags.append(f"pages:{page_count}") - - if 'identifiers' in book_data and isinstance(book_data['identifiers'], dict): - identifiers = book_data['identifiers'] - - if 'openlibrary' in identifiers: - ol_ids = 
identifiers['openlibrary'] - if isinstance(ol_ids, list) and ol_ids: - new_tags.append(f"openlibrary:{ol_ids[0]}") - elif isinstance(ol_ids, str): - new_tags.append(f"openlibrary:{ol_ids}") - - if 'lccn' in identifiers: - lccn_list = identifiers['lccn'] - if isinstance(lccn_list, list) and lccn_list: - new_tags.append(f"lccn:{lccn_list[0]}") - elif isinstance(lccn_list, str): - new_tags.append(f"lccn:{lccn_list}") - - if 'oclc' in identifiers: - oclc_list = identifiers['oclc'] - if isinstance(oclc_list, list) and oclc_list: - new_tags.append(f"oclc:{oclc_list[0]}") - elif isinstance(oclc_list, str): - new_tags.append(f"oclc:{oclc_list}") - - if 'goodreads' in identifiers: - goodreads_list = identifiers['goodreads'] - if isinstance(goodreads_list, list) and goodreads_list: - new_tags.append(f"goodreads:{goodreads_list[0]}") - elif isinstance(goodreads_list, str): - new_tags.append(f"goodreads:{goodreads_list}") - - if 'librarything' in identifiers: - lt_list = identifiers['librarything'] - if isinstance(lt_list, list) and lt_list: - new_tags.append(f"librarything:{lt_list[0]}") - elif isinstance(lt_list, str): - new_tags.append(f"librarything:{lt_list}") - - if 'doi' in identifiers: - doi_list = identifiers['doi'] - if isinstance(doi_list, list) and doi_list: - new_tags.append(f"doi:{doi_list[0]}") - elif isinstance(doi_list, str): - new_tags.append(f"doi:{doi_list}") - - if 'internet_archive' in identifiers: - ia_list = identifiers['internet_archive'] - if isinstance(ia_list, list) and ia_list: - new_tags.append(f"internet_archive:{ia_list[0]}") - elif isinstance(ia_list, str): - new_tags.append(f"internet_archive:{ia_list}") - - log(f"Found {len(new_tags)} tag(s) from ISBN lookup") - return new_tags - except Exception as e: - log(f"ISBN scraping error: {e}", file=sys.stderr) - return [] - - -def _scrape_openlibrary_metadata(olid: str) -> List[str]: - """Scrape metadata for an OpenLibrary ID using the .json API endpoint. - - Fetches from https://openlibrary.org/books/{OLID}.json and extracts: - - Title, authors, publish date, publishers - - Description - - Subjects as freeform tags (without namespace prefix) - - Identifiers (ISBN, LCCN, OCLC, etc.) 
- """ - new_tags = [] - try: - from ..helper.http_client import HTTPClient - import json as json_module - - # Format: OL9674499M or just 9674499M - olid_clean = olid.replace('OL', '').replace('M', '') - if not olid_clean.isdigit(): - olid_clean = olid - - # Ensure we have the full OLID format for the URL - if not olid.startswith('OL'): - url = f"https://openlibrary.org/books/OL{olid_clean}M.json" - else: - url = f"https://openlibrary.org/books/{olid}.json" - - try: - with HTTPClient() as client: - response = client.get(url) - response.raise_for_status() - data = json_module.loads(response.content.decode('utf-8')) - except Exception as e: - log(f"Failed to fetch OpenLibrary metadata: {e}", file=sys.stderr) - return [] - - if not data: - log(f"No OpenLibrary metadata found for: {olid}") - return [] - - # Add title - if 'title' in data: - new_tags.append(f"title:{data['title']}") - - # Add authors - if 'authors' in data and isinstance(data['authors'], list): - for author in data['authors'][:3]: - if isinstance(author, dict) and 'name' in author: - new_tags.append(f"author:{author['name']}") - elif isinstance(author, str): - new_tags.append(f"author:{author}") - - # Add publish date - if 'publish_date' in data: - new_tags.append(f"publish_date:{data['publish_date']}") - - # Add publishers - if 'publishers' in data and isinstance(data['publishers'], list): - for pub in data['publishers'][:1]: - if isinstance(pub, dict) and 'name' in pub: - new_tags.append(f"publisher:{pub['name']}") - elif isinstance(pub, str): - new_tags.append(f"publisher:{pub}") - - # Add description - if 'description' in data: - desc = data['description'] - if isinstance(desc, dict) and 'value' in desc: - desc = desc['value'] - if desc: - desc_str = str(desc).strip() - if len(desc_str) > 0: - new_tags.append(f"description:{desc_str[:200]}") - - # Add number of pages - if 'number_of_pages' in data: - page_count = data['number_of_pages'] - if page_count and isinstance(page_count, int) and page_count > 0: - new_tags.append(f"pages:{page_count}") - - # Add subjects as FREEFORM tags (no namespace prefix) - if 'subjects' in data and isinstance(data['subjects'], list): - for subject in data['subjects'][:10]: - if subject and isinstance(subject, str): - subject_clean = str(subject).strip() - if subject_clean and subject_clean not in new_tags: - new_tags.append(subject_clean) - - # Add identifiers - if 'identifiers' in data and isinstance(data['identifiers'], dict): - identifiers = data['identifiers'] - - if 'isbn_10' in identifiers: - isbn_10_list = identifiers['isbn_10'] - if isinstance(isbn_10_list, list) and isbn_10_list: - new_tags.append(f"isbn_10:{isbn_10_list[0]}") - elif isinstance(isbn_10_list, str): - new_tags.append(f"isbn_10:{isbn_10_list}") - - if 'isbn_13' in identifiers: - isbn_13_list = identifiers['isbn_13'] - if isinstance(isbn_13_list, list) and isbn_13_list: - new_tags.append(f"isbn_13:{isbn_13_list[0]}") - elif isinstance(isbn_13_list, str): - new_tags.append(f"isbn_13:{isbn_13_list}") - - if 'lccn' in identifiers: - lccn_list = identifiers['lccn'] - if isinstance(lccn_list, list) and lccn_list: - new_tags.append(f"lccn:{lccn_list[0]}") - elif isinstance(lccn_list, str): - new_tags.append(f"lccn:{lccn_list}") - - if 'oclc_numbers' in identifiers: - oclc_list = identifiers['oclc_numbers'] - if isinstance(oclc_list, list) and oclc_list: - new_tags.append(f"oclc:{oclc_list[0]}") - elif isinstance(oclc_list, str): - new_tags.append(f"oclc:{oclc_list}") - - if 'goodreads' in identifiers: - goodreads_list = 
identifiers['goodreads'] - if isinstance(goodreads_list, list) and goodreads_list: - new_tags.append(f"goodreads:{goodreads_list[0]}") - elif isinstance(goodreads_list, str): - new_tags.append(f"goodreads:{goodreads_list}") - - log(f"Found {len(new_tags)} tag(s) from OpenLibrary lookup") - return new_tags - except Exception as e: - log(f"OpenLibrary scraping error: {e}", file=sys.stderr) - return [] - - -def _perform_scraping(tags_list: List[str]) -> List[str]: - """Perform scraping based on identifiers in tags. - - Priority order: - 1. openlibrary: (preferred - more complete metadata) - 2. isbn_10 or isbn (fallback) - """ - identifiers = _extract_scrapable_identifiers(tags_list) - - if not identifiers: - log("No scrapable identifiers found (openlibrary, ISBN, musicbrainz, imdb)") - return [] - - log(f"Found scrapable identifiers: {', '.join(identifiers.keys())}") - - new_tags = [] - - # Prefer OpenLibrary over ISBN (more complete metadata) - if 'openlibrary' in identifiers: - olid = identifiers['openlibrary'] - if olid: - log(f"Scraping OpenLibrary: {olid}") - new_tags.extend(_scrape_openlibrary_metadata(olid)) - elif 'isbn_13' in identifiers or 'isbn_10' in identifiers or 'isbn' in identifiers: - isbn = identifiers.get('isbn_13') or identifiers.get('isbn_10') or identifiers.get('isbn') - if isbn: - log(f"Scraping ISBN: {isbn}") - new_tags.extend(_scrape_isbn_metadata(isbn)) - - existing_tags_lower = {tag.lower() for tag in tags_list} - scraped_unique = [] - seen = set() - for tag in new_tags: - tag_lower = tag.lower() - if tag_lower not in existing_tags_lower and tag_lower not in seen: - scraped_unique.append(tag) - seen.add(tag_lower) - - if scraped_unique: - log(f"Added {len(scraped_unique)} new tag(s) from scraping") - - return scraped_unique - - -def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: - """Get tags from Hydrus, local sidecar, or URL metadata. - - Usage: - get-tag [-hash ] [--store ] [--emit] - get-tag -scrape - - Options: - -hash : Override hash to use instead of result's hash_hex - --store : Store result to this key for pipeline - --emit: Emit result without interactive prompt (quiet mode) - -scrape : Scrape metadata from URL or provider name (itunes, openlibrary, googlebooks) - """ - args_list = [str(arg) for arg in (args or [])] - raw_args = list(args_list) - - # Support numeric selection tokens (e.g., "@1" leading to argument "1") without treating - # them as hash overrides. This lets users pick from the most recent table overlay/results. 
- if len(args_list) == 1: - token = args_list[0] - if not token.startswith("-") and token.isdigit(): - try: - idx = int(token) - 1 - items_pool = ctx.get_last_result_items() - if 0 <= idx < len(items_pool): - result = items_pool[idx] - args_list = [] - debug(f"[get_tag] Resolved numeric selection arg {token} -> last_result_items[{idx}]") - else: - debug(f"[get_tag] Numeric selection arg {token} out of range (items={len(items_pool)})") - except Exception as exc: - debug(f"[get_tag] Failed to resolve numeric selection arg {token}: {exc}") - # Helper to get field from both dict and object - def get_field(obj: Any, field: str, default: Any = None) -> Any: - if isinstance(obj, dict): - return obj.get(field, default) - else: - return getattr(obj, field, default) - - # Parse arguments using shared parser - parsed_args = parse_cmdlet_args(args_list, CMDLET) - - # Detect if -scrape flag was provided without a value (parse_cmdlet_args skips missing values) - scrape_flag_present = any(str(arg).lower() in {"-scrape", "--scrape"} for arg in args_list) - - # Extract values - hash_override_raw = parsed_args.get("hash") - hash_override = normalize_hash(hash_override_raw) - store_key = parsed_args.get("store") - emit_requested = parsed_args.get("emit", False) - scrape_url = parsed_args.get("scrape") - scrape_requested = scrape_flag_present or scrape_url is not None - - explicit_hash_flag = any(str(arg).lower() in {"-hash", "--hash"} for arg in raw_args) - if hash_override_raw is not None: - if not hash_override or not looks_like_hash(hash_override): - debug(f"[get_tag] Ignoring invalid hash override '{hash_override_raw}' (explicit_flag={explicit_hash_flag})") - if explicit_hash_flag: - log("Invalid hash format: expected 64 hex characters", file=sys.stderr) - return 1 - hash_override = None - - if scrape_requested and (not scrape_url or str(scrape_url).strip() == ""): - log("-scrape requires a URL or provider name", file=sys.stderr) - return 1 - - # Handle URL or provider scraping mode - if scrape_requested and scrape_url: - import json as json_module - - if scrape_url.startswith("http://") or scrape_url.startswith("https://"): - # URL scraping (existing behavior) - title, tags, formats, playlist_items = _scrape_url_metadata(scrape_url) - if not tags: - log("No tags extracted from URL", file=sys.stderr) - return 1 - output = { - "title": title, - "tags": tags, - "formats": [(label, fmt_id) for label, fmt_id in formats], - "playlist_items": playlist_items, - } - print(json_module.dumps(output, ensure_ascii=False)) - return 0 - - # Provider scraping (e.g., itunes) - provider = get_metadata_provider(scrape_url, config) - if provider is None: - log(f"Unknown metadata provider: {scrape_url}", file=sys.stderr) - return 1 - - # Prefer identifier tags (ISBN/OLID/etc.) 
when available; fallback to title/filename - identifier_tags: List[str] = [] - result_tags = get_field(result, "tags", None) - if isinstance(result_tags, list): - identifier_tags = [str(t) for t in result_tags if isinstance(t, (str, bytes))] - - # Try local sidecar if no tags present on result - if not identifier_tags: - file_path = get_field(result, "target", None) or get_field(result, "path", None) or get_field(result, "file_path", None) or get_field(result, "filename", None) - if isinstance(file_path, str) and file_path and not file_path.lower().startswith(("http://", "https://")): - try: - media_path = Path(str(file_path)) - if media_path.exists(): - tags_from_sidecar = read_sidecar(media_path) - if isinstance(tags_from_sidecar, list): - identifier_tags = [str(t) for t in tags_from_sidecar if isinstance(t, (str, bytes))] - except Exception: - pass - - title_from_tags = _extract_tag_value(identifier_tags, "title") - artist_from_tags = _extract_tag_value(identifier_tags, "artist") - - identifiers = _extract_scrapable_identifiers(identifier_tags) - identifier_query: Optional[str] = None - if identifiers: - if provider.name in {"openlibrary", "googlebooks", "google"}: - identifier_query = identifiers.get("isbn_13") or identifiers.get("isbn_10") or identifiers.get("isbn") or identifiers.get("openlibrary") - elif provider.name == "itunes": - identifier_query = identifiers.get("musicbrainz") or identifiers.get("musicbrainzalbum") - - # Determine query from identifier first, else title on the result or filename - title_hint = title_from_tags or get_field(result, "title", None) or get_field(result, "name", None) - if not title_hint: - file_path = get_field(result, "path", None) or get_field(result, "filename", None) - if file_path: - title_hint = Path(str(file_path)).stem - artist_hint = artist_from_tags or get_field(result, "artist", None) or get_field(result, "uploader", None) - if not artist_hint: - meta_field = get_field(result, "metadata", None) - if isinstance(meta_field, dict): - meta_artist = meta_field.get("artist") or meta_field.get("uploader") - if meta_artist: - artist_hint = str(meta_artist) - - combined_query: Optional[str] = None - if not identifier_query and title_hint and artist_hint and provider.name in {"itunes", "musicbrainz"}: - if provider.name == "musicbrainz": - combined_query = f'recording:"{title_hint}" AND artist:"{artist_hint}"' - else: - combined_query = f"{title_hint} {artist_hint}" - - query_hint = identifier_query or combined_query or title_hint - if not query_hint: - log("No title or identifier available to search for metadata", file=sys.stderr) - return 1 - - if identifier_query: - log(f"Using identifier for metadata search: {identifier_query}") - elif combined_query: - log(f"Using title+artist for metadata search: {title_hint} - {artist_hint}") - else: - log(f"Using title for metadata search: {query_hint}") - - items = provider.search(query_hint, limit=10) - if not items: - log("No metadata results found", file=sys.stderr) - return 1 - - from result_table import ResultTable - table = ResultTable(f"Metadata: {provider.name}") - table.set_source_command("get-tag", []) - selection_payload = [] - hash_for_payload = normalize_hash(hash_override) or normalize_hash(get_field(result, "hash_hex", None)) - for idx, item in enumerate(items): - tags = provider.to_tags(item) - row = table.add_row() - row.add_column("Title", item.get("title", "")) - row.add_column("Artist", item.get("artist", "")) - row.add_column("Album", item.get("album", "")) - row.add_column("Year", 
item.get("year", "")) - payload = { - "tags": tags, - "provider": provider.name, - "title": item.get("title"), - "artist": item.get("artist"), - "album": item.get("album"), - "year": item.get("year"), - "extra": { - "tags": tags, - "provider": provider.name, - "hydrus_hash": hash_for_payload, - "storage_source": get_field(result, "source", None) or get_field(result, "origin", None), - }, - "file_hash": hash_for_payload, - } - selection_payload.append(payload) - table.set_row_selection_args(idx, [str(idx + 1)]) - - ctx.set_last_result_table_overlay(table, selection_payload) - ctx.set_current_stage_table(table) - # Preserve items for @ selection and downstream pipes without emitting duplicates - ctx.set_last_result_items_only(selection_payload) - print(table) - return 0 - - # If -scrape was requested but no URL, that's an error - if scrape_requested and not scrape_url: - log("-scrape requires a URL argument", file=sys.stderr) - return 1 - - # Handle @N selection which creates a list - extract the first item - if isinstance(result, list) and len(result) > 0: - result = result[0] - - hash_from_result = normalize_hash(get_field(result, "hash_hex", None)) - hash_hex = hash_override or hash_from_result - # Only use emit mode if explicitly requested with --emit flag, not just because we're in a pipeline - # This allows interactive REPL to work even in pipelines - emit_mode = emit_requested or bool(store_key) - store_label = (store_key.strip() if store_key and store_key.strip() else None) - - # Check Hydrus availability - hydrus_available, _ = hydrus.is_available(config) - - # Try to find path in result object - local_path = get_field(result, "target", None) or get_field(result, "path", None) or get_field(result, "file_path", None) - - # Determine if local file - is_local_file = False - media: Optional[Path] = None - if local_path and isinstance(local_path, str) and not local_path.startswith(("http://", "https://")): - is_local_file = True - try: - media = Path(str(local_path)) - except Exception: - media = None - - # Try Hydrus first (always prioritize if available and has hash) - use_hydrus = False - hydrus_meta = None # Cache the metadata from first fetch - client = None - if hash_hex and hydrus_available: - try: - client = hydrus.get_client(config) - payload = client.fetch_file_metadata(hashes=[str(hash_hex)], include_service_keys_to_tags=True, include_file_url=False) - items = payload.get("metadata") if isinstance(payload, dict) else None - if isinstance(items, list) and items: - meta = items[0] if isinstance(items[0], dict) else None - # Only accept file if it has a valid file_id (not None) - if isinstance(meta, dict) and meta.get("file_id") is not None: - use_hydrus = True - hydrus_meta = meta # Cache for tag extraction - except Exception: - pass - - # Get tags - try Hydrus first, fallback to sidecar - current = [] - service_name = "" - service_key = None - source = "unknown" - - if use_hydrus and hash_hex and hydrus_meta: - try: - # Use cached metadata from above, don't fetch again - service_name = hydrus.get_tag_service_name(config) - if client is None: - client = hydrus.get_client(config) - service_key = hydrus.get_tag_service_key(client, service_name) - current = _extract_my_tags_from_hydrus_meta(hydrus_meta, service_key, service_name) - source = "hydrus" - except Exception as exc: - log(f"Warning: Failed to extract tags from Hydrus: {exc}", file=sys.stderr) - - # Fallback to local sidecar or local DB if no tags - if not current and is_local_file and media and media.exists(): - try: - # 
First try local library DB - library_root = get_local_storage_path(config) - if library_root: - try: - with LocalLibraryDB(library_root) as db: - db_tags = db.get_tags(media) - if db_tags: - current = db_tags - source = "local_db" - except Exception as exc: - log(f"[get_tag] DB lookup failed, trying sidecar: {exc}", file=sys.stderr) - - # Fall back to sidecar if DB didn't have tags - if not current: - sidecar_path = find_sidecar(media) - if sidecar_path and sidecar_path.exists(): - try: - _, current, _ = read_sidecar(sidecar_path) - except Exception: - _, current, _ = _read_sidecar_fallback(sidecar_path) - if current: - source = "sidecar" - except Exception as exc: - log(f"Warning: Failed to load tags from local storage: {exc}", file=sys.stderr) - - # Fallback to tags in the result object if Hydrus/local lookup returned nothing - if not current: - # Check if result has 'tags' attribute (PipeObject) - if hasattr(result, 'tags') and getattr(result, 'tags', None): - current = getattr(result, 'tags') - source = "pipeline_result" - # Check if result is a dict with 'tags' key - elif isinstance(result, dict) and 'tags' in result: - tags_val = result['tags'] - if isinstance(tags_val, list): - current = tags_val - source = "pipeline_result" - source = "pipeline_result" - - # Error if no tags found - if not current: - log("No tags found", file=sys.stderr) - return 1 - - # Always output to ResultTable (pipeline mode only) - # Extract title for table header - item_title = get_field(result, "title", None) or get_field(result, "name", None) or get_field(result, "filename", None) - - # Build a subject payload representing the file whose tags are being shown - subject_origin = get_field(result, "origin", None) or get_field(result, "source", None) or source - subject_payload: Dict[str, Any] = { - "tags": list(current), - "title": item_title, - "name": item_title, - "origin": subject_origin, - "source": subject_origin, - "storage_source": subject_origin, - "service_name": service_name, - "extra": { - "tags": list(current), - "storage_source": subject_origin, - "hydrus_hash": hash_hex, - }, - } - if hash_hex: - subject_payload.update({ - "hash": hash_hex, - "hash_hex": hash_hex, - "file_hash": hash_hex, - "hydrus_hash": hash_hex, - }) - if local_path: - try: - path_text = str(local_path) - subject_payload.update({ - "file_path": path_text, - "path": path_text, - "target": path_text, - }) - subject_payload["extra"]["file_path"] = path_text - except Exception: - pass - - if source == "hydrus": - _emit_tags_as_table(current, hash_hex=hash_hex, source="hydrus", service_name=service_name, config=config, item_title=item_title, subject=subject_payload) - else: - _emit_tags_as_table(current, hash_hex=hash_hex, source="local", service_name=None, config=config, item_title=item_title, file_path=str(local_path) if local_path else None, subject=subject_payload) - - # If emit requested or store key provided, emit payload - if emit_mode: - _emit_tag_payload(source, current, hash_value=hash_hex, store_label=store_label) - - return 0 - - -_SCRAPE_CHOICES = [] -try: - _SCRAPE_CHOICES = sorted(list_metadata_providers().keys()) -except Exception: - _SCRAPE_CHOICES = ["itunes", "openlibrary", "googlebooks", "google", "musicbrainz"] - - -CMDLET = Cmdlet( - name="get-tag", - summary="Get tags from Hydrus or local sidecar metadata", - usage="get-tag [-hash ] [--store ] [--emit] [-scrape ]", - aliases=["tags"], - args=[ - SharedArgs.HASH, - CmdletArg( - name="-store", - type="string", - description="Store result to this key for 
pipeline", - alias="store" - ), - CmdletArg( - name="-emit", - type="flag", - description="Emit result without interactive prompt (quiet mode)", - alias="emit-only" - ), - CmdletArg( - name="-scrape", - type="string", - description="Scrape metadata from URL or provider name (returns tags as JSON or table)", - required=False, - choices=_SCRAPE_CHOICES, - ) - ] -) - - diff --git a/cmdlets/get_url.py b/cmdlets/get_url.py index 0e60b81..2c3f473 100644 --- a/cmdlets/get_url.py +++ b/cmdlets/get_url.py @@ -6,8 +6,8 @@ import sys from . import register import pipeline as ctx from ._shared import Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, get_field, normalize_hash -from helper.logger import log -from helper.store import FileStorage +from SYS.logger import log +from Store import Store class Get_Url(Cmdlet): @@ -48,16 +48,16 @@ class Get_Url(Cmdlet): # Get backend and retrieve url try: - storage = FileStorage(config) + storage = Store(config) backend = storage[store_name] - - url = backend.get_url(file_hash) - - if url: - for url in url: + + urls = backend.get_url(file_hash) + + if urls: + for u in urls: # Emit rich object for pipeline compatibility ctx.emit({ - "url": url, + "url": u, "hash": file_hash, "store": store_name, }) diff --git a/cmdlets/merge_file.py b/cmdlets/merge_file.py index 02186b7..a33a7e5 100644 --- a/cmdlets/merge_file.py +++ b/cmdlets/merge_file.py @@ -6,8 +6,8 @@ from pathlib import Path import json import sys -from helper.logger import log -from helper.download import download_media +from SYS.logger import log +from cmdlets.download_media import download_media from models import DownloadOptions from config import resolve_output_dir import subprocess as _subprocess @@ -326,7 +326,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: self.media_kind = media_kind self.tags = tags or [] self.url = url or [] - self.origin = "local" # Ensure origin is set for add-file + self.store = "local" PipelineItem = SimpleItem merged_item = PipelineItem( @@ -589,7 +589,7 @@ def _merge_audio(files: List[Path], output: Path, output_format: str) -> bool: # Run ffmpeg with progress monitoring try: - from helper.progress import print_progress, print_final_progress + from SYS.progress import print_progress, print_final_progress import re process = _subprocess.Popen( diff --git a/cmdlets/screen_shot.py b/cmdlets/screen_shot.py index 4526bd7..ee31f53 100644 --- a/cmdlets/screen_shot.py +++ b/cmdlets/screen_shot.py @@ -18,9 +18,9 @@ from pathlib import Path from typing import Any, Dict, List, Optional, Sequence, Tuple from urllib.parse import urlsplit, quote, urljoin -from helper.logger import log, debug -from helper.http_client import HTTPClient -from helper.utils import ensure_directory, unique_path, unique_preserve_order +from SYS.logger import log, debug +from API.HTTP import HTTPClient +from SYS.utils import ensure_directory, unique_path, unique_preserve_order from . 
import register
 from ._shared import Cmdlet, CmdletArg, SharedArgs, create_pipe_object_result, normalize_result_input, should_show_help, get_field
@@ -661,7 +661,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
                 file_path=str(screenshot_result.path),
                 cmdlet_name='screen-shot',
                 title=f"Screenshot: {Path(screenshot_result.path).name}",
-                file_hash=screenshot_hash,
+                hash_value=screenshot_hash,
                 is_temp=True,
                 parent_hash=hashlib.sha256(url.encode()).hexdigest(),
                 extra={
@@ -695,30 +695,19 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
     return exit_code
 CMDLET = Cmdlet(
     name="screen-shot",
-    summary="Capture a screenshot of a URL or file and mark as temporary artifact",
+    summary="Capture a website screenshot",
     usage="screen-shot [options] or download-data | screen-shot [options]",
     alias=["screenshot", "ss"],
     arg=[
-        CmdletArg(name="url", type="string", required=False, description="URL to screenshot (or from pipeline)"),
+        SharedArgs.URL,
         CmdletArg(name="format", type="string", description="Output format: png, jpeg, or pdf"),
         CmdletArg(name="selector", type="string", description="CSS selector for element capture"),
-        SharedArgs.ARCHIVE,  # Use shared archive argument
-        SharedArgs.STORE,  # Use shared storage argument
+
     ],
-    detail=[
-        "Take screenshots of url with optional archiving and element targeting.",
-        "Screenshots are marked as temporary artifacts for cleanup by the cleanup cmdlet.",
-        "",
-        "Arguments:",
-        "  url                 URL to capture (optional if piped from pipeline)",
-        "  --format FORMAT     Output format: png (default), jpeg, or pdf",
-        "  --selector SEL      CSS selector for capturing specific element",
-        "  --archive, -arch    Archive URL to Wayback/Archive.today/Archive.ph",
-        "  --storage LOCATION  Storage destination: hydrus, local, 0x0, debrid, or ftp",
-        "",
-        "Examples:",
-        "  download-data https://example.com | screen-shot --storage local",
-        "  download-data https://twitter.com/user/status/123 | screen-shot --selector 'article[role=article]' --storage hydrus --archive",
-        "  screen-shot https://example.com --format jpeg --storage 0x0 --archive",
-    ]
+    detail=[
+        "Capture a screenshot of a URL passed directly or piped from another cmdlet.",
+        "Use --format to choose png (default), jpeg, or pdf output.",
+        "Use --selector to capture a single element matched by a CSS selector.",
+        "Example: download-data https://example.com | screen-shot --format jpeg",
+    ]
 )
diff --git a/cmdlets/search_provider.py b/cmdlets/search_provider.py
index 61dfc31..664d1f6 100644
--- a/cmdlets/search_provider.py
+++ b/cmdlets/search_provider.py
@@ -1,15 +1,23 @@
 """search-provider cmdlet: Search external providers (bandcamp, libgen, soulseek, youtube)."""
 from __future__ import annotations
-from typing import Any, Dict, List, Sequence
+from typing import Any, Dict, List, Sequence, Optional
 import sys
+import json
+import uuid
+import importlib
 
-from helper.logger import log, debug
-from helper.provider import get_search_provider, list_search_providers
+from SYS.logger import log, debug
+from Provider.registry import get_search_provider, list_search_providers
 from ._shared import Cmdlet, CmdletArg, should_show_help
 import pipeline as ctx
 
+# Optional dependencies
+try:
+    from config import get_local_storage_path
+except Exception:  # pragma: no cover
+    get_local_storage_path = None  # type: ignore
 
 class Search_Provider(Cmdlet):
     """Search external content providers."""
@@ -88,30 +96,74 @@ class Search_Provider(Cmdlet):
                 if available:
                     log(f"  - {name}", file=sys.stderr)
             return 1
-
-        # Execute search
-        try:
-            debug(f"[search-provider] Calling {provider_name}.search()")
-            results = provider.search(query, limit=limit)
-            debug(f"[search-provider] Got {len(results)} results")
-
-            if not results:
-                log(f"No results found for query: {query}", file=sys.stderr)
- return 0 - - # Emit results for pipeline - for search_result in results: - ctx.emit(search_result.to_dict()) - - log(f"Found {len(results)} result(s) from {provider_name}", file=sys.stderr) - return 0 - - except Exception as e: - log(f"Error searching {provider_name}: {e}", file=sys.stderr) - import traceback - debug(traceback.format_exc()) + + from API.folder import API_folder_store + worker_id = str(uuid.uuid4()) + library_root = get_local_storage_path(config or {}) + if not library_root: + log("No library root configured", file=sys.stderr) return 1 + # Use context manager to ensure database is always closed + with API_folder_store(library_root) as db: + try: + db.insert_worker( + worker_id, + "search-provider", + title=f"Search: {query}", + description=f"Provider: {provider_name}, Query: {query}", + pipe=ctx.get_current_command_text() + ) + + results_list = [] + import result_table + importlib.reload(result_table) + from result_table import ResultTable + + table_title = f"Search: {query} [{provider_name}]" + preserve_order = provider_name.lower() in ('youtube', 'openlibrary') + table = ResultTable(table_title).set_preserve_order(preserve_order) + table.set_table(provider_name) + + debug(f"[search-provider] Calling {provider_name}.search()") + results = provider.search(query, limit=limit) + debug(f"[search-provider] Got {len(results)} results") + + if not results: + log(f"No results found for query: {query}", file=sys.stderr) + db.append_worker_stdout(worker_id, json.dumps([], indent=2)) + db.update_worker_status(worker_id, 'completed') + return 0 + + # Emit results for pipeline + for search_result in results: + item_dict = search_result.to_dict() if hasattr(search_result, 'to_dict') else dict(search_result) + + # Ensure table field is set (should be by provider, but just in case) + if 'table' not in item_dict: + item_dict['table'] = provider_name + + table.add_result(search_result) # ResultTable handles SearchResult objects + results_list.append(item_dict) + ctx.emit(item_dict) + + ctx.set_last_result_table(table, results_list) + db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2)) + db.update_worker_status(worker_id, 'completed') + + log(f"Found {len(results)} result(s) from {provider_name}", file=sys.stderr) + return 0 + + except Exception as e: + log(f"Error searching {provider_name}: {e}", file=sys.stderr) + import traceback + debug(traceback.format_exc()) + try: + db.update_worker_status(worker_id, 'error') + except Exception: + pass + return 1 + # Register cmdlet instance Search_Provider_Instance = Search_Provider() diff --git a/cmdlets/search_store.py b/cmdlets/search_store.py index 8c7efbc..9b1646d 100644 --- a/cmdlets/search_store.py +++ b/cmdlets/search_store.py @@ -9,9 +9,9 @@ import re import json import sys -from helper.logger import log, debug +from SYS.logger import log, debug -from ._shared import Cmdlet, CmdletArg, get_origin, get_field, should_show_help +from ._shared import Cmdlet, CmdletArg, get_field, should_show_help import pipeline as ctx # Optional dependencies @@ -27,18 +27,18 @@ except Exception: # pragma: no cover resolve_output_dir = None # type: ignore try: - from helper.hydrus import HydrusClient, HydrusRequestError + from API.HydrusNetwork import HydrusClient, HydrusRequestError except ImportError: # pragma: no cover HydrusClient = None # type: ignore HydrusRequestError = RuntimeError # type: ignore try: - from helper.utils import sha256_file + from SYS.utils import sha256_file except ImportError: # pragma: no cover sha256_file = None # 
type: ignore try: - from helper.utils_constant import mime_maps + from SYS.utils_constant import mime_maps except ImportError: # pragma: no cover mime_maps = {} # type: ignore @@ -48,7 +48,7 @@ class SearchRecord: size_bytes: int | None = None duration_seconds: str | None = None tags: str | None = None - hash_hex: str | None = None + hash: str | None = None def as_dict(self) -> dict[str, str]: payload: dict[str, str] = {"path": self.path} @@ -58,8 +58,8 @@ class SearchRecord: payload["duration"] = self.duration_seconds if self.tags: payload["tags"] = self.tags - if self.hash_hex: - payload["hash"] = self.hash_hex + if self.hash: + payload["hash"] = self.hash return payload @@ -115,7 +115,7 @@ class Search_Store(Cmdlet): def _ensure_storage_columns(self, payload: Dict[str, Any]) -> Dict[str, Any]: """Ensure storage results have the necessary fields for result_table display.""" - store_value = str(get_origin(payload, "") or "").lower() + store_value = str(payload.get("store") or "").lower() if store_value not in STORAGE_ORIGINS: return payload @@ -162,7 +162,7 @@ class Search_Store(Cmdlet): while i < len(args_list): arg = args_list[i] low = arg.lower() - if low in {"-store", "--store", "-storage", "--storage"} and i + 1 < len(args_list): + if low in {"-store", "--store"} and i + 1 < len(args_list): storage_backend = args_list[i + 1] i += 2 elif low in {"-tag", "--tag"} and i + 1 < len(args_list): @@ -199,7 +199,7 @@ class Search_Store(Cmdlet): log("Provide a search query", file=sys.stderr) return 1 - from helper.folder_store import FolderDB + from API.folder import API_folder_store from config import get_local_storage_path import uuid worker_id = str(uuid.uuid4()) @@ -209,7 +209,7 @@ class Search_Store(Cmdlet): return 1 # Use context manager to ensure database is always closed - with FolderDB(library_root) as db: + with API_folder_store(library_root) as db: try: db.insert_worker( worker_id, @@ -231,8 +231,8 @@ class Search_Store(Cmdlet): table = ResultTable(table_title) - from helper.store import FileStorage - storage = FileStorage(config=config or {}) + from Store import Store + storage = Store(config=config or {}) backend_to_search = storage_backend or None if backend_to_search: @@ -242,18 +242,21 @@ class Search_Store(Cmdlet): log(f"Backend '{backend_to_search}' does not support searching", file=sys.stderr) db.update_worker_status(worker_id, 'error') return 1 - results = target_backend.search_file(query, limit=limit) + results = target_backend.search_store(query, limit=limit) else: - from helper.hydrus import is_hydrus_available + from API.HydrusNetwork import is_hydrus_available hydrus_available = is_hydrus_available(config or {}) + from Store.HydrusNetwork import HydrusNetwork all_results = [] for backend_name in storage.list_searchable_backends(): - if backend_name.startswith("hydrus") and not hydrus_available: - continue - searched_backends.append(backend_name) try: - backend_results = storage[backend_name].search_file(query, limit=limit - len(all_results)) + backend = storage[backend_name] + if isinstance(backend, HydrusNetwork) and not hydrus_available: + continue + searched_backends.append(backend_name) + + backend_results = backend.search_store(query, limit=limit - len(all_results)) if backend_results: all_results.extend(backend_results) if len(all_results) >= limit: @@ -270,10 +273,10 @@ class Search_Store(Cmdlet): storage_counts: OrderedDict[str, int] = OrderedDict((name, 0) for name in searched_backends) for item in results or []: - origin = get_origin(item) - if not origin: + 
store = get_field(item, "store") + if not store: continue - key = str(origin).lower() + key = str(store).lower() if key not in storage_counts: storage_counts[key] = 0 storage_counts[key] += 1 @@ -295,14 +298,14 @@ class Search_Store(Cmdlet): item_dict = _as_dict(item) if store_filter: - origin_val = str(get_origin(item_dict) or "").lower() - if store_filter != origin_val: + store_val = str(item_dict.get("store") or "").lower() + if store_filter != store_val: continue normalized = self._ensure_storage_columns(item_dict) # Make hash/store available for downstream cmdlets without rerunning search hash_val = normalized.get("hash") - store_val = normalized.get("store") or get_origin(item_dict) + store_val = normalized.get("store") or item_dict.get("store") if hash_val and not normalized.get("hash"): normalized["hash"] = hash_val if store_val and not normalized.get("store"): diff --git a/cmdlets/trim_file.py b/cmdlets/trim_file.py index 569004d..909b164 100644 --- a/cmdlets/trim_file.py +++ b/cmdlets/trim_file.py @@ -9,8 +9,8 @@ import subprocess import shutil import re -from helper.logger import log, debug -from helper.utils import sha256_file +from SYS.logger import log, debug +from SYS.utils import sha256_file from . import register from ._shared import ( Cmdlet, @@ -219,11 +219,11 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: # Update original file in local DB if possible try: from config import get_local_storage_path - from helper.folder_store import FolderDB + from API.folder import API_folder_store storage_path = get_local_storage_path(config) if storage_path: - with FolderDB(storage_path) as db: + with API_folder_store(storage_path) as db: # Get original file metadata # We need to find the original file by hash or path # Try path first diff --git a/helper/adjective.json b/cmdnats/adjective.json similarity index 100% rename from helper/adjective.json rename to cmdnats/adjective.json diff --git a/cmdnats/adjective.py b/cmdnats/adjective.py index 53a5edf..cfaa63a 100644 --- a/cmdnats/adjective.py +++ b/cmdnats/adjective.py @@ -3,11 +3,11 @@ import os import sys from typing import List, Dict, Any, Optional, Sequence from cmdlets._shared import Cmdlet, CmdletArg, parse_cmdlet_args -from helper.logger import log +from SYS.logger import log from result_table import ResultTable import pipeline as ctx -ADJECTIVE_FILE = os.path.join(os.path.dirname(os.path.dirname(__file__)), "helper", "adjective.json") +ADJECTIVE_FILE = os.path.join(os.path.dirname(os.path.dirname(__file__)), "cmdnats", "adjective.json") def _load_adjectives() -> Dict[str, List[str]]: try: diff --git a/cmdnats/help.py b/cmdnats/help.py index 106ec29..340158b 100644 --- a/cmdnats/help.py +++ b/cmdnats/help.py @@ -5,7 +5,7 @@ import shlex import sys from cmdlets._shared import Cmdlet, CmdletArg, parse_cmdlet_args -from helper.logger import log +from SYS.logger import log from result_table import ResultTable import pipeline as ctx @@ -135,7 +135,7 @@ def _render_detail(meta: Dict[str, Any], args: Sequence[str]) -> None: def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: try: - from helper import cmdlet_catalog as _catalog + from cmdlets import catalog as _catalog CMDLET.arg[0].choices = _normalize_choice_list(_catalog.list_cmdlet_names()) metadata = _catalog.list_cmdlet_metadata() diff --git a/cmdnats/matrix.py b/cmdnats/matrix.py index 4701d76..7e62de1 100644 --- a/cmdnats/matrix.py +++ b/cmdnats/matrix.py @@ -1,10 +1,9 @@ from typing import Any, Dict, Sequence, List import 
sys from cmdlets._shared import Cmdlet, CmdletArg, parse_cmdlet_args -from helper.logger import log, debug +from SYS.logger import log, debug from result_table import ResultTable # REFACTOR: Commenting out Matrix import until provider refactor is complete -# from helper.store import MatrixStorageBackend from config import save_config, load_config import pipeline as ctx diff --git a/cmdnats/pipe.py b/cmdnats/pipe.py index 4ac57a6..87b5f02 100644 --- a/cmdnats/pipe.py +++ b/cmdnats/pipe.py @@ -1,54 +1,34 @@ from typing import Any, Dict, Sequence, List, Optional import sys import json -import platform import socket import re import subprocess from urllib.parse import urlparse, parse_qs from pathlib import Path from cmdlets._shared import Cmdlet, CmdletArg, parse_cmdlet_args -from helper.logger import log, debug +from SYS.logger import debug from result_table import ResultTable -from helper.mpv_ipc import get_ipc_pipe_path, MPVIPCClient +from MPV.mpv_ipc import MPV import pipeline as ctx -from helper.download import is_url_supported_by_ytdlp +from SYS.download import is_url_supported_by_ytdlp +from models import PipeObject -from helper.folder_store import LocalLibrarySearchOptimizer +from API.folder import LocalLibrarySearchOptimizer from config import get_local_storage_path, get_hydrus_access_key, get_hydrus_url from hydrus_health_check import get_cookies_file_path def _send_ipc_command(command: Dict[str, Any], silent: bool = False) -> Optional[Any]: """Send a command to the MPV IPC pipe and return the response.""" try: - ipc_pipe = get_ipc_pipe_path() - client = MPVIPCClient(socket_path=ipc_pipe) - - if not client.connect(): - return None # MPV not running - - response = client.send_command(command) - client.disconnect() - return response + mpv = MPV() + return mpv.send(command, silent=silent) except Exception as e: if not silent: debug(f"IPC Error: {e}", file=sys.stderr) return None -def _is_mpv_running() -> bool: - """Check if MPV is currently running and accessible via IPC.""" - try: - ipc_pipe = get_ipc_pipe_path() - client = MPVIPCClient(socket_path=ipc_pipe) - if client.connect(): - client.disconnect() - return True - return False - except Exception: - return False - - def _get_playlist(silent: bool = False) -> Optional[List[Dict[str, Any]]]: """Get the current playlist from MPV. 
Returns None if MPV is not running.""" cmd = {"command": ["get_property", "playlist"], "request_id": 100} @@ -194,8 +174,7 @@ def _normalize_playlist_path(text: Optional[str]) -> Optional[str]: pass # Normalize slashes for Windows paths and lowercase for comparison - real = real.replace('\\', '\\') - real = real.replace('\\', '\\') + real = real.replace('\\', '/') return real.lower() @@ -300,37 +279,6 @@ def _infer_store_from_playlist_item(item: Dict[str, Any], file_storage: Optional return host_stripped -def _format_playlist_location(name: str, max_len: int = 48) -> str: - """Format playlist filename/URL for display while keeping backend untouched.""" - target = name or "" - memory_target = _extract_target_from_memory_uri(target) - if memory_target: - target = memory_target - - lower = target.lower() - # Local paths: show basename only - if re.match(r"^[a-z]:[\\/]", target, flags=re.IGNORECASE) or target.startswith("\\\\"): - target = Path(target).name - elif lower.startswith("file://"): - parsed = urlparse(target) - target = Path(parsed.path or "").name or target - else: - parsed = urlparse(target) - host = parsed.netloc or "" - if host: - host_no_port = host.split(":", 1)[0] - host_no_port = host_no_port[4:] if host_no_port.startswith("www.") else host_no_port - tail = parsed.path.split('/')[-1] if parsed.path else "" - if tail: - target = f"{host_no_port}/{tail}" - else: - target = host_no_port - - if len(target) > max_len: - return target[: max_len - 3] + "..." - return target - - def _build_hydrus_header(config: Dict[str, Any]) -> Optional[str]: """Return header string for Hydrus auth if configured.""" try: @@ -399,7 +347,8 @@ def _ensure_ytdl_cookies() -> None: def _monitor_mpv_logs(duration: float = 3.0) -> None: """Monitor MPV logs for a short duration to capture errors.""" try: - client = MPVIPCClient() + mpv = MPV() + client = mpv.client() if not client.connect(): debug("Failed to connect to MPV for log monitoring", file=sys.stderr) return @@ -416,9 +365,14 @@ def _monitor_mpv_logs(duration: float = 3.0) -> None: start_time = time.time() # Unix sockets already have timeouts set; read until duration expires + sock_obj = client.sock + if not isinstance(sock_obj, socket.socket): + client.disconnect() + return + while time.time() - start_time < duration: try: - chunk = client.sock.recv(4096) + chunk = sock_obj.recv(4096) except socket.timeout: continue except Exception: @@ -451,15 +405,14 @@ def _get_playable_path(item: Any, file_storage: Optional[Any], config: Optional[ Returns: Tuple of (path, title) or None if no valid path found """ - path = None - title = None - store = None - file_hash = None + path: Optional[str] = None + title: Optional[str] = None + store: Optional[str] = None + file_hash: Optional[str] = None # Extract fields from item - prefer a disk path ('path'), but accept 'url' as fallback for providers if isinstance(item, dict): - # Support both canonical 'path' and legacy 'file_path' keys, and provider 'url' keys - path = item.get("path") or item.get("file_path") + path = item.get("path") # Fallbacks for provider-style entries where URL is stored in 'url' or 'source_url' or 'target' if not path: path = item.get("url") or item.get("source_url") or item.get("target") @@ -468,11 +421,11 @@ def _get_playable_path(item: Any, file_storage: Optional[Any], config: Optional[ if known and isinstance(known, list): path = known[0] title = item.get("title") or item.get("file_title") - store = item.get("store") or item.get("storage") or item.get("storage_source") or 
item.get("origin") - file_hash = item.get("hash") or item.get("file_hash") or item.get("hash_hex") + store = item.get("store") + file_hash = item.get("hash") elif hasattr(item, "path") or hasattr(item, "url") or hasattr(item, "source_url") or hasattr(item, "store") or hasattr(item, "hash"): # Handle PipeObject / dataclass objects - prefer path, but fall back to url/source_url attributes - path = getattr(item, "path", None) or getattr(item, "file_path", None) + path = getattr(item, "path", None) if not path: path = getattr(item, "url", None) or getattr(item, "source_url", None) or getattr(item, "target", None) if not path: @@ -480,7 +433,7 @@ def _get_playable_path(item: Any, file_storage: Optional[Any], config: Optional[ if known and isinstance(known, list): path = known[0] title = getattr(item, "title", None) or getattr(item, "file_title", None) - store = getattr(item, "store", None) or getattr(item, "origin", None) + store = getattr(item, "store", None) file_hash = getattr(item, "hash", None) elif isinstance(item, str): path = item @@ -493,56 +446,51 @@ def _get_playable_path(item: Any, file_storage: Optional[Any], config: Optional[ if not path: return None + + if not isinstance(path, str): + path = str(path) + if title is not None and not isinstance(title, str): + title = str(title) - # If we have a store and hash, use store's .pipe() method if available - # Skip this for URL-based providers (YouTube, SoundCloud, etc.) which have hash="unknown" - # Also skip if path is already a URL (http/https) + # Resolve hash+store into a playable target (file path or URL). + # This is unrelated to MPV's IPC pipe and keeps "pipe" terminology reserved for: + # - MPV IPC pipe (transport) + # - PipeObject (pipeline data) if store and file_hash and file_hash != "unknown" and file_storage: - # Check if this is actually a URL - if so, just return it + # If it's already a URL, MPV can usually play it directly. if path.startswith(("http://", "https://")): return (path, title) - + try: backend = file_storage[store] - # Check if backend has a .pipe() method - if hasattr(backend, 'pipe') and callable(backend.pipe): - pipe_path = backend.pipe(file_hash, config) - if pipe_path: - path = pipe_path - debug(f"Got pipe path from {store} backend: {path}") - except KeyError: - # Store not found in file_storage - it could be a search provider (youtube, bandcamp, etc.) 
- from helper.provider import get_search_provider - try: - provider = get_search_provider(store, config or {}) - if provider and hasattr(provider, 'pipe') and callable(provider.pipe): - try: - debug(f"Calling provider.pipe for '{store}' with path: {path}") - provider_path = provider.pipe(path, config or {}) - debug(f"provider.pipe returned: {provider_path}") - if provider_path: - path = provider_path - debug(f"Got pipe path from provider '{store}': {path}") - except Exception as e: - debug(f"Error in provider.pipe for '{store}': {e}", file=sys.stderr) - except Exception as e: - debug(f"Error calling provider.pipe for '{store}': {e}", file=sys.stderr) - except Exception as e: - debug(f"Error calling .pipe() on store '{store}': {e}", file=sys.stderr) - - # As a fallback, if a provider exists for this store (e.g., youtube) and - # this store is not part of FileStorage backends, call provider.pipe() - if store and (not file_storage or store not in (file_storage.list_backends() if file_storage else [])): - try: - from helper.provider import get_search_provider - provider = get_search_provider(store, config or {}) - if provider and hasattr(provider, 'pipe') and callable(provider.pipe): - provider_path = provider.pipe(path, config or {}) - if provider_path: - path = provider_path - debug(f"Got pipe path from provider '{store}' (fallback): {path}") - except Exception as e: - debug(f"Error calling provider.pipe (fallback) for '{store}': {e}", file=sys.stderr) + except Exception: + backend = None + + if backend is not None: + backend_class = type(backend).__name__ + + # Folder stores: resolve to an on-disk file path. + if hasattr(backend, "get_file") and callable(getattr(backend, "get_file")) and backend_class == "Folder": + try: + resolved = backend.get_file(file_hash) + if isinstance(resolved, Path): + path = str(resolved) + elif resolved is not None: + path = str(resolved) + except Exception as e: + debug(f"Error resolving file path from store '{store}': {e}", file=sys.stderr) + + # HydrusNetwork: build a playable API file URL without browser side-effects. 
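Taken together, a minimal sketch of the replacement resolution logic in this hunk (the HydrusNetwork branch follows below); it assumes Store supports dict-style backend lookup, Folder.get_file returns an on-disk path, and the Hydrus client object exposes url and access_key attributes:

    from Store import Store

    def resolve_playable(store_name: str, file_hash: str, config: dict) -> str | None:
        """Resolve (store, hash) to something MPV can open: a file path or a URL."""
        try:
            backend = Store(config=config)[store_name]
        except Exception:
            return None
        kind = type(backend).__name__
        if kind == "Folder":
            resolved = backend.get_file(file_hash)           # on-disk path or None
            return str(resolved) if resolved else None
        if kind == "HydrusNetwork":
            client = getattr(backend, "_client", None)
            base = str(getattr(client, "url", "") or "").rstrip("/")
            key = getattr(client, "access_key", None)
            if base and key:
                return (f"{base}/get_files/file?hash={file_hash}"
                        f"&Hydrus-Client-API-Access-Key={key}")
        return None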
+ elif backend_class == "HydrusNetwork": + try: + client = getattr(backend, "_client", None) + base_url = getattr(client, "url", None) + access_key = getattr(client, "access_key", None) + if base_url and access_key: + base_url = str(base_url).rstrip("/") + path = f"{base_url}/get_files/file?hash={file_hash}&Hydrus-Client-API-Access-Key={access_key}" + except Exception as e: + debug(f"Error building Hydrus URL from store '{store}': {e}", file=sys.stderr) return (path, title) @@ -574,13 +522,13 @@ def _queue_items(items: List[Any], clear_first: bool = False, config: Optional[D except Exception: hydrus_url = None - # Initialize FileStorage for path resolution + # Initialize Store registry for path resolution file_storage = None try: - from helper.store import FileStorage - file_storage = FileStorage(config or {}) + from Store import Store + file_storage = Store(config or {}) except Exception as e: - debug(f"Warning: Could not initialize FileStorage: {e}", file=sys.stderr) + debug(f"Warning: Could not initialize Store registry: {e}", file=sys.stderr) # Dedupe existing playlist before adding more (unless we're replacing it) existing_targets: set[str] = set() @@ -695,13 +643,13 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: parsed = parse_cmdlet_args(args, CMDLET) - # Initialize FileStorage for detecting Hydrus instance names + # Initialize Store registry for detecting Hydrus instance names file_storage = None try: - from helper.store import FileStorage - file_storage = FileStorage(config) + from Store import Store + file_storage = Store(config) except Exception as e: - debug(f"Warning: Could not initialize FileStorage: {e}", file=sys.stderr) + debug(f"Warning: Could not initialize Store registry: {e}", file=sys.stderr) # Initialize mpv_started flag mpv_started = False @@ -1119,7 +1067,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: # Check if this backend has the file try: result_path = backend.get_file(file_hash) - if result_path and result_path.exists(): + if isinstance(result_path, Path) and result_path.exists(): store_name = backend_name break except Exception: @@ -1130,7 +1078,6 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: store_name = _infer_store_from_playlist_item(item, file_storage=file_storage) # Build PipeObject with proper metadata - from models import PipeObject pipe_obj = PipeObject( hash=file_hash or "unknown", store=store_name or "unknown", @@ -1163,23 +1110,11 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: def _start_mpv(items: List[Any], config: Optional[Dict[str, Any]] = None) -> None: """Start MPV with a list of items.""" - import subprocess import time as _time_module - - # Kill any existing MPV processes to ensure clean start - try: - subprocess.run(['taskkill', '/IM', 'mpv.exe', '/F'], - stdin=subprocess.DEVNULL, stdout=subprocess.DEVNULL, - stderr=subprocess.DEVNULL, timeout=2) - _time_module.sleep(0.5) # Wait for process to die - except Exception: - pass - - ipc_pipe = get_ipc_pipe_path() - - # Start MPV in idle mode with IPC server - cmd = ['mpv', f'--input-ipc-server={ipc_pipe}', '--idle', '--force-window'] - cmd.append('--ytdl-format=bestvideo[height<=?1080]+bestaudio/best[height<=?1080]') + + mpv = MPV() + mpv.kill_existing_windows() + _time_module.sleep(0.5) # Wait for process to die hydrus_header = _build_hydrus_header(config or {}) ytdl_opts = _build_ytdl_options(config, hydrus_header) @@ -1190,35 +1125,26 @@ def _start_mpv(items: 
List[Any], config: Optional[Dict[str, Any]] = None) -> Non else: debug("Starting MPV with browser cookies: chrome") - if ytdl_opts: - cmd.append(f'--ytdl-raw-options={ytdl_opts}') - try: - kwargs = {} - if platform.system() == 'Windows': - kwargs['creationflags'] = 0x00000008 # DETACHED_PROCESS - - # Log the complete MPV command being executed - debug(f"DEBUG: Full MPV command: {' '.join(cmd)}") - - if hydrus_header: - cmd.append(f'--http-header-fields={hydrus_header}') - subprocess.Popen(cmd, stdin=subprocess.DEVNULL, stdout=subprocess.PIPE, stderr=subprocess.PIPE, **kwargs) - debug(f"Started MPV process") + # Always start MPV with the bundled Lua script via MPV class. + mpv.start( + extra_args=[ + '--ytdl-format=bestvideo[height<=?1080]+bestaudio/best[height<=?1080]', + ], + ytdl_raw_options=ytdl_opts, + http_header_fields=hydrus_header, + detached=True, + ) + debug("Started MPV process") # Wait for IPC pipe to be ready - import time - max_retries = 20 - for i in range(max_retries): - time.sleep(0.2) - client = MPVIPCClient(socket_path=ipc_pipe) - if client.connect(): - client.disconnect() - break - else: + if not mpv.wait_for_ipc(retries=20, delay_seconds=0.2): debug("Timed out waiting for MPV IPC connection", file=sys.stderr) return + # Ensure Lua script is loaded (redundant when started with --script, but safe) + mpv.ensure_lua_loaded() + # Queue items via IPC if items: _queue_items(items, config=config) diff --git a/cmdnats/worker.py b/cmdnats/worker.py index 7b91c8f..898a553 100644 --- a/cmdnats/worker.py +++ b/cmdnats/worker.py @@ -10,7 +10,7 @@ from typing import Any, Dict, Sequence, List from cmdlets import register from cmdlets._shared import Cmdlet, CmdletArg import pipeline as ctx -from helper.logger import log +from SYS.logger import log from config import get_local_storage_path DEFAULT_LIMIT = 100 @@ -74,9 +74,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: return 1 try: - from helper.folder_store import FolderDB + from API.folder import API_folder_store - with FolderDB(library_root) as db: + with API_folder_store(library_root) as db: if options.clear: count = db.clear_finished_workers() log(f"Cleared {count} finished workers.") diff --git a/config.py b/config.py index 098916c..47c0754 100644 --- a/config.py +++ b/config.py @@ -6,7 +6,7 @@ import json from pathlib import Path from typing import Any, Dict, Optional from pathlib import Path -from helper.logger import log +from SYS.logger import log DEFAULT_CONFIG_FILENAME = "config.json" SCRIPT_DIR = Path(__file__).resolve().parent diff --git a/helper/__init__.py b/helper/__init__.py deleted file mode 100644 index 4d1878d..0000000 --- a/helper/__init__.py +++ /dev/null @@ -1,91 +0,0 @@ -"""Helper modules for the downlow mpv integration.""" -from . import hydrus as _hydrus -from . import download as _download -from . import tasks as _tasks -from . import utils as _utils - -try: # Optional dependency on Playwright - from . 
import webshot as _webshot -except Exception as exc: # pragma: no cover - surfaced when Playwright is missing - _webshot = None # type: ignore - ScreenshotError = None # type: ignore[assignment] - ScreenshotOptions = None # type: ignore[assignment] - ScreenshotResult = None # type: ignore[assignment] - capture_screenshot = None # type: ignore[assignment] - ScreenshotImportError = exc # type: ignore[assignment] -else: - ScreenshotError = _webshot.ScreenshotError - ScreenshotOptions = _webshot.ScreenshotOptions - ScreenshotResult = _webshot.ScreenshotResult - capture_screenshot = _webshot.capture_screenshot - ScreenshotImportError = None -# CBOR utilities -decode_cbor = _utils.decode_cbor -jsonify = _utils.jsonify -# General utilities -CHUNK_SIZE = _utils.CHUNK_SIZE -ensure_directory = _utils.ensure_directory -unique_path = _utils.unique_path -download_hydrus_file = _hydrus.download_hydrus_file -sanitize_metadata_value = _utils.sanitize_metadata_value -unique_preserve_order = _utils.unique_preserve_order -sha256_file = _utils.sha256_file -create_metadata_sidecar = _utils.create_metadata_sidecar -create_tags_sidecar = _utils.create_tags_sidecar -# Format utilities -format_bytes = _utils.format_bytes -format_duration = _utils.format_duration -format_timestamp = _utils.format_timestamp -format_metadata_value = _utils.format_metadata_value -# Link utilities -extract_link = _utils.extract_link -extract_link_from_args = _utils.extract_link_from_args -extract_link_from_result = _utils.extract_link_from_result -get_api_key = _utils.get_api_key -add_direct_link_to_result = _utils.add_direct_link_to_result -# URL policy utilities -resolve_url_policy = _utils.resolve_url_policy -UrlPolicy = _utils.UrlPolicy -# Download utilities -DownloadOptions = _download.DownloadOptions -DownloadError = _download.DownloadError -DownloadMediaResult = _download.DownloadMediaResult -is_url_supported_by_ytdlp = _download.is_url_supported_by_ytdlp -probe_url = _download.probe_url -# Hydrus utilities -hydrus_request = _hydrus.hydrus_request -hydrus_export = _hydrus.hydrus_export -HydrusClient = _hydrus.HydrusClient -HydrusRequestError = _hydrus.HydrusRequestError -connect_ipc = _tasks.connect_ipc -ipc_sender = _tasks.ipc_sender -__all__ = [ - 'decode_cbor', - 'jsonify', - 'CHUNK_SIZE', - 'ensure_directory', - 'unique_path', - 'download_hydrus_file', - 'sanitize_metadata_value', - 'unique_preserve_order', - 'sha256_file', - 'resolve_url_policy', - 'UrlPolicy', - 'ScreenshotError', - 'ScreenshotOptions', - 'ScreenshotResult', - 'capture_screenshot', - 'ScreenshotImportError', - 'DownloadOptions', - 'DownloadError', - 'DownloadMediaResult', - 'download_media', - 'is_url_supported_by_ytdlp', - 'probe_url', - 'HydrusClient', - 'HydrusRequestError', - 'hydrus_request', - 'hydrus_export', - 'connect_ipc', - 'ipc_sender', -] diff --git a/helper/mpv_file.py b/helper/mpv_file.py deleted file mode 100644 index 26fdbb4..0000000 --- a/helper/mpv_file.py +++ /dev/null @@ -1,951 +0,0 @@ -"""MPV file metadata aggregation helpers.""" -from __future__ import annotations - -import os -import re -from dataclasses import dataclass, field -from pathlib import Path -from typing import Any, Dict, Iterable, List, Optional, Sequence -from urllib.parse import parse_qs, urlparse, unquote - -from config import get_hydrus_url -from helper.utils import sha256_file, unique_preserve_order -from helper.hydrus import HydrusClient, HydrusRequestError - -import metadata - - -class MPVFileError(RuntimeError): - """Raised when we cannot construct an MPV file 
snapshot.""" - - -@dataclass(slots=True) -class DebridMagnet: - """Represents a magnet result from AllDebrid search. - - This class matches the structure expected by the TUI (like Hydrus results) - with title, target, media_kind attributes for compatibility. - """ - magnet_id: str - title: str - size: int - status_code: int - status_text: str - progress: float - downloaded: int - seeders: int - dl_speed: int - tag_summary: Optional[str] = None - metadata: Optional[Dict[str, Any]] = None # Complete magnet file metadata from AllDebrid API - - @property - def target(self) -> str: - """Return the target URI for this magnet (used by TUI for access operations).""" - return f"alldebrid://{self.magnet_id}" - - @property - def media_kind(self) -> str: - """Return media kind for display.""" - return "magnet" - - def to_dict(self) -> Dict[str, Any]: - """Convert to dictionary for metadata display.""" - return { - "magnet_id": self.magnet_id, - "title": self.title, - "size": self.size, - "status_code": self.status_code, - "status_text": self.status_text, - "progress": f"{self.progress:.1f}%", - "downloaded": self.downloaded, - "seeders": self.seeders, - "dl_speed": self.dl_speed, - } - - -@dataclass(slots=True) -class HydrusSettings: - base_url: Optional[str] - access_key: Optional[str] - timeout: float - prefer_service_name: Optional[str] - include_relationships: bool - - def as_metadata_options(self) -> Dict[str, Any]: - options: Dict[str, Any] = { - "timeout": self.timeout, - "include_relationships": self.include_relationships, - } - if self.prefer_service_name: - options["prefer_service_name"] = self.prefer_service_name - return options - - - -@dataclass(slots=True) -class MPVfile: - path: Optional[str] = None - filename: Optional[str] = None - type: str = "unknown" - hash: Optional[str] = None - local_path: Optional[str] = None - mpv_metadata: Dict[str, Any] = field(default_factory=dict) - metadata: Dict[str, Any] = field(default_factory=dict) - remote_metadata: Optional[Dict[str, Any]] = None - relationships: Optional[Dict[str, Any]] = None - relationship_metadata: Dict[str, Any] = field(default_factory=dict) - tags: List[str] = field(default_factory=list) - original_tags: Dict[str, str] = field(default_factory=dict) - url: List[str] = field(default_factory=list) - title: Optional[str] = None - source_url: Optional[str] = None - clip_time: Optional[str] = None - duration: Optional[float] = None - filesize_mb: Optional[float] = None - is_video: bool = False - is_audio: bool = False - is_deleted: Optional[bool] = None - is_local: Optional[bool] = None - has_current_file_service: Optional[bool] = None - tag_service_key: Optional[str] = None - swap_recommended: bool = False - warnings: List[str] = field(default_factory=list) - # New relationship fields for menu - king: Optional[str] = None - alts: List[str] = field(default_factory=list) - - def to_dict(self) -> Dict[str, Any]: - payload: Dict[str, Any] = { - "path": self.path, - "filename": self.filename, - "type": self.type, - "hash": self.hash, - "local_path": self.local_path, - "mpv_metadata": self.mpv_metadata, - "metadata": self.metadata, - "remote_metadata": self.remote_metadata, - "relationships": self.relationships, - "relationship_metadata": self.relationship_metadata, - "tags": self.tags, - "original_tags": self.original_tags, - "url": self.url, - "title": self.title, - "source_url": self.source_url, - "clip_time": self.clip_time, - "duration": self.duration, - "filesize_mb": self.filesize_mb, - "is_video": self.is_video, - "is_audio": 
self.is_audio, - "is_deleted": self.is_deleted, - "is_local": self.is_local, - "has_current_file_service": self.has_current_file_service, - "tag_service_key": self.tag_service_key, - "swap_recommended": self.swap_recommended, - "warnings": self.warnings, - # relationship summary fields for easier Lua consumption - "king": self.king, - "alts": self.alts, - } - # Remove empty optional values for terser payloads. - for key in list(payload.keys()): - value = payload[key] - if value in (None, [], {}, ""): - del payload[key] - return payload - - -def _normalise_string_list(values: Optional[Iterable[Any]]) -> List[str]: - if not values: - return [] - seen: set[str] = set() - result: List[str] = [] - for value in values: - if value is None: - continue - text = str(value).strip() - if not text or text in seen: - continue - seen.add(text) - result.append(text) - return result - - -def _looks_like_hash(value: Optional[str]) -> bool: - if not value: - return False - candidate = value.strip().lower() - return len(candidate) == 64 and all(ch in "0123456789abcdef" for ch in candidate) - - -class MPVFileBuilder: - def __init__(self, payload: Dict[str, Any], config: Dict[str, Any]): - self.payload = payload or {} - self.config = config or {} - self.state = MPVfile() - self.hydrus_settings = self._resolve_hydrus_settings() - self.remote_options = self._resolve_remote_options() - self.include_relationships = bool(self.payload.get("include_relationships", True)) - self.last_url = self._normalise_url(self.payload.get("last_url")) - self._initialise_identity() - - # ------------------------------------------------------------------ - # public API - # ------------------------------------------------------------------ - - def build(self) -> Dict[str, Any]: - if self.state.type == "hydrus": - self._populate_hydrus_by_hash() - elif self.state.type == "local": - self._populate_local() - elif self.state.type == "remote": - self._populate_remote() - else: - # Attempt best effort resolution even for unknown types. 
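For readers tracking this removal: the deleted module was driven through build_mpv_file_state, roughly as sketched below. The field names come from MPVfile.to_dict() in this file; the concrete payload values are illustrative only:

    # Former entry point of the removed helper (helper/mpv_file.py).
    state = build_mpv_file_state(
        {"path": "C:/media/clip.mp4", "tags": ["title:Example"]},
        config={},
    )
    state.get("type")                       # "local", "hydrus", "remote" or "unknown"
    state.get("hash")                       # sha256 hex when it could be resolved
    state.get("king"), state.get("alts")    # Hydrus duplicate relationships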
- self._populate_local(best_effort=True) - self._finalise() - result = self.state.to_dict() - # Append King and Alts info to mpv_metadata for info menu - king = self.state.king - alts = self.state.alts - if king: - result.setdefault("mpv_metadata", {})["King"] = king - if alts: - result.setdefault("mpv_metadata", {})["Alts"] = ", ".join(alts) - return result - - # ------------------------------------------------------------------ - # configuration helpers - # ------------------------------------------------------------------ - - def _resolve_hydrus_settings(self) -> HydrusSettings: - overrides = self.payload.get("hydrus") - overrides = overrides if isinstance(overrides, dict) else {} - base_url = overrides.get("url") or overrides.get("base_url") - access_key = overrides.get("access_key") - timeout_raw = overrides.get("timeout") or overrides.get("hydrus_timeout") - prefer_service = overrides.get("prefer_service_name") - include_relationships = overrides.get("include_relationships") - if base_url is None: - base_url = get_hydrus_url(self.config) - if access_key is None: - raw_key = self.config.get("HydrusNetwork_Access_Key") - access_key = str(raw_key) if raw_key is not None else None - if timeout_raw is None: - timeout_raw = self.config.get("HydrusNetwork_Request_Timeout") - try: - timeout = float(timeout_raw) if timeout_raw is not None else 60.0 - except (TypeError, ValueError): - timeout = 60.0 - if prefer_service is None: - prefer_service = self.config.get("Hydrus_Tag_Service") - if isinstance(prefer_service, str): - prefer_service = prefer_service.strip() or None - if include_relationships is None: - include_relationships = self.payload.get("include_relationships") - include_relationships = bool(True if include_relationships is None else include_relationships) - base_url = base_url.strip() if isinstance(base_url, str) else None - access_key = access_key.strip() if isinstance(access_key, str) else None - return HydrusSettings( - base_url=base_url or None, - access_key=access_key or None, - timeout=timeout, - prefer_service_name=prefer_service, - include_relationships=include_relationships, - ) - - def _resolve_remote_options(self) -> Dict[str, Any]: - remote_payload = self.payload.get("remote") - remote_payload = remote_payload if isinstance(remote_payload, dict) else {} - options = remote_payload.get("options") - options = options if isinstance(options, dict) else {} - ytdlp_args = options.get("ytdlp_args") - if not ytdlp_args: - options["ytdlp_args"] = ["--no-playlist", "--skip-download", "--no-warnings"] - existing_timeout = options.get("timeout") - if existing_timeout is None: - options["timeout"] = min(90.0, max(10.0, float(self.payload.get("remote_timeout") or 45.0))) - return options - - # ------------------------------------------------------------------ - # initialisation - # ------------------------------------------------------------------ - - def _initialise_identity(self) -> None: - s = self.state - p = self.payload - - def _str_or_none(v): - return str(v) if v is not None and v != "" else None - - def _copy_dict_if_dict(v): - return dict(v) if isinstance(v, dict) else {} - - # path and filename - s.path = _str_or_none(p.get("path")) - s.filename = _str_or_none(p.get("filename")) - - # mpv metadata - s.mpv_metadata = _copy_dict_if_dict(p.get("mpv_metadata")) - - # tags (support both "tags" and legacy "existing_tags") - existing_tags = p.get("tags") or p.get("existing_tags") - s.tags = _normalise_string_list(existing_tags) - if s.tags: - s.original_tags = {tag: tag for tag in 
s.tags} - - # known url + last_url - s.url = _normalise_string_list(p.get("url")) - if self.last_url and self.last_url not in s.url: - s.url.append(self.last_url) - - # source URL (explicit or fallback to last_url) - explicit_source = p.get("source_url") - s.source_url = self._normalise_url(explicit_source) or self.last_url - - # hash (validate looks-like-hash) - hash_candidate = p.get("hash") - if isinstance(hash_candidate, str): - candidate = hash_candidate.strip().lower() - if _looks_like_hash(candidate): - s.hash = candidate - - # local_path (non-empty string) - local_path_override = p.get("local_path") - if isinstance(local_path_override, str): - lp = local_path_override.strip() - if lp: - s.local_path = lp - - # derive remaining fields from path/filename/type - self._derive_filename_from_path() - self._determine_type() - - - def _derive_filename_from_path(self) -> None: - if self.state.filename or not self.state.path: - return - parsed = urlparse(self.state.path) - if parsed.scheme in ("http", "https", "ytdl") and parsed.path: - candidate = Path(parsed.path).name - if candidate: - self.state.filename = candidate - elif parsed.scheme == "file": - decoded = self._decode_file_url(self.state.path) - if decoded: - self.state.filename = Path(decoded).name - else: - try: - self.state.filename = Path(self.state.path).name - except Exception: - pass - - def _determine_type(self) -> None: - s = self.state - p = self.payload - - def _set_local_from_path(pth: str | None): - if not pth: - return - # Prefer resolved local path when available - resolved = self._resolve_local_path(pth) - s.local_path = resolved if resolved else pth - s.type = "local" - - # 1) Respect explicit type when valid - explicit = p.get("type") - if isinstance(explicit, str): - lowered = explicit.strip().lower() - if lowered in {"local", "hydrus", "remote"}: - s.type = lowered - if lowered == "local": - s.local_path = self._resolve_local_path(s.path) - return - - # 2) Work from path - path = s.path or "" - if not path: - s.type = "unknown" - return - - # 3) Hydrus-specific quick checks - if self._looks_like_hydrus_url(path): - s.type = "hydrus" - return - - parsed = urlparse(path) - scheme = (parsed.scheme or "").lower() - - # 4) scheme-based handling - if scheme == "hydrus": - s.type = "hydrus" - return - - if scheme in {"http", "https", "rtmp", "rtsp", "magnet", "ytdl"}: - s.type = "hydrus" if self._looks_like_hydrus_url(path) else "remote" - return - - if scheme == "file": - decoded = self._decode_file_url(path) - if decoded: - s.local_path = decoded - s.type = "local" - return - - # 5) Windows/UNC absolute paths - if re.match(r"^[A-Za-z]:[\\/]", path) or path.startswith(("\\\\", "//")): - s.type = "local" - s.local_path = path - return - - # 6) Fallback: if it looks like a URL with a scheme separator treat as remote/hydrus - if "://" in path: - s.type = "hydrus" if self._looks_like_hydrus_url(path) else "remote" - return - - # 7) Otherwise treat as a local path - _set_local_from_path(path) - - - # ------------------------------------------------------------------ - # population helpers - # ------------------------------------------------------------------ - - def _populate_local(self, best_effort: bool = False) -> None: - local_path = self.state.local_path or self._resolve_local_path(self.state.path) - if local_path: - self.state.local_path = local_path - self._load_sidecar_tags(local_path) - if not self.state.hash: - self._compute_local_hash(local_path) - # If Hydrus is configured and we have a hash, enrich from Hydrus; 
otherwise keep local tags only - if self.state.hash and self.hydrus_settings.base_url and self.hydrus_settings.access_key: - self._populate_hydrus_by_hash() - elif best_effort and self.hydrus_settings.base_url and self.state.source_url and self.hydrus_settings.access_key: - self._populate_hydrus_by_url(self.state.source_url) - - # (helpers for resolving local path and loading sidecars already exist below) - - def _populate_remote(self) -> None: - source_url = self.state.source_url or self.last_url or self.state.path - source_url = self._normalise_url(source_url) - if source_url: - self.state.source_url = source_url - remote_payload = { - "source_url": self.state.source_url, - "existing_tags": self.state.tags, - "metadata": self.payload.get("remote_metadata"), - "mpv_metadata": self.state.mpv_metadata, - "options": self.remote_options, - } - try: - remote_result = metadata.resolve_remote_metadata(remote_payload) - except Exception as exc: # pragma: no cover - surfaced to the caller - self.state.warnings.append(str(exc)) - remote_result = None - if remote_result: - tags = remote_result.get("tags") or [] - self._merge_tags(tags) - self.state.remote_metadata = remote_result.get("metadata") - self.state.title = remote_result.get("title") or self.state.title - self.state.duration = remote_result.get("duration") or self.state.duration - self.state.source_url = remote_result.get("source_url") or self.state.source_url - warnings = remote_result.get("warnings") or [] - if warnings: - self.state.warnings.extend(warnings) - if self.hydrus_settings.base_url and self.state.source_url: - self._populate_hydrus_by_url(self.state.source_url) - - def _populate_hydrus_by_hash(self) -> None: - hash_hex = self.state.hash or self._extract_hash_from_path(self.state.path) - if hash_hex and not _looks_like_hash(hash_hex): - hash_hex = None - if not hash_hex: - return - self.state.hash = hash_hex - if not self.hydrus_settings.base_url: - return - payload: Dict[str, Any] = { - "api_url": self.hydrus_settings.base_url, - "access_key": self.hydrus_settings.access_key or "", - "options": self.hydrus_settings.as_metadata_options(), - "hash": hash_hex, - } - try: - result = metadata.fetch_hydrus_metadata(payload) - except Exception as exc: # pragma: no cover - surfaced to caller - self.state.warnings.append(str(exc)) - return - self._apply_hydrus_result(result) - # Enrich relationships using the dedicated Hydrus endpoint (robust GET) - if self.include_relationships and self.state.hash and self.hydrus_settings.base_url: - self._enrich_relationships_from_api(self.state.hash) - - def _populate_hydrus_by_url(self, url: str) -> None: - if not self.hydrus_settings.base_url: - return - payload: Dict[str, Any] = { - "api_url": self.hydrus_settings.base_url, - "access_key": self.hydrus_settings.access_key or "", - "options": self.hydrus_settings.as_metadata_options(), - "url": url, - } - try: - result = metadata.fetch_hydrus_metadata_by_url(payload) - except Exception as exc: # pragma: no cover - surfaced to caller - self.state.warnings.append(str(exc)) - return - if result.get("error") == "not_found": - self.state.warnings.extend(result.get("warnings") or []) - return - self._apply_hydrus_result(result) - self.state.type = "hydrus" - matched_url = result.get("matched_url") or result.get("url") - if matched_url and matched_url not in self.state.url: - self.state.url.append(matched_url) - # Enrich relationships once we know the hash - if self.include_relationships and self.state.hash and self.hydrus_settings.base_url: - 
self._enrich_relationships_from_api(self.state.hash) - - # ------------------------------------------------------------------ - # state modification helpers - # ------------------------------------------------------------------ - - - def _apply_hydrus_result(self, result: Dict[str, Any]) -> None: - metadata_payload = result.get("metadata") - if isinstance(metadata_payload, dict): - # Process mime into type for Lua - mime = metadata_payload.get("mime") - if isinstance(mime, str): - if mime.startswith("video/"): - metadata_payload["type"] = "video" - elif mime.startswith("audio/"): - metadata_payload["type"] = "audio" - elif mime.startswith("image/"): - metadata_payload["type"] = "image" - else: - metadata_payload["type"] = "other" - self.state.metadata = metadata_payload - # Do NOT overwrite MPVfile.type with metadata.type - self._merge_url(metadata_payload.get("url") or metadata_payload.get("url_set")) - source_url = metadata_payload.get("original_url") or metadata_payload.get("source_url") - if source_url and not self.state.source_url: - self.state.source_url = self._normalise_url(source_url) - # If file_relationships are embedded in metadata, capture as relationships when missing - if self.state.relationships is None: - embedded = metadata_payload.get("file_relationships") - if isinstance(embedded, dict) and embedded: - self.state.relationships = embedded - tags = result.get("tags") or [] - self._merge_tags(tags) - hash_value = result.get("hash") or result.get("matched_hash") - if isinstance(hash_value, str) and _looks_like_hash(hash_value): - self.state.hash = hash_value.lower() - self.state.tag_service_key = result.get("tag_service_key") or self.state.tag_service_key - self.state.duration = result.get("duration") or self.state.duration - self.state.filesize_mb = result.get("filesize_mb") or self.state.filesize_mb - self.state.is_video = bool(result.get("is_video") or self.state.is_video) - self.state.is_audio = bool(result.get("is_audio") or self.state.is_audio) - if result.get("is_deleted") is not None: - self.state.is_deleted = bool(result.get("is_deleted")) - if result.get("is_local") is not None: - self.state.is_local = bool(result.get("is_local")) - if result.get("has_current_file_service") is not None: - self.state.has_current_file_service = bool(result.get("has_current_file_service")) - # Consolidate relationships from explicit result or embedded metadata - relationships_obj: Optional[Dict[str, Any]] = None - if isinstance(result.get("relationships"), dict): - relationships_obj = result["relationships"] - self.state.relationships = relationships_obj - elif isinstance(self.state.relationships, dict): - relationships_obj = self.state.relationships - - # Helper to flatten any hashes from the relationships object - def _collect_hashes(obj: Any, acc: set[str]) -> None: - if obj is None: - return - if isinstance(obj, dict): - for v in obj.values(): - _collect_hashes(v, acc) - elif isinstance(obj, (list, tuple, set)): - for v in obj: - _collect_hashes(v, acc) - elif isinstance(obj, str) and _looks_like_hash(obj): - acc.add(obj.lower()) - - # Derive king and alts robustly from available data - king: Optional[str] = None - alts: list[str] = [] - - # 1) Try direct king fields on relationships object - rels = relationships_obj or {} - if isinstance(rels, dict): - # Common variants - for key in ("king", "king_hash", "duplicate_king", "best", "best_hash"): - val = rels.get(key) - if isinstance(val, str) and _looks_like_hash(val): - king = val.lower() - break - if isinstance(val, list): - for 
h in val: - if isinstance(h, str) and _looks_like_hash(h): - king = h.lower() - break - if king: - break - # 2) Extract alternates from known fields: numeric "3" (clips), or textual synonyms - for alt_key in ("3", "alternates", "alts", "clips"): - val = rels.get(alt_key) - if isinstance(val, list): - for h in val: - if isinstance(h, str) and _looks_like_hash(h): - h_low = h.lower() - if not king or h_low != king: - alts.append(h_low) - # some APIs might nest - elif isinstance(val, dict): - tmp: set[str] = set() - _collect_hashes(val, tmp) - for h in sorted(tmp): - if not king or h != king: - alts.append(h) - - # 3) Use relationship_metadata keys as additional alternates and king hint - rel_meta = result.get("relationship_metadata") - if isinstance(rel_meta, dict): - # prefer king candidate with no clip_time if not set - if not king: - for h, meta in rel_meta.items(): - if isinstance(h, str) and _looks_like_hash(h) and isinstance(meta, dict): - if not meta.get("clip_time"): - king = h.lower() - break - for h in rel_meta.keys(): - if isinstance(h, str) and _looks_like_hash(h): - h_low = h.lower() - if not king or h_low != king: - alts.append(h_low) - - # 4) As a last resort, flatten all relationship hashes - if not alts and relationships_obj: - tmp: set[str] = set() - _collect_hashes(relationships_obj, tmp) - for h in sorted(tmp): - if not king or h != king: - alts.append(h) - - # 5) Include current file when appropriate - if self.state.hash and (not king or self.state.hash != king) and self.state.hash not in alts: - alts.append(self.state.hash) - - # 6) Sort alternates by clip start time when available - rel_meta_all = result.get("relationship_metadata") if isinstance(result.get("relationship_metadata"), dict) else {} - def _clip_start_for(h: str) -> float: - meta = rel_meta_all.get(h) if isinstance(rel_meta_all, dict) else None - clip = meta.get("clip_time") if isinstance(meta, dict) else None - if isinstance(clip, str): - m = re.match(r"^(\d+)-(\d+)$", clip) - if m: - try: - return float(m.group(1)) - except Exception: - return float("inf") - return float("inf") - - if alts: - # de-duplicate while preserving earliest clip time ordering - seen: set[str] = set() - alts = [h for h in sorted(alts, key=_clip_start_for) if (h not in seen and not seen.add(h))] - - self.state.king = king - self.state.alts = alts - if isinstance(result.get("relationship_metadata"), dict): - self.state.relationship_metadata = result["relationship_metadata"] - self.state.title = result.get("title") or self.state.title - self.state.clip_time = result.get("clip_time") or self.state.clip_time - if result.get("swap_recommended"): - self.state.swap_recommended = True - warnings = result.get("warnings") or [] - if warnings: - self.state.warnings.extend(warnings) - - # ------------------------------------------------------------------ - # relationships enrichment (Hydrus endpoint + alt metadata) - # ------------------------------------------------------------------ - - def _enrich_relationships_from_api(self, file_hash: str) -> None: - """Fetch relationships for the given hash and enrich state's king/alts and alt metadata. - - - Uses GET /manage_file_relationships/get_file_relationships?hash=... - - If alts exist, batch-fetch their metadata via GET /get_files/file_metadata?hashes=[...] 
- - Extracts title, duration, size, tags (cleaned: title: kept with namespace, others stripped) - """ - base_url = self.hydrus_settings.base_url or "" - access_key = self.hydrus_settings.access_key or "" - if not base_url: - return - try: - client = HydrusClient(base_url, access_key, timeout=self.hydrus_settings.timeout) - except Exception as exc: # pragma: no cover - construction should rarely fail - self.state.warnings.append(f"Hydrus client init failed: {exc}") - return - try: - rel_resp = client.get_file_relationships(file_hash) - except HydrusRequestError as hre: # pragma: no cover - surfaced but non-fatal - self.state.warnings.append(f"relationships api: {hre}") - return - except Exception as exc: # pragma: no cover - self.state.warnings.append(f"relationships api: {exc}") - return - - rel_map = rel_resp.get("file_relationships") or {} - rel_obj = None - if isinstance(rel_map, dict): - rel_obj = rel_map.get(file_hash) or next((v for v in rel_map.values() if isinstance(v, dict)), None) - if isinstance(rel_obj, dict): - # Preserve the full relationships object - self.state.relationships = rel_obj - # Update king and alts from canonical fields - king = rel_obj.get("king") - alts = rel_obj.get("3") or [] - if isinstance(king, str) and _looks_like_hash(king): - self.state.king = king.lower() - if isinstance(alts, list): - self.state.alts = [h.lower() for h in alts if isinstance(h, str) and _looks_like_hash(h)] - - # Fetch alt metadata if we have alts - if not self.state.alts: - return - try: - meta_resp = client.fetch_file_metadata( - hashes=self.state.alts, - include_service_keys_to_tags=True, - include_duration=True, - include_size=True, - include_file_url=False, - include_mime=False, - ) - except HydrusRequestError as hre: # pragma: no cover - self.state.warnings.append(f"metadata api: {hre}") - return - except Exception as exc: # pragma: no cover - self.state.warnings.append(f"metadata api: {exc}") - return - - if not isinstance(meta_resp, dict): - return - entries = meta_resp.get("metadata") or [] - if not isinstance(entries, list): - return - - def _extract_tags(meta: Dict[str, Any]) -> list[str]: - tags: list[str] = [] - tag_root = meta.get("tags") or meta.get("service_keys_to_statuses_to_tags") or {} - if isinstance(tag_root, dict): - for service_dict in tag_root.values(): - if not isinstance(service_dict, dict): - continue - # Prefer storage_tags but fall back to any list values under known keys - storage = service_dict.get("storage_tags") - if isinstance(storage, dict): - for vals in storage.values(): - if isinstance(vals, list): - tags.extend([str(t) for t in vals if isinstance(t, str)]) - else: - # fall back: inspect lists directly under service_dict - for vals in service_dict.values(): - if isinstance(vals, list): - tags.extend([str(t) for t in vals if isinstance(t, str)]) - return tags - - def _clean_tags_and_title(all_tags: list[str]) -> tuple[Optional[str], list[str]]: - title_val: Optional[str] = None - cleaned: list[str] = [] - for tag in all_tags: - if not isinstance(tag, str): - continue - if tag.startswith("title:"): - if title_val is None: - title_val = tag.split(":", 1)[1] - cleaned.append(tag) # keep namespaced title - else: - if ":" in tag: - cleaned.append(tag.split(":", 1)[1]) - else: - cleaned.append(tag) - return title_val, cleaned - - for meta in entries: - if not isinstance(meta, dict): - continue - h = meta.get("hash") - if not (isinstance(h, str) and _looks_like_hash(h)): - continue - tags_all = _extract_tags(meta) - title_val, tags_clean = 
_clean_tags_and_title(tags_all) - alt_info = { - "title": title_val, - "duration": meta.get("duration"), - "size": meta.get("size"), - "tags": tags_clean, - } - self.state.relationship_metadata[h.lower()] = alt_info - - def _merge_tags(self, tags: Sequence[Any]) -> None: - incoming = _normalise_string_list(tags) - if not incoming: - return - combined = list(self.state.tags or []) + incoming - self.state.tags = unique_preserve_order(combined) - for tag in incoming: - if tag not in self.state.original_tags: - self.state.original_tags[tag] = tag - - def _merge_url(self, url: Optional[Iterable[Any]]) -> None: - if not url: - return - combined = list(self.state.url or []) + _normalise_string_list(url) - self.state.url = unique_preserve_order(combined) - - def _load_sidecar_tags(self, local_path: str) -> None: - try: - media_path = Path(local_path) - except Exception: - return - if not media_path.exists(): - return - candidates = [media_path.with_suffix(".tags"), media_path.with_suffix(".tags.txt")] - for candidate in candidates: - if candidate.exists(): - hash_value, tags, known = self._read_sidecar(candidate) - if hash_value and not self.state.hash and _looks_like_hash(hash_value): - self.state.hash = hash_value.lower() - self._merge_tags(tags) - self._merge_url(known) - break - - def _read_sidecar(self, sidecar_path: Path) -> tuple[Optional[str], List[str], List[str]]: - try: - raw = sidecar_path.read_text(encoding="utf-8", errors="ignore") - except OSError: - return None, [], [] - hash_value: Optional[str] = None - tags: List[str] = [] - url: List[str] = [] - for line in raw.splitlines(): - trimmed = line.strip() - if not trimmed: - continue - lowered = trimmed.lower() - if lowered.startswith("hash:"): - candidate = trimmed.split(":", 1)[1].strip() if ":" in trimmed else "" - if candidate: - hash_value = candidate - elif lowered.startswith("url:") or lowered.startswith("url:"): - candidate = trimmed.split(":", 1)[1].strip() if ":" in trimmed else "" - if candidate: - url.append(candidate) - else: - tags.append(trimmed) - return hash_value, tags, url - - def _compute_local_hash(self, local_path: str) -> None: - try: - digest = sha256_file(Path(local_path)) - except OSError as exc: - self.state.warnings.append(f"sha256 failed: {exc}") - return - self.state.hash = digest.lower() - - # ------------------------------------------------------------------ - # finalisation helpers - # ------------------------------------------------------------------ - - def _finalise(self) -> None: - if self.state.tags: - self.state.tags = unique_preserve_order(self.state.tags) - if self.state.url: - self.state.url = unique_preserve_order(self.state.url) - # Ensure metadata.type is always present for Lua, but do NOT overwrite MPVfile.type - if not self.state.title: - if self.state.metadata.get("title"): - self.state.title = str(self.state.metadata["title"]).strip() - elif self.state.filename: - self.state.title = self.state.filename - if self.state.hash and not _looks_like_hash(self.state.hash): - self.state.hash = None - if self.state.relationship_metadata is None: - self.state.relationship_metadata = {} - if self.state.relationships is not None and not isinstance(self.state.relationships, dict): - self.state.relationships = None - if self.state.original_tags is None: - self.state.original_tags = {} - - # ------------------------------------------------------------------ - # util helpers - # ------------------------------------------------------------------ - - @staticmethod - def _normalise_url(value: Any) -> 
Optional[str]: - if value is None: - return None - text = str(value).strip() - if not text: - return None - return text - - @staticmethod - def _resolve_local_path(path: Optional[str]) -> Optional[str]: - if not path: - return None - parsed = urlparse(path) - if parsed.scheme == "file": - decoded = MPVFileBuilder._decode_file_url(path) - return decoded - return path - - @staticmethod - def _decode_file_url(value: str) -> Optional[str]: - parsed = urlparse(value) - if parsed.scheme != "file": - return None - netloc = parsed.netloc or "" - path = unquote(parsed.path or "") - if netloc: - path = f"//{netloc}{path}" - if os.name == "nt" and path.startswith("/") and re.match(r"/[A-Za-z]:", path): - path = path[1:] - path = path.replace("/", os.sep) - return path - - def _looks_like_hydrus_url(self, url: str) -> bool: - if not url: - return False - if url.startswith("hydrus://"): - return True - if "Hydrus-Client-API-Access-Key=" in url: - return True - base = self.hydrus_settings.base_url - if base and url.startswith(base) and "/get_files/" in url: - return True - return False - - @staticmethod - def _extract_hash_from_path(path: Optional[str]) -> Optional[str]: - if not path: - return None - parsed = urlparse(path) - query = parse_qs(parsed.query) - if "hash" in query and query["hash"]: - candidate = query["hash"][0].strip() - if candidate: - return candidate.lower() - match = re.search(r"hash=([0-9a-fA-F]{64})", path) - if match: - return match.group(1).lower() - return None - - -def build_mpv_file_state(payload: Dict[str, Any], config: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: - builder = MPVFileBuilder(payload or {}, config or {}) - return builder.build() diff --git a/helper/provider.py b/helper/provider.py deleted file mode 100644 index 0ddb60f..0000000 --- a/helper/provider.py +++ /dev/null @@ -1,818 +0,0 @@ -"""Provider interfaces for search and file upload functionality. - -This module defines two distinct provider types: -1. SearchProvider: For searching content (books, music, videos, games) -2. FileProvider: For uploading files to hosting services - -No legacy code or backwards compatibility - clean, single source of truth. -""" - -from __future__ import annotations - -from abc import ABC, abstractmethod -from typing import Any, Dict, List, Optional, Tuple -from dataclasses import dataclass, field -from pathlib import Path -import sys -import os -import json -import re -import time -import asyncio -import subprocess -import shutil -import mimetypes -import traceback -import requests - -from helper.logger import log, debug - -# Optional dependencies -try: - from playwright.sync_api import sync_playwright - PLAYWRIGHT_AVAILABLE = True -except ImportError: - PLAYWRIGHT_AVAILABLE = False - - -# ============================================================================ -# SEARCH PROVIDERS -# ============================================================================ - -@dataclass -class SearchResult: - """Unified search result format across all search providers.""" - - origin: str # Provider name: "libgen", "soulseek", "debrid", "bandcamp", etc. 
- title: str # Display title/filename - path: str # Download target (URL, path, magnet, identifier) - - detail: str = "" # Additional description - annotations: List[str] = field(default_factory=list) # Tags: ["120MB", "flac", "ready"] - media_kind: str = "other" # Type: "book", "audio", "video", "game", "magnet" - size_bytes: Optional[int] = None - tags: set[str] = field(default_factory=set) # Searchable tags - columns: List[Tuple[str, str]] = field(default_factory=list) # Display columns - full_metadata: Dict[str, Any] = field(default_factory=dict) # Extra metadata - - def to_dict(self) -> Dict[str, Any]: - """Convert to dictionary for pipeline processing.""" - return { - "origin": self.origin, - "title": self.title, - "path": self.path, - "detail": self.detail, - "annotations": self.annotations, - "media_kind": self.media_kind, - "size_bytes": self.size_bytes, - "tags": list(self.tags), - "columns": list(self.columns), - "full_metadata": self.full_metadata, - } - - -class SearchProvider(ABC): - """Base class for search providers.""" - - def __init__(self, config: Dict[str, Any] = None): - self.config = config or {} - self.name = self.__class__.__name__.lower() - - @abstractmethod - def search( - self, - query: str, - limit: int = 50, - filters: Optional[Dict[str, Any]] = None, - **kwargs - ) -> List[SearchResult]: - """Search for items matching the query. - - Args: - query: Search query string - limit: Maximum results to return - filters: Optional filtering criteria - **kwargs: Provider-specific arguments - - Returns: - List of SearchResult objects - """ - pass - - def validate(self) -> bool: - """Check if provider is available and properly configured.""" - return True - - -class Libgen(SearchProvider): - """Search provider for Library Genesis books.""" - - def search( - self, - query: str, - limit: int = 50, - filters: Optional[Dict[str, Any]] = None, - **kwargs - ) -> List[SearchResult]: - filters = filters or {} - - try: - from helper.unified_book_downloader import UnifiedBookDownloader - from helper.query_parser import parse_query, get_field, get_free_text - - parsed = parse_query(query) - isbn = get_field(parsed, 'isbn') - author = get_field(parsed, 'author') - title = get_field(parsed, 'title') - free_text = get_free_text(parsed) - - search_query = isbn or title or author or free_text or query - - downloader = UnifiedBookDownloader(config=self.config) - books = downloader.search_libgen(search_query, limit=limit) - - results = [] - for idx, book in enumerate(books, 1): - title = book.get("title", "Unknown") - author = book.get("author", "Unknown") - year = book.get("year", "Unknown") - pages = book.get("pages") or book.get("pages_str") or "" - extension = book.get("extension", "") or book.get("ext", "") - filesize = book.get("filesize_str", "Unknown") - isbn = book.get("isbn", "") - mirror_url = book.get("mirror_url", "") - - columns = [ - ("Title", title), - ("Author", author), - ("Pages", str(pages)), - ("Ext", str(extension)), - ] - - detail = f"By: {author}" - if year and year != "Unknown": - detail += f" ({year})" - - annotations = [f"{filesize}"] - if isbn: - annotations.append(f"ISBN: {isbn}") - - results.append(SearchResult( - origin="libgen", - title=title, - path=mirror_url or f"libgen:{book.get('id', '')}", - detail=detail, - annotations=annotations, - media_kind="book", - columns=columns, - full_metadata={ - "number": idx, - "author": author, - "year": year, - "isbn": isbn, - "filesize": filesize, - "pages": pages, - "extension": extension, - "book_id": 
book.get("book_id", ""), - "md5": book.get("md5", ""), - }, - )) - - return results - - except Exception as e: - log(f"[libgen] Search error: {e}", file=sys.stderr) - return [] - - def validate(self) -> bool: - try: - from helper.unified_book_downloader import UnifiedBookDownloader - return True - except Exception: - return False - - -class Soulseek(SearchProvider): - """Search provider for Soulseek P2P network.""" - - MUSIC_EXTENSIONS = { - '.flac', '.mp3', '.m4a', '.aac', '.ogg', '.opus', - '.wav', '.alac', '.wma', '.ape', '.aiff', '.dsf', - '.dff', '.wv', '.tta', '.tak', '.ac3', '.dts' - } - - USERNAME = "asjhkjljhkjfdsd334" - PASSWORD = "khhhg" - DOWNLOAD_DIR = "./downloads" - MAX_WAIT_TRANSFER = 1200 - - async def perform_search( - self, - query: str, - timeout: float = 9.0, - limit: int = 50 - ) -> List[Dict[str, Any]]: - """Perform async Soulseek search.""" - import os - from aioslsk.client import SoulSeekClient - from aioslsk.settings import Settings, CredentialsSettings - - os.makedirs(self.DOWNLOAD_DIR, exist_ok=True) - - settings = Settings(credentials=CredentialsSettings(username=self.USERNAME, password=self.PASSWORD)) - client = SoulSeekClient(settings) - - try: - await client.start() - await client.login() - except Exception as e: - log(f"[soulseek] Login failed: {type(e).__name__}: {e}", file=sys.stderr) - return [] - - try: - search_request = await client.searches.search(query) - await self._collect_results(client, search_request, timeout=timeout) - return self._flatten_results(search_request)[:limit] - except Exception as e: - log(f"[soulseek] Search error: {type(e).__name__}: {e}", file=sys.stderr) - return [] - finally: - try: - await client.stop() - except Exception: - pass - - def _flatten_results(self, search_request) -> List[dict]: - flat = [] - for result in search_request.results: - username = getattr(result, "username", "?") - - for file_data in getattr(result, "shared_items", []): - flat.append({ - "file": file_data, - "username": username, - "filename": getattr(file_data, "filename", "?"), - "size": getattr(file_data, "filesize", 0), - }) - - for file_data in getattr(result, "locked_results", []): - flat.append({ - "file": file_data, - "username": username, - "filename": getattr(file_data, "filename", "?"), - "size": getattr(file_data, "filesize", 0), - }) - - return flat - - async def _collect_results(self, client, search_request, timeout: float = 75.0) -> None: - end = time.time() + timeout - last_count = 0 - while time.time() < end: - current_count = len(search_request.results) - if current_count > last_count: - debug(f"[soulseek] Got {current_count} result(s)...") - last_count = current_count - await asyncio.sleep(0.5) - - def search( - self, - query: str, - limit: int = 50, - filters: Optional[Dict[str, Any]] = None, - **kwargs - ) -> List[SearchResult]: - filters = filters or {} - - try: - flat_results = asyncio.run(self.perform_search(query, timeout=9.0, limit=limit)) - - if not flat_results: - return [] - - # Filter to music files only - music_results = [] - for item in flat_results: - filename = item['filename'] - ext = '.' + filename.rsplit('.', 1)[-1].lower() if '.' in filename else '' - if ext in self.MUSIC_EXTENSIONS: - music_results.append(item) - - if not music_results: - return [] - - # Extract metadata - enriched_results = [] - for item in music_results: - filename = item['filename'] - ext = '.' + filename.rsplit('.', 1)[-1].lower() if '.' 
in filename else '' - - # Get display filename - display_name = filename.split('\\')[-1] if '\\' in filename else filename.split('/')[-1] if '/' in filename else filename - - # Extract path hierarchy - path_parts = filename.replace('\\', '/').split('/') - artist = path_parts[-3] if len(path_parts) >= 3 else '' - album = path_parts[-2] if len(path_parts) >= 3 else path_parts[-2] if len(path_parts) == 2 else '' - - # Extract track number and title - base_name = display_name.rsplit('.', 1)[0] if '.' in display_name else display_name - track_num = '' - title = base_name - filename_artist = '' - - match = re.match(r'^(\d{1,3})\s*[\.\-]?\s+(.+)$', base_name) - if match: - track_num = match.group(1) - rest = match.group(2) - if ' - ' in rest: - filename_artist, title = rest.split(' - ', 1) - else: - title = rest - - if filename_artist: - artist = filename_artist - - enriched_results.append({ - **item, - 'artist': artist, - 'album': album, - 'title': title, - 'track_num': track_num, - 'ext': ext - }) - - # Apply filters - if filters: - artist_filter = filters.get('artist', '').lower() if filters.get('artist') else '' - album_filter = filters.get('album', '').lower() if filters.get('album') else '' - track_filter = filters.get('track', '').lower() if filters.get('track') else '' - - if artist_filter or album_filter or track_filter: - filtered = [] - for item in enriched_results: - if artist_filter and artist_filter not in item['artist'].lower(): - continue - if album_filter and album_filter not in item['album'].lower(): - continue - if track_filter and track_filter not in item['title'].lower(): - continue - filtered.append(item) - enriched_results = filtered - - # Sort: .flac first, then by size - enriched_results.sort(key=lambda item: (item['ext'].lower() != '.flac', -item['size'])) - - # Convert to SearchResult - results = [] - for idx, item in enumerate(enriched_results, 1): - artist_display = item['artist'] if item['artist'] else "(no artist)" - album_display = item['album'] if item['album'] else "(no album)" - size_mb = int(item['size'] / 1024 / 1024) - - columns = [ - ("Track", item['track_num'] or "?"), - ("Title", item['title'][:40]), - ("Artist", artist_display[:32]), - ("Album", album_display[:32]), - ("Size", f"{size_mb} MB"), - ] - - results.append(SearchResult( - origin="soulseek", - title=item['title'], - path=item['filename'], - detail=f"{artist_display} - {album_display}", - annotations=[f"{size_mb} MB", item['ext'].lstrip('.').upper()], - media_kind="audio", - size_bytes=item['size'], - columns=columns, - full_metadata={ - "username": item['username'], - "filename": item['filename'], - "artist": item['artist'], - "album": item['album'], - "track_num": item['track_num'], - "ext": item['ext'], - }, - )) - - return results - - except Exception as e: - log(f"[soulseek] Search error: {e}", file=sys.stderr) - return [] - - def validate(self) -> bool: - try: - from aioslsk.client import SoulSeekClient - return True - except ImportError: - return False - - -class Bandcamp(SearchProvider): - """Search provider for Bandcamp.""" - - def search( - self, - query: str, - limit: int = 50, - filters: Optional[Dict[str, Any]] = None, - **kwargs - ) -> List[SearchResult]: - if not PLAYWRIGHT_AVAILABLE: - log("[bandcamp] Playwright not available. 
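A worked example (made-up Soulseek path) of the filename heuristic the Soulseek enrichment step above applies: take artist/album from the folder hierarchy, then peel a leading track number off the basename.

import re

filename = r"Music\Neil Young\Harvest\03 - Heart of Gold.flac"   # hypothetical result path
parts = filename.replace("\\", "/").split("/")
artist, album = parts[-3], parts[-2]                  # "Neil Young", "Harvest"
base = parts[-1].rsplit(".", 1)[0]                    # "03 - Heart of Gold"
m = re.match(r"^(\d{1,3})\s*[\.\-]?\s+(.+)$", base)
track_num, rest = (m.group(1), m.group(2)) if m else ("", base)
title = rest.split(" - ", 1)[1] if " - " in rest else rest        # "Heart of Gold"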
Install with: pip install playwright", file=sys.stderr) - return [] - - results = [] - try: - with sync_playwright() as p: - browser = p.chromium.launch(headless=True) - page = browser.new_page() - - # Parse query for artist: prefix - if query.strip().lower().startswith("artist:"): - artist_name = query[7:].strip().strip('"') - search_url = f"https://bandcamp.com/search?q={artist_name}&item_type=b" - else: - search_url = f"https://bandcamp.com/search?q={query}&item_type=a" - - results = self._scrape_url(page, search_url, limit) - - browser.close() - except Exception as e: - log(f"[bandcamp] Search error: {e}", file=sys.stderr) - return [] - - return results - - def _scrape_url(self, page, url: str, limit: int) -> List[SearchResult]: - debug(f"[bandcamp] Scraping: {url}") - - page.goto(url) - page.wait_for_load_state("domcontentloaded") - - results = [] - - # Check for search results - search_results = page.query_selector_all(".searchresult") - if search_results: - for item in search_results[:limit]: - try: - heading = item.query_selector(".heading") - if not heading: - continue - - link = heading.query_selector("a") - if not link: - continue - - title = link.inner_text().strip() - target_url = link.get_attribute("href") - - subhead = item.query_selector(".subhead") - artist = subhead.inner_text().strip() if subhead else "Unknown" - - itemtype = item.query_selector(".itemtype") - media_type = itemtype.inner_text().strip() if itemtype else "album" - - results.append(SearchResult( - origin="bandcamp", - title=title, - path=target_url, - detail=f"By: {artist}", - annotations=[media_type], - media_kind="audio", - columns=[ - ("Name", title), - ("Artist", artist), - ("Type", media_type), - ], - full_metadata={ - "artist": artist, - "type": media_type, - }, - )) - except Exception as e: - debug(f"[bandcamp] Error parsing result: {e}") - continue - - return results - - def validate(self) -> bool: - return PLAYWRIGHT_AVAILABLE - - -class YouTube(SearchProvider): - """Search provider for YouTube using yt-dlp.""" - - def search( - self, - query: str, - limit: int = 10, - filters: Optional[Dict[str, Any]] = None, - **kwargs - ) -> List[SearchResult]: - ytdlp_path = shutil.which("yt-dlp") - if not ytdlp_path: - log("[youtube] yt-dlp not found in PATH", file=sys.stderr) - return [] - - search_query = f"ytsearch{limit}:{query}" - - cmd = [ - ytdlp_path, - "--dump-json", - "--flat-playlist", - "--no-warnings", - search_query - ] - - try: - process = subprocess.run( - cmd, - capture_output=True, - text=True, - encoding="utf-8", - errors="replace" - ) - - if process.returncode != 0: - log(f"[youtube] yt-dlp failed: {process.stderr}", file=sys.stderr) - return [] - - results = [] - for line in process.stdout.splitlines(): - if not line.strip(): - continue - try: - video_data = json.loads(line) - title = video_data.get("title", "Unknown") - video_id = video_data.get("id", "") - url = video_data.get("url") or f"https://youtube.com/watch?v={video_id}" - uploader = video_data.get("uploader", "Unknown") - duration = video_data.get("duration", 0) - view_count = video_data.get("view_count", 0) - - duration_str = f"{int(duration//60)}:{int(duration%60):02d}" if duration else "" - views_str = f"{view_count:,}" if view_count else "" - - results.append(SearchResult( - origin="youtube", - title=title, - path=url, - detail=f"By: {uploader}", - annotations=[duration_str, f"{views_str} views"], - media_kind="video", - columns=[ - ("Title", title), - ("Uploader", uploader), - ("Duration", duration_str), - ("Views", 
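For reference, the yt-dlp invocation the YouTube provider above wraps, condensed into a standalone sketch; it assumes yt-dlp is installed and on PATH, and relies on --dump-json --flat-playlist printing one JSON object per result line.

import json, shutil, subprocess

ytdlp = shutil.which("yt-dlp")                      # assumed to be installed
proc = subprocess.run(
    [ytdlp, "--dump-json", "--flat-playlist", "--no-warnings", "ytsearch5:ambient mix"],
    capture_output=True, text=True, encoding="utf-8", errors="replace",
)
for line in proc.stdout.splitlines():
    if not line.strip():
        continue
    video = json.loads(line)                        # one JSON object per result
    print(video.get("title"),
          video.get("url") or f"https://youtube.com/watch?v={video.get('id')}")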
views_str), - ], - full_metadata={ - "video_id": video_id, - "uploader": uploader, - "duration": duration, - "view_count": view_count, - }, - )) - except json.JSONDecodeError: - continue - - return results - - except Exception as e: - log(f"[youtube] Error: {e}", file=sys.stderr) - return [] - - def validate(self) -> bool: - return shutil.which("yt-dlp") is not None - - def pipe(self, path: str, config: Optional[Dict[str, Any]] = None) -> Optional[str]: - """Return the playable URL for MPV (just the path for YouTube).""" - return path - - -# Search provider registry -_SEARCH_PROVIDERS = { - "libgen": Libgen, - "soulseek": Soulseek, - "bandcamp": Bandcamp, - "youtube": YouTube, -} - - -def get_search_provider(name: str, config: Optional[Dict[str, Any]] = None) -> Optional[SearchProvider]: - """Get a search provider by name.""" - provider_class = _SEARCH_PROVIDERS.get(name.lower()) - - if provider_class is None: - log(f"[provider] Unknown search provider: {name}", file=sys.stderr) - return None - - try: - provider = provider_class(config) - if not provider.validate(): - log(f"[provider] Provider '{name}' is not available", file=sys.stderr) - return None - return provider - except Exception as e: - log(f"[provider] Error initializing '{name}': {e}", file=sys.stderr) - return None - - -def list_search_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bool]: - """List all search providers and their availability.""" - availability = {} - for name, provider_class in _SEARCH_PROVIDERS.items(): - try: - provider = provider_class(config) - availability[name] = provider.validate() - except Exception: - availability[name] = False - return availability - - -# ============================================================================ -# FILE PROVIDERS -# ============================================================================ - -class FileProvider(ABC): - """Base class for file upload providers.""" - - def __init__(self, config: Optional[Dict[str, Any]] = None): - self.config = config or {} - self.name = self.__class__.__name__.lower() - - @abstractmethod - def upload(self, file_path: str, **kwargs: Any) -> str: - """Upload a file and return the URL.""" - pass - - def validate(self) -> bool: - """Check if provider is available/configured.""" - return True - - -class ZeroXZero(FileProvider): - """File provider for 0x0.st.""" - - def upload(self, file_path: str, **kwargs: Any) -> str: - from helper.http_client import HTTPClient - - if not os.path.exists(file_path): - raise FileNotFoundError(f"File not found: {file_path}") - - try: - headers = {"User-Agent": "Medeia-Macina/1.0"} - with HTTPClient(headers=headers) as client: - with open(file_path, 'rb') as f: - response = client.post( - "https://0x0.st", - files={"file": f} - ) - - if response.status_code == 200: - return response.text.strip() - else: - raise Exception(f"Upload failed: {response.status_code} - {response.text}") - - except Exception as e: - log(f"[0x0] Upload error: {e}", file=sys.stderr) - raise - - def validate(self) -> bool: - return True - - -class Matrix(FileProvider): - """File provider for Matrix (Element) chat rooms.""" - - def validate(self) -> bool: - if not self.config: - return False - matrix_conf = self.config.get('storage', {}).get('matrix', {}) - return bool( - matrix_conf.get('homeserver') and - matrix_conf.get('room_id') and - (matrix_conf.get('access_token') or matrix_conf.get('password')) - ) - - def upload(self, file_path: str, **kwargs: Any) -> str: - from pathlib import Path - - path = 
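A hypothetical caller of the search-provider registry above; the config contents and query are placeholders.

available = list_search_providers(config={})        # e.g. {"libgen": True, "soulseek": False, ...}
provider = get_search_provider("youtube", config={})
if provider is not None:
    for result in provider.search("ambient mix", limit=5):
        print(result.origin, result.title, result.path)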
Path(file_path) - if not path.exists(): - raise FileNotFoundError(f"File not found: {file_path}") - - matrix_conf = self.config.get('storage', {}).get('matrix', {}) - homeserver = matrix_conf.get('homeserver') - access_token = matrix_conf.get('access_token') - room_id = matrix_conf.get('room_id') - - if not homeserver.startswith('http'): - homeserver = f"https://{homeserver}" - - # Upload media - upload_url = f"{homeserver}/_matrix/media/v3/upload" - headers = { - "Authorization": f"Bearer {access_token}", - "Content-Type": "application/octet-stream" - } - - mime_type, _ = mimetypes.guess_type(path) - if mime_type: - headers["Content-Type"] = mime_type - - filename = path.name - - with open(path, 'rb') as f: - resp = requests.post(upload_url, headers=headers, data=f, params={"filename": filename}) - - if resp.status_code != 200: - raise Exception(f"Matrix upload failed: {resp.text}") - - content_uri = resp.json().get('content_uri') - if not content_uri: - raise Exception("No content_uri returned") - - # Send message - send_url = f"{homeserver}/_matrix/client/v3/rooms/{room_id}/send/m.room.message" - - # Determine message type - msgtype = "m.file" - ext = path.suffix.lower() - - AUDIO_EXTS = {'.mp3', '.flac', '.wav', '.m4a', '.aac', '.ogg', '.opus', '.wma', '.mka', '.alac'} - VIDEO_EXTS = {'.mp4', '.mkv', '.webm', '.mov', '.avi', '.flv', '.mpg', '.mpeg', '.ts', '.m4v', '.wmv'} - IMAGE_EXTS = {'.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.tiff'} - - if ext in AUDIO_EXTS: - msgtype = "m.audio" - elif ext in VIDEO_EXTS: - msgtype = "m.video" - elif ext in IMAGE_EXTS: - msgtype = "m.image" - - info = { - "mimetype": mime_type, - "size": path.stat().st_size - } - - payload = { - "msgtype": msgtype, - "body": filename, - "url": content_uri, - "info": info - } - - resp = requests.post(send_url, headers=headers, json=payload) - if resp.status_code != 200: - raise Exception(f"Matrix send message failed: {resp.text}") - - event_id = resp.json().get('event_id') - return f"https://matrix.to/#/{room_id}/{event_id}" - - -# File provider registry -_FILE_PROVIDERS = { - "0x0": ZeroXZero, - "matrix": Matrix, -} - - -def get_file_provider(name: str, config: Optional[Dict[str, Any]] = None) -> Optional[FileProvider]: - """Get a file provider by name.""" - provider_class = _FILE_PROVIDERS.get(name.lower()) - - if provider_class is None: - log(f"[provider] Unknown file provider: {name}", file=sys.stderr) - return None - - try: - provider = provider_class(config) - if not provider.validate(): - log(f"[provider] File provider '{name}' is not available", file=sys.stderr) - return None - return provider - except Exception as e: - log(f"[provider] Error initializing file provider '{name}': {e}", file=sys.stderr) - return None - - -def list_file_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bool]: - """List all file providers and their availability.""" - availability = {} - for name, provider_class in _FILE_PROVIDERS.items(): - try: - provider = provider_class(config) - availability[name] = provider.validate() - except Exception: - availability[name] = False - return availability - - - - diff --git a/helper/store.py b/helper/store.py deleted file mode 100644 index d2ff41c..0000000 --- a/helper/store.py +++ /dev/null @@ -1,2268 +0,0 @@ -"""File storage abstraction layer for uploading files to different services. 
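The Matrix provider above chains two standard client-server API calls: a media upload, then an m.room.message event pointing at the returned mxc:// URI. A condensed standalone sketch, with homeserver, token, room and file as placeholders:

import mimetypes, requests
from pathlib import Path

homeserver, token, room_id = "https://matrix.example.org", "ACCESS_TOKEN", "!room:example.org"
path = Path("song.flac")

mime = mimetypes.guess_type(path.name)[0] or "application/octet-stream"
with open(path, "rb") as f:
    up = requests.post(
        f"{homeserver}/_matrix/media/v3/upload",
        headers={"Authorization": f"Bearer {token}", "Content-Type": mime},
        params={"filename": path.name}, data=f,
    )
content_uri = up.json()["content_uri"]              # mxc:// URI

msg = {"msgtype": "m.audio", "body": path.name, "url": content_uri,
       "info": {"mimetype": mime, "size": path.stat().st_size}}
requests.post(
    f"{homeserver}/_matrix/client/v3/rooms/{room_id}/send/m.room.message",
    headers={"Authorization": f"Bearer {token}"}, json=msg,
)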
-""" - -from __future__ import annotations - -from abc import ABC, abstractmethod -from pathlib import Path -from typing import Any, Dict, Optional, Tuple, List -import sys -import shutil -import requests -import re - -from helper.logger import log, debug -from helper.utils_constant import mime_maps -from helper.utils import sha256_file -from helper.folder_store import FolderDB -from config import get_local_storage_path - - -HEX_DIGITS = set("0123456789abcdef") - - -def _normalize_hex_hash(value: Optional[str]) -> Optional[str]: - """Return a normalized 64-character lowercase hash or None.""" - if value is None: - return None - - try: - cleaned = ''.join(ch for ch in str(value).strip().lower() if ch in HEX_DIGITS) - except Exception: - return None - - if len(cleaned) == 64: - return cleaned - return None - - -def _resolve_file_hash(candidate: Optional[str], path: Path) -> Optional[str]: - """Return the given hash if valid, otherwise compute sha256 from disk.""" - normalized = _normalize_hex_hash(candidate) - if normalized is not None: - return normalized - - if not path.exists(): - return None - - try: - return sha256_file(path) - except Exception as exc: - debug(f"Failed to compute hash for {path}: {exc}") - return None - - -class store(ABC): - """""" - @abstractmethod - def add_file(self, file_path: Path, **kwargs: Any) -> str: - """""" - @abstractmethod - def name(self) -> str: - """""" - def search_file(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]: - """""" - raise NotImplementedError(f"{self.name()} backend does not support searching") - - @abstractmethod - def get_file(self, file_hash: str, **kwargs: Any) -> Optional[Path]: - """Retrieve file by hash, returning path to the file. - - Args: - file_hash: SHA256 hash of the file (64-char hex string) - - Returns: - Path to the file or None if not found - """ - raise NotImplementedError(f"{self.name()} backend does not support get_file") - - @abstractmethod - def get_metadata(self, file_hash: str, **kwargs: Any) -> Optional[Dict[str, Any]]: - """Get metadata for a file by hash. 
- - Args: - file_hash: SHA256 hash of the file (64-char hex string) - - Returns: - Dict with metadata fields or None if not found - """ - raise NotImplementedError(f"{self.name()} backend does not support get_metadata") - - @abstractmethod - def get_tag(self, file_identifier: str, **kwargs: Any) -> Tuple[List[str], str]: - """""" - raise NotImplementedError(f"{self.name()} backend does not support get_tags") - @abstractmethod - def add_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool: - """""" - raise NotImplementedError(f"{self.name()} backend does not support add_tag") - @abstractmethod - def delete_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool: - """""" - raise NotImplementedError(f"{self.name()} backend does not support delete_tag") - @abstractmethod - def get_url(self, file_identifier: str, **kwargs: Any) -> List[str]: - """""" - raise NotImplementedError(f"{self.name()} backend does not support get_url") - @abstractmethod - def add_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool: - """""" - raise NotImplementedError(f"{self.name()} backend does not support add_url") - @abstractmethod - def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool: - """""" - raise NotImplementedError(f"{self.name()} backend does not support delete_url") -class Folder(store): - """""" - # Track which locations have already been migrated to avoid repeated migrations - _migrated_locations = set() - - def __init__(self, location: Optional[str] = None, name: Optional[str] = None) -> None: - self._location = location - self._name = name - - if self._location: - try: - from helper.folder_store import FolderDB - from pathlib import Path - location_path = Path(self._location).expanduser() - - # Use context manager to ensure connection is properly closed - with FolderDB(location_path) as db: - if db.connection: - db.connection.commit() - - # Call migration and discovery at startup - Folder.migrate_location(self._location) - except Exception as exc: - debug(f"Failed to initialize database for '{name}': {exc}") - - @classmethod - def migrate_location(cls, location: Optional[str]) -> None: - """Migrate a location to hash-based storage (one-time operation, call explicitly at startup).""" - if not location: - return - - from pathlib import Path - location_path = Path(location).expanduser() - location_str = str(location_path) - - # Only migrate once per location - if location_str in cls._migrated_locations: - return - - cls._migrated_locations.add(location_str) - - # Create a temporary instance just to call the migration - temp_instance = cls(location=location) - temp_instance._migrate_to_hash_storage(location_path) - - def _migrate_to_hash_storage(self, location_path: Path) -> None: - """Migrate existing files from filename-based to hash-based storage. - - Checks for sidecars (.metadata, .tag) and imports them before renaming. - Also ensures all files have a title: tag. 
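In short, the migration above renames each non-hash-named file to "<sha256><ext>" in place before indexing it. A condensed sketch of that naming rule, assuming the sha256_file helper imported at the top of this module and a hypothetical pre-migration file:

from pathlib import Path

src = Path("/library/My Song.flac")                 # hypothetical pre-migration file
file_hash = sha256_file(src)                        # 64-char hex digest
dst = src.with_name(file_hash + src.suffix)         # e.g. "<sha256>.flac" in the same folder
if dst != src and not dst.exists():
    src.rename(dst)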
- """ - from helper.folder_store import read_sidecar, write_sidecar, find_sidecar - - try: - with FolderDB(location_path) as db: - cursor = db.connection.cursor() - - # First pass: migrate filename-based files and add title tags - # Scan all files in the storage directory - for file_path in sorted(location_path.iterdir()): - if not file_path.is_file(): - continue - - # Skip database files and sidecars - if file_path.suffix in ('.db', '.metadata', '.tag', '-shm', '-wal'): - continue - # Also skip if the file ends with -shm or -wal (SQLite journal files) - if file_path.name.endswith(('-shm', '-wal')): - continue - - # Check if filename is already a hash (without extension) - if len(file_path.stem) == 64 and all(c in '0123456789abcdef' for c in file_path.stem.lower()): - continue # Already migrated, will process in second pass - - try: - # Compute file hash - file_hash = sha256_file(file_path) - # Preserve extension in the hash-based filename - file_ext = file_path.suffix # e.g., '.mp4' - hash_filename = file_hash + file_ext if file_ext else file_hash - hash_path = location_path / hash_filename - - # Check for sidecars and import them - sidecar_path = find_sidecar(file_path) - tags_to_add = [] - url_to_add = [] - has_title_tag = False - - if sidecar_path and sidecar_path.exists(): - try: - _, tags, url = read_sidecar(sidecar_path) - if tags: - tags_to_add = list(tags) - # Check if title tag exists - has_title_tag = any(t.lower().startswith('title:') for t in tags_to_add) - if url: - url_to_add = list(url) - debug(f"Found sidecar for {file_path.name}: {len(tags_to_add)} tags, {len(url_to_add)} url", file=sys.stderr) - # Delete the sidecar after importing - sidecar_path.unlink() - except Exception as exc: - debug(f"Failed to read sidecar for {file_path.name}: {exc}", file=sys.stderr) - - # Ensure there's a title tag (use original filename if not present) - if not has_title_tag: - tags_to_add.append(f"title:{file_path.name}") - - # Rename file to hash if needed - if hash_path != file_path and not hash_path.exists(): - debug(f"Migrating: {file_path.name} -> {hash_filename}", file=sys.stderr) - file_path.rename(hash_path) - - # Create or update database entry using FolderDB methods - db.get_or_create_file_entry(hash_path) - - # Save extension metadata - ext_clean = file_ext.lstrip('.') if file_ext else '' - db.save_metadata(hash_path, { - 'hash': file_hash, - 'ext': ext_clean, - 'size': hash_path.stat().st_size - }) - - # Add all tags (including title tag) - if tags_to_add: - db.save_tags(hash_path, tags_to_add) - debug(f"Added {len(tags_to_add)} tags to {file_hash}", file=sys.stderr) - - # Note: url would need a separate table if you want to store them - # For now, we're just noting them in debug - if url_to_add: - debug(f"Imported {len(url_to_add)} url for {file_hash}: {url_to_add}", file=sys.stderr) - - except Exception as exc: - debug(f"Failed to migrate file {file_path.name}: {exc}", file=sys.stderr) - - # Second pass: ensure all files in database have a title: tag - db.connection.commit() - cursor.execute(''' - SELECT f.hash, f.file_path - FROM files f - WHERE NOT EXISTS ( - SELECT 1 FROM tags t WHERE t.hash = f.hash AND LOWER(t.tag) LIKE 'title:%' - ) - ''') - files_without_title = cursor.fetchall() - - for file_hash, file_path_str in files_without_title: - try: - file_path = Path(file_path_str) - if file_path.exists(): - # Use the filename as the title - title_tag = f"title:{file_path.name}" - db.save_tags(file_path, [title_tag]) - debug(f"Added title tag to {file_path.name}", 
file=sys.stderr) - except Exception as exc: - debug(f"Failed to add title tag to file {file_path_str}: {exc}", file=sys.stderr) - - db.connection.commit() - - # Third pass: discover files on disk that aren't in the database yet - # These are hash-named files that were added after initial indexing - cursor.execute('SELECT LOWER(hash) FROM files') - db_hashes = {row[0] for row in cursor.fetchall()} - - discovered = 0 - for file_path in sorted(location_path.rglob("*")): - if file_path.is_file(): - # Check if file name (without extension) is a 64-char hex hash - name_without_ext = file_path.stem - if len(name_without_ext) == 64 and all(c in '0123456789abcdef' for c in name_without_ext.lower()): - file_hash = name_without_ext.lower() - - # Skip if already in DB - if file_hash in db_hashes: - continue - - try: - # Add file to DB (creates entry and auto-adds title: tag) - db.get_or_create_file_entry(file_path) - - # Save extension metadata - file_ext = file_path.suffix - ext_clean = file_ext.lstrip('.') if file_ext else '' - db.save_metadata(file_path, { - 'hash': file_hash, - 'ext': ext_clean, - 'size': file_path.stat().st_size - }) - - discovered += 1 - except Exception as e: - debug(f"Failed to discover file {file_path.name}: {e}", file=sys.stderr) - - if discovered > 0: - debug(f"Discovered and indexed {discovered} undiscovered files in {location_path.name}", file=sys.stderr) - db.connection.commit() - except Exception as exc: - debug(f"Migration to hash storage failed: {exc}", file=sys.stderr) - - - def location(self) -> str: - return self._location - - def name(self) -> str: - return self._name - - def add_file(self, file_path: Path, **kwargs: Any) -> str: - """Add file to local folder storage with full metadata support. - - Args: - file_path: Path to the file to add - move: If True, move file instead of copy (default: False) - tags: Optional list of tags to add - url: Optional list of url to associate with the file - title: Optional title (will be added as 'title:value' tag) - - Returns: - File hash (SHA256 hex string) as identifier - """ - move_file = bool(kwargs.get("move")) - tags = kwargs.get("tags", []) - url = kwargs.get("url", []) - title = kwargs.get("title") - - # Extract title from tags if not explicitly provided - if not title: - for tag in tags: - if isinstance(tag, str) and tag.lower().startswith("title:"): - title = tag.split(":", 1)[1].strip() - break - - # Fallback to filename if no title - if not title: - title = file_path.name - - # Ensure title is in tags - title_tag = f"title:{title}" - if not any(str(tag).lower().startswith("title:") for tag in tags): - tags = [title_tag] + list(tags) - - try: - file_hash = sha256_file(file_path) - debug(f"File hash: {file_hash}", file=sys.stderr) - - # Preserve extension in the stored filename - file_ext = file_path.suffix # e.g., '.mp4' - save_filename = file_hash + file_ext if file_ext else file_hash - save_file = Path(self._location) / save_filename - - # Check if file already exists - with FolderDB(Path(self._location)) as db: - existing_path = db.search_hash(file_hash) - if existing_path and existing_path.exists(): - log( - f"✓ File already in local storage: {existing_path}", - file=sys.stderr, - ) - # Still add tags and url if provided - if tags: - self.add_tag(file_hash, tags) - if url: - self.add_url(file_hash, url) - return file_hash - - # Move or copy file - if move_file: - shutil.move(str(file_path), str(save_file)) - debug(f"Local move: {save_file}", file=sys.stderr) - else: - shutil.copy2(str(file_path), str(save_file)) - 
debug(f"Local copy: {save_file}", file=sys.stderr) - - # Save to database - with FolderDB(Path(self._location)) as db: - db.get_or_create_file_entry(save_file) - # Save metadata including extension - ext_clean = file_ext.lstrip('.') if file_ext else '' - db.save_metadata(save_file, { - 'hash': file_hash, - 'ext': ext_clean, - 'size': file_path.stat().st_size - }) - - # Add tags if provided - if tags: - self.add_tag(file_hash, tags) - - # Add url if provided - if url: - self.add_url(file_hash, url) - - log(f"✓ Added to local storage: {save_file.name}", file=sys.stderr) - return file_hash - - except Exception as exc: - log(f"❌ Local storage failed: {exc}", file=sys.stderr) - raise - - def search_file(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]: - """Search local database for files by title tag or filename.""" - from fnmatch import fnmatch - from helper.folder_store import DatabaseAPI - - limit = kwargs.get("limit") - try: - limit = int(limit) if limit is not None else None - except (TypeError, ValueError): - limit = None - if isinstance(limit, int) and limit <= 0: - limit = None - - query = query.lower() - query_lower = query # Ensure query_lower is defined for all code paths - match_all = query == "*" - results = [] - search_dir = Path(self._location).expanduser() - - tokens = [t.strip() for t in query.split(',') if t.strip()] - - if not match_all and len(tokens) == 1 and _normalize_hex_hash(query): - debug("Hash queries require 'hash:' prefix for local search") - return results - - if not match_all and _normalize_hex_hash(query): - debug("Hash queries require 'hash:' prefix for local search") - return results - - def _create_entry(file_path: Path, tags: list[str], size_bytes: int | None, db_hash: Optional[str]) -> dict[str, Any]: - path_str = str(file_path) - # Get title from tags if available, otherwise use hash as fallback - title = next((t.split(':', 1)[1] for t in tags if t.lower().startswith('title:')), None) - if not title: - # Fallback to hash if no title tag exists - hash_value = _resolve_file_hash(db_hash, file_path) - title = hash_value if hash_value else file_path.stem - - # Extract extension from file path - ext = file_path.suffix.lstrip('.') - if not ext: - # Fallback: try to extract from title (original filename might be in title) - title_path = Path(title) - ext = title_path.suffix.lstrip('.') - - # Build clean entry with only necessary fields - hash_value = _resolve_file_hash(db_hash, file_path) - entry = { - "title": title, - "ext": ext, - "path": path_str, - "target": path_str, - "store": self._name, - "size": size_bytes, - "hash": hash_value, - "tag": tags, - } - return entry - - try: - if not search_dir.exists(): - debug(f"Search directory does not exist: {search_dir}") - return results - - try: - with DatabaseAPI(search_dir) as api: - if tokens and len(tokens) > 1: - def _like_pattern(term: str) -> str: - return term.replace('*', '%').replace('?', '_') - - def _ids_for_token(token: str) -> set[int]: - token = token.strip() - if not token: - return set() - - if ':' in token and not token.startswith(':'): - namespace, pattern = token.split(':', 1) - namespace = namespace.strip().lower() - pattern = pattern.strip().lower() - - if namespace == 'hash': - normalized_hash = _normalize_hex_hash(pattern) - if not normalized_hash: - return set() - h = api.get_file_hash_by_hash(normalized_hash) - return {h} if h else set() - - if namespace == 'store': - if pattern not in {'local', 'file', 'filesystem'}: - return set() - return api.get_all_file_hashes() - - 
query_pattern = f"{namespace}:%" - tag_rows = api.get_file_hashes_by_tag_pattern(query_pattern) - matched: set[str] = set() - for file_hash, tag_val in tag_rows: - if not tag_val: - continue - tag_lower = str(tag_val).lower() - if not tag_lower.startswith(f"{namespace}:"): - continue - value = tag_lower[len(namespace)+1:] - if fnmatch(value, pattern): - matched.add(file_hash) - return matched - - term = token.lower() - like_pattern = f"%{_like_pattern(term)}%" - hashes = api.get_file_hashes_by_path_pattern(like_pattern) - hashes.update(api.get_file_hashes_by_tag_substring(like_pattern)) - return hashes - - try: - matching_hashes: set[str] | None = None - for token in tokens: - hashes = _ids_for_token(token) - matching_hashes = hashes if matching_hashes is None else matching_hashes & hashes - if not matching_hashes: - return results - - if not matching_hashes: - return results - - rows = api.get_file_metadata(matching_hashes, limit) - for file_hash, file_path_str, size_bytes, ext in rows: - if not file_path_str: - continue - file_path = Path(file_path_str) - if not file_path.exists(): - continue - if size_bytes is None: - try: - size_bytes = file_path.stat().st_size - except OSError: - size_bytes = None - tags = api.get_tags_for_file(file_hash) - entry = _create_entry(file_path, tags, size_bytes, file_hash) - results.append(entry) - if limit is not None and len(results) >= limit: - return results - return results - except Exception as exc: - log(f"⚠️ AND search failed: {exc}", file=sys.stderr) - debug(f"AND search exception details: {exc}") - return [] - - if ":" in query and not query.startswith(":"): - namespace, pattern = query.split(":", 1) - namespace = namespace.strip().lower() - pattern = pattern.strip().lower() - debug(f"Performing namespace search: {namespace}:{pattern}") - - if namespace == "hash": - normalized_hash = _normalize_hex_hash(pattern) - if not normalized_hash: - return results - h = api.get_file_hash_by_hash(normalized_hash) - hashes = {h} if h else set() - rows = api.get_file_metadata(hashes, limit) - for file_hash, file_path_str, size_bytes, ext in rows: - if not file_path_str: - continue - file_path = Path(file_path_str) - if not file_path.exists(): - continue - if size_bytes is None: - try: - size_bytes = file_path.stat().st_size - except OSError: - size_bytes = None - tags = api.get_tags_for_file(file_hash) - entry = _create_entry(file_path, tags, size_bytes, file_hash) - results.append(entry) - if limit is not None and len(results) >= limit: - return results - return results - - query_pattern = f"{namespace}:%" - rows = api.get_files_by_namespace_pattern(query_pattern, limit) - debug(f"Found {len(rows)} potential matches in DB") - - for file_hash, file_path_str, size_bytes, ext in rows: - if not file_path_str: - continue - - tags = api.get_tags_by_namespace_and_file(file_hash, query_pattern) - - for tag in tags: - tag_lower = tag.lower() - if tag_lower.startswith(f"{namespace}:"): - value = tag_lower[len(namespace)+1:] - if fnmatch(value, pattern): - file_path = Path(file_path_str) - if file_path.exists(): - if size_bytes is None: - size_bytes = file_path.stat().st_size - all_tags = api.get_tags_for_file(file_hash) - entry = _create_entry(file_path, all_tags, size_bytes, file_hash) - results.append(entry) - else: - debug(f"File missing on disk: {file_path}") - break - - if limit is not None and len(results) >= limit: - return results - elif not match_all: - terms = [t.strip() for t in query_lower.replace(',', ' ').split() if t.strip()] - if not terms: - terms = 
[query_lower] - - debug(f"Performing filename/tag search for terms: {terms}") - - fetch_limit = (limit or 45) * 50 - - conditions = ["LOWER(f.file_path) LIKE ?" for _ in terms] - params = [f"%{t}%" for t in terms] - - rows = api.get_files_by_multiple_path_conditions(conditions, params, fetch_limit) - debug(f"Found {len(rows)} filename matches in DB (before whole-word filter)") - - word_regex = None - if len(terms) == 1: - term = terms[0] - has_wildcard = '*' in term or '?' in term - - if has_wildcard: - try: - from fnmatch import translate - word_regex = re.compile(translate(term), re.IGNORECASE) - except Exception: - word_regex = None - else: - try: - pattern = r'(?= limit: - return results - - if terms: - title_hits: dict[str, dict[str, Any]] = {} - for term in terms: - title_pattern = f"title:%{term}%" - title_rows = api.get_files_by_title_tag_pattern(title_pattern, fetch_limit) - for file_hash, file_path_str, size_bytes, ext in title_rows: - if not file_path_str: - continue - entry = title_hits.get(file_hash) - if entry: - entry["count"] += 1 - if size_bytes is not None: - entry["size"] = size_bytes - else: - title_hits[file_hash] = { - "path": file_path_str, - "size": size_bytes, - "hash": file_hash, - "count": 1, - } - - if title_hits: - required = len(terms) - for file_hash, info in title_hits.items(): - if info.get("count") != required: - continue - file_path_str = info.get("path") - if not file_path_str or file_path_str in seen_files: - continue - file_path = Path(file_path_str) - if not file_path.exists(): - continue - seen_files.add(file_path_str) - - size_bytes = info.get("size") - if size_bytes is None: - try: - size_bytes = file_path.stat().st_size - except OSError: - size_bytes = None - - tags = api.get_tags_for_file(file_hash) - entry = _create_entry(file_path, tags, size_bytes, info.get("hash")) - results.append(entry) - if limit is not None and len(results) >= limit: - return results - - query_pattern = f"%{query_lower}%" - tag_rows = api.get_files_by_simple_tag_pattern(query_pattern, limit) - - for file_hash, file_path_str, size_bytes, ext in tag_rows: - if not file_path_str or file_path_str in seen_files: - continue - seen_files.add(file_path_str) - - file_path = Path(file_path_str) - if file_path.exists(): - if size_bytes is None: - size_bytes = file_path.stat().st_size - - tags = api.get_tags_for_file(file_hash) - entry = _create_entry(file_path, tags, size_bytes, file_hash) - results.append(entry) - - if limit is not None and len(results) >= limit: - return results - - else: - rows = api.get_all_files(limit) - for file_hash, file_path_str, size_bytes, ext in rows: - if file_path_str: - file_path = Path(file_path_str) - if file_path.exists(): - if size_bytes is None: - size_bytes = file_path.stat().st_size - - tags = api.get_tags_for_file(file_hash) - entry = _create_entry(file_path, tags, size_bytes, file_hash) - results.append(entry) - - if results: - debug(f"Returning {len(results)} results from DB") - else: - debug("No results found in DB") - return results - - except Exception as e: - log(f"⚠️ Database search failed: {e}", file=sys.stderr) - debug(f"DB search exception details: {e}") - return [] - - except Exception as exc: - log(f"❌ Local search failed: {exc}", file=sys.stderr) - raise - - def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]: - """Alias for search_file to match the interface expected by FileStorage.""" - return self.search_file(query, **kwargs) - - def _resolve_library_root(self, file_path: Path, config: Dict[str, Any]) -> 
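Given a Folder instance like the one sketched earlier, these are the query shapes the search logic above distinguishes (library contents are hypothetical):

folder.search_file("*")                                   # everything in the library
folder.search_file("hash:" + "0" * 64)                    # exact-hash lookup (the hash: prefix is required)
folder.search_file("artist:neil*, album:harvest")         # comma-separated tokens are ANDed, values are globs
folder.search_file("heart of gold", limit=10)             # free text matched against paths, title tags and other tags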
Optional[Path]: - """Return the library root containing medios-macina.db. - - Prefer the store's configured location, then config override, then walk parents - of the file path to find a directory with medios-macina.db.""" - candidates: list[Path] = [] - if self._location: - candidates.append(Path(self._location).expanduser()) - cfg_root = get_local_storage_path(config) if config else None - if cfg_root: - candidates.append(Path(cfg_root).expanduser()) - - for root in candidates: - db_path = root / "medios-macina.db" - if db_path.exists(): - return root - - try: - for parent in [file_path] + list(file_path.parents): - db_path = parent / "medios-macina.db" - if db_path.exists(): - return parent - except Exception: - pass - return None - - def get_file(self, file_hash: str, **kwargs: Any) -> Optional[Path]: - """Retrieve file by hash, returning path to the file. - - Args: - file_hash: SHA256 hash of the file (64-char hex string) - - Returns: - Path to the file or None if not found - """ - try: - # Normalize the hash - normalized_hash = _normalize_hex_hash(file_hash) - if not normalized_hash: - return None - - search_dir = Path(self._location).expanduser() - from helper.folder_store import FolderDB - - with FolderDB(search_dir) as db: - # Search for file by hash - file_path = db.search_hash(normalized_hash) - - if file_path and file_path.exists(): - return file_path - - return None - - except Exception as exc: - debug(f"Failed to get file for hash {file_hash}: {exc}") - return None - - def pipe(self, file_hash: str, config: Optional[Dict[str, Any]] = None) -> Optional[str]: - """Get a playable path for a file in this folder store. - - For folder stores, this resolves the hash to the actual file path on disk. - - Args: - file_hash: SHA256 hash of the file - config: Optional config dict (unused for folder stores) - - Returns: - Absolute file path as string, or None if file not found - """ - file_path = self.get_file(file_hash) - if file_path: - return str(file_path.absolute()) - return None - - def get_metadata(self, file_hash: str) -> Optional[Dict[str, Any]]: - """Get metadata for a file from the database by hash. - - Args: - file_hash: SHA256 hash of the file (64-char hex string) - - Returns: - Dict with metadata fields (ext, size, hash, duration, etc.) or None if not found - """ - try: - # Normalize the hash - normalized_hash = _normalize_hex_hash(file_hash) - if not normalized_hash: - return None - - search_dir = Path(self._location).expanduser() - from helper.folder_store import DatabaseAPI - - with DatabaseAPI(search_dir) as api: - # Get file hash - file_hash_result = api.get_file_hash_by_hash(normalized_hash) - if not file_hash_result: - return None - - # Query metadata directly from database - cursor = api.get_cursor() - cursor.execute(""" - SELECT * FROM metadata WHERE hash = ? - """, (file_hash_result,)) - - row = cursor.fetchone() - if not row: - return None - - metadata = dict(row) - - # Parse JSON fields - for field in ['url', 'relationships']: - if metadata.get(field): - try: - import json - metadata[field] = json.loads(metadata[field]) - except (json.JSONDecodeError, TypeError): - metadata[field] = [] if field == 'url' else [] - - return metadata - except Exception as exc: - debug(f"Failed to get metadata for hash {file_hash}: {exc}") - return None - - def get_tag(self, file_identifier: str, **kwargs: Any) -> Tuple[List[str], str]: - """Get tags for a local file by hash. 
- - Returns: - Tuple of (tags_list, store_name) where store_name is the actual store name - """ - from helper.folder_store import FolderDB - try: - file_hash = file_identifier - if self._location: - try: - with FolderDB(Path(self._location)) as db: - db_tags = db.get_tags(file_hash) - if db_tags: - # Return actual store name instead of generic "local_db" - store_name = self._name if self._name else "local" - return list(db_tags), store_name - except Exception as exc: - debug(f"Local DB lookup failed: {exc}") - return [], "unknown" - except Exception as exc: - debug(f"get_tags failed for local file: {exc}") - return [], "unknown" - - def add_tag(self, hash: str, tag: List[str], **kwargs: Any) -> bool: - """Add tags to a local file by hash (via FolderDB). - - Handles namespace collapsing: when adding namespace:value, removes existing namespace:* tags. - Returns True if tags were successfully added. - """ - from helper.folder_store import FolderDB - try: - if not self._location: - return False - - try: - with FolderDB(Path(self._location)) as db: - # Get existing tags - existing_tags = list(db.get_tags(hash) or []) - original_tags_lower = {t.lower() for t in existing_tags} - - # Merge new tags, handling namespace overwrites - for new_tag in tag: - if ':' in new_tag: - namespace = new_tag.split(':', 1)[0] - # Remove existing tags in same namespace - existing_tags = [t for t in existing_tags if not t.startswith(namespace + ':')] - # Add new tag if not already present (case-insensitive check) - if new_tag.lower() not in original_tags_lower: - existing_tags.append(new_tag) - - # Save merged tags - db.add_tags_to_hash(hash, existing_tags) - return True - except Exception as exc: - debug(f"Local DB add_tags failed: {exc}") - return False - except Exception as exc: - debug(f"add_tag failed for local file: {exc}") - return False - - def delete_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool: - """Remove tags from a local file by hash.""" - from helper.folder_store import FolderDB - try: - file_hash = file_identifier - if self._location: - try: - with FolderDB(Path(self._location)) as db: - db.remove_tags_from_hash(file_hash, list(tags)) - return True - except Exception as exc: - debug(f"Local DB remove_tags failed: {exc}") - return False - except Exception as exc: - debug(f"delete_tag failed for local file: {exc}") - return False - - def get_url(self, file_identifier: str, **kwargs: Any) -> List[str]: - """Get known url for a local file by hash.""" - from helper.folder_store import FolderDB - try: - file_hash = file_identifier - if self._location: - try: - with FolderDB(Path(self._location)) as db: - meta = db.get_metadata(file_hash) or {} - return list(meta.get("url") or []) - except Exception as exc: - debug(f"Local DB get_metadata failed: {exc}") - return [] - except Exception as exc: - debug(f"get_url failed for local file: {exc}") - return [] - - def add_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool: - """Add known url to a local file by hash.""" - from helper.folder_store import FolderDB - try: - file_hash = file_identifier - if self._location: - try: - with FolderDB(Path(self._location)) as db: - meta = db.get_metadata(file_hash) or {} - url = list(meta.get("url") or []) - changed = False - for u in url: - if u not in url: - url.append(u) - changed = True - if changed: - db.update_metadata_by_hash(file_hash, {"url": url}) - return True - except Exception as exc: - debug(f"Local DB add_url failed: {exc}") - return False - except Exception as exc: 
- debug(f"add_url failed for local file: {exc}") - return False - - def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool: - """Delete known url from a local file by hash.""" - from helper.folder_store import FolderDB - try: - file_hash = file_identifier - if self._location: - try: - with FolderDB(Path(self._location)) as db: - meta = db.get_metadata(file_hash) or {} - url = list(meta.get("url") or []) - changed = False - for u in url: - if u in url: - url.remove(u) - changed = True - if changed: - db.update_metadata_by_hash(file_hash, {"url": url}) - return True - except Exception as exc: - debug(f"Local DB delete_url failed: {exc}") - return False - except Exception as exc: - debug(f"delete_url failed for local file: {exc}") - return False - - def delete_file(self, file_identifier: str, **kwargs: Any) -> bool: - """Delete a file from the folder store. - - Args: - file_identifier: The file path (as string) or hash of the file to delete - **kwargs: Optional parameters - - Returns: - True if deletion succeeded, False otherwise - """ - from helper.folder_store import FolderDB - try: - file_path = Path(file_identifier) - - # Delete from database - with FolderDB(Path(self._location)) as db: - db.delete_file(file_path) - - # Delete the actual file from disk - if file_path.exists(): - file_path.unlink() - debug(f"Deleted file: {file_path}") - return True - else: - debug(f"File not found on disk: {file_path}") - return True # Already gone - except Exception as exc: - debug(f"delete_file failed: {exc}") - return False - - -class HydrusNetwork(store): - """File storage backend for Hydrus client. - - Each instance represents a specific Hydrus client connection. - Maintains its own HydrusClient with session key. - """ - - def __init__(self, instance_name: str, api_key: str, url: str) -> None: - """Initialize Hydrus storage backend. - - Args: - instance_name: Name of this Hydrus instance (e.g., 'home', 'work') - api_key: Hydrus Client API access key - url: Hydrus client URL (e.g., 'http://192.168.1.230:45869') - """ - from helper import hydrus as hydrus_wrapper - - self._instance_name = instance_name - self._api_key = api_key - self._url = url - # Create persistent client with session key for this instance - self._client = hydrus_wrapper.HydrusClient(url=url, access_key=api_key) - - def name(self) -> str: - return self._instance_name - - def get_name(self) -> str: - return self._instance_name - - def add_file(self, file_path: Path, **kwargs: Any) -> str: - """Upload file to Hydrus with full metadata support. 
- - Args: - file_path: Path to the file to upload - tags: Optional list of tags to add - url: Optional list of url to associate with the file - title: Optional title (will be added as 'title:value' tag) - - Returns: - File hash from Hydrus - - Raises: - Exception: If upload fails - """ - from helper import hydrus as hydrus_wrapper - from helper.utils import sha256_file - - tags = kwargs.get("tags", []) - url = kwargs.get("url", []) - title = kwargs.get("title") - - # Add title to tags if provided and not already present - if title: - title_tag = f"title:{title}" - if not any(str(tag).lower().startswith("title:") for tag in tags): - tags = [title_tag] + list(tags) - - try: - # Compute file hash - file_hash = sha256_file(file_path) - debug(f"File hash: {file_hash}") - - # Use persistent client with session key - client = self._client - if client is None: - raise Exception("Hydrus client unavailable") - - # Check if file already exists in Hydrus - file_exists = False - try: - metadata = client.fetch_file_metadata(hashes=[file_hash]) - if metadata and isinstance(metadata, dict): - files = metadata.get("file_metadata", []) - if files: - file_exists = True - log( - f"ℹ️ Duplicate detected - file already in Hydrus with hash: {file_hash}", - file=sys.stderr, - ) - except Exception: - pass - - # Upload file if not already present - if not file_exists: - log(f"Uploading to Hydrus: {file_path.name}", file=sys.stderr) - response = client.add_file(file_path) - - # Extract hash from response - hydrus_hash: Optional[str] = None - if isinstance(response, dict): - hydrus_hash = response.get("hash") or response.get("file_hash") - if not hydrus_hash: - hashes = response.get("hashes") - if isinstance(hashes, list) and hashes: - hydrus_hash = hashes[0] - - if not hydrus_hash: - raise Exception(f"Hydrus response missing file hash: {response}") - - file_hash = hydrus_hash - log(f"Hydrus: {file_hash}", file=sys.stderr) - - # Add tags if provided (both for new and existing files) - if tags: - try: - # Use default tag service - service_name = "my tags" - except Exception: - service_name = "my tags" - - try: - debug(f"Adding {len(tags)} tag(s) to Hydrus: {tags}") - client.add_tags(file_hash, tags, service_name) - log(f"Tags added via '{service_name}'", file=sys.stderr) - except Exception as exc: - log(f"⚠️ Failed to add tags: {exc}", file=sys.stderr) - - # Associate url if provided (both for new and existing files) - if url: - log(f"Associating {len(url)} URL(s) with file", file=sys.stderr) - for url in url: - if url: - try: - client.associate_url(file_hash, str(url)) - debug(f"Associated URL: {url}") - except Exception as exc: - log(f"⚠️ Failed to associate URL {url}: {exc}", file=sys.stderr) - - return file_hash - - except Exception as exc: - log(f"❌ Hydrus upload failed: {exc}", file=sys.stderr) - raise - - def search_file(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]: - """Search Hydrus database for files matching query. - - Args: - query: Search query (tags, filenames, hashes, etc.) 
- limit: Maximum number of results to return (default: 100) - - Returns: - List of dicts with 'name', 'hash', 'size', 'tags' fields - - Example: - results = storage["hydrus"].search("artist:john_doe music") - results = storage["hydrus"].search("Simple Man") - """ - limit = kwargs.get("limit", 100) - - try: - client = self._client - if client is None: - raise Exception("Hydrus client unavailable") - - debug(f"Searching Hydrus for: {query}") - - # Parse the query into tags - # Handle both simple tags and complex queries - # "*" means "match all" - use system:everything tag in Hydrus - if query.strip() == "*": - # Use system:everything to match all files in Hydrus - tags = ["system:everything"] - else: - query_lower = query.lower().strip() - # If query doesn't have a namespace (no ':'), search all files and filter by title/tags - # If query has explicit namespace, use it as a tag search - if ':' not in query_lower: - # No namespace provided: search all files, then filter by title/tags containing the query - tags = ["system:everything"] - else: - # User provided explicit namespace (e.g., "creator:john" or "system:has_audio") - # Use it as a tag search - tags = [query_lower] - - if not tags: - debug(f"Found 0 result(s)") - return [] - - # Search files with the tags - search_result = client.search_files( - tags=tags, - return_hashes=True, - return_file_ids=True - ) - - # Extract file IDs from search result - file_ids = search_result.get("file_ids", []) - hashes = search_result.get("hashes", []) - - if not file_ids and not hashes: - debug(f"Found 0 result(s)") - return [] - - # Fetch metadata for the found files - results = [] - query_lower = query.lower().strip() - # Split by comma or space for AND logic - search_terms = set(query_lower.replace(',', ' ').split()) # For substring matching - - if file_ids: - metadata = client.fetch_file_metadata(file_ids=file_ids) - metadata_list = metadata.get("metadata", []) - - for meta in metadata_list: - if len(results) >= limit: - break - - file_id = meta.get("file_id") - hash_hex = meta.get("hash") - size = meta.get("size", 0) - - # Get tags for this file and extract title - tags_set = meta.get("tags", {}) - all_tags = [] - title = f"Hydrus File {file_id}" # Default fallback - all_tags_str = "" # For substring matching - - # debug(f"[HydrusBackend.search] Processing file_id={file_id}, tags type={type(tags_set)}") - - if isinstance(tags_set, dict): - # Collect both storage_tags and display_tags to capture siblings/parents and ensure title: is seen - def _collect(tag_list: Any) -> None: - nonlocal title, all_tags_str - if not isinstance(tag_list, list): - return - for tag in tag_list: - tag_text = str(tag) if tag else "" - if not tag_text: - continue - all_tags.append(tag_text) - all_tags_str += " " + tag_text.lower() - if tag_text.lower().startswith("title:") and title == f"Hydrus File {file_id}": - title = tag_text.split(":", 1)[1].strip() - - for service_name, service_tags in tags_set.items(): - if not isinstance(service_tags, dict): - continue - - storage_tags = service_tags.get("storage_tags", {}) - if isinstance(storage_tags, dict): - for tag_list in storage_tags.values(): - _collect(tag_list) - - display_tags = service_tags.get("display_tags", []) - _collect(display_tags) - - # Also consider top-level flattened tags payload if provided (Hydrus API sometimes includes it) - top_level_tags = meta.get("tags_flat", []) or meta.get("tags", []) - _collect(top_level_tags) - - # Resolve extension from MIME type - mime_type = meta.get("mime") - ext = "" - if 
mime_type: - for category in mime_maps.values(): - for ext_key, info in category.items(): - if mime_type in info.get("mimes", []): - ext = info.get("ext", "").lstrip('.') - break - if ext: - break - - # Filter results based on query type - # If user provided explicit namespace (has ':'), don't do substring filtering - # Just include what the tag search returned - has_namespace = ':' in query_lower - - if has_namespace: - # Explicit namespace search - already filtered by Hydrus tag search - # Include this result as-is - results.append({ - "hash": hash_hex, - "hash_hex": hash_hex, - "target": hash_hex, - "name": title, - "title": title, - "size": size, - "size_bytes": size, - "origin": self._instance_name, - "tags": all_tags, - "file_id": file_id, - "mime": mime_type, - "ext": ext, - }) - else: - # Free-form search: check if search terms match the title or tags - # Match if ALL search terms are found in title or tags (AND logic) - # AND use whole word matching - - # Combine title and tags for searching - searchable_text = (title + " " + all_tags_str).lower() - - match = True - if query_lower != "*": - for term in search_terms: - # Regex for whole word: \bterm\b - # Escape term to handle special chars - pattern = r'\b' + re.escape(term) + r'\b' - if not re.search(pattern, searchable_text): - match = False - break - - if match: - results.append({ - "hash": hash_hex, - "hash_hex": hash_hex, - "target": hash_hex, - "name": title, - "title": title, - "size": size, - "size_bytes": size, - "origin": self._instance_name, - "tags": all_tags, - "file_id": file_id, - "mime": mime_type, - "ext": ext, - }) - - debug(f"Found {len(results)} result(s)") - return results[:limit] - - except Exception as exc: - log(f"❌ Hydrus search failed: {exc}", file=sys.stderr) - import traceback - traceback.print_exc(file=sys.stderr) - raise - - def get_file(self, file_hash: str, **kwargs: Any) -> Optional[Path]: - """Open file in browser via Hydrus client API URL.""" - import tempfile - import webbrowser - - debug(f"[HydrusNetwork.get_file] Starting for hash: {file_hash[:12]}...") - - # Build browser URL with access key - base_url = self._client.url.rstrip('/') - access_key = self._client.access_key - browser_url = f"{base_url}/get_files/file?hash={file_hash}&Hydrus-Client-API-Access-Key={access_key}" - debug(f"[HydrusNetwork.get_file] Opening URL: {browser_url}") - - # Open in default browser - webbrowser.open(browser_url) - debug(f"[HydrusNetwork.get_file] Browser opened successfully") - - # Return the URL string instead of downloading - debug(f"[HydrusNetwork.get_file] Returning URL: {browser_url}") - return browser_url - - def pipe(self, file_hash: str, config: Optional[Dict[str, Any]] = None) -> Optional[str]: - """Get a playable path for a file in this Hydrus instance. - - For Hydrus stores, this builds a file URL with authentication. 
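The authenticated URL that both get_file() and pipe() build has this shape; the host, access key and hash below are placeholders, and the endpoint is the Hydrus Client API's /get_files/file route already used above.

base_url = "http://192.168.1.230:45869"
access_key = "HYDRUS_ACCESS_KEY"
file_hash = "0" * 64
url = f"{base_url}/get_files/file?hash={file_hash}&Hydrus-Client-API-Access-Key={access_key}"
# e.g. mpv "<url>" streams the file straight from the Hydrus client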
- - Args: - file_hash: SHA256 hash of the file - config: Optional config dict (unused, URL and key are from instance) - - Returns: - Hydrus API file URL with embedded access key, or None if client unavailable - """ - try: - if not self._client: - return None - - base_url = self._client.url.rstrip('/') - access_key = self._client.access_key - - # Build Hydrus file URL with access key - url = f"{base_url}/get_files/file?hash={file_hash}&Hydrus-Client-API-Access-Key={access_key}" - return url - except Exception as e: - debug(f"Error building Hydrus URL for {file_hash}: {e}") - return None - - def get_metadata(self, file_hash: str, **kwargs: Any) -> Optional[Dict[str, Any]]: - """Get metadata for a file from Hydrus by hash. - - Args: - file_hash: SHA256 hash of the file (64-char hex string) - - Returns: - Dict with metadata fields or None if not found - """ - try: - client = self._client - if not client: - debug("get_metadata: Hydrus client unavailable") - return None - - # Fetch file metadata - payload = client.fetch_file_metadata(hashes=[file_hash], include_service_keys_to_tags=True) - - if not payload or not payload.get("metadata"): - return None - - meta = payload["metadata"][0] - - # Extract title from tags - title = f"Hydrus_{file_hash[:12]}" - tags_payload = meta.get("tags", {}) - if isinstance(tags_payload, dict): - for service_data in tags_payload.values(): - if isinstance(service_data, dict): - display_tags = service_data.get("display_tags", {}) - if isinstance(display_tags, dict): - current_tags = display_tags.get("0", []) - if isinstance(current_tags, list): - for tag in current_tags: - if str(tag).lower().startswith("title:"): - title = tag.split(":", 1)[1].strip() - break - if title != f"Hydrus_{file_hash[:12]}": - break - - # Determine extension from mime type - mime_type = meta.get("mime", "") - ext = "" - if mime_type: - from helper.utils_constant import mime_maps - for category, extensions in mime_maps.items(): - for extension, mime in extensions.items(): - if mime == mime_type: - ext = extension.lstrip(".") - break - if ext: - break - - return { - "hash": file_hash, - "title": title, - "ext": ext, - "size": meta.get("size", 0), - "mime": mime_type, - } - - except Exception as exc: - debug(f"Failed to get metadata from Hydrus: {exc}") - return None - - def get_tag(self, file_identifier: str, **kwargs: Any) -> Tuple[List[str], str]: - """Get tags for a file from Hydrus by hash. 
- - Args: - file_identifier: File hash (SHA256 hex string) - **kwargs: Optional service_name parameter - - Returns: - Tuple of (tags_list, source_description) - where source is always "hydrus" - """ - try: - file_hash = str(file_identifier) - - # Get Hydrus client and service info - client = self._client - if not client: - debug("get_tags: Hydrus client unavailable") - return [], "unknown" - - # Fetch file metadata - payload = client.fetch_file_metadata( - hashes=[file_hash], - include_service_keys_to_tags=True, - include_file_url=False - ) - - items = payload.get("metadata") if isinstance(payload, dict) else None - if not isinstance(items, list) or not items: - debug(f"get_tags: No metadata returned for hash {file_hash}") - return [], "unknown" - - meta = items[0] if isinstance(items[0], dict) else None - if not isinstance(meta, dict) or meta.get("file_id") is None: - debug(f"get_tags: Invalid metadata for hash {file_hash}") - return [], "unknown" - - # Extract tags using service name - service_name = "my tags" - service_key = hydrus_wrapper.get_tag_service_key(client, service_name) - - # Extract tags from metadata - tags = self._extract_tags_from_hydrus_meta(meta, service_key, service_name) - - return tags, "hydrus" - - except Exception as exc: - debug(f"get_tags failed for Hydrus file: {exc}") - return [], "unknown" - - def add_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool: - """Add tags to a Hydrus file. - """ - try: - client = self._client - if client is None: - debug("add_tag: Hydrus client unavailable") - return False - service_name = kwargs.get("service_name") or "my tags" - # Ensure tags is a list - tag_list = list(tags) if isinstance(tags, (list, tuple)) else [str(tags)] - if not tag_list: - return False - client.add_tags(file_identifier, tag_list, service_name) - return True - except Exception as exc: - debug(f"Hydrus add_tag failed: {exc}") - return False - - def delete_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool: - """Delete tags from a Hydrus file. - """ - try: - client = self._client - if client is None: - debug("delete_tag: Hydrus client unavailable") - return False - service_name = kwargs.get("service_name") or "my tags" - tag_list = list(tags) if isinstance(tags, (list, tuple)) else [str(tags)] - if not tag_list: - return False - client.delete_tags(file_identifier, tag_list, service_name) - return True - except Exception as exc: - debug(f"Hydrus delete_tag failed: {exc}") - return False - - def get_url(self, file_identifier: str, **kwargs: Any) -> List[str]: - """Get known url for a Hydrus file. - """ - try: - client = self._client - if client is None: - debug("get_url: Hydrus client unavailable") - return [] - payload = client.fetch_file_metadata(hashes=[str(file_identifier)], include_file_url=True) - items = payload.get("metadata") if isinstance(payload, dict) else None - if not isinstance(items, list) or not items: - return [] - meta = items[0] - url = meta.get("url") or [] - return list(url) - except Exception as exc: - debug(f"Hydrus get_url failed: {exc}") - return [] - - def add_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool: - """Associate one or more url with a Hydrus file. 
- """ - try: - client = self._client - if client is None: - debug("add_url: Hydrus client unavailable") - return False - for u in url: - client.associate_url(file_identifier, u) - return True - except Exception as exc: - debug(f"Hydrus add_url failed: {exc}") - return False - - def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool: - """Delete one or more url from a Hydrus file. - """ - try: - client = self._client - if client is None: - debug("delete_url: Hydrus client unavailable") - return False - for u in url: - client.delete_url(file_identifier, u) - return True - except Exception as exc: - debug(f"Hydrus delete_url failed: {exc}") - return False - - @staticmethod - def _extract_tags_from_hydrus_meta( - meta: Dict[str, Any], - service_key: Optional[str], - service_name: str - ) -> List[str]: - """Extract current tags from Hydrus metadata dict. - - Prefers display_tags (includes siblings/parents, excludes deleted). - Falls back to storage_tags status '0' (current). - """ - tags_payload = meta.get("tags") - if not isinstance(tags_payload, dict): - return [] - - svc_data = None - if service_key: - svc_data = tags_payload.get(service_key) - if not isinstance(svc_data, dict): - return [] - - # Prefer display_tags (Hydrus computes siblings/parents) - display = svc_data.get("display_tags") - if isinstance(display, list) and display: - return [str(t) for t in display if isinstance(t, (str, bytes)) and str(t).strip()] - - # Fallback to storage_tags status '0' (current) - storage = svc_data.get("storage_tags") - if isinstance(storage, dict): - current_list = storage.get("0") or storage.get(0) - if isinstance(current_list, list): - return [str(t) for t in current_list if isinstance(t, (str, bytes)) and str(t).strip()] - - return [] - - - -class MatrixStorageBackend(store): - """File storage backend for Matrix (Element) chat rooms.""" - - def get_name(self) -> str: - return "matrix" - - def list_rooms(self, config: Dict[str, Any]) -> List[Dict[str, Any]]: - """List joined rooms with their names.""" - matrix_conf = config.get('storage', {}).get('matrix', {}) - homeserver = matrix_conf.get('homeserver') - access_token = matrix_conf.get('access_token') - - if not homeserver or not access_token: - return [] - - if not homeserver.startswith('http'): - homeserver = f"https://{homeserver}" - - headers = {"Authorization": f"Bearer {access_token}"} - - try: - # Get joined rooms - resp = requests.get(f"{homeserver}/_matrix/client/v3/joined_rooms", headers=headers, timeout=10) - if resp.status_code != 200: - return [] - - room_ids = resp.json().get('joined_rooms', []) - rooms = [] - - for rid in room_ids: - # Try to get room name - name = "Unknown Room" - try: - # Get state event for name - name_resp = requests.get( - f"{homeserver}/_matrix/client/v3/rooms/{rid}/state/m.room.name", - headers=headers, - timeout=2 - ) - if name_resp.status_code == 200: - name = name_resp.json().get('name', name) - else: - # Try canonical alias - alias_resp = requests.get( - f"{homeserver}/_matrix/client/v3/rooms/{rid}/state/m.room.canonical_alias", - headers=headers, - timeout=2 - ) - if alias_resp.status_code == 200: - name = alias_resp.json().get('alias', name) - except Exception: - pass - - rooms.append({'id': rid, 'name': name}) - - return rooms - except Exception as e: - log(f"Error listing Matrix rooms: {e}", file=sys.stderr) - return [] - - def upload(self, file_path: Path, **kwargs: Any) -> str: - """Upload file to Matrix room. 
- - Requires 'config' in kwargs with 'storage.matrix' settings: - - homeserver: URL of homeserver (e.g. https://matrix.org) - - user_id: User ID (e.g. @user:matrix.org) - - access_token: Access token (preferred) OR password - - room_id: Room ID to upload to (e.g. !roomid:matrix.org) - """ - config = kwargs.get('config', {}) - if not config: - raise ValueError("Config required for Matrix upload") - - matrix_conf = config.get('storage', {}).get('matrix', {}) - if not matrix_conf: - raise ValueError("Matrix storage not configured in config.json") - - homeserver = matrix_conf.get('homeserver') - # user_id = matrix_conf.get('user_id') # Not strictly needed if we have token - access_token = matrix_conf.get('access_token') - room_id = matrix_conf.get('room_id') - - if not homeserver: - raise ValueError("Matrix homeserver required") - - # Ensure homeserver has protocol - if not homeserver.startswith('http'): - homeserver = f"https://{homeserver}" - - # Login if no access token (optional implementation, for now assume token) - if not access_token: - raise ValueError("Matrix access_token required (login not yet implemented)") - - # Handle room selection if not provided - if not room_id: - log("No room_id configured. Fetching joined rooms...", file=sys.stderr) - rooms = self.list_rooms(config) - - if not rooms: - raise ValueError("No joined rooms found or failed to fetch rooms.") - - from result_table import ResultTable - table = ResultTable("Matrix Rooms") - for i, room in enumerate(rooms): - row = table.add_row() - row.add_column("#", str(i + 1)) - row.add_column("Name", room['name']) - row.add_column("ID", room['id']) - - print(table) - - # Simple interactive selection - try: - selection = input("Select room # to upload to: ") - idx = int(selection) - 1 - if 0 <= idx < len(rooms): - room_id = rooms[idx]['id'] - log(f"Selected room: {rooms[idx]['name']} ({room_id})", file=sys.stderr) - else: - raise ValueError("Invalid selection") - except Exception: - raise ValueError("Invalid room selection") - - if not room_id: - raise ValueError("Matrix room_id required") - - # 1. Upload Media - upload_url = f"{homeserver}/_matrix/media/r3/upload" - headers = { - "Authorization": f"Bearer {access_token}", - "Content-Type": "application/octet-stream" # Or guess mime type - } - - import mimetypes - mime_type, _ = mimetypes.guess_type(file_path) - if mime_type: - headers["Content-Type"] = mime_type - - filename = file_path.name - - try: - with open(file_path, 'rb') as f: - resp = requests.post(upload_url, headers=headers, data=f, params={"filename": filename}) - - if resp.status_code != 200: - raise Exception(f"Matrix upload failed: {resp.text}") - - content_uri = resp.json().get('content_uri') - if not content_uri: - raise Exception("No content_uri returned from Matrix upload") - - # 2. 
Send Message - send_url = f"{homeserver}/_matrix/client/r0/rooms/{room_id}/send/m.room.message" - - # Determine msgtype - msgtype = "m.file" - if mime_type: - if mime_type.startswith("image/"): msgtype = "m.image" - elif mime_type.startswith("video/"): msgtype = "m.video" - elif mime_type.startswith("audio/"): msgtype = "m.audio" - - payload = { - "msgtype": msgtype, - "body": filename, - "url": content_uri, - "info": { - "mimetype": mime_type, - "size": file_path.stat().st_size - } - } - - resp = requests.post(send_url, headers=headers, json=payload) - if resp.status_code != 200: - raise Exception(f"Matrix send message failed: {resp.text}") - - event_id = resp.json().get('event_id') - return f"matrix://{room_id}/{event_id}" - - except Exception as e: - log(f"❌ Matrix upload error: {e}", file=sys.stderr) - raise - - - # --- Not supported for Matrix: tagging & URL operations (return safe defaults) --- - def get_tag(self, file_identifier: str, **kwargs: Any) -> Tuple[List[str], str]: - return [], "matrix" - - def add_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool: - return False - - def delete_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool: - return False - - def get_url(self, file_identifier: str, **kwargs: Any) -> List[str]: - return [] - - def add_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool: - return False - - def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool: - return False - - - -class RemoteStorageBackend(store): - """File storage backend for remote Android/network storage servers. - - Connects to a remote storage server (e.g., running on Android phone) - via REST API. All operations are proxied to the remote server. - """ - - def __init__(self, server_url: str, timeout: int = 30, api_key: str = None) -> None: - """Initialize remote storage backend. - - Args: - server_url: Base URL of remote storage server (e.g., http://192.168.1.100:5000) - timeout: Request timeout in seconds - api_key: Optional API key for authentication - """ - try: - import requests - except ImportError: - raise ImportError("requests library required for RemoteStorageBackend. Install with: pip install requests") - - self.server_url = server_url.rstrip('/') - self.timeout = timeout - self.api_key = api_key - self._session = requests.Session() - - # Add API key to default headers if provided - if self.api_key: - self._session.headers.update({'X-API-Key': self.api_key}) - - def get_name(self) -> str: - return "remote" - - - - def _request(self, method: str, endpoint: str, **kwargs) -> Dict[str, Any]: - """Make HTTP request to remote server.""" - import requests - from urllib.parse import urljoin - - url = urljoin(self.server_url, endpoint) - - try: - response = self._session.request( - method, - url, - timeout=self.timeout, - **kwargs - ) - - if response.status_code == 404: - raise Exception(f"Remote resource not found: {endpoint}") - - if response.status_code >= 400: - try: - error_data = response.json() - error_msg = error_data.get('error', response.text) - except: - error_msg = response.text - raise Exception(f"Remote server error {response.status_code}: {error_msg}") - - return response.json() - - except requests.exceptions.RequestException as e: - raise Exception(f"Connection to {self.server_url} failed: {e}") - - def upload(self, file_path: Path, **kwargs: Any) -> str: - """Upload file to remote storage. 
- - Args: - file_path: Path to the file to upload - tags: Optional list of tags to add - url: Optional list of known url - - Returns: - Remote file hash - """ - from helper.utils import sha256_file - - if not file_path.exists(): - raise ValueError(f"File not found: {file_path}") - - try: - # Index the file on remote server - data = {"path": str(file_path)} - - tags = kwargs.get("tags", []) - if tags: - data["tags"] = tags - - url = kwargs.get("url", []) - if url: - data["url"] = url - - result = self._request('POST', '/files/index', json=data) - file_hash = result.get('hash') - - if file_hash: - log(f"✓ File indexed on remote storage: {file_hash}", file=sys.stderr) - return file_hash - else: - raise Exception("Remote server did not return file hash") - - except Exception as exc: - debug(f"Remote upload failed: {exc}", file=sys.stderr) - raise - - # Tag and URL operations - Remote server default: not supported - def get_tag(self, file_identifier: str, **kwargs: Any) -> Tuple[List[str], str]: - return [], "remote" - - def add_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool: - return False - - def delete_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool: - return False - - def get_url(self, file_identifier: str, **kwargs: Any) -> List[str]: - return [] - - def add_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool: - return False - - def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool: - return False - - def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]: - """Search files on remote storage. - - Args: - query: Search query - limit: Maximum results - - Returns: - List of search results - """ - limit = kwargs.get("limit") - try: - limit = int(limit) if limit is not None else 100 - except (TypeError, ValueError): - limit = 100 - - if limit <= 0: - limit = 100 - - try: - response = self._request('GET', '/files/search', params={ - 'q': query, - 'limit': limit - }) - - files = response.get('files', []) - - # Transform remote format to standard result format - results = [] - for f in files: - results.append({ - "name": f.get('name', '').split('/')[-1], # Get filename from path - "title": f.get('name', f.get('path', '')).split('/')[-1], - "ext": f.get('ext', ''), - "path": f.get('path', ''), - "target": f.get('path', ''), - "hash": f.get('hash', ''), - "origin": "remote", - "size": f.get('size', 0), - "size_bytes": f.get('size', 0), - "tags": f.get('tags', []), - }) - - debug(f"Remote search found {len(results)} results", file=sys.stderr) - return results - - except Exception as exc: - log(f"❌ Remote search failed: {exc}", file=sys.stderr) - raise - - -class FileStorage: - """Unified file storage interface supporting multiple backend instances. - - Each backend type (folder, hydrusnetwork) can have multiple named instances. - Access backends by their configured names. 
- - Config structure: - { - "store": { - "folder": { - "default": {"path": "C:\\Media Machina"}, - "test": {"path": "C:\\Users\\Admin\\Downloads\\Video"} - }, - "hydrusnetwork": { - "home": { - "Hydrus-Client-API-Access-Key": "d4321f...", - "url": "http://192.168.1.230:45869" - }, - "work": { - "Hydrus-Client-API-Access-Key": "abc123...", - "url": "http://192.168.1.100:45869" - } - } - } - } - - Example: - storage = FileStorage(config) - - # Upload to different named instances - hash1 = storage["test"].add_file(Path("file.mp3"), tags=["music"]) - hash2 = storage["home"].add_file(Path("file.mp3"), tags=["music"]) - hash3 = storage["work"].add_file(Path("file.mp3"), tags=["music"]) - - # Search across different instances - results = storage["home"].search("music") - results = storage["test"].search("song") - """ - - def __init__(self, config: Optional[Dict[str, Any]] = None, suppress_debug: bool = False) -> None: - """Initialize the file storage system with available backends. - - Args: - config: Configuration dict with backend settings (Local.path, HydrusNetwork, Debrid, etc.) - suppress_debug: If True, suppress debug output during initialization (useful for autocomplete) - """ - self.suppress_debug = suppress_debug - config = config or {} - - # Extract backend-specific settings from config - from config import get_local_storage_path - - local_path = get_local_storage_path(config) - local_path_str = str(local_path) if local_path else None - - self._backends: Dict[str, store] = {} - - # Build folder stores from config (support both 'storage' and legacy 'store' top-level keys) - folder_sources = None - cfg_storage = config.get("storage") or config.get("store") or {} - if isinstance(cfg_storage, dict): - val = cfg_storage.get("folder") - if isinstance(val, dict): - folder_sources = val - - # If folder sources provided, create backends for each entry - if folder_sources: - # Normalize into name -> path mapping - folder_map: Dict[str, str] = {} - for key, value in folder_sources.items(): - if isinstance(value, dict): - path_val = value.get("path") - elif isinstance(value, (str, bytes)): - path_val = str(value) - else: - path_val = None - if path_val: - folder_map[str(key)] = str(Path(path_val).expanduser()) - - # Register all folder stores by their explicit names from config - for name, path in folder_map.items(): - self._backends[name] = Folder(location=path, name=name) - else: - # Fallback: use legacy single local path if present - if local_path_str: - self._backends["default"] = Folder(location=local_path_str, name="default") - - # Matrix (chat room) acts as a provider, not a persistent storage backend. - # We no longer register Matrix as a storage backend here; providers should be separate classes. 
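The constructor logic above normalizes `storage.folder` entries (either `{"name": {"path": ...}}` or a bare path string) into named `Folder` backends. A minimal standalone sketch of that normalization, under the config shape shown in the docstring — the helper name `normalize_folder_sources` is illustrative only, not part of the codebase:

```python
from pathlib import Path
from typing import Any, Dict

def normalize_folder_sources(cfg_storage: Dict[str, Any]) -> Dict[str, str]:
    """Map folder-store config entries to name -> expanded path.

    Mirrors the FileStorage.__init__ behaviour sketched above: dict values
    contribute their "path" key, plain strings are used as-is, and anything
    else is skipped.
    """
    folder_map: Dict[str, str] = {}
    folder_sources = cfg_storage.get("folder") if isinstance(cfg_storage, dict) else None
    if not isinstance(folder_sources, dict):
        return folder_map
    for name, value in folder_sources.items():
        if isinstance(value, dict):
            path_val = value.get("path")
        elif isinstance(value, (str, bytes)):
            path_val = str(value)
        else:
            path_val = None
        if path_val:
            folder_map[str(name)] = str(Path(path_val).expanduser())
    return folder_map

# Example (hypothetical paths):
# normalize_folder_sources({"folder": {"default": {"path": "~/media"}, "test": "D:/Video"}})
# -> {"default": "<expanded ~/media>", "test": "D:/Video"}
```

Each resulting name/path pair is then registered as a `Folder(location=path, name=name)` backend, which is why instances are addressed by their configured names (`storage["test"]`, `storage["home"]`, ...).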
- - # Build HydrusNetwork backends from config['store']['hydrusnetwork'] - # Register all instances regardless of current connectivity - connection errors - # will be caught when actually trying to use the backend - hydrus_sources = cfg_storage.get("hydrusnetwork") - if isinstance(hydrus_sources, dict): - for instance_name, instance_config in hydrus_sources.items(): - if isinstance(instance_config, dict): - api_key = instance_config.get("Hydrus-Client-API-Access-Key") - url = instance_config.get("url") - - # Skip if missing credentials - don't register instances without full config - if not api_key or not url: - continue - - # Register the instance - connection will be tested when actually used - try: - self._backends[instance_name] = HydrusNetwork( - instance_name=instance_name, - api_key=api_key, - url=url - ) - if not self.suppress_debug: - debug(f"[FileStorage] Registered Hydrus instance '{instance_name}': {url}") - except Exception as e: - if not self.suppress_debug: - debug(f"[FileStorage] Failed to register Hydrus instance '{instance_name}': {e}") - continue - - # Include remote storage backends from config (for Android/network servers) - remote_storages = config.get("remote_storages", []) - if isinstance(remote_storages, list): - for remote_config in remote_storages: - if isinstance(remote_config, dict): - name = remote_config.get("name", "remote") - url = remote_config.get("url") - timeout = remote_config.get("timeout", 30) - api_key = remote_config.get("api_key") - - if url: - try: - backend = RemoteStorageBackend(url, timeout=timeout, api_key=api_key) - self._backends[name] = backend - auth_status = " (with auth)" if api_key else " (no auth)" - log(f"Registered remote storage backend: {name} -> {url}{auth_status}", file=sys.stderr) - except Exception as e: - log(f"Failed to register remote storage '{name}': {e}", file=sys.stderr) - - def list_backends(self) -> list[str]: - """Return available backend keys for autocomplete and validation.""" - return sorted(self._backends.keys()) - - def list_searchable_backends(self) -> list[str]: - """Return backend names that support searching.""" - searchable = [] - for name, backend in self._backends.items(): - if callable(getattr(backend, 'search', None)): - searchable.append(name) - return sorted(searchable) - - def __getitem__(self, backend_name: str) -> store: - """Get a storage backend by name. - - Args: - backend_name: Name of the backend ('0x0', 'local', 'hydrus') - - Returns: - StorageBackend instance - - Raises: - KeyError: If backend not found - """ - if backend_name not in self._backends: - raise KeyError( - f"Unknown storage backend: {backend_name}. " - f"Available: {list(self._backends.keys())}" - ) - return self._backends[backend_name] - - def register(self, backend: store) -> None: - """Register a custom storage backend. - - Args: - backend: StorageBackend instance to register - """ - name = backend.get_name() - self._backends[name] = backend - log(f"Registered storage backend: {name}", file=sys.stderr) - - def is_available(self, backend_name: str) -> bool: - """Check if a backend is available. - - Args: - backend_name: Name of the backend - - Returns: - True if backend is registered - """ - return backend_name in self._backends - - def list_searchable_backends(self) -> list[str]: - """Get list of backends that support searching. 
- - Returns: - List of searchable backend names - """ - return [ - name for name, backend in self._backends.items() - if callable(getattr(backend, 'search', None)) or callable(getattr(backend, 'search_file', None)) - ] - - # --- remaining FileStorage methods --- - - diff --git a/hydrus_health_check.py b/hydrus_health_check.py index 0c1af67..92a8680 100644 --- a/hydrus_health_check.py +++ b/hydrus_health_check.py @@ -7,7 +7,7 @@ disables Hydrus features if the API is unavailable. import logging import sys -from helper.logger import log, debug +from SYS.logger import log, debug from typing import Tuple, Optional, Dict, Any from pathlib import Path @@ -37,7 +37,7 @@ def check_hydrus_availability(config: Dict[str, Any]) -> Tuple[bool, Optional[st - (False, reason) if Hydrus is unavailable with reason """ try: - from helper.hydrus import is_available as _is_hydrus_available + from API.HydrusNetwork import is_available as _is_hydrus_available logger.info("[Hydrus Health Check] Pinging Hydrus API...") is_available, reason = _is_hydrus_available(config, use_cache=False) @@ -97,7 +97,7 @@ def initialize_hydrus_health_check(config: Dict[str, Any], emit_debug: bool = Tr def check_debrid_availability(config: Dict[str, Any]) -> Tuple[bool, Optional[str]]: """Check if Debrid API is available.""" try: - from helper.http_client import HTTPClient + from API.HTTP import HTTPClient logger.info("[Debrid Health Check] Pinging Debrid API...") with HTTPClient(timeout=10.0, verify_ssl=True) as client: response = client.get('https://api.alldebrid.com/v4/ping') @@ -387,8 +387,8 @@ def initialize_local_library_scan(config: Dict[str, Any], emit_debug: bool = Tru This ensures that any new files in configured folder stores are indexed and their sidecar files are imported and cleaned up. 
""" - from helper.folder_store import LocalLibraryInitializer - from helper.store import Folder + from API.folder import LocalLibraryInitializer + from Store.Folder import Folder logger.info("[Startup] Starting folder store scans...") diff --git a/metadata.py b/metadata.py index 56d94dc..59190de 100644 --- a/metadata.py +++ b/metadata.py @@ -5,7 +5,7 @@ import sys import shutil import sqlite3 import requests -from helper.logger import log, debug +from SYS.logger import log, debug from urllib.parse import urlsplit, urlunsplit, unquote from collections import deque from pathlib import Path @@ -29,7 +29,7 @@ except ImportError: # pragma: no cover resolve_output_dir = None # type: ignore[assignment] try: - from helper.utils import sha256_file + from SYS.utils import sha256_file except ImportError: # pragma: no cover sha256_file = None # type: ignore[assignment] @@ -3215,7 +3215,7 @@ def enrich_playlist_entries(entries: list, extractor: str) -> list: List of enriched entry dicts """ # Import here to avoid circular dependency - from helper.download import is_url_supported_by_ytdlp + from SYS.download import is_url_supported_by_ytdlp if not entries: return entries @@ -3609,7 +3609,7 @@ def scrape_isbn_metadata(isbn: str) -> List[str]: """Scrape metadata for an ISBN using Open Library API.""" new_tags = [] try: - from helper.http_client import HTTPClient + from API.HTTP import HTTPClient import json as json_module isbn_clean = isbn.replace('-', '').strip() @@ -3733,7 +3733,7 @@ def scrape_openlibrary_metadata(olid: str) -> List[str]: """ new_tags = [] try: - from helper.http_client import HTTPClient + from API.HTTP import HTTPClient import json as json_module # Format: OL9674499M or just 9674499M diff --git a/models.py b/models.py index 1773004..3fb3b5d 100644 --- a/models.py +++ b/models.py @@ -79,7 +79,7 @@ class PipeObject: object state throughout the pipeline. """ try: - from helper.logger import is_debug_enabled, debug + from SYS.logger import is_debug_enabled, debug if not is_debug_enabled(): return diff --git a/pipeline.py b/pipeline.py index 31820f1..8d3be86 100644 --- a/pipeline.py +++ b/pipeline.py @@ -22,7 +22,7 @@ import sys from typing import Any, Dict, List, Optional, Sequence from models import PipelineStageContext -from helper.logger import log +from SYS.logger import log def _is_selectable_table(table: Any) -> bool: diff --git a/readme.md b/readme.md new file mode 100644 index 0000000..87d6aed --- /dev/null +++ b/readme.md @@ -0,0 +1,73 @@ +# Medios-Macina +- Audio +- Video +- Image +- Text + +### File Store +- HydrusNetwork https://github.com/hydrusnetwork/hydrus +- Local drive (C://mylibrary/path) + +### File Providers +- Youtube +- Openlibrary/Archive.org (free account needed) +- Soulseek +- Gog-Games (limited without paid API) +- Libgen +- All-Debrid https://alldebrid.com/ + +### Features +- Full MPV integration https://github.com/mpv-player/mpv +- Database file management +- API/CLI exclusive +- Plug and play stores and add-ons +- YT-DLP integration +- CMDLET easy syntax +- CLI auto-complete + +Install what you need and want, after you have the requirements.txt installed as well you will need to open terminal at the repository download location and run the cli file like . 
+ + +#### Quick + +```shell +cd "C:\location\to\repository\medios-machina\" +python cli.py +``` +Adding your first file +```python +.pipe -list # List MPV current playing/list +.pipe -save # Save current MPV playlist to local library +.pipe -load # List saved playlists; use @N to load one +.pipe "https://www.youtube.com/watch?v=_23dFb50Z2Y" # Add URL to current playlist +``` + +Example pipelines: + +1. **Simple download with metadata (tags and URL registration)**: +``` +download-media "https://www.youtube.com/watch?v=dQw4w9WgXcQ" | add-file -storage local | add-url +``` + +2. **Download playlist item with tags**: +``` +download-media "https://www.youtube.com/playlist?list=PLxxxxx" -item 2 | add-file -storage local | add-url +``` + +3. **Download with merge (e.g., Bandcamp albums)**: +``` +download-data "https://altrusiangrace.bandcamp.com/album/ancient-egyptian-legends-full-audiobook" | merge-file | add-file -storage local | add-url +``` + +4. **Download direct file (PDF, document)**: +``` +download-file "https://example.com/file.pdf" | add-file -storage local | add-url +``` + +Search examples: + +1. search-file -provider youtube "something in the way" + +2. @1 + +3. download-media [URL] | add-file -storage local | add-url \ No newline at end of file diff --git a/result_table.py b/result_table.py index a6496be..1858e7a 100644 --- a/result_table.py +++ b/result_table.py @@ -194,6 +194,13 @@ class ResultTable: """If True, skip automatic sorting so display order matches input order.""" self.no_choice: bool = False """When True, suppress row numbers/selection to make the table non-interactive.""" + self.table: Optional[str] = None + """Table type (e.g., 'youtube', 'soulseek') for context-aware selection logic.""" + + def set_table(self, table: str) -> "ResultTable": + """Set the table type for context-aware selection logic.""" + self.table = table + return self def set_no_choice(self, no_choice: bool = True) -> "ResultTable": """Mark the table as non-interactive (no row numbers, no selection parsing).""" @@ -393,11 +400,11 @@ class ResultTable: # Core fields (legacy fallback) title = getattr(result, 'title', '') - origin = getattr(result, 'origin', '').lower() + table = str(getattr(result, 'table', '') or '').lower() # Handle extension separation for local files extension = "" - if title and origin == 'local': + if title and table == 'local': path_obj = Path(title) if path_obj.suffix: extension = path_obj.suffix.lstrip('.') @@ -409,8 +416,8 @@ class ResultTable: # Extension column row.add_column("Ext", extension) - if hasattr(result, 'origin') and result.origin: - row.add_column("Source", result.origin) + if hasattr(result, 'table') and getattr(result, 'table', None): + row.add_column("Source", str(getattr(result, 'table'))) if hasattr(result, 'detail') and result.detail: row.add_column("Detail", result.detail) @@ -448,19 +455,19 @@ class ResultTable: Shows only essential columns: - Title (required) - Ext (extension) - - Origin (source backend) + - Storage (source backend) - Size (formatted MB, integer only) All other fields are stored in item but not displayed to keep table compact. Use @row# syntax to pipe full item data to next command. 
""" - # Title (required - use origin as fallback) - title = getattr(item, 'title', None) or getattr(item, 'origin', 'Unknown') - origin = getattr(item, 'origin', '').lower() + # Title (required) + title = getattr(item, 'title', None) or 'Unknown' + table = str(getattr(item, 'table', '') or getattr(item, 'store', '') or '').lower() # Handle extension separation for local files extension = "" - if title and origin == 'local': + if title and table == 'local': # Try to split extension path_obj = Path(title) if path_obj.suffix: @@ -474,8 +481,10 @@ class ResultTable: row.add_column("Ext", extension) # Storage (source backend - hydrus, local, debrid, etc) - if hasattr(item, 'origin') and item.origin: - row.add_column("Storage", item.origin) + if getattr(item, 'table', None): + row.add_column("Storage", str(getattr(item, 'table'))) + elif getattr(item, 'store', None): + row.add_column("Storage", str(getattr(item, 'store'))) # Size (for files) - integer MB only if hasattr(item, 'size_bytes') and item.size_bytes: @@ -499,8 +508,6 @@ class ResultTable: # Source/Store (where the tags come from) if hasattr(item, 'source') and item.source: row.add_column("Store", item.source) - elif hasattr(item, 'origin') and item.origin: - row.add_column("Store", item.origin) def _add_pipe_object(self, row: ResultRow, obj: Any) -> None: @@ -549,7 +556,7 @@ class ResultTable: Priority field groups (uses first match within each group): - title | name | filename - - origin | source + - store | table | source - type | media_kind | kind - target | path | url - hash | hash_hex | file_hash @@ -574,12 +581,12 @@ class ResultTable: visible_data = {k: v for k, v in data.items() if not is_hidden_field(k)} # Handle extension separation for local files - origin = str(visible_data.get('origin', '') or visible_data.get('source', '')).lower() + store_val = str(visible_data.get('store', '') or visible_data.get('table', '') or visible_data.get('source', '')).lower() # Debug logging - # print(f"DEBUG: Processing dict result. Origin: {origin}, Keys: {list(visible_data.keys())}") + # print(f"DEBUG: Processing dict result. Store: {store_val}, Keys: {list(visible_data.keys())}") - if origin == 'local': + if store_val == 'local': # Find title field title_field = next((f for f in ['title', 'name', 'filename'] if f in visible_data), None) if title_field: @@ -629,8 +636,8 @@ class ResultTable: # Mark 'columns' as handled so we don't add it as a field added_fields.add('columns') # Also mark common fields that shouldn't be re-displayed if they're in columns - # This prevents showing both "Store" (from columns) and "Origin" (from data fields) - added_fields.add('origin') + # This prevents showing both "Store" (from columns) and "Store" (from data fields) + added_fields.add('table') added_fields.add('source') added_fields.add('target') added_fields.add('path') @@ -649,7 +656,7 @@ class ResultTable: ('title', ['title']), ('ext', ['ext']), ('size', ['size', 'size_bytes']), - ('store', ['store', 'origin', 'source']), + ('store', ['store', 'table', 'source']), ] # Add priority field groups first - use first match in each group @@ -668,7 +675,7 @@ class ResultTable: value_str = value_str[:57] + "..." 
# Map field names to display column names - if field in ['store', 'origin', 'source']: + if field in ['store', 'table', 'source']: col_name = "Store" elif field in ['size', 'size_bytes']: col_name = "Size (Mb)" diff --git a/scripts/check_cmdlets_import.py b/scripts/check_cmdlets_import.py deleted file mode 100644 index 91c73de..0000000 --- a/scripts/check_cmdlets_import.py +++ /dev/null @@ -1,10 +0,0 @@ -import importlib -import traceback -import sys - -try: - importlib.import_module('cmdlets') - print('cmdlets imported OK') -except Exception: - traceback.print_exc() - sys.exit(1) diff --git a/scripts/check_download_media.py b/scripts/check_download_media.py deleted file mode 100644 index e6e08a6..0000000 --- a/scripts/check_download_media.py +++ /dev/null @@ -1,8 +0,0 @@ -import importlib, traceback, sys - -try: - importlib.import_module('cmdlets.download_media') - print('download_media imported OK') -except Exception: - traceback.print_exc() - sys.exit(1) diff --git a/scripts/inspect_shared_lines.py b/scripts/inspect_shared_lines.py deleted file mode 100644 index e9a9b26..0000000 --- a/scripts/inspect_shared_lines.py +++ /dev/null @@ -1,5 +0,0 @@ -from pathlib import Path -p = Path('cmdlets/_shared.py') -for i, line in enumerate(p.read_text().splitlines(), start=1): - if 1708 <= i <= 1720: - print(f"{i:4}: {repr(line)}") diff --git a/scripts/normalize_shared_indent.py b/scripts/normalize_shared_indent.py deleted file mode 100644 index 8286e75..0000000 --- a/scripts/normalize_shared_indent.py +++ /dev/null @@ -1,24 +0,0 @@ -from pathlib import Path -import re - -p = Path('cmdlets/_shared.py') -src = p.read_text(encoding='utf-8') -lines = src.splitlines(True) -changed = False -new_lines = [] -for line in lines: - m = re.match(r'^(?P[ \t]*)', line) - ws = m.group('ws') if m else '' - if '\t' in ws: - new_ws = ws.replace('\t', ' ') - new_line = new_ws + line[len(ws):] - new_lines.append(new_line) - changed = True - else: - new_lines.append(line) - -if changed: - p.write_text(''.join(new_lines), encoding='utf-8') - print('Normalized leading tabs to spaces in', p) -else: - print('No leading tabs found; no changes made') diff --git a/scripts/refactor_download_careful.py b/scripts/refactor_download_careful.py deleted file mode 100644 index bb415c9..0000000 --- a/scripts/refactor_download_careful.py +++ /dev/null @@ -1,160 +0,0 @@ -#!/usr/bin/env python3 -""" -Careful refactoring of download_data.py to class-based pattern. -Handles nested functions and inner definitions correctly. 
-""" - -import re -from pathlib import Path - -def refactor_download_data(): - backup_file = Path('cmdlets/download_data_backup.py') - output_file = Path('cmdlets/download_data.py') - - print(f"Reading: {backup_file}") - content = backup_file.read_text(encoding='utf-8') - lines = content.split('\n') - - output = [] - i = 0 - in_cmdlet_def = False - skip_old_run_wrapper = False - class_added = False - - while i < len(lines): - line = lines[i] - - # Skip old _run wrapper function - if line.strip().startswith('def _run(result: Any'): - while i < len(lines): - i += 1 - if lines[i] and not lines[i][0].isspace(): - break - continue - - # Skip old CMDLET definition - if line.strip().startswith('CMDLET = Cmdlet('): - while i < len(lines): - i += 1 - if lines[i].strip() == ')': - i += 1 - break - output.append('') - output.append('# Create and register the cmdlet') - output.append('CMDLET = Download_Data()') - output.append('') - continue - - # Insert class definition before first top-level helper - if not class_added and line.strip().startswith('def _download_torrent_worker('): - # Add class header with __init__ and run() - output.extend([ - '', - '', - 'class Download_Data(Cmdlet):', - ' """Class-based download-data cmdlet with self-registration."""', - '', - ' def __init__(self) -> None:', - ' """Initialize download-data cmdlet."""', - ' super().__init__(', - ' name="download-data",', - ' summary="Download data from url with playlist/clip support using yt-dlp",', - ' usage="download-data [options] or search-file | download-data [options]",', - ' alias=["download", "dl"],', - ' arg=[', - ' CmdletArg(name="url", type="string", required=False, description="URL to download (HTTP/HTTPS or file with URL list)", variadic=True),', - ' CmdletArg(name="-url", type="string", description="URL to download (alias for positional argument)", variadic=True),', - ' CmdletArg(name="list-formats", type="flag", description="List available formats without downloading"),', - ' CmdletArg(name="audio", type="flag", alias="a", description="Download audio only (extract from video)"),', - ' CmdletArg(name="video", type="flag", alias="v", description="Download video (default if not specified)"),', - ' CmdletArg(name="format", type="string", alias="fmt", description="Explicit yt-dlp format selector (e.g., bestvideo+bestaudio)"),', - ' CmdletArg(name="clip", type="string", description="Extract time range: MM:SS-MM:SS (e.g., 34:03-35:08) or seconds"),', - ' CmdletArg(name="section", type="string", description="Download sections (yt-dlp only): TIME_RANGE[,TIME_RANGE...] 
(e.g., 1:30-1:35,0:05-0:15)"),', - ' CmdletArg(name="cookies", type="string", description="Path to cookies.txt file for authentication"),', - ' CmdletArg(name="torrent", type="flag", description="Download torrent/magnet via AllDebrid (requires API key in config)"),', - ' CmdletArg(name="wait", type="float", description="Wait time (seconds) for magnet processing timeout"),', - ' CmdletArg(name="background", type="flag", alias="bg", description="Start download in background and return to prompt immediately"),', - ' CmdletArg(name="item", type="string", alias="items", description="Item selection for playlists/formats: use -item N to select format N, or -item to show table for @N selection in next command"),', - ' SharedArgs.STORAGE,', - ' ],', - ' detail=["Download media from url with advanced features.", "", "See help for full usage examples."],', - ' exec=self.run,', - ' )', - ' self.register()', - '', - ' def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:', - ' """Main execution method."""', - ' stage_ctx = pipeline_context.get_stage_context()', - ' in_pipeline = stage_ctx is not None and getattr(stage_ctx, "total_stages", 1) > 1', - ' if in_pipeline and isinstance(config, dict):', - ' config["_quiet_background_output"] = True', - ' return self._run_impl(result, args, config, emit_results=True)', - '', - ' # ' + '='*70, - ' # HELPER METHODS', - ' # ' + '='*70, - '', - ]) - class_added = True - - # Convert top-level helper functions to static methods - if class_added and line and not line[0].isspace() and line.strip().startswith('def _'): - output.append(' @staticmethod') - output.append(f' {line}') - i += 1 - # Copy function body with indentation - while i < len(lines): - next_line = lines[i] - # Stop at next top-level definition - if next_line and not next_line[0].isspace() and (next_line.strip().startswith(('def ', 'class ', 'CMDLET'))): - break - # Add indentation - if next_line.strip(): - output.append(f' {next_line}') - else: - output.append(next_line) - i += 1 - continue - - output.append(line) - i += 1 - - result_text = '\n'.join(output) - - # NOW: Update function calls carefully - # Only update calls in _run_impl, not in nested function definitions - # Pattern: match _func( but NOT when it's after "def " on the same line - helper_funcs = [ - '_download_torrent_worker', '_guess_libgen_title', '_is_libgen_entry', - '_download_libgen_entry', '_libgen_background_worker', - '_start_libgen_background_worker', '_run_pipeline_tail', - '_download_http_background_worker', '_start_http_background_download', - '_parse_torrent_file', '_download_torrent_file', '_is_torrent_file_or_url', - '_process_torrent_input', '_show_playlist_table', '_parse_time_range', - '_parse_section_ranges', '_parse_playlist_selection_indices', - '_select_playlist_entries', '_sanitize_title_for_filename', - '_find_playlist_files_from_entries', '_snapshot_playlist_paths', - '_is_openlibrary_downloadable', '_as_dict', '_is_youtube_url', - ] - - # Split into lines for careful replacement - result_lines = result_text.split('\n') - for idx, line in enumerate(result_lines): - # Skip lines that are function definitions - if 'def ' in line: - continue - # Replace helper function calls with self. 
- for func in helper_funcs: - # Pattern: _func( with word boundary before - pattern = rf'\b({re.escape(func)})\(' - if re.search(pattern, line): - result_lines[idx] = re.sub(pattern, r'self.\1(', line) - - result_text = '\n'.join(result_lines) - - output_file.write_text(result_text, encoding='utf-8') - print(f"✓ Written: {output_file}") - print(f"✓ Class-based refactor complete") - -if __name__ == '__main__': - refactor_download_data() diff --git a/scripts/refactor_download_data.py b/scripts/refactor_download_data.py deleted file mode 100644 index 557d561..0000000 --- a/scripts/refactor_download_data.py +++ /dev/null @@ -1,131 +0,0 @@ -#!/usr/bin/env python3 -""" -Automated refactoring script for download_data.py -Converts module-level functions to class-based cmdlet pattern. -""" - -import re -from pathlib import Path - -def main(): - backup_file = Path('cmdlets/download_data_backup.py') - output_file = Path('cmdlets/download_data.py') - - print(f"Reading: {backup_file}") - content = backup_file.read_text(encoding='utf-8') - lines = content.split('\n') - - output = [] - i = 0 - in_cmdlet_def = False - skip_old_run_wrapper = False - class_section_added = False - - # Track where to insert class definition - last_import_line = 0 - - while i < len(lines): - line = lines[i] - - # Track imports - if line.strip().startswith(('import ', 'from ')): - last_import_line = len(output) - - # Skip old _run wrapper function - if 'def _run(result: Any' in line: - skip_old_run_wrapper = True - i += 1 - continue - - if skip_old_run_wrapper: - if line and not line[0].isspace(): - skip_old_run_wrapper = False - else: - i += 1 - continue - - # Skip old CMDLET definition - if line.strip().startswith('CMDLET = Cmdlet('): - in_cmdlet_def = True - i += 1 - continue - - if in_cmdlet_def: - if line.strip() == ')': - in_cmdlet_def = False - # Add class instantiation instead - output.append('') - output.append('# Create and register the cmdlet') - output.append('CMDLET = Download_Data()') - output.append('') - i += 1 - continue - - # Insert class definition before first helper function - if not class_section_added and line.strip().startswith('def _download_torrent_worker('): - output.append('') - output.append('') - output.append('class Download_Data(Cmdlet):') - output.append(' """Class-based download-data cmdlet with self-registration."""') - output.append('') - output.append(' # Full __init__ implementation to be added') - output.append(' # Full run() method to be added') - output.append('') - output.append(' # ' + '='*70) - output.append(' # HELPER METHODS') - output.append(' # ' + '='*70) - output.append('') - class_section_added = True - - # Convert top-level helper functions to static methods - if class_section_added and line.strip().startswith('def _') and not line.strip().startswith('def __'): - # Check if this is a top-level function (no indentation) - if not line.startswith((' ', '\t')): - output.append(' @staticmethod') - output.append(f' {line}') - i += 1 - # Copy function body with indentation - while i < len(lines): - next_line = lines[i] - # Stop at next top-level definition - if next_line and not next_line[0].isspace() and (next_line.strip().startswith('def ') or next_line.strip().startswith('class ') or next_line.strip().startswith('CMDLET')): - break - # Add indentation - if next_line.strip(): - output.append(f' {next_line}') - else: - output.append(next_line) - i += 1 - continue - - # Convert _run_impl to method (but keep as-is for now, will be updated later) - if class_section_added and 
line.strip().startswith('def _run_impl('): - output.append(' def _run_impl(self, result: Any, args: Sequence[str], config: Dict[str, Any], emit_results: bool = True) -> int:') - i += 1 - # Copy function body with indentation - while i < len(lines): - next_line = lines[i] - if next_line and not next_line[0].isspace() and next_line.strip(): - break - if next_line.strip(): - output.append(f' {next_line}') - else: - output.append(next_line) - i += 1 - continue - - output.append(line) - i += 1 - - # Write output - result_text = '\n'.join(output) - output_file.write_text(result_text, encoding='utf-8') - print(f"✓ Written: {output_file}") - print(f"✓ Converted {content.count('def _')} helper functions to static methods") - print("\nNext steps:") - print("1. Add full __init__ method with cmdlet args") - print("2. Add run() method that calls _run_impl") - print("3. Update function calls in _run_impl from _func() to self._func()") - -if __name__ == '__main__': - main() diff --git a/helper/remote_storage_server.py b/scripts/remote_storage_server.py similarity index 93% rename from helper/remote_storage_server.py rename to scripts/remote_storage_server.py index aa0c2e0..8d5ba20 100644 --- a/helper/remote_storage_server.py +++ b/scripts/remote_storage_server.py @@ -57,7 +57,7 @@ from functools import wraps # Add parent directory to path for imports sys.path.insert(0, str(Path(__file__).parent.parent)) -from helper.logger import log +from SYS.logger import log # ============================================================================ # CONFIGURATION @@ -159,8 +159,8 @@ def create_app(): status["storage_path"] = str(STORAGE_PATH) status["storage_exists"] = STORAGE_PATH.exists() try: - from helper.folder_store import FolderDB - with FolderDB(STORAGE_PATH) as db: + from API.folder import API_folder_store + with API_folder_store(STORAGE_PATH) as db: status["database_accessible"] = True except Exception as e: status["database_accessible"] = False @@ -177,7 +177,7 @@ def create_app(): @require_storage() def search_files(): """Search for files by name or tag.""" - from helper.folder_store import LocalLibrarySearchOptimizer + from API.folder import LocalLibrarySearchOptimizer query = request.args.get('q', '') limit = request.args.get('limit', 100, type=int) @@ -205,10 +205,10 @@ def create_app(): @require_storage() def get_file_metadata(file_hash: str): """Get metadata for a specific file by hash.""" - from helper.folder_store import FolderDB + from API.folder import API_folder_store try: - with FolderDB(STORAGE_PATH) as db: + with API_folder_store(STORAGE_PATH) as db: file_path = db.search_hash(file_hash) if not file_path or not file_path.exists(): @@ -233,8 +233,8 @@ def create_app(): @require_storage() def index_file(): """Index a new file in the storage.""" - from helper.folder_store import FolderDB - from helper.utils import sha256_file + from API.folder import API_folder_store + from SYS.utils import sha256_file data = request.get_json() or {} file_path_str = data.get('path') @@ -250,7 +250,7 @@ def create_app(): if not file_path.exists(): return jsonify({"error": "File does not exist"}), 404 - with FolderDB(STORAGE_PATH) as db: + with API_folder_store(STORAGE_PATH) as db: db.get_or_create_file_entry(file_path) if tags: @@ -280,10 +280,10 @@ def create_app(): @require_storage() def get_tags(file_hash: str): """Get tags for a file.""" - from helper.folder_store import FolderDB + from API.folder import API_folder_store try: - with FolderDB(STORAGE_PATH) as db: + with API_folder_store(STORAGE_PATH) as 
db: file_path = db.search_hash(file_hash) if not file_path: return jsonify({"error": "File not found"}), 404 @@ -299,7 +299,7 @@ def create_app(): @require_storage() def add_tags(file_hash: str): """Add tags to a file.""" - from helper.folder_store import FolderDB + from API.folder import API_folder_store data = request.get_json() or {} tags = data.get('tags', []) @@ -309,7 +309,7 @@ def create_app(): return jsonify({"error": "Tags required"}), 400 try: - with FolderDB(STORAGE_PATH) as db: + with API_folder_store(STORAGE_PATH) as db: file_path = db.search_hash(file_hash) if not file_path: return jsonify({"error": "File not found"}), 404 @@ -328,12 +328,12 @@ def create_app(): @require_storage() def remove_tags(file_hash: str): """Remove tags from a file.""" - from helper.folder_store import FolderDB + from API.folder import API_folder_store tags_str = request.args.get('tags', '') try: - with FolderDB(STORAGE_PATH) as db: + with API_folder_store(STORAGE_PATH) as db: file_path = db.search_hash(file_hash) if not file_path: return jsonify({"error": "File not found"}), 404 @@ -358,10 +358,10 @@ def create_app(): @require_storage() def get_relationships(file_hash: str): """Get relationships for a file.""" - from helper.folder_store import FolderDB + from API.folder import API_folder_store try: - with FolderDB(STORAGE_PATH) as db: + with API_folder_store(STORAGE_PATH) as db: file_path = db.search_hash(file_hash) if not file_path: return jsonify({"error": "File not found"}), 404 @@ -378,7 +378,7 @@ def create_app(): @require_storage() def set_relationship(): """Set a relationship between two files.""" - from helper.folder_store import FolderDB + from API.folder import API_folder_store data = request.get_json() or {} from_hash = data.get('from_hash') @@ -389,7 +389,7 @@ def create_app(): return jsonify({"error": "from_hash and to_hash required"}), 400 try: - with FolderDB(STORAGE_PATH) as db: + with API_folder_store(STORAGE_PATH) as db: from_path = db.search_hash(from_hash) to_path = db.search_hash(to_hash) @@ -411,10 +411,10 @@ def create_app(): @require_storage() def get_url(file_hash: str): """Get known url for a file.""" - from helper.folder_store import FolderDB + from API.folder import API_folder_store try: - with FolderDB(STORAGE_PATH) as db: + with API_folder_store(STORAGE_PATH) as db: file_path = db.search_hash(file_hash) if not file_path: return jsonify({"error": "File not found"}), 404 @@ -431,7 +431,7 @@ def create_app(): @require_storage() def add_url(file_hash: str): """Add url to a file.""" - from helper.folder_store import FolderDB + from API.folder import API_folder_store data = request.get_json() or {} url = data.get('url', []) @@ -440,7 +440,7 @@ def create_app(): return jsonify({"error": "url required"}), 400 try: - with FolderDB(STORAGE_PATH) as db: + with API_folder_store(STORAGE_PATH) as db: file_path = db.search_hash(file_hash) if not file_path: return jsonify({"error": "File not found"}), 404 @@ -509,8 +509,8 @@ def main(): print(f"\n{'='*70}\n") try: - from helper.folder_store import FolderDB - with FolderDB(STORAGE_PATH) as db: + from API.folder import API_folder_store + with API_folder_store(STORAGE_PATH) as db: logger.info("Database initialized successfully") except Exception as e: logger.error(f"Failed to initialize database: {e}") diff --git a/search_file.py b/search_file.py new file mode 100644 index 0000000..797d0a2 --- /dev/null +++ b/search_file.py @@ -0,0 +1,530 @@ +"""Search-file cmdlet: Search for files by query, tags, size, type, duration, etc.""" +from 
__future__ import annotations + +from typing import Any, Dict, Sequence, List, Optional, Tuple +from pathlib import Path +from dataclasses import dataclass, field +from collections import OrderedDict +import re +import json +import sys + +from SYS.logger import log, debug + +from Provider.registry import get_search_provider + +from cmdlets._shared import Cmdlet, CmdletArg, get_field, should_show_help +import pipeline as ctx + + +def get_origin(obj: Any, default: Any = None) -> Any: + """Return the canonical origin/table identifier from a payload-like object.""" + value = get_field(obj, "origin", None) + if value is not None: + return value + value = get_field(obj, "table", None) + if value is not None: + return value + value = get_field(obj, "store", None) + if value is not None: + return value + return default + +# Optional dependencies +try: + import mutagen # type: ignore +except ImportError: # pragma: no cover + mutagen = None # type: ignore + +try: + from config import get_hydrus_url, resolve_output_dir +except Exception: # pragma: no cover + get_hydrus_url = None # type: ignore + resolve_output_dir = None # type: ignore + +try: + from API.HydrusNetwork import HydrusClient, HydrusRequestError +except ImportError: # pragma: no cover + HydrusClient = None # type: ignore + HydrusRequestError = RuntimeError # type: ignore + +try: + from SYS.utils import sha256_file +except ImportError: # pragma: no cover + sha256_file = None # type: ignore + +try: + from SYS.utils_constant import mime_maps +except ImportError: # pragma: no cover + mime_maps = {} # type: ignore + +@dataclass(slots=True) +class SearchRecord: + path: str + size_bytes: int | None = None + duration_seconds: str | None = None + tags: str | None = None + hash: str | None = None + + def as_dict(self) -> dict[str, str]: + payload: dict[str, str] = {"path": self.path} + if self.size_bytes is not None: + payload["size"] = str(self.size_bytes) + if self.duration_seconds: + payload["duration"] = self.duration_seconds + if self.tags: + payload["tags"] = self.tags + if self.hash: + payload["hash"] = self.hash + return payload + + +@dataclass +class ResultItem: + table: str # Renamed from origin + title: str + detail: str + annotations: List[str] + target: str + media_kind: str = "other" + hash: Optional[str] = None + columns: List[tuple[str, str]] = field(default_factory=list) + tag_summary: Optional[str] = None + duration_seconds: Optional[float] = None + size_bytes: Optional[int] = None + full_metadata: Optional[Dict[str, Any]] = None + tags: Optional[set[str]] = field(default_factory=set) + relationships: Optional[List[str]] = field(default_factory=list) + known_urls: Optional[List[str]] = field(default_factory=list) + + @property + def origin(self) -> str: + return self.table + + def to_dict(self) -> Dict[str, Any]: + payload: Dict[str, Any] = { + "title": self.title, + } + + # Always include these core fields for downstream cmdlets (get-file, download-data, etc) + payload["table"] = self.table + payload["target"] = self.target + payload["media_kind"] = self.media_kind + + # Always include full_metadata if present (needed by download-data, etc) + # This is NOT for display, but for downstream processing + if self.full_metadata: + payload["full_metadata"] = self.full_metadata + + # Include columns if defined (result renderer will use these for display) + if self.columns: + payload["columns"] = list(self.columns) + else: + # If no columns, include the detail for backwards compatibility + payload["detail"] = self.detail + 
+        payload["annotations"] = list(self.annotations)
+
+        # Include optional fields
+        if self.hash:
+            payload["hash"] = self.hash
+        if self.tag_summary:
+            payload["tags"] = self.tag_summary
+        if self.tags:
+            payload["tags_set"] = list(self.tags)
+        if self.relationships:
+            payload["relationships"] = self.relationships
+        if self.known_urls:
+            payload["known_urls"] = self.known_urls
+        return payload
+
+
+STORAGE_ORIGINS = {"local", "hydrus", "debrid"}
+
+
+class Search_File(Cmdlet):
+    """Class-based search-file cmdlet with self-registration."""
+
+    def __init__(self) -> None:
+        super().__init__(
+            name="search-file",
+            summary="Unified search cmdlet for storage (Hydrus, Local) and providers (Debrid, LibGen, OpenLibrary, Soulseek).",
+            usage="search-file [query] [-tag TAG] [-size >100MB|<50MB] [-type audio|video|image] [-duration >10:00] [-store BACKEND] [-provider PROVIDER]",
+            arg=[
+                CmdletArg("query", description="Search query string"),
+                CmdletArg("tag", description="Filter by tag (can be used multiple times)"),
+                CmdletArg("size", description="Filter by size: >100MB, <50MB, =10MB"),
+                CmdletArg("type", description="Filter by type: audio, video, image, document"),
+                CmdletArg("duration", description="Filter by duration: >10:00, <1:30:00"),
+                CmdletArg("limit", type="integer", description="Limit results (default: 45)"),
+                CmdletArg("store", description="Search storage backend: hydrus, local (default: all searchable storages)"),
+                CmdletArg("provider", description="Search provider: libgen, openlibrary, soulseek, debrid, local (overrides -storage)"),
+            ],
+            detail=[
+                "Search across storage (Hydrus, Local) and providers (Debrid, LibGen, OpenLibrary, Soulseek)",
+                "Use -provider to search a specific source, or -store to search file backends",
+                "Filter results by: tag, size, type, duration",
+                "Results can be piped to other commands",
+                "Examples:",
+                "search-file foo # Search all file backends",
+                "search-file -provider libgen 'python programming' # Search LibGen books",
+                "search-file -provider debrid 'movie' # Search AllDebrid magnets",
+                "search-file 'music' -provider soulseek # Search Soulseek P2P",
+                "search-file -provider openlibrary 'tolkien' # Search OpenLibrary",
+                "search-file song -store hydrus -type audio # Search only Hydrus audio",
+                "search-file movie -tag action -provider debrid # Debrid with filters",
+            ],
+            exec=self.run,
+        )
+        self.register()
+
+    # --- Helper methods -------------------------------------------------
+    @staticmethod
+    def _normalize_extension(ext_value: Any) -> str:
+        """Sanitize extension strings to alphanumerics and cap at 5 chars."""
+        ext = str(ext_value or "").strip().lstrip(".")
+        for sep in (" ", "|", "(", "[", "{", ",", ";"):
+            if sep in ext:
+                ext = ext.split(sep, 1)[0]
+                break
+        if "." in ext:
+            ext = ext.split(".")[-1]
+        ext = "".join(ch for ch in ext if ch.isalnum())
+        return ext[:5]
+
+    def _ensure_storage_columns(self, payload: Dict[str, Any]) -> Dict[str, Any]:
+        """Ensure storage results have the necessary fields for result_table display."""
+        store_value = str(get_origin(payload, "") or "").lower()
+        if store_value not in STORAGE_ORIGINS:
+            return payload
+
+        # Ensure we have title field
+        if "title" not in payload:
+            payload["title"] = payload.get("name") or payload.get("target") or payload.get("path") or "Result"
+
+        # Ensure we have ext field
+        if "ext" not in payload:
+            title = str(payload.get("title", ""))
+            path_obj = Path(title)
+            if path_obj.suffix:
+                payload["ext"] = self._normalize_extension(path_obj.suffix.lstrip('.'))
+            else:
+                payload["ext"] = payload.get("ext", "")
+
+        # Ensure size_bytes is present for display (already set by search_file())
+        # result_table will handle formatting it
+
+        # Don't create manual columns - let result_table handle display
+        # This allows the table to respect max_columns and apply consistent formatting
+        return payload
+
+    # --- Execution ------------------------------------------------------
+    def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
+        """Search across multiple providers: Hydrus, Local, Debrid, LibGen, etc."""
+        if should_show_help(args):
+            log(f"Cmdlet: {self.name}\nSummary: {self.summary}\nUsage: {self.usage}")
+            return 0
+
+        args_list = [str(arg) for arg in (args or [])]
+
+        # Parse arguments
+        query = ""
+        tag_filters: List[str] = []
+        size_filter: Optional[Tuple[str, int]] = None
+        duration_filter: Optional[Tuple[str, float]] = None
+        type_filter: Optional[str] = None
+        storage_backend: Optional[str] = None
+        provider_name: Optional[str] = None
+        limit = 45
+        searched_backends: List[str] = []
+
+        i = 0
+        while i < len(args_list):
+            arg = args_list[i]
+            low = arg.lower()
+            if low in {"-provider", "--provider"} and i + 1 < len(args_list):
+                provider_name = args_list[i + 1].lower()
+                i += 2
+            elif low in {"-store", "--store", "-storage", "--storage"} and i + 1 < len(args_list):
+                storage_backend = args_list[i + 1].lower()
+                i += 2
+            elif low in {"-tag", "--tag"} and i + 1 < len(args_list):
+                tag_filters.append(args_list[i + 1])
+                i += 2
+            elif low in {"-limit", "--limit"} and i + 1 < len(args_list):
+                try:
+                    limit = int(args_list[i + 1])
+                except ValueError:
+                    limit = 100
+                i += 2
+            elif low in {"-type", "--type"} and i + 1 < len(args_list):
+                type_filter = args_list[i + 1].lower()
+                i += 2
+            elif not arg.startswith("-"):
+                query = f"{query} {arg}".strip() if query else arg
+                i += 1
+            else:
+                i += 1
+
+        store_filter: Optional[str] = None
+        if query:
+            match = re.search(r"\bstore:([^\s,]+)", query, flags=re.IGNORECASE)
+            if match:
+                store_filter = match.group(1).strip().lower() or None
+                query = re.sub(r"\s*[,]?\s*store:[^\s,]+", " ", query, flags=re.IGNORECASE)
+                query = re.sub(r"\s{2,}", " ", query)
+                query = query.strip().strip(',')
+
+        if storage_backend and storage_backend.lower() == "debrid":
+            log("Use -provider debrid instead of -store debrid (debrid is provider-only)", file=sys.stderr)
+            return 1
+
+        if store_filter and not provider_name and not storage_backend:
+            if store_filter in {"hydrus", "local", "debrid"}:
+                storage_backend = store_filter
+
+        # --- Feature: Filter provider result table by Name column ---
+        filter_after_search: Optional[str] = None
+        if result:
+            actual_result = result[0] if isinstance(result, list) and result else result
+            origin = get_origin(actual_result)
+            target = get_field(actual_result, 'target')
+
+            # If the incoming result is from a provider (not storage) AND this invocation looks like a filter (no flags)
+            positional_args = [a for a in args_list if not a.startswith('-')]
+            no_flags = len(positional_args) == len(args_list)
+            looks_like_filter = no_flags and len(positional_args) == 1 and not provider_name and not storage_backend and not tag_filters and not size_filter and not duration_filter and not type_filter
+
+            if origin and origin.lower() not in STORAGE_ORIGINS and looks_like_filter and query:
+                # Save the filter string to apply AFTER loading the provider data
+                filter_after_search = query.strip()
+                query = ""  # Clear query so we load the target URL instead
+
+            # If result is from a provider, extract the target as query and set provider
+            if not query:
+                if origin == 'bandcamp' and target:
+                    query = target
+                    if not provider_name:
+                        provider_name = 'bandcamp'
+                elif origin == 'youtube' and target:
+                    query = target
+                    if not provider_name:
+                        provider_name = 'youtube'
+                elif target and str(target).startswith(('http://', 'https://')):
+                    query = target
+                    if not provider_name:
+                        if 'bandcamp.com' in target:
+                            provider_name = 'bandcamp'
+                        elif 'youtube.com' in target or 'youtu.be' in target:
+                            provider_name = 'youtube'
+
+        if not query:
+            log("Provide a search query", file=sys.stderr)
+            return 1
+
+        from API.folder import API_folder_store
+        from config import get_local_storage_path
+        import uuid
+        worker_id = str(uuid.uuid4())
+        library_root = get_local_storage_path(config or {})
+        if not library_root:
+            log("No library root configured", file=sys.stderr)
+            return 1
+
+        db = None
+        try:
+            db = API_folder_store(library_root)
+            db.insert_worker(
+                worker_id,
+                "search",
+                title=f"Search: {query}",
+                description=f"Query: {query}",
+                pipe=ctx.get_current_command_text()
+            )
+
+            results_list = []
+            import result_table
+            import importlib
+            importlib.reload(result_table)
+            from result_table import ResultTable
+
+            table_title = f"Search: {query}"
+            if provider_name:
+                table_title += f" [{provider_name}]"
+            elif storage_backend:
+                table_title += f" [{storage_backend}]"
+
+            preserve_order = provider_name and provider_name.lower() in ('youtube', 'openlibrary')
+            # Avoid setting source_command so @N does not re-run search-file; preserve row order when needed
+            table = ResultTable(table_title).set_preserve_order(preserve_order)
+
+            if provider_name:
+                debug(f"[search_file] Attempting provider search with: {provider_name}")
+                provider = get_search_provider(provider_name, config)
+                if not provider:
+                    log(f"Provider '{provider_name}' not available", file=sys.stderr)
+                    db.update_worker_status(worker_id, 'error')
+                    return 1
+
+                debug(f"[search_file] Provider loaded, calling search with query: {query}")
+                search_result = provider.search(query, limit=limit)
+                debug(f"[search_file] Provider search returned {len(search_result)} results")
+
+                # Apply post-search filter if one was set
+                if filter_after_search:
+                    debug(f"[search_file] Applying filter: {filter_after_search}")
+                    filtered_result = []
+                    for item in search_result:
+                        item_dict = item.to_dict() if hasattr(item, 'to_dict') else dict(item)
+                        title_val = get_field(item_dict, 'title') or get_field(item_dict, 'name') or ""
+                        if filter_after_search.lower() in str(title_val).lower():
+                            filtered_result.append(item)
+                    search_result = filtered_result
+                    if not search_result:
+                        log(f"No results match filter: '{filter_after_search}'", file=sys.stderr)
+                        db.update_worker_status(worker_id, 'completed')
+                        return 0
+                    debug(f"[search_file] Filter matched {len(search_result)} results")
+                    table.title = f"Filter: {filter_after_search}"
+
+                for item in search_result:
+                    table.add_result(item)
+                    item_dict = item.to_dict()
+                    results_list.append(item_dict)
+                    ctx.emit(item_dict)
+
+                ctx.set_last_result_table(table, results_list)
+                debug(f"[search_file] Emitted {len(results_list)} results")
+                db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2))
+                db.update_worker_status(worker_id, 'completed')
+                return 0
+
+            from Store import Store
+            storage = Store(config=config or {}, suppress_debug=True)
+
+            backend_to_search = storage_backend or None
+            if backend_to_search:
+                if backend_to_search == "hydrus":
+                    from API.HydrusNetwork import is_hydrus_available
+                    if not is_hydrus_available(config or {}):
+                        log(f"Backend 'hydrus' is not available (Hydrus service not running)", file=sys.stderr)
+                        db.update_worker_status(worker_id, 'error')
+                        return 1
+                searched_backends.append(backend_to_search)
+                target_backend = storage[backend_to_search]
+                results = target_backend.search_store(query, limit=limit)
+            else:
+                from API.HydrusNetwork import is_hydrus_available
+                hydrus_available = is_hydrus_available(config or {})
+
+                all_results = []
+                for backend_name in storage.list_searchable_backends():
+                    if backend_name == "hydrus" and not hydrus_available:
+                        continue
+                    searched_backends.append(backend_name)
+                    try:
+                        backend_results = storage[backend_name].search_store(query, limit=limit - len(all_results))
+                        if backend_results:
+                            all_results.extend(backend_results)
+                            if len(all_results) >= limit:
+                                break
+                    except Exception as exc:
+                        log(f"Backend {backend_name} search failed: {exc}", file=sys.stderr)
+                results = all_results[:limit]
+
+            if not provider_name and not storage_backend:
+                try:
+                    debrid_provider = get_search_provider("debrid", config)
+                    if debrid_provider and debrid_provider.validate():
+                        remaining = max(0, limit - len(results)) if isinstance(results, list) else limit
+                        if remaining > 0:
+                            debrid_results = debrid_provider.search(query, limit=remaining)
+                            if debrid_results:
+                                if "debrid" not in searched_backends:
+                                    searched_backends.append("debrid")
+                                if results is None:
+                                    results = []
+                                results.extend(debrid_results)
+                except Exception as exc:
+                    log(f"Debrid provider search failed: {exc}", file=sys.stderr)
+
+            def _format_storage_label(name: str) -> str:
+                clean = str(name or "").strip()
+                if not clean:
+                    return "Unknown"
+                return clean.replace("_", " ").title()
+
+            storage_counts: OrderedDict[str, int] = OrderedDict((name, 0) for name in searched_backends)
+            for item in results or []:
+                origin = get_origin(item)
+                if not origin:
+                    continue
+                key = str(origin).lower()
+                if key not in storage_counts:
+                    storage_counts[key] = 0
+                storage_counts[key] += 1
+
+            if storage_counts or query:
+                display_counts = OrderedDict((_format_storage_label(name), count) for name, count in storage_counts.items())
+                summary_line = table.set_storage_summary(display_counts, query, inline=True)
+                if summary_line:
+                    table.title = summary_line
+
+            if results:
+                for item in results:
+                    def _as_dict(obj: Any) -> Dict[str, Any]:
+                        if isinstance(obj, dict):
+                            return dict(obj)
+                        if hasattr(obj, "to_dict") and callable(getattr(obj, "to_dict")):
+                            return obj.to_dict()  # type: ignore[arg-type]
+                        return {"title": str(obj)}
+
+                    item_dict = _as_dict(item)
+                    if store_filter:
+                        origin_val = str(get_origin(item_dict) or "").lower()
+                        if store_filter != origin_val:
+                            continue
+                    normalized = self._ensure_storage_columns(item_dict)
+
+                    # Make the store identifier available to downstream cmdlets without rerunning search-file
+                    if not normalized.get("store"):
+                        store_val = get_origin(item_dict)
+                        if store_val:
+                            normalized["store"] = store_val
+
+                    table.add_result(normalized)
+
+                    results_list.append(normalized)
+                    ctx.emit(normalized)
+
+                ctx.set_last_result_table(table, results_list)
+                db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2))
+            else:
+                log("No results found", file=sys.stderr)
+                db.append_worker_stdout(worker_id, json.dumps([], indent=2))
+
+            db.update_worker_status(worker_id, 'completed')
+            return 0
+
+        except Exception as exc:
+            log(f"Search failed: {exc}", file=sys.stderr)
+            import traceback
+            traceback.print_exc(file=sys.stderr)
+            if db:
+                try:
+                    db.update_worker_status(worker_id, 'error')
+                except Exception:
+                    pass
+            return 1
+
+        finally:
+            if db:
+                try:
+                    db.close()
+                except Exception:
+                    pass
+
+
+CMDLET = Search_File()
+
diff --git a/test/0e6509a4c01cd6e4584a4d5b335a4bce196d51c5a73a988cabdd152efa5e6a89/SaveTwitter.Net_lrO5QUBSiiHiGidl_(480p).mp4 b/test/0e6509a4c01cd6e4584a4d5b335a4bce196d51c5a73a988cabdd152efa5e6a89/SaveTwitter.Net_lrO5QUBSiiHiGidl_(480p).mp4
deleted file mode 100644
index 8c9eb5c..0000000
Binary files a/test/0e6509a4c01cd6e4584a4d5b335a4bce196d51c5a73a988cabdd152efa5e6a89/SaveTwitter.Net_lrO5QUBSiiHiGidl_(480p).mp4 and /dev/null differ
diff --git a/test/medios-macina.db b/test/medios-macina.db
deleted file mode 100644
index b6176ef..0000000
Binary files a/test/medios-macina.db and /dev/null differ
diff --git a/test/yapping.m4a b/test/yapping.m4a
deleted file mode 100644
index 390f666..0000000
Binary files a/test/yapping.m4a and /dev/null differ
diff --git a/test/yapping.m4a.metadata b/test/yapping.m4a.metadata
deleted file mode 100644
index 044f709..0000000
--- a/test/yapping.m4a.metadata
+++ /dev/null
@@ -1 +0,0 @@
-hash:00beb438e3c02cdc0340526deb0c51f916ffd6330259be4f350009869c5448d9
diff --git a/test/yapping.m4a.tag b/test/yapping.m4a.tag
deleted file mode 100644
index 99c9383..0000000
--- a/test/yapping.m4a.tag
+++ /dev/null
@@ -1 +0,0 @@
-title:yapping
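For orientation, a minimal sketch of how the pieces introduced above fit together. The module paths (`API.folder`, `search_file`), the `(result, args, config)` call signature, and the `search_hash` method come from the patch; the storage path, config shape, and query below are illustrative placeholders, not values from this repository.

```python
# Illustrative sketch only: the config dict contents and paths are assumptions.
from API.folder import API_folder_store
from search_file import CMDLET as search_file

config = {"storage_path": "/srv/library"}  # assumed shape; real values come from config.py helpers

# The cmdlet's exec entry point takes (result, args, config) and returns an exit code.
exit_code = search_file.run(None, ["tolkien", "-provider", "openlibrary", "-limit", "10"], config)

# The folder store is used as a context manager, just as the Flask routes above use it.
with API_folder_store("/srv/library") as db:
    path = db.search_hash("<sha256 of a stored file>")  # placeholder hash
```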