From 85750247cc105cb6e219e9826abb9dced7c5ec8a Mon Sep 17 00:00:00 2001 From: nose Date: Fri, 12 Dec 2025 21:55:38 -0800 Subject: [PATCH] dfdfdf --- API/HydrusNetwork.py | 48 +- API/alldebrid.py | 4 +- API/archive_client.py | 2 +- API/folder.py | 125 +- CLI.py | 76 +- MPV/LUA/main.lua | 29 + MPV/lyric.py | 1195 +++++++++++ MPV/mpv_ipc.py | 210 +- Provider/__init__.py | 2 +- Provider/bandcamp.py | 2 +- Provider/libgen.py | 479 ++++- Provider/libgen_service.py | 523 ----- Provider/matrix.py | 2 +- Provider/openlibrary.py | 358 ++++ Provider/query_parser.py | 159 -- Provider/soulseek.py | 2 +- Provider/unified_book_downloader.py | 707 ------- Provider/youtube.py | 2 +- Provider/zeroxzero.py | 2 +- ProviderCore/__init__.py | 5 + Provider/_base.py => ProviderCore/base.py | 0 ProviderCore/download.py | 42 + {Provider => ProviderCore}/registry.py | 4 +- Store/Folder.py | 73 + Store/HydrusNetwork.py | 100 +- Store/_base.py | 18 + TUI/modalscreen/download.py | 16 +- TUI/modalscreen/search.py | 64 +- TUI/pipeline_runner.py | 2 +- cli_syntax.py | 166 ++ {cmdlets => cmdlet}/__init__.py | 36 +- {cmdlets => cmdlet}/_shared.py | 38 +- {cmdlets => cmdlet}/add_file.py | 6 +- cmdlet/add_note.py | 148 ++ {cmdlets => cmdlet}/add_relationship.py | 43 +- {cmdlets => cmdlet}/add_tag.py | 61 +- cmdlet/add_tags.py | 456 ++++ {cmdlets => cmdlet}/add_url.py | 0 {cmdlets => cmdlet}/catalog.py | 37 +- {cmdlets => cmdlet}/check_file_status.py | 10 +- {cmdlets => cmdlet}/cleanup.py | 8 +- {cmdlets => cmdlet}/delete_file.py | 0 cmdlet/delete_note.py | 140 ++ {cmdlets => cmdlet}/delete_relationship.py | 17 +- {cmdlets => cmdlet}/delete_tag.py | 23 +- {cmdlets => cmdlet}/delete_url.py | 0 {cmdlets => cmdlet}/download_file.py | 2 +- {cmdlets => cmdlet}/download_media.py | 120 +- {cmdlets => cmdlet}/download_torrent.py | 0 {cmdlets => cmdlet}/get_file.py | 0 {cmdlets => cmdlet}/get_metadata.py | 0 cmdlet/get_note.py | 143 ++ {cmdlets => cmdlet}/get_relationship.py | 6 +- {cmdlets => cmdlet}/get_tag.py | 2 +- {cmdlets => cmdlet}/get_url.py | 0 {cmdlets => cmdlet}/merge_file.py | 201 +- {cmdlets => cmdlet}/screen_shot.py | 46 +- {cmdlets => cmdlet}/search_provider.py | 4 +- {cmdlets => cmdlet}/search_store.py | 2 +- {cmdlets => cmdlet}/trim_file.py | 7 +- cmdlets/add_note.py | 106 - cmdlets/delete_note.py | 102 - cmdlets/get_note.py | 66 - {cmdnats => cmdnat}/__init__.py | 0 {cmdnats => cmdnat}/adjective.json | 0 {cmdnats => cmdnat}/adjective.py | 4 +- {cmdnats => cmdnat}/config.py | 2 +- {cmdnats => cmdnat}/help.py | 8 +- {cmdnats => cmdnat}/matrix.py | 2 +- cmdnat/pipe.py | 1486 +++++++++++++ {cmdnats => cmdnat}/worker.py | 4 +- cmdnats/pipe.py | 1228 ----------- helper/search_provider.py | 2215 -------------------- metadata.py | 219 +- pipeline.py | 16 +- pyproject.toml | 2 +- scripts/remote_storage_server.py | 2 +- search_file.py | 530 ----- 78 files changed, 5726 insertions(+), 6239 deletions(-) create mode 100644 MPV/lyric.py delete mode 100644 Provider/libgen_service.py create mode 100644 Provider/openlibrary.py delete mode 100644 Provider/query_parser.py delete mode 100644 Provider/unified_book_downloader.py create mode 100644 ProviderCore/__init__.py rename Provider/_base.py => ProviderCore/base.py (100%) create mode 100644 ProviderCore/download.py rename {Provider => ProviderCore}/registry.py (95%) create mode 100644 cli_syntax.py rename {cmdlets => cmdlet}/__init__.py (64%) rename {cmdlets => cmdlet}/_shared.py (98%) rename {cmdlets => cmdlet}/add_file.py (99%) create mode 100644 cmdlet/add_note.py rename {cmdlets => 
cmdlet}/add_relationship.py (95%) rename {cmdlets => cmdlet}/add_tag.py (91%) create mode 100644 cmdlet/add_tags.py rename {cmdlets => cmdlet}/add_url.py (100%) rename {cmdlets => cmdlet}/catalog.py (88%) rename {cmdlets => cmdlet}/check_file_status.py (96%) rename {cmdlets => cmdlet}/cleanup.py (97%) rename {cmdlets => cmdlet}/delete_file.py (100%) create mode 100644 cmdlet/delete_note.py rename {cmdlets => cmdlet}/delete_relationship.py (95%) rename {cmdlets => cmdlet}/delete_tag.py (96%) rename {cmdlets => cmdlet}/delete_url.py (100%) rename {cmdlets => cmdlet}/download_file.py (99%) rename {cmdlets => cmdlet}/download_media.py (95%) rename {cmdlets => cmdlet}/download_torrent.py (100%) rename {cmdlets => cmdlet}/get_file.py (100%) rename {cmdlets => cmdlet}/get_metadata.py (100%) create mode 100644 cmdlet/get_note.py rename {cmdlets => cmdlet}/get_relationship.py (99%) rename {cmdlets => cmdlet}/get_tag.py (99%) rename {cmdlets => cmdlet}/get_url.py (100%) rename {cmdlets => cmdlet}/merge_file.py (86%) rename {cmdlets => cmdlet}/screen_shot.py (96%) rename {cmdlets => cmdlet}/search_provider.py (98%) rename {cmdlets => cmdlet}/search_store.py (99%) rename {cmdlets => cmdlet}/trim_file.py (99%) delete mode 100644 cmdlets/add_note.py delete mode 100644 cmdlets/delete_note.py delete mode 100644 cmdlets/get_note.py rename {cmdnats => cmdnat}/__init__.py (100%) rename {cmdnats => cmdnat}/adjective.json (100%) rename {cmdnats => cmdnat}/adjective.py (98%) rename {cmdnats => cmdnat}/config.py (98%) rename {cmdnats => cmdnat}/help.py (96%) rename {cmdnats => cmdnat}/matrix.py (93%) create mode 100644 cmdnat/pipe.py rename {cmdnats => cmdnat}/worker.py (99%) delete mode 100644 cmdnats/pipe.py delete mode 100644 helper/search_provider.py delete mode 100644 search_file.py diff --git a/API/HydrusNetwork.py b/API/HydrusNetwork.py index 233e971..fc82647 100644 --- a/API/HydrusNetwork.py +++ b/API/HydrusNetwork.py @@ -388,25 +388,55 @@ class HydrusNetwork: results[file_hash] = self._post("/add_url/associate_url", data=body) return {"batched": results} - def set_notes(self, file_hashes: Union[str, Iterable[str]], notes: dict[str, str], service_name: str) -> dict[str, Any]: + def set_notes( + self, + file_hash: str, + notes: dict[str, str], + *, + merge_cleverly: bool = False, + extend_existing_note_if_possible: bool = True, + conflict_resolution: int = 3, + ) -> dict[str, Any]: + """Add or update notes associated with a file. + + Hydrus Client API: POST /add_notes/set_notes + Required JSON args: {"hash": , "notes": {name: text}} + """ if not notes: raise ValueError("notes mapping must not be empty") - hashes = self._ensure_hashes(file_hashes) - body = {"hashes": hashes, "service_names_to_notes": {service_name: notes}} + + file_hash = str(file_hash or "").strip().lower() + if not file_hash: + raise ValueError("file_hash must not be empty") + + body: dict[str, Any] = {"hash": file_hash, "notes": notes} + + if merge_cleverly: + body["merge_cleverly"] = True + body["extend_existing_note_if_possible"] = bool(extend_existing_note_if_possible) + body["conflict_resolution"] = int(conflict_resolution) return self._post("/add_notes/set_notes", data=body) def delete_notes( self, - file_hashes: Union[str, Iterable[str]], + file_hash: str, note_names: Sequence[str], - service_name: str, ) -> dict[str, Any]: - names = [name for name in note_names if name] + """Delete notes associated with a file. 
+ + Hydrus Client API: POST /add_notes/delete_notes + Required JSON args: {"hash": , "note_names": [..]} + """ + names = [str(name) for name in note_names if str(name or "").strip()] if not names: raise ValueError("note_names must not be empty") - hashes = self._ensure_hashes(file_hashes) - body = {"hashes": hashes, "service_names_to_deleted_note_names": {service_name: names}} - return self._post("/add_notes/set_notes", data=body) + + file_hash = str(file_hash or "").strip().lower() + if not file_hash: + raise ValueError("file_hash must not be empty") + + body = {"hash": file_hash, "note_names": names} + return self._post("/add_notes/delete_notes", data=body) def get_file_relationships(self, file_hash: str) -> dict[str, Any]: query = {"hash": file_hash} diff --git a/API/alldebrid.py b/API/alldebrid.py index 3f91b16..37e2712 100644 --- a/API/alldebrid.py +++ b/API/alldebrid.py @@ -804,7 +804,7 @@ def unlock_link_cmdlet(result: Any, args: Sequence[str], config: Dict[str, Any]) def _register_unlock_link(): """Register unlock-link command with cmdlet registry if available.""" try: - from cmdlets import register + from cmdlet import register @register(["unlock-link"]) def unlock_link_wrapper(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: @@ -821,7 +821,7 @@ def _register_unlock_link(): return unlock_link_wrapper except ImportError: - # If cmdlets module not available, just return None + # If cmdlet module not available, just return None return None diff --git a/API/archive_client.py b/API/archive_client.py index 3278e80..91ec609 100644 --- a/API/archive_client.py +++ b/API/archive_client.py @@ -7,7 +7,7 @@ This module provides low-level functions for interacting with Archive.org: - Image downloading and deobfuscation - PDF creation with metadata -Used by unified_book_downloader.py for the borrowing workflow. +Used by Provider/openlibrary.py for the borrowing workflow. 
""" from __future__ import annotations diff --git a/API/folder.py b/API/folder.py index ed84b61..8c3fd4c 100644 --- a/API/folder.py +++ b/API/folder.py @@ -231,11 +231,13 @@ class API_folder_store: cursor.execute(""" CREATE TABLE IF NOT EXISTS notes ( - hash TEXT PRIMARY KEY NOT NULL, + hash TEXT NOT NULL, + name TEXT NOT NULL, note TEXT, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - FOREIGN KEY (hash) REFERENCES files(hash) ON DELETE CASCADE + FOREIGN KEY (hash) REFERENCES files(hash) ON DELETE CASCADE, + PRIMARY KEY (hash, name) ) """) @@ -261,6 +263,11 @@ class API_folder_store: cursor.execute("CREATE INDEX IF NOT EXISTS idx_worker_type ON worker(worker_type)") self._migrate_metadata_schema(cursor) + self._migrate_notes_schema(cursor) + + # Notes indices (after migration so columns exist) + cursor.execute("CREATE INDEX IF NOT EXISTS idx_notes_hash ON notes(hash)") + cursor.execute("CREATE INDEX IF NOT EXISTS idx_notes_name ON notes(name)") self.connection.commit() logger.debug("Database tables created/verified") @@ -448,6 +455,42 @@ class API_folder_store: self.connection.commit() except Exception as e: logger.debug(f"Note: Schema import/migration completed with status: {e}") + + def _migrate_notes_schema(self, cursor) -> None: + """Migrate legacy notes schema (hash PRIMARY KEY, note) to named notes (hash,name PRIMARY KEY).""" + try: + cursor.execute("PRAGMA table_info(notes)") + cols = [row[1] for row in cursor.fetchall()] + if not cols: + return + if "name" in cols: + return + + logger.info("Migrating legacy notes table to named notes schema") + cursor.execute(""" + CREATE TABLE IF NOT EXISTS notes_new ( + hash TEXT NOT NULL, + name TEXT NOT NULL, + note TEXT, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + FOREIGN KEY (hash) REFERENCES files(hash) ON DELETE CASCADE, + PRIMARY KEY (hash, name) + ) + """) + + # Copy existing notes into the default key + cursor.execute(""" + INSERT INTO notes_new (hash, name, note, created_at, updated_at) + SELECT hash, 'default', note, created_at, updated_at + FROM notes + """) + + cursor.execute("DROP TABLE notes") + cursor.execute("ALTER TABLE notes_new RENAME TO notes") + self.connection.commit() + except Exception as exc: + logger.debug(f"Notes schema migration skipped/failed: {exc}") def _update_metadata_modified_time(self, file_hash: str) -> None: """Update the time_modified timestamp for a file's metadata.""" @@ -1052,40 +1095,78 @@ class API_folder_store: return [] def get_note(self, file_hash: str) -> Optional[str]: - """Get note for a file by hash.""" + """Get the default note for a file by hash.""" try: - cursor = self.connection.cursor() - - cursor.execute(""" - SELECT n.note FROM notes n - WHERE n.hash = ? - """, (file_hash,)) - - row = cursor.fetchone() - return row[0] if row else None + notes = self.get_notes(file_hash) + if not notes: + return None + return notes.get("default") except Exception as e: logger.error(f"Error getting note for hash {file_hash}: {e}", exc_info=True) return None + + def get_notes(self, file_hash: str) -> Dict[str, str]: + """Get all notes for a file by hash.""" + try: + cursor = self.connection.cursor() + cursor.execute( + "SELECT name, note FROM notes WHERE hash = ? 
ORDER BY name ASC", + (file_hash,), + ) + out: Dict[str, str] = {} + for name, note in cursor.fetchall() or []: + if not name: + continue + out[str(name)] = str(note or "") + return out + except Exception as e: + logger.error(f"Error getting notes for hash {file_hash}: {e}", exc_info=True) + return {} def save_note(self, file_path: Path, note: str) -> None: - """Save note for a file.""" + """Save the default note for a file.""" + self.set_note(file_path, "default", note) + + def set_note(self, file_path: Path, name: str, note: str) -> None: + """Set a named note for a file.""" try: + note_name = str(name or "").strip() + if not note_name: + raise ValueError("Note name is required") + file_hash = self.get_or_create_file_entry(file_path) cursor = self.connection.cursor() - - cursor.execute(""" - INSERT INTO notes (hash, note) - VALUES (?, ?) - ON CONFLICT(hash) DO UPDATE SET + cursor.execute( + """ + INSERT INTO notes (hash, name, note) + VALUES (?, ?, ?) + ON CONFLICT(hash, name) DO UPDATE SET note = excluded.note, updated_at = CURRENT_TIMESTAMP - """, (file_hash, note)) - + """, + (file_hash, note_name, note), + ) self.connection.commit() - logger.debug(f"Saved note for {file_path}") + logger.debug(f"Saved note '{note_name}' for {file_path}") except Exception as e: logger.error(f"Error saving note for {file_path}: {e}", exc_info=True) raise + + def delete_note(self, file_hash: str, name: str) -> None: + """Delete a named note for a file by hash.""" + try: + note_name = str(name or "").strip() + if not note_name: + raise ValueError("Note name is required") + cursor = self.connection.cursor() + cursor.execute( + "DELETE FROM notes WHERE hash = ? AND name = ?", + (file_hash, note_name), + ) + self.connection.commit() + except Exception as e: + logger.error(f"Error deleting note '{name}' for hash {file_hash}: {e}", exc_info=True) + raise def search_by_tag(self, tag: str, limit: int = 100) -> List[tuple]: """Search for files with a specific tag. 
Returns list of (hash, file_path) tuples.""" @@ -2027,7 +2108,7 @@ def migrate_tags_to_db(library_root: Path, db: API_folder_store) -> int: try: for tags_file in library_root.rglob("*.tag"): try: - base_path = tags_file.with_suffix("") + base_path = tags_file.with_suffix("") tags_text = tags_file.read_text(encoding='utf-8') tags = [line.strip() for line in tags_text.splitlines() if line.strip()] diff --git a/CLI.py b/CLI.py index a6025ce..71ef3d1 100644 --- a/CLI.py +++ b/CLI.py @@ -68,7 +68,7 @@ from typing import Callable from config import get_local_storage_path, load_config -from cmdlets.catalog import ( +from cmdlet.catalog import ( import_cmd_module as _catalog_import_cmd_module, list_cmdlet_metadata as _catalog_list_cmdlet_metadata, list_cmdlet_names as _catalog_list_cmdlet_names, @@ -482,7 +482,7 @@ def _get_cmdlet_names() -> List[str]: def _import_cmd_module(mod_name: str): - """Import a cmdlet/native module from cmdlets or cmdnats packages.""" + """Import a cmdlet/native module from cmdlet or cmdnat packages.""" try: return _catalog_import_cmd_module(mod_name) except Exception: @@ -518,7 +518,7 @@ def _get_arg_choices(cmd_name: str, arg_name: str) -> List[str]: # Dynamic search providers if normalized_arg == "provider": try: - from Provider.registry import list_search_providers + from ProviderCore.registry import list_search_providers providers = list_search_providers(_load_cli_config()) available = [name for name, is_ready in providers.items() if is_ready] provider_choices = sorted(available) if available else sorted(providers.keys()) @@ -607,9 +607,23 @@ if ( return arg_names = _get_cmdlet_args(cmd_name) + logical_seen: Set[str] = set() for arg in arg_names: - if arg.lower().startswith(current_token): + arg_low = arg.lower() + + # If the user has only typed '-', prefer single-dash flags (e.g. -url) + # and avoid suggesting both -name and --name for the same logical arg. + if current_token == "-" and arg_low.startswith("--"): + continue + + logical = arg.lstrip("-").lower() + if current_token == "-" and logical in logical_seen: + continue + + if arg_low.startswith(current_token): yield CompletionType(arg, start_position=-len(current_token)) + if current_token == "-": + logical_seen.add(logical) if "--help".startswith(current_token): yield CompletionType("--help", start_position=-len(current_token)) @@ -715,10 +729,21 @@ def _create_cmdlet_cli(): print(f"Error parsing seeds JSON: {e}") return + try: + from cli_syntax import validate_pipeline_text + syntax_error = validate_pipeline_text(command) + if syntax_error: + print(syntax_error.message, file=sys.stderr) + return + except Exception: + # Best-effort only; if validator can't load, fall back to shlex handling below. + pass + try: tokens = shlex.split(command) - except ValueError: - tokens = command.split() + except ValueError as exc: + print(f"Syntax error: {exc}", file=sys.stderr) + return if not tokens: return @@ -728,7 +753,7 @@ def _create_cmdlet_cli(): @app.command("repl") def repl(): - """Start interactive REPL for cmdlets with autocomplete.""" + """Start interactive REPL for cmdlet with autocomplete.""" banner = """ Medeia-Macina ===================== @@ -967,11 +992,22 @@ def _create_cmdlet_cli(): except Exception: pipeline_ctx_ref = None + try: + from cli_syntax import validate_pipeline_text + syntax_error = validate_pipeline_text(user_input) + if syntax_error: + print(syntax_error.message, file=sys.stderr) + continue + except Exception: + # Best-effort only; if validator can't load, continue with shlex. 
+ pass + try: import shlex tokens = shlex.split(user_input) - except ValueError: - tokens = user_input.split() + except ValueError as exc: + print(f"Syntax error: {exc}", file=sys.stderr) + continue if not tokens: continue @@ -1078,12 +1114,12 @@ def _create_cmdlet_cli(): def _execute_pipeline(tokens: list): - """Execute a pipeline of cmdlets separated by pipes (|). + """Execute a pipeline of cmdlet separated by pipes (|). Example: cmd1 arg1 arg2 | cmd2 arg2 | cmd3 arg3 """ try: - from cmdlets import REGISTRY + from cmdlet import REGISTRY import json import pipeline as ctx @@ -1333,7 +1369,7 @@ def _execute_pipeline(tokens: list): filtered = [resolved_items[i] for i in first_stage_selection_indices if 0 <= i < len(resolved_items)] if filtered: # Convert filtered items to PipeObjects for consistent pipeline handling - from cmdlets._shared import coerce_to_pipe_object + from cmdlet._shared import coerce_to_pipe_object filtered_pipe_objs = [coerce_to_pipe_object(item) for item in filtered] piped_result = filtered_pipe_objs if len(filtered_pipe_objs) > 1 else filtered_pipe_objs[0] # Build log message with proper string conversion @@ -1529,7 +1565,7 @@ def _execute_pipeline(tokens: list): filtered = [resolved_list[i] for i in selection_indices if 0 <= i < len(resolved_list)] if filtered: # Convert filtered items to PipeObjects for consistent pipeline handling - from cmdlets._shared import coerce_to_pipe_object + from cmdlet._shared import coerce_to_pipe_object filtered_pipe_objs = [coerce_to_pipe_object(item) for item in filtered] piped_result = filtered_pipe_objs if len(filtered_pipe_objs) > 1 else filtered_pipe_objs[0] print(f"Selected {len(filtered)} item(s) using {cmd_name}") @@ -1817,13 +1853,13 @@ def _execute_cmdlet(cmd_name: str, args: list): - @{1,3,5} - select rows 1, 3, 5 """ try: - from cmdlets import REGISTRY + from cmdlet import REGISTRY import json import pipeline as ctx - # Ensure native commands (cmdnats) are loaded + # Ensure native commands (cmdnat) are loaded try: - from cmdlets.catalog import ensure_registry_loaded as _ensure_registry_loaded + from cmdlet.catalog import ensure_registry_loaded as _ensure_registry_loaded _ensure_registry_loaded() except Exception: pass @@ -1832,7 +1868,7 @@ def _execute_cmdlet(cmd_name: str, args: list): cmd_fn = REGISTRY.get(cmd_name) if not cmd_fn: # Attempt lazy import of the module and retry - from cmdlets.catalog import import_cmd_module as _catalog_import + from cmdlet.catalog import import_cmd_module as _catalog_import try: mod = _catalog_import(cmd_name) data = getattr(mod, "CMDLET", None) if mod else None @@ -1893,7 +1929,7 @@ def _execute_cmdlet(cmd_name: str, args: list): # Filter to selected indices only result = [piped_items[idx] for idx in selected_indices if 0 <= idx < len(piped_items)] else: - # No selection specified, pass all items (cmdlets handle lists via normalize_result_input) + # No selection specified, pass all items (cmdlet handle lists via normalize_result_input) result = piped_items worker_manager = _ensure_worker_manager(config) @@ -2038,10 +2074,10 @@ def _execute_cmdlet(cmd_name: str, args: list): def _show_cmdlet_list(): - """Display available cmdlets with full metadata: cmd:name alias:aliases args:args.""" + """Display available cmdlet with full metadata: cmd:name alias:aliases args:args.""" try: metadata = _catalog_list_cmdlet_metadata() - print("\nAvailable cmdlets:") + print("\nAvailable cmdlet:") for cmd_name in sorted(metadata.keys()): info = metadata[cmd_name] aliases = info.get("aliases", []) diff --git 
a/MPV/LUA/main.lua b/MPV/LUA/main.lua index 514a9f8..706cee6 100644 --- a/MPV/LUA/main.lua +++ b/MPV/LUA/main.lua @@ -4,6 +4,31 @@ local msg = require 'mp.msg' local M = {} +-- Lyrics overlay toggle +-- The Python helper (python -m MPV.lyric) will read this property via IPC. +local LYRIC_VISIBLE_PROP = "user-data/medeia-lyric-visible" + +local function lyric_get_visible() + local ok, v = pcall(mp.get_property_native, LYRIC_VISIBLE_PROP) + if not ok or v == nil then + return true + end + return v and true or false +end + +local function lyric_set_visible(v) + pcall(mp.set_property_native, LYRIC_VISIBLE_PROP, v and true or false) +end + +local function lyric_toggle() + local now = not lyric_get_visible() + lyric_set_visible(now) + mp.osd_message("Lyrics: " .. (now and "on" or "off"), 1) +end + +-- Default to visible unless user overrides. +lyric_set_visible(true) + -- Configuration local opts = { python_path = "python", @@ -138,4 +163,8 @@ mp.add_key_binding("mbtn_right", "medios-menu-right-click", M.show_menu) mp.add_key_binding("ctrl+i", "medios-info", M.get_file_info) mp.add_key_binding("ctrl+del", "medios-delete", M.delete_current_file) +-- Lyrics toggle (requested: 'L') +mp.add_key_binding("l", "medeia-lyric-toggle", lyric_toggle) +mp.add_key_binding("L", "medeia-lyric-toggle-shift", lyric_toggle) + return M diff --git a/MPV/lyric.py b/MPV/lyric.py new file mode 100644 index 0000000..d402ed0 --- /dev/null +++ b/MPV/lyric.py @@ -0,0 +1,1195 @@ +r"""Timed lyric overlay for mpv via JSON IPC. + +This is intentionally implemented from scratch (no vendored/copied code) while +providing the same *kind* of functionality as popular mpv lyric scripts: +- Parse LRC (timestamped lyrics) +- Track mpv playback time via IPC +- Show the current line on mpv's OSD + +Primary intended usage in this repo: +- Auto mode (no stdin / no --lrc): loads lyrics from store notes. + A lyric note is stored under the note name 'lyric'. +- If the lyric note is missing, auto mode will attempt to auto-fetch synced lyrics + from a public API (LRCLIB) and store it into the 'lyric' note. + You can disable this by setting config key `lyric_autofetch` to false. +- You can still pipe LRC into this script (stdin) and it will render lyrics in mpv. + +Example (PowerShell): + Get-Content .\song.lrc | python -m MPV.lyric + +If you want to connect to a non-default mpv IPC server: + Get-Content .\song.lrc | python -m MPV.lyric --ipc "\\.\pipe\mpv-custom" +""" + +from __future__ import annotations + +import argparse +import bisect +import hashlib +import os +import re +import sys +import tempfile +import time +from dataclasses import dataclass +from datetime import datetime +from pathlib import Path +from typing import Any, Dict, List, Optional, TextIO +from urllib.parse import parse_qs, unquote, urlencode +from urllib.request import Request, urlopen +from urllib.parse import urlparse + +from MPV.mpv_ipc import MPV, MPVIPCClient + + +_TIMESTAMP_RE = re.compile(r"\[(?P\d+):(?P\d{2})(?:\.(?P\d{1,3}))?\]") +_OFFSET_RE = re.compile(r"^\[offset:(?P[+-]?\d+)\]$", re.IGNORECASE) +_HASH_RE = re.compile(r"[0-9a-f]{64}", re.IGNORECASE) +_HYDRUS_HASH_QS_RE = re.compile(r"hash=([0-9a-f]{64})", re.IGNORECASE) + +_WIN_DRIVE_RE = re.compile(r"^[a-zA-Z]:[\\/]") +_WIN_UNC_RE = re.compile(r"^\\\\") + + +_LOG_FH: Optional[TextIO] = None +_SINGLE_INSTANCE_LOCK_FH: Optional[TextIO] = None + + +_LYRIC_VISIBLE_PROP = "user-data/medeia-lyric-visible" + +# mpv osd-overlay IDs are scoped to the IPC client connection. 
+# MPV.lyric keeps a persistent connection, so we can safely reuse a constant ID. +_LYRIC_OSD_OVERLAY_ID = 4242 + + +def _single_instance_lock_path(ipc_path: str) -> Path: + # Key the lock to the mpv IPC target so multiple mpv instances with different + # IPC servers can still run independent lyric helpers. + key = hashlib.sha1((ipc_path or "").encode("utf-8", errors="ignore")).hexdigest() + tmp_dir = Path(tempfile.gettempdir()) + return (tmp_dir / f"medeia-mpv-lyric-{key}.lock").resolve() + + +def _acquire_single_instance_lock(ipc_path: str) -> bool: + """Ensure only one MPV.lyric process runs per IPC server. + + This prevents duplicate overlays (e.g. one old show-text overlay + one new osd-overlay). + """ + global _SINGLE_INSTANCE_LOCK_FH + + if _SINGLE_INSTANCE_LOCK_FH is not None: + return True + + lock_path = _single_instance_lock_path(ipc_path) + lock_path.parent.mkdir(parents=True, exist_ok=True) + + try: + fh = open(lock_path, "a", encoding="utf-8", errors="replace") + except Exception: + # If we can't create the lock file, don't block playback; just proceed. + return True + + try: + if os.name == "nt": + import msvcrt + + # Lock the first byte (non-blocking). + msvcrt.locking(fh.fileno(), msvcrt.LK_NBLCK, 1) + else: + import fcntl + + fcntl.flock(fh.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB) + + _SINGLE_INSTANCE_LOCK_FH = fh + try: + fh.write(f"pid={os.getpid()} ipc={ipc_path}\n") + fh.flush() + except Exception: + pass + return True + except Exception: + try: + fh.close() + except Exception: + pass + return False + + +def _ass_escape(text: str) -> str: + # Escape braces/backslashes so lyric text can't break ASS formatting. + t = str(text or "") + t = t.replace("\\", "\\\\") + t = t.replace("{", "\\{") + t = t.replace("}", "\\}") + t = t.replace("\r\n", "\n").replace("\r", "\n") + t = t.replace("\n", "\\N") + return t + + +def _format_lyric_as_subtitle(text: str) -> str: + # Bottom-center like a subtitle (ASS alignment 2). + # NOTE: show-text escapes ASS by default; we use osd-overlay so this is honored. + return "{\\an2}" + _ass_escape(text) + + +def _osd_overlay_set_ass(client: MPVIPCClient, ass_text: str) -> Optional[dict]: + # Use osd-overlay with ass-events so ASS override tags (e.g. {\an2}) are applied. + # Keep z low so UI scripts (like uosc) can draw above it if they use higher z. 
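+    # Illustrative: for the lyric text "Hello\nworld", _format_lyric_as_subtitle()
+    # produces "{\an2}Hello\Nworld", which this overlay renders bottom-centered like a subtitle.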
+ return client.send_command( + { + "command": { + "name": "osd-overlay", + "id": _LYRIC_OSD_OVERLAY_ID, + "format": "ass-events", + "data": ass_text, + "res_y": 720, + "z": -50, + } + } + ) + + +def _osd_overlay_clear(client: MPVIPCClient) -> None: + client.send_command({"command": {"name": "osd-overlay", "id": _LYRIC_OSD_OVERLAY_ID, "format": "none"}}) + + +def _log(msg: str) -> None: + line = f"[{datetime.now().isoformat(timespec='seconds')}] {msg}" + try: + if _LOG_FH is not None: + _LOG_FH.write(line + "\n") + _LOG_FH.flush() + return + except Exception: + pass + + print(line, file=sys.stderr, flush=True) + + +def _ipc_get_property( + client: MPVIPCClient, + name: str, + default: object = None, + *, + raise_on_disconnect: bool = False, +) -> object: + resp = client.send_command({"command": ["get_property", name]}) + if resp is None: + if raise_on_disconnect: + raise ConnectionError("Lost mpv IPC connection") + return default + if resp and resp.get("error") == "success": + return resp.get("data", default) + return default +def _http_get_json(url: str, *, timeout_s: float = 10.0) -> Optional[dict]: + try: + req = Request( + url, + headers={ + "User-Agent": "medeia-macina/lyric", + "Accept": "application/json", + }, + method="GET", + ) + with urlopen(req, timeout=timeout_s) as resp: + data = resp.read() + import json + + obj = json.loads(data.decode("utf-8", errors="replace")) + return obj if isinstance(obj, dict) else None + except Exception as exc: + _log(f"HTTP JSON failed: {exc} ({url})") + return None + + +def _http_get_json_list(url: str, *, timeout_s: float = 10.0) -> Optional[list]: + try: + req = Request( + url, + headers={ + "User-Agent": "medeia-macina/lyric", + "Accept": "application/json", + }, + method="GET", + ) + with urlopen(req, timeout=timeout_s) as resp: + data = resp.read() + import json + + obj = json.loads(data.decode("utf-8", errors="replace")) + return obj if isinstance(obj, list) else None + except Exception as exc: + _log(f"HTTP JSON(list) failed: {exc} ({url})") + return None + + +def _sanitize_query(s: Optional[str]) -> Optional[str]: + if not isinstance(s, str): + return None + t = s.strip().strip("\ufeff") + return t if t else None + + +def _infer_artist_title_from_tags(tags: List[str]) -> tuple[Optional[str], Optional[str]]: + artist = None + title = None + for t in tags or []: + ts = str(t) + low = ts.lower() + if low.startswith("artist:") and artist is None: + artist = ts.split(":", 1)[1].strip() or None + elif low.startswith("title:") and title is None: + title = ts.split(":", 1)[1].strip() or None + if artist and title: + break + return _sanitize_query(artist), _sanitize_query(title) + + +def _wrap_plain_lyrics_as_lrc(text: str) -> str: + # Fallback: create a crude LRC that advances every 4 seconds. + # This is intentionally simple and deterministic. + lines = [ln.strip() for ln in (text or "").splitlines()] + lines = [ln for ln in lines if ln] + if not lines: + return "" + out: List[str] = [] + t_s = 0 + for ln in lines: + mm = t_s // 60 + ss = t_s % 60 + out.append(f"[{mm:02d}:{ss:02d}.00]{ln}") + t_s += 4 + return "\n".join(out) + "\n" + + +def _fetch_lrclib(*, artist: Optional[str], title: Optional[str], duration_s: Optional[float] = None) -> Optional[str]: + base = "https://lrclib.net/api" + + # Require both artist and title; title-only lookups cause frequent mismatches. + if not artist or not title: + return None + + # Try direct get. 
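+    # Illustrative request built below (hypothetical artist/title values):
+    #   https://lrclib.net/api/get?artist_name=Artist&track_name=Title&duration=215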
+ q: Dict[str, str] = { + "artist_name": artist, + "track_name": title, + } + if isinstance(duration_s, (int, float)) and duration_s and duration_s > 0: + q["duration"] = str(int(duration_s)) + url = f"{base}/get?{urlencode(q)}" + obj = _http_get_json(url) + if isinstance(obj, dict): + synced = obj.get("syncedLyrics") + if isinstance(synced, str) and synced.strip(): + _log("LRCLIB: got syncedLyrics") + return synced + plain = obj.get("plainLyrics") + if isinstance(plain, str) and plain.strip(): + _log("LRCLIB: only plainLyrics; wrapping") + wrapped = _wrap_plain_lyrics_as_lrc(plain) + return wrapped if wrapped.strip() else None + + # Fallback: search using artist+title only. + q_text = f"{artist} {title}" + url = f"{base}/search?{urlencode({'q': q_text})}" + items = _http_get_json_list(url) or [] + for item in items: + if not isinstance(item, dict): + continue + synced = item.get("syncedLyrics") + if isinstance(synced, str) and synced.strip(): + _log("LRCLIB: search hit with syncedLyrics") + return synced + # Plain lyrics fallback from search if available + for item in items: + if not isinstance(item, dict): + continue + plain = item.get("plainLyrics") + if isinstance(plain, str) and plain.strip(): + _log("LRCLIB: search hit only plainLyrics; wrapping") + wrapped = _wrap_plain_lyrics_as_lrc(plain) + return wrapped if wrapped.strip() else None + + return None + + +def _fetch_lyrics_ovh(*, artist: Optional[str], title: Optional[str]) -> Optional[str]: + # Public, no-auth lyrics provider (typically plain lyrics, not time-synced). + if not artist or not title: + return None + try: + # Endpoint uses path segments, so we urlencode each part. + from urllib.parse import quote + + url = f"https://api.lyrics.ovh/v1/{quote(artist)}/{quote(title)}" + obj = _http_get_json(url) + if not isinstance(obj, dict): + return None + lyr = obj.get("lyrics") + if isinstance(lyr, str) and lyr.strip(): + _log("lyrics.ovh: got plain lyrics; wrapping") + wrapped = _wrap_plain_lyrics_as_lrc(lyr) + return wrapped if wrapped.strip() else None + except Exception as exc: + _log(f"lyrics.ovh failed: {exc}") + return None + try: + print(line, file=sys.stderr) + except Exception: + pass + + +@dataclass(frozen=True) +class LrcLine: + time_s: float + text: str + + +def _frac_to_ms(frac: str) -> int: + # LRC commonly uses centiseconds (2 digits), but can be 1–3 digits. + if not frac: + return 0 + if len(frac) == 3: + return int(frac) + if len(frac) == 2: + return int(frac) * 10 + return int(frac) * 100 + + +def parse_lrc(text: str) -> List[LrcLine]: + """Parse LRC into sorted timestamped lines.""" + offset_ms = 0 + lines: List[LrcLine] = [] + + for raw_line in text.splitlines(): + line = raw_line.strip("\ufeff\r\n") + if not line: + continue + + # Optional global offset. + off_m = _OFFSET_RE.match(line) + if off_m: + try: + offset_ms = int(off_m.group("ms")) + except Exception: + offset_ms = 0 + continue + + matches = list(_TIMESTAMP_RE.finditer(line)) + if not matches: + # Ignore non-timestamp metadata lines like [ar:], [ti:], etc. + continue + + lyric_text = line[matches[-1].end() :].strip() + for m in matches: + mm = int(m.group("m")) + ss = int(m.group("s")) + frac = m.group("frac") or "" + ts_ms = (mm * 60 + ss) * 1000 + _frac_to_ms(frac) + offset_ms + if ts_ms < 0: + continue + lines.append(LrcLine(time_s=ts_ms / 1000.0, text=lyric_text)) + + # Sort and de-dupe by timestamp (prefer last non-empty text). 
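+    # e.g. "[01:23.45]Hello" -> LrcLine(time_s=83.45, text="Hello"); a line carrying
+    # several [mm:ss.xx] stamps yields one entry per stamp, all sharing the same text.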
+ lines.sort(key=lambda x: x.time_s) + deduped: List[LrcLine] = [] + for item in lines: + if deduped and abs(deduped[-1].time_s - item.time_s) < 1e-6: + if item.text: + deduped[-1] = item + else: + deduped.append(item) + return deduped + + +def _read_all_stdin() -> str: + return sys.stdin.read() + + +def _current_index(time_s: float, times: List[float]) -> int: + # Index of last timestamp <= time_s + return bisect.bisect_right(times, time_s) - 1 + + +def _unwrap_memory_m3u(text: Optional[str]) -> Optional[str]: + """Extract the real target URL/path from a memory:// M3U payload.""" + if not isinstance(text, str) or not text.startswith("memory://"): + return text + for line in text.splitlines(): + s = line.strip() + if not s or s.startswith("#") or s.startswith("memory://"): + continue + return s + return text + + +def _extract_hash_from_target(target: str) -> Optional[str]: + if not isinstance(target, str): + return None + m = _HYDRUS_HASH_QS_RE.search(target) + if m: + return m.group(1).lower() + + # Fallback: plain hash string + s = target.strip().lower() + if _HASH_RE.fullmatch(s): + return s + return None + + +def _load_config_best_effort() -> dict: + try: + from config import load_config + + cfg = load_config() + return cfg if isinstance(cfg, dict) else {} + except Exception: + return {} + + +def _extract_lrc_from_notes(notes: Dict[str, str]) -> Optional[str]: + """Return raw LRC text from the note named 'lyric'.""" + if not isinstance(notes, dict) or not notes: + return None + + raw = None + for k, v in notes.items(): + if not isinstance(k, str): + continue + if k.strip() == "lyric": + raw = v + break + + if not isinstance(raw, str): + return None + + text = raw.strip("\ufeff\r\n") + return text if text.strip() else None + + +def _is_stream_target(target: str) -> bool: + """Return True when mpv's 'path' is not a local filesystem file. + + We intentionally treat any URL/streaming scheme as invalid for lyrics in auto mode. + """ + if not isinstance(target, str): + return False + s = target.strip() + if not s: + return False + + # Windows local paths: drive letter or UNC. + if _WIN_DRIVE_RE.match(s) or _WIN_UNC_RE.match(s): + return False + + # Common streaming prefixes. + if s.startswith("http://") or s.startswith("https://"): + return True + + # Generic scheme:// (e.g. ytdl://, edl://, rtmp://, etc.). 
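+    # Illustrative: "https://host/a.mp3" and "ytdl://xyz" -> True;
+    # "C:\music\a.mp3" and "file:///C:/a.mp3" -> False (treated as local targets).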
+ if "://" in s: + try: + parsed = urlparse(s) + scheme = (parsed.scheme or "").lower() + if scheme and scheme not in {"file"}: + return True + except Exception: + return True + + return False + + +def _normalize_file_uri_target(target: str) -> str: + """Convert file:// URIs to a local filesystem path string when possible.""" + if not isinstance(target, str): + return target + s = target.strip() + if not s: + return target + if not s.lower().startswith("file://"): + return target + + try: + parsed = urlparse(s) + path = unquote(parsed.path or "") + + if os.name == "nt": + # UNC: file://server/share/path -> \\server\share\path + if parsed.netloc: + p = path.replace("/", "\\") + if p.startswith("\\"): + p = p.lstrip("\\") + return f"\\\\{parsed.netloc}\\{p}" if p else f"\\\\{parsed.netloc}" + + # Drive letter: file:///C:/path -> C:/path + if path.startswith("/") and len(path) >= 3 and path[2] == ":": + path = path[1:] + + return path or target + except Exception: + return target + + +def _extract_store_from_url_target(target: str) -> Optional[str]: + """Extract explicit store name from a URL query param `store=...` (if present).""" + if not isinstance(target, str): + return None + s = target.strip() + if not (s.startswith("http://") or s.startswith("https://")): + return None + try: + parsed = urlparse(s) + if not parsed.query: + return None + qs = parse_qs(parsed.query) + raw = qs.get("store", [None])[0] + if isinstance(raw, str) and raw.strip(): + return raw.strip() + except Exception: + return None + return None + + +def _infer_hydrus_store_from_url_target(*, target: str, config: dict) -> Optional[str]: + """Infer a Hydrus store backend by matching the URL prefix to the backend base URL.""" + if not isinstance(target, str): + return None + s = target.strip() + if not (s.startswith("http://") or s.startswith("https://")): + return None + + try: + from Store import Store as StoreRegistry + + reg = StoreRegistry(config, suppress_debug=True) + backends = [(name, reg[name]) for name in reg.list_backends()] + except Exception: + return None + + matches: List[str] = [] + for name, backend in backends: + if type(backend).__name__ != "HydrusNetwork": + continue + base_url = getattr(backend, "_url", None) + if not base_url: + client = getattr(backend, "_client", None) + base_url = getattr(client, "url", None) if client else None + if not base_url: + continue + base = str(base_url).rstrip("/") + if s.startswith(base): + matches.append(name) + + if len(matches) == 1: + return matches[0] + return None + + +def _resolve_store_backend_for_target( + *, + target: str, + file_hash: str, + config: dict, +) -> tuple[Optional[str], Any]: + """Resolve a store backend for a local mpv target using the store DB. + + A target is considered valid only when: + - target is a local filesystem file + - a backend's get_file(hash) returns a local file path + - that path resolves to the same target path + """ + try: + p = Path(target) + if not p.exists() or not p.is_file(): + return None, None + target_resolved = p.resolve() + except Exception: + return None, None + + try: + from Store import Store as StoreRegistry + + reg = StoreRegistry(config, suppress_debug=True) + backend_names = list(reg.list_backends()) + except Exception: + return None, None + + # Prefer the inferred Folder store (fast), but still validate via get_file(). 
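+    # The preferred name is only moved to the front of the candidate list; every
+    # backend is still validated against the resolved target path below.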
+ preferred = _infer_store_for_target(target=target, config=config) + if preferred and preferred in backend_names: + backend_names.remove(preferred) + backend_names.insert(0, preferred) + + for name in backend_names: + try: + backend = reg[name] + except Exception: + continue + + store_file = None + try: + store_file = backend.get_file(file_hash, config=config) + except TypeError: + try: + store_file = backend.get_file(file_hash) + except Exception: + store_file = None + except Exception: + store_file = None + + if not store_file: + continue + + # Only accept local files; if the backend returns a URL, it's not valid for lyrics. + try: + store_path = Path(str(store_file)).expanduser() + if not store_path.exists() or not store_path.is_file(): + continue + if store_path.resolve() != target_resolved: + continue + except Exception: + continue + + return name, backend + + return None, None + + +def _infer_store_for_target(*, target: str, config: dict) -> Optional[str]: + """Infer store name from the current mpv target (local path under a folder root). + + Note: URLs/streams are intentionally not mapped to stores for lyrics. + """ + if isinstance(target, str) and _is_stream_target(target): + return None + + try: + from Store import Store as StoreRegistry + + reg = StoreRegistry(config, suppress_debug=True) + backends = [(name, reg[name]) for name in reg.list_backends()] + except Exception: + backends = [] + + # Local file path: choose the deepest Folder root that contains it. + try: + p = Path(target) + if not p.exists() or not p.is_file(): + return None + p_str = str(p.resolve()).lower() + except Exception: + return None + + best: Optional[str] = None + best_len = -1 + for name, backend in backends: + if type(backend).__name__ != "Folder": + continue + root = None + try: + root = getattr(backend, "_location", None) or getattr(backend, "location", lambda: None)() + except Exception: + root = None + if not root: + continue + try: + root_path = Path(str(root)).expanduser().resolve() + root_str = str(root_path).lower().rstrip("\\/") + except Exception: + continue + + if p_str.startswith(root_str) and len(root_str) > best_len: + best = name + best_len = len(root_str) + + return best + + +def _infer_hash_for_target(target: str) -> Optional[str]: + """Infer SHA256 hash from Hydrus URL query, hash-named local files, or by hashing local file content.""" + h = _extract_hash_from_target(target) + if h: + return h + + try: + p = Path(target) + if not p.exists() or not p.is_file(): + return None + stem = p.stem + if isinstance(stem, str) and _HASH_RE.fullmatch(stem.strip()): + return stem.strip().lower() + from SYS.utils import sha256_file + + return sha256_file(p) + except Exception: + return None + + +def run_auto_overlay(*, mpv: MPV, poll_s: float = 0.15, config: Optional[dict] = None) -> int: + """Auto mode: track mpv's current file and render lyrics from store notes (note name: 'lyric').""" + cfg = config or {} + + client = mpv.client() + if not client.connect(): + _log("mpv IPC is not reachable (is mpv running with --input-ipc-server?).") + return 3 + + _log(f"Auto overlay connected (ipc={getattr(mpv, 'ipc_path', None)})") + + last_target: Optional[str] = None + current_store_name: Optional[str] = None + current_file_hash: Optional[str] = None + current_key: Optional[str] = None + current_backend: Optional[Any] = None + last_loaded_key: Optional[str] = None + last_fetch_attempt_key: Optional[str] = None + last_fetch_attempt_at: float = 0.0 + + entries: List[LrcLine] = [] + times: List[float] = [] + 
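+    # last_idx/last_text remember the most recently rendered lyric line so the
+    # overlay is only redrawn when the current line actually changes.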
last_idx: Optional[int] = None + last_text: Optional[str] = None + last_visible: Optional[bool] = None + + while True: + try: + # Toggle support (mpv Lua script sets this property; default to visible). + visible_raw = _ipc_get_property(client, _LYRIC_VISIBLE_PROP, True, raise_on_disconnect=True) + raw_path = _ipc_get_property(client, "path", None, raise_on_disconnect=True) + except ConnectionError: + try: + _osd_overlay_clear(client) + except Exception: + pass + try: + client.disconnect() + except Exception: + pass + if not client.connect(): + _log("mpv IPC disconnected; exiting MPV.lyric") + return 4 + time.sleep(poll_s) + continue + + visible = bool(visible_raw) if isinstance(visible_raw, (bool, int)) else True + + if last_visible is None: + last_visible = visible + elif last_visible is True and visible is False: + # Clear immediately when switching off. + try: + _osd_overlay_clear(client) + except Exception: + pass + last_idx = None + last_text = None + last_visible = visible + elif last_visible is False and visible is True: + # Force a refresh on next tick. + last_idx = None + last_text = None + last_visible = visible + else: + last_visible = visible + + target = _unwrap_memory_m3u(str(raw_path)) if isinstance(raw_path, str) else None + if isinstance(target, str): + target = _normalize_file_uri_target(target) + + if not isinstance(target, str) or not target: + time.sleep(poll_s) + continue + + is_http = target.startswith("http://") or target.startswith("https://") + if (not is_http) and _is_stream_target(target): + # Non-http streams (ytdl://, edl://, rtmp://, etc.) are never valid for lyrics. + if last_loaded_key is not None: + try: + _osd_overlay_clear(client) + except Exception: + pass + last_target = target + current_store_name = None + current_file_hash = None + current_key = None + current_backend = None + entries = [] + times = [] + last_loaded_key = None + time.sleep(poll_s) + continue + + if target != last_target: + last_target = target + last_idx = None + last_text = None + + _log(f"Target changed: {target}") + + current_file_hash = _infer_hash_for_target(target) + if not current_file_hash: + entries = [] + times = [] + if last_loaded_key is not None: + _osd_overlay_clear(client) + last_loaded_key = None + time.sleep(poll_s) + continue + + if is_http: + # HTTP/HTTPS targets are only valid if they map to a store backend. + store_from_url = _extract_store_from_url_target(target) + store_name = store_from_url or _infer_hydrus_store_from_url_target(target=target, config=cfg) + if not store_name: + _log("HTTP target has no store mapping; lyrics disabled") + current_store_name = None + current_backend = None + current_key = None + entries = [] + times = [] + if last_loaded_key is not None: + _osd_overlay_clear(client) + last_loaded_key = None + time.sleep(poll_s) + continue + + try: + from Store import Store as StoreRegistry + + reg = StoreRegistry(cfg, suppress_debug=True) + current_backend = reg[store_name] + current_store_name = store_name + except Exception: + _log(f"HTTP target store {store_name!r} not available; lyrics disabled") + current_store_name = None + current_backend = None + current_key = None + entries = [] + times = [] + if last_loaded_key is not None: + _osd_overlay_clear(client) + last_loaded_key = None + time.sleep(poll_s) + continue + + # Optional existence check: if metadata is unavailable, treat as not-a-store-item. 
+ try: + meta = current_backend.get_metadata(current_file_hash, config=cfg) + except Exception: + meta = None + if meta is None: + _log(f"HTTP target not found in store DB (store={store_name!r} hash={current_file_hash}); lyrics disabled") + current_store_name = None + current_backend = None + current_key = None + entries = [] + times = [] + if last_loaded_key is not None: + _osd_overlay_clear(client) + last_loaded_key = None + time.sleep(poll_s) + continue + + current_key = f"{current_store_name}:{current_file_hash}" + _log(f"Resolved store={current_store_name!r} hash={current_file_hash!r} valid=True") + + else: + # Local files: resolve store item via store DB. If not resolvable, lyrics are disabled. + current_store_name, current_backend = _resolve_store_backend_for_target( + target=target, + file_hash=current_file_hash, + config=cfg, + ) + current_key = f"{current_store_name}:{current_file_hash}" if current_store_name and current_file_hash else None + + _log(f"Resolved store={current_store_name!r} hash={current_file_hash!r} valid={bool(current_key)}") + + if not current_key or not current_backend: + current_store_name = None + current_backend = None + current_key = None + entries = [] + times = [] + if last_loaded_key is not None: + _osd_overlay_clear(client) + last_loaded_key = None + time.sleep(poll_s) + continue + + # Load/reload lyrics when we have a resolvable key and it differs from what we loaded. + # This is important for the autofetch path: the note can appear without the mpv target changing. + if current_key and current_key != last_loaded_key and current_store_name and current_file_hash and current_backend: + notes: Dict[str, str] = {} + try: + notes = current_backend.get_note(current_file_hash, config=cfg) or {} + except Exception: + notes = {} + + try: + _log(f"Loaded notes keys: {sorted([str(k) for k in notes.keys()]) if isinstance(notes, dict) else 'N/A'}") + except Exception: + _log("Loaded notes keys: ") + + lrc_text = _extract_lrc_from_notes(notes) + if not lrc_text: + _log("No lyric note found (note name: 'lyric')") + + # Auto-fetch path: fetch and persist lyrics into the note named 'lyric'. + # Throttle attempts per key to avoid hammering APIs. + autofetch_enabled = bool(cfg.get("lyric_autofetch", True)) + now = time.time() + if autofetch_enabled and current_key != last_fetch_attempt_key and (now - last_fetch_attempt_at) > 2.0: + last_fetch_attempt_key = current_key + last_fetch_attempt_at = now + + artist = None + title = None + duration_s = None + try: + duration_s = _ipc_get_property(client, "duration", None) + except Exception: + duration_s = None + + # Use store tags only (artist:/title:). No filename/metadata/media-title fallbacks. 
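+                        # Illustrative: tags like ["artist:Some Artist", "title:Some Song"]
+                        # resolve to artist="Some Artist", title="Some Song" via _infer_artist_title_from_tags().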
+ try: + tags, _src = current_backend.get_tag(current_file_hash, config=cfg) + if isinstance(tags, list): + artist, title = _infer_artist_title_from_tags([str(x) for x in tags]) + except Exception: + pass + + _log(f"Autofetch query artist={artist!r} title={title!r} duration={duration_s!r}") + + if not artist or not title: + _log("Autofetch skipped: requires both artist and title") + fetched = None + else: + fetched = _fetch_lrclib( + artist=artist, + title=title, + duration_s=float(duration_s) if isinstance(duration_s, (int, float)) else None, + ) + if not fetched or not fetched.strip(): + fetched = _fetch_lyrics_ovh(artist=artist, title=title) + if fetched and fetched.strip(): + try: + ok = bool(current_backend.set_note(current_file_hash, "lyric", fetched, config=cfg)) + _log(f"Autofetch stored lyric note ok={ok}") + # Next loop iteration will re-load the note. + except Exception as exc: + _log(f"Autofetch failed to store lyric note: {exc}") + else: + _log("Autofetch: no lyrics found") + + entries = [] + times = [] + if last_loaded_key is not None: + _osd_overlay_clear(client) + last_loaded_key = None + else: + _log(f"Loaded lyric note ({len(lrc_text)} chars)") + + parsed = parse_lrc(lrc_text) + entries = parsed + times = [e.time_s for e in entries] + last_loaded_key = current_key + + try: + # mpv returns None when idle/no file. + t = _ipc_get_property(client, "time-pos", None, raise_on_disconnect=True) + except ConnectionError: + try: + _osd_overlay_clear(client) + except Exception: + pass + try: + client.disconnect() + except Exception: + pass + if not client.connect(): + _log("mpv IPC disconnected; exiting MPV.lyric") + return 4 + time.sleep(poll_s) + continue + + if not isinstance(t, (int, float)): + time.sleep(poll_s) + continue + + if not entries: + time.sleep(poll_s) + continue + + if not visible: + time.sleep(poll_s) + continue + + idx = _current_index(float(t), times) + + if idx < 0: + time.sleep(poll_s) + continue + + line = entries[idx] + if idx != last_idx or line.text != last_text: + # osd-overlay has no duration; refresh periodically. + resp = _osd_overlay_set_ass(client, _format_lyric_as_subtitle(line.text)) + if resp is None: + client.disconnect() + if not client.connect(): + print("Lost mpv IPC connection.", file=sys.stderr) + return 4 + elif isinstance(resp, dict) and resp.get("error") not in (None, "success"): + try: + _log(f"mpv osd-overlay returned error={resp.get('error')!r}") + except Exception: + pass + last_idx = idx + last_text = line.text + + time.sleep(poll_s) + + +def run_overlay(*, mpv: MPV, entries: List[LrcLine], poll_s: float = 0.15) -> int: + if not entries: + print("No timestamped LRC lines found.", file=sys.stderr) + return 2 + + times = [e.time_s for e in entries] + last_idx: Optional[int] = None + last_text: Optional[str] = None + + client = mpv.client() + if not client.connect(): + print("mpv IPC is not reachable (is mpv running with --input-ipc-server?).", file=sys.stderr) + return 3 + + while True: + try: + # mpv returns None when idle/no file. + t = _ipc_get_property(client, "time-pos", None, raise_on_disconnect=True) + except ConnectionError: + try: + _osd_overlay_clear(client) + except Exception: + pass + try: + client.disconnect() + except Exception: + pass + if not client.connect(): + print("Lost mpv IPC connection.", file=sys.stderr) + return 4 + time.sleep(poll_s) + continue + + if not isinstance(t, (int, float)): + time.sleep(poll_s) + continue + + idx = _current_index(float(t), times) + + if idx < 0: + # Before first lyric timestamp. 
+ time.sleep(poll_s) + continue + + line = entries[idx] + if idx != last_idx or line.text != last_text: + # osd-overlay has no duration; refresh periodically. + resp = _osd_overlay_set_ass(client, _format_lyric_as_subtitle(line.text)) + if resp is None: + client.disconnect() + if not client.connect(): + print("Lost mpv IPC connection.", file=sys.stderr) + return 4 + elif isinstance(resp, dict) and resp.get("error") not in (None, "success"): + try: + _log(f"mpv osd-overlay returned error={resp.get('error')!r}") + except Exception: + pass + last_idx = idx + last_text = line.text + + time.sleep(poll_s) + + + +def main(argv: Optional[List[str]] = None) -> int: + parser = argparse.ArgumentParser(prog="python -m MPV.lyric", add_help=True) + parser.add_argument( + "--ipc", + default=None, + help="mpv IPC path. Defaults to the repo's fixed IPC pipe name.", + ) + parser.add_argument( + "--lrc", + default=None, + help="Path to an .lrc file. If omitted, reads LRC from stdin.", + ) + parser.add_argument( + "--poll", + type=float, + default=0.15, + help="Polling interval in seconds for time-pos updates.", + ) + parser.add_argument( + "--log", + default=None, + help="Optional path to a log file for diagnostics.", + ) + + args = parser.parse_args(argv) + + # Configure logging early. + global _LOG_FH + if args.log: + try: + log_path = Path(str(args.log)).expanduser().resolve() + log_path.parent.mkdir(parents=True, exist_ok=True) + _LOG_FH = open(log_path, "a", encoding="utf-8", errors="replace") + _log("MPV.lyric starting") + except Exception: + _LOG_FH = None + + mpv = MPV(ipc_path=args.ipc) if args.ipc else MPV() + + # Prevent multiple lyric helpers from running at once for the same mpv IPC. + if not _acquire_single_instance_lock(getattr(mpv, "ipc_path", "") or ""): + _log("Another MPV.lyric instance is already running for this IPC; exiting.") + return 0 + + # If --lrc is provided, use it. + if args.lrc: + with open(args.lrc, "r", encoding="utf-8", errors="replace") as f: + lrc_text = f.read() + entries = parse_lrc(lrc_text) + try: + return run_overlay(mpv=mpv, entries=entries, poll_s=float(args.poll)) + except KeyboardInterrupt: + return 0 + + # Otherwise: if stdin has content, treat it as LRC; if stdin is empty/TTY, auto-discover. 
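+    # Illustrative: `Get-Content .\song.lrc | python -m MPV.lyric` takes the stdin
+    # path below, while a bare `python -m MPV.lyric` falls through to auto mode.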
+ lrc_text = "" + try: + if not sys.stdin.isatty(): + lrc_text = _read_all_stdin() or "" + except Exception: + lrc_text = "" + + if lrc_text.strip(): + entries = parse_lrc(lrc_text) + try: + return run_overlay(mpv=mpv, entries=entries, poll_s=float(args.poll)) + except KeyboardInterrupt: + return 0 + + cfg = _load_config_best_effort() + try: + return run_auto_overlay(mpv=mpv, poll_s=float(args.poll), config=cfg) + except KeyboardInterrupt: + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) diff --git a/MPV/mpv_ipc.py b/MPV/mpv_ipc.py index 4e8a899..ed7fa73 100644 --- a/MPV/mpv_ipc.py +++ b/MPV/mpv_ipc.py @@ -12,6 +12,7 @@ import os import platform import socket import subprocess +import sys import time as _time from pathlib import Path from typing import Any, Dict, Optional, List, BinaryIO, cast @@ -24,6 +25,88 @@ FIXED_IPC_PIPE_NAME = "mpv-medeia-macina" MPV_LUA_SCRIPT_PATH = str(Path(__file__).resolve().parent / "LUA" / "main.lua") +_LYRIC_PROCESS: Optional[subprocess.Popen] = None +_LYRIC_LOG_FH: Optional[Any] = None + + +def _windows_list_lyric_helper_pids(ipc_path: str) -> List[int]: + """Return PIDs of `python -m MPV.lyric --ipc ` helpers (Windows only).""" + if platform.system() != "Windows": + return [] + try: + ipc_path = str(ipc_path or "") + except Exception: + ipc_path = "" + if not ipc_path: + return [] + + # Use CIM to query command lines; output as JSON for robust parsing. + # Note: `ConvertTo-Json` returns a number for single item, array for many, or null. + ps_script = ( + "$ipc = " + + json.dumps(ipc_path) + + "; " + "Get-CimInstance Win32_Process | " + "Where-Object { $_.CommandLine -and $_.CommandLine -match ' -m\\s+MPV\\.lyric(\\s|$)' -and $_.CommandLine -match ('--ipc\\s+' + [regex]::Escape($ipc)) } | " + "Select-Object -ExpandProperty ProcessId | ConvertTo-Json -Compress" + ) + + try: + out = subprocess.check_output( + ["powershell", "-NoProfile", "-Command", ps_script], + stdin=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + timeout=2, + text=True, + ) + except Exception: + return [] + + txt = (out or "").strip() + if not txt or txt == "null": + return [] + try: + obj = json.loads(txt) + except Exception: + return [] + + pids: List[int] = [] + if isinstance(obj, list): + for v in obj: + try: + pids.append(int(v)) + except Exception: + pass + else: + try: + pids.append(int(obj)) + except Exception: + pass + + # De-dupe and filter obvious junk. + uniq: List[int] = [] + for pid in pids: + if pid and pid > 0 and pid not in uniq: + uniq.append(pid) + return uniq + + +def _windows_kill_pids(pids: List[int]) -> None: + if platform.system() != "Windows": + return + for pid in pids or []: + try: + subprocess.run( + ["taskkill", "/PID", str(int(pid)), "/F"], + stdin=subprocess.DEVNULL, + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + timeout=2, + ) + except Exception: + continue + + class MPVIPCError(Exception): """Raised when MPV IPC communication fails.""" pass @@ -38,7 +121,7 @@ class MPV: - Query playlist and currently playing item via IPC This class intentionally stays "dumb": it does not implement app logic. - App behavior is driven by cmdlets (e.g. `.pipe`) and the bundled Lua script. + App behavior is driven by cmdlet (e.g. `.pipe`) and the bundled Lua script. 
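+
+    Example (illustrative):
+        mpv = MPV()  # uses the fixed IPC pipe name by default
+        if mpv.is_running():
+            mpv.send({"command": ["get_property", "path"]})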
""" def __init__( @@ -55,11 +138,11 @@ class MPV: lua_path = Path(str(lua_script_path)).resolve() self.lua_script_path = str(lua_path) - def client(self) -> "MPVIPCClient": - return MPVIPCClient(socket_path=self.ipc_path, timeout=self.timeout) + def client(self, silent: bool = False) -> "MPVIPCClient": + return MPVIPCClient(socket_path=self.ipc_path, timeout=self.timeout, silent=bool(silent)) def is_running(self) -> bool: - client = self.client() + client = self.client(silent=True) try: ok = client.connect() return bool(ok) @@ -67,7 +150,7 @@ class MPV: client.disconnect() def send(self, command: Dict[str, Any] | List[Any], silent: bool = False) -> Optional[Dict[str, Any]]: - client = self.client() + client = self.client(silent=bool(silent)) try: if not client.connect(): return None @@ -136,9 +219,109 @@ class MPV: except Exception: return + def ensure_lyric_loader_running(self) -> None: + """Start (or keep) the Python lyric overlay helper. + + Uses the fixed IPC pipe name so it can follow playback. + """ + global _LYRIC_PROCESS, _LYRIC_LOG_FH + + # Cross-session guard (Windows): avoid spawning multiple helpers across separate CLI runs. + # Also clean up stale helpers when mpv isn't running anymore. + if platform.system() == "Windows": + try: + existing = _windows_list_lyric_helper_pids(str(self.ipc_path)) + if existing: + if not self.is_running(): + _windows_kill_pids(existing) + return + # If multiple exist, kill them and start fresh (prevents double overlays). + if len(existing) == 1: + return + _windows_kill_pids(existing) + except Exception: + pass + + try: + if _LYRIC_PROCESS is not None and _LYRIC_PROCESS.poll() is None: + return + except Exception: + pass + + try: + if _LYRIC_PROCESS is not None: + try: + _LYRIC_PROCESS.terminate() + except Exception: + pass + finally: + _LYRIC_PROCESS = None + try: + if _LYRIC_LOG_FH is not None: + _LYRIC_LOG_FH.close() + except Exception: + pass + _LYRIC_LOG_FH = None + + try: + try: + tmp_dir = Path(os.environ.get("TEMP") or os.environ.get("TMP") or ".") + except Exception: + tmp_dir = Path(".") + log_path = str((tmp_dir / "medeia-mpv-lyric.log").resolve()) + + # Ensure the module can be imported even when the app is launched from a different cwd. + # Repo root = parent of the MPV package directory. + try: + repo_root = Path(__file__).resolve().parent.parent + except Exception: + repo_root = Path.cwd() + + cmd: List[str] = [ + sys.executable, + "-m", + "MPV.lyric", + "--ipc", + str(self.ipc_path), + "--log", + log_path, + ] + + # Redirect helper stdout/stderr to the log file so we can see crashes/import errors. + try: + _LYRIC_LOG_FH = open(log_path, "a", encoding="utf-8", errors="replace") + except Exception: + _LYRIC_LOG_FH = None + + kwargs: Dict[str, Any] = { + "stdin": subprocess.DEVNULL, + "stdout": _LYRIC_LOG_FH or subprocess.DEVNULL, + "stderr": _LYRIC_LOG_FH or subprocess.DEVNULL, + } + + # Ensure immediate flushing to the log file. + env = os.environ.copy() + env["PYTHONUNBUFFERED"] = "1" + try: + existing_pp = env.get("PYTHONPATH") + env["PYTHONPATH"] = str(repo_root) if not existing_pp else (str(repo_root) + os.pathsep + str(existing_pp)) + except Exception: + pass + kwargs["env"] = env + + # Make the current directory the repo root so `-m MPV.lyric` resolves reliably. 
+ kwargs["cwd"] = str(repo_root) + if platform.system() == "Windows": + kwargs["creationflags"] = 0x00000008 # DETACHED_PROCESS + + _LYRIC_PROCESS = subprocess.Popen(cmd, **kwargs) + debug(f"Lyric loader started (log={log_path})") + except Exception as exc: + debug(f"Lyric loader failed to start: {exc}") + def wait_for_ipc(self, retries: int = 20, delay_seconds: float = 0.2) -> bool: for _ in range(max(1, retries)): - client = self.client() + client = self.client(silent=True) try: if client.connect(): return True @@ -233,7 +416,7 @@ class MPVIPCClient: It handles platform-specific differences (Windows named pipes vs Unix sockets). """ - def __init__(self, socket_path: Optional[str] = None, timeout: float = 5.0): + def __init__(self, socket_path: Optional[str] = None, timeout: float = 5.0, silent: bool = False): """Initialize MPV IPC client. Args: @@ -244,6 +427,7 @@ class MPVIPCClient: self.socket_path = socket_path or get_ipc_pipe_path() self.sock: socket.socket | BinaryIO | None = None self.is_windows = platform.system() == "Windows" + self.silent = bool(silent) def connect(self) -> bool: """Connect to mpv IPC socket. @@ -259,17 +443,20 @@ class MPVIPCClient: self.sock = open(self.socket_path, 'r+b', buffering=0) return True except (OSError, IOError) as exc: - debug(f"Failed to connect to MPV named pipe: {exc}") + if not self.silent: + debug(f"Failed to connect to MPV named pipe: {exc}") return False else: # Unix domain socket (Linux, macOS) if not os.path.exists(self.socket_path): - debug(f"IPC socket not found: {self.socket_path}") + if not self.silent: + debug(f"IPC socket not found: {self.socket_path}") return False af_unix = getattr(socket, "AF_UNIX", None) if af_unix is None: - debug("IPC AF_UNIX is not available on this platform") + if not self.silent: + debug("IPC AF_UNIX is not available on this platform") return False self.sock = socket.socket(af_unix, socket.SOCK_STREAM) @@ -277,7 +464,8 @@ class MPVIPCClient: self.sock.connect(self.socket_path) return True except Exception as exc: - debug(f"Failed to connect to MPV IPC: {exc}") + if not self.silent: + debug(f"Failed to connect to MPV IPC: {exc}") self.sock = None return False diff --git a/Provider/__init__.py b/Provider/__init__.py index 31d441a..6c8aad4 100644 --- a/Provider/__init__.py +++ b/Provider/__init__.py @@ -1,5 +1,5 @@ """Provider plugin modules. Concrete provider implementations live in this package. -The public entrypoint/registry is Provider.registry. +The public entrypoint/registry is ProviderCore.registry. 
""" diff --git a/Provider/bandcamp.py b/Provider/bandcamp.py index 52f7def..cf5a99a 100644 --- a/Provider/bandcamp.py +++ b/Provider/bandcamp.py @@ -3,7 +3,7 @@ from __future__ import annotations import sys from typing import Any, Dict, List, Optional -from Provider._base import SearchProvider, SearchResult +from ProviderCore.base import SearchProvider, SearchResult from SYS.logger import log, debug try: diff --git a/Provider/libgen.py b/Provider/libgen.py index d261e7c..303b6d4 100644 --- a/Provider/libgen.py +++ b/Provider/libgen.py @@ -1,12 +1,24 @@ from __future__ import annotations +import logging +import re +import requests import sys -from typing import Any, Dict, List, Optional +from pathlib import Path +from typing import Any, Callable, Dict, List, Optional, Tuple +from urllib.parse import quote, urljoin, urlparse, unquote -from Provider._base import SearchProvider, SearchResult +from ProviderCore.base import SearchProvider, SearchResult from SYS.logger import log +# Optional dependencies +try: + from bs4 import BeautifulSoup +except ImportError: + BeautifulSoup = None + + class Libgen(SearchProvider): """Search provider for Library Genesis books.""" @@ -20,8 +32,7 @@ class Libgen(SearchProvider): filters = filters or {} try: - from Provider.unified_book_downloader import UnifiedBookDownloader - from Provider.query_parser import parse_query, get_field, get_free_text + from cli_syntax import get_field, get_free_text, parse_query parsed = parse_query(query) isbn = get_field(parsed, "isbn") @@ -31,8 +42,11 @@ class Libgen(SearchProvider): search_query = isbn or title or author or free_text or query - downloader = UnifiedBookDownloader(config=self.config) - books = downloader.search_libgen(search_query, limit=limit) + books = search_libgen( + search_query, + limit=limit, + log_error=lambda msg: log(msg, file=sys.stderr), + ) results: List[SearchResult] = [] for idx, book in enumerate(books, 1): @@ -91,8 +105,455 @@ class Libgen(SearchProvider): def validate(self) -> bool: try: - from Provider.unified_book_downloader import UnifiedBookDownloader # noqa: F401 - - return True + return BeautifulSoup is not None except Exception: return False + + +LogFn = Optional[Callable[[str], None]] +ErrorFn = Optional[Callable[[str], None]] + +DEFAULT_TIMEOUT = 20.0 +DEFAULT_LIMIT = 50 + +# Mirrors to try in order +MIRRORS = [ + "https://libgen.is", + "https://libgen.rs", + "https://libgen.st", + "http://libgen.is", + "http://libgen.rs", + "http://libgen.st", + "https://libgen.li", # Different structure, fallback + "http://libgen.li", + "https://libgen.gl", # Different structure, fallback + "http://libgen.gl", +] + +logging.getLogger(__name__).setLevel(logging.INFO) + + +def _call(logger: LogFn, message: str) -> None: + if logger: + logger(message) + + +class LibgenSearch: + """Robust LibGen searcher.""" + + def __init__(self, session: Optional[requests.Session] = None): + self.session = session or requests.Session() + self.session.headers.update({ + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36" + }) + + def search(self, query: str, limit: int = DEFAULT_LIMIT) -> List[Dict[str, Any]]: + """Search LibGen mirrors.""" + if not BeautifulSoup: + logging.error("BeautifulSoup not installed. 
Cannot search LibGen.") + return [] + + for mirror in MIRRORS: + try: + if "libgen.li" in mirror or "libgen.gl" in mirror: + results = self._search_libgen_li(mirror, query, limit) + else: + results = self._search_libgen_rs(mirror, query, limit) + + if results: + return results + except Exception as e: + logging.debug(f"Mirror {mirror} failed: {e}") + continue + + return [] + + def _search_libgen_rs(self, mirror: str, query: str, limit: int) -> List[Dict[str, Any]]: + """Search libgen.rs/is/st style mirrors.""" + url = f"{mirror}/search.php" + params = { + "req": query, + "res": 100, + "column": "def", + "open": 0, + "view": "simple", + "phrase": 1, + } + + resp = self.session.get(url, params=params, timeout=DEFAULT_TIMEOUT) + resp.raise_for_status() + + soup = BeautifulSoup(resp.text, "html.parser") + + table = soup.find("table", {"class": "c"}) + if not table: + tables = soup.find_all("table") + for t in tables: + if len(t.find_all("tr")) > 5: + table = t + break + + if not table: + return [] + + results: List[Dict[str, Any]] = [] + rows = table.find_all("tr")[1:] + + for row in rows: + cols = row.find_all("td") + if len(cols) < 9: + continue + + try: + libgen_id = cols[0].get_text(strip=True) + authors = [a.get_text(strip=True) for a in cols[1].find_all("a")] + if not authors: + authors = [cols[1].get_text(strip=True)] + + title_tag = cols[2].find("a") + title = title_tag.get_text(strip=True) if title_tag else cols[2].get_text(strip=True) + + md5 = "" + if title_tag and title_tag.has_attr("href"): + href = title_tag["href"] + match = re.search(r"md5=([a-fA-F0-9]{32})", href) + if match: + md5 = match.group(1) + + publisher = cols[3].get_text(strip=True) + year = cols[4].get_text(strip=True) + pages = cols[5].get_text(strip=True) + language = cols[6].get_text(strip=True) + size = cols[7].get_text(strip=True) + extension = cols[8].get_text(strip=True) + + mirror_links = [] + for i in range(9, len(cols)): + a = cols[i].find("a") + if a and a.has_attr("href"): + mirror_links.append(a["href"]) + + if md5: + download_link = f"http://library.lol/main/{md5}" + elif mirror_links: + download_link = mirror_links[0] + else: + download_link = "" + + results.append({ + "id": libgen_id, + "title": title, + "author": ", ".join(authors), + "publisher": publisher, + "year": year, + "pages": pages, + "language": language, + "filesize_str": size, + "extension": extension, + "md5": md5, + "mirror_url": download_link, + "cover": "", + }) + + if len(results) >= limit: + break + + except Exception as e: + logging.debug(f"Error parsing row: {e}") + continue + + return results + + def _search_libgen_li(self, mirror: str, query: str, limit: int) -> List[Dict[str, Any]]: + """Search libgen.li/gl style mirrors.""" + url = f"{mirror}/index.php" + params = { + "req": query, + "res": 100, + "covers": "on", + "filesuns": "all", + } + + resp = self.session.get(url, params=params, timeout=DEFAULT_TIMEOUT) + resp.raise_for_status() + + soup = BeautifulSoup(resp.text, "html.parser") + table = soup.find("table", {"id": "tablelibgen"}) + if not table: + table = soup.find("table", {"class": "table table-striped"}) + + if not table: + return [] + + results: List[Dict[str, Any]] = [] + rows = table.find_all("tr")[1:] + + for row in rows: + cols = row.find_all("td") + if len(cols) < 9: + continue + + try: + title_col = cols[1] + title_link = title_col.find("a") + title = title_link.get_text(strip=True) if title_link else title_col.get_text(strip=True) + + libgen_id = "" + if title_link and title_link.has_attr("href"): + href = 
title_link["href"] + match = re.search(r"id=(\d+)", href) + if match: + libgen_id = match.group(1) + + authors = cols[2].get_text(strip=True) + publisher = cols[3].get_text(strip=True) + year = cols[4].get_text(strip=True) + language = cols[5].get_text(strip=True) + pages = cols[6].get_text(strip=True) + size = cols[7].get_text(strip=True) + extension = cols[8].get_text(strip=True) + + mirror_url = "" + if title_link: + href = title_link["href"] + if href.startswith("/"): + mirror_url = mirror + href + else: + mirror_url = urljoin(mirror, href) + + results.append({ + "id": libgen_id, + "title": title, + "author": authors, + "publisher": publisher, + "year": year, + "pages": pages, + "language": language, + "filesize_str": size, + "extension": extension, + "md5": "", + "mirror_url": mirror_url, + }) + + if len(results) >= limit: + break + except Exception: + continue + + return results + + +def search_libgen( + query: str, + limit: int = DEFAULT_LIMIT, + *, + log_info: LogFn = None, + log_error: ErrorFn = None, + session: Optional[requests.Session] = None, +) -> List[Dict[str, Any]]: + """Search Libgen using the robust scraper.""" + searcher = LibgenSearch(session=session) + try: + results = searcher.search(query, limit=limit) + _call(log_info, f"[libgen] Found {len(results)} results") + return results + except Exception as e: + _call(log_error, f"[libgen] Search failed: {e}") + return [] + + +def _resolve_download_url( + session: requests.Session, + url: str, + log_info: LogFn = None, +) -> Optional[str]: + """Resolve the final download URL by following the LibGen chain.""" + current_url = url + visited = set() + + for _ in range(6): + if current_url in visited: + break + visited.add(current_url) + + _call(log_info, f"[resolve] Checking: {current_url}") + + if current_url.lower().endswith((".pdf", ".epub", ".mobi", ".djvu", ".azw3", ".cbz", ".cbr")): + return current_url + + try: + with session.get(current_url, stream=True, timeout=30) as resp: + resp.raise_for_status() + ct = resp.headers.get("Content-Type", "").lower() + + if "text/html" not in ct: + return current_url + + content = resp.text + except Exception as e: + _call(log_info, f"[resolve] Failed to fetch {current_url}: {e}") + return None + + soup = BeautifulSoup(content, "html.parser") + + get_link = soup.find("a", string=re.compile(r"^GET$", re.IGNORECASE)) + if not get_link: + h2_get = soup.find("h2", string=re.compile(r"^GET$", re.IGNORECASE)) + if h2_get and h2_get.parent.name == "a": + get_link = h2_get.parent + + if get_link and get_link.has_attr("href"): + return urljoin(current_url, get_link["href"]) + + if "series.php" in current_url: + edition_link = soup.find("a", href=re.compile(r"edition\.php")) + if edition_link: + current_url = urljoin(current_url, edition_link["href"]) + continue + + if "edition.php" in current_url: + file_link = soup.find("a", href=re.compile(r"file\.php")) + if file_link: + current_url = urljoin(current_url, file_link["href"]) + continue + + if "file.php" in current_url: + libgen_link = soup.find("a", title="libgen") + if not libgen_link: + libgen_link = soup.find("a", string=re.compile(r"Libgen", re.IGNORECASE)) + + if libgen_link and libgen_link.has_attr("href"): + current_url = urljoin(current_url, libgen_link["href"]) + continue + + if "ads.php" in current_url: + get_php_link = soup.find("a", href=re.compile(r"get\.php")) + if get_php_link: + return urljoin(current_url, get_php_link["href"]) + + for text in ["Cloudflare", "IPFS.io", "Infura"]: + link = soup.find("a", string=re.compile(text, 
re.IGNORECASE)) + if link and link.has_attr("href"): + return urljoin(current_url, link["href"]) + + break + + return None + + +def _guess_filename_extension(download_url: str, headers: Dict[str, str]) -> Optional[str]: + """Guess the file extension from headers or the download URL.""" + content_disposition = headers.get("content-disposition", "") + if content_disposition: + match = re.search(r"filename\*?=(?:UTF-8\'\'|\"?)([^\";]+)", content_disposition, flags=re.IGNORECASE) + if match: + filename = unquote(match.group(1).strip('"')) + suffix = Path(filename).suffix + if suffix: + return suffix.lstrip(".") + + parsed = urlparse(download_url) + suffix = Path(parsed.path).suffix + if suffix: + return suffix.lstrip(".") + + content_type = headers.get("content-type", "").lower() + mime_map = { + "application/pdf": "pdf", + "application/epub+zip": "epub", + "application/x-mobipocket-ebook": "mobi", + "application/x-cbr": "cbr", + "application/x-cbz": "cbz", + "application/zip": "zip", + } + + for mime, ext in mime_map.items(): + if mime in content_type: + return ext + + return None + + +def _apply_extension(path: Path, extension: Optional[str]) -> Path: + """Rename the path to match the detected extension, if needed.""" + if not extension: + return path + + suffix = extension if extension.startswith(".") else f".{extension}" + if path.suffix.lower() == suffix.lower(): + return path + + candidate = path.with_suffix(suffix) + base_stem = path.stem + counter = 1 + while candidate.exists() and counter < 100: + candidate = path.with_name(f"{base_stem}({counter}){suffix}") + counter += 1 + + try: + path.replace(candidate) + return candidate + except Exception: + return path + + +def download_from_mirror( + mirror_url: str, + output_path: Path, + *, + log_info: LogFn = None, + log_error: ErrorFn = None, + session: Optional[requests.Session] = None, + progress_callback: Optional[Callable[[int, int], None]] = None, +) -> Tuple[bool, Optional[Path]]: + """Download file from a LibGen mirror URL with optional progress tracking.""" + session = session or requests.Session() + output_path = Path(output_path) + output_path.parent.mkdir(parents=True, exist_ok=True) + + try: + _call(log_info, f"[download] Resolving download link from: {mirror_url}") + + download_url = _resolve_download_url(session, mirror_url, log_info) + + if not download_url: + _call(log_error, "[download] Could not find direct download link") + return False, None + + _call(log_info, f"[download] Downloading from: {download_url}") + + downloaded = 0 + total_size = 0 + headers: Dict[str, str] = {} + + with session.get(download_url, stream=True, timeout=60) as r: + r.raise_for_status() + headers = dict(r.headers) + + ct = headers.get("content-type", "").lower() + if "text/html" in ct: + _call(log_error, "[download] Final URL returned HTML, not a file.") + return False, None + + total_size = int(headers.get("content-length", 0) or 0) + + with open(output_path, "wb") as f: + for chunk in r.iter_content(chunk_size=8192): + if chunk: + f.write(chunk) + downloaded += len(chunk) + if progress_callback: + progress_callback(downloaded, total_size) + + final_extension = _guess_filename_extension(download_url, headers) + final_path = _apply_extension(output_path, final_extension) + + if progress_callback and total_size > 0: + progress_callback(downloaded, total_size) + + _call(log_info, f"[download] Saved to {final_path}") + return True, final_path + + except Exception as e: + _call(log_error, f"[download] Download failed: {e}") + return False, None diff 
--git a/Provider/libgen_service.py b/Provider/libgen_service.py deleted file mode 100644 index 9a8ddfe..0000000 --- a/Provider/libgen_service.py +++ /dev/null @@ -1,523 +0,0 @@ -"""Shared Library Genesis search and download helpers. - -Replaces the old libgen backend with a robust scraper based on libgen-api-enhanced logic. -Targets libgen.is/rs/st mirrors and parses the results table directly. -""" -from __future__ import annotations - -import logging -import re -import requests -from pathlib import Path -from typing import Any, Callable, Dict, List, Optional, Tuple -from urllib.parse import quote, urljoin, urlparse, unquote - -# Optional dependencies -try: - from bs4 import BeautifulSoup -except ImportError: - BeautifulSoup = None - -LogFn = Optional[Callable[[str], None]] -ErrorFn = Optional[Callable[[str], None]] - -DEFAULT_TIMEOUT = 20.0 -DEFAULT_LIMIT = 50 - -# Mirrors to try in order -MIRRORS = [ - "https://libgen.is", - "https://libgen.rs", - "https://libgen.st", - "http://libgen.is", - "http://libgen.rs", - "http://libgen.st", - "https://libgen.li", # Different structure, fallback - "http://libgen.li", - "https://libgen.gl", # Different structure, fallback - "http://libgen.gl", -] - -logging.getLogger(__name__).setLevel(logging.INFO) - - -def _call(logger: LogFn, message: str) -> None: - if logger: - logger(message) - - -class LibgenSearch: - """Robust LibGen searcher.""" - - def __init__(self, session: Optional[requests.Session] = None): - self.session = session or requests.Session() - self.session.headers.update({ - "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36" - }) - - def search(self, query: str, limit: int = DEFAULT_LIMIT) -> List[Dict[str, Any]]: - """Search LibGen mirrors.""" - if not BeautifulSoup: - logging.error("BeautifulSoup not installed. Cannot search LibGen.") - return [] - - for mirror in MIRRORS: - try: - if "libgen.li" in mirror or "libgen.gl" in mirror: - results = self._search_libgen_li(mirror, query, limit) - else: - results = self._search_libgen_rs(mirror, query, limit) - - if results: - return results - except Exception as e: - logging.debug(f"Mirror {mirror} failed: {e}") - continue - - return [] - - def _search_libgen_rs(self, mirror: str, query: str, limit: int) -> List[Dict[str, Any]]: - """Search libgen.rs/is/st style mirrors.""" - # Search URL: /search.php?req=QUERY&res=100&column=def - url = f"{mirror}/search.php" - params = { - "req": query, - "res": 100, # Request more to filter later - "column": "def", - "open": 0, - "view": "simple", - "phrase": 1, - } - - resp = self.session.get(url, params=params, timeout=DEFAULT_TIMEOUT) - resp.raise_for_status() - - soup = BeautifulSoup(resp.text, "html.parser") - - # Find the table with results. 
usually class 'c' - table = soup.find("table", {"class": "c"}) - if not table: - # Try finding by structure (table with many rows) - tables = soup.find_all("table") - for t in tables: - if len(t.find_all("tr")) > 5: - table = t - break - - if not table: - return [] - - results = [] - # Skip header row - rows = table.find_all("tr")[1:] - - for row in rows: - cols = row.find_all("td") - if len(cols) < 9: - continue - - # Columns: - # 0: ID - # 1: Author(s) - # 2: Title - # 3: Publisher - # 4: Year - # 5: Pages - # 6: Language - # 7: Size - # 8: Extension - # 9+: Mirrors - - try: - libgen_id = cols[0].get_text(strip=True) - authors = [a.get_text(strip=True) for a in cols[1].find_all("a")] - if not authors: - authors = [cols[1].get_text(strip=True)] - - title_tag = cols[2].find("a") - title = title_tag.get_text(strip=True) if title_tag else cols[2].get_text(strip=True) - - # Extract MD5 from title link if possible (often in href) - # href='book/index.php?md5=...' - md5 = "" - if title_tag and title_tag.has_attr("href"): - href = title_tag["href"] - match = re.search(r"md5=([a-fA-F0-9]{32})", href) - if match: - md5 = match.group(1) - - publisher = cols[3].get_text(strip=True) - year = cols[4].get_text(strip=True) - pages = cols[5].get_text(strip=True) - language = cols[6].get_text(strip=True) - size = cols[7].get_text(strip=True) - extension = cols[8].get_text(strip=True) - - # Mirrors - # Usually col 9 is http://library.lol/main/MD5 - mirror_links = [] - for i in range(9, len(cols)): - a = cols[i].find("a") - if a and a.has_attr("href"): - mirror_links.append(a["href"]) - - # Construct direct download page link (library.lol) - # If we have MD5, we can guess it: http://library.lol/main/{md5} - if md5: - download_link = f"http://library.lol/main/{md5}" - elif mirror_links: - download_link = mirror_links[0] - else: - download_link = "" - - results.append({ - "id": libgen_id, - "title": title, - "author": ", ".join(authors), - "publisher": publisher, - "year": year, - "pages": pages, - "language": language, - "filesize_str": size, - "extension": extension, - "md5": md5, - "mirror_url": download_link, - "cover": "", # Could extract from hover if needed - }) - - if len(results) >= limit: - break - - except Exception as e: - logging.debug(f"Error parsing row: {e}") - continue - - return results - - def _search_libgen_li(self, mirror: str, query: str, limit: int) -> List[Dict[str, Any]]: - """Search libgen.li/gl style mirrors.""" - # Search URL: /index.php?req=QUERY&columns[]=t&columns[]=a... - url = f"{mirror}/index.php" - params = { - "req": query, - "res": 100, - "covers": "on", - "filesuns": "all", - } - - resp = self.session.get(url, params=params, timeout=DEFAULT_TIMEOUT) - resp.raise_for_status() - - soup = BeautifulSoup(resp.text, "html.parser") - table = soup.find("table", {"id": "tablelibgen"}) - if not table: - table = soup.find("table", {"class": "table table-striped"}) - - if not table: - return [] - - results = [] - rows = table.find_all("tr")[1:] - - for row in rows: - cols = row.find_all("td") - if len(cols) < 9: - continue - - try: - # Structure is different - # 0: Cover - # 1: Title (with link to file.php?id=...) 
- # 2: Author - # 3: Publisher - # 4: Year - # 5: Language - # 6: Pages - # 7: Size - # 8: Extension - # 9: Mirrors - - title_col = cols[1] - title_link = title_col.find("a") - title = title_link.get_text(strip=True) if title_link else title_col.get_text(strip=True) - - # Extract ID from link - libgen_id = "" - if title_link and title_link.has_attr("href"): - href = title_link["href"] - # href is usually "file.php?id=..." or "edition.php?id=..." - match = re.search(r"id=(\d+)", href) - if match: - libgen_id = match.group(1) - - authors = cols[2].get_text(strip=True) - publisher = cols[3].get_text(strip=True) - year = cols[4].get_text(strip=True) - language = cols[5].get_text(strip=True) - pages = cols[6].get_text(strip=True) - size = cols[7].get_text(strip=True) - extension = cols[8].get_text(strip=True) - - # Mirror link - # Usually in col 9 or title link - mirror_url = "" - if title_link: - href = title_link["href"] - if href.startswith("/"): - mirror_url = mirror + href - else: - mirror_url = urljoin(mirror, href) - - results.append({ - "id": libgen_id, - "title": title, - "author": authors, - "publisher": publisher, - "year": year, - "pages": pages, - "language": language, - "filesize_str": size, - "extension": extension, - "md5": "", # .li doesn't show MD5 easily in table - "mirror_url": mirror_url, - }) - - if len(results) >= limit: - break - except Exception: - continue - - return results - - -def search_libgen( - query: str, - limit: int = DEFAULT_LIMIT, - *, - log_info: LogFn = None, - log_error: ErrorFn = None, - session: Optional[requests.Session] = None, -) -> List[Dict[str, Any]]: - """Search Libgen using the robust scraper.""" - searcher = LibgenSearch(session=session) - try: - results = searcher.search(query, limit=limit) - _call(log_info, f"[libgen] Found {len(results)} results") - return results - except Exception as e: - _call(log_error, f"[libgen] Search failed: {e}") - return [] - - -def _resolve_download_url( - session: requests.Session, - url: str, - log_info: LogFn = None -) -> Optional[str]: - """Resolve the final download URL by following the LibGen chain.""" - current_url = url - visited = set() - - # Max hops to prevent infinite loops - for _ in range(6): - if current_url in visited: - break - visited.add(current_url) - - _call(log_info, f"[resolve] Checking: {current_url}") - - # Simple heuristic: if it looks like a file, return it - if current_url.lower().endswith(('.pdf', '.epub', '.mobi', '.djvu', '.azw3', '.cbz', '.cbr')): - return current_url - - try: - # Use HEAD first to check content type if possible, but some mirrors block HEAD or return 405 - # So we'll just GET with stream=True to peek headers/content without downloading everything - with session.get(current_url, stream=True, timeout=30) as resp: - resp.raise_for_status() - ct = resp.headers.get("Content-Type", "").lower() - - if "text/html" not in ct: - # It's a binary file - return current_url - - # It's HTML, read content - content = resp.text - except Exception as e: - _call(log_info, f"[resolve] Failed to fetch {current_url}: {e}") - return None - - soup = BeautifulSoup(content, "html.parser") - - # 1. Check for "GET" link (library.lol / ads.php style) - # Usually
<a href="..."><h2>GET</h2></a> inside or just text "GET" - get_link = soup.find("a", string=re.compile(r"^GET$", re.IGNORECASE)) - if not get_link: - # Try finding <a> containing <h2>GET</h2>
- h2_get = soup.find("h2", string=re.compile(r"^GET$", re.IGNORECASE)) - if h2_get and h2_get.parent.name == "a": - get_link = h2_get.parent - - if get_link and get_link.has_attr("href"): - return urljoin(current_url, get_link["href"]) - - # 2. Check for "series.php" -> "edition.php" - if "series.php" in current_url: - # Find first edition link - edition_link = soup.find("a", href=re.compile(r"edition\.php")) - if edition_link: - current_url = urljoin(current_url, edition_link["href"]) - continue - - # 3. Check for "edition.php" -> "file.php" - if "edition.php" in current_url: - file_link = soup.find("a", href=re.compile(r"file\.php")) - if file_link: - current_url = urljoin(current_url, file_link["href"]) - continue - - # 4. Check for "file.php" -> "ads.php" (Libgen badge) - if "file.php" in current_url: - # Look for link with title="libgen" or text "Libgen" - libgen_link = soup.find("a", title="libgen") - if not libgen_link: - libgen_link = soup.find("a", string=re.compile(r"Libgen", re.IGNORECASE)) - - if libgen_link and libgen_link.has_attr("href"): - current_url = urljoin(current_url, libgen_link["href"]) - continue - - # 5. Check for "ads.php" -> "get.php" (Fallback if GET link logic above failed) - if "ads.php" in current_url: - get_php_link = soup.find("a", href=re.compile(r"get\.php")) - if get_php_link: - return urljoin(current_url, get_php_link["href"]) - - # 6. Library.lol / generic fallback - for text in ["Cloudflare", "IPFS.io", "Infura"]: - link = soup.find("a", string=re.compile(text, re.IGNORECASE)) - if link and link.has_attr("href"): - return urljoin(current_url, link["href"]) - - # If we found nothing new, stop - break - - return None - - -def _guess_filename_extension(download_url: str, headers: Dict[str, str]) -> Optional[str]: - """Guess the file extension from headers or the download URL.""" - content_disposition = headers.get("content-disposition", "") - if content_disposition: - match = re.search(r'filename\*?=(?:UTF-8\'\'|"?)([^";]+)', content_disposition, flags=re.IGNORECASE) - if match: - filename = unquote(match.group(1).strip('"')) - suffix = Path(filename).suffix - if suffix: - return suffix.lstrip('.') - - parsed = urlparse(download_url) - suffix = Path(parsed.path).suffix - if suffix: - return suffix.lstrip('.') - - content_type = headers.get('content-type', '').lower() - mime_map = { - 'application/pdf': 'pdf', - 'application/epub+zip': 'epub', - 'application/x-mobipocket-ebook': 'mobi', - 'application/x-cbr': 'cbr', - 'application/x-cbz': 'cbz', - 'application/zip': 'zip', - } - - for mime, ext in mime_map.items(): - if mime in content_type: - return ext - - return None - - -def _apply_extension(path: Path, extension: Optional[str]) -> Path: - """Rename the path to match the detected extension, if needed.""" - if not extension: - return path - - suffix = extension if extension.startswith('.') else f'.{extension}' - if path.suffix.lower() == suffix.lower(): - return path - - candidate = path.with_suffix(suffix) - base_stem = path.stem - counter = 1 - while candidate.exists() and counter < 100: - candidate = path.with_name(f"{base_stem}({counter}){suffix}") - counter += 1 - - try: - path.replace(candidate) - return candidate - except Exception: - return path - -def download_from_mirror( - mirror_url: str, - output_path: Path, - *, - log_info: LogFn = None, - log_error: ErrorFn = None, - session: Optional[requests.Session] = None, - progress_callback: Optional[Callable[[int, int], None]] = None, -) -> Tuple[bool, Optional[Path]]: - """Download file from a 
LibGen mirror URL with optional progress tracking.""" - session = session or requests.Session() - output_path = Path(output_path) - output_path.parent.mkdir(parents=True, exist_ok=True) - - try: - _call(log_info, f"[download] Resolving download link from: {mirror_url}") - - download_url = _resolve_download_url(session, mirror_url, log_info) - - if not download_url: - _call(log_error, "[download] Could not find direct download link") - return False, None - - _call(log_info, f"[download] Downloading from: {download_url}") - - downloaded = 0 - total_size = 0 - headers: Dict[str, str] = {} - - with session.get(download_url, stream=True, timeout=60) as r: - r.raise_for_status() - headers = dict(r.headers) - - # Verify it's not HTML (error page) - ct = headers.get("content-type", "").lower() - if "text/html" in ct: - _call(log_error, "[download] Final URL returned HTML, not a file.") - return False, None - - total_size = int(headers.get("content-length", 0) or 0) - - with open(output_path, "wb") as f: - for chunk in r.iter_content(chunk_size=8192): - if chunk: - f.write(chunk) - downloaded += len(chunk) - if progress_callback: - progress_callback(downloaded, total_size) - - final_extension = _guess_filename_extension(download_url, headers) - final_path = _apply_extension(output_path, final_extension) - - if progress_callback and total_size > 0: - progress_callback(downloaded, total_size) - - _call(log_info, f"[download] Saved to {final_path}") - return True, final_path - - except Exception as e: - _call(log_error, f"[download] Download failed: {e}") - return False, None diff --git a/Provider/matrix.py b/Provider/matrix.py index eb77e45..b645921 100644 --- a/Provider/matrix.py +++ b/Provider/matrix.py @@ -6,7 +6,7 @@ from typing import Any import requests -from Provider._base import FileProvider +from ProviderCore.base import FileProvider class Matrix(FileProvider): diff --git a/Provider/openlibrary.py b/Provider/openlibrary.py new file mode 100644 index 0000000..38f842a --- /dev/null +++ b/Provider/openlibrary.py @@ -0,0 +1,358 @@ +from __future__ import annotations + +import shutil +import sys +import tempfile +from pathlib import Path +from typing import Any, Dict, List, Optional, Tuple + +import requests + +from ProviderCore.base import SearchProvider, SearchResult +from ProviderCore.download import download_file, sanitize_filename +from cli_syntax import get_field, get_free_text, parse_query +from SYS.logger import log +from SYS.utils import unique_path + + +def _looks_like_isbn(text: str) -> bool: + t = (text or "").replace("-", "").strip() + return t.isdigit() and len(t) in (10, 13) + + +def _first_str(value: Any) -> Optional[str]: + if isinstance(value, str): + v = value.strip() + return v if v else None + if isinstance(value, list) and value: + first = value[0] + if isinstance(first, str): + v = first.strip() + return v if v else None + return str(first) if first is not None else None + return None + + +def _resolve_edition_id(doc: Dict[str, Any]) -> str: + # OpenLibrary Search API typically provides edition_key: ["OL...M", ...] 
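    # Illustration: given an assumed search-API doc such as
    #     {"edition_key": ["OL8513721M"], "key": "/works/OL4801915W"}
    # this resolves to "OL8513721M" (first edition_key entry); with only
    #     {"key": "/books/OL8513721M"}
    # the fallback below strips the "/books/" prefix and returns the same OLID.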
+ edition_key = doc.get("edition_key") + if isinstance(edition_key, list) and edition_key: + return str(edition_key[0]).strip() + + # Fallback: sometimes key can be /books/OL...M + key = doc.get("key") + if isinstance(key, str) and key.startswith("/books/"): + return key.split("/books/", 1)[1].strip("/") + + return "" + + +def _check_lendable(session: requests.Session, edition_id: str) -> Tuple[bool, str]: + """Return (lendable, status_text) using OpenLibrary volumes API.""" + try: + if not edition_id or not edition_id.startswith("OL") or not edition_id.endswith("M"): + return False, "not-an-edition" + + url = f"https://openlibrary.org/api/volumes/brief/json/OLID:{edition_id}" + resp = session.get(url, timeout=10) + resp.raise_for_status() + data = resp.json() or {} + wrapped = data.get(f"OLID:{edition_id}") + if not isinstance(wrapped, dict): + return False, "no-availability" + + items = wrapped.get("items") + if not isinstance(items, list) or not items: + return False, "no-items" + + first = items[0] + status_val = "" + if isinstance(first, dict): + status_val = str(first.get("status", "")) + else: + status_val = str(first) + + return ("lendable" in status_val.lower()), status_val + except requests.exceptions.Timeout: + return False, "api-timeout" + except Exception: + return False, "api-error" + + +def _resolve_archive_id(session: requests.Session, edition_id: str, ia_candidates: List[str]) -> str: + # Prefer IA identifiers already present in search results. + if ia_candidates: + first = ia_candidates[0].strip() + if first: + return first + + # Otherwise query the edition JSON. + try: + resp = session.get(f"https://openlibrary.org/books/{edition_id}.json", timeout=10) + resp.raise_for_status() + data = resp.json() or {} + + ocaid = data.get("ocaid") + if isinstance(ocaid, str) and ocaid.strip(): + return ocaid.strip() + + identifiers = data.get("identifiers") + if isinstance(identifiers, dict): + ia = identifiers.get("internet_archive") + ia_id = _first_str(ia) + if ia_id: + return ia_id + + except Exception: + pass + + return "" + + +class OpenLibrary(SearchProvider): + """Search provider for OpenLibrary books + Archive.org direct/borrow download.""" + + def __init__(self, config: Optional[Dict[str, Any]] = None): + super().__init__(config) + self._session = requests.Session() + + def search( + self, + query: str, + limit: int = 50, + filters: Optional[Dict[str, Any]] = None, + **kwargs: Any, + ) -> List[SearchResult]: + filters = filters or {} + + parsed = parse_query(query) + isbn = get_field(parsed, "isbn") + author = get_field(parsed, "author") + title = get_field(parsed, "title") + free_text = get_free_text(parsed) + + q = (isbn or title or author or free_text or query or "").strip() + if not q: + return [] + + if _looks_like_isbn(q): + q = f"isbn:{q.replace('-', '')}" + + try: + resp = self._session.get( + "https://openlibrary.org/search.json", + params={"q": q, "limit": int(limit)}, + timeout=10, + ) + resp.raise_for_status() + data = resp.json() or {} + except Exception as exc: + log(f"[openlibrary] Search failed: {exc}", file=sys.stderr) + return [] + + results: List[SearchResult] = [] + docs = data.get("docs") or [] + if not isinstance(docs, list): + return [] + + for doc in docs[: int(limit)]: + if not isinstance(doc, dict): + continue + + book_title = str(doc.get("title") or "").strip() or "Unknown" + + authors = doc.get("author_name") or [] + if isinstance(authors, str): + authors = [authors] + if not isinstance(authors, list): + authors = [] + authors_list = [str(a) for a 
in authors if a] + + year_val = doc.get("first_publish_year") + year = str(year_val) if year_val is not None else "" + + edition_id = _resolve_edition_id(doc) + + ia_val = doc.get("ia") or [] + if isinstance(ia_val, str): + ia_val = [ia_val] + if not isinstance(ia_val, list): + ia_val = [] + ia_ids = [str(x) for x in ia_val if x] + + isbn_list = doc.get("isbn") or [] + if isinstance(isbn_list, str): + isbn_list = [isbn_list] + if not isinstance(isbn_list, list): + isbn_list = [] + + isbn_13 = next((str(i) for i in isbn_list if len(str(i)) == 13), "") + isbn_10 = next((str(i) for i in isbn_list if len(str(i)) == 10), "") + + columns = [ + ("Title", book_title), + ("Author", ", ".join(authors_list)), + ("Year", year), + ("OLID", edition_id), + ] + + annotations: List[str] = [] + if isbn_13: + annotations.append(f"isbn_13:{isbn_13}") + elif isbn_10: + annotations.append(f"isbn_10:{isbn_10}") + if ia_ids: + annotations.append("archive") + + results.append( + SearchResult( + table="openlibrary", + title=book_title, + path=(f"https://openlibrary.org/books/{edition_id}" if edition_id else "https://openlibrary.org"), + detail=( + (f"By: {', '.join(authors_list)}" if authors_list else "") + + (f" ({year})" if year else "") + ).strip(), + annotations=annotations, + media_kind="book", + columns=columns, + full_metadata={ + "openlibrary_id": edition_id, + "authors": authors_list, + "year": year, + "isbn_10": isbn_10, + "isbn_13": isbn_13, + "ia": ia_ids, + "raw": doc, + }, + ) + ) + + return results + + def download(self, result: SearchResult, output_dir: Path) -> Optional[Path]: + output_dir = Path(output_dir) + output_dir.mkdir(parents=True, exist_ok=True) + + meta = result.full_metadata or {} + edition_id = str(meta.get("openlibrary_id") or "").strip() + if not edition_id: + log("[openlibrary] Missing openlibrary_id; cannot download", file=sys.stderr) + return None + + ia_ids = meta.get("ia") or [] + if isinstance(ia_ids, str): + ia_ids = [ia_ids] + if not isinstance(ia_ids, list): + ia_ids = [] + ia_candidates = [str(x) for x in ia_ids if x] + + archive_id = _resolve_archive_id(self._session, edition_id, ia_candidates) + if not archive_id: + log("[openlibrary] No archive identifier available; cannot download", file=sys.stderr) + return None + + safe_title = sanitize_filename(result.title) + + # 1) Direct download if available. + try: + from API.archive_client import check_direct_download + + can_direct, pdf_url = check_direct_download(archive_id) + except Exception: + can_direct, pdf_url = False, "" + + if can_direct and pdf_url: + out_path = unique_path(output_dir / f"{safe_title}.pdf") + ok = download_file(pdf_url, out_path, session=self._session) + if ok: + return out_path + log("[openlibrary] Direct download failed", file=sys.stderr) + return None + + # 2) Borrow flow (credentials required). 
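        # For reference: the lendability check below (_check_lendable) expects a
        # volumes-API payload shaped roughly like the following; this shape is
        # inferred from the parsing code above, not from a documented contract.
        #     GET https://openlibrary.org/api/volumes/brief/json/OLID:OL8513721M
        #     {"OLID:OL8513721M": {"items": [{"status": "lendable"}]}}
        # Any items[0] status containing "lendable" is treated as borrowable.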
+ try: + from API.archive_client import BookNotAvailableError, credential_openlibrary, download as archive_download + from API.archive_client import get_book_infos, loan, login + + email, password = credential_openlibrary(self.config or {}) + if not email or not password: + log("[openlibrary] Archive credentials missing; cannot borrow", file=sys.stderr) + return None + + lendable, reason = _check_lendable(self._session, edition_id) + if not lendable: + log(f"[openlibrary] Not lendable: {reason}", file=sys.stderr) + return None + + session = login(email, password) + try: + session = loan(session, archive_id, verbose=False) + except BookNotAvailableError: + log("[openlibrary] Book not available to borrow", file=sys.stderr) + return None + except SystemExit: + log("[openlibrary] Borrow failed", file=sys.stderr) + return None + + urls = [f"https://archive.org/borrow/{archive_id}", f"https://archive.org/details/{archive_id}"] + title = safe_title + links: Optional[List[str]] = None + last_exc: Optional[Exception] = None + for u in urls: + try: + title_raw, links, _metadata = get_book_infos(session, u) + if title_raw: + title = sanitize_filename(title_raw) + break + except Exception as exc: + last_exc = exc + continue + + if not links: + log(f"[openlibrary] Failed to extract pages: {last_exc}", file=sys.stderr) + return None + + temp_dir = tempfile.mkdtemp(prefix=f"{title}_", dir=str(output_dir)) + try: + images = archive_download(session=session, n_threads=10, directory=temp_dir, links=links, scale=3, book_id=archive_id) + + try: + import img2pdf # type: ignore + + pdf_bytes = img2pdf.convert(images) if images else None + if not pdf_bytes: + log("[openlibrary] PDF conversion failed", file=sys.stderr) + try: + shutil.rmtree(temp_dir) + except Exception: + pass + return None + + pdf_path = unique_path(output_dir / f"{title}.pdf") + with open(pdf_path, "wb") as f: + f.write(pdf_bytes) + + try: + shutil.rmtree(temp_dir) + except Exception: + pass + return pdf_path + + except ImportError: + # Keep images folder. + return Path(temp_dir) + + except Exception: + try: + shutil.rmtree(temp_dir) + except Exception: + pass + raise + + except Exception as exc: + log(f"[openlibrary] Borrow workflow error: {exc}", file=sys.stderr) + return None + + def validate(self) -> bool: + return True diff --git a/Provider/query_parser.py b/Provider/query_parser.py deleted file mode 100644 index ec4ae9c..0000000 --- a/Provider/query_parser.py +++ /dev/null @@ -1,159 +0,0 @@ -"""Dynamic query parser for filtering and field extraction. - -Supports query syntax like: - - isbn:0557677203 - - author:"Albert Pike" - - title:"Morals and Dogma" - - year:2010 - - isbn:0557677203 author:"Albert Pike" - - Mixed with free text: "Morals" isbn:0557677203 - -This allows flexible query strings that can be parsed by any search provider -to extract specific fields for filtering and searching. -""" - -from typing import Dict, List, Tuple, Optional, Any -import re - - -def parse_query(query: str) -> Dict[str, Any]: - """Parse a query string into field:value pairs and free text. 
- - Args: - query: Query string like 'isbn:0557677203 author:"Albert Pike" Morals' - - Returns: - Dictionary with: - - 'fields': Dict[field_name, field_value] for structured fields - - 'text': str with remaining free text - - 'raw': str original query - """ - result = { - 'fields': {}, - 'text': '', - 'raw': query, - } - - if not query or not query.strip(): - return result - - query = query.strip() - remaining_parts = [] - - # Pattern to match: field:value or field:"quoted value" - # Matches: word: followed by either quoted string or unquoted word - pattern = r'(\w+):(?:"([^"]*)"|(\S+))' - - pos = 0 - for match in re.finditer(pattern, query): - # Add any text before this match - if match.start() > pos: - before_text = query[pos:match.start()].strip() - if before_text: - remaining_parts.append(before_text) - - field_name = match.group(1).lower() - field_value = match.group(2) if match.group(2) is not None else match.group(3) - - result['fields'][field_name] = field_value - pos = match.end() - - # Add any remaining text after last match - if pos < len(query): - remaining_text = query[pos:].strip() - if remaining_text: - remaining_parts.append(remaining_text) - - result['text'] = ' '.join(remaining_parts) - - return result - - -def get_field(parsed_query: Dict[str, Any], field_name: str, default: Optional[str] = None) -> Optional[str]: - """Get a field value from parsed query, with optional default. - - Args: - parsed_query: Result from parse_query() - field_name: Field name to look up (case-insensitive) - default: Default value if field not found - - Returns: - Field value or default - """ - return parsed_query.get('fields', {}).get(field_name.lower(), default) - - -def has_field(parsed_query: Dict[str, Any], field_name: str) -> bool: - """Check if a field exists in parsed query. - - Args: - parsed_query: Result from parse_query() - field_name: Field name to check (case-insensitive) - - Returns: - True if field exists - """ - return field_name.lower() in parsed_query.get('fields', {}) - - -def get_free_text(parsed_query: Dict[str, Any]) -> str: - """Get the free text portion of a parsed query. - - Args: - parsed_query: Result from parse_query() - - Returns: - Free text or empty string - """ - return parsed_query.get('text', '') - - -def build_query_for_provider( - parsed_query: Dict[str, Any], - provider: str, - extraction_map: Optional[Dict[str, str]] = None -) -> Tuple[str, Dict[str, str]]: - """Build a search query and filters dict for a specific provider. - - Different providers have different search syntax. This function - extracts the appropriate fields for each provider. - - Args: - parsed_query: Result from parse_query() - provider: Provider name ('libgen', 'openlibrary', 'soulseek') - extraction_map: Optional mapping of field names to provider-specific names - e.g. 
{'isbn': 'isbn', 'author': 'author', 'title': 'title'} - - Returns: - Tuple of (search_query: str, extracted_fields: Dict[field, value]) - """ - extraction_map = extraction_map or {} - extracted = {} - free_text = get_free_text(parsed_query) - - # Extract fields based on map - for field_name, provider_key in extraction_map.items(): - if has_field(parsed_query, field_name): - extracted[provider_key] = get_field(parsed_query, field_name) - - # If provider-specific extraction needed, providers can implement it - # For now, return the free text as query - return free_text, extracted - - -if __name__ == '__main__': - # Test cases - test_queries = [ - 'isbn:0557677203', - 'isbn:0557677203 author:"Albert Pike"', - 'Morals and Dogma isbn:0557677203', - 'title:"Morals and Dogma" author:"Albert Pike" year:2010', - 'search term without fields', - 'author:"John Smith" title:"A Book"', - ] - - for query in test_queries: - print(f"\nQuery: {query}") - parsed = parse_query(query) - print(f" Fields: {parsed['fields']}") - print(f" Text: {parsed['text']}") diff --git a/Provider/soulseek.py b/Provider/soulseek.py index a757b62..2ad7cf0 100644 --- a/Provider/soulseek.py +++ b/Provider/soulseek.py @@ -11,7 +11,7 @@ import time from pathlib import Path from typing import Any, Dict, List, Optional -from Provider._base import SearchProvider, SearchResult +from ProviderCore.base import SearchProvider, SearchResult from SYS.logger import log, debug diff --git a/Provider/unified_book_downloader.py b/Provider/unified_book_downloader.py deleted file mode 100644 index ae4c298..0000000 --- a/Provider/unified_book_downloader.py +++ /dev/null @@ -1,707 +0,0 @@ -"""Unified book downloader - handles Archive.org borrowing and Libgen fallback. - -This module provides a single interface for downloading books from multiple sources: -1. Try Archive.org direct download (if available) -2. Try Archive.org borrowing (if user has credentials) -3. Fallback to Libgen search by ISBN -4. Attempt Libgen download - -All sources integrated with proper metadata scraping and error handling. -""" - -import logging -import asyncio -import requests -from typing import Optional, Dict, Any, Tuple, List, Callable, cast -from pathlib import Path - -from SYS.logger import debug - -logger = logging.getLogger(__name__) - - -class UnifiedBookDownloader: - """Unified interface for downloading books from multiple sources.""" - - def __init__(self, config: Optional[Dict[str, Any]] = None, output_dir: Optional[str] = None): - """Initialize the unified book downloader. 
- - Args: - config: Configuration dict with credentials - output_dir: Default output directory - """ - self.config = config or {} - self.output_dir = output_dir - self.session = requests.Session() - - # Import download functions from their modules - self._init_downloaders() - - def _init_downloaders(self) -> None: - """Initialize downloader functions from their modules.""" - try: - from API.archive_client import ( - check_direct_download, - get_openlibrary_by_isbn, - loan - ) - self.check_direct_download = check_direct_download - self.get_openlibrary_by_isbn = get_openlibrary_by_isbn - self.loan_func = loan - logger.debug("[UnifiedBookDownloader] Loaded archive.org downloaders from archive_client") - except Exception as e: - logger.warning(f"[UnifiedBookDownloader] Failed to load archive.org functions: {e}") - self.check_direct_download = None - self.get_openlibrary_by_isbn = None - self.loan_func = None - - try: - from Provider.libgen_service import ( - DEFAULT_LIMIT as _LIBGEN_DEFAULT_LIMIT, - download_from_mirror as _libgen_download, - search_libgen as _libgen_search, - ) - - def _log_info(message: str) -> None: - debug(f"[UnifiedBookDownloader] {message}") - - def _log_error(message: str) -> None: - logger.error(f"[UnifiedBookDownloader] {message}") - - self.search_libgen = lambda query, limit=_LIBGEN_DEFAULT_LIMIT: _libgen_search( - query, - limit=limit, - log_info=_log_info, - log_error=_log_error, - ) - self.download_from_mirror = lambda mirror_url, output_path: _libgen_download( - mirror_url, - output_path, - log_info=_log_info, - log_error=_log_error, - ) - logger.debug("[UnifiedBookDownloader] Loaded Libgen helpers") - except Exception as e: - logger.warning(f"[UnifiedBookDownloader] Failed to load Libgen helpers: {e}") - self.search_libgen = None - self.download_from_mirror = None - - def get_download_options(self, book_data: Dict[str, Any]) -> Dict[str, Any]: - """Get all available download options for a book. - - Checks in priority order: - 1. Archive.org direct download (public domain) - 2. Archive.org borrowing (if credentials available and book is borrowable) - 3. 
Libgen fallback (by ISBN) - - Args: - book_data: Book metadata dict with at least 'openlibrary_id' or 'isbn' - - Returns: - Dict with available download methods and metadata - """ - options = { - 'book_title': book_data.get('title', 'Unknown'), - 'book_author': book_data.get('author', 'Unknown'), - 'isbn': book_data.get('isbn', ''), - 'openlibrary_id': book_data.get('openlibrary_id', ''), - 'methods': [], # Will be sorted by priority - 'metadata': {} - } - - # Extract book ID from openlibrary_id (e.g., OL8513721M -> 8513721, OL8513721W -> 8513721) - ol_id = book_data.get('openlibrary_id', '') - book_id = None - - if ol_id.startswith('OL') and len(ol_id) > 2: - # Remove 'OL' prefix (keep everything after it including the suffix letter) - # The book_id is all digits after 'OL' - book_id = ''.join(c for c in ol_id[2:] if c.isdigit()) - - # PRIORITY 1: Check direct download (fastest, no auth needed) - if self.check_direct_download: - try: - can_download, pdf_url = self.check_direct_download(book_id) - if can_download: - options['methods'].append({ - 'type': 'archive.org_direct', - 'label': 'Archive.org Direct Download', - 'requires_auth': False, - 'pdf_url': pdf_url, - 'book_id': book_id, - 'priority': 1 # Highest priority - }) - logger.info(f"[UnifiedBookDownloader] Direct download available for {book_id}") - except Exception as e: - logger.debug(f"[UnifiedBookDownloader] Direct download check failed: {e}") - - # PRIORITY 2: Check borrowing option (requires auth, 14-day loan) - # First verify the book is actually lendable via OpenLibrary API - if self._has_archive_credentials(): - is_lendable, status = self._check_book_lendable_status(ol_id) - - if is_lendable: - options['methods'].append({ - 'type': 'archive.org_borrow', - 'label': 'Archive.org Borrow', - 'requires_auth': True, - 'book_id': book_id, - 'priority': 2 # Second priority - }) - logger.info(f"[UnifiedBookDownloader] Borrow option available for {book_id} (status: {status})") - else: - logger.debug(f"[UnifiedBookDownloader] Borrow not available for {book_id} (status: {status})") - - # PRIORITY 3: Check Libgen fallback (by ISBN, no auth needed, most reliable) - isbn = book_data.get('isbn', '') - title = book_data.get('title', '') - author = book_data.get('author', '') - - if self.search_libgen: - # Can use Libgen if we have ISBN OR title (or both) - if isbn or title: - options['methods'].append({ - 'type': 'libgen', - 'label': 'Libgen Search & Download', - 'requires_auth': False, - 'isbn': isbn, - 'title': title, - 'author': author, - 'priority': 3 # Third priority (fallback) - }) - logger.info(f"[UnifiedBookDownloader] Libgen fallback available (ISBN: {isbn if isbn else 'N/A'}, Title: {title})") - - # Sort by priority (higher priority first) - options['methods'].sort(key=lambda x: x.get('priority', 999)) - - return options - - def _has_archive_credentials(self) -> bool: - """Check if Archive.org credentials are available.""" - try: - from API.archive_client import credential_openlibrary - email, password = credential_openlibrary(self.config) - return bool(email and password) - except Exception: - return False - - def _check_book_lendable_status(self, ol_id: str) -> Tuple[bool, Optional[str]]: - """Check if a book is lendable via OpenLibrary API. 
- - Queries: https://openlibrary.org/api/volumes/brief/json/OLID:{ol_id} - Note: Only works with Edition IDs (OL...M), not Work IDs (OL...W) - - Args: - ol_id: OpenLibrary ID (e.g., OL8513721M for Edition or OL4801915W for Work) - - Returns: - Tuple of (is_lendable: bool, status_reason: Optional[str]) - """ - try: - if not ol_id.startswith('OL'): - return False, "Invalid OpenLibrary ID format" - - # If this is a Work ID (ends with W), we can't query Volumes API - # Work IDs are abstract umbrella records, not specific editions - if ol_id.endswith('W'): - logger.debug(f"[UnifiedBookDownloader] Work ID {ol_id} - skipping Volumes API (not lendable)") - return False, "Work ID not supported by Volumes API (not a specific edition)" - - # If it ends with M, it's an Edition ID - proceed with query - if not ol_id.endswith('M'): - logger.debug(f"[UnifiedBookDownloader] Unknown ID type {ol_id} (not M or W)") - return False, "Invalid OpenLibrary ID type" - - url = f"https://openlibrary.org/api/volumes/brief/json/OLID:{ol_id}" - response = self.session.get(url, timeout=10) - response.raise_for_status() - data = response.json() - - # Empty response means no records found - if not data: - logger.debug(f"[UnifiedBookDownloader] Empty response for {ol_id}") - return False, "No availability data found" - - # The response is wrapped in OLID key - olid_key = f"OLID:{ol_id}" - if olid_key not in data: - logger.debug(f"[UnifiedBookDownloader] OLID key not found in response") - return False, "No availability data found" - - olid_data = data[olid_key] - - # Check items array for lendable status - if 'items' in olid_data and olid_data['items'] and len(olid_data['items']) > 0: - items = olid_data['items'] - - # Check the first item for lending status - first_item = items[0] - - # Handle both dict and string representations (PowerShell converts to string) - if isinstance(first_item, dict): - status = first_item.get('status', '') - else: - # String representation - check if 'lendable' is in it - status = str(first_item).lower() - - is_lendable = 'lendable' in str(status).lower() - - if is_lendable: - logger.info(f"[UnifiedBookDownloader] Book {ol_id} is lendable") - return True, "LENDABLE" - else: - status_str = status.get('status', 'NOT_LENDABLE') if isinstance(status, dict) else 'NOT_LENDABLE' - logger.debug(f"[UnifiedBookDownloader] Book {ol_id} is not lendable (status: {status_str})") - return False, status_str - else: - # No items array or empty - logger.debug(f"[UnifiedBookDownloader] No items found for {ol_id}") - return False, "Not available for lending" - - except requests.exceptions.Timeout: - logger.warning(f"[UnifiedBookDownloader] OpenLibrary API timeout for {ol_id}") - return False, "API timeout" - except Exception as e: - logger.debug(f"[UnifiedBookDownloader] Failed to check lendable status for {ol_id}: {e}") - return False, f"API error" - - - async def download_book(self, method: Dict[str, Any], output_dir: Optional[str] = None) -> Tuple[bool, str]: - """Download a book using the specified method. 
- - Args: - method: Download method dict from get_download_options() - output_dir: Directory to save the book - - Returns: - Tuple of (success: bool, message: str) - """ - output_dir = output_dir or self.output_dir or str(Path.home() / "Downloads") - method_type = method.get('type', '') - - logger.info(f"[UnifiedBookDownloader] Starting download with method: {method_type}") - - try: - if method_type == 'archive.org_direct': - return await self._download_archive_direct(method, output_dir) - - elif method_type == 'archive.org_borrow': - return await self._download_archive_borrow(method, output_dir) - - elif method_type == 'libgen': - return await self._download_libgen(method, output_dir) - - else: - return False, f"Unknown download method: {method_type}" - - except Exception as e: - logger.error(f"[UnifiedBookDownloader] Download error: {e}", exc_info=True) - return False, f"Download failed: {str(e)}" - - async def _download_archive_direct(self, method: Dict[str, Any], output_dir: str) -> Tuple[bool, str]: - """Download directly from Archive.org.""" - try: - pdf_url = method.get('pdf_url', '') - book_id = method.get('book_id', '') - - if not pdf_url: - return False, "No PDF URL available" - - # Determine output filename - filename = f"{book_id}.pdf" - output_path = Path(output_dir) / filename - - logger.info(f"[UnifiedBookDownloader] Downloading PDF from: {pdf_url}") - - # Download in a thread to avoid blocking - loop = asyncio.get_event_loop() - success = await loop.run_in_executor( - None, - self._download_file, - pdf_url, - str(output_path) - ) - - if success: - logger.info(f"[UnifiedBookDownloader] Successfully downloaded to: {output_path}") - return True, f"Downloaded to: {output_path}" - else: - return False, "Failed to download PDF" - - except Exception as e: - logger.error(f"[UnifiedBookDownloader] Archive direct download error: {e}") - return False, f"Archive download failed: {str(e)}" - - async def _download_archive_borrow(self, method: Dict[str, Any], output_dir: str) -> Tuple[bool, str]: - """Download via Archive.org borrowing (requires credentials). - - Process (follows archive_client.py pattern): - 1. Login to Archive.org with credentials - 2. Call loan endpoint to borrow the book (14-day loan) - 3. Get book info (page links, metadata) - 4. Download all pages as images - 5. 
Merge images into PDF - - The loan function from archive_client.py handles: - - Checking if book needs borrowing (status 400 = "doesn't need to be borrowed") - - Creating borrow token for access - - Handling borrow failures - - get_book_infos() extracts page links from the borrowed book viewer - download() downloads all pages using thread pool - img2pdf merges pages into searchable PDF - """ - try: - from API.archive_client import credential_openlibrary - - book_id = method.get('book_id', '') - - # Get credentials - email, password = credential_openlibrary(self.config) - if not email or not password: - return False, "Archive.org credentials not configured" - - logger.info(f"[UnifiedBookDownloader] Logging into Archive.org...") - - # Login and borrow (in thread, following download_book.py pattern) - loop = asyncio.get_event_loop() - borrow_result = await loop.run_in_executor( - None, - self._archive_borrow_and_download, - email, - password, - book_id, - output_dir - ) - - if borrow_result and isinstance(borrow_result, tuple): - success, filepath = borrow_result - if success: - logger.info(f"[UnifiedBookDownloader] Borrow succeeded: {filepath}") - return True, filepath - else: - logger.warning(f"[UnifiedBookDownloader] Borrow failed: {filepath}") - return False, filepath - else: - return False, "Failed to borrow book from Archive.org" - - except Exception as e: - logger.error(f"[UnifiedBookDownloader] Archive borrow error: {e}") - return False, f"Archive borrow failed: {str(e)}" - - async def _download_libgen(self, method: Dict[str, Any], output_dir: str) -> Tuple[bool, str]: - """Download via Libgen search and download with mirror fallback.""" - try: - isbn = method.get('isbn', '') - title = method.get('title', '') - - if not isbn and not title: - return False, "Need ISBN or title for Libgen search" - - if not self.search_libgen: - return False, "Libgen searcher not available" - - # Define wrapper functions to safely call the methods - search_func = self.search_libgen - if search_func is None: - return False, "Search function not available" - - preloaded_results = method.get('results') - loop = asyncio.get_event_loop() - - if preloaded_results: - results = list(preloaded_results) - if not results: - results = await loop.run_in_executor(None, lambda: search_func(isbn or title, 10)) - else: - results = await loop.run_in_executor(None, lambda: search_func(isbn or title, 10)) - - if not results: - logger.warning(f"[UnifiedBookDownloader] No Libgen results for: {isbn or title}") - return False, f"No Libgen results found for: {isbn or title}" - - logger.info(f"[UnifiedBookDownloader] Found {len(results)} Libgen results") - - # Determine output filename (use first result for naming) - first_result = results[0] - filename = f"{first_result.get('title', 'book')}" - filename = "".join(c for c in filename if c.isalnum() or c in (' ', '.', '-'))[:100] - - # Try each result's mirror until one succeeds - for idx, result in enumerate(results, 1): - mirror_url = result.get('mirror_url', '') - - if not mirror_url: - logger.debug(f"[UnifiedBookDownloader] Result {idx}: No mirror URL") - continue - - # Use extension from this result if available - extension = result.get('extension', 'pdf') - if extension and not extension.startswith('.'): - extension = f".{extension}" - elif not extension: - extension = '.pdf' - - output_path = Path(output_dir) / (filename + extension) - - logger.info(f"[UnifiedBookDownloader] Trying mirror {idx}/{len(results)}: {mirror_url}") - - download_func = self.download_from_mirror - 
if download_func is None: - return False, "Download function not available" - - download_callable = cast(Callable[[str, str], Tuple[bool, Optional[Path]]], download_func) - - def download_wrapper(): - return download_callable(mirror_url, str(output_path)) - - # Download (in thread) - try: - success, downloaded_path = await loop.run_in_executor(None, download_wrapper) - - if success: - dest_path = Path(downloaded_path) if downloaded_path else output_path - # Validate downloaded file is not HTML (common Libgen issue) - if dest_path.exists(): - try: - with open(dest_path, 'rb') as f: - file_start = f.read(1024).decode('utf-8', errors='ignore').lower() - if ' Tuple[bool, str]: - """Download a specific Libgen result with optional fallbacks.""" - - if not isinstance(selected, dict): - return False, "Selected result must be a dictionary" - - ordered_results: List[Dict[str, Any]] = [selected] - if remaining: - for item in remaining: - if isinstance(item, dict) and item is not selected: - ordered_results.append(item) - - method: Dict[str, Any] = { - 'type': 'libgen', - 'isbn': selected.get('isbn', '') or '', - 'title': selected.get('title', '') or '', - 'author': selected.get('author', '') or '', - 'results': ordered_results, - } - - return await self.download_book(method, output_dir) - - def download_libgen_selection_sync( - self, - selected: Dict[str, Any], - remaining: Optional[List[Dict[str, Any]]] = None, - output_dir: Optional[str] = None, - ) -> Tuple[bool, str]: - """Synchronous helper for downloading a Libgen selection.""" - - async def _run() -> Tuple[bool, str]: - return await self.download_libgen_selection(selected, remaining, output_dir) - - loop = asyncio.new_event_loop() - try: - asyncio.set_event_loop(loop) - return loop.run_until_complete(_run()) - finally: - loop.close() - asyncio.set_event_loop(None) - - def _download_file(self, url: str, output_path: str) -> bool: - """Download a file from URL.""" - try: - response = requests.get(url, stream=True, timeout=30) - response.raise_for_status() - - with open(output_path, 'wb') as f: - for chunk in response.iter_content(chunk_size=8192): - if chunk: - f.write(chunk) - - return True - except Exception as e: - logger.error(f"[UnifiedBookDownloader] File download error: {e}") - return False - - def _archive_borrow_and_download(self, email: str, password: str, book_id: str, output_dir: str) -> Tuple[bool, str]: - """Borrow a book from Archive.org and download pages as PDF. - - This follows the exact process from archive_client.py: - 1. Login with credentials - 2. Call loan() to create 14-day borrow - 3. Get book info (extract page url) - 4. Download all pages as images - 5. 
Merge images into searchable PDF - - Returns tuple of (success: bool, filepath/message: str) - """ - try: - from API.archive_client import login, loan, get_book_infos, download - import tempfile - import shutil - - logger.info(f"[UnifiedBookDownloader] Logging into Archive.org as {email}") - session = login(email, password) - - logger.info(f"[UnifiedBookDownloader] Attempting to borrow book: {book_id}") - # Call loan to create the 14-day borrow - session = loan(session, book_id, verbose=True) - - # If we get here, borrowing succeeded - logger.info(f"[UnifiedBookDownloader] Successfully borrowed book: {book_id}") - - # Now get the book info (page url and metadata) - logger.info(f"[UnifiedBookDownloader] Extracting book page information...") - # Try both URL formats: with /borrow and without - book_url = [ - f"https://archive.org/borrow/{book_id}", # Try borrow page first (for borrowed books) - f"https://archive.org/details/{book_id}" # Fallback to details page - ] - - title = None - links = None - metadata = None - last_error = None - - for book_url in book_url: - try: - logger.debug(f"[UnifiedBookDownloader] Trying to get book info from: {book_url}") - response = session.get(book_url, timeout=10) - - # Log response status - if response.status_code != 200: - logger.debug(f"[UnifiedBookDownloader] URL returned {response.status_code}: {book_url}") - # Continue to try next URL - continue - - # Try to parse the response - title, links, metadata = get_book_infos(session, book_url) - logger.info(f"[UnifiedBookDownloader] Successfully got info from: {book_url}") - logger.info(f"[UnifiedBookDownloader] Found {len(links)} pages to download") - break - except Exception as e: - logger.debug(f"[UnifiedBookDownloader] Failed with {book_url}: {e}") - last_error = e - continue - - if links is None: - logger.error(f"[UnifiedBookDownloader] Failed to get book info from all url: {last_error}") - # Borrow extraction failed - return False - return False, "Could not extract borrowed book pages" - - # Create temporary directory for images - temp_dir = tempfile.mkdtemp(prefix=f"{title}_", dir=output_dir) - logger.info(f"[UnifiedBookDownloader] Downloading {len(links)} pages to temporary directory...") - - try: - # Download all pages (uses thread pool) - images = download( - session=session, - n_threads=10, - directory=temp_dir, - links=links, - scale=3, # Default resolution - book_id=book_id - ) - - logger.info(f"[UnifiedBookDownloader] Downloaded {len(images)} pages") - - # Try to merge pages into PDF - try: - import img2pdf - logger.info(f"[UnifiedBookDownloader] Merging pages into PDF...") - - # Prepare PDF metadata - pdfmeta = {} - if metadata: - if "title" in metadata: - pdfmeta["title"] = metadata["title"] - if "creator" in metadata: - pdfmeta["author"] = metadata["creator"] - pdfmeta["keywords"] = [f"https://archive.org/details/{book_id}"] - pdfmeta["creationdate"] = None # Avoid timezone issues - - # Convert images to PDF - pdf_content = img2pdf.convert(images, **pdfmeta) if images else None - if not pdf_content: - logger.error(f"[UnifiedBookDownloader] PDF conversion failed") - return False, "Failed to convert pages to PDF" - - # Save the PDF - pdf_filename = f"{title}.pdf" if title else "book.pdf" - pdf_path = Path(output_dir) / pdf_filename - - # Handle duplicate filenames - i = 1 - while pdf_path.exists(): - pdf_path = Path(output_dir) / f"{title or 'book'}({i}).pdf" - i += 1 - - with open(pdf_path, 'wb') as f: - f.write(pdf_content) - - logger.info(f"[UnifiedBookDownloader] Successfully created PDF: 
{pdf_path}") - - return True, str(pdf_path) - - except ImportError: - logger.warning(f"[UnifiedBookDownloader] img2pdf not available, saving as JPG collection instead") - - # Create JPG collection directory - if not title: - title = f"book_{book_id}" - jpg_dir = Path(output_dir) / title - i = 1 - while jpg_dir.exists(): - jpg_dir = Path(output_dir) / f"{title}({i})" - i += 1 - - # Move temporary directory to final location - shutil.move(temp_dir, str(jpg_dir)) - temp_dir = None # Mark as already moved - - logger.info(f"[UnifiedBookDownloader] Saved as JPG collection: {jpg_dir}") - return True, str(jpg_dir) - - finally: - # Clean up temporary directory if it still exists - if temp_dir and Path(temp_dir).exists(): - shutil.rmtree(temp_dir) - - except SystemExit: - # loan() function calls sys.exit on failure - catch it - logger.error(f"[UnifiedBookDownloader] Borrow process exited (book may not be borrowable)") - return False, "Book could not be borrowed (may not be available for borrowing)" - except Exception as e: - logger.error(f"[UnifiedBookDownloader] Archive borrow error: {e}") - return False, f"Borrow failed: {str(e)}" - - def close(self) -> None: - """Close the session.""" - self.session.close() diff --git a/Provider/youtube.py b/Provider/youtube.py index b05a564..a308b1f 100644 --- a/Provider/youtube.py +++ b/Provider/youtube.py @@ -6,7 +6,7 @@ import subprocess import sys from typing import Any, Dict, List, Optional -from Provider._base import SearchProvider, SearchResult +from ProviderCore.base import SearchProvider, SearchResult from SYS.logger import log diff --git a/Provider/zeroxzero.py b/Provider/zeroxzero.py index 0a835ec..6cfc092 100644 --- a/Provider/zeroxzero.py +++ b/Provider/zeroxzero.py @@ -4,7 +4,7 @@ import os import sys from typing import Any -from Provider._base import FileProvider +from ProviderCore.base import FileProvider from SYS.logger import log diff --git a/ProviderCore/__init__.py b/ProviderCore/__init__.py new file mode 100644 index 0000000..55cf273 --- /dev/null +++ b/ProviderCore/__init__.py @@ -0,0 +1,5 @@ +"""Provider core modules. + +This package contains the provider framework (base types, registry, and shared helpers). +Concrete provider implementations live in the `Provider/` package. 
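+
+Illustrative lookup, mirroring the call pattern used by TUI/modalscreen/search.py in this
+patch (here `result` is a provider SearchResult and `output_dir` a Path chosen by the caller):
+
+    from ProviderCore.registry import get_search_provider
+
+    provider = get_search_provider("openlibrary", config=config)
+    if provider:
+        downloaded = provider.download(result, output_dir)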
+""" diff --git a/Provider/_base.py b/ProviderCore/base.py similarity index 100% rename from Provider/_base.py rename to ProviderCore/base.py diff --git a/ProviderCore/download.py b/ProviderCore/download.py new file mode 100644 index 0000000..3cb68ed --- /dev/null +++ b/ProviderCore/download.py @@ -0,0 +1,42 @@ +from __future__ import annotations + +from pathlib import Path +from typing import Optional + +import requests + + +def sanitize_filename(name: str, *, max_len: int = 150) -> str: + text = str(name or "").strip() + if not text: + return "download" + + forbidden = set('<>:"/\\|?*') + cleaned = "".join("_" if c in forbidden else c for c in text) + cleaned = " ".join(cleaned.split()).strip().strip(".") + if not cleaned: + cleaned = "download" + return cleaned[:max_len] + + +def download_file(url: str, output_path: Path, *, session: Optional[requests.Session] = None, timeout_s: float = 30.0) -> bool: + output_path = Path(output_path) + output_path.parent.mkdir(parents=True, exist_ok=True) + + s = session or requests.Session() + + try: + with s.get(url, stream=True, timeout=timeout_s) as resp: + resp.raise_for_status() + with open(output_path, "wb") as f: + for chunk in resp.iter_content(chunk_size=1024 * 256): + if chunk: + f.write(chunk) + return output_path.exists() and output_path.stat().st_size > 0 + except Exception: + try: + if output_path.exists(): + output_path.unlink() + except Exception: + pass + return False diff --git a/Provider/registry.py b/ProviderCore/registry.py similarity index 95% rename from Provider/registry.py rename to ProviderCore/registry.py index f957c89..f7f5b7a 100644 --- a/Provider/registry.py +++ b/ProviderCore/registry.py @@ -11,10 +11,11 @@ import sys from SYS.logger import log -from Provider._base import FileProvider, SearchProvider, SearchResult +from ProviderCore.base import FileProvider, SearchProvider, SearchResult from Provider.bandcamp import Bandcamp from Provider.libgen import Libgen from Provider.matrix import Matrix +from Provider.openlibrary import OpenLibrary from Provider.soulseek import Soulseek, download_soulseek_file from Provider.youtube import YouTube from Provider.zeroxzero import ZeroXZero @@ -22,6 +23,7 @@ from Provider.zeroxzero import ZeroXZero _SEARCH_PROVIDERS: Dict[str, Type[SearchProvider]] = { "libgen": Libgen, + "openlibrary": OpenLibrary, "soulseek": Soulseek, "bandcamp": Bandcamp, "youtube": YouTube, diff --git a/Store/Folder.py b/Store/Folder.py index 723723c..203ea04 100644 --- a/Store/Folder.py +++ b/Store/Folder.py @@ -943,6 +943,79 @@ class Folder(Store): debug(f"delete_url failed for local file: {exc}") return False + def get_note(self, file_identifier: str, **kwargs: Any) -> Dict[str, str]: + """Get notes for a local file by hash.""" + from API.folder import API_folder_store + try: + if not self._location: + return {} + file_hash = str(file_identifier or "").strip().lower() + if not _normalize_hash(file_hash): + return {} + with API_folder_store(Path(self._location)) as db: + getter = getattr(db, "get_notes", None) + if callable(getter): + notes = getter(file_hash) + return notes if isinstance(notes, dict) else {} + # Fallback: default-only + note = db.get_note(file_hash) + return {"default": str(note or "")} if note else {} + except Exception as exc: + debug(f"get_note failed for local file: {exc}") + return {} + + def set_note(self, file_identifier: str, name: str, text: str, **kwargs: Any) -> bool: + """Set a named note for a local file by hash.""" + from API.folder import API_folder_store + try: + if not 
self._location: + return False + file_hash = str(file_identifier or "").strip().lower() + if not _normalize_hash(file_hash): + return False + + file_path = self.get_file(file_hash, **kwargs) + if not file_path or not isinstance(file_path, Path) or not file_path.exists(): + return False + + with API_folder_store(Path(self._location)) as db: + setter = getattr(db, "set_note", None) + if callable(setter): + setter(file_path, str(name), str(text)) + return True + db.save_note(file_path, str(text)) + return True + except Exception as exc: + debug(f"set_note failed for local file: {exc}") + return False + + def delete_note(self, file_identifier: str, name: str, **kwargs: Any) -> bool: + """Delete a named note for a local file by hash.""" + from API.folder import API_folder_store + try: + if not self._location: + return False + file_hash = str(file_identifier or "").strip().lower() + if not _normalize_hash(file_hash): + return False + with API_folder_store(Path(self._location)) as db: + deleter = getattr(db, "delete_note", None) + if callable(deleter): + deleter(file_hash, str(name)) + return True + # Default-only fallback + if str(name).strip().lower() == "default": + deleter2 = getattr(db, "save_note", None) + if callable(deleter2): + file_path = self.get_file(file_hash, **kwargs) + if file_path and isinstance(file_path, Path) and file_path.exists(): + deleter2(file_path, "") + return True + return False + except Exception as exc: + debug(f"delete_note failed for local file: {exc}") + return False + def delete_file(self, file_identifier: str, **kwargs: Any) -> bool: """Delete a file from the folder store. diff --git a/Store/HydrusNetwork.py b/Store/HydrusNetwork.py index 8d5e3ac..3dc3be2 100644 --- a/Store/HydrusNetwork.py +++ b/Store/HydrusNetwork.py @@ -437,7 +437,10 @@ class HydrusNetwork(Store): try: from API import HydrusNetwork as hydrus_wrapper - file_hash = str(file_identifier) + file_hash = str(file_identifier or "").strip().lower() + if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash): + debug(f"get_tags: invalid file hash '{file_identifier}'") + return [], "unknown" # Get Hydrus client and service info client = self._client @@ -483,12 +486,17 @@ class HydrusNetwork(Store): if client is None: debug("add_tag: Hydrus client unavailable") return False + + file_hash = str(file_identifier or "").strip().lower() + if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash): + debug(f"add_tag: invalid file hash '{file_identifier}'") + return False service_name = kwargs.get("service_name") or "my tags" # Ensure tags is a list tag_list = list(tags) if isinstance(tags, (list, tuple)) else [str(tags)] if not tag_list: return False - client.add_tag(file_identifier, tag_list, service_name) + client.add_tag(file_hash, tag_list, service_name) return True except Exception as exc: debug(f"Hydrus add_tag failed: {exc}") @@ -502,11 +510,16 @@ class HydrusNetwork(Store): if client is None: debug("delete_tag: Hydrus client unavailable") return False + + file_hash = str(file_identifier or "").strip().lower() + if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash): + debug(f"delete_tag: invalid file hash '{file_identifier}'") + return False service_name = kwargs.get("service_name") or "my tags" tag_list = list(tags) if isinstance(tags, (list, tuple)) else [str(tags)] if not tag_list: return False - client.delete_tag(file_identifier, tag_list, service_name) + client.delete_tag(file_hash, tag_list, service_name) return True except 
Exception as exc: debug(f"Hydrus delete_tag failed: {exc}") @@ -520,7 +533,12 @@ class HydrusNetwork(Store): if client is None: debug("get_url: Hydrus client unavailable") return [] - payload = client.fetch_file_metadata(hashes=[str(file_identifier)], include_file_url=True) + + file_hash = str(file_identifier or "").strip().lower() + if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash): + return [] + + payload = client.fetch_file_metadata(hashes=[file_hash], include_file_url=True) items = payload.get("metadata") if isinstance(payload, dict) else None if not isinstance(items, list) or not items: return [] @@ -561,6 +579,80 @@ class HydrusNetwork(Store): debug(f"Hydrus delete_url failed: {exc}") return False + def get_note(self, file_identifier: str, **kwargs: Any) -> Dict[str, str]: + """Get notes for a Hydrus file (default note service only).""" + try: + client = self._client + if client is None: + debug("get_note: Hydrus client unavailable") + return {} + + file_hash = str(file_identifier or "").strip().lower() + if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash): + return {} + + payload = client.fetch_file_metadata(hashes=[file_hash], include_notes=True) + items = payload.get("metadata") if isinstance(payload, dict) else None + if not isinstance(items, list) or not items: + return {} + meta = items[0] if isinstance(items[0], dict) else None + if not isinstance(meta, dict): + return {} + + notes_payload = meta.get("notes") + if isinstance(notes_payload, dict): + return {str(k): str(v or "") for k, v in notes_payload.items() if str(k).strip()} + + return {} + except Exception as exc: + debug(f"Hydrus get_note failed: {exc}") + return {} + + def set_note(self, file_identifier: str, name: str, text: str, **kwargs: Any) -> bool: + """Set a named note for a Hydrus file (default note service only).""" + try: + client = self._client + if client is None: + debug("set_note: Hydrus client unavailable") + return False + + file_hash = str(file_identifier or "").strip().lower() + if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash): + return False + + note_name = str(name or "").strip() + if not note_name: + return False + note_text = str(text or "") + + client.set_notes(file_hash, {note_name: note_text}) + return True + except Exception as exc: + debug(f"Hydrus set_note failed: {exc}") + return False + + def delete_note(self, file_identifier: str, name: str, **kwargs: Any) -> bool: + """Delete a named note for a Hydrus file (default note service only).""" + try: + client = self._client + if client is None: + debug("delete_note: Hydrus client unavailable") + return False + + file_hash = str(file_identifier or "").strip().lower() + if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash): + return False + + note_name = str(name or "").strip() + if not note_name: + return False + + client.delete_notes(file_hash, [note_name]) + return True + except Exception as exc: + debug(f"Hydrus delete_note failed: {exc}") + return False + @staticmethod def _extract_tags_from_hydrus_meta( meta: Dict[str, Any], diff --git a/Store/_base.py b/Store/_base.py index 9db16fb..1ff9e8d 100644 --- a/Store/_base.py +++ b/Store/_base.py @@ -53,3 +53,21 @@ class Store(ABC): @abstractmethod def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool: raise NotImplementedError + + @abstractmethod + def get_note(self, file_identifier: str, **kwargs: Any) -> Dict[str, str]: + """Get notes for a file. 
+ + Returns a mapping of note name/key -> note text. + """ + raise NotImplementedError + + @abstractmethod + def set_note(self, file_identifier: str, name: str, text: str, **kwargs: Any) -> bool: + """Add or replace a named note for a file.""" + raise NotImplementedError + + @abstractmethod + def delete_note(self, file_identifier: str, name: str, **kwargs: Any) -> bool: + """Delete a named note for a file.""" + raise NotImplementedError diff --git a/TUI/modalscreen/download.py b/TUI/modalscreen/download.py index 2fa824d..8f8c998 100644 --- a/TUI/modalscreen/download.py +++ b/TUI/modalscreen/download.py @@ -24,9 +24,9 @@ import json # Add parent directory to path for imports sys.path.insert(0, str(Path(__file__).parent.parent)) -# Import cmdlets system to call get-tag +# Import cmdlet system to call get-tag try: - from cmdlets import get as get_cmdlet + from cmdlet import get as get_cmdlet except ImportError: get_cmdlet = None @@ -353,10 +353,10 @@ class DownloadModal(ModalScreen): # Import cmdlet system if not get_cmdlet: - logger.error("cmdlets module not available") + logger.error("cmdlet module not available") self.app.call_from_thread( self.app.notify, - "Cmdlets system unavailable", + "cmdlet system unavailable", title="Error", severity="error" ) @@ -1323,10 +1323,10 @@ class DownloadModal(ModalScreen): # Call get-tag cmdlet to scrape URL if not get_cmdlet: - logger.error("cmdlets module not available") + logger.error("cmdlet module not available") self.app.call_from_thread( self.app.notify, - "cmdlets module not available", + "cmdlet module not available", title="Error", severity="error" ) @@ -1563,13 +1563,13 @@ class DownloadModal(ModalScreen): """ # Import cmdlet system if not get_cmdlet: - error_msg = "cmdlets module not available" + error_msg = "cmdlet module not available" logger.error(error_msg) if worker: worker.append_stdout(f"❌ ERROR: {error_msg}\n") self.app.call_from_thread( self.app.notify, - "Cmdlets system unavailable", + "cmdlet system unavailable", title="Error", severity="error" ) diff --git a/TUI/modalscreen/search.py b/TUI/modalscreen/search.py index d293e5b..9a017e7 100644 --- a/TUI/modalscreen/search.py +++ b/TUI/modalscreen/search.py @@ -14,9 +14,9 @@ import asyncio # Add parent directory to path for imports sys.path.insert(0, str(Path(__file__).parent.parent)) -from config import load_config +from config import load_config, resolve_output_dir from result_table import ResultTable -from Provider.registry import get_search_provider +from ProviderCore.registry import get_search_provider logger = logging.getLogger(__name__) @@ -236,7 +236,7 @@ class SearchModal(ModalScreen): selected_row = self.results_table.cursor_row if 0 <= selected_row < len(self.current_results): result = self.current_results[selected_row] - if result.get("source") == "openlibrary": + if getattr(result, "table", "") == "openlibrary": asyncio.create_task(self._download_book(result)) else: logger.warning("[search-modal] Download only supported for OpenLibrary results") @@ -330,49 +330,29 @@ class SearchModal(ModalScreen): logger.info(f"[search-modal] Populated tags textarea from result") async def _download_book(self, result: Any) -> None: - """Download a book from OpenLibrary using unified downloader.""" + """Download a book from OpenLibrary using the provider.""" + if getattr(result, "table", "") != "openlibrary": + logger.warning("[search-modal] Download only supported for OpenLibrary results") + return + try: - from Provider.unified_book_downloader import UnifiedBookDownloader - from config 
import load_config - - # Convert SearchResult to dict if needed - if hasattr(result, 'to_dict'): - result_dict = result.to_dict() - # Ensure raw_data is populated for downloader - if 'raw_data' not in result_dict and result.full_metadata: - result_dict['raw_data'] = result.full_metadata - else: - result_dict = result - - logger.info(f"[search-modal] Starting download for: {result_dict.get('title')}") - config = load_config() - downloader = UnifiedBookDownloader(config=config) - - # Get download options for this book - options = downloader.get_download_options(result_dict) - - if not options['methods']: - logger.warning(f"[search-modal] No download methods available for: {result_dict.get('title')}") - # Could show a modal dialog here + output_dir = resolve_output_dir(config) + + provider = get_search_provider("openlibrary", config=config) + if not provider: + logger.error("[search-modal] Provider not available: openlibrary") return - - # For now, use the first available method (we could show a dialog to choose) - method = options['methods'][0] - logger.info(f"[search-modal] Using download method: {method.get('label')}") - - # Perform the download - success, message = await downloader.download_book(method) - - if success: - logger.info(f"[search-modal] Download successful: {message}") - # Could show success dialog + + title = getattr(result, "title", "") + logger.info(f"[search-modal] Starting download for: {title}") + + downloaded = await asyncio.to_thread(provider.download, result, output_dir) + if downloaded: + logger.info(f"[search-modal] Download successful: {downloaded}") else: - logger.warning(f"[search-modal] Download failed: {message}") - # Could show error dialog - - downloader.close() - + logger.warning(f"[search-modal] Download failed for: {title}") + except Exception as e: logger.error(f"[search-modal] Download error: {e}", exc_info=True) diff --git a/TUI/pipeline_runner.py b/TUI/pipeline_runner.py index aa67cca..86f3501 100644 --- a/TUI/pipeline_runner.py +++ b/TUI/pipeline_runner.py @@ -23,7 +23,7 @@ for path in (ROOT_DIR, BASE_DIR): sys.path.insert(0, str_path) import pipeline as ctx -from cmdlets import REGISTRY +from cmdlet import REGISTRY from config import get_local_storage_path, load_config from SYS.worker_manager import WorkerManager diff --git a/cli_syntax.py b/cli_syntax.py new file mode 100644 index 0000000..67d8ae1 --- /dev/null +++ b/cli_syntax.py @@ -0,0 +1,166 @@ +from __future__ import annotations + +from dataclasses import dataclass +from typing import Any, Dict, Optional + +import re + + +@dataclass(frozen=True) +class SyntaxErrorDetail: + message: str + expected: Optional[str] = None + + +def validate_pipeline_text(text: str) -> Optional[SyntaxErrorDetail]: + """Validate raw CLI input before tokenization/execution. + + This is intentionally lightweight and focuses on user-facing syntax issues: + - Unbalanced single/double quotes + - Dangling or empty pipeline stages (|) + + Returns: + None if valid, otherwise a SyntaxErrorDetail describing the issue. 
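+
+    Illustrative examples of this helper's behaviour (the cmdlet names are only examples):
+        validate_pipeline_text('search-store "foo | bar')        # unbalanced quote -> error
+        validate_pipeline_text('search-store foo | | add-tag x') # empty stage -> error
+        validate_pipeline_text('search-store foo | add-tag x')   # valid -> None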
+ """ + if text is None: + return SyntaxErrorDetail("Empty command") + + raw = text.strip() + if not raw: + return SyntaxErrorDetail("Empty command") + + in_single = False + in_double = False + escaped = False + last_pipe_outside_quotes: Optional[int] = None + + for idx, ch in enumerate(raw): + if escaped: + escaped = False + continue + + if ch == "\\" and (in_single or in_double): + escaped = True + continue + + if ch == '"' and not in_single: + in_double = not in_double + continue + + if ch == "'" and not in_double: + in_single = not in_single + continue + + if ch == "|" and not in_single and not in_double: + # Record pipe locations to catch empty stages/dangling pipe. + if last_pipe_outside_quotes is not None and last_pipe_outside_quotes == idx - 1: + return SyntaxErrorDetail("Syntax error: empty pipeline stage (found '||').") + last_pipe_outside_quotes = idx + + if in_double: + return SyntaxErrorDetail('Syntax error: missing closing ' + '"' + '.', expected='"') + if in_single: + return SyntaxErrorDetail("Syntax error: missing closing '.", expected="'") + + # Dangling pipe at end / pipe as first non-space character + if raw.startswith("|"): + return SyntaxErrorDetail("Syntax error: pipeline cannot start with '|'.") + if raw.endswith("|"): + return SyntaxErrorDetail("Syntax error: pipeline cannot end with '|'.") + + # Empty stage like "cmd1 | | cmd2" (spaces between pipes) + if "|" in raw: + # Simple pass: look for pipes that have only whitespace between them. + # We only check outside quotes by re-scanning and counting non-space chars between pipes. + in_single = False + in_double = False + escaped = False + seen_nonspace_since_pipe = True # start true to allow leading command + for ch in raw: + if escaped: + escaped = False + continue + if ch == "\\" and (in_single or in_double): + escaped = True + continue + if ch == '"' and not in_single: + in_double = not in_double + continue + if ch == "'" and not in_double: + in_single = not in_single + continue + if ch == "|" and not in_single and not in_double: + if not seen_nonspace_since_pipe: + return SyntaxErrorDetail("Syntax error: empty pipeline stage (use a command between '|').") + seen_nonspace_since_pipe = False + continue + if not in_single and not in_double and not ch.isspace(): + seen_nonspace_since_pipe = True + + return None + + +def parse_query(query: str) -> Dict[str, Any]: + """Parse a query string into field:value pairs and free text. + + Supports syntax like: + - isbn:0557677203 + - author:"Albert Pike" + - title:"Morals and Dogma" year:2010 + - Mixed with free text: Morals isbn:0557677203 + + Returns: + Dict with keys: + - fields: Dict[str, str] + - text: str + - raw: str + """ + + result: Dict[str, Any] = { + "fields": {}, + "text": "", + "raw": query, + } + + if not query or not query.strip(): + return result + + raw = query.strip() + remaining_parts: list[str] = [] + + # Match field:value where value is either a quoted string or a non-space token. 
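+    # Illustrative: for 'Morals isbn:0557677203 author:"Albert Pike"' this pattern yields
+    # ('isbn', None, '0557677203') and ('author', 'Albert Pike', None); 'Morals' stays free text.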
+ pattern = r'(\w+):(?:"([^"]*)"|(\S+))' + + pos = 0 + for match in re.finditer(pattern, raw): + if match.start() > pos: + before_text = raw[pos : match.start()].strip() + if before_text: + remaining_parts.append(before_text) + + field_name = (match.group(1) or "").lower() + field_value = match.group(2) if match.group(2) is not None else match.group(3) + if field_name: + result["fields"][field_name] = field_value + + pos = match.end() + + if pos < len(raw): + remaining_text = raw[pos:].strip() + if remaining_text: + remaining_parts.append(remaining_text) + + result["text"] = " ".join(remaining_parts) + return result + + +def get_field(parsed_query: Dict[str, Any], field_name: str, default: Optional[str] = None) -> Optional[str]: + """Get a field value from a parsed query.""" + + return parsed_query.get("fields", {}).get((field_name or "").lower(), default) + + +def get_free_text(parsed_query: Dict[str, Any]) -> str: + """Get the free-text portion of a parsed query.""" + + return str(parsed_query.get("text", "") or "") diff --git a/cmdlets/__init__.py b/cmdlet/__init__.py similarity index 64% rename from cmdlets/__init__.py rename to cmdlet/__init__.py index 400f037..260c4b9 100644 --- a/cmdlets/__init__.py +++ b/cmdlet/__init__.py @@ -10,6 +10,24 @@ Cmdlet = Callable[[Any, Sequence[str], Dict[str, Any]], int] REGISTRY: Dict[str, Cmdlet] = {} +def _normalize_cmd_name(name: str) -> str: + return str(name or "").replace('_', '-').lower().strip() + + +def register_callable(names: Iterable[str], fn: Cmdlet) -> Cmdlet: + """Register a callable under one or more command names. + + This is the single registration mechanism used by both: + - legacy function cmdlet (decorator form) + - class-based cmdlet (Cmdlet.register()) + """ + for name in names: + key = _normalize_cmd_name(name) + if key: + REGISTRY[key] = fn + return fn + + def register(names: Iterable[str]): """Decorator to register a function under one or more command names. @@ -18,18 +36,16 @@ def register(names: Iterable[str]): def _run(result, args, config) -> int: ... 
""" def _wrap(fn: Cmdlet) -> Cmdlet: - for name in names: - REGISTRY[name.replace('_', '-').lower()] = fn - return fn + return register_callable(names, fn) return _wrap def get(cmd_name: str) -> Cmdlet | None: - return REGISTRY.get(cmd_name.replace('_', '-').lower()) + return REGISTRY.get(_normalize_cmd_name(cmd_name)) # Dynamically import all cmdlet modules in this directory (ignore files starting with _ and __init__.py) -# Cmdlets self-register when instantiated via their __init__ method +# cmdlet self-register when instantiated via their __init__ method import os cmdlet_dir = os.path.dirname(__file__) for filename in os.listdir(cmdlet_dir): @@ -43,7 +59,7 @@ for filename in os.listdir(cmdlet_dir): mod_name = filename[:-3] # Enforce Powershell-style two-word cmdlet naming (e.g., add_file, get_file) - # Skip native/utility scripts that are not cmdlets (e.g., adjective, worker, matrix, pipe) + # Skip native/utility scripts that are not cmdlet (e.g., adjective, worker, matrix, pipe) if "_" not in mod_name: continue @@ -54,15 +70,15 @@ for filename in os.listdir(cmdlet_dir): print(f"Error importing cmdlet '{mod_name}': {e}", file=sys.stderr) continue -# Import and register native commands that are not considered cmdlets +# Import and register native commands that are not considered cmdlet try: - from cmdnats import register_native_commands as _register_native_commands + from cmdnat import register_native_commands as _register_native_commands _register_native_commands(REGISTRY) except Exception: # Native commands are optional; ignore if unavailable pass -# Import root-level modules that also register cmdlets +# Import root-level modules that also register cmdlet for _root_mod in ("select_cmdlet",): try: _import_module(_root_mod) @@ -70,7 +86,7 @@ for _root_mod in ("select_cmdlet",): # Allow missing optional modules continue -# Also import helper modules that register cmdlets +# Also import helper modules that register cmdlet try: import API.alldebrid as _alldebrid except Exception: diff --git a/cmdlets/_shared.py b/cmdlet/_shared.py similarity index 98% rename from cmdlets/_shared.py rename to cmdlet/_shared.py index 896ccfe..f633560 100644 --- a/cmdlets/_shared.py +++ b/cmdlet/_shared.py @@ -10,7 +10,7 @@ from collections.abc import Iterable as IterableABC from SYS.logger import log, debug from pathlib import Path -from typing import Any, Dict, Iterable, List, Optional, Sequence, Set +from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Set from dataclasses import dataclass, field import models @@ -94,15 +94,15 @@ class CmdletArg: # ============================================================================ -# SHARED ARGUMENTS - Reusable argument definitions across cmdlets +# SHARED ARGUMENTS - Reusable argument definitions across cmdlet # ============================================================================ class SharedArgs: - """Registry of shared CmdletArg definitions used across multiple cmdlets. + """Registry of shared CmdletArg definitions used across multiple cmdlet. This class provides a centralized location for common arguments so they're defined once and used consistently everywhere. Reduces duplication and ensures - all cmdlets handle the same arguments identically. + all cmdlet handle the same arguments identically. 
Example: CMDLET = Cmdlet( @@ -367,8 +367,8 @@ class Cmdlet: """List of arguments accepted by this cmdlet""" detail: List[str] = field(default_factory=list) """Detailed explanation lines (for help text)""" - exec: Optional[Any] = field(default=None) - """The execution function: func(result, args, config) -> int""" + # Execution function: func(result, args, config) -> int + exec: Optional[Callable[[Any, Sequence[str], Dict[str, Any]], int]] = field(default=None) @@ -399,7 +399,7 @@ class Cmdlet: if not callable(self.exec): return self try: - from . import register as _register # Local import to avoid circular import cost + from . import register_callable as _register_callable # Local import to avoid circular import cost except Exception: return self @@ -407,7 +407,7 @@ class Cmdlet: if not names: return self - _register(names)(self.exec) + _register_callable(names, self.exec) return self def get_flags(self, arg_name: str) -> set[str]: @@ -599,8 +599,14 @@ def normalize_hash(hash_hex: Optional[str]) -> Optional[str]: """ if not isinstance(hash_hex, str): return None - text = hash_hex.strip() - return text.lower() if text else None + text = hash_hex.strip().lower() + if not text: + return None + if len(text) != 64: + return None + if not all(ch in "0123456789abcdef" for ch in text): + return None + return text def get_hash_for_operation(override_hash: Optional[str], result: Any, field_name: str = "hash") -> Optional[str]: @@ -669,7 +675,7 @@ def get_field(obj: Any, field: str, default: Optional[Any] = None) -> Any: Handles both dict.get(field) and getattr(obj, field) access patterns. Also handles lists by accessing the first element. For PipeObjects, checks the extra field as well. - Used throughout cmdlets to uniformly access fields from mixed types. + Used throughout cmdlet to uniformly access fields from mixed types. Args: obj: Dict, object, or list to extract from @@ -705,7 +711,7 @@ def get_field(obj: Any, field: str, default: Optional[Any] = None) -> Any: def should_show_help(args: Sequence[str]) -> bool: """Check if help flag was passed in arguments. - Consolidates repeated pattern of checking for help flags across cmdlets. + Consolidates repeated pattern of checking for help flags across cmdlet. Args: args: Command arguments to check @@ -1077,7 +1083,7 @@ def apply_preferred_title(tags: List[str], preferred: Optional[str]) -> List[str # ============================================================================ -# PIPEOBJECT UTILITIES (for chainable cmdlets and multi-action pipelines) +# PIPEOBJECT UTILITIES (for chainable cmdlet and multi-action pipelines) # ============================================================================ def create_pipe_object_result( @@ -1095,7 +1101,7 @@ def create_pipe_object_result( """Create a PipeObject-compatible result dict for pipeline chaining. This is a helper to emit results in the standard format that downstream - cmdlets can process (filter, tag, cleanup, etc.). + cmdlet can process (filter, tag, cleanup, etc.). Args: source: Source system (e.g., 'local', 'hydrus', 'download') @@ -1350,7 +1356,7 @@ def collapse_namespace_tags(tags: Optional[Iterable[Any]], namespace: str, prefe def collapse_namespace_tag(tags: Optional[Iterable[Any]], namespace: str, prefer: str = "last") -> list[str]: """Singular alias for collapse_namespace_tags. - Some cmdlets prefer the singular name; keep behavior centralized. + Some cmdlet prefer the singular name; keep behavior centralized. 
""" return collapse_namespace_tags(tags, namespace, prefer=prefer) @@ -1643,7 +1649,7 @@ def coerce_to_pipe_object(value: Any, default_path: Optional[str] = None) -> mod def register_url_with_local_library(pipe_obj: models.PipeObject, config: Dict[str, Any]) -> bool: """Register url with a file in the local library database. - This is called automatically by download cmdlets to ensure url are persisted + This is called automatically by download cmdlet to ensure url are persisted without requiring a separate add-url step in the pipeline. Args: diff --git a/cmdlets/add_file.py b/cmdlet/add_file.py similarity index 99% rename from cmdlets/add_file.py rename to cmdlet/add_file.py index de88ca6..358a393 100644 --- a/cmdlets/add_file.py +++ b/cmdlet/add_file.py @@ -350,7 +350,7 @@ class Add_File(Cmdlet): """Delegate URL handling to download-media cmdlet.""" log(f"Target is a URL, delegating to download-media: {url_str}", file=sys.stderr) # Reuse the globally-registered cmdlet instance to avoid duplicative registration - from cmdlets.download_media import CMDLET as dl_cmdlet + from cmdlet.download_media import CMDLET as dl_cmdlet dl_args = list(args) if args else [] # Add the URL to the argument list for download-media @@ -615,7 +615,7 @@ class Add_File(Cmdlet): """ try: import asyncio - from Provider.registry import download_soulseek_file + from ProviderCore.registry import download_soulseek_file from pathlib import Path # Extract metadata from result @@ -684,7 +684,7 @@ class Add_File(Cmdlet): delete_after: bool, ) -> int: """Handle uploading to a file provider (e.g. 0x0).""" - from Provider.registry import get_file_provider + from ProviderCore.registry import get_file_provider log(f"Uploading via {provider_name}: {media_path.name}", file=sys.stderr) diff --git a/cmdlet/add_note.py b/cmdlet/add_note.py new file mode 100644 index 0000000..446ef02 --- /dev/null +++ b/cmdlet/add_note.py @@ -0,0 +1,148 @@ +from __future__ import annotations + +from pathlib import Path +from typing import Any, Dict, Optional, Sequence +import sys + +from SYS.logger import log + +import pipeline as ctx +from ._shared import ( + Cmdlet, + CmdletArg, + SharedArgs, + normalize_hash, + parse_cmdlet_args, + normalize_result_input, + should_show_help, +) +from Store import Store +from SYS.utils import sha256_file + + +class Add_Note(Cmdlet): + def __init__(self) -> None: + super().__init__( + name="add-note", + summary="Add or set a named note on a file in a store.", + usage="add-note -store [-hash ] ", + alias=["set-note", "add_note"], + arg=[ + SharedArgs.STORE, + SharedArgs.HASH, + CmdletArg("name", type="string", required=True, description="The note name/key to set (e.g. 
'comment', 'lyric')."), + CmdletArg("text", type="string", required=True, description="Note text/content to store.", variadic=True), + ], + detail=[ + "- Notes are stored via the selected store backend.", + "- For lyrics: store LRC text in a note named 'lyric'.", + ], + exec=self.run, + ) + # Populate dynamic store choices for autocomplete + try: + SharedArgs.STORE.choices = SharedArgs.get_store_choices(None) + except Exception: + pass + self.register() + + def _resolve_hash(self, raw_hash: Optional[str], raw_path: Optional[str], override_hash: Optional[str]) -> Optional[str]: + resolved = normalize_hash(override_hash) if override_hash else normalize_hash(raw_hash) + if resolved: + return resolved + + if raw_path: + try: + p = Path(str(raw_path)) + stem = p.stem + if len(stem) == 64 and all(c in "0123456789abcdef" for c in stem.lower()): + return stem.lower() + if p.exists() and p.is_file(): + return sha256_file(p) + except Exception: + return None + return None + + def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: + if should_show_help(args): + log(f"Cmdlet: {self.name}\nSummary: {self.summary}\nUsage: {self.usage}") + return 0 + + parsed = parse_cmdlet_args(args, self) + + store_override = parsed.get("store") + hash_override = parsed.get("hash") + note_name = str(parsed.get("name") or "").strip() + text_parts = parsed.get("text") + + if not note_name: + log("[add_note] Error: Requires ", file=sys.stderr) + return 1 + + if isinstance(text_parts, list): + note_text = " ".join([str(p) for p in text_parts]).strip() + else: + note_text = str(text_parts or "").strip() + + if not note_text: + log("[add_note] Error: Empty note text", file=sys.stderr) + return 1 + + results = normalize_result_input(result) + if not results: + if store_override and normalize_hash(hash_override): + results = [{"store": str(store_override), "hash": normalize_hash(hash_override)}] + else: + log("[add_note] Error: Requires piped item(s) or -store and -hash", file=sys.stderr) + return 1 + + store_registry = Store(config) + updated = 0 + + for res in results: + if not isinstance(res, dict): + ctx.emit(res) + continue + + store_name = str(store_override or res.get("store") or "").strip() + raw_hash = res.get("hash") + raw_path = res.get("path") + + if not store_name: + log("[add_note] Error: Missing -store and item has no store field", file=sys.stderr) + return 1 + + resolved_hash = self._resolve_hash( + raw_hash=str(raw_hash) if raw_hash else None, + raw_path=str(raw_path) if raw_path else None, + override_hash=str(hash_override) if hash_override else None, + ) + if not resolved_hash: + log("[add_note] Warning: Item missing usable hash; skipping", file=sys.stderr) + ctx.emit(res) + continue + + try: + backend = store_registry[store_name] + except Exception as exc: + log(f"[add_note] Error: Unknown store '{store_name}': {exc}", file=sys.stderr) + return 1 + + ok = False + try: + ok = bool(backend.set_note(resolved_hash, note_name, note_text, config=config)) + except Exception as exc: + log(f"[add_note] Error: Failed to set note: {exc}", file=sys.stderr) + ok = False + + if ok: + updated += 1 + + ctx.emit(res) + + log(f"[add_note] Updated {updated} item(s)", file=sys.stderr) + return 0 if updated > 0 else 1 + + +CMDLET = Add_Note() + diff --git a/cmdlets/add_relationship.py b/cmdlet/add_relationship.py similarity index 95% rename from cmdlets/add_relationship.py rename to cmdlet/add_relationship.py index f0f4703..13e71dd 100644 --- a/cmdlets/add_relationship.py +++ 
b/cmdlet/add_relationship.py @@ -10,7 +10,6 @@ import sys from SYS.logger import log -from . import register import models import pipeline as ctx from API import HydrusNetwork as hydrus_wrapper @@ -144,10 +143,18 @@ def _resolve_king_reference(king_arg: str) -> Optional[str]: def _refresh_relationship_view_if_current(target_hash: Optional[str], target_path: Optional[str], other: Optional[str], config: Dict[str, Any]) -> None: """If the current subject matches the target, refresh relationships via get-relationship.""" try: - from cmdlets import get_relationship as get_rel_cmd # type: ignore + from cmdlet import get as get_cmdlet # type: ignore except Exception: return + get_relationship = None + try: + get_relationship = get_cmdlet("get-relationship") + except Exception: + get_relationship = None + if not callable(get_relationship): + return + try: subject = ctx.get_last_result_subject() if subject is None: @@ -179,12 +186,11 @@ def _refresh_relationship_view_if_current(target_hash: Optional[str], target_pat refresh_args: list[str] = [] if target_hash: refresh_args.extend(["-hash", target_hash]) - get_rel_cmd._run(subject, refresh_args, config) + get_relationship(subject, refresh_args, config) except Exception: pass -@register(["add-relationship", "add-rel"]) # primary name and alias def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int: """Associate file relationships in Hydrus. @@ -196,24 +202,21 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int: """ # Help if should_show_help(_args): - log(json.dumps(CMDLET, ensure_ascii=False, indent=2)) + log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}") return 0 # Parse arguments using CMDLET spec parsed = parse_cmdlet_args(_args, CMDLET) arg_path: Optional[Path] = None - king_arg = parsed.get("king") # New: explicit king argument - rel_type = parsed.get("type", "alt") # New: relationship type (default: alt) - - if parsed: - # Get the first arg value (e.g., -path) - first_arg_name = CMDLET.get("args", [{}])[0].get("name") if CMDLET.get("args") else None - if first_arg_name and first_arg_name in parsed: - arg_value = parsed[first_arg_name] - try: - arg_path = Path(str(arg_value)).expanduser() - except Exception: - arg_path = Path(str(arg_value)) + king_arg = parsed.get("king") + rel_type = parsed.get("type", "alt") + + raw_path = parsed.get("path") + if raw_path: + try: + arg_path = Path(str(raw_path)).expanduser() + except Exception: + arg_path = Path(str(raw_path)) # Handle @N selection which creates a list # Use normalize_result_input to handle both single items and lists @@ -481,3 +484,9 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int: return 1 +# Register cmdlet (no legacy decorator) +CMDLET.exec = _run +CMDLET.alias = ["add-rel"] +CMDLET.register() + + diff --git a/cmdlets/add_tag.py b/cmdlet/add_tag.py similarity index 91% rename from cmdlets/add_tag.py rename to cmdlet/add_tag.py index f0381a7..a1c0066 100644 --- a/cmdlets/add_tag.py +++ b/cmdlet/add_tag.py @@ -66,21 +66,37 @@ def _apply_title_to_result(res: Any, title_value: Optional[str]) -> None: res["columns"] = updated -def _matches_target(item: Any, target_hash: Optional[str], target_path: Optional[str]) -> bool: - """Determine whether a result item refers to the given hash/path target (canonical fields only).""" +def _matches_target( + item: Any, + target_hash: Optional[str], + target_path: Optional[str], + target_store: Optional[str] = None, +) -> bool: + """Determine whether a 
result item refers to the given target. + + Important: hashes can collide across backends in this app's UX (same media in + multiple stores). When target_store is provided, it must match too. + """ def norm(val: Any) -> Optional[str]: return str(val).lower() if val is not None else None target_hash_l = target_hash.lower() if target_hash else None target_path_l = target_path.lower() if target_path else None + target_store_l = target_store.lower() if target_store else None if isinstance(item, dict): hashes = [norm(item.get("hash"))] paths = [norm(item.get("path"))] + stores = [norm(item.get("store"))] else: hashes = [norm(get_field(item, "hash"))] paths = [norm(get_field(item, "path"))] + stores = [norm(get_field(item, "store"))] + + if target_store_l: + if target_store_l not in stores: + return False if target_hash_l and target_hash_l in hashes: return True @@ -118,7 +134,12 @@ def _update_item_title_fields(item: Any, new_title: str) -> None: item["columns"] = updated_cols -def _refresh_result_table_title(new_title: str, target_hash: Optional[str], target_path: Optional[str]) -> None: +def _refresh_result_table_title( + new_title: str, + target_hash: Optional[str], + target_store: Optional[str], + target_path: Optional[str], +) -> None: """Refresh the cached result table with an updated title and redisplay it.""" try: last_table = ctx.get_last_result_table() @@ -130,7 +151,7 @@ def _refresh_result_table_title(new_title: str, target_hash: Optional[str], targ match_found = False for item in items: try: - if _matches_target(item, target_hash, target_path): + if _matches_target(item, target_hash, target_path, target_store): _update_item_title_fields(item, new_title) match_found = True except Exception: @@ -154,7 +175,7 @@ def _refresh_result_table_title(new_title: str, target_hash: Optional[str], targ def _refresh_tag_view(res: Any, target_hash: Optional[str], store_name: Optional[str], target_path: Optional[str], config: Dict[str, Any]) -> None: """Refresh tag display via get-tag. Prefer current subject; fall back to direct hash refresh.""" try: - from cmdlets import get_tag as get_tag_cmd # type: ignore + from cmdlet import get as get_cmdlet # type: ignore except Exception: return @@ -163,16 +184,24 @@ def _refresh_tag_view(res: Any, target_hash: Optional[str], store_name: Optional refresh_args: List[str] = ["-hash", target_hash, "-store", store_name] + get_tag = None + try: + get_tag = get_cmdlet("get-tag") + except Exception: + get_tag = None + if not callable(get_tag): + return + try: subject = ctx.get_last_result_subject() - if subject and _matches_target(subject, target_hash, target_path): - get_tag_cmd._run(subject, refresh_args, config) + if subject and _matches_target(subject, target_hash, target_path, store_name): + get_tag(subject, refresh_args, config) return except Exception: pass try: - get_tag_cmd._run(res, refresh_args, config) + get_tag(res, refresh_args, config) except Exception: pass @@ -187,12 +216,12 @@ class Add_Tag(Cmdlet): summary="Add tag to a file in a store.", usage="add-tag -store [-hash ] [-duplicate ] [-list [,...]] [--all] [,...]", arg=[ + CmdletArg("tag", type="string", required=False, description="One or more tag to add. Comma- or space-separated. Can also use {list_name} syntax. If omitted, uses tag from pipeline payload.", variadic=True), SharedArgs.HASH, SharedArgs.STORE, CmdletArg("-duplicate", type="string", description="Copy existing tag values to new namespaces. 
Formats: title:album,artist (explicit) or title,album,artist (inferred)"), CmdletArg("-list", type="string", description="Load predefined tag lists from adjective.json. Comma-separated list names (e.g., -list philosophy,occult)."), CmdletArg("--all", type="flag", description="Include temporary files in tagging (by default, only tag non-temporary files)."), - CmdletArg("tag", type="string", required=False, description="One or more tag to add. Comma- or space-separated. Can also use {list_name} syntax. If omitted, uses tag from pipeline payload.", variadic=True), ], detail=[ "- By default, only tag non-temporary files (from pipelines). Use --all to tag everything.", @@ -406,15 +435,19 @@ class Add_Tag(Cmdlet): changed = False if removed_namespace_tag: try: - backend.delete_tag(resolved_hash, removed_namespace_tag, config=config) - changed = True + ok_del = backend.delete_tag(resolved_hash, removed_namespace_tag, config=config) + if ok_del: + changed = True except Exception as exc: log(f"[add_tag] Warning: Failed deleting namespace tag: {exc}", file=sys.stderr) if actual_tag_to_add: try: - backend.add_tag(resolved_hash, actual_tag_to_add, config=config) - changed = True + ok_add = backend.add_tag(resolved_hash, actual_tag_to_add, config=config) + if ok_add: + changed = True + else: + log("[add_tag] Warning: Store rejected tag update", file=sys.stderr) except Exception as exc: log(f"[add_tag] Warning: Failed adding tag: {exc}", file=sys.stderr) @@ -438,7 +471,7 @@ class Add_Tag(Cmdlet): _apply_title_to_result(res, final_title) if final_title and (not original_title or final_title.lower() != original_title.lower()): - _refresh_result_table_title(final_title, resolved_hash, raw_path) + _refresh_result_table_title(final_title, resolved_hash, str(store_name), raw_path) if changed: _refresh_tag_view(res, resolved_hash, str(store_name), raw_path, config) diff --git a/cmdlet/add_tags.py b/cmdlet/add_tags.py new file mode 100644 index 0000000..6f30ba6 --- /dev/null +++ b/cmdlet/add_tags.py @@ -0,0 +1,456 @@ +from __future__ import annotations + +from typing import Any, Dict, List, Sequence, Optional +from pathlib import Path +import sys + +from SYS.logger import log + +import models +import pipeline as ctx +from ._shared import normalize_result_input, filter_results_by_temp +from ._shared import ( + Cmdlet, + CmdletArg, + SharedArgs, + normalize_hash, + parse_tag_arguments, + expand_tag_groups, + parse_cmdlet_args, + collapse_namespace_tags, + should_show_help, + get_field, +) +from Store import Store +from SYS.utils import sha256_file + + +def _extract_title_tag(tags: List[str]) -> Optional[str]: + """Return the value of the first title: tag if present.""" + for tag in tags: + if isinstance(tag, str) and tag.lower().startswith("title:"): + value = tag.split(":", 1)[1].strip() + if value: + return value + return None + + +def _apply_title_to_result(res: Any, title_value: Optional[str]) -> None: + """Update result object/dict title fields and columns in-place.""" + if not title_value: + return + if isinstance(res, models.PipeObject): + res.title = title_value + # Update columns if present (Title column assumed index 0) + if hasattr(res, "columns") and isinstance(res.columns, list) and res.columns: + label, *_ = res.columns[0] + if str(label).lower() == "title": + res.columns[0] = (res.columns[0][0], title_value) + elif isinstance(res, dict): + res["title"] = title_value + cols = res.get("columns") + if isinstance(cols, list): + updated = [] + changed = False + for col in cols: + if isinstance(col, 
tuple) and len(col) == 2: + label, val = col + if str(label).lower() == "title": + updated.append((label, title_value)) + changed = True + else: + updated.append(col) + else: + updated.append(col) + if changed: + res["columns"] = updated + + +def _matches_target(item: Any, target_hash: Optional[str], target_path: Optional[str]) -> bool: + """Determine whether a result item refers to the given hash/path target (canonical fields only).""" + + def norm(val: Any) -> Optional[str]: + return str(val).lower() if val is not None else None + + target_hash_l = target_hash.lower() if target_hash else None + target_path_l = target_path.lower() if target_path else None + + if isinstance(item, dict): + hashes = [norm(item.get("hash"))] + paths = [norm(item.get("path"))] + else: + hashes = [norm(get_field(item, "hash"))] + paths = [norm(get_field(item, "path"))] + + if target_hash_l and target_hash_l in hashes: + return True + if target_path_l and target_path_l in paths: + return True + return False + + +def _update_item_title_fields(item: Any, new_title: str) -> None: + """Mutate an item to reflect a new title in plain fields and columns.""" + if isinstance(item, models.PipeObject): + item.title = new_title + if hasattr(item, "columns") and isinstance(item.columns, list) and item.columns: + label, *_ = item.columns[0] + if str(label).lower() == "title": + item.columns[0] = (label, new_title) + elif isinstance(item, dict): + item["title"] = new_title + cols = item.get("columns") + if isinstance(cols, list): + updated_cols = [] + changed = False + for col in cols: + if isinstance(col, tuple) and len(col) == 2: + label, val = col + if str(label).lower() == "title": + updated_cols.append((label, new_title)) + changed = True + else: + updated_cols.append(col) + else: + updated_cols.append(col) + if changed: + item["columns"] = updated_cols + + +def _refresh_result_table_title(new_title: str, target_hash: Optional[str], target_path: Optional[str]) -> None: + """Refresh the cached result table with an updated title and redisplay it.""" + try: + last_table = ctx.get_last_result_table() + items = ctx.get_last_result_items() + if not last_table or not items: + return + + updated_items = [] + match_found = False + for item in items: + try: + if _matches_target(item, target_hash, target_path): + _update_item_title_fields(item, new_title) + match_found = True + except Exception: + pass + updated_items.append(item) + if not match_found: + return + + from result_table import ResultTable # Local import to avoid circular dependency + + new_table = last_table.copy_with_title(getattr(last_table, "title", "")) + + for item in updated_items: + new_table.add_result(item) + + # Keep the underlying history intact; update only the overlay so @.. can + # clear the overlay then continue back to prior tables (e.g., the search list). + ctx.set_last_result_table_overlay(new_table, updated_items) + except Exception: + pass + + +def _refresh_tags_view(res: Any, target_hash: Optional[str], store_name: Optional[str], target_path: Optional[str], config: Dict[str, Any]) -> None: + """Refresh tag display via get-tag. 
Prefer current subject; fall back to direct hash refresh.""" + try: + from cmdlet import get_tag as get_tag_cmd # type: ignore + except Exception: + return + + if not target_hash or not store_name: + return + + refresh_args: List[str] = ["-hash", target_hash, "-store", store_name] + + try: + subject = ctx.get_last_result_subject() + if subject and _matches_target(subject, target_hash, target_path): + get_tag_cmd._run(subject, refresh_args, config) + return + except Exception: + pass + + try: + get_tag_cmd._run(res, refresh_args, config) + except Exception: + pass + + + +class Add_Tag(Cmdlet): + """Class-based add-tags cmdlet with Cmdlet metadata inheritance.""" + + def __init__(self) -> None: + super().__init__( + name="add-tags", + summary="Add tags to a file in a store.", + usage="add-tags -store [-hash ] [-duplicate ] [-list [,...]] [--all] [,...]", + arg=[ + SharedArgs.HASH, + SharedArgs.STORE, + CmdletArg("-duplicate", type="string", description="Copy existing tag values to new namespaces. Formats: title:album,artist (explicit) or title,album,artist (inferred)"), + CmdletArg("-list", type="string", description="Load predefined tag lists from adjective.json. Comma-separated list names (e.g., -list philosophy,occult)."), + CmdletArg("--all", type="flag", description="Include temporary files in tagging (by default, only tags non-temporary files)."), + CmdletArg("tags", type="string", required=False, description="One or more tags to add. Comma- or space-separated. Can also use {list_name} syntax. If omitted, uses tags from pipeline payload.", variadic=True), + ], + detail=[ + "- By default, only tags non-temporary files (from pipelines). Use --all to tag everything.", + "- Requires a store backend: use -store or pipe items that include store.", + "- If -hash is not provided, uses the piped item's hash (or derives from its path when possible).", + "- Multiple tags can be comma-separated or space-separated.", + "- Use -list to include predefined tag lists from adjective.json: -list philosophy,occult", + "- Tags can also reference lists with curly braces: add-tags {philosophy} \"other:tag\"", + "- Use -duplicate to copy EXISTING tag values to new namespaces:", + " Explicit format: -duplicate title:album,artist (copies title: to album: and artist:)", + " Inferred format: -duplicate title,album,artist (first is source, rest are targets)", + "- The source namespace must already exist in the file being tagged.", + "- Target namespaces that already have a value are skipped (not overwritten).", + "- You can also pass the target hash as a tag token: hash:. 
This overrides -hash and is removed from the tag list.", + ], + exec=self.run, + ) + self.register() + + def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: + """Add tags to a file with smart filtering for pipeline results.""" + if should_show_help(args): + log(f"Cmdlet: {self.name}\nSummary: {self.summary}\nUsage: {self.usage}") + return 0 + + # Parse arguments + parsed = parse_cmdlet_args(args, self) + + # Check for --all flag + include_temp = parsed.get("all", False) + + # Normalize input to list + results = normalize_result_input(result) + + # Filter by temp status (unless --all is set) + if not include_temp: + results = filter_results_by_temp(results, include_temp=False) + + if not results: + log("No valid files to tag (all results were temporary; use --all to include temporary files)", file=sys.stderr) + return 1 + + # Get tags from arguments (or fallback to pipeline payload) + raw_tags = parsed.get("tags", []) + if isinstance(raw_tags, str): + raw_tags = [raw_tags] + + # Fallback: if no tags provided explicitly, try to pull from first result payload + if not raw_tags and results: + first = results[0] + payload_tags = None + + # Try multiple tag lookup strategies in order + tag_lookups = [ + lambda x: getattr(x, "tags", None), + lambda x: x.get("tags") if isinstance(x, dict) else None, + ] + + for lookup in tag_lookups: + try: + payload_tags = lookup(first) + if payload_tags: + break + except (AttributeError, TypeError, KeyError): + continue + + if payload_tags: + if isinstance(payload_tags, str): + raw_tags = [payload_tags] + elif isinstance(payload_tags, list): + raw_tags = payload_tags + + # Handle -list argument (convert to {list} syntax) + list_arg = parsed.get("list") + if list_arg: + for l in list_arg.split(','): + l = l.strip() + if l: + raw_tags.append(f"{{{l}}}") + + # Parse and expand tags + tags_to_add = parse_tag_arguments(raw_tags) + tags_to_add = expand_tag_groups(tags_to_add) + + # Allow hash override via namespaced token (e.g., "hash:abcdef...") + extracted_hash = None + filtered_tags: List[str] = [] + for tag in tags_to_add: + if isinstance(tag, str) and tag.lower().startswith("hash:"): + _, _, hash_val = tag.partition(":") + if hash_val: + extracted_hash = normalize_hash(hash_val.strip()) + continue + filtered_tags.append(tag) + tags_to_add = filtered_tags + + if not tags_to_add: + log("No tags provided to add", file=sys.stderr) + return 1 + + # Get other flags (hash override can come from -hash or hash: token) + hash_override = normalize_hash(parsed.get("hash")) or extracted_hash + duplicate_arg = parsed.get("duplicate") + + # Tags ARE provided - apply them to each store-backed result + total_added = 0 + total_modified = 0 + + store_override = parsed.get("store") + + for res in results: + store_name: Optional[str] + raw_hash: Optional[str] + raw_path: Optional[str] + + if isinstance(res, models.PipeObject): + store_name = store_override or res.store + raw_hash = res.hash + raw_path = res.path + elif isinstance(res, dict): + store_name = store_override or res.get("store") + raw_hash = res.get("hash") + raw_path = res.get("path") + else: + ctx.emit(res) + continue + + if not store_name: + log("[add_tags] Error: Missing -store and item has no store field", file=sys.stderr) + return 1 + + resolved_hash = normalize_hash(hash_override) if hash_override else normalize_hash(raw_hash) + if not resolved_hash and raw_path: + try: + p = Path(str(raw_path)) + stem = p.stem + if len(stem) == 64 and all(c in "0123456789abcdef" for c in stem.lower()): 
+ resolved_hash = stem.lower() + elif p.exists() and p.is_file(): + resolved_hash = sha256_file(p) + except Exception: + resolved_hash = None + + if not resolved_hash: + log("[add_tags] Warning: Item missing usable hash (and could not derive from path); skipping", file=sys.stderr) + ctx.emit(res) + continue + + try: + backend = Store(config)[str(store_name)] + except Exception as exc: + log(f"[add_tags] Error: Unknown store '{store_name}': {exc}", file=sys.stderr) + return 1 + + try: + existing_tags, _src = backend.get_tag(resolved_hash, config=config) + except Exception: + existing_tags = [] + + existing_tags_list = [t for t in (existing_tags or []) if isinstance(t, str)] + existing_lower = {t.lower() for t in existing_tags_list} + original_title = _extract_title_tag(existing_tags_list) + + # Per-item tag list (do not mutate shared list) + item_tags_to_add = list(tags_to_add) + item_tags_to_add = collapse_namespace_tags(item_tags_to_add, "title", prefer="last") + + # Handle -duplicate logic (copy existing tags to new namespaces) + if duplicate_arg: + parts = str(duplicate_arg).split(':') + source_ns = "" + targets: list[str] = [] + + if len(parts) > 1: + source_ns = parts[0] + targets = [t.strip() for t in parts[1].split(',') if t.strip()] + else: + parts2 = str(duplicate_arg).split(',') + if len(parts2) > 1: + source_ns = parts2[0] + targets = [t.strip() for t in parts2[1:] if t.strip()] + + if source_ns and targets: + source_prefix = source_ns.lower() + ":" + for t in existing_tags_list: + if not t.lower().startswith(source_prefix): + continue + value = t.split(":", 1)[1] + for target_ns in targets: + new_tag = f"{target_ns}:{value}" + if new_tag.lower() not in existing_lower: + item_tags_to_add.append(new_tag) + + # Namespace replacement: delete old namespace:* when adding namespace:value + removed_namespace_tags: list[str] = [] + for new_tag in item_tags_to_add: + if not isinstance(new_tag, str) or ":" not in new_tag: + continue + ns = new_tag.split(":", 1)[0].strip() + if not ns: + continue + ns_prefix = ns.lower() + ":" + for t in existing_tags_list: + if t.lower().startswith(ns_prefix) and t.lower() != new_tag.lower(): + removed_namespace_tags.append(t) + + removed_namespace_tags = sorted({t for t in removed_namespace_tags}) + + actual_tags_to_add = [t for t in item_tags_to_add if isinstance(t, str) and t.lower() not in existing_lower] + + changed = False + if removed_namespace_tags: + try: + backend.delete_tag(resolved_hash, removed_namespace_tags, config=config) + changed = True + except Exception as exc: + log(f"[add_tags] Warning: Failed deleting namespace tags: {exc}", file=sys.stderr) + + if actual_tags_to_add: + try: + backend.add_tag(resolved_hash, actual_tags_to_add, config=config) + changed = True + except Exception as exc: + log(f"[add_tags] Warning: Failed adding tags: {exc}", file=sys.stderr) + + if changed: + total_added += len(actual_tags_to_add) + total_modified += 1 + + try: + refreshed_tags, _src2 = backend.get_tag(resolved_hash, config=config) + refreshed_list = [t for t in (refreshed_tags or []) if isinstance(t, str)] + except Exception: + refreshed_list = existing_tags_list + + # Update the result's tags using canonical field + if isinstance(res, models.PipeObject): + res.tags = refreshed_list + elif isinstance(res, dict): + res["tags"] = refreshed_list + + final_title = _extract_title_tag(refreshed_list) + _apply_title_to_result(res, final_title) + + if final_title and (not original_title or final_title.lower() != original_title.lower()): + 
_refresh_result_table_title(final_title, resolved_hash, raw_path) + + if changed: + _refresh_tags_view(res, resolved_hash, str(store_name), raw_path, config) + + ctx.emit(res) + + log( + f"[add_tags] Added {total_added} new tag(s) across {len(results)} item(s); modified {total_modified} item(s)", + file=sys.stderr, + ) + return 0 + + +CMDLET = Add_Tag() \ No newline at end of file diff --git a/cmdlets/add_url.py b/cmdlet/add_url.py similarity index 100% rename from cmdlets/add_url.py rename to cmdlet/add_url.py diff --git a/cmdlets/catalog.py b/cmdlet/catalog.py similarity index 88% rename from cmdlets/catalog.py rename to cmdlet/catalog.py index 84a66ef..68a8f08 100644 --- a/cmdlets/catalog.py +++ b/cmdlet/catalog.py @@ -4,12 +4,12 @@ from importlib import import_module from typing import Any, Dict, List, Optional try: - from cmdlets import REGISTRY + from cmdlet import REGISTRY except Exception: REGISTRY = {} # type: ignore try: - from cmdnats import register_native_commands as _register_native_commands + from cmdnat import register_native_commands as _register_native_commands except Exception: _register_native_commands = None @@ -33,11 +33,11 @@ def _normalize_mod_name(mod_name: str) -> str: def import_cmd_module(mod_name: str): - """Import a cmdlet/native module from cmdnats or cmdlets packages.""" + """Import a cmdlet/native module from cmdnat or cmdlet packages.""" normalized = _normalize_mod_name(mod_name) if not normalized: return None - for package in ("cmdnats", "cmdlets", None): + for package in ("cmdnat", "cmdlet", None): try: qualified = f"{package}.{normalized}" if package else normalized return import_module(qualified) @@ -122,7 +122,7 @@ def get_cmdlet_metadata(cmd_name: str) -> Optional[Dict[str, Any]]: def list_cmdlet_metadata() -> Dict[str, Dict[str, Any]]: - """Collect metadata for all registered cmdlets keyed by canonical name.""" + """Collect metadata for all registered cmdlet keyed by canonical name.""" ensure_registry_loaded() entries: Dict[str, Dict[str, Any]] = {} for reg_name in (REGISTRY or {}).keys(): @@ -186,27 +186,20 @@ def get_cmdlet_arg_flags(cmd_name: str) -> List[str]: if not meta: return [] - raw = meta.get("raw") - if raw and hasattr(raw, "build_flag_registry"): - try: - registry = raw.build_flag_registry() - flags: List[str] = [] - for flag_set in registry.values(): - flags.extend(flag_set) - return sorted(set(flags)) - except Exception: - pass - + # Preserve the order that arguments are defined on the cmdlet (arg=[...]) so + # completions feel stable and predictable. flags: List[str] = [] + seen: set[str] = set() + for arg in meta.get("args", []): - name = arg.get("name") + name = str(arg.get("name") or "").strip().lstrip("-") if not name: continue - flags.append(f"-{name}") - flags.append(f"--{name}") - alias = arg.get("alias") - if alias: - flags.append(f"-{alias}") + for candidate in (f"-{name}", f"--{name}"): + if candidate not in seen: + flags.append(candidate) + seen.add(candidate) + return flags diff --git a/cmdlets/check_file_status.py b/cmdlet/check_file_status.py similarity index 96% rename from cmdlets/check_file_status.py rename to cmdlet/check_file_status.py index 9328436..aeb36f0 100644 --- a/cmdlets/check_file_status.py +++ b/cmdlet/check_file_status.py @@ -6,7 +6,6 @@ import sys from SYS.logger import log -from . 
import register from API import HydrusNetwork as hydrus_wrapper from ._shared import Cmdlet, CmdletArg, SharedArgs, normalize_hash, should_show_help @@ -27,11 +26,10 @@ CMDLET = Cmdlet( ) -@register(["check-file-status", "check-status", "file-status", "status"]) def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: # Help if should_show_help(args): - log(json.dumps(CMDLET, ensure_ascii=False, indent=2)) + log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}") return 0 # Parse arguments @@ -148,3 +146,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: import traceback traceback.print_exc(file=sys.stderr) return 1 + + +# Register cmdlet (no legacy decorator) +CMDLET.exec = _run +CMDLET.alias = ["check-status", "file-status", "status"] +CMDLET.register() diff --git a/cmdlets/cleanup.py b/cmdlet/cleanup.py similarity index 97% rename from cmdlets/cleanup.py rename to cmdlet/cleanup.py index 8574d58..f147b2a 100644 --- a/cmdlets/cleanup.py +++ b/cmdlet/cleanup.py @@ -13,13 +13,10 @@ import json from SYS.logger import log -from . import register from ._shared import Cmdlet, CmdletArg, get_pipe_object_path, normalize_result_input, filter_results_by_temp, should_show_help import models import pipeline as pipeline_context - -@register(["cleanup"]) def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: """Remove temporary files from pipeline results. @@ -38,7 +35,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: # Help if should_show_help(args): - log(json.dumps(CMDLET, ensure_ascii=False, indent=2)) + log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}") return 0 # Normalize input to list @@ -103,5 +100,6 @@ CMDLET = Cmdlet( "- Typical usage at end of pipeline: ... 
| add-tag -store local \"tag\" --all | cleanup", "- Exit code 0 if cleanup successful, 1 if no results to process", ], -) + exec=_run, +).register() diff --git a/cmdlets/delete_file.py b/cmdlet/delete_file.py similarity index 100% rename from cmdlets/delete_file.py rename to cmdlet/delete_file.py diff --git a/cmdlet/delete_note.py b/cmdlet/delete_note.py new file mode 100644 index 0000000..1f52688 --- /dev/null +++ b/cmdlet/delete_note.py @@ -0,0 +1,140 @@ +from __future__ import annotations + +from pathlib import Path +from typing import Any, Dict, Optional, Sequence +import sys + +from SYS.logger import log + +import pipeline as ctx +from ._shared import ( + Cmdlet, + CmdletArg, + SharedArgs, + normalize_hash, + parse_cmdlet_args, + normalize_result_input, + get_field, + should_show_help, +) +from Store import Store +from SYS.utils import sha256_file + + +class Delete_Note(Cmdlet): + def __init__(self) -> None: + super().__init__( + name="delete-note", + summary="Delete a named note from a file in a store.", + usage="delete-note -store [-hash ] ", + alias=["del-note"], + arg=[ + SharedArgs.STORE, + SharedArgs.HASH, + CmdletArg("name", type="string", required=True, description="The note name/key to delete."), + ], + detail=[ + "- Deletes the named note from the selected store backend.", + ], + exec=self.run, + ) + try: + SharedArgs.STORE.choices = SharedArgs.get_store_choices(None) + except Exception: + pass + self.register() + + def _resolve_hash(self, raw_hash: Optional[str], raw_path: Optional[str], override_hash: Optional[str]) -> Optional[str]: + resolved = normalize_hash(override_hash) if override_hash else normalize_hash(raw_hash) + if resolved: + return resolved + if raw_path: + try: + p = Path(str(raw_path)) + stem = p.stem + if len(stem) == 64 and all(c in "0123456789abcdef" for c in stem.lower()): + return stem.lower() + if p.exists() and p.is_file(): + return sha256_file(p) + except Exception: + return None + return None + + def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: + if should_show_help(args): + log(f"Cmdlet: {self.name}\nSummary: {self.summary}\nUsage: {self.usage}") + return 0 + + parsed = parse_cmdlet_args(args, self) + + store_override = parsed.get("store") + hash_override = parsed.get("hash") + note_name_override = str(parsed.get("name") or "").strip() + # Allow piping note rows from get-note: the selected item carries note_name. + inferred_note_name = str(get_field(result, "note_name") or "").strip() + if not note_name_override and not inferred_note_name: + log("[delete_note] Error: Requires (or pipe a note row that provides note_name)", file=sys.stderr) + return 1 + + results = normalize_result_input(result) + if not results: + if store_override and normalize_hash(hash_override): + results = [{"store": str(store_override), "hash": normalize_hash(hash_override)}] + else: + log("[delete_note] Error: Requires piped item(s) or -store and -hash", file=sys.stderr) + return 1 + + store_registry = Store(config) + deleted = 0 + + for res in results: + if not isinstance(res, dict): + ctx.emit(res) + continue + + # Resolve which note name to delete for this item. 
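+            # Precedence for the note name: the explicit <name> argument wins, then the
+            # piped row's own note_name (e.g. a row emitted by get-note), then the
+            # note_name inferred from the original pipe input.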
+ note_name = note_name_override or str(res.get("note_name") or "").strip() or inferred_note_name + if not note_name: + log("[delete_note] Error: Missing note name (pass or pipe a note row)", file=sys.stderr) + return 1 + + store_name = str(store_override or res.get("store") or "").strip() + raw_hash = res.get("hash") + raw_path = res.get("path") + + if not store_name: + log("[delete_note] Error: Missing -store and item has no store field", file=sys.stderr) + return 1 + + resolved_hash = self._resolve_hash( + raw_hash=str(raw_hash) if raw_hash else None, + raw_path=str(raw_path) if raw_path else None, + override_hash=str(hash_override) if hash_override else None, + ) + if not resolved_hash: + ctx.emit(res) + continue + + try: + backend = store_registry[store_name] + except Exception as exc: + log(f"[delete_note] Error: Unknown store '{store_name}': {exc}", file=sys.stderr) + return 1 + + ok = False + try: + ok = bool(backend.delete_note(resolved_hash, note_name, config=config)) + except Exception as exc: + log(f"[delete_note] Error: Failed to delete note: {exc}", file=sys.stderr) + ok = False + + if ok: + deleted += 1 + + ctx.emit(res) + + log(f"[delete_note] Deleted note on {deleted} item(s)", file=sys.stderr) + return 0 if deleted > 0 else 1 + + +CMDLET = Delete_Note() diff --git a/cmdlets/delete_relationship.py b/cmdlet/delete_relationship.py similarity index 95% rename from cmdlets/delete_relationship.py rename to cmdlet/delete_relationship.py index fb93078..9c587b6 100644 --- a/cmdlets/delete_relationship.py +++ b/cmdlet/delete_relationship.py @@ -10,7 +10,7 @@ import sys from SYS.logger import log import pipeline as ctx -from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args, normalize_result_input, get_field +from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args, normalize_result_input, get_field, should_show_help from API.folder import LocalLibrarySearchOptimizer from config import get_local_storage_path @@ -18,7 +18,7 @@ from config import get_local_storage_path def _refresh_relationship_view_if_current(target_hash: Optional[str], target_path: Optional[str], other: Optional[str], config: Dict[str, Any]) -> None: """If the current subject matches the target, refresh relationships via get-relationship.""" try: - from cmdlets import get_relationship as get_rel_cmd # type: ignore + from cmdlet import get as get_cmdlet # type: ignore except Exception: return @@ -55,7 +55,11 @@ def _refresh_relationship_view_if_current(target_hash: Optional[str], target_pat refresh_args: list[str] = [] if target_hash: refresh_args.extend(["-hash", target_hash]) - get_rel_cmd._run(subject, refresh_args, config) + + cmd = get_cmdlet("get-relationship") + if not cmd: + return + cmd(subject, refresh_args, config) except Exception: pass @@ -72,6 +76,10 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: Exit code (0 = success) """ try: + if should_show_help(args): + log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}") + return 0 + # Parse arguments parsed_args = parse_cmdlet_args(args, CMDLET) delete_all_flag = parsed_args.get("all", False) @@ -203,3 +211,6 @@ CMDLET = Cmdlet( "- Delete all from file: delete-relationship -path --all", ], ) + +CMDLET.exec = _run +CMDLET.register() diff --git a/cmdlets/delete_tag.py b/cmdlet/delete_tag.py similarity index 96% rename from cmdlets/delete_tag.py rename to cmdlet/delete_tag.py index 32a09e4..19bac7b 100644 --- a/cmdlets/delete_tag.py +++ b/cmdlet/delete_tag.py @@ -5,7 +5,6 @@ from pathlib import Path 
import json import sys -from . import register import models import pipeline as ctx from ._shared import Cmdlet, CmdletArg, SharedArgs, normalize_hash, parse_tag_arguments, should_show_help, get_field @@ -16,10 +15,18 @@ from Store import Store def _refresh_tag_view_if_current(file_hash: str | None, store_name: str | None, path: str | None, config: Dict[str, Any]) -> None: """If the current subject matches the target, refresh tags via get-tag.""" try: - from cmdlets import get_tag as get_tag_cmd # type: ignore + from cmdlet import get as get_cmdlet # type: ignore except Exception: return + get_tag = None + try: + get_tag = get_cmdlet("get-tag") + except Exception: + get_tag = None + if not callable(get_tag): + return + try: subject = ctx.get_last_result_subject() if subject is None: @@ -51,7 +58,9 @@ def _refresh_tag_view_if_current(file_hash: str | None, store_name: str | None, refresh_args: list[str] = [] if file_hash: refresh_args.extend(["-hash", file_hash]) - get_tag_cmd._run(subject, refresh_args, config) + if store_name: + refresh_args.extend(["-store", store_name]) + get_tag(subject, refresh_args, config) except Exception: pass @@ -71,11 +80,10 @@ CMDLET = Cmdlet( ], ) -@register(["delete-tag"]) def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: # Help if should_show_help(args): - log(json.dumps(CMDLET, ensure_ascii=False, indent=2)) + log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}") return 0 # Check if we have a piped TagItem with no args (i.e., from @1 | delete-tag) @@ -319,4 +327,9 @@ def _process_deletion(tags: list[str], file_hash: str | None, path: str | None, return False +# Register cmdlet (no legacy decorator) +CMDLET.exec = _run +CMDLET.register() + + diff --git a/cmdlets/delete_url.py b/cmdlet/delete_url.py similarity index 100% rename from cmdlets/delete_url.py rename to cmdlet/delete_url.py diff --git a/cmdlets/download_file.py b/cmdlet/download_file.py similarity index 99% rename from cmdlets/download_file.py rename to cmdlet/download_file.py index cfba1db..5b3043d 100644 --- a/cmdlets/download_file.py +++ b/cmdlet/download_file.py @@ -96,7 +96,7 @@ class Download_File(Cmdlet): get_search_provider = None SearchResult = None try: - from Provider.registry import get_search_provider as _get_search_provider, SearchResult as _SearchResult + from ProviderCore.registry import get_search_provider as _get_search_provider, SearchResult as _SearchResult get_search_provider = _get_search_provider SearchResult = _SearchResult diff --git a/cmdlets/download_media.py b/cmdlet/download_media.py similarity index 95% rename from cmdlets/download_media.py rename to cmdlet/download_media.py index 90a289d..cbf5c44 100644 --- a/cmdlets/download_media.py +++ b/cmdlet/download_media.py @@ -26,6 +26,7 @@ import sys import time import traceback from typing import Any, Dict, Iterator, List, Optional +from urllib.parse import urljoin, urlparse import httpx @@ -89,12 +90,13 @@ def is_url_supported_by_ytdlp(url: str) -> bool: def list_formats(url: str, no_playlist: bool = False, playlist_items: Optional[str] = None) -> Optional[List[Dict[str, Any]]]: _ensure_yt_dlp_ready() try: - ydl_opts = {"quiet": True, "no_warnings": True, "socket_timeout": 30} + assert yt_dlp is not None + ydl_opts: Dict[str, Any] = {"quiet": True, "no_warnings": True, "socket_timeout": 30} if no_playlist: ydl_opts["noplaylist"] = True if playlist_items: ydl_opts["playlist_items"] = playlist_items - with yt_dlp.YoutubeDL(ydl_opts) as ydl: + with yt_dlp.YoutubeDL(ydl_opts) 
as ydl: # type: ignore[arg-type] debug(f"Fetching format list for: {url}") info = ydl.extract_info(url, download=False) formats = info.get("formats", []) @@ -114,6 +116,7 @@ def list_formats(url: str, no_playlist: bool = False, playlist_items: Optional[s "vcodec": fmt.get("vcodec", "none"), "acodec": fmt.get("acodec", "none"), "filesize": fmt.get("filesize"), + "abr": fmt.get("abr"), "tbr": fmt.get("tbr"), }) debug(f"Found {len(result_formats)} available formats") @@ -123,6 +126,49 @@ def list_formats(url: str, no_playlist: bool = False, playlist_items: Optional[s return None +def _pick_best_audio_format_id(formats: List[Dict[str, Any]]) -> Optional[str]: + audio_only: List[Dict[str, Any]] = [] + for fmt in formats: + if not isinstance(fmt, dict): + continue + format_id = str(fmt.get("format_id") or "").strip() + if not format_id: + continue + vcodec = str(fmt.get("vcodec") or "none").lower() + acodec = str(fmt.get("acodec") or "none").lower() + if vcodec != "none": + continue + if not acodec or acodec == "none": + continue + audio_only.append(fmt) + + if not audio_only: + return None + + def score(f: Dict[str, Any]) -> tuple[float, float]: + tbr = f.get("tbr") + abr = f.get("abr") + bitrate = 0.0 + for candidate in (tbr, abr): + try: + if candidate is not None: + bitrate = max(bitrate, float(candidate)) + except Exception: + pass + size = 0.0 + try: + fs = f.get("filesize") + if fs is not None: + size = float(fs) + except Exception: + pass + return (bitrate, size) + + best = max(audio_only, key=score) + best_id = str(best.get("format_id") or "").strip() + return best_id or None + + def _download_with_sections_via_cli(url: str, ytdl_options: Dict[str, Any], sections: List[str], quiet: bool = False) -> tuple[Optional[str], Dict[str, Any]]: sections_list = ytdl_options.get("download_sections", []) if not sections_list: @@ -173,6 +219,10 @@ def _download_with_sections_via_cli(url: str, ytdl_options: Dict[str, Any], sect cmd.extend(["--cookies", cookies_path]) if ytdl_options.get("noplaylist"): cmd.append("--no-playlist") + + # Apply clip/section selection + cmd.extend(["--download-sections", section]) + cmd.append(url) if not quiet: debug(f"Running yt-dlp for section: {section}") @@ -511,7 +561,7 @@ def _download_direct_file( return DownloadMediaResult( path=file_path, info=info, - tags=tags, + tag=tags, source_url=url, hash_value=hash_value, ) @@ -865,7 +915,7 @@ def download_media( return DownloadMediaResult( path=media_path, info=info_dict, - tags=tags, + tag=tags, source_url=opts.url, hash_value=file_hash, paths=media_paths, # Include all section files if present @@ -944,7 +994,7 @@ def download_media( return DownloadMediaResult( path=media_path, info=entry, - tags=tags, + tag=tags, source_url=source_url, hash_value=hash_value, ) @@ -1001,15 +1051,12 @@ class Download_Media(Cmdlet): name="download-media", summary="Download media from streaming sites (YouTube, Twitch, etc.)", usage="download-media [options] or search-file | download-media [options]", - alias=["dl-media", "download-ytdlp"], + alias=[""], arg=[ - CmdletArg(name="url", type="string", required=False, description="URL to download (yt-dlp supported sites only)", variadic=True), - CmdletArg(name="-url", type="string", description="URL to download (alias for positional argument)", variadic=True), + SharedArgs.URL, CmdletArg(name="audio", type="flag", alias="a", description="Download audio only"), - CmdletArg(name="video", type="flag", alias="v", description="Download video (default)"), CmdletArg(name="format", type="string", 
alias="fmt", description="Explicit yt-dlp format selector"), CmdletArg(name="clip", type="string", description="Extract time range: MM:SS-MM:SS"), - CmdletArg(name="section", type="string", description="Download sections: TIME_RANGE[,TIME_RANGE...]"), CmdletArg(name="item", type="string", description="Item selection for playlists/formats"), ], detail=["Download media from streaming sites using yt-dlp.", "For direct file downloads, use download-file."], @@ -1073,9 +1120,10 @@ class Download_Media(Cmdlet): # Get other options clip_spec = parsed.get("clip") - section_spec = parsed.get("section") - # Parse clip/section ranges if specified + mode = "audio" if parsed.get("audio") else "video" + + # Parse clip range if specified clip_range = None if clip_spec: clip_range = self._parse_time_range(clip_spec) @@ -1083,19 +1131,19 @@ class Download_Media(Cmdlet): log(f"Invalid clip format: {clip_spec}", file=sys.stderr) return 1 - section_ranges = None - if section_spec: - section_ranges = self._parse_section_ranges(section_spec) - if not section_ranges: - log(f"Invalid section format: {section_spec}", file=sys.stderr) - return 1 - # Check if we need to show format selection playlist_items = str(parsed.get("item")) if parsed.get("item") else None ytdl_format = parsed.get("format") - # If no -item, no explicit -format specified, and single URL, check for multiple formats/playlist - if not playlist_items and not ytdl_format and len(supported_url) == 1: + # If no -item, no explicit -format specified, and single URL, show the format table. + # Do NOT stop to show formats when -audio is used (auto-pick) or when -clip is used. + if ( + mode != "audio" + and not clip_spec + and not playlist_items + and not ytdl_format + and len(supported_url) == 1 + ): url = supported_url[0] formats = list_formats(url, no_playlist=False) @@ -1241,9 +1289,8 @@ class Download_Media(Cmdlet): # Download each URL downloaded_count = 0 - clip_sections_spec = self._build_clip_sections_spec(clip_range, section_ranges) + clip_sections_spec = self._build_clip_sections_spec(clip_range) quiet_mode = bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False - mode = "audio" if parsed.get("audio") else "video" for url in supported_url: try: @@ -1263,6 +1310,14 @@ class Download_Media(Cmdlet): actual_format = playlist_items actual_playlist_items = None + # Auto-pick best audio format when -audio is used and no explicit format is given. + if mode == "audio" and not actual_format: + chosen = None + formats = list_formats(url, no_playlist=False, playlist_items=actual_playlist_items) + if formats: + chosen = _pick_best_audio_format_id(formats) + actual_format = chosen or "bestaudio/best" + opts = DownloadOptions( url=url, mode=mode, @@ -1358,31 +1413,14 @@ class Download_Media(Cmdlet): except Exception: return None - def _parse_section_ranges(self, spec: str) -> Optional[List[tuple]]: - """Parse 'RANGE1,RANGE2,...' 
where each RANGE is 'MM:SS-MM:SS'.""" - try: - ranges = [] - for range_spec in spec.split(","): - r = self._parse_time_range(range_spec.strip()) - if r is None: - return None - ranges.append(r) - return ranges if ranges else None - except Exception: - return None - def _build_clip_sections_spec( self, clip_range: Optional[tuple], - section_ranges: Optional[List[tuple]], ) -> Optional[str]: - """Convert parsed clip/section ranges into downloader spec (seconds).""" + """Convert parsed clip range into downloader spec (seconds).""" ranges: List[str] = [] if clip_range: ranges.append(f"{clip_range[0]}-{clip_range[1]}") - if section_ranges: - for start, end in section_ranges: - ranges.append(f"{start}-{end}") return ",".join(ranges) if ranges else None def _build_pipe_object(self, download_result: Any, url: str, opts: DownloadOptions) -> Dict[str, Any]: diff --git a/cmdlets/download_torrent.py b/cmdlet/download_torrent.py similarity index 100% rename from cmdlets/download_torrent.py rename to cmdlet/download_torrent.py diff --git a/cmdlets/get_file.py b/cmdlet/get_file.py similarity index 100% rename from cmdlets/get_file.py rename to cmdlet/get_file.py diff --git a/cmdlets/get_metadata.py b/cmdlet/get_metadata.py similarity index 100% rename from cmdlets/get_metadata.py rename to cmdlet/get_metadata.py diff --git a/cmdlet/get_note.py b/cmdlet/get_note.py new file mode 100644 index 0000000..2f9935b --- /dev/null +++ b/cmdlet/get_note.py @@ -0,0 +1,143 @@ +from __future__ import annotations + +from pathlib import Path +from typing import Any, Dict, Optional, Sequence +import sys + +from SYS.logger import log + +import pipeline as ctx +from ._shared import ( + Cmdlet, + CmdletArg, + SharedArgs, + normalize_hash, + parse_cmdlet_args, + normalize_result_input, + should_show_help, +) +from Store import Store +from SYS.utils import sha256_file + + +class Get_Note(Cmdlet): + def __init__(self) -> None: + super().__init__( + name="get-note", + summary="List notes on a file in a store.", + usage="get-note -store [-hash ]", + alias=["get-notes", "get_note"], + arg=[ + SharedArgs.STORE, + SharedArgs.HASH, + ], + detail=[ + "- Notes are retrieved via the selected store backend.", + "- Lyrics are stored in a note named 'lyric'.", + ], + exec=self.run, + ) + try: + SharedArgs.STORE.choices = SharedArgs.get_store_choices(None) + except Exception: + pass + self.register() + + def _resolve_hash(self, raw_hash: Optional[str], raw_path: Optional[str], override_hash: Optional[str]) -> Optional[str]: + resolved = normalize_hash(override_hash) if override_hash else normalize_hash(raw_hash) + if resolved: + return resolved + if raw_path: + try: + p = Path(str(raw_path)) + stem = p.stem + if len(stem) == 64 and all(c in "0123456789abcdef" for c in stem.lower()): + return stem.lower() + if p.exists() and p.is_file(): + return sha256_file(p) + except Exception: + return None + return None + + def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: + if should_show_help(args): + log(f"Cmdlet: {self.name}\nSummary: {self.summary}\nUsage: {self.usage}") + return 0 + + parsed = parse_cmdlet_args(args, self) + store_override = parsed.get("store") + hash_override = parsed.get("hash") + + results = normalize_result_input(result) + if not results: + if store_override and normalize_hash(hash_override): + results = [{"store": str(store_override), "hash": normalize_hash(hash_override)}] + else: + log("[get_note] Error: Requires piped item(s) or -store and -hash", file=sys.stderr) + return 1 + + 
store_registry = Store(config) + any_notes = False + + for res in results: + if not isinstance(res, dict): + continue + + store_name = str(store_override or res.get("store") or "").strip() + raw_hash = res.get("hash") + raw_path = res.get("path") + + if not store_name: + log("[get_note] Error: Missing -store and item has no store field", file=sys.stderr) + return 1 + + resolved_hash = self._resolve_hash( + raw_hash=str(raw_hash) if raw_hash else None, + raw_path=str(raw_path) if raw_path else None, + override_hash=str(hash_override) if hash_override else None, + ) + if not resolved_hash: + continue + + try: + backend = store_registry[store_name] + except Exception as exc: + log(f"[get_note] Error: Unknown store '{store_name}': {exc}", file=sys.stderr) + return 1 + + notes = {} + try: + notes = backend.get_note(resolved_hash, config=config) or {} + except Exception: + notes = {} + + if not notes: + continue + + any_notes = True + # Emit each note as its own row so CLI renders a proper note table + for k in sorted(notes.keys(), key=lambda x: str(x).lower()): + v = notes.get(k) + raw_text = str(v or "") + preview = " ".join(raw_text.replace("\r", "").split("\n")) + ctx.emit( + { + "store": store_name, + "hash": resolved_hash, + "note_name": str(k), + "note_text": raw_text, + "columns": [ + ("Name", str(k)), + ("Text", preview.strip()), + ], + } + ) + + if not any_notes: + ctx.emit("No notes found.") + return 0 + + +CMDLET = Get_Note() + + diff --git a/cmdlets/get_relationship.py b/cmdlet/get_relationship.py similarity index 99% rename from cmdlets/get_relationship.py rename to cmdlet/get_relationship.py index 176ee35..9e2f5f8 100644 --- a/cmdlets/get_relationship.py +++ b/cmdlet/get_relationship.py @@ -33,7 +33,7 @@ CMDLET = Cmdlet( def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int: # Help if should_show_help(_args): - log(json.dumps(CMDLET, ensure_ascii=False, indent=2)) + log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}") return 0 # Parse -hash override @@ -423,3 +423,7 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int: return 0 +CMDLET.exec = _run +CMDLET.register() + + diff --git a/cmdlets/get_tag.py b/cmdlet/get_tag.py similarity index 99% rename from cmdlets/get_tag.py rename to cmdlet/get_tag.py index fc95a69..e50b7a1 100644 --- a/cmdlets/get_tag.py +++ b/cmdlet/get_tag.py @@ -39,7 +39,7 @@ from dataclasses import dataclass @dataclass class TagItem: - """Tag item for display in ResultTable and piping to other cmdlets. + """Tag item for display in ResultTable and piping to other cmdlet. 
Allows tags to be selected and piped like: - delete-tag @{3,4,9} (delete tags at indices 3, 4, 9) diff --git a/cmdlets/get_url.py b/cmdlet/get_url.py similarity index 100% rename from cmdlets/get_url.py rename to cmdlet/get_url.py diff --git a/cmdlets/merge_file.py b/cmdlet/merge_file.py similarity index 86% rename from cmdlets/merge_file.py rename to cmdlet/merge_file.py index 9046c66..4c64b70 100644 --- a/cmdlets/merge_file.py +++ b/cmdlet/merge_file.py @@ -3,16 +3,25 @@ from __future__ import annotations from typing import Any, Dict, Optional, Sequence, List from pathlib import Path -import json import sys from SYS.logger import log -from cmdlets.download_media import download_media -from models import DownloadOptions -from config import resolve_output_dir import subprocess as _subprocess import shutil as _shutil -from ._shared import create_pipe_object_result, parse_cmdlet_args + +from ._shared import ( + Cmdlet, + CmdletArg, + create_pipe_object_result, + get_field, + get_pipe_object_hash, + get_pipe_object_path, + normalize_result_input, + parse_cmdlet_args, + should_show_help, +) + +import pipeline as ctx try: from PyPDF2 import PdfWriter, PdfReader @@ -27,31 +36,29 @@ try: read_tags_from_file, write_tags_to_file, dedup_tags_by_namespace, - merge_multiple_tag_lists, - write_tags, write_metadata ) HAS_METADATA_API = True except ImportError: HAS_METADATA_API = False -from . import register -from ._shared import ( - Cmdlet, - CmdletArg, - normalize_result_input, - get_pipe_object_path, - get_pipe_object_hash, - should_show_help, - get_field, -) -import models -import pipeline as ctx - - + def read_tags_from_file(file_path: Path) -> List[str]: + return [] + def write_tags_to_file( + file_path: Path, + tags: List[str], + source_hashes: Optional[List[str]] = None, + url: Optional[List[str]] = None, + append: bool = False, + ) -> bool: + return False + def dedup_tags_by_namespace(tags: List[str]) -> List[str]: + return tags + def write_metadata(*_args: Any, **_kwargs: Any) -> None: + return None def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: @@ -59,7 +66,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: # Parse help if should_show_help(args): - log(json.dumps(CMDLET, ensure_ascii=False, indent=2)) + log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}") return 0 # Parse arguments @@ -95,7 +102,6 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: # Extract file paths and metadata from result objects source_files: List[Path] = [] - source_tags_files: List[Path] = [] source_hashes: List[str] = [] source_url: List[str] = [] source_tags: List[str] = [] # NEW: collect tags from source files @@ -111,37 +117,14 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: if candidate.exists(): target_path = candidate - # Check for playlist item that needs downloading - if not target_path and isinstance(item, dict) and item.get('__action', '').startswith('playlist-item:'): - try: - playlist_url = item.get('__file_path') - item_idx = int(item['__action'].split(':')[1]) - log(f"Downloading playlist item #{item_idx} from {playlist_url}...", flush=True) - - output_dir = resolve_output_dir(config) - opts = DownloadOptions( - url=playlist_url, - output_dir=output_dir, - playlist_items=str(item_idx), - mode="audio" if format_spec == "m4b" else "auto" # Infer mode if possible - ) - - res = download_media(opts) - if res and res.path and res.path.exists(): - target_path = res.path - 
log(f"✓ Downloaded: {target_path.name}", flush=True) - except Exception as e: - log(f"Failed to download playlist item: {e}", file=sys.stderr) - if target_path and target_path.exists(): source_files.append(target_path) - - # Track the .tag file for this source + + # Track tags from the .tag sidecar for this source (if present) tags_file = target_path.with_suffix(target_path.suffix + '.tag') - if tags_file.exists(): - source_tags_files.append(tags_file) + if tags_file.exists() and HAS_METADATA_API: try: - source_tags.extend(read_tags_from_file(tags_file) if HAS_METADATA_API else []) + source_tags.extend(read_tags_from_file(tags_file) or []) except Exception: pass @@ -201,7 +184,8 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: # Determine output path if output_override: if output_override.is_dir(): - base_name = _sanitize_name(getattr(files_to_merge[0], 'title', 'merged')) + base_title = get_field(files_to_merge[0], 'title', 'merged') + base_name = _sanitize_name(str(base_title or 'merged')) output_path = output_override / f"{base_name} (merged).{_ext_for_format(output_format)}" else: output_path = output_override @@ -231,12 +215,11 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: log(f"Merged {len(source_files)} files into: {output_path}", file=sys.stderr) + merged_tags: List[str] = [f"title:{output_path.stem}"] + # Create .tag sidecar file for the merged output using unified API tags_path = output_path.with_suffix(output_path.suffix + '.tag') try: - # Start with title tag - merged_tags = [f"title:{output_path.stem}"] - # Merge tags from source files using metadata API if source_tags and HAS_METADATA_API: # Use dedup function to normalize and deduplicate @@ -281,8 +264,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: # Also create .metadata file using centralized function try: - write_metadata(output_path, source_hashes[0] if source_hashes else None, source_url, source_relationships) - log(f"Created metadata: {output_path.name}.metadata", file=sys.stderr) + if HAS_METADATA_API and write_metadata: + write_metadata(output_path, source_hashes[0] if source_hashes else None, source_url, source_relationships) + log(f"Created metadata: {output_path.name}.metadata", file=sys.stderr) except Exception as e: log(f"Warning: Could not create metadata file: {e}", file=sys.stderr) @@ -312,23 +296,26 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: # Still emit a string representation for feedback ctx.emit(f"Merged: {output_path}") - # Delete source files if requested - # Always delete source files if they were downloaded playlist items (temp files) - # We can detect this if they are in the temp download directory or if we tracked them - if delete_after or True: # Force delete for now as merge consumes them - # First delete all .tag files - for tags_file in source_tags_files: - try: - tags_file.unlink() - log(f"Deleted: {tags_file.name}", file=sys.stderr) - except Exception as e: - log(f"Warning: Could not delete {tags_file.name}: {e}", file=sys.stderr) - - # Then delete all source files + # Cleanup + # - Delete source files only when -delete is set. 
+ if delete_after: for f in source_files: try: - f.unlink() - log(f"Deleted: {f.name}", file=sys.stderr) + # Delete sidecar tags for the source (if any) + tag_file = f.with_suffix(f.suffix + '.tag') + if tag_file.exists(): + try: + tag_file.unlink() + log(f"Deleted: {tag_file.name}", file=sys.stderr) + except Exception as e: + log(f"Warning: Could not delete {tag_file.name}: {e}", file=sys.stderr) + except Exception: + pass + + try: + if f.exists(): + f.unlink() + log(f"Deleted: {f.name}", file=sys.stderr) except Exception as e: log(f"Warning: Could not delete {f.name}: {e}", file=sys.stderr) @@ -348,6 +335,7 @@ def _ext_for_format(fmt: str) -> str: format_map = { 'mp3': 'mp3', 'm4a': 'm4a', + 'm4b': 'm4b', 'aac': 'aac', 'opus': 'opus', 'mka': 'mka', # Matroska Audio - EXCELLENT chapter support (recommended) @@ -361,58 +349,6 @@ def _ext_for_format(fmt: str) -> str: return format_map.get(fmt.lower(), 'mka') -def _add_chapters_to_m4a(file_path: Path, chapters: List[Dict]) -> bool: - """Add chapters to an M4A file using mutagen. - - Args: - file_path: Path to M4A file - chapters: List of chapter dicts with 'title', 'start_ms', 'end_ms' - - Returns: - True if successful, False otherwise - """ - import logging - logger = logging.getLogger(__name__) - - if not chapters: - return True - - try: - from mutagen.mp4 import MP4, Atom - from mutagen.mp4._util import Atom as MP4Atom - except ImportError: - logger.warning("[merge-file] mutagen not available for chapter writing") - return False - - try: - # Load the MP4 file - audio = MP4(str(file_path)) - - # Build the chapter atom - # MP4 chapters are stored in a 'chap' atom with specific structure - chapter_data = b'' - - for i, chapter in enumerate(chapters, 1): - # Each chapter entry: 10-byte header + title - title = chapter.get('title', f'Chapter {i}').encode('utf-8') - start_time_ms = int(chapter.get('start_ms', 0)) - - # Chapter atom format for M4A: - # (uint32: size)(uint32: 'chap')(uint8: reserved)(uint24: atom type) + more... 
- # This is complex, so we'll use a simpler atom approach - pass - - # Unfortunately, mutagen doesn't have built-in chapter writing for MP4 - # Chapter writing requires low-level atom manipulation - # For now, we'll just return and note this limitation - logger.info("[merge-file] MP4 chapter writing via mutagen not fully supported") - return False - - except Exception as e: - logger.warning(f"[merge-file] Error writing chapters: {e}") - return False - - def _merge_audio(files: List[Path], output: Path, output_format: str) -> bool: """Merge audio files with chapters based on file boundaries.""" import logging @@ -529,7 +465,7 @@ def _merge_audio(files: List[Path], output: Path, output_format: str) -> bool: # Audio codec selection for first input if output_format == 'mp3': cmd.extend(['-c:a', 'libmp3lame', '-q:a', '2']) - elif output_format == 'm4a': + elif output_format in {'m4a', 'm4b'}: # Use copy if possible (much faster), otherwise re-encode # Check if inputs are already AAC/M4A to avoid re-encoding # For now, default to copy if format matches, otherwise re-encode @@ -682,7 +618,7 @@ def _merge_audio(files: List[Path], output: Path, output_format: str) -> bool: except Exception as e: logger.exception(f"[merge-file] Chapter embedding failed: {e}") log(f"Warning: Chapter embedding failed, using merge without chapters", file=sys.stderr) - elif output_format == 'm4a' or output.suffix.lower() in ['.m4a', '.mp4']: + elif output_format in {'m4a', 'm4b'} or output.suffix.lower() in ['.m4a', '.m4b', '.mp4']: # MP4/M4A format has native chapter support via iTunes metadata atoms log(f"Embedding chapters into MP4 container...", file=sys.stderr) logger.info(f"[merge-file] Adding chapters to M4A/MP4 file via iTunes metadata") @@ -833,16 +769,12 @@ def _merge_text(files: List[Path], output: Path) -> bool: def _merge_pdf(files: List[Path], output: Path) -> bool: """Merge PDF files.""" - if not HAS_PYPDF2: + if (not HAS_PYPDF2) or (PdfWriter is None) or (PdfReader is None): log("PyPDF2 is required for PDF merging. Install with: pip install PyPDF2", file=sys.stderr) return False try: - if HAS_PYPDF2: - writer = PdfWriter() - else: - log("PyPDF2 is required for PDF merging. Install with: pip install PyPDF2", file=sys.stderr) - return False + writer = PdfWriter() for f in files: try: @@ -866,11 +798,11 @@ def _merge_pdf(files: List[Path], output: Path) -> bool: CMDLET = Cmdlet( name="merge-file", summary="Merge multiple files into a single output file. Supports audio, video, PDF, and text merging with optional cleanup.", - usage="merge-file [-delete] [-output ] [-format ]", + usage="merge-file [-delete] [-output ] [-format ]", arg=[ CmdletArg("-delete", type="flag", description="Delete source files after successful merge."), CmdletArg("-output", description="Override output file path."), - CmdletArg("-format", description="Output format (auto/mp3/aac/opus/mp4/mkv/pdf/txt). Default: auto-detect from first file."), + CmdletArg("-format", description="Output format (auto/mka/m4a/m4b/mp3/aac/opus/mp4/mkv/pdf/txt). 
Default: auto-detect from first file."), ], detail=[ "- Pipe multiple files: search-file query | [1,2,3] | merge-file", @@ -882,3 +814,6 @@ CMDLET = Cmdlet( "- -delete flag removes all source files after successful merge.", ], ) + +CMDLET.exec = _run +CMDLET.register() diff --git a/cmdlets/screen_shot.py b/cmdlet/screen_shot.py similarity index 96% rename from cmdlets/screen_shot.py rename to cmdlet/screen_shot.py index 8306f80..97d3e25 100644 --- a/cmdlets/screen_shot.py +++ b/cmdlet/screen_shot.py @@ -8,8 +8,6 @@ from __future__ import annotations import contextlib import hashlib -import importlib -import json import sys import time import httpx @@ -21,10 +19,7 @@ from urllib.parse import urlsplit, quote, urljoin from SYS.logger import log, debug from API.HTTP import HTTPClient from SYS.utils import ensure_directory, unique_path, unique_preserve_order - -from . import register from ._shared import Cmdlet, CmdletArg, SharedArgs, create_pipe_object_result, normalize_result_input, should_show_help, get_field -import models import pipeline as pipeline_context # ============================================================================ @@ -40,13 +35,17 @@ import pipeline as pipeline_context try: from playwright.sync_api import ( TimeoutError as PlaywrightTimeoutError, - ViewportSize, sync_playwright, ) -except Exception as exc: - raise RuntimeError( - "playwright is required for screenshot capture; install with 'pip install playwright'" - ) from exc + HAS_PLAYWRIGHT = True +except Exception: + HAS_PLAYWRIGHT = False + PlaywrightTimeoutError = TimeoutError # type: ignore + + def sync_playwright(*_args: Any, **_kwargs: Any) -> Any: # type: ignore + raise RuntimeError( + "playwright is required for screenshot capture; install with: pip install playwright; then: playwright install" + ) try: from config import resolve_output_dir @@ -69,7 +68,7 @@ USER_AGENT = ( "Chrome/120.0.0.0 Safari/537.36" ) -DEFAULT_VIEWPORT: ViewportSize = {"width": 1280, "height": 1200} +DEFAULT_VIEWPORT: dict[str, int] = {"width": 1280, "height": 1200} ARCHIVE_TIMEOUT = 30.0 # Configurable selectors for specific websites @@ -114,7 +113,7 @@ class ScreenshotOptions: """Options controlling screenshot capture and post-processing.""" output_dir: Path - url: Sequence[str] = () + url: str = "" output_path: Optional[Path] = None full_page: bool = True headless: bool = True @@ -124,7 +123,6 @@ class ScreenshotOptions: tag: Sequence[str] = () archive: bool = False archive_timeout: float = ARCHIVE_TIMEOUT - url: Sequence[str] = () output_format: Optional[str] = None prefer_platform_target: bool = False target_selectors: Optional[Sequence[str]] = None @@ -470,10 +468,10 @@ def _capture_screenshot(options: ScreenshotOptions) -> ScreenshotResult: warnings: List[str] = [] _capture(options, destination, warnings) - # Build URL list from provided options.url (sequence) and deduplicate - url = unique_preserve_order(list(options.url)) + # Build URL list from captured url and any archives + url: List[str] = [options.url] if options.url else [] archive_url: List[str] = [] - if options.archive: + if options.archive and options.url: debug(f"[_capture_screenshot] Archiving enabled for {options.url}") archives, archive_warnings = _archive_url(options.url, options.archive_timeout) archive_url.extend(archives) @@ -518,9 +516,16 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: # Help check if should_show_help(args): - log(json.dumps(CMDLET, ensure_ascii=False, indent=2)) + log(f"Cmdlet: {CMDLET.name}\nSummary: 
{CMDLET.summary}\nUsage: {CMDLET.usage}") return 0 + if not HAS_PLAYWRIGHT: + log( + "playwright is required for screenshot capture; install with: pip install playwright; then: playwright install", + file=sys.stderr, + ) + return 1 + # ======================================================================== # ARGUMENT PARSING # ======================================================================== @@ -627,7 +632,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: try: # Create screenshot with provided options options = ScreenshotOptions( - url=[url], + url=url, output_dir=screenshot_dir, output_format=format_name, archive=archive_enabled, @@ -672,7 +677,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: } ) - # Emit the result so downstream cmdlets (like add-file) can use it + # Emit the result so downstream cmdlet (like add-file) can use it pipeline_context.emit(pipe_obj) all_emitted.append(pipe_obj) @@ -711,3 +716,6 @@ CMDLET = Cmdlet( """] ) + +CMDLET.exec = _run +CMDLET.register() diff --git a/cmdlets/search_provider.py b/cmdlet/search_provider.py similarity index 98% rename from cmdlets/search_provider.py rename to cmdlet/search_provider.py index 664d1f6..70e7aac 100644 --- a/cmdlets/search_provider.py +++ b/cmdlet/search_provider.py @@ -8,7 +8,7 @@ import uuid import importlib from SYS.logger import log, debug -from Provider.registry import get_search_provider, list_search_providers +from ProviderCore.registry import get_search_provider, list_search_providers from ._shared import Cmdlet, CmdletArg, should_show_help import pipeline as ctx @@ -49,7 +49,7 @@ class Search_Provider(Cmdlet): "- soulseek: Plain text search", "- youtube: Plain text search", "", - "Results can be piped to other cmdlets:", + "Results can be piped to other cmdlet:", " search-provider bandcamp \"artist:grace\" | @1 | download-data", ], exec=self.run diff --git a/cmdlets/search_store.py b/cmdlet/search_store.py similarity index 99% rename from cmdlets/search_store.py rename to cmdlet/search_store.py index 7f1cf64..185b45c 100644 --- a/cmdlets/search_store.py +++ b/cmdlet/search_store.py @@ -304,7 +304,7 @@ class Search_Store(Cmdlet): continue normalized = self._ensure_storage_columns(item_dict) - # Make hash/store available for downstream cmdlets without rerunning search + # Make hash/store available for downstream cmdlet without rerunning search hash_val = normalized.get("hash") store_val = normalized.get("store") or item_dict.get("store") if hash_val and not normalized.get("hash"): diff --git a/cmdlets/trim_file.py b/cmdlet/trim_file.py similarity index 99% rename from cmdlets/trim_file.py rename to cmdlet/trim_file.py index c60d560..4bfff58 100644 --- a/cmdlets/trim_file.py +++ b/cmdlet/trim_file.py @@ -11,7 +11,6 @@ import re from SYS.logger import log, debug from SYS.utils import sha256_file -from . 
import register from ._shared import ( Cmdlet, CmdletArg, @@ -112,7 +111,6 @@ def _trim_media(input_path: Path, output_path: Path, start_time: str, end_time: log(f"Error parsing time or running ffmpeg: {e}", file=sys.stderr) return False -@register(["trim-file"]) def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: """Trim a media file.""" # Parse arguments @@ -292,3 +290,8 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: log(f"Failed to trim {path_obj.name}", file=sys.stderr) return 0 if success_count > 0 else 1 + + +# Register cmdlet (no legacy decorator) +CMDLET.exec = _run +CMDLET.register() diff --git a/cmdlets/add_note.py b/cmdlets/add_note.py deleted file mode 100644 index 805f7a3..0000000 --- a/cmdlets/add_note.py +++ /dev/null @@ -1,106 +0,0 @@ -from __future__ import annotations - -from typing import Any, Dict, Sequence -import json - -from . import register -import models -import pipeline as ctx -from API import HydrusNetwork as hydrus_wrapper -from ._shared import Cmdlet, CmdletArg, normalize_hash, should_show_help -from SYS.logger import log - -CMDLET = Cmdlet( - name="add-note", - summary="Add or set a note on a Hydrus file.", - usage="add-note [-hash ] ", - arg=[ - CmdletArg("hash", type="string", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."), - CmdletArg("name", type="string", required=True, description="The note name/key to set (e.g. 'comment', 'source', etc.)."), - CmdletArg("text", type="string", required=True, description="The note text/content to store.", variadic=True), - ], - detail=[ - "- Notes are stored in the 'my notes' service by default.", - ], -) - - -@register(["add-note", "set-note", "add_note"]) # aliases -def add(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: - # Help - if should_show_help(args): - log(json.dumps(CMDLET, ensure_ascii=False, indent=2)) - return 0 - - from ._shared import parse_cmdlet_args - parsed = parse_cmdlet_args(args, CMDLET) - override_hash = parsed.get("hash") - name = parsed.get("name") - text_parts = parsed.get("text") - - if not name: - log("Requires a note name") - return 1 - - name = str(name).strip() - - if isinstance(text_parts, list): - text = " ".join(text_parts).strip() - else: - text = str(text_parts or "").strip() - - if not text: - log("Empty note text") - return 1 - - # Handle @N selection which creates a list - extract the first item - if isinstance(result, list) and len(result) > 0: - result = result[0] - - hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(getattr(result, "hash_hex", None)) - if not hash_hex: - log("Selected result does not include a Hydrus hash") - return 1 - try: - client = hydrus_wrapper.get_client(config) - except Exception as exc: - log(f"Hydrus client unavailable: {exc}") - return 1 - - if client is None: - log("Hydrus client unavailable") - return 1 - try: - service_name = "my notes" - client.set_notes(hash_hex, {name: text}, service_name) - except Exception as exc: - log(f"Hydrus add-note failed: {exc}") - return 1 - - # Refresh notes view if we're operating on the currently selected subject - try: - from cmdlets import get_note as get_note_cmd # type: ignore - except Exception: - get_note_cmd = None - if get_note_cmd: - try: - subject = ctx.get_last_result_subject() - if subject is not None: - def norm(val: Any) -> str: - return str(val).lower() - target_hash = norm(hash_hex) if hash_hex else None - subj_hashes = [] - if isinstance(subject, dict): 
- subj_hashes = [norm(v) for v in [subject.get("hydrus_hash"), subject.get("hash"), subject.get("hash_hex"), subject.get("file_hash")] if v] - else: - subj_hashes = [norm(getattr(subject, f, None)) for f in ("hydrus_hash", "hash", "hash_hex", "file_hash") if getattr(subject, f, None)] - if target_hash and target_hash in subj_hashes: - get_note_cmd.get_notes(subject, ["-hash", hash_hex], config) - return 0 - except Exception: - pass - - ctx.emit(f"Added note '{name}' ({len(text)} chars)") - - return 0 - diff --git a/cmdlets/delete_note.py b/cmdlets/delete_note.py deleted file mode 100644 index c0ef7a6..0000000 --- a/cmdlets/delete_note.py +++ /dev/null @@ -1,102 +0,0 @@ -from __future__ import annotations - -from typing import Any, Dict, Sequence -import json - -import pipeline as ctx -from API import HydrusNetwork as hydrus_wrapper -from ._shared import Cmdlet, CmdletArg, normalize_hash, get_hash_for_operation, fetch_hydrus_metadata, should_show_help, get_field -from SYS.logger import log - -CMDLET = Cmdlet( - name="delete-note", - summary="Delete a named note from a Hydrus file.", - usage="i | del-note [-hash ] ", - alias=["del-note"], - arg=[ - - ], - detail=[ - "- Removes the note with the given name from the Hydrus file.", - ], -) - - -def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: - # Help - if should_show_help(args): - log(json.dumps(CMDLET, ensure_ascii=False, indent=2)) - return 0 - if not args: - log("Requires the note name/key to delete") - return 1 - override_hash: str | None = None - rest: list[str] = [] - i = 0 - while i < len(args): - a = args[i] - low = str(a).lower() - if low in {"-hash", "--hash", "hash"} and i + 1 < len(args): - override_hash = str(args[i + 1]).strip() - i += 2 - continue - rest.append(a) - i += 1 - if not rest: - log("Requires the note name/key to delete") - return 1 - name = str(rest[0] or '').strip() - if not name: - log("Requires a non-empty note name/key") - return 1 - - # Handle @N selection which creates a list - extract the first item - if isinstance(result, list) and len(result) > 0: - result = result[0] - - hash_hex = get_hash_for_operation(override_hash, result) - if not hash_hex: - log("Selected result does not include a Hydrus hash") - return 1 - try: - client = hydrus_wrapper.get_client(config) - except Exception as exc: - log(f"Hydrus client unavailable: {exc}") - return 1 - - if client is None: - log("Hydrus client unavailable") - return 1 - try: - service_name = "my notes" - client.delete_notes(hash_hex, [name], service_name) - except Exception as exc: - log(f"Hydrus delete-note failed: {exc}") - return 1 - - # Refresh notes view if we're operating on the current subject - try: - from cmdlets import get_note as get_note_cmd # type: ignore - except Exception: - get_note_cmd = None - if get_note_cmd: - try: - subject = ctx.get_last_result_subject() - if subject is not None: - def norm(val: Any) -> str: - return str(val).lower() - target_hash = norm(hash_hex) if hash_hex else None - subj_hashes = [] - if isinstance(subject, dict): - subj_hashes = [norm(v) for v in [subject.get("hydrus_hash"), subject.get("hash"), subject.get("hash_hex"), subject.get("file_hash")] if v] - else: - subj_hashes = [norm(get_field(subject, f)) for f in ("hydrus_hash", "hash", "hash_hex", "file_hash") if get_field(subject, f)] - if target_hash and target_hash in subj_hashes: - get_note_cmd.get_notes(subject, ["-hash", hash_hex], config) - return 0 - except Exception: - pass - - log(f"Deleted note '{name}'") - - return 0 diff --git 
a/cmdlets/get_note.py b/cmdlets/get_note.py deleted file mode 100644 index 5e37f4b..0000000 --- a/cmdlets/get_note.py +++ /dev/null @@ -1,66 +0,0 @@ -from __future__ import annotations - -from typing import Any, Dict, Sequence -import json - -from . import register -import models -import pipeline as ctx -from API import HydrusNetwork as hydrus_wrapper -from ._shared import Cmdlet, CmdletArg, SharedArgs, normalize_hash, get_hash_for_operation, fetch_hydrus_metadata, get_field, should_show_help -from SYS.logger import log - -CMDLET = Cmdlet( - name="get-note", - summary="List notes on a Hydrus file.", - usage="get-note [-hash ]", - arg=[ - SharedArgs.HASH, - ], - detail=[ - "- Prints notes by service and note name.", - ], -) - - -@register(["get-note", "get-notes", "get_note"]) # aliases -def get_notes(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: - # Help - if should_show_help(args): - log(json.dumps(CMDLET, ensure_ascii=False, indent=2)) - return 0 - - from ._shared import parse_cmdlet_args, get_hash_for_operation, fetch_hydrus_metadata - parsed = parse_cmdlet_args(args, CMDLET) - override_hash = parsed.get("hash") - - hash_hex = get_hash_for_operation(override_hash, result) - if not hash_hex: - log("Selected result does not include a Hydrus hash") - return 1 - - meta, error_code = fetch_hydrus_metadata(config, hash_hex, include_service_keys_to_tags=False, include_notes=True) - if error_code != 0: - return error_code - - notes = {} - if isinstance(meta, dict): - # Hydrus returns service_keys_to_tags; for notes we expect 'service_names_to_notes' in modern API - notes = meta.get('notes') or meta.get('service_names_to_notes') or {} - if notes: - ctx.emit("Notes:") - # Print flattened: service -> (name: text) - if isinstance(notes, dict) and any(isinstance(v, dict) for v in notes.values()): - for svc, mapping in notes.items(): - ctx.emit(f"- {svc}:") - if isinstance(mapping, dict): - for k, v in mapping.items(): - ctx.emit(f" • {k}: {str(v).strip()}") - elif isinstance(notes, dict): - for k, v in notes.items(): - ctx.emit(f"- {k}: {str(v).strip()}") - else: - ctx.emit("No notes found.") - return 0 - - diff --git a/cmdnats/__init__.py b/cmdnat/__init__.py similarity index 100% rename from cmdnats/__init__.py rename to cmdnat/__init__.py diff --git a/cmdnats/adjective.json b/cmdnat/adjective.json similarity index 100% rename from cmdnats/adjective.json rename to cmdnat/adjective.json diff --git a/cmdnats/adjective.py b/cmdnat/adjective.py similarity index 98% rename from cmdnats/adjective.py rename to cmdnat/adjective.py index cfaa63a..3b9476c 100644 --- a/cmdnats/adjective.py +++ b/cmdnat/adjective.py @@ -2,12 +2,12 @@ import json import os import sys from typing import List, Dict, Any, Optional, Sequence -from cmdlets._shared import Cmdlet, CmdletArg, parse_cmdlet_args +from cmdlet._shared import Cmdlet, CmdletArg, parse_cmdlet_args from SYS.logger import log from result_table import ResultTable import pipeline as ctx -ADJECTIVE_FILE = os.path.join(os.path.dirname(os.path.dirname(__file__)), "cmdnats", "adjective.json") +ADJECTIVE_FILE = os.path.join(os.path.dirname(os.path.dirname(__file__)), "cmdnat", "adjective.json") def _load_adjectives() -> Dict[str, List[str]]: try: diff --git a/cmdnats/config.py b/cmdnat/config.py similarity index 98% rename from cmdnats/config.py rename to cmdnat/config.py index ff7c168..9967176 100644 --- a/cmdnats/config.py +++ b/cmdnat/config.py @@ -1,6 +1,6 @@ from typing import List, Dict, Any -from cmdlets._shared import Cmdlet, CmdletArg 
+from cmdlet._shared import Cmdlet, CmdletArg from config import load_config, save_config CMDLET = Cmdlet( diff --git a/cmdnats/help.py b/cmdnat/help.py similarity index 96% rename from cmdnats/help.py rename to cmdnat/help.py index 72a692e..b6290a2 100644 --- a/cmdnats/help.py +++ b/cmdnat/help.py @@ -4,7 +4,7 @@ from typing import Any, Dict, Sequence, List, Optional import shlex import sys -from cmdlets._shared import Cmdlet, CmdletArg, parse_cmdlet_args +from cmdlet._shared import Cmdlet, CmdletArg, parse_cmdlet_args from SYS.logger import log from result_table import ResultTable import pipeline as ctx @@ -135,7 +135,7 @@ def _render_detail(meta: Dict[str, Any], args: Sequence[str]) -> None: def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: try: - from cmdlets import catalog as _catalog + from cmdlet import catalog as _catalog CMDLET.arg[0].choices = _normalize_choice_list(_catalog.list_cmdlet_names()) metadata = _catalog.list_cmdlet_metadata() @@ -163,7 +163,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: CMDLET = Cmdlet( name=".help", alias=["help", "?"], - summary="Show cmdlets or detailed help", + summary="Show cmdlet or detailed help", usage=".help [cmd] [-filter text]", arg=[ CmdletArg( @@ -176,7 +176,7 @@ CMDLET = Cmdlet( CmdletArg( name="-filter", type="string", - description="Filter cmdlets by substring", + description="Filter cmdlet by substring", required=False, ), ], diff --git a/cmdnats/matrix.py b/cmdnat/matrix.py similarity index 93% rename from cmdnats/matrix.py rename to cmdnat/matrix.py index 7e62de1..d2fa280 100644 --- a/cmdnats/matrix.py +++ b/cmdnat/matrix.py @@ -1,6 +1,6 @@ from typing import Any, Dict, Sequence, List import sys -from cmdlets._shared import Cmdlet, CmdletArg, parse_cmdlet_args +from cmdlet._shared import Cmdlet, CmdletArg, parse_cmdlet_args from SYS.logger import log, debug from result_table import ResultTable # REFACTOR: Commenting out Matrix import until provider refactor is complete diff --git a/cmdnat/pipe.py b/cmdnat/pipe.py new file mode 100644 index 0000000..4d727dd --- /dev/null +++ b/cmdnat/pipe.py @@ -0,0 +1,1486 @@ +from typing import Any, Dict, Sequence, List, Optional +import os +import sys +import json +import socket +import re +import subprocess +from urllib.parse import urlparse, parse_qs +from pathlib import Path +from cmdlet._shared import Cmdlet, CmdletArg, parse_cmdlet_args +from SYS.logger import debug, get_thread_stream, is_debug_enabled, set_debug, set_thread_stream +from result_table import ResultTable +from MPV.mpv_ipc import MPV +import pipeline as ctx +from SYS.download import is_url_supported_by_ytdlp +from models import PipeObject + +from API.folder import LocalLibrarySearchOptimizer +from config import get_local_storage_path, get_hydrus_access_key, get_hydrus_url +from hydrus_health_check import get_cookies_file_path + + +def _ensure_lyric_overlay(mpv: MPV) -> None: + try: + mpv.ensure_lyric_loader_running() + except Exception: + pass + +def _send_ipc_command(command: Dict[str, Any], silent: bool = False) -> Optional[Any]: + """Send a command to the MPV IPC pipe and return the response.""" + try: + mpv = MPV() + return mpv.send(command, silent=silent) + except Exception as e: + if not silent: + debug(f"IPC Error: {e}", file=sys.stderr) + return None + + +def _get_playlist(silent: bool = False) -> Optional[List[Dict[str, Any]]]: + """Get the current playlist from MPV. 
Returns None if MPV is not running.""" + cmd = {"command": ["get_property", "playlist"], "request_id": 100} + resp = _send_ipc_command(cmd, silent=silent) + if resp is None: + return None + if resp.get("error") == "success": + return resp.get("data", []) + return [] + +def _extract_title_from_item(item: Dict[str, Any]) -> str: + """Extract a clean title from an MPV playlist item, handling memory:// M3U hacks.""" + title = item.get("title") + filename = item.get("filename") or "" + + # Special handling for memory:// M3U playlists (used to pass titles via IPC) + if "memory://" in filename and "#EXTINF:" in filename: + try: + # Extract title from #EXTINF:-1,Title + # Use regex to find title between #EXTINF:-1, and newline + match = re.search(r"#EXTINF:-1,(.*?)(?:\n|\r|$)", filename) + if match: + extracted_title = match.group(1).strip() + if not title or title == "memory://": + title = extracted_title + + # If we still don't have a title, try to find the URL in the M3U content + if not title: + lines = filename.splitlines() + for line in lines: + line = line.strip() + if line and not line.startswith('#') and not line.startswith('memory://'): + # Found the URL, use it as title + return line + except Exception: + pass + + return title or filename or "Unknown" + + +def _extract_target_from_memory_uri(text: str) -> Optional[str]: + """Extract the real target URL/path from a memory:// M3U payload.""" + if not isinstance(text, str) or not text.startswith("memory://"): + return None + for line in text.splitlines(): + line = line.strip() + if not line or line.startswith('#') or line.startswith('memory://'): + continue + return line + return None + + +def _find_hydrus_instance_for_hash(hash_str: str, file_storage: Any) -> Optional[str]: + """Find which Hydrus instance serves a specific file hash. + + Args: + hash_str: SHA256 hash (64 hex chars) + file_storage: FileStorage instance with Hydrus backends + + Returns: + Instance name (e.g., 'home') or None if not found + """ + # Query each Hydrus backend to see if it has this file + for backend_name in file_storage.list_backends(): + backend = file_storage[backend_name] + # Check if this is a Hydrus backend by checking class name + backend_class = type(backend).__name__ + if backend_class != "HydrusNetwork": + continue + + try: + # Query metadata to see if this instance has the file + metadata = backend.get_metadata(hash_str) + if metadata: + return backend_name + except Exception: + # This instance doesn't have the file or had an error + continue + + return None + + +def _find_hydrus_instance_by_url(url: str, file_storage: Any) -> Optional[str]: + """Find which Hydrus instance matches a given URL. + + Args: + url: Full URL (e.g., http://localhost:45869/get_files/file?hash=...) 
+ file_storage: FileStorage instance with Hydrus backends + + Returns: + Instance name (e.g., 'home') or None if not found + """ + from urllib.parse import urlparse + + parsed_target = urlparse(url) + target_netloc = parsed_target.netloc.lower() + + # Check each Hydrus backend's URL + for backend_name in file_storage.list_backends(): + backend = file_storage[backend_name] + backend_class = type(backend).__name__ + if backend_class != "HydrusNetwork": + continue + + # Get the backend's base URL from its client + try: + backend_url = backend._client.base_url + parsed_backend = urlparse(backend_url) + backend_netloc = parsed_backend.netloc.lower() + + # Match by netloc (host:port) + if target_netloc == backend_netloc: + return backend_name + except Exception: + continue + + return None + + +def _normalize_playlist_path(text: Optional[str]) -> Optional[str]: + """Normalize playlist entry paths for dedupe comparisons.""" + if not text: + return None + real = _extract_target_from_memory_uri(text) or text + real = real.strip() + if not real: + return None + # If it's already a bare hydrus hash, use it directly + lower_real = real.lower() + if re.fullmatch(r"[0-9a-f]{64}", lower_real): + return lower_real + + # If it's a hydrus file URL, normalize to the hash for dedupe + try: + parsed = urlparse(real) + if parsed.scheme in {"http", "https", "hydrus"}: + if parsed.path.endswith("/get_files/file"): + qs = parse_qs(parsed.query) + h = qs.get("hash", [None])[0] + if h and re.fullmatch(r"[0-9a-f]{64}", h.lower()): + return h.lower() + except Exception: + pass + + # Normalize slashes for Windows paths and lowercase for comparison + real = real.replace('\\', '/') + return real.lower() + + +def _infer_store_from_playlist_item(item: Dict[str, Any], file_storage: Optional[Any] = None) -> str: + """Infer a friendly store label from an MPV playlist entry. + + Args: + item: MPV playlist item dict + file_storage: Optional FileStorage instance for querying specific backend instances + + Returns: + Store label (e.g., 'home', 'work', 'local', 'youtube', etc.) 
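+        Resolution order (mirrors the body below): bare 64-hex Hydrus hashes, magnet:/hydrus://
+        schemes, Windows/UNC and file:// paths ('local'), known hosts (youtube/soundcloud/bandcamp),
+        Hydrus API URLs (mapped back to a configured instance when file_storage is given), then a
+        label derived from the hostname.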
+ """ + name = item.get("filename") if isinstance(item, dict) else None + target = str(name or "") + + # Unwrap memory:// M3U wrapper + memory_target = _extract_target_from_memory_uri(target) + if memory_target: + target = memory_target + + # Hydrus hashes: bare 64-hex entries + if re.fullmatch(r"[0-9a-f]{64}", target.lower()): + # If we have file_storage, query each Hydrus instance to find which one has this hash + if file_storage: + hash_str = target.lower() + hydrus_instance = _find_hydrus_instance_for_hash(hash_str, file_storage) + if hydrus_instance: + return hydrus_instance + return "hydrus" + + lower = target.lower() + if lower.startswith("magnet:"): + return "magnet" + if lower.startswith("hydrus://"): + # Extract hash from hydrus:// URL if possible + if file_storage: + hash_match = re.search(r"[0-9a-f]{64}", target.lower()) + if hash_match: + hash_str = hash_match.group(0) + hydrus_instance = _find_hydrus_instance_for_hash(hash_str, file_storage) + if hydrus_instance: + return hydrus_instance + return "hydrus" + + # Windows / UNC paths + if re.match(r"^[a-z]:[\\/]", target, flags=re.IGNORECASE) or target.startswith("\\\\"): + return "local" + + # file:// url + if lower.startswith("file://"): + return "local" + + parsed = urlparse(target) + host = (parsed.netloc or "").lower() + path = parsed.path or "" + + if not host: + return "" + + host_no_port = host.split(":", 1)[0] + host_stripped = host_no_port[4:] if host_no_port.startswith("www.") else host_no_port + + if "youtube" in host_stripped or "youtu.be" in target.lower(): + return "youtube" + if "soundcloud" in host_stripped: + return "soundcloud" + if "bandcamp" in host_stripped: + return "bandcamp" + if "get_files" in path or "file?hash=" in path or host_stripped in {"127.0.0.1", "localhost"}: + # Hydrus API URL - try to extract hash and find instance + if file_storage: + # Try to extract hash from URL parameters + hash_match = re.search(r"hash=([0-9a-f]{64})", target.lower()) + if hash_match: + hash_str = hash_match.group(1) + hydrus_instance = _find_hydrus_instance_for_hash(hash_str, file_storage) + if hydrus_instance: + return hydrus_instance + # If no hash in URL, try matching the base URL to configured instances + hydrus_instance = _find_hydrus_instance_by_url(target, file_storage) + if hydrus_instance: + return hydrus_instance + return "hydrus" + if re.match(r"^\d+\.\d+\.\d+\.\d+$", host_stripped) and "get_files" in path: + # IP-based Hydrus URL + if file_storage: + hash_match = re.search(r"hash=([0-9a-f]{64})", target.lower()) + if hash_match: + hash_str = hash_match.group(1) + hydrus_instance = _find_hydrus_instance_for_hash(hash_str, file_storage) + if hydrus_instance: + return hydrus_instance + hydrus_instance = _find_hydrus_instance_by_url(target, file_storage) + if hydrus_instance: + return hydrus_instance + return "hydrus" + + parts = host_stripped.split('.') + if len(parts) >= 2: + return parts[-2] or host_stripped + return host_stripped + + +def _build_hydrus_header(config: Dict[str, Any]) -> Optional[str]: + """Return header string for Hydrus auth if configured.""" + try: + key = get_hydrus_access_key(config) + except Exception: + key = None + if not key: + return None + return f"Hydrus-Client-API-Access-Key: {key}" + + +def _build_ytdl_options(config: Optional[Dict[str, Any]], hydrus_header: Optional[str]) -> Optional[str]: + """Compose ytdl-raw-options string including cookies and optional Hydrus header.""" + opts: List[str] = [] + try: + cookies_path = get_cookies_file_path() + except Exception: + 
cookies_path = None + if cookies_path: + opts.append(f"cookies={cookies_path.replace('\\', '/')}") + else: + opts.append("cookies-from-browser=chrome") + if hydrus_header: + opts.append(f"add-header={hydrus_header}") + return ",".join(opts) if opts else None + + +def _is_hydrus_path(path: str, hydrus_url: Optional[str]) -> bool: + if not path: + return False + lower = path.lower() + if "hydrus://" in lower: + return True + parsed = urlparse(path) + host = (parsed.netloc or "").lower() + path_part = parsed.path or "" + if hydrus_url: + try: + hydrus_host = urlparse(hydrus_url).netloc.lower() + if hydrus_host and hydrus_host in host: + return True + except Exception: + pass + if "get_files" in path_part or "file?hash=" in path_part: + return True + if re.match(r"^\d+\.\d+\.\d+\.\d+$", host) and "get_files" in path_part: + return True + return False + +def _ensure_ytdl_cookies() -> None: + """Ensure yt-dlp options are set correctly for this session.""" + from pathlib import Path + cookies_path = get_cookies_file_path() + if cookies_path: + # Check if file exists and has content (use forward slashes for path checking) + check_path = cookies_path.replace('\\', '/') + file_obj = Path(cookies_path) + if file_obj.exists(): + file_size = file_obj.stat().st_size + debug(f"Cookies file verified: {check_path} ({file_size} bytes)") + else: + debug(f"WARNING: Cookies file does not exist: {check_path}", file=sys.stderr) + else: + debug("No cookies file configured") + +def _monitor_mpv_logs(duration: float = 3.0) -> None: + """Monitor MPV logs for a short duration to capture errors.""" + try: + mpv = MPV() + client = mpv.client() + if not client.connect(): + debug("Failed to connect to MPV for log monitoring", file=sys.stderr) + return + + # Request log messages + client.send_command({"command": ["request_log_messages", "warn"]}) + + # On Windows named pipes, avoid blocking the CLI; skip log read entirely + if client.is_windows: + client.disconnect() + return + + import time + start_time = time.time() + + # Unix sockets already have timeouts set; read until duration expires + sock_obj = client.sock + if not isinstance(sock_obj, socket.socket): + client.disconnect() + return + + while time.time() - start_time < duration: + try: + chunk = sock_obj.recv(4096) + except socket.timeout: + continue + except Exception: + break + if not chunk: + break + for line in chunk.decode("utf-8", errors="ignore").splitlines(): + try: + msg = json.loads(line) + if msg.get("event") == "log-message": + text = msg.get("text", "").strip() + prefix = msg.get("prefix", "") + level = msg.get("level", "") + if "ytdl" in prefix or level == "error": + debug(f"[MPV {prefix}] {text}", file=sys.stderr) + except json.JSONDecodeError: + continue + + client.disconnect() + except Exception: + pass + + +def _tail_text_file(path: str, *, max_lines: int = 120, max_bytes: int = 65536) -> List[str]: + try: + p = Path(str(path)) + if not p.exists() or not p.is_file(): + return [] + except Exception: + return [] + + try: + with open(p, "rb") as f: + try: + f.seek(0, os.SEEK_END) + end = f.tell() + start = max(0, end - int(max_bytes)) + f.seek(start, os.SEEK_SET) + except Exception: + pass + data = f.read() + text = data.decode("utf-8", errors="replace") + lines = text.splitlines() + if len(lines) > max_lines: + return lines[-max_lines:] + return lines + except Exception: + return [] +def _get_playable_path(item: Any, file_storage: Optional[Any], config: Optional[Dict[str, Any]]) -> Optional[tuple[str, Optional[str]]]: + """Extract a playable 
path/URL from an item, handling different store types. + + Args: + item: Item to extract path from (dict, PipeObject, or string) + file_storage: FileStorage instance for querying backends + config: Config dict for Hydrus URL + + Returns: + Tuple of (path, title) or None if no valid path found + """ + path: Optional[str] = None + title: Optional[str] = None + store: Optional[str] = None + file_hash: Optional[str] = None + + # Extract fields from item - prefer a disk path ('path'), but accept 'url' as fallback for providers + if isinstance(item, dict): + path = item.get("path") + # Fallbacks for provider-style entries where URL is stored in 'url' or 'source_url' or 'target' + if not path: + path = item.get("url") or item.get("source_url") or item.get("target") + if not path: + known = item.get("url") or item.get("url") or [] + if known and isinstance(known, list): + path = known[0] + title = item.get("title") or item.get("file_title") + store = item.get("store") + file_hash = item.get("hash") + elif hasattr(item, "path") or hasattr(item, "url") or hasattr(item, "source_url") or hasattr(item, "store") or hasattr(item, "hash"): + # Handle PipeObject / dataclass objects - prefer path, but fall back to url/source_url attributes + path = getattr(item, "path", None) + if not path: + path = getattr(item, "url", None) or getattr(item, "source_url", None) or getattr(item, "target", None) + if not path: + known = getattr(item, "url", None) or (getattr(item, "extra", None) or {}).get("url") + if known and isinstance(known, list): + path = known[0] + title = getattr(item, "title", None) or getattr(item, "file_title", None) + store = getattr(item, "store", None) + file_hash = getattr(item, "hash", None) + elif isinstance(item, str): + path = item + + # Debug: show incoming values + try: + debug(f"_get_playable_path: store={store}, path={path}, hash={file_hash}") + except Exception: + pass + + # Treat common placeholders as missing. + if isinstance(path, str) and path.strip().lower() in {"", "n/a", "na", "none"}: + path = None + + if title is not None and not isinstance(title, str): + title = str(title) + + if isinstance(file_hash, str): + file_hash = file_hash.strip().lower() + + # Resolve hash+store into a playable target (file path or URL). + # This is unrelated to MPV's IPC pipe and keeps "pipe" terminology reserved for: + # - MPV IPC pipe (transport) + # - PipeObject (pipeline data) + if store and file_hash and file_hash != "unknown" and file_storage: + # If it's already a URL, MPV can usually play it directly. + if isinstance(path, str) and path.startswith(("http://", "https://")): + return (path, title) + + try: + backend = file_storage[store] + except Exception: + backend = None + + if backend is not None: + backend_class = type(backend).__name__ + + # Folder stores: resolve to an on-disk file path. + if hasattr(backend, "get_file") and callable(getattr(backend, "get_file")) and backend_class == "Folder": + try: + resolved = backend.get_file(file_hash) + if isinstance(resolved, Path): + path = str(resolved) + elif resolved is not None: + path = str(resolved) + except Exception as e: + debug(f"Error resolving file path from store '{store}': {e}", file=sys.stderr) + + # HydrusNetwork: build a playable API file URL without browser side-effects. + elif backend_class == "HydrusNetwork": + try: + client = getattr(backend, "_client", None) + base_url = getattr(client, "url", None) + if base_url: + base_url = str(base_url).rstrip("/") + # Auth is provided via http-header-fields (set in _queue_items). 
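+                        # The resulting URL has the Hydrus Client API form, e.g.
+                        #   http://localhost:45869/get_files/file?hash=<64-hex sha256>
+                        # where host/port come from this backend's client URL.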
+ path = f"{base_url}/get_files/file?hash={file_hash}" + except Exception as e: + debug(f"Error building Hydrus URL from store '{store}': {e}", file=sys.stderr) + + if not path: + # As a last resort, if we have a hash and no path/url, return the hash. + # _queue_items will convert it to a Hydrus file URL when possible. + if store and file_hash and file_hash != "unknown": + return (str(file_hash), title) + return None + + if not isinstance(path, str): + path = str(path) + + return (path, title) + + +def _queue_items( + items: List[Any], + clear_first: bool = False, + config: Optional[Dict[str, Any]] = None, + start_opts: Optional[Dict[str, Any]] = None, +) -> bool: + """Queue items to MPV, starting it if necessary. + + Args: + items: List of items to queue + clear_first: If True, the first item will replace the current playlist + + Returns: + True if MPV was started, False if items were queued via IPC. + """ + # Debug: print incoming items + try: + debug(f"_queue_items: count={len(items)} types={[type(i).__name__ for i in items]}") + except Exception: + pass + + # Just verify cookies are configured, don't try to set via IPC + _ensure_ytdl_cookies() + + hydrus_header = _build_hydrus_header(config or {}) + ytdl_opts = _build_ytdl_options(config, hydrus_header) + hydrus_url = None + try: + hydrus_url = get_hydrus_url(config) if config is not None else None + except Exception: + hydrus_url = None + + # Initialize Store registry for path resolution + file_storage = None + try: + from Store import Store + file_storage = Store(config or {}) + except Exception as e: + debug(f"Warning: Could not initialize Store registry: {e}", file=sys.stderr) + + # Dedupe existing playlist before adding more (unless we're replacing it) + existing_targets: set[str] = set() + if not clear_first: + playlist = _get_playlist(silent=True) or [] + dup_indexes: List[int] = [] + for idx, pl_item in enumerate(playlist): + fname = pl_item.get("filename") if isinstance(pl_item, dict) else str(pl_item) + alt = pl_item.get("playlist-path") if isinstance(pl_item, dict) else None + norm = _normalize_playlist_path(fname) or _normalize_playlist_path(alt) + if not norm: + continue + if norm in existing_targets: + dup_indexes.append(idx) + else: + existing_targets.add(norm) + + # Remove duplicates from playlist starting from the end to keep indices valid + for idx in reversed(dup_indexes): + try: + _send_ipc_command({"command": ["playlist-remove", idx], "request_id": 106}, silent=True) + except Exception: + pass + + new_targets: set[str] = set() + + for i, item in enumerate(items): + # Debug: show the item being processed + try: + debug(f"_queue_items: processing idx={i} type={type(item)} repr={repr(item)[:200]}") + except Exception: + pass + # Extract URL/Path using store-aware logic + result = _get_playable_path(item, file_storage, config) + if not result: + debug(f"_queue_items: item idx={i} produced no playable path") + continue + + target, title = result + + # Prefer per-item Hydrus instance credentials when the item belongs to a Hydrus store. 
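+            # Each configured HydrusNetwork backend can carry its own base URL and access key,
+            # so the auth header and ytdl-raw-options are rebuilt per item here instead of
+            # reusing the globals computed once at the top of _queue_items.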
+ effective_hydrus_url = hydrus_url + effective_hydrus_header = hydrus_header + effective_ytdl_opts = ytdl_opts + item_store_name: Optional[str] = None + try: + item_store = None + if isinstance(item, dict): + item_store = item.get("store") + else: + item_store = getattr(item, "store", None) + + if item_store: + item_store_name = str(item_store).strip() or None + + if item_store and file_storage: + try: + backend = file_storage[str(item_store)] + except Exception: + backend = None + + if backend is not None and type(backend).__name__ == "HydrusNetwork": + client = getattr(backend, "_client", None) + base_url = getattr(client, "url", None) + key = getattr(client, "access_key", None) + if base_url: + effective_hydrus_url = str(base_url).rstrip("/") + if key: + effective_hydrus_header = f"Hydrus-Client-API-Access-Key: {str(key).strip()}" + effective_ytdl_opts = _build_ytdl_options(config, effective_hydrus_header) + except Exception: + pass + + if target: + # If we just have a hydrus hash, build a direct file URL for MPV + if re.fullmatch(r"[0-9a-f]{64}", str(target).strip().lower()) and effective_hydrus_url: + target = f"{effective_hydrus_url.rstrip('/')}/get_files/file?hash={str(target).strip()}" + + norm_key = _normalize_playlist_path(target) or str(target).strip().lower() + if norm_key in existing_targets or norm_key in new_targets: + debug(f"Skipping duplicate playlist entry: {title or target}") + continue + new_targets.add(norm_key) + + # Check if it's a yt-dlp supported URL + is_ytdlp = False + # Treat any http(s) target as yt-dlp candidate. If the Python yt-dlp + # module is available we also check more deeply, but default to True + # so MPV can use its ytdl hooks for remote streaming sites. + is_hydrus_target = _is_hydrus_path(str(target), effective_hydrus_url) + try: + # Hydrus direct file URLs should not be treated as yt-dlp targets. + is_ytdlp = (not is_hydrus_target) and (target.startswith("http") or is_url_supported_by_ytdlp(target)) + except Exception: + is_ytdlp = (not is_hydrus_target) and target.startswith("http") + + # Use memory:// M3U hack to pass title to MPV + # Skip for yt-dlp url to ensure proper handling + if title and (is_hydrus_target or not is_ytdlp): + # Sanitize title for M3U (remove newlines) + safe_title = title.replace('\n', ' ').replace('\r', '') + + # Carry the store name for hash URLs so MPV.lyric can resolve the backend. + # This is especially important for local file-server URLs like /get_files/file?hash=... + target_for_m3u = target + try: + if item_store_name and isinstance(target_for_m3u, str) and target_for_m3u.startswith("http"): + if "get_files/file" in target_for_m3u and "store=" not in target_for_m3u: + sep = "&" if "?" in target_for_m3u else "?" 
+ target_for_m3u = f"{target_for_m3u}{sep}store={item_store_name}" + except Exception: + target_for_m3u = target + + m3u_content = f"#EXTM3U\n#EXTINF:-1,{safe_title}\n{target_for_m3u}" + target_to_send = f"memory://{m3u_content}" + else: + target_to_send = target + + mode = "append" + if clear_first and i == 0: + mode = "replace" + + # If this is a Hydrus path, set header property and yt-dlp headers before loading + if effective_hydrus_header and _is_hydrus_path(target_to_send, effective_hydrus_url): + header_cmd = {"command": ["set_property", "http-header-fields", effective_hydrus_header], "request_id": 199} + _send_ipc_command(header_cmd, silent=True) + if effective_ytdl_opts: + ytdl_cmd = {"command": ["set_property", "ytdl-raw-options", effective_ytdl_opts], "request_id": 197} + _send_ipc_command(ytdl_cmd, silent=True) + + cmd = {"command": ["loadfile", target_to_send, mode], "request_id": 200} + try: + debug(f"Sending MPV loadfile: {target_to_send} mode={mode}") + resp = _send_ipc_command(cmd, silent=True) + debug(f"MPV loadfile response: {resp}") + except Exception as e: + debug(f"Exception sending loadfile to MPV: {e}", file=sys.stderr) + resp = None + + if resp is None: + # MPV not running (or died) + # Start MPV with remaining items + debug(f"MPV not running/died while queuing, starting MPV with remaining items: {items[i:]}") + _start_mpv(items[i:], config=config, start_opts=start_opts) + return True + elif resp.get("error") == "success": + # Also set property for good measure + if title: + title_cmd = {"command": ["set_property", "force-media-title", title], "request_id": 201} + _send_ipc_command(title_cmd) + debug(f"Queued: {title or target}") + else: + error_msg = str(resp.get('error')) + debug(f"Failed to queue item: {error_msg}", file=sys.stderr) + return False + +def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: + """Manage and play items in the MPV playlist via IPC.""" + + parsed = parse_cmdlet_args(args, CMDLET) + + log_requested = bool(parsed.get("log")) + borderless = bool(parsed.get("borderless")) + + prev_debug = is_debug_enabled() + prev_stream = get_thread_stream() + devnull_fh = None + + mpv_log_path: Optional[str] = None + + try: + # Default: keep `.pipe` quiet even if debug is enabled. + # With -log: enable debug and route it to stdout (pipeable), plus enable mpv log-file. + if log_requested: + set_debug(True) + set_thread_stream(sys.stdout) + try: + tmp_dir = Path(os.environ.get("TEMP") or os.environ.get("TMP") or ".") + except Exception: + tmp_dir = Path(".") + mpv_log_path = str((tmp_dir / "medeia-mpv.log").resolve()) + # Ensure file exists early so we can tail it even if mpv writes later. + try: + Path(mpv_log_path).parent.mkdir(parents=True, exist_ok=True) + with open(mpv_log_path, "a", encoding="utf-8", errors="replace"): + pass + except Exception: + pass + print(f"MPV log file: {mpv_log_path}") + + # If mpv is already running, set log options live via IPC. 
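+            # A freshly started instance gets the same settings via --log-file/--msg-level
+            # in _start_mpv, so this IPC path only matters when mpv is already up.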
+ try: + mpv_live = MPV() + if mpv_live.is_running(): + mpv_live.set_property("options/log-file", mpv_log_path) + mpv_live.set_property("options/msg-level", "all=v") + except Exception: + pass + else: + if prev_debug: + try: + devnull_fh = open(os.devnull, "w", encoding="utf-8", errors="replace") + set_thread_stream(devnull_fh) + except Exception: + pass + + start_opts: Dict[str, Any] = {"borderless": borderless, "mpv_log_path": mpv_log_path} + + # Initialize Store registry for detecting Hydrus instance names + file_storage = None + try: + from Store import Store + file_storage = Store(config) + except Exception as e: + debug(f"Warning: Could not initialize Store registry: {e}", file=sys.stderr) + + # Initialize mpv_started flag + mpv_started = False + + # Handle positional index argument if provided + index_arg = parsed.get("index") + url_arg = parsed.get("url") + + # If index_arg is provided but is not an integer, treat it as a URL + # This allows .pipe "http://..." without -url flag + if index_arg is not None: + try: + int(index_arg) + except ValueError: + # Not an integer, treat as URL if url_arg is not set + if not url_arg: + url_arg = index_arg + index_arg = None + + clear_mode = parsed.get("clear") + list_mode = parsed.get("list") + play_mode = parsed.get("play") + pause_mode = parsed.get("pause") + save_mode = parsed.get("save") + load_mode = parsed.get("load") + current_mode = parsed.get("current") + + # Handle --current flag: emit currently playing item to pipeline + if current_mode: + items = _get_playlist() + if items is None: + debug("MPV is not running or not accessible.", file=sys.stderr) + return 1 + + # Find the currently playing item + current_item = None + for item in items: + if item.get("current", False): + current_item = item + break + + if current_item is None: + debug("No item is currently playing.", file=sys.stderr) + return 1 + + # Build result object with file info + title = _extract_title_from_item(current_item) + filename = current_item.get("filename", "") + + # Emit the current item to pipeline + result_obj = { + 'path': filename, + 'title': title, + 'cmdlet_name': '.pipe', + 'source': 'pipe', + '__pipe_index': items.index(current_item), + } + + ctx.emit(result_obj) + debug(f"Emitted current item: {title}") + return 0 + + # Handle URL queuing + mpv_started = False + if url_arg: + mpv_started = _queue_items([url_arg], config=config, start_opts=start_opts) + # Auto-play the URL when it's queued via .pipe "url" (without explicit flags) + # unless other flags are present + if not (clear_mode or play_mode or pause_mode or save_mode or load_mode): + if mpv_started: + # MPV was just started, wait a moment for it to be ready, then play first item + import time + time.sleep(0.5) + index_arg = "1" # 1-based index for first item + play_mode = True + else: + # MPV was already running, get playlist and play the newly added item + playlist = _get_playlist(silent=True) + if playlist and len(playlist) > 0: + # Auto-play the last item in the playlist (the one we just added) + # Use 1-based indexing + index_arg = str(len(playlist)) + play_mode = True + else: + # Fallback: just list the playlist if we can't determine index + list_mode = True + + # Ensure lyric overlay is running (auto-discovery handled by MPV.lyric). 
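+        # Best-effort: any failure here is swallowed so overlay problems never block queueing.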
+ try: + mpv = MPV() + _ensure_lyric_overlay(mpv) + except Exception: + pass + + # Handle Save Playlist + if save_mode: + playlist_name = index_arg or f"Playlist {subprocess.check_output(['date', '/t'], shell=True).decode().strip()}" + # If index_arg was used for name, clear it so it doesn't trigger index logic + if index_arg: + index_arg = None + + items = _get_playlist() + if not items: + debug("Cannot save: MPV playlist is empty or MPV is not running.") + return 1 + + # Clean up items for saving (remove current flag, etc) + clean_items = [] + for item in items: + # If title was extracted from memory://, we should probably save the original filename + # if it's a URL, or reconstruct a clean object. + # Actually, _extract_title_from_item handles the display title. + # But for playback, we need the 'filename' (which might be memory://...) + # If we save 'memory://...', it will work when loaded back. + clean_items.append(item) + + # Use config from context or load it + config_data = config if config else {} + + storage_path = get_local_storage_path(config_data) + if not storage_path: + debug("Local storage path not configured.") + return 1 + + with LocalLibrarySearchOptimizer(storage_path) as db: + if db.save_playlist(playlist_name, clean_items): + debug(f"Playlist saved as '{playlist_name}'") + return 0 + else: + debug(f"Failed to save playlist '{playlist_name}'") + return 1 + + # Handle Load Playlist + current_playlist_name = None + if load_mode: + # Use config from context or load it + config_data = config if config else {} + + storage_path = get_local_storage_path(config_data) + if not storage_path: + debug("Local storage path not configured.") + return 1 + + with LocalLibrarySearchOptimizer(storage_path) as db: + if index_arg: + try: + pl_id = int(index_arg) + + # Handle Delete Playlist (if -clear is also passed) + if clear_mode: + if db.delete_playlist(pl_id): + debug(f"Playlist ID {pl_id} deleted.") + # Clear index_arg so we fall through to list mode and show updated list + index_arg = None + # Don't return, let it list the remaining playlists + else: + debug(f"Failed to delete playlist ID {pl_id}.") + return 1 + else: + # Handle Load Playlist + result = db.get_playlist_by_id(pl_id) + if result is None: + debug(f"Playlist ID {pl_id} not found.") + return 1 + + name, items = result + current_playlist_name = name + + # Queue items (replacing current playlist) + if items: + _queue_items(items, clear_first=True, config=config, start_opts=start_opts) + else: + # Empty playlist, just clear + _send_ipc_command({"command": ["playlist-clear"]}, silent=True) + + # Switch to list mode to show the result + list_mode = True + index_arg = None + # Fall through to list logic + + except ValueError: + debug(f"Invalid playlist ID: {index_arg}") + return 1 + + # If we deleted or didn't have an index, list playlists + if not index_arg: + playlists = db.get_playlists() + + if not playlists: + debug("No saved playlists found.") + return 0 + + table = ResultTable("Saved Playlists") + for i, pl in enumerate(playlists): + item_count = len(pl.get('items', [])) + row = table.add_row() + # row.add_column("ID", str(pl['id'])) # Hidden as per user request + row.add_column("Name", pl['name']) + row.add_column("Items", str(item_count)) + row.add_column("Updated", pl['updated_at']) + + # Set the playlist items as the result object for this row + # When user selects @N, they get the list of items + # We also set the source command to .pipe -load so it loads it + table.set_row_selection_args(i, ["-load", 
str(pl['id'])]) + + table.set_source_command(".pipe") + + # Register results + ctx.set_last_result_table_overlay(table, [p['items'] for p in playlists]) + ctx.set_current_stage_table(table) + + print(table) + return 0 + + # Everything below was originally outside a try block; keep it inside so `start_opts` is in scope. + + # Handle Play/Pause commands (but skip if we have index_arg to play a specific item) + if play_mode and index_arg is None: + cmd = {"command": ["set_property", "pause", False], "request_id": 103} + resp = _send_ipc_command(cmd) + if resp and resp.get("error") == "success": + debug("Resumed playback") + return 0 + else: + debug("Failed to resume playback (MPV not running?)", file=sys.stderr) + return 1 + + if pause_mode: + cmd = {"command": ["set_property", "pause", True], "request_id": 104} + resp = _send_ipc_command(cmd) + if resp and resp.get("error") == "success": + debug("Paused playback") + return 0 + else: + debug("Failed to pause playback (MPV not running?)", file=sys.stderr) + return 1 + + # Handle Clear All command (no index provided) + if clear_mode and index_arg is None: + cmd = {"command": ["playlist-clear"], "request_id": 105} + resp = _send_ipc_command(cmd) + if resp and resp.get("error") == "success": + debug("Playlist cleared") + return 0 + else: + debug("Failed to clear playlist (MPV not running?)", file=sys.stderr) + return 1 + + # Handle piped input (add to playlist) + # Skip adding if -list is specified (user just wants to see current playlist) + if result and not list_mode and not url_arg: + playlist_before = _get_playlist(silent=True) + idle_before = None + try: + idle_resp = _send_ipc_command({"command": ["get_property", "idle-active"], "request_id": 111}, silent=True) + if idle_resp and idle_resp.get("error") == "success": + idle_before = bool(idle_resp.get("data")) + except Exception: + idle_before = None + + # If result is a list of items, add them to playlist + items_to_add = [] + if isinstance(result, list): + items_to_add = result + elif isinstance(result, dict): + items_to_add = [result] + else: + # Handle PipeObject or any other object type + items_to_add = [result] + + # Debug: inspect incoming result and attributes + try: + debug(f"pipe._run: received result type={type(result)} repr={repr(result)[:200]}") + debug(f"pipe._run: attrs path={getattr(result, 'path', None)} url={getattr(result, 'url', None)} store={getattr(result, 'store', None)} hash={getattr(result, 'hash', None)}") + except Exception: + pass + + queued_started_mpv = False + if items_to_add and _queue_items(items_to_add, config=config, start_opts=start_opts): + mpv_started = True + queued_started_mpv = True + + # Ensure lyric overlay is running when we queue anything via .pipe. + if items_to_add and not queued_started_mpv: + try: + mpv = MPV() + _ensure_lyric_overlay(mpv) + except Exception: + pass + + # Auto-play when a single item is piped and mpv was idle/empty. 
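+            # Heuristic: autoplay only if mpv reported idle-active before queueing, or the
+            # playlist was empty beforehand; the new item's index is clamped to the current
+            # playlist length before playlist-play-index is sent.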
+ if items_to_add and len(items_to_add) == 1 and not queued_started_mpv: + try: + playlist_after = _get_playlist(silent=True) + before_len = len(playlist_before) if isinstance(playlist_before, list) else 0 + after_len = len(playlist_after) if isinstance(playlist_after, list) else 0 + + should_autoplay = False + if idle_before is True: + should_autoplay = True + elif isinstance(playlist_before, list) and len(playlist_before) == 0: + should_autoplay = True + + if should_autoplay and after_len > 0: + idx_to_play = min(max(0, before_len), after_len - 1) + play_resp = _send_ipc_command({"command": ["playlist-play-index", idx_to_play], "request_id": 112}, silent=True) + _send_ipc_command({"command": ["set_property", "pause", False], "request_id": 113}, silent=True) + if play_resp and play_resp.get("error") == "success": + debug("Auto-playing piped item") + + # Start lyric overlay (auto-discovery handled by MPV.lyric). + try: + mpv = MPV() + _ensure_lyric_overlay(mpv) + except Exception: + pass + except Exception: + pass + + # Get playlist from MPV (silent: we handle MPV-not-running gracefully below) + items = _get_playlist(silent=True) + + if items is None: + if mpv_started: + # MPV was just started, retry getting playlist after a brief delay + import time + time.sleep(0.3) + items = _get_playlist(silent=True) + + if items is None: + # Still can't connect, but MPV is starting + debug("MPV is starting up...") + return 0 + else: + # Do not auto-launch MPV when no action/inputs were provided; avoid surprise startups + no_inputs = not any([ + result, url_arg, index_arg, clear_mode, play_mode, + pause_mode, save_mode, load_mode, current_mode, list_mode + ]) + + if no_inputs: + # User invoked `.pipe` with no args: treat this as an intent to open MPV. + debug("MPV is not running. Starting new instance...") + _start_mpv([], config=config, start_opts=start_opts) + + # Re-check playlist after startup; if IPC still isn't ready, just exit cleanly. + try: + import time + time.sleep(0.3) + except Exception: + pass + items = _get_playlist(silent=True) + if items is None: + debug("MPV is starting up...") + return 0 + + debug("MPV is not running. 
Starting new instance...") + _start_mpv([], config=config, start_opts=start_opts) + return 0 + + if not items: + debug("MPV playlist is empty.") + return 0 + + # If index is provided, perform action (Play or Clear) + if index_arg is not None: + try: + # Handle 1-based index + idx = int(index_arg) - 1 + + if idx < 0 or idx >= len(items): + debug(f"Index {index_arg} out of range (1-{len(items)}).") + return 1 + + item = items[idx] + title = _extract_title_from_item(item) + filename = item.get("filename", "") if isinstance(item, dict) else "" + hydrus_header = _build_hydrus_header(config or {}) + hydrus_url = None + try: + hydrus_url = get_hydrus_url(config) if config is not None else None + except Exception: + hydrus_url = None + + if clear_mode: + # Remove item + cmd = {"command": ["playlist-remove", idx], "request_id": 101} + resp = _send_ipc_command(cmd) + if resp and resp.get("error") == "success": + debug(f"Removed: {title}") + # Refresh items for listing + items = _get_playlist() or [] + list_mode = True + index_arg = None + else: + debug(f"Failed to remove item: {resp.get('error') if resp else 'No response'}") + return 1 + else: + # Play item + if hydrus_header and _is_hydrus_path(filename, hydrus_url): + header_cmd = {"command": ["set_property", "http-header-fields", hydrus_header], "request_id": 198} + _send_ipc_command(header_cmd, silent=True) + cmd = {"command": ["playlist-play-index", idx], "request_id": 102} + resp = _send_ipc_command(cmd) + if resp and resp.get("error") == "success": + # Ensure playback starts (unpause) + unpause_cmd = {"command": ["set_property", "pause", False], "request_id": 103} + _send_ipc_command(unpause_cmd) + + debug(f"Playing: {title}") + + # Monitor logs briefly for errors (e.g. ytdl failures) + _monitor_mpv_logs(3.0) + + # Refresh playlist view so the user sees the new current item immediately + items = _get_playlist(silent=True) or items + list_mode = True + index_arg = None + else: + debug(f"Failed to play item: {resp.get('error') if resp else 'No response'}") + return 1 + except ValueError: + debug(f"Invalid index: {index_arg}") + return 1 + + # List items (Default action or after clear) + if list_mode or (index_arg is None and not url_arg): + if not items: + debug("MPV playlist is empty.") + return 0 + + # Use the loaded playlist name if available, otherwise default + # Note: current_playlist_name is defined in the load_mode block if a playlist was loaded + try: + table_title = current_playlist_name or "MPV Playlist" + except NameError: + table_title = "MPV Playlist" + + table = ResultTable(table_title, preserve_order=True) + + # Convert MPV items to PipeObjects with proper hash and store + pipe_objects = [] + for i, item in enumerate(items): + is_current = item.get("current", False) + title = _extract_title_from_item(item) + filename = item.get("filename", "") + + # Extract the real path/URL from memory:// wrapper if present + real_path = _extract_target_from_memory_uri(filename) or filename + + # Try to extract hash from the path/URL + file_hash = None + store_name = None + + # Check if it's a Hydrus URL + if "get_files/file" in real_path or "hash=" in real_path: + # Extract hash from Hydrus URL + hash_match = re.search(r"hash=([0-9a-f]{64})", real_path.lower()) + if hash_match: + file_hash = hash_match.group(1) + # Try to find which Hydrus instance has this file + if file_storage: + store_name = _find_hydrus_instance_for_hash(file_hash, file_storage) + if not store_name: + store_name = "hydrus" + # Check if it's a hash-based local file + elif 
real_path: + # Try to extract hash from filename (e.g., C:\path\1e8c46...a1b2.mp4) + path_obj = Path(real_path) + stem = path_obj.stem # filename without extension + if len(stem) == 64 and all(c in '0123456789abcdef' for c in stem.lower()): + file_hash = stem.lower() + # Find which folder store has this file + if file_storage: + for backend_name in file_storage.list_backends(): + backend = file_storage[backend_name] + if type(backend).__name__ == "Folder": + # Check if this backend has the file + try: + result_path = backend.get_file(file_hash) + if isinstance(result_path, Path) and result_path.exists(): + store_name = backend_name + break + except Exception: + pass + + # Fallback to inferred store if we couldn't find it + if not store_name: + store_name = _infer_store_from_playlist_item(item, file_storage=file_storage) + + # Build PipeObject with proper metadata + pipe_obj = PipeObject( + hash=file_hash or "unknown", + store=store_name or "unknown", + title=title, + path=real_path + ) + pipe_objects.append(pipe_obj) + + # Truncate title for display + display_title = title + if len(display_title) > 80: + display_title = display_title[:77] + "..." + + row = table.add_row() + row.add_column("Current", "*" if is_current else "") + row.add_column("Store", store_name or "unknown") + row.add_column("Title", display_title) + + table.set_row_selection_args(i, [str(i + 1)]) + + table.set_source_command(".pipe") + + # Register PipeObjects (not raw MPV items) with pipeline context + ctx.set_last_result_table_overlay(table, pipe_objects) + ctx.set_current_stage_table(table) + + print(table) + + return 0 + finally: + if log_requested and isinstance(mpv_log_path, str) and mpv_log_path.strip(): + try: + tail_lines = _tail_text_file(mpv_log_path, max_lines=160) + if tail_lines: + print("MPV log (tail):") + for ln in tail_lines: + print(ln) + except Exception: + pass + try: + set_thread_stream(prev_stream) + except Exception: + pass + try: + set_debug(prev_debug) + except Exception: + pass + try: + if devnull_fh is not None: + devnull_fh.close() + except Exception: + pass + +def _start_mpv(items: List[Any], config: Optional[Dict[str, Any]] = None, start_opts: Optional[Dict[str, Any]] = None) -> None: + """Start MPV with a list of items.""" + import time as _time_module + + mpv = MPV() + mpv.kill_existing_windows() + _time_module.sleep(0.5) # Wait for process to die + + hydrus_header = _build_hydrus_header(config or {}) + ytdl_opts = _build_ytdl_options(config, hydrus_header) + + cookies_path = get_cookies_file_path() + if cookies_path: + debug(f"Starting MPV with cookies file: {cookies_path.replace('\\', '/')}") + else: + debug("Starting MPV with browser cookies: chrome") + + try: + extra_args: List[str] = [ + '--ytdl-format=bestvideo[height<=?1080]+bestaudio/best[height<=?1080]', + ] + + # Optional: borderless window (useful for uosc-like overlay UI without fullscreen). + if start_opts and start_opts.get("borderless"): + extra_args.append("--border=no") + + # Optional: mpv logging to file. + mpv_log_path = (start_opts or {}).get("mpv_log_path") + if isinstance(mpv_log_path, str) and mpv_log_path.strip(): + extra_args.append(f"--log-file={mpv_log_path}") + extra_args.append("--msg-level=all=v") + + # Always start MPV with the bundled Lua script via MPV class. 
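+        # mpv is launched detached with the composed ytdl-raw-options and Hydrus auth header
+        # passed as startup options; per-item overrides are still applied later in _queue_items.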
+ mpv.start( + extra_args=extra_args, + ytdl_raw_options=ytdl_opts, + http_header_fields=hydrus_header, + detached=True, + ) + debug("Started MPV process") + + # Wait for IPC pipe to be ready + if not mpv.wait_for_ipc(retries=20, delay_seconds=0.2): + debug("Timed out waiting for MPV IPC connection", file=sys.stderr) + return + + # Ensure Lua script is loaded (redundant when started with --script, but safe) + mpv.ensure_lua_loaded() + + # Ensure lyric overlay is running (auto-discovery handled by MPV.lyric). + _ensure_lyric_overlay(mpv) + + # Queue items via IPC + if items: + _queue_items(items, config=config, start_opts=start_opts) + + # Auto-play the first item + import time + time.sleep(0.3) # Give MPV a moment to process the queued items + + # Play the first item (index 0) and unpause + play_cmd = {"command": ["playlist-play-index", 0], "request_id": 102} + play_resp = _send_ipc_command(play_cmd, silent=True) + + if play_resp and play_resp.get("error") == "success": + # Ensure playback starts (unpause) + unpause_cmd = {"command": ["set_property", "pause", False], "request_id": 103} + _send_ipc_command(unpause_cmd, silent=True) + debug("Auto-playing first item") + + # Overlay already started above; it will follow track changes automatically. + + except Exception as e: + debug(f"Error starting MPV: {e}", file=sys.stderr) + + +CMDLET = Cmdlet( + name=".pipe", + alias=["pipe", "playlist", "queue", "ls-pipe"], + summary="Manage and play items in the MPV playlist via IPC", + usage=".pipe [index|url] [-current] [-clear] [-list] [-url URL] [-log] [-borderless]", + arg=[ + CmdletArg( + name="index", + type="string", # Changed to string to allow URL detection + description="Index of item to play/clear, or URL to queue", + required=False + ), + CmdletArg( + name="url", + type="string", + description="URL to queue", + required=False + ), + CmdletArg( + name="clear", + type="flag", + description="Remove the selected item, or clear entire playlist if no index provided" + ), + CmdletArg( + name="list", + type="flag", + description="List items (default)" + ), + CmdletArg( + name="play", + type="flag", + description="Resume playback" + ), + CmdletArg( + name="pause", + type="flag", + description="Pause playback" + ), + CmdletArg( + name="save", + type="flag", + description="Save current playlist to database" + ), + CmdletArg( + name="load", + type="flag", + description="List saved playlists" + ), + CmdletArg( + name="current", + type="flag", + description="Emit the currently playing item to pipeline for further processing" + ), + CmdletArg( + name="log", + type="flag", + description="Enable pipeable debug output and write an mpv log file" + ), + CmdletArg( + name="borderless", + type="flag", + description="Start mpv with no window border (uosc-like overlay feel without fullscreen)" + ), + ], + exec=_run +) + diff --git a/cmdnats/worker.py b/cmdnat/worker.py similarity index 99% rename from cmdnats/worker.py rename to cmdnat/worker.py index 898a553..2283720 100644 --- a/cmdnats/worker.py +++ b/cmdnat/worker.py @@ -7,8 +7,8 @@ from dataclasses import dataclass from datetime import datetime, timezone from typing import Any, Dict, Sequence, List -from cmdlets import register -from cmdlets._shared import Cmdlet, CmdletArg +from cmdlet import register +from cmdlet._shared import Cmdlet, CmdletArg import pipeline as ctx from SYS.logger import log from config import get_local_storage_path diff --git a/cmdnats/pipe.py b/cmdnats/pipe.py deleted file mode 100644 index e7f568f..0000000 --- a/cmdnats/pipe.py +++ 
/dev/null @@ -1,1228 +0,0 @@ -from typing import Any, Dict, Sequence, List, Optional -import sys -import json -import socket -import re -import subprocess -from urllib.parse import urlparse, parse_qs -from pathlib import Path -from cmdlets._shared import Cmdlet, CmdletArg, parse_cmdlet_args -from SYS.logger import debug -from result_table import ResultTable -from MPV.mpv_ipc import MPV -import pipeline as ctx -from SYS.download import is_url_supported_by_ytdlp -from models import PipeObject - -from API.folder import LocalLibrarySearchOptimizer -from config import get_local_storage_path, get_hydrus_access_key, get_hydrus_url -from hydrus_health_check import get_cookies_file_path - -def _send_ipc_command(command: Dict[str, Any], silent: bool = False) -> Optional[Any]: - """Send a command to the MPV IPC pipe and return the response.""" - try: - mpv = MPV() - return mpv.send(command, silent=silent) - except Exception as e: - if not silent: - debug(f"IPC Error: {e}", file=sys.stderr) - return None - - -def _get_playlist(silent: bool = False) -> Optional[List[Dict[str, Any]]]: - """Get the current playlist from MPV. Returns None if MPV is not running.""" - cmd = {"command": ["get_property", "playlist"], "request_id": 100} - resp = _send_ipc_command(cmd, silent=silent) - if resp is None: - return None - if resp.get("error") == "success": - return resp.get("data", []) - return [] - -def _extract_title_from_item(item: Dict[str, Any]) -> str: - """Extract a clean title from an MPV playlist item, handling memory:// M3U hacks.""" - title = item.get("title") - filename = item.get("filename") or "" - - # Special handling for memory:// M3U playlists (used to pass titles via IPC) - if "memory://" in filename and "#EXTINF:" in filename: - try: - # Extract title from #EXTINF:-1,Title - # Use regex to find title between #EXTINF:-1, and newline - match = re.search(r"#EXTINF:-1,(.*?)(?:\n|\r|$)", filename) - if match: - extracted_title = match.group(1).strip() - if not title or title == "memory://": - title = extracted_title - - # If we still don't have a title, try to find the URL in the M3U content - if not title: - lines = filename.splitlines() - for line in lines: - line = line.strip() - if line and not line.startswith('#') and not line.startswith('memory://'): - # Found the URL, use it as title - return line - except Exception: - pass - - return title or filename or "Unknown" - - -def _extract_target_from_memory_uri(text: str) -> Optional[str]: - """Extract the real target URL/path from a memory:// M3U payload.""" - if not isinstance(text, str) or not text.startswith("memory://"): - return None - for line in text.splitlines(): - line = line.strip() - if not line or line.startswith('#') or line.startswith('memory://'): - continue - return line - return None - - -def _find_hydrus_instance_for_hash(hash_str: str, file_storage: Any) -> Optional[str]: - """Find which Hydrus instance serves a specific file hash. 
- - Args: - hash_str: SHA256 hash (64 hex chars) - file_storage: FileStorage instance with Hydrus backends - - Returns: - Instance name (e.g., 'home') or None if not found - """ - # Query each Hydrus backend to see if it has this file - for backend_name in file_storage.list_backends(): - backend = file_storage[backend_name] - # Check if this is a Hydrus backend by checking class name - backend_class = type(backend).__name__ - if backend_class != "HydrusNetwork": - continue - - try: - # Query metadata to see if this instance has the file - metadata = backend.get_metadata(hash_str) - if metadata: - return backend_name - except Exception: - # This instance doesn't have the file or had an error - continue - - return None - - -def _find_hydrus_instance_by_url(url: str, file_storage: Any) -> Optional[str]: - """Find which Hydrus instance matches a given URL. - - Args: - url: Full URL (e.g., http://localhost:45869/get_files/file?hash=...) - file_storage: FileStorage instance with Hydrus backends - - Returns: - Instance name (e.g., 'home') or None if not found - """ - from urllib.parse import urlparse - - parsed_target = urlparse(url) - target_netloc = parsed_target.netloc.lower() - - # Check each Hydrus backend's URL - for backend_name in file_storage.list_backends(): - backend = file_storage[backend_name] - backend_class = type(backend).__name__ - if backend_class != "HydrusNetwork": - continue - - # Get the backend's base URL from its client - try: - backend_url = backend._client.base_url - parsed_backend = urlparse(backend_url) - backend_netloc = parsed_backend.netloc.lower() - - # Match by netloc (host:port) - if target_netloc == backend_netloc: - return backend_name - except Exception: - continue - - return None - - -def _normalize_playlist_path(text: Optional[str]) -> Optional[str]: - """Normalize playlist entry paths for dedupe comparisons.""" - if not text: - return None - real = _extract_target_from_memory_uri(text) or text - real = real.strip() - if not real: - return None - # If it's already a bare hydrus hash, use it directly - lower_real = real.lower() - if re.fullmatch(r"[0-9a-f]{64}", lower_real): - return lower_real - - # If it's a hydrus file URL, normalize to the hash for dedupe - try: - parsed = urlparse(real) - if parsed.scheme in {"http", "https", "hydrus"}: - if parsed.path.endswith("/get_files/file"): - qs = parse_qs(parsed.query) - h = qs.get("hash", [None])[0] - if h and re.fullmatch(r"[0-9a-f]{64}", h.lower()): - return h.lower() - except Exception: - pass - - # Normalize slashes for Windows paths and lowercase for comparison - real = real.replace('\\', '/') - return real.lower() - - -def _infer_store_from_playlist_item(item: Dict[str, Any], file_storage: Optional[Any] = None) -> str: - """Infer a friendly store label from an MPV playlist entry. - - Args: - item: MPV playlist item dict - file_storage: Optional FileStorage instance for querying specific backend instances - - Returns: - Store label (e.g., 'home', 'work', 'local', 'youtube', etc.) 
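    Illustrative mappings, assuming the heuristics in the body below (hosts
    and paths here are hypothetical):
        "magnet:?xt=urn:btih:..."                          -> "magnet"
        "https://www.youtube.com/watch?v=abc"              -> "youtube"
        "C:\\media\\clip.mp4" or "file:///tmp/clip.mp4"    -> "local"
        "http://127.0.0.1:45869/get_files/file?hash=<64-hex>"
                                                           -> matching Hydrus instance name, else "hydrus"
        "https://somehost.example.com/track.mp3"           -> "example" (second-level domain)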
- """ - name = item.get("filename") if isinstance(item, dict) else None - target = str(name or "") - - # Unwrap memory:// M3U wrapper - memory_target = _extract_target_from_memory_uri(target) - if memory_target: - target = memory_target - - # Hydrus hashes: bare 64-hex entries - if re.fullmatch(r"[0-9a-f]{64}", target.lower()): - # If we have file_storage, query each Hydrus instance to find which one has this hash - if file_storage: - hash_str = target.lower() - hydrus_instance = _find_hydrus_instance_for_hash(hash_str, file_storage) - if hydrus_instance: - return hydrus_instance - return "hydrus" - - lower = target.lower() - if lower.startswith("magnet:"): - return "magnet" - if lower.startswith("hydrus://"): - # Extract hash from hydrus:// URL if possible - if file_storage: - hash_match = re.search(r"[0-9a-f]{64}", target.lower()) - if hash_match: - hash_str = hash_match.group(0) - hydrus_instance = _find_hydrus_instance_for_hash(hash_str, file_storage) - if hydrus_instance: - return hydrus_instance - return "hydrus" - - # Windows / UNC paths - if re.match(r"^[a-z]:[\\/]", target, flags=re.IGNORECASE) or target.startswith("\\\\"): - return "local" - - # file:// url - if lower.startswith("file://"): - return "local" - - parsed = urlparse(target) - host = (parsed.netloc or "").lower() - path = parsed.path or "" - - if not host: - return "" - - host_no_port = host.split(":", 1)[0] - host_stripped = host_no_port[4:] if host_no_port.startswith("www.") else host_no_port - - if "youtube" in host_stripped or "youtu.be" in target.lower(): - return "youtube" - if "soundcloud" in host_stripped: - return "soundcloud" - if "bandcamp" in host_stripped: - return "bandcamp" - if "get_files" in path or "file?hash=" in path or host_stripped in {"127.0.0.1", "localhost"}: - # Hydrus API URL - try to extract hash and find instance - if file_storage: - # Try to extract hash from URL parameters - hash_match = re.search(r"hash=([0-9a-f]{64})", target.lower()) - if hash_match: - hash_str = hash_match.group(1) - hydrus_instance = _find_hydrus_instance_for_hash(hash_str, file_storage) - if hydrus_instance: - return hydrus_instance - # If no hash in URL, try matching the base URL to configured instances - hydrus_instance = _find_hydrus_instance_by_url(target, file_storage) - if hydrus_instance: - return hydrus_instance - return "hydrus" - if re.match(r"^\d+\.\d+\.\d+\.\d+$", host_stripped) and "get_files" in path: - # IP-based Hydrus URL - if file_storage: - hash_match = re.search(r"hash=([0-9a-f]{64})", target.lower()) - if hash_match: - hash_str = hash_match.group(1) - hydrus_instance = _find_hydrus_instance_for_hash(hash_str, file_storage) - if hydrus_instance: - return hydrus_instance - hydrus_instance = _find_hydrus_instance_by_url(target, file_storage) - if hydrus_instance: - return hydrus_instance - return "hydrus" - - parts = host_stripped.split('.') - if len(parts) >= 2: - return parts[-2] or host_stripped - return host_stripped - - -def _build_hydrus_header(config: Dict[str, Any]) -> Optional[str]: - """Return header string for Hydrus auth if configured.""" - try: - key = get_hydrus_access_key(config) - except Exception: - key = None - if not key: - return None - return f"Hydrus-Client-API-Access-Key: {key}" - - -def _build_ytdl_options(config: Optional[Dict[str, Any]], hydrus_header: Optional[str]) -> Optional[str]: - """Compose ytdl-raw-options string including cookies and optional Hydrus header.""" - opts: List[str] = [] - try: - cookies_path = get_cookies_file_path() - except Exception: - 
cookies_path = None - if cookies_path: - opts.append(f"cookies={cookies_path.replace('\\', '/')}") - else: - opts.append("cookies-from-browser=chrome") - if hydrus_header: - opts.append(f"add-header={hydrus_header}") - return ",".join(opts) if opts else None - - -def _is_hydrus_path(path: str, hydrus_url: Optional[str]) -> bool: - if not path: - return False - lower = path.lower() - if "hydrus://" in lower: - return True - parsed = urlparse(path) - host = (parsed.netloc or "").lower() - path_part = parsed.path or "" - if hydrus_url: - try: - hydrus_host = urlparse(hydrus_url).netloc.lower() - if hydrus_host and hydrus_host in host: - return True - except Exception: - pass - if "get_files" in path_part or "file?hash=" in path_part: - return True - if re.match(r"^\d+\.\d+\.\d+\.\d+$", host) and "get_files" in path_part: - return True - return False - -def _ensure_ytdl_cookies() -> None: - """Ensure yt-dlp options are set correctly for this session.""" - from pathlib import Path - cookies_path = get_cookies_file_path() - if cookies_path: - # Check if file exists and has content (use forward slashes for path checking) - check_path = cookies_path.replace('\\', '/') - file_obj = Path(cookies_path) - if file_obj.exists(): - file_size = file_obj.stat().st_size - debug(f"Cookies file verified: {check_path} ({file_size} bytes)") - else: - debug(f"WARNING: Cookies file does not exist: {check_path}", file=sys.stderr) - else: - debug("No cookies file configured") - -def _monitor_mpv_logs(duration: float = 3.0) -> None: - """Monitor MPV logs for a short duration to capture errors.""" - try: - mpv = MPV() - client = mpv.client() - if not client.connect(): - debug("Failed to connect to MPV for log monitoring", file=sys.stderr) - return - - # Request log messages - client.send_command({"command": ["request_log_messages", "warn"]}) - - # On Windows named pipes, avoid blocking the CLI; skip log read entirely - if client.is_windows: - client.disconnect() - return - - import time - start_time = time.time() - - # Unix sockets already have timeouts set; read until duration expires - sock_obj = client.sock - if not isinstance(sock_obj, socket.socket): - client.disconnect() - return - - while time.time() - start_time < duration: - try: - chunk = sock_obj.recv(4096) - except socket.timeout: - continue - except Exception: - break - if not chunk: - break - for line in chunk.decode("utf-8", errors="ignore").splitlines(): - try: - msg = json.loads(line) - if msg.get("event") == "log-message": - text = msg.get("text", "").strip() - prefix = msg.get("prefix", "") - level = msg.get("level", "") - if "ytdl" in prefix or level == "error": - debug(f"[MPV {prefix}] {text}", file=sys.stderr) - except json.JSONDecodeError: - continue - - client.disconnect() - except Exception: - pass -def _get_playable_path(item: Any, file_storage: Optional[Any], config: Optional[Dict[str, Any]]) -> Optional[tuple[str, Optional[str]]]: - """Extract a playable path/URL from an item, handling different store types. 
- - Args: - item: Item to extract path from (dict, PipeObject, or string) - file_storage: FileStorage instance for querying backends - config: Config dict for Hydrus URL - - Returns: - Tuple of (path, title) or None if no valid path found - """ - path: Optional[str] = None - title: Optional[str] = None - store: Optional[str] = None - file_hash: Optional[str] = None - - # Extract fields from item - prefer a disk path ('path'), but accept 'url' as fallback for providers - if isinstance(item, dict): - path = item.get("path") - # Fallbacks for provider-style entries where URL is stored in 'url' or 'source_url' or 'target' - if not path: - path = item.get("url") or item.get("source_url") or item.get("target") - if not path: - known = item.get("url") or item.get("url") or [] - if known and isinstance(known, list): - path = known[0] - title = item.get("title") or item.get("file_title") - store = item.get("store") - file_hash = item.get("hash") - elif hasattr(item, "path") or hasattr(item, "url") or hasattr(item, "source_url") or hasattr(item, "store") or hasattr(item, "hash"): - # Handle PipeObject / dataclass objects - prefer path, but fall back to url/source_url attributes - path = getattr(item, "path", None) - if not path: - path = getattr(item, "url", None) or getattr(item, "source_url", None) or getattr(item, "target", None) - if not path: - known = getattr(item, "url", None) or (getattr(item, "extra", None) or {}).get("url") - if known and isinstance(known, list): - path = known[0] - title = getattr(item, "title", None) or getattr(item, "file_title", None) - store = getattr(item, "store", None) - file_hash = getattr(item, "hash", None) - elif isinstance(item, str): - path = item - - # Debug: show incoming values - try: - debug(f"_get_playable_path: store={store}, path={path}, hash={file_hash}") - except Exception: - pass - - if not path: - return None - - if not isinstance(path, str): - path = str(path) - if title is not None and not isinstance(title, str): - title = str(title) - - # Resolve hash+store into a playable target (file path or URL). - # This is unrelated to MPV's IPC pipe and keeps "pipe" terminology reserved for: - # - MPV IPC pipe (transport) - # - PipeObject (pipeline data) - if store and file_hash and file_hash != "unknown" and file_storage: - # If it's already a URL, MPV can usually play it directly. - if path.startswith(("http://", "https://")): - return (path, title) - - try: - backend = file_storage[store] - except Exception: - backend = None - - if backend is not None: - backend_class = type(backend).__name__ - - # Folder stores: resolve to an on-disk file path. - if hasattr(backend, "get_file") and callable(getattr(backend, "get_file")) and backend_class == "Folder": - try: - resolved = backend.get_file(file_hash) - if isinstance(resolved, Path): - path = str(resolved) - elif resolved is not None: - path = str(resolved) - except Exception as e: - debug(f"Error resolving file path from store '{store}': {e}", file=sys.stderr) - - # HydrusNetwork: build a playable API file URL without browser side-effects. 
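            # The URL assembled below has the shape
            #   {base_url}/get_files/file?hash=<sha256>&Hydrus-Client-API-Access-Key=<key>
            # i.e. the access key is embedded as a query parameter.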
- elif backend_class == "HydrusNetwork": - try: - client = getattr(backend, "_client", None) - base_url = getattr(client, "url", None) - access_key = getattr(client, "access_key", None) - if base_url and access_key: - base_url = str(base_url).rstrip("/") - path = f"{base_url}/get_files/file?hash={file_hash}&Hydrus-Client-API-Access-Key={access_key}" - except Exception as e: - debug(f"Error building Hydrus URL from store '{store}': {e}", file=sys.stderr) - - return (path, title) - - -def _queue_items(items: List[Any], clear_first: bool = False, config: Optional[Dict[str, Any]] = None) -> bool: - """Queue items to MPV, starting it if necessary. - - Args: - items: List of items to queue - clear_first: If True, the first item will replace the current playlist - - Returns: - True if MPV was started, False if items were queued via IPC. - """ - # Debug: print incoming items - try: - debug(f"_queue_items: count={len(items)} types={[type(i).__name__ for i in items]}") - except Exception: - pass - - # Just verify cookies are configured, don't try to set via IPC - _ensure_ytdl_cookies() - - hydrus_header = _build_hydrus_header(config or {}) - ytdl_opts = _build_ytdl_options(config, hydrus_header) - hydrus_url = None - try: - hydrus_url = get_hydrus_url(config) if config is not None else None - except Exception: - hydrus_url = None - - # Initialize Store registry for path resolution - file_storage = None - try: - from Store import Store - file_storage = Store(config or {}) - except Exception as e: - debug(f"Warning: Could not initialize Store registry: {e}", file=sys.stderr) - - # Dedupe existing playlist before adding more (unless we're replacing it) - existing_targets: set[str] = set() - if not clear_first: - playlist = _get_playlist(silent=True) or [] - dup_indexes: List[int] = [] - for idx, pl_item in enumerate(playlist): - fname = pl_item.get("filename") if isinstance(pl_item, dict) else str(pl_item) - alt = pl_item.get("playlist-path") if isinstance(pl_item, dict) else None - norm = _normalize_playlist_path(fname) or _normalize_playlist_path(alt) - if not norm: - continue - if norm in existing_targets: - dup_indexes.append(idx) - else: - existing_targets.add(norm) - - # Remove duplicates from playlist starting from the end to keep indices valid - for idx in reversed(dup_indexes): - try: - _send_ipc_command({"command": ["playlist-remove", idx], "request_id": 106}, silent=True) - except Exception: - pass - - new_targets: set[str] = set() - - for i, item in enumerate(items): - # Debug: show the item being processed - try: - debug(f"_queue_items: processing idx={i} type={type(item)} repr={repr(item)[:200]}") - except Exception: - pass - # Extract URL/Path using store-aware logic - result = _get_playable_path(item, file_storage, config) - if not result: - debug(f"_queue_items: item idx={i} produced no playable path") - continue - - target, title = result - - if target: - # If we just have a hydrus hash, build a direct file URL for MPV - if re.fullmatch(r"[0-9a-f]{64}", str(target).strip().lower()) and hydrus_url: - target = f"{hydrus_url.rstrip('/')}/get_files/file?hash={str(target).strip()}" - - norm_key = _normalize_playlist_path(target) or str(target).strip().lower() - if norm_key in existing_targets or norm_key in new_targets: - debug(f"Skipping duplicate playlist entry: {title or target}") - continue - new_targets.add(norm_key) - - # Check if it's a yt-dlp supported URL - is_ytdlp = False - # Treat any http(s) target as yt-dlp candidate. 
If the Python yt-dlp - # module is available we also check more deeply, but default to True - # so MPV can use its ytdl hooks for remote streaming sites. - is_hydrus_target = _is_hydrus_path(str(target), hydrus_url) - try: - # Hydrus direct file URLs should not be treated as yt-dlp targets. - is_ytdlp = (not is_hydrus_target) and (target.startswith("http") or is_url_supported_by_ytdlp(target)) - except Exception: - is_ytdlp = (not is_hydrus_target) and target.startswith("http") - - # Use memory:// M3U hack to pass title to MPV - # Skip for yt-dlp url to ensure proper handling - if title and (is_hydrus_target or not is_ytdlp): - # Sanitize title for M3U (remove newlines) - safe_title = title.replace('\n', ' ').replace('\r', '') - m3u_content = f"#EXTM3U\n#EXTINF:-1,{safe_title}\n{target}" - target_to_send = f"memory://{m3u_content}" - else: - target_to_send = target - - mode = "append" - if clear_first and i == 0: - mode = "replace" - - # If this is a Hydrus path, set header property and yt-dlp headers before loading - if hydrus_header and _is_hydrus_path(target_to_send, hydrus_url): - header_cmd = {"command": ["set_property", "http-header-fields", hydrus_header], "request_id": 199} - _send_ipc_command(header_cmd, silent=True) - if ytdl_opts: - ytdl_cmd = {"command": ["set_property", "ytdl-raw-options", ytdl_opts], "request_id": 197} - _send_ipc_command(ytdl_cmd, silent=True) - - cmd = {"command": ["loadfile", target_to_send, mode], "request_id": 200} - try: - debug(f"Sending MPV loadfile: {target_to_send} mode={mode}") - resp = _send_ipc_command(cmd) - debug(f"MPV loadfile response: {resp}") - except Exception as e: - debug(f"Exception sending loadfile to MPV: {e}", file=sys.stderr) - resp = None - - if resp is None: - # MPV not running (or died) - # Start MPV with remaining items - debug(f"MPV not running/died while queuing, starting MPV with remaining items: {items[i:]}") - _start_mpv(items[i:], config=config) - return True - elif resp.get("error") == "success": - # Also set property for good measure - if title: - title_cmd = {"command": ["set_property", "force-media-title", title], "request_id": 201} - _send_ipc_command(title_cmd) - debug(f"Queued: {title or target}") - else: - error_msg = str(resp.get('error')) - debug(f"Failed to queue item: {error_msg}", file=sys.stderr) - return False - -def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: - """Manage and play items in the MPV playlist via IPC.""" - - parsed = parse_cmdlet_args(args, CMDLET) - - # Initialize Store registry for detecting Hydrus instance names - file_storage = None - try: - from Store import Store - file_storage = Store(config) - except Exception as e: - debug(f"Warning: Could not initialize Store registry: {e}", file=sys.stderr) - - # Initialize mpv_started flag - mpv_started = False - - # Handle positional index argument if provided - index_arg = parsed.get("index") - url_arg = parsed.get("url") - - # If index_arg is provided but is not an integer, treat it as a URL - # This allows .pipe "http://..." 
without -url flag - if index_arg is not None: - try: - int(index_arg) - except ValueError: - # Not an integer, treat as URL if url_arg is not set - if not url_arg: - url_arg = index_arg - index_arg = None - - clear_mode = parsed.get("clear") - list_mode = parsed.get("list") - play_mode = parsed.get("play") - pause_mode = parsed.get("pause") - save_mode = parsed.get("save") - load_mode = parsed.get("load") - current_mode = parsed.get("current") - - # Handle --current flag: emit currently playing item to pipeline - if current_mode: - items = _get_playlist() - if items is None: - debug("MPV is not running or not accessible.", file=sys.stderr) - return 1 - - # Find the currently playing item - current_item = None - for item in items: - if item.get("current", False): - current_item = item - break - - if current_item is None: - debug("No item is currently playing.", file=sys.stderr) - return 1 - - # Build result object with file info - title = _extract_title_from_item(current_item) - filename = current_item.get("filename", "") - - # Emit the current item to pipeline - result_obj = { - 'path': filename, - 'title': title, - 'cmdlet_name': '.pipe', - 'source': 'pipe', - '__pipe_index': items.index(current_item), - } - - ctx.emit(result_obj) - debug(f"Emitted current item: {title}") - return 0 - - # Handle URL queuing - mpv_started = False - if url_arg: - mpv_started = _queue_items([url_arg]) - # Auto-play the URL when it's queued via .pipe "url" (without explicit flags) - # unless other flags are present - if not (clear_mode or play_mode or pause_mode or save_mode or load_mode): - if mpv_started: - # MPV was just started, wait a moment for it to be ready, then play first item - import time - time.sleep(0.5) - index_arg = "1" # 1-based index for first item - play_mode = True - else: - # MPV was already running, get playlist and play the newly added item - playlist = _get_playlist(silent=True) - if playlist and len(playlist) > 0: - # Auto-play the last item in the playlist (the one we just added) - # Use 1-based indexing - index_arg = str(len(playlist)) - play_mode = True - else: - # Fallback: just list the playlist if we can't determine index - list_mode = True - - # Handle Save Playlist - if save_mode: - playlist_name = index_arg or f"Playlist {subprocess.check_output(['date', '/t'], shell=True).decode().strip()}" - # If index_arg was used for name, clear it so it doesn't trigger index logic - if index_arg: - index_arg = None - - items = _get_playlist() - if not items: - debug("Cannot save: MPV playlist is empty or MPV is not running.") - return 1 - - # Clean up items for saving (remove current flag, etc) - clean_items = [] - for item in items: - # If title was extracted from memory://, we should probably save the original filename - # if it's a URL, or reconstruct a clean object. - # Actually, _extract_title_from_item handles the display title. - # But for playback, we need the 'filename' (which might be memory://...) - # If we save 'memory://...', it will work when loaded back. 
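The save path above may persist these memory:// wrappers verbatim; the same wrapper is what _queue_items builds and _extract_title_from_item unpacks. A minimal sketch of that round-trip (title and URL here are hypothetical):

```python
# Round-trip of the memory:// M3U title hack; values are hypothetical.
import re

title = "Some Track"
target = "https://example.com/stream.mp3"

# Wrapping (mirrors _queue_items): embed the display title in an in-memory M3U.
wrapped = f"memory://#EXTM3U\n#EXTINF:-1,{title}\n{target}"

# Unwrapping (mirrors _extract_title_from_item / _extract_target_from_memory_uri).
match = re.search(r"#EXTINF:-1,(.*?)(?:\n|\r|$)", wrapped)
recovered_title = match.group(1).strip() if match else None
recovered_target = next(
    (line.strip() for line in wrapped.splitlines()
     if line.strip() and not line.startswith(("#", "memory://"))),
    None,
)

assert recovered_title == "Some Track"
assert recovered_target == "https://example.com/stream.mp3"
```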
- clean_items.append(item) - - # Use config from context or load it - config_data = config if config else {} - - storage_path = get_local_storage_path(config_data) - if not storage_path: - debug("Local storage path not configured.") - return 1 - - with LocalLibrarySearchOptimizer(storage_path) as db: - if db.save_playlist(playlist_name, clean_items): - debug(f"Playlist saved as '{playlist_name}'") - return 0 - else: - debug(f"Failed to save playlist '{playlist_name}'") - return 1 - - # Handle Load Playlist - current_playlist_name = None - if load_mode: - # Use config from context or load it - config_data = config if config else {} - - storage_path = get_local_storage_path(config_data) - if not storage_path: - debug("Local storage path not configured.") - return 1 - - with LocalLibrarySearchOptimizer(storage_path) as db: - if index_arg: - try: - pl_id = int(index_arg) - - # Handle Delete Playlist (if -clear is also passed) - if clear_mode: - if db.delete_playlist(pl_id): - debug(f"Playlist ID {pl_id} deleted.") - # Clear index_arg so we fall through to list mode and show updated list - index_arg = None - # Don't return, let it list the remaining playlists - else: - debug(f"Failed to delete playlist ID {pl_id}.") - return 1 - else: - # Handle Load Playlist - result = db.get_playlist_by_id(pl_id) - if result is None: - debug(f"Playlist ID {pl_id} not found.") - return 1 - - name, items = result - current_playlist_name = name - - # Queue items (replacing current playlist) - if items: - _queue_items(items, clear_first=True) - else: - # Empty playlist, just clear - _send_ipc_command({"command": ["playlist-clear"]}, silent=True) - - # Switch to list mode to show the result - list_mode = True - index_arg = None - # Fall through to list logic - - except ValueError: - debug(f"Invalid playlist ID: {index_arg}") - return 1 - - # If we deleted or didn't have an index, list playlists - if not index_arg: - playlists = db.get_playlists() - - if not playlists: - debug("No saved playlists found.") - return 0 - - table = ResultTable("Saved Playlists") - for i, pl in enumerate(playlists): - item_count = len(pl.get('items', [])) - row = table.add_row() - # row.add_column("ID", str(pl['id'])) # Hidden as per user request - row.add_column("Name", pl['name']) - row.add_column("Items", str(item_count)) - row.add_column("Updated", pl['updated_at']) - - # Set the playlist items as the result object for this row - # When user selects @N, they get the list of items - # We also set the source command to .pipe -load so it loads it - table.set_row_selection_args(i, ["-load", str(pl['id'])]) - - table.set_source_command(".pipe") - - # Register results - ctx.set_last_result_table_overlay(table, [p['items'] for p in playlists]) - ctx.set_current_stage_table(table) - - print(table) - return 0 - - # Handle Play/Pause commands (but skip if we have index_arg to play a specific item) - if play_mode and index_arg is None: - cmd = {"command": ["set_property", "pause", False], "request_id": 103} - resp = _send_ipc_command(cmd) - if resp and resp.get("error") == "success": - debug("Resumed playback") - return 0 - else: - debug("Failed to resume playback (MPV not running?)", file=sys.stderr) - return 1 - - if pause_mode: - cmd = {"command": ["set_property", "pause", True], "request_id": 104} - resp = _send_ipc_command(cmd) - if resp and resp.get("error") == "success": - debug("Paused playback") - return 0 - else: - debug("Failed to pause playback (MPV not running?)", file=sys.stderr) - return 1 - - # Handle Clear All command (no 
index provided) - if clear_mode and index_arg is None: - cmd = {"command": ["playlist-clear"], "request_id": 105} - resp = _send_ipc_command(cmd) - if resp and resp.get("error") == "success": - debug("Playlist cleared") - return 0 - else: - debug("Failed to clear playlist (MPV not running?)", file=sys.stderr) - return 1 - - # Handle piped input (add to playlist) - # Skip adding if -list is specified (user just wants to see current playlist) - if result and not list_mode and not url_arg: - # If result is a list of items, add them to playlist - items_to_add = [] - if isinstance(result, list): - items_to_add = result - elif isinstance(result, dict): - items_to_add = [result] - else: - # Handle PipeObject or any other object type - items_to_add = [result] - - # Debug: inspect incoming result and attributes - try: - debug(f"pipe._run: received result type={type(result)} repr={repr(result)[:200]}") - debug(f"pipe._run: attrs path={getattr(result, 'path', None)} url={getattr(result, 'url', None)} store={getattr(result, 'store', None)} hash={getattr(result, 'hash', None)}") - except Exception: - pass - - if items_to_add and _queue_items(items_to_add, config=config): - mpv_started = True - - if items_to_add: - # If we added items, we might want to play the first one if nothing is playing? - # For now, just list the playlist - pass - - # Get playlist from MPV - items = _get_playlist() - - if items is None: - if mpv_started: - # MPV was just started, retry getting playlist after a brief delay - import time - time.sleep(0.3) - items = _get_playlist(silent=True) - - if items is None: - # Still can't connect, but MPV is starting - debug("MPV is starting up...") - return 0 - else: - # Do not auto-launch MPV when no action/inputs were provided; avoid surprise startups - no_inputs = not any([ - result, url_arg, index_arg, clear_mode, play_mode, - pause_mode, save_mode, load_mode, current_mode, list_mode - ]) - - if no_inputs: - debug("MPV is not running. Skipping auto-launch (no inputs).", file=sys.stderr) - return 1 - - debug("MPV is not running. 
Starting new instance...") - _start_mpv([], config=config) - return 0 - - if not items: - debug("MPV playlist is empty.") - return 0 - - # If index is provided, perform action (Play or Clear) - if index_arg is not None: - try: - # Handle 1-based index - idx = int(index_arg) - 1 - - if idx < 0 or idx >= len(items): - debug(f"Index {index_arg} out of range (1-{len(items)}).") - return 1 - - item = items[idx] - title = _extract_title_from_item(item) - filename = item.get("filename", "") if isinstance(item, dict) else "" - hydrus_header = _build_hydrus_header(config or {}) - hydrus_url = None - try: - hydrus_url = get_hydrus_url(config) if config is not None else None - except Exception: - hydrus_url = None - - if clear_mode: - # Remove item - cmd = {"command": ["playlist-remove", idx], "request_id": 101} - resp = _send_ipc_command(cmd) - if resp and resp.get("error") == "success": - debug(f"Removed: {title}") - # Refresh items for listing - items = _get_playlist() or [] - list_mode = True - index_arg = None - else: - debug(f"Failed to remove item: {resp.get('error') if resp else 'No response'}") - return 1 - else: - # Play item - if hydrus_header and _is_hydrus_path(filename, hydrus_url): - header_cmd = {"command": ["set_property", "http-header-fields", hydrus_header], "request_id": 198} - _send_ipc_command(header_cmd, silent=True) - cmd = {"command": ["playlist-play-index", idx], "request_id": 102} - resp = _send_ipc_command(cmd) - if resp and resp.get("error") == "success": - # Ensure playback starts (unpause) - unpause_cmd = {"command": ["set_property", "pause", False], "request_id": 103} - _send_ipc_command(unpause_cmd) - - debug(f"Playing: {title}") - - # Monitor logs briefly for errors (e.g. ytdl failures) - _monitor_mpv_logs(3.0) - - # Refresh playlist view so the user sees the new current item immediately - items = _get_playlist(silent=True) or items - list_mode = True - index_arg = None - else: - debug(f"Failed to play item: {resp.get('error') if resp else 'No response'}") - return 1 - except ValueError: - debug(f"Invalid index: {index_arg}") - return 1 - - # List items (Default action or after clear) - if list_mode or (index_arg is None and not url_arg): - if not items: - debug("MPV playlist is empty.") - return 0 - - # Use the loaded playlist name if available, otherwise default - # Note: current_playlist_name is defined in the load_mode block if a playlist was loaded - try: - table_title = current_playlist_name or "MPV Playlist" - except NameError: - table_title = "MPV Playlist" - - table = ResultTable(table_title, preserve_order=True) - - # Convert MPV items to PipeObjects with proper hash and store - pipe_objects = [] - for i, item in enumerate(items): - is_current = item.get("current", False) - title = _extract_title_from_item(item) - filename = item.get("filename", "") - - # Extract the real path/URL from memory:// wrapper if present - real_path = _extract_target_from_memory_uri(filename) or filename - - # Try to extract hash from the path/URL - file_hash = None - store_name = None - - # Check if it's a Hydrus URL - if "get_files/file" in real_path or "hash=" in real_path: - # Extract hash from Hydrus URL - hash_match = re.search(r"hash=([0-9a-f]{64})", real_path.lower()) - if hash_match: - file_hash = hash_match.group(1) - # Try to find which Hydrus instance has this file - if file_storage: - store_name = _find_hydrus_instance_for_hash(file_hash, file_storage) - if not store_name: - store_name = "hydrus" - # Check if it's a hash-based local file - elif real_path: - # Try to 
extract hash from filename (e.g., C:\path\1e8c46...a1b2.mp4) - path_obj = Path(real_path) - stem = path_obj.stem # filename without extension - if len(stem) == 64 and all(c in '0123456789abcdef' for c in stem.lower()): - file_hash = stem.lower() - # Find which folder store has this file - if file_storage: - for backend_name in file_storage.list_backends(): - backend = file_storage[backend_name] - if type(backend).__name__ == "Folder": - # Check if this backend has the file - try: - result_path = backend.get_file(file_hash) - if isinstance(result_path, Path) and result_path.exists(): - store_name = backend_name - break - except Exception: - pass - - # Fallback to inferred store if we couldn't find it - if not store_name: - store_name = _infer_store_from_playlist_item(item, file_storage=file_storage) - - # Build PipeObject with proper metadata - pipe_obj = PipeObject( - hash=file_hash or "unknown", - store=store_name or "unknown", - title=title, - path=real_path - ) - pipe_objects.append(pipe_obj) - - # Truncate title for display - display_title = title - if len(display_title) > 80: - display_title = display_title[:77] + "..." - - row = table.add_row() - row.add_column("Current", "*" if is_current else "") - row.add_column("Store", store_name or "unknown") - row.add_column("Title", display_title) - - table.set_row_selection_args(i, [str(i + 1)]) - - table.set_source_command(".pipe") - - # Register PipeObjects (not raw MPV items) with pipeline context - ctx.set_last_result_table_overlay(table, pipe_objects) - ctx.set_current_stage_table(table) - - print(table) - - return 0 - -def _start_mpv(items: List[Any], config: Optional[Dict[str, Any]] = None) -> None: - """Start MPV with a list of items.""" - import time as _time_module - - mpv = MPV() - mpv.kill_existing_windows() - _time_module.sleep(0.5) # Wait for process to die - - hydrus_header = _build_hydrus_header(config or {}) - ytdl_opts = _build_ytdl_options(config, hydrus_header) - - cookies_path = get_cookies_file_path() - if cookies_path: - debug(f"Starting MPV with cookies file: {cookies_path.replace('\\', '/')}") - else: - debug("Starting MPV with browser cookies: chrome") - - try: - # Always start MPV with the bundled Lua script via MPV class. 
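mpv.start below is handed the string composed by _build_ytdl_options; a sketch of its shape, assuming a hypothetical cookies path and Hydrus access key:

```python
# Hypothetical inputs; mirrors the join logic in _build_ytdl_options.
cookies_path = "C:/Users/me/cookies.txt"
hydrus_header = "Hydrus-Client-API-Access-Key: 0123abcd"

ytdl_raw_options = ",".join([f"cookies={cookies_path}", f"add-header={hydrus_header}"])
# -> "cookies=C:/Users/me/cookies.txt,add-header=Hydrus-Client-API-Access-Key: 0123abcd"
# With no cookies file configured, the first entry becomes "cookies-from-browser=chrome".
```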
- mpv.start( - extra_args=[ - '--ytdl-format=bestvideo[height<=?1080]+bestaudio/best[height<=?1080]', - ], - ytdl_raw_options=ytdl_opts, - http_header_fields=hydrus_header, - detached=True, - ) - debug("Started MPV process") - - # Wait for IPC pipe to be ready - if not mpv.wait_for_ipc(retries=20, delay_seconds=0.2): - debug("Timed out waiting for MPV IPC connection", file=sys.stderr) - return - - # Ensure Lua script is loaded (redundant when started with --script, but safe) - mpv.ensure_lua_loaded() - - # Queue items via IPC - if items: - _queue_items(items, config=config) - - # Auto-play the first item - import time - time.sleep(0.3) # Give MPV a moment to process the queued items - - # Play the first item (index 0) and unpause - play_cmd = {"command": ["playlist-play-index", 0], "request_id": 102} - play_resp = _send_ipc_command(play_cmd, silent=True) - - if play_resp and play_resp.get("error") == "success": - # Ensure playback starts (unpause) - unpause_cmd = {"command": ["set_property", "pause", False], "request_id": 103} - _send_ipc_command(unpause_cmd, silent=True) - debug("Auto-playing first item") - - except Exception as e: - debug(f"Error starting MPV: {e}", file=sys.stderr) - - -CMDLET = Cmdlet( - name=".pipe", - alias=["pipe", "playlist", "queue", "ls-pipe"], - summary="Manage and play items in the MPV playlist via IPC", - usage=".pipe [index|url] [-current] [-clear] [-list] [-url URL]", - arg=[ - CmdletArg( - name="index", - type="string", # Changed to string to allow URL detection - description="Index of item to play/clear, or URL to queue", - required=False - ), - CmdletArg( - name="url", - type="string", - description="URL to queue", - required=False - ), - CmdletArg( - name="clear", - type="flag", - description="Remove the selected item, or clear entire playlist if no index provided" - ), - CmdletArg( - name="list", - type="flag", - description="List items (default)" - ), - CmdletArg( - name="play", - type="flag", - description="Resume playback" - ), - CmdletArg( - name="pause", - type="flag", - description="Pause playback" - ), - CmdletArg( - name="save", - type="flag", - description="Save current playlist to database" - ), - CmdletArg( - name="load", - type="flag", - description="List saved playlists" - ), - CmdletArg( - name="current", - type="flag", - description="Emit the currently playing item to pipeline for further processing" - ), - ], - exec=_run -) - diff --git a/helper/search_provider.py b/helper/search_provider.py deleted file mode 100644 index 20ab24b..0000000 --- a/helper/search_provider.py +++ /dev/null @@ -1,2215 +0,0 @@ -""" -SearchProvider: Unified interface for different search backends. 
- -This module defines a base class and registry for search providers that can be -used by search-file and other search-related cmdlets to handle different sources: -- Local file storage (LocalStorageBackend) -- Hydrus database -- AllDebrid magnets (search-debrid) -- Library Genesis / OpenLibrary books (search-libgen) -- Soulseek P2P network (search-soulseek) -- IMDB movies (future) -- Other sources - -Usage: - from helper.search_provider import SearchProvider, get_provider - - provider = get_provider("libgen") - results = provider.search("python programming", limit=10) - - for result in results: - print(result["title"], result["target"], result["annotations"]) -""" - -from __future__ import annotations - -from abc import ABC, abstractmethod -from typing import Any, Dict, List, Optional, Sequence, Tuple -from dataclasses import dataclass -from pathlib import Path -import sys - -try: - from playwright.sync_api import sync_playwright - PLAYWRIGHT_AVAILABLE = True -except ImportError: - PLAYWRIGHT_AVAILABLE = False -import subprocess -import json -import shutil -from SYS.logger import log, debug - - -from SYS.logger import log, debug - - -@dataclass -class SearchResult: - """Unified search result format across all providers.""" - - # Required fields - origin: str # Provider name: "libgen", "soulseek", "debrid", "local", "hydrus", etc. - title: str # Display title/filename - target: str # Unique identifier or download target (URL, path, magnet hash, etc.) - - # Optional fields - detail: str = "" # Additional details (size, status, format, etc.) - annotations: List[str] = None # Tags/annotations: ["ready", "120MB", "mp3", etc.] - media_kind: str = "other" # Type: "book", "audio", "video", "file", "magnet", etc. - size_bytes: Optional[int] = None # File size in bytes - tag: Optional[set[str]] = None # Searchable tag values - full_metadata: Optional[Dict[str, Any]] = None # Extra metadata (author, year, etc.) - columns: List[Tuple[str, str]] = None # Display columns: [("Header", "value"), ...] for result table - - def __post_init__(self): - """Ensure mutable defaults are properly initialized.""" - if self.annotations is None: - self.annotations = [] - if self.tag is None: - self.tag = set() - if self.full_metadata is None: - self.full_metadata = {} - if self.columns is None: - self.columns = [] - - def to_dict(self) -> Dict[str, Any]: - """Convert to dictionary for JSON serialization. - - Note: full_metadata is excluded from dict to keep response size small - until the result is actually selected/used. This speeds up initial - search result display and piping. 
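        Illustrative output for a hypothetical book hit:
            {"origin": "libgen", "title": "Some Book",
             "target": "http://example.org/book.epub",
             "detail": "By: Some Author (2001)", "annotations": ["4 MB"],
             "media_kind": "book", "size_bytes": None, "tag": [],
             "columns": [("Title", "Some Book"), ("Author", "Some Author")]}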
- """ - data = { - "origin": self.origin, - "title": self.title, - "target": self.target, - "detail": self.detail, - "annotations": self.annotations, - "media_kind": self.media_kind, - "size_bytes": self.size_bytes, - "tag": list(self.tag) if self.tag else [], - } - if self.columns: - data["columns"] = list(self.columns) - # Note: full_metadata is NOT included in dict to keep payload small - return data - - -class Provider(ABC): - """Abstract base class for search providers.""" - - # Provider-specific field definitions: list of (api_field_name, display_column_name, formatter_func) - # Override in subclasses to define which fields to request and how to display them - # Example: [("title", "Title", None), ("author_name", "Author(s)", lambda x: ", ".join(x) if isinstance(x, list) else x)] - RESULT_FIELDS: List[Tuple[str, str, Optional[Any]]] = [] - - def __init__(self, config: Dict[str, Any] = None): - """ - Initialize provider with optional configuration. - - Args: - config: Configuration dictionary (global config dict) - """ - self.config = config or {} - self.name = self.__class__.__name__.replace("Provider", "").lower() - - @abstractmethod - def search( - self, - query: str, - limit: int = 50, - filters: Optional[Dict[str, Any]] = None, - **kwargs - ) -> List[SearchResult]: - """ - Search for items matching the query. - - Args: - query: Search query string. Special value "*" means "match all" - limit: Maximum number of results to return - filters: Optional filtering criteria (type, size, status, etc.) - **kwargs: Provider-specific arguments - - Returns: - List of SearchResult objects - """ - pass - - @abstractmethod - def get_result_args(self) -> List[str]: - """ - Get command-line arguments from a search result to pass to downstream cmdlets. - - Example: For libgen, returns ["-url", result.target] - For soulseek, returns ["-id", result.target] - For local, returns ["-path", result.target] - - Returns: - List of arguments to append to cmdlet invocation - """ - pass - - def parse_args(self, args: Sequence[str]) -> Tuple[str, Dict[str, Any]]: - """ - Parse provider-specific command-line arguments. - - Args: - args: Sequence of command-line arguments - - Returns: - Tuple of (query, filters_dict) - """ - # Default implementation: first arg is query, rest are filters - query = args[0] if args else "" - filters = {} - return query, filters - - def validate(self) -> bool: - """ - Validate that provider is properly configured and ready to use. - - Returns: - True if provider is available, False otherwise - """ - return True - - def get_columns_format(self) -> List[str]: - """ - Define which columns this provider displays in result table. - - Returns: - List of column names to display. - Each provider can override to customize result table appearance. - Examples: ["Title", "Author", "Year"] for books - ["Title", "Duration", "Format"] for media - ["Title", "Size", "Status"] for files - - Default: Empty list (uses traditional detail/origin/media_kind/target) - """ - return [col_name for _, col_name, _ in self.RESULT_FIELDS] if self.RESULT_FIELDS else [] - - def get_api_fields_string(self) -> str: - """ - Generate comma-separated API fields string from RESULT_FIELDS. 
- - Returns: - Comma-separated string of API field names to request - Example: "title,author_name,first_publish_year,isbn,key" - """ - if not self.RESULT_FIELDS: - return "" - return ",".join(field_name for field_name, _, _ in self.RESULT_FIELDS) - - def build_columns_from_doc(self, doc: Dict[str, Any], idx: int = None) -> List[Tuple[str, str]]: - """ - Dynamically build columns from a result document using RESULT_FIELDS definition. - - Args: - doc: API response document (dict with field values) - idx: Optional index/number for the result (typically added as first column) - - Returns: - List of (header, value) tuples ready for SearchResult.columns - """ - columns = [] - - # Add index as first column if provided - if idx is not None: - columns.append(("#", str(idx))) - - # Process each field definition - for api_field_name, display_col_name, formatter_func in self.RESULT_FIELDS: - value = doc.get(api_field_name, "") - - # Apply formatter if defined - if formatter_func and value: - value = formatter_func(value) - - # Convert to string and add to columns - value_str = str(value) if value else "Unknown" - columns.append((display_col_name, value_str)) - - def build_result(self, origin: str, title: str, target: str, detail: str = "", - annotations: Optional[List[str]] = None, media_kind: str = "other", - columns: Optional[List[Tuple[str, str]]] = None, - full_metadata: Optional[Dict[str, Any]] = None, - size_bytes: Optional[int] = None) -> SearchResult: - """ - Build a SearchResult with consistent column/annotation handling. - - Consolidates common pattern across all providers of extracting fields, building columns, - and creating SearchResult. Reduces per-provider duplication. - - Args: - origin: Provider name (e.g. "libgen", "openlibrary") - title: Display title - target: Download target/URL/path - detail: Secondary description line - annotations: Tags/metadata labels - media_kind: Type (book, audio, video, file, magnet, etc.) - columns: Pre-built column list, or auto-built from RESULT_FIELDS - full_metadata: Additional metadata for later retrieval - size_bytes: File size in bytes - - Returns: - SearchResult ready to display - """ - return SearchResult( - origin=origin, - title=title, - target=target, - detail=detail, - annotations=annotations or [], - media_kind=media_kind, - columns=columns or [], - full_metadata=full_metadata or {}, - size_bytes=size_bytes - ) - - return columns - - -class Libgen(Provider): - """Search provider for Library Genesis books.""" - - RESULT_FIELDS: List[Tuple[str, str, Optional[Any]]] = [] # columns built manually - - def __init__(self, config: Dict[str, Any] = None): - super().__init__(config) - self.name = "libgen" - - def search( - self, - query: str, - limit: int = 50, - filters: Optional[Dict[str, Any]] = None, - **kwargs - ) -> List[SearchResult]: - """Search Library Genesis for books. - - Supports dynamic query format: - - isbn:0557677203 - - author:"Albert Pike" - - title:"Book Title" - - Combination: isbn:0557677203 author:"Albert Pike" free text - - Priority: ISBN is the authoritative key for searching. 
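        Illustrative parse (assuming helper.query_parser semantics): for the query
            isbn:0557677203 author:"Albert Pike" some free text
        the parser yields isbn="0557677203", author="Albert Pike" and the free
        text remainder, and the LibGen search string becomes the ISBN alone.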
- """ - filters = filters or {} - - try: - from helper.unified_book_downloader import UnifiedBookDownloader - from helper.query_parser import parse_query, get_field, get_free_text - - debug(f"[libgen] Starting search for: {query}") - - # Parse the query to extract structured fields - parsed = parse_query(query) - isbn = get_field(parsed, 'isbn') - author = get_field(parsed, 'author') - title = get_field(parsed, 'title') - free_text = get_free_text(parsed) - - # Build the search query for libgen - # Priority: isbn (authoritative key) > title > author > free_text - if isbn: - search_query = isbn - elif title: - search_query = title - elif author: - search_query = author - else: - search_query = free_text or query - - debug(f"[libgen] Built search query: {search_query}") - - downloader = UnifiedBookDownloader(config=self.config) - search_fn = getattr(downloader, "search_libgen", None) - - if not callable(search_fn): - log("[libgen] Searcher unavailable", file=sys.stderr) - return [] - - debug(f"[libgen] Calling search_libgen with query: {search_query}") - books = search_fn(search_query, limit=limit) - debug(f"[libgen] Got {len(books) if books else 0} results from search_libgen") - - search_results = [] - for idx, book in enumerate(books, 1): - title = book.get("title", "Unknown") - author = book.get("author", "Unknown") - year = book.get("year", "Unknown") - pages = book.get("pages") or book.get("pages_str") or "" - extension = book.get("extension", "") or book.get("ext", "") - filesize = book.get("filesize_str", "Unknown") - isbn = book.get("isbn", "") - mirror_url = book.get("mirror_url", "") - - # Columns: Title, Author, Pages, Ext - columns = [ - ("Title", title), - ("Author", author), - ("Pages", str(pages)), - ("Ext", str(extension)), - ] - - # Build detail with author and year - detail = f"By: {author}" - if year and year != "Unknown": - detail += f" ({year})" - - annotations = [f"{filesize}"] - if isbn: - annotations.append(f"ISBN: {isbn}") - - # Store full book data without mirrors in metadata to avoid serialization overhead - search_results.append(self.build_result( - origin="libgen", - title=title, - target=mirror_url or f"libgen:{book.get('id', '')}", - detail=detail, - annotations=annotations, - media_kind="book", - columns=columns, - full_metadata={ - "number": idx, - "author": author, - "year": year, - "isbn": isbn, - "filesize": filesize, - # Exclude mirrors dict from metadata to reduce serialization overhead - # Mirrors can be re-fetched if the result is selected - "book_id": book.get("book_id", ""), - "md5": book.get("md5", ""), - }, - )) - - debug(f"[libgen] Returning {len(search_results)} formatted results") - return search_results - - except Exception as e: - log(f"[libgen] Search error: {e}", file=sys.stderr) - import traceback - log(traceback.format_exc(), file=sys.stderr) - return [] - - def get_result_args(self) -> List[str]: - """LibGen results use -url for download or -mirror for selection.""" - return ["-url"] - - def validate(self) -> bool: - """Check if LibGen downloader is available.""" - try: - from helper.unified_book_downloader import UnifiedBookDownloader - return True - except Exception: - return False - - -class SoulSeek(Provider): - """Search provider for Soulseek P2P network.""" - - # Allowed music file extensions - MUSIC_EXTENSIONS = { - '.flac', '.mp3', '.m4a', '.aac', '.ogg', '.opus', - '.wav', '.alac', '.wma', '.ape', '.aiff', '.dsf', - '.dff', '.wv', '.tta', '.tak', '.ac3', '.dts' - } - - # Display columns for search results - RESULT_FIELDS = [ - 
("track_num", "Track", None), - ("title", "Title", None), - ("artist", "Artist", lambda x: (str(x)[:32] + '...') if x and len(str(x)) > 35 else x), - ("album", "Album", lambda x: (str(x)[:32] + '...') if x and len(str(x)) > 35 else x), - ("size", "Size", lambda x: f"{int(int(x)/1024/1024)} MB" if x else ""), - ] - - # Soulseek config - USERNAME = "asjhkjljhkjfdsd334" - PASSWORD = "khhhg" - DOWNLOAD_DIR = "./downloads" - MAX_WAIT_TRANSFER = 1200 - - def __init__(self, config: Dict[str, Any] = None): - super().__init__(config) - self.name = "soulseek" - - async def perform_search( - self, - query: str, - timeout: float = 9.0, - limit: int = 50 - ) -> List[Dict[str, Any]]: - """Perform async Soulseek search and return flattened results.""" - import asyncio - import os - import re - import time - from aioslsk.client import SoulSeekClient - from aioslsk.settings import Settings, CredentialsSettings - - os.makedirs(self.DOWNLOAD_DIR, exist_ok=True) - - settings = Settings(credentials=CredentialsSettings(username=self.USERNAME, password=self.PASSWORD)) - client = SoulSeekClient(settings) - - try: - await client.start() - await client.login() - except Exception as e: - log(f"[soulseek] Login failed: {type(e).__name__}: {e}", file=sys.stderr) - return [] - - try: - search_request = await client.searches.search(query) - await self._collect_search_results(client, search_request, timeout=timeout) - flat = self._flatten_search_results(search_request)[:limit] - return flat - except Exception as e: - log(f"[soulseek] Search error: {type(e).__name__}: {e}", file=sys.stderr) - return [] - finally: - try: - await client.stop() - except Exception: - pass - - def _flatten_search_results(self, search_request) -> List[dict]: - """Extract files from SearchRequest.results.""" - flat: List[dict] = [] - for result in search_request.results: - username = getattr(result, "username", "?") - - for file_data in getattr(result, "shared_items", []): - flat.append({ - "file": file_data, - "username": username, - "filename": getattr(file_data, "filename", "?"), - "size": getattr(file_data, "filesize", 0), - }) - - for file_data in getattr(result, "locked_results", []): - flat.append({ - "file": file_data, - "username": username, - "filename": getattr(file_data, "filename", "?"), - "size": getattr(file_data, "filesize", 0), - }) - - return flat - - async def _collect_search_results(self, client, search_request, timeout: float = 75.0) -> None: - """Collect search results by waiting.""" - import asyncio - import time - debug(f"[soulseek] Collecting results for {timeout}s...") - end = time.time() + timeout - last_count = 0 - while time.time() < end: - current_count = len(search_request.results) - if current_count > last_count: - debug(f"[soulseek] Got {current_count} result(s) so far...") - last_count = current_count - await asyncio.sleep(0.5) - - async def download_file( - self, - username: str, - filename: str, - file_size: int, - target_dir: Optional[str] = None - ) -> bool: - """Download a file from Soulseek to a specific directory.""" - import asyncio - import os - import time - from aioslsk.client import SoulSeekClient - from aioslsk.settings import Settings, CredentialsSettings - from aioslsk.events import TransferProgressEvent - from tqdm import tqdm - - download_dir = target_dir if target_dir else self.DOWNLOAD_DIR - os.makedirs(download_dir, exist_ok=True) - - settings = Settings(credentials=CredentialsSettings(username=self.USERNAME, password=self.PASSWORD)) - settings.shares.download = download_dir - client = 
SoulSeekClient(settings) - - try: - await client.start() - await client.login() - - debug(f"[soulseek] Starting: {filename} from {username}") - - transfer = await client.transfers.download(username, filename) - if transfer is None: - log("[soulseek] Failed: transfer object is None") - return False - - success = await self._wait_for_transfer(client, transfer, file_size=file_size, max_wait=self.MAX_WAIT_TRANSFER) - - return success - - except Exception as e: - log(f"[soulseek] Download error: {type(e).__name__}: {e}", file=sys.stderr) - return False - - finally: - try: - await client.stop() - except Exception: - pass - - async def _wait_for_transfer(self, client, transfer_obj: Any, file_size: Any = None, max_wait: float = 1200) -> bool: - """Wait for transfer finish using event listeners with TQDM progress bar. - - Returns: - True if transfer completed successfully, False if failed or timed out. - """ - import asyncio - import time - from aioslsk.events import TransferProgressEvent - from tqdm import tqdm - - if transfer_obj is None: - log("[soulseek] No transfer object returned") - return False - - transfer_finished = False - transfer_success = False - pbar = None - total_size = file_size - last_speed_time = time.time() - last_speed = 0 - - async def on_progress(event): - nonlocal last_speed_time, last_speed, transfer_finished, transfer_success, pbar, total_size - if not hasattr(event, 'updates') or not event.updates: - return - - for transfer, _, curr_snapshot in event.updates: - if (transfer.username == transfer_obj.username and transfer.remote_path == transfer_obj.remote_path): - bytes_xfer = getattr(curr_snapshot, 'bytes_transfered', 0) - state_name = curr_snapshot.state.name if hasattr(curr_snapshot, 'state') else "?" - speed = getattr(curr_snapshot, 'speed', 0) - - if total_size is None and hasattr(transfer, 'file_attributes'): - try: - size = getattr(transfer, 'file_size', None) or getattr(transfer, 'size', None) - if size: - total_size = size - except Exception: - pass - - if pbar is None: - total = total_size if total_size else 100 * 1024 * 1024 - pbar = tqdm(total=total, unit='B', unit_scale=True, desc='[transfer]') - - if pbar: - pbar.n = bytes_xfer - if speed > 0: - pbar.set_postfix({"speed": f"{speed/1024:.1f} KB/s", "state": state_name}) - pbar.refresh() - - if state_name in ('FINISHED', 'COMPLETE'): - if pbar: - pbar.close() - debug(f"[soulseek] Transfer {state_name.lower()}") - transfer_finished = True - transfer_success = True - return - elif state_name in ('ABORTED', 'FAILED', 'PAUSED'): - if pbar: - pbar.close() - debug(f"[soulseek] Transfer {state_name.lower()}") - transfer_finished = True - transfer_success = False - return - - if total_size and bytes_xfer >= total_size: - if pbar: - pbar.close() - debug(f"[soulseek] Transfer complete ({bytes_xfer / 1024 / 1024:.1f} MB)") - transfer_finished = True - transfer_success = True - return - - if speed == 0 and bytes_xfer > 0: - now = time.time() - if now - last_speed_time > 3: - if pbar: - pbar.close() - debug(f"[soulseek] Transfer complete ({bytes_xfer / 1024 / 1024:.1f} MB)") - transfer_finished = True - transfer_success = True - return - else: - last_speed_time = time.time() - - last_speed = speed - - client.events.register(TransferProgressEvent, on_progress) - end = time.time() + max_wait - - while time.time() < end: - if transfer_finished: - break - await asyncio.sleep(0.5) - - client.events.unregister(TransferProgressEvent, on_progress) - - if pbar: - pbar.close() - - if not transfer_finished: - log(f"[soulseek] Timed 
out after {max_wait}s; transfer may still be in progress") - return False - else: - return transfer_success - - def search( - self, - query: str, - limit: int = 50, - filters: Optional[Dict[str, Any]] = None, - **kwargs - ) -> List[SearchResult]: - """Search Soulseek P2P network (synchronous wrapper).""" - import asyncio - import re - - filters = filters or {} - - try: - # Run async search - flat_results = asyncio.run(self.perform_search(query, timeout=9.0, limit=limit)) - - if not flat_results: - return [] - - # Filter to music files only - music_results = [] - for item in flat_results: - filename = item['filename'] - if '.' in filename: - ext = '.' + filename.rsplit('.', 1)[-1].lower() - else: - ext = '' - - if ext in self.MUSIC_EXTENSIONS: - music_results.append(item) - - if not music_results: - return [] - - # Extract metadata for all results - enriched_results = [] - for item in music_results: - filename = item['filename'] - - # Extract extension - if '.' in filename: - _, ext = filename.rsplit('.', 1) - ext = '.' + ext.lower() - else: - ext = '' - - # Get display filename - if '\\' in filename: - display_name = filename.rsplit('\\', 1)[-1] - elif '/' in filename: - display_name = filename.rsplit('/', 1)[-1] - else: - display_name = filename - - # Extract path hierarchy for artist/album - path_parts = filename.replace('\\', '/').split('/') - artist = '' - album = '' - - if len(path_parts) >= 3: - artist = path_parts[-3] - album = path_parts[-2] - if ' - ' in album and re.match(r'^\d{4}', album): - album = album.split(' - ', 1)[1] - elif len(path_parts) == 2: - artist = path_parts[-2] - - # Extract track number and title - base_name = display_name.rsplit('.', 1)[0] if '.' in display_name else display_name - track_num = '' - title = base_name - filename_artist = '' - - # First, extract track number if present (e.g., "30 Stumfol - Prisoner" -> track=30, rest="Stumfol - Prisoner") - match = re.match(r'^(\d{1,3})\s*[\.\-]?\s+(.+)$', base_name) - if match: - track_num = match.group(1) - remainder = match.group(2) - - # Now parse "Artist - Title" from the remainder - # If there's a " - " separator, split on it - if ' - ' in remainder: - parts = remainder.split(' - ', 1) - filename_artist = parts[0].strip() - title = parts[1].strip() - else: - # No artist-title separator, use the whole remainder as title - title = remainder - else: - # No track number, check if there's "Artist - Title" format - if ' - ' in base_name: - parts = base_name.split(' - ', 1) - filename_artist = parts[0].strip() - title = parts[1].strip() - - # Use filename_artist if extracted, otherwise fall back to path artist - if filename_artist: - artist = filename_artist - - enriched_results.append({ - **item, - 'artist': artist, - 'album': album, - 'title': title, - 'track_num': track_num, - 'ext': ext - }) - - # Apply filters if specified - if filters: - artist_filter = filters.get('artist', '').lower() if filters.get('artist') else '' - album_filter = filters.get('album', '').lower() if filters.get('album') else '' - track_filter = filters.get('track', '').lower() if filters.get('track') else '' - - if artist_filter or album_filter or track_filter: - filtered_results = [] - for item in enriched_results: - if artist_filter and artist_filter not in (item['artist'] or '').lower(): - continue - if album_filter and album_filter not in (item['album'] or '').lower(): - continue - if track_filter and track_filter not in (item['title'] or '').lower(): - continue - filtered_results.append(item) - - enriched_results = filtered_results 
- - # Sort: .flac first, then others - enriched_results.sort(key=lambda item: (item['ext'].lower() != '.flac', -item['size'])) - - # Convert to SearchResult format - search_results = [] - for idx, item in enumerate(enriched_results, 1): - artist_display = item['artist'] if item['artist'] else "(no artist)" - album_display = item['album'] if item['album'] else "(no album)" - size_mb = int(round(item['size'] / 1024 / 1024)) - - if item['track_num']: - track_title = f"[{item['track_num']}] {item['title']}" - else: - track_title = item['title'] or "(untitled)" - - # Build columns from enriched metadata - columns = self.build_columns_from_doc(item, idx=idx) - - search_results.append(self.build_result( - origin="soulseek", - title=track_title, - target=item['filename'], - detail=f"Artist: {artist_display} | Album: {album_display}", - annotations=[f"{size_mb} MB", item['ext']], - media_kind="audio", - size_bytes=item['size'], - columns=columns, - full_metadata={ - "artist": item['artist'], - "album": item['album'], - "track_num": item['track_num'], - "username": item['username'], - "filename": item['filename'], - "ext": item['ext'], - }, - )) - - return search_results - - except Exception as e: - log(f"Soulseek search error: {e}", file=sys.stderr) - return [] - - def get_result_args(self) -> List[str]: - """Soulseek results use filename/path for results.""" - return ["-path"] - - def validate(self) -> bool: - """Check if Soulseek client is available.""" - try: - import aioslsk # type: ignore - return True - except ImportError: - return False - - -class Debrid(Provider): - """Search provider for AllDebrid magnets.""" - - # Status code mappings - STATUS_MAP = { - 0: "In Queue", - 1: "Downloading", - 2: "Compressing", - 3: "Uploading", - 4: "Ready", - 5: "Upload Failed", - 6: "Unpack Error", - 7: "Not Downloaded", - 8: "File Too Big", - 9: "Internal Error", - 10: "Download Timeout", - 11: "Deleted", - 12: "Processing Failed", - 13: "Processing Failed", - 14: "Tracker Error", - 15: "No Peers" - } - - def __init__(self, config: Dict[str, Any] = None): - super().__init__(config) - self.name = "debrid" - self._magnet_files_cache = {} - - def _format_size(self, bytes_val: float) -> str: - """Format bytes to human readable size.""" - for unit in ['B', 'KB', 'MB', 'GB', 'TB']: - if bytes_val < 1024: - return f"{bytes_val:.2f} {unit}" - bytes_val /= 1024 - return f"{bytes_val:.2f} PB" - - def _get_status_display(self, status_code: int) -> str: - """Get human-readable status for AllDebrid status codes.""" - return self.STATUS_MAP.get(status_code, f"Unknown ({status_code})") - - def _should_filter_magnet(self, status_code: int, status_text: str) -> bool: - """Check if magnet should be filtered out (expired/deleted).""" - # Filter expired/deleted entries - return status_code in (5, 6, 7, 8, 11, 12, 13, 14) - - def _fuzzy_match(self, text: str, pattern: str) -> bool: - """Check if pattern fuzzy-matches text (case-insensitive, substring matching).""" - return pattern.lower() in text.lower() - - def search( - self, - query: str, - limit: int = 50, - filters: Optional[Dict[str, Any]] = None, - **kwargs - ) -> List[SearchResult]: - """Search AllDebrid magnets with optional status and name filtering. 
- - Args: - query: Search query (magnet filename or '*' for all) - limit: Max results to return - filters: Optional dict with 'status' filter ('all', 'active', 'ready', 'error') - - Returns: - List of SearchResult objects - """ - filters = filters or {} - - try: - from API.alldebrid import AllDebridClient - from config import get_debrid_api_key - - api_key = get_debrid_api_key(self.config) - - if not api_key: - log("[debrid] API key not configured", file=sys.stderr) - return [] - - client = AllDebridClient(api_key) - - # Parse status filter - status_filter_param = filters.get('status', 'all').lower() if filters.get('status') else 'all' - - # Get magnets with optional status filter - response = client._request("magnet/status", {}) - - if response.get("status") != "success": - log(f"[debrid] API error: {response.get('error', 'Unknown')}", file=sys.stderr) - return [] - - magnets = response.get("data", {}).get("magnets", []) - - # Handle both list and dict formats - if isinstance(magnets, dict): - magnets = list(magnets.values()) - - # Filter by status if specified - if status_filter_param == 'active': - magnets = [m for m in magnets if m.get('statusCode', -1) in (0, 1, 2, 3)] - elif status_filter_param == 'ready': - magnets = [m for m in magnets if m.get('statusCode', -1) == 4] - elif status_filter_param == 'error': - magnets = [m for m in magnets if m.get('statusCode', -1) in (5, 6, 8, 9, 10, 12, 13, 14, 15)] - # 'all' includes everything - - # Filter by query (fuzzy match on filename) - results = [] - count = 0 - for magnet in magnets: - if count >= limit: - break - - filename = magnet.get("filename", "") - status_code = magnet.get("statusCode", -1) - status_text = magnet.get("status", "Unknown") - - # Skip expired/deleted unless 'all' filter - if status_filter_param != 'all' and self._should_filter_magnet(status_code, status_text): - continue - - # Apply query filter (skip if doesn't match) - if query and query != "*" and not self._fuzzy_match(filename, query): - continue - - magnet_id = magnet.get("id") - size = magnet.get("size", 0) - downloaded = magnet.get("downloaded", 0) - progress = (downloaded / size * 100) if size > 0 else 0 - - # Get status emoji - if status_code == 4: - status_emoji = "✓" - elif status_code < 4: - status_emoji = "⧗" - else: - status_emoji = "✗" - - annotations = [self._get_status_display(status_code)] - if size > 0: - annotations.append(self._format_size(size)) - if progress > 0 and progress < 100: - annotations.append(f"{progress:.1f}%") - - results.append(self.build_result( - origin="debrid", - title=filename or "Unknown", - target=str(magnet_id), - detail=f"{status_emoji} {self._get_status_display(status_code)} | {self._format_size(size)}", - annotations=annotations, - media_kind="magnet", - size_bytes=size, - full_metadata={ - "magnet_id": magnet_id, - "status_code": status_code, - "status_text": status_text, - "progress": progress, - "downloaded": downloaded, - "seeders": magnet.get("seeders", 0), - "download_speed": magnet.get("downloadSpeed", 0), - }, - )) - - count += 1 - - # Cache metadata for ready magnets - if results: - self._cache_ready_magnet_metadata(client, [r for r in results if r.full_metadata.get('status_code') == 4]) - - return results - - except Exception as e: - log(f"Debrid search error: {e}", file=sys.stderr) - return [] - - def _cache_ready_magnet_metadata(self, client, results: List[SearchResult]) -> None: - """Cache file metadata for ready magnets.""" - if not results: - return - - try: - ready_ids = [r.full_metadata.get('magnet_id') 
for r in results if r.full_metadata.get('status_code') == 4] - if ready_ids: - self._magnet_files_cache = client.magnet_links(ready_ids) - log(f"[debrid] Cached metadata for {len(self._magnet_files_cache)} ready magnet(s)", file=sys.stderr) - except Exception as e: - log(f"[debrid] Warning: Could not cache magnet metadata: {e}", file=sys.stderr) - - def get_magnet_metadata(self, magnet_id: int) -> Optional[Dict[str, Any]]: - """Get cached metadata for a magnet.""" - return self._magnet_files_cache.get(str(magnet_id)) - - def get_result_args(self) -> List[str]: - """Debrid results use magnet ID for download.""" - return ["-id"] - - def validate(self) -> bool: - """Check if AllDebrid is configured.""" - from config import get_debrid_api_key - return bool(get_debrid_api_key(self.config)) - - -class OpenLibrary(Provider): - """Search provider for OpenLibrary.""" - - # Define fields to request from API and how to display them - RESULT_FIELDS: List[Tuple[str, str, Optional[Any]]] = [] # columns built manually - - def __init__(self, config: Dict[str, Any] = None): - super().__init__(config) - self.name = "openlibrary" - - def _derive_status(self, doc: Dict[str, Any]) -> tuple[str, Optional[str]]: - """Determine availability label and archive identifier.""" - ebook_access = str(doc.get("ebook_access", "") or "").strip().lower() - has_fulltext = bool(doc.get("has_fulltext")) - ia_entries = doc.get("ia") - archive_id = "" - if isinstance(ia_entries, list): - for entry in ia_entries: - if isinstance(entry, str) and entry.strip(): - archive_id = entry.strip() - break - elif isinstance(ia_entries, str) and ia_entries.strip(): - archive_id = ia_entries.strip() - elif isinstance(doc.get("ocaid"), str) and doc["ocaid"].strip(): - archive_id = doc["ocaid"].strip() - - available = False - if ebook_access in {"borrowable", "public", "full"}: - available = True - elif has_fulltext: - available = True - elif archive_id: - available = True - - status = "download" if available else "?Libgen" - return status, archive_id or None - - def search( - self, - query: str, - limit: int = 50, - filters: Optional[Dict[str, Any]] = None, - **kwargs - ) -> List[SearchResult]: - """Search OpenLibrary for books. - - Smart search that detects ISBN, OCLC, OpenLibrary ID, and falls back to title search. - """ - filters = filters or {} - - try: - import requests - - query_clean = query.strip() - search_url = "https://openlibrary.org/search.json" - - # Try to detect query type (ISBN, OCLC, OL ID, or title) - if query_clean.isdigit() and len(query_clean) in (10, 13): - # ISBN search - url = f"https://openlibrary.org/isbn/{query_clean}.json" - response = requests.get(url, timeout=9) - if response.status_code == 200: - book_data = response.json() - return [self._format_isbn_result(book_data, query_clean)] - elif response.status_code == 404: - return [] - - # Default to title/general search - requested_fields = [ - "title", - "author_name", - "first_publish_year", - "number_of_pages_median", - "isbn", - "oclc_numbers", - "lccn", - "language", - "key", - "edition_key", - "ebook_access", - "ia", - "has_fulltext", - ] - params = { - "q": query_clean, - "limit": limit, - "fields": ",".join(requested_fields), - } - - response = requests.get(search_url, params=params, timeout=9) - response.raise_for_status() - data = response.json() - - search_results = [] - for idx, doc in enumerate(data.get("docs", []), 1): - # Prefer edition_key (books/OLxxxM). Fallback to work key. 
- edition_keys = doc.get("edition_key") or [] - olid = "" - if isinstance(edition_keys, list) and edition_keys: - olid = str(edition_keys[0]).strip() - if not olid: - olid = doc.get("key", "").split("/")[-1] - - # Determine status/availability - status, archive_id = self._derive_status(doc) - doc["status"] = status - - # Extract additional metadata - title = doc.get("title", "Unknown") - authors = doc.get("author_name", ["Unknown"]) - year = doc.get("first_publish_year", "") - isbn_list = doc.get("isbn", []) - isbn = isbn_list[0] if isbn_list else "" - oclc_list = doc.get("oclc_numbers", []) - oclc = oclc_list[0] if oclc_list else "" - lccn_list = doc.get("lccn", []) - lccn = lccn_list[0] if lccn_list else "" - pages = doc.get("number_of_pages_median", "") - languages = doc.get("language", []) - language = languages[0] if languages else "" - - author_str = ", ".join(authors) if authors else "Unknown" - - # Format status for display - ebook_access_raw = str(doc.get("ebook_access", "") or "").strip().lower() - status_display = "" - if ebook_access_raw == "borrowable": - status_display = "📚 Borrowable" - elif ebook_access_raw == "public": - status_display = "🌐 Public" - elif ebook_access_raw == "full": - status_display = "✓ Full" - elif doc.get("has_fulltext"): - status_display = "📄 Fulltext" - else: - status_display = "❌ No" - - # Columns: Title, Author, Pages, Borrowable - columns = [ - ("Title", title), - ("Author", author_str), - ("Pages", str(pages or "")), - ("Borrowable", status_display), - ] - - # Build detail with author and year - detail = f"By: {author_str}" - if year: - detail += f" ({year})" - - # Build annotations with additional info - annotations = [] - if pages: - annotations.append(f"{pages} pages") - if isbn: - annotations.append(f"ISBN: {isbn}") - - search_results.append(self.build_result( - origin="openlibrary", - title=title, - target=f"https://openlibrary.org/books/{olid}", - detail=detail, - annotations=annotations, - media_kind="book", - columns=columns, - full_metadata={ - "number": idx, - "authors": authors, - "year": year, - "isbn": isbn, - "oclc": oclc, - "lccn": lccn, - "pages": pages, - "language": language, - "olid": olid, - "ebook_access": doc.get("ebook_access", ""), - "status": status, - "archive_id": archive_id, - }, - )) - - # Sort results: borrowable ones first, then not borrowable, then unknown - def sort_key(result): - status = (result.full_metadata.get("status") or "").strip().lower() - if status == "download": - return (0, result.title) - elif status.startswith("?libgen"): - return (1, result.title) - else: - return (2, result.title) - - search_results.sort(key=sort_key) - - # Rebuild number field after sorting - for new_idx, result in enumerate(search_results, 1): - result.full_metadata["number"] = new_idx - # Update the # column in columns - if result.columns and result.columns[0][0] == "#": - result.columns[0] = ("#", str(new_idx)) - - return search_results - - except Exception as e: - log(f"OpenLibrary search error: {e}", file=sys.stderr) - return [] - - def _format_isbn_result(self, book_data: Dict[str, Any], isbn: str) -> SearchResult: - """Format a book result from ISBN endpoint.""" - # Get title from book data - title = book_data.get("title", "Unknown") - - # Get authors - author_list = [] - for author_key in book_data.get("authors", []): - if isinstance(author_key, dict): - author_list.append(author_key.get("name", "")) - elif isinstance(author_key, str): - author_list.append(author_key) - - author_str = ", ".join(filter(None, author_list)) 
if author_list else "Unknown" - - # Extract other metadata - year = book_data.get("first_publish_year", "") - publishers = book_data.get("publishers", []) - publisher = publishers[0].get("name", "") if publishers and isinstance(publishers[0], dict) else "" - pages = book_data.get("number_of_pages", "") - languages = book_data.get("languages", []) - language = languages[0].get("key", "").replace("/languages/", "") if languages else "" - olid = book_data.get("key", "").split("/")[-1] if book_data.get("key") else "" - - # Build doc for column rendering - doc = { - "title": title, - "author_name": author_list, - "first_publish_year": year, - "ebook_access": book_data.get("ebook_access", ""), - "has_fulltext": bool(book_data.get("ocaid")), - "ia": [book_data.get("ocaid")] if book_data.get("ocaid") else [], - "ocaid": book_data.get("ocaid", ""), - } - status, archive_id = self._derive_status(doc) - doc["status"] = status - - # Build detail - detail = f"By: {author_str}" - if year: - detail += f" ({year})" - - # Build annotations - annotations = [] - if pages: - annotations.append(f"{pages} pages") - annotations.append(f"ISBN: {isbn}") - - # Build columns using shared helper for consistency - columns = self.build_columns_from_doc(doc, idx=1) - - return SearchResult( - origin="openlibrary", - title=title, - target=f"https://openlibrary.org/books/{olid}", - detail=detail, - annotations=annotations, - media_kind="book", - columns=columns, - full_metadata={ - "number": 1, - "authors": author_list, - "year": year, - "isbn": isbn, - "oclc": "", - "lccn": "", - "pages": pages, - "language": language, - "olid": olid, - "publisher": publisher, - "ebook_access": doc.get("ebook_access", ""), - "status": status, - "archive_id": archive_id, - }, - ) - - def get_result_args(self) -> List[str]: - """OpenLibrary results are info/links only.""" - return ["-info"] - - def validate(self) -> bool: - """OpenLibrary is always available (no auth needed).""" - return True - - -class GogGames(Provider): - """Search provider for GOG Games.""" - - def __init__(self, config: Dict[str, Any] = None): - super().__init__(config) - self.name = "gog" - self.base_url = "https://gog-games.to" - self.headers = { - "Referer": "https://gog-games.to/", - "Origin": "https://gog-games.to", - "X-Requested-With": "XMLHttpRequest" - } - - def _request(self, client, endpoint: str, is_json: bool = True) -> Any: - """Helper for API requests.""" - url = f"{self.base_url}/api/web/{endpoint}" - try: - response = client.get(url, headers=self.headers) - if response.status_code == 200: - return response.json() if is_json else response.text - elif response.status_code == 404: - return None - else: - log(f"[gog] API request failed: {response.status_code} for {endpoint}", file=sys.stderr) - return None - except Exception as e: - log(f"[gog] Request error: {e}", file=sys.stderr) - return None - - def get_all_games(self, client) -> List[Dict[str, Any]]: - """Fetch all games from the API.""" - return self._request(client, "all-games") or [] - - def get_game_details(self, client, slug: str) -> Optional[Dict[str, Any]]: - """Fetch details for a specific game.""" - return self._request(client, f"query-game/{slug}") - - def get_game_md5(self, client, slug: str) -> Optional[str]: - """Fetch MD5 checksums for a game.""" - return self._request(client, f"download-md5/{slug}", is_json=False) - - def search( - self, - query: str, - limit: int = 50, - filters: Optional[Dict[str, Any]] = None, - **kwargs - ) -> List[SearchResult]: - """Search GOG Games.""" - from 
API.HTTP import HTTPClient - - results = [] - query_norm = query.strip().lower() - - with HTTPClient() as client: - # 1. Fetch all games to perform fuzzy search - all_games = self.get_all_games(client) - - matches = [] - if all_games: - for game in all_games: - if (query_norm in game.get("title", "").lower() or - query_norm in game.get("slug", "").lower()): - matches.append(game) - - # 2. Fallback: If no matches and query looks like a slug, try direct lookup - if not matches and "_" in query_norm: - details = self.get_game_details(client, query_norm) - if details and "game_info" in details: - matches.append(details["game_info"]) - - for game in matches[:limit]: - slug = game.get("slug") - title = game.get("title", slug) - infohash = game.get("infohash") - gog_url = game.get("gog_url", "") - - # Note: 'all-games' endpoint doesn't provide file size. - # We set size to 0 to avoid N+1 requests. - - if infohash: - magnet_link = f"magnet:?xt=urn:btih:{infohash}&dn={slug}" - results.append(self.build_result( - origin="gog", - title=title, - target=magnet_link, - media_kind="magnet", - detail="Magnet Link", - annotations=["Magnet"], - full_metadata=game - )) - else: - results.append(self.build_result( - origin="gog", - title=title, - target=gog_url, - media_kind="game", - detail="No magnet available", - annotations=["No Magnet"], - full_metadata=game - )) - - return results - - def get_result_args(self) -> List[str]: - """GOG results are URLs.""" - return ["-url"] - - def validate(self) -> bool: - """GOG Games is a public website.""" - return True - - -class YouTube(Provider): - """ - Search provider for YouTube using yt-dlp. - """ - - RESULT_FIELDS = [ - ("title", "Title", None), - ("uploader", "Uploader", None), - ("duration_string", "Duration", None), - ("view_count", "Views", lambda x: f"{x:,}" if x else ""), - ] - - def search(self, query: str, limit: int = 10, filters: Optional[Dict[str, Any]] = None, **kwargs) -> List[SearchResult]: - """ - Search YouTube using yt-dlp. - - Args: - query: Search query - limit: Maximum number of results - filters: Optional filtering criteria (ignored for now) - - Returns: - List of SearchResult objects - """ - # Check if yt-dlp is available - ytdlp_path = shutil.which("yt-dlp") - if not ytdlp_path: - log("yt-dlp not found in PATH", file=sys.stderr) - return [] - - # Construct command - # ytsearchN:query searches for N results - search_query = f"ytsearch{limit}:{query}" - - cmd = [ - ytdlp_path, - "--dump-json", - "--flat-playlist", # Don't resolve video details fully, faster - "--no-warnings", - search_query - ] - - try: - # Run yt-dlp - # We need to capture stdout. 
yt-dlp outputs one JSON object per line for search results - process = subprocess.run( - cmd, - capture_output=True, - text=True, - encoding="utf-8", - errors="replace" - ) - - if process.returncode != 0: - log(f"yt-dlp search failed: {process.stderr}", file=sys.stderr) - return [] - - results = [] - for line in process.stdout.splitlines(): - if not line.strip(): - continue - - try: - data = json.loads(line) - - # Extract fields - title = data.get("title", "Unknown Title") - url = data.get("url") - if not url: - # Sometimes flat-playlist gives 'id', construct URL - video_id = data.get("id") - if video_id: - url = f"https://www.youtube.com/watch?v={video_id}" - else: - continue - - uploader = data.get("uploader", "Unknown Uploader") - duration = data.get("duration") # seconds - view_count = data.get("view_count") - - # Format duration - duration_str = "" - if duration: - try: - m, s = divmod(int(duration), 60) - h, m = divmod(m, 60) - if h > 0: - duration_str = f"{h}:{m:02d}:{s:02d}" - else: - duration_str = f"{m}:{s:02d}" - except (ValueError, TypeError): - pass - - # Create annotations - annotations = [] - if duration_str: - annotations.append(duration_str) - if view_count: - # Simple format for views - try: - vc = int(view_count) - if vc >= 1000000: - views_str = f"{vc/1000000:.1f}M views" - elif vc >= 1000: - views_str = f"{vc/1000:.1f}K views" - else: - views_str = f"{vc} views" - annotations.append(views_str) - except (ValueError, TypeError): - pass - - annotations.append("youtube") - - # Create result - result = self.build_result( - origin="youtube", - title=title, - target=url, - detail=f"by {uploader}", - annotations=annotations, - media_kind="video", - full_metadata=data, - columns=[ - ("Title", title), - ("Uploader", uploader), - ("Duration", duration_str), - ("Views", str(view_count) if view_count else "") - ] - ) - results.append(result) - - except json.JSONDecodeError: - continue - - return results - - except Exception as e: - log(f"Error running yt-dlp: {e}", file=sys.stderr) - return [] - - def get_result_args(self) -> List[str]: - """YouTube results are URLs.""" - return ["-url"] - - def validate(self) -> bool: - """Check if yt-dlp is installed.""" - return shutil.which("yt-dlp") is not None - - -class BandCamp(Provider): - """ - Search provider for Bandcamp using Playwright scraper. - """ - RESULT_FIELDS = [ - ("name", "Name", None), - ("artist", "Artist/Loc", None), - ("type", "Type", None) - ] - - def search( - self, - query: str, - limit: int = 50, - filters: Optional[Dict[str, Any]] = None, - **kwargs - ) -> List[SearchResult]: - if not PLAYWRIGHT_AVAILABLE: - print("Playwright library not available. 
Please install it (pip install playwright).") - return [] - - results = [] - try: - with sync_playwright() as p: - # Launch browser (headless) - browser = p.chromium.launch(headless=True) - page = browser.new_page() - - # Check if query is a URL (Artist/Album Scraping Mode) - if query.startswith("http://") or query.startswith("https://"): - return self._scrape_url(page, query, limit) - - # Search Mode - # Parse query for prefixes - search_type = "t" # Default to track - clean_query = query - - if "artist:" in query.lower(): - search_type = "b" - clean_query = query.lower().replace("artist:", "").strip() - elif "album:" in query.lower(): - search_type = "a" - clean_query = query.lower().replace("album:", "").strip() - elif "track:" in query.lower(): - search_type = "t" - clean_query = query.lower().replace("track:", "").strip() - elif "label:" in query.lower(): - search_type = "b" - clean_query = query.lower().replace("label:", "").strip() - - # Filters override prefix - if filters: - ftype = filters.get("type", "").lower() - if ftype in ["album", "albums"]: - search_type = "a" - elif ftype in ["artist", "artists", "label", "labels"]: - search_type = "b" - elif ftype in ["track", "tracks"]: - search_type = "t" - - # Construct URL with item_type - url = f"https://bandcamp.com/search?q={clean_query}&item_type={search_type}" - debug(f"[Bandcamp] Navigating to search URL: {url}") - page.goto(url) - page.wait_for_load_state("domcontentloaded") - - # Wait for results - try: - # Wait for the search results to appear in the DOM - page.wait_for_selector(".searchresult", timeout=10000) - except Exception as e: - # No results found or timeout - log(f"Bandcamp search timeout or no results: {e}") - browser.close() - return [] - - # Extract items - items = page.query_selector_all(".searchresult") - debug(f"[Bandcamp] Found {len(items)} results") - - for item in items: - if len(results) >= limit: - break - - try: - # Extract data - heading_el = item.query_selector(".heading a") - if not heading_el: - debug("[Bandcamp] Skipping item: No heading found") - continue - - name = heading_el.inner_text().strip() - item_url = heading_el.get_attribute("href") - # Clean URL (remove query params) - if item_url and "?" 
in item_url: - item_url = item_url.split("?")[0] - - item_type_el = item.query_selector(".itemtype") - item_type = item_type_el.inner_text().strip() if item_type_el else "Unknown" - - subhead_el = item.query_selector(".subhead") - subhead = subhead_el.inner_text().strip() if subhead_el else "" - - art_el = item.query_selector(".art img") - img = art_el.get_attribute("src") if art_el else None - - # Map to metadata - metadata = { - "name": name, - "type": item_type, - "url": item_url, - "img": img, - "subhead": subhead - } - - # Refine metadata based on type - artist_or_loc = subhead - if "ALBUM" in item_type.upper(): - artist_or_loc = subhead.replace("by ", "").strip() - metadata["artist"] = artist_or_loc - elif "ARTIST" in item_type.upper() or "LABEL" in item_type.upper(): - metadata["location"] = subhead - elif "TRACK" in item_type.upper(): - artist_or_loc = subhead.replace("by ", "").strip() - metadata["artist"] = artist_or_loc - - columns = [ - ("Name", name), - ("Artist/Loc", artist_or_loc), - ("Type", item_type) - ] - - results.append(self.build_result( - origin="bandcamp", - title=name, - target=item_url, - full_metadata=metadata, - columns=columns - )) - except Exception as e: - # Skip malformed items - debug(f"[Bandcamp] Error parsing item: {e}") - continue - - browser.close() - - except Exception as e: - log(f"Bandcamp search error: {e}") - return [] - - return results - - def _scrape_url(self, page, url: str, limit: int) -> List[SearchResult]: - """Scrape a Bandcamp artist or album page.""" - debug(f"[Bandcamp] Scraping URL: {url}") - - # If it's an artist page, try to go to /music to see all - if ".bandcamp.com" in url and "/music" not in url and "/album/" not in url and "/track/" not in url: - # Check if it's likely an artist root - url = url.rstrip("/") + "/music" - debug(f"[Bandcamp] Adjusted to music page: {url}") - - page.goto(url) - page.wait_for_load_state("domcontentloaded") - - results = [] - - # Check for grid items (Artist page /music) - grid_items = page.query_selector_all(".music-grid-item") - if grid_items: - debug(f"[Bandcamp] Found {len(grid_items)} grid items") - - # Try to get global artist name from page metadata/header as fallback - page_artist = "" - try: - og_site_name = page.query_selector('meta[property="og:site_name"]') - if og_site_name: - page_artist = og_site_name.get_attribute("content") or "" - - if not page_artist: - band_name = page.query_selector('#band-name-location .title') - if band_name: - page_artist = band_name.inner_text().strip() - except Exception: - pass - - for item in grid_items: - if len(results) >= limit: - break - try: - title_el = item.query_selector(".title") - # Sanitize title to remove newlines which break the table - title = title_el.inner_text().strip().replace("\n", " ").replace("\r", "") if title_el else "Unknown" - # Remove extra spaces - title = " ".join(title.split()) - - link_el = item.query_selector("a") - href = link_el.get_attribute("href") if link_el else "" - if href and not href.startswith("http"): - # Relative link, construct full URL - base = url.split("/music")[0] - href = base + href - - artist_el = item.query_selector(".artist") - artist = artist_el.inner_text().replace("by ", "").strip() if artist_el else "" - - # Use page artist if item artist is missing - if not artist and page_artist: - artist = page_artist - - # Sanitize artist - artist = artist.replace("\n", " ").replace("\r", "") - artist = " ".join(artist.split()) - - columns = [ - ("Name", title), - ("Artist", artist), - ("Type", "Album/Track") - ] 
- - results.append(self.build_result( - origin="bandcamp", - title=title, - target=href, - full_metadata={"artist": artist}, - columns=columns - )) - except Exception as e: - debug(f"[Bandcamp] Error parsing grid item: {e}") - continue - return results - - # Check for track list (Album page) - track_rows = page.query_selector_all(".track_row_view") - if track_rows: - debug(f"[Bandcamp] Found {len(track_rows)} track rows") - # Get Album Artist - artist_el = page.query_selector("#name-section h3 span a") - album_artist = artist_el.inner_text().strip() if artist_el else "Unknown" - - for row in track_rows: - if len(results) >= limit: - break - try: - title_el = row.query_selector(".track-title") - # Sanitize title - title = title_el.inner_text().strip().replace("\n", " ").replace("\r", "") if title_el else "Unknown" - title = " ".join(title.split()) - - # Track link - link_el = row.query_selector(".title a") - href = link_el.get_attribute("href") if link_el else "" - if href and not href.startswith("http"): - base = url.split(".com")[0] + ".com" - href = base + href - - duration_el = row.query_selector(".time") - duration = duration_el.inner_text().strip() if duration_el else "" - - columns = [ - ("Name", title), - ("Artist", album_artist), - ("Duration", duration) - ] - - results.append(self.build_result( - origin="bandcamp", - title=title, - target=href, - full_metadata={"artist": album_artist, "duration": duration}, - columns=columns - )) - except Exception as e: - debug(f"[Bandcamp] Error parsing track row: {e}") - continue - return results - - debug("[Bandcamp] No recognizable items found on page") - return [] - - def get_result_args(self) -> List[str]: - return ["-url"] - - -# Provider registry -_PROVIDERS = { - "bandcamp": BandCamp, - "libgen": Libgen, - "soulseek": SoulSeek, - "debrid": Debrid, - "openlibrary": OpenLibrary, - "gog": GogGames, - "youtube": YouTube, -} - - -def get_provider(name: str, config: Optional[Dict[str, Any]] = None) -> Optional[Provider]: - """ - Get a search provider by name. - - Args: - name: Provider name (case-insensitive): "local", "libgen", "soulseek", "debrid", "openlibrary" - config: Optional configuration dictionary - - Returns: - SearchProvider instance or None if not found - """ - provider_class = _PROVIDERS.get(name.lower()) - - if provider_class is None: - log(f"Unknown search provider: {name}", file=sys.stderr) - return None - - try: - provider = provider_class(config) - if not provider.validate(): - log(f"Provider '{name}' is not properly configured or available", file=sys.stderr) - return None - return provider - - except Exception as e: - log(f"Error initializing provider '{name}': {e}", file=sys.stderr) - return None - - -def list_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bool]: - """ - List all available providers and whether they're available. - - Args: - config: Optional configuration dictionary - - Returns: - Dictionary mapping provider names to availability (True/False) - """ - availability = {} - for name, provider_class in _PROVIDERS.items(): - try: - provider = provider_class(config) - availability[name] = provider.validate() - except Exception: - availability[name] = False - return availability - - -def register_provider(name: str, provider_class: type) -> None: - """ - Register a new search provider. 
- - Args: - name: Provider name (lowercase) - provider_class: Class that inherits from SearchProvider - """ - _PROVIDERS[name.lower()] = provider_class - - -class FileProvider(ABC): - """Abstract base class for file hosting providers.""" - - def __init__(self, config: Optional[Dict[str, Any]] = None): - self.config = config or {} - self.name = self.__class__.__name__.replace("FileProvider", "").lower() - - @abstractmethod - def upload(self, file_path: str, **kwargs: Any) -> str: - """Upload a file and return the URL.""" - pass - - def validate(self) -> bool: - """Check if provider is available/configured.""" - return True - - -class ZeroXZeroFileProvider(FileProvider): - """File provider for 0x0.st.""" - - def __init__(self, config: Optional[Dict[str, Any]] = None): - super().__init__(config) - self.name = "0x0" - self.base_url = "https://0x0.st" - - def upload(self, file_path: str, **kwargs: Any) -> str: - """Upload file to 0x0.st.""" - from API.HTTP import HTTPClient - import os - - if not os.path.exists(file_path): - raise FileNotFoundError(f"File not found: {file_path}") - - try: - # 0x0.st expects 'file' field in multipart/form-data - # Use a custom User-Agent to avoid 403 Forbidden - headers = {"User-Agent": "Medeia-Macina/1.0"} - with HTTPClient(headers=headers) as client: - with open(file_path, 'rb') as f: - files = {'file': f} - response = client.post(self.base_url, files=files) - - if response.status_code == 200: - return response.text.strip() - else: - raise Exception(f"Upload failed: {response.status_code} - {response.text}") - - except Exception as e: - log(f"[0x0] Upload error: {e}", file=sys.stderr) - raise - - def validate(self) -> bool: - return True - - -class MatrixFileProvider(FileProvider): - """File provider for Matrix (Element) chat rooms.""" - - def __init__(self, config: Optional[Dict[str, Any]] = None): - super().__init__(config) - self.name = "matrix" - - def validate(self) -> bool: - """Check if Matrix is configured.""" - if not self.config: return False - matrix_conf = self.config.get('storage', {}).get('matrix', {}) - return bool(matrix_conf.get('homeserver') and matrix_conf.get('room_id') and (matrix_conf.get('access_token') or matrix_conf.get('password'))) - - def upload(self, file_path: str, **kwargs: Any) -> str: - """Upload file to Matrix room.""" - import requests - import mimetypes - from pathlib import Path - import json - - debug(f"[Matrix] Starting upload for: {file_path}") - debug(f"[Matrix] kwargs: {kwargs}") - - path = Path(file_path) - if not path.exists(): - raise FileNotFoundError(f"File not found: {file_path}") - - matrix_conf = self.config.get('storage', {}).get('matrix', {}) - homeserver = matrix_conf.get('homeserver') - access_token = matrix_conf.get('access_token') - room_id = matrix_conf.get('room_id') - - if not homeserver.startswith('http'): - homeserver = f"https://{homeserver}" - - # 1. 
Upload Media - # Use v3 API - upload_url = f"{homeserver}/_matrix/media/v3/upload" - headers = { - "Authorization": f"Bearer {access_token}", - "Content-Type": "application/octet-stream" - } - - mime_type, _ = mimetypes.guess_type(path) - if mime_type: - headers["Content-Type"] = mime_type - - filename = path.name - - debug(f"[Matrix] Uploading media to {upload_url} with mime_type: {mime_type}") - - with open(path, 'rb') as f: - resp = requests.post(upload_url, headers=headers, data=f, params={"filename": filename}) - - if resp.status_code != 200: - raise Exception(f"Matrix upload failed: {resp.text}") - - content_uri = resp.json().get('content_uri') - if not content_uri: - raise Exception("No content_uri returned from Matrix upload") - - debug(f"[Matrix] Media uploaded, content_uri: {content_uri}") - - # 2. Send Message - # Use v3 API - send_url = f"{homeserver}/_matrix/client/v3/rooms/{room_id}/send/m.room.message" - - # Determine msgtype with better fallback for audio - msgtype = "m.file" - ext = path.suffix.lower() - - # Explicit check for common audio extensions to force m.audio - # This prevents audio files being treated as generic files or video - AUDIO_EXTS = {'.mp3', '.flac', '.wav', '.m4a', '.aac', '.ogg', '.opus', '.wma', '.mka', '.alac'} - VIDEO_EXTS = {'.mp4', '.mkv', '.webm', '.mov', '.avi', '.flv', '.mpg', '.mpeg', '.ts', '.m4v', '.wmv'} - IMAGE_EXTS = {'.jpg', '.jpeg', '.png', '.gif', '.webp', '.bmp', '.tiff'} - - if ext in AUDIO_EXTS: - msgtype = "m.audio" - elif ext in VIDEO_EXTS: - msgtype = "m.video" - elif ext in IMAGE_EXTS: - msgtype = "m.image" - elif mime_type: - if mime_type.startswith("audio/"): msgtype = "m.audio" - elif mime_type.startswith("video/"): msgtype = "m.video" - elif mime_type.startswith("image/"): msgtype = "m.image" - - debug(f"[Matrix] Determined msgtype: {msgtype} (ext: {ext}, mime: {mime_type})") - - info = { - "mimetype": mime_type, - "size": path.stat().st_size - } - - # Try to get duration for audio/video - if msgtype in ("m.audio", "m.video"): - try: - # Try mutagen first (lightweight) - # Use dynamic import to avoid top-level dependency if not installed - # Note: mutagen.File is available at package level at runtime but type checkers might miss it - import mutagen # type: ignore - m = mutagen.File(str(path)) # type: ignore - if m and m.info and hasattr(m.info, 'length'): - duration_ms = int(m.info.length * 1000) - info['duration'] = duration_ms - debug(f"[Matrix] Extracted duration: {duration_ms}ms") - except Exception as e: - debug(f"[Matrix] Failed to extract duration: {e}") - - payload = { - "msgtype": msgtype, - "body": filename, - "url": content_uri, - "info": info - } - - debug(f"[Matrix] Sending message payload: {json.dumps(payload, indent=2)}") - - resp = requests.post(send_url, headers=headers, json=payload) - if resp.status_code != 200: - raise Exception(f"Matrix send message failed: {resp.text}") - - event_id = resp.json().get('event_id') - return f"https://matrix.to/#/{room_id}/{event_id}" - - -# File provider registry -_FILE_PROVIDERS = { - "0x0": ZeroXZeroFileProvider, - "matrix": MatrixFileProvider, -} - - -def get_file_provider(name: str, config: Optional[Dict[str, Any]] = None) -> Optional[FileProvider]: - """ - Get a file hosting provider by name. 
- - Args: - name: Provider name (case-insensitive): "0x0" - config: Optional configuration dictionary - - Returns: - FileProvider instance or None if not found - """ - provider_class = _FILE_PROVIDERS.get(name.lower()) - - if provider_class is None: - log(f"Unknown file provider: {name}", file=sys.stderr) - return None - - try: - provider = provider_class(config) - if not provider.validate(): - log(f"File provider '{name}' is not properly configured or available", file=sys.stderr) - return None - return provider - - except Exception as e: - log(f"Error initializing file provider '{name}': {e}", file=sys.stderr) - return None - - -def list_file_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bool]: - """ - List all available file hosting providers and whether they're available. - - Args: - config: Optional configuration dictionary - - Returns: - Dictionary mapping provider names to availability (True/False) - """ - availability = {} - for name, provider_class in _FILE_PROVIDERS.items(): - try: - provider = provider_class(config) - availability[name] = provider.validate() - except Exception: - availability[name] = False - return availability - - -def register_file_provider(name: str, provider_class: type) -> None: - """ - Register a new file hosting provider. - - Args: - name: Provider name (lowercase) - provider_class: Class that inherits from FileProvider - """ - _FILE_PROVIDERS[name.lower()] = provider_class - - - - diff --git a/metadata.py b/metadata.py index 603603a..b8fa907 100644 --- a/metadata.py +++ b/metadata.py @@ -58,10 +58,7 @@ _CURRENT_RELATIONSHIP_TRACKER = FileRelationshipTracker() def prepare_ffmpeg_metadata(payload: Optional[Dict[str, Any]]) -> Dict[str, str]: - """Derive ffmpeg/mutagen metadata tags from a generic metadata payload. - - This is not Hydrus-specific; it is used by exporters/converters. - """ + """Build ffmpeg/mutagen metadata map from payload.""" if not isinstance(payload, dict): return {} @@ -275,29 +272,17 @@ def build_ffmpeg_command( def field(obj: Any, name: str, value: Any = None) -> Any: - """Get or set a field on dict or object. - - Args: - obj: Dict or object to access - name: Field name - value: If None, gets the field; if not None, sets it and returns the value - - Returns: - The field value (when getting) or the value (when setting) - """ - if value is None: - # Get mode - if isinstance(obj, dict): - return obj.get(name) - else: - return getattr(obj, name, None) - else: - # Set mode - if isinstance(obj, dict): - obj[name] = value - else: - setattr(obj, name, value) - return value + """Get or set a field on dict or object.""" + if value is None: + if isinstance(obj, dict): + return obj.get(name) + return getattr(obj, name, None) + + if isinstance(obj, dict): + obj[name] = value + else: + setattr(obj, name, value) + return value @@ -1602,78 +1587,61 @@ def _read_sidecar_metadata(sidecar_path: Path) -> tuple[Optional[str], List[str] def rename(file_path: Path, tags: Iterable[str]) -> Optional[Path]: - """Rename a file based on title: tag in the tags list. - + """Rename a file based on a title: tag. + If a title: tag is present, renames the file and any .tag/.metadata sidecars. 
- - Args: - file_path: Path to the file to potentially rename - tags: Iterable of tag strings (should contain title: tag if rename needed) - - Returns: - New path if renamed, None if not renamed or error occurred - """ - # Extract title from tags - new_title = None - for tag in tags: - if isinstance(tag, str) and tag.lower().startswith('title:'): - new_title = tag.split(':', 1)[1].strip() - break - - if not new_title or not file_path.exists(): - return None - - try: - old_name = file_path.name - old_suffix = file_path.suffix - - # Create new filename: title + extension - new_name = f"{new_title}{old_suffix}" - new_path = file_path.parent / new_name - - # Don't rename if already the same name - if new_path == file_path: - return None - - # If target exists, delete it first (replace mode) - if new_path.exists(): - try: - new_path.unlink() - debug(f"Replaced existing file: {new_name}", file=sys.stderr) - except Exception as e: - debug(f"Warning: Could not replace target file {new_name}: {e}", file=sys.stderr) - return None - - file_path.rename(new_path) - debug(f"Renamed file: {old_name} → {new_name}", file=sys.stderr) - - # Rename the .tag sidecar if it exists - old_tags_path = file_path.parent / (old_name + '.tag') - if old_tags_path.exists(): - new_tags_path = file_path.parent / (new_name + '.tag') - if new_tags_path.exists(): - try: - new_tags_path.unlink() - except Exception: - pass - else: - old_tags_path.rename(new_tags_path) - debug(f"Renamed sidecar: {old_tags_path.name} → {new_tags_path.name}", file=sys.stderr) - - # Rename the .metadata sidecar if it exists - old_metadata_path = file_path.parent / (old_name + '.metadata') - if old_metadata_path.exists(): - new_metadata_path = file_path.parent / (new_name + '.metadata') - if new_metadata_path.exists(): - debug(f"Warning: Target metadata already exists: {new_metadata_path.name}", file=sys.stderr) - else: - old_metadata_path.rename(new_metadata_path) - debug(f"Renamed metadata: {old_metadata_path.name} → {new_metadata_path.name}", file=sys.stderr) - - return new_path - except Exception as exc: - debug(f"Warning: Failed to rename file: {exc}", file=sys.stderr) - return None + """ + + new_title: Optional[str] = None + for tag in tags: + if isinstance(tag, str) and tag.lower().startswith("title:"): + new_title = tag.split(":", 1)[1].strip() + break + + if not new_title or not file_path.exists(): + return None + + old_name = file_path.name + old_suffix = file_path.suffix + new_name = f"{new_title}{old_suffix}" + new_path = file_path.with_name(new_name) + + if new_path == file_path: + return None + + def _rename_sidecar(ext: str) -> None: + old_sidecar = file_path.parent / (old_name + ext) + if not old_sidecar.exists(): + return + new_sidecar = file_path.parent / (new_name + ext) + if new_sidecar.exists(): + try: + new_sidecar.unlink() + except Exception as exc: + debug(f"Warning: Could not replace target sidecar {new_sidecar.name}: {exc}", file=sys.stderr) + return + old_sidecar.rename(new_sidecar) + debug(f"Renamed sidecar: {old_sidecar.name} -> {new_sidecar.name}", file=sys.stderr) + + try: + if new_path.exists(): + try: + new_path.unlink() + debug(f"Replaced existing file: {new_name}", file=sys.stderr) + except Exception as exc: + debug(f"Warning: Could not replace target file {new_name}: {exc}", file=sys.stderr) + return None + + file_path.rename(new_path) + debug(f"Renamed file: {old_name} -> {new_name}", file=sys.stderr) + + _rename_sidecar(".tag") + _rename_sidecar(".metadata") + + return new_path + except Exception as exc: + 
debug(f"Warning: Failed to rename file: {exc}", file=sys.stderr)
+        return None
 
 
 def write_tags(media_path: Path, tags: Iterable[str], url: Iterable[str], hash_value: Optional[str] = None, db=None) -> None:
@@ -2096,26 +2064,7 @@ def apply_tag_mutation(payload: Dict[str, Any], operation: str = 'add') -> Dict[
 
 
 def extract_ytdlp_tags(entry: Dict[str, Any]) -> List[str]:
-    """Extract meaningful metadata tags from yt-dlp entry.
-
-    This is the UNIFIED API for extracting tags from yt-dlp metadata.
-    All modules (download_data, merge_file, etc.) should use this function
-    instead of implementing their own extraction logic.
-
-    Extracts meaningful tags (artist, album, creator, genre, track, etc.)
-    while excluding technical fields (filesize, duration, format, etc.).
-
-    Args:
-        entry: yt-dlp entry metadata dictionary from download
-
-    Returns:
-        List of normalized tag strings in format "namespace:value"
-
-    Example:
-        >>> entry = {'artist': 'The Beatles', 'album': 'Abbey Road', 'duration': 5247}
-        >>> tags = extract_ytdlp_tags(entry)
-        >>> debug(tags)
-        ['artist:The Beatles', 'album:Abbey Road']
+    """Extract meaningful metadata tags (artist, album, genre, track, etc.) from a yt-dlp entry, excluding technical fields (filesize, duration, format, etc.).
     """
     tags: List[str] = []
     seen_namespaces: Set[str] = set()
@@ -2186,7 +2135,7 @@ def extract_ytdlp_tags(entry: Dict[str, Any]) -> List[str]:
 def dedup_tags_by_namespace(tags: List[str], keep_first: bool = True) -> List[str]:
     """Deduplicate tags by namespace, keeping consistent order.
 
-    This is the UNIFIED API for tag deduplication used across all cmdlets.
+    This is the UNIFIED API for tag deduplication, used by every cmdlet.
     Replaces custom deduplication logic in merge_file.py and other modules.
 
     Groups tags by namespace (e.g., "artist", "album", "tag") and keeps
@@ -2345,7 +2294,7 @@ def merge_multiple_tag_lists(
 def read_tags_from_file(file_path: Path) -> List[str]:
     """Read and normalize tags from .tag sidecar file.
 
-    This is the UNIFIED API for reading .tag files across all cmdlets.
+    This is the UNIFIED API for reading .tag files, used by every cmdlet.
     Handles normalization, deduplication, and format validation.
 
     Args:
@@ -2397,33 +2346,7 @@ def embed_metadata_in_file(
     tags: List[str],
     file_kind: str = ''
 ) -> bool:
-    """Embed metadata tags into a media file using FFmpeg.
-
-    Extracts metadata from tags (namespace:value format) and writes to the file's
-    metadata using FFmpeg with -c copy (no re-encoding).
-
-    Supported tag namespaces:
-    - title, artist, album, track/track_number, date/year, genre, composer, comment
-
-    For audio files, applies sensible defaults:
-    - If no album, uses title as album
-    - If no track, defaults to 1
-    - album_artist is set to artist value
-
-    Args:
-        file_path: Path to media file
-        tags: List of tags in format ['namespace:value', ...] (e.g., ['artist:Beatles', 'album:Abbey Road'])
-        file_kind: Type of file: 'audio', 'video', or '' for auto-detect (optional)
-
-    Returns:
-        True if successful, False otherwise
-
-    Raises:
-        None (logs errors to stderr)
-
-    Example:
-        >>> tags = ['artist:Beatles', 'album:Abbey Road', 'track:1']
-        >>> success = embed_metadata_in_file(Path('song.mp3'), tags, file_kind='audio')
+    """Embed namespace:value tags into a media file's metadata using FFmpeg with -c copy (no re-encoding).
     """
     if not tags:
         return True
@@ -2550,7 +2473,7 @@ def write_tags_to_file(
 ) -> bool:
     """Write tags to .tag sidecar file.
 
-    This is the UNIFIED API for writing .tag files across all cmdlets.
+    This is the UNIFIED API for writing .tag files, used by every cmdlet.
     Uses consistent format and handles file creation/overwriting.
     Args:
diff --git a/pipeline.py b/pipeline.py
index 249c0c4..425c909 100644
--- a/pipeline.py
+++ b/pipeline.py
@@ -1,6 +1,6 @@
-"""Pipeline execution context and state management for cmdlets.
+"""Pipeline execution context and state management for cmdlet execution.
 
-This module provides functions for managing pipeline state, allowing cmdlets to
+This module provides functions for managing pipeline state, allowing each cmdlet to
 emit results and control printing behavior within a piped execution context.
 
 Key Concepts:
@@ -76,7 +76,7 @@ _PIPELINE_LAST_SELECTION: List[int] = []
 # Track the currently executing command/pipeline string for worker attribution
 _PIPELINE_COMMAND_TEXT: str = ""
 
-# Shared scratchpad for cmdlets/funacts to stash structured data between stages
+# Shared scratchpad for cmdlet/funact code to stash structured data between stages
 _PIPELINE_VALUES: Dict[str, Any] = {}
 
 _PIPELINE_MISSING = object()
@@ -128,8 +128,8 @@ def emit(obj: Any) -> None:
 def emit_list(objects: List[Any]) -> None:
     """Emit a list of objects to the next pipeline stage.
 
-    This allows cmdlets to emit multiple results that are tracked as a list,
-    enabling downstream cmdlets to process all of them or filter by metadata.
+    This allows a cmdlet to emit multiple results that are tracked as a list,
+    enabling downstream cmdlet stages to process all of them or filter by metadata.
 
     Args:
         objects: List of objects to emit
@@ -143,7 +143,7 @@ def print_if_visible(*args: Any, file=None, **kwargs: Any) -> None:
     - Always allow errors printed to stderr by callers (they pass file=sys.stderr).
     - For normal info messages, this suppresses printing for intermediate pipeline stages.
-      - Use this instead of log() in cmdlets when you want stage-aware output.
+      - Use this instead of log() in a cmdlet when you want stage-aware output.
 
     Args:
         *args: Arguments to print (same as built-in print)
@@ -426,7 +426,7 @@ def get_ui_library_refresh_callback() -> Optional[Any]:
 def trigger_ui_library_refresh(library_filter: str = 'local') -> None:
     """Trigger a library refresh in the UI if callback is registered.
 
-    This should be called from cmdlets/funacts after content is added to library.
+    This should be called from cmdlet/funact code after content is added to the library.
 
     Args:
         library_filter: Which library to refresh ('local', 'hydrus', etc)
@@ -732,7 +732,7 @@ def get_last_result_table_row_selection_args(row_index: int) -> Optional[List[st
 def set_current_stage_table(result_table: Optional[Any]) -> None:
     """Store the current pipeline stage table for @N expansion.
 
-    Used by cmdlets that display tabular results (e.g., download-data with formats)
+    Used by any cmdlet that displays tabular results (e.g., download-data with formats)
     to make their result table available for @N expansion logic.
 
     Does NOT push to history - purely for command expansion in the current pipeline.
diff --git a/pyproject.toml b/pyproject.toml
index 4ebe258..2a8e02c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -113,7 +113,7 @@ Repository = "https://github.com/yourusername/medeia-macina.git"
 Issues = "https://github.com/yourusername/medeia-macina/issues"
 
 [tool.setuptools]
-packages = ["cmdlets", "helper", "TUI", "medeia_macina"]
+packages = ["cmdlet", "helper", "TUI", "medeia_macina"]
 
 [tool.black]
 line-length = 100
diff --git a/scripts/remote_storage_server.py b/scripts/remote_storage_server.py
index 426856a..c089bc5 100644
--- a/scripts/remote_storage_server.py
+++ b/scripts/remote_storage_server.py
@@ -34,7 +34,7 @@ server and uses it as a remote storage backend through the RemoteStorageBackend.
## USAGE -After setup, all cmdlets work with the phone: +After setup, all cmdlets work with the phone: $ search-file zohar -store phone $ @1-3 | add-relationship -king @4 -store phone $ @1 | get-relationship -store phone diff --git a/search_file.py b/search_file.py deleted file mode 100644 index 81c0f0c..0000000 --- a/search_file.py +++ /dev/null @@ -1,530 +0,0 @@ -"""Search-file cmdlet: Search for files by query, tag, size, type, duration, etc.""" -from __future__ import annotations - -from typing import Any, Dict, Sequence, List, Optional, Tuple -from pathlib import Path -from dataclasses import dataclass, field -from collections import OrderedDict -import re -import json -import sys - -from SYS.logger import log, debug - -from Provider.registry import get_search_provider - -from cmdlets._shared import Cmdlet, CmdletArg, get_field, should_show_help -import pipeline as ctx - - -def get_origin(obj: Any, default: Any = None) -> Any: - """Return the canonical origin/table identifier from a payload-like object.""" - value = get_field(obj, "origin", None) - if value is not None: - return value - value = get_field(obj, "table", None) - if value is not None: - return value - value = get_field(obj, "store", None) - if value is not None: - return value - return default - -# Optional dependencies -try: - import mutagen # type: ignore -except ImportError: # pragma: no cover - mutagen = None # type: ignore - -try: - from config import get_hydrus_url, resolve_output_dir -except Exception: # pragma: no cover - get_hydrus_url = None # type: ignore - resolve_output_dir = None # type: ignore - -try: - from API.HydrusNetwork import HydrusNetwork, HydrusRequestError -except ImportError: # pragma: no cover - HydrusNetwork = None # type: ignore - HydrusRequestError = RuntimeError # type: ignore - -try: - from SYS.utils import sha256_file -except ImportError: # pragma: no cover - sha256_file = None # type: ignore - -try: - from SYS.utils_constant import mime_maps -except ImportError: # pragma: no cover - mime_maps = {} # type: ignore - -@dataclass(slots=True) -class SearchRecord: - path: str - size_bytes: int | None = None - duration_seconds: str | None = None - tag: str | None = None - hash: str | None = None - - def as_dict(self) -> dict[str, str]: - payload: dict[str, str] = {"path": self.path} - if self.size_bytes is not None: - payload["size"] = str(self.size_bytes) - if self.duration_seconds: - payload["duration"] = self.duration_seconds - if self.tag: - payload["tag"] = self.tag - if self.hash: - payload["hash"] = self.hash - return payload - - -@dataclass -class ResultItem: - table: str # Renamed from origin - title: str - detail: str - annotations: List[str] - target: str - media_kind: str = "other" - hash: Optional[str] = None - columns: List[tuple[str, str]] = field(default_factory=list) - tag_summary: Optional[str] = None - duration_seconds: Optional[float] = None - size_bytes: Optional[int] = None - full_metadata: Optional[Dict[str, Any]] = None - tag: Optional[set[str]] = field(default_factory=set) - relationships: Optional[List[str]] = field(default_factory=list) - known_urls: Optional[List[str]] = field(default_factory=list) - - @property - def origin(self) -> str: - return self.table - - def to_dict(self) -> Dict[str, Any]: - payload: Dict[str, Any] = { - "title": self.title, - } - - # Always include these core fields for downstream cmdlets (get-file, download-data, etc) - payload["table"] = self.table - payload["target"] = self.target - payload["media_kind"] = self.media_kind - - # Always 
include full_metadata if present (needed by download-data, etc) - # This is NOT for display, but for downstream processing - if self.full_metadata: - payload["full_metadata"] = self.full_metadata - - # Include columns if defined (result renderer will use these for display) - if self.columns: - payload["columns"] = list(self.columns) - else: - # If no columns, include the detail for backwards compatibility - payload["detail"] = self.detail - payload["annotations"] = list(self.annotations) - - # Include optional fields - if self.hash: - payload["hash"] = self.hash - if self.tag_summary: - payload["tag_summary"] = self.tag_summary - if self.tag: - payload["tag"] = list(self.tag) - if self.relationships: - payload["relationships"] = self.relationships - if self.known_urls: - payload["known_urls"] = self.known_urls - return payload - - -STORAGE_ORIGINS = {"local", "hydrus", "debrid"} - - -class Search_File(Cmdlet): - """Class-based search-file cmdlet with self-registration.""" - - def __init__(self) -> None: - super().__init__( - name="search-file", - summary="Unified search cmdlet for storage (Hydrus, Local) and providers (Debrid, LibGen, OpenLibrary, Soulseek).", - usage="search-file [query] [-tag TAG] [-size >100MB|<50MB] [-type audio|video|image] [-duration >10:00] [-store BACKEND] [-provider PROVIDER]", - arg=[ - CmdletArg("query", description="Search query string"), - CmdletArg("tag", description="Filter by tag (can be used multiple times)"), - CmdletArg("size", description="Filter by size: >100MB, <50MB, =10MB"), - CmdletArg("type", description="Filter by type: audio, video, image, document"), - CmdletArg("duration", description="Filter by duration: >10:00, <1:30:00"), - CmdletArg("limit", type="integer", description="Limit results (default: 45)"), - CmdletArg("store", description="Search storage backend: hydrus, local (default: all searchable storages)"), - CmdletArg("provider", description="Search provider: libgen, openlibrary, soulseek, debrid, local (overrides -storage)"), - ], - detail=[ - "Search across storage (Hydrus, Local) and providers (Debrid, LibGen, OpenLibrary, Soulseek)", - "Use -provider to search a specific source, or -store to search file backends", - "Filter results by: tag, size, type, duration", - "Results can be piped to other commands", - "Examples:", - "search-file foo # Search all file backends", - "search-file -provider libgen 'python programming' # Search LibGen books", - "search-file -provider debrid 'movie' # Search AllDebrid magnets", - "search-file 'music' -provider soulseek # Search Soulseek P2P", - "search-file -provider openlibrary 'tolkien' # Search OpenLibrary", - "search-file song -store hydrus -type audio # Search only Hydrus audio", - "search-file movie -tag action -provider debrid # Debrid with filters", - ], - exec=self.run, - ) - self.register() - - # --- Helper methods ------------------------------------------------- - @staticmethod - def _normalize_extension(ext_value: Any) -> str: - """Sanitize extension strings to alphanumerics and cap at 5 chars.""" - ext = str(ext_value or "").strip().lstrip(".") - for sep in (" ", "|", "(", "[", "{", ",", ";"): - if sep in ext: - ext = ext.split(sep, 1)[0] - break - if "." 
in ext: - ext = ext.split(".")[-1] - ext = "".join(ch for ch in ext if ch.isalnum()) - return ext[:5] - - def _ensure_storage_columns(self, payload: Dict[str, Any]) -> Dict[str, Any]: - """Ensure storage results have the necessary fields for result_table display.""" - store_value = str(get_origin(payload, "") or "").lower() - if store_value not in STORAGE_ORIGINS: - return payload - - # Ensure we have title field - if "title" not in payload: - payload["title"] = payload.get("name") or payload.get("target") or payload.get("path") or "Result" - - # Ensure we have ext field - if "ext" not in payload: - title = str(payload.get("title", "")) - path_obj = Path(title) - if path_obj.suffix: - payload["ext"] = self._normalize_extension(path_obj.suffix.lstrip('.')) - else: - payload["ext"] = payload.get("ext", "") - - # Ensure size_bytes is present for display (already set by search_file()) - # result_table will handle formatting it - - # Don't create manual columns - let result_table handle display - # This allows the table to respect max_columns and apply consistent formatting - return payload - - # --- Execution ------------------------------------------------------ - def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: - """Search across multiple providers: Hydrus, Local, Debrid, LibGen, etc.""" - if should_show_help(args): - log(f"Cmdlet: {self.name}\nSummary: {self.summary}\nUsage: {self.usage}") - return 0 - - args_list = [str(arg) for arg in (args or [])] - - # Parse arguments - query = "" - tag_filters: List[str] = [] - size_filter: Optional[Tuple[str, int]] = None - duration_filter: Optional[Tuple[str, float]] = None - type_filter: Optional[str] = None - storage_backend: Optional[str] = None - provider_name: Optional[str] = None - limit = 45 - searched_backends: List[str] = [] - - i = 0 - while i < len(args_list): - arg = args_list[i] - low = arg.lower() - if low in {"-provider", "--provider"} and i + 1 < len(args_list): - provider_name = args_list[i + 1].lower() - i += 2 - elif low in {"-store", "--store", "-storage", "--storage"} and i + 1 < len(args_list): - storage_backend = args_list[i + 1].lower() - i += 2 - elif low in {"-tag", "--tag"} and i + 1 < len(args_list): - tag_filters.append(args_list[i + 1]) - i += 2 - elif low in {"-limit", "--limit"} and i + 1 < len(args_list): - try: - limit = int(args_list[i + 1]) - except ValueError: - limit = 100 - i += 2 - elif low in {"-type", "--type"} and i + 1 < len(args_list): - type_filter = args_list[i + 1].lower() - i += 2 - elif not arg.startswith("-"): - query = f"{query} {arg}".strip() if query else arg - i += 1 - else: - i += 1 - - store_filter: Optional[str] = None - if query: - match = re.search(r"\bstore:([^\s,]+)", query, flags=re.IGNORECASE) - if match: - store_filter = match.group(1).strip().lower() or None - query = re.sub(r"\s*[,]?\s*store:[^\s,]+", " ", query, flags=re.IGNORECASE) - query = re.sub(r"\s{2,}", " ", query) - query = query.strip().strip(',') - - if storage_backend and storage_backend.lower() == "debrid": - log("Use -provider debrid instead of -store debrid (debrid is provider-only)", file=sys.stderr) - return 1 - - if store_filter and not provider_name and not storage_backend: - if store_filter in {"hydrus", "local", "debrid"}: - storage_backend = store_filter - - # --- Feature: Filter provider result table by Name column --- - filter_after_search: Optional[str] = None - if result: - actual_result = result[0] if isinstance(result, list) and result else result - origin = 
get_origin(actual_result) - target = get_field(actual_result, 'target') - - # If the incoming result is from a provider (not storage) AND this invocation looks like a filter (no flags) - positional_args = [a for a in args_list if not a.startswith('-')] - no_flags = len(positional_args) == len(args_list) - looks_like_filter = no_flags and len(positional_args) == 1 and not provider_name and not storage_backend and not tag_filters and not size_filter and not duration_filter and not type_filter - - if origin and origin.lower() not in STORAGE_ORIGINS and looks_like_filter and query: - # Save the filter string to apply AFTER loading the provider data - filter_after_search = query.strip() - query = "" # Clear query so we load the target URL instead - - # If result is from a provider, extract the target as query and set provider - if not query: - if origin == 'bandcamp' and target: - query = target - if not provider_name: - provider_name = 'bandcamp' - elif origin == 'youtube' and target: - query = target - if not provider_name: - provider_name = 'youtube' - elif target and str(target).startswith(('http://', 'https://')): - query = target - if not provider_name: - if 'bandcamp.com' in target: - provider_name = 'bandcamp' - elif 'youtube.com' in target or 'youtu.be' in target: - provider_name = 'youtube' - - if not query: - log("Provide a search query", file=sys.stderr) - return 1 - - from API.folder import API_folder_store - from config import get_local_storage_path - import uuid - worker_id = str(uuid.uuid4()) - library_root = get_local_storage_path(config or {}) - if not library_root: - log("No library root configured", file=sys.stderr) - return 1 - - db = None - try: - db = API_folder_store(library_root) - db.insert_worker( - worker_id, - "search", - title=f"Search: {query}", - description=f"Query: {query}", - pipe=ctx.get_current_command_text() - ) - - results_list = [] - import result_table - import importlib - importlib.reload(result_table) - from result_table import ResultTable - - table_title = f"Search: {query}" - if provider_name: - table_title += f" [{provider_name}]" - elif storage_backend: - table_title += f" [{storage_backend}]" - - preserve_order = provider_name and provider_name.lower() in ('youtube', 'openlibrary') - # Avoid setting source_command so @N does not re-run search-file; preserve row order when needed - table = ResultTable(table_title).set_preserve_order(preserve_order) - - if provider_name: - debug(f"[search_file] Attempting provider search with: {provider_name}") - provider = get_search_provider(provider_name, config) - if not provider: - log(f"Provider '{provider_name}' not available", file=sys.stderr) - db.update_worker_status(worker_id, 'error') - return 1 - - debug(f"[search_file] Provider loaded, calling search with query: {query}") - search_result = provider.search(query, limit=limit) - debug(f"[search_file] Provider search returned {len(search_result)} results") - - # Apply post-search filter if one was set - if filter_after_search: - debug(f"[search_file] Applying filter: {filter_after_search}") - filtered_result = [] - for item in search_result: - item_dict = item.to_dict() if hasattr(item, 'to_dict') else dict(item) - title_val = get_field(item_dict, 'title') or get_field(item_dict, 'name') or "" - if filter_after_search.lower() in str(title_val).lower(): - filtered_result.append(item) - search_result = filtered_result - if not search_result: - log(f"No results match filter: '{filter_after_search}'", file=sys.stderr) - db.update_worker_status(worker_id, 
'completed') - return 0 - debug(f"[search_file] Filter matched {len(search_result)} results") - table.title = f"Filter: {filter_after_search}" - - for item in search_result: - table.add_result(item) - item_dict = item.to_dict() - results_list.append(item_dict) - ctx.emit(item_dict) - - ctx.set_last_result_table(table, results_list) - debug(f"[search_file] Emitted {len(results_list)} results") - db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2)) - db.update_worker_status(worker_id, 'completed') - return 0 - - from Store import Store - storage = Store(config=config or {}, suppress_debug=True) - - backend_to_search = storage_backend or None - if backend_to_search: - if backend_to_search == "hydrus": - from API.HydrusNetwork import is_hydrus_available - if not is_hydrus_available(config or {}): - log(f"Backend 'hydrus' is not available (Hydrus service not running)", file=sys.stderr) - db.update_worker_status(worker_id, 'error') - return 1 - searched_backends.append(backend_to_search) - target_backend = storage[backend_to_search] - results = target_backend.search(query, limit=limit) - else: - from API.HydrusNetwork import is_hydrus_available - hydrus_available = is_hydrus_available(config or {}) - - all_results = [] - for backend_name in storage.list_searchable_backends(): - if backend_name == "hydrus" and not hydrus_available: - continue - searched_backends.append(backend_name) - try: - backend_results = storage[backend_name].search(query, limit=limit - len(all_results)) - if backend_results: - all_results.extend(backend_results) - if len(all_results) >= limit: - break - except Exception as exc: - log(f"Backend {backend_name} search failed: {exc}", file=sys.stderr) - results = all_results[:limit] - - if not provider_name and not storage_backend: - try: - debrid_provider = get_search_provider("debrid", config) - if debrid_provider and debrid_provider.validate(): - remaining = max(0, limit - len(results)) if isinstance(results, list) else limit - if remaining > 0: - debrid_results = debrid_provider.search(query, limit=remaining) - if debrid_results: - if "debrid" not in searched_backends: - searched_backends.append("debrid") - if results is None: - results = [] - results.extend(debrid_results) - except Exception as exc: - log(f"Debrid provider search failed: {exc}", file=sys.stderr) - - def _format_storage_label(name: str) -> str: - clean = str(name or "").strip() - if not clean: - return "Unknown" - return clean.replace("_", " ").title() - - storage_counts: OrderedDict[str, int] = OrderedDict((name, 0) for name in searched_backends) - for item in results or []: - origin = get_origin(item) - if not origin: - continue - key = str(origin).lower() - if key not in storage_counts: - storage_counts[key] = 0 - storage_counts[key] += 1 - - if storage_counts or query: - display_counts = OrderedDict((_format_storage_label(name), count) for name, count in storage_counts.items()) - summary_line = table.set_storage_summary(display_counts, query, inline=True) - if summary_line: - table.title = summary_line - - if results: - for item in results: - def _as_dict(obj: Any) -> Dict[str, Any]: - if isinstance(obj, dict): - return dict(obj) - if hasattr(obj, "to_dict") and callable(getattr(obj, "to_dict")): - return obj.to_dict() # type: ignore[arg-type] - return {"title": str(obj)} - - item_dict = _as_dict(item) - if store_filter: - origin_val = str(get_origin(item_dict) or "").lower() - if store_filter != origin_val: - continue - normalized = self._ensure_storage_columns(item_dict) - - # Make 
hash/store available for downstream cmdlets without rerunning search-file - hash_val = normalized.get("hash") - store_val = normalized.get("store") or get_origin(item_dict) - if hash_val and not normalized.get("hash"): - normalized["hash"] = hash_val - if store_val and not normalized.get("store"): - normalized["store"] = store_val - - table.add_result(normalized) - - results_list.append(normalized) - ctx.emit(normalized) - - ctx.set_last_result_table(table, results_list) - db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2)) - else: - log("No results found", file=sys.stderr) - db.append_worker_stdout(worker_id, json.dumps([], indent=2)) - - db.update_worker_status(worker_id, 'completed') - return 0 - - except Exception as exc: - log(f"Search failed: {exc}", file=sys.stderr) - import traceback - traceback.print_exc(file=sys.stderr) - if db: - try: - db.update_worker_status(worker_id, 'error') - except Exception: - pass - return 1 - - finally: - if db: - try: - db.close() - except Exception: - pass - - -CMDLET = Search_File() -
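
The metadata.py hunks above keep extract_ytdlp_tags, dedup_tags_by_namespace, read_tags_from_file, and embed_metadata_in_file as the shared tag helpers. A minimal usage sketch, assuming metadata.py remains importable as a top-level module and that the signatures shown in the hunks are unchanged (the file name and tag values are illustrative only):

    from pathlib import Path

    from metadata import dedup_tags_by_namespace, embed_metadata_in_file, extract_ytdlp_tags

    # yt-dlp entry metadata; technical fields such as duration are excluded by the extractor.
    entry = {"artist": "The Beatles", "album": "Abbey Road", "duration": 5247}

    tags = extract_ytdlp_tags(entry)            # e.g. ['artist:The Beatles', 'album:Abbey Road']
    tags = dedup_tags_by_namespace(tags + ["artist:Beatles"])  # keep_first=True keeps the first artist: value

    # Copies the tags into the file's metadata without re-encoding; returns False on failure.
    ok = embed_metadata_in_file(Path("song.mp3"), tags, file_kind="audio")
    print(ok, tags)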
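
The pipeline.py docstrings above describe how a stage hands results to the next pipeline stage. A rough sketch of a cmdlet-style run() following that contract; the run() signature and the result dicts mirror the deleted search_file.py and are illustrative rather than an exact API:

    import sys

    import pipeline as ctx  # pipeline execution context used by cmdlet stages

    def run(result, args, config) -> int:
        # Hypothetical results produced by this stage.
        hits = [
            {"title": "example.flac", "store": "local", "hash": "abc123"},
            {"title": "example.mkv", "store": "hydrus", "hash": "def456"},
        ]
        if not hits:
            ctx.print_if_visible("No results found", file=sys.stderr)  # stderr output is never suppressed
            return 1
        for item in hits:
            ctx.emit(item)  # hand each result to the next stage (emit_list would emit them as one tracked list)
        ctx.print_if_visible(f"{len(hits)} result(s)")  # suppressed for intermediate pipeline stages
        return 0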