This commit is contained in:
nose
2025-12-12 21:55:38 -08:00
parent e2ffcab030
commit 85750247cc
78 changed files with 5726 additions and 6239 deletions

View File

@@ -388,25 +388,55 @@ class HydrusNetwork:
results[file_hash] = self._post("/add_url/associate_url", data=body)
return {"batched": results}
def set_notes(self, file_hashes: Union[str, Iterable[str]], notes: dict[str, str], service_name: str) -> dict[str, Any]:
def set_notes(
self,
file_hash: str,
notes: dict[str, str],
*,
merge_cleverly: bool = False,
extend_existing_note_if_possible: bool = True,
conflict_resolution: int = 3,
) -> dict[str, Any]:
"""Add or update notes associated with a file.
Hydrus Client API: POST /add_notes/set_notes
Required JSON args: {"hash": <sha256 hex>, "notes": {name: text}}
"""
if not notes:
raise ValueError("notes mapping must not be empty")
hashes = self._ensure_hashes(file_hashes)
body = {"hashes": hashes, "service_names_to_notes": {service_name: notes}}
file_hash = str(file_hash or "").strip().lower()
if not file_hash:
raise ValueError("file_hash must not be empty")
body: dict[str, Any] = {"hash": file_hash, "notes": notes}
if merge_cleverly:
body["merge_cleverly"] = True
body["extend_existing_note_if_possible"] = bool(extend_existing_note_if_possible)
body["conflict_resolution"] = int(conflict_resolution)
return self._post("/add_notes/set_notes", data=body)
def delete_notes(
self,
file_hashes: Union[str, Iterable[str]],
file_hash: str,
note_names: Sequence[str],
service_name: str,
) -> dict[str, Any]:
names = [name for name in note_names if name]
"""Delete notes associated with a file.
Hydrus Client API: POST /add_notes/delete_notes
Required JSON args: {"hash": <sha256 hex>, "note_names": [..]}
"""
names = [str(name) for name in note_names if str(name or "").strip()]
if not names:
raise ValueError("note_names must not be empty")
hashes = self._ensure_hashes(file_hashes)
body = {"hashes": hashes, "service_names_to_deleted_note_names": {service_name: names}}
return self._post("/add_notes/set_notes", data=body)
file_hash = str(file_hash or "").strip().lower()
if not file_hash:
raise ValueError("file_hash must not be empty")
body = {"hash": file_hash, "note_names": names}
return self._post("/add_notes/delete_notes", data=body)
def get_file_relationships(self, file_hash: str) -> dict[str, Any]:
query = {"hash": file_hash}
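For context, a minimal usage sketch of the reworked per-file note endpoints (not part of the commit; it assumes an already-configured HydrusNetwork client, and that delete_notes no longer takes a service name, as its new body suggests):

client = HydrusNetwork(...)  # construction details omitted; placeholder
file_hash = "0f" * 32        # placeholder sha256 hex

# Create or update two named notes on one file (POST /add_notes/set_notes).
client.set_notes(file_hash, {"summary": "Short synopsis", "source": "imported"})

# Ask Hydrus to merge into an existing note instead of overwriting it.
client.set_notes(file_hash, {"summary": "Extra details"}, merge_cleverly=True)

# Remove one of the notes again (POST /add_notes/delete_notes).
client.delete_notes(file_hash, ["source"])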

View File

@@ -804,7 +804,7 @@ def unlock_link_cmdlet(result: Any, args: Sequence[str], config: Dict[str, Any])
def _register_unlock_link():
"""Register unlock-link command with cmdlet registry if available."""
try:
from cmdlets import register
from cmdlet import register
@register(["unlock-link"])
def unlock_link_wrapper(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
@@ -821,7 +821,7 @@ def _register_unlock_link():
return unlock_link_wrapper
except ImportError:
# If cmdlets module not available, just return None
# If cmdlet module not available, just return None
return None

View File

@@ -7,7 +7,7 @@ This module provides low-level functions for interacting with Archive.org:
- Image downloading and deobfuscation
- PDF creation with metadata
Used by unified_book_downloader.py for the borrowing workflow.
Used by Provider/openlibrary.py for the borrowing workflow.
"""
from __future__ import annotations

View File

@@ -231,11 +231,13 @@ class API_folder_store:
cursor.execute("""
CREATE TABLE IF NOT EXISTS notes (
hash TEXT PRIMARY KEY NOT NULL,
hash TEXT NOT NULL,
name TEXT NOT NULL,
note TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (hash) REFERENCES files(hash) ON DELETE CASCADE
FOREIGN KEY (hash) REFERENCES files(hash) ON DELETE CASCADE,
PRIMARY KEY (hash, name)
)
""")
@@ -261,6 +263,11 @@ class API_folder_store:
cursor.execute("CREATE INDEX IF NOT EXISTS idx_worker_type ON worker(worker_type)")
self._migrate_metadata_schema(cursor)
self._migrate_notes_schema(cursor)
# Notes indices (after migration so columns exist)
cursor.execute("CREATE INDEX IF NOT EXISTS idx_notes_hash ON notes(hash)")
cursor.execute("CREATE INDEX IF NOT EXISTS idx_notes_name ON notes(name)")
self.connection.commit()
logger.debug("Database tables created/verified")
@@ -449,6 +456,42 @@ class API_folder_store:
except Exception as e:
logger.debug(f"Note: Schema import/migration completed with status: {e}")
def _migrate_notes_schema(self, cursor) -> None:
"""Migrate legacy notes schema (hash PRIMARY KEY, note) to named notes (hash,name PRIMARY KEY)."""
try:
cursor.execute("PRAGMA table_info(notes)")
cols = [row[1] for row in cursor.fetchall()]
if not cols:
return
if "name" in cols:
return
logger.info("Migrating legacy notes table to named notes schema")
cursor.execute("""
CREATE TABLE IF NOT EXISTS notes_new (
hash TEXT NOT NULL,
name TEXT NOT NULL,
note TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (hash) REFERENCES files(hash) ON DELETE CASCADE,
PRIMARY KEY (hash, name)
)
""")
# Copy existing notes into the default key
cursor.execute("""
INSERT INTO notes_new (hash, name, note, created_at, updated_at)
SELECT hash, 'default', note, created_at, updated_at
FROM notes
""")
cursor.execute("DROP TABLE notes")
cursor.execute("ALTER TABLE notes_new RENAME TO notes")
self.connection.commit()
except Exception as exc:
logger.debug(f"Notes schema migration skipped/failed: {exc}")
def _update_metadata_modified_time(self, file_hash: str) -> None:
"""Update the time_modified timestamp for a file's metadata."""
try:
@@ -1052,41 +1095,79 @@ class API_folder_store:
return []
def get_note(self, file_hash: str) -> Optional[str]:
"""Get note for a file by hash."""
"""Get the default note for a file by hash."""
try:
cursor = self.connection.cursor()
cursor.execute("""
SELECT n.note FROM notes n
WHERE n.hash = ?
""", (file_hash,))
row = cursor.fetchone()
return row[0] if row else None
notes = self.get_notes(file_hash)
if not notes:
return None
return notes.get("default")
except Exception as e:
logger.error(f"Error getting note for hash {file_hash}: {e}", exc_info=True)
return None
def save_note(self, file_path: Path, note: str) -> None:
"""Save note for a file."""
def get_notes(self, file_hash: str) -> Dict[str, str]:
"""Get all notes for a file by hash."""
try:
cursor = self.connection.cursor()
cursor.execute(
"SELECT name, note FROM notes WHERE hash = ? ORDER BY name ASC",
(file_hash,),
)
out: Dict[str, str] = {}
for name, note in cursor.fetchall() or []:
if not name:
continue
out[str(name)] = str(note or "")
return out
except Exception as e:
logger.error(f"Error getting notes for hash {file_hash}: {e}", exc_info=True)
return {}
def save_note(self, file_path: Path, note: str) -> None:
"""Save the default note for a file."""
self.set_note(file_path, "default", note)
def set_note(self, file_path: Path, name: str, note: str) -> None:
"""Set a named note for a file."""
try:
note_name = str(name or "").strip()
if not note_name:
raise ValueError("Note name is required")
file_hash = self.get_or_create_file_entry(file_path)
cursor = self.connection.cursor()
cursor.execute("""
INSERT INTO notes (hash, note)
VALUES (?, ?)
ON CONFLICT(hash) DO UPDATE SET
cursor.execute(
"""
INSERT INTO notes (hash, name, note)
VALUES (?, ?, ?)
ON CONFLICT(hash, name) DO UPDATE SET
note = excluded.note,
updated_at = CURRENT_TIMESTAMP
""", (file_hash, note))
""",
(file_hash, note_name, note),
)
self.connection.commit()
logger.debug(f"Saved note for {file_path}")
logger.debug(f"Saved note '{note_name}' for {file_path}")
except Exception as e:
logger.error(f"Error saving note for {file_path}: {e}", exc_info=True)
raise
def delete_note(self, file_hash: str, name: str) -> None:
"""Delete a named note for a file by hash."""
try:
note_name = str(name or "").strip()
if not note_name:
raise ValueError("Note name is required")
cursor = self.connection.cursor()
cursor.execute(
"DELETE FROM notes WHERE hash = ? AND name = ?",
(file_hash, note_name),
)
self.connection.commit()
except Exception as e:
logger.error(f"Error deleting note '{name}' for hash {file_hash}: {e}", exc_info=True)
raise
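A short illustration of the new named-note helpers on API_folder_store (sketch only; the store constructor arguments and the file path are placeholders):

from pathlib import Path

store = API_folder_store(...)               # opened against an existing library root
path = Path("library/example.pdf")

store.set_note(path, "summary", "A short synopsis")
store.save_note(path, "free-form text")     # still writes the 'default' note

file_hash = store.get_or_create_file_entry(path)
print(store.get_notes(file_hash))           # {'default': 'free-form text', 'summary': 'A short synopsis'}
print(store.get_note(file_hash))            # 'free-form text'
store.delete_note(file_hash, "summary")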
def search_by_tag(self, tag: str, limit: int = 100) -> List[tuple]:
"""Search for files with a specific tag. Returns list of (hash, file_path) tuples."""
try:
@@ -2027,7 +2108,7 @@ def migrate_tags_to_db(library_root: Path, db: API_folder_store) -> int:
try:
for tags_file in library_root.rglob("*.tag"):
try:
base_path = tags_file.with_suffix("")
base_path = tags_file.with_suffix("")
tags_text = tags_file.read_text(encoding='utf-8')
tags = [line.strip() for line in tags_text.splitlines() if line.strip()]

76
CLI.py
View File

@@ -68,7 +68,7 @@ from typing import Callable
from config import get_local_storage_path, load_config
from cmdlets.catalog import (
from cmdlet.catalog import (
import_cmd_module as _catalog_import_cmd_module,
list_cmdlet_metadata as _catalog_list_cmdlet_metadata,
list_cmdlet_names as _catalog_list_cmdlet_names,
@@ -482,7 +482,7 @@ def _get_cmdlet_names() -> List[str]:
def _import_cmd_module(mod_name: str):
"""Import a cmdlet/native module from cmdlets or cmdnats packages."""
"""Import a cmdlet/native module from cmdlet or cmdnat packages."""
try:
return _catalog_import_cmd_module(mod_name)
except Exception:
@@ -518,7 +518,7 @@ def _get_arg_choices(cmd_name: str, arg_name: str) -> List[str]:
# Dynamic search providers
if normalized_arg == "provider":
try:
from Provider.registry import list_search_providers
from ProviderCore.registry import list_search_providers
providers = list_search_providers(_load_cli_config())
available = [name for name, is_ready in providers.items() if is_ready]
provider_choices = sorted(available) if available else sorted(providers.keys())
@@ -607,9 +607,23 @@ if (
return
arg_names = _get_cmdlet_args(cmd_name)
logical_seen: Set[str] = set()
for arg in arg_names:
if arg.lower().startswith(current_token):
arg_low = arg.lower()
# If the user has only typed '-', prefer single-dash flags (e.g. -url)
# and avoid suggesting both -name and --name for the same logical arg.
if current_token == "-" and arg_low.startswith("--"):
continue
logical = arg.lstrip("-").lower()
if current_token == "-" and logical in logical_seen:
continue
if arg_low.startswith(current_token):
yield CompletionType(arg, start_position=-len(current_token))
if current_token == "-":
logical_seen.add(logical)
if "--help".startswith(current_token):
yield CompletionType("--help", start_position=-len(current_token))
@@ -715,10 +729,21 @@ def _create_cmdlet_cli():
print(f"Error parsing seeds JSON: {e}")
return
try:
from cli_syntax import validate_pipeline_text
syntax_error = validate_pipeline_text(command)
if syntax_error:
print(syntax_error.message, file=sys.stderr)
return
except Exception:
# Best-effort only; if validator can't load, fall back to shlex handling below.
pass
try:
tokens = shlex.split(command)
except ValueError:
tokens = command.split()
except ValueError as exc:
print(f"Syntax error: {exc}", file=sys.stderr)
return
if not tokens:
return
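For reference, the validator contract assumed by both call sites: validate_pipeline_text returns a falsy value when the pipeline text parses, or an error object carrying a .message attribute. A sketch with placeholder command names:

from cli_syntax import validate_pipeline_text

err = validate_pipeline_text('get-file "foo" | add-tag "demo"')
if err:
    print(err.message)   # human-readable description of the syntax problem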
@@ -728,7 +753,7 @@ def _create_cmdlet_cli():
@app.command("repl")
def repl():
"""Start interactive REPL for cmdlets with autocomplete."""
"""Start interactive REPL for cmdlet with autocomplete."""
banner = """
Medeia-Macina
=====================
@@ -967,11 +992,22 @@ def _create_cmdlet_cli():
except Exception:
pipeline_ctx_ref = None
try:
from cli_syntax import validate_pipeline_text
syntax_error = validate_pipeline_text(user_input)
if syntax_error:
print(syntax_error.message, file=sys.stderr)
continue
except Exception:
# Best-effort only; if validator can't load, continue with shlex.
pass
try:
import shlex
tokens = shlex.split(user_input)
except ValueError:
tokens = user_input.split()
except ValueError as exc:
print(f"Syntax error: {exc}", file=sys.stderr)
continue
if not tokens:
continue
@@ -1078,12 +1114,12 @@ def _create_cmdlet_cli():
def _execute_pipeline(tokens: list):
"""Execute a pipeline of cmdlets separated by pipes (|).
"""Execute a pipeline of cmdlet separated by pipes (|).
Example: cmd1 arg1 arg2 | cmd2 arg2 | cmd3 arg3
"""
try:
from cmdlets import REGISTRY
from cmdlet import REGISTRY
import json
import pipeline as ctx
@@ -1333,7 +1369,7 @@ def _execute_pipeline(tokens: list):
filtered = [resolved_items[i] for i in first_stage_selection_indices if 0 <= i < len(resolved_items)]
if filtered:
# Convert filtered items to PipeObjects for consistent pipeline handling
from cmdlets._shared import coerce_to_pipe_object
from cmdlet._shared import coerce_to_pipe_object
filtered_pipe_objs = [coerce_to_pipe_object(item) for item in filtered]
piped_result = filtered_pipe_objs if len(filtered_pipe_objs) > 1 else filtered_pipe_objs[0]
# Build log message with proper string conversion
@@ -1529,7 +1565,7 @@ def _execute_pipeline(tokens: list):
filtered = [resolved_list[i] for i in selection_indices if 0 <= i < len(resolved_list)]
if filtered:
# Convert filtered items to PipeObjects for consistent pipeline handling
from cmdlets._shared import coerce_to_pipe_object
from cmdlet._shared import coerce_to_pipe_object
filtered_pipe_objs = [coerce_to_pipe_object(item) for item in filtered]
piped_result = filtered_pipe_objs if len(filtered_pipe_objs) > 1 else filtered_pipe_objs[0]
print(f"Selected {len(filtered)} item(s) using {cmd_name}")
@@ -1817,13 +1853,13 @@ def _execute_cmdlet(cmd_name: str, args: list):
- @{1,3,5} - select rows 1, 3, 5
"""
try:
from cmdlets import REGISTRY
from cmdlet import REGISTRY
import json
import pipeline as ctx
# Ensure native commands (cmdnats) are loaded
# Ensure native commands (cmdnat) are loaded
try:
from cmdlets.catalog import ensure_registry_loaded as _ensure_registry_loaded
from cmdlet.catalog import ensure_registry_loaded as _ensure_registry_loaded
_ensure_registry_loaded()
except Exception:
pass
@@ -1832,7 +1868,7 @@ def _execute_cmdlet(cmd_name: str, args: list):
cmd_fn = REGISTRY.get(cmd_name)
if not cmd_fn:
# Attempt lazy import of the module and retry
from cmdlets.catalog import import_cmd_module as _catalog_import
from cmdlet.catalog import import_cmd_module as _catalog_import
try:
mod = _catalog_import(cmd_name)
data = getattr(mod, "CMDLET", None) if mod else None
@@ -1893,7 +1929,7 @@ def _execute_cmdlet(cmd_name: str, args: list):
# Filter to selected indices only
result = [piped_items[idx] for idx in selected_indices if 0 <= idx < len(piped_items)]
else:
# No selection specified, pass all items (cmdlets handle lists via normalize_result_input)
# No selection specified, pass all items (cmdlet functions handle lists via normalize_result_input)
result = piped_items
worker_manager = _ensure_worker_manager(config)
@@ -2038,10 +2074,10 @@ def _execute_cmdlet(cmd_name: str, args: list):
def _show_cmdlet_list():
"""Display available cmdlets with full metadata: cmd:name alias:aliases args:args."""
"""Display available cmdlet with full metadata: cmd:name alias:aliases args:args."""
try:
metadata = _catalog_list_cmdlet_metadata()
print("\nAvailable cmdlets:")
print("\nAvailable cmdlet:")
for cmd_name in sorted(metadata.keys()):
info = metadata[cmd_name]
aliases = info.get("aliases", [])

View File

@@ -4,6 +4,31 @@ local msg = require 'mp.msg'
local M = {}
-- Lyrics overlay toggle
-- The Python helper (python -m MPV.lyric) will read this property via IPC.
local LYRIC_VISIBLE_PROP = "user-data/medeia-lyric-visible"
local function lyric_get_visible()
local ok, v = pcall(mp.get_property_native, LYRIC_VISIBLE_PROP)
if not ok or v == nil then
return true
end
return v and true or false
end
local function lyric_set_visible(v)
pcall(mp.set_property_native, LYRIC_VISIBLE_PROP, v and true or false)
end
local function lyric_toggle()
local now = not lyric_get_visible()
lyric_set_visible(now)
mp.osd_message("Lyrics: " .. (now and "on" or "off"), 1)
end
-- Default to visible unless user overrides.
lyric_set_visible(true)
-- Configuration
local opts = {
python_path = "python",
@@ -138,4 +163,8 @@ mp.add_key_binding("mbtn_right", "medios-menu-right-click", M.show_menu)
mp.add_key_binding("ctrl+i", "medios-info", M.get_file_info)
mp.add_key_binding("ctrl+del", "medios-delete", M.delete_current_file)
-- Lyrics toggle (requested: 'L')
mp.add_key_binding("l", "medeia-lyric-toggle", lyric_toggle)
mp.add_key_binding("L", "medeia-lyric-toggle-shift", lyric_toggle)
return M
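On the Python side, the helper can read the same flag over mpv's JSON IPC. A rough sketch (the import path and no-argument constructor are assumptions; the property name and MPV.send come from this commit):

LYRIC_PROP = "user-data/medeia-lyric-visible"

player = MPV()                               # assumes the fixed IPC pipe default
resp = player.send({"command": ["get_property", LYRIC_PROP]}, silent=True)
visible = bool(resp.get("data")) if isinstance(resp, dict) else True   # default to visible, matching the Lua side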

1195
MPV/lyric.py Normal file

File diff suppressed because it is too large.

View File

@@ -12,6 +12,7 @@ import os
import platform
import socket
import subprocess
import sys
import time as _time
from pathlib import Path
from typing import Any, Dict, Optional, List, BinaryIO, cast
@@ -24,6 +25,88 @@ FIXED_IPC_PIPE_NAME = "mpv-medeia-macina"
MPV_LUA_SCRIPT_PATH = str(Path(__file__).resolve().parent / "LUA" / "main.lua")
_LYRIC_PROCESS: Optional[subprocess.Popen] = None
_LYRIC_LOG_FH: Optional[Any] = None
def _windows_list_lyric_helper_pids(ipc_path: str) -> List[int]:
"""Return PIDs of `python -m MPV.lyric --ipc <ipc_path>` helpers (Windows only)."""
if platform.system() != "Windows":
return []
try:
ipc_path = str(ipc_path or "")
except Exception:
ipc_path = ""
if not ipc_path:
return []
# Use CIM to query command lines; output as JSON for robust parsing.
# Note: `ConvertTo-Json` returns a number for single item, array for many, or null.
ps_script = (
"$ipc = "
+ json.dumps(ipc_path)
+ "; "
"Get-CimInstance Win32_Process | "
"Where-Object { $_.CommandLine -and $_.CommandLine -match ' -m\\s+MPV\\.lyric(\\s|$)' -and $_.CommandLine -match ('--ipc\\s+' + [regex]::Escape($ipc)) } | "
"Select-Object -ExpandProperty ProcessId | ConvertTo-Json -Compress"
)
try:
out = subprocess.check_output(
["powershell", "-NoProfile", "-Command", ps_script],
stdin=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
timeout=2,
text=True,
)
except Exception:
return []
txt = (out or "").strip()
if not txt or txt == "null":
return []
try:
obj = json.loads(txt)
except Exception:
return []
pids: List[int] = []
if isinstance(obj, list):
for v in obj:
try:
pids.append(int(v))
except Exception:
pass
else:
try:
pids.append(int(obj))
except Exception:
pass
# De-dupe and filter obvious junk.
uniq: List[int] = []
for pid in pids:
if pid and pid > 0 and pid not in uniq:
uniq.append(pid)
return uniq
def _windows_kill_pids(pids: List[int]) -> None:
if platform.system() != "Windows":
return
for pid in pids or []:
try:
subprocess.run(
["taskkill", "/PID", str(int(pid)), "/F"],
stdin=subprocess.DEVNULL,
stdout=subprocess.DEVNULL,
stderr=subprocess.DEVNULL,
timeout=2,
)
except Exception:
continue
class MPVIPCError(Exception):
"""Raised when MPV IPC communication fails."""
pass
@@ -38,7 +121,7 @@ class MPV:
- Query playlist and currently playing item via IPC
This class intentionally stays "dumb": it does not implement app logic.
App behavior is driven by cmdlets (e.g. `.pipe`) and the bundled Lua script.
App behavior is driven by cmdlet commands (e.g. `.pipe`) and the bundled Lua script.
"""
def __init__(
@@ -55,11 +138,11 @@ class MPV:
lua_path = Path(str(lua_script_path)).resolve()
self.lua_script_path = str(lua_path)
def client(self) -> "MPVIPCClient":
return MPVIPCClient(socket_path=self.ipc_path, timeout=self.timeout)
def client(self, silent: bool = False) -> "MPVIPCClient":
return MPVIPCClient(socket_path=self.ipc_path, timeout=self.timeout, silent=bool(silent))
def is_running(self) -> bool:
client = self.client()
client = self.client(silent=True)
try:
ok = client.connect()
return bool(ok)
@@ -67,7 +150,7 @@ class MPV:
client.disconnect()
def send(self, command: Dict[str, Any] | List[Any], silent: bool = False) -> Optional[Dict[str, Any]]:
client = self.client()
client = self.client(silent=bool(silent))
try:
if not client.connect():
return None
@@ -136,9 +219,109 @@ class MPV:
except Exception:
return
def ensure_lyric_loader_running(self) -> None:
"""Start (or keep) the Python lyric overlay helper.
Uses the fixed IPC pipe name so it can follow playback.
"""
global _LYRIC_PROCESS, _LYRIC_LOG_FH
# Cross-session guard (Windows): avoid spawning multiple helpers across separate CLI runs.
# Also clean up stale helpers when mpv isn't running anymore.
if platform.system() == "Windows":
try:
existing = _windows_list_lyric_helper_pids(str(self.ipc_path))
if existing:
if not self.is_running():
_windows_kill_pids(existing)
return
# If multiple exist, kill them and start fresh (prevents double overlays).
if len(existing) == 1:
return
_windows_kill_pids(existing)
except Exception:
pass
try:
if _LYRIC_PROCESS is not None and _LYRIC_PROCESS.poll() is None:
return
except Exception:
pass
try:
if _LYRIC_PROCESS is not None:
try:
_LYRIC_PROCESS.terminate()
except Exception:
pass
finally:
_LYRIC_PROCESS = None
try:
if _LYRIC_LOG_FH is not None:
_LYRIC_LOG_FH.close()
except Exception:
pass
_LYRIC_LOG_FH = None
try:
try:
tmp_dir = Path(os.environ.get("TEMP") or os.environ.get("TMP") or ".")
except Exception:
tmp_dir = Path(".")
log_path = str((tmp_dir / "medeia-mpv-lyric.log").resolve())
# Ensure the module can be imported even when the app is launched from a different cwd.
# Repo root = parent of the MPV package directory.
try:
repo_root = Path(__file__).resolve().parent.parent
except Exception:
repo_root = Path.cwd()
cmd: List[str] = [
sys.executable,
"-m",
"MPV.lyric",
"--ipc",
str(self.ipc_path),
"--log",
log_path,
]
# Redirect helper stdout/stderr to the log file so we can see crashes/import errors.
try:
_LYRIC_LOG_FH = open(log_path, "a", encoding="utf-8", errors="replace")
except Exception:
_LYRIC_LOG_FH = None
kwargs: Dict[str, Any] = {
"stdin": subprocess.DEVNULL,
"stdout": _LYRIC_LOG_FH or subprocess.DEVNULL,
"stderr": _LYRIC_LOG_FH or subprocess.DEVNULL,
}
# Ensure immediate flushing to the log file.
env = os.environ.copy()
env["PYTHONUNBUFFERED"] = "1"
try:
existing_pp = env.get("PYTHONPATH")
env["PYTHONPATH"] = str(repo_root) if not existing_pp else (str(repo_root) + os.pathsep + str(existing_pp))
except Exception:
pass
kwargs["env"] = env
# Make the current directory the repo root so `-m MPV.lyric` resolves reliably.
kwargs["cwd"] = str(repo_root)
if platform.system() == "Windows":
kwargs["creationflags"] = 0x00000008 # DETACHED_PROCESS
_LYRIC_PROCESS = subprocess.Popen(cmd, **kwargs)
debug(f"Lyric loader started (log={log_path})")
except Exception as exc:
debug(f"Lyric loader failed to start: {exc}")
def wait_for_ipc(self, retries: int = 20, delay_seconds: float = 0.2) -> bool:
for _ in range(max(1, retries)):
client = self.client()
client = self.client(silent=True)
try:
if client.connect():
return True
@@ -233,7 +416,7 @@ class MPVIPCClient:
It handles platform-specific differences (Windows named pipes vs Unix sockets).
"""
def __init__(self, socket_path: Optional[str] = None, timeout: float = 5.0):
def __init__(self, socket_path: Optional[str] = None, timeout: float = 5.0, silent: bool = False):
"""Initialize MPV IPC client.
Args:
@@ -244,6 +427,7 @@ class MPVIPCClient:
self.socket_path = socket_path or get_ipc_pipe_path()
self.sock: socket.socket | BinaryIO | None = None
self.is_windows = platform.system() == "Windows"
self.silent = bool(silent)
def connect(self) -> bool:
"""Connect to mpv IPC socket.
@@ -259,17 +443,20 @@ class MPVIPCClient:
self.sock = open(self.socket_path, 'r+b', buffering=0)
return True
except (OSError, IOError) as exc:
debug(f"Failed to connect to MPV named pipe: {exc}")
if not self.silent:
debug(f"Failed to connect to MPV named pipe: {exc}")
return False
else:
# Unix domain socket (Linux, macOS)
if not os.path.exists(self.socket_path):
debug(f"IPC socket not found: {self.socket_path}")
if not self.silent:
debug(f"IPC socket not found: {self.socket_path}")
return False
af_unix = getattr(socket, "AF_UNIX", None)
if af_unix is None:
debug("IPC AF_UNIX is not available on this platform")
if not self.silent:
debug("IPC AF_UNIX is not available on this platform")
return False
self.sock = socket.socket(af_unix, socket.SOCK_STREAM)
@@ -277,7 +464,8 @@ class MPVIPCClient:
self.sock.connect(self.socket_path)
return True
except Exception as exc:
debug(f"Failed to connect to MPV IPC: {exc}")
if not self.silent:
debug(f"Failed to connect to MPV IPC: {exc}")
self.sock = None
return False
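The silent flag mainly exists so routine liveness probes do not spam the debug log. A sketch of the intended pattern (hypothetical caller code):

player = MPV()
if not player.is_running():                  # is_running() connects via client(silent=True)
    # launch mpv here, then wait for the pipe to appear
    if not player.wait_for_ipc(retries=20, delay_seconds=0.2):
        raise MPVIPCError("mpv IPC did not come up")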

View File

@@ -1,5 +1,5 @@
"""Provider plugin modules.
Concrete provider implementations live in this package.
The public entrypoint/registry is Provider.registry.
The public entrypoint/registry is ProviderCore.registry.
"""

View File

@@ -3,7 +3,7 @@ from __future__ import annotations
import sys
from typing import Any, Dict, List, Optional
from Provider._base import SearchProvider, SearchResult
from ProviderCore.base import SearchProvider, SearchResult
from SYS.logger import log, debug
try:

View File

@@ -1,12 +1,24 @@
from __future__ import annotations
import logging
import re
import requests
import sys
from typing import Any, Dict, List, Optional
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Tuple
from urllib.parse import quote, urljoin, urlparse, unquote
from Provider._base import SearchProvider, SearchResult
from ProviderCore.base import SearchProvider, SearchResult
from SYS.logger import log
# Optional dependencies
try:
from bs4 import BeautifulSoup
except ImportError:
BeautifulSoup = None
class Libgen(SearchProvider):
"""Search provider for Library Genesis books."""
@@ -20,8 +32,7 @@ class Libgen(SearchProvider):
filters = filters or {}
try:
from Provider.unified_book_downloader import UnifiedBookDownloader
from Provider.query_parser import parse_query, get_field, get_free_text
from cli_syntax import get_field, get_free_text, parse_query
parsed = parse_query(query)
isbn = get_field(parsed, "isbn")
@@ -31,8 +42,11 @@ class Libgen(SearchProvider):
search_query = isbn or title or author or free_text or query
downloader = UnifiedBookDownloader(config=self.config)
books = downloader.search_libgen(search_query, limit=limit)
books = search_libgen(
search_query,
limit=limit,
log_error=lambda msg: log(msg, file=sys.stderr),
)
results: List[SearchResult] = []
for idx, book in enumerate(books, 1):
@@ -91,8 +105,455 @@ class Libgen(SearchProvider):
def validate(self) -> bool:
try:
from Provider.unified_book_downloader import UnifiedBookDownloader # noqa: F401
return True
return BeautifulSoup is not None
except Exception:
return False
LogFn = Optional[Callable[[str], None]]
ErrorFn = Optional[Callable[[str], None]]
DEFAULT_TIMEOUT = 20.0
DEFAULT_LIMIT = 50
# Mirrors to try in order
MIRRORS = [
"https://libgen.is",
"https://libgen.rs",
"https://libgen.st",
"http://libgen.is",
"http://libgen.rs",
"http://libgen.st",
"https://libgen.li", # Different structure, fallback
"http://libgen.li",
"https://libgen.gl", # Different structure, fallback
"http://libgen.gl",
]
logging.getLogger(__name__).setLevel(logging.INFO)
def _call(logger: LogFn, message: str) -> None:
if logger:
logger(message)
class LibgenSearch:
"""Robust LibGen searcher."""
def __init__(self, session: Optional[requests.Session] = None):
self.session = session or requests.Session()
self.session.headers.update({
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
})
def search(self, query: str, limit: int = DEFAULT_LIMIT) -> List[Dict[str, Any]]:
"""Search LibGen mirrors."""
if not BeautifulSoup:
logging.error("BeautifulSoup not installed. Cannot search LibGen.")
return []
for mirror in MIRRORS:
try:
if "libgen.li" in mirror or "libgen.gl" in mirror:
results = self._search_libgen_li(mirror, query, limit)
else:
results = self._search_libgen_rs(mirror, query, limit)
if results:
return results
except Exception as e:
logging.debug(f"Mirror {mirror} failed: {e}")
continue
return []
def _search_libgen_rs(self, mirror: str, query: str, limit: int) -> List[Dict[str, Any]]:
"""Search libgen.rs/is/st style mirrors."""
url = f"{mirror}/search.php"
params = {
"req": query,
"res": 100,
"column": "def",
"open": 0,
"view": "simple",
"phrase": 1,
}
resp = self.session.get(url, params=params, timeout=DEFAULT_TIMEOUT)
resp.raise_for_status()
soup = BeautifulSoup(resp.text, "html.parser")
table = soup.find("table", {"class": "c"})
if not table:
tables = soup.find_all("table")
for t in tables:
if len(t.find_all("tr")) > 5:
table = t
break
if not table:
return []
results: List[Dict[str, Any]] = []
rows = table.find_all("tr")[1:]
for row in rows:
cols = row.find_all("td")
if len(cols) < 9:
continue
try:
libgen_id = cols[0].get_text(strip=True)
authors = [a.get_text(strip=True) for a in cols[1].find_all("a")]
if not authors:
authors = [cols[1].get_text(strip=True)]
title_tag = cols[2].find("a")
title = title_tag.get_text(strip=True) if title_tag else cols[2].get_text(strip=True)
md5 = ""
if title_tag and title_tag.has_attr("href"):
href = title_tag["href"]
match = re.search(r"md5=([a-fA-F0-9]{32})", href)
if match:
md5 = match.group(1)
publisher = cols[3].get_text(strip=True)
year = cols[4].get_text(strip=True)
pages = cols[5].get_text(strip=True)
language = cols[6].get_text(strip=True)
size = cols[7].get_text(strip=True)
extension = cols[8].get_text(strip=True)
mirror_links = []
for i in range(9, len(cols)):
a = cols[i].find("a")
if a and a.has_attr("href"):
mirror_links.append(a["href"])
if md5:
download_link = f"http://library.lol/main/{md5}"
elif mirror_links:
download_link = mirror_links[0]
else:
download_link = ""
results.append({
"id": libgen_id,
"title": title,
"author": ", ".join(authors),
"publisher": publisher,
"year": year,
"pages": pages,
"language": language,
"filesize_str": size,
"extension": extension,
"md5": md5,
"mirror_url": download_link,
"cover": "",
})
if len(results) >= limit:
break
except Exception as e:
logging.debug(f"Error parsing row: {e}")
continue
return results
def _search_libgen_li(self, mirror: str, query: str, limit: int) -> List[Dict[str, Any]]:
"""Search libgen.li/gl style mirrors."""
url = f"{mirror}/index.php"
params = {
"req": query,
"res": 100,
"covers": "on",
"filesuns": "all",
}
resp = self.session.get(url, params=params, timeout=DEFAULT_TIMEOUT)
resp.raise_for_status()
soup = BeautifulSoup(resp.text, "html.parser")
table = soup.find("table", {"id": "tablelibgen"})
if not table:
table = soup.find("table", {"class": "table table-striped"})
if not table:
return []
results: List[Dict[str, Any]] = []
rows = table.find_all("tr")[1:]
for row in rows:
cols = row.find_all("td")
if len(cols) < 9:
continue
try:
title_col = cols[1]
title_link = title_col.find("a")
title = title_link.get_text(strip=True) if title_link else title_col.get_text(strip=True)
libgen_id = ""
if title_link and title_link.has_attr("href"):
href = title_link["href"]
match = re.search(r"id=(\d+)", href)
if match:
libgen_id = match.group(1)
authors = cols[2].get_text(strip=True)
publisher = cols[3].get_text(strip=True)
year = cols[4].get_text(strip=True)
language = cols[5].get_text(strip=True)
pages = cols[6].get_text(strip=True)
size = cols[7].get_text(strip=True)
extension = cols[8].get_text(strip=True)
mirror_url = ""
if title_link:
href = title_link["href"]
if href.startswith("/"):
mirror_url = mirror + href
else:
mirror_url = urljoin(mirror, href)
results.append({
"id": libgen_id,
"title": title,
"author": authors,
"publisher": publisher,
"year": year,
"pages": pages,
"language": language,
"filesize_str": size,
"extension": extension,
"md5": "",
"mirror_url": mirror_url,
})
if len(results) >= limit:
break
except Exception:
continue
return results
def search_libgen(
query: str,
limit: int = DEFAULT_LIMIT,
*,
log_info: LogFn = None,
log_error: ErrorFn = None,
session: Optional[requests.Session] = None,
) -> List[Dict[str, Any]]:
"""Search Libgen using the robust scraper."""
searcher = LibgenSearch(session=session)
try:
results = searcher.search(query, limit=limit)
_call(log_info, f"[libgen] Found {len(results)} results")
return results
except Exception as e:
_call(log_error, f"[libgen] Search failed: {e}")
return []
def _resolve_download_url(
session: requests.Session,
url: str,
log_info: LogFn = None,
) -> Optional[str]:
"""Resolve the final download URL by following the LibGen chain."""
current_url = url
visited = set()
for _ in range(6):
if current_url in visited:
break
visited.add(current_url)
_call(log_info, f"[resolve] Checking: {current_url}")
if current_url.lower().endswith((".pdf", ".epub", ".mobi", ".djvu", ".azw3", ".cbz", ".cbr")):
return current_url
try:
with session.get(current_url, stream=True, timeout=30) as resp:
resp.raise_for_status()
ct = resp.headers.get("Content-Type", "").lower()
if "text/html" not in ct:
return current_url
content = resp.text
except Exception as e:
_call(log_info, f"[resolve] Failed to fetch {current_url}: {e}")
return None
soup = BeautifulSoup(content, "html.parser")
get_link = soup.find("a", string=re.compile(r"^GET$", re.IGNORECASE))
if not get_link:
h2_get = soup.find("h2", string=re.compile(r"^GET$", re.IGNORECASE))
if h2_get and h2_get.parent.name == "a":
get_link = h2_get.parent
if get_link and get_link.has_attr("href"):
return urljoin(current_url, get_link["href"])
if "series.php" in current_url:
edition_link = soup.find("a", href=re.compile(r"edition\.php"))
if edition_link:
current_url = urljoin(current_url, edition_link["href"])
continue
if "edition.php" in current_url:
file_link = soup.find("a", href=re.compile(r"file\.php"))
if file_link:
current_url = urljoin(current_url, file_link["href"])
continue
if "file.php" in current_url:
libgen_link = soup.find("a", title="libgen")
if not libgen_link:
libgen_link = soup.find("a", string=re.compile(r"Libgen", re.IGNORECASE))
if libgen_link and libgen_link.has_attr("href"):
current_url = urljoin(current_url, libgen_link["href"])
continue
if "ads.php" in current_url:
get_php_link = soup.find("a", href=re.compile(r"get\.php"))
if get_php_link:
return urljoin(current_url, get_php_link["href"])
for text in ["Cloudflare", "IPFS.io", "Infura"]:
link = soup.find("a", string=re.compile(text, re.IGNORECASE))
if link and link.has_attr("href"):
return urljoin(current_url, link["href"])
break
return None
def _guess_filename_extension(download_url: str, headers: Dict[str, str]) -> Optional[str]:
"""Guess the file extension from headers or the download URL."""
content_disposition = headers.get("content-disposition", "")
if content_disposition:
match = re.search(r"filename\*?=(?:UTF-8\'\'|\"?)([^\";]+)", content_disposition, flags=re.IGNORECASE)
if match:
filename = unquote(match.group(1).strip('"'))
suffix = Path(filename).suffix
if suffix:
return suffix.lstrip(".")
parsed = urlparse(download_url)
suffix = Path(parsed.path).suffix
if suffix:
return suffix.lstrip(".")
content_type = headers.get("content-type", "").lower()
mime_map = {
"application/pdf": "pdf",
"application/epub+zip": "epub",
"application/x-mobipocket-ebook": "mobi",
"application/x-cbr": "cbr",
"application/x-cbz": "cbz",
"application/zip": "zip",
}
for mime, ext in mime_map.items():
if mime in content_type:
return ext
return None
def _apply_extension(path: Path, extension: Optional[str]) -> Path:
"""Rename the path to match the detected extension, if needed."""
if not extension:
return path
suffix = extension if extension.startswith(".") else f".{extension}"
if path.suffix.lower() == suffix.lower():
return path
candidate = path.with_suffix(suffix)
base_stem = path.stem
counter = 1
while candidate.exists() and counter < 100:
candidate = path.with_name(f"{base_stem}({counter}){suffix}")
counter += 1
try:
path.replace(candidate)
return candidate
except Exception:
return path
def download_from_mirror(
mirror_url: str,
output_path: Path,
*,
log_info: LogFn = None,
log_error: ErrorFn = None,
session: Optional[requests.Session] = None,
progress_callback: Optional[Callable[[int, int], None]] = None,
) -> Tuple[bool, Optional[Path]]:
"""Download file from a LibGen mirror URL with optional progress tracking."""
session = session or requests.Session()
output_path = Path(output_path)
output_path.parent.mkdir(parents=True, exist_ok=True)
try:
_call(log_info, f"[download] Resolving download link from: {mirror_url}")
download_url = _resolve_download_url(session, mirror_url, log_info)
if not download_url:
_call(log_error, "[download] Could not find direct download link")
return False, None
_call(log_info, f"[download] Downloading from: {download_url}")
downloaded = 0
total_size = 0
headers: Dict[str, str] = {}
with session.get(download_url, stream=True, timeout=60) as r:
r.raise_for_status()
headers = dict(r.headers)
ct = headers.get("content-type", "").lower()
if "text/html" in ct:
_call(log_error, "[download] Final URL returned HTML, not a file.")
return False, None
total_size = int(headers.get("content-length", 0) or 0)
with open(output_path, "wb") as f:
for chunk in r.iter_content(chunk_size=8192):
if chunk:
f.write(chunk)
downloaded += len(chunk)
if progress_callback:
progress_callback(downloaded, total_size)
final_extension = _guess_filename_extension(download_url, headers)
final_path = _apply_extension(output_path, final_extension)
if progress_callback and total_size > 0:
progress_callback(downloaded, total_size)
_call(log_info, f"[download] Saved to {final_path}")
return True, final_path
except Exception as e:
_call(log_error, f"[download] Download failed: {e}")
return False, None
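A small end-to-end sketch of the module-level helpers now living in Provider/libgen.py (illustrative only; needs network access plus bs4, and the output path is a placeholder):

from pathlib import Path

books = search_libgen("Morals and Dogma", limit=5, log_info=print, log_error=print)
if books:
    ok, saved = download_from_mirror(
        books[0]["mirror_url"],
        Path("downloads/morals_and_dogma"),  # extension is corrected after download
        log_info=print,
        log_error=print,
    )
    print(ok, saved)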

View File

@@ -1,523 +0,0 @@
"""Shared Library Genesis search and download helpers.
Replaces the old libgen backend with a robust scraper based on libgen-api-enhanced logic.
Targets libgen.is/rs/st mirrors and parses the results table directly.
"""
from __future__ import annotations
import logging
import re
import requests
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Tuple
from urllib.parse import quote, urljoin, urlparse, unquote
# Optional dependencies
try:
from bs4 import BeautifulSoup
except ImportError:
BeautifulSoup = None
LogFn = Optional[Callable[[str], None]]
ErrorFn = Optional[Callable[[str], None]]
DEFAULT_TIMEOUT = 20.0
DEFAULT_LIMIT = 50
# Mirrors to try in order
MIRRORS = [
"https://libgen.is",
"https://libgen.rs",
"https://libgen.st",
"http://libgen.is",
"http://libgen.rs",
"http://libgen.st",
"https://libgen.li", # Different structure, fallback
"http://libgen.li",
"https://libgen.gl", # Different structure, fallback
"http://libgen.gl",
]
logging.getLogger(__name__).setLevel(logging.INFO)
def _call(logger: LogFn, message: str) -> None:
if logger:
logger(message)
class LibgenSearch:
"""Robust LibGen searcher."""
def __init__(self, session: Optional[requests.Session] = None):
self.session = session or requests.Session()
self.session.headers.update({
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
})
def search(self, query: str, limit: int = DEFAULT_LIMIT) -> List[Dict[str, Any]]:
"""Search LibGen mirrors."""
if not BeautifulSoup:
logging.error("BeautifulSoup not installed. Cannot search LibGen.")
return []
for mirror in MIRRORS:
try:
if "libgen.li" in mirror or "libgen.gl" in mirror:
results = self._search_libgen_li(mirror, query, limit)
else:
results = self._search_libgen_rs(mirror, query, limit)
if results:
return results
except Exception as e:
logging.debug(f"Mirror {mirror} failed: {e}")
continue
return []
def _search_libgen_rs(self, mirror: str, query: str, limit: int) -> List[Dict[str, Any]]:
"""Search libgen.rs/is/st style mirrors."""
# Search URL: /search.php?req=QUERY&res=100&column=def
url = f"{mirror}/search.php"
params = {
"req": query,
"res": 100, # Request more to filter later
"column": "def",
"open": 0,
"view": "simple",
"phrase": 1,
}
resp = self.session.get(url, params=params, timeout=DEFAULT_TIMEOUT)
resp.raise_for_status()
soup = BeautifulSoup(resp.text, "html.parser")
# Find the table with results. usually class 'c'
table = soup.find("table", {"class": "c"})
if not table:
# Try finding by structure (table with many rows)
tables = soup.find_all("table")
for t in tables:
if len(t.find_all("tr")) > 5:
table = t
break
if not table:
return []
results = []
# Skip header row
rows = table.find_all("tr")[1:]
for row in rows:
cols = row.find_all("td")
if len(cols) < 9:
continue
# Columns:
# 0: ID
# 1: Author(s)
# 2: Title
# 3: Publisher
# 4: Year
# 5: Pages
# 6: Language
# 7: Size
# 8: Extension
# 9+: Mirrors
try:
libgen_id = cols[0].get_text(strip=True)
authors = [a.get_text(strip=True) for a in cols[1].find_all("a")]
if not authors:
authors = [cols[1].get_text(strip=True)]
title_tag = cols[2].find("a")
title = title_tag.get_text(strip=True) if title_tag else cols[2].get_text(strip=True)
# Extract MD5 from title link if possible (often in href)
# href='book/index.php?md5=...'
md5 = ""
if title_tag and title_tag.has_attr("href"):
href = title_tag["href"]
match = re.search(r"md5=([a-fA-F0-9]{32})", href)
if match:
md5 = match.group(1)
publisher = cols[3].get_text(strip=True)
year = cols[4].get_text(strip=True)
pages = cols[5].get_text(strip=True)
language = cols[6].get_text(strip=True)
size = cols[7].get_text(strip=True)
extension = cols[8].get_text(strip=True)
# Mirrors
# Usually col 9 is http://library.lol/main/MD5
mirror_links = []
for i in range(9, len(cols)):
a = cols[i].find("a")
if a and a.has_attr("href"):
mirror_links.append(a["href"])
# Construct direct download page link (library.lol)
# If we have MD5, we can guess it: http://library.lol/main/{md5}
if md5:
download_link = f"http://library.lol/main/{md5}"
elif mirror_links:
download_link = mirror_links[0]
else:
download_link = ""
results.append({
"id": libgen_id,
"title": title,
"author": ", ".join(authors),
"publisher": publisher,
"year": year,
"pages": pages,
"language": language,
"filesize_str": size,
"extension": extension,
"md5": md5,
"mirror_url": download_link,
"cover": "", # Could extract from hover if needed
})
if len(results) >= limit:
break
except Exception as e:
logging.debug(f"Error parsing row: {e}")
continue
return results
def _search_libgen_li(self, mirror: str, query: str, limit: int) -> List[Dict[str, Any]]:
"""Search libgen.li/gl style mirrors."""
# Search URL: /index.php?req=QUERY&columns[]=t&columns[]=a...
url = f"{mirror}/index.php"
params = {
"req": query,
"res": 100,
"covers": "on",
"filesuns": "all",
}
resp = self.session.get(url, params=params, timeout=DEFAULT_TIMEOUT)
resp.raise_for_status()
soup = BeautifulSoup(resp.text, "html.parser")
table = soup.find("table", {"id": "tablelibgen"})
if not table:
table = soup.find("table", {"class": "table table-striped"})
if not table:
return []
results = []
rows = table.find_all("tr")[1:]
for row in rows:
cols = row.find_all("td")
if len(cols) < 9:
continue
try:
# Structure is different
# 0: Cover
# 1: Title (with link to file.php?id=...)
# 2: Author
# 3: Publisher
# 4: Year
# 5: Language
# 6: Pages
# 7: Size
# 8: Extension
# 9: Mirrors
title_col = cols[1]
title_link = title_col.find("a")
title = title_link.get_text(strip=True) if title_link else title_col.get_text(strip=True)
# Extract ID from link
libgen_id = ""
if title_link and title_link.has_attr("href"):
href = title_link["href"]
# href is usually "file.php?id=..." or "edition.php?id=..."
match = re.search(r"id=(\d+)", href)
if match:
libgen_id = match.group(1)
authors = cols[2].get_text(strip=True)
publisher = cols[3].get_text(strip=True)
year = cols[4].get_text(strip=True)
language = cols[5].get_text(strip=True)
pages = cols[6].get_text(strip=True)
size = cols[7].get_text(strip=True)
extension = cols[8].get_text(strip=True)
# Mirror link
# Usually in col 9 or title link
mirror_url = ""
if title_link:
href = title_link["href"]
if href.startswith("/"):
mirror_url = mirror + href
else:
mirror_url = urljoin(mirror, href)
results.append({
"id": libgen_id,
"title": title,
"author": authors,
"publisher": publisher,
"year": year,
"pages": pages,
"language": language,
"filesize_str": size,
"extension": extension,
"md5": "", # .li doesn't show MD5 easily in table
"mirror_url": mirror_url,
})
if len(results) >= limit:
break
except Exception:
continue
return results
def search_libgen(
query: str,
limit: int = DEFAULT_LIMIT,
*,
log_info: LogFn = None,
log_error: ErrorFn = None,
session: Optional[requests.Session] = None,
) -> List[Dict[str, Any]]:
"""Search Libgen using the robust scraper."""
searcher = LibgenSearch(session=session)
try:
results = searcher.search(query, limit=limit)
_call(log_info, f"[libgen] Found {len(results)} results")
return results
except Exception as e:
_call(log_error, f"[libgen] Search failed: {e}")
return []
def _resolve_download_url(
session: requests.Session,
url: str,
log_info: LogFn = None
) -> Optional[str]:
"""Resolve the final download URL by following the LibGen chain."""
current_url = url
visited = set()
# Max hops to prevent infinite loops
for _ in range(6):
if current_url in visited:
break
visited.add(current_url)
_call(log_info, f"[resolve] Checking: {current_url}")
# Simple heuristic: if it looks like a file, return it
if current_url.lower().endswith(('.pdf', '.epub', '.mobi', '.djvu', '.azw3', '.cbz', '.cbr')):
return current_url
try:
# Use HEAD first to check content type if possible, but some mirrors block HEAD or return 405
# So we'll just GET with stream=True to peek headers/content without downloading everything
with session.get(current_url, stream=True, timeout=30) as resp:
resp.raise_for_status()
ct = resp.headers.get("Content-Type", "").lower()
if "text/html" not in ct:
# It's a binary file
return current_url
# It's HTML, read content
content = resp.text
except Exception as e:
_call(log_info, f"[resolve] Failed to fetch {current_url}: {e}")
return None
soup = BeautifulSoup(content, "html.parser")
# 1. Check for "GET" link (library.lol / ads.php style)
# Usually <h2>GET</h2> inside <a> or just text "GET"
get_link = soup.find("a", string=re.compile(r"^GET$", re.IGNORECASE))
if not get_link:
# Try finding <a> containing <h2>GET</h2>
h2_get = soup.find("h2", string=re.compile(r"^GET$", re.IGNORECASE))
if h2_get and h2_get.parent.name == "a":
get_link = h2_get.parent
if get_link and get_link.has_attr("href"):
return urljoin(current_url, get_link["href"])
# 2. Check for "series.php" -> "edition.php"
if "series.php" in current_url:
# Find first edition link
edition_link = soup.find("a", href=re.compile(r"edition\.php"))
if edition_link:
current_url = urljoin(current_url, edition_link["href"])
continue
# 3. Check for "edition.php" -> "file.php"
if "edition.php" in current_url:
file_link = soup.find("a", href=re.compile(r"file\.php"))
if file_link:
current_url = urljoin(current_url, file_link["href"])
continue
# 4. Check for "file.php" -> "ads.php" (Libgen badge)
if "file.php" in current_url:
# Look for link with title="libgen" or text "Libgen"
libgen_link = soup.find("a", title="libgen")
if not libgen_link:
libgen_link = soup.find("a", string=re.compile(r"Libgen", re.IGNORECASE))
if libgen_link and libgen_link.has_attr("href"):
current_url = urljoin(current_url, libgen_link["href"])
continue
# 5. Check for "ads.php" -> "get.php" (Fallback if GET link logic above failed)
if "ads.php" in current_url:
get_php_link = soup.find("a", href=re.compile(r"get\.php"))
if get_php_link:
return urljoin(current_url, get_php_link["href"])
# 6. Library.lol / generic fallback
for text in ["Cloudflare", "IPFS.io", "Infura"]:
link = soup.find("a", string=re.compile(text, re.IGNORECASE))
if link and link.has_attr("href"):
return urljoin(current_url, link["href"])
# If we found nothing new, stop
break
return None
def _guess_filename_extension(download_url: str, headers: Dict[str, str]) -> Optional[str]:
"""Guess the file extension from headers or the download URL."""
content_disposition = headers.get("content-disposition", "")
if content_disposition:
match = re.search(r'filename\*?=(?:UTF-8\'\'|"?)([^";]+)', content_disposition, flags=re.IGNORECASE)
if match:
filename = unquote(match.group(1).strip('"'))
suffix = Path(filename).suffix
if suffix:
return suffix.lstrip('.')
parsed = urlparse(download_url)
suffix = Path(parsed.path).suffix
if suffix:
return suffix.lstrip('.')
content_type = headers.get('content-type', '').lower()
mime_map = {
'application/pdf': 'pdf',
'application/epub+zip': 'epub',
'application/x-mobipocket-ebook': 'mobi',
'application/x-cbr': 'cbr',
'application/x-cbz': 'cbz',
'application/zip': 'zip',
}
for mime, ext in mime_map.items():
if mime in content_type:
return ext
return None
def _apply_extension(path: Path, extension: Optional[str]) -> Path:
"""Rename the path to match the detected extension, if needed."""
if not extension:
return path
suffix = extension if extension.startswith('.') else f'.{extension}'
if path.suffix.lower() == suffix.lower():
return path
candidate = path.with_suffix(suffix)
base_stem = path.stem
counter = 1
while candidate.exists() and counter < 100:
candidate = path.with_name(f"{base_stem}({counter}){suffix}")
counter += 1
try:
path.replace(candidate)
return candidate
except Exception:
return path
def download_from_mirror(
mirror_url: str,
output_path: Path,
*,
log_info: LogFn = None,
log_error: ErrorFn = None,
session: Optional[requests.Session] = None,
progress_callback: Optional[Callable[[int, int], None]] = None,
) -> Tuple[bool, Optional[Path]]:
"""Download file from a LibGen mirror URL with optional progress tracking."""
session = session or requests.Session()
output_path = Path(output_path)
output_path.parent.mkdir(parents=True, exist_ok=True)
try:
_call(log_info, f"[download] Resolving download link from: {mirror_url}")
download_url = _resolve_download_url(session, mirror_url, log_info)
if not download_url:
_call(log_error, "[download] Could not find direct download link")
return False, None
_call(log_info, f"[download] Downloading from: {download_url}")
downloaded = 0
total_size = 0
headers: Dict[str, str] = {}
with session.get(download_url, stream=True, timeout=60) as r:
r.raise_for_status()
headers = dict(r.headers)
# Verify it's not HTML (error page)
ct = headers.get("content-type", "").lower()
if "text/html" in ct:
_call(log_error, "[download] Final URL returned HTML, not a file.")
return False, None
total_size = int(headers.get("content-length", 0) or 0)
with open(output_path, "wb") as f:
for chunk in r.iter_content(chunk_size=8192):
if chunk:
f.write(chunk)
downloaded += len(chunk)
if progress_callback:
progress_callback(downloaded, total_size)
final_extension = _guess_filename_extension(download_url, headers)
final_path = _apply_extension(output_path, final_extension)
if progress_callback and total_size > 0:
progress_callback(downloaded, total_size)
_call(log_info, f"[download] Saved to {final_path}")
return True, final_path
except Exception as e:
_call(log_error, f"[download] Download failed: {e}")
return False, None

View File

@@ -6,7 +6,7 @@ from typing import Any
import requests
from Provider._base import FileProvider
from ProviderCore.base import FileProvider
class Matrix(FileProvider):

358
Provider/openlibrary.py Normal file
View File

@@ -0,0 +1,358 @@
from __future__ import annotations
import shutil
import sys
import tempfile
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
import requests
from ProviderCore.base import SearchProvider, SearchResult
from ProviderCore.download import download_file, sanitize_filename
from cli_syntax import get_field, get_free_text, parse_query
from SYS.logger import log
from SYS.utils import unique_path
def _looks_like_isbn(text: str) -> bool:
t = (text or "").replace("-", "").strip()
return t.isdigit() and len(t) in (10, 13)
def _first_str(value: Any) -> Optional[str]:
if isinstance(value, str):
v = value.strip()
return v if v else None
if isinstance(value, list) and value:
first = value[0]
if isinstance(first, str):
v = first.strip()
return v if v else None
return str(first) if first is not None else None
return None
def _resolve_edition_id(doc: Dict[str, Any]) -> str:
# OpenLibrary Search API typically provides edition_key: ["OL...M", ...]
edition_key = doc.get("edition_key")
if isinstance(edition_key, list) and edition_key:
return str(edition_key[0]).strip()
# Fallback: sometimes key can be /books/OL...M
key = doc.get("key")
if isinstance(key, str) and key.startswith("/books/"):
return key.split("/books/", 1)[1].strip("/")
return ""
def _check_lendable(session: requests.Session, edition_id: str) -> Tuple[bool, str]:
"""Return (lendable, status_text) using OpenLibrary volumes API."""
try:
if not edition_id or not edition_id.startswith("OL") or not edition_id.endswith("M"):
return False, "not-an-edition"
url = f"https://openlibrary.org/api/volumes/brief/json/OLID:{edition_id}"
resp = session.get(url, timeout=10)
resp.raise_for_status()
data = resp.json() or {}
wrapped = data.get(f"OLID:{edition_id}")
if not isinstance(wrapped, dict):
return False, "no-availability"
items = wrapped.get("items")
if not isinstance(items, list) or not items:
return False, "no-items"
first = items[0]
status_val = ""
if isinstance(first, dict):
status_val = str(first.get("status", ""))
else:
status_val = str(first)
return ("lendable" in status_val.lower()), status_val
except requests.exceptions.Timeout:
return False, "api-timeout"
except Exception:
return False, "api-error"
def _resolve_archive_id(session: requests.Session, edition_id: str, ia_candidates: List[str]) -> str:
# Prefer IA identifiers already present in search results.
if ia_candidates:
first = ia_candidates[0].strip()
if first:
return first
# Otherwise query the edition JSON.
try:
resp = session.get(f"https://openlibrary.org/books/{edition_id}.json", timeout=10)
resp.raise_for_status()
data = resp.json() or {}
ocaid = data.get("ocaid")
if isinstance(ocaid, str) and ocaid.strip():
return ocaid.strip()
identifiers = data.get("identifiers")
if isinstance(identifiers, dict):
ia = identifiers.get("internet_archive")
ia_id = _first_str(ia)
if ia_id:
return ia_id
except Exception:
pass
return ""
class OpenLibrary(SearchProvider):
"""Search provider for OpenLibrary books + Archive.org direct/borrow download."""
def __init__(self, config: Optional[Dict[str, Any]] = None):
super().__init__(config)
self._session = requests.Session()
def search(
self,
query: str,
limit: int = 50,
filters: Optional[Dict[str, Any]] = None,
**kwargs: Any,
) -> List[SearchResult]:
filters = filters or {}
parsed = parse_query(query)
isbn = get_field(parsed, "isbn")
author = get_field(parsed, "author")
title = get_field(parsed, "title")
free_text = get_free_text(parsed)
q = (isbn or title or author or free_text or query or "").strip()
if not q:
return []
if _looks_like_isbn(q):
q = f"isbn:{q.replace('-', '')}"
try:
resp = self._session.get(
"https://openlibrary.org/search.json",
params={"q": q, "limit": int(limit)},
timeout=10,
)
resp.raise_for_status()
data = resp.json() or {}
except Exception as exc:
log(f"[openlibrary] Search failed: {exc}", file=sys.stderr)
return []
results: List[SearchResult] = []
docs = data.get("docs") or []
if not isinstance(docs, list):
return []
for doc in docs[: int(limit)]:
if not isinstance(doc, dict):
continue
book_title = str(doc.get("title") or "").strip() or "Unknown"
authors = doc.get("author_name") or []
if isinstance(authors, str):
authors = [authors]
if not isinstance(authors, list):
authors = []
authors_list = [str(a) for a in authors if a]
year_val = doc.get("first_publish_year")
year = str(year_val) if year_val is not None else ""
edition_id = _resolve_edition_id(doc)
ia_val = doc.get("ia") or []
if isinstance(ia_val, str):
ia_val = [ia_val]
if not isinstance(ia_val, list):
ia_val = []
ia_ids = [str(x) for x in ia_val if x]
isbn_list = doc.get("isbn") or []
if isinstance(isbn_list, str):
isbn_list = [isbn_list]
if not isinstance(isbn_list, list):
isbn_list = []
isbn_13 = next((str(i) for i in isbn_list if len(str(i)) == 13), "")
isbn_10 = next((str(i) for i in isbn_list if len(str(i)) == 10), "")
columns = [
("Title", book_title),
("Author", ", ".join(authors_list)),
("Year", year),
("OLID", edition_id),
]
annotations: List[str] = []
if isbn_13:
annotations.append(f"isbn_13:{isbn_13}")
elif isbn_10:
annotations.append(f"isbn_10:{isbn_10}")
if ia_ids:
annotations.append("archive")
results.append(
SearchResult(
table="openlibrary",
title=book_title,
path=(f"https://openlibrary.org/books/{edition_id}" if edition_id else "https://openlibrary.org"),
detail=(
(f"By: {', '.join(authors_list)}" if authors_list else "")
+ (f" ({year})" if year else "")
).strip(),
annotations=annotations,
media_kind="book",
columns=columns,
full_metadata={
"openlibrary_id": edition_id,
"authors": authors_list,
"year": year,
"isbn_10": isbn_10,
"isbn_13": isbn_13,
"ia": ia_ids,
"raw": doc,
},
)
)
return results
def download(self, result: SearchResult, output_dir: Path) -> Optional[Path]:
output_dir = Path(output_dir)
output_dir.mkdir(parents=True, exist_ok=True)
meta = result.full_metadata or {}
edition_id = str(meta.get("openlibrary_id") or "").strip()
if not edition_id:
log("[openlibrary] Missing openlibrary_id; cannot download", file=sys.stderr)
return None
ia_ids = meta.get("ia") or []
if isinstance(ia_ids, str):
ia_ids = [ia_ids]
if not isinstance(ia_ids, list):
ia_ids = []
ia_candidates = [str(x) for x in ia_ids if x]
archive_id = _resolve_archive_id(self._session, edition_id, ia_candidates)
if not archive_id:
log("[openlibrary] No archive identifier available; cannot download", file=sys.stderr)
return None
safe_title = sanitize_filename(result.title)
# 1) Direct download if available.
try:
from API.archive_client import check_direct_download
can_direct, pdf_url = check_direct_download(archive_id)
except Exception:
can_direct, pdf_url = False, ""
if can_direct and pdf_url:
out_path = unique_path(output_dir / f"{safe_title}.pdf")
ok = download_file(pdf_url, out_path, session=self._session)
if ok:
return out_path
log("[openlibrary] Direct download failed", file=sys.stderr)
return None
# 2) Borrow flow (credentials required).
try:
from API.archive_client import BookNotAvailableError, credential_openlibrary, download as archive_download
from API.archive_client import get_book_infos, loan, login
email, password = credential_openlibrary(self.config or {})
if not email or not password:
log("[openlibrary] Archive credentials missing; cannot borrow", file=sys.stderr)
return None
lendable, reason = _check_lendable(self._session, edition_id)
if not lendable:
log(f"[openlibrary] Not lendable: {reason}", file=sys.stderr)
return None
session = login(email, password)
try:
session = loan(session, archive_id, verbose=False)
except BookNotAvailableError:
log("[openlibrary] Book not available to borrow", file=sys.stderr)
return None
except SystemExit:
log("[openlibrary] Borrow failed", file=sys.stderr)
return None
urls = [f"https://archive.org/borrow/{archive_id}", f"https://archive.org/details/{archive_id}"]
title = safe_title
links: Optional[List[str]] = None
last_exc: Optional[Exception] = None
for u in urls:
try:
title_raw, links, _metadata = get_book_infos(session, u)
if title_raw:
title = sanitize_filename(title_raw)
break
except Exception as exc:
last_exc = exc
continue
if not links:
log(f"[openlibrary] Failed to extract pages: {last_exc}", file=sys.stderr)
return None
temp_dir = tempfile.mkdtemp(prefix=f"{title}_", dir=str(output_dir))
try:
images = archive_download(session=session, n_threads=10, directory=temp_dir, links=links, scale=3, book_id=archive_id)
try:
import img2pdf # type: ignore
pdf_bytes = img2pdf.convert(images) if images else None
if not pdf_bytes:
log("[openlibrary] PDF conversion failed", file=sys.stderr)
try:
shutil.rmtree(temp_dir)
except Exception:
pass
return None
pdf_path = unique_path(output_dir / f"{title}.pdf")
with open(pdf_path, "wb") as f:
f.write(pdf_bytes)
try:
shutil.rmtree(temp_dir)
except Exception:
pass
return pdf_path
except ImportError:
# img2pdf unavailable: keep the downloaded images folder and return it instead of a PDF.
return Path(temp_dir)
except Exception:
try:
shutil.rmtree(temp_dir)
except Exception:
pass
raise
except Exception as exc:
log(f"[openlibrary] Borrow workflow error: {exc}", file=sys.stderr)
return None
def validate(self) -> bool:
return True
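# Illustrative usage sketch only (not part of the provider API): exercises
# search() and download() end to end. The query and output directory are
# placeholders, and results depend on the live OpenLibrary service.
if __name__ == "__main__":
    provider = OpenLibrary(config={})
    hits = provider.search('author:"Albert Pike" title:"Morals and Dogma"', limit=5)
    for hit in hits:
        print(hit.title, hit.path)
    if hits:
        saved = provider.download(hits[0], Path("downloads"))
        print(f"Saved to: {saved}" if saved else "Download failed or not available")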

View File

@@ -1,159 +0,0 @@
"""Dynamic query parser for filtering and field extraction.
Supports query syntax like:
- isbn:0557677203
- author:"Albert Pike"
- title:"Morals and Dogma"
- year:2010
- isbn:0557677203 author:"Albert Pike"
- Mixed with free text: "Morals" isbn:0557677203
This allows flexible query strings that can be parsed by any search provider
to extract specific fields for filtering and searching.
"""
from typing import Dict, List, Tuple, Optional, Any
import re
def parse_query(query: str) -> Dict[str, Any]:
"""Parse a query string into field:value pairs and free text.
Args:
query: Query string like 'isbn:0557677203 author:"Albert Pike" Morals'
Returns:
Dictionary with:
- 'fields': Dict[field_name, field_value] for structured fields
- 'text': str with remaining free text
- 'raw': str original query
"""
result = {
'fields': {},
'text': '',
'raw': query,
}
if not query or not query.strip():
return result
query = query.strip()
remaining_parts = []
# Pattern to match: field:value or field:"quoted value"
# Matches: word: followed by either quoted string or unquoted word
pattern = r'(\w+):(?:"([^"]*)"|(\S+))'
pos = 0
for match in re.finditer(pattern, query):
# Add any text before this match
if match.start() > pos:
before_text = query[pos:match.start()].strip()
if before_text:
remaining_parts.append(before_text)
field_name = match.group(1).lower()
field_value = match.group(2) if match.group(2) is not None else match.group(3)
result['fields'][field_name] = field_value
pos = match.end()
# Add any remaining text after last match
if pos < len(query):
remaining_text = query[pos:].strip()
if remaining_text:
remaining_parts.append(remaining_text)
result['text'] = ' '.join(remaining_parts)
return result
def get_field(parsed_query: Dict[str, Any], field_name: str, default: Optional[str] = None) -> Optional[str]:
"""Get a field value from parsed query, with optional default.
Args:
parsed_query: Result from parse_query()
field_name: Field name to look up (case-insensitive)
default: Default value if field not found
Returns:
Field value or default
"""
return parsed_query.get('fields', {}).get(field_name.lower(), default)
def has_field(parsed_query: Dict[str, Any], field_name: str) -> bool:
"""Check if a field exists in parsed query.
Args:
parsed_query: Result from parse_query()
field_name: Field name to check (case-insensitive)
Returns:
True if field exists
"""
return field_name.lower() in parsed_query.get('fields', {})
def get_free_text(parsed_query: Dict[str, Any]) -> str:
"""Get the free text portion of a parsed query.
Args:
parsed_query: Result from parse_query()
Returns:
Free text or empty string
"""
return parsed_query.get('text', '')
def build_query_for_provider(
parsed_query: Dict[str, Any],
provider: str,
extraction_map: Optional[Dict[str, str]] = None
) -> Tuple[str, Dict[str, str]]:
"""Build a search query and filters dict for a specific provider.
Different providers have different search syntax. This function
extracts the appropriate fields for each provider.
Args:
parsed_query: Result from parse_query()
provider: Provider name ('libgen', 'openlibrary', 'soulseek')
extraction_map: Optional mapping of field names to provider-specific names
e.g. {'isbn': 'isbn', 'author': 'author', 'title': 'title'}
Returns:
Tuple of (search_query: str, extracted_fields: Dict[field, value])
"""
extraction_map = extraction_map or {}
extracted = {}
free_text = get_free_text(parsed_query)
# Extract fields based on map
for field_name, provider_key in extraction_map.items():
if has_field(parsed_query, field_name):
extracted[provider_key] = get_field(parsed_query, field_name)
# If provider-specific extraction needed, providers can implement it
# For now, return the free text as query
return free_text, extracted
if __name__ == '__main__':
# Test cases
test_queries = [
'isbn:0557677203',
'isbn:0557677203 author:"Albert Pike"',
'Morals and Dogma isbn:0557677203',
'title:"Morals and Dogma" author:"Albert Pike" year:2010',
'search term without fields',
'author:"John Smith" title:"A Book"',
]
for query in test_queries:
print(f"\nQuery: {query}")
parsed = parse_query(query)
print(f" Fields: {parsed['fields']}")
print(f" Text: {parsed['text']}")

View File

@@ -11,7 +11,7 @@ import time
from pathlib import Path
from typing import Any, Dict, List, Optional
from Provider._base import SearchProvider, SearchResult
from ProviderCore.base import SearchProvider, SearchResult
from SYS.logger import log, debug

View File

@@ -1,707 +0,0 @@
"""Unified book downloader - handles Archive.org borrowing and Libgen fallback.
This module provides a single interface for downloading books from multiple sources:
1. Try Archive.org direct download (if available)
2. Try Archive.org borrowing (if user has credentials)
3. Fallback to Libgen search by ISBN
4. Attempt Libgen download
All sources integrated with proper metadata scraping and error handling.
"""
import logging
import asyncio
import requests
from typing import Optional, Dict, Any, Tuple, List, Callable, cast
from pathlib import Path
from SYS.logger import debug
logger = logging.getLogger(__name__)
class UnifiedBookDownloader:
"""Unified interface for downloading books from multiple sources."""
def __init__(self, config: Optional[Dict[str, Any]] = None, output_dir: Optional[str] = None):
"""Initialize the unified book downloader.
Args:
config: Configuration dict with credentials
output_dir: Default output directory
"""
self.config = config or {}
self.output_dir = output_dir
self.session = requests.Session()
# Import download functions from their modules
self._init_downloaders()
def _init_downloaders(self) -> None:
"""Initialize downloader functions from their modules."""
try:
from API.archive_client import (
check_direct_download,
get_openlibrary_by_isbn,
loan
)
self.check_direct_download = check_direct_download
self.get_openlibrary_by_isbn = get_openlibrary_by_isbn
self.loan_func = loan
logger.debug("[UnifiedBookDownloader] Loaded archive.org downloaders from archive_client")
except Exception as e:
logger.warning(f"[UnifiedBookDownloader] Failed to load archive.org functions: {e}")
self.check_direct_download = None
self.get_openlibrary_by_isbn = None
self.loan_func = None
try:
from Provider.libgen_service import (
DEFAULT_LIMIT as _LIBGEN_DEFAULT_LIMIT,
download_from_mirror as _libgen_download,
search_libgen as _libgen_search,
)
def _log_info(message: str) -> None:
debug(f"[UnifiedBookDownloader] {message}")
def _log_error(message: str) -> None:
logger.error(f"[UnifiedBookDownloader] {message}")
self.search_libgen = lambda query, limit=_LIBGEN_DEFAULT_LIMIT: _libgen_search(
query,
limit=limit,
log_info=_log_info,
log_error=_log_error,
)
self.download_from_mirror = lambda mirror_url, output_path: _libgen_download(
mirror_url,
output_path,
log_info=_log_info,
log_error=_log_error,
)
logger.debug("[UnifiedBookDownloader] Loaded Libgen helpers")
except Exception as e:
logger.warning(f"[UnifiedBookDownloader] Failed to load Libgen helpers: {e}")
self.search_libgen = None
self.download_from_mirror = None
def get_download_options(self, book_data: Dict[str, Any]) -> Dict[str, Any]:
"""Get all available download options for a book.
Checks in priority order:
1. Archive.org direct download (public domain)
2. Archive.org borrowing (if credentials available and book is borrowable)
3. Libgen fallback (by ISBN)
Args:
book_data: Book metadata dict with at least 'openlibrary_id' or 'isbn'
Returns:
Dict with available download methods and metadata
"""
options = {
'book_title': book_data.get('title', 'Unknown'),
'book_author': book_data.get('author', 'Unknown'),
'isbn': book_data.get('isbn', ''),
'openlibrary_id': book_data.get('openlibrary_id', ''),
'methods': [], # Will be sorted by priority
'metadata': {}
}
# Extract book ID from openlibrary_id (e.g., OL8513721M -> 8513721, OL8513721W -> 8513721)
ol_id = book_data.get('openlibrary_id', '')
book_id = None
if ol_id.startswith('OL') and len(ol_id) > 2:
# Remove 'OL' prefix (keep everything after it including the suffix letter)
# The book_id is all digits after 'OL'
book_id = ''.join(c for c in ol_id[2:] if c.isdigit())
# PRIORITY 1: Check direct download (fastest, no auth needed)
if self.check_direct_download:
try:
can_download, pdf_url = self.check_direct_download(book_id)
if can_download:
options['methods'].append({
'type': 'archive.org_direct',
'label': 'Archive.org Direct Download',
'requires_auth': False,
'pdf_url': pdf_url,
'book_id': book_id,
'priority': 1 # Highest priority
})
logger.info(f"[UnifiedBookDownloader] Direct download available for {book_id}")
except Exception as e:
logger.debug(f"[UnifiedBookDownloader] Direct download check failed: {e}")
# PRIORITY 2: Check borrowing option (requires auth, 14-day loan)
# First verify the book is actually lendable via OpenLibrary API
if self._has_archive_credentials():
is_lendable, status = self._check_book_lendable_status(ol_id)
if is_lendable:
options['methods'].append({
'type': 'archive.org_borrow',
'label': 'Archive.org Borrow',
'requires_auth': True,
'book_id': book_id,
'priority': 2 # Second priority
})
logger.info(f"[UnifiedBookDownloader] Borrow option available for {book_id} (status: {status})")
else:
logger.debug(f"[UnifiedBookDownloader] Borrow not available for {book_id} (status: {status})")
# PRIORITY 3: Check Libgen fallback (by ISBN, no auth needed, most reliable)
isbn = book_data.get('isbn', '')
title = book_data.get('title', '')
author = book_data.get('author', '')
if self.search_libgen:
# Can use Libgen if we have ISBN OR title (or both)
if isbn or title:
options['methods'].append({
'type': 'libgen',
'label': 'Libgen Search & Download',
'requires_auth': False,
'isbn': isbn,
'title': title,
'author': author,
'priority': 3 # Third priority (fallback)
})
logger.info(f"[UnifiedBookDownloader] Libgen fallback available (ISBN: {isbn if isbn else 'N/A'}, Title: {title})")
# Sort by priority (higher priority first)
options['methods'].sort(key=lambda x: x.get('priority', 999))
return options
def _has_archive_credentials(self) -> bool:
"""Check if Archive.org credentials are available."""
try:
from API.archive_client import credential_openlibrary
email, password = credential_openlibrary(self.config)
return bool(email and password)
except Exception:
return False
def _check_book_lendable_status(self, ol_id: str) -> Tuple[bool, Optional[str]]:
"""Check if a book is lendable via OpenLibrary API.
Queries: https://openlibrary.org/api/volumes/brief/json/OLID:{ol_id}
Note: Only works with Edition IDs (OL...M), not Work IDs (OL...W)
Args:
ol_id: OpenLibrary ID (e.g., OL8513721M for Edition or OL4801915W for Work)
Returns:
Tuple of (is_lendable: bool, status_reason: Optional[str])
"""
try:
if not ol_id.startswith('OL'):
return False, "Invalid OpenLibrary ID format"
# If this is a Work ID (ends with W), we can't query Volumes API
# Work IDs are abstract umbrella records, not specific editions
if ol_id.endswith('W'):
logger.debug(f"[UnifiedBookDownloader] Work ID {ol_id} - skipping Volumes API (not lendable)")
return False, "Work ID not supported by Volumes API (not a specific edition)"
# If it ends with M, it's an Edition ID - proceed with query
if not ol_id.endswith('M'):
logger.debug(f"[UnifiedBookDownloader] Unknown ID type {ol_id} (not M or W)")
return False, "Invalid OpenLibrary ID type"
url = f"https://openlibrary.org/api/volumes/brief/json/OLID:{ol_id}"
response = self.session.get(url, timeout=10)
response.raise_for_status()
data = response.json()
# Empty response means no records found
if not data:
logger.debug(f"[UnifiedBookDownloader] Empty response for {ol_id}")
return False, "No availability data found"
# The response is wrapped in OLID key
olid_key = f"OLID:{ol_id}"
if olid_key not in data:
logger.debug(f"[UnifiedBookDownloader] OLID key not found in response")
return False, "No availability data found"
olid_data = data[olid_key]
# Check items array for lendable status
if 'items' in olid_data and olid_data['items'] and len(olid_data['items']) > 0:
items = olid_data['items']
# Check the first item for lending status
first_item = items[0]
# Handle both dict and string representations (PowerShell converts to string)
if isinstance(first_item, dict):
status = first_item.get('status', '')
else:
# String representation - check if 'lendable' is in it
status = str(first_item).lower()
is_lendable = 'lendable' in str(status).lower()
if is_lendable:
logger.info(f"[UnifiedBookDownloader] Book {ol_id} is lendable")
return True, "LENDABLE"
else:
status_str = status.get('status', 'NOT_LENDABLE') if isinstance(status, dict) else 'NOT_LENDABLE'
logger.debug(f"[UnifiedBookDownloader] Book {ol_id} is not lendable (status: {status_str})")
return False, status_str
else:
# No items array or empty
logger.debug(f"[UnifiedBookDownloader] No items found for {ol_id}")
return False, "Not available for lending"
except requests.exceptions.Timeout:
logger.warning(f"[UnifiedBookDownloader] OpenLibrary API timeout for {ol_id}")
return False, "API timeout"
except Exception as e:
logger.debug(f"[UnifiedBookDownloader] Failed to check lendable status for {ol_id}: {e}")
return False, f"API error"
async def download_book(self, method: Dict[str, Any], output_dir: Optional[str] = None) -> Tuple[bool, str]:
"""Download a book using the specified method.
Args:
method: Download method dict from get_download_options()
output_dir: Directory to save the book
Returns:
Tuple of (success: bool, message: str)
"""
output_dir = output_dir or self.output_dir or str(Path.home() / "Downloads")
method_type = method.get('type', '')
logger.info(f"[UnifiedBookDownloader] Starting download with method: {method_type}")
try:
if method_type == 'archive.org_direct':
return await self._download_archive_direct(method, output_dir)
elif method_type == 'archive.org_borrow':
return await self._download_archive_borrow(method, output_dir)
elif method_type == 'libgen':
return await self._download_libgen(method, output_dir)
else:
return False, f"Unknown download method: {method_type}"
except Exception as e:
logger.error(f"[UnifiedBookDownloader] Download error: {e}", exc_info=True)
return False, f"Download failed: {str(e)}"
async def _download_archive_direct(self, method: Dict[str, Any], output_dir: str) -> Tuple[bool, str]:
"""Download directly from Archive.org."""
try:
pdf_url = method.get('pdf_url', '')
book_id = method.get('book_id', '')
if not pdf_url:
return False, "No PDF URL available"
# Determine output filename
filename = f"{book_id}.pdf"
output_path = Path(output_dir) / filename
logger.info(f"[UnifiedBookDownloader] Downloading PDF from: {pdf_url}")
# Download in a thread to avoid blocking
loop = asyncio.get_event_loop()
success = await loop.run_in_executor(
None,
self._download_file,
pdf_url,
str(output_path)
)
if success:
logger.info(f"[UnifiedBookDownloader] Successfully downloaded to: {output_path}")
return True, f"Downloaded to: {output_path}"
else:
return False, "Failed to download PDF"
except Exception as e:
logger.error(f"[UnifiedBookDownloader] Archive direct download error: {e}")
return False, f"Archive download failed: {str(e)}"
async def _download_archive_borrow(self, method: Dict[str, Any], output_dir: str) -> Tuple[bool, str]:
"""Download via Archive.org borrowing (requires credentials).
Process (follows archive_client.py pattern):
1. Login to Archive.org with credentials
2. Call loan endpoint to borrow the book (14-day loan)
3. Get book info (page links, metadata)
4. Download all pages as images
5. Merge images into PDF
The loan function from archive_client.py handles:
- Checking if book needs borrowing (status 400 = "doesn't need to be borrowed")
- Creating borrow token for access
- Handling borrow failures
get_book_infos() extracts page links from the borrowed book viewer
download() downloads all pages using thread pool
img2pdf merges pages into searchable PDF
"""
try:
from API.archive_client import credential_openlibrary
book_id = method.get('book_id', '')
# Get credentials
email, password = credential_openlibrary(self.config)
if not email or not password:
return False, "Archive.org credentials not configured"
logger.info(f"[UnifiedBookDownloader] Logging into Archive.org...")
# Login and borrow (in thread, following download_book.py pattern)
loop = asyncio.get_event_loop()
borrow_result = await loop.run_in_executor(
None,
self._archive_borrow_and_download,
email,
password,
book_id,
output_dir
)
if borrow_result and isinstance(borrow_result, tuple):
success, filepath = borrow_result
if success:
logger.info(f"[UnifiedBookDownloader] Borrow succeeded: {filepath}")
return True, filepath
else:
logger.warning(f"[UnifiedBookDownloader] Borrow failed: {filepath}")
return False, filepath
else:
return False, "Failed to borrow book from Archive.org"
except Exception as e:
logger.error(f"[UnifiedBookDownloader] Archive borrow error: {e}")
return False, f"Archive borrow failed: {str(e)}"
async def _download_libgen(self, method: Dict[str, Any], output_dir: str) -> Tuple[bool, str]:
"""Download via Libgen search and download with mirror fallback."""
try:
isbn = method.get('isbn', '')
title = method.get('title', '')
if not isbn and not title:
return False, "Need ISBN or title for Libgen search"
if not self.search_libgen:
return False, "Libgen searcher not available"
# Define wrapper functions to safely call the methods
search_func = self.search_libgen
if search_func is None:
return False, "Search function not available"
preloaded_results = method.get('results')
loop = asyncio.get_event_loop()
if preloaded_results:
results = list(preloaded_results)
if not results:
results = await loop.run_in_executor(None, lambda: search_func(isbn or title, 10))
else:
results = await loop.run_in_executor(None, lambda: search_func(isbn or title, 10))
if not results:
logger.warning(f"[UnifiedBookDownloader] No Libgen results for: {isbn or title}")
return False, f"No Libgen results found for: {isbn or title}"
logger.info(f"[UnifiedBookDownloader] Found {len(results)} Libgen results")
# Determine output filename (use first result for naming)
first_result = results[0]
filename = f"{first_result.get('title', 'book')}"
filename = "".join(c for c in filename if c.isalnum() or c in (' ', '.', '-'))[:100]
# Try each result's mirror until one succeeds
for idx, result in enumerate(results, 1):
mirror_url = result.get('mirror_url', '')
if not mirror_url:
logger.debug(f"[UnifiedBookDownloader] Result {idx}: No mirror URL")
continue
# Use extension from this result if available
extension = result.get('extension', 'pdf')
if extension and not extension.startswith('.'):
extension = f".{extension}"
elif not extension:
extension = '.pdf'
output_path = Path(output_dir) / (filename + extension)
logger.info(f"[UnifiedBookDownloader] Trying mirror {idx}/{len(results)}: {mirror_url}")
download_func = self.download_from_mirror
if download_func is None:
return False, "Download function not available"
download_callable = cast(Callable[[str, str], Tuple[bool, Optional[Path]]], download_func)
def download_wrapper():
return download_callable(mirror_url, str(output_path))
# Download (in thread)
try:
success, downloaded_path = await loop.run_in_executor(None, download_wrapper)
if success:
dest_path = Path(downloaded_path) if downloaded_path else output_path
# Validate downloaded file is not HTML (common Libgen issue)
if dest_path.exists():
try:
with open(dest_path, 'rb') as f:
file_start = f.read(1024).decode('utf-8', errors='ignore').lower()
if '<!doctype' in file_start or '<html' in file_start:
logger.warning(f"[UnifiedBookDownloader] Mirror {idx} returned HTML instead of file, trying next mirror...")
dest_path.unlink() # Delete the HTML file
continue
except Exception as e:
logger.debug(f"[UnifiedBookDownloader] Could not validate file content: {e}")
logger.info(f"[UnifiedBookDownloader] Successfully downloaded from mirror {idx} to: {dest_path}")
return True, str(dest_path)
else:
logger.warning(f"[UnifiedBookDownloader] Mirror {idx} download failed, trying next...")
except Exception as e:
logger.warning(f"[UnifiedBookDownloader] Mirror {idx} error: {e}, trying next...")
continue
return False, f"All {len(results)} mirrors failed"
except Exception as e:
logger.error(f"[UnifiedBookDownloader] Libgen download error: {e}")
return False, f"Libgen download failed: {str(e)}"
async def download_libgen_selection(
self,
selected: Dict[str, Any],
remaining: Optional[List[Dict[str, Any]]] = None,
output_dir: Optional[str] = None,
) -> Tuple[bool, str]:
"""Download a specific Libgen result with optional fallbacks."""
if not isinstance(selected, dict):
return False, "Selected result must be a dictionary"
ordered_results: List[Dict[str, Any]] = [selected]
if remaining:
for item in remaining:
if isinstance(item, dict) and item is not selected:
ordered_results.append(item)
method: Dict[str, Any] = {
'type': 'libgen',
'isbn': selected.get('isbn', '') or '',
'title': selected.get('title', '') or '',
'author': selected.get('author', '') or '',
'results': ordered_results,
}
return await self.download_book(method, output_dir)
def download_libgen_selection_sync(
self,
selected: Dict[str, Any],
remaining: Optional[List[Dict[str, Any]]] = None,
output_dir: Optional[str] = None,
) -> Tuple[bool, str]:
"""Synchronous helper for downloading a Libgen selection."""
async def _run() -> Tuple[bool, str]:
return await self.download_libgen_selection(selected, remaining, output_dir)
loop = asyncio.new_event_loop()
try:
asyncio.set_event_loop(loop)
return loop.run_until_complete(_run())
finally:
loop.close()
asyncio.set_event_loop(None)
def _download_file(self, url: str, output_path: str) -> bool:
"""Download a file from URL."""
try:
response = requests.get(url, stream=True, timeout=30)
response.raise_for_status()
with open(output_path, 'wb') as f:
for chunk in response.iter_content(chunk_size=8192):
if chunk:
f.write(chunk)
return True
except Exception as e:
logger.error(f"[UnifiedBookDownloader] File download error: {e}")
return False
def _archive_borrow_and_download(self, email: str, password: str, book_id: str, output_dir: str) -> Tuple[bool, str]:
"""Borrow a book from Archive.org and download pages as PDF.
This follows the exact process from archive_client.py:
1. Login with credentials
2. Call loan() to create 14-day borrow
3. Get book info (extract page url)
4. Download all pages as images
5. Merge images into searchable PDF
Returns tuple of (success: bool, filepath/message: str)
"""
try:
from API.archive_client import login, loan, get_book_infos, download
import tempfile
import shutil
logger.info(f"[UnifiedBookDownloader] Logging into Archive.org as {email}")
session = login(email, password)
logger.info(f"[UnifiedBookDownloader] Attempting to borrow book: {book_id}")
# Call loan to create the 14-day borrow
session = loan(session, book_id, verbose=True)
# If we get here, borrowing succeeded
logger.info(f"[UnifiedBookDownloader] Successfully borrowed book: {book_id}")
# Now get the book info (page url and metadata)
logger.info(f"[UnifiedBookDownloader] Extracting book page information...")
# Try both URL formats: with /borrow and without
book_url = [
f"https://archive.org/borrow/{book_id}", # Try borrow page first (for borrowed books)
f"https://archive.org/details/{book_id}" # Fallback to details page
]
title = None
links = None
metadata = None
last_error = None
for book_url in book_url:
try:
logger.debug(f"[UnifiedBookDownloader] Trying to get book info from: {book_url}")
response = session.get(book_url, timeout=10)
# Log response status
if response.status_code != 200:
logger.debug(f"[UnifiedBookDownloader] URL returned {response.status_code}: {book_url}")
# Continue to try next URL
continue
# Try to parse the response
title, links, metadata = get_book_infos(session, book_url)
logger.info(f"[UnifiedBookDownloader] Successfully got info from: {book_url}")
logger.info(f"[UnifiedBookDownloader] Found {len(links)} pages to download")
break
except Exception as e:
logger.debug(f"[UnifiedBookDownloader] Failed with {book_url}: {e}")
last_error = e
continue
if links is None:
logger.error(f"[UnifiedBookDownloader] Failed to get book info from all url: {last_error}")
# Borrow extraction failed - return False
return False, "Could not extract borrowed book pages"
# Create temporary directory for images
temp_dir = tempfile.mkdtemp(prefix=f"{title}_", dir=output_dir)
logger.info(f"[UnifiedBookDownloader] Downloading {len(links)} pages to temporary directory...")
try:
# Download all pages (uses thread pool)
images = download(
session=session,
n_threads=10,
directory=temp_dir,
links=links,
scale=3, # Default resolution
book_id=book_id
)
logger.info(f"[UnifiedBookDownloader] Downloaded {len(images)} pages")
# Try to merge pages into PDF
try:
import img2pdf
logger.info(f"[UnifiedBookDownloader] Merging pages into PDF...")
# Prepare PDF metadata
pdfmeta = {}
if metadata:
if "title" in metadata:
pdfmeta["title"] = metadata["title"]
if "creator" in metadata:
pdfmeta["author"] = metadata["creator"]
pdfmeta["keywords"] = [f"https://archive.org/details/{book_id}"]
pdfmeta["creationdate"] = None # Avoid timezone issues
# Convert images to PDF
pdf_content = img2pdf.convert(images, **pdfmeta) if images else None
if not pdf_content:
logger.error(f"[UnifiedBookDownloader] PDF conversion failed")
return False, "Failed to convert pages to PDF"
# Save the PDF
pdf_filename = f"{title}.pdf" if title else "book.pdf"
pdf_path = Path(output_dir) / pdf_filename
# Handle duplicate filenames
i = 1
while pdf_path.exists():
pdf_path = Path(output_dir) / f"{title or 'book'}({i}).pdf"
i += 1
with open(pdf_path, 'wb') as f:
f.write(pdf_content)
logger.info(f"[UnifiedBookDownloader] Successfully created PDF: {pdf_path}")
return True, str(pdf_path)
except ImportError:
logger.warning(f"[UnifiedBookDownloader] img2pdf not available, saving as JPG collection instead")
# Create JPG collection directory
if not title:
title = f"book_{book_id}"
jpg_dir = Path(output_dir) / title
i = 1
while jpg_dir.exists():
jpg_dir = Path(output_dir) / f"{title}({i})"
i += 1
# Move temporary directory to final location
shutil.move(temp_dir, str(jpg_dir))
temp_dir = None # Mark as already moved
logger.info(f"[UnifiedBookDownloader] Saved as JPG collection: {jpg_dir}")
return True, str(jpg_dir)
finally:
# Clean up temporary directory if it still exists
if temp_dir and Path(temp_dir).exists():
shutil.rmtree(temp_dir)
except SystemExit:
# loan() function calls sys.exit on failure - catch it
logger.error(f"[UnifiedBookDownloader] Borrow process exited (book may not be borrowable)")
return False, "Book could not be borrowed (may not be available for borrowing)"
except Exception as e:
logger.error(f"[UnifiedBookDownloader] Archive borrow error: {e}")
return False, f"Borrow failed: {str(e)}"
def close(self) -> None:
"""Close the session."""
self.session.close()

View File

@@ -6,7 +6,7 @@ import subprocess
import sys
from typing import Any, Dict, List, Optional
from Provider._base import SearchProvider, SearchResult
from ProviderCore.base import SearchProvider, SearchResult
from SYS.logger import log

View File

@@ -4,7 +4,7 @@ import os
import sys
from typing import Any
from Provider._base import FileProvider
from ProviderCore.base import FileProvider
from SYS.logger import log

5
ProviderCore/__init__.py Normal file
View File

@@ -0,0 +1,5 @@
"""Provider core modules.
This package contains the provider framework (base types, registry, and shared helpers).
Concrete provider implementations live in the `Provider/` package.
"""

42
ProviderCore/download.py Normal file
View File

@@ -0,0 +1,42 @@
from __future__ import annotations
from pathlib import Path
from typing import Optional
import requests
def sanitize_filename(name: str, *, max_len: int = 150) -> str:
text = str(name or "").strip()
if not text:
return "download"
forbidden = set('<>:"/\\|?*')
cleaned = "".join("_" if c in forbidden else c for c in text)
cleaned = " ".join(cleaned.split()).strip().strip(".")
if not cleaned:
cleaned = "download"
return cleaned[:max_len]
def download_file(url: str, output_path: Path, *, session: Optional[requests.Session] = None, timeout_s: float = 30.0) -> bool:
output_path = Path(output_path)
output_path.parent.mkdir(parents=True, exist_ok=True)
s = session or requests.Session()
try:
with s.get(url, stream=True, timeout=timeout_s) as resp:
resp.raise_for_status()
with open(output_path, "wb") as f:
for chunk in resp.iter_content(chunk_size=1024 * 256):
if chunk:
f.write(chunk)
return output_path.exists() and output_path.stat().st_size > 0
except Exception:
try:
if output_path.exists():
output_path.unlink()
except Exception:
pass
return False
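# Minimal usage sketch (illustrative only); the URL is a placeholder, not an
# endpoint used by any provider.
if __name__ == "__main__":
    name = sanitize_filename('Morals and Dogma: a test?')
    ok = download_file("https://example.org/sample.pdf", Path("downloads") / f"{name}.pdf")
    print("downloaded" if ok else "download failed")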

View File

@@ -11,10 +11,11 @@ import sys
from SYS.logger import log
from Provider._base import FileProvider, SearchProvider, SearchResult
from ProviderCore.base import FileProvider, SearchProvider, SearchResult
from Provider.bandcamp import Bandcamp
from Provider.libgen import Libgen
from Provider.matrix import Matrix
from Provider.openlibrary import OpenLibrary
from Provider.soulseek import Soulseek, download_soulseek_file
from Provider.youtube import YouTube
from Provider.zeroxzero import ZeroXZero
@@ -22,6 +23,7 @@ from Provider.zeroxzero import ZeroXZero
_SEARCH_PROVIDERS: Dict[str, Type[SearchProvider]] = {
"libgen": Libgen,
"openlibrary": OpenLibrary,
"soulseek": Soulseek,
"bandcamp": Bandcamp,
"youtube": YouTube,

View File

@@ -943,6 +943,79 @@ class Folder(Store):
debug(f"delete_url failed for local file: {exc}")
return False
def get_note(self, file_identifier: str, **kwargs: Any) -> Dict[str, str]:
"""Get notes for a local file by hash."""
from API.folder import API_folder_store
try:
if not self._location:
return {}
file_hash = str(file_identifier or "").strip().lower()
if not _normalize_hash(file_hash):
return {}
with API_folder_store(Path(self._location)) as db:
getter = getattr(db, "get_notes", None)
if callable(getter):
notes = getter(file_hash)
return notes if isinstance(notes, dict) else {}
# Fallback: default-only
note = db.get_note(file_hash)
return {"default": str(note or "")} if note else {}
except Exception as exc:
debug(f"get_note failed for local file: {exc}")
return {}
def set_note(self, file_identifier: str, name: str, text: str, **kwargs: Any) -> bool:
"""Set a named note for a local file by hash."""
from API.folder import API_folder_store
try:
if not self._location:
return False
file_hash = str(file_identifier or "").strip().lower()
if not _normalize_hash(file_hash):
return False
file_path = self.get_file(file_hash, **kwargs)
if not file_path or not isinstance(file_path, Path) or not file_path.exists():
return False
with API_folder_store(Path(self._location)) as db:
setter = getattr(db, "set_note", None)
if callable(setter):
setter(file_path, str(name), str(text))
return True
db.save_note(file_path, str(text))
return True
except Exception as exc:
debug(f"set_note failed for local file: {exc}")
return False
def delete_note(self, file_identifier: str, name: str, **kwargs: Any) -> bool:
"""Delete a named note for a local file by hash."""
from API.folder import API_folder_store
try:
if not self._location:
return False
file_hash = str(file_identifier or "").strip().lower()
if not _normalize_hash(file_hash):
return False
with API_folder_store(Path(self._location)) as db:
deleter = getattr(db, "delete_note", None)
if callable(deleter):
deleter(file_hash, str(name))
return True
# Default-only fallback
if str(name).strip().lower() == "default":
deleter2 = getattr(db, "save_note", None)
if callable(deleter2):
file_path = self.get_file(file_hash, **kwargs)
if file_path and isinstance(file_path, Path) and file_path.exists():
deleter2(file_path, "")
return True
return False
except Exception as exc:
debug(f"delete_note failed for local file: {exc}")
return False
def delete_file(self, file_identifier: str, **kwargs: Any) -> bool:
"""Delete a file from the folder store.

View File

@@ -437,7 +437,10 @@ class HydrusNetwork(Store):
try:
from API import HydrusNetwork as hydrus_wrapper
file_hash = str(file_identifier)
file_hash = str(file_identifier or "").strip().lower()
if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash):
debug(f"get_tags: invalid file hash '{file_identifier}'")
return [], "unknown"
# Get Hydrus client and service info
client = self._client
@@ -483,12 +486,17 @@ class HydrusNetwork(Store):
if client is None:
debug("add_tag: Hydrus client unavailable")
return False
file_hash = str(file_identifier or "").strip().lower()
if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash):
debug(f"add_tag: invalid file hash '{file_identifier}'")
return False
service_name = kwargs.get("service_name") or "my tags"
# Ensure tags is a list
tag_list = list(tags) if isinstance(tags, (list, tuple)) else [str(tags)]
if not tag_list:
return False
client.add_tag(file_identifier, tag_list, service_name)
client.add_tag(file_hash, tag_list, service_name)
return True
except Exception as exc:
debug(f"Hydrus add_tag failed: {exc}")
@@ -502,11 +510,16 @@ class HydrusNetwork(Store):
if client is None:
debug("delete_tag: Hydrus client unavailable")
return False
file_hash = str(file_identifier or "").strip().lower()
if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash):
debug(f"delete_tag: invalid file hash '{file_identifier}'")
return False
service_name = kwargs.get("service_name") or "my tags"
tag_list = list(tags) if isinstance(tags, (list, tuple)) else [str(tags)]
if not tag_list:
return False
client.delete_tag(file_identifier, tag_list, service_name)
client.delete_tag(file_hash, tag_list, service_name)
return True
except Exception as exc:
debug(f"Hydrus delete_tag failed: {exc}")
@@ -520,7 +533,12 @@ class HydrusNetwork(Store):
if client is None:
debug("get_url: Hydrus client unavailable")
return []
payload = client.fetch_file_metadata(hashes=[str(file_identifier)], include_file_url=True)
file_hash = str(file_identifier or "").strip().lower()
if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash):
return []
payload = client.fetch_file_metadata(hashes=[file_hash], include_file_url=True)
items = payload.get("metadata") if isinstance(payload, dict) else None
if not isinstance(items, list) or not items:
return []
@@ -561,6 +579,80 @@ class HydrusNetwork(Store):
debug(f"Hydrus delete_url failed: {exc}")
return False
def get_note(self, file_identifier: str, **kwargs: Any) -> Dict[str, str]:
"""Get notes for a Hydrus file (default note service only)."""
try:
client = self._client
if client is None:
debug("get_note: Hydrus client unavailable")
return {}
file_hash = str(file_identifier or "").strip().lower()
if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash):
return {}
payload = client.fetch_file_metadata(hashes=[file_hash], include_notes=True)
items = payload.get("metadata") if isinstance(payload, dict) else None
if not isinstance(items, list) or not items:
return {}
meta = items[0] if isinstance(items[0], dict) else None
if not isinstance(meta, dict):
return {}
notes_payload = meta.get("notes")
if isinstance(notes_payload, dict):
return {str(k): str(v or "") for k, v in notes_payload.items() if str(k).strip()}
return {}
except Exception as exc:
debug(f"Hydrus get_note failed: {exc}")
return {}
def set_note(self, file_identifier: str, name: str, text: str, **kwargs: Any) -> bool:
"""Set a named note for a Hydrus file (default note service only)."""
try:
client = self._client
if client is None:
debug("set_note: Hydrus client unavailable")
return False
file_hash = str(file_identifier or "").strip().lower()
if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash):
return False
note_name = str(name or "").strip()
if not note_name:
return False
note_text = str(text or "")
client.set_notes(file_hash, {note_name: note_text})
return True
except Exception as exc:
debug(f"Hydrus set_note failed: {exc}")
return False
def delete_note(self, file_identifier: str, name: str, **kwargs: Any) -> bool:
"""Delete a named note for a Hydrus file (default note service only)."""
try:
client = self._client
if client is None:
debug("delete_note: Hydrus client unavailable")
return False
file_hash = str(file_identifier or "").strip().lower()
if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash):
return False
note_name = str(name or "").strip()
if not note_name:
return False
client.delete_notes(file_hash, [note_name])
return True
except Exception as exc:
debug(f"Hydrus delete_note failed: {exc}")
return False
@staticmethod
def _extract_tags_from_hydrus_meta(
meta: Dict[str, Any],

View File

@@ -53,3 +53,21 @@ class Store(ABC):
@abstractmethod
def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
raise NotImplementedError
@abstractmethod
def get_note(self, file_identifier: str, **kwargs: Any) -> Dict[str, str]:
"""Get notes for a file.
Returns a mapping of note name/key -> note text.
"""
raise NotImplementedError
@abstractmethod
def set_note(self, file_identifier: str, name: str, text: str, **kwargs: Any) -> bool:
"""Add or replace a named note for a file."""
raise NotImplementedError
@abstractmethod
def delete_note(self, file_identifier: str, name: str, **kwargs: Any) -> bool:
"""Delete a named note for a file."""
raise NotImplementedError
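    # Usage sketch for any concrete Store backend (values are illustrative):
    #   store.set_note(sha256_hex, "lyric", lrc_text)
    #   notes = store.get_note(sha256_hex)   # e.g. {"lyric": "..."}
    #   store.delete_note(sha256_hex, "lyric")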

View File

@@ -24,9 +24,9 @@ import json
# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent))
# Import cmdlets system to call get-tag
# Import cmdlet system to call get-tag
try:
from cmdlets import get as get_cmdlet
from cmdlet import get as get_cmdlet
except ImportError:
get_cmdlet = None
@@ -353,10 +353,10 @@ class DownloadModal(ModalScreen):
# Import cmdlet system
if not get_cmdlet:
logger.error("cmdlets module not available")
logger.error("cmdlet module not available")
self.app.call_from_thread(
self.app.notify,
"Cmdlets system unavailable",
"cmdlet system unavailable",
title="Error",
severity="error"
)
@@ -1323,10 +1323,10 @@ class DownloadModal(ModalScreen):
# Call get-tag cmdlet to scrape URL
if not get_cmdlet:
logger.error("cmdlets module not available")
logger.error("cmdlet module not available")
self.app.call_from_thread(
self.app.notify,
"cmdlets module not available",
"cmdlet module not available",
title="Error",
severity="error"
)
@@ -1563,13 +1563,13 @@ class DownloadModal(ModalScreen):
"""
# Import cmdlet system
if not get_cmdlet:
error_msg = "cmdlets module not available"
error_msg = "cmdlet module not available"
logger.error(error_msg)
if worker:
worker.append_stdout(f"❌ ERROR: {error_msg}\n")
self.app.call_from_thread(
self.app.notify,
"Cmdlets system unavailable",
"cmdlet system unavailable",
title="Error",
severity="error"
)

View File

@@ -14,9 +14,9 @@ import asyncio
# Add parent directory to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent))
from config import load_config
from config import load_config, resolve_output_dir
from result_table import ResultTable
from Provider.registry import get_search_provider
from ProviderCore.registry import get_search_provider
logger = logging.getLogger(__name__)
@@ -236,7 +236,7 @@ class SearchModal(ModalScreen):
selected_row = self.results_table.cursor_row
if 0 <= selected_row < len(self.current_results):
result = self.current_results[selected_row]
if result.get("source") == "openlibrary":
if getattr(result, "table", "") == "openlibrary":
asyncio.create_task(self._download_book(result))
else:
logger.warning("[search-modal] Download only supported for OpenLibrary results")
@@ -330,48 +330,28 @@ class SearchModal(ModalScreen):
logger.info(f"[search-modal] Populated tags textarea from result")
async def _download_book(self, result: Any) -> None:
"""Download a book from OpenLibrary using unified downloader."""
"""Download a book from OpenLibrary using the provider."""
if getattr(result, "table", "") != "openlibrary":
logger.warning("[search-modal] Download only supported for OpenLibrary results")
return
try:
from Provider.unified_book_downloader import UnifiedBookDownloader
from config import load_config
# Convert SearchResult to dict if needed
if hasattr(result, 'to_dict'):
result_dict = result.to_dict()
# Ensure raw_data is populated for downloader
if 'raw_data' not in result_dict and result.full_metadata:
result_dict['raw_data'] = result.full_metadata
else:
result_dict = result
logger.info(f"[search-modal] Starting download for: {result_dict.get('title')}")
config = load_config()
downloader = UnifiedBookDownloader(config=config)
output_dir = resolve_output_dir(config)
# Get download options for this book
options = downloader.get_download_options(result_dict)
if not options['methods']:
logger.warning(f"[search-modal] No download methods available for: {result_dict.get('title')}")
# Could show a modal dialog here
provider = get_search_provider("openlibrary", config=config)
if not provider:
logger.error("[search-modal] Provider not available: openlibrary")
return
# For now, use the first available method (we could show a dialog to choose)
method = options['methods'][0]
logger.info(f"[search-modal] Using download method: {method.get('label')}")
title = getattr(result, "title", "")
logger.info(f"[search-modal] Starting download for: {title}")
# Perform the download
success, message = await downloader.download_book(method)
if success:
logger.info(f"[search-modal] Download successful: {message}")
# Could show success dialog
downloaded = await asyncio.to_thread(provider.download, result, output_dir)
if downloaded:
logger.info(f"[search-modal] Download successful: {downloaded}")
else:
logger.warning(f"[search-modal] Download failed: {message}")
# Could show error dialog
downloader.close()
logger.warning(f"[search-modal] Download failed for: {title}")
except Exception as e:
logger.error(f"[search-modal] Download error: {e}", exc_info=True)

View File

@@ -23,7 +23,7 @@ for path in (ROOT_DIR, BASE_DIR):
sys.path.insert(0, str_path)
import pipeline as ctx
from cmdlets import REGISTRY
from cmdlet import REGISTRY
from config import get_local_storage_path, load_config
from SYS.worker_manager import WorkerManager

166
cli_syntax.py Normal file
View File

@@ -0,0 +1,166 @@
from __future__ import annotations
from dataclasses import dataclass
from typing import Any, Dict, Optional
import re
@dataclass(frozen=True)
class SyntaxErrorDetail:
message: str
expected: Optional[str] = None
def validate_pipeline_text(text: str) -> Optional[SyntaxErrorDetail]:
"""Validate raw CLI input before tokenization/execution.
This is intentionally lightweight and focuses on user-facing syntax issues:
- Unbalanced single/double quotes
- Dangling or empty pipeline stages (|)
Returns:
None if valid, otherwise a SyntaxErrorDetail describing the issue.
"""
if text is None:
return SyntaxErrorDetail("Empty command")
raw = text.strip()
if not raw:
return SyntaxErrorDetail("Empty command")
in_single = False
in_double = False
escaped = False
last_pipe_outside_quotes: Optional[int] = None
for idx, ch in enumerate(raw):
if escaped:
escaped = False
continue
if ch == "\\" and (in_single or in_double):
escaped = True
continue
if ch == '"' and not in_single:
in_double = not in_double
continue
if ch == "'" and not in_double:
in_single = not in_single
continue
if ch == "|" and not in_single and not in_double:
# Record pipe locations to catch empty stages/dangling pipe.
if last_pipe_outside_quotes is not None and last_pipe_outside_quotes == idx - 1:
return SyntaxErrorDetail("Syntax error: empty pipeline stage (found '||').")
last_pipe_outside_quotes = idx
if in_double:
return SyntaxErrorDetail('Syntax error: missing closing ".', expected='"')
if in_single:
return SyntaxErrorDetail("Syntax error: missing closing '.", expected="'")
# Dangling pipe at end / pipe as first non-space character
if raw.startswith("|"):
return SyntaxErrorDetail("Syntax error: pipeline cannot start with '|'.")
if raw.endswith("|"):
return SyntaxErrorDetail("Syntax error: pipeline cannot end with '|'.")
# Empty stage like "cmd1 | | cmd2" (spaces between pipes)
if "|" in raw:
# Simple pass: look for pipes that have only whitespace between them.
# We only check outside quotes by re-scanning and counting non-space chars between pipes.
in_single = False
in_double = False
escaped = False
seen_nonspace_since_pipe = True # start true to allow leading command
for ch in raw:
if escaped:
escaped = False
continue
if ch == "\\" and (in_single or in_double):
escaped = True
continue
if ch == '"' and not in_single:
in_double = not in_double
continue
if ch == "'" and not in_double:
in_single = not in_single
continue
if ch == "|" and not in_single and not in_double:
if not seen_nonspace_since_pipe:
return SyntaxErrorDetail("Syntax error: empty pipeline stage (use a command between '|').")
seen_nonspace_since_pipe = False
continue
if not in_single and not in_double and not ch.isspace():
seen_nonspace_since_pipe = True
return None
def parse_query(query: str) -> Dict[str, Any]:
"""Parse a query string into field:value pairs and free text.
Supports syntax like:
- isbn:0557677203
- author:"Albert Pike"
- title:"Morals and Dogma" year:2010
- Mixed with free text: Morals isbn:0557677203
Returns:
Dict with keys:
- fields: Dict[str, str]
- text: str
- raw: str
"""
result: Dict[str, Any] = {
"fields": {},
"text": "",
"raw": query,
}
if not query or not query.strip():
return result
raw = query.strip()
remaining_parts: list[str] = []
# Match field:value where value is either a quoted string or a non-space token.
pattern = r'(\w+):(?:"([^"]*)"|(\S+))'
pos = 0
for match in re.finditer(pattern, raw):
if match.start() > pos:
before_text = raw[pos : match.start()].strip()
if before_text:
remaining_parts.append(before_text)
field_name = (match.group(1) or "").lower()
field_value = match.group(2) if match.group(2) is not None else match.group(3)
if field_name:
result["fields"][field_name] = field_value
pos = match.end()
if pos < len(raw):
remaining_text = raw[pos:].strip()
if remaining_text:
remaining_parts.append(remaining_text)
result["text"] = " ".join(remaining_parts)
return result
def get_field(parsed_query: Dict[str, Any], field_name: str, default: Optional[str] = None) -> Optional[str]:
"""Get a field value from a parsed query."""
return parsed_query.get("fields", {}).get((field_name or "").lower(), default)
def get_free_text(parsed_query: Dict[str, Any]) -> str:
"""Get the free-text portion of a parsed query."""
return str(parsed_query.get("text", "") or "")
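if __name__ == "__main__":
    # Illustrative self-test in the spirit of the old query-parser module;
    # the sample inputs are arbitrary.
    samples = [
        'isbn:0557677203 author:"Albert Pike"',
        "get-file | | add-tag",
        'title:"A Book',
    ]
    for sample in samples:
        err = validate_pipeline_text(sample)
        parsed = parse_query(sample)
        print(sample, "->", err.message if err else "ok", parsed["fields"], parsed["text"])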

View File

@@ -10,6 +10,24 @@ Cmdlet = Callable[[Any, Sequence[str], Dict[str, Any]], int]
REGISTRY: Dict[str, Cmdlet] = {}
def _normalize_cmd_name(name: str) -> str:
return str(name or "").replace('_', '-').lower().strip()
def register_callable(names: Iterable[str], fn: Cmdlet) -> Cmdlet:
"""Register a callable under one or more command names.
This is the single registration mechanism used by both:
- legacy function cmdlets (decorator form)
- class-based cmdlets (Cmdlet.register())
"""
for name in names:
key = _normalize_cmd_name(name)
if key:
REGISTRY[key] = fn
return fn
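# Both registration styles funnel through register_callable; for example
# (illustrative, names are placeholders):
#
#   @register(["get-file", "get_file"])
#   def _run(result, args, config) -> int: ...
#
#   Cmdlet(name="get-file", ..., exec=_run).register()
#
# Either way REGISTRY["get-file"] resolves to the same callable.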
def register(names: Iterable[str]):
"""Decorator to register a function under one or more command names.
@@ -18,18 +36,16 @@ def register(names: Iterable[str]):
def _run(result, args, config) -> int: ...
"""
def _wrap(fn: Cmdlet) -> Cmdlet:
for name in names:
REGISTRY[name.replace('_', '-').lower()] = fn
return fn
return register_callable(names, fn)
return _wrap
def get(cmd_name: str) -> Cmdlet | None:
return REGISTRY.get(cmd_name.replace('_', '-').lower())
return REGISTRY.get(_normalize_cmd_name(cmd_name))
# Dynamically import all cmdlet modules in this directory (ignore files starting with _ and __init__.py)
# Cmdlets self-register when instantiated via their __init__ method
# Cmdlet classes self-register when instantiated via their __init__ method
import os
cmdlet_dir = os.path.dirname(__file__)
for filename in os.listdir(cmdlet_dir):
@@ -43,7 +59,7 @@ for filename in os.listdir(cmdlet_dir):
mod_name = filename[:-3]
# Enforce Powershell-style two-word cmdlet naming (e.g., add_file, get_file)
# Skip native/utility scripts that are not cmdlets (e.g., adjective, worker, matrix, pipe)
# Skip native/utility scripts that do not define cmdlets (e.g., adjective, worker, matrix, pipe)
if "_" not in mod_name:
continue
@@ -54,15 +70,15 @@ for filename in os.listdir(cmdlet_dir):
print(f"Error importing cmdlet '{mod_name}': {e}", file=sys.stderr)
continue
# Import and register native commands that are not considered cmdlets
# Import and register native commands that are not themselves cmdlets
try:
from cmdnats import register_native_commands as _register_native_commands
from cmdnat import register_native_commands as _register_native_commands
_register_native_commands(REGISTRY)
except Exception:
# Native commands are optional; ignore if unavailable
pass
# Import root-level modules that also register cmdlets
# Import root-level modules that register additional cmdlets
for _root_mod in ("select_cmdlet",):
try:
_import_module(_root_mod)
@@ -70,7 +86,7 @@ for _root_mod in ("select_cmdlet",):
# Allow missing optional modules
continue
# Also import helper modules that register cmdlets
# Also import helper modules that register their own cmdlets
try:
import API.alldebrid as _alldebrid
except Exception:

View File

@@ -10,7 +10,7 @@ from collections.abc import Iterable as IterableABC
from SYS.logger import log, debug
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Sequence, Set
from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Set
from dataclasses import dataclass, field
import models
@@ -94,15 +94,15 @@ class CmdletArg:
# ============================================================================
# SHARED ARGUMENTS - Reusable argument definitions across cmdlets
# SHARED ARGUMENTS - Argument definitions reused across cmdlets
# ============================================================================
class SharedArgs:
"""Registry of shared CmdletArg definitions used across multiple cmdlets.
"""Registry of shared CmdletArg definitions used across multiple cmdlet.
This class provides a centralized location for common arguments so they're
defined once and used consistently everywhere. Reduces duplication and ensures
all cmdlets handle the same arguments identically.
every cmdlet handles the same arguments identically.
Example:
CMDLET = Cmdlet(
@@ -367,8 +367,8 @@ class Cmdlet:
"""List of arguments accepted by this cmdlet"""
detail: List[str] = field(default_factory=list)
"""Detailed explanation lines (for help text)"""
exec: Optional[Any] = field(default=None)
"""The execution function: func(result, args, config) -> int"""
# Execution function: func(result, args, config) -> int
exec: Optional[Callable[[Any, Sequence[str], Dict[str, Any]], int]] = field(default=None)
@@ -399,7 +399,7 @@ class Cmdlet:
if not callable(self.exec):
return self
try:
from . import register as _register # Local import to avoid circular import cost
from . import register_callable as _register_callable # Local import to avoid circular import cost
except Exception:
return self
@@ -407,7 +407,7 @@ class Cmdlet:
if not names:
return self
_register(names)(self.exec)
_register_callable(names, self.exec)
return self
def get_flags(self, arg_name: str) -> set[str]:
@@ -599,8 +599,14 @@ def normalize_hash(hash_hex: Optional[str]) -> Optional[str]:
"""
if not isinstance(hash_hex, str):
return None
text = hash_hex.strip()
return text.lower() if text else None
text = hash_hex.strip().lower()
if not text:
return None
if len(text) != 64:
return None
if not all(ch in "0123456789abcdef" for ch in text):
return None
return text
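# Illustrative examples: normalize_hash("DEADBEEF") -> None (wrong length);
# a 64-character hex digest is lowercased and returned unchanged.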
def get_hash_for_operation(override_hash: Optional[str], result: Any, field_name: str = "hash") -> Optional[str]:
@@ -669,7 +675,7 @@ def get_field(obj: Any, field: str, default: Optional[Any] = None) -> Any:
Handles both dict.get(field) and getattr(obj, field) access patterns.
Also handles lists by accessing the first element.
For PipeObjects, checks the extra field as well.
Used throughout cmdlets to uniformly access fields from mixed types.
Used throughout the cmdlet package to uniformly access fields from mixed types.
Args:
obj: Dict, object, or list to extract from
@@ -705,7 +711,7 @@ def get_field(obj: Any, field: str, default: Optional[Any] = None) -> Any:
def should_show_help(args: Sequence[str]) -> bool:
"""Check if help flag was passed in arguments.
Consolidates repeated pattern of checking for help flags across cmdlets.
Consolidates the repeated pattern of checking for help flags across cmdlets.
Args:
args: Command arguments to check
@@ -1077,7 +1083,7 @@ def apply_preferred_title(tags: List[str], preferred: Optional[str]) -> List[str
# ============================================================================
# PIPEOBJECT UTILITIES (for chainable cmdlets and multi-action pipelines)
# PIPEOBJECT UTILITIES (for chaining cmdlets and multi-action pipelines)
# ============================================================================
def create_pipe_object_result(
@@ -1095,7 +1101,7 @@ def create_pipe_object_result(
"""Create a PipeObject-compatible result dict for pipeline chaining.
This is a helper to emit results in the standard format that downstream
cmdlets can process (filter, tag, cleanup, etc.).
cmdlets can consume (filter, tag, cleanup, etc.).
Args:
source: Source system (e.g., 'local', 'hydrus', 'download')
@@ -1350,7 +1356,7 @@ def collapse_namespace_tags(tags: Optional[Iterable[Any]], namespace: str, prefe
def collapse_namespace_tag(tags: Optional[Iterable[Any]], namespace: str, prefer: str = "last") -> list[str]:
"""Singular alias for collapse_namespace_tags.
Some cmdlets prefer the singular name; keep behavior centralized.
Some cmdlets prefer the singular name; behavior stays centralized here.
"""
return collapse_namespace_tags(tags, namespace, prefer=prefer)
@@ -1643,7 +1649,7 @@ def coerce_to_pipe_object(value: Any, default_path: Optional[str] = None) -> mod
def register_url_with_local_library(pipe_obj: models.PipeObject, config: Dict[str, Any]) -> bool:
"""Register url with a file in the local library database.
This is called automatically by download cmdlets to ensure url are persisted
This is called automatically by download cmdlets so that URLs are persisted
without requiring a separate add-url step in the pipeline.
Args:

View File

@@ -350,7 +350,7 @@ class Add_File(Cmdlet):
"""Delegate URL handling to download-media cmdlet."""
log(f"Target is a URL, delegating to download-media: {url_str}", file=sys.stderr)
# Reuse the globally-registered cmdlet instance to avoid duplicative registration
from cmdlets.download_media import CMDLET as dl_cmdlet
from cmdlet.download_media import CMDLET as dl_cmdlet
dl_args = list(args) if args else []
# Add the URL to the argument list for download-media
@@ -615,7 +615,7 @@ class Add_File(Cmdlet):
"""
try:
import asyncio
from Provider.registry import download_soulseek_file
from ProviderCore.registry import download_soulseek_file
from pathlib import Path
# Extract metadata from result
@@ -684,7 +684,7 @@ class Add_File(Cmdlet):
delete_after: bool,
) -> int:
"""Handle uploading to a file provider (e.g. 0x0)."""
from Provider.registry import get_file_provider
from ProviderCore.registry import get_file_provider
log(f"Uploading via {provider_name}: {media_path.name}", file=sys.stderr)

148
cmdlet/add_note.py Normal file
View File

@@ -0,0 +1,148 @@
from __future__ import annotations
from pathlib import Path
from typing import Any, Dict, Optional, Sequence
import sys
from SYS.logger import log
import pipeline as ctx
from ._shared import (
Cmdlet,
CmdletArg,
SharedArgs,
normalize_hash,
parse_cmdlet_args,
normalize_result_input,
should_show_help,
)
from Store import Store
from SYS.utils import sha256_file
class Add_Note(Cmdlet):
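"""Cmdlet that adds or updates a named note on a file via the selected store backend."""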
def __init__(self) -> None:
super().__init__(
name="add-note",
summary="Add or set a named note on a file in a store.",
usage="add-note -store <store> [-hash <sha256>] <name> <text...>",
alias=["set-note", "add_note"],
arg=[
SharedArgs.STORE,
SharedArgs.HASH,
CmdletArg("name", type="string", required=True, description="The note name/key to set (e.g. 'comment', 'lyric')."),
CmdletArg("text", type="string", required=True, description="Note text/content to store.", variadic=True),
],
detail=[
"- Notes are stored via the selected store backend.",
"- For lyrics: store LRC text in a note named 'lyric'.",
],
exec=self.run,
)
# Populate dynamic store choices for autocomplete
try:
SharedArgs.STORE.choices = SharedArgs.get_store_choices(None)
except Exception:
pass
self.register()
def _resolve_hash(self, raw_hash: Optional[str], raw_path: Optional[str], override_hash: Optional[str]) -> Optional[str]:
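"""Prefer the explicit -hash override, then the item hash; otherwise derive it from the path (a 64-char hex filename stem, or by hashing the file)."""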
resolved = normalize_hash(override_hash) if override_hash else normalize_hash(raw_hash)
if resolved:
return resolved
if raw_path:
try:
p = Path(str(raw_path))
stem = p.stem
if len(stem) == 64 and all(c in "0123456789abcdef" for c in stem.lower()):
return stem.lower()
if p.exists() and p.is_file():
return sha256_file(p)
except Exception:
return None
return None
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
if should_show_help(args):
log(f"Cmdlet: {self.name}\nSummary: {self.summary}\nUsage: {self.usage}")
return 0
parsed = parse_cmdlet_args(args, self)
store_override = parsed.get("store")
hash_override = parsed.get("hash")
note_name = str(parsed.get("name") or "").strip()
text_parts = parsed.get("text")
if not note_name:
log("[add_note] Error: Requires <name>", file=sys.stderr)
return 1
if isinstance(text_parts, list):
note_text = " ".join([str(p) for p in text_parts]).strip()
else:
note_text = str(text_parts or "").strip()
if not note_text:
log("[add_note] Error: Empty note text", file=sys.stderr)
return 1
results = normalize_result_input(result)
if not results:
if store_override and normalize_hash(hash_override):
results = [{"store": str(store_override), "hash": normalize_hash(hash_override)}]
else:
log("[add_note] Error: Requires piped item(s) or -store and -hash", file=sys.stderr)
return 1
store_registry = Store(config)
updated = 0
for res in results:
if not isinstance(res, dict):
ctx.emit(res)
continue
store_name = str(store_override or res.get("store") or "").strip()
raw_hash = res.get("hash")
raw_path = res.get("path")
if not store_name:
log("[add_note] Error: Missing -store and item has no store field", file=sys.stderr)
return 1
resolved_hash = self._resolve_hash(
raw_hash=str(raw_hash) if raw_hash else None,
raw_path=str(raw_path) if raw_path else None,
override_hash=str(hash_override) if hash_override else None,
)
if not resolved_hash:
log("[add_note] Warning: Item missing usable hash; skipping", file=sys.stderr)
ctx.emit(res)
continue
try:
backend = store_registry[store_name]
except Exception as exc:
log(f"[add_note] Error: Unknown store '{store_name}': {exc}", file=sys.stderr)
return 1
ok = False
try:
ok = bool(backend.set_note(resolved_hash, note_name, note_text, config=config))
except Exception as exc:
log(f"[add_note] Error: Failed to set note: {exc}", file=sys.stderr)
ok = False
if ok:
updated += 1
ctx.emit(res)
log(f"[add_note] Updated {updated} item(s)", file=sys.stderr)
return 0 if updated > 0 else 1
CMDLET = Add_Note()

View File

@@ -10,7 +10,6 @@ import sys
from SYS.logger import log
from . import register
import models
import pipeline as ctx
from API import HydrusNetwork as hydrus_wrapper
@@ -144,10 +143,18 @@ def _resolve_king_reference(king_arg: str) -> Optional[str]:
def _refresh_relationship_view_if_current(target_hash: Optional[str], target_path: Optional[str], other: Optional[str], config: Dict[str, Any]) -> None:
"""If the current subject matches the target, refresh relationships via get-relationship."""
try:
from cmdlets import get_relationship as get_rel_cmd # type: ignore
from cmdlet import get as get_cmdlet # type: ignore
except Exception:
return
get_relationship = None
try:
get_relationship = get_cmdlet("get-relationship")
except Exception:
get_relationship = None
if not callable(get_relationship):
return
try:
subject = ctx.get_last_result_subject()
if subject is None:
@@ -179,12 +186,11 @@ def _refresh_relationship_view_if_current(target_hash: Optional[str], target_pat
refresh_args: list[str] = []
if target_hash:
refresh_args.extend(["-hash", target_hash])
get_rel_cmd._run(subject, refresh_args, config)
get_relationship(subject, refresh_args, config)
except Exception:
pass
@register(["add-relationship", "add-rel"]) # primary name and alias
def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
"""Associate file relationships in Hydrus.
@@ -196,24 +202,21 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
"""
# Help
if should_show_help(_args):
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
return 0
# Parse arguments using CMDLET spec
parsed = parse_cmdlet_args(_args, CMDLET)
arg_path: Optional[Path] = None
king_arg = parsed.get("king") # New: explicit king argument
rel_type = parsed.get("type", "alt") # New: relationship type (default: alt)
king_arg = parsed.get("king")
rel_type = parsed.get("type", "alt")
if parsed:
# Get the first arg value (e.g., -path)
first_arg_name = CMDLET.get("args", [{}])[0].get("name") if CMDLET.get("args") else None
if first_arg_name and first_arg_name in parsed:
arg_value = parsed[first_arg_name]
try:
arg_path = Path(str(arg_value)).expanduser()
except Exception:
arg_path = Path(str(arg_value))
raw_path = parsed.get("path")
if raw_path:
try:
arg_path = Path(str(raw_path)).expanduser()
except Exception:
arg_path = Path(str(raw_path))
# Handle @N selection which creates a list
# Use normalize_result_input to handle both single items and lists
@@ -481,3 +484,9 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
return 1
# Register cmdlet (no legacy decorator)
CMDLET.exec = _run
CMDLET.alias = ["add-rel"]
CMDLET.register()

View File

@@ -66,21 +66,37 @@ def _apply_title_to_result(res: Any, title_value: Optional[str]) -> None:
res["columns"] = updated
def _matches_target(item: Any, target_hash: Optional[str], target_path: Optional[str]) -> bool:
"""Determine whether a result item refers to the given hash/path target (canonical fields only)."""
def _matches_target(
item: Any,
target_hash: Optional[str],
target_path: Optional[str],
target_store: Optional[str] = None,
) -> bool:
"""Determine whether a result item refers to the given target.
Important: the same hash can legitimately appear in multiple backends (the same
media stored in more than one store). When target_store is provided, it must match too.
"""
def norm(val: Any) -> Optional[str]:
return str(val).lower() if val is not None else None
target_hash_l = target_hash.lower() if target_hash else None
target_path_l = target_path.lower() if target_path else None
target_store_l = target_store.lower() if target_store else None
if isinstance(item, dict):
hashes = [norm(item.get("hash"))]
paths = [norm(item.get("path"))]
stores = [norm(item.get("store"))]
else:
hashes = [norm(get_field(item, "hash"))]
paths = [norm(get_field(item, "path"))]
stores = [norm(get_field(item, "store"))]
if target_store_l:
if target_store_l not in stores:
return False
if target_hash_l and target_hash_l in hashes:
return True
@@ -118,7 +134,12 @@ def _update_item_title_fields(item: Any, new_title: str) -> None:
item["columns"] = updated_cols
def _refresh_result_table_title(new_title: str, target_hash: Optional[str], target_path: Optional[str]) -> None:
def _refresh_result_table_title(
new_title: str,
target_hash: Optional[str],
target_store: Optional[str],
target_path: Optional[str],
) -> None:
"""Refresh the cached result table with an updated title and redisplay it."""
try:
last_table = ctx.get_last_result_table()
@@ -130,7 +151,7 @@ def _refresh_result_table_title(new_title: str, target_hash: Optional[str], targ
match_found = False
for item in items:
try:
if _matches_target(item, target_hash, target_path):
if _matches_target(item, target_hash, target_path, target_store):
_update_item_title_fields(item, new_title)
match_found = True
except Exception:
@@ -154,7 +175,7 @@ def _refresh_result_table_title(new_title: str, target_hash: Optional[str], targ
def _refresh_tag_view(res: Any, target_hash: Optional[str], store_name: Optional[str], target_path: Optional[str], config: Dict[str, Any]) -> None:
"""Refresh tag display via get-tag. Prefer current subject; fall back to direct hash refresh."""
try:
from cmdlets import get_tag as get_tag_cmd # type: ignore
from cmdlet import get as get_cmdlet # type: ignore
except Exception:
return
@@ -163,16 +184,24 @@ def _refresh_tag_view(res: Any, target_hash: Optional[str], store_name: Optional
refresh_args: List[str] = ["-hash", target_hash, "-store", store_name]
get_tag = None
try:
get_tag = get_cmdlet("get-tag")
except Exception:
get_tag = None
if not callable(get_tag):
return
try:
subject = ctx.get_last_result_subject()
if subject and _matches_target(subject, target_hash, target_path):
get_tag_cmd._run(subject, refresh_args, config)
if subject and _matches_target(subject, target_hash, target_path, store_name):
get_tag(subject, refresh_args, config)
return
except Exception:
pass
try:
get_tag_cmd._run(res, refresh_args, config)
get_tag(res, refresh_args, config)
except Exception:
pass
@@ -187,12 +216,12 @@ class Add_Tag(Cmdlet):
summary="Add tag to a file in a store.",
usage="add-tag -store <store> [-hash <sha256>] [-duplicate <format>] [-list <list>[,<list>...]] [--all] <tag>[,<tag>...]",
arg=[
CmdletArg("tag", type="string", required=False, description="One or more tag to add. Comma- or space-separated. Can also use {list_name} syntax. If omitted, uses tag from pipeline payload.", variadic=True),
SharedArgs.HASH,
SharedArgs.STORE,
CmdletArg("-duplicate", type="string", description="Copy existing tag values to new namespaces. Formats: title:album,artist (explicit) or title,album,artist (inferred)"),
CmdletArg("-list", type="string", description="Load predefined tag lists from adjective.json. Comma-separated list names (e.g., -list philosophy,occult)."),
CmdletArg("--all", type="flag", description="Include temporary files in tagging (by default, only tag non-temporary files)."),
CmdletArg("tag", type="string", required=False, description="One or more tag to add. Comma- or space-separated. Can also use {list_name} syntax. If omitted, uses tag from pipeline payload.", variadic=True),
],
detail=[
"- By default, only tag non-temporary files (from pipelines). Use --all to tag everything.",
@@ -406,15 +435,19 @@ class Add_Tag(Cmdlet):
changed = False
if removed_namespace_tag:
try:
backend.delete_tag(resolved_hash, removed_namespace_tag, config=config)
changed = True
ok_del = backend.delete_tag(resolved_hash, removed_namespace_tag, config=config)
if ok_del:
changed = True
except Exception as exc:
log(f"[add_tag] Warning: Failed deleting namespace tag: {exc}", file=sys.stderr)
if actual_tag_to_add:
try:
backend.add_tag(resolved_hash, actual_tag_to_add, config=config)
changed = True
ok_add = backend.add_tag(resolved_hash, actual_tag_to_add, config=config)
if ok_add:
changed = True
else:
log("[add_tag] Warning: Store rejected tag update", file=sys.stderr)
except Exception as exc:
log(f"[add_tag] Warning: Failed adding tag: {exc}", file=sys.stderr)
@@ -438,7 +471,7 @@ class Add_Tag(Cmdlet):
_apply_title_to_result(res, final_title)
if final_title and (not original_title or final_title.lower() != original_title.lower()):
_refresh_result_table_title(final_title, resolved_hash, raw_path)
_refresh_result_table_title(final_title, resolved_hash, str(store_name), raw_path)
if changed:
_refresh_tag_view(res, resolved_hash, str(store_name), raw_path, config)

456
cmdlet/add_tags.py Normal file
View File

@@ -0,0 +1,456 @@
from __future__ import annotations
from typing import Any, Dict, List, Sequence, Optional
from pathlib import Path
import sys
from SYS.logger import log
import models
import pipeline as ctx
from ._shared import normalize_result_input, filter_results_by_temp
from ._shared import (
Cmdlet,
CmdletArg,
SharedArgs,
normalize_hash,
parse_tag_arguments,
expand_tag_groups,
parse_cmdlet_args,
collapse_namespace_tags,
should_show_help,
get_field,
)
from Store import Store
from SYS.utils import sha256_file
def _extract_title_tag(tags: List[str]) -> Optional[str]:
"""Return the value of the first title: tag if present."""
for tag in tags:
if isinstance(tag, str) and tag.lower().startswith("title:"):
value = tag.split(":", 1)[1].strip()
if value:
return value
return None
def _apply_title_to_result(res: Any, title_value: Optional[str]) -> None:
"""Update result object/dict title fields and columns in-place."""
if not title_value:
return
if isinstance(res, models.PipeObject):
res.title = title_value
# Update columns if present (Title column assumed index 0)
if hasattr(res, "columns") and isinstance(res.columns, list) and res.columns:
label, *_ = res.columns[0]
if str(label).lower() == "title":
res.columns[0] = (res.columns[0][0], title_value)
elif isinstance(res, dict):
res["title"] = title_value
cols = res.get("columns")
if isinstance(cols, list):
updated = []
changed = False
for col in cols:
if isinstance(col, tuple) and len(col) == 2:
label, val = col
if str(label).lower() == "title":
updated.append((label, title_value))
changed = True
else:
updated.append(col)
else:
updated.append(col)
if changed:
res["columns"] = updated
def _matches_target(item: Any, target_hash: Optional[str], target_path: Optional[str]) -> bool:
"""Determine whether a result item refers to the given hash/path target (canonical fields only)."""
def norm(val: Any) -> Optional[str]:
return str(val).lower() if val is not None else None
target_hash_l = target_hash.lower() if target_hash else None
target_path_l = target_path.lower() if target_path else None
if isinstance(item, dict):
hashes = [norm(item.get("hash"))]
paths = [norm(item.get("path"))]
else:
hashes = [norm(get_field(item, "hash"))]
paths = [norm(get_field(item, "path"))]
if target_hash_l and target_hash_l in hashes:
return True
if target_path_l and target_path_l in paths:
return True
return False
def _update_item_title_fields(item: Any, new_title: str) -> None:
"""Mutate an item to reflect a new title in plain fields and columns."""
if isinstance(item, models.PipeObject):
item.title = new_title
if hasattr(item, "columns") and isinstance(item.columns, list) and item.columns:
label, *_ = item.columns[0]
if str(label).lower() == "title":
item.columns[0] = (label, new_title)
elif isinstance(item, dict):
item["title"] = new_title
cols = item.get("columns")
if isinstance(cols, list):
updated_cols = []
changed = False
for col in cols:
if isinstance(col, tuple) and len(col) == 2:
label, val = col
if str(label).lower() == "title":
updated_cols.append((label, new_title))
changed = True
else:
updated_cols.append(col)
else:
updated_cols.append(col)
if changed:
item["columns"] = updated_cols
def _refresh_result_table_title(new_title: str, target_hash: Optional[str], target_path: Optional[str]) -> None:
"""Refresh the cached result table with an updated title and redisplay it."""
try:
last_table = ctx.get_last_result_table()
items = ctx.get_last_result_items()
if not last_table or not items:
return
updated_items = []
match_found = False
for item in items:
try:
if _matches_target(item, target_hash, target_path):
_update_item_title_fields(item, new_title)
match_found = True
except Exception:
pass
updated_items.append(item)
if not match_found:
return
from result_table import ResultTable # Local import to avoid circular dependency
new_table = last_table.copy_with_title(getattr(last_table, "title", ""))
for item in updated_items:
new_table.add_result(item)
# Keep the underlying history intact; update only the overlay so @.. can
# clear the overlay then continue back to prior tables (e.g., the search list).
ctx.set_last_result_table_overlay(new_table, updated_items)
except Exception:
pass
def _refresh_tags_view(res: Any, target_hash: Optional[str], store_name: Optional[str], target_path: Optional[str], config: Dict[str, Any]) -> None:
"""Refresh tag display via get-tag. Prefer current subject; fall back to direct hash refresh."""
try:
from cmdlet import get_tag as get_tag_cmd # type: ignore
except Exception:
return
if not target_hash or not store_name:
return
refresh_args: List[str] = ["-hash", target_hash, "-store", store_name]
try:
subject = ctx.get_last_result_subject()
if subject and _matches_target(subject, target_hash, target_path):
get_tag_cmd._run(subject, refresh_args, config)
return
except Exception:
pass
try:
get_tag_cmd._run(res, refresh_args, config)
except Exception:
pass
class Add_Tag(Cmdlet):
"""Class-based add-tags cmdlet with Cmdlet metadata inheritance."""
def __init__(self) -> None:
super().__init__(
name="add-tags",
summary="Add tags to a file in a store.",
usage="add-tags -store <store> [-hash <sha256>] [-duplicate <format>] [-list <list>[,<list>...]] [--all] <tag>[,<tag>...]",
arg=[
SharedArgs.HASH,
SharedArgs.STORE,
CmdletArg("-duplicate", type="string", description="Copy existing tag values to new namespaces. Formats: title:album,artist (explicit) or title,album,artist (inferred)"),
CmdletArg("-list", type="string", description="Load predefined tag lists from adjective.json. Comma-separated list names (e.g., -list philosophy,occult)."),
CmdletArg("--all", type="flag", description="Include temporary files in tagging (by default, only tags non-temporary files)."),
CmdletArg("tags", type="string", required=False, description="One or more tags to add. Comma- or space-separated. Can also use {list_name} syntax. If omitted, uses tags from pipeline payload.", variadic=True),
],
detail=[
"- By default, only tags non-temporary files (from pipelines). Use --all to tag everything.",
"- Requires a store backend: use -store or pipe items that include store.",
"- If -hash is not provided, uses the piped item's hash (or derives from its path when possible).",
"- Multiple tags can be comma-separated or space-separated.",
"- Use -list to include predefined tag lists from adjective.json: -list philosophy,occult",
"- Tags can also reference lists with curly braces: add-tags {philosophy} \"other:tag\"",
"- Use -duplicate to copy EXISTING tag values to new namespaces:",
" Explicit format: -duplicate title:album,artist (copies title: to album: and artist:)",
" Inferred format: -duplicate title,album,artist (first is source, rest are targets)",
"- The source namespace must already exist in the file being tagged.",
"- Target namespaces that already have a value are skipped (not overwritten).",
"- You can also pass the target hash as a tag token: hash:<sha256>. This overrides -hash and is removed from the tag list.",
],
exec=self.run,
)
self.register()
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Add tags to a file with smart filtering for pipeline results."""
if should_show_help(args):
log(f"Cmdlet: {self.name}\nSummary: {self.summary}\nUsage: {self.usage}")
return 0
# Parse arguments
parsed = parse_cmdlet_args(args, self)
# Check for --all flag
include_temp = parsed.get("all", False)
# Normalize input to list
results = normalize_result_input(result)
# Filter by temp status (unless --all is set)
if not include_temp:
results = filter_results_by_temp(results, include_temp=False)
if not results:
log("No valid files to tag (all results were temporary; use --all to include temporary files)", file=sys.stderr)
return 1
# Get tags from arguments (or fallback to pipeline payload)
raw_tags = parsed.get("tags", [])
if isinstance(raw_tags, str):
raw_tags = [raw_tags]
# Fallback: if no tags provided explicitly, try to pull from first result payload
if not raw_tags and results:
first = results[0]
payload_tags = None
# Try multiple tag lookup strategies in order
tag_lookups = [
lambda x: getattr(x, "tags", None),
lambda x: x.get("tags") if isinstance(x, dict) else None,
]
for lookup in tag_lookups:
try:
payload_tags = lookup(first)
if payload_tags:
break
except (AttributeError, TypeError, KeyError):
continue
if payload_tags:
if isinstance(payload_tags, str):
raw_tags = [payload_tags]
elif isinstance(payload_tags, list):
raw_tags = payload_tags
# Handle -list argument (convert to {list} syntax)
list_arg = parsed.get("list")
if list_arg:
for l in list_arg.split(','):
l = l.strip()
if l:
raw_tags.append(f"{{{l}}}")
# Parse and expand tags
tags_to_add = parse_tag_arguments(raw_tags)
tags_to_add = expand_tag_groups(tags_to_add)
# Allow hash override via namespaced token (e.g., "hash:abcdef...")
extracted_hash = None
filtered_tags: List[str] = []
for tag in tags_to_add:
if isinstance(tag, str) and tag.lower().startswith("hash:"):
_, _, hash_val = tag.partition(":")
if hash_val:
extracted_hash = normalize_hash(hash_val.strip())
continue
filtered_tags.append(tag)
tags_to_add = filtered_tags
if not tags_to_add:
log("No tags provided to add", file=sys.stderr)
return 1
# Get other flags (hash override can come from -hash or hash: token)
hash_override = normalize_hash(parsed.get("hash")) or extracted_hash
duplicate_arg = parsed.get("duplicate")
# Tags ARE provided - apply them to each store-backed result
total_added = 0
total_modified = 0
store_override = parsed.get("store")
for res in results:
store_name: Optional[str]
raw_hash: Optional[str]
raw_path: Optional[str]
if isinstance(res, models.PipeObject):
store_name = store_override or res.store
raw_hash = res.hash
raw_path = res.path
elif isinstance(res, dict):
store_name = store_override or res.get("store")
raw_hash = res.get("hash")
raw_path = res.get("path")
else:
ctx.emit(res)
continue
if not store_name:
log("[add_tags] Error: Missing -store and item has no store field", file=sys.stderr)
return 1
resolved_hash = normalize_hash(hash_override) if hash_override else normalize_hash(raw_hash)
if not resolved_hash and raw_path:
try:
p = Path(str(raw_path))
stem = p.stem
if len(stem) == 64 and all(c in "0123456789abcdef" for c in stem.lower()):
resolved_hash = stem.lower()
elif p.exists() and p.is_file():
resolved_hash = sha256_file(p)
except Exception:
resolved_hash = None
if not resolved_hash:
log("[add_tags] Warning: Item missing usable hash (and could not derive from path); skipping", file=sys.stderr)
ctx.emit(res)
continue
try:
backend = Store(config)[str(store_name)]
except Exception as exc:
log(f"[add_tags] Error: Unknown store '{store_name}': {exc}", file=sys.stderr)
return 1
try:
existing_tags, _src = backend.get_tag(resolved_hash, config=config)
except Exception:
existing_tags = []
existing_tags_list = [t for t in (existing_tags or []) if isinstance(t, str)]
existing_lower = {t.lower() for t in existing_tags_list}
original_title = _extract_title_tag(existing_tags_list)
# Per-item tag list (do not mutate shared list)
item_tags_to_add = list(tags_to_add)
item_tags_to_add = collapse_namespace_tags(item_tags_to_add, "title", prefer="last")
# Handle -duplicate logic (copy existing tags to new namespaces)
if duplicate_arg:
parts = str(duplicate_arg).split(':')
source_ns = ""
targets: list[str] = []
if len(parts) > 1:
source_ns = parts[0]
targets = [t.strip() for t in parts[1].split(',') if t.strip()]
else:
parts2 = str(duplicate_arg).split(',')
if len(parts2) > 1:
source_ns = parts2[0]
targets = [t.strip() for t in parts2[1:] if t.strip()]
if source_ns and targets:
source_prefix = source_ns.lower() + ":"
for t in existing_tags_list:
if not t.lower().startswith(source_prefix):
continue
value = t.split(":", 1)[1]
for target_ns in targets:
new_tag = f"{target_ns}:{value}"
if new_tag.lower() not in existing_lower:
item_tags_to_add.append(new_tag)
# Namespace replacement: delete old namespace:* when adding namespace:value
removed_namespace_tags: list[str] = []
for new_tag in item_tags_to_add:
if not isinstance(new_tag, str) or ":" not in new_tag:
continue
ns = new_tag.split(":", 1)[0].strip()
if not ns:
continue
ns_prefix = ns.lower() + ":"
for t in existing_tags_list:
if t.lower().startswith(ns_prefix) and t.lower() != new_tag.lower():
removed_namespace_tags.append(t)
removed_namespace_tags = sorted({t for t in removed_namespace_tags})
actual_tags_to_add = [t for t in item_tags_to_add if isinstance(t, str) and t.lower() not in existing_lower]
changed = False
if removed_namespace_tags:
try:
backend.delete_tag(resolved_hash, removed_namespace_tags, config=config)
changed = True
except Exception as exc:
log(f"[add_tags] Warning: Failed deleting namespace tags: {exc}", file=sys.stderr)
if actual_tags_to_add:
try:
backend.add_tag(resolved_hash, actual_tags_to_add, config=config)
changed = True
except Exception as exc:
log(f"[add_tags] Warning: Failed adding tags: {exc}", file=sys.stderr)
if changed:
total_added += len(actual_tags_to_add)
total_modified += 1
try:
refreshed_tags, _src2 = backend.get_tag(resolved_hash, config=config)
refreshed_list = [t for t in (refreshed_tags or []) if isinstance(t, str)]
except Exception:
refreshed_list = existing_tags_list
# Update the result's tags using canonical field
if isinstance(res, models.PipeObject):
res.tags = refreshed_list
elif isinstance(res, dict):
res["tags"] = refreshed_list
final_title = _extract_title_tag(refreshed_list)
_apply_title_to_result(res, final_title)
if final_title and (not original_title or final_title.lower() != original_title.lower()):
_refresh_result_table_title(final_title, resolved_hash, raw_path)
if changed:
_refresh_tags_view(res, resolved_hash, str(store_name), raw_path, config)
ctx.emit(res)
log(
f"[add_tags] Added {total_added} new tag(s) across {len(results)} item(s); modified {total_modified} item(s)",
file=sys.stderr,
)
return 0
CMDLET = Add_Tag()

View File

@@ -4,12 +4,12 @@ from importlib import import_module
from typing import Any, Dict, List, Optional
try:
from cmdlets import REGISTRY
from cmdlet import REGISTRY
except Exception:
REGISTRY = {} # type: ignore
try:
from cmdnats import register_native_commands as _register_native_commands
from cmdnat import register_native_commands as _register_native_commands
except Exception:
_register_native_commands = None
@@ -33,11 +33,11 @@ def _normalize_mod_name(mod_name: str) -> str:
def import_cmd_module(mod_name: str):
"""Import a cmdlet/native module from cmdnats or cmdlets packages."""
"""Import a cmdlet/native module from cmdnat or cmdlet packages."""
normalized = _normalize_mod_name(mod_name)
if not normalized:
return None
for package in ("cmdnats", "cmdlets", None):
for package in ("cmdnat", "cmdlet", None):
try:
qualified = f"{package}.{normalized}" if package else normalized
return import_module(qualified)
@@ -122,7 +122,7 @@ def get_cmdlet_metadata(cmd_name: str) -> Optional[Dict[str, Any]]:
def list_cmdlet_metadata() -> Dict[str, Dict[str, Any]]:
"""Collect metadata for all registered cmdlets keyed by canonical name."""
"""Collect metadata for all registered cmdlet keyed by canonical name."""
ensure_registry_loaded()
entries: Dict[str, Dict[str, Any]] = {}
for reg_name in (REGISTRY or {}).keys():
@@ -186,27 +186,20 @@ def get_cmdlet_arg_flags(cmd_name: str) -> List[str]:
if not meta:
return []
raw = meta.get("raw")
if raw and hasattr(raw, "build_flag_registry"):
try:
registry = raw.build_flag_registry()
flags: List[str] = []
for flag_set in registry.values():
flags.extend(flag_set)
return sorted(set(flags))
except Exception:
pass
# Preserve the order that arguments are defined on the cmdlet (arg=[...]) so
# completions feel stable and predictable.
flags: List[str] = []
seen: set[str] = set()
for arg in meta.get("args", []):
name = arg.get("name")
name = str(arg.get("name") or "").strip().lstrip("-")
if not name:
continue
flags.append(f"-{name}")
flags.append(f"--{name}")
alias = arg.get("alias")
if alias:
flags.append(f"-{alias}")
for candidate in (f"-{name}", f"--{name}"):
if candidate not in seen:
flags.append(candidate)
seen.add(candidate)
return flags

View File

@@ -6,7 +6,6 @@ import sys
from SYS.logger import log
from . import register
from API import HydrusNetwork as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, SharedArgs, normalize_hash, should_show_help
@@ -27,11 +26,10 @@ CMDLET = Cmdlet(
)
@register(["check-file-status", "check-status", "file-status", "status"])
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Help
if should_show_help(args):
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
return 0
# Parse arguments
@@ -148,3 +146,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
import traceback
traceback.print_exc(file=sys.stderr)
return 1
# Register cmdlet (no legacy decorator)
CMDLET.exec = _run
CMDLET.alias = ["check-status", "file-status", "status"]
CMDLET.register()

View File

@@ -13,13 +13,10 @@ import json
from SYS.logger import log
from . import register
from ._shared import Cmdlet, CmdletArg, get_pipe_object_path, normalize_result_input, filter_results_by_temp, should_show_help
import models
import pipeline as pipeline_context
@register(["cleanup"])
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Remove temporary files from pipeline results.
@@ -38,7 +35,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Help
if should_show_help(args):
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
return 0
# Normalize input to list
@@ -103,5 +100,6 @@ CMDLET = Cmdlet(
"- Typical usage at end of pipeline: ... | add-tag -store local \"tag\" --all | cleanup",
"- Exit code 0 if cleanup successful, 1 if no results to process",
],
)
exec=_run,
).register()

140
cmdlet/delete_note.py Normal file
View File

@@ -0,0 +1,140 @@
from __future__ import annotations
from pathlib import Path
from typing import Any, Dict, Optional, Sequence
import sys
from SYS.logger import log
import pipeline as ctx
from ._shared import (
Cmdlet,
CmdletArg,
SharedArgs,
normalize_hash,
parse_cmdlet_args,
normalize_result_input,
get_field,
should_show_help,
)
from Store import Store
from SYS.utils import sha256_file
class Delete_Note(Cmdlet):
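"""Cmdlet that deletes a named note from a file via the selected store backend."""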
def __init__(self) -> None:
super().__init__(
name="delete-note",
summary="Delete a named note from a file in a store.",
usage="delete-note -store <store> [-hash <sha256>] <name>",
alias=["del-note"],
arg=[
SharedArgs.STORE,
SharedArgs.HASH,
CmdletArg("name", type="string", required=True, description="The note name/key to delete."),
],
detail=[
"- Deletes the named note from the selected store backend.",
],
exec=self.run,
)
try:
SharedArgs.STORE.choices = SharedArgs.get_store_choices(None)
except Exception:
pass
self.register()
def _resolve_hash(self, raw_hash: Optional[str], raw_path: Optional[str], override_hash: Optional[str]) -> Optional[str]:
resolved = normalize_hash(override_hash) if override_hash else normalize_hash(raw_hash)
if resolved:
return resolved
if raw_path:
try:
p = Path(str(raw_path))
stem = p.stem
if len(stem) == 64 and all(c in "0123456789abcdef" for c in stem.lower()):
return stem.lower()
if p.exists() and p.is_file():
return sha256_file(p)
except Exception:
return None
return None
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
if should_show_help(args):
log(f"Cmdlet: {self.name}\nSummary: {self.summary}\nUsage: {self.usage}")
return 0
parsed = parse_cmdlet_args(args, self)
store_override = parsed.get("store")
hash_override = parsed.get("hash")
note_name_override = str(parsed.get("name") or "").strip()
# Allow piping note rows from get-note: the selected item carries note_name.
inferred_note_name = str(get_field(result, "note_name") or "").strip()
if not note_name_override and not inferred_note_name:
log("[delete_note] Error: Requires <name> (or pipe a note row that provides note_name)", file=sys.stderr)
return 1
results = normalize_result_input(result)
if not results:
if store_override and normalize_hash(hash_override):
results = [{"store": str(store_override), "hash": normalize_hash(hash_override)}]
else:
log("[delete_note] Error: Requires piped item(s) or -store and -hash", file=sys.stderr)
return 1
store_registry = Store(config)
deleted = 0
for res in results:
if not isinstance(res, dict):
ctx.emit(res)
continue
# Resolve which note name to delete for this item.
note_name = note_name_override or str(res.get("note_name") or "").strip() or inferred_note_name
if not note_name:
log("[delete_note] Error: Missing note name (pass <name> or pipe a note row)", file=sys.stderr)
return 1
store_name = str(store_override or res.get("store") or "").strip()
raw_hash = res.get("hash")
raw_path = res.get("path")
if not store_name:
log("[delete_note] Error: Missing -store and item has no store field", file=sys.stderr)
return 1
resolved_hash = self._resolve_hash(
raw_hash=str(raw_hash) if raw_hash else None,
raw_path=str(raw_path) if raw_path else None,
override_hash=str(hash_override) if hash_override else None,
)
if not resolved_hash:
ctx.emit(res)
continue
try:
backend = store_registry[store_name]
except Exception as exc:
log(f"[delete_note] Error: Unknown store '{store_name}': {exc}", file=sys.stderr)
return 1
ok = False
try:
ok = bool(backend.delete_note(resolved_hash, note_name, config=config))
except Exception as exc:
log(f"[delete_note] Error: Failed to delete note: {exc}", file=sys.stderr)
ok = False
if ok:
deleted += 1
ctx.emit(res)
log(f"[delete_note] Deleted note on {deleted} item(s)", file=sys.stderr)
return 0 if deleted > 0 else 1
CMDLET = Delete_Note()

View File

@@ -10,7 +10,7 @@ import sys
from SYS.logger import log
import pipeline as ctx
from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args, normalize_result_input, get_field
from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args, normalize_result_input, get_field, should_show_help
from API.folder import LocalLibrarySearchOptimizer
from config import get_local_storage_path
@@ -18,7 +18,7 @@ from config import get_local_storage_path
def _refresh_relationship_view_if_current(target_hash: Optional[str], target_path: Optional[str], other: Optional[str], config: Dict[str, Any]) -> None:
"""If the current subject matches the target, refresh relationships via get-relationship."""
try:
from cmdlets import get_relationship as get_rel_cmd # type: ignore
from cmdlet import get as get_cmdlet # type: ignore
except Exception:
return
@@ -55,7 +55,11 @@ def _refresh_relationship_view_if_current(target_hash: Optional[str], target_pat
refresh_args: list[str] = []
if target_hash:
refresh_args.extend(["-hash", target_hash])
get_rel_cmd._run(subject, refresh_args, config)
cmd = get_cmdlet("get-relationship")
if not cmd:
return
cmd(subject, refresh_args, config)
except Exception:
pass
@@ -72,6 +76,10 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
Exit code (0 = success)
"""
try:
if should_show_help(args):
log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
return 0
# Parse arguments
parsed_args = parse_cmdlet_args(args, CMDLET)
delete_all_flag = parsed_args.get("all", False)
@@ -203,3 +211,6 @@ CMDLET = Cmdlet(
"- Delete all from file: delete-relationship -path <file> --all",
],
)
CMDLET.exec = _run
CMDLET.register()

View File

@@ -5,7 +5,6 @@ from pathlib import Path
import json
import sys
from . import register
import models
import pipeline as ctx
from ._shared import Cmdlet, CmdletArg, SharedArgs, normalize_hash, parse_tag_arguments, should_show_help, get_field
@@ -16,10 +15,18 @@ from Store import Store
def _refresh_tag_view_if_current(file_hash: str | None, store_name: str | None, path: str | None, config: Dict[str, Any]) -> None:
"""If the current subject matches the target, refresh tags via get-tag."""
try:
from cmdlets import get_tag as get_tag_cmd # type: ignore
from cmdlet import get as get_cmdlet # type: ignore
except Exception:
return
get_tag = None
try:
get_tag = get_cmdlet("get-tag")
except Exception:
get_tag = None
if not callable(get_tag):
return
try:
subject = ctx.get_last_result_subject()
if subject is None:
@@ -51,7 +58,9 @@ def _refresh_tag_view_if_current(file_hash: str | None, store_name: str | None,
refresh_args: list[str] = []
if file_hash:
refresh_args.extend(["-hash", file_hash])
get_tag_cmd._run(subject, refresh_args, config)
if store_name:
refresh_args.extend(["-store", store_name])
get_tag(subject, refresh_args, config)
except Exception:
pass
@@ -71,11 +80,10 @@ CMDLET = Cmdlet(
],
)
@register(["delete-tag"])
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Help
if should_show_help(args):
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
return 0
# Check if we have a piped TagItem with no args (i.e., from @1 | delete-tag)
@@ -319,4 +327,9 @@ def _process_deletion(tags: list[str], file_hash: str | None, path: str | None,
return False
# Register cmdlet (no legacy decorator)
CMDLET.exec = _run
CMDLET.register()

View File

@@ -96,7 +96,7 @@ class Download_File(Cmdlet):
get_search_provider = None
SearchResult = None
try:
from Provider.registry import get_search_provider as _get_search_provider, SearchResult as _SearchResult
from ProviderCore.registry import get_search_provider as _get_search_provider, SearchResult as _SearchResult
get_search_provider = _get_search_provider
SearchResult = _SearchResult

View File

@@ -26,6 +26,7 @@ import sys
import time
import traceback
from typing import Any, Dict, Iterator, List, Optional
from urllib.parse import urljoin, urlparse
import httpx
@@ -89,12 +90,13 @@ def is_url_supported_by_ytdlp(url: str) -> bool:
def list_formats(url: str, no_playlist: bool = False, playlist_items: Optional[str] = None) -> Optional[List[Dict[str, Any]]]:
_ensure_yt_dlp_ready()
try:
ydl_opts = {"quiet": True, "no_warnings": True, "socket_timeout": 30}
assert yt_dlp is not None
ydl_opts: Dict[str, Any] = {"quiet": True, "no_warnings": True, "socket_timeout": 30}
if no_playlist:
ydl_opts["noplaylist"] = True
if playlist_items:
ydl_opts["playlist_items"] = playlist_items
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
with yt_dlp.YoutubeDL(ydl_opts) as ydl: # type: ignore[arg-type]
debug(f"Fetching format list for: {url}")
info = ydl.extract_info(url, download=False)
formats = info.get("formats", [])
@@ -114,6 +116,7 @@ def list_formats(url: str, no_playlist: bool = False, playlist_items: Optional[s
"vcodec": fmt.get("vcodec", "none"),
"acodec": fmt.get("acodec", "none"),
"filesize": fmt.get("filesize"),
"abr": fmt.get("abr"),
"tbr": fmt.get("tbr"),
})
debug(f"Found {len(result_formats)} available formats")
@@ -123,6 +126,49 @@ def list_formats(url: str, no_playlist: bool = False, playlist_items: Optional[s
return None
def _pick_best_audio_format_id(formats: List[Dict[str, Any]]) -> Optional[str]:
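"""Return the format_id of the best audio-only format (highest bitrate, then filesize), or None if there is none."""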
audio_only: List[Dict[str, Any]] = []
for fmt in formats:
if not isinstance(fmt, dict):
continue
format_id = str(fmt.get("format_id") or "").strip()
if not format_id:
continue
vcodec = str(fmt.get("vcodec") or "none").lower()
acodec = str(fmt.get("acodec") or "none").lower()
if vcodec != "none":
continue
if not acodec or acodec == "none":
continue
audio_only.append(fmt)
if not audio_only:
return None
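# Rank candidates by bitrate first (tbr or abr, whichever is higher), then by filesize as a tiebreaker.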
def score(f: Dict[str, Any]) -> tuple[float, float]:
tbr = f.get("tbr")
abr = f.get("abr")
bitrate = 0.0
for candidate in (tbr, abr):
try:
if candidate is not None:
bitrate = max(bitrate, float(candidate))
except Exception:
pass
size = 0.0
try:
fs = f.get("filesize")
if fs is not None:
size = float(fs)
except Exception:
pass
return (bitrate, size)
best = max(audio_only, key=score)
best_id = str(best.get("format_id") or "").strip()
return best_id or None
def _download_with_sections_via_cli(url: str, ytdl_options: Dict[str, Any], sections: List[str], quiet: bool = False) -> tuple[Optional[str], Dict[str, Any]]:
sections_list = ytdl_options.get("download_sections", [])
if not sections_list:
@@ -173,6 +219,10 @@ def _download_with_sections_via_cli(url: str, ytdl_options: Dict[str, Any], sect
cmd.extend(["--cookies", cookies_path])
if ytdl_options.get("noplaylist"):
cmd.append("--no-playlist")
# Apply clip/section selection
cmd.extend(["--download-sections", section])
cmd.append(url)
if not quiet:
debug(f"Running yt-dlp for section: {section}")
@@ -511,7 +561,7 @@ def _download_direct_file(
return DownloadMediaResult(
path=file_path,
info=info,
tags=tags,
tag=tags,
source_url=url,
hash_value=hash_value,
)
@@ -865,7 +915,7 @@ def download_media(
return DownloadMediaResult(
path=media_path,
info=info_dict,
tags=tags,
tag=tags,
source_url=opts.url,
hash_value=file_hash,
paths=media_paths, # Include all section files if present
@@ -944,7 +994,7 @@ def download_media(
return DownloadMediaResult(
path=media_path,
info=entry,
tags=tags,
tag=tags,
source_url=source_url,
hash_value=hash_value,
)
@@ -1001,15 +1051,12 @@ class Download_Media(Cmdlet):
name="download-media",
summary="Download media from streaming sites (YouTube, Twitch, etc.)",
usage="download-media <url> [options] or search-file | download-media [options]",
alias=["dl-media", "download-ytdlp"],
alias=[""],
arg=[
CmdletArg(name="url", type="string", required=False, description="URL to download (yt-dlp supported sites only)", variadic=True),
CmdletArg(name="-url", type="string", description="URL to download (alias for positional argument)", variadic=True),
SharedArgs.URL,
CmdletArg(name="audio", type="flag", alias="a", description="Download audio only"),
CmdletArg(name="video", type="flag", alias="v", description="Download video (default)"),
CmdletArg(name="format", type="string", alias="fmt", description="Explicit yt-dlp format selector"),
CmdletArg(name="clip", type="string", description="Extract time range: MM:SS-MM:SS"),
CmdletArg(name="section", type="string", description="Download sections: TIME_RANGE[,TIME_RANGE...]"),
CmdletArg(name="item", type="string", description="Item selection for playlists/formats"),
],
detail=["Download media from streaming sites using yt-dlp.", "For direct file downloads, use download-file."],
@@ -1073,9 +1120,10 @@ class Download_Media(Cmdlet):
# Get other options
clip_spec = parsed.get("clip")
section_spec = parsed.get("section")
# Parse clip/section ranges if specified
mode = "audio" if parsed.get("audio") else "video"
# Parse clip range if specified
clip_range = None
if clip_spec:
clip_range = self._parse_time_range(clip_spec)
@@ -1083,19 +1131,19 @@ class Download_Media(Cmdlet):
log(f"Invalid clip format: {clip_spec}", file=sys.stderr)
return 1
section_ranges = None
if section_spec:
section_ranges = self._parse_section_ranges(section_spec)
if not section_ranges:
log(f"Invalid section format: {section_spec}", file=sys.stderr)
return 1
# Check if we need to show format selection
playlist_items = str(parsed.get("item")) if parsed.get("item") else None
ytdl_format = parsed.get("format")
# If no -item, no explicit -format specified, and single URL, check for multiple formats/playlist
if not playlist_items and not ytdl_format and len(supported_url) == 1:
# If no -item, no explicit -format specified, and single URL, show the format table.
# Do NOT stop to show formats when -audio is used (auto-pick) or when -clip is used.
if (
mode != "audio"
and not clip_spec
and not playlist_items
and not ytdl_format
and len(supported_url) == 1
):
url = supported_url[0]
formats = list_formats(url, no_playlist=False)
@@ -1241,9 +1289,8 @@ class Download_Media(Cmdlet):
# Download each URL
downloaded_count = 0
clip_sections_spec = self._build_clip_sections_spec(clip_range, section_ranges)
clip_sections_spec = self._build_clip_sections_spec(clip_range)
quiet_mode = bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False
mode = "audio" if parsed.get("audio") else "video"
for url in supported_url:
try:
@@ -1263,6 +1310,14 @@ class Download_Media(Cmdlet):
actual_format = playlist_items
actual_playlist_items = None
# Auto-pick best audio format when -audio is used and no explicit format is given.
if mode == "audio" and not actual_format:
chosen = None
formats = list_formats(url, no_playlist=False, playlist_items=actual_playlist_items)
if formats:
chosen = _pick_best_audio_format_id(formats)
actual_format = chosen or "bestaudio/best"
opts = DownloadOptions(
url=url,
mode=mode,
@@ -1358,31 +1413,14 @@ class Download_Media(Cmdlet):
except Exception:
return None
def _parse_section_ranges(self, spec: str) -> Optional[List[tuple]]:
"""Parse 'RANGE1,RANGE2,...' where each RANGE is 'MM:SS-MM:SS'."""
try:
ranges = []
for range_spec in spec.split(","):
r = self._parse_time_range(range_spec.strip())
if r is None:
return None
ranges.append(r)
return ranges if ranges else None
except Exception:
return None
def _build_clip_sections_spec(
self,
clip_range: Optional[tuple],
section_ranges: Optional[List[tuple]],
) -> Optional[str]:
"""Convert parsed clip/section ranges into downloader spec (seconds)."""
"""Convert parsed clip range into downloader spec (seconds)."""
ranges: List[str] = []
if clip_range:
ranges.append(f"{clip_range[0]}-{clip_range[1]}")
if section_ranges:
for start, end in section_ranges:
ranges.append(f"{start}-{end}")
return ",".join(ranges) if ranges else None
def _build_pipe_object(self, download_result: Any, url: str, opts: DownloadOptions) -> Dict[str, Any]:

143
cmdlet/get_note.py Normal file
View File

@@ -0,0 +1,143 @@
from __future__ import annotations
from pathlib import Path
from typing import Any, Dict, Optional, Sequence
import sys
from SYS.logger import log
import pipeline as ctx
from ._shared import (
Cmdlet,
CmdletArg,
SharedArgs,
normalize_hash,
parse_cmdlet_args,
normalize_result_input,
should_show_help,
)
from Store import Store
from SYS.utils import sha256_file
class Get_Note(Cmdlet):
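"""Cmdlet that lists the notes stored on a file, emitting one result row per note."""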
def __init__(self) -> None:
super().__init__(
name="get-note",
summary="List notes on a file in a store.",
usage="get-note -store <store> [-hash <sha256>]",
alias=["get-notes", "get_note"],
arg=[
SharedArgs.STORE,
SharedArgs.HASH,
],
detail=[
"- Notes are retrieved via the selected store backend.",
"- Lyrics are stored in a note named 'lyric'.",
],
exec=self.run,
)
try:
SharedArgs.STORE.choices = SharedArgs.get_store_choices(None)
except Exception:
pass
self.register()
def _resolve_hash(self, raw_hash: Optional[str], raw_path: Optional[str], override_hash: Optional[str]) -> Optional[str]:
resolved = normalize_hash(override_hash) if override_hash else normalize_hash(raw_hash)
if resolved:
return resolved
if raw_path:
try:
p = Path(str(raw_path))
stem = p.stem
if len(stem) == 64 and all(c in "0123456789abcdef" for c in stem.lower()):
return stem.lower()
if p.exists() and p.is_file():
return sha256_file(p)
except Exception:
return None
return None
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
if should_show_help(args):
log(f"Cmdlet: {self.name}\nSummary: {self.summary}\nUsage: {self.usage}")
return 0
parsed = parse_cmdlet_args(args, self)
store_override = parsed.get("store")
hash_override = parsed.get("hash")
results = normalize_result_input(result)
if not results:
if store_override and normalize_hash(hash_override):
results = [{"store": str(store_override), "hash": normalize_hash(hash_override)}]
else:
log("[get_note] Error: Requires piped item(s) or -store and -hash", file=sys.stderr)
return 1
store_registry = Store(config)
any_notes = False
for res in results:
if not isinstance(res, dict):
continue
store_name = str(store_override or res.get("store") or "").strip()
raw_hash = res.get("hash")
raw_path = res.get("path")
if not store_name:
log("[get_note] Error: Missing -store and item has no store field", file=sys.stderr)
return 1
resolved_hash = self._resolve_hash(
raw_hash=str(raw_hash) if raw_hash else None,
raw_path=str(raw_path) if raw_path else None,
override_hash=str(hash_override) if hash_override else None,
)
if not resolved_hash:
continue
try:
backend = store_registry[store_name]
except Exception as exc:
log(f"[get_note] Error: Unknown store '{store_name}': {exc}", file=sys.stderr)
return 1
notes = {}
try:
notes = backend.get_note(resolved_hash, config=config) or {}
except Exception:
notes = {}
if not notes:
continue
any_notes = True
# Emit each note as its own row so CLI renders a proper note table
for k in sorted(notes.keys(), key=lambda x: str(x).lower()):
v = notes.get(k)
raw_text = str(v or "")
preview = " ".join(raw_text.replace("\r", "").split("\n"))
ctx.emit(
{
"store": store_name,
"hash": resolved_hash,
"note_name": str(k),
"note_text": raw_text,
"columns": [
("Name", str(k)),
("Text", preview.strip()),
],
}
)
if not any_notes:
ctx.emit("No notes found.")
return 0
CMDLET = Get_Note()

View File

@@ -33,7 +33,7 @@ CMDLET = Cmdlet(
def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
# Help
if should_show_help(_args):
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
return 0
# Parse -hash override
@@ -423,3 +423,7 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
return 0
CMDLET.exec = _run
CMDLET.register()

View File

@@ -39,7 +39,7 @@ from dataclasses import dataclass
@dataclass
class TagItem:
"""Tag item for display in ResultTable and piping to other cmdlets.
"""Tag item for display in ResultTable and piping to other cmdlet.
Allows tags to be selected and piped like:
- delete-tag @{3,4,9} (delete tags at indices 3, 4, 9)

View File

@@ -3,16 +3,25 @@ from __future__ import annotations
from typing import Any, Dict, Optional, Sequence, List
from pathlib import Path
import json
import sys
from SYS.logger import log
from cmdlets.download_media import download_media
from models import DownloadOptions
from config import resolve_output_dir
import subprocess as _subprocess
import shutil as _shutil
from ._shared import create_pipe_object_result, parse_cmdlet_args
from ._shared import (
Cmdlet,
CmdletArg,
create_pipe_object_result,
get_field,
get_pipe_object_hash,
get_pipe_object_path,
normalize_result_input,
parse_cmdlet_args,
should_show_help,
)
import pipeline as ctx
try:
from PyPDF2 import PdfWriter, PdfReader
@@ -27,31 +36,29 @@ try:
read_tags_from_file,
write_tags_to_file,
dedup_tags_by_namespace,
merge_multiple_tag_lists,
write_tags,
write_metadata
)
HAS_METADATA_API = True
except ImportError:
HAS_METADATA_API = False
from . import register
from ._shared import (
Cmdlet,
CmdletArg,
normalize_result_input,
get_pipe_object_path,
get_pipe_object_hash,
should_show_help,
get_field,
)
import models
import pipeline as ctx
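# Fallback implementations used when the unified metadata API is unavailable, so merge-file still runs without sidecar tag support.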
def read_tags_from_file(file_path: Path) -> List[str]:
return []
def write_tags_to_file(
file_path: Path,
tags: List[str],
source_hashes: Optional[List[str]] = None,
url: Optional[List[str]] = None,
append: bool = False,
) -> bool:
return False
def dedup_tags_by_namespace(tags: List[str]) -> List[str]:
return tags
def write_metadata(*_args: Any, **_kwargs: Any) -> None:
return None
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
@@ -59,7 +66,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Parse help
if should_show_help(args):
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
return 0
# Parse arguments
@@ -95,7 +102,6 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Extract file paths and metadata from result objects
source_files: List[Path] = []
source_tags_files: List[Path] = []
source_hashes: List[str] = []
source_url: List[str] = []
source_tags: List[str] = [] # NEW: collect tags from source files
@@ -111,37 +117,14 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
if candidate.exists():
target_path = candidate
# Check for playlist item that needs downloading
if not target_path and isinstance(item, dict) and item.get('__action', '').startswith('playlist-item:'):
try:
playlist_url = item.get('__file_path')
item_idx = int(item['__action'].split(':')[1])
log(f"Downloading playlist item #{item_idx} from {playlist_url}...", flush=True)
output_dir = resolve_output_dir(config)
opts = DownloadOptions(
url=playlist_url,
output_dir=output_dir,
playlist_items=str(item_idx),
mode="audio" if format_spec == "m4b" else "auto" # Infer mode if possible
)
res = download_media(opts)
if res and res.path and res.path.exists():
target_path = res.path
log(f"✓ Downloaded: {target_path.name}", flush=True)
except Exception as e:
log(f"Failed to download playlist item: {e}", file=sys.stderr)
if target_path and target_path.exists():
source_files.append(target_path)
# Track the .tag file for this source
# Track tags from the .tag sidecar for this source (if present)
tags_file = target_path.with_suffix(target_path.suffix + '.tag')
if tags_file.exists():
source_tags_files.append(tags_file)
if tags_file.exists() and HAS_METADATA_API:
try:
source_tags.extend(read_tags_from_file(tags_file) if HAS_METADATA_API else [])
source_tags.extend(read_tags_from_file(tags_file) or [])
except Exception:
pass
@@ -201,7 +184,8 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Determine output path
if output_override:
if output_override.is_dir():
base_name = _sanitize_name(getattr(files_to_merge[0], 'title', 'merged'))
base_title = get_field(files_to_merge[0], 'title', 'merged')
base_name = _sanitize_name(str(base_title or 'merged'))
output_path = output_override / f"{base_name} (merged).{_ext_for_format(output_format)}"
else:
output_path = output_override
@@ -231,12 +215,11 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
log(f"Merged {len(source_files)} files into: {output_path}", file=sys.stderr)
merged_tags: List[str] = [f"title:{output_path.stem}"]
# Create .tag sidecar file for the merged output using unified API
tags_path = output_path.with_suffix(output_path.suffix + '.tag')
try:
# Start with title tag
merged_tags = [f"title:{output_path.stem}"]
# Merge tags from source files using metadata API
if source_tags and HAS_METADATA_API:
# Use dedup function to normalize and deduplicate
@@ -281,8 +264,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Also create .metadata file using centralized function
try:
write_metadata(output_path, source_hashes[0] if source_hashes else None, source_url, source_relationships)
log(f"Created metadata: {output_path.name}.metadata", file=sys.stderr)
if HAS_METADATA_API and write_metadata:
write_metadata(output_path, source_hashes[0] if source_hashes else None, source_url, source_relationships)
log(f"Created metadata: {output_path.name}.metadata", file=sys.stderr)
except Exception as e:
log(f"Warning: Could not create metadata file: {e}", file=sys.stderr)
@@ -312,23 +296,26 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Still emit a string representation for feedback
ctx.emit(f"Merged: {output_path}")
# Delete source files if requested
# Always delete source files if they were downloaded playlist items (temp files)
# We can detect this if they are in the temp download directory or if we tracked them
if delete_after or True: # Force delete for now as merge consumes them
# First delete all .tag files
for tags_file in source_tags_files:
try:
tags_file.unlink()
log(f"Deleted: {tags_file.name}", file=sys.stderr)
except Exception as e:
log(f"Warning: Could not delete {tags_file.name}: {e}", file=sys.stderr)
# Then delete all source files
# Cleanup
# - Delete source files only when -delete is set.
if delete_after:
for f in source_files:
try:
f.unlink()
log(f"Deleted: {f.name}", file=sys.stderr)
# Delete sidecar tags for the source (if any)
tag_file = f.with_suffix(f.suffix + '.tag')
if tag_file.exists():
try:
tag_file.unlink()
log(f"Deleted: {tag_file.name}", file=sys.stderr)
except Exception as e:
log(f"Warning: Could not delete {tag_file.name}: {e}", file=sys.stderr)
except Exception:
pass
try:
if f.exists():
f.unlink()
log(f"Deleted: {f.name}", file=sys.stderr)
except Exception as e:
log(f"Warning: Could not delete {f.name}: {e}", file=sys.stderr)
@@ -348,6 +335,7 @@ def _ext_for_format(fmt: str) -> str:
format_map = {
'mp3': 'mp3',
'm4a': 'm4a',
'm4b': 'm4b',
'aac': 'aac',
'opus': 'opus',
'mka': 'mka', # Matroska Audio - EXCELLENT chapter support (recommended)
@@ -361,58 +349,6 @@ def _ext_for_format(fmt: str) -> str:
return format_map.get(fmt.lower(), 'mka')
def _add_chapters_to_m4a(file_path: Path, chapters: List[Dict]) -> bool:
"""Add chapters to an M4A file using mutagen.
Args:
file_path: Path to M4A file
chapters: List of chapter dicts with 'title', 'start_ms', 'end_ms'
Returns:
True if successful, False otherwise
"""
import logging
logger = logging.getLogger(__name__)
if not chapters:
return True
try:
from mutagen.mp4 import MP4, Atom
from mutagen.mp4._util import Atom as MP4Atom
except ImportError:
logger.warning("[merge-file] mutagen not available for chapter writing")
return False
try:
# Load the MP4 file
audio = MP4(str(file_path))
# Build the chapter atom
# MP4 chapters are stored in a 'chap' atom with specific structure
chapter_data = b''
for i, chapter in enumerate(chapters, 1):
# Each chapter entry: 10-byte header + title
title = chapter.get('title', f'Chapter {i}').encode('utf-8')
start_time_ms = int(chapter.get('start_ms', 0))
# Chapter atom format for M4A:
# (uint32: size)(uint32: 'chap')(uint8: reserved)(uint24: atom type) + more...
# This is complex, so we'll use a simpler atom approach
pass
# Unfortunately, mutagen doesn't have built-in chapter writing for MP4
# Chapter writing requires low-level atom manipulation
# For now, we'll just return and note this limitation
logger.info("[merge-file] MP4 chapter writing via mutagen not fully supported")
return False
except Exception as e:
logger.warning(f"[merge-file] Error writing chapters: {e}")
return False
def _merge_audio(files: List[Path], output: Path, output_format: str) -> bool:
"""Merge audio files with chapters based on file boundaries."""
import logging
@@ -529,7 +465,7 @@ def _merge_audio(files: List[Path], output: Path, output_format: str) -> bool:
# Audio codec selection for first input
if output_format == 'mp3':
cmd.extend(['-c:a', 'libmp3lame', '-q:a', '2'])
elif output_format == 'm4a':
elif output_format in {'m4a', 'm4b'}:
# Use copy if possible (much faster), otherwise re-encode
# Check if inputs are already AAC/M4A to avoid re-encoding
# For now, default to copy if format matches, otherwise re-encode
@@ -682,7 +618,7 @@ def _merge_audio(files: List[Path], output: Path, output_format: str) -> bool:
except Exception as e:
logger.exception(f"[merge-file] Chapter embedding failed: {e}")
log(f"Warning: Chapter embedding failed, using merge without chapters", file=sys.stderr)
elif output_format == 'm4a' or output.suffix.lower() in ['.m4a', '.mp4']:
elif output_format in {'m4a', 'm4b'} or output.suffix.lower() in ['.m4a', '.m4b', '.mp4']:
# MP4/M4A format has native chapter support via iTunes metadata atoms
log(f"Embedding chapters into MP4 container...", file=sys.stderr)
logger.info(f"[merge-file] Adding chapters to M4A/MP4 file via iTunes metadata")
@@ -833,16 +769,12 @@ def _merge_text(files: List[Path], output: Path) -> bool:
def _merge_pdf(files: List[Path], output: Path) -> bool:
"""Merge PDF files."""
if not HAS_PYPDF2:
if (not HAS_PYPDF2) or (PdfWriter is None) or (PdfReader is None):
log("PyPDF2 is required for PDF merging. Install with: pip install PyPDF2", file=sys.stderr)
return False
try:
if HAS_PYPDF2:
writer = PdfWriter()
else:
log("PyPDF2 is required for PDF merging. Install with: pip install PyPDF2", file=sys.stderr)
return False
writer = PdfWriter()
for f in files:
try:
@@ -866,11 +798,11 @@ def _merge_pdf(files: List[Path], output: Path) -> bool:
CMDLET = Cmdlet(
name="merge-file",
summary="Merge multiple files into a single output file. Supports audio, video, PDF, and text merging with optional cleanup.",
usage="merge-file [-delete] [-output <path>] [-format <auto|mp3|aac|opus|mp4|mkv|pdf|txt>]",
usage="merge-file [-delete] [-output <path>] [-format <auto|mka|m4a|m4b|mp3|aac|opus|mp4|mkv|pdf|txt>]",
arg=[
CmdletArg("-delete", type="flag", description="Delete source files after successful merge."),
CmdletArg("-output", description="Override output file path."),
CmdletArg("-format", description="Output format (auto/mp3/aac/opus/mp4/mkv/pdf/txt). Default: auto-detect from first file."),
CmdletArg("-format", description="Output format (auto/mka/m4a/m4b/mp3/aac/opus/mp4/mkv/pdf/txt). Default: auto-detect from first file."),
],
detail=[
"- Pipe multiple files: search-file query | [1,2,3] | merge-file",
@@ -882,3 +814,6 @@ CMDLET = Cmdlet(
"- -delete flag removes all source files after successful merge.",
],
)
CMDLET.exec = _run
CMDLET.register()
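The m4a/m4b branches above rely on ffmpeg for the merge itself and on the MP4 container for chapter storage. A minimal sketch of the standard concat + FFMETADATA route follows, assuming placeholder paths, durations, and titles; this is not the cmdlet's own helper, and paths are assumed to contain no single quotes.

import subprocess
from pathlib import Path

def merge_audio_with_chapters(files: list[Path], durations_ms: list[int], output: Path) -> None:
    """Concatenate audio files and mark each source file as a chapter."""
    list_file = output.parent / (output.name + ".files.txt")
    meta_file = output.parent / (output.name + ".ffmeta")
    # concat demuxer input: one "file '<path>'" line per source
    list_file.write_text("\n".join(f"file '{p.as_posix()}'" for p in files), encoding="utf-8")
    # FFMETADATA1 chapter blocks, one per source-file boundary
    lines = [";FFMETADATA1"]
    start = 0
    for path, dur in zip(files, durations_ms):
        end = start + dur
        lines += ["[CHAPTER]", "TIMEBASE=1/1000", f"START={start}", f"END={end}", f"title={path.stem}"]
        start = end
    meta_file.write_text("\n".join(lines) + "\n", encoding="utf-8")
    subprocess.run(
        ["ffmpeg", "-y",
         "-f", "concat", "-safe", "0", "-i", str(list_file),  # input 0: the sources
         "-i", str(meta_file), "-map_metadata", "1",          # input 1: chapters/metadata
         "-c", "copy", str(output)],
        check=True,
    )

Stream copy only holds when the inputs already share codec parameters; as the codec-selection hunk above notes, the real merge path re-encodes (libmp3lame, AAC, etc.) when they do not.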

View File

@@ -8,8 +8,6 @@ from __future__ import annotations
import contextlib
import hashlib
import importlib
import json
import sys
import time
import httpx
@@ -21,10 +19,7 @@ from urllib.parse import urlsplit, quote, urljoin
from SYS.logger import log, debug
from API.HTTP import HTTPClient
from SYS.utils import ensure_directory, unique_path, unique_preserve_order
from . import register
from ._shared import Cmdlet, CmdletArg, SharedArgs, create_pipe_object_result, normalize_result_input, should_show_help, get_field
import models
import pipeline as pipeline_context
# ============================================================================
@@ -40,13 +35,17 @@ import pipeline as pipeline_context
try:
from playwright.sync_api import (
TimeoutError as PlaywrightTimeoutError,
ViewportSize,
sync_playwright,
)
except Exception as exc:
raise RuntimeError(
"playwright is required for screenshot capture; install with 'pip install playwright'"
) from exc
HAS_PLAYWRIGHT = True
except Exception:
HAS_PLAYWRIGHT = False
PlaywrightTimeoutError = TimeoutError # type: ignore
def sync_playwright(*_args: Any, **_kwargs: Any) -> Any: # type: ignore
raise RuntimeError(
"playwright is required for screenshot capture; install with: pip install playwright; then: playwright install"
)
try:
from config import resolve_output_dir
@@ -69,7 +68,7 @@ USER_AGENT = (
"Chrome/120.0.0.0 Safari/537.36"
)
DEFAULT_VIEWPORT: ViewportSize = {"width": 1280, "height": 1200}
DEFAULT_VIEWPORT: dict[str, int] = {"width": 1280, "height": 1200}
ARCHIVE_TIMEOUT = 30.0
# Configurable selectors for specific websites
@@ -114,7 +113,7 @@ class ScreenshotOptions:
"""Options controlling screenshot capture and post-processing."""
output_dir: Path
url: Sequence[str] = ()
url: str = ""
output_path: Optional[Path] = None
full_page: bool = True
headless: bool = True
@@ -124,7 +123,6 @@ class ScreenshotOptions:
tag: Sequence[str] = ()
archive: bool = False
archive_timeout: float = ARCHIVE_TIMEOUT
url: Sequence[str] = ()
output_format: Optional[str] = None
prefer_platform_target: bool = False
target_selectors: Optional[Sequence[str]] = None
@@ -470,10 +468,10 @@ def _capture_screenshot(options: ScreenshotOptions) -> ScreenshotResult:
warnings: List[str] = []
_capture(options, destination, warnings)
# Build URL list from provided options.url (sequence) and deduplicate
url = unique_preserve_order(list(options.url))
# Build URL list from captured url and any archives
url: List[str] = [options.url] if options.url else []
archive_url: List[str] = []
if options.archive:
if options.archive and options.url:
debug(f"[_capture_screenshot] Archiving enabled for {options.url}")
archives, archive_warnings = _archive_url(options.url, options.archive_timeout)
archive_url.extend(archives)
@@ -518,9 +516,16 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Help check
if should_show_help(args):
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
return 0
if not HAS_PLAYWRIGHT:
log(
"playwright is required for screenshot capture; install with: pip install playwright; then: playwright install",
file=sys.stderr,
)
return 1
# ========================================================================
# ARGUMENT PARSING
# ========================================================================
@@ -627,7 +632,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
try:
# Create screenshot with provided options
options = ScreenshotOptions(
url=[url],
url=url,
output_dir=screenshot_dir,
output_format=format_name,
archive=archive_enabled,
@@ -672,7 +677,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
}
)
# Emit the result so downstream cmdlets (like add-file) can use it
# Emit the result so downstream cmdlets (like add-file) can use it
pipeline_context.emit(pipe_obj)
all_emitted.append(pipe_obj)
@@ -711,3 +716,6 @@ CMDLET = Cmdlet(
"""]
)
CMDLET.exec = _run
CMDLET.register()
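For orientation, a stripped-down sketch of the capture flow after the optional-import change above: playwright is probed once at import time, and capture fails with the same actionable message when it is missing. The URL, destination path, and helper name here are illustrative, not the cmdlet's API.

from pathlib import Path

try:
    from playwright.sync_api import sync_playwright
    HAS_PLAYWRIGHT = True
except Exception:
    HAS_PLAYWRIGHT = False

def capture(url: str, dest: Path, full_page: bool = True) -> Path:
    if not HAS_PLAYWRIGHT:
        raise RuntimeError(
            "playwright is required for screenshot capture; "
            "install with: pip install playwright; then: playwright install"
        )
    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        page = browser.new_page(viewport={"width": 1280, "height": 1200})
        try:
            page.goto(url, wait_until="load")
            page.screenshot(path=str(dest), full_page=full_page)
        finally:
            browser.close()
    return dest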

View File

@@ -8,7 +8,7 @@ import uuid
import importlib
from SYS.logger import log, debug
from Provider.registry import get_search_provider, list_search_providers
from ProviderCore.registry import get_search_provider, list_search_providers
from ._shared import Cmdlet, CmdletArg, should_show_help
import pipeline as ctx
@@ -49,7 +49,7 @@ class Search_Provider(Cmdlet):
"- soulseek: Plain text search",
"- youtube: Plain text search",
"",
"Results can be piped to other cmdlets:",
"Results can be piped to other cmdlet:",
" search-provider bandcamp \"artist:grace\" | @1 | download-data",
],
exec=self.run

View File

@@ -304,7 +304,7 @@ class Search_Store(Cmdlet):
continue
normalized = self._ensure_storage_columns(item_dict)
# Make hash/store available for downstream cmdlets without rerunning search
# Make hash/store available for downstream cmdlets without rerunning search
hash_val = normalized.get("hash")
store_val = normalized.get("store") or item_dict.get("store")
if hash_val and not normalized.get("hash"):

View File

@@ -11,7 +11,6 @@ import re
from SYS.logger import log, debug
from SYS.utils import sha256_file
from . import register
from ._shared import (
Cmdlet,
CmdletArg,
@@ -112,7 +111,6 @@ def _trim_media(input_path: Path, output_path: Path, start_time: str, end_time:
log(f"Error parsing time or running ffmpeg: {e}", file=sys.stderr)
return False
@register(["trim-file"])
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Trim a media file."""
# Parse arguments
@@ -292,3 +290,8 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
log(f"Failed to trim {path_obj.name}", file=sys.stderr)
return 0 if success_count > 0 else 1
# Register cmdlet (no legacy decorator)
CMDLET.exec = _run
CMDLET.register()
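The pattern this and the surrounding files converge on (in place of the removed @register decorator) is a module-level Cmdlet descriptor that binds exec and self-registers. A bare-bones sketch using only the fields visible in these diffs; the cmdlet name and argument are invented for illustration.

from cmdlet._shared import Cmdlet, CmdletArg

def _run(result, args, config) -> int:
    # ... cmdlet body ...
    return 0

CMDLET = Cmdlet(
    name="example-cmdlet",
    summary="Illustrative cmdlet skeleton.",
    usage="example-cmdlet [-flag]",
    arg=[CmdletArg("-flag", type="flag", description="Illustrative flag.")],
    detail=["- Placeholder detail line."],
)
CMDLET.exec = _run
CMDLET.register()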

View File

@@ -1,106 +0,0 @@
from __future__ import annotations
from typing import Any, Dict, Sequence
import json
from . import register
import models
import pipeline as ctx
from API import HydrusNetwork as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, normalize_hash, should_show_help
from SYS.logger import log
CMDLET = Cmdlet(
name="add-note",
summary="Add or set a note on a Hydrus file.",
usage="add-note [-hash <sha256>] <name> <text>",
arg=[
CmdletArg("hash", type="string", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
CmdletArg("name", type="string", required=True, description="The note name/key to set (e.g. 'comment', 'source', etc.)."),
CmdletArg("text", type="string", required=True, description="The note text/content to store.", variadic=True),
],
detail=[
"- Notes are stored in the 'my notes' service by default.",
],
)
@register(["add-note", "set-note", "add_note"]) # aliases
def add(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Help
if should_show_help(args):
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
return 0
from ._shared import parse_cmdlet_args
parsed = parse_cmdlet_args(args, CMDLET)
override_hash = parsed.get("hash")
name = parsed.get("name")
text_parts = parsed.get("text")
if not name:
log("Requires a note name")
return 1
name = str(name).strip()
if isinstance(text_parts, list):
text = " ".join(text_parts).strip()
else:
text = str(text_parts or "").strip()
if not text:
log("Empty note text")
return 1
# Handle @N selection which creates a list - extract the first item
if isinstance(result, list) and len(result) > 0:
result = result[0]
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(getattr(result, "hash_hex", None))
if not hash_hex:
log("Selected result does not include a Hydrus hash")
return 1
try:
client = hydrus_wrapper.get_client(config)
except Exception as exc:
log(f"Hydrus client unavailable: {exc}")
return 1
if client is None:
log("Hydrus client unavailable")
return 1
try:
service_name = "my notes"
client.set_notes(hash_hex, {name: text}, service_name)
except Exception as exc:
log(f"Hydrus add-note failed: {exc}")
return 1
# Refresh notes view if we're operating on the currently selected subject
try:
from cmdlets import get_note as get_note_cmd # type: ignore
except Exception:
get_note_cmd = None
if get_note_cmd:
try:
subject = ctx.get_last_result_subject()
if subject is not None:
def norm(val: Any) -> str:
return str(val).lower()
target_hash = norm(hash_hex) if hash_hex else None
subj_hashes = []
if isinstance(subject, dict):
subj_hashes = [norm(v) for v in [subject.get("hydrus_hash"), subject.get("hash"), subject.get("hash_hex"), subject.get("file_hash")] if v]
else:
subj_hashes = [norm(getattr(subject, f, None)) for f in ("hydrus_hash", "hash", "hash_hex", "file_hash") if getattr(subject, f, None)]
if target_hash and target_hash in subj_hashes:
get_note_cmd.get_notes(subject, ["-hash", hash_hex], config)
return 0
except Exception:
pass
ctx.emit(f"Added note '{name}' ({len(text)} chars)")
return 0

View File

@@ -1,102 +0,0 @@
from __future__ import annotations
from typing import Any, Dict, Sequence
import json
import pipeline as ctx
from API import HydrusNetwork as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, normalize_hash, get_hash_for_operation, fetch_hydrus_metadata, should_show_help, get_field
from SYS.logger import log
CMDLET = Cmdlet(
name="delete-note",
summary="Delete a named note from a Hydrus file.",
usage="i | del-note [-hash <sha256>] <name>",
alias=["del-note"],
arg=[
],
detail=[
"- Removes the note with the given name from the Hydrus file.",
],
)
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Help
if should_show_help(args):
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
return 0
if not args:
log("Requires the note name/key to delete")
return 1
override_hash: str | None = None
rest: list[str] = []
i = 0
while i < len(args):
a = args[i]
low = str(a).lower()
if low in {"-hash", "--hash", "hash"} and i + 1 < len(args):
override_hash = str(args[i + 1]).strip()
i += 2
continue
rest.append(a)
i += 1
if not rest:
log("Requires the note name/key to delete")
return 1
name = str(rest[0] or '').strip()
if not name:
log("Requires a non-empty note name/key")
return 1
# Handle @N selection which creates a list - extract the first item
if isinstance(result, list) and len(result) > 0:
result = result[0]
hash_hex = get_hash_for_operation(override_hash, result)
if not hash_hex:
log("Selected result does not include a Hydrus hash")
return 1
try:
client = hydrus_wrapper.get_client(config)
except Exception as exc:
log(f"Hydrus client unavailable: {exc}")
return 1
if client is None:
log("Hydrus client unavailable")
return 1
try:
service_name = "my notes"
client.delete_notes(hash_hex, [name], service_name)
except Exception as exc:
log(f"Hydrus delete-note failed: {exc}")
return 1
# Refresh notes view if we're operating on the current subject
try:
from cmdlets import get_note as get_note_cmd # type: ignore
except Exception:
get_note_cmd = None
if get_note_cmd:
try:
subject = ctx.get_last_result_subject()
if subject is not None:
def norm(val: Any) -> str:
return str(val).lower()
target_hash = norm(hash_hex) if hash_hex else None
subj_hashes = []
if isinstance(subject, dict):
subj_hashes = [norm(v) for v in [subject.get("hydrus_hash"), subject.get("hash"), subject.get("hash_hex"), subject.get("file_hash")] if v]
else:
subj_hashes = [norm(get_field(subject, f)) for f in ("hydrus_hash", "hash", "hash_hex", "file_hash") if get_field(subject, f)]
if target_hash and target_hash in subj_hashes:
get_note_cmd.get_notes(subject, ["-hash", hash_hex], config)
return 0
except Exception:
pass
log(f"Deleted note '{name}'")
return 0

View File

@@ -1,66 +0,0 @@
from __future__ import annotations
from typing import Any, Dict, Sequence
import json
from . import register
import models
import pipeline as ctx
from API import HydrusNetwork as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, SharedArgs, normalize_hash, get_hash_for_operation, fetch_hydrus_metadata, get_field, should_show_help
from SYS.logger import log
CMDLET = Cmdlet(
name="get-note",
summary="List notes on a Hydrus file.",
usage="get-note [-hash <sha256>]",
arg=[
SharedArgs.HASH,
],
detail=[
"- Prints notes by service and note name.",
],
)
@register(["get-note", "get-notes", "get_note"]) # aliases
def get_notes(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Help
if should_show_help(args):
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
return 0
from ._shared import parse_cmdlet_args, get_hash_for_operation, fetch_hydrus_metadata
parsed = parse_cmdlet_args(args, CMDLET)
override_hash = parsed.get("hash")
hash_hex = get_hash_for_operation(override_hash, result)
if not hash_hex:
log("Selected result does not include a Hydrus hash")
return 1
meta, error_code = fetch_hydrus_metadata(config, hash_hex, include_service_keys_to_tags=False, include_notes=True)
if error_code != 0:
return error_code
notes = {}
if isinstance(meta, dict):
# Hydrus returns service_keys_to_tags; for notes we expect 'service_names_to_notes' in modern API
notes = meta.get('notes') or meta.get('service_names_to_notes') or {}
if notes:
ctx.emit("Notes:")
# Print flattened: service -> (name: text)
if isinstance(notes, dict) and any(isinstance(v, dict) for v in notes.values()):
for svc, mapping in notes.items():
ctx.emit(f"- {svc}:")
if isinstance(mapping, dict):
for k, v in mapping.items():
ctx.emit(f"{k}: {str(v).strip()}")
elif isinstance(notes, dict):
for k, v in notes.items():
ctx.emit(f"- {k}: {str(v).strip()}")
else:
ctx.emit("No notes found.")
return 0

View File

@@ -2,12 +2,12 @@ import json
import os
import sys
from typing import List, Dict, Any, Optional, Sequence
from cmdlets._shared import Cmdlet, CmdletArg, parse_cmdlet_args
from cmdlet._shared import Cmdlet, CmdletArg, parse_cmdlet_args
from SYS.logger import log
from result_table import ResultTable
import pipeline as ctx
ADJECTIVE_FILE = os.path.join(os.path.dirname(os.path.dirname(__file__)), "cmdnats", "adjective.json")
ADJECTIVE_FILE = os.path.join(os.path.dirname(os.path.dirname(__file__)), "cmdnat", "adjective.json")
def _load_adjectives() -> Dict[str, List[str]]:
try:

View File

@@ -1,6 +1,6 @@
from typing import List, Dict, Any
from cmdlets._shared import Cmdlet, CmdletArg
from cmdlet._shared import Cmdlet, CmdletArg
from config import load_config, save_config
CMDLET = Cmdlet(

View File

@@ -4,7 +4,7 @@ from typing import Any, Dict, Sequence, List, Optional
import shlex
import sys
from cmdlets._shared import Cmdlet, CmdletArg, parse_cmdlet_args
from cmdlet._shared import Cmdlet, CmdletArg, parse_cmdlet_args
from SYS.logger import log
from result_table import ResultTable
import pipeline as ctx
@@ -135,7 +135,7 @@ def _render_detail(meta: Dict[str, Any], args: Sequence[str]) -> None:
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
try:
from cmdlets import catalog as _catalog
from cmdlet import catalog as _catalog
CMDLET.arg[0].choices = _normalize_choice_list(_catalog.list_cmdlet_names())
metadata = _catalog.list_cmdlet_metadata()
@@ -163,7 +163,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
CMDLET = Cmdlet(
name=".help",
alias=["help", "?"],
summary="Show cmdlets or detailed help",
summary="Show cmdlet or detailed help",
usage=".help [cmd] [-filter text]",
arg=[
CmdletArg(
@@ -176,7 +176,7 @@ CMDLET = Cmdlet(
CmdletArg(
name="-filter",
type="string",
description="Filter cmdlets by substring",
description="Filter cmdlet by substring",
required=False,
),
],

View File

@@ -1,6 +1,6 @@
from typing import Any, Dict, Sequence, List
import sys
from cmdlets._shared import Cmdlet, CmdletArg, parse_cmdlet_args
from cmdlet._shared import Cmdlet, CmdletArg, parse_cmdlet_args
from SYS.logger import log, debug
from result_table import ResultTable
# REFACTOR: Commenting out Matrix import until provider refactor is complete

1486
cmdnat/pipe.py Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -7,8 +7,8 @@ from dataclasses import dataclass
from datetime import datetime, timezone
from typing import Any, Dict, Sequence, List
from cmdlets import register
from cmdlets._shared import Cmdlet, CmdletArg
from cmdlet import register
from cmdlet._shared import Cmdlet, CmdletArg
import pipeline as ctx
from SYS.logger import log
from config import get_local_storage_path

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -58,10 +58,7 @@ _CURRENT_RELATIONSHIP_TRACKER = FileRelationshipTracker()
def prepare_ffmpeg_metadata(payload: Optional[Dict[str, Any]]) -> Dict[str, str]:
"""Derive ffmpeg/mutagen metadata tags from a generic metadata payload.
This is not Hydrus-specific; it is used by exporters/converters.
"""
"""Build ffmpeg/mutagen metadata map from payload."""
if not isinstance(payload, dict):
return {}
@@ -275,29 +272,17 @@ def build_ffmpeg_command(
def field(obj: Any, name: str, value: Any = None) -> Any:
"""Get or set a field on dict or object.
"""Get or set a field on dict or object."""
if value is None:
if isinstance(obj, dict):
return obj.get(name)
return getattr(obj, name, None)
Args:
obj: Dict or object to access
name: Field name
value: If None, gets the field; if not None, sets it and returns the value
Returns:
The field value (when getting) or the value (when setting)
"""
if value is None:
# Get mode
if isinstance(obj, dict):
return obj.get(name)
else:
return getattr(obj, name, None)
else:
# Set mode
if isinstance(obj, dict):
obj[name] = value
else:
setattr(obj, name, value)
return value
if isinstance(obj, dict):
obj[name] = value
else:
setattr(obj, name, value)
return value
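A quick usage sketch for the slimmed-down field() accessor above (the import path is assumed): value=None selects get mode, anything else sets and returns the value, on both dicts and objects.

payload = {"title": "Abbey Road"}
assert field(payload, "title") == "Abbey Road"   # get from a dict
field(payload, "artist", "The Beatles")          # set on a dict, returns the value

class Item:
    pass

obj = Item()
field(obj, "title", "Revolver")                  # set via setattr
assert field(obj, "title") == "Revolver"         # get via getattr

One consequence of the value-is-None convention: a literal None cannot be stored through this helper, since None always selects get mode.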
@@ -1602,78 +1587,61 @@ def _read_sidecar_metadata(sidecar_path: Path) -> tuple[Optional[str], List[str]
def rename(file_path: Path, tags: Iterable[str]) -> Optional[Path]:
"""Rename a file based on title: tag in the tags list.
"""Rename a file based on a title: tag.
If a title: tag is present, renames the file and any .tag/.metadata sidecars.
"""
Args:
file_path: Path to the file to potentially rename
tags: Iterable of tag strings (should contain title: tag if rename needed)
new_title: Optional[str] = None
for tag in tags:
if isinstance(tag, str) and tag.lower().startswith("title:"):
new_title = tag.split(":", 1)[1].strip()
break
Returns:
New path if renamed, None if not renamed or error occurred
"""
# Extract title from tags
new_title = None
for tag in tags:
if isinstance(tag, str) and tag.lower().startswith('title:'):
new_title = tag.split(':', 1)[1].strip()
break
if not new_title or not file_path.exists():
return None
if not new_title or not file_path.exists():
return None
old_name = file_path.name
old_suffix = file_path.suffix
new_name = f"{new_title}{old_suffix}"
new_path = file_path.with_name(new_name)
try:
old_name = file_path.name
old_suffix = file_path.suffix
if new_path == file_path:
return None
# Create new filename: title + extension
new_name = f"{new_title}{old_suffix}"
new_path = file_path.parent / new_name
def _rename_sidecar(ext: str) -> None:
old_sidecar = file_path.parent / (old_name + ext)
if not old_sidecar.exists():
return
new_sidecar = file_path.parent / (new_name + ext)
if new_sidecar.exists():
try:
new_sidecar.unlink()
except Exception as exc:
debug(f"Warning: Could not replace target sidecar {new_sidecar.name}: {exc}", file=sys.stderr)
return
old_sidecar.rename(new_sidecar)
debug(f"Renamed sidecar: {old_sidecar.name} -> {new_sidecar.name}", file=sys.stderr)
# Don't rename if already the same name
if new_path == file_path:
return None
try:
if new_path.exists():
try:
new_path.unlink()
debug(f"Replaced existing file: {new_name}", file=sys.stderr)
except Exception as exc:
debug(f"Warning: Could not replace target file {new_name}: {exc}", file=sys.stderr)
return None
# If target exists, delete it first (replace mode)
if new_path.exists():
try:
new_path.unlink()
debug(f"Replaced existing file: {new_name}", file=sys.stderr)
except Exception as e:
debug(f"Warning: Could not replace target file {new_name}: {e}", file=sys.stderr)
return None
file_path.rename(new_path)
debug(f"Renamed file: {old_name} -> {new_name}", file=sys.stderr)
file_path.rename(new_path)
debug(f"Renamed file: {old_name}{new_name}", file=sys.stderr)
_rename_sidecar(".tag")
_rename_sidecar(".metadata")
# Rename the .tag sidecar if it exists
old_tags_path = file_path.parent / (old_name + '.tag')
if old_tags_path.exists():
new_tags_path = file_path.parent / (new_name + '.tag')
if new_tags_path.exists():
try:
new_tags_path.unlink()
except Exception:
pass
else:
old_tags_path.rename(new_tags_path)
debug(f"Renamed sidecar: {old_tags_path.name}{new_tags_path.name}", file=sys.stderr)
# Rename the .metadata sidecar if it exists
old_metadata_path = file_path.parent / (old_name + '.metadata')
if old_metadata_path.exists():
new_metadata_path = file_path.parent / (new_name + '.metadata')
if new_metadata_path.exists():
debug(f"Warning: Target metadata already exists: {new_metadata_path.name}", file=sys.stderr)
else:
old_metadata_path.rename(new_metadata_path)
debug(f"Renamed metadata: {old_metadata_path.name}{new_metadata_path.name}", file=sys.stderr)
return new_path
except Exception as exc:
debug(f"Warning: Failed to rename file: {exc}", file=sys.stderr)
return None
return new_path
except Exception as exc:
debug(f"Warning: Failed to rename file: {exc}", file=sys.stderr)
return None
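Illustrative call into the restructured rename() above; the paths are placeholders. A title: tag drives the rename and the .tag/.metadata sidecars follow via _rename_sidecar; without a title: tag (or on failure) the function returns None and leaves everything in place.

from pathlib import Path

new_path = rename(Path("downloads/abc123.m4b"), ["title:Abbey Road", "artist:The Beatles"])
# new_path -> downloads/Abbey Road.m4b on success, None if no title: tag was present or the rename failed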
def write_tags(media_path: Path, tags: Iterable[str], url: Iterable[str], hash_value: Optional[str] = None, db=None) -> None:
@@ -2096,26 +2064,7 @@ def apply_tag_mutation(payload: Dict[str, Any], operation: str = 'add') -> Dict[
def extract_ytdlp_tags(entry: Dict[str, Any]) -> List[str]:
"""Extract meaningful metadata tags from yt-dlp entry.
This is the UNIFIED API for extracting tags from yt-dlp metadata.
All modules (download_data, merge_file, etc.) should use this function
instead of implementing their own extraction logic.
Extracts meaningful tags (artist, album, creator, genre, track, etc.)
while excluding technical fields (filesize, duration, format, etc.).
Args:
entry: yt-dlp entry metadata dictionary from download
Returns:
List of normalized tag strings in format "namespace:value"
Example:
>>> entry = {'artist': 'The Beatles', 'album': 'Abbey Road', 'duration': 5247}
>>> tags = extract_ytdlp_tags(entry)
>>> debug(tags)
['artist:The Beatles', 'album:Abbey Road']
"""
"""
tags: List[str] = []
seen_namespaces: Set[str] = set()
@@ -2186,7 +2135,7 @@ def extract_ytdlp_tags(entry: Dict[str, Any]) -> List[str]:
def dedup_tags_by_namespace(tags: List[str], keep_first: bool = True) -> List[str]:
"""Deduplicate tags by namespace, keeping consistent order.
This is the UNIFIED API for tag deduplication used across all cmdlets.
This is the UNIFIED API for tag deduplication used across all cmdlets.
Replaces custom deduplication logic in merge_file.py and other modules.
Groups tags by namespace (e.g., "artist", "album", "tag") and keeps
@@ -2345,7 +2294,7 @@ def merge_multiple_tag_lists(
def read_tags_from_file(file_path: Path) -> List[str]:
"""Read and normalize tags from .tag sidecar file.
This is the UNIFIED API for reading .tag files across all cmdlets.
This is the UNIFIED API for reading .tag files across all cmdlets.
Handles normalization, deduplication, and format validation.
Args:
@@ -2397,33 +2346,7 @@ def embed_metadata_in_file(
tags: List[str],
file_kind: str = ''
) -> bool:
"""Embed metadata tags into a media file using FFmpeg.
Extracts metadata from tags (namespace:value format) and writes to the file's
metadata using FFmpeg with -c copy (no re-encoding).
Supported tag namespaces:
- title, artist, album, track/track_number, date/year, genre, composer, comment
For audio files, applies sensible defaults:
- If no album, uses title as album
- If no track, defaults to 1
- album_artist is set to artist value
Args:
file_path: Path to media file
tags: List of tags in format ['namespace:value', ...] (e.g., ['artist:Beatles', 'album:Abbey Road'])
file_kind: Type of file: 'audio', 'video', or '' for auto-detect (optional)
Returns:
True if successful, False otherwise
Raises:
None (logs errors to stderr)
Example:
>>> tags = ['artist:Beatles', 'album:Abbey Road', 'track:1']
>>> success = embed_metadata_in_file(Path('song.mp3'), tags, file_kind='audio')
"""
"""
if not tags:
return True
@@ -2550,7 +2473,7 @@ def write_tags_to_file(
) -> bool:
"""Write tags to .tag sidecar file.
This is the UNIFIED API for writing .tag files across all cmdlets.
This is the UNIFIED API for writing .tag files across all cmdlets.
Uses consistent format and handles file creation/overwriting.
Args:
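All of these helpers operate on "namespace:value" tag strings. Below is a self-contained illustration of what dedup-by-namespace with keep_first=True means in practice; it is deliberately simplified and is not the module's implementation (the real helper may keep several values for some namespaces).

def dedup_by_namespace_sketch(tags: list[str], keep_first: bool = True) -> list[str]:
    chosen: dict[str, str] = {}
    for tag in tags:
        namespace = tag.split(":", 1)[0].lower() if ":" in tag else ""
        if keep_first:
            chosen.setdefault(namespace, tag)   # first occurrence per namespace wins
        else:
            chosen[namespace] = tag             # last occurrence per namespace wins
    return list(chosen.values())                # insertion order is preserved

assert dedup_by_namespace_sketch(
    ["artist:The Beatles", "artist:Beatles", "album:Abbey Road"]
) == ["artist:The Beatles", "album:Abbey Road"]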

View File

@@ -1,6 +1,6 @@
"""Pipeline execution context and state management for cmdlets.
"""Pipeline execution context and state management for cmdlet.
This module provides functions for managing pipeline state, allowing cmdlets to
This module provides functions for managing pipeline state, allowing cmdlets to
emit results and control printing behavior within a piped execution context.
Key Concepts:
@@ -76,7 +76,7 @@ _PIPELINE_LAST_SELECTION: List[int] = []
# Track the currently executing command/pipeline string for worker attribution
_PIPELINE_COMMAND_TEXT: str = ""
# Shared scratchpad for cmdlets/funacts to stash structured data between stages
# Shared scratchpad for cmdlets/funacts to stash structured data between stages
_PIPELINE_VALUES: Dict[str, Any] = {}
_PIPELINE_MISSING = object()
@@ -128,8 +128,8 @@ def emit(obj: Any) -> None:
def emit_list(objects: List[Any]) -> None:
"""Emit a list of objects to the next pipeline stage.
This allows cmdlets to emit multiple results that are tracked as a list,
enabling downstream cmdlets to process all of them or filter by metadata.
This allows cmdlets to emit multiple results that are tracked as a list,
enabling downstream cmdlets to process all of them or filter by metadata.
Args:
objects: List of objects to emit
@@ -143,7 +143,7 @@ def print_if_visible(*args: Any, file=None, **kwargs: Any) -> None:
- Always allow errors printed to stderr by callers (they pass file=sys.stderr).
- For normal info messages, this suppresses printing for intermediate pipeline stages.
- Use this instead of log() in cmdlets when you want stage-aware output.
- Use this instead of log() in cmdlets when you want stage-aware output.
Args:
*args: Arguments to print (same as built-in print)
@@ -426,7 +426,7 @@ def get_ui_library_refresh_callback() -> Optional[Any]:
def trigger_ui_library_refresh(library_filter: str = 'local') -> None:
"""Trigger a library refresh in the UI if callback is registered.
This should be called from cmdlets/funacts after content is added to library.
This should be called from cmdlets/funacts after content is added to library.
Args:
library_filter: Which library to refresh ('local', 'hydrus', etc)
@@ -732,7 +732,7 @@ def get_last_result_table_row_selection_args(row_index: int) -> Optional[List[st
def set_current_stage_table(result_table: Optional[Any]) -> None:
"""Store the current pipeline stage table for @N expansion.
Used by cmdlets that display tabular results (e.g., download-data with formats)
Used by cmdlets that display tabular results (e.g., download-data with formats)
to make their result table available for @N expansion logic.
Does NOT push to history - purely for command expansion in the current pipeline.

View File

@@ -113,7 +113,7 @@ Repository = "https://github.com/yourusername/medeia-macina.git"
Issues = "https://github.com/yourusername/medeia-macina/issues"
[tool.setuptools]
packages = ["cmdlets", "helper", "TUI", "medeia_macina"]
packages = ["cmdlet", "helper", "TUI", "medeia_macina"]
[tool.black]
line-length = 100

View File

@@ -34,7 +34,7 @@ server and uses it as a remote storage backend through the RemoteStorageBackend.
## USAGE
After setup, all cmdlets work with the phone:
After setup, all cmdlets work with the phone:
$ search-file zohar -store phone
$ @1-3 | add-relationship -king @4 -store phone
$ @1 | get-relationship -store phone

View File

@@ -1,530 +0,0 @@
"""Search-file cmdlet: Search for files by query, tag, size, type, duration, etc."""
from __future__ import annotations
from typing import Any, Dict, Sequence, List, Optional, Tuple
from pathlib import Path
from dataclasses import dataclass, field
from collections import OrderedDict
import re
import json
import sys
from SYS.logger import log, debug
from Provider.registry import get_search_provider
from cmdlets._shared import Cmdlet, CmdletArg, get_field, should_show_help
import pipeline as ctx
def get_origin(obj: Any, default: Any = None) -> Any:
"""Return the canonical origin/table identifier from a payload-like object."""
value = get_field(obj, "origin", None)
if value is not None:
return value
value = get_field(obj, "table", None)
if value is not None:
return value
value = get_field(obj, "store", None)
if value is not None:
return value
return default
# Optional dependencies
try:
import mutagen # type: ignore
except ImportError: # pragma: no cover
mutagen = None # type: ignore
try:
from config import get_hydrus_url, resolve_output_dir
except Exception: # pragma: no cover
get_hydrus_url = None # type: ignore
resolve_output_dir = None # type: ignore
try:
from API.HydrusNetwork import HydrusNetwork, HydrusRequestError
except ImportError: # pragma: no cover
HydrusNetwork = None # type: ignore
HydrusRequestError = RuntimeError # type: ignore
try:
from SYS.utils import sha256_file
except ImportError: # pragma: no cover
sha256_file = None # type: ignore
try:
from SYS.utils_constant import mime_maps
except ImportError: # pragma: no cover
mime_maps = {} # type: ignore
@dataclass(slots=True)
class SearchRecord:
path: str
size_bytes: int | None = None
duration_seconds: str | None = None
tag: str | None = None
hash: str | None = None
def as_dict(self) -> dict[str, str]:
payload: dict[str, str] = {"path": self.path}
if self.size_bytes is not None:
payload["size"] = str(self.size_bytes)
if self.duration_seconds:
payload["duration"] = self.duration_seconds
if self.tag:
payload["tag"] = self.tag
if self.hash:
payload["hash"] = self.hash
return payload
@dataclass
class ResultItem:
table: str # Renamed from origin
title: str
detail: str
annotations: List[str]
target: str
media_kind: str = "other"
hash: Optional[str] = None
columns: List[tuple[str, str]] = field(default_factory=list)
tag_summary: Optional[str] = None
duration_seconds: Optional[float] = None
size_bytes: Optional[int] = None
full_metadata: Optional[Dict[str, Any]] = None
tag: Optional[set[str]] = field(default_factory=set)
relationships: Optional[List[str]] = field(default_factory=list)
known_urls: Optional[List[str]] = field(default_factory=list)
@property
def origin(self) -> str:
return self.table
def to_dict(self) -> Dict[str, Any]:
payload: Dict[str, Any] = {
"title": self.title,
}
# Always include these core fields for downstream cmdlets (get-file, download-data, etc)
payload["table"] = self.table
payload["target"] = self.target
payload["media_kind"] = self.media_kind
# Always include full_metadata if present (needed by download-data, etc)
# This is NOT for display, but for downstream processing
if self.full_metadata:
payload["full_metadata"] = self.full_metadata
# Include columns if defined (result renderer will use these for display)
if self.columns:
payload["columns"] = list(self.columns)
else:
# If no columns, include the detail for backwards compatibility
payload["detail"] = self.detail
payload["annotations"] = list(self.annotations)
# Include optional fields
if self.hash:
payload["hash"] = self.hash
if self.tag_summary:
payload["tag_summary"] = self.tag_summary
if self.tag:
payload["tag"] = list(self.tag)
if self.relationships:
payload["relationships"] = self.relationships
if self.known_urls:
payload["known_urls"] = self.known_urls
return payload
STORAGE_ORIGINS = {"local", "hydrus", "debrid"}
class Search_File(Cmdlet):
"""Class-based search-file cmdlet with self-registration."""
def __init__(self) -> None:
super().__init__(
name="search-file",
summary="Unified search cmdlet for storage (Hydrus, Local) and providers (Debrid, LibGen, OpenLibrary, Soulseek).",
usage="search-file [query] [-tag TAG] [-size >100MB|<50MB] [-type audio|video|image] [-duration >10:00] [-store BACKEND] [-provider PROVIDER]",
arg=[
CmdletArg("query", description="Search query string"),
CmdletArg("tag", description="Filter by tag (can be used multiple times)"),
CmdletArg("size", description="Filter by size: >100MB, <50MB, =10MB"),
CmdletArg("type", description="Filter by type: audio, video, image, document"),
CmdletArg("duration", description="Filter by duration: >10:00, <1:30:00"),
CmdletArg("limit", type="integer", description="Limit results (default: 45)"),
CmdletArg("store", description="Search storage backend: hydrus, local (default: all searchable storages)"),
CmdletArg("provider", description="Search provider: libgen, openlibrary, soulseek, debrid, local (overrides -storage)"),
],
detail=[
"Search across storage (Hydrus, Local) and providers (Debrid, LibGen, OpenLibrary, Soulseek)",
"Use -provider to search a specific source, or -store to search file backends",
"Filter results by: tag, size, type, duration",
"Results can be piped to other commands",
"Examples:",
"search-file foo # Search all file backends",
"search-file -provider libgen 'python programming' # Search LibGen books",
"search-file -provider debrid 'movie' # Search AllDebrid magnets",
"search-file 'music' -provider soulseek # Search Soulseek P2P",
"search-file -provider openlibrary 'tolkien' # Search OpenLibrary",
"search-file song -store hydrus -type audio # Search only Hydrus audio",
"search-file movie -tag action -provider debrid # Debrid with filters",
],
exec=self.run,
)
self.register()
# --- Helper methods -------------------------------------------------
@staticmethod
def _normalize_extension(ext_value: Any) -> str:
"""Sanitize extension strings to alphanumerics and cap at 5 chars."""
ext = str(ext_value or "").strip().lstrip(".")
for sep in (" ", "|", "(", "[", "{", ",", ";"):
if sep in ext:
ext = ext.split(sep, 1)[0]
break
if "." in ext:
ext = ext.split(".")[-1]
ext = "".join(ch for ch in ext if ch.isalnum())
return ext[:5]
def _ensure_storage_columns(self, payload: Dict[str, Any]) -> Dict[str, Any]:
"""Ensure storage results have the necessary fields for result_table display."""
store_value = str(get_origin(payload, "") or "").lower()
if store_value not in STORAGE_ORIGINS:
return payload
# Ensure we have title field
if "title" not in payload:
payload["title"] = payload.get("name") or payload.get("target") or payload.get("path") or "Result"
# Ensure we have ext field
if "ext" not in payload:
title = str(payload.get("title", ""))
path_obj = Path(title)
if path_obj.suffix:
payload["ext"] = self._normalize_extension(path_obj.suffix.lstrip('.'))
else:
payload["ext"] = payload.get("ext", "")
# Ensure size_bytes is present for display (already set by search_file())
# result_table will handle formatting it
# Don't create manual columns - let result_table handle display
# This allows the table to respect max_columns and apply consistent formatting
return payload
# --- Execution ------------------------------------------------------
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Search across multiple providers: Hydrus, Local, Debrid, LibGen, etc."""
if should_show_help(args):
log(f"Cmdlet: {self.name}\nSummary: {self.summary}\nUsage: {self.usage}")
return 0
args_list = [str(arg) for arg in (args or [])]
# Parse arguments
query = ""
tag_filters: List[str] = []
size_filter: Optional[Tuple[str, int]] = None
duration_filter: Optional[Tuple[str, float]] = None
type_filter: Optional[str] = None
storage_backend: Optional[str] = None
provider_name: Optional[str] = None
limit = 45
searched_backends: List[str] = []
i = 0
while i < len(args_list):
arg = args_list[i]
low = arg.lower()
if low in {"-provider", "--provider"} and i + 1 < len(args_list):
provider_name = args_list[i + 1].lower()
i += 2
elif low in {"-store", "--store", "-storage", "--storage"} and i + 1 < len(args_list):
storage_backend = args_list[i + 1].lower()
i += 2
elif low in {"-tag", "--tag"} and i + 1 < len(args_list):
tag_filters.append(args_list[i + 1])
i += 2
elif low in {"-limit", "--limit"} and i + 1 < len(args_list):
try:
limit = int(args_list[i + 1])
except ValueError:
limit = 100
i += 2
elif low in {"-type", "--type"} and i + 1 < len(args_list):
type_filter = args_list[i + 1].lower()
i += 2
elif not arg.startswith("-"):
query = f"{query} {arg}".strip() if query else arg
i += 1
else:
i += 1
store_filter: Optional[str] = None
if query:
match = re.search(r"\bstore:([^\s,]+)", query, flags=re.IGNORECASE)
if match:
store_filter = match.group(1).strip().lower() or None
query = re.sub(r"\s*[,]?\s*store:[^\s,]+", " ", query, flags=re.IGNORECASE)
query = re.sub(r"\s{2,}", " ", query)
query = query.strip().strip(',')
if storage_backend and storage_backend.lower() == "debrid":
log("Use -provider debrid instead of -store debrid (debrid is provider-only)", file=sys.stderr)
return 1
if store_filter and not provider_name and not storage_backend:
if store_filter in {"hydrus", "local", "debrid"}:
storage_backend = store_filter
# --- Feature: Filter provider result table by Name column ---
filter_after_search: Optional[str] = None
if result:
actual_result = result[0] if isinstance(result, list) and result else result
origin = get_origin(actual_result)
target = get_field(actual_result, 'target')
# If the incoming result is from a provider (not storage) AND this invocation looks like a filter (no flags)
positional_args = [a for a in args_list if not a.startswith('-')]
no_flags = len(positional_args) == len(args_list)
looks_like_filter = no_flags and len(positional_args) == 1 and not provider_name and not storage_backend and not tag_filters and not size_filter and not duration_filter and not type_filter
if origin and origin.lower() not in STORAGE_ORIGINS and looks_like_filter and query:
# Save the filter string to apply AFTER loading the provider data
filter_after_search = query.strip()
query = "" # Clear query so we load the target URL instead
# If result is from a provider, extract the target as query and set provider
if not query:
if origin == 'bandcamp' and target:
query = target
if not provider_name:
provider_name = 'bandcamp'
elif origin == 'youtube' and target:
query = target
if not provider_name:
provider_name = 'youtube'
elif target and str(target).startswith(('http://', 'https://')):
query = target
if not provider_name:
if 'bandcamp.com' in target:
provider_name = 'bandcamp'
elif 'youtube.com' in target or 'youtu.be' in target:
provider_name = 'youtube'
if not query:
log("Provide a search query", file=sys.stderr)
return 1
from API.folder import API_folder_store
from config import get_local_storage_path
import uuid
worker_id = str(uuid.uuid4())
library_root = get_local_storage_path(config or {})
if not library_root:
log("No library root configured", file=sys.stderr)
return 1
db = None
try:
db = API_folder_store(library_root)
db.insert_worker(
worker_id,
"search",
title=f"Search: {query}",
description=f"Query: {query}",
pipe=ctx.get_current_command_text()
)
results_list = []
import result_table
import importlib
importlib.reload(result_table)
from result_table import ResultTable
table_title = f"Search: {query}"
if provider_name:
table_title += f" [{provider_name}]"
elif storage_backend:
table_title += f" [{storage_backend}]"
preserve_order = provider_name and provider_name.lower() in ('youtube', 'openlibrary')
# Avoid setting source_command so @N does not re-run search-file; preserve row order when needed
table = ResultTable(table_title).set_preserve_order(preserve_order)
if provider_name:
debug(f"[search_file] Attempting provider search with: {provider_name}")
provider = get_search_provider(provider_name, config)
if not provider:
log(f"Provider '{provider_name}' not available", file=sys.stderr)
db.update_worker_status(worker_id, 'error')
return 1
debug(f"[search_file] Provider loaded, calling search with query: {query}")
search_result = provider.search(query, limit=limit)
debug(f"[search_file] Provider search returned {len(search_result)} results")
# Apply post-search filter if one was set
if filter_after_search:
debug(f"[search_file] Applying filter: {filter_after_search}")
filtered_result = []
for item in search_result:
item_dict = item.to_dict() if hasattr(item, 'to_dict') else dict(item)
title_val = get_field(item_dict, 'title') or get_field(item_dict, 'name') or ""
if filter_after_search.lower() in str(title_val).lower():
filtered_result.append(item)
search_result = filtered_result
if not search_result:
log(f"No results match filter: '{filter_after_search}'", file=sys.stderr)
db.update_worker_status(worker_id, 'completed')
return 0
debug(f"[search_file] Filter matched {len(search_result)} results")
table.title = f"Filter: {filter_after_search}"
for item in search_result:
table.add_result(item)
item_dict = item.to_dict()
results_list.append(item_dict)
ctx.emit(item_dict)
ctx.set_last_result_table(table, results_list)
debug(f"[search_file] Emitted {len(results_list)} results")
db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2))
db.update_worker_status(worker_id, 'completed')
return 0
from Store import Store
storage = Store(config=config or {}, suppress_debug=True)
backend_to_search = storage_backend or None
if backend_to_search:
if backend_to_search == "hydrus":
from API.HydrusNetwork import is_hydrus_available
if not is_hydrus_available(config or {}):
log(f"Backend 'hydrus' is not available (Hydrus service not running)", file=sys.stderr)
db.update_worker_status(worker_id, 'error')
return 1
searched_backends.append(backend_to_search)
target_backend = storage[backend_to_search]
results = target_backend.search(query, limit=limit)
else:
from API.HydrusNetwork import is_hydrus_available
hydrus_available = is_hydrus_available(config or {})
all_results = []
for backend_name in storage.list_searchable_backends():
if backend_name == "hydrus" and not hydrus_available:
continue
searched_backends.append(backend_name)
try:
backend_results = storage[backend_name].search(query, limit=limit - len(all_results))
if backend_results:
all_results.extend(backend_results)
if len(all_results) >= limit:
break
except Exception as exc:
log(f"Backend {backend_name} search failed: {exc}", file=sys.stderr)
results = all_results[:limit]
if not provider_name and not storage_backend:
try:
debrid_provider = get_search_provider("debrid", config)
if debrid_provider and debrid_provider.validate():
remaining = max(0, limit - len(results)) if isinstance(results, list) else limit
if remaining > 0:
debrid_results = debrid_provider.search(query, limit=remaining)
if debrid_results:
if "debrid" not in searched_backends:
searched_backends.append("debrid")
if results is None:
results = []
results.extend(debrid_results)
except Exception as exc:
log(f"Debrid provider search failed: {exc}", file=sys.stderr)
def _format_storage_label(name: str) -> str:
clean = str(name or "").strip()
if not clean:
return "Unknown"
return clean.replace("_", " ").title()
storage_counts: OrderedDict[str, int] = OrderedDict((name, 0) for name in searched_backends)
for item in results or []:
origin = get_origin(item)
if not origin:
continue
key = str(origin).lower()
if key not in storage_counts:
storage_counts[key] = 0
storage_counts[key] += 1
if storage_counts or query:
display_counts = OrderedDict((_format_storage_label(name), count) for name, count in storage_counts.items())
summary_line = table.set_storage_summary(display_counts, query, inline=True)
if summary_line:
table.title = summary_line
if results:
for item in results:
def _as_dict(obj: Any) -> Dict[str, Any]:
if isinstance(obj, dict):
return dict(obj)
if hasattr(obj, "to_dict") and callable(getattr(obj, "to_dict")):
return obj.to_dict() # type: ignore[arg-type]
return {"title": str(obj)}
item_dict = _as_dict(item)
if store_filter:
origin_val = str(get_origin(item_dict) or "").lower()
if store_filter != origin_val:
continue
normalized = self._ensure_storage_columns(item_dict)
# Make hash/store available for downstream cmdlets without rerunning search-file
hash_val = normalized.get("hash")
store_val = normalized.get("store") or get_origin(item_dict)
if hash_val and not normalized.get("hash"):
normalized["hash"] = hash_val
if store_val and not normalized.get("store"):
normalized["store"] = store_val
table.add_result(normalized)
results_list.append(normalized)
ctx.emit(normalized)
ctx.set_last_result_table(table, results_list)
db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2))
else:
log("No results found", file=sys.stderr)
db.append_worker_stdout(worker_id, json.dumps([], indent=2))
db.update_worker_status(worker_id, 'completed')
return 0
except Exception as exc:
log(f"Search failed: {exc}", file=sys.stderr)
import traceback
traceback.print_exc(file=sys.stderr)
if db:
try:
db.update_worker_status(worker_id, 'error')
except Exception:
pass
return 1
finally:
if db:
try:
db.close()
except Exception:
pass
CMDLET = Search_File()