dfdfdf
@@ -388,25 +388,55 @@ class HydrusNetwork:
            results[file_hash] = self._post("/add_url/associate_url", data=body)
        return {"batched": results}

    def set_notes(self, file_hashes: Union[str, Iterable[str]], notes: dict[str, str], service_name: str) -> dict[str, Any]:
    def set_notes(
        self,
        file_hash: str,
        notes: dict[str, str],
        *,
        merge_cleverly: bool = False,
        extend_existing_note_if_possible: bool = True,
        conflict_resolution: int = 3,
    ) -> dict[str, Any]:
        """Add or update notes associated with a file.

        Hydrus Client API: POST /add_notes/set_notes
        Required JSON args: {"hash": <sha256 hex>, "notes": {name: text}}
        """
        if not notes:
            raise ValueError("notes mapping must not be empty")
        hashes = self._ensure_hashes(file_hashes)
        body = {"hashes": hashes, "service_names_to_notes": {service_name: notes}}

        file_hash = str(file_hash or "").strip().lower()
        if not file_hash:
            raise ValueError("file_hash must not be empty")

        body: dict[str, Any] = {"hash": file_hash, "notes": notes}

        if merge_cleverly:
            body["merge_cleverly"] = True
            body["extend_existing_note_if_possible"] = bool(extend_existing_note_if_possible)
            body["conflict_resolution"] = int(conflict_resolution)
        return self._post("/add_notes/set_notes", data=body)

    def delete_notes(
        self,
        file_hashes: Union[str, Iterable[str]],
        file_hash: str,
        note_names: Sequence[str],
        service_name: str,
    ) -> dict[str, Any]:
        names = [name for name in note_names if name]
        """Delete notes associated with a file.

        Hydrus Client API: POST /add_notes/delete_notes
        Required JSON args: {"hash": <sha256 hex>, "note_names": [..]}
        """
        names = [str(name) for name in note_names if str(name or "").strip()]
        if not names:
            raise ValueError("note_names must not be empty")
        hashes = self._ensure_hashes(file_hashes)
        body = {"hashes": hashes, "service_names_to_deleted_note_names": {service_name: names}}
        return self._post("/add_notes/set_notes", data=body)

        file_hash = str(file_hash or "").strip().lower()
        if not file_hash:
            raise ValueError("file_hash must not be empty")

        body = {"hash": file_hash, "note_names": names}
        return self._post("/add_notes/delete_notes", data=body)

    def get_file_relationships(self, file_hash: str) -> dict[str, Any]:
        query = {"hash": file_hash}

@@ -804,7 +804,7 @@ def unlock_link_cmdlet(result: Any, args: Sequence[str], config: Dict[str, Any])
def _register_unlock_link():
    """Register unlock-link command with cmdlet registry if available."""
    try:
        from cmdlets import register
        from cmdlet import register

        @register(["unlock-link"])
        def unlock_link_wrapper(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
@@ -821,7 +821,7 @@ def _register_unlock_link():

        return unlock_link_wrapper
    except ImportError:
        # If cmdlets module not available, just return None
        # If cmdlet module not available, just return None
        return None


@@ -7,7 +7,7 @@ This module provides low-level functions for interacting with Archive.org:
- Image downloading and deobfuscation
- PDF creation with metadata

Used by unified_book_downloader.py for the borrowing workflow.
Used by Provider/openlibrary.py for the borrowing workflow.
"""
from __future__ import annotations

API/folder.py | 127
@@ -231,11 +231,13 @@ class API_folder_store:

        cursor.execute("""
            CREATE TABLE IF NOT EXISTS notes (
                hash TEXT PRIMARY KEY NOT NULL,
                hash TEXT NOT NULL,
                name TEXT NOT NULL,
                note TEXT,
                created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                FOREIGN KEY (hash) REFERENCES files(hash) ON DELETE CASCADE
                FOREIGN KEY (hash) REFERENCES files(hash) ON DELETE CASCADE,
                PRIMARY KEY (hash, name)
            )
        """)

@@ -261,6 +263,11 @@ class API_folder_store:
        cursor.execute("CREATE INDEX IF NOT EXISTS idx_worker_type ON worker(worker_type)")

        self._migrate_metadata_schema(cursor)
        self._migrate_notes_schema(cursor)

        # Notes indices (after migration so columns exist)
        cursor.execute("CREATE INDEX IF NOT EXISTS idx_notes_hash ON notes(hash)")
        cursor.execute("CREATE INDEX IF NOT EXISTS idx_notes_name ON notes(name)")
        self.connection.commit()
        logger.debug("Database tables created/verified")

@@ -449,6 +456,42 @@ class API_folder_store:
        except Exception as e:
            logger.debug(f"Note: Schema import/migration completed with status: {e}")

    def _migrate_notes_schema(self, cursor) -> None:
        """Migrate legacy notes schema (hash PRIMARY KEY, note) to named notes (hash, name PRIMARY KEY)."""
        try:
            cursor.execute("PRAGMA table_info(notes)")
            cols = [row[1] for row in cursor.fetchall()]
            if not cols:
                return
            if "name" in cols:
                return

            logger.info("Migrating legacy notes table to named notes schema")
            cursor.execute("""
                CREATE TABLE IF NOT EXISTS notes_new (
                    hash TEXT NOT NULL,
                    name TEXT NOT NULL,
                    note TEXT,
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    FOREIGN KEY (hash) REFERENCES files(hash) ON DELETE CASCADE,
                    PRIMARY KEY (hash, name)
                )
            """)

            # Copy existing notes into the default key
            cursor.execute("""
                INSERT INTO notes_new (hash, name, note, created_at, updated_at)
                SELECT hash, 'default', note, created_at, updated_at
                FROM notes
            """)

            cursor.execute("DROP TABLE notes")
            cursor.execute("ALTER TABLE notes_new RENAME TO notes")
            self.connection.commit()
        except Exception as exc:
            logger.debug(f"Notes schema migration skipped/failed: {exc}")

    def _update_metadata_modified_time(self, file_hash: str) -> None:
        """Update the time_modified timestamp for a file's metadata."""
        try:
@@ -1052,41 +1095,79 @@ class API_folder_store:
            return []

    def get_note(self, file_hash: str) -> Optional[str]:
        """Get note for a file by hash."""
        """Get the default note for a file by hash."""
        try:
            cursor = self.connection.cursor()

            cursor.execute("""
                SELECT n.note FROM notes n
                WHERE n.hash = ?
            """, (file_hash,))

            row = cursor.fetchone()
            return row[0] if row else None
            notes = self.get_notes(file_hash)
            if not notes:
                return None
            return notes.get("default")
        except Exception as e:
            logger.error(f"Error getting note for hash {file_hash}: {e}", exc_info=True)
            return None

    def save_note(self, file_path: Path, note: str) -> None:
        """Save note for a file."""
    def get_notes(self, file_hash: str) -> Dict[str, str]:
        """Get all notes for a file by hash."""
        try:
            cursor = self.connection.cursor()
            cursor.execute(
                "SELECT name, note FROM notes WHERE hash = ? ORDER BY name ASC",
                (file_hash,),
            )
            out: Dict[str, str] = {}
            for name, note in cursor.fetchall() or []:
                if not name:
                    continue
                out[str(name)] = str(note or "")
            return out
        except Exception as e:
            logger.error(f"Error getting notes for hash {file_hash}: {e}", exc_info=True)
            return {}

    def save_note(self, file_path: Path, note: str) -> None:
        """Save the default note for a file."""
        self.set_note(file_path, "default", note)

    def set_note(self, file_path: Path, name: str, note: str) -> None:
        """Set a named note for a file."""
        try:
            note_name = str(name or "").strip()
            if not note_name:
                raise ValueError("Note name is required")

            file_hash = self.get_or_create_file_entry(file_path)
            cursor = self.connection.cursor()

            cursor.execute("""
                INSERT INTO notes (hash, note)
                VALUES (?, ?)
                ON CONFLICT(hash) DO UPDATE SET
            cursor.execute(
                """
                INSERT INTO notes (hash, name, note)
                VALUES (?, ?, ?)
                ON CONFLICT(hash, name) DO UPDATE SET
                    note = excluded.note,
                    updated_at = CURRENT_TIMESTAMP
            """, (file_hash, note))

                """,
                (file_hash, note_name, note),
            )
            self.connection.commit()
            logger.debug(f"Saved note for {file_path}")
            logger.debug(f"Saved note '{note_name}' for {file_path}")
        except Exception as e:
            logger.error(f"Error saving note for {file_path}: {e}", exc_info=True)
            raise

    def delete_note(self, file_hash: str, name: str) -> None:
        """Delete a named note for a file by hash."""
        try:
            note_name = str(name or "").strip()
            if not note_name:
                raise ValueError("Note name is required")
            cursor = self.connection.cursor()
            cursor.execute(
                "DELETE FROM notes WHERE hash = ? AND name = ?",
                (file_hash, note_name),
            )
            self.connection.commit()
        except Exception as e:
            logger.error(f"Error deleting note '{name}' for hash {file_hash}: {e}", exc_info=True)
            raise
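A minimal usage sketch of the named-notes store API, with hypothetical paths and hashes:

    store = API_folder_store(...)  # construction details omitted (assumption)
    store.set_note(Path("library/book.pdf"), "default", "first pass read")
    store.set_note(Path("library/book.pdf"), "source", "borrowed via openlibrary")
    notes = store.get_notes(file_hash)   # {'default': ..., 'source': ...}
    store.delete_note(file_hash, "source")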

    def search_by_tag(self, tag: str, limit: int = 100) -> List[tuple]:
        """Search for files with a specific tag. Returns list of (hash, file_path) tuples."""
        try:
@@ -2027,7 +2108,7 @@ def migrate_tags_to_db(library_root: Path, db: API_folder_store) -> int:
    try:
        for tags_file in library_root.rglob("*.tag"):
            try:
                base_path = tags_file.with_suffix("")
                base_path = tags_file.with_suffix("")
                tags_text = tags_file.read_text(encoding='utf-8')
                tags = [line.strip() for line in tags_text.splitlines() if line.strip()]

CLI.py | 76
@@ -68,7 +68,7 @@ from typing import Callable


from config import get_local_storage_path, load_config
from cmdlets.catalog import (
from cmdlet.catalog import (
    import_cmd_module as _catalog_import_cmd_module,
    list_cmdlet_metadata as _catalog_list_cmdlet_metadata,
    list_cmdlet_names as _catalog_list_cmdlet_names,
@@ -482,7 +482,7 @@ def _get_cmdlet_names() -> List[str]:


def _import_cmd_module(mod_name: str):
    """Import a cmdlet/native module from cmdlets or cmdnats packages."""
    """Import a cmdlet/native module from cmdlet or cmdnat packages."""
    try:
        return _catalog_import_cmd_module(mod_name)
    except Exception:
@@ -518,7 +518,7 @@ def _get_arg_choices(cmd_name: str, arg_name: str) -> List[str]:
    # Dynamic search providers
    if normalized_arg == "provider":
        try:
            from Provider.registry import list_search_providers
            from ProviderCore.registry import list_search_providers
            providers = list_search_providers(_load_cli_config())
            available = [name for name, is_ready in providers.items() if is_ready]
            provider_choices = sorted(available) if available else sorted(providers.keys())
@@ -607,9 +607,23 @@ if (
            return

        arg_names = _get_cmdlet_args(cmd_name)
        logical_seen: Set[str] = set()
        for arg in arg_names:
            if arg.lower().startswith(current_token):
            arg_low = arg.lower()

            # If the user has only typed '-', prefer single-dash flags (e.g. -url)
            # and avoid suggesting both -name and --name for the same logical arg.
            if current_token == "-" and arg_low.startswith("--"):
                continue

            logical = arg.lstrip("-").lower()
            if current_token == "-" and logical in logical_seen:
                continue

            if arg_low.startswith(current_token):
                yield CompletionType(arg, start_position=-len(current_token))
                if current_token == "-":
                    logical_seen.add(logical)

        if "--help".startswith(current_token):
            yield CompletionType("--help", start_position=-len(current_token))
@@ -715,10 +729,21 @@ def _create_cmdlet_cli():
            print(f"Error parsing seeds JSON: {e}")
            return

        try:
            from cli_syntax import validate_pipeline_text
            syntax_error = validate_pipeline_text(command)
            if syntax_error:
                print(syntax_error.message, file=sys.stderr)
                return
        except Exception:
            # Best-effort only; if validator can't load, fall back to shlex handling below.
            pass

        try:
            tokens = shlex.split(command)
        except ValueError:
            tokens = command.split()
        except ValueError as exc:
            print(f"Syntax error: {exc}", file=sys.stderr)
            return

        if not tokens:
            return
@@ -728,7 +753,7 @@ def _create_cmdlet_cli():

    @app.command("repl")
    def repl():
        """Start interactive REPL for cmdlets with autocomplete."""
        """Start interactive REPL for cmdlet with autocomplete."""
        banner = """
Medeia-Macina
=====================
@@ -967,11 +992,22 @@ def _create_cmdlet_cli():
            except Exception:
                pipeline_ctx_ref = None

            try:
                from cli_syntax import validate_pipeline_text
                syntax_error = validate_pipeline_text(user_input)
                if syntax_error:
                    print(syntax_error.message, file=sys.stderr)
                    continue
            except Exception:
                # Best-effort only; if validator can't load, continue with shlex.
                pass

            try:
                import shlex
                tokens = shlex.split(user_input)
            except ValueError:
                tokens = user_input.split()
            except ValueError as exc:
                print(f"Syntax error: {exc}", file=sys.stderr)
                continue

            if not tokens:
                continue
@@ -1078,12 +1114,12 @@ def _create_cmdlet_cli():


def _execute_pipeline(tokens: list):
    """Execute a pipeline of cmdlets separated by pipes (|).
    """Execute a pipeline of cmdlet separated by pipes (|).

    Example: cmd1 arg1 arg2 | cmd2 arg2 | cmd3 arg3
    """
    try:
        from cmdlets import REGISTRY
        from cmdlet import REGISTRY
        import json
        import pipeline as ctx

@@ -1333,7 +1369,7 @@ def _execute_pipeline(tokens: list):
                filtered = [resolved_items[i] for i in first_stage_selection_indices if 0 <= i < len(resolved_items)]
                if filtered:
                    # Convert filtered items to PipeObjects for consistent pipeline handling
                    from cmdlets._shared import coerce_to_pipe_object
                    from cmdlet._shared import coerce_to_pipe_object
                    filtered_pipe_objs = [coerce_to_pipe_object(item) for item in filtered]
                    piped_result = filtered_pipe_objs if len(filtered_pipe_objs) > 1 else filtered_pipe_objs[0]
                    # Build log message with proper string conversion
@@ -1529,7 +1565,7 @@ def _execute_pipeline(tokens: list):
                filtered = [resolved_list[i] for i in selection_indices if 0 <= i < len(resolved_list)]
                if filtered:
                    # Convert filtered items to PipeObjects for consistent pipeline handling
                    from cmdlets._shared import coerce_to_pipe_object
                    from cmdlet._shared import coerce_to_pipe_object
                    filtered_pipe_objs = [coerce_to_pipe_object(item) for item in filtered]
                    piped_result = filtered_pipe_objs if len(filtered_pipe_objs) > 1 else filtered_pipe_objs[0]
                    print(f"Selected {len(filtered)} item(s) using {cmd_name}")
@@ -1817,13 +1853,13 @@ def _execute_cmdlet(cmd_name: str, args: list):
    - @{1,3,5} - select rows 1, 3, 5
    """
    try:
        from cmdlets import REGISTRY
        from cmdlet import REGISTRY
        import json
        import pipeline as ctx

        # Ensure native commands (cmdnats) are loaded
        # Ensure native commands (cmdnat) are loaded
        try:
            from cmdlets.catalog import ensure_registry_loaded as _ensure_registry_loaded
            from cmdlet.catalog import ensure_registry_loaded as _ensure_registry_loaded
            _ensure_registry_loaded()
        except Exception:
            pass
@@ -1832,7 +1868,7 @@ def _execute_cmdlet(cmd_name: str, args: list):
        cmd_fn = REGISTRY.get(cmd_name)
        if not cmd_fn:
            # Attempt lazy import of the module and retry
            from cmdlets.catalog import import_cmd_module as _catalog_import
            from cmdlet.catalog import import_cmd_module as _catalog_import
            try:
                mod = _catalog_import(cmd_name)
                data = getattr(mod, "CMDLET", None) if mod else None
@@ -1893,7 +1929,7 @@ def _execute_cmdlet(cmd_name: str, args: list):
            # Filter to selected indices only
            result = [piped_items[idx] for idx in selected_indices if 0 <= idx < len(piped_items)]
        else:
            # No selection specified, pass all items (cmdlets handle lists via normalize_result_input)
            # No selection specified, pass all items (cmdlet handle lists via normalize_result_input)
            result = piped_items

        worker_manager = _ensure_worker_manager(config)
@@ -2038,10 +2074,10 @@ def _execute_cmdlet(cmd_name: str, args: list):


def _show_cmdlet_list():
    """Display available cmdlets with full metadata: cmd:name alias:aliases args:args."""
    """Display available cmdlet with full metadata: cmd:name alias:aliases args:args."""
    try:
        metadata = _catalog_list_cmdlet_metadata()
        print("\nAvailable cmdlets:")
        print("\nAvailable cmdlet:")
        for cmd_name in sorted(metadata.keys()):
            info = metadata[cmd_name]
            aliases = info.get("aliases", [])

@@ -4,6 +4,31 @@ local msg = require 'mp.msg'

local M = {}

-- Lyrics overlay toggle
-- The Python helper (python -m MPV.lyric) will read this property via IPC.
local LYRIC_VISIBLE_PROP = "user-data/medeia-lyric-visible"

local function lyric_get_visible()
    local ok, v = pcall(mp.get_property_native, LYRIC_VISIBLE_PROP)
    if not ok or v == nil then
        return true
    end
    return v and true or false
end

local function lyric_set_visible(v)
    pcall(mp.set_property_native, LYRIC_VISIBLE_PROP, v and true or false)
end

local function lyric_toggle()
    local now = not lyric_get_visible()
    lyric_set_visible(now)
    mp.osd_message("Lyrics: " .. (now and "on" or "off"), 1)
end

-- Default to visible unless user overrides.
lyric_set_visible(true)

-- Configuration
local opts = {
    python_path = "python",
@@ -138,4 +163,8 @@ mp.add_key_binding("mbtn_right", "medios-menu-right-click", M.show_menu)
mp.add_key_binding("ctrl+i", "medios-info", M.get_file_info)
mp.add_key_binding("ctrl+del", "medios-delete", M.delete_current_file)

-- Lyrics toggle (requested: 'L')
mp.add_key_binding("l", "medeia-lyric-toggle", lyric_toggle)
mp.add_key_binding("L", "medeia-lyric-toggle-shift", lyric_toggle)

return M
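A minimal sketch of how the Python side could read the toggle over IPC, assuming the MPV wrapper from MPV/mpv_ipc.py below; the actual polling loop in MPV.lyric is not shown here:

    from MPV.mpv_ipc import MPV

    player = MPV()  # assumed to default to the fixed medeia-macina pipe
    resp = player.send({"command": ["get_property", "user-data/medeia-lyric-visible"]}, silent=True)
    visible = bool(resp and resp.get("data", True))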

MPV/lyric.py | 1195 (new file; diff suppressed because it is too large)
MPV/mpv_ipc.py | 210
@@ -12,6 +12,7 @@ import os
import platform
import socket
import subprocess
import sys
import time as _time
from pathlib import Path
from typing import Any, Dict, Optional, List, BinaryIO, cast
@@ -24,6 +25,88 @@ FIXED_IPC_PIPE_NAME = "mpv-medeia-macina"
MPV_LUA_SCRIPT_PATH = str(Path(__file__).resolve().parent / "LUA" / "main.lua")


_LYRIC_PROCESS: Optional[subprocess.Popen] = None
_LYRIC_LOG_FH: Optional[Any] = None


def _windows_list_lyric_helper_pids(ipc_path: str) -> List[int]:
    """Return PIDs of `python -m MPV.lyric --ipc <ipc_path>` helpers (Windows only)."""
    if platform.system() != "Windows":
        return []
    try:
        ipc_path = str(ipc_path or "")
    except Exception:
        ipc_path = ""
    if not ipc_path:
        return []

    # Use CIM to query command lines; output as JSON for robust parsing.
    # Note: `ConvertTo-Json` returns a number for single item, array for many, or null.
    ps_script = (
        "$ipc = "
        + json.dumps(ipc_path)
        + "; "
        "Get-CimInstance Win32_Process | "
        "Where-Object { $_.CommandLine -and $_.CommandLine -match ' -m\\s+MPV\\.lyric(\\s|$)' -and $_.CommandLine -match ('--ipc\\s+' + [regex]::Escape($ipc)) } | "
        "Select-Object -ExpandProperty ProcessId | ConvertTo-Json -Compress"
    )

    try:
        out = subprocess.check_output(
            ["powershell", "-NoProfile", "-Command", ps_script],
            stdin=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            timeout=2,
            text=True,
        )
    except Exception:
        return []

    txt = (out or "").strip()
    if not txt or txt == "null":
        return []
    try:
        obj = json.loads(txt)
    except Exception:
        return []

    pids: List[int] = []
    if isinstance(obj, list):
        for v in obj:
            try:
                pids.append(int(v))
            except Exception:
                pass
    else:
        try:
            pids.append(int(obj))
        except Exception:
            pass

    # De-dupe and filter obvious junk.
    uniq: List[int] = []
    for pid in pids:
        if pid and pid > 0 and pid not in uniq:
            uniq.append(pid)
    return uniq


def _windows_kill_pids(pids: List[int]) -> None:
    if platform.system() != "Windows":
        return
    for pid in pids or []:
        try:
            subprocess.run(
                ["taskkill", "/PID", str(int(pid)), "/F"],
                stdin=subprocess.DEVNULL,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                timeout=2,
            )
        except Exception:
            continue


class MPVIPCError(Exception):
    """Raised when MPV IPC communication fails."""
    pass
@@ -38,7 +121,7 @@ class MPV:
    - Query playlist and currently playing item via IPC

    This class intentionally stays "dumb": it does not implement app logic.
    App behavior is driven by cmdlets (e.g. `.pipe`) and the bundled Lua script.
    App behavior is driven by cmdlet (e.g. `.pipe`) and the bundled Lua script.
    """

    def __init__(
@@ -55,11 +138,11 @@ class MPV:
        lua_path = Path(str(lua_script_path)).resolve()
        self.lua_script_path = str(lua_path)

    def client(self) -> "MPVIPCClient":
        return MPVIPCClient(socket_path=self.ipc_path, timeout=self.timeout)
    def client(self, silent: bool = False) -> "MPVIPCClient":
        return MPVIPCClient(socket_path=self.ipc_path, timeout=self.timeout, silent=bool(silent))

    def is_running(self) -> bool:
        client = self.client()
        client = self.client(silent=True)
        try:
            ok = client.connect()
            return bool(ok)
@@ -67,7 +150,7 @@ class MPV:
            client.disconnect()

    def send(self, command: Dict[str, Any] | List[Any], silent: bool = False) -> Optional[Dict[str, Any]]:
        client = self.client()
        client = self.client(silent=bool(silent))
        try:
            if not client.connect():
                return None
@@ -136,9 +219,109 @@ class MPV:
        except Exception:
            return

    def ensure_lyric_loader_running(self) -> None:
        """Start (or keep) the Python lyric overlay helper.

        Uses the fixed IPC pipe name so it can follow playback.
        """
        global _LYRIC_PROCESS, _LYRIC_LOG_FH

        # Cross-session guard (Windows): avoid spawning multiple helpers across separate CLI runs.
        # Also clean up stale helpers when mpv isn't running anymore.
        if platform.system() == "Windows":
            try:
                existing = _windows_list_lyric_helper_pids(str(self.ipc_path))
                if existing:
                    if not self.is_running():
                        _windows_kill_pids(existing)
                        return
                    # If multiple exist, kill them and start fresh (prevents double overlays).
                    if len(existing) == 1:
                        return
                    _windows_kill_pids(existing)
            except Exception:
                pass

        try:
            if _LYRIC_PROCESS is not None and _LYRIC_PROCESS.poll() is None:
                return
        except Exception:
            pass

        try:
            if _LYRIC_PROCESS is not None:
                try:
                    _LYRIC_PROCESS.terminate()
                except Exception:
                    pass
        finally:
            _LYRIC_PROCESS = None
        try:
            if _LYRIC_LOG_FH is not None:
                _LYRIC_LOG_FH.close()
        except Exception:
            pass
        _LYRIC_LOG_FH = None

        try:
            try:
                tmp_dir = Path(os.environ.get("TEMP") or os.environ.get("TMP") or ".")
            except Exception:
                tmp_dir = Path(".")
            log_path = str((tmp_dir / "medeia-mpv-lyric.log").resolve())

            # Ensure the module can be imported even when the app is launched from a different cwd.
            # Repo root = parent of the MPV package directory.
            try:
                repo_root = Path(__file__).resolve().parent.parent
            except Exception:
                repo_root = Path.cwd()

            cmd: List[str] = [
                sys.executable,
                "-m",
                "MPV.lyric",
                "--ipc",
                str(self.ipc_path),
                "--log",
                log_path,
            ]

            # Redirect helper stdout/stderr to the log file so we can see crashes/import errors.
            try:
                _LYRIC_LOG_FH = open(log_path, "a", encoding="utf-8", errors="replace")
            except Exception:
                _LYRIC_LOG_FH = None

            kwargs: Dict[str, Any] = {
                "stdin": subprocess.DEVNULL,
                "stdout": _LYRIC_LOG_FH or subprocess.DEVNULL,
                "stderr": _LYRIC_LOG_FH or subprocess.DEVNULL,
            }

            # Ensure immediate flushing to the log file.
            env = os.environ.copy()
            env["PYTHONUNBUFFERED"] = "1"
            try:
                existing_pp = env.get("PYTHONPATH")
                env["PYTHONPATH"] = str(repo_root) if not existing_pp else (str(repo_root) + os.pathsep + str(existing_pp))
            except Exception:
                pass
            kwargs["env"] = env

            # Make the current directory the repo root so `-m MPV.lyric` resolves reliably.
            kwargs["cwd"] = str(repo_root)
            if platform.system() == "Windows":
                kwargs["creationflags"] = 0x00000008  # DETACHED_PROCESS

            _LYRIC_PROCESS = subprocess.Popen(cmd, **kwargs)
            debug(f"Lyric loader started (log={log_path})")
        except Exception as exc:
            debug(f"Lyric loader failed to start: {exc}")

    def wait_for_ipc(self, retries: int = 20, delay_seconds: float = 0.2) -> bool:
        for _ in range(max(1, retries)):
            client = self.client()
            client = self.client(silent=True)
            try:
                if client.connect():
                    return True
@@ -233,7 +416,7 @@ class MPVIPCClient:
    It handles platform-specific differences (Windows named pipes vs Unix sockets).
    """

    def __init__(self, socket_path: Optional[str] = None, timeout: float = 5.0):
    def __init__(self, socket_path: Optional[str] = None, timeout: float = 5.0, silent: bool = False):
        """Initialize MPV IPC client.

        Args:
@@ -244,6 +427,7 @@ class MPVIPCClient:
        self.socket_path = socket_path or get_ipc_pipe_path()
        self.sock: socket.socket | BinaryIO | None = None
        self.is_windows = platform.system() == "Windows"
        self.silent = bool(silent)

    def connect(self) -> bool:
        """Connect to mpv IPC socket.

@@ -259,17 +443,20 @@
                self.sock = open(self.socket_path, 'r+b', buffering=0)
                return True
            except (OSError, IOError) as exc:
                debug(f"Failed to connect to MPV named pipe: {exc}")
                if not self.silent:
                    debug(f"Failed to connect to MPV named pipe: {exc}")
                return False
        else:
            # Unix domain socket (Linux, macOS)
            if not os.path.exists(self.socket_path):
                debug(f"IPC socket not found: {self.socket_path}")
                if not self.silent:
                    debug(f"IPC socket not found: {self.socket_path}")
                return False

            af_unix = getattr(socket, "AF_UNIX", None)
            if af_unix is None:
                debug("IPC AF_UNIX is not available on this platform")
                if not self.silent:
                    debug("IPC AF_UNIX is not available on this platform")
                return False

            self.sock = socket.socket(af_unix, socket.SOCK_STREAM)
@@ -277,7 +464,8 @@
                self.sock.connect(self.socket_path)
                return True
            except Exception as exc:
                debug(f"Failed to connect to MPV IPC: {exc}")
                if not self.silent:
                    debug(f"Failed to connect to MPV IPC: {exc}")
                self.sock = None
                return False
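A minimal usage sketch of the silent probe path, assuming no mpv instance needs to be running:

    player = MPV()  # constructor arguments omitted (assumption)
    if player.is_running():  # probes the pipe with a silent client, so a dead pipe logs nothing
        player.send({"command": ["show-text", "hello"]}, silent=True)
        player.ensure_lyric_loader_running()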

@@ -1,5 +1,5 @@
"""Provider plugin modules.

Concrete provider implementations live in this package.
The public entrypoint/registry is Provider.registry.
The public entrypoint/registry is ProviderCore.registry.
"""

@@ -3,7 +3,7 @@ from __future__ import annotations
import sys
from typing import Any, Dict, List, Optional

from Provider._base import SearchProvider, SearchResult
from ProviderCore.base import SearchProvider, SearchResult
from SYS.logger import log, debug

try:

@@ -1,12 +1,24 @@
from __future__ import annotations

import logging
import re
import requests
import sys
from typing import Any, Dict, List, Optional
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Tuple
from urllib.parse import quote, urljoin, urlparse, unquote

from Provider._base import SearchProvider, SearchResult
from ProviderCore.base import SearchProvider, SearchResult
from SYS.logger import log


# Optional dependencies
try:
    from bs4 import BeautifulSoup
except ImportError:
    BeautifulSoup = None


class Libgen(SearchProvider):
    """Search provider for Library Genesis books."""

@@ -20,8 +32,7 @@ class Libgen(SearchProvider):
        filters = filters or {}

        try:
            from Provider.unified_book_downloader import UnifiedBookDownloader
            from Provider.query_parser import parse_query, get_field, get_free_text
            from cli_syntax import get_field, get_free_text, parse_query

            parsed = parse_query(query)
            isbn = get_field(parsed, "isbn")
@@ -31,8 +42,11 @@

            search_query = isbn or title or author or free_text or query

            downloader = UnifiedBookDownloader(config=self.config)
            books = downloader.search_libgen(search_query, limit=limit)
            books = search_libgen(
                search_query,
                limit=limit,
                log_error=lambda msg: log(msg, file=sys.stderr),
            )

            results: List[SearchResult] = []
            for idx, book in enumerate(books, 1):
@@ -91,8 +105,455 @@

    def validate(self) -> bool:
        try:
            from Provider.unified_book_downloader import UnifiedBookDownloader  # noqa: F401

            return True
            return BeautifulSoup is not None
        except Exception:
            return False


LogFn = Optional[Callable[[str], None]]
ErrorFn = Optional[Callable[[str], None]]

DEFAULT_TIMEOUT = 20.0
DEFAULT_LIMIT = 50

# Mirrors to try in order
MIRRORS = [
    "https://libgen.is",
    "https://libgen.rs",
    "https://libgen.st",
    "http://libgen.is",
    "http://libgen.rs",
    "http://libgen.st",
    "https://libgen.li",  # Different structure, fallback
    "http://libgen.li",
    "https://libgen.gl",  # Different structure, fallback
    "http://libgen.gl",
]

logging.getLogger(__name__).setLevel(logging.INFO)


def _call(logger: LogFn, message: str) -> None:
    if logger:
        logger(message)


class LibgenSearch:
    """Robust LibGen searcher."""

    def __init__(self, session: Optional[requests.Session] = None):
        self.session = session or requests.Session()
        self.session.headers.update({
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        })

    def search(self, query: str, limit: int = DEFAULT_LIMIT) -> List[Dict[str, Any]]:
        """Search LibGen mirrors."""
        if not BeautifulSoup:
            logging.error("BeautifulSoup not installed. Cannot search LibGen.")
            return []

        for mirror in MIRRORS:
            try:
                if "libgen.li" in mirror or "libgen.gl" in mirror:
                    results = self._search_libgen_li(mirror, query, limit)
                else:
                    results = self._search_libgen_rs(mirror, query, limit)

                if results:
                    return results
            except Exception as e:
                logging.debug(f"Mirror {mirror} failed: {e}")
                continue

        return []

    def _search_libgen_rs(self, mirror: str, query: str, limit: int) -> List[Dict[str, Any]]:
        """Search libgen.rs/is/st style mirrors."""
        url = f"{mirror}/search.php"
        params = {
            "req": query,
            "res": 100,
            "column": "def",
            "open": 0,
            "view": "simple",
            "phrase": 1,
        }

        resp = self.session.get(url, params=params, timeout=DEFAULT_TIMEOUT)
        resp.raise_for_status()

        soup = BeautifulSoup(resp.text, "html.parser")

        table = soup.find("table", {"class": "c"})
        if not table:
            tables = soup.find_all("table")
            for t in tables:
                if len(t.find_all("tr")) > 5:
                    table = t
                    break

        if not table:
            return []

        results: List[Dict[str, Any]] = []
        rows = table.find_all("tr")[1:]

        for row in rows:
            cols = row.find_all("td")
            if len(cols) < 9:
                continue

            try:
                libgen_id = cols[0].get_text(strip=True)
                authors = [a.get_text(strip=True) for a in cols[1].find_all("a")]
                if not authors:
                    authors = [cols[1].get_text(strip=True)]

                title_tag = cols[2].find("a")
                title = title_tag.get_text(strip=True) if title_tag else cols[2].get_text(strip=True)

                md5 = ""
                if title_tag and title_tag.has_attr("href"):
                    href = title_tag["href"]
                    match = re.search(r"md5=([a-fA-F0-9]{32})", href)
                    if match:
                        md5 = match.group(1)

                publisher = cols[3].get_text(strip=True)
                year = cols[4].get_text(strip=True)
                pages = cols[5].get_text(strip=True)
                language = cols[6].get_text(strip=True)
                size = cols[7].get_text(strip=True)
                extension = cols[8].get_text(strip=True)

                mirror_links = []
                for i in range(9, len(cols)):
                    a = cols[i].find("a")
                    if a and a.has_attr("href"):
                        mirror_links.append(a["href"])

                if md5:
                    download_link = f"http://library.lol/main/{md5}"
                elif mirror_links:
                    download_link = mirror_links[0]
                else:
                    download_link = ""

                results.append({
                    "id": libgen_id,
                    "title": title,
                    "author": ", ".join(authors),
                    "publisher": publisher,
                    "year": year,
                    "pages": pages,
                    "language": language,
                    "filesize_str": size,
                    "extension": extension,
                    "md5": md5,
                    "mirror_url": download_link,
                    "cover": "",
                })

                if len(results) >= limit:
                    break

            except Exception as e:
                logging.debug(f"Error parsing row: {e}")
                continue

        return results

    def _search_libgen_li(self, mirror: str, query: str, limit: int) -> List[Dict[str, Any]]:
        """Search libgen.li/gl style mirrors."""
        url = f"{mirror}/index.php"
        params = {
            "req": query,
            "res": 100,
            "covers": "on",
            "filesuns": "all",
        }

        resp = self.session.get(url, params=params, timeout=DEFAULT_TIMEOUT)
        resp.raise_for_status()

        soup = BeautifulSoup(resp.text, "html.parser")
        table = soup.find("table", {"id": "tablelibgen"})
        if not table:
            table = soup.find("table", {"class": "table table-striped"})

        if not table:
            return []

        results: List[Dict[str, Any]] = []
        rows = table.find_all("tr")[1:]

        for row in rows:
            cols = row.find_all("td")
            if len(cols) < 9:
                continue

            try:
                title_col = cols[1]
                title_link = title_col.find("a")
                title = title_link.get_text(strip=True) if title_link else title_col.get_text(strip=True)

                libgen_id = ""
                if title_link and title_link.has_attr("href"):
                    href = title_link["href"]
                    match = re.search(r"id=(\d+)", href)
                    if match:
                        libgen_id = match.group(1)

                authors = cols[2].get_text(strip=True)
                publisher = cols[3].get_text(strip=True)
                year = cols[4].get_text(strip=True)
                language = cols[5].get_text(strip=True)
                pages = cols[6].get_text(strip=True)
                size = cols[7].get_text(strip=True)
                extension = cols[8].get_text(strip=True)

                mirror_url = ""
                if title_link:
                    href = title_link["href"]
                    if href.startswith("/"):
                        mirror_url = mirror + href
                    else:
                        mirror_url = urljoin(mirror, href)

                results.append({
                    "id": libgen_id,
                    "title": title,
                    "author": authors,
                    "publisher": publisher,
                    "year": year,
                    "pages": pages,
                    "language": language,
                    "filesize_str": size,
                    "extension": extension,
                    "md5": "",
                    "mirror_url": mirror_url,
                })

                if len(results) >= limit:
                    break
            except Exception:
                continue

        return results


def search_libgen(
    query: str,
    limit: int = DEFAULT_LIMIT,
    *,
    log_info: LogFn = None,
    log_error: ErrorFn = None,
    session: Optional[requests.Session] = None,
) -> List[Dict[str, Any]]:
    """Search Libgen using the robust scraper."""
    searcher = LibgenSearch(session=session)
    try:
        results = searcher.search(query, limit=limit)
        _call(log_info, f"[libgen] Found {len(results)} results")
        return results
    except Exception as e:
        _call(log_error, f"[libgen] Search failed: {e}")
        return []


def _resolve_download_url(
    session: requests.Session,
    url: str,
    log_info: LogFn = None,
) -> Optional[str]:
    """Resolve the final download URL by following the LibGen chain."""
    current_url = url
    visited = set()

    for _ in range(6):
        if current_url in visited:
            break
        visited.add(current_url)

        _call(log_info, f"[resolve] Checking: {current_url}")

        if current_url.lower().endswith((".pdf", ".epub", ".mobi", ".djvu", ".azw3", ".cbz", ".cbr")):
            return current_url

        try:
            with session.get(current_url, stream=True, timeout=30) as resp:
                resp.raise_for_status()
                ct = resp.headers.get("Content-Type", "").lower()

                if "text/html" not in ct:
                    return current_url

                content = resp.text
        except Exception as e:
            _call(log_info, f"[resolve] Failed to fetch {current_url}: {e}")
            return None

        soup = BeautifulSoup(content, "html.parser")

        get_link = soup.find("a", string=re.compile(r"^GET$", re.IGNORECASE))
        if not get_link:
            h2_get = soup.find("h2", string=re.compile(r"^GET$", re.IGNORECASE))
            if h2_get and h2_get.parent.name == "a":
                get_link = h2_get.parent

        if get_link and get_link.has_attr("href"):
            return urljoin(current_url, get_link["href"])

        if "series.php" in current_url:
            edition_link = soup.find("a", href=re.compile(r"edition\.php"))
            if edition_link:
                current_url = urljoin(current_url, edition_link["href"])
                continue

        if "edition.php" in current_url:
            file_link = soup.find("a", href=re.compile(r"file\.php"))
            if file_link:
                current_url = urljoin(current_url, file_link["href"])
                continue

        if "file.php" in current_url:
            libgen_link = soup.find("a", title="libgen")
            if not libgen_link:
                libgen_link = soup.find("a", string=re.compile(r"Libgen", re.IGNORECASE))

            if libgen_link and libgen_link.has_attr("href"):
                current_url = urljoin(current_url, libgen_link["href"])
                continue

        if "ads.php" in current_url:
            get_php_link = soup.find("a", href=re.compile(r"get\.php"))
            if get_php_link:
                return urljoin(current_url, get_php_link["href"])

        for text in ["Cloudflare", "IPFS.io", "Infura"]:
            link = soup.find("a", string=re.compile(text, re.IGNORECASE))
            if link and link.has_attr("href"):
                return urljoin(current_url, link["href"])

        break

    return None


def _guess_filename_extension(download_url: str, headers: Dict[str, str]) -> Optional[str]:
    """Guess the file extension from headers or the download URL."""
    content_disposition = headers.get("content-disposition", "")
    if content_disposition:
        match = re.search(r"filename\*?=(?:UTF-8\'\'|\"?)([^\";]+)", content_disposition, flags=re.IGNORECASE)
        if match:
            filename = unquote(match.group(1).strip('"'))
            suffix = Path(filename).suffix
            if suffix:
                return suffix.lstrip(".")

    parsed = urlparse(download_url)
    suffix = Path(parsed.path).suffix
    if suffix:
        return suffix.lstrip(".")

    content_type = headers.get("content-type", "").lower()
    mime_map = {
        "application/pdf": "pdf",
        "application/epub+zip": "epub",
        "application/x-mobipocket-ebook": "mobi",
        "application/x-cbr": "cbr",
        "application/x-cbz": "cbz",
        "application/zip": "zip",
    }

    for mime, ext in mime_map.items():
        if mime in content_type:
            return ext

    return None


def _apply_extension(path: Path, extension: Optional[str]) -> Path:
    """Rename the path to match the detected extension, if needed."""
    if not extension:
        return path

    suffix = extension if extension.startswith(".") else f".{extension}"
    if path.suffix.lower() == suffix.lower():
        return path

    candidate = path.with_suffix(suffix)
    base_stem = path.stem
    counter = 1
    while candidate.exists() and counter < 100:
        candidate = path.with_name(f"{base_stem}({counter}){suffix}")
        counter += 1

    try:
        path.replace(candidate)
        return candidate
    except Exception:
        return path


def download_from_mirror(
    mirror_url: str,
    output_path: Path,
    *,
    log_info: LogFn = None,
    log_error: ErrorFn = None,
    session: Optional[requests.Session] = None,
    progress_callback: Optional[Callable[[int, int], None]] = None,
) -> Tuple[bool, Optional[Path]]:
    """Download file from a LibGen mirror URL with optional progress tracking."""
    session = session or requests.Session()
    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    try:
        _call(log_info, f"[download] Resolving download link from: {mirror_url}")

        download_url = _resolve_download_url(session, mirror_url, log_info)

        if not download_url:
            _call(log_error, "[download] Could not find direct download link")
            return False, None

        _call(log_info, f"[download] Downloading from: {download_url}")

        downloaded = 0
        total_size = 0
        headers: Dict[str, str] = {}

        with session.get(download_url, stream=True, timeout=60) as r:
            r.raise_for_status()
            headers = dict(r.headers)

            ct = headers.get("content-type", "").lower()
            if "text/html" in ct:
                _call(log_error, "[download] Final URL returned HTML, not a file.")
                return False, None

            total_size = int(headers.get("content-length", 0) or 0)

            with open(output_path, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
                        downloaded += len(chunk)
                        if progress_callback:
                            progress_callback(downloaded, total_size)

        final_extension = _guess_filename_extension(download_url, headers)
        final_path = _apply_extension(output_path, final_extension)

        if progress_callback and total_size > 0:
            progress_callback(downloaded, total_size)

        _call(log_info, f"[download] Saved to {final_path}")
        return True, final_path

    except Exception as e:
        _call(log_error, f"[download] Download failed: {e}")
        return False, None
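A minimal end-to-end sketch of the new module-level helpers, with a hypothetical query and output path:

    books = search_libgen("hypothetical title", limit=5, log_info=print)
    if books and books[0].get("mirror_url"):
        ok, path = download_from_mirror(
            books[0]["mirror_url"],
            Path("downloads/book.bin"),  # extension is corrected once headers are inspected
            log_info=print,
            log_error=print,
        )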
|
||||
|
||||
@@ -1,523 +0,0 @@
|
||||
"""Shared Library Genesis search and download helpers.
|
||||
|
||||
Replaces the old libgen backend with a robust scraper based on libgen-api-enhanced logic.
|
||||
Targets libgen.is/rs/st mirrors and parses the results table directly.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import logging
|
||||
import re
|
||||
import requests
|
||||
from pathlib import Path
|
||||
from typing import Any, Callable, Dict, List, Optional, Tuple
|
||||
from urllib.parse import quote, urljoin, urlparse, unquote
|
||||
|
||||
# Optional dependencies
|
||||
try:
|
||||
from bs4 import BeautifulSoup
|
||||
except ImportError:
|
||||
BeautifulSoup = None
|
||||
|
||||
LogFn = Optional[Callable[[str], None]]
|
||||
ErrorFn = Optional[Callable[[str], None]]
|
||||
|
||||
DEFAULT_TIMEOUT = 20.0
|
||||
DEFAULT_LIMIT = 50
|
||||
|
||||
# Mirrors to try in order
|
||||
MIRRORS = [
|
||||
"https://libgen.is",
|
||||
"https://libgen.rs",
|
||||
"https://libgen.st",
|
||||
"http://libgen.is",
|
||||
"http://libgen.rs",
|
||||
"http://libgen.st",
|
||||
"https://libgen.li", # Different structure, fallback
|
||||
"http://libgen.li",
|
||||
"https://libgen.gl", # Different structure, fallback
|
||||
"http://libgen.gl",
|
||||
]
|
||||
|
||||
logging.getLogger(__name__).setLevel(logging.INFO)
|
||||
|
||||
|
||||
def _call(logger: LogFn, message: str) -> None:
|
||||
if logger:
|
||||
logger(message)
|
||||
|
||||
|
||||
class LibgenSearch:
|
||||
"""Robust LibGen searcher."""
|
||||
|
||||
def __init__(self, session: Optional[requests.Session] = None):
|
||||
self.session = session or requests.Session()
|
||||
self.session.headers.update({
|
||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
|
||||
})
|
||||
|
||||
def search(self, query: str, limit: int = DEFAULT_LIMIT) -> List[Dict[str, Any]]:
|
||||
"""Search LibGen mirrors."""
|
||||
if not BeautifulSoup:
|
||||
logging.error("BeautifulSoup not installed. Cannot search LibGen.")
|
||||
return []
|
||||
|
||||
for mirror in MIRRORS:
|
||||
try:
|
||||
if "libgen.li" in mirror or "libgen.gl" in mirror:
|
||||
results = self._search_libgen_li(mirror, query, limit)
|
||||
else:
|
||||
results = self._search_libgen_rs(mirror, query, limit)
|
||||
|
||||
if results:
|
||||
return results
|
||||
except Exception as e:
|
||||
logging.debug(f"Mirror {mirror} failed: {e}")
|
||||
continue
|
||||
|
||||
return []
|
||||
|
||||
def _search_libgen_rs(self, mirror: str, query: str, limit: int) -> List[Dict[str, Any]]:
|
||||
"""Search libgen.rs/is/st style mirrors."""
|
||||
# Search URL: /search.php?req=QUERY&res=100&column=def
|
||||
url = f"{mirror}/search.php"
|
||||
params = {
|
||||
"req": query,
|
||||
"res": 100, # Request more to filter later
|
||||
"column": "def",
|
||||
"open": 0,
|
||||
"view": "simple",
|
||||
"phrase": 1,
|
||||
}
|
||||
|
||||
resp = self.session.get(url, params=params, timeout=DEFAULT_TIMEOUT)
|
||||
resp.raise_for_status()
|
||||
|
||||
soup = BeautifulSoup(resp.text, "html.parser")
|
||||
|
||||
# Find the table with results. usually class 'c'
|
||||
table = soup.find("table", {"class": "c"})
|
||||
if not table:
|
||||
# Try finding by structure (table with many rows)
|
||||
tables = soup.find_all("table")
|
||||
for t in tables:
|
||||
if len(t.find_all("tr")) > 5:
|
||||
table = t
|
||||
break
|
||||
|
||||
if not table:
|
||||
return []
|
||||
|
||||
results = []
|
||||
# Skip header row
|
||||
rows = table.find_all("tr")[1:]
|
||||
|
||||
for row in rows:
|
||||
cols = row.find_all("td")
|
||||
if len(cols) < 9:
|
||||
continue
|
||||
|
||||
# Columns:
|
||||
# 0: ID
|
||||
# 1: Author(s)
|
||||
# 2: Title
|
||||
# 3: Publisher
|
||||
# 4: Year
|
||||
# 5: Pages
|
||||
# 6: Language
|
||||
# 7: Size
|
||||
# 8: Extension
|
||||
# 9+: Mirrors
|
||||
|
||||
try:
|
||||
libgen_id = cols[0].get_text(strip=True)
|
||||
authors = [a.get_text(strip=True) for a in cols[1].find_all("a")]
|
||||
if not authors:
|
||||
authors = [cols[1].get_text(strip=True)]
|
||||
|
||||
title_tag = cols[2].find("a")
|
||||
title = title_tag.get_text(strip=True) if title_tag else cols[2].get_text(strip=True)
|
||||
|
||||
# Extract MD5 from title link if possible (often in href)
|
||||
# href='book/index.php?md5=...'
|
||||
md5 = ""
|
||||
if title_tag and title_tag.has_attr("href"):
|
||||
href = title_tag["href"]
|
||||
match = re.search(r"md5=([a-fA-F0-9]{32})", href)
|
||||
if match:
|
||||
md5 = match.group(1)
|
||||
|
||||
publisher = cols[3].get_text(strip=True)
|
||||
year = cols[4].get_text(strip=True)
|
||||
pages = cols[5].get_text(strip=True)
|
||||
language = cols[6].get_text(strip=True)
|
||||
size = cols[7].get_text(strip=True)
|
||||
extension = cols[8].get_text(strip=True)
|
||||
|
||||
# Mirrors
|
||||
# Usually col 9 is http://library.lol/main/MD5
|
||||
mirror_links = []
|
||||
for i in range(9, len(cols)):
|
||||
a = cols[i].find("a")
|
||||
if a and a.has_attr("href"):
|
||||
mirror_links.append(a["href"])
|
||||
|
||||
# Construct direct download page link (library.lol)
|
||||
# If we have MD5, we can guess it: http://library.lol/main/{md5}
|
||||
if md5:
|
||||
download_link = f"http://library.lol/main/{md5}"
|
||||
elif mirror_links:
|
||||
download_link = mirror_links[0]
|
||||
else:
|
||||
download_link = ""
|
||||
|
||||
results.append({
|
||||
"id": libgen_id,
|
||||
"title": title,
|
||||
"author": ", ".join(authors),
|
||||
"publisher": publisher,
|
||||
"year": year,
|
||||
"pages": pages,
|
||||
"language": language,
|
||||
"filesize_str": size,
|
||||
"extension": extension,
|
||||
"md5": md5,
|
||||
"mirror_url": download_link,
|
||||
"cover": "", # Could extract from hover if needed
|
||||
})
|
||||
|
||||
if len(results) >= limit:
|
||||
break
|
||||
|
||||
except Exception as e:
|
||||
logging.debug(f"Error parsing row: {e}")
|
||||
continue
|
||||
|
||||
return results
|
||||
|
||||
def _search_libgen_li(self, mirror: str, query: str, limit: int) -> List[Dict[str, Any]]:
|
||||
"""Search libgen.li/gl style mirrors."""
|
||||
# Search URL: /index.php?req=QUERY&columns[]=t&columns[]=a...
|
||||
url = f"{mirror}/index.php"
|
||||
params = {
|
||||
"req": query,
|
||||
"res": 100,
|
||||
"covers": "on",
|
||||
"filesuns": "all",
|
||||
}
|
||||
|
||||
resp = self.session.get(url, params=params, timeout=DEFAULT_TIMEOUT)
|
||||
resp.raise_for_status()
|
||||
|
||||
soup = BeautifulSoup(resp.text, "html.parser")
|
||||
table = soup.find("table", {"id": "tablelibgen"})
|
||||
if not table:
|
||||
table = soup.find("table", {"class": "table table-striped"})
|
||||
|
||||
if not table:
|
||||
return []
|
||||
|
||||
results = []
|
||||
rows = table.find_all("tr")[1:]
|
||||
|
||||
for row in rows:
|
||||
cols = row.find_all("td")
|
||||
if len(cols) < 9:
|
||||
continue
|
||||
|
||||
try:
|
||||
# Structure is different
|
||||
# 0: Cover
|
||||
# 1: Title (with link to file.php?id=...)
|
||||
# 2: Author
|
||||
# 3: Publisher
|
||||
# 4: Year
|
||||
# 5: Language
|
||||
# 6: Pages
|
||||
# 7: Size
|
||||
# 8: Extension
|
||||
# 9: Mirrors
|
||||
|
||||
title_col = cols[1]
|
||||
title_link = title_col.find("a")
|
||||
title = title_link.get_text(strip=True) if title_link else title_col.get_text(strip=True)
|
||||
|
||||
# Extract ID from link
|
||||
libgen_id = ""
|
||||
if title_link and title_link.has_attr("href"):
|
||||
href = title_link["href"]
|
||||
# href is usually "file.php?id=..." or "edition.php?id=..."
|
||||
match = re.search(r"id=(\d+)", href)
|
||||
if match:
|
||||
libgen_id = match.group(1)
|
||||
|
||||
authors = cols[2].get_text(strip=True)
|
||||
publisher = cols[3].get_text(strip=True)
|
||||
year = cols[4].get_text(strip=True)
|
||||
language = cols[5].get_text(strip=True)
|
||||
pages = cols[6].get_text(strip=True)
|
||||
size = cols[7].get_text(strip=True)
|
||||
extension = cols[8].get_text(strip=True)
|
||||
|
||||
# Mirror link
|
||||
# Usually in col 9 or title link
|
||||
mirror_url = ""
|
||||
if title_link:
|
||||
href = title_link["href"]
|
||||
if href.startswith("/"):
|
||||
mirror_url = mirror + href
|
||||
else:
|
||||
mirror_url = urljoin(mirror, href)
|
||||
|
||||
results.append({
|
||||
"id": libgen_id,
|
||||
"title": title,
|
||||
"author": authors,
|
||||
"publisher": publisher,
|
||||
"year": year,
|
||||
"pages": pages,
|
||||
"language": language,
|
||||
"filesize_str": size,
|
||||
"extension": extension,
|
||||
"md5": "", # .li doesn't show MD5 easily in table
|
||||
"mirror_url": mirror_url,
|
||||
})
|
||||
|
||||
if len(results) >= limit:
|
||||
break
|
||||
except Exception:
|
||||
continue
|
||||
|
||||
return results
|
||||
|
||||
|
||||
def search_libgen(
|
||||
query: str,
|
||||
limit: int = DEFAULT_LIMIT,
|
||||
*,
|
||||
log_info: LogFn = None,
|
||||
log_error: ErrorFn = None,
|
||||
session: Optional[requests.Session] = None,
|
||||
) -> List[Dict[str, Any]]:
|
||||
"""Search Libgen using the robust scraper."""
|
||||
searcher = LibgenSearch(session=session)
|
||||
try:
|
||||
results = searcher.search(query, limit=limit)
|
||||
_call(log_info, f"[libgen] Found {len(results)} results")
|
||||
return results
|
||||
except Exception as e:
|
||||
_call(log_error, f"[libgen] Search failed: {e}")
|
||||
return []
|
||||
|
||||
|
||||
def _resolve_download_url(
|
||||
session: requests.Session,
|
||||
url: str,
|
||||
log_info: LogFn = None
|
||||
) -> Optional[str]:
|
||||
"""Resolve the final download URL by following the LibGen chain."""
|
||||
current_url = url
|
||||
visited = set()
|
||||
|
||||
# Max hops to prevent infinite loops
|
||||
for _ in range(6):
|
||||
if current_url in visited:
|
||||
break
|
||||
visited.add(current_url)
|
||||
|
||||
_call(log_info, f"[resolve] Checking: {current_url}")
|
||||
|
||||
# Simple heuristic: if it looks like a file, return it
|
||||
if current_url.lower().endswith(('.pdf', '.epub', '.mobi', '.djvu', '.azw3', '.cbz', '.cbr')):
|
||||
return current_url
|
||||
|
||||
try:
|
||||
# Use HEAD first to check content type if possible, but some mirrors block HEAD or return 405
|
||||
# So we'll just GET with stream=True to peek headers/content without downloading everything
|
||||
with session.get(current_url, stream=True, timeout=30) as resp:
|
||||
resp.raise_for_status()
|
||||
ct = resp.headers.get("Content-Type", "").lower()
|
||||
|
||||
if "text/html" not in ct:
|
||||
# It's a binary file
|
||||
return current_url
|
||||
|
||||
# It's HTML, read content
|
||||
content = resp.text
|
||||
except Exception as e:
|
||||
_call(log_info, f"[resolve] Failed to fetch {current_url}: {e}")
|
||||
return None
|
||||
|
||||
soup = BeautifulSoup(content, "html.parser")
|
||||
|
||||
# 1. Check for "GET" link (library.lol / ads.php style)
|
||||
# Usually <h2>GET</h2> inside <a> or just text "GET"
|
||||
get_link = soup.find("a", string=re.compile(r"^GET$", re.IGNORECASE))
|
||||
if not get_link:
|
||||
# Try finding <a> containing <h2>GET</h2>
|
||||
h2_get = soup.find("h2", string=re.compile(r"^GET$", re.IGNORECASE))
|
||||
if h2_get and h2_get.parent.name == "a":
|
||||
get_link = h2_get.parent
|
||||
|
||||
if get_link and get_link.has_attr("href"):
|
||||
return urljoin(current_url, get_link["href"])
|
||||
|
||||
# 2. Check for "series.php" -> "edition.php"
|
||||
if "series.php" in current_url:
|
||||
# Find first edition link
|
||||
edition_link = soup.find("a", href=re.compile(r"edition\.php"))
|
||||
if edition_link:
|
||||
current_url = urljoin(current_url, edition_link["href"])
|
||||
continue
|
||||
|
||||
# 3. Check for "edition.php" -> "file.php"
|
||||
if "edition.php" in current_url:
|
||||
file_link = soup.find("a", href=re.compile(r"file\.php"))
|
||||
if file_link:
|
||||
current_url = urljoin(current_url, file_link["href"])
|
||||
continue
|
||||
|
||||
# 4. Check for "file.php" -> "ads.php" (Libgen badge)
|
||||
if "file.php" in current_url:
|
||||
# Look for link with title="libgen" or text "Libgen"
|
||||
libgen_link = soup.find("a", title="libgen")
|
||||
if not libgen_link:
|
||||
libgen_link = soup.find("a", string=re.compile(r"Libgen", re.IGNORECASE))
|
||||
|
||||
if libgen_link and libgen_link.has_attr("href"):
|
||||
current_url = urljoin(current_url, libgen_link["href"])
|
||||
continue
|
||||
|
||||
# 5. Check for "ads.php" -> "get.php" (Fallback if GET link logic above failed)
|
||||
if "ads.php" in current_url:
|
||||
get_php_link = soup.find("a", href=re.compile(r"get\.php"))
|
||||
if get_php_link:
|
||||
return urljoin(current_url, get_php_link["href"])
|
||||
|
||||
# 6. Library.lol / generic fallback
|
||||
for text in ["Cloudflare", "IPFS.io", "Infura"]:
|
||||
link = soup.find("a", string=re.compile(text, re.IGNORECASE))
|
||||
if link and link.has_attr("href"):
|
||||
return urljoin(current_url, link["href"])
|
||||
|
||||
# If we found nothing new, stop
|
||||
break
|
||||
|
||||
return None
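# Typical hops this resolver is written to follow (URLs are illustrative):
#     .../series.php?id=1  ->  .../edition.php?id=2  ->  .../file.php?id=3
#     .../file.php?id=3    ->  .../ads.php?md5=...   ->  .../get.php?md5=...  (final file)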
|
||||
|
||||
|
||||
def _guess_filename_extension(download_url: str, headers: Dict[str, str]) -> Optional[str]:
|
||||
"""Guess the file extension from headers or the download URL."""
|
||||
content_disposition = headers.get("content-disposition", "")
|
||||
if content_disposition:
|
||||
match = re.search(r'filename\*?=(?:UTF-8\'\'|"?)([^";]+)', content_disposition, flags=re.IGNORECASE)
|
||||
if match:
|
||||
filename = unquote(match.group(1).strip('"'))
|
||||
suffix = Path(filename).suffix
|
||||
if suffix:
|
||||
return suffix.lstrip('.')
|
||||
|
||||
parsed = urlparse(download_url)
|
||||
suffix = Path(parsed.path).suffix
|
||||
if suffix:
|
||||
return suffix.lstrip('.')
|
||||
|
||||
content_type = headers.get('content-type', '').lower()
|
||||
mime_map = {
|
||||
'application/pdf': 'pdf',
|
||||
'application/epub+zip': 'epub',
|
||||
'application/x-mobipocket-ebook': 'mobi',
|
||||
'application/x-cbr': 'cbr',
|
||||
'application/x-cbz': 'cbz',
|
||||
'application/zip': 'zip',
|
||||
}
|
||||
|
||||
for mime, ext in mime_map.items():
|
||||
if mime in content_type:
|
||||
return ext
|
||||
|
||||
return None
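# Illustrative behaviour of the helper above, assuming header keys are already
# lower-cased by the caller (values are examples only):
#     _guess_filename_extension("https://mirror.example/get.php?id=1",
#                                {"content-disposition": 'attachment; filename="book.epub"'})  -> "epub"
#     _guess_filename_extension("https://mirror.example/files/book.pdf", {})  -> "pdf"
#     _guess_filename_extension("https://mirror.example/download/12345",
#                                {"content-type": "application/epub+zip"})  -> "epub"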
|
||||
|
||||
|
||||
def _apply_extension(path: Path, extension: Optional[str]) -> Path:
|
||||
"""Rename the path to match the detected extension, if needed."""
|
||||
if not extension:
|
||||
return path
|
||||
|
||||
suffix = extension if extension.startswith('.') else f'.{extension}'
|
||||
if path.suffix.lower() == suffix.lower():
|
||||
return path
|
||||
|
||||
candidate = path.with_suffix(suffix)
|
||||
base_stem = path.stem
|
||||
counter = 1
|
||||
while candidate.exists() and counter < 100:
|
||||
candidate = path.with_name(f"{base_stem}({counter}){suffix}")
|
||||
counter += 1
|
||||
|
||||
try:
|
||||
path.replace(candidate)
|
||||
return candidate
|
||||
except Exception:
|
||||
return path
|
||||
|
||||
def download_from_mirror(
|
||||
mirror_url: str,
|
||||
output_path: Path,
|
||||
*,
|
||||
log_info: LogFn = None,
|
||||
log_error: ErrorFn = None,
|
||||
session: Optional[requests.Session] = None,
|
||||
progress_callback: Optional[Callable[[int, int], None]] = None,
|
||||
) -> Tuple[bool, Optional[Path]]:
|
||||
"""Download file from a LibGen mirror URL with optional progress tracking."""
|
||||
session = session or requests.Session()
|
||||
output_path = Path(output_path)
|
||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
try:
|
||||
_call(log_info, f"[download] Resolving download link from: {mirror_url}")
|
||||
|
||||
download_url = _resolve_download_url(session, mirror_url, log_info)
|
||||
|
||||
if not download_url:
|
||||
_call(log_error, "[download] Could not find direct download link")
|
||||
return False, None
|
||||
|
||||
_call(log_info, f"[download] Downloading from: {download_url}")
|
||||
|
||||
downloaded = 0
|
||||
total_size = 0
|
||||
headers: Dict[str, str] = {}
|
||||
|
||||
with session.get(download_url, stream=True, timeout=60) as r:
|
||||
r.raise_for_status()
|
||||
headers = {k.lower(): v for k, v in r.headers.items()}  # lower-case keys so the lookups below match
|
||||
|
||||
# Verify it's not HTML (error page)
|
||||
ct = headers.get("content-type", "").lower()
|
||||
if "text/html" in ct:
|
||||
_call(log_error, "[download] Final URL returned HTML, not a file.")
|
||||
return False, None
|
||||
|
||||
total_size = int(headers.get("content-length", 0) or 0)
|
||||
|
||||
with open(output_path, "wb") as f:
|
||||
for chunk in r.iter_content(chunk_size=8192):
|
||||
if chunk:
|
||||
f.write(chunk)
|
||||
downloaded += len(chunk)
|
||||
if progress_callback:
|
||||
progress_callback(downloaded, total_size)
|
||||
|
||||
final_extension = _guess_filename_extension(download_url, headers)
|
||||
final_path = _apply_extension(output_path, final_extension)
|
||||
|
||||
if progress_callback and total_size > 0:
|
||||
progress_callback(downloaded, total_size)
|
||||
|
||||
_call(log_info, f"[download] Saved to {final_path}")
|
||||
return True, final_path
|
||||
|
||||
except Exception as e:
|
||||
_call(log_error, f"[download] Download failed: {e}")
|
||||
return False, None
|
||||
@@ -6,7 +6,7 @@ from typing import Any
|
||||
|
||||
import requests
|
||||
|
||||
from Provider._base import FileProvider
|
||||
from ProviderCore.base import FileProvider
|
||||
|
||||
|
||||
class Matrix(FileProvider):
|
||||
|
||||
358 Provider/openlibrary.py Normal file
@@ -0,0 +1,358 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import shutil
|
||||
import sys
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
|
||||
import requests
|
||||
|
||||
from ProviderCore.base import SearchProvider, SearchResult
|
||||
from ProviderCore.download import download_file, sanitize_filename
|
||||
from cli_syntax import get_field, get_free_text, parse_query
|
||||
from SYS.logger import log
|
||||
from SYS.utils import unique_path
|
||||
|
||||
|
||||
def _looks_like_isbn(text: str) -> bool:
|
||||
t = (text or "").replace("-", "").strip()
|
||||
return t.isdigit() and len(t) in (10, 13)
|
||||
|
||||
|
||||
def _first_str(value: Any) -> Optional[str]:
|
||||
if isinstance(value, str):
|
||||
v = value.strip()
|
||||
return v if v else None
|
||||
if isinstance(value, list) and value:
|
||||
first = value[0]
|
||||
if isinstance(first, str):
|
||||
v = first.strip()
|
||||
return v if v else None
|
||||
return str(first) if first is not None else None
|
||||
return None
|
||||
|
||||
|
||||
def _resolve_edition_id(doc: Dict[str, Any]) -> str:
|
||||
# OpenLibrary Search API typically provides edition_key: ["OL...M", ...]
|
||||
edition_key = doc.get("edition_key")
|
||||
if isinstance(edition_key, list) and edition_key:
|
||||
return str(edition_key[0]).strip()
|
||||
|
||||
# Fallback: sometimes key can be /books/OL...M
|
||||
key = doc.get("key")
|
||||
if isinstance(key, str) and key.startswith("/books/"):
|
||||
return key.split("/books/", 1)[1].strip("/")
|
||||
|
||||
return ""
|
||||
|
||||
|
||||
def _check_lendable(session: requests.Session, edition_id: str) -> Tuple[bool, str]:
|
||||
"""Return (lendable, status_text) using OpenLibrary volumes API."""
|
||||
try:
|
||||
if not edition_id or not edition_id.startswith("OL") or not edition_id.endswith("M"):
|
||||
return False, "not-an-edition"
|
||||
|
||||
url = f"https://openlibrary.org/api/volumes/brief/json/OLID:{edition_id}"
|
||||
resp = session.get(url, timeout=10)
|
||||
resp.raise_for_status()
|
||||
data = resp.json() or {}
|
||||
wrapped = data.get(f"OLID:{edition_id}")
|
||||
if not isinstance(wrapped, dict):
|
||||
return False, "no-availability"
|
||||
|
||||
items = wrapped.get("items")
|
||||
if not isinstance(items, list) or not items:
|
||||
return False, "no-items"
|
||||
|
||||
first = items[0]
|
||||
status_val = ""
|
||||
if isinstance(first, dict):
|
||||
status_val = str(first.get("status", ""))
|
||||
else:
|
||||
status_val = str(first)
|
||||
|
||||
return ("lendable" in status_val.lower()), status_val
|
||||
except requests.exceptions.Timeout:
|
||||
return False, "api-timeout"
|
||||
except Exception:
|
||||
return False, "api-error"
|
||||
|
||||
|
||||
def _resolve_archive_id(session: requests.Session, edition_id: str, ia_candidates: List[str]) -> str:
|
||||
# Prefer IA identifiers already present in search results.
|
||||
if ia_candidates:
|
||||
first = ia_candidates[0].strip()
|
||||
if first:
|
||||
return first
|
||||
|
||||
# Otherwise query the edition JSON.
|
||||
try:
|
||||
resp = session.get(f"https://openlibrary.org/books/{edition_id}.json", timeout=10)
|
||||
resp.raise_for_status()
|
||||
data = resp.json() or {}
|
||||
|
||||
ocaid = data.get("ocaid")
|
||||
if isinstance(ocaid, str) and ocaid.strip():
|
||||
return ocaid.strip()
|
||||
|
||||
identifiers = data.get("identifiers")
|
||||
if isinstance(identifiers, dict):
|
||||
ia = identifiers.get("internet_archive")
|
||||
ia_id = _first_str(ia)
|
||||
if ia_id:
|
||||
return ia_id
|
||||
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
return ""
|
||||
|
||||
|
||||
class OpenLibrary(SearchProvider):
|
||||
"""Search provider for OpenLibrary books + Archive.org direct/borrow download."""
|
||||
|
||||
def __init__(self, config: Optional[Dict[str, Any]] = None):
|
||||
super().__init__(config)
|
||||
self._session = requests.Session()
|
||||
|
||||
def search(
|
||||
self,
|
||||
query: str,
|
||||
limit: int = 50,
|
||||
filters: Optional[Dict[str, Any]] = None,
|
||||
**kwargs: Any,
|
||||
) -> List[SearchResult]:
|
||||
filters = filters or {}
|
||||
|
||||
parsed = parse_query(query)
|
||||
isbn = get_field(parsed, "isbn")
|
||||
author = get_field(parsed, "author")
|
||||
title = get_field(parsed, "title")
|
||||
free_text = get_free_text(parsed)
|
||||
|
||||
q = (isbn or title or author or free_text or query or "").strip()
|
||||
if not q:
|
||||
return []
|
||||
|
||||
if _looks_like_isbn(q):
|
||||
q = f"isbn:{q.replace('-', '')}"
|
||||
|
||||
try:
|
||||
resp = self._session.get(
|
||||
"https://openlibrary.org/search.json",
|
||||
params={"q": q, "limit": int(limit)},
|
||||
timeout=10,
|
||||
)
|
||||
resp.raise_for_status()
|
||||
data = resp.json() or {}
|
||||
except Exception as exc:
|
||||
log(f"[openlibrary] Search failed: {exc}", file=sys.stderr)
|
||||
return []
|
||||
|
||||
results: List[SearchResult] = []
|
||||
docs = data.get("docs") or []
|
||||
if not isinstance(docs, list):
|
||||
return []
|
||||
|
||||
for doc in docs[: int(limit)]:
|
||||
if not isinstance(doc, dict):
|
||||
continue
|
||||
|
||||
book_title = str(doc.get("title") or "").strip() or "Unknown"
|
||||
|
||||
authors = doc.get("author_name") or []
|
||||
if isinstance(authors, str):
|
||||
authors = [authors]
|
||||
if not isinstance(authors, list):
|
||||
authors = []
|
||||
authors_list = [str(a) for a in authors if a]
|
||||
|
||||
year_val = doc.get("first_publish_year")
|
||||
year = str(year_val) if year_val is not None else ""
|
||||
|
||||
edition_id = _resolve_edition_id(doc)
|
||||
|
||||
ia_val = doc.get("ia") or []
|
||||
if isinstance(ia_val, str):
|
||||
ia_val = [ia_val]
|
||||
if not isinstance(ia_val, list):
|
||||
ia_val = []
|
||||
ia_ids = [str(x) for x in ia_val if x]
|
||||
|
||||
isbn_list = doc.get("isbn") or []
|
||||
if isinstance(isbn_list, str):
|
||||
isbn_list = [isbn_list]
|
||||
if not isinstance(isbn_list, list):
|
||||
isbn_list = []
|
||||
|
||||
isbn_13 = next((str(i) for i in isbn_list if len(str(i)) == 13), "")
|
||||
isbn_10 = next((str(i) for i in isbn_list if len(str(i)) == 10), "")
|
||||
|
||||
columns = [
|
||||
("Title", book_title),
|
||||
("Author", ", ".join(authors_list)),
|
||||
("Year", year),
|
||||
("OLID", edition_id),
|
||||
]
|
||||
|
||||
annotations: List[str] = []
|
||||
if isbn_13:
|
||||
annotations.append(f"isbn_13:{isbn_13}")
|
||||
elif isbn_10:
|
||||
annotations.append(f"isbn_10:{isbn_10}")
|
||||
if ia_ids:
|
||||
annotations.append("archive")
|
||||
|
||||
results.append(
|
||||
SearchResult(
|
||||
table="openlibrary",
|
||||
title=book_title,
|
||||
path=(f"https://openlibrary.org/books/{edition_id}" if edition_id else "https://openlibrary.org"),
|
||||
detail=(
|
||||
(f"By: {', '.join(authors_list)}" if authors_list else "")
|
||||
+ (f" ({year})" if year else "")
|
||||
).strip(),
|
||||
annotations=annotations,
|
||||
media_kind="book",
|
||||
columns=columns,
|
||||
full_metadata={
|
||||
"openlibrary_id": edition_id,
|
||||
"authors": authors_list,
|
||||
"year": year,
|
||||
"isbn_10": isbn_10,
|
||||
"isbn_13": isbn_13,
|
||||
"ia": ia_ids,
|
||||
"raw": doc,
|
||||
},
|
||||
)
|
||||
)
|
||||
|
||||
return results
|
||||
|
||||
def download(self, result: SearchResult, output_dir: Path) -> Optional[Path]:
|
||||
output_dir = Path(output_dir)
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
meta = result.full_metadata or {}
|
||||
edition_id = str(meta.get("openlibrary_id") or "").strip()
|
||||
if not edition_id:
|
||||
log("[openlibrary] Missing openlibrary_id; cannot download", file=sys.stderr)
|
||||
return None
|
||||
|
||||
ia_ids = meta.get("ia") or []
|
||||
if isinstance(ia_ids, str):
|
||||
ia_ids = [ia_ids]
|
||||
if not isinstance(ia_ids, list):
|
||||
ia_ids = []
|
||||
ia_candidates = [str(x) for x in ia_ids if x]
|
||||
|
||||
archive_id = _resolve_archive_id(self._session, edition_id, ia_candidates)
|
||||
if not archive_id:
|
||||
log("[openlibrary] No archive identifier available; cannot download", file=sys.stderr)
|
||||
return None
|
||||
|
||||
safe_title = sanitize_filename(result.title)
|
||||
|
||||
# 1) Direct download if available.
|
||||
try:
|
||||
from API.archive_client import check_direct_download
|
||||
|
||||
can_direct, pdf_url = check_direct_download(archive_id)
|
||||
except Exception:
|
||||
can_direct, pdf_url = False, ""
|
||||
|
||||
if can_direct and pdf_url:
|
||||
out_path = unique_path(output_dir / f"{safe_title}.pdf")
|
||||
ok = download_file(pdf_url, out_path, session=self._session)
|
||||
if ok:
|
||||
return out_path
|
||||
log("[openlibrary] Direct download failed", file=sys.stderr)
|
||||
return None
|
||||
|
||||
# 2) Borrow flow (credentials required).
|
||||
try:
|
||||
from API.archive_client import BookNotAvailableError, credential_openlibrary, download as archive_download
|
||||
from API.archive_client import get_book_infos, loan, login
|
||||
|
||||
email, password = credential_openlibrary(self.config or {})
|
||||
if not email or not password:
|
||||
log("[openlibrary] Archive credentials missing; cannot borrow", file=sys.stderr)
|
||||
return None
|
||||
|
||||
lendable, reason = _check_lendable(self._session, edition_id)
|
||||
if not lendable:
|
||||
log(f"[openlibrary] Not lendable: {reason}", file=sys.stderr)
|
||||
return None
|
||||
|
||||
session = login(email, password)
|
||||
try:
|
||||
session = loan(session, archive_id, verbose=False)
|
||||
except BookNotAvailableError:
|
||||
log("[openlibrary] Book not available to borrow", file=sys.stderr)
|
||||
return None
|
||||
except SystemExit:
|
||||
log("[openlibrary] Borrow failed", file=sys.stderr)
|
||||
return None
|
||||
|
||||
urls = [f"https://archive.org/borrow/{archive_id}", f"https://archive.org/details/{archive_id}"]
|
||||
title = safe_title
|
||||
links: Optional[List[str]] = None
|
||||
last_exc: Optional[Exception] = None
|
||||
for u in urls:
|
||||
try:
|
||||
title_raw, links, _metadata = get_book_infos(session, u)
|
||||
if title_raw:
|
||||
title = sanitize_filename(title_raw)
|
||||
break
|
||||
except Exception as exc:
|
||||
last_exc = exc
|
||||
continue
|
||||
|
||||
if not links:
|
||||
log(f"[openlibrary] Failed to extract pages: {last_exc}", file=sys.stderr)
|
||||
return None
|
||||
|
||||
temp_dir = tempfile.mkdtemp(prefix=f"{title}_", dir=str(output_dir))
|
||||
try:
|
||||
images = archive_download(session=session, n_threads=10, directory=temp_dir, links=links, scale=3, book_id=archive_id)
|
||||
|
||||
try:
|
||||
import img2pdf # type: ignore
|
||||
|
||||
pdf_bytes = img2pdf.convert(images) if images else None
|
||||
if not pdf_bytes:
|
||||
log("[openlibrary] PDF conversion failed", file=sys.stderr)
|
||||
try:
|
||||
shutil.rmtree(temp_dir)
|
||||
except Exception:
|
||||
pass
|
||||
return None
|
||||
|
||||
pdf_path = unique_path(output_dir / f"{title}.pdf")
|
||||
with open(pdf_path, "wb") as f:
|
||||
f.write(pdf_bytes)
|
||||
|
||||
try:
|
||||
shutil.rmtree(temp_dir)
|
||||
except Exception:
|
||||
pass
|
||||
return pdf_path
|
||||
|
||||
except ImportError:
|
||||
# Keep images folder.
|
||||
return Path(temp_dir)
|
||||
|
||||
except Exception:
|
||||
try:
|
||||
shutil.rmtree(temp_dir)
|
||||
except Exception:
|
||||
pass
|
||||
raise
|
||||
|
||||
except Exception as exc:
|
||||
log(f"[openlibrary] Borrow workflow error: {exc}", file=sys.stderr)
|
||||
return None
|
||||
|
||||
def validate(self) -> bool:
|
||||
return True
|
||||
@@ -1,159 +0,0 @@
|
||||
"""Dynamic query parser for filtering and field extraction.
|
||||
|
||||
Supports query syntax like:
|
||||
- isbn:0557677203
|
||||
- author:"Albert Pike"
|
||||
- title:"Morals and Dogma"
|
||||
- year:2010
|
||||
- isbn:0557677203 author:"Albert Pike"
|
||||
- Mixed with free text: "Morals" isbn:0557677203
|
||||
|
||||
This allows flexible query strings that can be parsed by any search provider
|
||||
to extract specific fields for filtering and searching.
|
||||
"""
|
||||
|
||||
from typing import Dict, List, Tuple, Optional, Any
|
||||
import re
|
||||
|
||||
|
||||
def parse_query(query: str) -> Dict[str, Any]:
|
||||
"""Parse a query string into field:value pairs and free text.
|
||||
|
||||
Args:
|
||||
query: Query string like 'isbn:0557677203 author:"Albert Pike" Morals'
|
||||
|
||||
Returns:
|
||||
Dictionary with:
|
||||
- 'fields': Dict[field_name, field_value] for structured fields
|
||||
- 'text': str with remaining free text
|
||||
- 'raw': str original query
|
||||
"""
|
||||
result = {
|
||||
'fields': {},
|
||||
'text': '',
|
||||
'raw': query,
|
||||
}
|
||||
|
||||
if not query or not query.strip():
|
||||
return result
|
||||
|
||||
query = query.strip()
|
||||
remaining_parts = []
|
||||
|
||||
# Pattern to match: field:value or field:"quoted value"
|
||||
# Matches: word: followed by either quoted string or unquoted word
|
||||
pattern = r'(\w+):(?:"([^"]*)"|(\S+))'
|
||||
|
||||
pos = 0
|
||||
for match in re.finditer(pattern, query):
|
||||
# Add any text before this match
|
||||
if match.start() > pos:
|
||||
before_text = query[pos:match.start()].strip()
|
||||
if before_text:
|
||||
remaining_parts.append(before_text)
|
||||
|
||||
field_name = match.group(1).lower()
|
||||
field_value = match.group(2) if match.group(2) is not None else match.group(3)
|
||||
|
||||
result['fields'][field_name] = field_value
|
||||
pos = match.end()
|
||||
|
||||
# Add any remaining text after last match
|
||||
if pos < len(query):
|
||||
remaining_text = query[pos:].strip()
|
||||
if remaining_text:
|
||||
remaining_parts.append(remaining_text)
|
||||
|
||||
result['text'] = ' '.join(remaining_parts)
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def get_field(parsed_query: Dict[str, Any], field_name: str, default: Optional[str] = None) -> Optional[str]:
|
||||
"""Get a field value from parsed query, with optional default.
|
||||
|
||||
Args:
|
||||
parsed_query: Result from parse_query()
|
||||
field_name: Field name to look up (case-insensitive)
|
||||
default: Default value if field not found
|
||||
|
||||
Returns:
|
||||
Field value or default
|
||||
"""
|
||||
return parsed_query.get('fields', {}).get(field_name.lower(), default)
|
||||
|
||||
|
||||
def has_field(parsed_query: Dict[str, Any], field_name: str) -> bool:
|
||||
"""Check if a field exists in parsed query.
|
||||
|
||||
Args:
|
||||
parsed_query: Result from parse_query()
|
||||
field_name: Field name to check (case-insensitive)
|
||||
|
||||
Returns:
|
||||
True if field exists
|
||||
"""
|
||||
return field_name.lower() in parsed_query.get('fields', {})
|
||||
|
||||
|
||||
def get_free_text(parsed_query: Dict[str, Any]) -> str:
|
||||
"""Get the free text portion of a parsed query.
|
||||
|
||||
Args:
|
||||
parsed_query: Result from parse_query()
|
||||
|
||||
Returns:
|
||||
Free text or empty string
|
||||
"""
|
||||
return parsed_query.get('text', '')
|
||||
|
||||
|
||||
def build_query_for_provider(
|
||||
parsed_query: Dict[str, Any],
|
||||
provider: str,
|
||||
extraction_map: Optional[Dict[str, str]] = None
|
||||
) -> Tuple[str, Dict[str, str]]:
|
||||
"""Build a search query and filters dict for a specific provider.
|
||||
|
||||
Different providers have different search syntax. This function
|
||||
extracts the appropriate fields for each provider.
|
||||
|
||||
Args:
|
||||
parsed_query: Result from parse_query()
|
||||
provider: Provider name ('libgen', 'openlibrary', 'soulseek')
|
||||
extraction_map: Optional mapping of field names to provider-specific names
|
||||
e.g. {'isbn': 'isbn', 'author': 'author', 'title': 'title'}
|
||||
|
||||
Returns:
|
||||
Tuple of (search_query: str, extracted_fields: Dict[field, value])
|
||||
"""
|
||||
extraction_map = extraction_map or {}
|
||||
extracted = {}
|
||||
free_text = get_free_text(parsed_query)
|
||||
|
||||
# Extract fields based on map
|
||||
for field_name, provider_key in extraction_map.items():
|
||||
if has_field(parsed_query, field_name):
|
||||
extracted[provider_key] = get_field(parsed_query, field_name)
|
||||
|
||||
# If provider-specific extraction needed, providers can implement it
|
||||
# For now, return the free text as query
|
||||
return free_text, extracted
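# Example (illustrative values): mapping Libgen-style field names onto a parsed query.
#     parsed = parse_query('title:"Morals and Dogma" isbn:0557677203 first edition')
#     text, fields = build_query_for_provider(parsed, 'libgen',
#                                             {'isbn': 'isbn', 'title': 'title'})
#     # text   == 'first edition'
#     # fields == {'isbn': '0557677203', 'title': 'Morals and Dogma'}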
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
# Test cases
|
||||
test_queries = [
|
||||
'isbn:0557677203',
|
||||
'isbn:0557677203 author:"Albert Pike"',
|
||||
'Morals and Dogma isbn:0557677203',
|
||||
'title:"Morals and Dogma" author:"Albert Pike" year:2010',
|
||||
'search term without fields',
|
||||
'author:"John Smith" title:"A Book"',
|
||||
]
|
||||
|
||||
for query in test_queries:
|
||||
print(f"\nQuery: {query}")
|
||||
parsed = parse_query(query)
|
||||
print(f" Fields: {parsed['fields']}")
|
||||
print(f" Text: {parsed['text']}")
|
||||
@@ -11,7 +11,7 @@ import time
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from Provider._base import SearchProvider, SearchResult
|
||||
from ProviderCore.base import SearchProvider, SearchResult
|
||||
from SYS.logger import log, debug
|
||||
|
||||
|
||||
|
||||
@@ -1,707 +0,0 @@
|
||||
"""Unified book downloader - handles Archive.org borrowing and Libgen fallback.
|
||||
|
||||
This module provides a single interface for downloading books from multiple sources:
|
||||
1. Try Archive.org direct download (if available)
|
||||
2. Try Archive.org borrowing (if user has credentials)
|
||||
3. Fallback to Libgen search by ISBN
|
||||
4. Attempt Libgen download
|
||||
|
||||
All sources integrated with proper metadata scraping and error handling.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import asyncio
|
||||
import requests
|
||||
from typing import Optional, Dict, Any, Tuple, List, Callable, cast
|
||||
from pathlib import Path
|
||||
|
||||
from SYS.logger import debug
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class UnifiedBookDownloader:
|
||||
"""Unified interface for downloading books from multiple sources."""
|
||||
|
||||
def __init__(self, config: Optional[Dict[str, Any]] = None, output_dir: Optional[str] = None):
|
||||
"""Initialize the unified book downloader.
|
||||
|
||||
Args:
|
||||
config: Configuration dict with credentials
|
||||
output_dir: Default output directory
|
||||
"""
|
||||
self.config = config or {}
|
||||
self.output_dir = output_dir
|
||||
self.session = requests.Session()
|
||||
|
||||
# Import download functions from their modules
|
||||
self._init_downloaders()
|
||||
|
||||
def _init_downloaders(self) -> None:
|
||||
"""Initialize downloader functions from their modules."""
|
||||
try:
|
||||
from API.archive_client import (
|
||||
check_direct_download,
|
||||
get_openlibrary_by_isbn,
|
||||
loan
|
||||
)
|
||||
self.check_direct_download = check_direct_download
|
||||
self.get_openlibrary_by_isbn = get_openlibrary_by_isbn
|
||||
self.loan_func = loan
|
||||
logger.debug("[UnifiedBookDownloader] Loaded archive.org downloaders from archive_client")
|
||||
except Exception as e:
|
||||
logger.warning(f"[UnifiedBookDownloader] Failed to load archive.org functions: {e}")
|
||||
self.check_direct_download = None
|
||||
self.get_openlibrary_by_isbn = None
|
||||
self.loan_func = None
|
||||
|
||||
try:
|
||||
from Provider.libgen_service import (
|
||||
DEFAULT_LIMIT as _LIBGEN_DEFAULT_LIMIT,
|
||||
download_from_mirror as _libgen_download,
|
||||
search_libgen as _libgen_search,
|
||||
)
|
||||
|
||||
def _log_info(message: str) -> None:
|
||||
debug(f"[UnifiedBookDownloader] {message}")
|
||||
|
||||
def _log_error(message: str) -> None:
|
||||
logger.error(f"[UnifiedBookDownloader] {message}")
|
||||
|
||||
self.search_libgen = lambda query, limit=_LIBGEN_DEFAULT_LIMIT: _libgen_search(
|
||||
query,
|
||||
limit=limit,
|
||||
log_info=_log_info,
|
||||
log_error=_log_error,
|
||||
)
|
||||
self.download_from_mirror = lambda mirror_url, output_path: _libgen_download(
|
||||
mirror_url,
|
||||
output_path,
|
||||
log_info=_log_info,
|
||||
log_error=_log_error,
|
||||
)
|
||||
logger.debug("[UnifiedBookDownloader] Loaded Libgen helpers")
|
||||
except Exception as e:
|
||||
logger.warning(f"[UnifiedBookDownloader] Failed to load Libgen helpers: {e}")
|
||||
self.search_libgen = None
|
||||
self.download_from_mirror = None
|
||||
|
||||
def get_download_options(self, book_data: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Get all available download options for a book.
|
||||
|
||||
Checks in priority order:
|
||||
1. Archive.org direct download (public domain)
|
||||
2. Archive.org borrowing (if credentials available and book is borrowable)
|
||||
3. Libgen fallback (by ISBN)
|
||||
|
||||
Args:
|
||||
book_data: Book metadata dict with at least 'openlibrary_id' or 'isbn'
|
||||
|
||||
Returns:
|
||||
Dict with available download methods and metadata
|
||||
"""
|
||||
options = {
|
||||
'book_title': book_data.get('title', 'Unknown'),
|
||||
'book_author': book_data.get('author', 'Unknown'),
|
||||
'isbn': book_data.get('isbn', ''),
|
||||
'openlibrary_id': book_data.get('openlibrary_id', ''),
|
||||
'methods': [], # Will be sorted by priority
|
||||
'metadata': {}
|
||||
}
|
||||
|
||||
# Extract book ID from openlibrary_id (e.g., OL8513721M -> 8513721, OL8513721W -> 8513721)
|
||||
ol_id = book_data.get('openlibrary_id', '')
|
||||
book_id = None
|
||||
|
||||
if ol_id.startswith('OL') and len(ol_id) > 2:
|
||||
# Remove 'OL' prefix (keep everything after it including the suffix letter)
|
||||
# The book_id is all digits after 'OL'
|
||||
book_id = ''.join(c for c in ol_id[2:] if c.isdigit())
|
||||
|
||||
# PRIORITY 1: Check direct download (fastest, no auth needed)
|
||||
if self.check_direct_download:
|
||||
try:
|
||||
can_download, pdf_url = self.check_direct_download(book_id)
|
||||
if can_download:
|
||||
options['methods'].append({
|
||||
'type': 'archive.org_direct',
|
||||
'label': 'Archive.org Direct Download',
|
||||
'requires_auth': False,
|
||||
'pdf_url': pdf_url,
|
||||
'book_id': book_id,
|
||||
'priority': 1 # Highest priority
|
||||
})
|
||||
logger.info(f"[UnifiedBookDownloader] Direct download available for {book_id}")
|
||||
except Exception as e:
|
||||
logger.debug(f"[UnifiedBookDownloader] Direct download check failed: {e}")
|
||||
|
||||
# PRIORITY 2: Check borrowing option (requires auth, 14-day loan)
|
||||
# First verify the book is actually lendable via OpenLibrary API
|
||||
if self._has_archive_credentials():
|
||||
is_lendable, status = self._check_book_lendable_status(ol_id)
|
||||
|
||||
if is_lendable:
|
||||
options['methods'].append({
|
||||
'type': 'archive.org_borrow',
|
||||
'label': 'Archive.org Borrow',
|
||||
'requires_auth': True,
|
||||
'book_id': book_id,
|
||||
'priority': 2 # Second priority
|
||||
})
|
||||
logger.info(f"[UnifiedBookDownloader] Borrow option available for {book_id} (status: {status})")
|
||||
else:
|
||||
logger.debug(f"[UnifiedBookDownloader] Borrow not available for {book_id} (status: {status})")
|
||||
|
||||
# PRIORITY 3: Check Libgen fallback (by ISBN, no auth needed, most reliable)
|
||||
isbn = book_data.get('isbn', '')
|
||||
title = book_data.get('title', '')
|
||||
author = book_data.get('author', '')
|
||||
|
||||
if self.search_libgen:
|
||||
# Can use Libgen if we have ISBN OR title (or both)
|
||||
if isbn or title:
|
||||
options['methods'].append({
|
||||
'type': 'libgen',
|
||||
'label': 'Libgen Search & Download',
|
||||
'requires_auth': False,
|
||||
'isbn': isbn,
|
||||
'title': title,
|
||||
'author': author,
|
||||
'priority': 3 # Third priority (fallback)
|
||||
})
|
||||
logger.info(f"[UnifiedBookDownloader] Libgen fallback available (ISBN: {isbn if isbn else 'N/A'}, Title: {title})")
|
||||
|
||||
# Sort by priority (higher priority first)
|
||||
options['methods'].sort(key=lambda x: x.get('priority', 999))
|
||||
|
||||
return options
|
||||
|
||||
def _has_archive_credentials(self) -> bool:
|
||||
"""Check if Archive.org credentials are available."""
|
||||
try:
|
||||
from API.archive_client import credential_openlibrary
|
||||
email, password = credential_openlibrary(self.config)
|
||||
return bool(email and password)
|
||||
except Exception:
|
||||
return False
|
||||
|
||||
def _check_book_lendable_status(self, ol_id: str) -> Tuple[bool, Optional[str]]:
|
||||
"""Check if a book is lendable via OpenLibrary API.
|
||||
|
||||
Queries: https://openlibrary.org/api/volumes/brief/json/OLID:{ol_id}
|
||||
Note: Only works with Edition IDs (OL...M), not Work IDs (OL...W)
|
||||
|
||||
Args:
|
||||
ol_id: OpenLibrary ID (e.g., OL8513721M for Edition or OL4801915W for Work)
|
||||
|
||||
Returns:
|
||||
Tuple of (is_lendable: bool, status_reason: Optional[str])
|
||||
"""
|
||||
try:
|
||||
if not ol_id.startswith('OL'):
|
||||
return False, "Invalid OpenLibrary ID format"
|
||||
|
||||
# If this is a Work ID (ends with W), we can't query Volumes API
|
||||
# Work IDs are abstract umbrella records, not specific editions
|
||||
if ol_id.endswith('W'):
|
||||
logger.debug(f"[UnifiedBookDownloader] Work ID {ol_id} - skipping Volumes API (not lendable)")
|
||||
return False, "Work ID not supported by Volumes API (not a specific edition)"
|
||||
|
||||
# If it ends with M, it's an Edition ID - proceed with query
|
||||
if not ol_id.endswith('M'):
|
||||
logger.debug(f"[UnifiedBookDownloader] Unknown ID type {ol_id} (not M or W)")
|
||||
return False, "Invalid OpenLibrary ID type"
|
||||
|
||||
url = f"https://openlibrary.org/api/volumes/brief/json/OLID:{ol_id}"
|
||||
response = self.session.get(url, timeout=10)
|
||||
response.raise_for_status()
|
||||
data = response.json()
|
||||
|
||||
# Empty response means no records found
|
||||
if not data:
|
||||
logger.debug(f"[UnifiedBookDownloader] Empty response for {ol_id}")
|
||||
return False, "No availability data found"
|
||||
|
||||
# The response is wrapped in OLID key
|
||||
olid_key = f"OLID:{ol_id}"
|
||||
if olid_key not in data:
|
||||
logger.debug(f"[UnifiedBookDownloader] OLID key not found in response")
|
||||
return False, "No availability data found"
|
||||
|
||||
olid_data = data[olid_key]
|
||||
|
||||
# Check items array for lendable status
|
||||
if 'items' in olid_data and olid_data['items'] and len(olid_data['items']) > 0:
|
||||
items = olid_data['items']
|
||||
|
||||
# Check the first item for lending status
|
||||
first_item = items[0]
|
||||
|
||||
# Handle both dict and string representations (PowerShell converts to string)
|
||||
if isinstance(first_item, dict):
|
||||
status = first_item.get('status', '')
|
||||
else:
|
||||
# String representation - check if 'lendable' is in it
|
||||
status = str(first_item).lower()
|
||||
|
||||
is_lendable = 'lendable' in str(status).lower()
|
||||
|
||||
if is_lendable:
|
||||
logger.info(f"[UnifiedBookDownloader] Book {ol_id} is lendable")
|
||||
return True, "LENDABLE"
|
||||
else:
|
||||
status_str = str(status) if status else 'NOT_LENDABLE'
|
||||
logger.debug(f"[UnifiedBookDownloader] Book {ol_id} is not lendable (status: {status_str})")
|
||||
return False, status_str
|
||||
else:
|
||||
# No items array or empty
|
||||
logger.debug(f"[UnifiedBookDownloader] No items found for {ol_id}")
|
||||
return False, "Not available for lending"
|
||||
|
||||
except requests.exceptions.Timeout:
|
||||
logger.warning(f"[UnifiedBookDownloader] OpenLibrary API timeout for {ol_id}")
|
||||
return False, "API timeout"
|
||||
except Exception as e:
|
||||
logger.debug(f"[UnifiedBookDownloader] Failed to check lendable status for {ol_id}: {e}")
|
||||
return False, f"API error"
|
||||
|
||||
|
||||
async def download_book(self, method: Dict[str, Any], output_dir: Optional[str] = None) -> Tuple[bool, str]:
|
||||
"""Download a book using the specified method.
|
||||
|
||||
Args:
|
||||
method: Download method dict from get_download_options()
|
||||
output_dir: Directory to save the book
|
||||
|
||||
Returns:
|
||||
Tuple of (success: bool, message: str)
|
||||
"""
|
||||
output_dir = output_dir or self.output_dir or str(Path.home() / "Downloads")
|
||||
method_type = method.get('type', '')
|
||||
|
||||
logger.info(f"[UnifiedBookDownloader] Starting download with method: {method_type}")
|
||||
|
||||
try:
|
||||
if method_type == 'archive.org_direct':
|
||||
return await self._download_archive_direct(method, output_dir)
|
||||
|
||||
elif method_type == 'archive.org_borrow':
|
||||
return await self._download_archive_borrow(method, output_dir)
|
||||
|
||||
elif method_type == 'libgen':
|
||||
return await self._download_libgen(method, output_dir)
|
||||
|
||||
else:
|
||||
return False, f"Unknown download method: {method_type}"
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"[UnifiedBookDownloader] Download error: {e}", exc_info=True)
|
||||
return False, f"Download failed: {str(e)}"
|
||||
|
||||
async def _download_archive_direct(self, method: Dict[str, Any], output_dir: str) -> Tuple[bool, str]:
|
||||
"""Download directly from Archive.org."""
|
||||
try:
|
||||
pdf_url = method.get('pdf_url', '')
|
||||
book_id = method.get('book_id', '')
|
||||
|
||||
if not pdf_url:
|
||||
return False, "No PDF URL available"
|
||||
|
||||
# Determine output filename
|
||||
filename = f"{book_id}.pdf"
|
||||
output_path = Path(output_dir) / filename
|
||||
|
||||
logger.info(f"[UnifiedBookDownloader] Downloading PDF from: {pdf_url}")
|
||||
|
||||
# Download in a thread to avoid blocking
|
||||
loop = asyncio.get_event_loop()
|
||||
success = await loop.run_in_executor(
|
||||
None,
|
||||
self._download_file,
|
||||
pdf_url,
|
||||
str(output_path)
|
||||
)
|
||||
|
||||
if success:
|
||||
logger.info(f"[UnifiedBookDownloader] Successfully downloaded to: {output_path}")
|
||||
return True, f"Downloaded to: {output_path}"
|
||||
else:
|
||||
return False, "Failed to download PDF"
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"[UnifiedBookDownloader] Archive direct download error: {e}")
|
||||
return False, f"Archive download failed: {str(e)}"
|
||||
|
||||
async def _download_archive_borrow(self, method: Dict[str, Any], output_dir: str) -> Tuple[bool, str]:
|
||||
"""Download via Archive.org borrowing (requires credentials).
|
||||
|
||||
Process (follows archive_client.py pattern):
|
||||
1. Login to Archive.org with credentials
|
||||
2. Call loan endpoint to borrow the book (14-day loan)
|
||||
3. Get book info (page links, metadata)
|
||||
4. Download all pages as images
|
||||
5. Merge images into PDF
|
||||
|
||||
The loan function from archive_client.py handles:
|
||||
- Checking if book needs borrowing (status 400 = "doesn't need to be borrowed")
|
||||
- Creating borrow token for access
|
||||
- Handling borrow failures
|
||||
|
||||
get_book_infos() extracts page links from the borrowed book viewer
|
||||
download() downloads all pages using thread pool
|
||||
img2pdf merges pages into searchable PDF
|
||||
"""
|
||||
try:
|
||||
from API.archive_client import credential_openlibrary
|
||||
|
||||
book_id = method.get('book_id', '')
|
||||
|
||||
# Get credentials
|
||||
email, password = credential_openlibrary(self.config)
|
||||
if not email or not password:
|
||||
return False, "Archive.org credentials not configured"
|
||||
|
||||
logger.info(f"[UnifiedBookDownloader] Logging into Archive.org...")
|
||||
|
||||
# Login and borrow (in thread, following download_book.py pattern)
|
||||
loop = asyncio.get_event_loop()
|
||||
borrow_result = await loop.run_in_executor(
|
||||
None,
|
||||
self._archive_borrow_and_download,
|
||||
email,
|
||||
password,
|
||||
book_id,
|
||||
output_dir
|
||||
)
|
||||
|
||||
if borrow_result and isinstance(borrow_result, tuple):
|
||||
success, filepath = borrow_result
|
||||
if success:
|
||||
logger.info(f"[UnifiedBookDownloader] Borrow succeeded: {filepath}")
|
||||
return True, filepath
|
||||
else:
|
||||
logger.warning(f"[UnifiedBookDownloader] Borrow failed: {filepath}")
|
||||
return False, filepath
|
||||
else:
|
||||
return False, "Failed to borrow book from Archive.org"
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"[UnifiedBookDownloader] Archive borrow error: {e}")
|
||||
return False, f"Archive borrow failed: {str(e)}"
|
||||
|
||||
async def _download_libgen(self, method: Dict[str, Any], output_dir: str) -> Tuple[bool, str]:
|
||||
"""Download via Libgen search and download with mirror fallback."""
|
||||
try:
|
||||
isbn = method.get('isbn', '')
|
||||
title = method.get('title', '')
|
||||
|
||||
if not isbn and not title:
|
||||
return False, "Need ISBN or title for Libgen search"
|
||||
|
||||
if not self.search_libgen:
|
||||
return False, "Libgen searcher not available"
|
||||
|
||||
# Define wrapper functions to safely call the methods
|
||||
search_func = self.search_libgen
|
||||
if search_func is None:
|
||||
return False, "Search function not available"
|
||||
|
||||
preloaded_results = method.get('results')
|
||||
loop = asyncio.get_event_loop()
|
||||
|
||||
if preloaded_results:
|
||||
results = list(preloaded_results)
|
||||
if not results:
|
||||
results = await loop.run_in_executor(None, lambda: search_func(isbn or title, 10))
|
||||
else:
|
||||
results = await loop.run_in_executor(None, lambda: search_func(isbn or title, 10))
|
||||
|
||||
if not results:
|
||||
logger.warning(f"[UnifiedBookDownloader] No Libgen results for: {isbn or title}")
|
||||
return False, f"No Libgen results found for: {isbn or title}"
|
||||
|
||||
logger.info(f"[UnifiedBookDownloader] Found {len(results)} Libgen results")
|
||||
|
||||
# Determine output filename (use first result for naming)
|
||||
first_result = results[0]
|
||||
filename = f"{first_result.get('title', 'book')}"
|
||||
filename = "".join(c for c in filename if c.isalnum() or c in (' ', '.', '-'))[:100]
|
||||
|
||||
# Try each result's mirror until one succeeds
|
||||
for idx, result in enumerate(results, 1):
|
||||
mirror_url = result.get('mirror_url', '')
|
||||
|
||||
if not mirror_url:
|
||||
logger.debug(f"[UnifiedBookDownloader] Result {idx}: No mirror URL")
|
||||
continue
|
||||
|
||||
# Use extension from this result if available
|
||||
extension = result.get('extension', 'pdf')
|
||||
if extension and not extension.startswith('.'):
|
||||
extension = f".{extension}"
|
||||
elif not extension:
|
||||
extension = '.pdf'
|
||||
|
||||
output_path = Path(output_dir) / (filename + extension)
|
||||
|
||||
logger.info(f"[UnifiedBookDownloader] Trying mirror {idx}/{len(results)}: {mirror_url}")
|
||||
|
||||
download_func = self.download_from_mirror
|
||||
if download_func is None:
|
||||
return False, "Download function not available"
|
||||
|
||||
download_callable = cast(Callable[[str, str], Tuple[bool, Optional[Path]]], download_func)
|
||||
|
||||
def download_wrapper():
|
||||
return download_callable(mirror_url, str(output_path))
|
||||
|
||||
# Download (in thread)
|
||||
try:
|
||||
success, downloaded_path = await loop.run_in_executor(None, download_wrapper)
|
||||
|
||||
if success:
|
||||
dest_path = Path(downloaded_path) if downloaded_path else output_path
|
||||
# Validate downloaded file is not HTML (common Libgen issue)
|
||||
if dest_path.exists():
|
||||
try:
|
||||
with open(dest_path, 'rb') as f:
|
||||
file_start = f.read(1024).decode('utf-8', errors='ignore').lower()
|
||||
if '<!doctype' in file_start or '<html' in file_start:
|
||||
logger.warning(f"[UnifiedBookDownloader] Mirror {idx} returned HTML instead of file, trying next mirror...")
|
||||
dest_path.unlink() # Delete the HTML file
|
||||
continue
|
||||
except Exception as e:
|
||||
logger.debug(f"[UnifiedBookDownloader] Could not validate file content: {e}")
|
||||
|
||||
logger.info(f"[UnifiedBookDownloader] Successfully downloaded from mirror {idx} to: {dest_path}")
|
||||
return True, str(dest_path)
|
||||
else:
|
||||
logger.warning(f"[UnifiedBookDownloader] Mirror {idx} download failed, trying next...")
|
||||
except Exception as e:
|
||||
logger.warning(f"[UnifiedBookDownloader] Mirror {idx} error: {e}, trying next...")
|
||||
continue
|
||||
|
||||
return False, f"All {len(results)} mirrors failed"
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"[UnifiedBookDownloader] Libgen download error: {e}")
|
||||
return False, f"Libgen download failed: {str(e)}"
|
||||
|
||||
async def download_libgen_selection(
|
||||
self,
|
||||
selected: Dict[str, Any],
|
||||
remaining: Optional[List[Dict[str, Any]]] = None,
|
||||
output_dir: Optional[str] = None,
|
||||
) -> Tuple[bool, str]:
|
||||
"""Download a specific Libgen result with optional fallbacks."""
|
||||
|
||||
if not isinstance(selected, dict):
|
||||
return False, "Selected result must be a dictionary"
|
||||
|
||||
ordered_results: List[Dict[str, Any]] = [selected]
|
||||
if remaining:
|
||||
for item in remaining:
|
||||
if isinstance(item, dict) and item is not selected:
|
||||
ordered_results.append(item)
|
||||
|
||||
method: Dict[str, Any] = {
|
||||
'type': 'libgen',
|
||||
'isbn': selected.get('isbn', '') or '',
|
||||
'title': selected.get('title', '') or '',
|
||||
'author': selected.get('author', '') or '',
|
||||
'results': ordered_results,
|
||||
}
|
||||
|
||||
return await self.download_book(method, output_dir)
|
||||
|
||||
def download_libgen_selection_sync(
|
||||
self,
|
||||
selected: Dict[str, Any],
|
||||
remaining: Optional[List[Dict[str, Any]]] = None,
|
||||
output_dir: Optional[str] = None,
|
||||
) -> Tuple[bool, str]:
|
||||
"""Synchronous helper for downloading a Libgen selection."""
|
||||
|
||||
async def _run() -> Tuple[bool, str]:
|
||||
return await self.download_libgen_selection(selected, remaining, output_dir)
|
||||
|
||||
loop = asyncio.new_event_loop()
|
||||
try:
|
||||
asyncio.set_event_loop(loop)
|
||||
return loop.run_until_complete(_run())
|
||||
finally:
|
||||
loop.close()
|
||||
asyncio.set_event_loop(None)
|
||||
|
||||
def _download_file(self, url: str, output_path: str) -> bool:
|
||||
"""Download a file from URL."""
|
||||
try:
|
||||
response = requests.get(url, stream=True, timeout=30)
|
||||
response.raise_for_status()
|
||||
|
||||
with open(output_path, 'wb') as f:
|
||||
for chunk in response.iter_content(chunk_size=8192):
|
||||
if chunk:
|
||||
f.write(chunk)
|
||||
|
||||
return True
|
||||
except Exception as e:
|
||||
logger.error(f"[UnifiedBookDownloader] File download error: {e}")
|
||||
return False
|
||||
|
||||
def _archive_borrow_and_download(self, email: str, password: str, book_id: str, output_dir: str) -> Tuple[bool, str]:
|
||||
"""Borrow a book from Archive.org and download pages as PDF.
|
||||
|
||||
This follows the exact process from archive_client.py:
|
||||
1. Login with credentials
|
||||
2. Call loan() to create 14-day borrow
|
||||
3. Get book info (extract page url)
|
||||
4. Download all pages as images
|
||||
5. Merge images into searchable PDF
|
||||
|
||||
Returns tuple of (success: bool, filepath/message: str)
|
||||
"""
|
||||
try:
|
||||
from API.archive_client import login, loan, get_book_infos, download
|
||||
import tempfile
|
||||
import shutil
|
||||
|
||||
logger.info(f"[UnifiedBookDownloader] Logging into Archive.org as {email}")
|
||||
session = login(email, password)
|
||||
|
||||
logger.info(f"[UnifiedBookDownloader] Attempting to borrow book: {book_id}")
|
||||
# Call loan to create the 14-day borrow
|
||||
session = loan(session, book_id, verbose=True)
|
||||
|
||||
# If we get here, borrowing succeeded
|
||||
logger.info(f"[UnifiedBookDownloader] Successfully borrowed book: {book_id}")
|
||||
|
||||
# Now get the book info (page url and metadata)
|
||||
logger.info(f"[UnifiedBookDownloader] Extracting book page information...")
|
||||
# Try both URL formats: with /borrow and without
|
||||
book_urls = [
|
||||
f"https://archive.org/borrow/{book_id}", # Try borrow page first (for borrowed books)
|
||||
f"https://archive.org/details/{book_id}" # Fallback to details page
|
||||
]
|
||||
|
||||
title = None
|
||||
links = None
|
||||
metadata = None
|
||||
last_error = None
|
||||
|
||||
for book_url in book_urls:
|
||||
try:
|
||||
logger.debug(f"[UnifiedBookDownloader] Trying to get book info from: {book_url}")
|
||||
response = session.get(book_url, timeout=10)
|
||||
|
||||
# Log response status
|
||||
if response.status_code != 200:
|
||||
logger.debug(f"[UnifiedBookDownloader] URL returned {response.status_code}: {book_url}")
|
||||
# Continue to try next URL
|
||||
continue
|
||||
|
||||
# Try to parse the response
|
||||
title, links, metadata = get_book_infos(session, book_url)
|
||||
logger.info(f"[UnifiedBookDownloader] Successfully got info from: {book_url}")
|
||||
logger.info(f"[UnifiedBookDownloader] Found {len(links)} pages to download")
|
||||
break
|
||||
except Exception as e:
|
||||
logger.debug(f"[UnifiedBookDownloader] Failed with {book_url}: {e}")
|
||||
last_error = e
|
||||
continue
|
||||
|
||||
if links is None:
|
||||
logger.error(f"[UnifiedBookDownloader] Failed to get book info from all url: {last_error}")
|
||||
# Borrow extraction failed - return False
|
||||
return False, "Could not extract borrowed book pages"
|
||||
|
||||
# Create temporary directory for images
|
||||
temp_dir = tempfile.mkdtemp(prefix=f"{title}_", dir=output_dir)
|
||||
logger.info(f"[UnifiedBookDownloader] Downloading {len(links)} pages to temporary directory...")
|
||||
|
||||
try:
|
||||
# Download all pages (uses thread pool)
|
||||
images = download(
|
||||
session=session,
|
||||
n_threads=10,
|
||||
directory=temp_dir,
|
||||
links=links,
|
||||
scale=3, # Default resolution
|
||||
book_id=book_id
|
||||
)
|
||||
|
||||
logger.info(f"[UnifiedBookDownloader] Downloaded {len(images)} pages")
|
||||
|
||||
# Try to merge pages into PDF
|
||||
try:
|
||||
import img2pdf
|
||||
logger.info(f"[UnifiedBookDownloader] Merging pages into PDF...")
|
||||
|
||||
# Prepare PDF metadata
|
||||
pdfmeta = {}
|
||||
if metadata:
|
||||
if "title" in metadata:
|
||||
pdfmeta["title"] = metadata["title"]
|
||||
if "creator" in metadata:
|
||||
pdfmeta["author"] = metadata["creator"]
|
||||
pdfmeta["keywords"] = [f"https://archive.org/details/{book_id}"]
|
||||
pdfmeta["creationdate"] = None # Avoid timezone issues
|
||||
|
||||
# Convert images to PDF
|
||||
pdf_content = img2pdf.convert(images, **pdfmeta) if images else None
|
||||
if not pdf_content:
|
||||
logger.error(f"[UnifiedBookDownloader] PDF conversion failed")
|
||||
return False, "Failed to convert pages to PDF"
|
||||
|
||||
# Save the PDF
|
||||
pdf_filename = f"{title}.pdf" if title else "book.pdf"
|
||||
pdf_path = Path(output_dir) / pdf_filename
|
||||
|
||||
# Handle duplicate filenames
|
||||
i = 1
|
||||
while pdf_path.exists():
|
||||
pdf_path = Path(output_dir) / f"{title or 'book'}({i}).pdf"
|
||||
i += 1
|
||||
|
||||
with open(pdf_path, 'wb') as f:
|
||||
f.write(pdf_content)
|
||||
|
||||
logger.info(f"[UnifiedBookDownloader] Successfully created PDF: {pdf_path}")
|
||||
|
||||
return True, str(pdf_path)
|
||||
|
||||
except ImportError:
|
||||
logger.warning(f"[UnifiedBookDownloader] img2pdf not available, saving as JPG collection instead")
|
||||
|
||||
# Create JPG collection directory
|
||||
if not title:
|
||||
title = f"book_{book_id}"
|
||||
jpg_dir = Path(output_dir) / title
|
||||
i = 1
|
||||
while jpg_dir.exists():
|
||||
jpg_dir = Path(output_dir) / f"{title}({i})"
|
||||
i += 1
|
||||
|
||||
# Move temporary directory to final location
|
||||
shutil.move(temp_dir, str(jpg_dir))
|
||||
temp_dir = None # Mark as already moved
|
||||
|
||||
logger.info(f"[UnifiedBookDownloader] Saved as JPG collection: {jpg_dir}")
|
||||
return True, str(jpg_dir)
|
||||
|
||||
finally:
|
||||
# Clean up temporary directory if it still exists
|
||||
if temp_dir and Path(temp_dir).exists():
|
||||
shutil.rmtree(temp_dir)
|
||||
|
||||
except SystemExit:
|
||||
# loan() function calls sys.exit on failure - catch it
|
||||
logger.error(f"[UnifiedBookDownloader] Borrow process exited (book may not be borrowable)")
|
||||
return False, "Book could not be borrowed (may not be available for borrowing)"
|
||||
except Exception as e:
|
||||
logger.error(f"[UnifiedBookDownloader] Archive borrow error: {e}")
|
||||
return False, f"Borrow failed: {str(e)}"
|
||||
|
||||
def close(self) -> None:
|
||||
"""Close the session."""
|
||||
self.session.close()
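# A rough usage sketch for the class above; the book data, OLID and output
# directory are illustrative, and credentials are read from the caller's config.
#     downloader = UnifiedBookDownloader(config=config, output_dir="Downloads")
#     options = downloader.get_download_options({
#         "title": "Morals and Dogma",
#         "author": "Albert Pike",
#         "isbn": "0557677203",
#         "openlibrary_id": "OL24263053M",
#     })
#     for method in options["methods"]:          # already sorted by priority
#         ok, message = asyncio.run(downloader.download_book(method))
#         if ok:
#             break
#     downloader.close()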
|
||||
@@ -6,7 +6,7 @@ import subprocess
|
||||
import sys
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from Provider._base import SearchProvider, SearchResult
|
||||
from ProviderCore.base import SearchProvider, SearchResult
|
||||
from SYS.logger import log
|
||||
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@ import os
|
||||
import sys
|
||||
from typing import Any
|
||||
|
||||
from Provider._base import FileProvider
|
||||
from ProviderCore.base import FileProvider
|
||||
from SYS.logger import log
|
||||
|
||||
|
||||
|
||||
5 ProviderCore/__init__.py Normal file
@@ -0,0 +1,5 @@
|
||||
"""Provider core modules.
|
||||
|
||||
This package contains the provider framework (base types, registry, and shared helpers).
|
||||
Concrete provider implementations live in the `Provider/` package.
|
||||
"""
|
||||
42 ProviderCore/download.py Normal file
@@ -0,0 +1,42 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Optional
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
def sanitize_filename(name: str, *, max_len: int = 150) -> str:
|
||||
text = str(name or "").strip()
|
||||
if not text:
|
||||
return "download"
|
||||
|
||||
forbidden = set('<>:"/\\|?*')
|
||||
cleaned = "".join("_" if c in forbidden else c for c in text)
|
||||
cleaned = " ".join(cleaned.split()).strip().strip(".")
|
||||
if not cleaned:
|
||||
cleaned = "download"
|
||||
return cleaned[:max_len]
|
||||
|
||||
|
||||
def download_file(url: str, output_path: Path, *, session: Optional[requests.Session] = None, timeout_s: float = 30.0) -> bool:
|
||||
output_path = Path(output_path)
|
||||
output_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
s = session or requests.Session()
|
||||
|
||||
try:
|
||||
with s.get(url, stream=True, timeout=timeout_s) as resp:
|
||||
resp.raise_for_status()
|
||||
with open(output_path, "wb") as f:
|
||||
for chunk in resp.iter_content(chunk_size=1024 * 256):
|
||||
if chunk:
|
||||
f.write(chunk)
|
||||
return output_path.exists() and output_path.stat().st_size > 0
|
||||
except Exception:
|
||||
try:
|
||||
if output_path.exists():
|
||||
output_path.unlink()
|
||||
except Exception:
|
||||
pass
|
||||
return False
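# A minimal usage sketch, assuming a reachable URL (placeholder below):
#     target = Path("downloads") / f"{sanitize_filename('Some: Book/Title?')}.pdf"
#     if download_file("https://files.example/some-book.pdf", target):
#         print(f"saved {target}")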
|
||||
@@ -11,10 +11,11 @@ import sys
|
||||
|
||||
from SYS.logger import log
|
||||
|
||||
from Provider._base import FileProvider, SearchProvider, SearchResult
|
||||
from ProviderCore.base import FileProvider, SearchProvider, SearchResult
|
||||
from Provider.bandcamp import Bandcamp
|
||||
from Provider.libgen import Libgen
|
||||
from Provider.matrix import Matrix
|
||||
from Provider.openlibrary import OpenLibrary
|
||||
from Provider.soulseek import Soulseek, download_soulseek_file
|
||||
from Provider.youtube import YouTube
|
||||
from Provider.zeroxzero import ZeroXZero
|
||||
@@ -22,6 +23,7 @@ from Provider.zeroxzero import ZeroXZero
|
||||
|
||||
_SEARCH_PROVIDERS: Dict[str, Type[SearchProvider]] = {
|
||||
"libgen": Libgen,
|
||||
"openlibrary": OpenLibrary,
|
||||
"soulseek": Soulseek,
|
||||
"bandcamp": Bandcamp,
|
||||
"youtube": YouTube,
|
||||
@@ -943,6 +943,79 @@ class Folder(Store):
|
||||
debug(f"delete_url failed for local file: {exc}")
|
||||
return False
|
||||
|
||||
def get_note(self, file_identifier: str, **kwargs: Any) -> Dict[str, str]:
|
||||
"""Get notes for a local file by hash."""
|
||||
from API.folder import API_folder_store
|
||||
try:
|
||||
if not self._location:
|
||||
return {}
|
||||
file_hash = str(file_identifier or "").strip().lower()
|
||||
if not _normalize_hash(file_hash):
|
||||
return {}
|
||||
with API_folder_store(Path(self._location)) as db:
|
||||
getter = getattr(db, "get_notes", None)
|
||||
if callable(getter):
|
||||
notes = getter(file_hash)
|
||||
return notes if isinstance(notes, dict) else {}
|
||||
# Fallback: default-only
|
||||
note = db.get_note(file_hash)
|
||||
return {"default": str(note or "")} if note else {}
|
||||
except Exception as exc:
|
||||
debug(f"get_note failed for local file: {exc}")
|
||||
return {}
|
||||
|
||||
def set_note(self, file_identifier: str, name: str, text: str, **kwargs: Any) -> bool:
|
||||
"""Set a named note for a local file by hash."""
|
||||
from API.folder import API_folder_store
|
||||
try:
|
||||
if not self._location:
|
||||
return False
|
||||
file_hash = str(file_identifier or "").strip().lower()
|
||||
if not _normalize_hash(file_hash):
|
||||
return False
|
||||
|
||||
file_path = self.get_file(file_hash, **kwargs)
|
||||
if not file_path or not isinstance(file_path, Path) or not file_path.exists():
|
||||
return False
|
||||
|
||||
with API_folder_store(Path(self._location)) as db:
|
||||
setter = getattr(db, "set_note", None)
|
||||
if callable(setter):
|
||||
setter(file_path, str(name), str(text))
|
||||
return True
|
||||
db.save_note(file_path, str(text))
|
||||
return True
|
||||
except Exception as exc:
|
||||
debug(f"set_note failed for local file: {exc}")
|
||||
return False
|
||||
|
||||
def delete_note(self, file_identifier: str, name: str, **kwargs: Any) -> bool:
|
||||
"""Delete a named note for a local file by hash."""
|
||||
from API.folder import API_folder_store
|
||||
try:
|
||||
if not self._location:
|
||||
return False
|
||||
file_hash = str(file_identifier or "").strip().lower()
|
||||
if not _normalize_hash(file_hash):
|
||||
return False
|
||||
with API_folder_store(Path(self._location)) as db:
|
||||
deleter = getattr(db, "delete_note", None)
|
||||
if callable(deleter):
|
||||
deleter(file_hash, str(name))
|
||||
return True
|
||||
# Default-only fallback
|
||||
if str(name).strip().lower() == "default":
|
||||
deleter2 = getattr(db, "save_note", None)
|
||||
if callable(deleter2):
|
||||
file_path = self.get_file(file_hash, **kwargs)
|
||||
if file_path and isinstance(file_path, Path) and file_path.exists():
|
||||
deleter2(file_path, "")
|
||||
return True
|
||||
return False
|
||||
except Exception as exc:
|
||||
debug(f"delete_note failed for local file: {exc}")
|
||||
return False
|
||||
|
||||
def delete_file(self, file_identifier: str, **kwargs: Any) -> bool:
|
||||
"""Delete a file from the folder store.
|
||||
|
||||
|
||||
@@ -437,7 +437,10 @@ class HydrusNetwork(Store):
|
||||
try:
|
||||
from API import HydrusNetwork as hydrus_wrapper
|
||||
|
||||
file_hash = str(file_identifier)
|
||||
file_hash = str(file_identifier or "").strip().lower()
|
||||
if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash):
|
||||
debug(f"get_tags: invalid file hash '{file_identifier}'")
|
||||
return [], "unknown"
|
||||
|
||||
# Get Hydrus client and service info
|
||||
client = self._client
|
||||
@@ -483,12 +486,17 @@ class HydrusNetwork(Store):
|
||||
if client is None:
|
||||
debug("add_tag: Hydrus client unavailable")
|
||||
return False
|
||||
|
||||
file_hash = str(file_identifier or "").strip().lower()
|
||||
if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash):
|
||||
debug(f"add_tag: invalid file hash '{file_identifier}'")
|
||||
return False
|
||||
service_name = kwargs.get("service_name") or "my tags"
|
||||
# Ensure tags is a list
|
||||
tag_list = list(tags) if isinstance(tags, (list, tuple)) else [str(tags)]
|
||||
if not tag_list:
|
||||
return False
|
||||
client.add_tag(file_identifier, tag_list, service_name)
|
||||
client.add_tag(file_hash, tag_list, service_name)
|
||||
return True
|
||||
except Exception as exc:
|
||||
debug(f"Hydrus add_tag failed: {exc}")
|
||||
@@ -502,11 +510,16 @@ class HydrusNetwork(Store):
|
||||
if client is None:
|
||||
debug("delete_tag: Hydrus client unavailable")
|
||||
return False
|
||||
|
||||
file_hash = str(file_identifier or "").strip().lower()
|
||||
if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash):
|
||||
debug(f"delete_tag: invalid file hash '{file_identifier}'")
|
||||
return False
|
||||
service_name = kwargs.get("service_name") or "my tags"
|
||||
tag_list = list(tags) if isinstance(tags, (list, tuple)) else [str(tags)]
|
||||
if not tag_list:
|
||||
return False
|
||||
client.delete_tag(file_identifier, tag_list, service_name)
|
||||
client.delete_tag(file_hash, tag_list, service_name)
|
||||
return True
|
||||
except Exception as exc:
|
||||
debug(f"Hydrus delete_tag failed: {exc}")
|
||||
@@ -520,7 +533,12 @@ class HydrusNetwork(Store):
|
||||
if client is None:
|
||||
debug("get_url: Hydrus client unavailable")
|
||||
return []
|
||||
payload = client.fetch_file_metadata(hashes=[str(file_identifier)], include_file_url=True)
|
||||
|
||||
file_hash = str(file_identifier or "").strip().lower()
|
||||
if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash):
|
||||
return []
|
||||
|
||||
payload = client.fetch_file_metadata(hashes=[file_hash], include_file_url=True)
|
||||
items = payload.get("metadata") if isinstance(payload, dict) else None
|
||||
if not isinstance(items, list) or not items:
|
||||
return []
|
||||
@@ -561,6 +579,80 @@ class HydrusNetwork(Store):
|
||||
debug(f"Hydrus delete_url failed: {exc}")
|
||||
return False
|
||||
|
||||
def get_note(self, file_identifier: str, **kwargs: Any) -> Dict[str, str]:
|
||||
"""Get notes for a Hydrus file (default note service only)."""
|
||||
try:
|
||||
client = self._client
|
||||
if client is None:
|
||||
debug("get_note: Hydrus client unavailable")
|
||||
return {}
|
||||
|
||||
file_hash = str(file_identifier or "").strip().lower()
|
||||
if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash):
|
||||
return {}
|
||||
|
||||
payload = client.fetch_file_metadata(hashes=[file_hash], include_notes=True)
|
||||
items = payload.get("metadata") if isinstance(payload, dict) else None
|
||||
if not isinstance(items, list) or not items:
|
||||
return {}
|
||||
meta = items[0] if isinstance(items[0], dict) else None
|
||||
if not isinstance(meta, dict):
|
||||
return {}
|
||||
|
||||
notes_payload = meta.get("notes")
|
||||
if isinstance(notes_payload, dict):
|
||||
return {str(k): str(v or "") for k, v in notes_payload.items() if str(k).strip()}
|
||||
|
||||
return {}
|
||||
except Exception as exc:
|
||||
debug(f"Hydrus get_note failed: {exc}")
|
||||
return {}
|
||||
|
||||
def set_note(self, file_identifier: str, name: str, text: str, **kwargs: Any) -> bool:
|
||||
"""Set a named note for a Hydrus file (default note service only)."""
|
||||
try:
|
||||
client = self._client
|
||||
if client is None:
|
||||
debug("set_note: Hydrus client unavailable")
|
||||
return False
|
||||
|
||||
file_hash = str(file_identifier or "").strip().lower()
|
||||
if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash):
|
||||
return False
|
||||
|
||||
note_name = str(name or "").strip()
|
||||
if not note_name:
|
||||
return False
|
||||
note_text = str(text or "")
|
||||
|
||||
client.set_notes(file_hash, {note_name: note_text})
|
||||
return True
|
||||
except Exception as exc:
|
||||
debug(f"Hydrus set_note failed: {exc}")
|
||||
return False
|
||||
|
||||
def delete_note(self, file_identifier: str, name: str, **kwargs: Any) -> bool:
|
||||
"""Delete a named note for a Hydrus file (default note service only)."""
|
||||
try:
|
||||
client = self._client
|
||||
if client is None:
|
||||
debug("delete_note: Hydrus client unavailable")
|
||||
return False
|
||||
|
||||
file_hash = str(file_identifier or "").strip().lower()
|
||||
if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash):
|
||||
return False
|
||||
|
||||
note_name = str(name or "").strip()
|
||||
if not note_name:
|
||||
return False
|
||||
|
||||
client.delete_notes(file_hash, [note_name])
|
||||
return True
|
||||
except Exception as exc:
|
||||
debug(f"Hydrus delete_note failed: {exc}")
|
||||
return False
|
||||
|
||||
@staticmethod
|
||||
def _extract_tags_from_hydrus_meta(
|
||||
meta: Dict[str, Any],
|
||||
|
||||
@@ -53,3 +53,21 @@ class Store(ABC):
    @abstractmethod
    def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
        raise NotImplementedError

    @abstractmethod
    def get_note(self, file_identifier: str, **kwargs: Any) -> Dict[str, str]:
        """Get notes for a file.

        Returns a mapping of note name/key -> note text.
        """
        raise NotImplementedError

    @abstractmethod
    def set_note(self, file_identifier: str, name: str, text: str, **kwargs: Any) -> bool:
        """Add or replace a named note for a file."""
        raise NotImplementedError

    @abstractmethod
    def delete_note(self, file_identifier: str, name: str, **kwargs: Any) -> bool:
        """Delete a named note for a file."""
        raise NotImplementedError
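To make the contract above concrete, here is a minimal sketch of a backend that satisfies the new note interface (an in-memory toy for illustration only, not part of this commit; the other Store abstract methods are omitted):

from typing import Any, Dict


class InMemoryNotes:
    """Hypothetical illustration of the get_note/set_note/delete_note contract."""

    def __init__(self) -> None:
        # file hash -> {note name -> note text}
        self._notes: Dict[str, Dict[str, str]] = {}

    def get_note(self, file_identifier: str, **kwargs: Any) -> Dict[str, str]:
        return dict(self._notes.get(file_identifier.lower(), {}))

    def set_note(self, file_identifier: str, name: str, text: str, **kwargs: Any) -> bool:
        self._notes.setdefault(file_identifier.lower(), {})[str(name)] = str(text)
        return True

    def delete_note(self, file_identifier: str, name: str, **kwargs: Any) -> bool:
        return self._notes.get(file_identifier.lower(), {}).pop(str(name), None) is not None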
@@ -24,9 +24,9 @@ import json
|
||||
# Add parent directory to path for imports
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
|
||||
# Import cmdlets system to call get-tag
|
||||
# Import cmdlet system to call get-tag
|
||||
try:
|
||||
from cmdlets import get as get_cmdlet
|
||||
from cmdlet import get as get_cmdlet
|
||||
except ImportError:
|
||||
get_cmdlet = None
|
||||
|
||||
@@ -353,10 +353,10 @@ class DownloadModal(ModalScreen):
|
||||
|
||||
# Import cmdlet system
|
||||
if not get_cmdlet:
|
||||
logger.error("cmdlets module not available")
|
||||
logger.error("cmdlet module not available")
|
||||
self.app.call_from_thread(
|
||||
self.app.notify,
|
||||
"Cmdlets system unavailable",
|
||||
"cmdlet system unavailable",
|
||||
title="Error",
|
||||
severity="error"
|
||||
)
|
||||
@@ -1323,10 +1323,10 @@ class DownloadModal(ModalScreen):
|
||||
|
||||
# Call get-tag cmdlet to scrape URL
|
||||
if not get_cmdlet:
|
||||
logger.error("cmdlets module not available")
|
||||
logger.error("cmdlet module not available")
|
||||
self.app.call_from_thread(
|
||||
self.app.notify,
|
||||
"cmdlets module not available",
|
||||
"cmdlet module not available",
|
||||
title="Error",
|
||||
severity="error"
|
||||
)
|
||||
@@ -1563,13 +1563,13 @@ class DownloadModal(ModalScreen):
|
||||
"""
|
||||
# Import cmdlet system
|
||||
if not get_cmdlet:
|
||||
error_msg = "cmdlets module not available"
|
||||
error_msg = "cmdlet module not available"
|
||||
logger.error(error_msg)
|
||||
if worker:
|
||||
worker.append_stdout(f"❌ ERROR: {error_msg}\n")
|
||||
self.app.call_from_thread(
|
||||
self.app.notify,
|
||||
"Cmdlets system unavailable",
|
||||
"cmdlet system unavailable",
|
||||
title="Error",
|
||||
severity="error"
|
||||
)
|
||||
|
||||
@@ -14,9 +14,9 @@ import asyncio
|
||||
|
||||
# Add parent directory to path for imports
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent))
|
||||
from config import load_config
|
||||
from config import load_config, resolve_output_dir
|
||||
from result_table import ResultTable
|
||||
from Provider.registry import get_search_provider
|
||||
from ProviderCore.registry import get_search_provider
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -236,7 +236,7 @@ class SearchModal(ModalScreen):
|
||||
selected_row = self.results_table.cursor_row
|
||||
if 0 <= selected_row < len(self.current_results):
|
||||
result = self.current_results[selected_row]
|
||||
if result.get("source") == "openlibrary":
|
||||
if getattr(result, "table", "") == "openlibrary":
|
||||
asyncio.create_task(self._download_book(result))
|
||||
else:
|
||||
logger.warning("[search-modal] Download only supported for OpenLibrary results")
|
||||
@@ -330,48 +330,28 @@ class SearchModal(ModalScreen):
|
||||
logger.info(f"[search-modal] Populated tags textarea from result")
|
||||
|
||||
async def _download_book(self, result: Any) -> None:
|
||||
"""Download a book from OpenLibrary using unified downloader."""
|
||||
"""Download a book from OpenLibrary using the provider."""
|
||||
if getattr(result, "table", "") != "openlibrary":
|
||||
logger.warning("[search-modal] Download only supported for OpenLibrary results")
|
||||
return
|
||||
|
||||
try:
|
||||
from Provider.unified_book_downloader import UnifiedBookDownloader
|
||||
from config import load_config
|
||||
|
||||
# Convert SearchResult to dict if needed
|
||||
if hasattr(result, 'to_dict'):
|
||||
result_dict = result.to_dict()
|
||||
# Ensure raw_data is populated for downloader
|
||||
if 'raw_data' not in result_dict and result.full_metadata:
|
||||
result_dict['raw_data'] = result.full_metadata
|
||||
else:
|
||||
result_dict = result
|
||||
|
||||
logger.info(f"[search-modal] Starting download for: {result_dict.get('title')}")
|
||||
|
||||
config = load_config()
|
||||
downloader = UnifiedBookDownloader(config=config)
|
||||
output_dir = resolve_output_dir(config)
|
||||
|
||||
# Get download options for this book
|
||||
options = downloader.get_download_options(result_dict)
|
||||
|
||||
if not options['methods']:
|
||||
logger.warning(f"[search-modal] No download methods available for: {result_dict.get('title')}")
|
||||
# Could show a modal dialog here
|
||||
provider = get_search_provider("openlibrary", config=config)
|
||||
if not provider:
|
||||
logger.error("[search-modal] Provider not available: openlibrary")
|
||||
return
|
||||
|
||||
# For now, use the first available method (we could show a dialog to choose)
|
||||
method = options['methods'][0]
|
||||
logger.info(f"[search-modal] Using download method: {method.get('label')}")
|
||||
title = getattr(result, "title", "")
|
||||
logger.info(f"[search-modal] Starting download for: {title}")
|
||||
|
||||
# Perform the download
|
||||
success, message = await downloader.download_book(method)
|
||||
|
||||
if success:
|
||||
logger.info(f"[search-modal] Download successful: {message}")
|
||||
# Could show success dialog
|
||||
downloaded = await asyncio.to_thread(provider.download, result, output_dir)
|
||||
if downloaded:
|
||||
logger.info(f"[search-modal] Download successful: {downloaded}")
|
||||
else:
|
||||
logger.warning(f"[search-modal] Download failed: {message}")
|
||||
# Could show error dialog
|
||||
|
||||
downloader.close()
|
||||
logger.warning(f"[search-modal] Download failed for: {title}")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"[search-modal] Download error: {e}", exc_info=True)
|
||||
|
||||
@@ -23,7 +23,7 @@ for path in (ROOT_DIR, BASE_DIR):
|
||||
sys.path.insert(0, str_path)
|
||||
|
||||
import pipeline as ctx
|
||||
from cmdlets import REGISTRY
|
||||
from cmdlet import REGISTRY
|
||||
from config import get_local_storage_path, load_config
|
||||
from SYS.worker_manager import WorkerManager
|
||||
|
||||
|
||||
166 cli_syntax.py Normal file
@@ -0,0 +1,166 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
import re
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class SyntaxErrorDetail:
|
||||
message: str
|
||||
expected: Optional[str] = None
|
||||
|
||||
|
||||
def validate_pipeline_text(text: str) -> Optional[SyntaxErrorDetail]:
|
||||
"""Validate raw CLI input before tokenization/execution.
|
||||
|
||||
This is intentionally lightweight and focuses on user-facing syntax issues:
|
||||
- Unbalanced single/double quotes
|
||||
- Dangling or empty pipeline stages (|)
|
||||
|
||||
Returns:
|
||||
None if valid, otherwise a SyntaxErrorDetail describing the issue.
|
||||
"""
|
||||
if text is None:
|
||||
return SyntaxErrorDetail("Empty command")
|
||||
|
||||
raw = text.strip()
|
||||
if not raw:
|
||||
return SyntaxErrorDetail("Empty command")
|
||||
|
||||
in_single = False
|
||||
in_double = False
|
||||
escaped = False
|
||||
last_pipe_outside_quotes: Optional[int] = None
|
||||
|
||||
for idx, ch in enumerate(raw):
|
||||
if escaped:
|
||||
escaped = False
|
||||
continue
|
||||
|
||||
if ch == "\\" and (in_single or in_double):
|
||||
escaped = True
|
||||
continue
|
||||
|
||||
if ch == '"' and not in_single:
|
||||
in_double = not in_double
|
||||
continue
|
||||
|
||||
if ch == "'" and not in_double:
|
||||
in_single = not in_single
|
||||
continue
|
||||
|
||||
if ch == "|" and not in_single and not in_double:
|
||||
# Record pipe locations to catch empty stages/dangling pipe.
|
||||
if last_pipe_outside_quotes is not None and last_pipe_outside_quotes == idx - 1:
|
||||
return SyntaxErrorDetail("Syntax error: empty pipeline stage (found '||').")
|
||||
last_pipe_outside_quotes = idx
|
||||
|
||||
    if in_double:
        return SyntaxErrorDetail('Syntax error: missing closing ".', expected='"')
    if in_single:
        return SyntaxErrorDetail("Syntax error: missing closing '.", expected="'")
|
||||
|
||||
# Dangling pipe at end / pipe as first non-space character
|
||||
if raw.startswith("|"):
|
||||
return SyntaxErrorDetail("Syntax error: pipeline cannot start with '|'.")
|
||||
if raw.endswith("|"):
|
||||
return SyntaxErrorDetail("Syntax error: pipeline cannot end with '|'.")
|
||||
|
||||
# Empty stage like "cmd1 | | cmd2" (spaces between pipes)
|
||||
if "|" in raw:
|
||||
# Simple pass: look for pipes that have only whitespace between them.
|
||||
# We only check outside quotes by re-scanning and counting non-space chars between pipes.
|
||||
in_single = False
|
||||
in_double = False
|
||||
escaped = False
|
||||
seen_nonspace_since_pipe = True # start true to allow leading command
|
||||
for ch in raw:
|
||||
if escaped:
|
||||
escaped = False
|
||||
continue
|
||||
if ch == "\\" and (in_single or in_double):
|
||||
escaped = True
|
||||
continue
|
||||
if ch == '"' and not in_single:
|
||||
in_double = not in_double
|
||||
continue
|
||||
if ch == "'" and not in_double:
|
||||
in_single = not in_single
|
||||
continue
|
||||
if ch == "|" and not in_single and not in_double:
|
||||
if not seen_nonspace_since_pipe:
|
||||
return SyntaxErrorDetail("Syntax error: empty pipeline stage (use a command between '|').")
|
||||
seen_nonspace_since_pipe = False
|
||||
continue
|
||||
if not in_single and not in_double and not ch.isspace():
|
||||
seen_nonspace_since_pipe = True
|
||||
|
||||
return None
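A quick sketch of the behaviour described above (assuming cli_syntax.py sits at the repository root and is importable as cli_syntax):

from cli_syntax import validate_pipeline_text

# Balanced quotes and non-empty stages are accepted (returns None).
assert validate_pipeline_text('get-file | add-tag "title:Morals and Dogma"') is None

# Unbalanced quotes, leading/trailing pipes, and empty stages are rejected.
for bad in ('get-file "unterminated', '| get-file', 'get-file |', 'get-file | | add-tag x'):
    detail = validate_pipeline_text(bad)
    print(f"{bad!r} -> {detail.message}")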
|
||||
|
||||
|
||||
def parse_query(query: str) -> Dict[str, Any]:
|
||||
"""Parse a query string into field:value pairs and free text.
|
||||
|
||||
Supports syntax like:
|
||||
- isbn:0557677203
|
||||
- author:"Albert Pike"
|
||||
- title:"Morals and Dogma" year:2010
|
||||
- Mixed with free text: Morals isbn:0557677203
|
||||
|
||||
Returns:
|
||||
Dict with keys:
|
||||
- fields: Dict[str, str]
|
||||
- text: str
|
||||
- raw: str
|
||||
"""
|
||||
|
||||
result: Dict[str, Any] = {
|
||||
"fields": {},
|
||||
"text": "",
|
||||
"raw": query,
|
||||
}
|
||||
|
||||
if not query or not query.strip():
|
||||
return result
|
||||
|
||||
raw = query.strip()
|
||||
remaining_parts: list[str] = []
|
||||
|
||||
# Match field:value where value is either a quoted string or a non-space token.
|
||||
pattern = r'(\w+):(?:"([^"]*)"|(\S+))'
|
||||
|
||||
pos = 0
|
||||
for match in re.finditer(pattern, raw):
|
||||
if match.start() > pos:
|
||||
before_text = raw[pos : match.start()].strip()
|
||||
if before_text:
|
||||
remaining_parts.append(before_text)
|
||||
|
||||
field_name = (match.group(1) or "").lower()
|
||||
field_value = match.group(2) if match.group(2) is not None else match.group(3)
|
||||
if field_name:
|
||||
result["fields"][field_name] = field_value
|
||||
|
||||
pos = match.end()
|
||||
|
||||
if pos < len(raw):
|
||||
remaining_text = raw[pos:].strip()
|
||||
if remaining_text:
|
||||
remaining_parts.append(remaining_text)
|
||||
|
||||
result["text"] = " ".join(remaining_parts)
|
||||
return result
|
||||
|
||||
|
||||
def get_field(parsed_query: Dict[str, Any], field_name: str, default: Optional[str] = None) -> Optional[str]:
|
||||
"""Get a field value from a parsed query."""
|
||||
|
||||
return parsed_query.get("fields", {}).get((field_name or "").lower(), default)
|
||||
|
||||
|
||||
def get_free_text(parsed_query: Dict[str, Any]) -> str:
|
||||
"""Get the free-text portion of a parsed query."""
|
||||
|
||||
return str(parsed_query.get("text", "") or "")
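For example (same assumption about the module path):

from cli_syntax import parse_query, get_field, get_free_text

parsed = parse_query('Morals author:"Albert Pike" isbn:0557677203')
print(parsed["fields"])           # {'author': 'Albert Pike', 'isbn': '0557677203'}
print(get_field(parsed, "isbn"))  # 0557677203
print(get_free_text(parsed))      # Morals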
|
||||
@@ -10,6 +10,24 @@ Cmdlet = Callable[[Any, Sequence[str], Dict[str, Any]], int]
REGISTRY: Dict[str, Cmdlet] = {}


def _normalize_cmd_name(name: str) -> str:
    return str(name or "").replace('_', '-').lower().strip()


def register_callable(names: Iterable[str], fn: Cmdlet) -> Cmdlet:
    """Register a callable under one or more command names.

    This is the single registration mechanism used by both:
    - legacy function cmdlet (decorator form)
    - class-based cmdlet (Cmdlet.register())
    """
    for name in names:
        key = _normalize_cmd_name(name)
        if key:
            REGISTRY[key] = fn
    return fn


def register(names: Iterable[str]):
    """Decorator to register a function under one or more command names.

@@ -18,18 +36,16 @@ def register(names: Iterable[str]):
        def _run(result, args, config) -> int: ...
    """
    def _wrap(fn: Cmdlet) -> Cmdlet:
        for name in names:
            REGISTRY[name.replace('_', '-').lower()] = fn
        return fn
        return register_callable(names, fn)
    return _wrap


def get(cmd_name: str) -> Cmdlet | None:
    return REGISTRY.get(cmd_name.replace('_', '-').lower())
    return REGISTRY.get(_normalize_cmd_name(cmd_name))
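As a rough sketch of how both registration paths end up in the same REGISTRY (the command name here is invented for illustration, and this assumes the cmdlet package imports cleanly in your environment):

from cmdlet import register, get

@register(["hello-world", "hello_world"])
def _hello(result, args, config) -> int:
    print("hello", list(args))
    return 0

# _normalize_cmd_name maps underscores to hyphens and lowercases,
# so all spellings resolve to the same callable.
assert get("hello_world") is _hello
assert get("HELLO-WORLD") is _hello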
# Dynamically import all cmdlet modules in this directory (ignore files starting with _ and __init__.py)
|
||||
# Cmdlets self-register when instantiated via their __init__ method
|
||||
# cmdlet self-register when instantiated via their __init__ method
|
||||
import os
|
||||
cmdlet_dir = os.path.dirname(__file__)
|
||||
for filename in os.listdir(cmdlet_dir):
|
||||
@@ -43,7 +59,7 @@ for filename in os.listdir(cmdlet_dir):
|
||||
mod_name = filename[:-3]
|
||||
|
||||
# Enforce Powershell-style two-word cmdlet naming (e.g., add_file, get_file)
|
||||
# Skip native/utility scripts that are not cmdlets (e.g., adjective, worker, matrix, pipe)
|
||||
# Skip native/utility scripts that are not cmdlet (e.g., adjective, worker, matrix, pipe)
|
||||
if "_" not in mod_name:
|
||||
continue
|
||||
|
||||
@@ -54,15 +70,15 @@ for filename in os.listdir(cmdlet_dir):
|
||||
print(f"Error importing cmdlet '{mod_name}': {e}", file=sys.stderr)
|
||||
continue
|
||||
|
||||
# Import and register native commands that are not considered cmdlets
|
||||
# Import and register native commands that are not considered cmdlet
|
||||
try:
|
||||
from cmdnats import register_native_commands as _register_native_commands
|
||||
from cmdnat import register_native_commands as _register_native_commands
|
||||
_register_native_commands(REGISTRY)
|
||||
except Exception:
|
||||
# Native commands are optional; ignore if unavailable
|
||||
pass
|
||||
|
||||
# Import root-level modules that also register cmdlets
|
||||
# Import root-level modules that also register cmdlet
|
||||
for _root_mod in ("select_cmdlet",):
|
||||
try:
|
||||
_import_module(_root_mod)
|
||||
@@ -70,7 +86,7 @@ for _root_mod in ("select_cmdlet",):
|
||||
# Allow missing optional modules
|
||||
continue
|
||||
|
||||
# Also import helper modules that register cmdlets
|
||||
# Also import helper modules that register cmdlet
|
||||
try:
|
||||
import API.alldebrid as _alldebrid
|
||||
except Exception:
|
||||
@@ -10,7 +10,7 @@ from collections.abc import Iterable as IterableABC
|
||||
|
||||
from SYS.logger import log, debug
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Iterable, List, Optional, Sequence, Set
|
||||
from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Set
|
||||
from dataclasses import dataclass, field
|
||||
import models
|
||||
|
||||
@@ -94,15 +94,15 @@ class CmdletArg:
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# SHARED ARGUMENTS - Reusable argument definitions across cmdlets
|
||||
# SHARED ARGUMENTS - Reusable argument definitions across cmdlet
|
||||
# ============================================================================
|
||||
|
||||
class SharedArgs:
|
||||
"""Registry of shared CmdletArg definitions used across multiple cmdlets.
|
||||
"""Registry of shared CmdletArg definitions used across multiple cmdlet.
|
||||
|
||||
This class provides a centralized location for common arguments so they're
|
||||
defined once and used consistently everywhere. Reduces duplication and ensures
|
||||
all cmdlets handle the same arguments identically.
|
||||
all cmdlet handle the same arguments identically.
|
||||
|
||||
Example:
|
||||
CMDLET = Cmdlet(
|
||||
@@ -367,8 +367,8 @@ class Cmdlet:
|
||||
"""List of arguments accepted by this cmdlet"""
|
||||
detail: List[str] = field(default_factory=list)
|
||||
"""Detailed explanation lines (for help text)"""
|
||||
exec: Optional[Any] = field(default=None)
|
||||
"""The execution function: func(result, args, config) -> int"""
|
||||
# Execution function: func(result, args, config) -> int
|
||||
exec: Optional[Callable[[Any, Sequence[str], Dict[str, Any]], int]] = field(default=None)
|
||||
|
||||
|
||||
|
||||
@@ -399,7 +399,7 @@ class Cmdlet:
|
||||
if not callable(self.exec):
|
||||
return self
|
||||
try:
|
||||
from . import register as _register # Local import to avoid circular import cost
|
||||
from . import register_callable as _register_callable # Local import to avoid circular import cost
|
||||
except Exception:
|
||||
return self
|
||||
|
||||
@@ -407,7 +407,7 @@ class Cmdlet:
|
||||
if not names:
|
||||
return self
|
||||
|
||||
_register(names)(self.exec)
|
||||
_register_callable(names, self.exec)
|
||||
return self
|
||||
|
||||
def get_flags(self, arg_name: str) -> set[str]:
|
||||
@@ -599,8 +599,14 @@ def normalize_hash(hash_hex: Optional[str]) -> Optional[str]:
    """
    if not isinstance(hash_hex, str):
        return None
    text = hash_hex.strip()
    return text.lower() if text else None
    text = hash_hex.strip().lower()
    if not text:
        return None
    if len(text) != 64:
        return None
    if not all(ch in "0123456789abcdef" for ch in text):
        return None
    return text
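In effect, normalize_hash now accepts only full 64-character SHA-256 hex strings rather than any non-empty string; a quick sketch of the new behaviour:

assert normalize_hash("  " + "AB" * 32 + "  ") == "ab" * 32   # trimmed and lowercased
assert normalize_hash("deadbeef") is None                     # too short (previously returned "deadbeef")
assert normalize_hash("zz" * 32) is None                      # right length, not hex
assert normalize_hash(None) is None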
|
||||
|
||||
def get_hash_for_operation(override_hash: Optional[str], result: Any, field_name: str = "hash") -> Optional[str]:
|
||||
@@ -669,7 +675,7 @@ def get_field(obj: Any, field: str, default: Optional[Any] = None) -> Any:
|
||||
Handles both dict.get(field) and getattr(obj, field) access patterns.
|
||||
Also handles lists by accessing the first element.
|
||||
For PipeObjects, checks the extra field as well.
|
||||
Used throughout cmdlets to uniformly access fields from mixed types.
|
||||
Used throughout cmdlet to uniformly access fields from mixed types.
|
||||
|
||||
Args:
|
||||
obj: Dict, object, or list to extract from
|
||||
@@ -705,7 +711,7 @@ def get_field(obj: Any, field: str, default: Optional[Any] = None) -> Any:
|
||||
def should_show_help(args: Sequence[str]) -> bool:
|
||||
"""Check if help flag was passed in arguments.
|
||||
|
||||
Consolidates repeated pattern of checking for help flags across cmdlets.
|
||||
Consolidates repeated pattern of checking for help flags across cmdlet.
|
||||
|
||||
Args:
|
||||
args: Command arguments to check
|
||||
@@ -1077,7 +1083,7 @@ def apply_preferred_title(tags: List[str], preferred: Optional[str]) -> List[str
|
||||
|
||||
|
||||
# ============================================================================
|
||||
# PIPEOBJECT UTILITIES (for chainable cmdlets and multi-action pipelines)
|
||||
# PIPEOBJECT UTILITIES (for chainable cmdlet and multi-action pipelines)
|
||||
# ============================================================================
|
||||
|
||||
def create_pipe_object_result(
|
||||
@@ -1095,7 +1101,7 @@ def create_pipe_object_result(
|
||||
"""Create a PipeObject-compatible result dict for pipeline chaining.
|
||||
|
||||
This is a helper to emit results in the standard format that downstream
|
||||
cmdlets can process (filter, tag, cleanup, etc.).
|
||||
cmdlet can process (filter, tag, cleanup, etc.).
|
||||
|
||||
Args:
|
||||
source: Source system (e.g., 'local', 'hydrus', 'download')
|
||||
@@ -1350,7 +1356,7 @@ def collapse_namespace_tags(tags: Optional[Iterable[Any]], namespace: str, prefe
|
||||
def collapse_namespace_tag(tags: Optional[Iterable[Any]], namespace: str, prefer: str = "last") -> list[str]:
|
||||
"""Singular alias for collapse_namespace_tags.
|
||||
|
||||
Some cmdlets prefer the singular name; keep behavior centralized.
|
||||
Some cmdlet prefer the singular name; keep behavior centralized.
|
||||
"""
|
||||
return collapse_namespace_tags(tags, namespace, prefer=prefer)
|
||||
|
||||
@@ -1643,7 +1649,7 @@ def coerce_to_pipe_object(value: Any, default_path: Optional[str] = None) -> mod
|
||||
def register_url_with_local_library(pipe_obj: models.PipeObject, config: Dict[str, Any]) -> bool:
|
||||
"""Register url with a file in the local library database.
|
||||
|
||||
This is called automatically by download cmdlets to ensure url are persisted
|
||||
This is called automatically by download cmdlet to ensure url are persisted
|
||||
without requiring a separate add-url step in the pipeline.
|
||||
|
||||
Args:
|
||||
@@ -350,7 +350,7 @@ class Add_File(Cmdlet):
|
||||
"""Delegate URL handling to download-media cmdlet."""
|
||||
log(f"Target is a URL, delegating to download-media: {url_str}", file=sys.stderr)
|
||||
# Reuse the globally-registered cmdlet instance to avoid duplicative registration
|
||||
from cmdlets.download_media import CMDLET as dl_cmdlet
|
||||
from cmdlet.download_media import CMDLET as dl_cmdlet
|
||||
dl_args = list(args) if args else []
|
||||
|
||||
# Add the URL to the argument list for download-media
|
||||
@@ -615,7 +615,7 @@ class Add_File(Cmdlet):
|
||||
"""
|
||||
try:
|
||||
import asyncio
|
||||
from Provider.registry import download_soulseek_file
|
||||
from ProviderCore.registry import download_soulseek_file
|
||||
from pathlib import Path
|
||||
|
||||
# Extract metadata from result
|
||||
@@ -684,7 +684,7 @@ class Add_File(Cmdlet):
|
||||
delete_after: bool,
|
||||
) -> int:
|
||||
"""Handle uploading to a file provider (e.g. 0x0)."""
|
||||
from Provider.registry import get_file_provider
|
||||
from ProviderCore.registry import get_file_provider
|
||||
|
||||
log(f"Uploading via {provider_name}: {media_path.name}", file=sys.stderr)
|
||||
|
||||
148 cmdlet/add_note.py Normal file
@@ -0,0 +1,148 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional, Sequence
|
||||
import sys
|
||||
|
||||
from SYS.logger import log
|
||||
|
||||
import pipeline as ctx
|
||||
from ._shared import (
|
||||
Cmdlet,
|
||||
CmdletArg,
|
||||
SharedArgs,
|
||||
normalize_hash,
|
||||
parse_cmdlet_args,
|
||||
normalize_result_input,
|
||||
should_show_help,
|
||||
)
|
||||
from Store import Store
|
||||
from SYS.utils import sha256_file
|
||||
|
||||
|
||||
class Add_Note(Cmdlet):
|
||||
def __init__(self) -> None:
|
||||
super().__init__(
|
||||
name="add-note",
|
||||
summary="Add or set a named note on a file in a store.",
|
||||
usage="add-note -store <store> [-hash <sha256>] <name> <text...>",
|
||||
alias=["set-note", "add_note"],
|
||||
arg=[
|
||||
SharedArgs.STORE,
|
||||
SharedArgs.HASH,
|
||||
CmdletArg("name", type="string", required=True, description="The note name/key to set (e.g. 'comment', 'lyric')."),
|
||||
CmdletArg("text", type="string", required=True, description="Note text/content to store.", variadic=True),
|
||||
],
|
||||
detail=[
|
||||
"- Notes are stored via the selected store backend.",
|
||||
"- For lyrics: store LRC text in a note named 'lyric'.",
|
||||
],
|
||||
exec=self.run,
|
||||
)
|
||||
# Populate dynamic store choices for autocomplete
|
||||
try:
|
||||
SharedArgs.STORE.choices = SharedArgs.get_store_choices(None)
|
||||
except Exception:
|
||||
pass
|
||||
self.register()
|
||||
|
||||
def _resolve_hash(self, raw_hash: Optional[str], raw_path: Optional[str], override_hash: Optional[str]) -> Optional[str]:
|
||||
resolved = normalize_hash(override_hash) if override_hash else normalize_hash(raw_hash)
|
||||
if resolved:
|
||||
return resolved
|
||||
|
||||
if raw_path:
|
||||
try:
|
||||
p = Path(str(raw_path))
|
||||
stem = p.stem
|
||||
if len(stem) == 64 and all(c in "0123456789abcdef" for c in stem.lower()):
|
||||
return stem.lower()
|
||||
if p.exists() and p.is_file():
|
||||
return sha256_file(p)
|
||||
except Exception:
|
||||
return None
|
||||
return None
|
||||
|
||||
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
if should_show_help(args):
|
||||
log(f"Cmdlet: {self.name}\nSummary: {self.summary}\nUsage: {self.usage}")
|
||||
return 0
|
||||
|
||||
parsed = parse_cmdlet_args(args, self)
|
||||
|
||||
store_override = parsed.get("store")
|
||||
hash_override = parsed.get("hash")
|
||||
note_name = str(parsed.get("name") or "").strip()
|
||||
text_parts = parsed.get("text")
|
||||
|
||||
if not note_name:
|
||||
log("[add_note] Error: Requires <name>", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
if isinstance(text_parts, list):
|
||||
note_text = " ".join([str(p) for p in text_parts]).strip()
|
||||
else:
|
||||
note_text = str(text_parts or "").strip()
|
||||
|
||||
if not note_text:
|
||||
log("[add_note] Error: Empty note text", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
results = normalize_result_input(result)
|
||||
if not results:
|
||||
if store_override and normalize_hash(hash_override):
|
||||
results = [{"store": str(store_override), "hash": normalize_hash(hash_override)}]
|
||||
else:
|
||||
log("[add_note] Error: Requires piped item(s) or -store and -hash", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
store_registry = Store(config)
|
||||
updated = 0
|
||||
|
||||
for res in results:
|
||||
if not isinstance(res, dict):
|
||||
ctx.emit(res)
|
||||
continue
|
||||
|
||||
store_name = str(store_override or res.get("store") or "").strip()
|
||||
raw_hash = res.get("hash")
|
||||
raw_path = res.get("path")
|
||||
|
||||
if not store_name:
|
||||
log("[add_note] Error: Missing -store and item has no store field", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
resolved_hash = self._resolve_hash(
|
||||
raw_hash=str(raw_hash) if raw_hash else None,
|
||||
raw_path=str(raw_path) if raw_path else None,
|
||||
override_hash=str(hash_override) if hash_override else None,
|
||||
)
|
||||
if not resolved_hash:
|
||||
log("[add_note] Warning: Item missing usable hash; skipping", file=sys.stderr)
|
||||
ctx.emit(res)
|
||||
continue
|
||||
|
||||
try:
|
||||
backend = store_registry[store_name]
|
||||
except Exception as exc:
|
||||
log(f"[add_note] Error: Unknown store '{store_name}': {exc}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
ok = False
|
||||
try:
|
||||
ok = bool(backend.set_note(resolved_hash, note_name, note_text, config=config))
|
||||
except Exception as exc:
|
||||
log(f"[add_note] Error: Failed to set note: {exc}", file=sys.stderr)
|
||||
ok = False
|
||||
|
||||
if ok:
|
||||
updated += 1
|
||||
|
||||
ctx.emit(res)
|
||||
|
||||
log(f"[add_note] Updated {updated} item(s)", file=sys.stderr)
|
||||
return 0 if updated > 0 else 1
|
||||
|
||||
|
||||
CMDLET = Add_Note()
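For orientation, the intended pipeline usage of the new cmdlet would look roughly like this (a sketch; store names, the hash placeholder, and the get-file stage are illustrative, not taken from this commit):

add-note -store local -hash <sha256> lyric "[00:12.00] first line of the LRC"
get-file -store hydrus | add-note comment "needs re-encode"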
|
||||
|
||||
@@ -10,7 +10,6 @@ import sys
|
||||
|
||||
from SYS.logger import log
|
||||
|
||||
from . import register
|
||||
import models
|
||||
import pipeline as ctx
|
||||
from API import HydrusNetwork as hydrus_wrapper
|
||||
@@ -144,10 +143,18 @@ def _resolve_king_reference(king_arg: str) -> Optional[str]:
|
||||
def _refresh_relationship_view_if_current(target_hash: Optional[str], target_path: Optional[str], other: Optional[str], config: Dict[str, Any]) -> None:
|
||||
"""If the current subject matches the target, refresh relationships via get-relationship."""
|
||||
try:
|
||||
from cmdlets import get_relationship as get_rel_cmd # type: ignore
|
||||
from cmdlet import get as get_cmdlet # type: ignore
|
||||
except Exception:
|
||||
return
|
||||
|
||||
get_relationship = None
|
||||
try:
|
||||
get_relationship = get_cmdlet("get-relationship")
|
||||
except Exception:
|
||||
get_relationship = None
|
||||
if not callable(get_relationship):
|
||||
return
|
||||
|
||||
try:
|
||||
subject = ctx.get_last_result_subject()
|
||||
if subject is None:
|
||||
@@ -179,12 +186,11 @@ def _refresh_relationship_view_if_current(target_hash: Optional[str], target_pat
|
||||
refresh_args: list[str] = []
|
||||
if target_hash:
|
||||
refresh_args.extend(["-hash", target_hash])
|
||||
get_rel_cmd._run(subject, refresh_args, config)
|
||||
get_relationship(subject, refresh_args, config)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
@register(["add-relationship", "add-rel"]) # primary name and alias
|
||||
def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
"""Associate file relationships in Hydrus.
|
||||
|
||||
@@ -196,24 +202,21 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
"""
|
||||
# Help
|
||||
if should_show_help(_args):
|
||||
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
|
||||
log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
|
||||
return 0
|
||||
|
||||
# Parse arguments using CMDLET spec
|
||||
parsed = parse_cmdlet_args(_args, CMDLET)
|
||||
arg_path: Optional[Path] = None
|
||||
king_arg = parsed.get("king") # New: explicit king argument
|
||||
rel_type = parsed.get("type", "alt") # New: relationship type (default: alt)
|
||||
king_arg = parsed.get("king")
|
||||
rel_type = parsed.get("type", "alt")
|
||||
|
||||
if parsed:
|
||||
# Get the first arg value (e.g., -path)
|
||||
first_arg_name = CMDLET.get("args", [{}])[0].get("name") if CMDLET.get("args") else None
|
||||
if first_arg_name and first_arg_name in parsed:
|
||||
arg_value = parsed[first_arg_name]
|
||||
try:
|
||||
arg_path = Path(str(arg_value)).expanduser()
|
||||
except Exception:
|
||||
arg_path = Path(str(arg_value))
|
||||
raw_path = parsed.get("path")
|
||||
if raw_path:
|
||||
try:
|
||||
arg_path = Path(str(raw_path)).expanduser()
|
||||
except Exception:
|
||||
arg_path = Path(str(raw_path))
|
||||
|
||||
# Handle @N selection which creates a list
|
||||
# Use normalize_result_input to handle both single items and lists
|
||||
@@ -481,3 +484,9 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
return 1
|
||||
|
||||
|
||||
# Register cmdlet (no legacy decorator)
|
||||
CMDLET.exec = _run
|
||||
CMDLET.alias = ["add-rel"]
|
||||
CMDLET.register()
|
||||
|
||||
|
||||
@@ -66,21 +66,37 @@ def _apply_title_to_result(res: Any, title_value: Optional[str]) -> None:
|
||||
res["columns"] = updated
|
||||
|
||||
|
||||
def _matches_target(item: Any, target_hash: Optional[str], target_path: Optional[str]) -> bool:
|
||||
"""Determine whether a result item refers to the given hash/path target (canonical fields only)."""
|
||||
def _matches_target(
|
||||
item: Any,
|
||||
target_hash: Optional[str],
|
||||
target_path: Optional[str],
|
||||
target_store: Optional[str] = None,
|
||||
) -> bool:
|
||||
"""Determine whether a result item refers to the given target.
|
||||
|
||||
Important: hashes can collide across backends in this app's UX (same media in
|
||||
multiple stores). When target_store is provided, it must match too.
|
||||
"""
|
||||
|
||||
def norm(val: Any) -> Optional[str]:
|
||||
return str(val).lower() if val is not None else None
|
||||
|
||||
target_hash_l = target_hash.lower() if target_hash else None
|
||||
target_path_l = target_path.lower() if target_path else None
|
||||
target_store_l = target_store.lower() if target_store else None
|
||||
|
||||
if isinstance(item, dict):
|
||||
hashes = [norm(item.get("hash"))]
|
||||
paths = [norm(item.get("path"))]
|
||||
stores = [norm(item.get("store"))]
|
||||
else:
|
||||
hashes = [norm(get_field(item, "hash"))]
|
||||
paths = [norm(get_field(item, "path"))]
|
||||
stores = [norm(get_field(item, "store"))]
|
||||
|
||||
if target_store_l:
|
||||
if target_store_l not in stores:
|
||||
return False
|
||||
|
||||
if target_hash_l and target_hash_l in hashes:
|
||||
return True
|
||||
@@ -118,7 +134,12 @@ def _update_item_title_fields(item: Any, new_title: str) -> None:
|
||||
item["columns"] = updated_cols
|
||||
|
||||
|
||||
def _refresh_result_table_title(new_title: str, target_hash: Optional[str], target_path: Optional[str]) -> None:
|
||||
def _refresh_result_table_title(
|
||||
new_title: str,
|
||||
target_hash: Optional[str],
|
||||
target_store: Optional[str],
|
||||
target_path: Optional[str],
|
||||
) -> None:
|
||||
"""Refresh the cached result table with an updated title and redisplay it."""
|
||||
try:
|
||||
last_table = ctx.get_last_result_table()
|
||||
@@ -130,7 +151,7 @@ def _refresh_result_table_title(new_title: str, target_hash: Optional[str], targ
|
||||
match_found = False
|
||||
for item in items:
|
||||
try:
|
||||
if _matches_target(item, target_hash, target_path):
|
||||
if _matches_target(item, target_hash, target_path, target_store):
|
||||
_update_item_title_fields(item, new_title)
|
||||
match_found = True
|
||||
except Exception:
|
||||
@@ -154,7 +175,7 @@ def _refresh_result_table_title(new_title: str, target_hash: Optional[str], targ
|
||||
def _refresh_tag_view(res: Any, target_hash: Optional[str], store_name: Optional[str], target_path: Optional[str], config: Dict[str, Any]) -> None:
|
||||
"""Refresh tag display via get-tag. Prefer current subject; fall back to direct hash refresh."""
|
||||
try:
|
||||
from cmdlets import get_tag as get_tag_cmd # type: ignore
|
||||
from cmdlet import get as get_cmdlet # type: ignore
|
||||
except Exception:
|
||||
return
|
||||
|
||||
@@ -163,16 +184,24 @@ def _refresh_tag_view(res: Any, target_hash: Optional[str], store_name: Optional
|
||||
|
||||
refresh_args: List[str] = ["-hash", target_hash, "-store", store_name]
|
||||
|
||||
get_tag = None
|
||||
try:
|
||||
get_tag = get_cmdlet("get-tag")
|
||||
except Exception:
|
||||
get_tag = None
|
||||
if not callable(get_tag):
|
||||
return
|
||||
|
||||
try:
|
||||
subject = ctx.get_last_result_subject()
|
||||
if subject and _matches_target(subject, target_hash, target_path):
|
||||
get_tag_cmd._run(subject, refresh_args, config)
|
||||
if subject and _matches_target(subject, target_hash, target_path, store_name):
|
||||
get_tag(subject, refresh_args, config)
|
||||
return
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
get_tag_cmd._run(res, refresh_args, config)
|
||||
get_tag(res, refresh_args, config)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -187,12 +216,12 @@ class Add_Tag(Cmdlet):
|
||||
summary="Add tag to a file in a store.",
|
||||
usage="add-tag -store <store> [-hash <sha256>] [-duplicate <format>] [-list <list>[,<list>...]] [--all] <tag>[,<tag>...]",
|
||||
arg=[
|
||||
CmdletArg("tag", type="string", required=False, description="One or more tag to add. Comma- or space-separated. Can also use {list_name} syntax. If omitted, uses tag from pipeline payload.", variadic=True),
|
||||
SharedArgs.HASH,
|
||||
SharedArgs.STORE,
|
||||
CmdletArg("-duplicate", type="string", description="Copy existing tag values to new namespaces. Formats: title:album,artist (explicit) or title,album,artist (inferred)"),
|
||||
CmdletArg("-list", type="string", description="Load predefined tag lists from adjective.json. Comma-separated list names (e.g., -list philosophy,occult)."),
|
||||
CmdletArg("--all", type="flag", description="Include temporary files in tagging (by default, only tag non-temporary files)."),
|
||||
CmdletArg("tag", type="string", required=False, description="One or more tag to add. Comma- or space-separated. Can also use {list_name} syntax. If omitted, uses tag from pipeline payload.", variadic=True),
|
||||
],
|
||||
detail=[
|
||||
"- By default, only tag non-temporary files (from pipelines). Use --all to tag everything.",
|
||||
@@ -406,15 +435,19 @@ class Add_Tag(Cmdlet):
|
||||
changed = False
|
||||
if removed_namespace_tag:
|
||||
try:
|
||||
backend.delete_tag(resolved_hash, removed_namespace_tag, config=config)
|
||||
changed = True
|
||||
ok_del = backend.delete_tag(resolved_hash, removed_namespace_tag, config=config)
|
||||
if ok_del:
|
||||
changed = True
|
||||
except Exception as exc:
|
||||
log(f"[add_tag] Warning: Failed deleting namespace tag: {exc}", file=sys.stderr)
|
||||
|
||||
if actual_tag_to_add:
|
||||
try:
|
||||
backend.add_tag(resolved_hash, actual_tag_to_add, config=config)
|
||||
changed = True
|
||||
ok_add = backend.add_tag(resolved_hash, actual_tag_to_add, config=config)
|
||||
if ok_add:
|
||||
changed = True
|
||||
else:
|
||||
log("[add_tag] Warning: Store rejected tag update", file=sys.stderr)
|
||||
except Exception as exc:
|
||||
log(f"[add_tag] Warning: Failed adding tag: {exc}", file=sys.stderr)
|
||||
|
||||
@@ -438,7 +471,7 @@ class Add_Tag(Cmdlet):
|
||||
_apply_title_to_result(res, final_title)
|
||||
|
||||
if final_title and (not original_title or final_title.lower() != original_title.lower()):
|
||||
_refresh_result_table_title(final_title, resolved_hash, raw_path)
|
||||
_refresh_result_table_title(final_title, resolved_hash, str(store_name), raw_path)
|
||||
|
||||
if changed:
|
||||
_refresh_tag_view(res, resolved_hash, str(store_name), raw_path, config)
|
||||
456 cmdlet/add_tags.py Normal file
@@ -0,0 +1,456 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, List, Sequence, Optional
|
||||
from pathlib import Path
|
||||
import sys
|
||||
|
||||
from SYS.logger import log
|
||||
|
||||
import models
|
||||
import pipeline as ctx
|
||||
from ._shared import normalize_result_input, filter_results_by_temp
|
||||
from ._shared import (
|
||||
Cmdlet,
|
||||
CmdletArg,
|
||||
SharedArgs,
|
||||
normalize_hash,
|
||||
parse_tag_arguments,
|
||||
expand_tag_groups,
|
||||
parse_cmdlet_args,
|
||||
collapse_namespace_tags,
|
||||
should_show_help,
|
||||
get_field,
|
||||
)
|
||||
from Store import Store
|
||||
from SYS.utils import sha256_file
|
||||
|
||||
|
||||
def _extract_title_tag(tags: List[str]) -> Optional[str]:
|
||||
"""Return the value of the first title: tag if present."""
|
||||
for tag in tags:
|
||||
if isinstance(tag, str) and tag.lower().startswith("title:"):
|
||||
value = tag.split(":", 1)[1].strip()
|
||||
if value:
|
||||
return value
|
||||
return None
|
||||
|
||||
|
||||
def _apply_title_to_result(res: Any, title_value: Optional[str]) -> None:
|
||||
"""Update result object/dict title fields and columns in-place."""
|
||||
if not title_value:
|
||||
return
|
||||
if isinstance(res, models.PipeObject):
|
||||
res.title = title_value
|
||||
# Update columns if present (Title column assumed index 0)
|
||||
if hasattr(res, "columns") and isinstance(res.columns, list) and res.columns:
|
||||
label, *_ = res.columns[0]
|
||||
if str(label).lower() == "title":
|
||||
res.columns[0] = (res.columns[0][0], title_value)
|
||||
elif isinstance(res, dict):
|
||||
res["title"] = title_value
|
||||
cols = res.get("columns")
|
||||
if isinstance(cols, list):
|
||||
updated = []
|
||||
changed = False
|
||||
for col in cols:
|
||||
if isinstance(col, tuple) and len(col) == 2:
|
||||
label, val = col
|
||||
if str(label).lower() == "title":
|
||||
updated.append((label, title_value))
|
||||
changed = True
|
||||
else:
|
||||
updated.append(col)
|
||||
else:
|
||||
updated.append(col)
|
||||
if changed:
|
||||
res["columns"] = updated
|
||||
|
||||
|
||||
def _matches_target(item: Any, target_hash: Optional[str], target_path: Optional[str]) -> bool:
|
||||
"""Determine whether a result item refers to the given hash/path target (canonical fields only)."""
|
||||
|
||||
def norm(val: Any) -> Optional[str]:
|
||||
return str(val).lower() if val is not None else None
|
||||
|
||||
target_hash_l = target_hash.lower() if target_hash else None
|
||||
target_path_l = target_path.lower() if target_path else None
|
||||
|
||||
if isinstance(item, dict):
|
||||
hashes = [norm(item.get("hash"))]
|
||||
paths = [norm(item.get("path"))]
|
||||
else:
|
||||
hashes = [norm(get_field(item, "hash"))]
|
||||
paths = [norm(get_field(item, "path"))]
|
||||
|
||||
if target_hash_l and target_hash_l in hashes:
|
||||
return True
|
||||
if target_path_l and target_path_l in paths:
|
||||
return True
|
||||
return False
|
||||
|
||||
|
||||
def _update_item_title_fields(item: Any, new_title: str) -> None:
|
||||
"""Mutate an item to reflect a new title in plain fields and columns."""
|
||||
if isinstance(item, models.PipeObject):
|
||||
item.title = new_title
|
||||
if hasattr(item, "columns") and isinstance(item.columns, list) and item.columns:
|
||||
label, *_ = item.columns[0]
|
||||
if str(label).lower() == "title":
|
||||
item.columns[0] = (label, new_title)
|
||||
elif isinstance(item, dict):
|
||||
item["title"] = new_title
|
||||
cols = item.get("columns")
|
||||
if isinstance(cols, list):
|
||||
updated_cols = []
|
||||
changed = False
|
||||
for col in cols:
|
||||
if isinstance(col, tuple) and len(col) == 2:
|
||||
label, val = col
|
||||
if str(label).lower() == "title":
|
||||
updated_cols.append((label, new_title))
|
||||
changed = True
|
||||
else:
|
||||
updated_cols.append(col)
|
||||
else:
|
||||
updated_cols.append(col)
|
||||
if changed:
|
||||
item["columns"] = updated_cols
|
||||
|
||||
|
||||
def _refresh_result_table_title(new_title: str, target_hash: Optional[str], target_path: Optional[str]) -> None:
|
||||
"""Refresh the cached result table with an updated title and redisplay it."""
|
||||
try:
|
||||
last_table = ctx.get_last_result_table()
|
||||
items = ctx.get_last_result_items()
|
||||
if not last_table or not items:
|
||||
return
|
||||
|
||||
updated_items = []
|
||||
match_found = False
|
||||
for item in items:
|
||||
try:
|
||||
if _matches_target(item, target_hash, target_path):
|
||||
_update_item_title_fields(item, new_title)
|
||||
match_found = True
|
||||
except Exception:
|
||||
pass
|
||||
updated_items.append(item)
|
||||
if not match_found:
|
||||
return
|
||||
|
||||
from result_table import ResultTable # Local import to avoid circular dependency
|
||||
|
||||
new_table = last_table.copy_with_title(getattr(last_table, "title", ""))
|
||||
|
||||
for item in updated_items:
|
||||
new_table.add_result(item)
|
||||
|
||||
# Keep the underlying history intact; update only the overlay so @.. can
|
||||
# clear the overlay then continue back to prior tables (e.g., the search list).
|
||||
ctx.set_last_result_table_overlay(new_table, updated_items)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def _refresh_tags_view(res: Any, target_hash: Optional[str], store_name: Optional[str], target_path: Optional[str], config: Dict[str, Any]) -> None:
|
||||
"""Refresh tag display via get-tag. Prefer current subject; fall back to direct hash refresh."""
|
||||
try:
|
||||
from cmdlet import get_tag as get_tag_cmd # type: ignore
|
||||
except Exception:
|
||||
return
|
||||
|
||||
if not target_hash or not store_name:
|
||||
return
|
||||
|
||||
refresh_args: List[str] = ["-hash", target_hash, "-store", store_name]
|
||||
|
||||
try:
|
||||
subject = ctx.get_last_result_subject()
|
||||
if subject and _matches_target(subject, target_hash, target_path):
|
||||
get_tag_cmd._run(subject, refresh_args, config)
|
||||
return
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
get_tag_cmd._run(res, refresh_args, config)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
|
||||
class Add_Tag(Cmdlet):
|
||||
"""Class-based add-tags cmdlet with Cmdlet metadata inheritance."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
super().__init__(
|
||||
name="add-tags",
|
||||
summary="Add tags to a file in a store.",
|
||||
usage="add-tags -store <store> [-hash <sha256>] [-duplicate <format>] [-list <list>[,<list>...]] [--all] <tag>[,<tag>...]",
|
||||
arg=[
|
||||
SharedArgs.HASH,
|
||||
SharedArgs.STORE,
|
||||
CmdletArg("-duplicate", type="string", description="Copy existing tag values to new namespaces. Formats: title:album,artist (explicit) or title,album,artist (inferred)"),
|
||||
CmdletArg("-list", type="string", description="Load predefined tag lists from adjective.json. Comma-separated list names (e.g., -list philosophy,occult)."),
|
||||
CmdletArg("--all", type="flag", description="Include temporary files in tagging (by default, only tags non-temporary files)."),
|
||||
CmdletArg("tags", type="string", required=False, description="One or more tags to add. Comma- or space-separated. Can also use {list_name} syntax. If omitted, uses tags from pipeline payload.", variadic=True),
|
||||
],
|
||||
detail=[
|
||||
"- By default, only tags non-temporary files (from pipelines). Use --all to tag everything.",
|
||||
"- Requires a store backend: use -store or pipe items that include store.",
|
||||
"- If -hash is not provided, uses the piped item's hash (or derives from its path when possible).",
|
||||
"- Multiple tags can be comma-separated or space-separated.",
|
||||
"- Use -list to include predefined tag lists from adjective.json: -list philosophy,occult",
|
||||
"- Tags can also reference lists with curly braces: add-tags {philosophy} \"other:tag\"",
|
||||
"- Use -duplicate to copy EXISTING tag values to new namespaces:",
|
||||
" Explicit format: -duplicate title:album,artist (copies title: to album: and artist:)",
|
||||
" Inferred format: -duplicate title,album,artist (first is source, rest are targets)",
|
||||
"- The source namespace must already exist in the file being tagged.",
|
||||
"- Target namespaces that already have a value are skipped (not overwritten).",
|
||||
"- You can also pass the target hash as a tag token: hash:<sha256>. This overrides -hash and is removed from the tag list.",
|
||||
],
|
||||
exec=self.run,
|
||||
)
|
||||
self.register()
|
||||
|
||||
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
"""Add tags to a file with smart filtering for pipeline results."""
|
||||
if should_show_help(args):
|
||||
log(f"Cmdlet: {self.name}\nSummary: {self.summary}\nUsage: {self.usage}")
|
||||
return 0
|
||||
|
||||
# Parse arguments
|
||||
parsed = parse_cmdlet_args(args, self)
|
||||
|
||||
# Check for --all flag
|
||||
include_temp = parsed.get("all", False)
|
||||
|
||||
# Normalize input to list
|
||||
results = normalize_result_input(result)
|
||||
|
||||
# Filter by temp status (unless --all is set)
|
||||
if not include_temp:
|
||||
results = filter_results_by_temp(results, include_temp=False)
|
||||
|
||||
if not results:
|
||||
log("No valid files to tag (all results were temporary; use --all to include temporary files)", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Get tags from arguments (or fallback to pipeline payload)
|
||||
raw_tags = parsed.get("tags", [])
|
||||
if isinstance(raw_tags, str):
|
||||
raw_tags = [raw_tags]
|
||||
|
||||
# Fallback: if no tags provided explicitly, try to pull from first result payload
|
||||
if not raw_tags and results:
|
||||
first = results[0]
|
||||
payload_tags = None
|
||||
|
||||
# Try multiple tag lookup strategies in order
|
||||
tag_lookups = [
|
||||
lambda x: getattr(x, "tags", None),
|
||||
lambda x: x.get("tags") if isinstance(x, dict) else None,
|
||||
]
|
||||
|
||||
for lookup in tag_lookups:
|
||||
try:
|
||||
payload_tags = lookup(first)
|
||||
if payload_tags:
|
||||
break
|
||||
except (AttributeError, TypeError, KeyError):
|
||||
continue
|
||||
|
||||
if payload_tags:
|
||||
if isinstance(payload_tags, str):
|
||||
raw_tags = [payload_tags]
|
||||
elif isinstance(payload_tags, list):
|
||||
raw_tags = payload_tags
|
||||
|
||||
# Handle -list argument (convert to {list} syntax)
|
||||
list_arg = parsed.get("list")
|
||||
if list_arg:
|
||||
for l in list_arg.split(','):
|
||||
l = l.strip()
|
||||
if l:
|
||||
raw_tags.append(f"{{{l}}}")
|
||||
|
||||
# Parse and expand tags
|
||||
tags_to_add = parse_tag_arguments(raw_tags)
|
||||
tags_to_add = expand_tag_groups(tags_to_add)
|
||||
|
||||
# Allow hash override via namespaced token (e.g., "hash:abcdef...")
|
||||
extracted_hash = None
|
||||
filtered_tags: List[str] = []
|
||||
for tag in tags_to_add:
|
||||
if isinstance(tag, str) and tag.lower().startswith("hash:"):
|
||||
_, _, hash_val = tag.partition(":")
|
||||
if hash_val:
|
||||
extracted_hash = normalize_hash(hash_val.strip())
|
||||
continue
|
||||
filtered_tags.append(tag)
|
||||
tags_to_add = filtered_tags
|
||||
|
||||
if not tags_to_add:
|
||||
log("No tags provided to add", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Get other flags (hash override can come from -hash or hash: token)
|
||||
hash_override = normalize_hash(parsed.get("hash")) or extracted_hash
|
||||
duplicate_arg = parsed.get("duplicate")
|
||||
|
||||
# Tags ARE provided - apply them to each store-backed result
|
||||
total_added = 0
|
||||
total_modified = 0
|
||||
|
||||
store_override = parsed.get("store")
|
||||
|
||||
for res in results:
|
||||
store_name: Optional[str]
|
||||
raw_hash: Optional[str]
|
||||
raw_path: Optional[str]
|
||||
|
||||
if isinstance(res, models.PipeObject):
|
||||
store_name = store_override or res.store
|
||||
raw_hash = res.hash
|
||||
raw_path = res.path
|
||||
elif isinstance(res, dict):
|
||||
store_name = store_override or res.get("store")
|
||||
raw_hash = res.get("hash")
|
||||
raw_path = res.get("path")
|
||||
else:
|
||||
ctx.emit(res)
|
||||
continue
|
||||
|
||||
if not store_name:
|
||||
log("[add_tags] Error: Missing -store and item has no store field", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
resolved_hash = normalize_hash(hash_override) if hash_override else normalize_hash(raw_hash)
|
||||
if not resolved_hash and raw_path:
|
||||
try:
|
||||
p = Path(str(raw_path))
|
||||
stem = p.stem
|
||||
if len(stem) == 64 and all(c in "0123456789abcdef" for c in stem.lower()):
|
||||
resolved_hash = stem.lower()
|
||||
elif p.exists() and p.is_file():
|
||||
resolved_hash = sha256_file(p)
|
||||
except Exception:
|
||||
resolved_hash = None
|
||||
|
||||
if not resolved_hash:
|
||||
log("[add_tags] Warning: Item missing usable hash (and could not derive from path); skipping", file=sys.stderr)
|
||||
ctx.emit(res)
|
||||
continue
|
||||
|
||||
try:
|
||||
backend = Store(config)[str(store_name)]
|
||||
except Exception as exc:
|
||||
log(f"[add_tags] Error: Unknown store '{store_name}': {exc}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
try:
|
||||
existing_tags, _src = backend.get_tag(resolved_hash, config=config)
|
||||
except Exception:
|
||||
existing_tags = []
|
||||
|
||||
existing_tags_list = [t for t in (existing_tags or []) if isinstance(t, str)]
|
||||
existing_lower = {t.lower() for t in existing_tags_list}
|
||||
original_title = _extract_title_tag(existing_tags_list)
|
||||
|
||||
# Per-item tag list (do not mutate shared list)
|
||||
item_tags_to_add = list(tags_to_add)
|
||||
item_tags_to_add = collapse_namespace_tags(item_tags_to_add, "title", prefer="last")
|
||||
|
||||
# Handle -duplicate logic (copy existing tags to new namespaces)
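# Illustrative (values made up): with an existing tag "title:Grace" on the item,
# "-duplicate title:album" parses to source_ns="title", targets=["album"], and the
# loop below queues "album:Grace" for addition without touching the original tag.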
|
||||
if duplicate_arg:
|
||||
parts = str(duplicate_arg).split(':')
|
||||
source_ns = ""
|
||||
targets: list[str] = []
|
||||
|
||||
if len(parts) > 1:
|
||||
source_ns = parts[0]
|
||||
targets = [t.strip() for t in parts[1].split(',') if t.strip()]
|
||||
else:
|
||||
parts2 = str(duplicate_arg).split(',')
|
||||
if len(parts2) > 1:
|
||||
source_ns = parts2[0]
|
||||
targets = [t.strip() for t in parts2[1:] if t.strip()]
|
||||
|
||||
if source_ns and targets:
|
||||
source_prefix = source_ns.lower() + ":"
|
||||
for t in existing_tags_list:
|
||||
if not t.lower().startswith(source_prefix):
|
||||
continue
|
||||
value = t.split(":", 1)[1]
|
||||
for target_ns in targets:
|
||||
new_tag = f"{target_ns}:{value}"
|
||||
if new_tag.lower() not in existing_lower:
|
||||
item_tags_to_add.append(new_tag)
|
||||
|
||||
# Namespace replacement: delete old namespace:* when adding namespace:value
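# Illustrative (values made up): adding "title:Grace (Legacy Edition)" while the item
# already carries "title:Grace" queues the old "title:Grace" for deletion, so each
# namespace ends up holding a single value.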
|
||||
removed_namespace_tags: list[str] = []
|
||||
for new_tag in item_tags_to_add:
|
||||
if not isinstance(new_tag, str) or ":" not in new_tag:
|
||||
continue
|
||||
ns = new_tag.split(":", 1)[0].strip()
|
||||
if not ns:
|
||||
continue
|
||||
ns_prefix = ns.lower() + ":"
|
||||
for t in existing_tags_list:
|
||||
if t.lower().startswith(ns_prefix) and t.lower() != new_tag.lower():
|
||||
removed_namespace_tags.append(t)
|
||||
|
||||
removed_namespace_tags = sorted(set(removed_namespace_tags))
|
||||
|
||||
actual_tags_to_add = [t for t in item_tags_to_add if isinstance(t, str) and t.lower() not in existing_lower]
|
||||
|
||||
changed = False
|
||||
if removed_namespace_tags:
|
||||
try:
|
||||
backend.delete_tag(resolved_hash, removed_namespace_tags, config=config)
|
||||
changed = True
|
||||
except Exception as exc:
|
||||
log(f"[add_tags] Warning: Failed deleting namespace tags: {exc}", file=sys.stderr)
|
||||
|
||||
if actual_tags_to_add:
|
||||
try:
|
||||
backend.add_tag(resolved_hash, actual_tags_to_add, config=config)
|
||||
changed = True
|
||||
except Exception as exc:
|
||||
log(f"[add_tags] Warning: Failed adding tags: {exc}", file=sys.stderr)
|
||||
|
||||
if changed:
|
||||
total_added += len(actual_tags_to_add)
|
||||
total_modified += 1
|
||||
|
||||
try:
|
||||
refreshed_tags, _src2 = backend.get_tag(resolved_hash, config=config)
|
||||
refreshed_list = [t for t in (refreshed_tags or []) if isinstance(t, str)]
|
||||
except Exception:
|
||||
refreshed_list = existing_tags_list
|
||||
|
||||
# Update the result's tags using canonical field
|
||||
if isinstance(res, models.PipeObject):
|
||||
res.tags = refreshed_list
|
||||
elif isinstance(res, dict):
|
||||
res["tags"] = refreshed_list
|
||||
|
||||
final_title = _extract_title_tag(refreshed_list)
|
||||
_apply_title_to_result(res, final_title)
|
||||
|
||||
if final_title and (not original_title or final_title.lower() != original_title.lower()):
|
||||
_refresh_result_table_title(final_title, resolved_hash, raw_path)
|
||||
|
||||
if changed:
|
||||
_refresh_tags_view(res, resolved_hash, str(store_name), raw_path, config)
|
||||
|
||||
ctx.emit(res)
|
||||
|
||||
log(
|
||||
f"[add_tags] Added {total_added} new tag(s) across {len(results)} item(s); modified {total_modified} item(s)",
|
||||
file=sys.stderr,
|
||||
)
|
||||
return 0
|
||||
|
||||
|
||||
CMDLET = Add_Tag()
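A rough usage sketch of the cmdlet above (the store name and tag values are illustrative, not part of this commit):

    @1 | add-tag -store local "artist:Jeff Buckley" "title:Grace"

Because of the namespace-replacement step, the new "title:" tag supersedes any previous "title:" value on the item, and tags that are already present are skipped rather than re-added.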
|
||||
@@ -4,12 +4,12 @@ from importlib import import_module
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
try:
|
||||
from cmdlets import REGISTRY
|
||||
from cmdlet import REGISTRY
|
||||
except Exception:
|
||||
REGISTRY = {} # type: ignore
|
||||
|
||||
try:
|
||||
from cmdnats import register_native_commands as _register_native_commands
|
||||
from cmdnat import register_native_commands as _register_native_commands
|
||||
except Exception:
|
||||
_register_native_commands = None
|
||||
|
||||
@@ -33,11 +33,11 @@ def _normalize_mod_name(mod_name: str) -> str:
|
||||
|
||||
|
||||
def import_cmd_module(mod_name: str):
|
||||
"""Import a cmdlet/native module from cmdnats or cmdlets packages."""
|
||||
"""Import a cmdlet/native module from cmdnat or cmdlet packages."""
|
||||
normalized = _normalize_mod_name(mod_name)
|
||||
if not normalized:
|
||||
return None
|
||||
for package in ("cmdnats", "cmdlets", None):
|
||||
for package in ("cmdnat", "cmdlet", None):
|
||||
try:
|
||||
qualified = f"{package}.{normalized}" if package else normalized
|
||||
return import_module(qualified)
|
||||
@@ -122,7 +122,7 @@ def get_cmdlet_metadata(cmd_name: str) -> Optional[Dict[str, Any]]:
|
||||
|
||||
|
||||
def list_cmdlet_metadata() -> Dict[str, Dict[str, Any]]:
|
||||
"""Collect metadata for all registered cmdlets keyed by canonical name."""
|
||||
"""Collect metadata for all registered cmdlet keyed by canonical name."""
|
||||
ensure_registry_loaded()
|
||||
entries: Dict[str, Dict[str, Any]] = {}
|
||||
for reg_name in (REGISTRY or {}).keys():
|
||||
@@ -186,27 +186,20 @@ def get_cmdlet_arg_flags(cmd_name: str) -> List[str]:
|
||||
if not meta:
|
||||
return []
|
||||
|
||||
raw = meta.get("raw")
|
||||
if raw and hasattr(raw, "build_flag_registry"):
|
||||
try:
|
||||
registry = raw.build_flag_registry()
|
||||
flags: List[str] = []
|
||||
for flag_set in registry.values():
|
||||
flags.extend(flag_set)
|
||||
return sorted(set(flags))
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# Preserve the order that arguments are defined on the cmdlet (arg=[...]) so
|
||||
# completions feel stable and predictable.
|
||||
flags: List[str] = []
|
||||
seen: set[str] = set()
|
||||
|
||||
for arg in meta.get("args", []):
|
||||
name = arg.get("name")
|
||||
name = str(arg.get("name") or "").strip().lstrip("-")
|
||||
if not name:
|
||||
continue
|
||||
flags.append(f"-{name}")
|
||||
flags.append(f"--{name}")
|
||||
alias = arg.get("alias")
|
||||
if alias:
|
||||
flags.append(f"-{alias}")
|
||||
for candidate in (f"-{name}", f"--{name}"):
|
||||
if candidate not in seen:
|
||||
flags.append(candidate)
|
||||
seen.add(candidate)
|
||||
|
||||
return flags
|
||||
|
||||
|
||||
@@ -6,7 +6,6 @@ import sys
|
||||
|
||||
from SYS.logger import log
|
||||
|
||||
from . import register
|
||||
from API import HydrusNetwork as hydrus_wrapper
|
||||
from ._shared import Cmdlet, CmdletArg, SharedArgs, normalize_hash, should_show_help
|
||||
|
||||
@@ -27,11 +26,10 @@ CMDLET = Cmdlet(
|
||||
)
|
||||
|
||||
|
||||
@register(["check-file-status", "check-status", "file-status", "status"])
|
||||
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
# Help
|
||||
if should_show_help(args):
|
||||
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
|
||||
log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
|
||||
return 0
|
||||
|
||||
# Parse arguments
|
||||
@@ -148,3 +146,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
import traceback
|
||||
traceback.print_exc(file=sys.stderr)
|
||||
return 1
|
||||
|
||||
|
||||
# Register cmdlet (no legacy decorator)
|
||||
CMDLET.exec = _run
|
||||
CMDLET.alias = ["check-status", "file-status", "status"]
|
||||
CMDLET.register()
|
||||
@@ -13,13 +13,10 @@ import json
|
||||
|
||||
from SYS.logger import log
|
||||
|
||||
from . import register
|
||||
from ._shared import Cmdlet, CmdletArg, get_pipe_object_path, normalize_result_input, filter_results_by_temp, should_show_help
|
||||
import models
|
||||
import pipeline as pipeline_context
|
||||
|
||||
|
||||
@register(["cleanup"])
|
||||
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
"""Remove temporary files from pipeline results.
|
||||
|
||||
@@ -38,7 +35,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
|
||||
# Help
|
||||
if should_show_help(args):
|
||||
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
|
||||
log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
|
||||
return 0
|
||||
|
||||
# Normalize input to list
|
||||
@@ -103,5 +100,6 @@ CMDLET = Cmdlet(
|
||||
"- Typical usage at end of pipeline: ... | add-tag -store local \"tag\" --all | cleanup",
|
||||
"- Exit code 0 if cleanup successful, 1 if no results to process",
|
||||
],
|
||||
)
|
||||
exec=_run,
|
||||
).register()
|
||||
|
||||
cmdlet/delete_note.py (new file, 140 lines)
@@ -0,0 +1,140 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional, Sequence
|
||||
import sys
|
||||
|
||||
from SYS.logger import log
|
||||
|
||||
import pipeline as ctx
|
||||
from ._shared import (
|
||||
Cmdlet,
|
||||
CmdletArg,
|
||||
SharedArgs,
|
||||
normalize_hash,
|
||||
parse_cmdlet_args,
|
||||
normalize_result_input,
|
||||
get_field,
|
||||
should_show_help,
|
||||
)
|
||||
from Store import Store
|
||||
from SYS.utils import sha256_file
|
||||
|
||||
|
||||
class Delete_Note(Cmdlet):
|
||||
def __init__(self) -> None:
|
||||
super().__init__(
|
||||
name="delete-note",
|
||||
summary="Delete a named note from a file in a store.",
|
||||
usage="delete-note -store <store> [-hash <sha256>] <name>",
|
||||
alias=["del-note"],
|
||||
arg=[
|
||||
SharedArgs.STORE,
|
||||
SharedArgs.HASH,
|
||||
CmdletArg("name", type="string", required=True, description="The note name/key to delete."),
|
||||
],
|
||||
detail=[
|
||||
"- Deletes the named note from the selected store backend.",
|
||||
],
|
||||
exec=self.run,
|
||||
)
|
||||
try:
|
||||
SharedArgs.STORE.choices = SharedArgs.get_store_choices(None)
|
||||
except Exception:
|
||||
pass
|
||||
self.register()
|
||||
|
||||
def _resolve_hash(self, raw_hash: Optional[str], raw_path: Optional[str], override_hash: Optional[str]) -> Optional[str]:
|
||||
resolved = normalize_hash(override_hash) if override_hash else normalize_hash(raw_hash)
|
||||
if resolved:
|
||||
return resolved
|
||||
if raw_path:
|
||||
try:
|
||||
p = Path(str(raw_path))
|
||||
stem = p.stem
|
||||
if len(stem) == 64 and all(c in "0123456789abcdef" for c in stem.lower()):
|
||||
return stem.lower()
|
||||
if p.exists() and p.is_file():
|
||||
return sha256_file(p)
|
||||
except Exception:
|
||||
return None
|
||||
return None
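# Illustrative resolution order (hash value made up): an explicit -hash override wins;
# otherwise a path such as ".../3a7bd3e2360a3d29eea436fcfb7e44c735d117c42d1c1835420b6b9942dd4f1b.flac"
# resolves to its 64-hex stem, and any other existing file is hashed via sha256_file().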
|
||||
|
||||
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
if should_show_help(args):
|
||||
log(f"Cmdlet: {self.name}\nSummary: {self.summary}\nUsage: {self.usage}")
|
||||
return 0
|
||||
|
||||
parsed = parse_cmdlet_args(args, self)
|
||||
|
||||
store_override = parsed.get("store")
|
||||
hash_override = parsed.get("hash")
|
||||
note_name_override = str(parsed.get("name") or "").strip()
|
||||
# Allow piping note rows from get-note: the selected item carries note_name.
|
||||
inferred_note_name = str(get_field(result, "note_name") or "").strip()
|
||||
if not note_name_override and not inferred_note_name:
|
||||
log("[delete_note] Error: Requires <name> (or pipe a note row that provides note_name)", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
results = normalize_result_input(result)
|
||||
if not results:
|
||||
if store_override and normalize_hash(hash_override):
|
||||
results = [{"store": str(store_override), "hash": normalize_hash(hash_override)}]
|
||||
else:
|
||||
log("[delete_note] Error: Requires piped item(s) or -store and -hash", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
store_registry = Store(config)
|
||||
deleted = 0
|
||||
|
||||
for res in results:
|
||||
if not isinstance(res, dict):
|
||||
ctx.emit(res)
|
||||
continue
|
||||
|
||||
# Resolve which note name to delete for this item.
|
||||
note_name = note_name_override or str(res.get("note_name") or "").strip() or inferred_note_name
|
||||
if not note_name:
|
||||
log("[delete_note] Error: Missing note name (pass <name> or pipe a note row)", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
store_name = str(store_override or res.get("store") or "").strip()
|
||||
raw_hash = res.get("hash")
|
||||
raw_path = res.get("path")
|
||||
|
||||
if not store_name:
|
||||
log("[delete_note] Error: Missing -store and item has no store field", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
resolved_hash = self._resolve_hash(
|
||||
raw_hash=str(raw_hash) if raw_hash else None,
|
||||
raw_path=str(raw_path) if raw_path else None,
|
||||
override_hash=str(hash_override) if hash_override else None,
|
||||
)
|
||||
if not resolved_hash:
|
||||
ctx.emit(res)
|
||||
continue
|
||||
|
||||
try:
|
||||
backend = store_registry[store_name]
|
||||
except Exception as exc:
|
||||
log(f"[delete_note] Error: Unknown store '{store_name}': {exc}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
ok = False
|
||||
try:
|
||||
ok = bool(backend.delete_note(resolved_hash, note_name, config=config))
|
||||
except Exception as exc:
|
||||
log(f"[delete_note] Error: Failed to delete note: {exc}", file=sys.stderr)
|
||||
ok = False
|
||||
|
||||
if ok:
|
||||
deleted += 1
|
||||
|
||||
ctx.emit(res)
|
||||
|
||||
log(f"[delete_note] Deleted note on {deleted} item(s)", file=sys.stderr)
|
||||
return 0 if deleted > 0 else 1
|
||||
|
||||
|
||||
CMDLET = Delete_Note()
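A minimal usage sketch (the store name and hash are placeholders):

    delete-note -store local -hash <sha256> lyric
    get-note -store local -hash <sha256> | @1 | delete-note

The second form relies on get-note emitting rows that carry note_name, as handled in the loop above.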
|
||||
@@ -10,7 +10,7 @@ import sys
|
||||
from SYS.logger import log
|
||||
|
||||
import pipeline as ctx
|
||||
from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args, normalize_result_input, get_field
|
||||
from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args, normalize_result_input, get_field, should_show_help
|
||||
from API.folder import LocalLibrarySearchOptimizer
|
||||
from config import get_local_storage_path
|
||||
|
||||
@@ -18,7 +18,7 @@ from config import get_local_storage_path
|
||||
def _refresh_relationship_view_if_current(target_hash: Optional[str], target_path: Optional[str], other: Optional[str], config: Dict[str, Any]) -> None:
|
||||
"""If the current subject matches the target, refresh relationships via get-relationship."""
|
||||
try:
|
||||
from cmdlets import get_relationship as get_rel_cmd # type: ignore
|
||||
from cmdlet import get as get_cmdlet # type: ignore
|
||||
except Exception:
|
||||
return
|
||||
|
||||
@@ -55,7 +55,11 @@ def _refresh_relationship_view_if_current(target_hash: Optional[str], target_pat
|
||||
refresh_args: list[str] = []
|
||||
if target_hash:
|
||||
refresh_args.extend(["-hash", target_hash])
|
||||
get_rel_cmd._run(subject, refresh_args, config)
|
||||
|
||||
cmd = get_cmdlet("get-relationship")
|
||||
if not cmd:
|
||||
return
|
||||
cmd(subject, refresh_args, config)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -72,6 +76,10 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
Exit code (0 = success)
|
||||
"""
|
||||
try:
|
||||
if should_show_help(args):
|
||||
log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
|
||||
return 0
|
||||
|
||||
# Parse arguments
|
||||
parsed_args = parse_cmdlet_args(args, CMDLET)
|
||||
delete_all_flag = parsed_args.get("all", False)
|
||||
@@ -203,3 +211,6 @@ CMDLET = Cmdlet(
|
||||
"- Delete all from file: delete-relationship -path <file> --all",
|
||||
],
|
||||
)
|
||||
|
||||
CMDLET.exec = _run
|
||||
CMDLET.register()
|
||||
@@ -5,7 +5,6 @@ from pathlib import Path
|
||||
import json
|
||||
import sys
|
||||
|
||||
from . import register
|
||||
import models
|
||||
import pipeline as ctx
|
||||
from ._shared import Cmdlet, CmdletArg, SharedArgs, normalize_hash, parse_tag_arguments, should_show_help, get_field
|
||||
@@ -16,10 +15,18 @@ from Store import Store
|
||||
def _refresh_tag_view_if_current(file_hash: str | None, store_name: str | None, path: str | None, config: Dict[str, Any]) -> None:
|
||||
"""If the current subject matches the target, refresh tags via get-tag."""
|
||||
try:
|
||||
from cmdlets import get_tag as get_tag_cmd # type: ignore
|
||||
from cmdlet import get as get_cmdlet # type: ignore
|
||||
except Exception:
|
||||
return
|
||||
|
||||
get_tag = None
|
||||
try:
|
||||
get_tag = get_cmdlet("get-tag")
|
||||
except Exception:
|
||||
get_tag = None
|
||||
if not callable(get_tag):
|
||||
return
|
||||
|
||||
try:
|
||||
subject = ctx.get_last_result_subject()
|
||||
if subject is None:
|
||||
@@ -51,7 +58,9 @@ def _refresh_tag_view_if_current(file_hash: str | None, store_name: str | None,
|
||||
refresh_args: list[str] = []
|
||||
if file_hash:
|
||||
refresh_args.extend(["-hash", file_hash])
|
||||
get_tag_cmd._run(subject, refresh_args, config)
|
||||
if store_name:
|
||||
refresh_args.extend(["-store", store_name])
|
||||
get_tag(subject, refresh_args, config)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -71,11 +80,10 @@ CMDLET = Cmdlet(
|
||||
],
|
||||
)
|
||||
|
||||
@register(["delete-tag"])
|
||||
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
# Help
|
||||
if should_show_help(args):
|
||||
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
|
||||
log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
|
||||
return 0
|
||||
|
||||
# Check if we have a piped TagItem with no args (i.e., from @1 | delete-tag)
|
||||
@@ -319,4 +327,9 @@ def _process_deletion(tags: list[str], file_hash: str | None, path: str | None,
|
||||
return False
|
||||
|
||||
|
||||
# Register cmdlet (no legacy decorator)
|
||||
CMDLET.exec = _run
|
||||
CMDLET.register()
|
||||
|
||||
|
||||
|
||||
@@ -96,7 +96,7 @@ class Download_File(Cmdlet):
|
||||
get_search_provider = None
|
||||
SearchResult = None
|
||||
try:
|
||||
from Provider.registry import get_search_provider as _get_search_provider, SearchResult as _SearchResult
|
||||
from ProviderCore.registry import get_search_provider as _get_search_provider, SearchResult as _SearchResult
|
||||
|
||||
get_search_provider = _get_search_provider
|
||||
SearchResult = _SearchResult
|
||||
@@ -26,6 +26,7 @@ import sys
|
||||
import time
|
||||
import traceback
|
||||
from typing import Any, Dict, Iterator, List, Optional
|
||||
from urllib.parse import urljoin, urlparse
|
||||
|
||||
import httpx
|
||||
|
||||
@@ -89,12 +90,13 @@ def is_url_supported_by_ytdlp(url: str) -> bool:
|
||||
def list_formats(url: str, no_playlist: bool = False, playlist_items: Optional[str] = None) -> Optional[List[Dict[str, Any]]]:
|
||||
_ensure_yt_dlp_ready()
|
||||
try:
|
||||
ydl_opts = {"quiet": True, "no_warnings": True, "socket_timeout": 30}
|
||||
assert yt_dlp is not None
|
||||
ydl_opts: Dict[str, Any] = {"quiet": True, "no_warnings": True, "socket_timeout": 30}
|
||||
if no_playlist:
|
||||
ydl_opts["noplaylist"] = True
|
||||
if playlist_items:
|
||||
ydl_opts["playlist_items"] = playlist_items
|
||||
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
||||
with yt_dlp.YoutubeDL(ydl_opts) as ydl: # type: ignore[arg-type]
|
||||
debug(f"Fetching format list for: {url}")
|
||||
info = ydl.extract_info(url, download=False)
|
||||
formats = info.get("formats", [])
|
||||
@@ -114,6 +116,7 @@ def list_formats(url: str, no_playlist: bool = False, playlist_items: Optional[s
|
||||
"vcodec": fmt.get("vcodec", "none"),
|
||||
"acodec": fmt.get("acodec", "none"),
|
||||
"filesize": fmt.get("filesize"),
|
||||
"abr": fmt.get("abr"),
|
||||
"tbr": fmt.get("tbr"),
|
||||
})
|
||||
debug(f"Found {len(result_formats)} available formats")
|
||||
@@ -123,6 +126,49 @@ def list_formats(url: str, no_playlist: bool = False, playlist_items: Optional[s
|
||||
return None
|
||||
|
||||
|
||||
def _pick_best_audio_format_id(formats: List[Dict[str, Any]]) -> Optional[str]:
|
||||
audio_only: List[Dict[str, Any]] = []
|
||||
for fmt in formats:
|
||||
if not isinstance(fmt, dict):
|
||||
continue
|
||||
format_id = str(fmt.get("format_id") or "").strip()
|
||||
if not format_id:
|
||||
continue
|
||||
vcodec = str(fmt.get("vcodec") or "none").lower()
|
||||
acodec = str(fmt.get("acodec") or "none").lower()
|
||||
if vcodec != "none":
|
||||
continue
|
||||
if not acodec or acodec == "none":
|
||||
continue
|
||||
audio_only.append(fmt)
|
||||
|
||||
if not audio_only:
|
||||
return None
|
||||
|
||||
def score(f: Dict[str, Any]) -> tuple[float, float]:
|
||||
tbr = f.get("tbr")
|
||||
abr = f.get("abr")
|
||||
bitrate = 0.0
|
||||
for candidate in (tbr, abr):
|
||||
try:
|
||||
if candidate is not None:
|
||||
bitrate = max(bitrate, float(candidate))
|
||||
except Exception:
|
||||
pass
|
||||
size = 0.0
|
||||
try:
|
||||
fs = f.get("filesize")
|
||||
if fs is not None:
|
||||
size = float(fs)
|
||||
except Exception:
|
||||
pass
|
||||
return (bitrate, size)
|
||||
|
||||
best = max(audio_only, key=score)
|
||||
best_id = str(best.get("format_id") or "").strip()
|
||||
return best_id or None
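A quick sketch of the selection rule (format ids and bitrates are invented for illustration):

    formats = [
        {"format_id": "140", "vcodec": "none", "acodec": "mp4a.40.2", "abr": 129.5},
        {"format_id": "251", "vcodec": "none", "acodec": "opus", "abr": 141.1},
        {"format_id": "22", "vcodec": "avc1", "acodec": "mp4a.40.2", "tbr": 1200.0},
    ]
    assert _pick_best_audio_format_id(formats) == "251"  # video format "22" is skipped; highest audio bitrate wins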
|
||||
|
||||
|
||||
def _download_with_sections_via_cli(url: str, ytdl_options: Dict[str, Any], sections: List[str], quiet: bool = False) -> tuple[Optional[str], Dict[str, Any]]:
|
||||
sections_list = ytdl_options.get("download_sections", [])
|
||||
if not sections_list:
|
||||
@@ -173,6 +219,10 @@ def _download_with_sections_via_cli(url: str, ytdl_options: Dict[str, Any], sect
|
||||
cmd.extend(["--cookies", cookies_path])
|
||||
if ytdl_options.get("noplaylist"):
|
||||
cmd.append("--no-playlist")
|
||||
|
||||
# Apply clip/section selection
|
||||
cmd.extend(["--download-sections", section])
|
||||
|
||||
cmd.append(url)
|
||||
if not quiet:
|
||||
debug(f"Running yt-dlp for section: {section}")
|
||||
@@ -511,7 +561,7 @@ def _download_direct_file(
|
||||
return DownloadMediaResult(
|
||||
path=file_path,
|
||||
info=info,
|
||||
tags=tags,
|
||||
tag=tags,
|
||||
source_url=url,
|
||||
hash_value=hash_value,
|
||||
)
|
||||
@@ -865,7 +915,7 @@ def download_media(
|
||||
return DownloadMediaResult(
|
||||
path=media_path,
|
||||
info=info_dict,
|
||||
tags=tags,
|
||||
tag=tags,
|
||||
source_url=opts.url,
|
||||
hash_value=file_hash,
|
||||
paths=media_paths, # Include all section files if present
|
||||
@@ -944,7 +994,7 @@ def download_media(
|
||||
return DownloadMediaResult(
|
||||
path=media_path,
|
||||
info=entry,
|
||||
tags=tags,
|
||||
tag=tags,
|
||||
source_url=source_url,
|
||||
hash_value=hash_value,
|
||||
)
|
||||
@@ -1001,15 +1051,12 @@ class Download_Media(Cmdlet):
|
||||
name="download-media",
|
||||
summary="Download media from streaming sites (YouTube, Twitch, etc.)",
|
||||
usage="download-media <url> [options] or search-file | download-media [options]",
|
||||
alias=["dl-media", "download-ytdlp"],
|
||||
alias=[""],
|
||||
arg=[
|
||||
CmdletArg(name="url", type="string", required=False, description="URL to download (yt-dlp supported sites only)", variadic=True),
|
||||
CmdletArg(name="-url", type="string", description="URL to download (alias for positional argument)", variadic=True),
|
||||
SharedArgs.URL,
|
||||
CmdletArg(name="audio", type="flag", alias="a", description="Download audio only"),
|
||||
CmdletArg(name="video", type="flag", alias="v", description="Download video (default)"),
|
||||
CmdletArg(name="format", type="string", alias="fmt", description="Explicit yt-dlp format selector"),
|
||||
CmdletArg(name="clip", type="string", description="Extract time range: MM:SS-MM:SS"),
|
||||
CmdletArg(name="section", type="string", description="Download sections: TIME_RANGE[,TIME_RANGE...]"),
|
||||
CmdletArg(name="item", type="string", description="Item selection for playlists/formats"),
|
||||
],
|
||||
detail=["Download media from streaming sites using yt-dlp.", "For direct file downloads, use download-file."],
|
||||
@@ -1073,9 +1120,10 @@ class Download_Media(Cmdlet):
|
||||
|
||||
# Get other options
|
||||
clip_spec = parsed.get("clip")
|
||||
section_spec = parsed.get("section")
|
||||
|
||||
# Parse clip/section ranges if specified
|
||||
mode = "audio" if parsed.get("audio") else "video"
|
||||
|
||||
# Parse clip range if specified
|
||||
clip_range = None
|
||||
if clip_spec:
|
||||
clip_range = self._parse_time_range(clip_spec)
|
||||
@@ -1083,19 +1131,19 @@ class Download_Media(Cmdlet):
|
||||
log(f"Invalid clip format: {clip_spec}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
section_ranges = None
|
||||
if section_spec:
|
||||
section_ranges = self._parse_section_ranges(section_spec)
|
||||
if not section_ranges:
|
||||
log(f"Invalid section format: {section_spec}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Check if we need to show format selection
|
||||
playlist_items = str(parsed.get("item")) if parsed.get("item") else None
|
||||
ytdl_format = parsed.get("format")
|
||||
|
||||
# If no -item, no explicit -format specified, and single URL, check for multiple formats/playlist
|
||||
if not playlist_items and not ytdl_format and len(supported_url) == 1:
|
||||
# If no -item, no explicit -format specified, and single URL, show the format table.
|
||||
# Do NOT stop to show formats when -audio is used (auto-pick) or when -clip is used.
|
||||
if (
|
||||
mode != "audio"
|
||||
and not clip_spec
|
||||
and not playlist_items
|
||||
and not ytdl_format
|
||||
and len(supported_url) == 1
|
||||
):
|
||||
url = supported_url[0]
|
||||
formats = list_formats(url, no_playlist=False)
|
||||
|
||||
@@ -1241,9 +1289,8 @@ class Download_Media(Cmdlet):
|
||||
|
||||
# Download each URL
|
||||
downloaded_count = 0
|
||||
clip_sections_spec = self._build_clip_sections_spec(clip_range, section_ranges)
|
||||
clip_sections_spec = self._build_clip_sections_spec(clip_range)
|
||||
quiet_mode = bool(config.get("_quiet_background_output")) if isinstance(config, dict) else False
|
||||
mode = "audio" if parsed.get("audio") else "video"
|
||||
|
||||
for url in supported_url:
|
||||
try:
|
||||
@@ -1263,6 +1310,14 @@ class Download_Media(Cmdlet):
|
||||
actual_format = playlist_items
|
||||
actual_playlist_items = None
|
||||
|
||||
# Auto-pick best audio format when -audio is used and no explicit format is given.
|
||||
if mode == "audio" and not actual_format:
|
||||
chosen = None
|
||||
formats = list_formats(url, no_playlist=False, playlist_items=actual_playlist_items)
|
||||
if formats:
|
||||
chosen = _pick_best_audio_format_id(formats)
|
||||
actual_format = chosen or "bestaudio/best"
|
||||
|
||||
opts = DownloadOptions(
|
||||
url=url,
|
||||
mode=mode,
|
||||
@@ -1358,31 +1413,14 @@ class Download_Media(Cmdlet):
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
def _parse_section_ranges(self, spec: str) -> Optional[List[tuple]]:
|
||||
"""Parse 'RANGE1,RANGE2,...' where each RANGE is 'MM:SS-MM:SS'."""
|
||||
try:
|
||||
ranges = []
|
||||
for range_spec in spec.split(","):
|
||||
r = self._parse_time_range(range_spec.strip())
|
||||
if r is None:
|
||||
return None
|
||||
ranges.append(r)
|
||||
return ranges if ranges else None
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
def _build_clip_sections_spec(
|
||||
self,
|
||||
clip_range: Optional[tuple],
|
||||
section_ranges: Optional[List[tuple]],
|
||||
) -> Optional[str]:
|
||||
"""Convert parsed clip/section ranges into downloader spec (seconds)."""
|
||||
"""Convert parsed clip range into downloader spec (seconds)."""
|
||||
ranges: List[str] = []
|
||||
if clip_range:
|
||||
ranges.append(f"{clip_range[0]}-{clip_range[1]}")
|
||||
if section_ranges:
|
||||
for start, end in section_ranges:
|
||||
ranges.append(f"{start}-{end}")
|
||||
return ",".join(ranges) if ranges else None
|
||||
|
||||
def _build_pipe_object(self, download_result: Any, url: str, opts: DownloadOptions) -> Dict[str, Any]:
|
||||
cmdlet/get_note.py (new file, 143 lines)
@@ -0,0 +1,143 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, Optional, Sequence
|
||||
import sys
|
||||
|
||||
from SYS.logger import log
|
||||
|
||||
import pipeline as ctx
|
||||
from ._shared import (
|
||||
Cmdlet,
|
||||
CmdletArg,
|
||||
SharedArgs,
|
||||
normalize_hash,
|
||||
parse_cmdlet_args,
|
||||
normalize_result_input,
|
||||
should_show_help,
|
||||
)
|
||||
from Store import Store
|
||||
from SYS.utils import sha256_file
|
||||
|
||||
|
||||
class Get_Note(Cmdlet):
|
||||
def __init__(self) -> None:
|
||||
super().__init__(
|
||||
name="get-note",
|
||||
summary="List notes on a file in a store.",
|
||||
usage="get-note -store <store> [-hash <sha256>]",
|
||||
alias=["get-notes", "get_note"],
|
||||
arg=[
|
||||
SharedArgs.STORE,
|
||||
SharedArgs.HASH,
|
||||
],
|
||||
detail=[
|
||||
"- Notes are retrieved via the selected store backend.",
|
||||
"- Lyrics are stored in a note named 'lyric'.",
|
||||
],
|
||||
exec=self.run,
|
||||
)
|
||||
try:
|
||||
SharedArgs.STORE.choices = SharedArgs.get_store_choices(None)
|
||||
except Exception:
|
||||
pass
|
||||
self.register()
|
||||
|
||||
def _resolve_hash(self, raw_hash: Optional[str], raw_path: Optional[str], override_hash: Optional[str]) -> Optional[str]:
|
||||
resolved = normalize_hash(override_hash) if override_hash else normalize_hash(raw_hash)
|
||||
if resolved:
|
||||
return resolved
|
||||
if raw_path:
|
||||
try:
|
||||
p = Path(str(raw_path))
|
||||
stem = p.stem
|
||||
if len(stem) == 64 and all(c in "0123456789abcdef" for c in stem.lower()):
|
||||
return stem.lower()
|
||||
if p.exists() and p.is_file():
|
||||
return sha256_file(p)
|
||||
except Exception:
|
||||
return None
|
||||
return None
|
||||
|
||||
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
if should_show_help(args):
|
||||
log(f"Cmdlet: {self.name}\nSummary: {self.summary}\nUsage: {self.usage}")
|
||||
return 0
|
||||
|
||||
parsed = parse_cmdlet_args(args, self)
|
||||
store_override = parsed.get("store")
|
||||
hash_override = parsed.get("hash")
|
||||
|
||||
results = normalize_result_input(result)
|
||||
if not results:
|
||||
if store_override and normalize_hash(hash_override):
|
||||
results = [{"store": str(store_override), "hash": normalize_hash(hash_override)}]
|
||||
else:
|
||||
log("[get_note] Error: Requires piped item(s) or -store and -hash", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
store_registry = Store(config)
|
||||
any_notes = False
|
||||
|
||||
for res in results:
|
||||
if not isinstance(res, dict):
|
||||
continue
|
||||
|
||||
store_name = str(store_override or res.get("store") or "").strip()
|
||||
raw_hash = res.get("hash")
|
||||
raw_path = res.get("path")
|
||||
|
||||
if not store_name:
|
||||
log("[get_note] Error: Missing -store and item has no store field", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
resolved_hash = self._resolve_hash(
|
||||
raw_hash=str(raw_hash) if raw_hash else None,
|
||||
raw_path=str(raw_path) if raw_path else None,
|
||||
override_hash=str(hash_override) if hash_override else None,
|
||||
)
|
||||
if not resolved_hash:
|
||||
continue
|
||||
|
||||
try:
|
||||
backend = store_registry[store_name]
|
||||
except Exception as exc:
|
||||
log(f"[get_note] Error: Unknown store '{store_name}': {exc}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
notes = {}
|
||||
try:
|
||||
notes = backend.get_note(resolved_hash, config=config) or {}
|
||||
except Exception:
|
||||
notes = {}
|
||||
|
||||
if not notes:
|
||||
continue
|
||||
|
||||
any_notes = True
|
||||
# Emit each note as its own row so CLI renders a proper note table
|
||||
for k in sorted(notes.keys(), key=lambda x: str(x).lower()):
|
||||
v = notes.get(k)
|
||||
raw_text = str(v or "")
|
||||
preview = " ".join(raw_text.replace("\r", "").split("\n"))
|
||||
ctx.emit(
|
||||
{
|
||||
"store": store_name,
|
||||
"hash": resolved_hash,
|
||||
"note_name": str(k),
|
||||
"note_text": raw_text,
|
||||
"columns": [
|
||||
("Name", str(k)),
|
||||
("Text", preview.strip()),
|
||||
],
|
||||
}
|
||||
)
|
||||
|
||||
if not any_notes:
|
||||
ctx.emit("No notes found.")
|
||||
return 0
|
||||
|
||||
|
||||
CMDLET = Get_Note()
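A minimal usage sketch (the store name and hash are placeholders); each note becomes one row:

    get-note -store local -hash <sha256>
    # emits, per note, roughly:
    # {"store": "local", "hash": "<sha256>", "note_name": "lyric", "note_text": "...",
    #  "columns": [("Name", "lyric"), ("Text", "...")]}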
|
||||
|
||||
|
||||
@@ -33,7 +33,7 @@ CMDLET = Cmdlet(
|
||||
def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
# Help
|
||||
if should_show_help(_args):
|
||||
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
|
||||
log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
|
||||
return 0
|
||||
|
||||
# Parse -hash override
|
||||
@@ -423,3 +423,7 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
return 0
|
||||
|
||||
|
||||
CMDLET.exec = _run
|
||||
CMDLET.register()
|
||||
|
||||
|
||||
@@ -39,7 +39,7 @@ from dataclasses import dataclass
|
||||
|
||||
@dataclass
|
||||
class TagItem:
|
||||
"""Tag item for display in ResultTable and piping to other cmdlets.
|
||||
"""Tag item for display in ResultTable and piping to other cmdlet.
|
||||
|
||||
Allows tags to be selected and piped like:
|
||||
- delete-tag @{3,4,9} (delete tags at indices 3, 4, 9)
|
||||
@@ -3,16 +3,25 @@ from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, Optional, Sequence, List
|
||||
from pathlib import Path
|
||||
import json
|
||||
import sys
|
||||
|
||||
from SYS.logger import log
|
||||
from cmdlets.download_media import download_media
|
||||
from models import DownloadOptions
|
||||
from config import resolve_output_dir
|
||||
import subprocess as _subprocess
|
||||
import shutil as _shutil
|
||||
from ._shared import create_pipe_object_result, parse_cmdlet_args
|
||||
|
||||
from ._shared import (
|
||||
Cmdlet,
|
||||
CmdletArg,
|
||||
create_pipe_object_result,
|
||||
get_field,
|
||||
get_pipe_object_hash,
|
||||
get_pipe_object_path,
|
||||
normalize_result_input,
|
||||
parse_cmdlet_args,
|
||||
should_show_help,
|
||||
)
|
||||
|
||||
import pipeline as ctx
|
||||
|
||||
try:
|
||||
from PyPDF2 import PdfWriter, PdfReader
|
||||
@@ -27,31 +36,29 @@ try:
|
||||
read_tags_from_file,
|
||||
write_tags_to_file,
|
||||
dedup_tags_by_namespace,
|
||||
merge_multiple_tag_lists,
|
||||
write_tags,
|
||||
write_metadata
|
||||
)
|
||||
HAS_METADATA_API = True
|
||||
except ImportError:
|
||||
HAS_METADATA_API = False
|
||||
|
||||
from . import register
|
||||
from ._shared import (
|
||||
Cmdlet,
|
||||
CmdletArg,
|
||||
normalize_result_input,
|
||||
get_pipe_object_path,
|
||||
get_pipe_object_hash,
|
||||
should_show_help,
|
||||
get_field,
|
||||
)
|
||||
import models
|
||||
import pipeline as ctx
|
||||
|
||||
|
||||
def read_tags_from_file(file_path: Path) -> List[str]:
|
||||
return []
|
||||
|
||||
def write_tags_to_file(
|
||||
file_path: Path,
|
||||
tags: List[str],
|
||||
source_hashes: Optional[List[str]] = None,
|
||||
url: Optional[List[str]] = None,
|
||||
append: bool = False,
|
||||
) -> bool:
|
||||
return False
|
||||
|
||||
def dedup_tags_by_namespace(tags: List[str]) -> List[str]:
|
||||
return tags
|
||||
|
||||
def write_metadata(*_args: Any, **_kwargs: Any) -> None:
|
||||
return None
|
||||
|
||||
|
||||
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
@@ -59,7 +66,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
|
||||
# Parse help
|
||||
if should_show_help(args):
|
||||
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
|
||||
log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
|
||||
return 0
|
||||
|
||||
# Parse arguments
|
||||
@@ -95,7 +102,6 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
|
||||
# Extract file paths and metadata from result objects
|
||||
source_files: List[Path] = []
|
||||
source_tags_files: List[Path] = []
|
||||
source_hashes: List[str] = []
|
||||
source_url: List[str] = []
|
||||
source_tags: List[str] = [] # NEW: collect tags from source files
|
||||
@@ -111,37 +117,14 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
if candidate.exists():
|
||||
target_path = candidate
|
||||
|
||||
# Check for playlist item that needs downloading
|
||||
if not target_path and isinstance(item, dict) and item.get('__action', '').startswith('playlist-item:'):
|
||||
try:
|
||||
playlist_url = item.get('__file_path')
|
||||
item_idx = int(item['__action'].split(':')[1])
|
||||
log(f"Downloading playlist item #{item_idx} from {playlist_url}...", flush=True)
|
||||
|
||||
output_dir = resolve_output_dir(config)
|
||||
opts = DownloadOptions(
|
||||
url=playlist_url,
|
||||
output_dir=output_dir,
|
||||
playlist_items=str(item_idx),
|
||||
mode="audio" if format_spec == "m4b" else "auto" # Infer mode if possible
|
||||
)
|
||||
|
||||
res = download_media(opts)
|
||||
if res and res.path and res.path.exists():
|
||||
target_path = res.path
|
||||
log(f"✓ Downloaded: {target_path.name}", flush=True)
|
||||
except Exception as e:
|
||||
log(f"Failed to download playlist item: {e}", file=sys.stderr)
|
||||
|
||||
if target_path and target_path.exists():
|
||||
source_files.append(target_path)
|
||||
|
||||
# Track the .tag file for this source
|
||||
# Track tags from the .tag sidecar for this source (if present)
|
||||
tags_file = target_path.with_suffix(target_path.suffix + '.tag')
|
||||
if tags_file.exists():
|
||||
source_tags_files.append(tags_file)
|
||||
if tags_file.exists() and HAS_METADATA_API:
|
||||
try:
|
||||
source_tags.extend(read_tags_from_file(tags_file) if HAS_METADATA_API else [])
|
||||
source_tags.extend(read_tags_from_file(tags_file) or [])
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
@@ -201,7 +184,8 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
# Determine output path
|
||||
if output_override:
|
||||
if output_override.is_dir():
|
||||
base_name = _sanitize_name(getattr(files_to_merge[0], 'title', 'merged'))
|
||||
base_title = get_field(files_to_merge[0], 'title', 'merged')
|
||||
base_name = _sanitize_name(str(base_title or 'merged'))
|
||||
output_path = output_override / f"{base_name} (merged).{_ext_for_format(output_format)}"
|
||||
else:
|
||||
output_path = output_override
|
||||
@@ -231,12 +215,11 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
|
||||
log(f"Merged {len(source_files)} files into: {output_path}", file=sys.stderr)
|
||||
|
||||
merged_tags: List[str] = [f"title:{output_path.stem}"]
|
||||
|
||||
# Create .tag sidecar file for the merged output using unified API
|
||||
tags_path = output_path.with_suffix(output_path.suffix + '.tag')
|
||||
try:
|
||||
# Start with title tag
|
||||
merged_tags = [f"title:{output_path.stem}"]
|
||||
|
||||
# Merge tags from source files using metadata API
|
||||
if source_tags and HAS_METADATA_API:
|
||||
# Use dedup function to normalize and deduplicate
|
||||
@@ -281,8 +264,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
|
||||
# Also create .metadata file using centralized function
|
||||
try:
|
||||
write_metadata(output_path, source_hashes[0] if source_hashes else None, source_url, source_relationships)
|
||||
log(f"Created metadata: {output_path.name}.metadata", file=sys.stderr)
|
||||
if HAS_METADATA_API and write_metadata:
|
||||
write_metadata(output_path, source_hashes[0] if source_hashes else None, source_url, source_relationships)
|
||||
log(f"Created metadata: {output_path.name}.metadata", file=sys.stderr)
|
||||
except Exception as e:
|
||||
log(f"Warning: Could not create metadata file: {e}", file=sys.stderr)
|
||||
|
||||
@@ -312,23 +296,26 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
# Still emit a string representation for feedback
|
||||
ctx.emit(f"Merged: {output_path}")
|
||||
|
||||
# Delete source files if requested
|
||||
# Always delete source files if they were downloaded playlist items (temp files)
|
||||
# We can detect this if they are in the temp download directory or if we tracked them
|
||||
if delete_after or True: # Force delete for now as merge consumes them
|
||||
# First delete all .tag files
|
||||
for tags_file in source_tags_files:
|
||||
try:
|
||||
tags_file.unlink()
|
||||
log(f"Deleted: {tags_file.name}", file=sys.stderr)
|
||||
except Exception as e:
|
||||
log(f"Warning: Could not delete {tags_file.name}: {e}", file=sys.stderr)
|
||||
|
||||
# Then delete all source files
|
||||
# Cleanup
|
||||
# - Delete source files only when -delete is set.
|
||||
if delete_after:
|
||||
for f in source_files:
|
||||
try:
|
||||
f.unlink()
|
||||
log(f"Deleted: {f.name}", file=sys.stderr)
|
||||
# Delete sidecar tags for the source (if any)
|
||||
tag_file = f.with_suffix(f.suffix + '.tag')
|
||||
if tag_file.exists():
|
||||
try:
|
||||
tag_file.unlink()
|
||||
log(f"Deleted: {tag_file.name}", file=sys.stderr)
|
||||
except Exception as e:
|
||||
log(f"Warning: Could not delete {tag_file.name}: {e}", file=sys.stderr)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
try:
|
||||
if f.exists():
|
||||
f.unlink()
|
||||
log(f"Deleted: {f.name}", file=sys.stderr)
|
||||
except Exception as e:
|
||||
log(f"Warning: Could not delete {f.name}: {e}", file=sys.stderr)
|
||||
|
||||
@@ -348,6 +335,7 @@ def _ext_for_format(fmt: str) -> str:
|
||||
format_map = {
|
||||
'mp3': 'mp3',
|
||||
'm4a': 'm4a',
|
||||
'm4b': 'm4b',
|
||||
'aac': 'aac',
|
||||
'opus': 'opus',
|
||||
'mka': 'mka', # Matroska Audio - EXCELLENT chapter support (recommended)
|
||||
@@ -361,58 +349,6 @@ def _ext_for_format(fmt: str) -> str:
|
||||
return format_map.get(fmt.lower(), 'mka')
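# Examples (illustrative): _ext_for_format("M4B") == "m4b"; an unrecognised format falls back to "mka".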
|
||||
|
||||
|
||||
def _add_chapters_to_m4a(file_path: Path, chapters: List[Dict]) -> bool:
|
||||
"""Add chapters to an M4A file using mutagen.
|
||||
|
||||
Args:
|
||||
file_path: Path to M4A file
|
||||
chapters: List of chapter dicts with 'title', 'start_ms', 'end_ms'
|
||||
|
||||
Returns:
|
||||
True if successful, False otherwise
|
||||
"""
|
||||
import logging
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
if not chapters:
|
||||
return True
|
||||
|
||||
try:
|
||||
from mutagen.mp4 import MP4, Atom
|
||||
from mutagen.mp4._util import Atom as MP4Atom
|
||||
except ImportError:
|
||||
logger.warning("[merge-file] mutagen not available for chapter writing")
|
||||
return False
|
||||
|
||||
try:
|
||||
# Load the MP4 file
|
||||
audio = MP4(str(file_path))
|
||||
|
||||
# Build the chapter atom
|
||||
# MP4 chapters are stored in a 'chap' atom with specific structure
|
||||
chapter_data = b''
|
||||
|
||||
for i, chapter in enumerate(chapters, 1):
|
||||
# Each chapter entry: 10-byte header + title
|
||||
title = chapter.get('title', f'Chapter {i}').encode('utf-8')
|
||||
start_time_ms = int(chapter.get('start_ms', 0))
|
||||
|
||||
# Chapter atom format for M4A:
|
||||
# (uint32: size)(uint32: 'chap')(uint8: reserved)(uint24: atom type) + more...
|
||||
# This is complex, so we'll use a simpler atom approach
|
||||
pass
|
||||
|
||||
# Unfortunately, mutagen doesn't have built-in chapter writing for MP4
|
||||
# Chapter writing requires low-level atom manipulation
|
||||
# For now, we'll just return and note this limitation
|
||||
logger.info("[merge-file] MP4 chapter writing via mutagen not fully supported")
|
||||
return False
|
||||
|
||||
except Exception as e:
|
||||
logger.warning(f"[merge-file] Error writing chapters: {e}")
|
||||
return False
|
||||
|
||||
|
||||
def _merge_audio(files: List[Path], output: Path, output_format: str) -> bool:
|
||||
"""Merge audio files with chapters based on file boundaries."""
|
||||
import logging
|
||||
@@ -529,7 +465,7 @@ def _merge_audio(files: List[Path], output: Path, output_format: str) -> bool:
|
||||
# Audio codec selection for first input
|
||||
if output_format == 'mp3':
|
||||
cmd.extend(['-c:a', 'libmp3lame', '-q:a', '2'])
|
||||
elif output_format == 'm4a':
|
||||
elif output_format in {'m4a', 'm4b'}:
|
||||
# Use copy if possible (much faster), otherwise re-encode
|
||||
# Check if inputs are already AAC/M4A to avoid re-encoding
|
||||
# For now, default to copy if format matches, otherwise re-encode
|
||||
@@ -682,7 +618,7 @@ def _merge_audio(files: List[Path], output: Path, output_format: str) -> bool:
|
||||
except Exception as e:
|
||||
logger.exception(f"[merge-file] Chapter embedding failed: {e}")
|
||||
log(f"Warning: Chapter embedding failed, using merge without chapters", file=sys.stderr)
|
||||
elif output_format == 'm4a' or output.suffix.lower() in ['.m4a', '.mp4']:
|
||||
elif output_format in {'m4a', 'm4b'} or output.suffix.lower() in ['.m4a', '.m4b', '.mp4']:
|
||||
# MP4/M4A format has native chapter support via iTunes metadata atoms
|
||||
log(f"Embedding chapters into MP4 container...", file=sys.stderr)
|
||||
logger.info(f"[merge-file] Adding chapters to M4A/MP4 file via iTunes metadata")
|
||||
@@ -833,16 +769,12 @@ def _merge_text(files: List[Path], output: Path) -> bool:
|
||||
|
||||
def _merge_pdf(files: List[Path], output: Path) -> bool:
|
||||
"""Merge PDF files."""
|
||||
if not HAS_PYPDF2:
|
||||
if (not HAS_PYPDF2) or (PdfWriter is None) or (PdfReader is None):
|
||||
log("PyPDF2 is required for PDF merging. Install with: pip install PyPDF2", file=sys.stderr)
|
||||
return False
|
||||
|
||||
try:
|
||||
if HAS_PYPDF2:
|
||||
writer = PdfWriter()
|
||||
else:
|
||||
log("PyPDF2 is required for PDF merging. Install with: pip install PyPDF2", file=sys.stderr)
|
||||
return False
|
||||
writer = PdfWriter()
|
||||
|
||||
for f in files:
|
||||
try:
|
||||
@@ -866,11 +798,11 @@ def _merge_pdf(files: List[Path], output: Path) -> bool:
|
||||
CMDLET = Cmdlet(
|
||||
name="merge-file",
|
||||
summary="Merge multiple files into a single output file. Supports audio, video, PDF, and text merging with optional cleanup.",
|
||||
usage="merge-file [-delete] [-output <path>] [-format <auto|mp3|aac|opus|mp4|mkv|pdf|txt>]",
|
||||
usage="merge-file [-delete] [-output <path>] [-format <auto|mka|m4a|m4b|mp3|aac|opus|mp4|mkv|pdf|txt>]",
|
||||
arg=[
|
||||
CmdletArg("-delete", type="flag", description="Delete source files after successful merge."),
|
||||
CmdletArg("-output", description="Override output file path."),
|
||||
CmdletArg("-format", description="Output format (auto/mp3/aac/opus/mp4/mkv/pdf/txt). Default: auto-detect from first file."),
|
||||
CmdletArg("-format", description="Output format (auto/mka/m4a/m4b/mp3/aac/opus/mp4/mkv/pdf/txt). Default: auto-detect from first file."),
|
||||
],
|
||||
detail=[
|
||||
"- Pipe multiple files: search-file query | [1,2,3] | merge-file",
|
||||
@@ -882,3 +814,6 @@ CMDLET = Cmdlet(
|
||||
"- -delete flag removes all source files after successful merge.",
|
||||
],
|
||||
)
|
||||
|
||||
CMDLET.exec = _run
|
||||
CMDLET.register()
|
||||
@@ -8,8 +8,6 @@ from __future__ import annotations
|
||||
|
||||
import contextlib
|
||||
import hashlib
|
||||
import importlib
|
||||
import json
|
||||
import sys
|
||||
import time
|
||||
import httpx
|
||||
@@ -21,10 +19,7 @@ from urllib.parse import urlsplit, quote, urljoin
|
||||
from SYS.logger import log, debug
|
||||
from API.HTTP import HTTPClient
|
||||
from SYS.utils import ensure_directory, unique_path, unique_preserve_order
|
||||
|
||||
from . import register
|
||||
from ._shared import Cmdlet, CmdletArg, SharedArgs, create_pipe_object_result, normalize_result_input, should_show_help, get_field
|
||||
import models
|
||||
import pipeline as pipeline_context
|
||||
|
||||
# ============================================================================
|
||||
@@ -40,13 +35,17 @@ import pipeline as pipeline_context
|
||||
try:
|
||||
from playwright.sync_api import (
|
||||
TimeoutError as PlaywrightTimeoutError,
|
||||
ViewportSize,
|
||||
sync_playwright,
|
||||
)
|
||||
except Exception as exc:
|
||||
raise RuntimeError(
|
||||
"playwright is required for screenshot capture; install with 'pip install playwright'"
|
||||
) from exc
|
||||
HAS_PLAYWRIGHT = True
|
||||
except Exception:
|
||||
HAS_PLAYWRIGHT = False
|
||||
PlaywrightTimeoutError = TimeoutError # type: ignore
|
||||
|
||||
def sync_playwright(*_args: Any, **_kwargs: Any) -> Any: # type: ignore
|
||||
raise RuntimeError(
|
||||
"playwright is required for screenshot capture; install with: pip install playwright; then: playwright install"
|
||||
)
|
||||
|
||||
try:
|
||||
from config import resolve_output_dir
|
||||
@@ -69,7 +68,7 @@ USER_AGENT = (
|
||||
"Chrome/120.0.0.0 Safari/537.36"
|
||||
)
|
||||
|
||||
DEFAULT_VIEWPORT: ViewportSize = {"width": 1280, "height": 1200}
|
||||
DEFAULT_VIEWPORT: dict[str, int] = {"width": 1280, "height": 1200}
|
||||
ARCHIVE_TIMEOUT = 30.0
|
||||
|
||||
# Configurable selectors for specific websites
|
||||
@@ -114,7 +113,7 @@ class ScreenshotOptions:
|
||||
"""Options controlling screenshot capture and post-processing."""
|
||||
|
||||
output_dir: Path
|
||||
url: Sequence[str] = ()
|
||||
url: str = ""
|
||||
output_path: Optional[Path] = None
|
||||
full_page: bool = True
|
||||
headless: bool = True
|
||||
@@ -124,7 +123,6 @@ class ScreenshotOptions:
|
||||
tag: Sequence[str] = ()
|
||||
archive: bool = False
|
||||
archive_timeout: float = ARCHIVE_TIMEOUT
|
||||
url: Sequence[str] = ()
|
||||
output_format: Optional[str] = None
|
||||
prefer_platform_target: bool = False
|
||||
target_selectors: Optional[Sequence[str]] = None
|
||||
@@ -470,10 +468,10 @@ def _capture_screenshot(options: ScreenshotOptions) -> ScreenshotResult:
|
||||
warnings: List[str] = []
|
||||
_capture(options, destination, warnings)
|
||||
|
||||
# Build URL list from provided options.url (sequence) and deduplicate
|
||||
url = unique_preserve_order(list(options.url))
|
||||
# Build URL list from captured url and any archives
|
||||
url: List[str] = [options.url] if options.url else []
|
||||
archive_url: List[str] = []
|
||||
if options.archive:
|
||||
if options.archive and options.url:
|
||||
debug(f"[_capture_screenshot] Archiving enabled for {options.url}")
|
||||
archives, archive_warnings = _archive_url(options.url, options.archive_timeout)
|
||||
archive_url.extend(archives)
|
||||
@@ -518,9 +516,16 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
|
||||
# Help check
|
||||
if should_show_help(args):
|
||||
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
|
||||
log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
|
||||
return 0
|
||||
|
||||
if not HAS_PLAYWRIGHT:
|
||||
log(
|
||||
"playwright is required for screenshot capture; install with: pip install playwright; then: playwright install",
|
||||
file=sys.stderr,
|
||||
)
|
||||
return 1
|
||||
|
||||
# ========================================================================
|
||||
# ARGUMENT PARSING
|
||||
# ========================================================================
|
||||
@@ -627,7 +632,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
try:
|
||||
# Create screenshot with provided options
|
||||
options = ScreenshotOptions(
|
||||
url=[url],
|
||||
url=url,
|
||||
output_dir=screenshot_dir,
|
||||
output_format=format_name,
|
||||
archive=archive_enabled,
|
||||
@@ -672,7 +677,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
}
|
||||
)
|
||||
|
||||
# Emit the result so downstream cmdlets (like add-file) can use it
|
||||
# Emit the result so downstream cmdlet (like add-file) can use it
|
||||
pipeline_context.emit(pipe_obj)
|
||||
all_emitted.append(pipe_obj)
|
||||
|
||||
@@ -711,3 +716,6 @@ CMDLET = Cmdlet(
|
||||
|
||||
"""]
|
||||
)
|
||||
|
||||
CMDLET.exec = _run
|
||||
CMDLET.register()
|
||||
@@ -8,7 +8,7 @@ import uuid
|
||||
import importlib
|
||||
|
||||
from SYS.logger import log, debug
|
||||
from Provider.registry import get_search_provider, list_search_providers
|
||||
from ProviderCore.registry import get_search_provider, list_search_providers
|
||||
|
||||
from ._shared import Cmdlet, CmdletArg, should_show_help
|
||||
import pipeline as ctx
|
||||
@@ -49,7 +49,7 @@ class Search_Provider(Cmdlet):
|
||||
"- soulseek: Plain text search",
|
||||
"- youtube: Plain text search",
|
||||
"",
|
||||
"Results can be piped to other cmdlets:",
|
||||
"Results can be piped to other cmdlet:",
|
||||
" search-provider bandcamp \"artist:grace\" | @1 | download-data",
|
||||
],
|
||||
exec=self.run
|
||||
@@ -304,7 +304,7 @@ class Search_Store(Cmdlet):
|
||||
continue
|
||||
normalized = self._ensure_storage_columns(item_dict)
|
||||
|
||||
# Make hash/store available for downstream cmdlets without rerunning search
|
||||
# Make hash/store available for downstream cmdlet without rerunning search
|
||||
hash_val = normalized.get("hash")
|
||||
store_val = normalized.get("store") or item_dict.get("store")
|
||||
if hash_val and not normalized.get("hash"):
|
||||
@@ -11,7 +11,6 @@ import re
|
||||
|
||||
from SYS.logger import log, debug
|
||||
from SYS.utils import sha256_file
|
||||
from . import register
|
||||
from ._shared import (
|
||||
Cmdlet,
|
||||
CmdletArg,
|
||||
@@ -112,7 +111,6 @@ def _trim_media(input_path: Path, output_path: Path, start_time: str, end_time:
|
||||
log(f"Error parsing time or running ffmpeg: {e}", file=sys.stderr)
|
||||
return False
|
||||
|
||||
@register(["trim-file"])
|
||||
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
"""Trim a media file."""
|
||||
# Parse arguments
|
||||
@@ -292,3 +290,8 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
log(f"Failed to trim {path_obj.name}", file=sys.stderr)
|
||||
|
||||
return 0 if success_count > 0 else 1
|
||||
|
||||
|
||||
# Register cmdlet (no legacy decorator)
|
||||
CMDLET.exec = _run
|
||||
CMDLET.register()
|
||||
@@ -1,106 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, Sequence
|
||||
import json
|
||||
|
||||
from . import register
|
||||
import models
|
||||
import pipeline as ctx
|
||||
from API import HydrusNetwork as hydrus_wrapper
|
||||
from ._shared import Cmdlet, CmdletArg, normalize_hash, should_show_help
|
||||
from SYS.logger import log
|
||||
|
||||
CMDLET = Cmdlet(
|
||||
name="add-note",
|
||||
summary="Add or set a note on a Hydrus file.",
|
||||
usage="add-note [-hash <sha256>] <name> <text>",
|
||||
arg=[
|
||||
CmdletArg("hash", type="string", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
|
||||
CmdletArg("name", type="string", required=True, description="The note name/key to set (e.g. 'comment', 'source', etc.)."),
|
||||
CmdletArg("text", type="string", required=True, description="The note text/content to store.", variadic=True),
|
||||
],
|
||||
detail=[
|
||||
"- Notes are stored in the 'my notes' service by default.",
|
||||
],
|
||||
)
|
||||
|
||||
|
||||
@register(["add-note", "set-note", "add_note"]) # aliases
|
||||
def add(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
# Help
|
||||
if should_show_help(args):
|
||||
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
|
||||
return 0
|
||||
|
||||
from ._shared import parse_cmdlet_args
|
||||
parsed = parse_cmdlet_args(args, CMDLET)
|
||||
override_hash = parsed.get("hash")
|
||||
name = parsed.get("name")
|
||||
text_parts = parsed.get("text")
|
||||
|
||||
if not name:
|
||||
log("Requires a note name")
|
||||
return 1
|
||||
|
||||
name = str(name).strip()
|
||||
|
||||
if isinstance(text_parts, list):
|
||||
text = " ".join(text_parts).strip()
|
||||
else:
|
||||
text = str(text_parts or "").strip()
|
||||
|
||||
if not text:
|
||||
log("Empty note text")
|
||||
return 1
|
||||
|
||||
# Handle @N selection which creates a list - extract the first item
|
||||
if isinstance(result, list) and len(result) > 0:
|
||||
result = result[0]
|
||||
|
||||
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(getattr(result, "hash_hex", None))
|
||||
if not hash_hex:
|
||||
log("Selected result does not include a Hydrus hash")
|
||||
return 1
|
||||
try:
|
||||
client = hydrus_wrapper.get_client(config)
|
||||
except Exception as exc:
|
||||
log(f"Hydrus client unavailable: {exc}")
|
||||
return 1
|
||||
|
||||
if client is None:
|
||||
log("Hydrus client unavailable")
|
||||
return 1
|
||||
try:
|
||||
service_name = "my notes"
|
||||
client.set_notes(hash_hex, {name: text}, service_name)
|
||||
except Exception as exc:
|
||||
log(f"Hydrus add-note failed: {exc}")
|
||||
return 1
|
||||
|
||||
# Refresh notes view if we're operating on the currently selected subject
|
||||
try:
|
||||
from cmdlets import get_note as get_note_cmd # type: ignore
|
||||
except Exception:
|
||||
get_note_cmd = None
|
||||
if get_note_cmd:
|
||||
try:
|
||||
subject = ctx.get_last_result_subject()
|
||||
if subject is not None:
|
||||
def norm(val: Any) -> str:
|
||||
return str(val).lower()
|
||||
target_hash = norm(hash_hex) if hash_hex else None
|
||||
subj_hashes = []
|
||||
if isinstance(subject, dict):
|
||||
subj_hashes = [norm(v) for v in [subject.get("hydrus_hash"), subject.get("hash"), subject.get("hash_hex"), subject.get("file_hash")] if v]
|
||||
else:
|
||||
subj_hashes = [norm(getattr(subject, f, None)) for f in ("hydrus_hash", "hash", "hash_hex", "file_hash") if getattr(subject, f, None)]
|
||||
if target_hash and target_hash in subj_hashes:
|
||||
get_note_cmd.get_notes(subject, ["-hash", hash_hex], config)
|
||||
return 0
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
ctx.emit(f"Added note '{name}' ({len(text)} chars)")
|
||||
|
||||
return 0
|
||||
|
||||
@@ -1,102 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, Sequence
|
||||
import json
|
||||
|
||||
import pipeline as ctx
|
||||
from API import HydrusNetwork as hydrus_wrapper
|
||||
from ._shared import Cmdlet, CmdletArg, normalize_hash, get_hash_for_operation, fetch_hydrus_metadata, should_show_help, get_field
|
||||
from SYS.logger import log
|
||||
|
||||
CMDLET = Cmdlet(
|
||||
name="delete-note",
|
||||
summary="Delete a named note from a Hydrus file.",
|
||||
usage="i | del-note [-hash <sha256>] <name>",
|
||||
alias=["del-note"],
|
||||
arg=[
|
||||
|
||||
],
|
||||
detail=[
|
||||
"- Removes the note with the given name from the Hydrus file.",
|
||||
],
|
||||
)
|
||||
|
||||
|
||||
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
# Help
|
||||
if should_show_help(args):
|
||||
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
|
||||
return 0
|
||||
if not args:
|
||||
log("Requires the note name/key to delete")
|
||||
return 1
|
||||
override_hash: str | None = None
|
||||
rest: list[str] = []
|
||||
i = 0
|
||||
while i < len(args):
|
||||
a = args[i]
|
||||
low = str(a).lower()
|
||||
if low in {"-hash", "--hash", "hash"} and i + 1 < len(args):
|
||||
override_hash = str(args[i + 1]).strip()
|
||||
i += 2
|
||||
continue
|
||||
rest.append(a)
|
||||
i += 1
|
||||
if not rest:
|
||||
log("Requires the note name/key to delete")
|
||||
return 1
|
||||
name = str(rest[0] or '').strip()
|
||||
if not name:
|
||||
log("Requires a non-empty note name/key")
|
||||
return 1
|
||||
|
||||
# Handle @N selection which creates a list - extract the first item
|
||||
if isinstance(result, list) and len(result) > 0:
|
||||
result = result[0]
|
||||
|
||||
hash_hex = get_hash_for_operation(override_hash, result)
|
||||
if not hash_hex:
|
||||
log("Selected result does not include a Hydrus hash")
|
||||
return 1
|
||||
try:
|
||||
client = hydrus_wrapper.get_client(config)
|
||||
except Exception as exc:
|
||||
log(f"Hydrus client unavailable: {exc}")
|
||||
return 1
|
||||
|
||||
if client is None:
|
||||
log("Hydrus client unavailable")
|
||||
return 1
|
||||
try:
|
||||
service_name = "my notes"
|
||||
client.delete_notes(hash_hex, [name], service_name)
|
||||
except Exception as exc:
|
||||
log(f"Hydrus delete-note failed: {exc}")
|
||||
return 1
|
||||
|
||||
# Refresh notes view if we're operating on the current subject
|
||||
try:
|
||||
from cmdlets import get_note as get_note_cmd # type: ignore
|
||||
except Exception:
|
||||
get_note_cmd = None
|
||||
if get_note_cmd:
|
||||
try:
|
||||
subject = ctx.get_last_result_subject()
|
||||
if subject is not None:
|
||||
def norm(val: Any) -> str:
|
||||
return str(val).lower()
|
||||
target_hash = norm(hash_hex) if hash_hex else None
|
||||
subj_hashes = []
|
||||
if isinstance(subject, dict):
|
||||
subj_hashes = [norm(v) for v in [subject.get("hydrus_hash"), subject.get("hash"), subject.get("hash_hex"), subject.get("file_hash")] if v]
|
||||
else:
|
||||
subj_hashes = [norm(get_field(subject, f)) for f in ("hydrus_hash", "hash", "hash_hex", "file_hash") if get_field(subject, f)]
|
||||
if target_hash and target_hash in subj_hashes:
|
||||
get_note_cmd.get_notes(subject, ["-hash", hash_hex], config)
|
||||
return 0
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
log(f"Deleted note '{name}'")
|
||||
|
||||
return 0
|
||||
@@ -1,66 +0,0 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, Sequence
|
||||
import json
|
||||
|
||||
from . import register
|
||||
import models
|
||||
import pipeline as ctx
|
||||
from API import HydrusNetwork as hydrus_wrapper
|
||||
from ._shared import Cmdlet, CmdletArg, SharedArgs, normalize_hash, get_hash_for_operation, fetch_hydrus_metadata, get_field, should_show_help
|
||||
from SYS.logger import log
|
||||
|
||||
CMDLET = Cmdlet(
|
||||
name="get-note",
|
||||
summary="List notes on a Hydrus file.",
|
||||
usage="get-note [-hash <sha256>]",
|
||||
arg=[
|
||||
SharedArgs.HASH,
|
||||
],
|
||||
detail=[
|
||||
"- Prints notes by service and note name.",
|
||||
],
|
||||
)
|
||||
|
||||
|
||||
@register(["get-note", "get-notes", "get_note"]) # aliases
|
||||
def get_notes(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
# Help
|
||||
if should_show_help(args):
|
||||
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
|
||||
return 0
|
||||
|
||||
from ._shared import parse_cmdlet_args, get_hash_for_operation, fetch_hydrus_metadata
|
||||
parsed = parse_cmdlet_args(args, CMDLET)
|
||||
override_hash = parsed.get("hash")
|
||||
|
||||
hash_hex = get_hash_for_operation(override_hash, result)
|
||||
if not hash_hex:
|
||||
log("Selected result does not include a Hydrus hash")
|
||||
return 1
|
||||
|
||||
meta, error_code = fetch_hydrus_metadata(config, hash_hex, include_service_keys_to_tags=False, include_notes=True)
|
||||
if error_code != 0:
|
||||
return error_code
|
||||
|
||||
notes = {}
|
||||
if isinstance(meta, dict):
|
||||
# Hydrus returns service_keys_to_tags; for notes we expect 'service_names_to_notes' in modern API
|
||||
notes = meta.get('notes') or meta.get('service_names_to_notes') or {}
|
||||
if notes:
|
||||
ctx.emit("Notes:")
|
||||
# Print flattened: service -> (name: text)
|
||||
if isinstance(notes, dict) and any(isinstance(v, dict) for v in notes.values()):
|
||||
for svc, mapping in notes.items():
|
||||
ctx.emit(f"- {svc}:")
|
||||
if isinstance(mapping, dict):
|
||||
for k, v in mapping.items():
|
||||
ctx.emit(f" • {k}: {str(v).strip()}")
|
||||
elif isinstance(notes, dict):
|
||||
for k, v in notes.items():
|
||||
ctx.emit(f"- {k}: {str(v).strip()}")
|
||||
else:
|
||||
ctx.emit("No notes found.")
|
||||
return 0
|
||||
|
||||
|
||||
@@ -2,12 +2,12 @@ import json
|
||||
import os
|
||||
import sys
|
||||
from typing import List, Dict, Any, Optional, Sequence
|
||||
from cmdlets._shared import Cmdlet, CmdletArg, parse_cmdlet_args
|
||||
from cmdlet._shared import Cmdlet, CmdletArg, parse_cmdlet_args
|
||||
from SYS.logger import log
|
||||
from result_table import ResultTable
|
||||
import pipeline as ctx
|
||||
|
||||
ADJECTIVE_FILE = os.path.join(os.path.dirname(os.path.dirname(__file__)), "cmdnats", "adjective.json")
|
||||
ADJECTIVE_FILE = os.path.join(os.path.dirname(os.path.dirname(__file__)), "cmdnat", "adjective.json")
|
||||
|
||||
def _load_adjectives() -> Dict[str, List[str]]:
|
||||
try:
|
||||
@@ -1,6 +1,6 @@
|
||||
from typing import List, Dict, Any
|
||||
|
||||
from cmdlets._shared import Cmdlet, CmdletArg
|
||||
from cmdlet._shared import Cmdlet, CmdletArg
|
||||
from config import load_config, save_config
|
||||
|
||||
CMDLET = Cmdlet(
|
||||
@@ -4,7 +4,7 @@ from typing import Any, Dict, Sequence, List, Optional
|
||||
import shlex
|
||||
import sys
|
||||
|
||||
from cmdlets._shared import Cmdlet, CmdletArg, parse_cmdlet_args
|
||||
from cmdlet._shared import Cmdlet, CmdletArg, parse_cmdlet_args
|
||||
from SYS.logger import log
|
||||
from result_table import ResultTable
|
||||
import pipeline as ctx
|
||||
@@ -135,7 +135,7 @@ def _render_detail(meta: Dict[str, Any], args: Sequence[str]) -> None:
|
||||
|
||||
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
try:
|
||||
from cmdlets import catalog as _catalog
|
||||
from cmdlet import catalog as _catalog
|
||||
|
||||
CMDLET.arg[0].choices = _normalize_choice_list(_catalog.list_cmdlet_names())
|
||||
metadata = _catalog.list_cmdlet_metadata()
|
||||
@@ -163,7 +163,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
CMDLET = Cmdlet(
|
||||
name=".help",
|
||||
alias=["help", "?"],
|
||||
summary="Show cmdlets or detailed help",
|
||||
summary="Show cmdlet or detailed help",
|
||||
usage=".help [cmd] [-filter text]",
|
||||
arg=[
|
||||
CmdletArg(
|
||||
@@ -176,7 +176,7 @@ CMDLET = Cmdlet(
|
||||
CmdletArg(
|
||||
name="-filter",
|
||||
type="string",
|
||||
description="Filter cmdlets by substring",
|
||||
description="Filter cmdlet by substring",
|
||||
required=False,
|
||||
),
|
||||
],
|
||||
@@ -1,6 +1,6 @@
|
||||
from typing import Any, Dict, Sequence, List
|
||||
import sys
|
||||
from cmdlets._shared import Cmdlet, CmdletArg, parse_cmdlet_args
|
||||
from cmdlet._shared import Cmdlet, CmdletArg, parse_cmdlet_args
|
||||
from SYS.logger import log, debug
|
||||
from result_table import ResultTable
|
||||
# REFACTOR: Commenting out Matrix import until provider refactor is complete
|
||||
cmdnat/pipe.py (new file, 1486 lines): diff suppressed because it is too large
@@ -7,8 +7,8 @@ from dataclasses import dataclass
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any, Dict, Sequence, List
|
||||
|
||||
from cmdlets import register
|
||||
from cmdlets._shared import Cmdlet, CmdletArg
|
||||
from cmdlet import register
|
||||
from cmdlet._shared import Cmdlet, CmdletArg
|
||||
import pipeline as ctx
|
||||
from SYS.logger import log
|
||||
from config import get_local_storage_path
|
||||
cmdnats/pipe.py (1228 lines): diff suppressed because it is too large
metadata.py (197 lines)
@@ -58,10 +58,7 @@ _CURRENT_RELATIONSHIP_TRACKER = FileRelationshipTracker()
|
||||
|
||||
|
||||
def prepare_ffmpeg_metadata(payload: Optional[Dict[str, Any]]) -> Dict[str, str]:
|
||||
"""Derive ffmpeg/mutagen metadata tags from a generic metadata payload.
|
||||
|
||||
This is not Hydrus-specific; it is used by exporters/converters.
|
||||
"""
|
||||
"""Build ffmpeg/mutagen metadata map from payload."""
|
||||
if not isinstance(payload, dict):
|
||||
return {}
|
||||
|
||||
@@ -275,29 +272,17 @@ def build_ffmpeg_command(
|
||||
|
||||
|
||||
def field(obj: Any, name: str, value: Any = None) -> Any:
|
||||
"""Get or set a field on dict or object.
|
||||
"""Get or set a field on dict or object."""
|
||||
if value is None:
|
||||
if isinstance(obj, dict):
|
||||
return obj.get(name)
|
||||
return getattr(obj, name, None)
|
||||
|
||||
Args:
|
||||
obj: Dict or object to access
|
||||
name: Field name
|
||||
value: If None, gets the field; if not None, sets it and returns the value
|
||||
|
||||
Returns:
|
||||
The field value (when getting) or the value (when setting)
|
||||
"""
|
||||
if value is None:
|
||||
# Get mode
|
||||
if isinstance(obj, dict):
|
||||
return obj.get(name)
|
||||
else:
|
||||
return getattr(obj, name, None)
|
||||
else:
|
||||
# Set mode
|
||||
if isinstance(obj, dict):
|
||||
obj[name] = value
|
||||
else:
|
||||
setattr(obj, name, value)
|
||||
return value
|
||||
if isinstance(obj, dict):
|
||||
obj[name] = value
|
||||
else:
|
||||
setattr(obj, name, value)
|
||||
return value
|
||||
|
||||
|
||||
|
||||
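# Illustrative sketch (not part of the diff): how the consolidated field() helper above
# behaves for dicts and plain objects. The sample values are hypothetical.
payload = {"title": "Abbey Road"}
assert field(payload, "title") == "Abbey Road"                    # get mode on a dict
assert field(payload, "artist", "The Beatles") == "The Beatles"   # set mode returns the value
assert payload["artist"] == "The Beatles"
# Note: value=None always means "get", so None itself cannot be stored through field().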
@@ -1602,78 +1587,61 @@ def _read_sidecar_metadata(sidecar_path: Path) -> tuple[Optional[str], List[str]
|
||||
|
||||
|
||||
def rename(file_path: Path, tags: Iterable[str]) -> Optional[Path]:
|
||||
"""Rename a file based on title: tag in the tags list.
|
||||
"""Rename a file based on a title: tag.
|
||||
|
||||
If a title: tag is present, renames the file and any .tag/.metadata sidecars.
|
||||
"""
|
||||
|
||||
Args:
|
||||
file_path: Path to the file to potentially rename
|
||||
tags: Iterable of tag strings (should contain title: tag if rename needed)
|
||||
new_title: Optional[str] = None
|
||||
for tag in tags:
|
||||
if isinstance(tag, str) and tag.lower().startswith("title:"):
|
||||
new_title = tag.split(":", 1)[1].strip()
|
||||
break
|
||||
|
||||
Returns:
|
||||
New path if renamed, None if not renamed or error occurred
|
||||
"""
|
||||
# Extract title from tags
|
||||
new_title = None
|
||||
for tag in tags:
|
||||
if isinstance(tag, str) and tag.lower().startswith('title:'):
|
||||
new_title = tag.split(':', 1)[1].strip()
|
||||
break
|
||||
if not new_title or not file_path.exists():
|
||||
return None
|
||||
|
||||
if not new_title or not file_path.exists():
|
||||
return None
|
||||
old_name = file_path.name
|
||||
old_suffix = file_path.suffix
|
||||
new_name = f"{new_title}{old_suffix}"
|
||||
new_path = file_path.with_name(new_name)
|
||||
|
||||
try:
|
||||
old_name = file_path.name
|
||||
old_suffix = file_path.suffix
|
||||
if new_path == file_path:
|
||||
return None
|
||||
|
||||
# Create new filename: title + extension
|
||||
new_name = f"{new_title}{old_suffix}"
|
||||
new_path = file_path.parent / new_name
|
||||
def _rename_sidecar(ext: str) -> None:
|
||||
old_sidecar = file_path.parent / (old_name + ext)
|
||||
if not old_sidecar.exists():
|
||||
return
|
||||
new_sidecar = file_path.parent / (new_name + ext)
|
||||
if new_sidecar.exists():
|
||||
try:
|
||||
new_sidecar.unlink()
|
||||
except Exception as exc:
|
||||
debug(f"Warning: Could not replace target sidecar {new_sidecar.name}: {exc}", file=sys.stderr)
|
||||
return
|
||||
old_sidecar.rename(new_sidecar)
|
||||
debug(f"Renamed sidecar: {old_sidecar.name} -> {new_sidecar.name}", file=sys.stderr)
|
||||
|
||||
# Don't rename if already the same name
|
||||
if new_path == file_path:
|
||||
return None
|
||||
try:
|
||||
if new_path.exists():
|
||||
try:
|
||||
new_path.unlink()
|
||||
debug(f"Replaced existing file: {new_name}", file=sys.stderr)
|
||||
except Exception as exc:
|
||||
debug(f"Warning: Could not replace target file {new_name}: {exc}", file=sys.stderr)
|
||||
return None
|
||||
|
||||
# If target exists, delete it first (replace mode)
|
||||
if new_path.exists():
|
||||
try:
|
||||
new_path.unlink()
|
||||
debug(f"Replaced existing file: {new_name}", file=sys.stderr)
|
||||
except Exception as e:
|
||||
debug(f"Warning: Could not replace target file {new_name}: {e}", file=sys.stderr)
|
||||
return None
|
||||
file_path.rename(new_path)
|
||||
debug(f"Renamed file: {old_name} -> {new_name}", file=sys.stderr)
|
||||
|
||||
file_path.rename(new_path)
|
||||
debug(f"Renamed file: {old_name} → {new_name}", file=sys.stderr)
|
||||
_rename_sidecar(".tag")
|
||||
_rename_sidecar(".metadata")
|
||||
|
||||
# Rename the .tag sidecar if it exists
|
||||
old_tags_path = file_path.parent / (old_name + '.tag')
|
||||
if old_tags_path.exists():
|
||||
new_tags_path = file_path.parent / (new_name + '.tag')
|
||||
if new_tags_path.exists():
|
||||
try:
|
||||
new_tags_path.unlink()
|
||||
except Exception:
|
||||
pass
|
||||
else:
|
||||
old_tags_path.rename(new_tags_path)
|
||||
debug(f"Renamed sidecar: {old_tags_path.name} → {new_tags_path.name}", file=sys.stderr)
|
||||
|
||||
# Rename the .metadata sidecar if it exists
|
||||
old_metadata_path = file_path.parent / (old_name + '.metadata')
|
||||
if old_metadata_path.exists():
|
||||
new_metadata_path = file_path.parent / (new_name + '.metadata')
|
||||
if new_metadata_path.exists():
|
||||
debug(f"Warning: Target metadata already exists: {new_metadata_path.name}", file=sys.stderr)
|
||||
else:
|
||||
old_metadata_path.rename(new_metadata_path)
|
||||
debug(f"Renamed metadata: {old_metadata_path.name} → {new_metadata_path.name}", file=sys.stderr)
|
||||
|
||||
return new_path
|
||||
except Exception as exc:
|
||||
debug(f"Warning: Failed to rename file: {exc}", file=sys.stderr)
|
||||
return None
|
||||
return new_path
|
||||
except Exception as exc:
|
||||
debug(f"Warning: Failed to rename file: {exc}", file=sys.stderr)
|
||||
return None
|
||||
|
||||
|
||||
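# Illustrative sketch (not part of the diff): expected behaviour of the rewritten rename(),
# assuming a "title:" tag is present. The paths below are hypothetical.
#
#   rename(Path("downloads/clip.mp4"), ["title:My Talk", "artist:Someone"])
#   -> Path("downloads/My Talk.mp4"), with clip.mp4.tag / clip.mp4.metadata moved to
#      "My Talk.mp4.tag" / "My Talk.mp4.metadata" when those sidecars exist.
#
# Without a title: tag, or when the file already has the target name, it returns None.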
def write_tags(media_path: Path, tags: Iterable[str], url: Iterable[str], hash_value: Optional[str] = None, db=None) -> None:
|
||||
@@ -2096,26 +2064,7 @@ def apply_tag_mutation(payload: Dict[str, Any], operation: str = 'add') -> Dict[
|
||||
|
||||
|
||||
def extract_ytdlp_tags(entry: Dict[str, Any]) -> List[str]:
|
||||
"""Extract meaningful metadata tags from yt-dlp entry.
|
||||
|
||||
This is the UNIFIED API for extracting tags from yt-dlp metadata.
|
||||
All modules (download_data, merge_file, etc.) should use this function
|
||||
instead of implementing their own extraction logic.
|
||||
|
||||
Extracts meaningful tags (artist, album, creator, genre, track, etc.)
|
||||
while excluding technical fields (filesize, duration, format, etc.).
|
||||
|
||||
Args:
|
||||
entry: yt-dlp entry metadata dictionary from download
|
||||
|
||||
Returns:
|
||||
List of normalized tag strings in format "namespace:value"
|
||||
|
||||
Example:
|
||||
>>> entry = {'artist': 'The Beatles', 'album': 'Abbey Road', 'duration': 5247}
|
||||
>>> tags = extract_ytdlp_tags(entry)
|
||||
>>> debug(tags)
|
||||
['artist:The Beatles', 'album:Abbey Road']
|
||||
"""
|
||||
"""
|
||||
tags: List[str] = []
|
||||
seen_namespaces: Set[str] = set()
|
||||
@@ -2186,7 +2135,7 @@ def extract_ytdlp_tags(entry: Dict[str, Any]) -> List[str]:
|
||||
def dedup_tags_by_namespace(tags: List[str], keep_first: bool = True) -> List[str]:
|
||||
"""Deduplicate tags by namespace, keeping consistent order.
|
||||
|
||||
This is the UNIFIED API for tag deduplication used across all cmdlets.
|
||||
This is the UNIFIED API for tag deduplication used across all cmdlet.
|
||||
Replaces custom deduplication logic in merge_file.py and other modules.
|
||||
|
||||
Groups tags by namespace (e.g., "artist", "album", "tag") and keeps
|
||||
@@ -2345,7 +2294,7 @@ def merge_multiple_tag_lists(
|
||||
def read_tags_from_file(file_path: Path) -> List[str]:
|
||||
"""Read and normalize tags from .tag sidecar file.
|
||||
|
||||
This is the UNIFIED API for reading .tag files across all cmdlets.
|
||||
This is the UNIFIED API for reading .tag files across all cmdlet.
|
||||
Handles normalization, deduplication, and format validation.
|
||||
|
||||
Args:
|
||||
@@ -2397,33 +2346,7 @@ def embed_metadata_in_file(
|
||||
tags: List[str],
|
||||
file_kind: str = ''
|
||||
) -> bool:
|
||||
"""Embed metadata tags into a media file using FFmpeg.
|
||||
|
||||
Extracts metadata from tags (namespace:value format) and writes to the file's
|
||||
metadata using FFmpeg with -c copy (no re-encoding).
|
||||
|
||||
Supported tag namespaces:
|
||||
- title, artist, album, track/track_number, date/year, genre, composer, comment
|
||||
|
||||
For audio files, applies sensible defaults:
|
||||
- If no album, uses title as album
|
||||
- If no track, defaults to 1
|
||||
- album_artist is set to artist value
|
||||
|
||||
Args:
|
||||
file_path: Path to media file
|
||||
tags: List of tags in format ['namespace:value', ...] (e.g., ['artist:Beatles', 'album:Abbey Road'])
|
||||
file_kind: Type of file: 'audio', 'video', or '' for auto-detect (optional)
|
||||
|
||||
Returns:
|
||||
True if successful, False otherwise
|
||||
|
||||
Raises:
|
||||
None (logs errors to stderr)
|
||||
|
||||
Example:
|
||||
>>> tags = ['artist:Beatles', 'album:Abbey Road', 'track:1']
|
||||
>>> success = embed_metadata_in_file(Path('song.mp3'), tags, file_kind='audio')
|
||||
"""
|
||||
"""
|
||||
if not tags:
|
||||
return True
|
||||
@@ -2550,7 +2473,7 @@ def write_tags_to_file(
|
||||
) -> bool:
|
||||
"""Write tags to .tag sidecar file.
|
||||
|
||||
This is the UNIFIED API for writing .tag files across all cmdlets.
|
||||
This is the UNIFIED API for writing .tag files across all cmdlet.
|
||||
Uses consistent format and handles file creation/overwriting.
|
||||
|
||||
Args:
|
||||
|
||||
pipeline.py (16 lines)
@@ -1,6 +1,6 @@
|
||||
"""Pipeline execution context and state management for cmdlets.
|
||||
"""Pipeline execution context and state management for cmdlet.
|
||||
|
||||
This module provides functions for managing pipeline state, allowing cmdlets to
|
||||
This module provides functions for managing pipeline state, allowing cmdlet to
|
||||
emit results and control printing behavior within a piped execution context.
|
||||
|
||||
Key Concepts:
|
||||
@@ -76,7 +76,7 @@ _PIPELINE_LAST_SELECTION: List[int] = []
|
||||
# Track the currently executing command/pipeline string for worker attribution
|
||||
_PIPELINE_COMMAND_TEXT: str = ""
|
||||
|
||||
# Shared scratchpad for cmdlets/funacts to stash structured data between stages
|
||||
# Shared scratchpad for cmdlet/funacts to stash structured data between stages
|
||||
_PIPELINE_VALUES: Dict[str, Any] = {}
|
||||
_PIPELINE_MISSING = object()
|
||||
|
||||
@@ -128,8 +128,8 @@ def emit(obj: Any) -> None:
|
||||
def emit_list(objects: List[Any]) -> None:
|
||||
"""Emit a list of objects to the next pipeline stage.
|
||||
|
||||
This allows cmdlets to emit multiple results that are tracked as a list,
|
||||
enabling downstream cmdlets to process all of them or filter by metadata.
|
||||
This allows cmdlet to emit multiple results that are tracked as a list,
|
||||
enabling downstream cmdlet to process all of them or filter by metadata.
|
||||
|
||||
Args:
|
||||
objects: List of objects to emit
|
||||
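# Illustrative sketch (not part of the diff): a stage emitting several payloads at once so
# the next stage can process or filter the whole batch. The payload keys are hypothetical.
import pipeline as ctx
ctx.emit_list([
    {"title": "a.mp3", "store": "local"},
    {"title": "b.mp3", "store": "hydrus"},
])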
@@ -143,7 +143,7 @@ def print_if_visible(*args: Any, file=None, **kwargs: Any) -> None:
|
||||
|
||||
- Always allow errors printed to stderr by callers (they pass file=sys.stderr).
|
||||
- For normal info messages, this suppresses printing for intermediate pipeline stages.
|
||||
- Use this instead of log() in cmdlets when you want stage-aware output.
|
||||
- Use this instead of log() in cmdlet when you want stage-aware output.
|
||||
|
||||
Args:
|
||||
*args: Arguments to print (same as built-in print)
|
||||
@@ -426,7 +426,7 @@ def get_ui_library_refresh_callback() -> Optional[Any]:
|
||||
def trigger_ui_library_refresh(library_filter: str = 'local') -> None:
|
||||
"""Trigger a library refresh in the UI if callback is registered.
|
||||
|
||||
This should be called from cmdlets/funacts after content is added to library.
|
||||
This should be called from cmdlet/funacts after content is added to library.
|
||||
|
||||
Args:
|
||||
library_filter: Which library to refresh ('local', 'hydrus', etc)
|
||||
@@ -732,7 +732,7 @@ def get_last_result_table_row_selection_args(row_index: int) -> Optional[List[st
|
||||
def set_current_stage_table(result_table: Optional[Any]) -> None:
|
||||
"""Store the current pipeline stage table for @N expansion.
|
||||
|
||||
Used by cmdlets that display tabular results (e.g., download-data with formats)
|
||||
Used by cmdlet that display tabular results (e.g., download-data with formats)
|
||||
to make their result table available for @N expansion logic.
|
||||
|
||||
Does NOT push to history - purely for command expansion in the current pipeline.
|
||||
|
||||
@@ -113,7 +113,7 @@ Repository = "https://github.com/yourusername/medeia-macina.git"
|
||||
Issues = "https://github.com/yourusername/medeia-macina/issues"
|
||||
|
||||
[tool.setuptools]
|
||||
packages = ["cmdlets", "helper", "TUI", "medeia_macina"]
|
||||
packages = ["cmdlet", "helper", "TUI", "medeia_macina"]
|
||||
|
||||
[tool.black]
|
||||
line-length = 100
|
||||
|
||||
@@ -34,7 +34,7 @@ server and uses it as a remote storage backend through the RemoteStorageBackend.
|
||||
|
||||
## USAGE
|
||||
|
||||
After setup, all cmdlets work with the phone:
|
||||
After setup, all cmdlet work with the phone:
|
||||
$ search-file zohar -store phone
|
||||
$ @1-3 | add-relationship -king @4 -store phone
|
||||
$ @1 | get-relationship -store phone
|
||||
|
||||
search_file.py (530 lines)
@@ -1,530 +0,0 @@
|
||||
"""Search-file cmdlet: Search for files by query, tag, size, type, duration, etc."""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Any, Dict, Sequence, List, Optional, Tuple
|
||||
from pathlib import Path
|
||||
from dataclasses import dataclass, field
|
||||
from collections import OrderedDict
|
||||
import re
|
||||
import json
|
||||
import sys
|
||||
|
||||
from SYS.logger import log, debug
|
||||
|
||||
from Provider.registry import get_search_provider
|
||||
|
||||
from cmdlets._shared import Cmdlet, CmdletArg, get_field, should_show_help
|
||||
import pipeline as ctx
|
||||
|
||||
|
||||
def get_origin(obj: Any, default: Any = None) -> Any:
|
||||
"""Return the canonical origin/table identifier from a payload-like object."""
|
||||
value = get_field(obj, "origin", None)
|
||||
if value is not None:
|
||||
return value
|
||||
value = get_field(obj, "table", None)
|
||||
if value is not None:
|
||||
return value
|
||||
value = get_field(obj, "store", None)
|
||||
if value is not None:
|
||||
return value
|
||||
return default
|
||||
|
||||
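# Illustrative sketch (not part of the diff): get_origin() falls back through the
# "origin", "table" and "store" fields before returning the default.
assert get_origin({"origin": "hydrus"}) == "hydrus"
assert get_origin({"table": "local"}) == "local"
assert get_origin({"title": "untagged"}, default="unknown") == "unknown"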
# Optional dependencies
|
||||
try:
|
||||
import mutagen # type: ignore
|
||||
except ImportError: # pragma: no cover
|
||||
mutagen = None # type: ignore
|
||||
|
||||
try:
|
||||
from config import get_hydrus_url, resolve_output_dir
|
||||
except Exception: # pragma: no cover
|
||||
get_hydrus_url = None # type: ignore
|
||||
resolve_output_dir = None # type: ignore
|
||||
|
||||
try:
|
||||
from API.HydrusNetwork import HydrusNetwork, HydrusRequestError
|
||||
except ImportError: # pragma: no cover
|
||||
HydrusNetwork = None # type: ignore
|
||||
HydrusRequestError = RuntimeError # type: ignore
|
||||
|
||||
try:
|
||||
from SYS.utils import sha256_file
|
||||
except ImportError: # pragma: no cover
|
||||
sha256_file = None # type: ignore
|
||||
|
||||
try:
|
||||
from SYS.utils_constant import mime_maps
|
||||
except ImportError: # pragma: no cover
|
||||
mime_maps = {} # type: ignore
|
||||
|
||||
@dataclass(slots=True)
|
||||
class SearchRecord:
|
||||
path: str
|
||||
size_bytes: int | None = None
|
||||
duration_seconds: str | None = None
|
||||
tag: str | None = None
|
||||
hash: str | None = None
|
||||
|
||||
def as_dict(self) -> dict[str, str]:
|
||||
payload: dict[str, str] = {"path": self.path}
|
||||
if self.size_bytes is not None:
|
||||
payload["size"] = str(self.size_bytes)
|
||||
if self.duration_seconds:
|
||||
payload["duration"] = self.duration_seconds
|
||||
if self.tag:
|
||||
payload["tag"] = self.tag
|
||||
if self.hash:
|
||||
payload["hash"] = self.hash
|
||||
return payload
|
||||
|
||||
|
||||
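# Illustrative sketch (not part of the diff): SearchRecord.as_dict() only includes the
# optional fields that were actually set. The sample values are hypothetical.
rec = SearchRecord(path="/music/a.flac", size_bytes=1024, tag="artist:someone")
assert rec.as_dict() == {"path": "/music/a.flac", "size": "1024", "tag": "artist:someone"}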
@dataclass
|
||||
class ResultItem:
|
||||
table: str # Renamed from origin
|
||||
title: str
|
||||
detail: str
|
||||
annotations: List[str]
|
||||
target: str
|
||||
media_kind: str = "other"
|
||||
hash: Optional[str] = None
|
||||
columns: List[tuple[str, str]] = field(default_factory=list)
|
||||
tag_summary: Optional[str] = None
|
||||
duration_seconds: Optional[float] = None
|
||||
size_bytes: Optional[int] = None
|
||||
full_metadata: Optional[Dict[str, Any]] = None
|
||||
tag: Optional[set[str]] = field(default_factory=set)
|
||||
relationships: Optional[List[str]] = field(default_factory=list)
|
||||
known_urls: Optional[List[str]] = field(default_factory=list)
|
||||
|
||||
@property
|
||||
def origin(self) -> str:
|
||||
return self.table
|
||||
|
||||
def to_dict(self) -> Dict[str, Any]:
|
||||
payload: Dict[str, Any] = {
|
||||
"title": self.title,
|
||||
}
|
||||
|
||||
# Always include these core fields for downstream cmdlets (get-file, download-data, etc)
|
||||
payload["table"] = self.table
|
||||
payload["target"] = self.target
|
||||
payload["media_kind"] = self.media_kind
|
||||
|
||||
# Always include full_metadata if present (needed by download-data, etc)
|
||||
# This is NOT for display, but for downstream processing
|
||||
if self.full_metadata:
|
||||
payload["full_metadata"] = self.full_metadata
|
||||
|
||||
# Include columns if defined (result renderer will use these for display)
|
||||
if self.columns:
|
||||
payload["columns"] = list(self.columns)
|
||||
else:
|
||||
# If no columns, include the detail for backwards compatibility
|
||||
payload["detail"] = self.detail
|
||||
payload["annotations"] = list(self.annotations)
|
||||
|
||||
# Include optional fields
|
||||
if self.hash:
|
||||
payload["hash"] = self.hash
|
||||
if self.tag_summary:
|
||||
payload["tag_summary"] = self.tag_summary
|
||||
if self.tag:
|
||||
payload["tag"] = list(self.tag)
|
||||
if self.relationships:
|
||||
payload["relationships"] = self.relationships
|
||||
if self.known_urls:
|
||||
payload["known_urls"] = self.known_urls
|
||||
return payload
|
||||
|
||||
|
||||
STORAGE_ORIGINS = {"local", "hydrus", "debrid"}
|
||||
|
||||
|
||||
class Search_File(Cmdlet):
|
||||
"""Class-based search-file cmdlet with self-registration."""
|
||||
|
||||
def __init__(self) -> None:
|
||||
super().__init__(
|
||||
name="search-file",
|
||||
summary="Unified search cmdlet for storage (Hydrus, Local) and providers (Debrid, LibGen, OpenLibrary, Soulseek).",
|
||||
usage="search-file [query] [-tag TAG] [-size >100MB|<50MB] [-type audio|video|image] [-duration >10:00] [-store BACKEND] [-provider PROVIDER]",
|
||||
arg=[
|
||||
CmdletArg("query", description="Search query string"),
|
||||
CmdletArg("tag", description="Filter by tag (can be used multiple times)"),
|
||||
CmdletArg("size", description="Filter by size: >100MB, <50MB, =10MB"),
|
||||
CmdletArg("type", description="Filter by type: audio, video, image, document"),
|
||||
CmdletArg("duration", description="Filter by duration: >10:00, <1:30:00"),
|
||||
CmdletArg("limit", type="integer", description="Limit results (default: 45)"),
|
||||
CmdletArg("store", description="Search storage backend: hydrus, local (default: all searchable storages)"),
|
||||
CmdletArg("provider", description="Search provider: libgen, openlibrary, soulseek, debrid, local (overrides -storage)"),
|
||||
],
|
||||
detail=[
|
||||
"Search across storage (Hydrus, Local) and providers (Debrid, LibGen, OpenLibrary, Soulseek)",
|
||||
"Use -provider to search a specific source, or -store to search file backends",
|
||||
"Filter results by: tag, size, type, duration",
|
||||
"Results can be piped to other commands",
|
||||
"Examples:",
|
||||
"search-file foo # Search all file backends",
|
||||
"search-file -provider libgen 'python programming' # Search LibGen books",
|
||||
"search-file -provider debrid 'movie' # Search AllDebrid magnets",
|
||||
"search-file 'music' -provider soulseek # Search Soulseek P2P",
|
||||
"search-file -provider openlibrary 'tolkien' # Search OpenLibrary",
|
||||
"search-file song -store hydrus -type audio # Search only Hydrus audio",
|
||||
"search-file movie -tag action -provider debrid # Debrid with filters",
|
||||
],
|
||||
exec=self.run,
|
||||
)
|
||||
self.register()
|
||||
|
||||
# --- Helper methods -------------------------------------------------
|
||||
@staticmethod
|
||||
def _normalize_extension(ext_value: Any) -> str:
|
||||
"""Sanitize extension strings to alphanumerics and cap at 5 chars."""
|
||||
ext = str(ext_value or "").strip().lstrip(".")
|
||||
for sep in (" ", "|", "(", "[", "{", ",", ";"):
|
||||
if sep in ext:
|
||||
ext = ext.split(sep, 1)[0]
|
||||
break
|
||||
if "." in ext:
|
||||
ext = ext.split(".")[-1]
|
||||
ext = "".join(ch for ch in ext if ch.isalnum())
|
||||
return ext[:5]
|
||||
|
||||
def _ensure_storage_columns(self, payload: Dict[str, Any]) -> Dict[str, Any]:
|
||||
"""Ensure storage results have the necessary fields for result_table display."""
|
||||
store_value = str(get_origin(payload, "") or "").lower()
|
||||
if store_value not in STORAGE_ORIGINS:
|
||||
return payload
|
||||
|
||||
# Ensure we have title field
|
||||
if "title" not in payload:
|
||||
payload["title"] = payload.get("name") or payload.get("target") or payload.get("path") or "Result"
|
||||
|
||||
# Ensure we have ext field
|
||||
if "ext" not in payload:
|
||||
title = str(payload.get("title", ""))
|
||||
path_obj = Path(title)
|
||||
if path_obj.suffix:
|
||||
payload["ext"] = self._normalize_extension(path_obj.suffix.lstrip('.'))
|
||||
else:
|
||||
payload["ext"] = payload.get("ext", "")
|
||||
|
||||
# Ensure size_bytes is present for display (already set by search_file())
|
||||
# result_table will handle formatting it
|
||||
|
||||
# Don't create manual columns - let result_table handle display
|
||||
# This allows the table to respect max_columns and apply consistent formatting
|
||||
return payload
|
||||
|
||||
# --- Execution ------------------------------------------------------
|
||||
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
"""Search across multiple providers: Hydrus, Local, Debrid, LibGen, etc."""
|
||||
if should_show_help(args):
|
||||
log(f"Cmdlet: {self.name}\nSummary: {self.summary}\nUsage: {self.usage}")
|
||||
return 0
|
||||
|
||||
args_list = [str(arg) for arg in (args or [])]
|
||||
|
||||
# Parse arguments
|
||||
query = ""
|
||||
tag_filters: List[str] = []
|
||||
size_filter: Optional[Tuple[str, int]] = None
|
||||
duration_filter: Optional[Tuple[str, float]] = None
|
||||
type_filter: Optional[str] = None
|
||||
storage_backend: Optional[str] = None
|
||||
provider_name: Optional[str] = None
|
||||
limit = 45
|
||||
searched_backends: List[str] = []
|
||||
|
||||
i = 0
|
||||
while i < len(args_list):
|
||||
arg = args_list[i]
|
||||
low = arg.lower()
|
||||
if low in {"-provider", "--provider"} and i + 1 < len(args_list):
|
||||
provider_name = args_list[i + 1].lower()
|
||||
i += 2
|
||||
elif low in {"-store", "--store", "-storage", "--storage"} and i + 1 < len(args_list):
|
||||
storage_backend = args_list[i + 1].lower()
|
||||
i += 2
|
||||
elif low in {"-tag", "--tag"} and i + 1 < len(args_list):
|
||||
tag_filters.append(args_list[i + 1])
|
||||
i += 2
|
||||
elif low in {"-limit", "--limit"} and i + 1 < len(args_list):
|
||||
try:
|
||||
limit = int(args_list[i + 1])
|
||||
except ValueError:
|
||||
limit = 100
|
||||
i += 2
|
||||
elif low in {"-type", "--type"} and i + 1 < len(args_list):
|
||||
type_filter = args_list[i + 1].lower()
|
||||
i += 2
|
||||
elif not arg.startswith("-"):
|
||||
query = f"{query} {arg}".strip() if query else arg
|
||||
i += 1
|
||||
else:
|
||||
i += 1
|
||||
|
||||
store_filter: Optional[str] = None
|
||||
if query:
|
||||
match = re.search(r"\bstore:([^\s,]+)", query, flags=re.IGNORECASE)
|
||||
if match:
|
||||
store_filter = match.group(1).strip().lower() or None
|
||||
query = re.sub(r"\s*[,]?\s*store:[^\s,]+", " ", query, flags=re.IGNORECASE)
|
||||
query = re.sub(r"\s{2,}", " ", query)
|
||||
query = query.strip().strip(',')
|
||||
|
||||
if storage_backend and storage_backend.lower() == "debrid":
|
||||
log("Use -provider debrid instead of -store debrid (debrid is provider-only)", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
if store_filter and not provider_name and not storage_backend:
|
||||
if store_filter in {"hydrus", "local", "debrid"}:
|
||||
storage_backend = store_filter
|
||||
|
||||
# --- Feature: Filter provider result table by Name column ---
|
||||
filter_after_search: Optional[str] = None
|
||||
if result:
|
||||
actual_result = result[0] if isinstance(result, list) and result else result
|
||||
origin = get_origin(actual_result)
|
||||
target = get_field(actual_result, 'target')
|
||||
|
||||
# If the incoming result is from a provider (not storage) AND this invocation looks like a filter (no flags)
|
||||
positional_args = [a for a in args_list if not a.startswith('-')]
|
||||
no_flags = len(positional_args) == len(args_list)
|
||||
looks_like_filter = no_flags and len(positional_args) == 1 and not provider_name and not storage_backend and not tag_filters and not size_filter and not duration_filter and not type_filter
|
||||
|
||||
if origin and origin.lower() not in STORAGE_ORIGINS and looks_like_filter and query:
|
||||
# Save the filter string to apply AFTER loading the provider data
|
||||
filter_after_search = query.strip()
|
||||
query = "" # Clear query so we load the target URL instead
|
||||
|
||||
# If result is from a provider, extract the target as query and set provider
|
||||
if not query:
|
||||
if origin == 'bandcamp' and target:
|
||||
query = target
|
||||
if not provider_name:
|
||||
provider_name = 'bandcamp'
|
||||
elif origin == 'youtube' and target:
|
||||
query = target
|
||||
if not provider_name:
|
||||
provider_name = 'youtube'
|
||||
elif target and str(target).startswith(('http://', 'https://')):
|
||||
query = target
|
||||
if not provider_name:
|
||||
if 'bandcamp.com' in target:
|
||||
provider_name = 'bandcamp'
|
||||
elif 'youtube.com' in target or 'youtu.be' in target:
|
||||
provider_name = 'youtube'
|
||||
|
||||
if not query:
|
||||
log("Provide a search query", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
from API.folder import API_folder_store
|
||||
from config import get_local_storage_path
|
||||
import uuid
|
||||
worker_id = str(uuid.uuid4())
|
||||
library_root = get_local_storage_path(config or {})
|
||||
if not library_root:
|
||||
log("No library root configured", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
db = None
|
||||
try:
|
||||
db = API_folder_store(library_root)
|
||||
db.insert_worker(
|
||||
worker_id,
|
||||
"search",
|
||||
title=f"Search: {query}",
|
||||
description=f"Query: {query}",
|
||||
pipe=ctx.get_current_command_text()
|
||||
)
|
||||
|
||||
results_list = []
|
||||
import result_table
|
||||
import importlib
|
||||
importlib.reload(result_table)
|
||||
from result_table import ResultTable
|
||||
|
||||
table_title = f"Search: {query}"
|
||||
if provider_name:
|
||||
table_title += f" [{provider_name}]"
|
||||
elif storage_backend:
|
||||
table_title += f" [{storage_backend}]"
|
||||
|
||||
preserve_order = provider_name and provider_name.lower() in ('youtube', 'openlibrary')
|
||||
# Avoid setting source_command so @N does not re-run search-file; preserve row order when needed
|
||||
table = ResultTable(table_title).set_preserve_order(preserve_order)
|
||||
|
||||
if provider_name:
|
||||
debug(f"[search_file] Attempting provider search with: {provider_name}")
|
||||
provider = get_search_provider(provider_name, config)
|
||||
if not provider:
|
||||
log(f"Provider '{provider_name}' not available", file=sys.stderr)
|
||||
db.update_worker_status(worker_id, 'error')
|
||||
return 1
|
||||
|
||||
debug(f"[search_file] Provider loaded, calling search with query: {query}")
|
||||
search_result = provider.search(query, limit=limit)
|
||||
debug(f"[search_file] Provider search returned {len(search_result)} results")
|
||||
|
||||
# Apply post-search filter if one was set
|
||||
if filter_after_search:
|
||||
debug(f"[search_file] Applying filter: {filter_after_search}")
|
||||
filtered_result = []
|
||||
for item in search_result:
|
||||
item_dict = item.to_dict() if hasattr(item, 'to_dict') else dict(item)
|
||||
title_val = get_field(item_dict, 'title') or get_field(item_dict, 'name') or ""
|
||||
if filter_after_search.lower() in str(title_val).lower():
|
||||
filtered_result.append(item)
|
||||
search_result = filtered_result
|
||||
if not search_result:
|
||||
log(f"No results match filter: '{filter_after_search}'", file=sys.stderr)
|
||||
db.update_worker_status(worker_id, 'completed')
|
||||
return 0
|
||||
debug(f"[search_file] Filter matched {len(search_result)} results")
|
||||
table.title = f"Filter: {filter_after_search}"
|
||||
|
||||
for item in search_result:
|
||||
table.add_result(item)
|
||||
item_dict = item.to_dict()
|
||||
results_list.append(item_dict)
|
||||
ctx.emit(item_dict)
|
||||
|
||||
ctx.set_last_result_table(table, results_list)
|
||||
debug(f"[search_file] Emitted {len(results_list)} results")
|
||||
db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2))
|
||||
db.update_worker_status(worker_id, 'completed')
|
||||
return 0
|
||||
|
||||
from Store import Store
|
||||
storage = Store(config=config or {}, suppress_debug=True)
|
||||
|
||||
backend_to_search = storage_backend or None
|
||||
if backend_to_search:
|
||||
if backend_to_search == "hydrus":
|
||||
from API.HydrusNetwork import is_hydrus_available
|
||||
if not is_hydrus_available(config or {}):
|
||||
log(f"Backend 'hydrus' is not available (Hydrus service not running)", file=sys.stderr)
|
||||
db.update_worker_status(worker_id, 'error')
|
||||
return 1
|
||||
searched_backends.append(backend_to_search)
|
||||
target_backend = storage[backend_to_search]
|
||||
results = target_backend.search(query, limit=limit)
|
||||
else:
|
||||
from API.HydrusNetwork import is_hydrus_available
|
||||
hydrus_available = is_hydrus_available(config or {})
|
||||
|
||||
all_results = []
|
||||
for backend_name in storage.list_searchable_backends():
|
||||
if backend_name == "hydrus" and not hydrus_available:
|
||||
continue
|
||||
searched_backends.append(backend_name)
|
||||
try:
|
||||
backend_results = storage[backend_name].search(query, limit=limit - len(all_results))
|
||||
if backend_results:
|
||||
all_results.extend(backend_results)
|
||||
if len(all_results) >= limit:
|
||||
break
|
||||
except Exception as exc:
|
||||
log(f"Backend {backend_name} search failed: {exc}", file=sys.stderr)
|
||||
results = all_results[:limit]
|
||||
|
||||
if not provider_name and not storage_backend:
|
||||
try:
|
||||
debrid_provider = get_search_provider("debrid", config)
|
||||
if debrid_provider and debrid_provider.validate():
|
||||
remaining = max(0, limit - len(results)) if isinstance(results, list) else limit
|
||||
if remaining > 0:
|
||||
debrid_results = debrid_provider.search(query, limit=remaining)
|
||||
if debrid_results:
|
||||
if "debrid" not in searched_backends:
|
||||
searched_backends.append("debrid")
|
||||
if results is None:
|
||||
results = []
|
||||
results.extend(debrid_results)
|
||||
except Exception as exc:
|
||||
log(f"Debrid provider search failed: {exc}", file=sys.stderr)
|
||||
|
||||
def _format_storage_label(name: str) -> str:
|
||||
clean = str(name or "").strip()
|
||||
if not clean:
|
||||
return "Unknown"
|
||||
return clean.replace("_", " ").title()
|
||||
|
||||
storage_counts: OrderedDict[str, int] = OrderedDict((name, 0) for name in searched_backends)
|
||||
for item in results or []:
|
||||
origin = get_origin(item)
|
||||
if not origin:
|
||||
continue
|
||||
key = str(origin).lower()
|
||||
if key not in storage_counts:
|
||||
storage_counts[key] = 0
|
||||
storage_counts[key] += 1
|
||||
|
||||
if storage_counts or query:
|
||||
display_counts = OrderedDict((_format_storage_label(name), count) for name, count in storage_counts.items())
|
||||
summary_line = table.set_storage_summary(display_counts, query, inline=True)
|
||||
if summary_line:
|
||||
table.title = summary_line
|
||||
|
||||
if results:
|
||||
for item in results:
|
||||
def _as_dict(obj: Any) -> Dict[str, Any]:
|
||||
if isinstance(obj, dict):
|
||||
return dict(obj)
|
||||
if hasattr(obj, "to_dict") and callable(getattr(obj, "to_dict")):
|
||||
return obj.to_dict() # type: ignore[arg-type]
|
||||
return {"title": str(obj)}
|
||||
|
||||
item_dict = _as_dict(item)
|
||||
if store_filter:
|
||||
origin_val = str(get_origin(item_dict) or "").lower()
|
||||
if store_filter != origin_val:
|
||||
continue
|
||||
normalized = self._ensure_storage_columns(item_dict)
|
||||
|
||||
# Make hash/store available for downstream cmdlets without rerunning search-file
|
||||
hash_val = normalized.get("hash")
|
||||
store_val = normalized.get("store") or get_origin(item_dict)
|
||||
if hash_val and not normalized.get("hash"):
|
||||
normalized["hash"] = hash_val
|
||||
if store_val and not normalized.get("store"):
|
||||
normalized["store"] = store_val
|
||||
|
||||
table.add_result(normalized)
|
||||
|
||||
results_list.append(normalized)
|
||||
ctx.emit(normalized)
|
||||
|
||||
ctx.set_last_result_table(table, results_list)
|
||||
db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2))
|
||||
else:
|
||||
log("No results found", file=sys.stderr)
|
||||
db.append_worker_stdout(worker_id, json.dumps([], indent=2))
|
||||
|
||||
db.update_worker_status(worker_id, 'completed')
|
||||
return 0
|
||||
|
||||
except Exception as exc:
|
||||
log(f"Search failed: {exc}", file=sys.stderr)
|
||||
import traceback
|
||||
traceback.print_exc(file=sys.stderr)
|
||||
if db:
|
||||
try:
|
||||
db.update_worker_status(worker_id, 'error')
|
||||
except Exception:
|
||||
pass
|
||||
return 1
|
||||
|
||||
finally:
|
||||
if db:
|
||||
try:
|
||||
db.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
CMDLET = Search_File()