commit 5482ee5586 (parent 5e4df11dbf)
Author: nose
Date: 2025-12-05 03:42:57 -08:00

20 changed files with 911 additions and 223 deletions

CLI.py
View File

@@ -407,24 +407,27 @@ def _get_cmdlet_names() -> List[str]:
return []
def _import_cmd_module(mod_name: str):
"""Import a cmdlet/native module from cmdlets or cmdnats packages."""
for package in ("cmdlets", "cmdnats", None):
try:
qualified = f"{package}.{mod_name}" if package else mod_name
return import_module(qualified)
except ModuleNotFoundError:
continue
except Exception:
continue
return None
def _get_cmdlet_args(cmd_name: str) -> List[str]:
"""Get list of argument flags for a cmdlet (with - and -- prefixes)."""
try:
# Try to load CMDLET object from the module
mod_name = cmd_name.replace("-", "_")
# Try the cmdlets/cmdnats packages first, then root-level modules
data = None
mod = _import_cmd_module(mod_name)
if mod:
data = getattr(mod, "CMDLET", None)
if data:
# If CMDLET is an object (not dict), use build_flag_registry if available
@@ -458,25 +461,56 @@ def _get_arg_choices(cmd_name: str, arg_name: str) -> List[str]:
"""Get list of valid choices for a specific cmdlet argument."""
try:
mod_name = cmd_name.replace("-", "_")
normalized_arg = arg_name.lstrip("-")
# Dynamic storage backends: use current config to enumerate available storages
if normalized_arg == "storage":
try:
from helper.file_storage import FileStorage
storage = FileStorage(_load_cli_config())
backends = storage.list_backends()
if backends:
return backends
except Exception:
pass
# Dynamic search providers
if normalized_arg == "provider":
try:
from helper.search_provider import list_providers
providers = list_providers(_load_cli_config())
available = [name for name, is_ready in providers.items() if is_ready]
provider_choices = sorted(available) if available else sorted(providers.keys())
except Exception:
provider_choices = []
try:
from helper.metadata_search import list_metadata_providers
meta_providers = list_metadata_providers(_load_cli_config())
meta_available = [n for n, ready in meta_providers.items() if ready]
meta_choices = sorted(meta_available) if meta_available else sorted(meta_providers.keys())
except Exception:
meta_choices = []
merged = sorted(set(provider_choices + meta_choices))
if merged:
return merged
mod = _import_cmd_module(mod_name)
data = getattr(mod, "CMDLET", None) if mod else None
if data:
args_list = data.get("args", []) if isinstance(data, dict) else getattr(data, "args", [])
for arg in args_list:
if isinstance(arg, dict):
arg_obj_name = arg.get("name", "")
else:
arg_obj_name = getattr(arg, "name", "")
if arg_obj_name == arg_name:
# Found matching arg, get choices
if isinstance(arg, dict):
return arg.get("choices", [])
else:
return getattr(arg, "choices", [])
return []
except Exception:
return []
@@ -1575,43 +1609,40 @@ def _show_cmdlet_list():
from cmdlets import REGISTRY
import os
# Collect unique commands by scanning cmdlet modules
cmdlet_info = {}
base_dir = os.path.dirname(__file__)
def _collect_cmdlets_from_dir(folder: str, package: str) -> None:
if not os.path.isdir(folder):
return
for filename in os.listdir(folder):
if filename.endswith(".py") and not filename.startswith("_") and filename != "__init__.py":
mod_name = filename[:-3]
try:
mod = import_module(f"{package}.{mod_name}")
if hasattr(mod, "CMDLET"):
cmdlet = getattr(mod, "CMDLET")
if hasattr(cmdlet, "name"):
cmd_name = cmdlet.name
aliases = getattr(cmdlet, "aliases", []) if hasattr(cmdlet, "aliases") else []
arg_names = []
if hasattr(cmdlet, "args"):
for arg in cmdlet.args:
if hasattr(arg, "name"):
arg_names.append(arg.name)
elif isinstance(arg, dict):
arg_names.append(arg.get("name", ""))
if cmd_name not in cmdlet_info:
cmdlet_info[cmd_name] = {
"aliases": aliases,
"args": arg_names,
}
except Exception:
pass
_collect_cmdlets_from_dir(os.path.join(base_dir, "cmdlets"), "cmdlets")
_collect_cmdlets_from_dir(os.path.join(base_dir, "cmdnats"), "cmdnats")
# Also check root-level cmdlets (search_*, etc)
# Note: search_libgen, search_soulseek, and search_debrid are consolidated into search-file with providers
@@ -1700,14 +1731,11 @@ def _show_cmdlet_help(cmd_name: str):
"""Display help for a cmdlet."""
try:
mod_name = cmd_name.replace("-", "_")
mod = _import_cmd_module(mod_name)
data = getattr(mod, "CMDLET", None) if mod else None
if data:
_print_metadata(cmd_name, data)
return
from cmdlets import REGISTRY
cmd_fn = REGISTRY.get(cmd_name)
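
For reference, the package-fallback import that replaces the nested try/except blocks reduces to this standalone sketch (module names illustrative):

    from importlib import import_module

    def resolve_module(mod_name: str):
        """Try 'cmdlets.<name>', then 'cmdnats.<name>', then the bare name."""
        for package in ("cmdlets", "cmdnats", None):
            qualified = f"{package}.{mod_name}" if package else mod_name
            try:
                return import_module(qualified)
            except Exception:
                continue
        return None

    # resolve_module("add_tags") returns the first importable match, or None.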

View File

@@ -91,38 +91,54 @@ def format_cmd_help(cmdlet) -> str:
import os
cmdlet_dir = os.path.dirname(__file__)
for filename in os.listdir(cmdlet_dir):
if not (
filename.endswith(".py")
and not filename.startswith("_")
and filename != "__init__.py"
):
continue
mod_name = filename[:-3]
# Enforce Powershell-style two-word cmdlet naming (e.g., add_file, get_file)
# Skip native/utility scripts that are not cmdlets (e.g., adjective, worker, matrix, pipe)
if "_" not in mod_name:
continue
try:
module = _import_module(f".{mod_name}", __name__)
# Auto-register based on CMDLET object with exec function
# This allows cmdlets to be fully self-contained in the CMDLET object
if hasattr(module, 'CMDLET'):
cmdlet_obj = module.CMDLET
# Get the execution function from the CMDLET object
run_fn = getattr(cmdlet_obj, 'exec', None) if hasattr(cmdlet_obj, 'exec') else None
if callable(run_fn):
# Register main name
if hasattr(cmdlet_obj, 'name') and cmdlet_obj.name:
normalized_name = cmdlet_obj.name.replace('_', '-').lower()
REGISTRY[normalized_name] = run_fn
# Register all aliases
if hasattr(cmdlet_obj, 'aliases') and cmdlet_obj.aliases:
for alias in cmdlet_obj.aliases:
normalized_alias = alias.replace('_', '-').lower()
REGISTRY[normalized_alias] = run_fn
except Exception as e:
import sys
print(f"Error importing cmdlet '{mod_name}': {e}", file=sys.stderr)
continue
# Import and register native commands that are not considered cmdlets
try:
from cmdnats import register_native_commands as _register_native_commands
_register_native_commands(REGISTRY)
except Exception:
# Native commands are optional; ignore if unavailable
pass
# Import root-level modules that also register cmdlets
# Note: search_libgen, search_soulseek, and search_debrid are now consolidated into search_provider.py
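
The auto-registration convention above, in isolation — a runnable sketch that uses SimpleNamespace as a stand-in for a real CMDLET object:

    from types import SimpleNamespace
    from typing import Any, Callable, Dict

    REGISTRY: Dict[str, Callable[..., int]] = {}

    def register_cmdlet(cmdlet_obj: Any) -> None:
        # Register the exec callable under the normalized name and every alias.
        run_fn = getattr(cmdlet_obj, "exec", None)
        if not callable(run_fn):
            return
        names = [getattr(cmdlet_obj, "name", "") or ""]
        names += list(getattr(cmdlet_obj, "aliases", []) or [])
        for name in names:
            if name:
                REGISTRY[name.replace("_", "-").lower()] = run_fn

    demo = SimpleNamespace(name="add_tags", aliases=["add-tag"], exec=lambda *a: 0)
    register_cmdlet(demo)
    assert set(REGISTRY) == {"add-tags", "add-tag"}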

View File

@@ -267,13 +267,19 @@ def _handle_local_transfer(media_path: Path, destination_root: Path, result: Any
log(f"Warning: Failed to rename file to match title: {e}", file=sys.stderr)
try:
# Ensure filename is the hash when adding to local storage
resolved_hash = _resolve_file_hash(result, sidecar_hash, media_path)
if resolved_hash:
hashed_name = resolved_hash + media_path.suffix
target_path = destination_root / hashed_name
media_path = media_path.rename(target_path) if media_path != target_path else media_path
dest_file = storage["local"].upload(media_path, location=str(destination_root), move=True)
except Exception as exc:
log(f"❌ Failed to move file into {destination_root}: {exc}", file=sys.stderr)
return 1, None
dest_path = Path(dest_file)
file_hash = _resolve_file_hash(result, sidecar_hash, dest_path)
file_hash = _resolve_file_hash(result, resolved_hash, dest_path)
media_kind = _resolve_media_kind(result, dest_path)
# If we have a title tag, keep it. Otherwise, derive from filename.
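
The rename-to-hash convention can be sketched on its own; the paths and digest here are illustrative:

    from pathlib import Path

    def hashed_destination(media_path: Path, destination_root: Path, sha256_hex: str) -> Path:
        # Local storage keys files by content hash while keeping the original extension.
        return destination_root / (sha256_hex + media_path.suffix)

    # hashed_destination(Path("song.mp3"), Path("/store"), "ab12cd") -> Path('/store/ab12cd.mp3')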

View File

@@ -18,31 +18,17 @@ from ._shared import Cmdlet, CmdletArg, normalize_hash, parse_tag_arguments, exp
from config import get_local_storage_path
def _extract_title_tag(tags: List[str]) -> Optional[str]:
"""Return the value of the first title: tag if present."""
for tag in tags:
if isinstance(tag, str) and tag.lower().startswith("title:"):
value = tag.split(":", 1)[1].strip()
if value:
return value
return None
@register(["add-tag", "add-tags"])
def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
@@ -71,11 +57,30 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
log("No valid files to tag (all results were temporary; use --all to include temporary files)", file=sys.stderr)
return 1
# Get tags from arguments (or fallback to pipeline payload)
raw_tags = parsed.get("tags", [])
if isinstance(raw_tags, str):
raw_tags = [raw_tags]
# Fallback: if no tags provided explicitly, try to pull from first result payload
if not raw_tags and results:
first = results[0]
payload_tags = None
if isinstance(first, models.PipeObject):
payload_tags = first.extra.get("tags") if isinstance(first.extra, dict) else None
elif isinstance(first, dict):
payload_tags = first.get("tags")
if not payload_tags:
payload_tags = first.get("extra", {}).get("tags") if isinstance(first.get("extra"), dict) else None
# If metadata payload stored tags under nested list, accept directly
if payload_tags is None:
payload_tags = getattr(first, "tags", None)
if payload_tags:
if isinstance(payload_tags, str):
raw_tags = [payload_tags]
elif isinstance(payload_tags, list):
raw_tags = payload_tags
# Handle -list argument (convert to {list} syntax)
list_arg = parsed.get("list")
if list_arg:
@@ -88,6 +93,10 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
tags_to_add = parse_tag_arguments(raw_tags)
tags_to_add = expand_tag_groups(tags_to_add)
if not tags_to_add:
log("No tags provided to add", file=sys.stderr)
return 1
# Get other flags
hash_override = normalize_hash(parsed.get("hash"))
duplicate_arg = parsed.get("duplicate")
@@ -139,6 +148,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Tags ARE provided - append them to each result and write sidecar files or add to Hydrus
sidecar_count = 0
removed_tags: List[str] = []
for res in results:
# Handle both dict and PipeObject formats
file_path = None
@@ -166,6 +176,10 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
hydrus_hash = res.get('hydrus_hash') or res.get('hash') or res.get('hash_hex')
if not hydrus_hash and 'extra' in res:
hydrus_hash = res['extra'].get('hydrus_hash') or res['extra'].get('hash') or res['extra'].get('hash_hex')
if not hydrus_hash and file_hash:
hydrus_hash = file_hash
if not storage_source and hydrus_hash and not file_path:
storage_source = 'hydrus'
else:
ctx.emit(res)
continue
@@ -215,6 +229,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Check if this is a namespaced tag (format: "namespace:value")
if ':' in new_tag:
namespace = new_tag.split(':', 1)[0]
# Track removals for Hydrus: delete old tags in same namespace (except identical)
to_remove = [t for t in existing_tags if t.startswith(namespace + ':') and t.lower() != new_tag.lower()]
removed_tags.extend(to_remove)
# Remove any existing tags with the same namespace
existing_tags = [t for t in existing_tags if not (t.startswith(namespace + ':'))]
@@ -227,6 +244,14 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
res.extra['tags'] = existing_tags
elif isinstance(res, dict):
res['tags'] = existing_tags
# If a title: tag was added, update the in-memory title so downstream display reflects it immediately
title_value = _extract_title_tag(existing_tags)
if title_value:
if isinstance(res, models.PipeObject):
res.title = title_value
elif isinstance(res, dict):
res['title'] = title_value
# Determine where to add tags: Hydrus, local DB, or sidecar
if storage_source and storage_source.lower() == 'hydrus':
@@ -237,6 +262,10 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
log(f"[add_tags] Adding {len(existing_tags)} tag(s) to Hydrus file: {target_hash}", file=sys.stderr)
hydrus_client = hydrus_wrapper.get_client(config)
hydrus_client.add_tags(target_hash, existing_tags, "my tags")
# Delete old namespace tags we replaced (e.g., previous title:)
if removed_tags:
unique_removed = sorted(set(removed_tags))
hydrus_client.delete_tags(target_hash, unique_removed, "my tags")
log(f"[add_tags] ✓ Tags added to Hydrus", file=sys.stderr)
sidecar_count += 1
except Exception as e:
@@ -274,3 +303,29 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
log(f"[add_tags] Processed {len(results)} result(s)", file=sys.stderr)
return 0
CMDLET = Cmdlet(
name="add-tags",
summary="Add tags to a Hydrus file or write them to a local .tags sidecar.",
usage="add-tags [-hash <sha256>] [-duplicate <format>] [-list <list>[,<list>...]] [--all] <tag>[,<tag>...]",
args=[
CmdletArg("-hash", type="string", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
CmdletArg("-duplicate", type="string", description="Copy existing tag values to new namespaces. Formats: title:album,artist (explicit) or title,album,artist (inferred)"),
CmdletArg("-list", type="string", description="Load predefined tag lists from adjective.json. Comma-separated list names (e.g., -list philosophy,occult)."),
CmdletArg("--all", type="flag", description="Include temporary files in tagging (by default, only tags non-temporary files)."),
CmdletArg("tags", type="string", required=False, description="One or more tags to add. Comma- or space-separated. Can also use {list_name} syntax. If omitted, uses tags from pipeline payload.", variadic=True),
],
details=[
"- By default, only tags non-temporary files (from pipelines). Use --all to tag everything.",
"- Without -hash and when the selection is a local file, tags are written to <file>.tags.",
"- With a Hydrus hash, tags are sent to the 'my tags' service.",
"- Multiple tags can be comma-separated or space-separated.",
"- Use -list to include predefined tag lists from adjective.json: -list philosophy,occult",
"- Tags can also reference lists with curly braces: add-tag {philosophy} \"other:tag\"",
"- Use -duplicate to copy EXISTING tag values to new namespaces:",
" Explicit format: -duplicate title:album,artist (copies title: to album: and artist:)",
" Inferred format: -duplicate title,album,artist (first is source, rest are targets)",
"- The source namespace must already exist in the file being tagged.",
"- Target namespaces that already have a value are skipped (not overwritten).",
],
)
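
The namespace-replacement behavior (a new title: tag displaces the old one, which is queued for deletion in Hydrus) in a self-contained sketch, not the cmdlet's exact helper:

    from typing import List, Tuple

    def apply_namespaced_tag(existing: List[str], new_tag: str) -> Tuple[List[str], List[str]]:
        """Adding 'title:new' drops other 'title:' tags and reports them for deletion."""
        if ":" not in new_tag:
            return existing + [new_tag], []
        namespace = new_tag.split(":", 1)[0]
        removed = [t for t in existing if t.startswith(namespace + ":") and t.lower() != new_tag.lower()]
        kept = [t for t in existing if not t.startswith(namespace + ":")]
        return kept + [new_tag], removed

    tags, removed = apply_namespaced_tag(["title:old name", "artist:x"], "title:new name")
    assert tags == ["artist:x", "title:new name"] and removed == ["title:old name"]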

View File

@@ -2,6 +2,7 @@ from __future__ import annotations
from typing import Any, Dict, Sequence
import json
import sys
from . import register
import models
@@ -219,6 +220,12 @@ def _process_deletion(tags: list[str], hash_hex: str | None, file_path: str | No
if not tags:
return False
# Safety: block deleting title: without replacement to avoid untitled files
title_tags = [t for t in tags if isinstance(t, str) and t.lower().startswith("title:")]
if title_tags:
log("Cannot delete title: tag without replacement. Use add-tag \"title:new title\" instead.", file=sys.stderr)
return False
if not hash_hex and not file_path:
log("Item does not include a hash or file path")

View File

@@ -41,7 +41,8 @@ from config import resolve_output_dir
from metadata import (
fetch_openlibrary_metadata_tags,
format_playlist_entry,
extract_ytdlp_tags
extract_ytdlp_tags,
build_book_tags,
)
# ============================================================================
@@ -1499,12 +1500,19 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any], emit_results:
metadata = item.get('full_metadata', {}) if isinstance(item.get('full_metadata'), dict) else {}
mirrors = metadata.get('mirrors', {})
book_id = metadata.get('book_id', '')
author = metadata.get('author')
isbn_val = metadata.get('isbn')
year_val = metadata.get('year')
if url:
url_entry = {
'url': str(url),
'mirrors': mirrors, # Alternative mirrors for fallback
'book_id': book_id,
'title': title,
'author': author,
'isbn': isbn_val,
'year': year_val,
}
urls_to_download.append(url_entry)
debug(f"[search-result] LibGen: '{title}'")
@@ -1700,12 +1708,19 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any], emit_results:
metadata = getattr(item, 'full_metadata', {}) if isinstance(getattr(item, 'full_metadata', None), dict) else {}
mirrors = metadata.get('mirrors', {})
book_id = metadata.get('book_id', '')
author = metadata.get('author')
isbn_val = metadata.get('isbn')
year_val = metadata.get('year')
if url:
url_entry = {
'url': str(url),
'mirrors': mirrors, # Alternative mirrors for fallback
'book_id': book_id,
'title': title,
'author': author,
'isbn': isbn_val,
'year': year_val,
}
urls_to_download.append(url_entry)
else:
@@ -2177,6 +2192,10 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any], emit_results:
primary_url = url.get('url')
mirrors_dict = url.get('mirrors', {})
book_id = url.get('book_id', '')
title_val = url.get('title')
author_val = url.get('author')
isbn_val = url.get('isbn')
year_val = url.get('year')
if not primary_url:
debug(f"Skipping libgen entry: no primary URL")
@@ -2219,39 +2238,82 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any], emit_results:
# Use libgen_service's download_from_mirror for proper libgen handling
from helper.libgen_service import download_from_mirror
# Generate filename from book_id and title
safe_title = "".join(c for c in str(title or "book") if c.isalnum() or c in (' ', '.', '-'))[:100]
file_path = final_output_dir / f"{safe_title}_{book_id}.pdf"
progress_bar = models.ProgressBar()
progress_start = time.time()
last_update = [progress_start]
progress_bytes = [0]
progress_total = [0]
def _libgen_progress(downloaded: int, total: int) -> None:
progress_bytes[0] = downloaded
progress_total[0] = total
now = time.time()
if total > 0 and now - last_update[0] >= 0.5:
percent = (downloaded / total) * 100
elapsed = max(now - progress_start, 1e-6)
speed = downloaded / elapsed if elapsed > 0 else 0
remaining = max(total - downloaded, 0)
eta = remaining / speed if speed > 0 else 0
minutes, seconds = divmod(int(eta), 60)
hours, minutes = divmod(minutes, 60)
eta_str = f"{hours:02d}:{minutes:02d}:{seconds:02d}"
speed_str = f"{progress_bar.format_bytes(speed)}/s"
progress_line = progress_bar.format_progress(
percent_str=f"{percent:.1f}%",
downloaded=downloaded,
total=total,
speed_str=speed_str,
eta_str=eta_str,
)
debug(f" {progress_line}")
last_update[0] = now
# Attempt download using libgen's native function
success = download_from_mirror(
success, downloaded_path = download_from_mirror(
mirror_url=mirror_url,
output_path=file_path,
log_info=lambda msg: debug(f" {msg}"),
log_error=lambda msg: debug(f"{msg}")
log_error=lambda msg: debug(f"{msg}"),
progress_callback=_libgen_progress,
)
if success and file_path.exists():
final_path = Path(downloaded_path) if downloaded_path else file_path
if success and final_path.exists():
downloaded = progress_bytes[0] or final_path.stat().st_size
elapsed = time.time() - progress_start
avg_speed = downloaded / elapsed if elapsed > 0 else 0
debug(f" ✓ Downloaded in {elapsed:.1f}s at {progress_bar.format_bytes(avg_speed)}/s")
debug(f" ✓ Downloaded successfully from mirror #{mirror_idx}")
successful_mirror = mirror_url
download_succeeded = True
# Emit result for downstream cmdlets
file_hash = _compute_file_hash(file_path)
emit_tags = ['libgen', 'book']
file_hash = _compute_file_hash(final_path)
emit_tags = build_book_tags(
title=title_val or title,
author=author_val,
isbn=isbn_val,
year=year_val,
source='libgen',
extra=[f"libgen_id:{book_id}"] if book_id else None,
)
pipe_obj = create_pipe_object_result(
source='libgen',
identifier=book_id,
file_path=str(file_path),
file_path=str(final_path),
cmdlet_name='download-data',
file_hash=file_hash,
tags=emit_tags,
source_url=successful_mirror
)
pipeline_context.emit(pipe_obj)
downloaded_files.append(str(file_path))
downloaded_files.append(str(final_path))
exit_code = 0
break # Success, stop trying mirrors
@@ -2643,38 +2705,61 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any], emit_results:
# Let's try to get metadata to make a good filename
filename = "libgen_download.bin"
title_from_results = None
author_from_results = None
year_from_results = None
if libgen_id and results:
title = results[0].get("title", "book")
title_from_results = results[0].get("title")
author_from_results = results[0].get("author")
year_from_results = results[0].get("year")
ext = results[0].get("extension", "pdf")
# Sanitize filename
safe_title = "".join(c for c in title if c.isalnum() or c in (' ', '-', '_')).strip()
safe_title = "".join(c for c in (title_from_results or "book") if c.isalnum() or c in (' ', '-', '_')).strip()
filename = f"{safe_title}.{ext}"
elif "series.php" in url:
filename = f"series_{re.search(r'id=(\d+)', url).group(1) if re.search(r'id=(\d+)', url) else 'unknown'}.pdf"
output_path = final_output_dir / filename
if download_from_mirror(url, output_path, log_info=debug, log_error=log):
debug(f"✓ LibGen download successful: {output_path}")
success, downloaded_path = download_from_mirror(
url,
output_path,
log_info=debug,
log_error=log,
)
final_file = Path(downloaded_path) if downloaded_path else output_path
if success and final_file.exists():
debug(f"✓ LibGen download successful: {final_file}")
# Create a result object
info = {
"id": libgen_id or "libgen",
"title": filename,
"webpage_url": url,
"ext": output_path.suffix.lstrip("."),
"ext": final_file.suffix.lstrip("."),
}
emit_tags = build_book_tags(
title=title_from_results or filename,
author=author_from_results,
year=year_from_results,
source="libgen",
extra=[f"libgen_id:{libgen_id}"] if libgen_id else None,
)
file_hash = _compute_file_hash(final_file)
# Emit result
pipeline_context.emit(create_pipe_object_result(
source="libgen",
identifier=libgen_id or "libgen",
file_path=str(output_path),
file_path=str(final_file),
cmdlet_name="download-data",
title=filename,
file_hash=file_hash,
tags=emit_tags,
extra=info
))
downloaded_files.append(str(output_path))
downloaded_files.append(str(final_file))
continue
else:
debug("⚠ LibGen specialized download failed, falling back to generic downloader...")

View File

@@ -316,6 +316,12 @@ def _play_in_mpv(file_url: str, file_title: str, is_stream: bool = False, header
return False
# Backward-compatible alias for modules expecting the old IPC helper name.
def _get_fixed_ipc_pipe() -> str:
"""Return the shared MPV IPC pipe path (compat shim)."""
return get_ipc_pipe_path()
def _handle_search_result(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Handle a file from search-file results using FileStorage backend."""
try:

View File

@@ -13,6 +13,7 @@ from __future__ import annotations
import sys
from helper.logger import log
from helper.metadata_search import get_metadata_provider
import subprocess
from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence, Tuple
@@ -1015,33 +1016,82 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
scrape_url = parsed_args.get("scrape")
scrape_requested = scrape_url is not None
# Handle URL or provider scraping mode
if scrape_requested and scrape_url:
import json as json_module
# Don't print debug message - output should be JSON only for programmatic consumption
# logger.debug(f"Scraping URL: {scrape_url}")
if scrape_url.startswith("http://") or scrape_url.startswith("https://"):
# URL scraping (existing behavior)
title, tags, formats, playlist_items = _scrape_url_metadata(scrape_url)
if not tags:
log("No tags extracted from URL", file=sys.stderr)
return 1
output = {
"title": title,
"tags": tags,
"formats": [(label, fmt_id) for label, fmt_id in formats],
"playlist_items": playlist_items,
}
print(json_module.dumps(output, ensure_ascii=False))
return 0
# Provider scraping (e.g., itunes)
provider = get_metadata_provider(scrape_url, config)
if provider is None:
log(f"Unknown metadata provider: {scrape_url}", file=sys.stderr)
return 1
# Build result object
# result_obj = TagItem("url_scrape", tag_index=0, hash_hex=None, source="url", service_name=None)
# result_obj.title = title or "URL Content"
# Determine query from title on the result or filename
title_hint = get_field(result, "title", None) or get_field(result, "name", None)
if not title_hint:
file_path = get_field(result, "path", None) or get_field(result, "filename", None)
if file_path:
title_hint = Path(str(file_path)).stem
if not title_hint:
log("No title available to search for metadata", file=sys.stderr)
return 1
items = provider.search(title_hint, limit=10)
if not items:
log("No metadata results found", file=sys.stderr)
return 1
from result_table import ResultTable
table = ResultTable(f"Metadata: {provider.name}")
table.set_source_command("get-tag", [])
selection_payload = []
hash_for_payload = normalize_hash(hash_override) or normalize_hash(get_field(result, "hash_hex", None))
for idx, item in enumerate(items):
tags = provider.to_tags(item)
row = table.add_row()
row.add_column("Title", item.get("title", ""))
row.add_column("Artist", item.get("artist", ""))
row.add_column("Album", item.get("album", ""))
row.add_column("Year", item.get("year", ""))
payload = {
"tags": tags,
"provider": provider.name,
"title": item.get("title"),
"artist": item.get("artist"),
"album": item.get("album"),
"year": item.get("year"),
"extra": {
"tags": tags,
"provider": provider.name,
"hydrus_hash": hash_for_payload,
"storage_source": get_field(result, "source", None) or get_field(result, "origin", None),
},
"file_hash": hash_for_payload,
}
selection_payload.append(payload)
table.set_row_selection_args(idx, [str(idx + 1)])
ctx.set_last_result_table_overlay(table, selection_payload)
ctx.set_current_stage_table(table)
# Preserve items for @ selection and downstream pipes without emitting duplicates
ctx.set_last_result_items_only(selection_payload)
print(table)
return 0
# If -scrape was requested but no URL, that's an error
@@ -1178,7 +1228,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
CMDLET = Cmdlet(
name="get-tag",
summary="Get tags from Hydrus or local sidecar metadata",
usage="get-tag [-hash <sha256>] [--store <key>] [--emit] [-scrape <url>]",
usage="get-tag [-hash <sha256>] [--store <key>] [--emit] [-scrape <url|provider>]",
aliases=["tags"],
args=[
SharedArgs.HASH,
@@ -1197,7 +1247,7 @@ CMDLET = Cmdlet(
CmdletArg(
name="-scrape",
type="string",
description="Scrape metadata from URL (returns tags as JSON)",
description="Scrape metadata from URL or provider name (returns tags as JSON or table)",
required=False
)
]
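
A hedged usage sketch of the new provider mode; it assumes network access and that the iTunes provider is registered:

    from helper.metadata_search import get_metadata_provider

    provider = get_metadata_provider("itunes")
    if provider is not None:
        for item in provider.search("Blue in Green Miles Davis", limit=3):
            print(provider.to_tags(item))
            # e.g. ['title:Blue In Green', 'artist:Miles Davis', 'album:...', 'year:...', 'source:itunes']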

cmdnats/__init__.py (new file)
View File

@@ -0,0 +1,39 @@
from __future__ import annotations
import os
from importlib import import_module
from typing import Any, Callable, Dict, Sequence
CmdletFn = Callable[[Any, Sequence[str], Dict[str, Any]], int]
def _register_cmdlet_object(cmdlet_obj, registry: Dict[str, CmdletFn]) -> None:
run_fn = getattr(cmdlet_obj, "exec", None) if hasattr(cmdlet_obj, "exec") else None
if not callable(run_fn):
return
if hasattr(cmdlet_obj, "name") and cmdlet_obj.name:
registry[cmdlet_obj.name.replace("_", "-").lower()] = run_fn
if hasattr(cmdlet_obj, "aliases") and getattr(cmdlet_obj, "aliases"):
for alias in cmdlet_obj.aliases:
registry[alias.replace("_", "-").lower()] = run_fn
def register_native_commands(registry: Dict[str, CmdletFn]) -> None:
"""Import native command modules and register their CMDLET exec functions."""
base_dir = os.path.dirname(__file__)
for filename in os.listdir(base_dir):
if not (filename.endswith(".py") and not filename.startswith("_") and filename != "__init__.py"):
continue
mod_name = filename[:-3]
try:
module = import_module(f".{mod_name}", __name__)
cmdlet_obj = getattr(module, "CMDLET", None)
if cmdlet_obj:
_register_cmdlet_object(cmdlet_obj, registry)
except Exception as exc:
import sys
print(f"Error importing native command '{mod_name}': {exc}", file=sys.stderr)
continue
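
From the module side, a native command only has to expose a module-level CMDLET with name/aliases/exec, per the registry code above; a minimal illustrative module:

    from types import SimpleNamespace
    from typing import Any, Dict, Sequence

    def _exec(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
        print("hello from a native command")
        return 0

    # A module-level CMDLET is all register_native_commands() looks for.
    CMDLET = SimpleNamespace(name="hello_world", aliases=["hello"], exec=_exec)
    # After registration the command is reachable as 'hello-world' or 'hello'.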

View File

@@ -2,7 +2,7 @@ import json
import os
import sys
from typing import List, Dict, Any, Optional, Sequence
from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args
from cmdlets._shared import Cmdlet, CmdletArg, parse_cmdlet_args
from helper.logger import log
from result_table import ResultTable
import pipeline as ctx

View File

@@ -1,6 +1,6 @@
from typing import Any, Dict, Sequence, List
import sys
from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args
from cmdlets._shared import Cmdlet, CmdletArg, parse_cmdlet_args
from helper.logger import log, debug
from result_table import ResultTable
from helper.file_storage import MatrixStorageBackend

View File

@@ -5,7 +5,9 @@ import platform
import socket
import re
import subprocess
from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args
from urllib.parse import urlparse
from pathlib import Path
from cmdlets._shared import Cmdlet, CmdletArg, parse_cmdlet_args
from helper.logger import log, debug
from result_table import ResultTable
from helper.mpv_ipc import get_ipc_pipe_path, MPVIPCClient
@@ -13,7 +15,7 @@ import pipeline as ctx
from helper.download import is_url_supported_by_ytdlp
from helper.local_library import LocalLibrarySearchOptimizer
from config import get_local_storage_path
from config import get_local_storage_path, get_hydrus_access_key, get_hydrus_url
from hydrus_health_check import get_cookies_file_path
def _send_ipc_command(command: Dict[str, Any], silent: bool = False) -> Optional[Any]:
@@ -72,6 +74,150 @@ def _extract_title_from_item(item: Dict[str, Any]) -> str:
return title or filename or "Unknown"
def _extract_target_from_memory_uri(text: str) -> Optional[str]:
"""Extract the real target URL/path from a memory:// M3U payload."""
if not isinstance(text, str) or not text.startswith("memory://"):
return None
for line in text.splitlines():
line = line.strip()
if not line or line.startswith('#') or line.startswith('memory://'):
continue
return line
return None
def _infer_store_from_playlist_item(item: Dict[str, Any]) -> str:
"""Infer a friendly store label from an MPV playlist entry."""
name = item.get("filename") if isinstance(item, dict) else None
target = str(name or "")
# Unwrap memory:// M3U wrapper
memory_target = _extract_target_from_memory_uri(target)
if memory_target:
target = memory_target
lower = target.lower()
if lower.startswith("magnet:"):
return "magnet"
if lower.startswith("hydrus://"):
return "hydrus"
# Windows / UNC paths
if re.match(r"^[a-z]:[\\/]", target, flags=re.IGNORECASE) or target.startswith("\\\\"):
return "local"
# file:// URLs
if lower.startswith("file://"):
return "local"
parsed = urlparse(target)
host = (parsed.netloc or "").lower()
path = parsed.path or ""
if not host:
return ""
host_no_port = host.split(":", 1)[0]
host_stripped = host_no_port[4:] if host_no_port.startswith("www.") else host_no_port
if "youtube" in host_stripped or "youtu.be" in target.lower():
return "youtube"
if "soundcloud" in host_stripped:
return "soundcloud"
if "bandcamp" in host_stripped:
return "bandcamp"
if "get_files" in path or host_stripped in {"127.0.0.1", "localhost"}:
return "hydrus"
if re.match(r"^\d+\.\d+\.\d+\.\d+$", host_stripped) and "get_files" in path:
return "hydrus"
parts = host_stripped.split('.')
if len(parts) >= 2:
return parts[-2] or host_stripped
return host_stripped
def _format_playlist_location(name: str, max_len: int = 48) -> str:
"""Format playlist filename/URL for display while keeping backend untouched."""
target = name or ""
memory_target = _extract_target_from_memory_uri(target)
if memory_target:
target = memory_target
lower = target.lower()
# Local paths: show basename only
if re.match(r"^[a-z]:[\\/]", target, flags=re.IGNORECASE) or target.startswith("\\\\"):
target = Path(target).name
elif lower.startswith("file://"):
parsed = urlparse(target)
target = Path(parsed.path or "").name or target
else:
parsed = urlparse(target)
host = parsed.netloc or ""
if host:
host_no_port = host.split(":", 1)[0]
host_no_port = host_no_port[4:] if host_no_port.startswith("www.") else host_no_port
tail = parsed.path.split('/')[-1] if parsed.path else ""
if tail:
target = f"{host_no_port}/{tail}"
else:
target = host_no_port
if len(target) > max_len:
return target[: max_len - 3] + "..."
return target
def _build_hydrus_header(config: Dict[str, Any]) -> Optional[str]:
"""Return header string for Hydrus auth if configured."""
try:
key = get_hydrus_access_key(config)
except Exception:
key = None
if not key:
return None
return f"Hydrus-Client-API-Access-Key: {key}"
def _build_ytdl_options(config: Optional[Dict[str, Any]], hydrus_header: Optional[str]) -> Optional[str]:
"""Compose ytdl-raw-options string including cookies and optional Hydrus header."""
opts: List[str] = []
try:
cookies_path = get_cookies_file_path()
except Exception:
cookies_path = None
if cookies_path:
opts.append(f"cookies={cookies_path.replace('\\', '/')}")
else:
opts.append("cookies-from-browser=chrome")
if hydrus_header:
opts.append(f"add-header={hydrus_header}")
return ",".join(opts) if opts else None
def _is_hydrus_target(target: str, hydrus_url: Optional[str]) -> bool:
if not target:
return False
lower = target.lower()
if "hydrus://" in lower:
return True
parsed = urlparse(target)
host = (parsed.netloc or "").lower()
path = parsed.path or ""
if hydrus_url:
try:
hydrus_host = urlparse(hydrus_url).netloc.lower()
if hydrus_host and hydrus_host in host:
return True
except Exception:
pass
if "get_files" in path or "file?hash=" in path:
return True
if re.match(r"^\d+\.\d+\.\d+\.\d+$", host) and "get_files" in path:
return True
return False
def _ensure_ytdl_cookies() -> None:
"""Ensure yt-dlp options are set correctly for this session."""
from pathlib import Path
@@ -127,8 +273,7 @@ def _monitor_mpv_logs(duration: float = 3.0) -> None:
client.disconnect()
except Exception:
pass
def _queue_items(items: List[Any], clear_first: bool = False) -> bool:
def _queue_items(items: List[Any], clear_first: bool = False, config: Optional[Dict[str, Any]] = None) -> bool:
"""Queue items to MPV, starting it if necessary.
Args:
@@ -141,6 +286,14 @@ def _queue_items(items: List[Any], clear_first: bool = False) -> bool:
# Just verify cookies are configured, don't try to set via IPC
_ensure_ytdl_cookies()
hydrus_header = _build_hydrus_header(config or {})
ytdl_opts = _build_ytdl_options(config, hydrus_header)
hydrus_url = None
try:
hydrus_url = get_hydrus_url(config) if config is not None else None
except Exception:
hydrus_url = None
for i, item in enumerate(items):
# Extract URL/Path
target = None
@@ -175,13 +328,21 @@ def _queue_items(items: List[Any], clear_first: bool = False) -> bool:
if clear_first and i == 0:
mode = "replace"
# If this is a Hydrus target, set header property and yt-dlp headers before loading
if hydrus_header and _is_hydrus_target(target_to_send, hydrus_url):
header_cmd = {"command": ["set_property", "http-header-fields", hydrus_header], "request_id": 199}
_send_ipc_command(header_cmd, silent=True)
if ytdl_opts:
ytdl_cmd = {"command": ["set_property", "ytdl-raw-options", ytdl_opts], "request_id": 197}
_send_ipc_command(ytdl_cmd, silent=True)
cmd = {"command": ["loadfile", target_to_send, mode], "request_id": 200}
resp = _send_ipc_command(cmd)
if resp is None:
# MPV not running (or died)
# Start MPV with remaining items
_start_mpv(items[i:])
_start_mpv(items[i:], config=config)
return True
elif resp.get("error") == "success":
# Also set property for good measure
@@ -448,7 +609,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
elif isinstance(result, dict):
items_to_add = [result]
if _queue_items(items_to_add):
if _queue_items(items_to_add, config=config):
mpv_started = True
if items_to_add:
@@ -472,7 +633,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
return 0
else:
debug("MPV is not running. Starting new instance...")
_start_mpv([])
_start_mpv([], config=config)
return 0
if not items:
@@ -491,6 +652,13 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
item = items[idx]
title = _extract_title_from_item(item)
filename = item.get("filename", "") if isinstance(item, dict) else ""
hydrus_header = _build_hydrus_header(config or {})
hydrus_url = None
try:
hydrus_url = get_hydrus_url(config) if config is not None else None
except Exception:
hydrus_url = None
if clear_mode:
# Remove item
@@ -507,6 +675,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
return 1
else:
# Play item
if hydrus_header and _is_hydrus_target(filename, hydrus_url):
header_cmd = {"command": ["set_property", "http-header-fields", hydrus_header], "request_id": 198}
_send_ipc_command(header_cmd, silent=True)
cmd = {"command": ["playlist-play-index", idx], "request_id": 102}
resp = _send_ipc_command(cmd)
if resp and resp.get("error") == "success":
@@ -544,6 +715,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
for i, item in enumerate(items):
is_current = item.get("current", False)
title = _extract_title_from_item(item)
store = _infer_store_from_playlist_item(item)
filename = item.get("filename", "") if isinstance(item, dict) else ""
display_loc = _format_playlist_location(filename)
# Truncate if too long
if len(title) > 80:
@@ -551,7 +725,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
row = table.add_row()
row.add_column("Current", "*" if is_current else "")
row.add_column("Store", store)
row.add_column("Title", title)
row.add_column("Filename", display_loc)
table.set_row_selection_args(i, [str(i + 1)])
@@ -565,7 +741,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
return 0
def _start_mpv(items: List[Any]) -> None:
def _start_mpv(items: List[Any], config: Optional[Dict[str, Any]] = None) -> None:
"""Start MPV with a list of items."""
import subprocess
import time as _time_module
@@ -584,20 +760,18 @@ def _start_mpv(items: List[Any]) -> None:
# Start MPV in idle mode with IPC server
cmd = ['mpv', f'--input-ipc-server={ipc_pipe}', '--idle', '--force-window']
cmd.append('--ytdl-format=bestvideo[height<=?1080]+bestaudio/best[height<=?1080]')
# Use cookies.txt if available, otherwise fall back to browser cookies
hydrus_header = _build_hydrus_header(config or {})
ytdl_opts = _build_ytdl_options(config, hydrus_header)
cookies_path = get_cookies_file_path()
if cookies_path:
# yt-dlp on Windows needs forward slashes; normalize outside the f-string
# (backslashes are not allowed inside f-string expressions before Python 3.12)
cookies_path_normalized = cookies_path.replace('\\', '/')
debug(f"Starting MPV with cookies file: {cookies_path_normalized}")
else:
# Use cookies from browser (Chrome) to handle age-restricted content
debug("Starting MPV with browser cookies: chrome")
if ytdl_opts:
cmd.append(f'--ytdl-raw-options={ytdl_opts}')
try:
kwargs = {}
@@ -607,6 +781,8 @@ def _start_mpv(items: List[Any]) -> None:
# Log the complete MPV command being executed
debug(f"DEBUG: Full MPV command: {' '.join(cmd)}")
if hydrus_header:
cmd.append(f'--http-header-fields={hydrus_header}')
subprocess.Popen(cmd, stdin=subprocess.DEVNULL, stdout=subprocess.PIPE, stderr=subprocess.PIPE, **kwargs)
debug(f"Started MPV process")
@@ -625,7 +801,7 @@ def _start_mpv(items: List[Any]) -> None:
# Queue items via IPC
if items:
_queue_items(items)
_queue_items(items, config=config)
except Exception as e:
debug(f"Error starting MPV: {e}", file=sys.stderr)

View File

@@ -6,8 +6,8 @@ import json
import sys
from datetime import datetime, timezone
from . import register
from ._shared import Cmdlet, CmdletArg
from cmdlets import register
from cmdlets._shared import Cmdlet, CmdletArg
import pipeline as ctx
from helper.logger import log
from config import get_local_storage_path

View File

@@ -1397,6 +1397,10 @@ class FileStorage:
log(f"Registered remote storage backend: {name} -> {url}{auth_status}", file=sys.stderr)
except Exception as e:
log(f"Failed to register remote storage '{name}': {e}", file=sys.stderr)
def list_backends(self) -> list[str]:
"""Return available backend keys for autocomplete and validation."""
return sorted(self._backends.keys())
def __getitem__(self, backend_name: str) -> StorageBackend:
"""Get a storage backend by name.

View File

@@ -9,8 +9,8 @@ import logging
import re
import requests
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional
from urllib.parse import quote, urljoin
from typing import Any, Callable, Dict, List, Optional, Tuple
from urllib.parse import quote, urljoin, urlparse, unquote
# Optional dependencies
try:
@@ -405,6 +405,61 @@ def _resolve_download_url(
return None
def _guess_filename_extension(download_url: str, headers: Dict[str, str]) -> Optional[str]:
"""Guess the file extension from headers or the download URL."""
content_disposition = headers.get("content-disposition", "")
if content_disposition:
match = re.search(r'filename\*?=(?:UTF-8\'\'|"?)([^";]+)', content_disposition, flags=re.IGNORECASE)
if match:
filename = unquote(match.group(1).strip('"'))
suffix = Path(filename).suffix
if suffix:
return suffix.lstrip('.')
parsed = urlparse(download_url)
suffix = Path(parsed.path).suffix
if suffix:
return suffix.lstrip('.')
content_type = headers.get('content-type', '').lower()
mime_map = {
'application/pdf': 'pdf',
'application/epub+zip': 'epub',
'application/x-mobipocket-ebook': 'mobi',
'application/x-cbr': 'cbr',
'application/x-cbz': 'cbz',
'application/zip': 'zip',
}
for mime, ext in mime_map.items():
if mime in content_type:
return ext
return None
def _apply_extension(path: Path, extension: Optional[str]) -> Path:
"""Rename the path to match the detected extension, if needed."""
if not extension:
return path
suffix = extension if extension.startswith('.') else f'.{extension}'
if path.suffix.lower() == suffix.lower():
return path
candidate = path.with_suffix(suffix)
base_stem = path.stem
counter = 1
while candidate.exists() and counter < 100:
candidate = path.with_name(f"{base_stem}({counter}){suffix}")
counter += 1
try:
path.replace(candidate)
return candidate
except Exception:
return path
def download_from_mirror(
mirror_url: str,
output_path: Path,
@@ -412,8 +467,9 @@ def download_from_mirror(
log_info: LogFn = None,
log_error: ErrorFn = None,
session: Optional[requests.Session] = None,
) -> bool:
"""Download file from a LibGen mirror URL."""
progress_callback: Optional[Callable[[int, int], None]] = None,
) -> Tuple[bool, Optional[Path]]:
"""Download file from a LibGen mirror URL with optional progress tracking."""
session = session or requests.Session()
output_path = Path(output_path)
output_path.parent.mkdir(parents=True, exist_ok=True)
@@ -425,33 +481,43 @@ def download_from_mirror(
if not download_url:
_call(log_error, "[download] Could not find direct download link")
return False
return False, None
_call(log_info, f"[download] Downloading from: {download_url}")
# Download the actual file
downloaded = 0
total_size = 0
headers: Dict[str, str] = {}
with session.get(download_url, stream=True, timeout=60) as r:
r.raise_for_status()
headers = dict(r.headers)
# Verify it's not HTML (error page)
ct = r.headers.get("content-type", "").lower()
ct = headers.get("content-type", "").lower()
if "text/html" in ct:
_call(log_error, "[download] Final URL returned HTML, not a file.")
return False
return False, None
total_size = int(r.headers.get("content-length", 0))
downloaded = 0
total_size = int(headers.get("content-length", 0) or 0)
with open(output_path, "wb") as f:
for chunk in r.iter_content(chunk_size=8192):
if chunk:
f.write(chunk)
downloaded += len(chunk)
if progress_callback:
progress_callback(downloaded, total_size)
final_extension = _guess_filename_extension(download_url, headers)
final_path = _apply_extension(output_path, final_extension)
if progress_callback and total_size > 0:
progress_callback(downloaded, total_size)
_call(log_info, f"[download] Saved to {final_path}")
return True, final_path
except Exception as e:
_call(log_error, f"[download] Download failed: {e}")
return False
return False, None
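
Call-site sketch for the new (bool, Optional[Path]) return and progress hook; the mirror URL is illustrative:

    from pathlib import Path
    from helper.libgen_service import download_from_mirror

    def on_progress(downloaded: int, total: int) -> None:
        if total:
            print(f"{downloaded}/{total} bytes")

    ok, final_path = download_from_mirror(
        mirror_url="http://example.org/mirror-page",  # illustrative
        output_path=Path("book.pdf"),
        progress_callback=on_progress,
    )
    if ok and final_path is not None:
        print(f"saved as {final_path}")  # extension may have been corrected, e.g. book.epub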

View File

@@ -38,6 +38,9 @@ def log(*args, **kwargs) -> None:
Example:
log("Upload started") # Output: [add_file.run] Upload started
"""
# When debug is disabled, suppress the automatic prefix for cleaner user-facing output.
add_prefix = _DEBUG_ENABLED
# Get the calling frame
frame = inspect.currentframe()
if frame is None:
@@ -60,11 +63,11 @@ def log(*args, **kwargs) -> None:
if 'file' not in kwargs:
kwargs['file'] = sys.stdout
# Build prefix
prefix = f"[{file_name}.{func_name}]"
# Print with prefix
print(prefix, *args, **kwargs)
if add_prefix:
prefix = f"[{file_name}.{func_name}]"
print(prefix, *args, **kwargs)
else:
print(*args, **kwargs)
finally:
del frame
del caller_frame
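
Condensed sketch of the new behavior (the real function derives the prefix from the caller via inspect):

    import sys

    _DEBUG_ENABLED = False  # module-level toggle

    def log(*args, **kwargs) -> None:
        kwargs.setdefault("file", sys.stdout)
        if _DEBUG_ENABLED:
            print("[caller.function]", *args, **kwargs)  # prefix only in debug mode
        else:
            print(*args, **kwargs)

    log("Upload started")  # -> 'Upload started' (no prefix while debug is off)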

helper/metadata_search.py (new file)
View File

@@ -0,0 +1,105 @@
from __future__ import annotations
from abc import ABC, abstractmethod
from typing import Any, Dict, List, Optional, Type
import requests
import sys
from helper.logger import log, debug
class MetadataProvider(ABC):
"""Base class for metadata providers (music, movies, books, etc.)."""
def __init__(self, config: Optional[Dict[str, Any]] = None) -> None:
self.config = config or {}
@property
def name(self) -> str:
return self.__class__.__name__.replace("Provider", "").lower()
@abstractmethod
def search(self, query: str, limit: int = 10) -> List[Dict[str, Any]]:
"""Return a list of candidate metadata records."""
def to_tags(self, item: Dict[str, Any]) -> List[str]:
"""Convert a result item into a list of tags."""
tags: List[str] = []
title = item.get("title")
artist = item.get("artist")
album = item.get("album")
year = item.get("year")
if title:
tags.append(f"title:{title}")
if artist:
tags.append(f"artist:{artist}")
if album:
tags.append(f"album:{album}")
if year:
tags.append(f"year:{year}")
tags.append(f"source:{self.name}")
return tags
class ITunesProvider(MetadataProvider):
"""Metadata provider using the iTunes Search API."""
def search(self, query: str, limit: int = 10) -> List[Dict[str, Any]]:
params = {"term": query, "media": "music", "entity": "song", "limit": limit}
try:
resp = requests.get("https://itunes.apple.com/search", params=params, timeout=10)
resp.raise_for_status()
results = resp.json().get("results", [])
except Exception as exc:
log(f"iTunes search failed: {exc}", file=sys.stderr)
return []
items: List[Dict[str, Any]] = []
for r in results:
item = {
"title": r.get("trackName"),
"artist": r.get("artistName"),
"album": r.get("collectionName"),
"year": str(r.get("releaseDate", ""))[:4],
"provider": self.name,
"raw": r,
}
items.append(item)
debug(f"iTunes returned {len(items)} items for '{query}'")
return items
# Registry ---------------------------------------------------------------
_METADATA_PROVIDERS: Dict[str, Type[MetadataProvider]] = {
"itunes": ITunesProvider,
}
def register_provider(name: str, provider_cls: Type[MetadataProvider]) -> None:
_METADATA_PROVIDERS[name.lower()] = provider_cls
def list_metadata_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bool]:
availability: Dict[str, bool] = {}
for name, cls in _METADATA_PROVIDERS.items():
try:
provider = cls(config)
# Basic availability check: perform lightweight validation if defined
availability[name] = True
except Exception:
availability[name] = False
return availability
def get_metadata_provider(name: str, config: Optional[Dict[str, Any]] = None) -> Optional[MetadataProvider]:
cls = _METADATA_PROVIDERS.get(name.lower())
if not cls:
return None
try:
return cls(config)
except Exception as exc:
log(f"Provider init failed for '{name}': {exc}", file=sys.stderr)
return None
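
Extending the registry with a custom provider, an offline sketch:

    from typing import Any, Dict, List
    from helper.metadata_search import MetadataProvider, get_metadata_provider, register_provider

    class DummyProvider(MetadataProvider):
        """Offline provider used purely to illustrate the registry hooks."""
        def search(self, query: str, limit: int = 10) -> List[Dict[str, Any]]:
            return [{"title": query.title(), "artist": "Unknown", "album": None, "year": None}]

    register_provider("dummy", DummyProvider)
    provider = get_metadata_provider("dummy")
    assert provider is not None
    print(provider.to_tags(provider.search("blue in green")[0]))
    # ['title:Blue In Green', 'artist:Unknown', 'source:dummy']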

View File

@@ -73,7 +73,12 @@ class SearchResult:
self.columns = []
def to_dict(self) -> Dict[str, Any]:
"""Convert to dictionary for JSON serialization."""
"""Convert to dictionary for JSON serialization.
Note: full_metadata is excluded from dict to keep response size small
until the result is actually selected/used. This speeds up initial
search result display and piping.
"""
data = {
"origin": self.origin,
"title": self.title,
@@ -83,10 +88,10 @@ class SearchResult:
"media_kind": self.media_kind,
"size_bytes": self.size_bytes,
"tags": list(self.tags) if self.tags else [],
"full_metadata": self.full_metadata,
}
if self.columns:
data["columns"] = list(self.columns)
# Note: full_metadata is NOT included in dict to keep payload small
return data
@@ -377,6 +382,7 @@ class LibGenProvider(SearchProvider):
if isbn:
annotations.append(f"ISBN: {isbn}")
# Store full book data without mirrors in metadata to avoid serialization overhead
search_results.append(SearchResult(
origin="libgen",
title=title,
@@ -391,7 +397,8 @@ class LibGenProvider(SearchProvider):
"year": year,
"isbn": isbn,
"filesize": filesize,
"mirrors": book.get("mirrors", {}),
# Exclude mirrors dict from metadata to reduce serialization overhead
# Mirrors can be re-fetched if the result is selected
"book_id": book.get("book_id", ""),
"md5": book.get("md5", ""),
},

View File

@@ -450,30 +450,31 @@ class UnifiedBookDownloader:
if download_func is None:
return False, "Download function not available"
download_callable = cast(Callable[[str, str], bool], download_func)
download_callable = cast(Callable[[str, str], Tuple[bool, Optional[Path]]], download_func)
def download_wrapper():
return download_callable(mirror_url, str(output_path))
# Download (in thread)
try:
success = await loop.run_in_executor(None, download_wrapper)
success, downloaded_path = await loop.run_in_executor(None, download_wrapper)
if success:
dest_path = Path(downloaded_path) if downloaded_path else output_path
# Validate downloaded file is not HTML (common Libgen issue)
if output_path.exists():
if dest_path.exists():
try:
with open(output_path, 'rb') as f:
with open(dest_path, 'rb') as f:
file_start = f.read(1024).decode('utf-8', errors='ignore').lower()
if '<!doctype' in file_start or '<html' in file_start:
logger.warning(f"[UnifiedBookDownloader] Mirror {idx} returned HTML instead of file, trying next mirror...")
output_path.unlink() # Delete the HTML file
dest_path.unlink() # Delete the HTML file
continue
except Exception as e:
logger.debug(f"[UnifiedBookDownloader] Could not validate file content: {e}")
logger.info(f"[UnifiedBookDownloader] Successfully downloaded from mirror {idx} to: {output_path}")
return True, str(output_path)
logger.info(f"[UnifiedBookDownloader] Successfully downloaded from mirror {idx} to: {dest_path}")
return True, str(dest_path)
else:
logger.warning(f"[UnifiedBookDownloader] Mirror {idx} download failed, trying next...")
except Exception as e:

View File

@@ -3015,6 +3015,40 @@ def process_tags_from_string(tags_str: str, expand_lists: bool = False) -> Set[s
return tags_set
def build_book_tags(
*,
title: Optional[str] = None,
author: Optional[str] = None,
isbn: Optional[str] = None,
year: Optional[str] = None,
source: Optional[str] = None,
extra: Optional[Sequence[str]] = None,
) -> List[str]:
"""Build consistent book tags for downloads (LibGen, OpenLibrary, etc.)."""
tags: List[str] = ["book"]
def _add(tag: Optional[str]) -> None:
if tag and isinstance(tag, str) and tag.strip():
tags.append(tag.strip())
_add(source)
if title:
_add(f"title:{title}")
if author:
_add(f"author:{author}")
if isbn:
_add(f"isbn:{isbn}")
if year:
_add(f"year:{year}")
if extra:
for tag in extra:
_add(tag)
# Deduplicate while preserving order
deduped = list(dict.fromkeys(tags))
return deduped
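
Example output of build_book_tags under the arguments shown (the libgen_id is illustrative):

    tags = build_book_tags(
        title="Meditations",
        author="Marcus Aurelius",
        isbn="9780140449334",
        source="libgen",
        extra=["libgen_id:12345"],  # illustrative id
    )
    # -> ['book', 'libgen', 'title:Meditations', 'author:Marcus Aurelius',
    #     'isbn:9780140449334', 'libgen_id:12345']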
def fetch_openlibrary_metadata_tags(isbn: Optional[str] = None, olid: Optional[str] = None) -> List[str]:
"""Fetch book metadata from OpenLibrary and return as tags.