diff --git a/CLI.py b/CLI.py
index eeb74fb..a82374c 100644
--- a/CLI.py
+++ b/CLI.py
@@ -407,24 +407,27 @@ def _get_cmdlet_names() -> List[str]:
         return []
 
 
+def _import_cmd_module(mod_name: str):
+    """Import a cmdlet/native module from the cmdlets or cmdnats packages."""
+    for package in ("cmdlets", "cmdnats", None):
+        try:
+            qualified = f"{package}.{mod_name}" if package else mod_name
+            return import_module(qualified)
+        except Exception:
+            continue
+    return None
+
+
 def _get_cmdlet_args(cmd_name: str) -> List[str]:
     """Get list of argument flags for a cmdlet (with - and -- prefixes)."""
     try:
-        # Try to load CMDLET object from the module
         mod_name = cmd_name.replace("-", "_")
-
-        # Try importing as cmdlet first, then as root-level module
         data = None
-        try:
-            mod = import_module(f"cmdlets.{mod_name}")
+        mod = _import_cmd_module(mod_name)
+        if mod:
             data = getattr(mod, "CMDLET", None)
-        except (ModuleNotFoundError, ImportError):
-            try:
-                # Try root-level modules like search_soulseek
-                mod = import_module(mod_name)
-                data = getattr(mod, "CMDLET", None)
-            except (ModuleNotFoundError, ImportError):
-                pass
 
         if data:
             # If CMDLET is an object (not dict), use build_flag_registry if available
@@ -458,25 +461,56 @@ def _get_arg_choices(cmd_name: str, arg_name: str) -> List[str]:
     """Get list of valid choices for a specific cmdlet argument."""
     try:
         mod_name = cmd_name.replace("-", "_")
-        try:
-            mod = import_module(f"cmdlets.{mod_name}")
-            data = getattr(mod, "CMDLET", None)
-            if data:
-                args_list = data.get("args", []) if isinstance(data, dict) else getattr(data, "args", [])
-                for arg in args_list:
+        normalized_arg = arg_name.lstrip("-")
+
+        # Dynamic storage backends: use current config to enumerate available storages
+        if normalized_arg == "storage":
+            try:
+                from helper.file_storage import FileStorage
+                storage = FileStorage(_load_cli_config())
+                backends = storage.list_backends()
+                if backends:
+                    return backends
+            except Exception:
+                pass
+
+        # Dynamic search providers
+        if normalized_arg == "provider":
+            try:
+                from helper.search_provider import list_providers
+                providers = list_providers(_load_cli_config())
+                available = [name for name, is_ready in providers.items() if is_ready]
+                provider_choices = sorted(available) if available else sorted(providers.keys())
+            except Exception:
+                provider_choices = []
+
+            try:
+                from helper.metadata_search import list_metadata_providers
+                meta_providers = list_metadata_providers(_load_cli_config())
+                meta_available = [n for n, ready in meta_providers.items() if ready]
+                meta_choices = sorted(meta_available) if meta_available else sorted(meta_providers.keys())
+            except Exception:
+                meta_choices = []
+
+            merged = sorted(set(provider_choices + meta_choices))
+            if merged:
+                return merged
+
+        mod = _import_cmd_module(mod_name)
+        data = getattr(mod, "CMDLET", None) if mod else None
+        if data:
+            args_list = data.get("args", []) if isinstance(data, dict) else getattr(data, "args", [])
+            for arg in args_list:
+                if isinstance(arg, dict):
+                    arg_obj_name = arg.get("name", "")
+                else:
+                    arg_obj_name = getattr(arg, "name", "")
+
+                if arg_obj_name == arg_name:
+                    # Found matching arg, get choices
                     if isinstance(arg, dict):
-                        arg_obj_name = arg.get("name", "")
+                        return arg.get("choices", [])
                     else:
-                        arg_obj_name = getattr(arg, "name", "")
-
-                    if arg_obj_name == arg_name:
-                        # Found matching arg, get choices
-                        if isinstance(arg, dict):
-                            return arg.get("choices", [])
-                        else:
-                            return getattr(arg, "choices", [])
-        except ModuleNotFoundError:
-            pass
+                        return getattr(arg, "choices", [])
         return []
     except Exception:
         return []
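For reference, the lookup order the new helper gives every completion path boils down to the following (a sketch, not part of the patch; resolve_command_module is an illustrative name):

    from importlib import import_module

    def resolve_command_module(cmd_name: str):
        """Try cmdlets.<name>, then cmdnats.<name>, then a bare top-level module."""
        mod_name = cmd_name.replace("-", "_")  # e.g. "add-file" -> "add_file"
        for package in ("cmdlets", "cmdnats", None):
            qualified = f"{package}.{mod_name}" if package else mod_name
            try:
                return import_module(qualified)
            except Exception:
                continue  # fall through to the next candidate
        return None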
return getattr(arg, "choices", []) return [] except Exception: return [] @@ -1575,43 +1609,40 @@ def _show_cmdlet_list(): from cmdlets import REGISTRY import os - # Collect unique commands by scanning cmdlet modules cmdlet_info = {} - cmdlets_dir = os.path.join(os.path.dirname(__file__), "cmdlets") - - # Iterate through cmdlet files - for filename in os.listdir(cmdlets_dir): - if filename.endswith(".py") and not filename.startswith("_"): - mod_name = filename[:-3] - try: - mod = import_module(f"cmdlets.{mod_name}") - if hasattr(mod, "CMDLET"): - cmdlet = getattr(mod, "CMDLET") - # Extract name, aliases, and args - if hasattr(cmdlet, "name"): - cmd_name = cmdlet.name - aliases = [] - if hasattr(cmdlet, "aliases"): - aliases = cmdlet.aliases - - # Extract argument names - arg_names = [] - if hasattr(cmdlet, "args"): - for arg in cmdlet.args: - if hasattr(arg, "name"): - arg_names.append(arg.name) - elif isinstance(arg, dict): - arg_names.append(arg.get("name", "")) - - # Store info (skip if already seen) - if cmd_name not in cmdlet_info: - cmdlet_info[cmd_name] = { - "aliases": aliases, - "args": arg_names, - } - except Exception: - # If we can't import the module, try to get info from REGISTRY - pass + base_dir = os.path.dirname(__file__) + + def _collect_cmdlets_from_dir(folder: str, package: str) -> None: + if not os.path.isdir(folder): + return + for filename in os.listdir(folder): + if filename.endswith(".py") and not filename.startswith("_") and filename != "__init__.py": + mod_name = filename[:-3] + try: + mod = import_module(f"{package}.{mod_name}") + if hasattr(mod, "CMDLET"): + cmdlet = getattr(mod, "CMDLET") + if hasattr(cmdlet, "name"): + cmd_name = cmdlet.name + aliases = getattr(cmdlet, "aliases", []) if hasattr(cmdlet, "aliases") else [] + + arg_names = [] + if hasattr(cmdlet, "args"): + for arg in cmdlet.args: + if hasattr(arg, "name"): + arg_names.append(arg.name) + elif isinstance(arg, dict): + arg_names.append(arg.get("name", "")) + if cmd_name not in cmdlet_info: + cmdlet_info[cmd_name] = { + "aliases": aliases, + "args": arg_names, + } + except Exception: + pass + + _collect_cmdlets_from_dir(os.path.join(base_dir, "cmdlets"), "cmdlets") + _collect_cmdlets_from_dir(os.path.join(base_dir, "cmdnats"), "cmdnats") # Also check root-level cmdlets (search_*, etc) # Note: search_libgen, search_soulseek, and search_debrid are consolidated into search-file with providers @@ -1700,14 +1731,11 @@ def _show_cmdlet_help(cmd_name: str): """Display help for a cmdlet.""" try: mod_name = cmd_name.replace("-", "_") - try: - mod = import_module(f"cmdlets.{mod_name}") - data = getattr(mod, "CMDLET", None) - if data: - _print_metadata(cmd_name, data) - return - except ModuleNotFoundError: - pass + mod = _import_cmd_module(mod_name) + data = getattr(mod, "CMDLET", None) if mod else None + if data: + _print_metadata(cmd_name, data) + return from cmdlets import REGISTRY cmd_fn = REGISTRY.get(cmd_name) diff --git a/cmdlets/__init__.py b/cmdlets/__init__.py index 904de25..950adba 100644 --- a/cmdlets/__init__.py +++ b/cmdlets/__init__.py @@ -91,38 +91,54 @@ def format_cmd_help(cmdlet) -> str: import os cmdlet_dir = os.path.dirname(__file__) for filename in os.listdir(cmdlet_dir): - if ( + if not ( filename.endswith(".py") and not filename.startswith("_") and filename != "__init__.py" ): - mod_name = filename[:-3] - try: - module = _import_module(f".{mod_name}", __name__) + continue + + mod_name = filename[:-3] + + # Enforce Powershell-style two-word cmdlet naming (e.g., add_file, get_file) + # 
+    # Skip native/utility scripts that are not cmdlets (e.g., adjective, worker, matrix, pipe).
+    if "_" not in mod_name:
+        continue
+
+    try:
+        module = _import_module(f".{mod_name}", __name__)
+
+        # Auto-register based on CMDLET object with exec function
+        # This allows cmdlets to be fully self-contained in the CMDLET object
+        if hasattr(module, 'CMDLET'):
+            cmdlet_obj = module.CMDLET
 
-            # Auto-register based on CMDLET object with exec function
-            # This allows cmdlets to be fully self-contained in the CMDLET object
-            if hasattr(module, 'CMDLET'):
-                cmdlet_obj = module.CMDLET
+            # Get the execution function from the CMDLET object
+            run_fn = getattr(cmdlet_obj, 'exec', None) if hasattr(cmdlet_obj, 'exec') else None
+
+            if callable(run_fn):
+                # Register main name
+                if hasattr(cmdlet_obj, 'name') and cmdlet_obj.name:
+                    normalized_name = cmdlet_obj.name.replace('_', '-').lower()
+                    REGISTRY[normalized_name] = run_fn
 
-                # Get the execution function from the CMDLET object
-                run_fn = getattr(cmdlet_obj, 'exec', None) if hasattr(cmdlet_obj, 'exec') else None
-
-                if callable(run_fn):
-                    # Register main name
-                    if hasattr(cmdlet_obj, 'name') and cmdlet_obj.name:
-                        normalized_name = cmdlet_obj.name.replace('_', '-').lower()
-                        REGISTRY[normalized_name] = run_fn
-
-                    # Register all aliases
-                    if hasattr(cmdlet_obj, 'aliases') and cmdlet_obj.aliases:
-                        for alias in cmdlet_obj.aliases:
-                            normalized_alias = alias.replace('_', '-').lower()
-                            REGISTRY[normalized_alias] = run_fn
-        except Exception as e:
-            import sys
-            print(f"Error importing cmdlet '{mod_name}': {e}", file=sys.stderr)
-            continue
+                # Register all aliases
+                if hasattr(cmdlet_obj, 'aliases') and cmdlet_obj.aliases:
+                    for alias in cmdlet_obj.aliases:
+                        normalized_alias = alias.replace('_', '-').lower()
+                        REGISTRY[normalized_alias] = run_fn
+    except Exception as e:
+        import sys
+        print(f"Error importing cmdlet '{mod_name}': {e}", file=sys.stderr)
+        continue
+
+# Import and register native commands that are not considered cmdlets
+try:
+    from cmdnats import register_native_commands as _register_native_commands
+    _register_native_commands(REGISTRY)
+except Exception:
+    # Native commands are optional; ignore if unavailable
+    pass
 
 # Import root-level modules that also register cmdlets
 # Note: search_libgen, search_soulseek, and search_debrid are now consolidated into search_provider.py
diff --git a/cmdlets/add_file.py b/cmdlets/add_file.py
index 3f56ef1..9e92895 100644
--- a/cmdlets/add_file.py
+++ b/cmdlets/add_file.py
@@ -267,13 +267,19 @@ def _handle_local_transfer(media_path: Path, destination_root: Path, result: Any
             log(f"Warning: Failed to rename file to match title: {e}", file=sys.stderr)
 
     try:
+        # Ensure filename is the hash when adding to local storage
+        resolved_hash = _resolve_file_hash(result, sidecar_hash, media_path)
+        if resolved_hash:
+            hashed_name = resolved_hash + media_path.suffix
+            target_path = destination_root / hashed_name
+            media_path = media_path.rename(target_path) if media_path != target_path else media_path
         dest_file = storage["local"].upload(media_path, location=str(destination_root), move=True)
     except Exception as exc:
         log(f"❌ Failed to move file into {destination_root}: {exc}", file=sys.stderr)
         return 1, None
 
     dest_path = Path(dest_file)
-    file_hash = _resolve_file_hash(result, sidecar_hash, dest_path)
+    file_hash = _resolve_file_hash(result, resolved_hash, dest_path)
     media_kind = _resolve_media_kind(result, dest_path)
 
     # If we have a title tag, keep it. Otherwise, derive from filename.
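The net effect of the add_file change above, in isolation: files land in local storage under their content hash plus the original extension. A minimal sketch of that convention (hashing inline here, whereas the patch reuses the hash already resolved by _resolve_file_hash):

    import hashlib
    from pathlib import Path

    def rename_to_hash(media_path: Path, destination_root: Path) -> Path:
        # Reads the whole file into memory; fine for a sketch.
        digest = hashlib.sha256(media_path.read_bytes()).hexdigest()
        target = destination_root / (digest + media_path.suffix)
        return media_path if media_path == target else media_path.rename(target)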
diff --git a/cmdlets/add_tags.py b/cmdlets/add_tags.py
index 0709d55..f74b43d 100644
--- a/cmdlets/add_tags.py
+++ b/cmdlets/add_tags.py
@@ -18,31 +18,17 @@ from ._shared import Cmdlet, CmdletArg, normalize_hash, parse_tag_arguments, exp
 from config import get_local_storage_path
 
 
-CMDLET = Cmdlet(
-    name="add-tags",
-    summary="Add tags to a Hydrus file or write them to a local .tags sidecar.",
-    usage="add-tags [-hash <sha256>] [-duplicate <spec>] [-list <name>[,<name>...]] [--all] <tag>[,<tag>...]",
-    args=[
-        CmdletArg("-hash", type="string", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
-        CmdletArg("-duplicate", type="string", description="Copy existing tag values to new namespaces. Formats: title:album,artist (explicit) or title,album,artist (inferred)"),
-        CmdletArg("-list", type="string", description="Load predefined tag lists from adjective.json. Comma-separated list names (e.g., -list philosophy,occult)."),
-        CmdletArg("--all", type="flag", description="Include temporary files in tagging (by default, only tags non-temporary files)."),
-        CmdletArg("tags", type="string", required=True, description="One or more tags to add. Comma- or space-separated. Can also use {list_name} syntax.", variadic=True),
-    ],
-    details=[
-        "- By default, only tags non-temporary files (from pipelines). Use --all to tag everything.",
-        "- Without -hash and when the selection is a local file, tags are written to <file>.tags.",
-        "- With a Hydrus hash, tags are sent to the 'my tags' service.",
-        "- Multiple tags can be comma-separated or space-separated.",
-        "- Use -list to include predefined tag lists from adjective.json: -list philosophy,occult",
-        "- Tags can also reference lists with curly braces: add-tag {philosophy} \"other:tag\"",
-        "- Use -duplicate to copy EXISTING tag values to new namespaces:",
-        "  Explicit format: -duplicate title:album,artist (copies title: to album: and artist:)",
-        "  Inferred format: -duplicate title,album,artist (first is source, rest are targets)",
-        "- The source namespace must already exist in the file being tagged.",
-        "- Target namespaces that already have a value are skipped (not overwritten).",
-    ],
-)
+def _extract_title_tag(tags: List[str]) -> Optional[str]:
+    """Return the value of the first title: tag if present."""
+    for tag in tags:
+        if isinstance(tag, str) and tag.lower().startswith("title:"):
+            value = tag.split(":", 1)[1].strip()
+            if value:
+                return value
+    return None
 
 
 @register(["add-tag", "add-tags"])
 def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
@@ -71,11 +57,30 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
         log("No valid files to tag (all results were temporary; use --all to include temporary files)", file=sys.stderr)
         return 1
 
-    # Get tags from arguments
+    # Get tags from arguments (or fall back to the pipeline payload)
     raw_tags = parsed.get("tags", [])
     if isinstance(raw_tags, str):
        raw_tags = [raw_tags]
 
+    # Fallback: if no tags were provided explicitly, try the first result's payload
+    if not raw_tags and results:
+        first = results[0]
+        payload_tags = None
+        if isinstance(first, models.PipeObject):
+            payload_tags = first.extra.get("tags") if isinstance(first.extra, dict) else None
+        elif isinstance(first, dict):
+            payload_tags = first.get("tags")
+            if not payload_tags:
+                payload_tags = first.get("extra", {}).get("tags") if isinstance(first.get("extra"), dict) else None
+        # If the payload stored tags as an attribute instead, accept that directly
+        if payload_tags is None:
+            payload_tags = getattr(first, "tags", None)
getattr(first, "tags", None) + if payload_tags: + if isinstance(payload_tags, str): + raw_tags = [payload_tags] + elif isinstance(payload_tags, list): + raw_tags = payload_tags + # Handle -list argument (convert to {list} syntax) list_arg = parsed.get("list") if list_arg: @@ -88,6 +93,10 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: tags_to_add = parse_tag_arguments(raw_tags) tags_to_add = expand_tag_groups(tags_to_add) + if not tags_to_add: + log("No tags provided to add", file=sys.stderr) + return 1 + # Get other flags hash_override = normalize_hash(parsed.get("hash")) duplicate_arg = parsed.get("duplicate") @@ -139,6 +148,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: # Tags ARE provided - append them to each result and write sidecar files or add to Hydrus sidecar_count = 0 + removed_tags: List[str] = [] for res in results: # Handle both dict and PipeObject formats file_path = None @@ -166,6 +176,10 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: hydrus_hash = res.get('hydrus_hash') or res.get('hash') or res.get('hash_hex') if not hydrus_hash and 'extra' in res: hydrus_hash = res['extra'].get('hydrus_hash') or res['extra'].get('hash') or res['extra'].get('hash_hex') + if not hydrus_hash and file_hash: + hydrus_hash = file_hash + if not storage_source and hydrus_hash and not file_path: + storage_source = 'hydrus' else: ctx.emit(res) continue @@ -215,6 +229,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: # Check if this is a namespaced tag (format: "namespace:value") if ':' in new_tag: namespace = new_tag.split(':', 1)[0] + # Track removals for Hydrus: delete old tags in same namespace (except identical) + to_remove = [t for t in existing_tags if t.startswith(namespace + ':') and t.lower() != new_tag.lower()] + removed_tags.extend(to_remove) # Remove any existing tags with the same namespace existing_tags = [t for t in existing_tags if not (t.startswith(namespace + ':'))] @@ -227,6 +244,14 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: res.extra['tags'] = existing_tags elif isinstance(res, dict): res['tags'] = existing_tags + + # If a title: tag was added, update the in-memory title so downstream display reflects it immediately + title_value = _extract_title_tag(existing_tags) + if title_value: + if isinstance(res, models.PipeObject): + res.title = title_value + elif isinstance(res, dict): + res['title'] = title_value # Determine where to add tags: Hydrus, local DB, or sidecar if storage_source and storage_source.lower() == 'hydrus': @@ -237,6 +262,10 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: log(f"[add_tags] Adding {len(existing_tags)} tag(s) to Hydrus file: {target_hash}", file=sys.stderr) hydrus_client = hydrus_wrapper.get_client(config) hydrus_client.add_tags(target_hash, existing_tags, "my tags") + # Delete old namespace tags we replaced (e.g., previous title:) + if removed_tags: + unique_removed = sorted(set(removed_tags)) + hydrus_client.delete_tags(target_hash, unique_removed, "my tags") log(f"[add_tags] ✓ Tags added to Hydrus", file=sys.stderr) sidecar_count += 1 except Exception as e: @@ -274,3 +303,29 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: log(f"[add_tags] Processed {len(results)} result(s)", file=sys.stderr) return 0 + +CMDLET = Cmdlet( + name="add-tags", + summary="Add tags to a Hydrus file or write them to a local .tags sidecar.", + 
usage="add-tags [-hash ] [-duplicate ] [-list [,...]] [--all] [,...]", + args=[ + CmdletArg("-hash", type="string", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."), + CmdletArg("-duplicate", type="string", description="Copy existing tag values to new namespaces. Formats: title:album,artist (explicit) or title,album,artist (inferred)"), + CmdletArg("-list", type="string", description="Load predefined tag lists from adjective.json. Comma-separated list names (e.g., -list philosophy,occult)."), + CmdletArg("--all", type="flag", description="Include temporary files in tagging (by default, only tags non-temporary files)."), + CmdletArg("tags", type="string", required=False, description="One or more tags to add. Comma- or space-separated. Can also use {list_name} syntax. If omitted, uses tags from pipeline payload.", variadic=True), + ], + details=[ + "- By default, only tags non-temporary files (from pipelines). Use --all to tag everything.", + "- Without -hash and when the selection is a local file, tags are written to .tags.", + "- With a Hydrus hash, tags are sent to the 'my tags' service.", + "- Multiple tags can be comma-separated or space-separated.", + "- Use -list to include predefined tag lists from adjective.json: -list philosophy,occult", + "- Tags can also reference lists with curly braces: add-tag {philosophy} \"other:tag\"", + "- Use -duplicate to copy EXISTING tag values to new namespaces:", + " Explicit format: -duplicate title:album,artist (copies title: to album: and artist:)", + " Inferred format: -duplicate title,album,artist (first is source, rest are targets)", + "- The source namespace must already exist in the file being tagged.", + "- Target namespaces that already have a value are skipped (not overwritten).", + ], +) \ No newline at end of file diff --git a/cmdlets/delete_tag.py b/cmdlets/delete_tag.py index bc91927..30d4a9b 100644 --- a/cmdlets/delete_tag.py +++ b/cmdlets/delete_tag.py @@ -2,6 +2,7 @@ from __future__ import annotations from typing import Any, Dict, Sequence import json +import sys from . import register import models @@ -219,6 +220,12 @@ def _process_deletion(tags: list[str], hash_hex: str | None, file_path: str | No if not tags: return False + + # Safety: block deleting title: without replacement to avoid untitled files + title_tags = [t for t in tags if isinstance(t, str) and t.lower().startswith("title:")] + if title_tags: + log("Cannot delete title: tag without replacement. 
Use add-tag \"title:new title\" instead.", file=sys.stderr) + return False if not hash_hex and not file_path: log("Item does not include a hash or file path") diff --git a/cmdlets/download_data.py b/cmdlets/download_data.py index 63e7668..c09aaef 100644 --- a/cmdlets/download_data.py +++ b/cmdlets/download_data.py @@ -41,7 +41,8 @@ from config import resolve_output_dir from metadata import ( fetch_openlibrary_metadata_tags, format_playlist_entry, - extract_ytdlp_tags + extract_ytdlp_tags, + build_book_tags, ) # ============================================================================ @@ -1499,12 +1500,19 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any], emit_results: metadata = item.get('full_metadata', {}) if isinstance(item.get('full_metadata'), dict) else {} mirrors = metadata.get('mirrors', {}) book_id = metadata.get('book_id', '') + author = metadata.get('author') + isbn_val = metadata.get('isbn') + year_val = metadata.get('year') if url: url_entry = { 'url': str(url), 'mirrors': mirrors, # Alternative mirrors for fallback 'book_id': book_id, + 'title': title, + 'author': author, + 'isbn': isbn_val, + 'year': year_val, } urls_to_download.append(url_entry) debug(f"[search-result] LibGen: '{title}'") @@ -1700,12 +1708,19 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any], emit_results: metadata = getattr(item, 'full_metadata', {}) if isinstance(getattr(item, 'full_metadata', None), dict) else {} mirrors = metadata.get('mirrors', {}) book_id = metadata.get('book_id', '') + author = metadata.get('author') + isbn_val = metadata.get('isbn') + year_val = metadata.get('year') if url: url_entry = { 'url': str(url), 'mirrors': mirrors, # Alternative mirrors for fallback 'book_id': book_id, + 'title': title, + 'author': author, + 'isbn': isbn_val, + 'year': year_val, } urls_to_download.append(url_entry) else: @@ -2177,6 +2192,10 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any], emit_results: primary_url = url.get('url') mirrors_dict = url.get('mirrors', {}) book_id = url.get('book_id', '') + title_val = url.get('title') + author_val = url.get('author') + isbn_val = url.get('isbn') + year_val = url.get('year') if not primary_url: debug(f"Skipping libgen entry: no primary URL") @@ -2219,39 +2238,82 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any], emit_results: # Use libgen_service's download_from_mirror for proper libgen handling from helper.libgen_service import download_from_mirror - + # Generate filename from book_id and title safe_title = "".join(c for c in str(title or "book") if c.isalnum() or c in (' ', '.', '-'))[:100] file_path = final_output_dir / f"{safe_title}_{book_id}.pdf" - + + progress_bar = models.ProgressBar() + progress_start = time.time() + last_update = [progress_start] + progress_bytes = [0] + progress_total = [0] + + def _libgen_progress(downloaded: int, total: int) -> None: + progress_bytes[0] = downloaded + progress_total[0] = total + now = time.time() + if total > 0 and now - last_update[0] >= 0.5: + percent = (downloaded / total) * 100 + elapsed = max(now - progress_start, 1e-6) + speed = downloaded / elapsed if elapsed > 0 else 0 + remaining = max(total - downloaded, 0) + eta = remaining / speed if speed > 0 else 0 + minutes, seconds = divmod(int(eta), 60) + hours, minutes = divmod(minutes, 60) + eta_str = f"{hours:02d}:{minutes:02d}:{seconds:02d}" + speed_str = f"{progress_bar.format_bytes(speed)}/s" + progress_line = progress_bar.format_progress( + percent_str=f"{percent:.1f}%", + 
+                                    downloaded=downloaded,
+                                    total=total,
+                                    speed_str=speed_str,
+                                    eta_str=eta_str,
+                                )
+                                debug(f"  {progress_line}")
+                                last_update[0] = now
+
                         # Attempt download using libgen's native function
-                        success = download_from_mirror(
+                        success, downloaded_path = download_from_mirror(
                             mirror_url=mirror_url,
                             output_path=file_path,
                             log_info=lambda msg: debug(f"  {msg}"),
-                            log_error=lambda msg: debug(f"  ⚠ {msg}")
+                            log_error=lambda msg: debug(f"  ⚠ {msg}"),
+                            progress_callback=_libgen_progress,
                         )
 
-                        if success and file_path.exists():
+                        final_path = Path(downloaded_path) if downloaded_path else file_path
+                        if success and final_path.exists():
+                            downloaded = progress_bytes[0] or final_path.stat().st_size
+                            elapsed = time.time() - progress_start
+                            avg_speed = downloaded / elapsed if elapsed > 0 else 0
+                            debug(f"  ✓ Downloaded in {elapsed:.1f}s at {progress_bar.format_bytes(avg_speed)}/s")
                             debug(f"  ✓ Downloaded successfully from mirror #{mirror_idx}")
                             successful_mirror = mirror_url
                             download_succeeded = True
 
                             # Emit result for downstream cmdlets
-                            file_hash = _compute_file_hash(file_path)
-                            emit_tags = ['libgen', 'book']
-
+                            file_hash = _compute_file_hash(final_path)
+                            emit_tags = build_book_tags(
+                                title=title_val or title,
+                                author=author_val,
+                                isbn=isbn_val,
+                                year=year_val,
+                                source='libgen',
+                                extra=[f"libgen_id:{book_id}"] if book_id else None,
+                            )
+
                             pipe_obj = create_pipe_object_result(
                                 source='libgen',
                                 identifier=book_id,
-                                file_path=str(file_path),
+                                file_path=str(final_path),
                                 cmdlet_name='download-data',
                                 file_hash=file_hash,
                                 tags=emit_tags,
                                 source_url=successful_mirror
                             )
                             pipeline_context.emit(pipe_obj)
-                            downloaded_files.append(str(file_path))
+                            downloaded_files.append(str(final_path))
                             exit_code = 0
                             break  # Success, stop trying mirrors
@@ -2643,38 +2705,61 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any], emit_results:
             # Let's try to get metadata to make a good filename
             filename = "libgen_download.bin"
+            title_from_results = None
+            author_from_results = None
+            year_from_results = None
             if libgen_id and results:
-                title = results[0].get("title", "book")
+                title_from_results = results[0].get("title")
+                author_from_results = results[0].get("author")
+                year_from_results = results[0].get("year")
                 ext = results[0].get("extension", "pdf")
                 # Sanitize filename
-                safe_title = "".join(c for c in title if c.isalnum() or c in (' ', '-', '_')).strip()
+                safe_title = "".join(c for c in (title_from_results or "book") if c.isalnum() or c in (' ', '-', '_')).strip()
                 filename = f"{safe_title}.{ext}"
             elif "series.php" in url:
                 filename = f"series_{re.search(r'id=(\d+)', url).group(1) if re.search(r'id=(\d+)', url) else 'unknown'}.pdf"
 
             output_path = final_output_dir / filename
 
-            if download_from_mirror(url, output_path, log_info=debug, log_error=log):
-                debug(f"✓ LibGen download successful: {output_path}")
-
+            success, downloaded_path = download_from_mirror(
+                url,
+                output_path,
+                log_info=debug,
+                log_error=log,
+            )
+            final_file = Path(downloaded_path) if downloaded_path else output_path
+            if success and final_file.exists():
+                debug(f"✓ LibGen download successful: {final_file}")
+
                 # Create a result object
                 info = {
                     "id": libgen_id or "libgen",
                     "title": filename,
                     "webpage_url": url,
-                    "ext": output_path.suffix.lstrip("."),
+                    "ext": final_file.suffix.lstrip("."),
                 }
 
+                emit_tags = build_book_tags(
+                    title=title_from_results or filename,
+                    author=author_from_results,
+                    year=year_from_results,
+                    source="libgen",
+                    extra=[f"libgen_id:{libgen_id}"] if libgen_id else None,
+                )
+                file_hash = _compute_file_hash(final_file)
+
                 # Emit result
                 pipeline_context.emit(create_pipe_object_result(
                     source="libgen",
                     identifier=libgen_id or "libgen",
-                    file_path=str(output_path),
+                    file_path=str(final_file),
                     cmdlet_name="download-data",
                     title=filename,
+                    file_hash=file_hash,
+                    tags=emit_tags,
                     extra=info
                 ))
-                downloaded_files.append(str(output_path))
+                downloaded_files.append(str(final_file))
                 continue
             else:
                 debug("⚠ LibGen specialized download failed, falling back to generic downloader...")
diff --git a/cmdlets/get_file.py b/cmdlets/get_file.py
index 00ad5ec..6096356 100644
--- a/cmdlets/get_file.py
+++ b/cmdlets/get_file.py
@@ -316,6 +316,12 @@ def _play_in_mpv(file_url: str, file_title: str, is_stream: bool = False, header
         return False
 
 
+# Backward-compatible alias for modules expecting the old IPC helper name.
+def _get_fixed_ipc_pipe() -> str:
+    """Return the shared MPV IPC pipe path (compat shim)."""
+    return get_ipc_pipe_path()
+
+
 def _handle_search_result(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
     """Handle a file from search-file results using FileStorage backend."""
     try:
diff --git a/cmdlets/get_tag.py b/cmdlets/get_tag.py
index c43219e..b9a1eb9 100644
--- a/cmdlets/get_tag.py
+++ b/cmdlets/get_tag.py
@@ -13,6 +13,7 @@ from __future__ import annotations
 
 import sys
 from helper.logger import log
+from helper.metadata_search import get_metadata_provider
 import subprocess
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Sequence, Tuple
@@ -1015,33 +1016,82 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
     scrape_url = parsed_args.get("scrape")
     scrape_requested = scrape_url is not None
 
-    # Handle URL scraping mode
+    # Handle URL or provider scraping mode
     if scrape_requested and scrape_url:
         import json as json_module
-        # Don't print debug message - output should be JSON only for programmatic consumption
-        # logger.debug(f"Scraping URL: {scrape_url}")
-        title, tags, formats, playlist_items = _scrape_url_metadata(scrape_url)
+
+        if scrape_url.startswith("http://") or scrape_url.startswith("https://"):
+            # URL scraping (existing behavior)
+            title, tags, formats, playlist_items = _scrape_url_metadata(scrape_url)
+            if not tags:
+                log("No tags extracted from URL", file=sys.stderr)
+                return 1
+            output = {
+                "title": title,
+                "tags": tags,
+                "formats": [(label, fmt_id) for label, fmt_id in formats],
+                "playlist_items": playlist_items,
+            }
+            print(json_module.dumps(output, ensure_ascii=False))
+            return 0
 
-        if not tags:
-            log("No tags extracted from URL", file=sys.stderr)
+        # Provider scraping (e.g., itunes)
+        provider = get_metadata_provider(scrape_url, config)
+        if provider is None:
+            log(f"Unknown metadata provider: {scrape_url}", file=sys.stderr)
             return 1
 
-        # Build result object
-        # result_obj = TagItem("url_scrape", tag_index=0, hash_hex=None, source="url", service_name=None)
-        # result_obj.title = title or "URL Content"
+        # Determine the query from the title on the result, or from the filename
+        title_hint = get_field(result, "title", None) or get_field(result, "name", None)
+        if not title_hint:
+            file_path = get_field(result, "path", None) or get_field(result, "filename", None)
+            if file_path:
+                title_hint = Path(str(file_path)).stem
 
-        # Emit tags as JSON for pipeline consumption (output should be pure JSON on stdout)
-        output = {
-            "title": title,
-            "tags": tags,
-            "formats": [(label, fmt_id) for label, fmt_id in formats],
-            "playlist_items": playlist_items,
-        }
+        if not title_hint:
+            log("No title available to search for metadata", file=sys.stderr)
+            return 1
-        # Use print() directly to stdout for JSON output (NOT log() which adds prefix)
-        # This ensures the output is capturable by the download modal and other pipelines
-        # The modal filters for lines starting with '{' so the prefix breaks parsing
-        print(json_module.dumps(output, ensure_ascii=False))
+        items = provider.search(title_hint, limit=10)
+        if not items:
+            log("No metadata results found", file=sys.stderr)
+            return 1
+
+        from result_table import ResultTable
+        table = ResultTable(f"Metadata: {provider.name}")
+        table.set_source_command("get-tag", [])
+        selection_payload = []
+        hash_for_payload = normalize_hash(hash_override) or normalize_hash(get_field(result, "hash_hex", None))
+        for idx, item in enumerate(items):
+            tags = provider.to_tags(item)
+            row = table.add_row()
+            row.add_column("Title", item.get("title", ""))
+            row.add_column("Artist", item.get("artist", ""))
+            row.add_column("Album", item.get("album", ""))
+            row.add_column("Year", item.get("year", ""))
+            payload = {
+                "tags": tags,
+                "provider": provider.name,
+                "title": item.get("title"),
+                "artist": item.get("artist"),
+                "album": item.get("album"),
+                "year": item.get("year"),
+                "extra": {
+                    "tags": tags,
+                    "provider": provider.name,
+                    "hydrus_hash": hash_for_payload,
+                    "storage_source": get_field(result, "source", None) or get_field(result, "origin", None),
+                },
+                "file_hash": hash_for_payload,
+            }
+            selection_payload.append(payload)
+            table.set_row_selection_args(idx, [str(idx + 1)])
+
+        ctx.set_last_result_table_overlay(table, selection_payload)
+        ctx.set_current_stage_table(table)
+        # Preserve items for @ selection and downstream pipes without emitting duplicates
+        ctx.set_last_result_items_only(selection_payload)
+        print(table)
         return 0
 
     # If -scrape was requested but no URL, that's an error
@@ -1178,7 +1228,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
 CMDLET = Cmdlet(
     name="get-tag",
     summary="Get tags from Hydrus or local sidecar metadata",
-    usage="get-tag [-hash <sha256>] [--store <key>] [--emit] [-scrape <url>]",
+    usage="get-tag [-hash <sha256>] [--store <key>] [--emit] [-scrape <url|provider>]",
     aliases=["tags"],
     args=[
         SharedArgs.HASH,
@@ -1197,7 +1247,7 @@ CMDLET = Cmdlet(
         CmdletArg(
             name="-scrape",
            type="string",
-            description="Scrape metadata from URL (returns tags as JSON)",
+            description="Scrape metadata from a URL or a provider name (returns tags as JSON or a table)",
             required=False
         )
     ]
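How the provider branch above is driven end to end (a sketch; the empty config dict stands in for the real CLI config, and the module is defined in helper/metadata_search.py later in this diff):

    from helper.metadata_search import get_metadata_provider

    provider = get_metadata_provider("itunes", {})
    if provider is not None:
        for item in provider.search("Artist - Track", limit=5):
            print(provider.to_tags(item))
            # e.g. ["title:...", "artist:...", "album:...", "year:...", "source:itunes"]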
filename.startswith("_") and filename != "__init__.py"): + continue + + mod_name = filename[:-3] + try: + module = import_module(f".{mod_name}", __name__) + cmdlet_obj = getattr(module, "CMDLET", None) + if cmdlet_obj: + _register_cmdlet_object(cmdlet_obj, registry) + except Exception as exc: + import sys + print(f"Error importing native command '{mod_name}': {exc}", file=sys.stderr) + continue diff --git a/cmdlets/adjective.py b/cmdnats/adjective.py similarity index 98% rename from cmdlets/adjective.py rename to cmdnats/adjective.py index 4231a91..177e3f9 100644 --- a/cmdlets/adjective.py +++ b/cmdnats/adjective.py @@ -2,7 +2,7 @@ import json import os import sys from typing import List, Dict, Any, Optional, Sequence -from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args +from cmdlets._shared import Cmdlet, CmdletArg, parse_cmdlet_args from helper.logger import log from result_table import ResultTable import pipeline as ctx diff --git a/cmdlets/matrix.py b/cmdnats/matrix.py similarity index 98% rename from cmdlets/matrix.py rename to cmdnats/matrix.py index a7f8777..3f3e3a7 100644 --- a/cmdlets/matrix.py +++ b/cmdnats/matrix.py @@ -1,6 +1,6 @@ from typing import Any, Dict, Sequence, List import sys -from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args +from cmdlets._shared import Cmdlet, CmdletArg, parse_cmdlet_args from helper.logger import log, debug from result_table import ResultTable from helper.file_storage import MatrixStorageBackend diff --git a/cmdlets/pipe.py b/cmdnats/pipe.py similarity index 77% rename from cmdlets/pipe.py rename to cmdnats/pipe.py index 063a258..188e232 100644 --- a/cmdlets/pipe.py +++ b/cmdnats/pipe.py @@ -5,7 +5,9 @@ import platform import socket import re import subprocess -from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args +from urllib.parse import urlparse +from pathlib import Path +from cmdlets._shared import Cmdlet, CmdletArg, parse_cmdlet_args from helper.logger import log, debug from result_table import ResultTable from helper.mpv_ipc import get_ipc_pipe_path, MPVIPCClient @@ -13,7 +15,7 @@ import pipeline as ctx from helper.download import is_url_supported_by_ytdlp from helper.local_library import LocalLibrarySearchOptimizer -from config import get_local_storage_path +from config import get_local_storage_path, get_hydrus_access_key, get_hydrus_url from hydrus_health_check import get_cookies_file_path def _send_ipc_command(command: Dict[str, Any], silent: bool = False) -> Optional[Any]: @@ -72,6 +74,150 @@ def _extract_title_from_item(item: Dict[str, Any]) -> str: return title or filename or "Unknown" + +def _extract_target_from_memory_uri(text: str) -> Optional[str]: + """Extract the real target URL/path from a memory:// M3U payload.""" + if not isinstance(text, str) or not text.startswith("memory://"): + return None + for line in text.splitlines(): + line = line.strip() + if not line or line.startswith('#') or line.startswith('memory://'): + continue + return line + return None + + +def _infer_store_from_playlist_item(item: Dict[str, Any]) -> str: + """Infer a friendly store label from an MPV playlist entry.""" + name = item.get("filename") if isinstance(item, dict) else None + target = str(name or "") + + # Unwrap memory:// M3U wrapper + memory_target = _extract_target_from_memory_uri(target) + if memory_target: + target = memory_target + + lower = target.lower() + if lower.startswith("magnet:"): + return "magnet" + if lower.startswith("hydrus://"): + return "hydrus" + + # Windows / UNC paths + if re.match(r"^[a-z]:[\\/]", target, 
+        return "local"
+
+    # file:// URLs
+    if lower.startswith("file://"):
+        return "local"
+
+    parsed = urlparse(target)
+    host = (parsed.netloc or "").lower()
+    path = parsed.path or ""
+
+    if not host:
+        return ""
+
+    host_no_port = host.split(":", 1)[0]
+    host_stripped = host_no_port[4:] if host_no_port.startswith("www.") else host_no_port
+
+    if "youtube" in host_stripped or "youtu.be" in lower:
+        return "youtube"
+    if "soundcloud" in host_stripped:
+        return "soundcloud"
+    if "bandcamp" in host_stripped:
+        return "bandcamp"
+    if "get_files" in path or host_stripped in {"127.0.0.1", "localhost"}:
+        return "hydrus"
+    if re.match(r"^\d+\.\d+\.\d+\.\d+$", host_stripped) and "get_files" in path:
+        return "hydrus"
+
+    parts = host_stripped.split('.')
+    if len(parts) >= 2:
+        return parts[-2] or host_stripped
+    return host_stripped
+
+
+def _format_playlist_location(name: str, max_len: int = 48) -> str:
+    """Format the playlist filename/URL for display while keeping the backend value untouched."""
+    target = name or ""
+    memory_target = _extract_target_from_memory_uri(target)
+    if memory_target:
+        target = memory_target
+
+    lower = target.lower()
+    # Local paths: show the basename only
+    if re.match(r"^[a-z]:[\\/]", target, flags=re.IGNORECASE) or target.startswith("\\\\"):
+        target = Path(target).name
+    elif lower.startswith("file://"):
+        parsed = urlparse(target)
+        target = Path(parsed.path or "").name or target
+    else:
+        parsed = urlparse(target)
+        host = parsed.netloc or ""
+        if host:
+            host_no_port = host.split(":", 1)[0]
+            host_no_port = host_no_port[4:] if host_no_port.startswith("www.") else host_no_port
+            tail = parsed.path.split('/')[-1] if parsed.path else ""
+            if tail:
+                target = f"{host_no_port}/{tail}"
+            else:
+                target = host_no_port
+
+    if len(target) > max_len:
+        return target[: max_len - 3] + "..."
+    return target
+
+
+def _build_hydrus_header(config: Dict[str, Any]) -> Optional[str]:
+    """Return the header string for Hydrus auth if configured."""
+    try:
+        key = get_hydrus_access_key(config)
+    except Exception:
+        key = None
+    if not key:
+        return None
+    return f"Hydrus-Client-API-Access-Key: {key}"
+
+
+def _build_ytdl_options(config: Optional[Dict[str, Any]], hydrus_header: Optional[str]) -> Optional[str]:
+    """Compose the ytdl-raw-options string, including cookies and an optional Hydrus header."""
+    opts: List[str] = []
+    try:
+        cookies_path = get_cookies_file_path()
+    except Exception:
+        cookies_path = None
+    if cookies_path:
+        # Normalize outside the f-string: backslashes inside f-string expressions
+        # are a SyntaxError before Python 3.12
+        normalized_cookies = cookies_path.replace("\\", "/")
+        opts.append(f"cookies={normalized_cookies}")
+    else:
+        opts.append("cookies-from-browser=chrome")
+    if hydrus_header:
+        opts.append(f"add-header={hydrus_header}")
+    return ",".join(opts) if opts else None
+
+
+def _is_hydrus_target(target: str, hydrus_url: Optional[str]) -> bool:
+    if not target:
+        return False
+    lower = target.lower()
+    if "hydrus://" in lower:
+        return True
+    parsed = urlparse(target)
+    host = (parsed.netloc or "").lower()
+    path = parsed.path or ""
+    if hydrus_url:
+        try:
+            hydrus_host = urlparse(hydrus_url).netloc.lower()
+            if hydrus_host and hydrus_host in host:
+                return True
+        except Exception:
+            pass
+    if "get_files" in path or "file?hash=" in path:
+        return True
+    if re.match(r"^\d+\.\d+\.\d+\.\d+$", host) and "get_files" in path:
+        return True
+    return False
+
 def _ensure_ytdl_cookies() -> None:
     """Ensure yt-dlp options are set correctly for this session."""
     from pathlib import Path
@@ -127,8 +273,7 @@ def _monitor_mpv_logs(duration: float = 3.0) -> None:
             client.disconnect()
     except Exception:
         pass
-
-def _queue_items(items: List[Any], clear_first: bool = False) -> bool:
+def _queue_items(items: List[Any], clear_first: bool = False, config: Optional[Dict[str, Any]] = None) -> bool:
     """Queue items to MPV, starting it if necessary.
 
     Args:
@@ -141,6 +286,14 @@ def _queue_items(items: List[Any], clear_first: bool = False) -> bool:
     # Just verify cookies are configured, don't try to set via IPC
     _ensure_ytdl_cookies()
 
+    hydrus_header = _build_hydrus_header(config or {})
+    ytdl_opts = _build_ytdl_options(config, hydrus_header)
+    hydrus_url = None
+    try:
+        hydrus_url = get_hydrus_url(config) if config is not None else None
+    except Exception:
+        hydrus_url = None
+
     for i, item in enumerate(items):
         # Extract URL/Path
         target = None
@@ -175,13 +328,21 @@ def _queue_items(items: List[Any], clear_first: bool = False) -> bool:
         if clear_first and i == 0:
             mode = "replace"
 
+        # If this is a Hydrus target, set the header property and yt-dlp headers before loading
+        if hydrus_header and _is_hydrus_target(target_to_send, hydrus_url):
+            header_cmd = {"command": ["set_property", "http-header-fields", hydrus_header], "request_id": 199}
+            _send_ipc_command(header_cmd, silent=True)
+            if ytdl_opts:
+                ytdl_cmd = {"command": ["set_property", "ytdl-raw-options", ytdl_opts], "request_id": 197}
+                _send_ipc_command(ytdl_cmd, silent=True)
+
         cmd = {"command": ["loadfile", target_to_send, mode], "request_id": 200}
         resp = _send_ipc_command(cmd)
 
         if resp is None:
             # MPV not running (or died)
             # Start MPV with remaining items
-            _start_mpv(items[i:])
+            _start_mpv(items[i:], config=config)
             return True
         elif resp.get("error") == "success":
             # Also set property for good measure
@@ -448,7 +609,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
         elif isinstance(result, dict):
             items_to_add = [result]
 
-        if _queue_items(items_to_add):
+        if _queue_items(items_to_add, config=config):
             mpv_started = True
 
         if items_to_add:
@@ -472,7 +633,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
             return 0
         else:
             debug("MPV is not running. Starting new instance...")
Starting new instance...") - _start_mpv([]) + _start_mpv([], config=config) return 0 if not items: @@ -491,6 +652,13 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: item = items[idx] title = _extract_title_from_item(item) + filename = item.get("filename", "") if isinstance(item, dict) else "" + hydrus_header = _build_hydrus_header(config or {}) + hydrus_url = None + try: + hydrus_url = get_hydrus_url(config) if config is not None else None + except Exception: + hydrus_url = None if clear_mode: # Remove item @@ -507,6 +675,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: return 1 else: # Play item + if hydrus_header and _is_hydrus_target(filename, hydrus_url): + header_cmd = {"command": ["set_property", "http-header-fields", hydrus_header], "request_id": 198} + _send_ipc_command(header_cmd, silent=True) cmd = {"command": ["playlist-play-index", idx], "request_id": 102} resp = _send_ipc_command(cmd) if resp and resp.get("error") == "success": @@ -544,6 +715,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: for i, item in enumerate(items): is_current = item.get("current", False) title = _extract_title_from_item(item) + store = _infer_store_from_playlist_item(item) + filename = item.get("filename", "") if isinstance(item, dict) else "" + display_loc = _format_playlist_location(filename) # Truncate if too long if len(title) > 80: @@ -551,7 +725,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: row = table.add_row() row.add_column("Current", "*" if is_current else "") + row.add_column("Store", store) row.add_column("Title", title) + row.add_column("Filename", display_loc) table.set_row_selection_args(i, [str(i + 1)]) @@ -565,7 +741,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: return 0 -def _start_mpv(items: List[Any]) -> None: +def _start_mpv(items: List[Any], config: Optional[Dict[str, Any]] = None) -> None: """Start MPV with a list of items.""" import subprocess import time as _time_module @@ -584,20 +760,18 @@ def _start_mpv(items: List[Any]) -> None: # Start MPV in idle mode with IPC server cmd = ['mpv', f'--input-ipc-server={ipc_pipe}', '--idle', '--force-window'] cmd.append('--ytdl-format=bestvideo[height<=?1080]+bestaudio/best[height<=?1080]') - - # Use cookies.txt if available, otherwise fallback to browser cookies + + hydrus_header = _build_hydrus_header(config or {}) + ytdl_opts = _build_ytdl_options(config, hydrus_header) + cookies_path = get_cookies_file_path() if cookies_path: - # yt-dlp on Windows needs forward slashes OR properly escaped backslashes - # Using forward slashes is more reliable across systems - cookies_path_normalized = cookies_path.replace('\\', '/') - debug(f"Starting MPV with cookies file: {cookies_path_normalized}") - # yt-dlp expects the cookies option with file path - cmd.append(f'--ytdl-raw-options=cookies={cookies_path_normalized}') + debug(f"Starting MPV with cookies file: {cookies_path.replace('\\', '/')}") else: - # Use cookies from browser (Chrome) to handle age-restricted content debug("Starting MPV with browser cookies: chrome") - cmd.append('--ytdl-raw-options=cookies-from-browser=chrome') + + if ytdl_opts: + cmd.append(f'--ytdl-raw-options={ytdl_opts}') try: kwargs = {} @@ -607,6 +781,8 @@ def _start_mpv(items: List[Any]) -> None: # Log the complete MPV command being executed debug(f"DEBUG: Full MPV command: {' '.join(cmd)}") + if hydrus_header: + cmd.append(f'--http-header-fields={hydrus_header}') 
         subprocess.Popen(cmd, stdin=subprocess.DEVNULL, stdout=subprocess.PIPE, stderr=subprocess.PIPE, **kwargs)
         debug(f"Started MPV process")
@@ -625,7 +801,7 @@ def _start_mpv(items: List[Any]) -> None:
 
         # Queue items via IPC
         if items:
-            _queue_items(items)
+            _queue_items(items, config=config)
 
     except Exception as e:
         debug(f"Error starting MPV: {e}", file=sys.stderr)
diff --git a/cmdlets/worker.py b/cmdnats/worker.py
similarity index 99%
rename from cmdlets/worker.py
rename to cmdnats/worker.py
index 619d247..3a1287c 100644
--- a/cmdlets/worker.py
+++ b/cmdnats/worker.py
@@ -6,8 +6,8 @@ import json
 import sys
 from datetime import datetime, timezone
 
-from . import register
-from ._shared import Cmdlet, CmdletArg
+from cmdlets import register
+from cmdlets._shared import Cmdlet, CmdletArg
 import pipeline as ctx
 from helper.logger import log
 from config import get_local_storage_path
diff --git a/helper/file_storage.py b/helper/file_storage.py
index fad5d24..162a9c4 100644
--- a/helper/file_storage.py
+++ b/helper/file_storage.py
@@ -1397,6 +1397,10 @@ class FileStorage:
                     log(f"Registered remote storage backend: {name} -> {url}{auth_status}", file=sys.stderr)
                 except Exception as e:
                     log(f"Failed to register remote storage '{name}': {e}", file=sys.stderr)
+
+    def list_backends(self) -> list[str]:
+        """Return available backend keys for autocomplete and validation."""
+        return sorted(self._backends.keys())
 
     def __getitem__(self, backend_name: str) -> StorageBackend:
         """Get a storage backend by name.
diff --git a/helper/libgen_service.py b/helper/libgen_service.py
index e7fca3f..9a8ddfe 100644
--- a/helper/libgen_service.py
+++ b/helper/libgen_service.py
@@ -9,8 +9,8 @@ import logging
 import re
 import requests
 from pathlib import Path
-from typing import Any, Callable, Dict, List, Optional
-from urllib.parse import quote, urljoin
+from typing import Any, Callable, Dict, List, Optional, Tuple
+from urllib.parse import quote, urljoin, urlparse, unquote
 
 # Optional dependencies
 try:
@@ -405,6 +405,61 @@ def _resolve_download_url(
     return None
 
 
+def _guess_filename_extension(download_url: str, headers: Dict[str, str]) -> Optional[str]:
+    """Guess the file extension from headers or the download URL."""
+    content_disposition = headers.get("content-disposition", "")
+    if content_disposition:
+        match = re.search(r'filename\*?=(?:UTF-8\'\'|"?)([^";]+)', content_disposition, flags=re.IGNORECASE)
+        if match:
+            filename = unquote(match.group(1).strip('"'))
+            suffix = Path(filename).suffix
+            if suffix:
+                return suffix.lstrip('.')
+
+    parsed = urlparse(download_url)
+    suffix = Path(parsed.path).suffix
+    if suffix:
+        return suffix.lstrip('.')
+
+    content_type = headers.get('content-type', '').lower()
+    mime_map = {
+        'application/pdf': 'pdf',
+        'application/epub+zip': 'epub',
+        'application/x-mobipocket-ebook': 'mobi',
+        'application/x-cbr': 'cbr',
+        'application/x-cbz': 'cbz',
+        'application/zip': 'zip',
+    }
+
+    for mime, ext in mime_map.items():
+        if mime in content_type:
+            return ext
+
+    return None
+
+
+def _apply_extension(path: Path, extension: Optional[str]) -> Path:
+    """Rename the path to match the detected extension, if needed."""
+    if not extension:
+        return path
+
+    suffix = extension if extension.startswith('.') else f'.{extension}'
+    if path.suffix.lower() == suffix.lower():
+        return path
+
+    candidate = path.with_suffix(suffix)
+    base_stem = path.stem
+    counter = 1
+    while candidate.exists() and counter < 100:
+        candidate = path.with_name(f"{base_stem}({counter}){suffix}")
+        counter += 1
+
+    try:
+        path.replace(candidate)
+        return candidate
+    except Exception:
+        return path
+
+
 def download_from_mirror(
     mirror_url: str,
     output_path: Path,
@@ -412,8 +467,9 @@ def download_from_mirror(
     log_info: LogFn = None,
     log_error: ErrorFn = None,
     session: Optional[requests.Session] = None,
-) -> bool:
-    """Download file from a LibGen mirror URL."""
+    progress_callback: Optional[Callable[[int, int], None]] = None,
+) -> Tuple[bool, Optional[Path]]:
+    """Download file from a LibGen mirror URL with optional progress tracking."""
     session = session or requests.Session()
     output_path = Path(output_path)
     output_path.parent.mkdir(parents=True, exist_ok=True)
@@ -425,33 +481,43 @@ def download_from_mirror(
 
         if not download_url:
             _call(log_error, "[download] Could not find direct download link")
-            return False
+            return False, None
 
         _call(log_info, f"[download] Downloading from: {download_url}")
 
-        # Download the actual file
+        downloaded = 0
+        total_size = 0
+        headers: Dict[str, str] = {}
+
         with session.get(download_url, stream=True, timeout=60) as r:
             r.raise_for_status()
+            # Lowercase the keys: a plain dict of r.headers is case-sensitive,
+            # and the lookups below use lowercase names
+            headers = {k.lower(): v for k, v in r.headers.items()}
 
             # Verify it's not HTML (error page)
-            ct = r.headers.get("content-type", "").lower()
+            ct = headers.get("content-type", "").lower()
             if "text/html" in ct:
                 _call(log_error, "[download] Final URL returned HTML, not a file.")
-                return False
+                return False, None
 
-            total_size = int(r.headers.get("content-length", 0))
-            downloaded = 0
+            total_size = int(headers.get("content-length", 0) or 0)
 
             with open(output_path, "wb") as f:
                 for chunk in r.iter_content(chunk_size=8192):
                     if chunk:
                         f.write(chunk)
                         downloaded += len(chunk)
-                        # Optional: progress logging
-
-        _call(log_info, f"[download] Saved to {output_path}")
-        return True
+                        if progress_callback:
+                            progress_callback(downloaded, total_size)
+
+        final_extension = _guess_filename_extension(download_url, headers)
+        final_path = _apply_extension(output_path, final_extension)
+
+        if progress_callback and total_size > 0:
+            progress_callback(downloaded, total_size)
+
+        _call(log_info, f"[download] Saved to {final_path}")
+        return True, final_path
 
     except Exception as e:
         _call(log_error, f"[download] Download failed: {e}")
-        return False
+        return False, None
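Callers migrating to the new download_from_mirror signature must unpack the tuple and prefer the returned path, since the helper may rename the file to match the detected extension. A sketch of the expected call shape (fetch is an illustrative wrapper, not part of the patch):

    from pathlib import Path
    from typing import Optional
    from helper.libgen_service import download_from_mirror

    def fetch(mirror_url: str, out: Path) -> Optional[Path]:
        ok, path = download_from_mirror(
            mirror_url,
            out,
            progress_callback=lambda done, total: None,  # bytes so far, total bytes (0 if unknown)
        )
        return path if ok and path is not None and path.exists() else None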
diff --git a/helper/logger.py b/helper/logger.py
index 809ec4f..ec86f01 100644
--- a/helper/logger.py
+++ b/helper/logger.py
@@ -38,6 +38,9 @@ def log(*args, **kwargs) -> None:
     Example:
         log("Upload started")  # Output: [add_file.run] Upload started
     """
+    # When debug is disabled, suppress the automatic prefix for cleaner user-facing output.
+    add_prefix = _DEBUG_ENABLED
+
     # Get the calling frame
     frame = inspect.currentframe()
     if frame is None:
@@ -60,11 +63,11 @@ def log(*args, **kwargs) -> None:
         if 'file' not in kwargs:
             kwargs['file'] = sys.stdout
 
-        # Build prefix
-        prefix = f"[{file_name}.{func_name}]"
-
-        # Print with prefix
-        print(prefix, *args, **kwargs)
+        if add_prefix:
+            prefix = f"[{file_name}.{func_name}]"
+            print(prefix, *args, **kwargs)
+        else:
+            print(*args, **kwargs)
     finally:
         del frame
         del caller_frame
diff --git a/helper/metadata_search.py b/helper/metadata_search.py
new file mode 100644
index 0000000..6e08634
--- /dev/null
+++ b/helper/metadata_search.py
@@ -0,0 +1,105 @@
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from typing import Any, Dict, List, Optional, Type
+import requests
+import sys
+
+from helper.logger import log, debug
+
+
+class MetadataProvider(ABC):
+    """Base class for metadata providers (music, movies, books, etc.)."""
+
+    def __init__(self, config: Optional[Dict[str, Any]] = None) -> None:
+        self.config = config or {}
+
+    @property
+    def name(self) -> str:
+        return self.__class__.__name__.replace("Provider", "").lower()
+
+    @abstractmethod
+    def search(self, query: str, limit: int = 10) -> List[Dict[str, Any]]:
+        """Return a list of candidate metadata records."""
+
+    def to_tags(self, item: Dict[str, Any]) -> List[str]:
+        """Convert a result item into a list of tags."""
+        tags: List[str] = []
+        title = item.get("title")
+        artist = item.get("artist")
+        album = item.get("album")
+        year = item.get("year")
+
+        if title:
+            tags.append(f"title:{title}")
+        if artist:
+            tags.append(f"artist:{artist}")
+        if album:
+            tags.append(f"album:{album}")
+        if year:
+            tags.append(f"year:{year}")
+
+        tags.append(f"source:{self.name}")
+        return tags
+
+
+class ITunesProvider(MetadataProvider):
+    """Metadata provider using the iTunes Search API."""
+
+    def search(self, query: str, limit: int = 10) -> List[Dict[str, Any]]:
+        params = {"term": query, "media": "music", "entity": "song", "limit": limit}
+        try:
+            resp = requests.get("https://itunes.apple.com/search", params=params, timeout=10)
+            resp.raise_for_status()
+            results = resp.json().get("results", [])
+        except Exception as exc:
+            log(f"iTunes search failed: {exc}", file=sys.stderr)
+            return []
+
+        items: List[Dict[str, Any]] = []
+        for r in results:
+            item = {
+                "title": r.get("trackName"),
+                "artist": r.get("artistName"),
+                "album": r.get("collectionName"),
+                "year": str(r.get("releaseDate", ""))[:4],
+                "provider": self.name,
+                "raw": r,
+            }
+            items.append(item)
+        debug(f"iTunes returned {len(items)} items for '{query}'")
+        return items
+
+
+# Registry ---------------------------------------------------------------
+
+_METADATA_PROVIDERS: Dict[str, Type[MetadataProvider]] = {
+    "itunes": ITunesProvider,
+}
+
+
+def register_provider(name: str, provider_cls: Type[MetadataProvider]) -> None:
+    _METADATA_PROVIDERS[name.lower()] = provider_cls
+
+
+def list_metadata_providers(config: Optional[Dict[str, Any]] = None) -> Dict[str, bool]:
+    availability: Dict[str, bool] = {}
+    for name, cls in _METADATA_PROVIDERS.items():
+        try:
+            # Instantiating is the basic availability check for now
+            cls(config)
+            availability[name] = True
+        except Exception:
+            availability[name] = False
+    return availability
+
+
+def get_metadata_provider(name: str, config: Optional[Dict[str, Any]] = None) -> Optional[MetadataProvider]:
+    cls = _METADATA_PROVIDERS.get(name.lower())
+    if not cls:
+        return None
+    try:
+        return cls(config)
+    except Exception as exc:
+        log(f"Provider init failed for '{name}': {exc}", file=sys.stderr)
+        return None
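Adding a backend beyond iTunes is a matter of subclassing and registering; MusicBrainzProvider below is hypothetical and not part of this patch:

    from typing import Any, Dict, List
    from helper.metadata_search import MetadataProvider, register_provider

    class MusicBrainzProvider(MetadataProvider):
        def search(self, query: str, limit: int = 10) -> List[Dict[str, Any]]:
            return []  # a real implementation would query the MusicBrainz API here

    register_provider("musicbrainz", MusicBrainzProvider)
    # get_metadata_provider("musicbrainz", config) and `get-tag -scrape musicbrainz` now resolve it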
diff --git a/helper/search_provider.py b/helper/search_provider.py
index c3d40bc..131e3e8 100644
--- a/helper/search_provider.py
+++ b/helper/search_provider.py
@@ -73,7 +73,12 @@ class SearchResult:
             self.columns = []
 
     def to_dict(self) -> Dict[str, Any]:
-        """Convert to dictionary for JSON serialization."""
+        """Convert to dictionary for JSON serialization.
+
+        Note: full_metadata is excluded from the dict to keep the response size
+        small until the result is actually selected/used. This speeds up initial
+        search result display and piping.
+        """
         data = {
             "origin": self.origin,
             "title": self.title,
@@ -83,10 +88,10 @@ class SearchResult:
             "media_kind": self.media_kind,
             "size_bytes": self.size_bytes,
             "tags": list(self.tags) if self.tags else [],
-            "full_metadata": self.full_metadata,
         }
         if self.columns:
             data["columns"] = list(self.columns)
         return data
 
@@ -377,6 +382,7 @@ class LibGenProvider(SearchProvider):
             if isbn:
                 annotations.append(f"ISBN: {isbn}")
 
+            # Store book data in metadata, but leave out mirrors to avoid serialization overhead
             search_results.append(SearchResult(
                 origin="libgen",
                 title=title,
@@ -391,7 +397,8 @@ class LibGenProvider(SearchProvider):
                     "year": year,
                     "isbn": isbn,
                     "filesize": filesize,
-                    "mirrors": book.get("mirrors", {}),
+                    # Mirrors can be re-fetched if the result is selected
                     "book_id": book.get("book_id", ""),
                     "md5": book.get("md5", ""),
                 },
diff --git a/helper/unified_book_downloader.py b/helper/unified_book_downloader.py
index 1ce6f87..41a1fb5 100644
--- a/helper/unified_book_downloader.py
+++ b/helper/unified_book_downloader.py
@@ -450,30 +450,31 @@ class UnifiedBookDownloader:
         if download_func is None:
             return False, "Download function not available"
 
-        download_callable = cast(Callable[[str, str], bool], download_func)
+        download_callable = cast(Callable[[str, str], Tuple[bool, Optional[Path]]], download_func)
 
         def download_wrapper():
             return download_callable(mirror_url, str(output_path))
 
         # Download (in thread)
        try:
-            success = await loop.run_in_executor(None, download_wrapper)
+            success, downloaded_path = await loop.run_in_executor(None, download_wrapper)
 
             if success:
+                dest_path = Path(downloaded_path) if downloaded_path else output_path
                 # Validate downloaded file is not HTML (common Libgen issue)
-                if output_path.exists():
+                if dest_path.exists():
                     try:
-                        with open(output_path, 'rb') as f:
+                        with open(dest_path, 'rb') as f:
                             file_start = f.read(1024).decode('utf-8', errors='ignore').lower()
                             if '
 Set[s
     return tags_set
 
 
+def build_book_tags(
+    *,
+    title: Optional[str] = None,
+    author: Optional[str] = None,
+    isbn: Optional[str] = None,
+    year: Optional[str] = None,
+    source: Optional[str] = None,
+    extra: Optional[Sequence[str]] = None,
+) -> List[str]:
+    """Build consistent book tags for downloads (LibGen, OpenLibrary, etc.)."""
+    tags: List[str] = ["book"]
+
+    def _add(tag: Optional[str]) -> None:
+        if tag and isinstance(tag, str) and tag.strip():
+            tags.append(tag.strip())
+
+    _add(source)
+    if title:
+        _add(f"title:{title}")
+    if author:
+        _add(f"author:{author}")
+    if isbn:
+        _add(f"isbn:{isbn}")
+    if year:
+        _add(f"year:{year}")
+    if extra:
+        for tag in extra:
+            _add(tag)
+
+    # Deduplicate while preserving order
+    deduped = list(dict.fromkeys(tags))
+    return deduped
+
+
 def fetch_openlibrary_metadata_tags(isbn: Optional[str] = None, olid: Optional[str] = None) -> List[str]:
     """Fetch book metadata from OpenLibrary and return as tags.
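For reference, the tag shape build_book_tags produces for the LibGen paths above (values illustrative):

    from metadata import build_book_tags

    tags = build_book_tags(
        title="Meditations",
        author="Marcus Aurelius",
        year="2002",
        source="libgen",
        extra=["libgen_id:12345"],
    )
    # -> ["book", "libgen", "title:Meditations", "author:Marcus Aurelius",
    #     "year:2002", "libgen_id:12345"]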