From 6b9ed7d4ab01d69b11a8da46c2055e130d026f70 Mon Sep 17 00:00:00 2001
From: nose
Date: Mon, 1 Dec 2025 01:10:16 -0800
Subject: [PATCH] sssssss

---
 CLI.py                      |   6 +
 README.md                   |  51 +++--
 cmdlets/_shared.py          |   4 +
 cmdlets/add_file.py         |  71 ++++---
 cmdlets/add_url.py          | 108 +++++++---
 cmdlets/delete_file.py      | 152 ++++++++++++--
 cmdlets/delete_url.py       | 132 +++++++++---
 cmdlets/get_file.py         | 400 ++++++++++++++++++++++++------------
 cmdlets/get_relationship.py | 361 ++++++++++++++++----------------
 cmdlets/get_url.py          | 107 +++++++---
 cmdlets/merge_file.py       |   7 +-
 cmdlets/search_file.py      |  30 ++-
 cmdlets/trim_file.py        | 295 ++++++++++++++++++++++++++
 helper/file_storage.py      |  20 +-
 helper/local_library.py     |  77 ++++++-
 helper/search_provider.py   | 292 ++++++++++++++++++++++++++
 result_table.py             |   1 -
 17 files changed, 1644 insertions(+), 470 deletions(-)
 create mode 100644 cmdlets/trim_file.py

diff --git a/CLI.py b/CLI.py
index a6f2985..2c8393c 100644
--- a/CLI.py
+++ b/CLI.py
@@ -967,6 +967,12 @@ def _execute_pipeline(tokens: list):
         # First stage was ONLY selection (@N or @*) - remove it and apply selection to next stage's input
         stages.pop(0)
 
+        # Handle @* expansion by selecting all available items
+        if first_stage_select_all:
+            last_items = ctx.get_last_result_items()
+            if last_items:
+                first_stage_selection_indices = list(range(len(last_items)))
+
     # Execute each stage, threading results to the next
     piped_result = None
     worker_manager = _ensure_worker_manager(config)
diff --git a/README.md b/README.md
index da305dd..a130184 100644
--- a/README.md
+++ b/README.md
@@ -1,29 +1,42 @@
 # Medios-Macina
+- Audio
+- Video
+- Image
+- Text
-Media Manager
--Audio
--Video
--Image
--Text
+### Search Storage Support
+- HydrusNetwork https://github.com/hydrusnetwork/hydrus
+- All-Debrid https://alldebrid.com/
+- Local drive
-Storage Support
--HydrusNetwork https://github.com/hydrusnetwork/hydrus
--All-Debrid https://alldebrid.com/
--Local drive
+### Search Provider Support
+- Youtube
+- Openlibrary/Archive.org (free account needed)
+- Soulseek
+- Gog-Games (limited without paid API)
+- Libgen
-Provider Support
--Youtube
--Openlibrary/Archive.org (free account needed)
--Soulseek
--Gog-Games (limited without paid API)
--Libgen
+### Features
+- Full MPV integration https://github.com/mpv-player/mpv
+
+
+Install the integrations you need. Once the dependencies from requirements.txt are installed, open a terminal at the repository's download location and run the CLI:
+
+
+#### Quick
+
+```shell
+cd "C:\location\to\repository\medios-machina"
 python cli.py
-
+```
+Adding your first file
 ```python
-def hello():
-    print("Hello, world!")
-
+.pipe -list # List the current MPV playlist
+.pipe -save # Save the current playlist
+.pipe -load # List saved playlists; @# to load one
+.pipe "https://www.youtube.com/watch?v=_23dFb50Z2Y" # Add a URL to the current playlist
+```
1. 
search-file -provider youtube "something in the way" diff --git a/cmdlets/_shared.py b/cmdlets/_shared.py index 0be2395..6670ead 100644 --- a/cmdlets/_shared.py +++ b/cmdlets/_shared.py @@ -1182,9 +1182,11 @@ def extract_known_urls_from_result(result: Any) -> list[str]: if isinstance(result, models.PipeObject): _extend(result.extra.get('known_urls')) + _extend(result.extra.get('url')) # Also check singular url if isinstance(result.metadata, dict): _extend(result.metadata.get('known_urls')) _extend(result.metadata.get('urls')) + _extend(result.metadata.get('url')) elif hasattr(result, 'known_urls') or hasattr(result, 'urls'): # Handle objects with known_urls/urls attribute _extend(getattr(result, 'known_urls', None)) @@ -1193,10 +1195,12 @@ def extract_known_urls_from_result(result: Any) -> list[str]: if isinstance(result, dict): _extend(result.get('known_urls')) _extend(result.get('urls')) + _extend(result.get('url')) extra = result.get('extra') if isinstance(extra, dict): _extend(extra.get('known_urls')) _extend(extra.get('urls')) + _extend(extra.get('url')) return merge_sequences(urls, case_sensitive=True) diff --git a/cmdlets/add_file.py b/cmdlets/add_file.py index 3a0ec05..067660a 100644 --- a/cmdlets/add_file.py +++ b/cmdlets/add_file.py @@ -169,26 +169,14 @@ def _persist_local_metadata( debug(f"[_persist_local_metadata] Absolute dest_path: {dest_path.resolve()}") with LocalLibraryDB(library_root) as db: - # Save metadata FIRST to ensure file entry is created in DB - if any(payload.values()): - debug(f"[_persist_local_metadata] Saving metadata payload first") - try: - db.save_metadata(dest_path, payload) - debug(f"[_persist_local_metadata] ✅ Metadata saved") - except Exception as meta_exc: - log(f"[_persist_local_metadata] ❌ Failed to save metadata: {meta_exc}", file=sys.stderr) - raise - - # Save tags to DB synchronously in same transaction - # For local storage, DB is the primary source of truth - if tags: - try: - debug(f"[_persist_local_metadata] Saving {len(tags)} tags to DB") - db.save_tags(dest_path, tags) - debug(f"[_persist_local_metadata] ✅ Tags saved to DB") - except Exception as tag_exc: - log(f"[_persist_local_metadata] ⚠️ Failed to save tags to DB: {tag_exc}", file=sys.stderr) - raise + # Use optimized single-transaction save + debug(f"[_persist_local_metadata] Saving metadata and {len(tags)} tags to DB") + try: + db.save_file_info(dest_path, payload, tags) + debug(f"[_persist_local_metadata] ✅ File info saved to DB") + except Exception as exc: + log(f"[_persist_local_metadata] ❌ Failed to save file info: {exc}", file=sys.stderr) + raise # NOTE: Sidecar files are intentionally NOT created for local storage # Local storage uses database as primary source, not sidecar files @@ -261,6 +249,26 @@ def _handle_local_transfer(media_path: Path, destination_root: Path, result: Any relationships = extract_relationships(result) duration = extract_duration(result) + # Rename source file if title tag is present (to ensure destination has correct name) + title_tag = next((t for t in merged_tags if str(t).strip().lower().startswith("title:")), None) + if title_tag: + try: + from helper.utils import unique_path + title_val = title_tag.split(":", 1)[1].strip() + # Sanitize filename (keep spaces, but remove illegal chars) + safe_title = "".join(c for c in title_val if c.isalnum() or c in " ._-()[]").strip() + if safe_title: + new_name = safe_title + media_path.suffix + new_path = media_path.parent / new_name + if new_path != media_path: + # Ensure we don't overwrite existing files + 
new_path = unique_path(new_path) + media_path.rename(new_path) + media_path = new_path + debug(f"Renamed source file to match title: {media_path.name}") + except Exception as e: + log(f"Warning: Failed to rename file to match title: {e}", file=sys.stderr) + try: dest_file = storage["local"].upload(media_path, location=str(destination_root), move=True) except Exception as exc: @@ -271,14 +279,16 @@ def _handle_local_transfer(media_path: Path, destination_root: Path, result: Any file_hash = _resolve_file_hash(result, sidecar_hash, dest_path) media_kind = _resolve_media_kind(result, dest_path) - # Ensure only ONE title tag that matches the actual filename - # Remove all existing title tags and add one based on the saved filename - merged_tags_no_titles = [t for t in merged_tags if not str(t).strip().lower().startswith("title:")] - filename_title = dest_path.stem.replace("_", " ").strip() - if filename_title: - merged_tags_no_titles.insert(0, f"title:{filename_title}") + # If we have a title tag, keep it. Otherwise, derive from filename. + has_title = any(str(t).strip().lower().startswith("title:") for t in merged_tags) + final_tags = merged_tags - _persist_local_metadata(destination_root, dest_path, merged_tags_no_titles, merged_urls, file_hash, relationships, duration, media_kind) + if not has_title: + filename_title = dest_path.stem.replace("_", " ").strip() + if filename_title: + final_tags.insert(0, f"title:{filename_title}") + + _persist_local_metadata(destination_root, dest_path, final_tags, merged_urls, file_hash, relationships, duration, media_kind) _cleanup_sidecar_files(media_path, sidecar_path) debug(f"✅ Moved to local library: {dest_path}") return 0, dest_path @@ -897,8 +907,11 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int: pass # If -delete flag is set, delete the file and .tags after successful upload - if delete_after_upload: - log(f"Deleting local files (as requested)...", file=sys.stderr) + # Also delete if the file is a temporary file from merge-file (contains .dlhx_ or (merged)) + is_temp_merge = "(merged)" in media_path.name or ".dlhx_" in media_path.name + + if delete_after_upload or is_temp_merge: + log(f"Deleting local files (as requested or temp file)...", file=sys.stderr) try: media_path.unlink() log(f"✅ Deleted: {media_path.name}", file=sys.stderr) diff --git a/cmdlets/add_url.py b/cmdlets/add_url.py index 7c95498..c803654 100644 --- a/cmdlets/add_url.py +++ b/cmdlets/add_url.py @@ -2,6 +2,8 @@ from __future__ import annotations from typing import Any, Dict, Sequence import json +import sys +from pathlib import Path from . 
import register import models @@ -9,17 +11,19 @@ import pipeline as ctx from helper import hydrus as hydrus_wrapper from ._shared import Cmdlet, CmdletArg, normalize_hash from helper.logger import log +from config import get_local_storage_path +from helper.local_library import LocalLibraryDB CMDLET = Cmdlet( name="add-url", - summary="Associate a URL with a Hydrus file.", + summary="Associate a URL with a file (Hydrus or Local).", usage="add-url [-hash ] ", args=[ CmdletArg("-hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."), CmdletArg("url", required=True, description="The URL to associate with the file."), ], details=[ - "- Adds the URL to the Hydrus file's known URL list.", + "- Adds the URL to the file's known URL list.", ], ) @@ -37,42 +41,96 @@ def add(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: from ._shared import parse_cmdlet_args parsed = parse_cmdlet_args(args, CMDLET) override_hash = parsed.get("hash") - url = parsed.get("url") + url_arg = parsed.get("url") - if not url: + if not url_arg: log("Requires a URL argument") return 1 - url = str(url).strip() - if not url: + url_arg = str(url_arg).strip() + if not url_arg: log("Requires a non-empty URL") return 1 + + # Split by comma to handle multiple URLs + urls_to_add = [u.strip() for u in url_arg.split(',') if u.strip()] # Handle @N selection which creates a list - extract the first item if isinstance(result, list) and len(result) > 0: result = result[0] - hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(getattr(result, "hash_hex", None)) - if not hash_hex: - log("Selected result does not include a Hydrus hash") - return 1 - try: - client = hydrus_wrapper.get_client(config) - except Exception as exc: - log(f"Hydrus client unavailable: {exc}") - return 1 + # Helper to get field from both dict and object + def get_field(obj: Any, field: str, default: Any = None) -> Any: + if isinstance(obj, dict): + return obj.get(field, default) + else: + return getattr(obj, field, default) + + success = False - if client is None: - log("Hydrus client unavailable") + # 1. Try Local Library + file_path = get_field(result, "file_path") or get_field(result, "path") + if file_path and not override_hash: + try: + path_obj = Path(file_path) + if path_obj.exists(): + storage_path = get_local_storage_path(config) + if storage_path: + with LocalLibraryDB(storage_path) as db: + metadata = db.get_metadata(path_obj) or {} + known_urls = metadata.get("known_urls") or [] + + local_changed = False + for url in urls_to_add: + if url not in known_urls: + known_urls.append(url) + local_changed = True + ctx.emit(f"Associated URL with local file {path_obj.name}: {url}") + else: + ctx.emit(f"URL already exists for local file {path_obj.name}: {url}") + + if local_changed: + metadata["known_urls"] = known_urls + # Ensure we have a hash if possible, but don't fail if not + if not metadata.get("hash"): + try: + from helper.utils import sha256_file + metadata["hash"] = sha256_file(path_obj) + except Exception: + pass + + db.save_metadata(path_obj, metadata) + + success = True + except Exception as e: + log(f"Error updating local library: {e}", file=sys.stderr) + + # 2. 
Try Hydrus + hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(get_field(result, "hash_hex", None)) + + if hash_hex: + try: + client = hydrus_wrapper.get_client(config) + if client: + for url in urls_to_add: + client.associate_url(hash_hex, url) + preview = hash_hex[:12] + ('…' if len(hash_hex) > 12 else '') + ctx.emit(f"Associated URL with Hydrus file {preview}: {url}") + success = True + except Exception as exc: + # Only log error if we didn't succeed locally either + if not success: + log(f"Hydrus add-url failed: {exc}", file=sys.stderr) + return 1 + + if success: + return 0 + + if not hash_hex and not file_path: + log("Selected result does not include a file path or Hydrus hash", file=sys.stderr) return 1 - try: - client.associate_url(hash_hex, url) - except Exception as exc: - log(f"Hydrus add-url failed: {exc}") - return 1 - preview = hash_hex[:12] + ('…' if len(hash_hex) > 12 else '') - ctx.emit(f"Associated URL with {preview}: {url}") - return 0 + + return 1 diff --git a/cmdlets/delete_file.py b/cmdlets/delete_file.py index da6ed8a..42fdf02 100644 --- a/cmdlets/delete_file.py +++ b/cmdlets/delete_file.py @@ -11,11 +11,50 @@ from pathlib import Path import models import pipeline as ctx from helper import hydrus as hydrus_wrapper -from ._shared import Cmdlet, CmdletArg, normalize_hash +from ._shared import Cmdlet, CmdletArg, normalize_hash, looks_like_hash +from config import get_local_storage_path +from helper.local_library import LocalLibraryDB +def _cleanup_relationships(db_path: Path, file_hash: str) -> int: + """Remove references to file_hash from other files' relationships.""" + try: + conn = sqlite3.connect(db_path) + cursor = conn.cursor() + + # Find all metadata entries that contain this hash in relationships + cursor.execute("SELECT file_id, relationships FROM metadata WHERE relationships LIKE ?", (f'%{file_hash}%',)) + rows = cursor.fetchall() + + rel_update_count = 0 + for row_fid, rel_json in rows: + try: + rels = json.loads(rel_json) + changed = False + if isinstance(rels, dict): + for r_type, hashes in rels.items(): + if isinstance(hashes, list) and file_hash in hashes: + hashes.remove(file_hash) + changed = True + + if changed: + cursor.execute("UPDATE metadata SET relationships = ? WHERE file_id = ?", (json.dumps(rels), row_fid)) + rel_update_count += 1 + except Exception: + pass + + conn.commit() + conn.close() + if rel_update_count > 0: + debug(f"Removed relationship references from {rel_update_count} other files", file=sys.stderr) + return rel_update_count + except Exception as e: + debug(f"Error cleaning up relationships: {e}", file=sys.stderr) + return 0 + + def _delete_database_entry(db_path: Path, file_path: str) -> bool: """Delete file and related entries from local library database. 
@@ -28,35 +67,31 @@ def _delete_database_entry(db_path: Path, file_path: str) -> bool: """ try: if not db_path.exists(): - log(f"Database not found at {db_path}", file=sys.stderr) + debug(f"Database not found at {db_path}", file=sys.stderr) return False conn = sqlite3.connect(db_path) cursor = conn.cursor() - log(f"Searching database for file_path: {file_path}", file=sys.stderr) + debug(f"Searching database for file_path: {file_path}", file=sys.stderr) # Find the file_id using the exact file_path cursor.execute('SELECT id FROM files WHERE file_path = ?', (file_path,)) result = cursor.fetchone() if not result: - log(f"ERROR: File path not found in database", file=sys.stderr) - log(f"Expected: {file_path}", file=sys.stderr) - - # Debug: show sample entries - cursor.execute('SELECT id, file_path FROM files LIMIT 3') - samples = cursor.fetchall() - if samples: - log(f"Sample DB entries:", file=sys.stderr) - for fid, fpath in samples: - log(f"{fid}: {fpath}", file=sys.stderr) - + debug(f"File path not found in database: {file_path}", file=sys.stderr) conn.close() return False file_id = result[0] - log(f"Found file_id={file_id}, deleting all related records", file=sys.stderr) + + # Get file hash before deletion to clean up relationships + cursor.execute('SELECT file_hash FROM files WHERE id = ?', (file_id,)) + hash_result = cursor.fetchone() + file_hash = hash_result[0] if hash_result else None + + debug(f"Found file_id={file_id}, deleting all related records", file=sys.stderr) # Delete related records cursor.execute('DELETE FROM metadata WHERE file_id = ?', (file_id,)) @@ -74,7 +109,11 @@ def _delete_database_entry(db_path: Path, file_path: str) -> bool: conn.commit() conn.close() - log(f"Deleted: metadata={meta_count}, tags={tags_count}, notes={notes_count}, files={files_count}", file=sys.stderr) + # Clean up relationships in other files + if file_hash: + _cleanup_relationships(db_path, file_hash) + + debug(f"Deleted: metadata={meta_count}, tags={tags_count}, notes={notes_count}, files={files_count}", file=sys.stderr) return True except Exception as exc: @@ -106,6 +145,24 @@ def _process_single_item(item: Any, override_hash: str | None, conserve: str | N local_deleted = False local_target = isinstance(target, str) and target.strip() and not str(target).lower().startswith(("http://", "https://")) + # Try to resolve local path if target looks like a hash and we have a library root + if local_target and looks_like_hash(str(target)) and lib_root: + try: + db_path = Path(lib_root) / ".downlow_library.db" + if db_path.exists(): + # We can't use LocalLibraryDB context manager easily here without importing it, + # but we can use a quick sqlite connection or just use the class if imported. + # We imported LocalLibraryDB, so let's use it. 
+ with LocalLibraryDB(Path(lib_root)) as db: + resolved = db.search_by_hash(str(target)) + if resolved: + target = str(resolved) + # Also ensure we have the hash set for Hydrus deletion if needed + if not hash_hex: + hash_hex = normalize_hash(str(target)) + except Exception as e: + debug(f"Failed to resolve hash to local path: {e}", file=sys.stderr) + if conserve != "local" and local_target: path = Path(str(target)) file_path_str = str(target) # Keep the original string for DB matching @@ -132,18 +189,59 @@ def _process_single_item(item: Any, override_hash: str | None, conserve: str | N if lib_root: lib_root_path = Path(lib_root) db_path = lib_root_path / ".downlow_library.db" - if _delete_database_entry(db_path, file_path_str): + + # If file_path_str is a hash (because file was already deleted or target was hash), + # we need to find the path by hash in the DB first + if looks_like_hash(file_path_str): + try: + with LocalLibraryDB(lib_root_path) as db: + resolved = db.search_by_hash(file_path_str) + if resolved: + file_path_str = str(resolved) + except Exception: + pass + + db_success = _delete_database_entry(db_path, file_path_str) + + if not db_success: + # If deletion failed (e.g. not found), but we have a hash, try to clean up relationships anyway + effective_hash = None + if looks_like_hash(file_path_str): + effective_hash = file_path_str + elif hash_hex: + effective_hash = hash_hex + + if effective_hash: + debug(f"Entry not found, but attempting to clean up relationships for hash: {effective_hash}", file=sys.stderr) + if _cleanup_relationships(db_path, effective_hash) > 0: + db_success = True + + if db_success: if ctx._PIPE_ACTIVE: ctx.emit(f"Removed database entry: {path.name}") - log(f"Database entry cleaned up", file=sys.stderr) - local_deleted = True # Mark as deleted if DB cleanup succeeded + debug(f"Database entry cleaned up", file=sys.stderr) + local_deleted = True else: - log(f"Database entry not found or cleanup failed for {file_path_str}", file=sys.stderr) + debug(f"Database entry not found or cleanup failed for {file_path_str}", file=sys.stderr) else: debug(f"No lib_root provided, skipping database cleanup", file=sys.stderr) hydrus_deleted = False - if conserve != "hydrus" and hash_hex: + # Only attempt Hydrus deletion if origin is explicitly Hydrus or if we failed to delete locally + # and we suspect it might be in Hydrus. + # If origin is local, we should default to NOT deleting from Hydrus unless requested? + # Or maybe we should check if it exists in Hydrus first? + # The user complaint is "its still trying to delete hydrus, this is a local file". 
+ + should_try_hydrus = True + if origin and origin.lower() == "local": + should_try_hydrus = False + + # If conserve is set to hydrus, definitely don't delete + if conserve == "hydrus": + should_try_hydrus = False + + if should_try_hydrus and hash_hex: try: client = hydrus_wrapper.get_client(config) except Exception as exc: @@ -153,6 +251,9 @@ def _process_single_item(item: Any, override_hash: str | None, conserve: str | N else: if client is None: if not local_deleted: + # If we deleted locally, we don't care if Hydrus is unavailable + pass + else: log("Hydrus client unavailable", file=sys.stderr) return False else: @@ -165,7 +266,8 @@ def _process_single_item(item: Any, override_hash: str | None, conserve: str | N preview = hash_hex[:12] + ('…' if len(hash_hex) > 12 else '') debug(f"Deleted from Hydrus: {preview}…", file=sys.stderr) except Exception as exc: - log(f"Hydrus delete failed: {exc}", file=sys.stderr) + # If it's not in Hydrus (e.g. 404 or similar), that's fine + # log(f"Hydrus delete failed: {exc}", file=sys.stderr) if not local_deleted: return False @@ -218,6 +320,12 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: reason_tokens.append(token) i += 1 + if not lib_root: + # Try to get from config + p = get_local_storage_path(config) + if p: + lib_root = str(p) + reason = " ".join(token for token in reason_tokens if str(token).strip()).strip() items = [] diff --git a/cmdlets/delete_url.py b/cmdlets/delete_url.py index 1000dd1..872d79f 100644 --- a/cmdlets/delete_url.py +++ b/cmdlets/delete_url.py @@ -2,22 +2,27 @@ from __future__ import annotations from typing import Any, Dict, Sequence import json +import sys +from pathlib import Path from . import register from helper import hydrus as hydrus_wrapper from ._shared import Cmdlet, CmdletArg, normalize_hash from helper.logger import log +from config import get_local_storage_path +from helper.local_library import LocalLibraryDB +import pipeline as ctx CMDLET = Cmdlet( name="delete-url", - summary="Remove a URL association from a Hydrus file.", + summary="Remove a URL association from a file (Hydrus or Local).", usage="delete-url [-hash ] ", args=[ CmdletArg("-hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."), - CmdletArg("", required=True, description="The URL to remove from the file."), + CmdletArg("url", required=True, description="The URL to remove from the file."), ], details=[ - "- Removes the URL from the Hydrus file's known URL list.", + "- Removes the URL from the file's known URL list.", ], ) @@ -47,36 +52,105 @@ def delete(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: return 0 except Exception: pass + override_hash, rest = _parse_hash_and_rest(args) - if not rest: - log("Requires a URL argument") - return 1 - url = str(rest[0] or '').strip() - if not url: - log("Requires a non-empty URL") - return 1 - # Handle @N selection which creates a list - extract the first item - if isinstance(result, list) and len(result) > 0: - result = result[0] + url_arg = None + if rest: + url_arg = str(rest[0] or '').strip() - hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(getattr(result, "hash_hex", None)) - if not hash_hex: - log("Selected result does not include a Hydrus hash") - return 1 - try: - client = hydrus_wrapper.get_client(config) - except Exception as exc: - log(f"Hydrus client unavailable: {exc}") + # Normalize result to a list + items = result if isinstance(result, list) else [result] + if not 
items: + log("No input provided.") return 1 + + success_count = 0 - if client is None: - log("Hydrus client unavailable") + for item in items: + target_url = url_arg + target_file = item + + # Check for rich URL object from get-url + if isinstance(item, dict) and "url" in item and "source_file" in item: + if not target_url: + target_url = item["url"] + target_file = item["source_file"] + + if not target_url: + continue + + if _delete_single(target_file, target_url, override_hash, config): + success_count += 1 + + if success_count == 0: + if not url_arg: + log("Requires a URL argument or valid selection.") + else: + log("Failed to delete URL(s).") return 1 - try: - client.delete_url(hash_hex, url) - except Exception as exc: - log(f"Hydrus del-url failed: {exc}") - return 1 - log(f"Deleted URL: {url}") + return 0 + + +def _delete_single(result: Any, url: str, override_hash: str | None, config: Dict[str, Any]) -> bool: + # Helper to get field from both dict and object + def get_field(obj: Any, field: str, default: Any = None) -> Any: + if isinstance(obj, dict): + return obj.get(field, default) + else: + return getattr(obj, field, default) + + success = False + + # 1. Try Local Library + file_path = get_field(result, "file_path") or get_field(result, "path") + if file_path and not override_hash: + try: + path_obj = Path(file_path) + if path_obj.exists(): + storage_path = get_local_storage_path(config) + if storage_path: + with LocalLibraryDB(storage_path) as db: + metadata = db.get_metadata(path_obj) or {} + known_urls = metadata.get("known_urls") or [] + + # Handle comma-separated URLs if passed as arg + # But first check if the exact url string exists (e.g. if it contains commas itself) + urls_to_process = [] + if url in known_urls: + urls_to_process = [url] + else: + urls_to_process = [u.strip() for u in url.split(',') if u.strip()] + + local_changed = False + for u in urls_to_process: + if u in known_urls: + known_urls.remove(u) + local_changed = True + ctx.emit(f"Deleted URL from local file {path_obj.name}: {u}") + + if local_changed: + metadata["known_urls"] = known_urls + db.save_metadata(path_obj, metadata) + success = True + except Exception as e: + log(f"Error updating local library: {e}", file=sys.stderr) + + # 2. 
Try Hydrus + hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(get_field(result, "hash_hex", None)) + + if hash_hex: + try: + client = hydrus_wrapper.get_client(config) + if client: + urls_to_delete = [u.strip() for u in url.split(',') if u.strip()] + for u in urls_to_delete: + client.delete_url(hash_hex, u) + preview = hash_hex[:12] + ('…' if len(hash_hex) > 12 else '') + ctx.emit(f"Deleted URL from Hydrus file {preview}: {u}") + success = True + except Exception as exc: + log(f"Hydrus del-url failed: {exc}", file=sys.stderr) + + return success diff --git a/cmdlets/get_file.py b/cmdlets/get_file.py index 2ef47ae..00ad5ec 100644 --- a/cmdlets/get_file.py +++ b/cmdlets/get_file.py @@ -372,6 +372,18 @@ def _handle_search_result(result: Any, args: Sequence[str], config: Dict[str, An log("Error: No magnet ID in debrid result", file=sys.stderr) return 1 return _handle_debrid_file(magnet_id, file_title, config, args) + elif storage_name.lower() in {'bandcamp', 'youtube'}: + # Handle Bandcamp/YouTube via yt-dlp + url = get_field(result, 'target', None) + if not url: + # Try to find URL in other fields + url = get_field(result, 'url', None) + + if not url: + log(f"Error: No URL found for {storage_name} result", file=sys.stderr) + return 1 + + return _handle_ytdlp_download(url, file_title, config, args) else: log(f"Unknown storage backend: {storage_name}", file=sys.stderr) return 1 @@ -507,8 +519,28 @@ def _handle_local_file(file_path: Optional[str], file_title: str, config: Dict[s try: source = Path(file_path) if not source.exists(): - log(f"Error: File not found: {file_path}", file=sys.stderr) - return 1 + # Try to resolve by hash if the path looks like a hash + resolved_local = False + if looks_like_hash(str(file_path)): + try: + from config import get_local_storage_path + from helper.local_library import LocalLibraryDB + storage_path = get_local_storage_path(config) + if storage_path: + with LocalLibraryDB(storage_path) as db: + resolved_path = db.search_by_hash(str(file_path)) + if resolved_path and resolved_path.exists(): + source = resolved_path + file_path = str(resolved_path) + resolved_local = True + # Also set file_hash since we know it + file_hash = str(file_path) + except Exception: + pass + + if not resolved_local: + log(f"Error: File not found: {file_path}", file=sys.stderr) + return 1 # Check for explicit user flags force_mpv = any(str(a).lower() in {'-mpv', '--mpv', 'mpv'} for a in args) @@ -741,7 +773,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: # Also check for 'source' field (from add-file and other cmdlets) if not origin: origin = get_field(actual_result, 'source', None) - if origin and origin.lower() in {'hydrus', 'local', 'debrid', 'alldebrid'}: + if origin and origin.lower() in {'hydrus', 'local', 'debrid', 'alldebrid', 'bandcamp', 'youtube'}: # This is a search result with explicit origin - handle it via _handle_search_result return _handle_search_result(actual_result, args, config) @@ -1023,8 +1055,28 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: if isinstance(local_target, str) and not is_url and not (hash_spec and file_hash): p = Path(local_target) if not p.exists(): - log(f"File missing: {p}") - return 1 + # Check if it's a hash and try to resolve locally + resolved_local = False + if looks_like_hash(local_target): + try: + from config import get_local_storage_path + from helper.local_library import LocalLibraryDB + storage_path = get_local_storage_path(config) + if 
storage_path: + with LocalLibraryDB(storage_path) as db: + resolved_path = db.search_by_hash(local_target) + if resolved_path and resolved_path.exists(): + p = resolved_path + resolved_local = True + # Also set file_hash since we know it + file_hash = local_target + except Exception: + pass + + if not resolved_local: + log(f"File missing: {p}") + return 1 + source_path = p try: source_size = p.stat().st_size @@ -1046,127 +1098,158 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: except OSError: pass elif file_hash: + # Try local resolution first if origin is local or just in case + resolved_local = False try: - client = hydrus_wrapper.get_client(config) - except Exception as exc: - log(f"Hydrus client unavailable: {exc}") - return 1 - - if client is None: - log("Hydrus client unavailable") - return 1 - - # Fetch metadata and tags (needed for both -metadata flag and audio tagging) - # Fetch tags - try: - tags_payload = client.fetch_file_metadata(hashes=[file_hash], include_service_keys_to_tags=True) + from config import get_local_storage_path + from helper.local_library import LocalLibraryDB + storage_path = get_local_storage_path(config) + if storage_path: + with LocalLibraryDB(storage_path) as db: + resolved_path = db.search_by_hash(file_hash) + if resolved_path and resolved_path.exists(): + source_path = resolved_path + resolved_local = True + try: + source_size = source_path.stat().st_size + except OSError: + source_size = None + duration_sec = _ffprobe_duration_seconds(source_path) except Exception: - tags_payload = {} - - # Fetch URLs - try: - urls_payload = client.fetch_file_metadata(hashes=[file_hash], include_file_urls=True) - except Exception: - urls_payload = {} - - # Extract title from metadata if base_name is still 'export' - if base_name == 'export' and tags_payload: + pass + + if not resolved_local: try: - file_metadata = tags_payload.get('file_metadata', []) - if file_metadata and isinstance(file_metadata, list) and len(file_metadata) > 0: - meta = file_metadata[0] - if isinstance(meta, dict): - tags_dict = meta.get('tags', {}) - if isinstance(tags_dict, dict): - # Look for title in storage tags - for service in tags_dict.values(): - if isinstance(service, dict): - storage = service.get('storage_tags', {}) - if isinstance(storage, dict): - for tag_list in storage.values(): - if isinstance(tag_list, list): - for tag in tag_list: - if isinstance(tag, str) and tag.lower().startswith('title:'): - title_val = tag.split(':', 1)[1].strip() - if title_val: - base_name = _sanitize_name(title_val) - break - if base_name != 'export': - break - if base_name != 'export': - break + client = hydrus_wrapper.get_client(config) + except Exception as exc: + log(f"Hydrus client unavailable: {exc}") + return 1 + + if client is None: + log("Hydrus client unavailable") + return 1 + + # Fetch metadata and tags (needed for both -metadata flag and audio tagging) + # Fetch tags + try: + tags_payload = client.fetch_file_metadata(hashes=[file_hash], include_service_keys_to_tags=True) except Exception: - pass - - # Normal file export (happens regardless of -metadata flag) - try: - from helper.hydrus import hydrus_export as _hydrus_export - except Exception: - _hydrus_export = None # type: ignore - if _hydrus_export is None: - log("Hydrus export helper unavailable") - return 1 - download_dir = out_override if (out_override and out_override.is_dir()) else default_dir - try: - download_dir.mkdir(parents=True, exist_ok=True) - except Exception: - # If mkdir fails, fall back to 
default_dir - download_dir = default_dir - - # Verify the directory is writable; if not, fall back to default - try: - test_file = download_dir / f".downlow_write_test_{_uuid.uuid4().hex[:8]}" - test_file.touch() - test_file.unlink() - except (OSError, PermissionError): - # Directory is not writable, use default_dir instead - download_dir = default_dir + tags_payload = {} + + # Fetch URLs + try: + urls_payload = client.fetch_file_metadata(hashes=[file_hash], include_file_urls=True) + except Exception: + urls_payload = {} + + # Extract title from metadata if base_name is still 'export' + if base_name == 'export' and tags_payload: + try: + file_metadata = tags_payload.get('file_metadata', []) + if file_metadata and isinstance(file_metadata, list) and len(file_metadata) > 0: + meta = file_metadata[0] + if isinstance(meta, dict): + tags_dict = meta.get('tags', {}) + if isinstance(tags_dict, dict): + # Look for title in storage tags + for service in tags_dict.values(): + if isinstance(service, dict): + storage = service.get('storage_tags', {}) + if isinstance(storage, dict): + for tag_list in storage.values(): + if isinstance(tag_list, list): + for tag in tag_list: + if isinstance(tag, str) and tag.lower().startswith('title:'): + title_val = tag.split(':', 1)[1].strip() + if title_val: + base_name = _sanitize_name(title_val) + break + if base_name != 'export': + break + if base_name != 'export': + break + except Exception: + pass + + # Normal file export (happens regardless of -metadata flag) + try: + from helper.hydrus import hydrus_export as _hydrus_export + except Exception: + _hydrus_export = None # type: ignore + if _hydrus_export is None: + log("Hydrus export helper unavailable") + return 1 + download_dir = out_override if (out_override and out_override.is_dir()) else default_dir try: download_dir.mkdir(parents=True, exist_ok=True) except Exception: + # If mkdir fails, fall back to default_dir + download_dir = default_dir + + # Verify the directory is writable; if not, fall back to default + try: + test_file = download_dir / f".downlow_write_test_{_uuid.uuid4().hex[:8]}" + test_file.touch() + test_file.unlink() + except (OSError, PermissionError): + # Directory is not writable, use default_dir instead + download_dir = default_dir + try: + download_dir.mkdir(parents=True, exist_ok=True) + except Exception: + pass + token = (_uuid.uuid4().hex[:8]) + provisional_stem = f"{base_name}.dlhx_{token}" + provisional = download_dir / f"{provisional_stem}.bin" + class _Args: pass - token = (_uuid.uuid4().hex[:8]) - provisional_stem = f"{base_name}.dlhx_{token}" - provisional = download_dir / f"{provisional_stem}.bin" - class _Args: - pass - args_obj = _Args() - setattr(args_obj, 'output', provisional) - setattr(args_obj, 'format', 'copy') - setattr(args_obj, 'tmp_dir', str(download_dir)) - setattr(args_obj, 'metadata_json', None) - setattr(args_obj, 'hydrus_url', get_hydrus_url(config, "home") or "http://localhost:45869") - setattr(args_obj, 'access_key', get_hydrus_access_key(config, "home") or "") - setattr(args_obj, 'timeout', float(config.get('HydrusNetwork_Request_Timeout') or 60.0)) - try: - file_url = client.file_url(file_hash) - except Exception: - file_url = None - setattr(args_obj, 'file_url', file_url) - setattr(args_obj, 'file_hash', file_hash) - import io as _io, contextlib as _contextlib - _buf = _io.StringIO() - status = 1 - with _contextlib.redirect_stdout(_buf): - status = _hydrus_export(args_obj, None) - if status != 0: - stderr_text = _buf.getvalue().strip() - if stderr_text: - 
log(stderr_text) - return status - json_text = _buf.getvalue().strip().splitlines()[-1] if _buf.getvalue() else '' - final_from_json: Optional[Path] = None - try: - payload = json.loads(json_text) if json_text else None - if isinstance(payload, dict): - outp = payload.get('output') - if isinstance(outp, str) and outp: - final_from_json = Path(outp) - except Exception: - final_from_json = None - if final_from_json and final_from_json.exists(): - source_path = final_from_json - else: + args_obj = _Args() + setattr(args_obj, 'output', provisional) + setattr(args_obj, 'format', 'copy') + setattr(args_obj, 'tmp_dir', str(download_dir)) + setattr(args_obj, 'metadata_json', None) + setattr(args_obj, 'hydrus_url', get_hydrus_url(config, "home") or "http://localhost:45869") + setattr(args_obj, 'access_key', get_hydrus_access_key(config, "home") or "") + setattr(args_obj, 'timeout', float(config.get('HydrusNetwork_Request_Timeout') or 60.0)) + try: + file_url = client.file_url(file_hash) + except Exception: + file_url = None + setattr(args_obj, 'file_url', file_url) + setattr(args_obj, 'file_hash', file_hash) + import io as _io, contextlib as _contextlib + _buf = _io.StringIO() + status = 1 + with _contextlib.redirect_stdout(_buf): + status = _hydrus_export(args_obj, None) + if status != 0: + stderr_text = _buf.getvalue().strip() + if stderr_text: + log(stderr_text) + return status + json_text = _buf.getvalue().strip().splitlines()[-1] if _buf.getvalue() else '' + final_from_json: Optional[Path] = None + try: + payload = json.loads(json_text) if json_text else None + if isinstance(payload, dict): + outp = payload.get('output') + if isinstance(outp, str) and outp: + final_from_json = Path(outp) + except Exception: + final_from_json = None + if final_from_json and final_from_json.exists(): + source_path = final_from_json + else: + candidates = [p for p in provisional.parent.glob(provisional_stem + '*') if p.exists() and p.is_file()] + non_provisional = [p for p in candidates if p.suffix.lower() not in {'.bin', '.hydrus'}] + pick_from = non_provisional if non_provisional else candidates + if pick_from: + try: + source_path = max(pick_from, key=lambda p: p.stat().st_mtime) + except Exception: + source_path = pick_from[0] + else: + source_path = provisional candidates = [p for p in provisional.parent.glob(provisional_stem + '*') if p.exists() and p.is_file()] non_provisional = [p for p in candidates if p.suffix.lower() not in {'.bin', '.hydrus'}] pick_from = non_provisional if non_provisional else candidates @@ -1177,16 +1260,6 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: source_path = pick_from[0] else: source_path = provisional - candidates = [p for p in provisional.parent.glob(provisional_stem + '*') if p.exists() and p.is_file()] - non_provisional = [p for p in candidates if p.suffix.lower() not in {'.bin', '.hydrus'}] - pick_from = non_provisional if non_provisional else candidates - if pick_from: - try: - source_path = max(pick_from, key=lambda p: p.stat().st_mtime) - except Exception: - source_path = pick_from[0] - else: - source_path = provisional try: source_size = source_size or (source_path.stat().st_size if source_path.exists() else None) except OSError: @@ -1479,6 +1552,77 @@ def _unique_path(p: Path) -> Path: return p +def _handle_ytdlp_download(url: str, title: str, config: Dict[str, Any], args: Sequence[str]) -> int: + """Handle download/streaming of URL using yt-dlp.""" + if not url: + log("Error: No URL provided", file=sys.stderr) + return 1 + + # Check 
for -storage local + args_list = list(map(str, args)) + storage_mode = None + if '-storage' in args_list: + try: + idx = args_list.index('-storage') + if idx + 1 < len(args_list): + storage_mode = args_list[idx + 1].lower() + except ValueError: + pass + + force_local = (storage_mode == 'local') + + if not force_local: + # Default: Stream to MPV + if _play_in_mpv(url, title, is_stream=True): + from . import pipe + pipe._run(None, [], config) + return 0 + else: + # Fallback to browser + try: + import webbrowser + webbrowser.open(url) + debug(f"[get-file] Opened in browser: {title}", file=sys.stderr) + return 0 + except Exception: + pass + return 1 + + # Download mode + try: + import yt_dlp + except ImportError: + log("Error: yt-dlp not installed. Please install it to download.", file=sys.stderr) + return 1 + + log(f"Downloading {title}...", file=sys.stderr) + + # Determine output directory + download_dir = resolve_output_dir(config) + try: + download_dir.mkdir(parents=True, exist_ok=True) + except Exception: + pass + + # Configure yt-dlp + ydl_opts = { + 'outtmpl': str(download_dir / '%(title)s.%(ext)s'), + 'quiet': False, + 'no_warnings': True, + # Use best audio/video + 'format': 'best', + } + + try: + with yt_dlp.YoutubeDL(ydl_opts) as ydl: + ydl.download([url]) + log(f"Downloaded to: {download_dir}", file=sys.stderr) + return 0 + except Exception as e: + log(f"Error downloading: {e}", file=sys.stderr) + return 1 + + CMDLET = Cmdlet( name="get-file", summary="Export files: from Hydrus database OR from AllDebrid magnets via pipe. Auto-detects source and handles accordingly.", diff --git a/cmdlets/get_relationship.py b/cmdlets/get_relationship.py index 922b1eb..a47e2cf 100644 --- a/cmdlets/get_relationship.py +++ b/cmdlets/get_relationship.py @@ -3,6 +3,7 @@ from __future__ import annotations from typing import Any, Dict, Sequence, List, Optional import json import sys +from pathlib import Path from helper.logger import log @@ -11,16 +12,19 @@ import models import pipeline as ctx from helper import hydrus as hydrus_wrapper from ._shared import Cmdlet, CmdletArg, normalize_hash, fmt_bytes +from helper.local_library import LocalLibraryDB +from config import get_local_storage_path +from result_table import ResultTable CMDLET = Cmdlet( name="get-relationship", - summary="Print Hydrus relationships for the selected file.", + summary="Print relationships for the selected file (Hydrus or Local).", usage="get-relationship [-hash ]", args=[ CmdletArg("-hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."), ], details=[ - "- Lists relationship data as returned by Hydrus.", + "- Lists relationship data as returned by Hydrus or Local DB.", ], ) @@ -50,190 +54,187 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int: if isinstance(result, list) and len(result) > 0: result = result[0] + # Initialize results collection + found_relationships = [] # List of dicts: {hash, type, title, path, origin} + source_title = "Unknown" + + # Check for local file first + file_path = None + if isinstance(result, dict): + file_path = result.get("file_path") or result.get("path") + source_title = result.get("title") or result.get("name") or "Unknown" + elif hasattr(result, "file_path"): + file_path = result.file_path + source_title = getattr(result, "title", "Unknown") + + local_db_checked = False + + if file_path and not override_hash: + try: + path_obj = Path(file_path) + if not source_title or source_title == "Unknown": + source_title = 
path_obj.name + + if path_obj.exists(): + storage_path = get_local_storage_path(config) + if storage_path: + with LocalLibraryDB(storage_path) as db: + metadata = db.get_metadata(path_obj) + if metadata and metadata.get("relationships"): + local_db_checked = True + rels = metadata["relationships"] + if isinstance(rels, dict): + for rel_type, hashes in rels.items(): + if hashes: + for h in hashes: + # Try to resolve hash to filename if possible + resolved_path = db.search_by_hash(h) + title = h + path = None + if resolved_path: + path = str(resolved_path) + # Try to get title from tags + try: + tags = db.get_tags(resolved_path) + found_title = False + for t in tags: + if t.lower().startswith('title:'): + title = t[6:].strip() + found_title = True + break + if not found_title: + title = resolved_path.stem + except Exception: + title = resolved_path.stem + + found_relationships.append({ + "hash": h, + "type": rel_type, + "title": title, + "path": path, + "origin": "local" + }) + except Exception as e: + log(f"Error checking local relationships: {e}", file=sys.stderr) + + # If we found local relationships, we can stop or merge with Hydrus? + # For now, if we found local ones, let's show them. + # But if the file is also in Hydrus, we might want those too. + # Let's try Hydrus if we have a hash. + hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(getattr(result, "hash_hex", None)) if not hash_hex: - log("Selected result does not include a Hydrus hash", file=sys.stderr) - return 1 - try: - client = hydrus_wrapper.get_client(config) - except Exception as exc: - log(f"Hydrus client unavailable: {exc}", file=sys.stderr) - return 1 - - if client is None: - log("Hydrus client unavailable", file=sys.stderr) - return 1 - try: - rel = client.get_file_relationships(hash_hex) - except Exception as exc: - log(f"Hydrus relationships fetch failed: {exc}", file=sys.stderr) - return 1 - if not rel: - log("No relationships found.") - return 0 - - # Extract file_relationships from response - file_rels = rel.get("file_relationships", {}) - if not file_rels: - log("No relationships found.") - return 0 - - # Get the relationships dict for this specific hash - this_file_rels = file_rels.get(hash_hex) - if not this_file_rels: - log("No relationships found.") - return 0 - - # Extract related hashes from all relationship types - # Keys "0", "1", "3", "8" are relationship type IDs - # Values are lists of hashes - related_hashes = [] - for rel_type_id, hash_list in this_file_rels.items(): - # Skip non-numeric keys and metadata keys - if rel_type_id in {"is_king", "king", "king_is_on_file_domain", "king_is_local"}: - continue - if isinstance(hash_list, list): - for rel_hash in hash_list: - if isinstance(rel_hash, str) and rel_hash and rel_hash != hash_hex: - related_hashes.append(rel_hash) - - # Remove duplicates while preserving order - seen = set() - unique_hashes = [] - for h in related_hashes: - if h not in seen: - seen.add(h) - unique_hashes.append(h) - - if not unique_hashes: - log("No related files found.") - return 0 - - # Fetch metadata for all related files - try: - metadata_payload = client.fetch_file_metadata( - hashes=unique_hashes, - include_service_keys_to_tags=True, - include_duration=True, - include_size=True, - include_mime=True, - ) - except Exception as exc: - log(f"Hydrus metadata fetch failed: {exc}", file=sys.stderr) - return 1 - - metadata_list = metadata_payload.get("metadata") if isinstance(metadata_payload, dict) else None - if not isinstance(metadata_list, list): - 
log("Hydrus metadata response was not a list", file=sys.stderr) - return 1 - - # Build metadata map by hash - meta_by_hash: Dict[str, Dict[str, Any]] = {} - for item in metadata_list: - if isinstance(item, dict): - item_hash = normalize_hash(item.get("hash")) - if item_hash: - meta_by_hash[item_hash] = item - - # Helper functions for formatting - def _format_duration(seconds: Optional[float]) -> str: - if seconds is None: - return "" + # Try to get hash from dict + if isinstance(result, dict): + hash_hex = normalize_hash(result.get("hash") or result.get("file_hash")) + + if hash_hex and not local_db_checked: try: - s = int(seconds) - hours = s // 3600 - minutes = (s % 3600) // 60 - secs = s % 60 - if hours > 0: - return f"{hours}:{minutes:02d}:{secs:02d}" - else: - return f"{minutes}:{secs:02d}" - except Exception: - return "" - - def _get_title(meta: Dict[str, Any]) -> str: - # Try to extract title from tags - tags_payload = meta.get("tags") - if isinstance(tags_payload, dict): - for service_data in tags_payload.values(): - if isinstance(service_data, dict): - storage_tags = service_data.get("storage_tags") - if isinstance(storage_tags, dict): - for tag_list in storage_tags.values(): - if isinstance(tag_list, list): - for tag in tag_list: - tag_str = str(tag).lower() - if tag_str.startswith("title:"): - return str(tag)[6:].strip() - # Fallback to hash prefix - h = meta.get("hash") - return str(h)[:12] if h else "unknown" - - def _get_mime_type(meta: Dict[str, Any]) -> str: - mime = meta.get("mime", "") - if not mime: - return "" - # Extract type from mime (e.g., "video/mp4" -> "video") - parts = str(mime).split("/") - return parts[0] if parts else "" - - # Print header and separator - log("# | Title | Type | Duration | Size") - log("--+---------------------------+-------+----------+--------") - - # Create result objects for each related file - results: List[Any] = [] - - # Print each related file - for idx, rel_hash in enumerate(unique_hashes, start=1): - meta = meta_by_hash.get(rel_hash) - if not meta: - continue + client = hydrus_wrapper.get_client(config) + if client: + rel = client.get_file_relationships(hash_hex) + if rel: + file_rels = rel.get("file_relationships", {}) + this_file_rels = file_rels.get(hash_hex) + + if this_file_rels: + # Map Hydrus relationship IDs to names + # 0: potential duplicates, 1: false positives, 2: false positives (alternates), + # 3: duplicates, 4: alternatives, 8: king + # This mapping is approximate based on Hydrus API docs/behavior + rel_map = { + "0": "potential duplicate", + "1": "false positive", + "2": "false positive", + "3": "duplicate", + "4": "alternative", + "8": "king" + } + + for rel_type_id, hash_list in this_file_rels.items(): + # Skip metadata keys + if rel_type_id in {"is_king", "king", "king_is_on_file_domain", "king_is_local"}: + continue + + rel_name = rel_map.get(str(rel_type_id), f"type-{rel_type_id}") + + if isinstance(hash_list, list): + for rel_hash in hash_list: + if isinstance(rel_hash, str) and rel_hash and rel_hash != hash_hex: + # Check if we already have this hash from local DB + if not any(r['hash'] == rel_hash for r in found_relationships): + found_relationships.append({ + "hash": rel_hash, + "type": rel_name, + "title": rel_hash, # Can't resolve title easily without another API call + "path": None, + "origin": "hydrus" + }) + except Exception as exc: + # Only log error if we didn't find local relationships either + if not found_relationships: + log(f"Hydrus relationships fetch failed: {exc}", file=sys.stderr) + + if not 
found_relationships: + log("No relationships found.") + return 0 - title = _get_title(meta) - mime_type = _get_mime_type(meta) - - # Get duration - duration_value = meta.get("duration") - if duration_value is None and isinstance(meta.get("metadata"), dict): - duration_value = meta["metadata"].get("duration") - duration_str = _format_duration(duration_value) - - # Get size - size = meta.get("size") or meta.get("file_size") - size_str = fmt_bytes(size) if size else "" - - # Format and print row - title_display = title[:25].ljust(25) - type_display = mime_type[:5].ljust(5) - duration_display = duration_str[:8].ljust(8) - size_display = size_str[:7].ljust(7) - - log(f"{idx:2d} | {title_display} | {type_display} | {duration_display} | {size_display}") + # Display results + table = ResultTable(f"Relationships: {source_title}") + + # Sort by type then title + # Custom sort order: King first, then Derivative, then others + def type_sort_key(item): + t = item['type'].lower() + if t == 'king': + return 0 + elif t == 'derivative': + return 1 + elif t == 'alternative': + return 2 + elif t == 'duplicate': + return 3 + else: + return 4 + + found_relationships.sort(key=lambda x: (type_sort_key(x), x['title'])) + + pipeline_results = [] + + for i, item in enumerate(found_relationships): + row = table.add_row() + row.add_column("Type", item['type'].title()) + row.add_column("Title", item['title']) + # row.add_column("Hash", item['hash'][:16] + "...") # User requested removal + row.add_column("Origin", item['origin']) # Create result object for pipeline - result_obj = type("RelatedFile", (), { - "hash_hex": rel_hash, - "title": title, - "media_kind": mime_type or "other", - "size": size, - "duration": duration_value, - "known_urls": [], - "annotations": [], - "columns": [ - ("Title", title), - ("Type", mime_type), - ("Duration", duration_str), - ("Size", size_str), - ], - })() - results.append(result_obj) - - # Emit results to pipeline - try: - ctx._PIPE_EMITS.extend(results) - except Exception: - pass + res_obj = { + "title": item['title'], + "hash": item['hash'], + "file_hash": item['hash'], + "relationship_type": item['type'], + "origin": item['origin'] + } + if item['path']: + res_obj["path"] = item['path'] + res_obj["file_path"] = item['path'] + res_obj["target"] = item['path'] + else: + # If Hydrus, target is hash + res_obj["target"] = item['hash'] + + pipeline_results.append(res_obj) + + # Set selection args + # If it has a path, we can use it directly. If hash, maybe get-file -hash? + if item['path']: + table.set_row_selection_args(i, [item['path']]) + else: + table.set_row_selection_args(i, ["-hash", item['hash']]) + + ctx.set_last_result_table(table, pipeline_results) + print(table) return 0 diff --git a/cmdlets/get_url.py b/cmdlets/get_url.py index 271b53c..e087e5e 100644 --- a/cmdlets/get_url.py +++ b/cmdlets/get_url.py @@ -2,6 +2,8 @@ from __future__ import annotations from typing import Any, Dict, Sequence import json +import sys +from pathlib import Path from . 
import register import models @@ -9,16 +11,18 @@ import pipeline as ctx from helper import hydrus as hydrus_wrapper from ._shared import Cmdlet, CmdletArg, normalize_hash from helper.logger import log +from config import get_local_storage_path +from helper.local_library import LocalLibraryDB CMDLET = Cmdlet( name="get-url", - summary="List URLs associated with a Hydrus file.", + summary="List URLs associated with a file (Hydrus or Local).", usage="get-url [-hash ]", args=[ CmdletArg("-hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."), ], details=[ - "- Prints the known URLs for the selected Hydrus file.", + "- Prints the known URLs for the selected file.", ], ) @@ -55,36 +59,81 @@ def get_urls(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: return 0 except Exception: pass - override_hash, _ = _parse_hash_and_rest(args) - hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(get_field(result, "hash_hex", None)) - if not hash_hex: - log("Selected result does not include a Hydrus hash") - return 1 - try: - client = hydrus_wrapper.get_client(config) - except Exception as exc: - log(f"Hydrus client unavailable: {exc}") - return 1 - if client is None: - log("Hydrus client unavailable") - return 1 - try: - payload = client.fetch_file_metadata(hashes=[hash_hex], include_file_urls=True) - except Exception as exc: - log(f"Hydrus metadata fetch failed: {exc}") - return 1 - items = payload.get("metadata") if isinstance(payload, dict) else None - meta = items[0] if (isinstance(items, list) and items and isinstance(items[0], dict)) else None - urls = (meta.get("known_urls") if isinstance(meta, dict) else None) or [] - if urls: - ctx.emit("URLs:") - for u in urls: + override_hash, _ = _parse_hash_and_rest(args) + + # Handle @N selection which creates a list - extract the first item + if isinstance(result, list) and len(result) > 0: + result = result[0] + + found_urls = [] + + # 1. Try Local Library + file_path = get_field(result, "file_path") or get_field(result, "path") + if file_path and not override_hash: + try: + path_obj = Path(file_path) + if path_obj.exists(): + storage_path = get_local_storage_path(config) + if storage_path: + with LocalLibraryDB(storage_path) as db: + metadata = db.get_metadata(path_obj) + if metadata and metadata.get("known_urls"): + found_urls.extend(metadata["known_urls"]) + except Exception as e: + log(f"Error checking local library: {e}", file=sys.stderr) + + # 2. Try Hydrus + hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(get_field(result, "hash_hex", None)) + + # If we haven't found URLs yet, or if we want to merge them (maybe?), let's check Hydrus if we have a hash + # But usually if it's local, we might not want to check Hydrus unless requested. + # However, the user said "they can just work together". 
+ + if hash_hex: + try: + client = hydrus_wrapper.get_client(config) + if client: + payload = client.fetch_file_metadata(hashes=[hash_hex], include_file_urls=True) + items = payload.get("metadata") if isinstance(payload, dict) else None + meta = items[0] if (isinstance(items, list) and items and isinstance(items[0], dict)) else None + hydrus_urls = (meta.get("known_urls") if isinstance(meta, dict) else None) or [] + for u in hydrus_urls: + if u not in found_urls: + found_urls.append(u) + except Exception as exc: + # Only log error if we didn't find local URLs either, or if it's a specific error + if not found_urls: + log(f"Hydrus lookup failed: {exc}", file=sys.stderr) + + if found_urls: + for u in found_urls: text = str(u).strip() if text: - ctx.emit(f"- {text}") - else: - ctx.emit("No URLs found.") + # Emit a rich object that looks like a string but carries context + # We use a dict with 'title' which ResultTable uses for display + # and 'url' which is the actual data + # We also include the source file info so downstream cmdlets can use it + + # Create a result object that mimics the structure expected by delete-url + # delete-url expects a file object usually, but here we are emitting URLs. + # If we emit a dict with 'url' and 'source_file', delete-url can use it. + + rich_result = { + "title": text, # Display as just the URL + "url": text, + "source_file": result, # Pass the original file context + "file_path": get_field(result, "file_path") or get_field(result, "path"), + "hash_hex": hash_hex + } + ctx.emit(rich_result) + return 0 + + if not hash_hex and not file_path: + log("Selected result does not include a file path or Hydrus hash", file=sys.stderr) + return 1 + + ctx.emit("No URLs found.") return 0 diff --git a/cmdlets/merge_file.py b/cmdlets/merge_file.py index e4fb54c..2fa9047 100644 --- a/cmdlets/merge_file.py +++ b/cmdlets/merge_file.py @@ -331,6 +331,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: self.media_kind = media_kind self.tags = tags or [] self.known_urls = known_urls or [] + self.origin = "local" # Ensure origin is set for add-file PipelineItem = SimpleItem merged_item = PipelineItem( @@ -340,6 +341,8 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: tags=merged_tags, # Include merged tags known_urls=source_urls # Include known URLs ) + # Clear previous results to ensure only the merged file is passed down + ctx.clear_last_result() ctx.emit(merged_item) except Exception as e: log(f"Warning: Could not emit pipeline item: {e}", file=sys.stderr) @@ -347,7 +350,9 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: ctx.emit(f"Merged: {output_path}") # Delete source files if requested - if delete_after: + # Always delete source files if they were downloaded playlist items (temp files) + # We can detect this if they are in the temp download directory or if we tracked them + if delete_after or True: # Force delete for now as merge consumes them # First delete all .tags files for tags_file in source_tags_files: try: diff --git a/cmdlets/search_file.py b/cmdlets/search_file.py index 3fcd9c1..3cb036a 100644 --- a/cmdlets/search_file.py +++ b/cmdlets/search_file.py @@ -157,7 +157,7 @@ def _ensure_storage_columns(payload: Dict[str, Any]) -> Dict[str, Any]: try: size_bytes = int(size_val) size_mb = size_bytes / (1024 * 1024) - size_str = f"{size_mb:.1f} MB" + size_str = f"{int(size_mb)} MB" except (ValueError, TypeError): size_str = str(size_val) @@ -250,6 +250,34 @@ def _run(result: Any, 
args: Sequence[str], config: Dict[str, Any]) -> int: else: i += 1 + # Handle piped input (e.g. from @N selection) if query is empty + if not query and result: + # If result is a list, take the first item + actual_result = result[0] if isinstance(result, list) and result else result + + # Helper to get field + def get_field(obj: Any, field: str) -> Any: + return getattr(obj, field, None) or (obj.get(field) if isinstance(obj, dict) else None) + + origin = get_field(actual_result, 'origin') + target = get_field(actual_result, 'target') + + # Special handling for Bandcamp artist/album drill-down + if origin == 'bandcamp' and target: + query = target + if not provider_name: + provider_name = 'bandcamp' + + # Generic URL handling + elif target and str(target).startswith(('http://', 'https://')): + query = target + # Try to infer provider from URL if not set + if not provider_name: + if 'bandcamp.com' in target: + provider_name = 'bandcamp' + elif 'youtube.com' in target or 'youtu.be' in target: + provider_name = 'youtube' + if not query: log("Provide a search query", file=sys.stderr) return 1 diff --git a/cmdlets/trim_file.py b/cmdlets/trim_file.py new file mode 100644 index 0000000..912b406 --- /dev/null +++ b/cmdlets/trim_file.py @@ -0,0 +1,295 @@ +"""Trim a media file using ffmpeg.""" +from __future__ import annotations + +from typing import Any, Dict, Sequence, List, Optional +from pathlib import Path +import sys +import json +import subprocess +import shutil +import re + +from helper.logger import log, debug +from helper.utils import sha256_file +from . import register +from ._shared import ( + Cmdlet, + CmdletArg, + parse_cmdlet_args, + normalize_result_input, + extract_tags_from_result, + extract_title_from_result +) +import pipeline as ctx + +CMDLET = Cmdlet( + name="trim-file", + summary="Trim a media file using ffmpeg.", + usage="trim-file [-path ] -range [-delete]", + args=[ + CmdletArg("-path", description="Path to the file (optional if piped)."), + CmdletArg("-range", required=True, description="Time range to trim (e.g. '3:45-3:55' or '00:03:45-00:03:55')."), + CmdletArg("-delete", type="flag", description="Delete the original file after trimming."), + ], + details=[ + "Creates a new file with 'clip_' prefix in the filename/title.", + "Inherits tags from the source file.", + "Adds a relationship to the source file (if hash is available).", + "Output can be piped to add-file.", + ] +) + +def _parse_time(time_str: str) -> float: + """Convert time string (HH:MM:SS or MM:SS or SS) to seconds.""" + parts = time_str.strip().split(':') + if len(parts) == 3: + return float(parts[0]) * 3600 + float(parts[1]) * 60 + float(parts[2]) + elif len(parts) == 2: + return float(parts[0]) * 60 + float(parts[1]) + elif len(parts) == 1: + return float(parts[0]) + else: + raise ValueError(f"Invalid time format: {time_str}") + +def _trim_media(input_path: Path, output_path: Path, start_time: str, end_time: str) -> bool: + """Trim media file using ffmpeg.""" + ffmpeg_path = shutil.which('ffmpeg') + if not ffmpeg_path: + log("ffmpeg not found in PATH", file=sys.stderr) + return False + + # Calculate duration to avoid seeking issues if possible, or just use -to + # Using -ss before -i is faster (input seeking) but might be less accurate. + # Using -ss after -i is slower (output seeking) but accurate. + # For trimming, accuracy is usually preferred, but for long files input seeking is better. + # We'll use input seeking (-ss before -i) and -to. 
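+    # Worked example (hypothetical filenames) for -range "3:45-3:55":
+    #   _parse_time("3:45") -> 225.0, _parse_time("3:55") -> 235.0, duration = 10.0
+    #   command ultimately run:
+    #     ffmpeg -y -ss 3:45 -i song.mp4 -t 10.0 -c copy -map_metadata 0 clip_song.mp4
+    # Because -ss precedes -i and streams are copied, the cut snaps to the nearest
+    # preceding keyframe, so the clip may start slightly before the requested time.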
+ + cmd = [ + ffmpeg_path, '-y', + '-ss', start_time, + '-i', str(input_path), + '-to', end_time, + '-c', 'copy', # Stream copy for speed and quality preservation + '-map_metadata', '0', # Copy metadata + str(output_path) + ] + + # If stream copy fails (e.g. cutting not on keyframe), we might need re-encoding. + # But let's try copy first as it's standard for "trimming" without quality loss. + # Note: -to with input seeking (-ss before -i) resets timestamp, so -to refers to duration? + # No, -to refers to position in output if used after -ss? + # Actually, if -ss is before -i, the timestamps are reset to 0. + # So -to should be (end - start). + # Alternatively, use -t (duration). + + try: + s = _parse_time(start_time) + e = _parse_time(end_time) + duration = e - s + if duration <= 0: + log(f"Invalid range: start {start_time} >= end {end_time}", file=sys.stderr) + return False + + cmd = [ + ffmpeg_path, '-y', + '-ss', start_time, + '-i', str(input_path), + '-t', str(duration), + '-c', 'copy', + '-map_metadata', '0', + str(output_path) + ] + + debug(f"Running ffmpeg: {' '.join(cmd)}") + result = subprocess.run(cmd, capture_output=True, text=True) + + if result.returncode != 0: + log(f"ffmpeg error: {result.stderr}", file=sys.stderr) + return False + + return True + except Exception as e: + log(f"Error parsing time or running ffmpeg: {e}", file=sys.stderr) + return False + +@register(["trim-file"]) +def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: + """Trim a media file.""" + # Parse arguments + parsed = parse_cmdlet_args(args, CMDLET) + + range_arg = parsed.get("range") + if not range_arg or '-' not in range_arg: + log("Error: -range argument required (format: start-end)", file=sys.stderr) + return 1 + + start_str, end_str = range_arg.split('-', 1) + + delete_original = parsed.get("delete", False) + path_arg = parsed.get("path") + + # Collect inputs + inputs = normalize_result_input(result) + + # If path arg provided, add it to inputs + if path_arg: + inputs.append({"file_path": path_arg}) + + if not inputs: + log("No input files provided.", file=sys.stderr) + return 1 + + success_count = 0 + + for item in inputs: + # Resolve file path + file_path = None + if isinstance(item, dict): + file_path = item.get("file_path") or item.get("path") or item.get("target") + elif hasattr(item, "file_path"): + file_path = item.file_path + elif isinstance(item, str): + file_path = item + + if not file_path: + continue + + path_obj = Path(file_path) + if not path_obj.exists(): + log(f"File not found: {file_path}", file=sys.stderr) + continue + + # Determine output path + # Prepend clip_ to filename + new_filename = f"clip_{path_obj.name}" + output_path = path_obj.parent / new_filename + + # Trim + log(f"Trimming {path_obj.name} ({start_str} to {end_str})...", file=sys.stderr) + if _trim_media(path_obj, output_path, start_str, end_str): + log(f"Created clip: {output_path}", file=sys.stderr) + success_count += 1 + + # Prepare result for pipeline + + # 1. Get source hash for relationship + source_hash = None + if isinstance(item, dict): + source_hash = item.get("hash") or item.get("file_hash") + elif hasattr(item, "file_hash"): + source_hash = item.file_hash + + if not source_hash: + try: + source_hash = sha256_file(path_obj) + except Exception: + pass + + # 2. Get tags + tags = extract_tags_from_result(item) + + # 3. 
Get title and modify it + title = extract_title_from_result(item) + if not title: + title = path_obj.stem + + new_title = f"clip_{title}" + + # Update title tag if present + new_tags = [] + has_title_tag = False + for t in tags: + if t.lower().startswith("title:"): + new_tags.append(f"title:{new_title}") + has_title_tag = True + else: + new_tags.append(t) + + if not has_title_tag: + new_tags.append(f"title:{new_title}") + + # 4. Calculate clip hash and update original file's relationships + clip_hash = None + try: + clip_hash = sha256_file(output_path) + except Exception: + pass + + if source_hash and clip_hash: + # Update original file in local DB if possible + try: + from config import get_local_storage_path + from helper.local_library import LocalLibraryDB + + storage_path = get_local_storage_path(config) + if storage_path: + with LocalLibraryDB(storage_path) as db: + # Get original file metadata + # We need to find the original file by hash or path + # Try path first + orig_meta = db.get_metadata(path_obj) + if not orig_meta and source_hash: + # Try by hash + orig_path_resolved = db.search_by_hash(source_hash) + if orig_path_resolved: + orig_meta = db.get_metadata(orig_path_resolved) + + if orig_meta: + # Update relationships + rels = orig_meta.get("relationships", {}) + if not isinstance(rels, dict): + rels = {} + + # Add clip as "derivative" (since original is the source) + if "derivative" not in rels: + rels["derivative"] = [] + + if clip_hash not in rels["derivative"]: + rels["derivative"].append(clip_hash) + + # Save back to DB + # We need to preserve other metadata + orig_meta["relationships"] = rels + + # Ensure hash is set in metadata if we have it + if source_hash and not orig_meta.get("hash"): + orig_meta["hash"] = source_hash + + # We need the path to save + save_path = Path(orig_meta.get("file_path") or path_obj) + db.save_metadata(save_path, orig_meta) + log(f"Updated relationship for original file: {save_path.name}", file=sys.stderr) + except Exception as e: + log(f"Failed to update original file relationships: {e}", file=sys.stderr) + + # 5. Construct result + result_dict = { + "file_path": str(output_path), + "path": str(output_path), + "title": new_title, + "tags": new_tags, + "media_kind": "video", # Assumption, or derive + "hash": clip_hash, # Pass calculated hash + "relationships": { + # The source is the KING of this clip + "king": [source_hash] if source_hash else [] + } + } + + # Emit result + ctx.emit(result_dict) + + # Delete original if requested + if delete_original: + try: + path_obj.unlink() + log(f"Deleted original file: {path_obj}", file=sys.stderr) + # Also try to delete sidecars? 
+ # Maybe leave that to user or cleanup cmdlet + except Exception as e: + log(f"Failed to delete original: {e}", file=sys.stderr) + + else: + log(f"Failed to trim {path_obj.name}", file=sys.stderr) + + return 0 if success_count > 0 else 1 diff --git a/helper/file_storage.py b/helper/file_storage.py index bdd7bab..a858162 100644 --- a/helper/file_storage.py +++ b/helper/file_storage.py @@ -264,9 +264,12 @@ class LocalStorageBackend(StorageBackend): """, (file_id,)) all_tags = [row[0] for row in cursor.fetchall()] + # Use title tag if present + title_tag = next((t.split(':', 1)[1] for t in all_tags if t.lower().startswith('title:')), None) + results.append({ "name": file_path.stem, - "title": file_path.stem, + "title": title_tag or file_path.stem, "ext": file_path.suffix.lstrip('.'), "path": path_str, "target": path_str, @@ -364,9 +367,12 @@ class LocalStorageBackend(StorageBackend): """, (file_id,)) tags = [row[0] for row in cursor.fetchall()] + # Use title tag if present + title_tag = next((t.split(':', 1)[1] for t in tags if t.lower().startswith('title:')), None) + results.append({ "name": file_path.stem, - "title": file_path.stem, + "title": title_tag or file_path.stem, "ext": file_path.suffix.lstrip('.'), "path": path_str, "target": path_str, @@ -410,9 +416,12 @@ class LocalStorageBackend(StorageBackend): """, (file_id,)) tags = [row[0] for row in cursor.fetchall()] + # Use title tag if present + title_tag = next((t.split(':', 1)[1] for t in tags if t.lower().startswith('title:')), None) + results.append({ "name": file_path.stem, - "title": file_path.stem, + "title": title_tag or file_path.stem, "ext": file_path.suffix.lstrip('.'), "path": path_str, "target": path_str, @@ -449,9 +458,12 @@ class LocalStorageBackend(StorageBackend): """, (file_id,)) tags = [row[0] for row in cursor.fetchall()] + # Use title tag if present + title_tag = next((t.split(':', 1)[1] for t in tags if t.lower().startswith('title:')), None) + results.append({ "name": file_path.stem, - "title": file_path.stem, + "title": title_tag or file_path.stem, "ext": file_path.suffix.lstrip('.'), "path": path_str, "target": path_str, diff --git a/helper/local_library.py b/helper/local_library.py index ca960d1..dda9cfc 100644 --- a/helper/local_library.py +++ b/helper/local_library.py @@ -497,6 +497,10 @@ class LocalLibraryDB: cursor = self.connection.cursor() + # Update file hash in files table if present + if metadata.get('hash'): + cursor.execute("UPDATE files SET file_hash = ? WHERE id = ?", (metadata['hash'], file_id)) + known_urls = metadata.get('known_urls', []) if not isinstance(known_urls, str): known_urls = json.dumps(known_urls) @@ -534,6 +538,72 @@ class LocalLibraryDB: except Exception as e: logger.error(f"[save_metadata] ❌ Error saving metadata for {file_path}: {e}", exc_info=True) raise + + def save_file_info(self, file_path: Path, metadata: Dict[str, Any], tags: List[str]) -> None: + """Save metadata and tags for a file in a single transaction.""" + try: + str_path = str(file_path.resolve()) + logger.debug(f"[save_file_info] Starting save for: {str_path}") + + file_id = self.get_or_create_file_entry(file_path) + + cursor = self.connection.cursor() + + # Update file hash in files table if present + if metadata.get('hash'): + cursor.execute("UPDATE files SET file_hash = ? WHERE id = ?", (metadata['hash'], file_id)) + + # 1. 
Save Metadata + known_urls = metadata.get('known_urls', []) + if not isinstance(known_urls, str): + known_urls = json.dumps(known_urls) + + relationships = metadata.get('relationships', []) + if not isinstance(relationships, str): + relationships = json.dumps(relationships) + + cursor.execute(""" + INSERT INTO metadata ( + file_id, hash, known_urls, relationships, + duration, size, ext, media_type, media_kind, + time_imported, time_modified + ) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP, CURRENT_TIMESTAMP) + ON CONFLICT(file_id) DO UPDATE SET + hash = excluded.hash, + known_urls = excluded.known_urls, + relationships = excluded.relationships, + duration = excluded.duration, + size = excluded.size, + ext = excluded.ext, + media_type = excluded.media_type, + media_kind = excluded.media_kind, + time_modified = CURRENT_TIMESTAMP, + updated_at = CURRENT_TIMESTAMP + """, ( + file_id, metadata.get('hash'), known_urls, relationships, + metadata.get('duration'), metadata.get('size'), metadata.get('ext'), + metadata.get('media_type'), metadata.get('media_kind') + )) + + # 2. Save Tags + # We assume tags list is complete and includes title if needed + cursor.execute("DELETE FROM tags WHERE file_id = ?", (file_id,)) + + for tag in tags: + tag = tag.strip() + if tag: + cursor.execute(""" + INSERT OR IGNORE INTO tags (file_id, tag, tag_type) + VALUES (?, ?, 'user') + """, (file_id, tag)) + + self.connection.commit() + logger.debug(f"[save_file_info] ✅ Committed metadata and tags for file_id {file_id}") + + except Exception as e: + logger.error(f"[save_file_info] ❌ Error saving file info for {file_path}: {e}", exc_info=True) + raise def get_tags(self, file_path: Path) -> List[str]: """Get all tags for a file.""" @@ -572,12 +642,15 @@ class LocalLibraryDB: cursor.execute("DELETE FROM tags WHERE file_id = ?", (file_id,)) logger.debug(f"[save_tags] Deleted existing tags for file_id {file_id}") - if existing_title: + # Check if new tags provide a title + new_title_provided = any(str(t).strip().lower().startswith("title:") for t in tags) + + if existing_title and not new_title_provided: cursor.execute(""" INSERT INTO tags (file_id, tag, tag_type) VALUES (?, ?, 'user') """, (file_id, existing_title[0])) logger.debug(f"[save_tags] Preserved existing title tag") - else: + elif not existing_title and not new_title_provided: filename_without_ext = file_path.stem if filename_without_ext: # Normalize underscores to spaces for consistency diff --git a/helper/search_provider.py b/helper/search_provider.py index 0a4a2ec..c3d40bc 100644 --- a/helper/search_provider.py +++ b/helper/search_provider.py @@ -28,9 +28,16 @@ from typing import Any, Dict, List, Optional, Sequence, Tuple from dataclasses import dataclass from pathlib import Path import sys + +try: + from playwright.sync_api import sync_playwright + PLAYWRIGHT_AVAILABLE = True +except ImportError: + PLAYWRIGHT_AVAILABLE = False import subprocess import json import shutil +from helper.logger import log, debug from helper.logger import log, debug @@ -1580,8 +1587,293 @@ class YoutubeSearchProvider(SearchProvider): return shutil.which("yt-dlp") is not None +class BandcampProvider(SearchProvider): + """ + Search provider for Bandcamp using Playwright scraper. 
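+
+    Two modes are supported (see search() and _scrape_url() below):
+      * Search mode: the query is sent to https://bandcamp.com/search. Optional
+        prefixes map to Bandcamp's item_type parameter: "artist:"/"label:" -> b,
+        "album:" -> a, "track:" -> t (the default). For example, the query
+        "artist:example band" produces roughly
+        https://bandcamp.com/search?q=example band&item_type=b
+        (illustrative query; prefix handling also lowercases the remaining text).
+      * URL mode: if the query is an http(s) URL, the artist /music page or an
+        album page is scraped directly instead of searching.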
+ """ + RESULT_FIELDS = [ + ("name", "Name", None), + ("artist", "Artist/Loc", None), + ("type", "Type", None) + ] + + def search( + self, + query: str, + limit: int = 50, + filters: Optional[Dict[str, Any]] = None, + **kwargs + ) -> List[SearchResult]: + if not PLAYWRIGHT_AVAILABLE: + print("Playwright library not available. Please install it (pip install playwright).") + return [] + + results = [] + try: + with sync_playwright() as p: + # Launch browser (headless) + browser = p.chromium.launch(headless=True) + page = browser.new_page() + + # Check if query is a URL (Artist/Album Scraping Mode) + if query.startswith("http://") or query.startswith("https://"): + return self._scrape_url(page, query, limit) + + # Search Mode + # Parse query for prefixes + search_type = "t" # Default to track + clean_query = query + + if "artist:" in query.lower(): + search_type = "b" + clean_query = query.lower().replace("artist:", "").strip() + elif "album:" in query.lower(): + search_type = "a" + clean_query = query.lower().replace("album:", "").strip() + elif "track:" in query.lower(): + search_type = "t" + clean_query = query.lower().replace("track:", "").strip() + elif "label:" in query.lower(): + search_type = "b" + clean_query = query.lower().replace("label:", "").strip() + + # Filters override prefix + if filters: + ftype = filters.get("type", "").lower() + if ftype in ["album", "albums"]: + search_type = "a" + elif ftype in ["artist", "artists", "label", "labels"]: + search_type = "b" + elif ftype in ["track", "tracks"]: + search_type = "t" + + # Construct URL with item_type + url = f"https://bandcamp.com/search?q={clean_query}&item_type={search_type}" + debug(f"[Bandcamp] Navigating to search URL: {url}") + page.goto(url) + page.wait_for_load_state("domcontentloaded") + + # Wait for results + try: + # Wait for the search results to appear in the DOM + page.wait_for_selector(".searchresult", timeout=10000) + except Exception as e: + # No results found or timeout + log(f"Bandcamp search timeout or no results: {e}") + browser.close() + return [] + + # Extract items + items = page.query_selector_all(".searchresult") + debug(f"[Bandcamp] Found {len(items)} results") + + for item in items: + if len(results) >= limit: + break + + try: + # Extract data + heading_el = item.query_selector(".heading a") + if not heading_el: + debug("[Bandcamp] Skipping item: No heading found") + continue + + name = heading_el.inner_text().strip() + item_url = heading_el.get_attribute("href") + # Clean URL (remove query params) + if item_url and "?" 
in item_url: + item_url = item_url.split("?")[0] + + item_type_el = item.query_selector(".itemtype") + item_type = item_type_el.inner_text().strip() if item_type_el else "Unknown" + + subhead_el = item.query_selector(".subhead") + subhead = subhead_el.inner_text().strip() if subhead_el else "" + + art_el = item.query_selector(".art img") + img = art_el.get_attribute("src") if art_el else None + + # Map to metadata + metadata = { + "name": name, + "type": item_type, + "url": item_url, + "img": img, + "subhead": subhead + } + + # Refine metadata based on type + artist_or_loc = subhead + if "ALBUM" in item_type.upper(): + artist_or_loc = subhead.replace("by ", "").strip() + metadata["artist"] = artist_or_loc + elif "ARTIST" in item_type.upper() or "LABEL" in item_type.upper(): + metadata["location"] = subhead + elif "TRACK" in item_type.upper(): + artist_or_loc = subhead.replace("by ", "").strip() + metadata["artist"] = artist_or_loc + + columns = [ + ("Name", name), + ("Artist/Loc", artist_or_loc), + ("Type", item_type) + ] + + results.append(SearchResult( + origin="bandcamp", + title=name, + target=item_url, + full_metadata=metadata, + columns=columns + )) + except Exception as e: + # Skip malformed items + debug(f"[Bandcamp] Error parsing item: {e}") + continue + + browser.close() + + except Exception as e: + log(f"Bandcamp search error: {e}") + return [] + + return results + + def _scrape_url(self, page, url: str, limit: int) -> List[SearchResult]: + """Scrape a Bandcamp artist or album page.""" + debug(f"[Bandcamp] Scraping URL: {url}") + + # If it's an artist page, try to go to /music to see all + if ".bandcamp.com" in url and "/music" not in url and "/album/" not in url and "/track/" not in url: + # Check if it's likely an artist root + url = url.rstrip("/") + "/music" + debug(f"[Bandcamp] Adjusted to music page: {url}") + + page.goto(url) + page.wait_for_load_state("domcontentloaded") + + results = [] + + # Check for grid items (Artist page /music) + grid_items = page.query_selector_all(".music-grid-item") + if grid_items: + debug(f"[Bandcamp] Found {len(grid_items)} grid items") + + # Try to get global artist name from page metadata/header as fallback + page_artist = "" + try: + og_site_name = page.query_selector('meta[property="og:site_name"]') + if og_site_name: + page_artist = og_site_name.get_attribute("content") or "" + + if not page_artist: + band_name = page.query_selector('#band-name-location .title') + if band_name: + page_artist = band_name.inner_text().strip() + except Exception: + pass + + for item in grid_items: + if len(results) >= limit: + break + try: + title_el = item.query_selector(".title") + # Sanitize title to remove newlines which break the table + title = title_el.inner_text().strip().replace("\n", " ").replace("\r", "") if title_el else "Unknown" + # Remove extra spaces + title = " ".join(title.split()) + + link_el = item.query_selector("a") + href = link_el.get_attribute("href") if link_el else "" + if href and not href.startswith("http"): + # Relative link, construct full URL + base = url.split("/music")[0] + href = base + href + + artist_el = item.query_selector(".artist") + artist = artist_el.inner_text().replace("by ", "").strip() if artist_el else "" + + # Use page artist if item artist is missing + if not artist and page_artist: + artist = page_artist + + # Sanitize artist + artist = artist.replace("\n", " ").replace("\r", "") + artist = " ".join(artist.split()) + + columns = [ + ("Name", title), + ("Artist", artist), + ("Type", "Album/Track") + ] + + 
results.append(SearchResult( + origin="bandcamp", + title=title, + target=href, + full_metadata={"artist": artist}, + columns=columns + )) + except Exception as e: + debug(f"[Bandcamp] Error parsing grid item: {e}") + continue + return results + + # Check for track list (Album page) + track_rows = page.query_selector_all(".track_row_view") + if track_rows: + debug(f"[Bandcamp] Found {len(track_rows)} track rows") + # Get Album Artist + artist_el = page.query_selector("#name-section h3 span a") + album_artist = artist_el.inner_text().strip() if artist_el else "Unknown" + + for row in track_rows: + if len(results) >= limit: + break + try: + title_el = row.query_selector(".track-title") + # Sanitize title + title = title_el.inner_text().strip().replace("\n", " ").replace("\r", "") if title_el else "Unknown" + title = " ".join(title.split()) + + # Track link + link_el = row.query_selector(".title a") + href = link_el.get_attribute("href") if link_el else "" + if href and not href.startswith("http"): + base = url.split(".com")[0] + ".com" + href = base + href + + duration_el = row.query_selector(".time") + duration = duration_el.inner_text().strip() if duration_el else "" + + columns = [ + ("Name", title), + ("Artist", album_artist), + ("Duration", duration) + ] + + results.append(SearchResult( + origin="bandcamp", + title=title, + target=href, + full_metadata={"artist": album_artist, "duration": duration}, + columns=columns + )) + except Exception as e: + debug(f"[Bandcamp] Error parsing track row: {e}") + continue + return results + + debug("[Bandcamp] No recognizable items found on page") + return [] + + def get_result_args(self) -> List[str]: + return ["-url"] + + # Provider registry _PROVIDERS = { + "bandcamp": BandcampProvider, "local": LocalStorageProvider, "libgen": LibGenProvider, "soulseek": SoulSeekProvider, diff --git a/result_table.py b/result_table.py index f2a3cf6..a13217a 100644 --- a/result_table.py +++ b/result_table.py @@ -9,7 +9,6 @@ Features: - Interactive selection with user input - Input options for cmdlet arguments (location, source selection, etc) """ - from dataclasses import dataclass, field from typing import Any, Dict, List, Optional, Union, Callable, Tuple from pathlib import Path