From 5e4df11dbf628b9663d02ca7a2ca0a61a8a89b8e Mon Sep 17 00:00:00 2001 From: nose Date: Wed, 3 Dec 2025 15:18:57 -0800 Subject: [PATCH] upk --- cmdlets/add_file.py | 5 +- cmdlets/add_relationship.py | 291 +++++++++++--- cmdlets/delete_relationship.py | 168 ++++++++ cmdlets/delete_tag.py | 4 +- cmdlets/download_data.py | 42 +- cmdlets/get_relationship.py | 186 ++++++++- cmdlets/pipe.py | 43 ++- cmdlets/worker.py | 7 +- helper/download.py | 157 ++++++-- helper/file_storage.py | 656 ++++++++++++++++++++------------ helper/local_library.py | 217 +++++++++++ helper/remote_storage_server.py | 523 +++++++++++++++++++++++++ 12 files changed, 1953 insertions(+), 346 deletions(-) create mode 100644 cmdlets/delete_relationship.py create mode 100644 helper/remote_storage_server.py diff --git a/cmdlets/add_file.py b/cmdlets/add_file.py index 067660a..3f56ef1 100644 --- a/cmdlets/add_file.py +++ b/cmdlets/add_file.py @@ -68,8 +68,7 @@ def _load_sidecar_bundle(media_path: Path, origin: Optional[str] = None, config: if db_root: try: - db = LocalLibraryDB(Path(db_root)) - try: + with LocalLibraryDB(Path(db_root)) as db: # Get tags and metadata from database tags = db.get_tags(media_path) or [] metadata = db.get_metadata(media_path) or {} @@ -79,8 +78,6 @@ def _load_sidecar_bundle(media_path: Path, origin: Optional[str] = None, config: if tags or known_urls or file_hash: debug(f"Found metadata in local database: {len(tags)} tag(s), {len(known_urls)} URL(s)") return None, file_hash, tags, known_urls - finally: - db.close() except Exception as exc: log(f"⚠️ Could not query local database: {exc}", file=sys.stderr) except Exception: diff --git a/cmdlets/add_relationship.py b/cmdlets/add_relationship.py index ff9af02..2111fb4 100644 --- a/cmdlets/add_relationship.py +++ b/cmdlets/add_relationship.py @@ -14,22 +14,25 @@ from . 
import register import models import pipeline as ctx from helper import hydrus as hydrus_wrapper -from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args +from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args, normalize_result_input from helper.local_library import read_sidecar, find_sidecar CMDLET = Cmdlet( name="add-relationship", summary="Associate file relationships (king/alt/related) in Hydrus based on relationship tags in sidecar.", - usage="add-relationship OR add-relationship -path ", + usage="@1-3 | add-relationship -king @4 OR add-relationship -path OR @1,@2,@3 | add-relationship", args=[ CmdletArg("path", type="string", description="Specify the local file path (if not piping a result)."), + CmdletArg("-king", type="string", description="Explicitly set the king hash/file for relationships (e.g., -king @4 or -king hash)"), + CmdletArg("-type", type="string", description="Relationship type for piped items (default: 'alt', options: 'king', 'alt', 'related')"), ], details=[ - "- Reads relationship tags from sidecar (format: 'relationship: hash(king),hash(alt),hash(related)')", - "- Calls Hydrus API to associate the hashes as relationships", + "- Mode 1: Pipe multiple items, first becomes king, rest become alts (default)", + "- Mode 2: Use -king to explicitly set which item/hash is the king: @1-3 | add-relationship -king @4", + "- Mode 3: Read relationships from sidecar (format: 'relationship: hash(king),hash(alt)...')", "- Supports three relationship types: king (primary), alt (alternative), related (other versions)", - "- Works with piped file results or -path argument for direct invocation", + "- When using -king, all piped items become the specified relationship type to the king", ], ) @@ -67,6 +70,81 @@ def _extract_relationships_from_tag(tag_value: str) -> Dict[str, list[str]]: return result +def _resolve_king_reference(king_arg: str) -> Optional[str]: + """Resolve a king reference like '@4' to its actual hash or path. + + Supports: + - Direct hash: '0123456789abcdef...' 
(64 chars) + - Selection reference: '@4' (resolves from pipeline context) + + Returns: + - For Hydrus items: normalized hash + - For local storage items: file path + - None if not found + """ + if not king_arg: + return None + + # Check if it's already a valid hash + normalized = _normalise_hash_hex(king_arg) + if normalized: + return normalized + + # Try to resolve as @N selection from pipeline context + if king_arg.startswith('@'): + try: + # Get the result items from the pipeline context + from pipeline import get_last_result_items + items = get_last_result_items() + if not items: + log(f"Cannot resolve {king_arg}: no search results in context", file=sys.stderr) + return None + + # Parse @N to get the index (1-based) + index_str = king_arg[1:] # Remove '@' + index = int(index_str) - 1 # Convert to 0-based + + if 0 <= index < len(items): + item = items[index] + + # Try to extract hash from the item (could be dict or object) + item_hash = None + if isinstance(item, dict): + # Dictionary: try common hash field names + item_hash = item.get('hash_hex') or item.get('hash') or item.get('file_hash') + else: + # Object: use getattr + item_hash = getattr(item, 'hash_hex', None) or getattr(item, 'hash', None) + + if item_hash: + normalized = _normalise_hash_hex(item_hash) + if normalized: + return normalized + + # If no hash, try to get file path (for local storage) + file_path = None + if isinstance(item, dict): + # Dictionary: try common path field names + file_path = item.get('file_path') or item.get('path') or item.get('target') + else: + # Object: use getattr + file_path = getattr(item, 'file_path', None) or getattr(item, 'path', None) or getattr(item, 'target', None) + + if file_path: + return str(file_path) + + log(f"Item {king_arg} has no hash or path information", file=sys.stderr) + return None + else: + log(f"Index {king_arg} out of range", file=sys.stderr) + return None + except (ValueError, IndexError) as e: + log(f"Cannot resolve {king_arg}: {e}", file=sys.stderr) + return None + + return None + + @register(["add-relationship", "add-rel"]) # primary name and alias def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int: """Associate file relationships in Hydrus. 
@@ -88,6 +166,9 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int: # Parse arguments using CMDLET spec parsed = parse_cmdlet_args(_args, CMDLET) arg_path: Optional[Path] = None + king_arg = parsed.get("king") # New: explicit king argument + rel_type = parsed.get("type", "alt") # New: relationship type (default: alt) + if parsed: # Get the first arg value (e.g., -path) first_arg_name = CMDLET.get("args", [{}])[0].get("name") if CMDLET.get("args") else None @@ -98,62 +179,160 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int: except Exception: arg_path = Path(str(arg_value)) - # Get Hydrus client - try: - client = hydrus_wrapper.get_client(config) - except Exception as exc: - log(f"Hydrus client unavailable: {exc}", file=sys.stderr) + # Handle @N selection which creates a list + # Use normalize_result_input to handle both single items and lists + items_to_process = normalize_result_input(result) + + if not items_to_process and not arg_path: + log("No items provided to add-relationship (no piped result and no -path)", file=sys.stderr) return 1 - if client is None: - log("Hydrus client unavailable", file=sys.stderr) - return 1 + # If no items from pipeline, just process the -path arg + if not items_to_process and arg_path: + items_to_process = [{"file_path": arg_path}] - # Handle @N selection which creates a list - extract the first item - if isinstance(result, list) and len(result) > 0: - result = result[0] - - # Check if we're in pipeline mode (have a hash) or file mode - file_hash = getattr(result, "hash_hex", None) + # Import local storage utilities + from helper.local_library import LocalLibrarySearchOptimizer + from config import get_local_storage_path - # PIPELINE MODE: Track relationships across multiple items - if file_hash: - file_hash = _normalise_hash_hex(file_hash) - if not file_hash: - log("Invalid file hash format", file=sys.stderr) - return 1 - - # Load or initialize king hash from pipeline context + local_storage_path = get_local_storage_path(config) if config else None + + # Check if any items have Hydrus hashes (file_hash or hash_hex fields) + has_hydrus_hashes = any( + (isinstance(item, dict) and (item.get('hash_hex') or item.get('hash'))) + or (hasattr(item, 'hash_hex') or hasattr(item, 'hash')) + for item in items_to_process + ) + + # Only try to initialize Hydrus if we actually have Hydrus hashes to work with + hydrus_client = None + if has_hydrus_hashes: try: - king_hash = ctx.load_value("relationship_king") - except Exception: - king_hash = None - - # If this is the first item, make it the king - if not king_hash: - try: - ctx.store_value("relationship_king", file_hash) - log(f"Established king hash: {file_hash}", file=sys.stderr) - return 0 # First item just becomes the king, no relationships yet - except Exception: - pass - - # If we already have a king and this is a different hash, link them - if king_hash and king_hash != file_hash: - try: - client.set_relationship(file_hash, king_hash, "alt") - log( - f"[add-relationship] Set alt relationship: {file_hash} <-> {king_hash}", - file=sys.stderr - ) - return 0 - except Exception as exc: - log(f"Failed to set relationship: {exc}", file=sys.stderr) - return 1 - - return 0 + hydrus_client = hydrus_wrapper.get_client(config) + except Exception as exc: + log(f"Hydrus unavailable, will use local storage: {exc}", file=sys.stderr) - # FILE MODE: Read relationships from sidecar + # Use local storage if it's available and either Hydrus is not available or items are local 
files + use_local_storage = local_storage_path and (not has_hydrus_hashes or (arg_path and arg_path.exists())) + + # Resolve the king reference once (if provided) + king_hash = None + if king_arg: + # Resolve the king reference (could be @4 or a direct hash) + king_hash = _resolve_king_reference(king_arg) + if not king_hash: + log(f"Failed to resolve king argument: {king_arg}", file=sys.stderr) + return 1 + log(f"Using king hash: {king_hash}", file=sys.stderr) + + # Process each item in the list + for item_idx, item in enumerate(items_to_process): + # Extract hash and path from current item + file_hash = None + file_path_from_result = None + + if isinstance(item, dict): + file_hash = item.get("hash_hex") or item.get("hash") + file_path_from_result = item.get("file_path") or item.get("path") or item.get("target") + else: + file_hash = getattr(item, "hash_hex", None) or getattr(item, "hash", None) + file_path_from_result = getattr(item, "file_path", None) or getattr(item, "path", None) + + # PIPELINE MODE with Hydrus: Track relationships using hash + if file_hash and hydrus_client: + file_hash = _normalise_hash_hex(file_hash) + if not file_hash: + log("Invalid file hash format", file=sys.stderr) + return 1 + + # If explicit -king provided, use it + if king_hash: + try: + hydrus_client.set_relationship(file_hash, king_hash, rel_type) + log( + f"[add-relationship] Set {rel_type} relationship: {file_hash} <-> {king_hash}", + file=sys.stderr + ) + except Exception as exc: + log(f"Failed to set relationship: {exc}", file=sys.stderr) + return 1 + else: + # Original behavior: no explicit king, first becomes king, rest become alts + try: + existing_king = ctx.load_value("relationship_king") + except Exception: + existing_king = None + + # If this is the first item, make it the king + if not existing_king: + try: + ctx.store_value("relationship_king", file_hash) + log(f"Established king hash: {file_hash}", file=sys.stderr) + continue # Move to next item + except Exception: + pass + + # If we already have a king and this is a different hash, link them + if existing_king and existing_king != file_hash: + try: + hydrus_client.set_relationship(file_hash, existing_king, rel_type) + log( + f"[add-relationship] Set {rel_type} relationship: {file_hash} <-> {existing_king}", + file=sys.stderr + ) + except Exception as exc: + log(f"Failed to set relationship: {exc}", file=sys.stderr) + return 1 + + # LOCAL STORAGE MODE: Handle relationships for local files + elif use_local_storage and file_path_from_result: + try: + file_path_obj = Path(str(file_path_from_result)) + + if not file_path_obj.exists(): + log(f"File not found: {file_path_obj}", file=sys.stderr) + return 1 + + if king_hash: + # king_hash is a file path from _resolve_king_reference (or a Hydrus hash) + king_file_path = Path(str(king_hash)) if king_hash else None + if king_file_path and king_file_path.exists(): + with LocalLibrarySearchOptimizer(local_storage_path) as db: + db.set_relationship(file_path_obj, king_file_path, rel_type) + log(f"Set {rel_type} relationship: {file_path_obj.name} -> {king_file_path.name}", file=sys.stderr) + else: + log(f"King file not found or invalid: {king_hash}", file=sys.stderr) + return 1 + else: + # Original behavior: first becomes king, rest become alts + try: + king_path = ctx.load_value("relationship_king_path") + except Exception: + king_path = None + + if not king_path: + try: + ctx.store_value("relationship_king_path", str(file_path_obj)) + log(f"Established king file: {file_path_obj.name}", file=sys.stderr) + 
continue # Move to next item + except Exception: + pass + + if king_path and king_path != str(file_path_obj): + try: + with LocalLibrarySearchOptimizer(local_storage_path) as db: + db.set_relationship(file_path_obj, Path(king_path), rel_type) + log(f"Set {rel_type} relationship: {file_path_obj.name} -> {Path(king_path).name}", file=sys.stderr) + except Exception as exc: + log(f"Failed to set relationship: {exc}", file=sys.stderr) + return 1 + except Exception as exc: + log(f"Local storage error: {exc}", file=sys.stderr) + return 1 + + return 0 + + # FILE MODE: Read relationships from sidecar (legacy mode - for -path arg only) log("Note: Use piping mode for easier relationships. Example: 1,2,3 | add-relationship", file=sys.stderr) # Resolve media path from -path arg or result target @@ -235,7 +414,7 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int: continue try: - client.set_relationship(file_hash, related_hash, rel_type) + hydrus_client.set_relationship(file_hash, related_hash, rel_type) log( f"[add-relationship] Set {rel_type} relationship: " f"{file_hash} <-> {related_hash}", diff --git a/cmdlets/delete_relationship.py b/cmdlets/delete_relationship.py new file mode 100644 index 0000000..042b410 --- /dev/null +++ b/cmdlets/delete_relationship.py @@ -0,0 +1,168 @@ +"""Delete file relationships.""" + +from __future__ import annotations + +from typing import Any, Dict, Optional, Sequence +import json +from pathlib import Path +import sys + +from helper.logger import log + +import pipeline as ctx +from ._shared import Cmdlet, CmdletArg, parse_cmdlet_args, normalize_result_input +from helper.local_library import LocalLibrarySearchOptimizer +from config import get_local_storage_path + + +def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: + """Delete relationships from files. 
+ + Args: + result: Input result(s) from previous cmdlet + args: Command arguments + config: CLI configuration + + Returns: + Exit code (0 = success) + """ + try: + # Parse arguments + parsed_args = parse_cmdlet_args(args, CMDLET) + delete_all_flag = parsed_args.get("all", False) + rel_type_filter = parsed_args.get("type") + + # Get storage path + local_storage_path = get_local_storage_path(config) + if not local_storage_path: + log("Local storage path not configured", file=sys.stderr) + return 1 + + # Normalize input + results = normalize_result_input(result) + + if not results: + log("No results to process", file=sys.stderr) + return 1 + + deleted_count = 0 + + for single_result in results: + try: + # Get file path from result + file_path_from_result = None + + if isinstance(single_result, dict): + file_path_from_result = ( + single_result.get("file_path") or + single_result.get("path") or + single_result.get("target") + ) + else: + file_path_from_result = ( + getattr(single_result, "file_path", None) or + getattr(single_result, "path", None) or + getattr(single_result, "target", None) or + str(single_result) + ) + + if not file_path_from_result: + log("Could not extract file path from result", file=sys.stderr) + return 1 + + file_path_obj = Path(str(file_path_from_result)) + + if not file_path_obj.exists(): + log(f"File not found: {file_path_obj}", file=sys.stderr) + return 1 + + with LocalLibrarySearchOptimizer(local_storage_path) as db: + file_id = db.db.get_file_id(file_path_obj) + + if not file_id: + log(f"File not in database: {file_path_obj.name}", file=sys.stderr) + continue + + # Get current relationships + cursor = db.db.connection.cursor() + cursor.execute(""" + SELECT relationships FROM metadata WHERE file_id = ? + """, (file_id,)) + + row = cursor.fetchone() + if not row: + log(f"No relationships found for: {file_path_obj.name}", file=sys.stderr) + continue + + relationships_str = row[0] + if not relationships_str: + log(f"No relationships found for: {file_path_obj.name}", file=sys.stderr) + continue + + try: + relationships = json.loads(relationships_str) + except json.JSONDecodeError: + log(f"Invalid relationship data for: {file_path_obj.name}", file=sys.stderr) + continue + + if not isinstance(relationships, dict): + relationships = {} + + # Determine what to delete + if delete_all_flag: + # Delete all relationships + deleted_types = list(relationships.keys()) + relationships = {} + log(f"Deleted all relationships ({len(deleted_types)} types) from: {file_path_obj.name}", file=sys.stderr) + elif rel_type_filter: + # Delete specific type + if rel_type_filter in relationships: + deleted_count_for_type = len(relationships[rel_type_filter]) + del relationships[rel_type_filter] + log(f"Deleted {deleted_count_for_type} {rel_type_filter} relationship(s) from: {file_path_obj.name}", file=sys.stderr) + else: + log(f"No {rel_type_filter} relationships found for: {file_path_obj.name}", file=sys.stderr) + continue + else: + log("Specify --all to delete all relationships or -type to delete specific type", file=sys.stderr) + return 1 + + # Save updated relationships + cursor.execute(""" + INSERT INTO metadata (file_id, relationships) + VALUES (?, ?) 
+ ON CONFLICT(file_id) DO UPDATE SET + relationships = excluded.relationships, + time_modified = CURRENT_TIMESTAMP + """, (file_id, json.dumps(relationships) if relationships else None)) + + db.db.connection.commit() + deleted_count += 1 + + except Exception as exc: + log(f"Error deleting relationship: {exc}", file=sys.stderr) + return 1 + + log(f"Successfully deleted relationships from {deleted_count} file(s)", file=sys.stderr) + return 0 + + except Exception as exc: + log(f"Error in delete-relationship: {exc}", file=sys.stderr) + return 1 + + +CMDLET = Cmdlet( + name="delete-relationship", + summary="Remove relationships from files.", + usage="@1 | delete-relationship --all OR delete-relationship -path --all OR @1-3 | delete-relationship -type alt", + args=[ + CmdletArg("path", type="string", description="Specify the local file path (if not piping a result)."), + CmdletArg("all", type="flag", description="Delete all relationships for the file(s)."), + CmdletArg("type", type="string", description="Delete specific relationship type ('alt', 'king', 'related'). Default: delete all types."), + ], + details=[ + "- Delete all relationships: pipe files | delete-relationship --all", + "- Delete specific type: pipe files | delete-relationship -type alt", + "- Delete all from file: delete-relationship -path --all", + ], +) diff --git a/cmdlets/delete_tag.py b/cmdlets/delete_tag.py index 7a19fcd..bc91927 100644 --- a/cmdlets/delete_tag.py +++ b/cmdlets/delete_tag.py @@ -243,8 +243,8 @@ def _process_deletion(tags: list[str], hash_hex: str | None, file_path: str | No # Fallback: assume file is in a library root or use its parent local_root = path_obj.parent - db = LocalLibraryDB(local_root) - db.remove_tags(path_obj, tags) + with LocalLibraryDB(local_root) as db: + db.remove_tags(path_obj, tags) debug(f"Removed {len(tags)} tag(s) from {path_obj.name} (local)") return True diff --git a/cmdlets/download_data.py b/cmdlets/download_data.py index b0726e6..63e7668 100644 --- a/cmdlets/download_data.py +++ b/cmdlets/download_data.py @@ -1224,12 +1224,31 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any], emit_results: from helper.local_library import LocalLibraryDB from config import get_local_storage_path + # Define LazyDB proxy to avoid keeping DB connection open for long duration + class LazyDB: + def __init__(self, root): + self.root = root + + def _op(self, func_name, *args, **kwargs): + try: + with LocalLibraryDB(self.root) as db: + func = getattr(db, func_name) + return func(*args, **kwargs) + except Exception as e: + # Log error but don't crash + pass + + def insert_worker(self, *args, **kwargs): self._op('insert_worker', *args, **kwargs) + def update_worker_status(self, *args, **kwargs): self._op('update_worker_status', *args, **kwargs) + def append_worker_stdout(self, *args, **kwargs): self._op('append_worker_stdout', *args, **kwargs) + def close(self): pass + worker_id = str(uuid.uuid4()) library_root = get_local_storage_path(config or {}) db = None if library_root: try: - db = LocalLibraryDB(library_root) + db = LazyDB(library_root) db.insert_worker( worker_id, "download", @@ -1812,12 +1831,18 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any], emit_results: current_format_selector = format_id # If it's a video-only format (has vcodec but no acodec), add bestaudio + # BUT: Skip this for -section downloads because combining formats causes re-encoding + # For -section, use formats that already have audio (muxed) to avoid FFmpeg re-encoding vcodec = 
url.get('vcodec', '') acodec = url.get('acodec', '') if vcodec and vcodec != "none" and (not acodec or acodec == "none"): - # Video-only format, add bestaudio automatically - current_format_selector = f"{format_id}+bestaudio" - debug(f" ℹ️ Video-only format detected, automatically adding bestaudio") + if not clip_range and not section_ranges: + # Only add bestaudio if NOT doing -section or -clip + # For section downloads, we need muxed formats to avoid re-encoding + current_format_selector = f"{format_id}+bestaudio" + debug(f" ℹ️ Video-only format detected, automatically adding bestaudio") + else: + debug(f" ℹ️ Section/clip download: using video-only format as-is (no bestaudio to avoid re-encoding)") actual_url = url.get('source_url') url = actual_url # Use the actual URL for further processing @@ -2488,11 +2513,16 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any], emit_results: current_format_selector = fmt.get("format_id") # If video-only format is selected, append +bestaudio to merge with best audio + # BUT: Skip this for -section downloads because combining formats causes re-encoding vcodec = fmt.get("vcodec") acodec = fmt.get("acodec") if vcodec and vcodec != "none" and (not acodec or acodec == "none"): - current_format_selector = f"{current_format_selector}+bestaudio" - debug(f"Video-only format selected, appending bestaudio: {current_format_selector}") + if not clip_range and not section_ranges: + # Only add bestaudio if NOT doing -section or -clip + current_format_selector = f"{current_format_selector}+bestaudio" + debug(f"Video-only format selected, appending bestaudio: {current_format_selector}") + else: + debug(f"Section/clip download: using video-only format as-is (no bestaudio to avoid re-encoding)") debug(f"Selected format #{idx}: {current_format_selector}") playlist_items = None # Clear so it doesn't affect download options diff --git a/cmdlets/get_relationship.py b/cmdlets/get_relationship.py index a47e2cf..02e25dd 100644 --- a/cmdlets/get_relationship.py +++ b/cmdlets/get_relationship.py @@ -74,24 +74,33 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int: path_obj = Path(file_path) if not source_title or source_title == "Unknown": source_title = path_obj.name + + print(f"\n[DEBUG] Starting get-relationship for: {path_obj.name}", file=sys.stderr) + print(f"[DEBUG] Path exists: {path_obj.exists()}", file=sys.stderr) if path_obj.exists(): storage_path = get_local_storage_path(config) + print(f"[DEBUG] Storage path: {storage_path}", file=sys.stderr) if storage_path: with LocalLibraryDB(storage_path) as db: metadata = db.get_metadata(path_obj) + print(f"[DEBUG] Metadata found: {metadata is not None}", file=sys.stderr) if metadata and metadata.get("relationships"): local_db_checked = True rels = metadata["relationships"] + print(f"[DEBUG] Relationships dict: {rels}", file=sys.stderr) if isinstance(rels, dict): for rel_type, hashes in rels.items(): + print(f"[DEBUG] Processing rel_type: {rel_type}, hashes: {hashes}", file=sys.stderr) if hashes: for h in hashes: - # Try to resolve hash to filename if possible + # h is now a file hash (not a path) + print(f"[DEBUG] Processing relationship hash: h={h}", file=sys.stderr) + # Resolve hash to file path resolved_path = db.search_by_hash(h) - title = h + title = h[:16] + "..." 
path = None - if resolved_path: + if resolved_path and resolved_path.exists(): path = str(resolved_path) # Try to get title from tags try: @@ -114,6 +123,177 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int: "path": path, "origin": "local" }) + + # RECURSIVE LOOKUP: If this is an "alt" relationship (meaning we're an alt pointing to a king), + # then we should look up the king's other alts to show siblings. + # NOTE: We only do this for "alt" relationships, not "king", to avoid duplicating + # the king's direct relationships with its alts. + print(f"[DEBUG] Checking if recursive lookup needed: rel_type={rel_type}, path={path}", file=sys.stderr) + if rel_type.lower() == "alt" and path: + print(f"[DEBUG] 🔍 RECURSIVE LOOKUP TRIGGERED for parent: {path}", file=sys.stderr) + try: + parent_path_obj = Path(path) + print(f"[DEBUG] Parent path obj: {parent_path_obj}", file=sys.stderr) + + # Also add the king/parent itself if not already in results + if not any(str(r['hash']).lower() == str(path).lower() for r in found_relationships): + parent_title = parent_path_obj.stem + try: + parent_tags = db.get_tags(parent_path_obj) + for t in parent_tags: + if t.lower().startswith('title:'): + parent_title = t[6:].strip() + break + except Exception: + pass + + print(f"[DEBUG] ➕ Adding king/parent to results: {parent_title}", file=sys.stderr) + found_relationships.append({ + "hash": str(path), + "type": "king" if rel_type.lower() == "alt" else rel_type, + "title": parent_title, + "path": str(path), + "origin": "local" + }) + else: + # If already in results, ensure it's marked as king if appropriate + for r in found_relationships: + if str(r['hash']).lower() == str(path).lower(): + if rel_type.lower() == "alt": + r['type'] = "king" + break + + # 1. Check forward relationships from parent (siblings) + parent_metadata = db.get_metadata(parent_path_obj) + print(f"[DEBUG] 📖 Parent metadata: {parent_metadata is not None}", file=sys.stderr) + if parent_metadata: + print(f"[DEBUG] Parent metadata keys: {parent_metadata.keys()}", file=sys.stderr) + if parent_metadata and parent_metadata.get("relationships"): + parent_rels = parent_metadata["relationships"] + print(f"[DEBUG] 👑 Parent has relationships: {list(parent_rels.keys())}", file=sys.stderr) + if isinstance(parent_rels, dict): + for child_type, child_hashes in parent_rels.items(): + print(f"[DEBUG] Type '{child_type}': {len(child_hashes) if child_hashes else 0} children", file=sys.stderr) + if child_hashes: + for child_h in child_hashes: + # child_h is now a HASH, not a path - resolve it + child_path_obj = db.search_by_hash(child_h) + print(f"[DEBUG] Resolved hash {child_h[:16]}... 
to: {child_path_obj}", file=sys.stderr) + + if not child_path_obj: + # Hash doesn't resolve - skip it + print(f"[DEBUG] ⏭️ Hash doesn't resolve, skipping: {child_h}", file=sys.stderr) + continue + + # Skip the current file we're querying + if str(child_path_obj).lower() == str(path_obj).lower(): + print(f"[DEBUG] ⏭️ Skipping current file: {child_path_obj}", file=sys.stderr) + continue + + # Check if already added (case-insensitive hash check) + if any(str(r['hash']).lower() == str(child_h).lower() for r in found_relationships): + print(f"[DEBUG] ⏭️ Already in results: {child_h}", file=sys.stderr) + continue + + # Now child_path_obj is a Path, so we can get tags + child_title = child_path_obj.stem + try: + child_tags = db.get_tags(child_path_obj) + for t in child_tags: + if t.lower().startswith('title:'): + child_title = t[6:].strip() + break + except Exception: + pass + + print(f"[DEBUG] ➕ Adding sibling: {child_title}", file=sys.stderr) + found_relationships.append({ + "hash": child_h, + "type": f"alt" if child_type == "alt" else f"sibling ({child_type})", + "title": child_title, + "path": str(child_path_obj), + "origin": "local" + }) + else: + print(f"[DEBUG] ⚠️ Parent has no relationships metadata", file=sys.stderr) + + # 2. Check reverse relationships pointing TO parent (siblings via reverse lookup) + # This handles the case where siblings point to parent but parent doesn't point to siblings + reverse_children = db.find_files_pointing_to(parent_path_obj) + print(f"[DEBUG] 🔄 Reverse lookup found {len(reverse_children)} children", file=sys.stderr) + for child in reverse_children: + child_path = child['path'] + child_type = child['type'] + print(f"[DEBUG] Reverse child: {child_path}, type: {child_type}", file=sys.stderr) + + # Skip the current file + if str(child_path).lower() == str(path_obj).lower(): + print(f"[DEBUG] ⏭️ Skipping self", file=sys.stderr) + continue + + # Skip if already added (check by path, case-insensitive) + if any(str(r.get('path', '')).lower() == str(child_path).lower() for r in found_relationships): + print(f"[DEBUG] ⏭️ Already in results: {child_path}", file=sys.stderr) + continue + + child_path_obj = Path(child_path) + child_title = child_path_obj.stem + try: + child_tags = db.get_tags(child_path_obj) + for t in child_tags: + if t.lower().startswith('title:'): + child_title = t[6:].strip() + break + except Exception: + pass + + print(f"[DEBUG] ➕ Adding reverse sibling: {child_title}", file=sys.stderr) + found_relationships.append({ + "hash": child_path, + "type": f"alt" if child_type == "alt" else f"sibling ({child_type})", + "title": child_title, + "path": child_path, + "origin": "local" + }) + + except Exception as e: + print(f"[DEBUG] ❌ Recursive lookup error: {e}", file=sys.stderr) + import traceback + traceback.print_exc(file=sys.stderr) + + except Exception as e: + log(f"Recursive lookup error: {e}", file=sys.stderr) + + + # ALSO CHECK REVERSE RELATIONSHIPS FOR THE CURRENT FILE + # NOTE: This is now handled via recursive lookup above, which finds siblings through the parent. + # We keep this disabled to avoid adding the same relationships twice. + # If needed in future, can be re-enabled with better deduplication. 
+ # for rev in reverse_rels: + # rev_path = rev['path'] + # rev_type = rev['type'] + # + # if any(r['hash'] == rev_path for r in found_relationships): continue + # + # rev_path_obj = Path(rev_path) + # rev_title = rev_path_obj.stem + # try: + # rev_tags = db.get_tags(rev_path_obj) + # for t in rev_tags: + # if t.lower().startswith('title:'): + # rev_title = t[6:].strip(); break + # except Exception: pass + # + # # If someone points to us as 'alt' or 'king', they are our 'child' or 'subject' + # # But we'll just list them with the relationship type they used + # found_relationships.append({ + # "hash": rev_path, + # "type": f"reverse-{rev_type}", # e.g. reverse-alt + # "title": rev_title, + # "path": rev_path, + # "origin": "local" + # }) + except Exception as e: log(f"Error checking local relationships: {e}", file=sys.stderr) diff --git a/cmdlets/pipe.py b/cmdlets/pipe.py index a023654..063a258 100644 --- a/cmdlets/pipe.py +++ b/cmdlets/pipe.py @@ -223,6 +223,42 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: pause_mode = parsed.get("pause") save_mode = parsed.get("save") load_mode = parsed.get("load") + current_mode = parsed.get("current") + + # Handle --current flag: emit currently playing item to pipeline + if current_mode: + items = _get_playlist() + if items is None: + debug("MPV is not running or not accessible.", file=sys.stderr) + return 1 + + # Find the currently playing item + current_item = None + for item in items: + if item.get("current", False): + current_item = item + break + + if current_item is None: + debug("No item is currently playing.", file=sys.stderr) + return 1 + + # Build result object with file info + title = _extract_title_from_item(current_item) + filename = current_item.get("filename", "") + + # Emit the current item to pipeline + result_obj = { + 'file_path': filename, + 'title': title, + 'cmdlet_name': '.pipe', + 'source': 'pipe', + '__pipe_index': items.index(current_item), + } + + ctx.emit(result_obj) + debug(f"Emitted current item: {title}") + return 0 # Handle URL queuing mpv_started = False @@ -599,7 +635,7 @@ CMDLET = Cmdlet( name=".pipe", aliases=["pipe", "playlist", "queue", "ls-pipe"], summary="Manage and play items in the MPV playlist via IPC", - usage=".pipe [index|url] [-clear] [-url URL]", + usage=".pipe [index|url] [-current] [-clear] [-list] [-url URL]", args=[ CmdletArg( name="index", @@ -643,6 +679,11 @@ CMDLET = Cmdlet( type="flag", description="List saved playlists" ), + CmdletArg( + name="current", + type="flag", + description="Emit the currently playing item to pipeline for further processing" + ), ], exec=_run ) diff --git a/cmdlets/worker.py b/cmdlets/worker.py index 83653e6..619d247 100644 --- a/cmdlets/worker.py +++ b/cmdlets/worker.py @@ -88,9 +88,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: try: from helper.local_library import LocalLibraryDB - db: LocalLibraryDB | None = None - try: - db = LocalLibraryDB(library_root) + with LocalLibraryDB(library_root) as db: if clear_requested: count = db.clear_finished_workers() log(f"Cleared {count} finished workers.") @@ -115,9 +113,6 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int: if selection_requested: return _render_worker_selection(db, result) return _render_worker_list(db, status_filter, limit) - finally: - if db: - db.close() except Exception as exc: log(f"Workers query failed: {exc}", file=sys.stderr) import traceback diff --git a/helper/download.py b/helper/download.py index 3153e85..cc4acda 
100644 --- a/helper/download.py +++ b/helper/download.py @@ -8,7 +8,12 @@ Lean, focused downloader without event infrastructure overhead. """ from __future__ import annotations -import re # noqa: F401 +import glob # noqa: F401 +import hashlib +import json # noqa: F401 +import random +import re +import string import subprocess import sys import time @@ -157,31 +162,72 @@ def list_formats(url: str, no_playlist: bool = False, playlist_items: Optional[s return None -def _download_with_sections_via_cli(url: str, ytdl_options: Dict[str, Any], sections: List[str]) -> None: +def _download_with_sections_via_cli(url: str, ytdl_options: Dict[str, Any], sections: List[str]) -> tuple[Optional[str], Dict[str, Any]]: """Download each section separately so merge-file can combine them. yt-dlp with multiple --download-sections args merges them into one file. We need separate files for merge-file, so download each section individually. + + Uses hash-based filenames for sections (not title-based) to prevent yt-dlp from + thinking sections are already downloaded. The title is extracted and stored in tags. + + Returns: + (session_id, first_section_info_dict) - session_id for finding files, info dict for metadata extraction """ sections_list = ytdl_options.get("download_sections", []) if not sections_list: - return + return "", {} - # Download each section separately with unique output template + # Generate a unique hash-based ID for this download session + # This ensures different videos/downloads don't have filename collisions + session_id = hashlib.md5( + (url + str(time.time()) + ''.join(random.choices(string.ascii_letters, k=10))).encode() + ).hexdigest()[:12] + + first_section_info = None + title_from_first = None + + # Download each section separately with unique output template using session ID for section_idx, section in enumerate(sections_list, 1): - # Build unique output template for this section - # e.g., "title.section_1_of_3.ext" for the first section + # Build unique output template for this section using session-based filename + # e.g., "{session_id}_{section_idx}.ext" - simple and unique per section base_outtmpl = ytdl_options.get("outtmpl", "%(title)s.%(ext)s") + output_dir_path = Path(base_outtmpl).parent - # Insert section number before extension - # e.g., "/path/title.hash.webm" → "/path/title.hash.section_1_of_3.webm" + # Use session_id + section index for temp filename + # e.g., "/path/{session_id}_1.%(ext)s" + filename_tmpl = f"{session_id}_{section_idx}" if base_outtmpl.endswith(".%(ext)s"): - section_outtmpl = base_outtmpl.replace(".%(ext)s", f".section_{section_idx}_of_{len(sections_list)}.%(ext)s") - else: - section_outtmpl = base_outtmpl + f".section_{section_idx}_of_{len(sections_list)}" + filename_tmpl += ".%(ext)s" + + # Use Path to handle separators correctly for the OS + section_outtmpl = str(output_dir_path / filename_tmpl) - # Build yt-dlp command for this section + # For the first section, extract metadata first (separate call) + if section_idx == 1: + metadata_cmd = ["yt-dlp", "--dump-json", "--skip-download"] + if ytdl_options.get("cookiefile"): + cookies_path = ytdl_options["cookiefile"].replace("\\", "/") + metadata_cmd.extend(["--cookies", cookies_path]) + if ytdl_options.get("noplaylist"): + metadata_cmd.append("--no-playlist") + metadata_cmd.append(url) + + try: + meta_result = subprocess.run(metadata_cmd, capture_output=True, text=True) + if meta_result.returncode == 0 and meta_result.stdout: + try: + info_dict = json.loads(meta_result.stdout.strip()) + 
first_section_info = info_dict + title_from_first = info_dict.get('title') + debug(f"Extracted title from metadata: {title_from_first}") + except json.JSONDecodeError: + debug("Could not parse JSON metadata") + except Exception as e: + debug(f"Error extracting metadata: {e}") + + # Build yt-dlp command for downloading this section cmd = ["yt-dlp"] # Add format @@ -212,14 +258,18 @@ def _download_with_sections_via_cli(url: str, ytdl_options: Dict[str, Any], sect cmd.append(url) debug(f"Running yt-dlp for section {section_idx}/{len(sections_list)}: {section}") + debug(f"Command: {' '.join(cmd)}") - # Run the subprocess + # Run the subprocess - don't capture output so progress is shown try: - result = subprocess.run(cmd, capture_output=False, text=True) + result = subprocess.run(cmd) + if result.returncode != 0: raise DownloadError(f"yt-dlp subprocess failed for section {section_idx} with code {result.returncode}") except Exception as exc: raise DownloadError(f"yt-dlp subprocess error for section {section_idx}: {exc}") from exc + + return session_id, first_section_info or {} def _build_ytdlp_options(opts: DownloadOptions) -> Dict[str, Any]: @@ -296,8 +346,8 @@ def _build_ytdlp_options(opts: DownloadOptions) -> Dict[str, Any]: # Pass each section as a separate element in the list (yt-dlp expects multiple --download-sections args) base_options["download_sections"] = sections debug(f"Download sections configured: {', '.join(sections)}") - # Force keyframes at cuts for accurate section boundaries - base_options["force_keyframes_at_cuts"] = True + # Note: Not using --force-keyframes-at-cuts to avoid re-encoding + # This may result in less precise cuts but faster downloads # Add playlist items selection if provided if opts.playlist_items: @@ -751,8 +801,10 @@ def download_media( debug(f"[yt-dlp] force_keyframes_at_cuts: {ytdl_options.get('force_keyframes_at_cuts', False)}") # Use subprocess when download_sections are present (Python API doesn't support them properly) + session_id = None + first_section_info = {} if ytdl_options.get("download_sections"): - _download_with_sections_via_cli(opts.url, ytdl_options, ytdl_options.get("download_sections", [])) + session_id, first_section_info = _download_with_sections_via_cli(opts.url, ytdl_options, ytdl_options.get("download_sections", [])) info = None else: with yt_dlp.YoutubeDL(ytdl_options) as ydl: # type: ignore[arg-type] @@ -780,7 +832,7 @@ def download_media( import re # Get the expected filename pattern from outtmpl - # For sections: "C:\path\title.section_1_of_3.ext", "C:\path\title.section_2_of_3.ext", etc. + # For sections: "C:\path\{session_id}.section_1_of_3.ext", etc. # For non-sections: "C:\path\title.ext" # Wait a moment to ensure files are fully written @@ -791,10 +843,10 @@ def download_media( if not files: raise FileNotFoundError(f"No files found in {opts.output_dir}") - # If we downloaded sections, look for files with .section_N_of_M pattern - if opts.clip_sections: - # Pattern: "title.section_1_of_3.ext", "title.section_2_of_3.ext", etc. - section_pattern = re.compile(r'\.section_(\d+)_of_(\d+)\.') + # If we downloaded sections, look for files with the session_id pattern + if opts.clip_sections and session_id: + # Pattern: "{session_id}_1.ext", "{session_id}_2.ext", etc. 
+ section_pattern = re.compile(rf'^{re.escape(session_id)}_(\d+)\.') matching_files = [f for f in files if section_pattern.search(f.name)] if matching_files: @@ -804,13 +856,44 @@ def download_media( return int(match.group(1)) if match else 999 matching_files.sort(key=extract_section_num) - media_path = matching_files[0] # First section - media_paths = matching_files # All sections - debug(f"✓ Downloaded {len(media_paths)} section file(s)") + debug(f"Found {len(matching_files)} section file(s) matching pattern") + + # Now rename section files to use hash-based names + # This ensures unique filenames for each section content + renamed_files = [] + + for idx, section_file in enumerate(matching_files, 1): + try: + # Calculate hash for the file + file_hash = sha256_file(section_file) + ext = section_file.suffix + new_name = f"{file_hash}{ext}" + new_path = opts.output_dir / new_name + + if new_path.exists() and new_path != section_file: + # If file with same hash exists, use it and delete the temp one + debug(f"File with hash {file_hash} already exists, using existing file.") + try: + section_file.unlink() + except OSError: + pass + renamed_files.append(new_path) + else: + section_file.rename(new_path) + debug(f"Renamed section file: {section_file.name} → {new_name}") + renamed_files.append(new_path) + except Exception as e: + debug(f"Failed to process section file {section_file.name}: {e}") + renamed_files.append(section_file) + + media_path = renamed_files[0] + media_paths = renamed_files + debug(f"✓ Downloaded {len(media_paths)} section file(s) (session: {session_id})") else: # Fallback to most recent file if pattern not found media_path = files[0] media_paths = None + debug(f"✓ Downloaded section file (pattern not found): {media_path.name}") else: # No sections, just take the most recent file media_path = files[0] @@ -830,10 +913,30 @@ def download_media( # Create result with minimal data extracted from filename file_hash = sha256_file(media_path) + + # For section downloads, create tags with the title and build proper info dict + tags = [] + title = '' + if first_section_info: + title = first_section_info.get('title', '') + if title: + tags.append(f'title:{title}') + debug(f"Added title tag for section download: {title}") + + # Build info dict - always use extracted title if available, not hash + if first_section_info: + info_dict = first_section_info + else: + info_dict = { + "id": media_path.stem, + "title": title or media_path.stem, + "ext": media_path.suffix.lstrip(".") + } + return DownloadMediaResult( path=media_path, - info={"id": media_path.stem, "title": media_path.stem, "ext": media_path.suffix.lstrip(".")}, - tags=[], + info=info_dict, + tags=tags, source_url=opts.url, hash_value=file_hash, paths=media_paths, # Include all section files if present diff --git a/helper/file_storage.py b/helper/file_storage.py index a858162..fad5d24 100644 --- a/helper/file_storage.py +++ b/helper/file_storage.py @@ -137,14 +137,14 @@ class LocalStorageBackend(StorageBackend): # Check for duplicate files using LocalLibraryDB (fast - uses index) try: - db = LocalLibraryDB(dest_dir) - existing_path = db.search_by_hash(file_hash) - if existing_path and existing_path.exists(): - log( - f"✓ File already in local storage: {existing_path}", - file=sys.stderr, - ) - return str(existing_path) + with LocalLibraryDB(dest_dir) as db: + existing_path = db.search_by_hash(file_hash) + if existing_path and existing_path.exists(): + log( + f"✓ File already in local storage: {existing_path}", + file=sys.stderr, + ) 
+ return str(existing_path) except Exception as exc: log(f"⚠️ Could not check for duplicates in DB: {exc}", file=sys.stderr) @@ -205,247 +205,156 @@ class LocalStorageBackend(StorageBackend): # Try database search first (much faster than filesystem scan) try: - db = LocalLibraryDB(search_dir) - cursor = db.connection.cursor() - - # Check if query is a tag namespace search (format: "namespace:pattern") - if ":" in query and not query.startswith(":"): - namespace, pattern = query.split(":", 1) - namespace = namespace.strip().lower() - pattern = pattern.strip().lower() - debug(f"Performing namespace search: {namespace}:{pattern}") + with LocalLibraryDB(search_dir) as db: + cursor = db.connection.cursor() - # Search for tags matching the namespace and pattern - query_pattern = f"{namespace}:%" - - cursor.execute(""" - SELECT DISTINCT f.id, f.file_path, f.file_size - FROM files f - JOIN tags t ON f.id = t.file_id - WHERE LOWER(t.tag) LIKE ? - ORDER BY f.file_path - LIMIT ? - """, (query_pattern, limit or 1000)) - - rows = cursor.fetchall() - debug(f"Found {len(rows)} potential matches in DB") - - # Filter results by pattern match - for file_id, file_path_str, size_bytes in rows: - if not file_path_str: - continue + # Check if query is a tag namespace search (format: "namespace:pattern") + if ":" in query and not query.startswith(":"): + namespace, pattern = query.split(":", 1) + namespace = namespace.strip().lower() + pattern = pattern.strip().lower() + debug(f"Performing namespace search: {namespace}:{pattern}") + + # Search for tags matching the namespace and pattern + query_pattern = f"{namespace}:%" - # Get the file's tags and check if any match the pattern cursor.execute(""" - SELECT DISTINCT tag FROM tags - WHERE file_id = ? - AND LOWER(tag) LIKE ? - """, (file_id, query_pattern)) + SELECT DISTINCT f.id, f.file_path, f.file_size + FROM files f + JOIN tags t ON f.id = t.file_id + WHERE LOWER(t.tag) LIKE ? + ORDER BY f.file_path + LIMIT ? + """, (query_pattern, limit or 1000)) - tags = [row[0] for row in cursor.fetchall()] + rows = cursor.fetchall() + debug(f"Found {len(rows)} potential matches in DB") - # Check if any tag matches the pattern (case-insensitive wildcard) - for tag in tags: - tag_lower = tag.lower() - # Extract the value part after "namespace:" - if tag_lower.startswith(f"{namespace}:"): - value = tag_lower[len(namespace)+1:] - # Use fnmatch for wildcard matching - if fnmatch(value, pattern): - file_path = Path(file_path_str) - if file_path.exists(): - path_str = str(file_path) - if size_bytes is None: - size_bytes = file_path.stat().st_size - - # Fetch all tags for this file - cursor.execute(""" - SELECT tag FROM tags WHERE file_id = ? - """, (file_id,)) - all_tags = [row[0] for row in cursor.fetchall()] - - # Use title tag if present - title_tag = next((t.split(':', 1)[1] for t in all_tags if t.lower().startswith('title:')), None) - - results.append({ - "name": file_path.stem, - "title": title_tag or file_path.stem, - "ext": file_path.suffix.lstrip('.'), - "path": path_str, - "target": path_str, - "origin": "local", - "size": size_bytes, - "size_bytes": size_bytes, - "tags": all_tags, - }) - else: - debug(f"File missing on disk: {file_path}") - break # Don't add same file multiple times - - if limit is not None and len(results) >= limit: - return results - - elif not match_all: - # Search by filename or simple tags (namespace-agnostic for plain text) - # For plain text search, match: - # 1. Filenames containing the query - # 2. 
Simple tags (without namespace) containing the query - # NOTE: Does NOT match namespaced tags (e.g., "joe" won't match "channel:Joe Mullan") - # Use explicit namespace search for that (e.g., "channel:joe*") - - # Split query into terms for AND logic - terms = [t.strip() for t in query_lower.replace(',', ' ').split() if t.strip()] - if not terms: - terms = [query_lower] - - debug(f"Performing filename/tag search for terms: {terms}") - - # Fetch more results than requested to allow for filtering - fetch_limit = (limit or 45) * 50 - - # 1. Filename search (AND logic) - conditions = ["LOWER(f.file_path) LIKE ?" for _ in terms] - params = [f"%{t}%" for t in terms] - where_clause = " AND ".join(conditions) - - cursor.execute(f""" - SELECT DISTINCT f.id, f.file_path, f.file_size - FROM files f - WHERE {where_clause} - ORDER BY f.file_path - LIMIT ? - """, (*params, fetch_limit)) - - rows = cursor.fetchall() - debug(f"Found {len(rows)} filename matches in DB (before whole-word filter)") - - # Compile regex for whole word matching (only if single term, otherwise skip) - word_regex = None - if len(terms) == 1: - term = terms[0] - # Check if term contains wildcard characters - has_wildcard = '*' in term or '?' in term - - if has_wildcard: - # Use fnmatch for wildcard patterns (e.g., "sie*" matches "SiebeliebenWohl...") - try: - from fnmatch import translate - word_regex = re.compile(translate(term), re.IGNORECASE) - except Exception: - word_regex = None - else: - # Use custom boundary that treats underscores as separators - # \b treats _ as a word character, so "foo_bar" wouldn't match "bar" with \b - try: - # Match if not preceded or followed by alphanumeric chars - pattern = r'(?= limit: return results - - else: - # Match all - get all files from database - cursor.execute(""" - SELECT id, file_path, file_size - FROM files - ORDER BY file_path - LIMIT ? - """, (limit or 1000,)) - rows = cursor.fetchall() - for file_id, file_path_str, size_bytes in rows: - if file_path_str: + elif not match_all: + # Search by filename or simple tags (namespace-agnostic for plain text) + # For plain text search, match: + # 1. Filenames containing the query + # 2. Simple tags (without namespace) containing the query + # NOTE: Does NOT match namespaced tags (e.g., "joe" won't match "channel:Joe Mullan") + # Use explicit namespace search for that (e.g., "channel:joe*") + + # Split query into terms for AND logic + terms = [t.strip() for t in query_lower.replace(',', ' ').split() if t.strip()] + if not terms: + terms = [query_lower] + + debug(f"Performing filename/tag search for terms: {terms}") + + # Fetch more results than requested to allow for filtering + fetch_limit = (limit or 45) * 50 + + # 1. Filename search (AND logic) + conditions = ["LOWER(f.file_path) LIKE ?" for _ in terms] + params = [f"%{t}%" for t in terms] + where_clause = " AND ".join(conditions) + + cursor.execute(f""" + SELECT DISTINCT f.id, f.file_path, f.file_size + FROM files f + WHERE {where_clause} + ORDER BY f.file_path + LIMIT ? + """, (*params, fetch_limit)) + + rows = cursor.fetchall() + debug(f"Found {len(rows)} filename matches in DB (before whole-word filter)") + + # Compile regex for whole word matching (only if single term, otherwise skip) + word_regex = None + if len(terms) == 1: + term = terms[0] + # Check if term contains wildcard characters + has_wildcard = '*' in term or '?' 
in term + + if has_wildcard: + # Use fnmatch for wildcard patterns (e.g., "sie*" matches "SiebeliebenWohl...") + try: + from fnmatch import translate + word_regex = re.compile(translate(term), re.IGNORECASE) + except Exception: + word_regex = None + else: + # Use custom boundary that treats underscores as separators + # \b treats _ as a word character, so "foo_bar" wouldn't match "bar" with \b + try: + # Match if not preceded or followed by alphanumeric chars + pattern = r'(?= limit: + return results + + else: + # Match all - get all files from database + cursor.execute(""" + SELECT id, file_path, file_size + FROM files + ORDER BY file_path + LIMIT ? + """, (limit or 1000,)) + + rows = cursor.fetchall() + for file_id, file_path_str, size_bytes in rows: + if file_path_str: + file_path = Path(file_path_str) + if file_path.exists(): + path_str = str(file_path) + if size_bytes is None: + size_bytes = file_path.stat().st_size + + # Fetch tags for this file + cursor.execute(""" + SELECT tag FROM tags WHERE file_id = ? + """, (file_id,)) + tags = [row[0] for row in cursor.fetchall()] + + # Use title tag if present + title_tag = next((t.split(':', 1)[1] for t in tags if t.lower().startswith('title:')), None) + + results.append({ + "name": file_path.stem, + "title": title_tag or file_path.stem, + "ext": file_path.suffix.lstrip('.'), + "path": path_str, + "target": path_str, + "origin": "local", + "size": size_bytes, + "size_bytes": size_bytes, + "tags": tags, + }) + + if results: + debug(f"Returning {len(results)} results from DB") + else: + debug("No results found in DB") + return results except Exception as e: log(f"⚠️ Database search failed: {e}", file=sys.stderr) @@ -1175,6 +1175,161 @@ class MatrixStorageBackend(StorageBackend): raise +class RemoteStorageBackend(StorageBackend): + """File storage backend for remote Android/network storage servers. + + Connects to a remote storage server (e.g., running on Android phone) + via REST API. All operations are proxied to the remote server. + """ + + def __init__(self, server_url: str, timeout: int = 30, api_key: str = None) -> None: + """Initialize remote storage backend. + + Args: + server_url: Base URL of remote storage server (e.g., http://192.168.1.100:5000) + timeout: Request timeout in seconds + api_key: Optional API key for authentication + """ + try: + import requests + except ImportError: + raise ImportError("requests library required for RemoteStorageBackend. 
Install with: pip install requests") + + self.server_url = server_url.rstrip('/') + self.timeout = timeout + self.api_key = api_key + self._session = requests.Session() + + # Add API key to default headers if provided + if self.api_key: + self._session.headers.update({'X-API-Key': self.api_key}) + + def get_name(self) -> str: + return "remote" + + def _request(self, method: str, endpoint: str, **kwargs) -> Dict[str, Any]: + """Make HTTP request to remote server.""" + import requests + from urllib.parse import urljoin + + url = urljoin(self.server_url, endpoint) + + try: + response = self._session.request( + method, + url, + timeout=self.timeout, + **kwargs + ) + + if response.status_code == 404: + raise Exception(f"Remote resource not found: {endpoint}") + + if response.status_code >= 400: + try: + error_data = response.json() + error_msg = error_data.get('error', response.text) + except: + error_msg = response.text + raise Exception(f"Remote server error {response.status_code}: {error_msg}") + + return response.json() + + except requests.exceptions.RequestException as e: + raise Exception(f"Connection to {self.server_url} failed: {e}") + + def upload(self, file_path: Path, **kwargs: Any) -> str: + """Upload file to remote storage. + + Args: + file_path: Path to the file to upload + tags: Optional list of tags to add + urls: Optional list of known URLs + + Returns: + Remote file hash + """ + from helper.utils import sha256_file + + if not file_path.exists(): + raise ValueError(f"File not found: {file_path}") + + try: + # Index the file on remote server + data = {"path": str(file_path)} + + tags = kwargs.get("tags", []) + if tags: + data["tags"] = tags + + urls = kwargs.get("urls", []) + if urls: + data["urls"] = urls + + result = self._request('POST', '/files/index', json=data) + file_hash = result.get('hash') + + if file_hash: + log(f"✓ File indexed on remote storage: {file_hash}", file=sys.stderr) + return file_hash + else: + raise Exception("Remote server did not return file hash") + + except Exception as exc: + debug(f"Remote upload failed: {exc}", file=sys.stderr) + raise + + def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]: + """Search files on remote storage. + + Args: + query: Search query + limit: Maximum results + + Returns: + List of search results + """ + limit = kwargs.get("limit") + try: + limit = int(limit) if limit is not None else 100 + except (TypeError, ValueError): + limit = 100 + + if limit <= 0: + limit = 100 + + try: + response = self._request('GET', '/files/search', params={ + 'q': query, + 'limit': limit + }) + + files = response.get('files', []) + + # Transform remote format to standard result format + results = [] + for f in files: + results.append({ + "name": f.get('name', '').split('/')[-1], # Get filename from path + "title": f.get('name', f.get('path', '')).split('/')[-1], + "ext": f.get('ext', ''), + "path": f.get('path', ''), + "target": f.get('path', ''), + "hash": f.get('hash', ''), + "origin": "remote", + "size": f.get('size', 0), + "size_bytes": f.get('size', 0), + "tags": f.get('tags', []), + }) + + debug(f"Remote search found {len(results)} results", file=sys.stderr) + return results + + except Exception as exc: + log(f"❌ Remote search failed: {exc}", file=sys.stderr) + raise + + class FileStorage: """Unified file storage interface supporting multiple backend services. 
@@ -1223,6 +1378,25 @@ class FileStorage: # Include Matrix backend self._backends["matrix"] = MatrixStorageBackend() + + # Include remote storage backends from config (for Android/network servers) + remote_storages = config.get("remote_storages", []) + if isinstance(remote_storages, list): + for remote_config in remote_storages: + if isinstance(remote_config, dict): + name = remote_config.get("name", "remote") + url = remote_config.get("url") + timeout = remote_config.get("timeout", 30) + api_key = remote_config.get("api_key") + + if url: + try: + backend = RemoteStorageBackend(url, timeout=timeout, api_key=api_key) + self._backends[name] = backend + auth_status = " (with auth)" if api_key else " (no auth)" + log(f"Registered remote storage backend: {name} -> {url}{auth_status}", file=sys.stderr) + except Exception as e: + log(f"Failed to register remote storage '{name}': {e}", file=sys.stderr) def __getitem__(self, backend_name: str) -> StorageBackend: """Get a storage backend by name. diff --git a/helper/local_library.py b/helper/local_library.py index dda9cfc..c3d692a 100644 --- a/helper/local_library.py +++ b/helper/local_library.py @@ -19,6 +19,8 @@ from datetime import datetime from pathlib import Path from typing import Optional, Dict, Any, List, Tuple, Set +from .utils import sha256_file + logger = logging.getLogger(__name__) # Try to import optional dependencies @@ -455,6 +457,18 @@ class LocalLibraryDB: logger.error(f"[get_or_create_file_entry] ❌ Error getting/creating file entry for {file_path}: {e}", exc_info=True) raise + def get_file_id(self, file_path: Path) -> Optional[int]: + """Get the file ID for a file path, or None if not found.""" + try: + str_path = str(file_path.resolve()) + cursor = self.connection.cursor() + cursor.execute("SELECT id FROM files WHERE file_path = ?", (str_path,)) + row = cursor.fetchone() + return row[0] if row else None + except Exception as e: + logger.error(f"Error getting file ID for {file_path}: {e}", exc_info=True) + return None + def get_metadata(self, file_path: Path) -> Optional[Dict[str, Any]]: """Get metadata for a file.""" try: @@ -748,6 +762,177 @@ class LocalLibraryDB: logger.error(f"Error removing tags for {file_path}: {e}", exc_info=True) raise + def set_relationship(self, file_path: Path, related_file_path: Path, rel_type: str = "alt") -> None: + """Set a relationship between two local files. + + Args: + file_path: Path to the file being related + related_file_path: Path to the related file + rel_type: Type of relationship ('king', 'alt', 'related') + """ + try: + str_path = str(file_path.resolve()) + str_related_path = str(related_file_path.resolve()) + + file_id = self.get_or_create_file_entry(file_path) + related_file_id = self.get_or_create_file_entry(related_file_path) + + cursor = self.connection.cursor() + + # Get hashes for both files + file_hash = sha256_file(file_path) + related_file_hash = sha256_file(related_file_path) + + if not file_hash or not related_file_hash: + logger.warning(f"Cannot set relationship: missing hash for {file_path} or {related_file_path}") + return + + # Store the hashes in the files table for future lookups + cursor.execute(""" + UPDATE files SET file_hash = ? WHERE id = ? + """, (file_hash, file_id)) + cursor.execute(""" + UPDATE files SET file_hash = ? WHERE id = ? + """, (related_file_hash, related_file_id)) + + # Get current relationships + cursor.execute(""" + SELECT relationships FROM metadata WHERE file_id = ? 
+ """, (file_id,)) + + row = cursor.fetchone() + # Use index access to be safe regardless of row_factory + relationships_str = row[0] if row else None + + try: + if relationships_str: + relationships = json.loads(relationships_str) + else: + relationships = {} + except (json.JSONDecodeError, TypeError): + relationships = {} + + # Ensure relationships is a dict (handle case where DB has a list) + if not isinstance(relationships, dict): + relationships = {} + + # Ensure rel_type key exists + if rel_type not in relationships: + relationships[rel_type] = [] + + # Add the relationship (store as hash string) + if related_file_hash not in relationships[rel_type]: + relationships[rel_type].append(related_file_hash) + + # Save the updated relationships for the main file + cursor.execute(""" + INSERT INTO metadata (file_id, relationships) + VALUES (?, ?) + ON CONFLICT(file_id) DO UPDATE SET + relationships = excluded.relationships, + time_modified = CURRENT_TIMESTAMP + """, (file_id, json.dumps(relationships))) + + logger.debug(f"Set {rel_type} relationship: {str_path} ({file_hash}) -> {str_related_path} ({related_file_hash})") + + # Set reverse relationship (bidirectional) + # For 'alt' and 'related', the reverse is the same + # For 'king', the reverse is 'subject' (or we just use 'alt' for simplicity as Hydrus does) + # Let's use the same type for now to keep it simple and consistent with Hydrus 'alternates' + reverse_type = rel_type + + # Update the related file + cursor.execute(""" + SELECT relationships FROM metadata WHERE file_id = ? + """, (related_file_id,)) + + row = cursor.fetchone() + relationships_str = row[0] if row else None + + try: + if relationships_str: + reverse_relationships = json.loads(relationships_str) + else: + reverse_relationships = {} + except (json.JSONDecodeError, TypeError): + reverse_relationships = {} + + if not isinstance(reverse_relationships, dict): + reverse_relationships = {} + + if reverse_type not in reverse_relationships: + reverse_relationships[reverse_type] = [] + + if file_hash not in reverse_relationships[reverse_type]: + reverse_relationships[reverse_type].append(file_hash) + + # Save the updated reverse relationships + cursor.execute(""" + INSERT INTO metadata (file_id, relationships) + VALUES (?, ?) + ON CONFLICT(file_id) DO UPDATE SET + relationships = excluded.relationships, + time_modified = CURRENT_TIMESTAMP + """, (related_file_id, json.dumps(reverse_relationships))) + + self.connection.commit() + + except Exception as e: + logger.error(f"Error setting relationship: {e}", exc_info=True) + raise + + def find_files_pointing_to(self, target_path: Path) -> List[Dict[str, Any]]: + """Find all files that have a relationship pointing to the target path. + + Args: + target_path: The file path to look for in other files' relationships + + Returns: + List of dicts with {path, type} for files pointing to target + """ + try: + # Get the hash of the target file + target_hash = sha256_file(target_path) + if not target_hash: + logger.warning(f"Cannot find files pointing to {target_path}: unable to compute hash") + return [] + + cursor = self.connection.cursor() + + # Scan all metadata (this might be slow on huge DBs but fine for local library) + # We select file_path and relationships json + cursor.execute(""" + SELECT f.file_path, m.relationships + FROM metadata m + JOIN files f ON m.file_id = f.id + WHERE m.relationships LIKE ? 
+ """, (f"%{target_hash}%",)) + + results = [] + + for row in cursor.fetchall(): + f_path = row[0] + rels_json = row[1] + + try: + rels = json.loads(rels_json) + if isinstance(rels, dict): + for r_type, hashes in rels.items(): + if isinstance(hashes, list): + # Check if target hash is in this relationship type + if target_hash in hashes: + results.append({ + "path": f_path, + "type": r_type + }) + except (json.JSONDecodeError, TypeError): + continue + + return results + except Exception as e: + logger.error(f"Error finding files pointing to {target_path}: {e}", exc_info=True) + return [] + def get_note(self, file_path: Path) -> Optional[str]: """Get note for a file.""" try: @@ -1076,6 +1261,11 @@ class LocalLibraryDB: if not text: return True try: + # Check if connection is valid + if not self.connection: + logger.warning(f"Database connection not available for worker {worker_id}") + return False + cursor = self.connection.cursor() cursor.execute("SELECT stdout FROM worker WHERE worker_id = ?", (worker_id,)) row = cursor.fetchone() @@ -1097,6 +1287,13 @@ class LocalLibraryDB: self.connection.commit() return cursor.rowcount > 0 + except sqlite3.ProgrammingError as e: + # Handle "Cannot operate on a closed database" gracefully + if "closed database" in str(e).lower(): + logger.warning(f"Database connection closed, cannot append stdout for worker {worker_id}") + return False + logger.error(f"Error appending stdout to worker {worker_id}: {e}", exc_info=True) + return False except Exception as e: logger.error(f"Error appending stdout to worker {worker_id}: {e}", exc_info=True) return False @@ -1599,3 +1796,23 @@ class LocalLibrarySearchOptimizer: if not self.db: return None return self.db.search_by_hash(file_hash) + + def set_relationship(self, file_path: Path, related_file_path: Path, rel_type: str = "alt") -> None: + """Set a relationship between two files in the database. + + Delegates to LocalLibraryDB.set_relationship(). + + Args: + file_path: Path to the first file + related_file_path: Path to the related file + rel_type: Type of relationship ('king', 'alt', 'related', etc.) + """ + if not self.db: + return + self.db.set_relationship(file_path, related_file_path, rel_type) + + def find_files_pointing_to(self, target_path: Path) -> List[Dict[str, Any]]: + """Find all files that have a relationship pointing to the target path.""" + if not self.db: + return [] + return self.db.find_files_pointing_to(target_path) diff --git a/helper/remote_storage_server.py b/helper/remote_storage_server.py new file mode 100644 index 0000000..5868fae --- /dev/null +++ b/helper/remote_storage_server.py @@ -0,0 +1,523 @@ +"""Remote Storage Server - REST API for file management on mobile devices. + +This server runs on a mobile device (Android with Termux, iOS with iSH, etc.) +and exposes the local library database as a REST API. Your PC connects to this +server and uses it as a remote storage backend through the RemoteStorageBackend. + +## INSTALLATION + +### On Android (Termux): +1. Install Termux from Play Store: https://play.google.com/store/apps/details?id=com.termux +2. In Termux: + $ apt update && apt install python + $ pip install flask flask-cors +3. Copy this file to your device +4. Run it (with optional API key): + $ python remote_storage_server.py --storage-path /path/to/storage --port 5000 + $ python remote_storage_server.py --storage-path /path/to/storage --api-key mysecretkey +5. Server prints connection info automatically (IP, port, API key) + +### On PC: +1. 
Install requests: pip install requests +2. Add to config.json: + { + "remote_storages": [ + { + "name": "phone", + "url": "http://192.168.1.100:5000", + "api_key": "mysecretkey", + "timeout": 30 + } + ] + } + Note: API key is optional. Works on WiFi or cellular data. + +## USAGE + +After setup, all cmdlets work with the phone: +$ search-file zohar -store phone +$ @1-3 | add-relationship -king @4 -store phone +$ @1 | get-relationship -store phone + +The server exposes REST endpoints that RemoteStorageBackend uses internally. +""" + +from __future__ import annotations + +import os +import sys +import json +import argparse +import logging +from pathlib import Path +from typing import Optional, Dict, Any +from datetime import datetime +from functools import wraps + +# Add parent directory to path for imports +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from helper.logger import log + +# ============================================================================ +# CONFIGURATION +# ============================================================================ + +logging.basicConfig( + level=logging.INFO, + format='[%(asctime)s] %(levelname)s: %(message)s' +) +logger = logging.getLogger(__name__) + +STORAGE_PATH: Optional[Path] = None +API_KEY: Optional[str] = None # API key for authentication (None = no auth required) + +# Try importing Flask - will be used in main() only +try: + from flask import Flask, request, jsonify + from flask_cors import CORS + HAS_FLASK = True +except ImportError: + HAS_FLASK = False + +# ============================================================================ +# UTILITY FUNCTIONS +# ============================================================================ + +def get_local_ip() -> Optional[str]: + """Get the local IP address that would be used for external connections.""" + import socket + try: + # Create a socket to determine which interface would be used + s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + s.connect(("8.8.8.8", 80)) # Google DNS + ip = s.getsockname()[0] + s.close() + return ip + except Exception: + return None + +# ============================================================================ +# FLASK APP FACTORY +# ============================================================================ + +def create_app(): + """Create and configure Flask app with all routes.""" + if not HAS_FLASK: + raise ImportError("Flask not installed. Install with: pip install flask flask-cors") + + from flask import Flask, request, jsonify + from flask_cors import CORS + + app = Flask(__name__) + CORS(app) + + # ======================================================================== + # HELPER DECORATORS + # ======================================================================== + + def require_auth(): + """Decorator to check API key authentication if configured.""" + def decorator(f): + @wraps(f) + def decorated_function(*args, **kwargs): + if API_KEY: + # Get API key from header or query parameter + provided_key = request.headers.get('X-API-Key') or request.args.get('api_key') + if not provided_key or provided_key != API_KEY: + return jsonify({"error": "Unauthorized. 
Invalid or missing API key."}), 401 + return f(*args, **kwargs) + return decorated_function + return decorator + + def require_storage(): + """Decorator to ensure storage path is configured.""" + def decorator(f): + @wraps(f) + def decorated_function(*args, **kwargs): + if not STORAGE_PATH: + return jsonify({"error": "Storage path not configured"}), 500 + return f(*args, **kwargs) + return decorated_function + return decorator + + # ======================================================================== + # HEALTH CHECK + # ======================================================================== + + @app.route('/health', methods=['GET']) + @require_auth() + def health(): + """Check server health and storage availability.""" + status = { + "status": "ok", + "storage_configured": STORAGE_PATH is not None, + "timestamp": datetime.now().isoformat() + } + + if STORAGE_PATH: + status["storage_path"] = str(STORAGE_PATH) + status["storage_exists"] = STORAGE_PATH.exists() + try: + from helper.local_library import LocalLibraryDB + with LocalLibraryDB(STORAGE_PATH) as db: + status["database_accessible"] = True + except Exception as e: + status["database_accessible"] = False + status["database_error"] = str(e) + + return jsonify(status), 200 + + # ======================================================================== + # FILE OPERATIONS + # ======================================================================== + + @app.route('/files/search', methods=['GET']) + @require_auth() + @require_storage() + def search_files(): + """Search for files by name or tag.""" + from helper.local_library import LocalLibrarySearchOptimizer + + query = request.args.get('q', '') + limit = request.args.get('limit', 100, type=int) + + if not query: + return jsonify({"error": "Search query required"}), 400 + + try: + with LocalLibrarySearchOptimizer(STORAGE_PATH) as db: + results = db.search_by_name(query, limit) + tag_results = db.search_by_tag(query, limit) + all_results = {r['hash']: r for r in (results + tag_results)} + + return jsonify({ + "query": query, + "count": len(all_results), + "files": list(all_results.values()) + }), 200 + except Exception as e: + logger.error(f"Search error: {e}", exc_info=True) + return jsonify({"error": f"Search failed: {str(e)}"}), 500 + + @app.route('/files/', methods=['GET']) + @require_auth() + @require_storage() + def get_file_metadata(file_hash: str): + """Get metadata for a specific file by hash.""" + from helper.local_library import LocalLibraryDB + + try: + with LocalLibraryDB(STORAGE_PATH) as db: + file_path = db.search_by_hash(file_hash) + + if not file_path or not file_path.exists(): + return jsonify({"error": "File not found"}), 404 + + metadata = db.get_metadata(file_path) + tags = db.get_tags(file_path) + + return jsonify({ + "hash": file_hash, + "path": str(file_path), + "size": file_path.stat().st_size, + "metadata": metadata, + "tags": tags + }), 200 + except Exception as e: + logger.error(f"Get metadata error: {e}", exc_info=True) + return jsonify({"error": f"Failed to get metadata: {str(e)}"}), 500 + + @app.route('/files/index', methods=['POST']) + @require_auth() + @require_storage() + def index_file(): + """Index a new file in the storage.""" + from helper.local_library import LocalLibraryDB + from helper.utils import sha256_file + + data = request.get_json() or {} + file_path_str = data.get('path') + tags = data.get('tags', []) + urls = data.get('urls', []) + + if not file_path_str: + return jsonify({"error": "File path required"}), 400 + + try: + file_path = 
Path(file_path_str) + + if not file_path.exists(): + return jsonify({"error": "File does not exist"}), 404 + + with LocalLibraryDB(STORAGE_PATH) as db: + db.get_or_create_file_entry(file_path) + + if tags: + db.add_tags(file_path, tags) + + if urls: + db.add_known_urls(file_path, urls) + + file_hash = sha256_file(file_path) + + return jsonify({ + "hash": file_hash, + "path": str(file_path), + "tags_added": len(tags), + "urls_added": len(urls) + }), 201 + except Exception as e: + logger.error(f"Index error: {e}", exc_info=True) + return jsonify({"error": f"Indexing failed: {str(e)}"}), 500 + + # ======================================================================== + # TAG OPERATIONS + # ======================================================================== + + @app.route('/tags/', methods=['GET']) + @require_auth() + @require_storage() + def get_tags(file_hash: str): + """Get tags for a file.""" + from helper.local_library import LocalLibraryDB + + try: + with LocalLibraryDB(STORAGE_PATH) as db: + file_path = db.search_by_hash(file_hash) + if not file_path: + return jsonify({"error": "File not found"}), 404 + + tags = db.get_tags(file_path) + return jsonify({"hash": file_hash, "tags": tags}), 200 + except Exception as e: + logger.error(f"Get tags error: {e}", exc_info=True) + return jsonify({"error": f"Failed: {str(e)}"}), 500 + + @app.route('/tags/', methods=['POST']) + @require_auth() + @require_storage() + def add_tags(file_hash: str): + """Add tags to a file.""" + from helper.local_library import LocalLibraryDB + + data = request.get_json() or {} + tags = data.get('tags', []) + mode = data.get('mode', 'add') + + if not tags: + return jsonify({"error": "Tags required"}), 400 + + try: + with LocalLibraryDB(STORAGE_PATH) as db: + file_path = db.search_by_hash(file_hash) + if not file_path: + return jsonify({"error": "File not found"}), 404 + + if mode == 'replace': + db.remove_tags(file_path, db.get_tags(file_path)) + + db.add_tags(file_path, tags) + return jsonify({"hash": file_hash, "tags_added": len(tags), "mode": mode}), 200 + except Exception as e: + logger.error(f"Add tags error: {e}", exc_info=True) + return jsonify({"error": f"Failed: {str(e)}"}), 500 + + @app.route('/tags/', methods=['DELETE']) + @require_auth() + @require_storage() + def remove_tags(file_hash: str): + """Remove tags from a file.""" + from helper.local_library import LocalLibraryDB + + tags_str = request.args.get('tags', '') + + try: + with LocalLibraryDB(STORAGE_PATH) as db: + file_path = db.search_by_hash(file_hash) + if not file_path: + return jsonify({"error": "File not found"}), 404 + + if tags_str: + tags_to_remove = [t.strip() for t in tags_str.split(',')] + else: + tags_to_remove = db.get_tags(file_path) + + db.remove_tags(file_path, tags_to_remove) + return jsonify({"hash": file_hash, "tags_removed": len(tags_to_remove)}), 200 + except Exception as e: + logger.error(f"Remove tags error: {e}", exc_info=True) + return jsonify({"error": f"Failed: {str(e)}"}), 500 + + # ======================================================================== + # RELATIONSHIP OPERATIONS + # ======================================================================== + + @app.route('/relationships/', methods=['GET']) + @require_auth() + @require_storage() + def get_relationships(file_hash: str): + """Get relationships for a file.""" + from helper.local_library import LocalLibraryDB + + try: + with LocalLibraryDB(STORAGE_PATH) as db: + file_path = db.search_by_hash(file_hash) + if not file_path: + return jsonify({"error": "File 
not found"}), 404 + + metadata = db.get_metadata(file_path) + relationships = metadata.get('relationships', {}) if metadata else {} + return jsonify({"hash": file_hash, "relationships": relationships}), 200 + except Exception as e: + logger.error(f"Get relationships error: {e}", exc_info=True) + return jsonify({"error": f"Failed: {str(e)}"}), 500 + + @app.route('/relationships', methods=['POST']) + @require_auth() + @require_storage() + def set_relationship(): + """Set a relationship between two files.""" + from helper.local_library import LocalLibraryDB + + data = request.get_json() or {} + from_hash = data.get('from_hash') + to_hash = data.get('to_hash') + rel_type = data.get('type', 'alt') + + if not from_hash or not to_hash: + return jsonify({"error": "from_hash and to_hash required"}), 400 + + try: + with LocalLibraryDB(STORAGE_PATH) as db: + from_path = db.search_by_hash(from_hash) + to_path = db.search_by_hash(to_hash) + + if not from_path or not to_path: + return jsonify({"error": "File not found"}), 404 + + db.set_relationship(from_path, to_path, rel_type) + return jsonify({"from_hash": from_hash, "to_hash": to_hash, "type": rel_type}), 200 + except Exception as e: + logger.error(f"Set relationship error: {e}", exc_info=True) + return jsonify({"error": f"Failed: {str(e)}"}), 500 + + # ======================================================================== + # URL OPERATIONS + # ======================================================================== + + @app.route('/urls/', methods=['GET']) + @require_auth() + @require_storage() + def get_urls(file_hash: str): + """Get known URLs for a file.""" + from helper.local_library import LocalLibraryDB + + try: + with LocalLibraryDB(STORAGE_PATH) as db: + file_path = db.search_by_hash(file_hash) + if not file_path: + return jsonify({"error": "File not found"}), 404 + + metadata = db.get_metadata(file_path) + urls = metadata.get('known_urls', []) if metadata else [] + return jsonify({"hash": file_hash, "urls": urls}), 200 + except Exception as e: + logger.error(f"Get URLs error: {e}", exc_info=True) + return jsonify({"error": f"Failed: {str(e)}"}), 500 + + @app.route('/urls/', methods=['POST']) + @require_auth() + @require_storage() + def add_urls(file_hash: str): + """Add URLs to a file.""" + from helper.local_library import LocalLibraryDB + + data = request.get_json() or {} + urls = data.get('urls', []) + + if not urls: + return jsonify({"error": "URLs required"}), 400 + + try: + with LocalLibraryDB(STORAGE_PATH) as db: + file_path = db.search_by_hash(file_hash) + if not file_path: + return jsonify({"error": "File not found"}), 404 + + db.add_known_urls(file_path, urls) + return jsonify({"hash": file_hash, "urls_added": len(urls)}), 200 + except Exception as e: + logger.error(f"Add URLs error: {e}", exc_info=True) + return jsonify({"error": f"Failed: {str(e)}"}), 500 + + return app + +# ============================================================================ +# MAIN +# ============================================================================ + +def main(): + if not HAS_FLASK: + print("ERROR: Flask and flask-cors required") + print("Install with: pip install flask flask-cors") + sys.exit(1) + + parser = argparse.ArgumentParser( + description='Remote Storage Server for Medios-Macina', + epilog='Example: python remote_storage_server.py --storage-path /storage/media --port 5000 --api-key mysecretkey' + ) + parser.add_argument('--storage-path', type=str, required=True, help='Path to storage directory') + parser.add_argument('--host', 
type=str, default='0.0.0.0', help='Server host (default: 0.0.0.0)') + parser.add_argument('--port', type=int, default=5000, help='Server port (default: 5000)') + parser.add_argument('--api-key', type=str, default=None, help='API key for authentication (optional)') + parser.add_argument('--debug', action='store_true', help='Enable debug mode') + + args = parser.parse_args() + + global STORAGE_PATH, API_KEY + STORAGE_PATH = Path(args.storage_path).resolve() + API_KEY = args.api_key + + if not STORAGE_PATH.exists(): + print(f"ERROR: Storage path does not exist: {STORAGE_PATH}") + sys.exit(1) + + # Get local IP address + local_ip = get_local_ip() + if not local_ip: + local_ip = "127.0.0.1" + + print(f"\n{'='*70}") + print(f"Remote Storage Server - Medios-Macina") + print(f"{'='*70}") + print(f"Storage Path: {STORAGE_PATH}") + print(f"Local IP: {local_ip}") + print(f"Server URL: http://{local_ip}:{args.port}") + print(f"Health URL: http://{local_ip}:{args.port}/health") + print(f"API Key: {'Enabled - ' + ('***' + args.api_key[-4:]) if args.api_key else 'Disabled (no auth)'}") + print(f"Debug Mode: {args.debug}") + print(f"\n📋 Config for config.json:") + config_entry = { + "name": "phone", + "url": f"http://{local_ip}:{args.port}", + "timeout": 30 + } + if args.api_key: + config_entry["api_key"] = args.api_key + print(json.dumps(config_entry, indent=2)) + print(f"\n{'='*70}\n") + + try: + from helper.local_library import LocalLibraryDB + with LocalLibraryDB(STORAGE_PATH) as db: + logger.info("Database initialized successfully") + except Exception as e: + logger.error(f"Failed to initialize database: {e}") + sys.exit(1) + + app = create_app() + app.run(host=args.host, port=args.port, debug=args.debug, use_reloader=False) + +if __name__ == '__main__': + main()
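
For completeness, the endpoints above can be smoke-tested from the PC with plain requests calls before the server is added to config.json. A minimal sketch; the base URL, API key, and hashes are placeholders for whatever the server prints at startup and whatever /files/search returns:

    import requests

    BASE = "http://192.168.1.100:5000"      # placeholder; use the URL printed at startup
    HEADERS = {"X-API-Key": "mysecretkey"}  # omit if the server was started without --api-key

    # Health check: confirms the storage path and database are reachable.
    print(requests.get(f"{BASE}/health", headers=HEADERS, timeout=10).json())

    # Search by name or tag; this mirrors what RemoteStorageBackend.search() does internally.
    resp = requests.get(
        f"{BASE}/files/search",
        params={"q": "zohar", "limit": 5},
        headers=HEADERS,
        timeout=10,
    )
    for f in resp.json().get("files", []):
        print(f.get("hash"), f.get("path"))

    # Link two files as alternates; both hashes must already be indexed on the device.
    requests.post(
        f"{BASE}/relationships",
        json={"from_hash": "<hash of @1>", "to_hash": "<hash of @4>", "type": "alt"},
        headers=HEADERS,
        timeout=10,
    ).raise_for_status()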