from __future__ import annotations

import re
import sys
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

from SYS.logger import debug, log
from SYS.utils_constant import mime_maps

from Store._base import Store

# Tag service used whenever the caller does not name one explicitly.
_DEFAULT_TAG_SERVICE = "my tags"

# Key under which status-keyed tag dicts hold the "current" tags. The
# original code read this same key ("0", with an int 0 fallback).
_CURRENT_TAG_STATUS = "0"

# A SHA-256 digest rendered as 64 lowercase hexadecimal characters.
_SHA256_HEX = re.compile(r"[0-9a-f]{64}")


def _normalize_sha256(value: Any) -> Optional[str]:
    """Return *value* as a lowercase 64-char hex digest, or None if invalid."""
    candidate = str(value or "").strip().lower()
    return candidate if _SHA256_HEX.fullmatch(candidate) else None


def _listify(value: Any) -> List[Any]:
    """Coerce None / a scalar / a collection into a list.

    Collections are copied element-for-element; a scalar (typically a single
    string) becomes a one-item list instead of being iterated per character.
    """
    if value is None:
        return []
    if isinstance(value, (list, tuple, set, frozenset)):
        return list(value)
    return [str(value)]


def _current_tags(tag_group: Any) -> List[str]:
    """Return the non-blank current tags held in *tag_group*.

    Accepts either a status-keyed dict (the current tags live under "0") or
    a plain list of tags; anything else yields an empty list.
    """
    if isinstance(tag_group, dict):
        tag_group = tag_group.get(_CURRENT_TAG_STATUS) or tag_group.get(0)
    if not isinstance(tag_group, list):
        return []
    tags: List[str] = []
    for raw in tag_group:
        if isinstance(raw, bytes):
            raw = raw.decode("utf-8", errors="replace")
        if isinstance(raw, str) and raw.strip():
            tags.append(raw)
    return tags


def _find_title(tags_payload: Dict[str, Any]) -> Optional[str]:
    """Return the value of the first current ``title:`` tag, or None.

    For each service, display tags are inspected before storage tags.
    """
    for service_data in tags_payload.values():
        if not isinstance(service_data, dict):
            continue
        candidates = _current_tags(service_data.get("display_tags"))
        candidates += _current_tags(service_data.get("storage_tags"))
        for tag in candidates:
            if tag.lower().startswith("title:"):
                return tag.split(":", 1)[1].strip()
    return None


def _collect_search_tags(meta: Dict[str, Any]) -> Tuple[List[str], Optional[str]]:
    """Gather every tag attached to a metadata entry for search matching.

    Returns:
        ``(tags, title)`` where ``tags`` is de-duplicated in first-seen order
        and ``title`` is the value of the first ``title:`` tag (None if the
        entry has none).
    """
    collected: List[str] = []
    seen: set = set()
    title: Optional[str] = None

    def _absorb(tag_group: Any) -> None:
        nonlocal title
        if not isinstance(tag_group, list):
            return
        for tag in tag_group:
            tag_text = str(tag) if tag else ""
            if not tag_text:
                continue
            if title is None and tag_text.lower().startswith("title:"):
                title = tag_text.split(":", 1)[1].strip()
            # storage_tags and display_tags overlap heavily; keep one copy.
            if tag_text not in seen:
                seen.add(tag_text)
                collected.append(tag_text)

    def _absorb_statuses(group: Any) -> None:
        # A status-keyed dict contributes the lists of every status, exactly
        # as the storage_tags handling always did; a bare list is used as-is.
        if isinstance(group, dict):
            for tag_group in group.values():
                _absorb(tag_group)
        else:
            _absorb(group)

    tags_payload = meta.get("tags")
    if isinstance(tags_payload, dict):
        for service_tags in tags_payload.values():
            if not isinstance(service_tags, dict):
                continue
            _absorb_statuses(service_tags.get("storage_tags"))
            _absorb_statuses(service_tags.get("display_tags"))
    else:
        # Some payloads carry a flat list of tags directly under "tags".
        _absorb(tags_payload)
    _absorb(meta.get("tags_flat"))
    return collected, title


def _ext_for_mime(mime_type: Any) -> str:
    """Resolve a file extension (without leading dot) for *mime_type*.

    ``mime_maps`` is walked category by category. An entry matches when it is
    a dict whose ``"mimes"`` contains the MIME type (its ``"ext"`` is used),
    or when it is a plain string equal to the MIME type (its key is used).
    Returns "" when nothing matches.
    """
    if not mime_type:
        return ""
    for category in mime_maps.values():
        if not isinstance(category, dict):
            continue
        for ext_key, info in category.items():
            if isinstance(info, dict):
                if mime_type not in (info.get("mimes") or ()):
                    continue
                ext = str(info.get("ext") or "").lstrip(".")
            elif info == mime_type:
                ext = str(ext_key).lstrip(".")
            else:
                continue
            if ext:
                return ext
            # Matched but no usable extension: move on to the next category.
            break
    return ""


def _is_present_locally(metadata: Any) -> bool:
    """Tell whether a metadata lookup shows the file is already stored.

    An entry only counts when it has a real ``file_id`` and is neither
    flagged non-local nor trashed; in every other case the caller uploads.
    NOTE(review): the ``is_local`` / ``is_trashed`` flags follow the Hydrus
    Client API metadata format - confirm the client wrapper passes them
    through unchanged.
    """
    if not isinstance(metadata, dict):
        return False
    entries = metadata.get("metadata") or metadata.get("file_metadata") or []
    if not isinstance(entries, list):
        return False
    return any(
        isinstance(entry, dict)
        and entry.get("file_id") is not None
        and entry.get("is_local", True)
        and not entry.get("is_trashed", False)
        for entry in entries
    )


class HydrusNetwork(Store):
    """File storage backend for Hydrus client.

    Each instance represents a specific Hydrus client connection.
    Maintains its own HydrusClient with session key.

    Attributes set by ``__init__``: ``NAME`` (instance name), ``API`` (access
    key), ``URL`` (client base URL) and ``_client`` (the API client, or None
    once the startup health-check has failed).
    """

    def __new__(cls, *args: Any, **kwargs: Any) -> "HydrusNetwork":
        """Allocate the instance and pre-seed NAME / API / URL from kwargs."""
        instance = super().__new__(cls)
        for key in ("NAME", "API", "URL"):
            value = kwargs.get(key)
            if value is not None:
                setattr(instance, key, str(value))
        return instance

    # Advertise the accepted config keys on the constructor function itself
    # (presumably read by the store registry - confirm against the caller).
    setattr(__new__, "keys", ("NAME", "API", "URL"))

    def __init__(
        self,
        instance_name: Optional[str] = None,
        api_key: Optional[str] = None,
        url: Optional[str] = None,
        *,
        NAME: Optional[str] = None,
        API: Optional[str] = None,
        URL: Optional[str] = None,
    ) -> None:
        """Initialize Hydrus storage backend.

        Args:
            instance_name: Name of this Hydrus instance (e.g., 'home', 'work')
            api_key: Hydrus Client API access key
            url: Hydrus client URL (e.g., 'http://192.168.1.230:45869')
            NAME: Keyword alias for ``instance_name`` (used when it is None)
            API: Keyword alias for ``api_key`` (used when it is None)
            URL: Keyword alias for ``url`` (used when it is None)

        Raises:
            ValueError: If name, access key or URL is missing/empty.
            RuntimeError: If a session key cannot be acquired.
        """
        from API.HydrusNetwork import HydrusNetwork as HydrusClient

        if instance_name is None and NAME is not None:
            instance_name = str(NAME)
        if api_key is None and API is not None:
            api_key = str(API)
        if url is None and URL is not None:
            url = str(URL)

        if not instance_name or not api_key or not url:
            raise ValueError("HydrusNetwork requires NAME, API, and URL")

        self.NAME = instance_name
        self.API = api_key
        self.URL = url

        # Create persistent client with session key for this instance
        self._client: Optional[Any] = HydrusClient(url=url, access_key=api_key)

        # Self health-check: acquire a session key immediately so broken
        # configs fail fast instead of surfacing on first use.
        try:
            self._client.ensure_session_key()
        except Exception as exc:
            # Drop the unusable client so a partially built object can't use it.
            self._client = None
            raise RuntimeError(f"Hydrus '{self.NAME}' unavailable: {exc}") from exc

    def name(self) -> str:
        """Return the configured instance name."""
        return self.NAME

    def get_name(self) -> str:
        """Return the configured instance name (alias of ``name``)."""
        return self.NAME

    def add_file(self, file_path: Path, **kwargs: Any) -> str:
        """Upload file to Hydrus with full metadata support.

        The file is hashed first; the upload is skipped when Hydrus already
        holds the file. Tags and URLs are applied to new and existing files
        alike, and failures there are logged without aborting.

        Args:
            file_path: Path to the file to upload
            tag: Optional tag or list of tags to add
            url: Optional URL or list of URLs to associate with the file
            title: Optional title (will be added as 'title:value' tag)

        Returns:
            File hash from Hydrus

        Raises:
            Exception: If upload fails
        """
        from SYS.utils import sha256_file

        tag_list = _listify(kwargs.get("tag"))
        urls = _listify(kwargs.get("url"))
        title = kwargs.get("title")

        # Add title to tags if provided and not already present
        if title and not any(
            str(candidate).lower().startswith("title:") for candidate in tag_list
        ):
            tag_list = [f"title:{title}", *tag_list]

        try:
            file_hash = sha256_file(file_path)
            debug(f"File hash: {file_hash}")

            # Use persistent client with session key
            client = self._client
            if client is None:
                raise RuntimeError("Hydrus client unavailable")

            # Check if file already exists in Hydrus. This is best-effort: if
            # the lookup fails we simply upload.
            file_exists = False
            try:
                metadata = client.fetch_file_metadata(hashes=[file_hash])
                file_exists = _is_present_locally(metadata)
            except Exception as exc:
                debug(f"Duplicate check failed, uploading anyway: {exc}")

            if file_exists:
                log(
                    f"ℹ️ Duplicate detected - file already in Hydrus with hash: {file_hash}",
                    file=sys.stderr,
                )
            else:
                log(f"Uploading to Hydrus: {file_path.name}", file=sys.stderr)
                response = client.add_file(file_path)

                # Extract hash from response
                hydrus_hash: Optional[str] = None
                if isinstance(response, dict):
                    hydrus_hash = response.get("hash") or response.get("file_hash")
                    if not hydrus_hash:
                        hashes = response.get("hashes")
                        if isinstance(hashes, list) and hashes:
                            hydrus_hash = hashes[0]

                if not hydrus_hash:
                    raise RuntimeError(f"Hydrus response missing file hash: {response}")

                file_hash = hydrus_hash
                log(f"Hydrus: {file_hash}", file=sys.stderr)

            # Add tags if provided (both for new and existing files)
            if tag_list:
                service_name = _DEFAULT_TAG_SERVICE
                try:
                    debug(f"Adding {len(tag_list)} tag(s) to Hydrus: {tag_list}")
                    client.add_tag(file_hash, tag_list, service_name)
                    log(f"Tags added via '{service_name}'", file=sys.stderr)
                except Exception as exc:
                    log(f"⚠️ Failed to add tags: {exc}", file=sys.stderr)

            # Associate url if provided (both for new and existing files)
            if urls:
                log(f"Associating {len(urls)} URL(s) with file", file=sys.stderr)
                for item in urls:
                    if not item:
                        continue
                    try:
                        client.associate_url(file_hash, str(item))
                        debug(f"Associated URL: {item}")
                    except Exception as exc:
                        log(f"⚠️ Failed to associate URL {item}: {exc}", file=sys.stderr)

            return file_hash

        except Exception as exc:
            log(f"❌ Hydrus upload failed: {exc}", file=sys.stderr)
            raise

    def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]:
        """Search Hydrus database for files matching query.

        A query containing ':' is sent to Hydrus verbatim as a single tag
        search. Any other query (including "*") fetches everything and is
        filtered locally: every whitespace/comma separated term must occur as
        a whole word in the title or tags ("*" matches all files).

        Args:
            query: Search query (tags, filenames, hashes, etc.)
            limit: Maximum number of results to return (default: 100)

        Returns:
            List of dicts with 'hash', 'url', 'name', 'title', 'size',
            'size_bytes', 'store', 'tag', 'file_id', 'mime' and 'ext' fields

        Raises:
            Exception: If the client is unavailable or a Hydrus call fails
                (logged with a traceback, then re-raised).

        Example:
            results = storage["hydrus"].search("artist:john_doe music")
            results = storage["hydrus"].search("Simple Man")
        """
        limit = kwargs.get("limit", 100)
        try:
            limit = 100 if limit is None else int(limit)
        except (TypeError, ValueError):
            limit = 100

        try:
            client = self._client
            if client is None:
                raise RuntimeError("Hydrus client unavailable")

            debug(f"Searching Hydrus for: {query}")

            query_lower = str(query or "").lower().strip()
            has_namespace = ":" in query_lower

            # Explicit namespace (e.g. "creator:john", "system:has_audio") is
            # a real tag search; everything else, including "*", lists all
            # files and is filtered locally below.
            tags = [query_lower] if has_namespace else ["system:everything"]

            search_result = client.search_files(
                tags=tags,
                return_hashes=True,
                return_file_ids=True,
            ) or {}

            file_ids = search_result.get("file_ids") or []
            hashes = search_result.get("hashes") or []

            if not file_ids and not hashes:
                debug("Found 0 result(s)")
                return []

            # Prefer file ids; fall back to hashes when only those came back.
            if file_ids:
                metadata = client.fetch_file_metadata(file_ids=file_ids)
            else:
                metadata = client.fetch_file_metadata(hashes=hashes)
            metadata_list = metadata.get("metadata") if isinstance(metadata, dict) else None
            if not isinstance(metadata_list, list):
                metadata_list = []

            # Whole-word patterns for free-form queries, compiled once. The
            # list stays empty for namespace searches and for "*", which
            # therefore accept every file Hydrus returned.
            term_patterns: List[re.Pattern] = []
            if not has_namespace and query_lower != "*":
                search_terms = set(query_lower.replace(",", " ").split())
                term_patterns = [
                    re.compile(r"\b" + re.escape(term) + r"\b") for term in search_terms
                ]

            base_url = self.URL.rstrip("/")
            results: List[Dict[str, Any]] = []

            for meta in metadata_list:
                if len(results) >= limit:
                    break
                if not isinstance(meta, dict):
                    continue

                file_id = meta.get("file_id")
                hash_hex = meta.get("hash")
                size = meta.get("size", 0)

                all_tags, tag_title = _collect_search_tags(meta)
                title = tag_title if tag_title is not None else f"Hydrus File {file_id}"

                if term_patterns:
                    # AND logic: every term must appear in the title or tags.
                    searchable_text = " ".join([title, *all_tags]).lower()
                    if not all(p.search(searchable_text) for p in term_patterns):
                        continue

                mime_type = meta.get("mime")
                results.append({
                    "hash": hash_hex,
                    "url": f"{base_url}/get_files/file?hash={hash_hex}",
                    "name": title,
                    "title": title,
                    "size": size,
                    "size_bytes": size,
                    "store": self.NAME,
                    "tag": all_tags,
                    "file_id": file_id,
                    "mime": mime_type,
                    "ext": _ext_for_mime(mime_type),
                })

            debug(f"Found {len(results)} result(s)")
            return results[:limit]

        except Exception as exc:
            log(f"❌ Hydrus search failed: {exc}", file=sys.stderr)
            import traceback
            traceback.print_exc(file=sys.stderr)
            raise

    def get_file(self, file_hash: str, **kwargs: Any) -> Path | str | None:
        """Open file in browser via Hydrus client API URL.

        Args:
            file_hash: Hash of the file to open.

        Returns:
            The URL handed to the browser (it embeds the access key). Nothing
            is downloaded.
        """
        import webbrowser

        debug(f"[HydrusNetwork.get_file] Starting for hash: {str(file_hash)[:12]}...")

        # Build browser URL with access key
        base_url = str(self.URL).rstrip('/')
        access_key = str(self.API)
        file_url = f"{base_url}/get_files/file?hash={file_hash}"
        browser_url = f"{file_url}&Hydrus-Client-API-Access-Key={access_key}"
        # Never write the access key to logs; log a redacted variant instead.
        loggable_url = f"{file_url}&Hydrus-Client-API-Access-Key=<redacted>"
        debug(f"[HydrusNetwork.get_file] Opening URL: {loggable_url}")

        # Open in default browser; webbrowser.open reports failure via False.
        if webbrowser.open(browser_url):
            debug("[HydrusNetwork.get_file] Browser opened successfully")
        else:
            debug("[HydrusNetwork.get_file] Browser could not be opened")

        # Return the URL string instead of downloading
        debug(f"[HydrusNetwork.get_file] Returning URL: {loggable_url}")
        return browser_url

    def get_metadata(self, file_hash: str, **kwargs: Any) -> Optional[Dict[str, Any]]:
        """Get metadata for a file from Hydrus by hash.

        Args:
            file_hash: SHA256 hash of the file (64-char hex string)

        Returns:
            Dict with 'hash', 'title', 'ext', 'size' and 'mime' fields, or
            None if the file is not found or the lookup fails. The title
            comes from a 'title:' tag and defaults to 'Hydrus_<hash prefix>'.
        """
        try:
            client = self._client
            if not client:
                debug("get_metadata: Hydrus client unavailable")
                return None

            # Fetch file metadata
            payload = client.fetch_file_metadata(
                hashes=[file_hash], include_service_keys_to_tags=True
            )
            if not payload or not payload.get("metadata"):
                return None

            meta = payload["metadata"][0]
            if not isinstance(meta, dict):
                return None

            # Extract title from tags
            title = f"Hydrus_{str(file_hash)[:12]}"
            tags_payload = meta.get("tags", {})
            if isinstance(tags_payload, dict):
                tag_title = _find_title(tags_payload)
                if tag_title is not None:
                    title = tag_title

            # Determine extension from mime type
            mime_type = meta.get("mime", "")

            return {
                "hash": file_hash,
                "title": title,
                "ext": _ext_for_mime(mime_type),
                "size": meta.get("size", 0),
                "mime": mime_type,
            }

        except Exception as exc:
            debug(f"Failed to get metadata from Hydrus: {exc}")
            return None

    def get_tag(self, file_identifier: str, **kwargs: Any) -> Tuple[List[str], str]:
        """Get tags for a file from Hydrus by hash.

        Args:
            file_identifier: File hash (SHA256 hex string)
            **kwargs: Optional ``service_name`` (defaults to "my tags")

        Returns:
            Tuple of (tags_list, source_description): source is "hydrus" on
            success and "unknown" (with an empty list) on any failure.
        """
        try:
            from API import HydrusNetwork as hydrus_wrapper

            file_hash = _normalize_sha256(file_identifier)
            if file_hash is None:
                debug(f"get_tags: invalid file hash '{file_identifier}'")
                return [], "unknown"

            # Get Hydrus client and service info
            client = self._client
            if not client:
                debug("get_tags: Hydrus client unavailable")
                return [], "unknown"

            # Fetch file metadata
            payload = client.fetch_file_metadata(
                hashes=[file_hash],
                include_service_keys_to_tags=True,
                include_file_url=False,
            )

            items = payload.get("metadata") if isinstance(payload, dict) else None
            if not isinstance(items, list) or not items:
                debug(f"get_tags: No metadata returned for hash {file_hash}")
                return [], "unknown"

            meta = items[0] if isinstance(items[0], dict) else None
            if not isinstance(meta, dict) or meta.get("file_id") is None:
                debug(f"get_tags: Invalid metadata for hash {file_hash}")
                return [], "unknown"

            # Extract tags using service name
            service_name = kwargs.get("service_name") or _DEFAULT_TAG_SERVICE
            try:
                service_key = hydrus_wrapper.get_tag_service_key(client, service_name)
            except Exception as exc:
                # Key lookup is best-effort: the extractor can still find the
                # service by its name inside the payload.
                debug(f"get_tags: service key lookup failed: {exc}")
                service_key = None

            tags = self._extract_tags_from_hydrus_meta(meta, service_key, service_name)
            return tags, "hydrus"

        except Exception as exc:
            debug(f"get_tags failed for Hydrus file: {exc}")
            return [], "unknown"

    def add_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool:
        """Add tags to a Hydrus file.

        Args:
            file_identifier: File hash (SHA256 hex string)
            tags: Tag or list of tags to add
            **kwargs: Optional ``service_name`` (defaults to "my tags")

        Returns:
            True if the request was sent; False on an invalid hash, no tags,
            no client, or any error.
        """
        try:
            client = self._client
            if client is None:
                debug("add_tag: Hydrus client unavailable")
                return False

            file_hash = _normalize_sha256(file_identifier)
            if file_hash is None:
                debug(f"add_tag: invalid file hash '{file_identifier}'")
                return False

            service_name = kwargs.get("service_name") or _DEFAULT_TAG_SERVICE
            tag_list = _listify(tags)
            if not tag_list:
                return False

            client.add_tag(file_hash, tag_list, service_name)
            return True
        except Exception as exc:
            debug(f"Hydrus add_tag failed: {exc}")
            return False

    def delete_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool:
        """Delete tags from a Hydrus file.

        Args:
            file_identifier: File hash (SHA256 hex string)
            tags: Tag or list of tags to delete
            **kwargs: Optional ``service_name`` (defaults to "my tags")

        Returns:
            True if the request was sent; False on an invalid hash, no tags,
            no client, or any error.
        """
        try:
            client = self._client
            if client is None:
                debug("delete_tag: Hydrus client unavailable")
                return False

            file_hash = _normalize_sha256(file_identifier)
            if file_hash is None:
                debug(f"delete_tag: invalid file hash '{file_identifier}'")
                return False

            service_name = kwargs.get("service_name") or _DEFAULT_TAG_SERVICE
            tag_list = _listify(tags)
            if not tag_list:
                return False

            client.delete_tag(file_hash, tag_list, service_name)
            return True
        except Exception as exc:
            debug(f"Hydrus delete_tag failed: {exc}")
            return False

    def get_url(self, file_identifier: str, **kwargs: Any) -> List[str]:
        """Get known url for a Hydrus file.

        Returns:
            The URLs recorded for the file, or an empty list on an invalid
            hash, a missing file, no client, or any error.
        """
        try:
            client = self._client
            if client is None:
                debug("get_url: Hydrus client unavailable")
                return []

            file_hash = _normalize_sha256(file_identifier)
            if file_hash is None:
                return []

            payload = client.fetch_file_metadata(hashes=[file_hash], include_file_url=True)
            items = payload.get("metadata") if isinstance(payload, dict) else None
            if not isinstance(items, list) or not items:
                return []

            meta = items[0]
            if not isinstance(meta, dict):
                return []
            # "url" is the key this code has always read; "known_urls" is the
            # field name used by the raw Hydrus Client API (fallback only).
            return _listify(meta.get("url") or meta.get("known_urls"))
        except Exception as exc:
            debug(f"Hydrus get_url failed: {exc}")
            return []

    def add_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
        """Associate one or more url with a Hydrus file.

        Args:
            file_identifier: Identifier passed through to the client as-is
            url: URL or list of URLs (empty entries are skipped)

        Returns:
            True if every request succeeded; False on the first error.
        """
        try:
            client = self._client
            if client is None:
                debug("add_url: Hydrus client unavailable")
                return False
            for item in _listify(url):
                if item:
                    client.associate_url(file_identifier, str(item))
            return True
        except Exception as exc:
            debug(f"Hydrus add_url failed: {exc}")
            return False

    def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
        """Delete one or more url from a Hydrus file.

        Args:
            file_identifier: Identifier passed through to the client as-is
            url: URL or list of URLs (empty entries are skipped)

        Returns:
            True if every request succeeded; False on the first error.
        """
        try:
            client = self._client
            if client is None:
                debug("delete_url: Hydrus client unavailable")
                return False
            for item in _listify(url):
                if item:
                    client.delete_url(file_identifier, str(item))
            return True
        except Exception as exc:
            debug(f"Hydrus delete_url failed: {exc}")
            return False

    def get_note(self, file_identifier: str, **kwargs: Any) -> Dict[str, str]:
        """Get notes for a Hydrus file (default note service only).

        Returns:
            Mapping of note name to note text; empty on an invalid hash, a
            missing file, no client, or any error.
        """
        try:
            client = self._client
            if client is None:
                debug("get_note: Hydrus client unavailable")
                return {}

            file_hash = _normalize_sha256(file_identifier)
            if file_hash is None:
                return {}

            payload = client.fetch_file_metadata(hashes=[file_hash], include_notes=True)
            items = payload.get("metadata") if isinstance(payload, dict) else None
            if not isinstance(items, list) or not items:
                return {}

            meta = items[0]
            if not isinstance(meta, dict):
                return {}

            notes_payload = meta.get("notes")
            if not isinstance(notes_payload, dict):
                return {}
            return {
                str(k): str(v or "")
                for k, v in notes_payload.items()
                if str(k).strip()
            }
        except Exception as exc:
            debug(f"Hydrus get_note failed: {exc}")
            return {}

    def set_note(self, file_identifier: str, name: str, text: str, **kwargs: Any) -> bool:
        """Set a named note for a Hydrus file (default note service only).

        Returns:
            True if the request was sent; False on an invalid hash, a blank
            note name, no client, or any error.
        """
        try:
            client = self._client
            if client is None:
                debug("set_note: Hydrus client unavailable")
                return False

            file_hash = _normalize_sha256(file_identifier)
            if file_hash is None:
                return False

            note_name = str(name or "").strip()
            if not note_name:
                return False

            client.set_notes(file_hash, {note_name: str(text or "")})
            return True
        except Exception as exc:
            debug(f"Hydrus set_note failed: {exc}")
            return False

    def delete_note(self, file_identifier: str, name: str, **kwargs: Any) -> bool:
        """Delete a named note for a Hydrus file (default note service only).

        Returns:
            True if the request was sent; False on an invalid hash, a blank
            note name, no client, or any error.
        """
        try:
            client = self._client
            if client is None:
                debug("delete_note: Hydrus client unavailable")
                return False

            file_hash = _normalize_sha256(file_identifier)
            if file_hash is None:
                return False

            note_name = str(name or "").strip()
            if not note_name:
                return False

            client.delete_notes(file_hash, [note_name])
            return True
        except Exception as exc:
            debug(f"Hydrus delete_note failed: {exc}")
            return False

    @staticmethod
    def _extract_tags_from_hydrus_meta(
        meta: Dict[str, Any],
        service_key: Optional[str],
        service_name: str,
    ) -> List[str]:
        """Extract current tags from Hydrus metadata dict.

        The service entry is looked up by ``service_key`` first and, failing
        that, by an entry whose ``"name"`` equals ``service_name``.

        Prefers display_tags (includes siblings/parents, excludes deleted).
        Falls back to storage_tags status '0' (current). Both may be either a
        status-keyed dict or a plain list.

        Returns:
            The current tags, or an empty list if the service is not found.
        """
        if not isinstance(meta, dict):
            return []
        tags_payload = meta.get("tags")
        if not isinstance(tags_payload, dict):
            return []

        svc_data = tags_payload.get(service_key) if service_key else None
        if not isinstance(svc_data, dict) and service_name:
            # No usable key: locate the service through its "name" field.
            svc_data = next(
                (
                    entry
                    for entry in tags_payload.values()
                    if isinstance(entry, dict) and entry.get("name") == service_name
                ),
                None,
            )
        if not isinstance(svc_data, dict):
            return []

        # Prefer display_tags (Hydrus computes siblings/parents)
        display = _current_tags(svc_data.get("display_tags"))
        if display:
            return display

        # Fallback to storage_tags status '0' (current)
        return _current_tags(svc_data.get("storage_tags"))