from __future__ import annotations

import re
import sys
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

import httpx

from SYS.logger import debug, log
from SYS.utils_constant import mime_maps
from Store._base import Store

_HYDRUS_INIT_CHECK_CACHE: dict[tuple[str, str], tuple[bool, Optional[str]]] = {}


class HydrusNetwork(Store):
    """File storage backend for a Hydrus client.

    Each instance represents a specific Hydrus client connection and
    maintains its own HydrusClient.
    """

    def _log_prefix(self) -> str:
        store_name = getattr(self, "NAME", None) or "unknown"
        return f"[hydrusnetwork:{store_name}]"

    def __new__(cls, *args: Any, **kwargs: Any) -> "HydrusNetwork":
        instance = super().__new__(cls)
        name = kwargs.get("NAME")
        api = kwargs.get("API")
        url = kwargs.get("URL")
        if name is not None:
            setattr(instance, "NAME", str(name))
        if api is not None:
            setattr(instance, "API", str(api))
        if url is not None:
            setattr(instance, "URL", str(url))
        return instance

    setattr(__new__, "keys", ("NAME", "API", "URL"))

    def __init__(
        self,
        instance_name: Optional[str] = None,
        api_key: Optional[str] = None,
        url: Optional[str] = None,
        *,
        NAME: Optional[str] = None,
        API: Optional[str] = None,
        URL: Optional[str] = None,
    ) -> None:
        """Initialize the Hydrus storage backend.

        Args:
            instance_name: Name of this Hydrus instance (e.g., 'home', 'work')
            api_key: Hydrus Client API access key
            url: Hydrus client URL (e.g., 'http://192.168.1.230:45869')
        """
        from API.HydrusNetwork import HydrusNetwork as HydrusClient

        if instance_name is None and NAME is not None:
            instance_name = str(NAME)
        if api_key is None and API is not None:
            api_key = str(API)
        if url is None and URL is not None:
            url = str(URL)
        if not instance_name or not api_key or not url:
            raise ValueError("HydrusNetwork requires NAME, API, and URL")

        self.NAME = instance_name
        self.API = api_key
        self.URL = url.rstrip("/")

        # Total count (best-effort, used for startup diagnostics)
        self.total_count: Optional[int] = None

        # Self health-check: validate the URL is reachable and the access key is accepted.
        # This MUST NOT attempt to acquire a session key.
        cache_key = (self.URL, self.API)
        cached = _HYDRUS_INIT_CHECK_CACHE.get(cache_key)
        if cached is not None:
            ok, err = cached
            if not ok:
                raise RuntimeError(
                    f"Hydrus '{self.NAME}' unavailable: {err or 'Unavailable'}"
                )
        else:
            api_version_url = f"{self.URL}/api_version"
            verify_key_url = f"{self.URL}/verify_access_key"
            try:
                with httpx.Client(timeout=5.0, verify=False, follow_redirects=True) as client:
                    version_resp = client.get(api_version_url)
                    version_resp.raise_for_status()
                    version_payload = version_resp.json()
                    if not isinstance(version_payload, dict):
                        raise RuntimeError(
                            "Hydrus /api_version returned an unexpected response"
                        )
                    verify_resp = client.get(
                        verify_key_url,
                        headers={"Hydrus-Client-API-Access-Key": self.API},
                    )
                    verify_resp.raise_for_status()
                    verify_payload = verify_resp.json()
                    if not isinstance(verify_payload, dict):
                        raise RuntimeError(
                            "Hydrus /verify_access_key returned an unexpected response"
                        )
                _HYDRUS_INIT_CHECK_CACHE[cache_key] = (True, None)
            except Exception as exc:
                err = str(exc)
                _HYDRUS_INIT_CHECK_CACHE[cache_key] = (False, err)
                raise RuntimeError(f"Hydrus '{self.NAME}' unavailable: {err}") from exc

        # Create a persistent client for this instance (auth via access key by default).
        self._client = HydrusClient(
            url=self.URL, access_key=self.API, instance_name=self.NAME
        )

        # Best-effort total count (used for startup diagnostics). Avoid heavy payloads.
        # Some Hydrus setups appear to return no count via the CBOR client for this endpoint,
        # so prefer a direct JSON request with a short timeout.
        try:
            self.get_total_count(refresh=True)
        except Exception:
            pass

    def get_total_count(self, *, refresh: bool = False) -> Optional[int]:
        """Best-effort total file count for this Hydrus instance.

        Intended for diagnostics (e.g., REPL startup checks). This should be
        fast, and it MUST NOT raise.
        """
        if self.total_count is not None and not refresh:
            return self.total_count

        # 1) Prefer a direct JSON request (fast + avoids CBOR edge cases).
        try:
            import json as _json

            url = f"{self.URL}/get_files/search_files"
            params = {
                "tags": _json.dumps(["system:everything"]),
                "return_hashes": "false",
                "return_file_ids": "false",
                "return_file_count": "true",
            }
            headers = {
                "Hydrus-Client-API-Access-Key": self.API,
                "Accept": "application/json",
            }
            with httpx.Client(timeout=5.0, verify=False, follow_redirects=True) as client:
                resp = client.get(url, params=params, headers=headers)
                resp.raise_for_status()
                payload = resp.json()
            count_val = None
            if isinstance(payload, dict):
                count_val = payload.get("file_count")
                if count_val is None:
                    count_val = payload.get("file_count_inclusive")
                if count_val is None:
                    count_val = payload.get("num_files")
            if isinstance(count_val, int):
                self.total_count = count_val
                return self.total_count
        except Exception as exc:
            debug(
                f"{self._log_prefix()} total count (json) unavailable: {exc}",
                file=sys.stderr,
            )

        # 2) Fall back to the API client (CBOR).
        try:
            payload = self._client.search_files(
                tags=["system:everything"],
                return_hashes=False,
                return_file_ids=False,
                return_file_count=True,
            )
            count_val = None
            if isinstance(payload, dict):
                count_val = payload.get("file_count")
                if count_val is None:
                    count_val = payload.get("file_count_inclusive")
                if count_val is None:
                    count_val = payload.get("num_files")
            if isinstance(count_val, int):
                self.total_count = count_val
                return self.total_count
        except Exception as exc:
            debug(
                f"{self._log_prefix()} total count (client) unavailable: {exc}",
                file=sys.stderr,
            )

        return self.total_count

    def name(self) -> str:
        return self.NAME

    def get_name(self) -> str:
        return self.NAME

    def add_file(self, file_path: Path, **kwargs: Any) -> str:
        """Upload a file to Hydrus with full metadata support.

        Args:
            file_path: Path to the file to upload
            tag: Optional list of tag values to add
            url: Optional list of URLs to associate with the file
            title: Optional title (will be added as a 'title:value' tag)

        Returns:
            File hash from Hydrus

        Raises:
            Exception: If the upload fails
        """
        from SYS.utils import sha256_file

        tag_list = kwargs.get("tag", [])
        url = kwargs.get("url", [])
        title = kwargs.get("title")

        # Add the title as a tag if provided and no title: tag is already present.
        if title:
            title_tag = f"title:{title}".strip().lower()
            if not any(str(candidate).lower().startswith("title:") for candidate in tag_list):
                tag_list = [title_tag] + list(tag_list)

        # Hydrus tags are lowercase-only; normalize here for consistency.
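        # Illustrative normalization (hypothetical values): [' Title:Foo ', 'Artist:Bar', 42, '']
        # becomes ['title:foo', 'artist:bar'] - non-strings and blank entries are dropped.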
        tag_list = [
            str(t).strip().lower()
            for t in (tag_list or [])
            if isinstance(t, str) and str(t).strip()
        ]

        try:
            # Compute the file hash
            file_hash = sha256_file(file_path)
            debug(f"{self._log_prefix()} file hash: {file_hash}")

            # Use the persistent client with its session key
            client = self._client
            if client is None:
                raise Exception("Hydrus client unavailable")

            # Check whether the file already exists in Hydrus
            file_exists = False
            try:
                metadata = client.fetch_file_metadata(
                    hashes=[file_hash],
                    include_service_keys_to_tags=False,
                    include_file_url=False,
                    include_duration=False,
                    include_size=False,
                    include_mime=False,
                )
                if metadata and isinstance(metadata, dict):
                    metas = metadata.get("metadata", [])
                    if isinstance(metas, list) and metas:
                        # Hydrus returns placeholder rows for unknown hashes.
                        # Only treat as a real duplicate if it has a concrete file_id.
                        for meta in metas:
                            if isinstance(meta, dict) and meta.get("file_id") is not None:
                                file_exists = True
                                break
                        if file_exists:
                            log(
                                f"ℹ️ Duplicate detected - file already in Hydrus with hash: {file_hash}",
                                file=sys.stderr,
                            )
            except Exception:
                pass

            # If Hydrus reports an existing file, it may be in the trash. Best-effort restore it to 'my files'.
            # This keeps behavior aligned with user expectation: "use API only" and ensure it lands in my files.
            if file_exists:
                try:
                    client.undelete_files([file_hash])
                except Exception:
                    pass

            # Upload the file if it is not already present
            if not file_exists:
                log(
                    f"{self._log_prefix()} Uploading: {file_path.name}",
                    file=sys.stderr,
                )
                response = client.add_file(file_path)

                # Extract the hash from the response
                hydrus_hash: Optional[str] = None
                if isinstance(response, dict):
                    hydrus_hash = response.get("hash") or response.get("file_hash")
                    if not hydrus_hash:
                        hashes = response.get("hashes")
                        if isinstance(hashes, list) and hashes:
                            hydrus_hash = hashes[0]
                if not hydrus_hash:
                    raise Exception(f"Hydrus response missing file hash: {response}")
                file_hash = hydrus_hash
                log(f"{self._log_prefix()} hash: {file_hash}", file=sys.stderr)

            # Add tags if provided (both for new and existing files)
            if tag_list:
                # Use the default tag service.
                service_name = "my tags"
                try:
                    debug(
                        f"{self._log_prefix()} Adding {len(tag_list)} tag(s): {tag_list}"
                    )
                    client.add_tag(file_hash, tag_list, service_name)
                    log(
                        f"{self._log_prefix()} Tags added via '{service_name}'",
                        file=sys.stderr,
                    )
                except Exception as exc:
                    log(
                        f"{self._log_prefix()} ⚠️ Failed to add tags: {exc}",
                        file=sys.stderr,
                    )

            # Associate URLs if provided (both for new and existing files)
            if url:
                log(
                    f"{self._log_prefix()} Associating {len(url)} URL(s) with file",
                    file=sys.stderr,
                )
                for item_url in url:
                    if not item_url:
                        continue
                    try:
                        client.associate_url(file_hash, str(item_url))
                        debug(f"{self._log_prefix()} Associated URL: {item_url}")
                    except Exception as exc:
                        log(
                            f"{self._log_prefix()} ⚠️ Failed to associate URL {item_url}: {exc}",
                            file=sys.stderr,
                        )

            return file_hash

        except Exception as exc:
            log(f"{self._log_prefix()} ❌ upload failed: {exc}", file=sys.stderr)
            raise

    def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]:
        """Search the Hydrus database for files matching a query.

        Args:
            query: Search query (tags, filenames, hashes, etc.)
            limit: Maximum number of results to return (default: 100)

        Returns:
            List of dicts with 'name', 'hash', 'size', 'tags' fields

        Example:
            results = storage["hydrus"].search("artist:john_doe music")
            results = storage["hydrus"].search("Simple Man")
        """
        limit = kwargs.get("limit", 100)

        try:
            client = self._client
            if client is None:
                raise Exception("Hydrus client unavailable")

            prefix = self._log_prefix()
            debug(f"{prefix} Searching for: {query}")

            def _extract_urls(meta_obj: Any) -> list[str]:
                if not isinstance(meta_obj, dict):
                    return []
                raw = meta_obj.get("url")
                if raw is None:
                    raw = meta_obj.get("urls")
                if isinstance(raw, str):
                    val = raw.strip()
                    return [val] if val else []
                if isinstance(raw, list):
                    out: list[str] = []
                    for item in raw:
                        if not isinstance(item, str):
                            continue
                        s = item.strip()
                        if s:
                            out.append(s)
                    return out
                return []

            def _iter_url_filtered_metadata(
                url_value: str | None, want_any: bool, fetch_limit: int
            ) -> list[dict[str, Any]]:
                """Best-effort URL search by scanning Hydrus metadata with include_file_url=True."""
                # First try a fast system predicate if Hydrus supports it.
                candidate_file_ids: list[int] = []
                try:
                    if want_any:
                        predicate = "system:has url"
                        url_search = client.search_files(
                            tags=[predicate],
                            return_hashes=False,
                            return_file_ids=True,
                            return_file_count=False,
                        )
                        ids = url_search.get("file_ids", []) if isinstance(url_search, dict) else []
                        if isinstance(ids, list):
                            candidate_file_ids = [
                                int(x)
                                for x in ids
                                if isinstance(x, (int, float, str)) and str(x).strip().isdigit()
                            ]
                except Exception:
                    candidate_file_ids = []

                if not candidate_file_ids:
                    # Fallback: scan from system:everything and filter by URL substring.
                    everything = client.search_files(
                        tags=["system:everything"],
                        return_hashes=False,
                        return_file_ids=True,
                        return_file_count=False,
                    )
                    ids = everything.get("file_ids", []) if isinstance(everything, dict) else []
                    if isinstance(ids, list):
                        candidate_file_ids = [int(x) for x in ids if isinstance(x, (int, float))]

                if not candidate_file_ids:
                    return []

                needle = (url_value or "").strip().lower()
                chunk_size = 200
                out: list[dict[str, Any]] = []
                for start in range(0, len(candidate_file_ids), chunk_size):
                    if len(out) >= fetch_limit:
                        break
                    chunk = candidate_file_ids[start:start + chunk_size]
                    try:
                        payload = client.fetch_file_metadata(
                            file_ids=chunk,
                            include_file_url=True,
                            include_service_keys_to_tags=True,
                            include_duration=True,
                            include_size=True,
                            include_mime=True,
                        )
                    except Exception:
                        continue
                    metas = payload.get("metadata", []) if isinstance(payload, dict) else []
                    if not isinstance(metas, list):
                        continue
                    for meta in metas:
                        if not isinstance(meta, dict):
                            continue
                        urls = _extract_urls(meta)
                        if not urls:
                            continue
                        if want_any:
                            out.append(meta)
                            if len(out) >= fetch_limit:
                                break
                            continue
                        if not needle:
                            continue
                        if any(needle in u.lower() for u in urls):
                            out.append(meta)
                            if len(out) >= fetch_limit:
                                break
                return out

            query_lower = query.lower().strip()

            # Support `ext:` anywhere in the query. We filter results by the
            # Hydrus metadata extension field.
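            # Illustrative parses (hypothetical queries), based on the handling below:
            #   "ext:mp4"              -> ext_filter="mp4", query becomes "*" (ext-only fast path)
            #   "simple man ext:.webm" -> ext_filter="webm", query becomes "simple man"
            # "extension:" is accepted as a synonym for "ext:".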
            def _normalize_ext_filter(value: str) -> str:
                v = str(value or "").strip().lower().lstrip(".")
                v = "".join(ch for ch in v if ch.isalnum())
                return v

            ext_filter: str | None = None
            ext_only: bool = False
            try:
                m = re.search(r"\bext:([^\s,]+)", query_lower)
                if not m:
                    m = re.search(r"\bextension:([^\s,]+)", query_lower)
                if m:
                    ext_filter = _normalize_ext_filter(m.group(1)) or None
                    query_lower = re.sub(
                        r"\s*\b(?:ext|extension):[^\s,]+", " ", query_lower
                    )
                    query_lower = re.sub(r"\s{2,}", " ", query_lower).strip().strip(",")
                    query = query_lower
                    if ext_filter and not query_lower:
                        query = "*"
                        query_lower = "*"
                        ext_only = True
            except Exception:
                ext_filter = None
                ext_only = False

            # Split into meaningful terms for AND logic.
            # Avoid punctuation tokens like '-' that would make matching brittle.
            search_terms = [t for t in re.findall(r"[a-z0-9]+", query_lower) if t]

            # Special case: url:* and url:<pattern>
            metadata_list: list[dict[str, Any]] | None = None
            if ":" in query_lower and not query_lower.startswith(":"):
                namespace, pattern = query_lower.split(":", 1)
                namespace = namespace.strip().lower()
                pattern = pattern.strip()
                if namespace == "url":
                    if not pattern or pattern == "*":
                        metadata_list = _iter_url_filtered_metadata(
                            None, want_any=True, fetch_limit=int(limit) if limit else 100
                        )
                    else:
                        # Fast path: exact URL via /add_urls/get_url_files when a full URL is provided.
                        try:
                            if pattern.startswith("http://") or pattern.startswith("https://"):
                                from API.HydrusNetwork import HydrusRequestSpec

                                spec = HydrusRequestSpec(
                                    method="GET",
                                    endpoint="/add_urls/get_url_files",
                                    query={"url": pattern},
                                )
                                response = client._perform_request(spec)  # type: ignore[attr-defined]
                                hashes: list[str] = []
                                file_ids: list[int] = []
                                if isinstance(response, dict):
                                    raw_hashes = response.get("hashes") or response.get("file_hashes")
                                    if isinstance(raw_hashes, list):
                                        hashes = [
                                            str(h).strip()
                                            for h in raw_hashes
                                            if isinstance(h, str) and str(h).strip()
                                        ]
                                    raw_ids = response.get("file_ids")
                                    if isinstance(raw_ids, list):
                                        for item in raw_ids:
                                            try:
                                                file_ids.append(int(item))
                                            except (TypeError, ValueError):
                                                continue
                                if file_ids:
                                    payload = client.fetch_file_metadata(
                                        file_ids=file_ids,
                                        include_file_url=True,
                                        include_service_keys_to_tags=True,
                                        include_duration=True,
                                        include_size=True,
                                        include_mime=True,
                                    )
                                    metas = (
                                        payload.get("metadata", [])
                                        if isinstance(payload, dict)
                                        else []
                                    )
                                    if isinstance(metas, list):
                                        metadata_list = [m for m in metas if isinstance(m, dict)]
                                elif hashes:
                                    payload = client.fetch_file_metadata(
                                        hashes=hashes,
                                        include_file_url=True,
                                        include_service_keys_to_tags=True,
                                        include_duration=True,
                                        include_size=True,
                                        include_mime=True,
                                    )
                                    metas = (
                                        payload.get("metadata", [])
                                        if isinstance(payload, dict)
                                        else []
                                    )
                                    if isinstance(metas, list):
                                        metadata_list = [m for m in metas if isinstance(m, dict)]
                        except Exception:
                            metadata_list = None

                        # Fallback: substring scan
                        if metadata_list is None:
                            metadata_list = _iter_url_filtered_metadata(
                                pattern, want_any=False, fetch_limit=int(limit) if limit else 100
                            )

            # Parse the query into tags.
            # "*" means "match all" - use the system:everything tag in Hydrus.
            # If the query has an explicit namespace, use it as a tag search.
            # If the query is free-form, search BOTH:
            #   - title:*term*  (title: is the only namespace searched implicitly)
            #   - *term*        (freeform tags; we will filter out other namespace matches client-side)
            tags: list[str] = []
            freeform_union_search: bool = False
            title_predicates: list[str] = []
            freeform_predicates: list[str] = []
            if query.strip() == "*":
                tags = ["system:everything"]
            elif ":" in query_lower:
                tags = [query_lower]
            else:
                freeform_union_search = True
                if search_terms:
                    # Hydrus supports wildcard matching primarily as a prefix (e.g., tag*).
                    # Use per-term prefix matching for both title: and freeform tags.
                    title_predicates = [f"title:{term}*" for term in search_terms]
                    freeform_predicates = [f"{term}*" for term in search_terms]
                else:
                    # If we can't extract alnum terms, fall back to the raw query text.
                    title_predicates = [f"title:{query_lower}*"]
                    freeform_predicates = [f"{query_lower}*"]

            # Search files with the tags (unless a url: search already produced metadata).
            results = []

            def _extract_search_ids(payload: Any) -> tuple[list[int], list[str]]:
                if not isinstance(payload, dict):
                    return [], []
                raw_ids = payload.get("file_ids", [])
                raw_hashes = payload.get("hashes", [])
                ids_out: list[int] = []
                hashes_out: list[str] = []
                if isinstance(raw_ids, list):
                    for item in raw_ids:
                        try:
                            ids_out.append(int(item))
                        except (TypeError, ValueError):
                            continue
                if isinstance(raw_hashes, list):
                    hashes_out = [
                        str(h).strip() for h in raw_hashes if isinstance(h, str) and str(h).strip()
                    ]
                return ids_out, hashes_out

            if metadata_list is None:
                file_ids: list[int] = []
                hashes: list[str] = []
                if freeform_union_search:
                    if not title_predicates and not freeform_predicates:
                        debug(f"{prefix} 0 result(s)")
                        return []
                    payloads: list[Any] = []
                    try:
                        payloads.append(
                            client.search_files(
                                tags=title_predicates,
                                return_hashes=True,
                                return_file_ids=True,
                            )
                        )
                    except Exception:
                        pass
                    try:
                        payloads.append(
                            client.search_files(
                                tags=freeform_predicates,
                                return_hashes=True,
                                return_file_ids=True,
                            )
                        )
                    except Exception:
                        pass
                    id_set: set[int] = set()
                    hash_set: set[str] = set()
                    for payload in payloads:
                        ids_part, hashes_part = _extract_search_ids(payload)
                        for fid in ids_part:
                            id_set.add(fid)
                        for hh in hashes_part:
                            hash_set.add(hh)
                    file_ids = list(id_set)
                    hashes = list(hash_set)
                else:
                    if not tags:
                        debug(f"{prefix} 0 result(s)")
                        return []
                    search_result = client.search_files(
                        tags=tags, return_hashes=True, return_file_ids=True
                    )
                    file_ids, hashes = _extract_search_ids(search_result)

                # Fast path: ext-only search. Avoid fetching metadata for an unbounded
                # system:everything result set; fetch in chunks until we have enough.
                if ext_only and ext_filter:
                    results: list[dict[str, Any]] = []
                    if not file_ids and not hashes:
                        debug(f"{prefix} 0 result(s)")
                        return []
                    # Prefer file_ids if available.
                    if file_ids:
                        chunk_size = 200
                        for start in range(0, len(file_ids), chunk_size):
                            if len(results) >= limit:
                                break
                            chunk = file_ids[start:start + chunk_size]
                            try:
                                payload = client.fetch_file_metadata(
                                    file_ids=chunk,
                                    include_service_keys_to_tags=True,
                                    include_file_url=False,
                                    include_duration=True,
                                    include_size=True,
                                    include_mime=True,
                                )
                            except Exception:
                                continue
                            metas = payload.get("metadata", []) if isinstance(payload, dict) else []
                            if not isinstance(metas, list):
                                continue
                            for meta in metas:
                                if len(results) >= limit:
                                    break
                                if not isinstance(meta, dict):
                                    continue
                                mime_type = meta.get("mime")
                                ext = str(meta.get("ext") or "").strip().lstrip(".")
                                if not ext and mime_type:
                                    for category in mime_maps.values():
                                        for _ext_key, info in category.items():
                                            if mime_type in info.get("mimes", []):
                                                ext = str(info.get("ext", "")).strip().lstrip(".")
                                                break
                                        if ext:
                                            break
                                if _normalize_ext_filter(ext) != ext_filter:
                                    continue

                                file_id = meta.get("file_id")
                                hash_hex = meta.get("hash")
                                size = meta.get("size", 0)
                                tags_set = meta.get("tags", {})
                                all_tags: list[str] = []
                                title = f"Hydrus File {file_id}"
                                if isinstance(tags_set, dict):

                                    def _collect(tag_list: Any) -> None:
                                        nonlocal title
                                        if not isinstance(tag_list, list):
                                            return
                                        for tag in tag_list:
                                            tag_text = str(tag) if tag else ""
                                            if not tag_text:
                                                continue
                                            tag_l = tag_text.strip().lower()
                                            if not tag_l:
                                                continue
                                            all_tags.append(tag_l)
                                            if (tag_l.startswith("title:")
                                                    and title == f"Hydrus File {file_id}"):
                                                title = tag_l.split(":", 1)[1].strip()

                                    for _service_name, service_tags in tags_set.items():
                                        if not isinstance(service_tags, dict):
                                            continue
                                        storage_tags = service_tags.get("storage_tags", {})
                                        if isinstance(storage_tags, dict):
                                            for tag_list in storage_tags.values():
                                                _collect(tag_list)
                                        display_tags = service_tags.get("display_tags", [])
                                        _collect(display_tags)

                                file_url = f"{self.URL.rstrip('/')}/get_files/file?hash={hash_hex}"
                                results.append(
                                    {
                                        "hash": hash_hex,
                                        "url": file_url,
                                        "name": title,
                                        "title": title,
                                        "size": size,
                                        "size_bytes": size,
                                        "store": self.NAME,
                                        "tag": all_tags,
                                        "file_id": file_id,
                                        "mime": mime_type,
                                        "ext": ext,
                                    }
                                )

                        debug(f"{prefix} {len(results)} result(s)")
                        return results[:limit]

                    # If we only got hashes, fall back to the normal flow below.

                if not file_ids and not hashes:
                    debug(f"{prefix} 0 result(s)")
                    return []

                if file_ids:
                    metadata = client.fetch_file_metadata(
                        file_ids=file_ids,
                        include_service_keys_to_tags=True,
                        include_file_url=False,
                        include_duration=True,
                        include_size=True,
                        include_mime=True,
                    )
                    metadata_list = metadata.get("metadata", [])
                elif hashes:
                    metadata = client.fetch_file_metadata(
                        hashes=hashes,
                        include_service_keys_to_tags=True,
                        include_file_url=False,
                        include_duration=True,
                        include_size=True,
                        include_mime=True,
                    )
                    metadata_list = metadata.get("metadata", [])
                else:
                    metadata_list = []

            # If our free-text searches produce nothing (or nothing survived downstream
            # filtering), fall back to scanning.
            if (not metadata_list) and (query_lower != "*") and (":" not in query_lower):
                try:
                    search_result = client.search_files(
                        tags=["system:everything"],
                        return_hashes=True,
                        return_file_ids=True,
                    )
                    file_ids, hashes = _extract_search_ids(search_result)
                    if file_ids:
                        metadata = client.fetch_file_metadata(
                            file_ids=file_ids,
                            include_service_keys_to_tags=True,
                            include_file_url=False,
                            include_duration=True,
                            include_size=True,
                            include_mime=True,
                        )
                        metadata_list = metadata.get("metadata", [])
                    elif hashes:
                        metadata = client.fetch_file_metadata(
                            hashes=hashes,
                            include_service_keys_to_tags=True,
                            include_file_url=False,
                            include_duration=True,
                            include_size=True,
                            include_mime=True,
                        )
                        metadata_list = metadata.get("metadata", [])
                except Exception:
                    pass

            if not isinstance(metadata_list, list):
                metadata_list = []

            for meta in metadata_list:
                if len(results) >= limit:
                    break

                file_id = meta.get("file_id")
                hash_hex = meta.get("hash")
                size = meta.get("size", 0)

                # Get the tags for this file and extract the title.
                tags_set = meta.get("tags", {})
                all_tags = []
                title = f"Hydrus File {file_id}"  # Default fallback
                all_tags_str = ""  # For substring matching
                # debug(f"[HydrusBackend.search] Processing file_id={file_id}, tags type={type(tags_set)}")

                if isinstance(tags_set, dict):
                    # Collect both storage_tags and display_tags to capture siblings/parents
                    # and ensure title: is seen.
                    def _collect(tag_list: Any) -> None:
                        nonlocal title, all_tags_str
                        if not isinstance(tag_list, list):
                            return
                        for tag in tag_list:
                            tag_text = str(tag) if tag else ""
                            if not tag_text:
                                continue
                            tag_l = tag_text.strip().lower()
                            if not tag_l:
                                continue
                            all_tags.append(tag_l)
                            all_tags_str += " " + tag_l
                            if tag_l.startswith("title:") and title == f"Hydrus File {file_id}":
                                title = tag_l.split(":", 1)[1].strip()

                    for _service_name, service_tags in tags_set.items():
                        if not isinstance(service_tags, dict):
                            continue
                        storage_tags = service_tags.get("storage_tags", {})
                        if isinstance(storage_tags, dict):
                            for tag_list in storage_tags.values():
                                _collect(tag_list)
                        display_tags = service_tags.get("display_tags", [])
                        _collect(display_tags)

                    # Also consider a top-level flattened tags payload if provided
                    # (the Hydrus API sometimes includes it).
                    top_level_tags = meta.get("tags_flat", []) or meta.get("tags", [])
                    _collect(top_level_tags)

                # Prefer the Hydrus-provided extension (e.g. ".webm"); fall back to the MIME map.
                mime_type = meta.get("mime")
                ext = str(meta.get("ext") or "").strip().lstrip(".")
                if not ext and mime_type:
                    for category in mime_maps.values():
                        for _ext_key, info in category.items():
                            if mime_type in info.get("mimes", []):
                                ext = str(info.get("ext", "")).strip().lstrip(".")
                                break
                        if ext:
                            break

                # Filter results based on query type.
                # If the user provided an explicit namespace (has ':'), don't do substring
                # filtering; just include what the tag search returned.
                has_namespace = ":" in query_lower

                if has_namespace:
                    # Explicit namespace search - already filtered by the Hydrus tag search.
                    # Include this result as-is.
                    file_url = f"{self.URL.rstrip('/')}/get_files/file?hash={hash_hex}"
                    results.append(
                        {
                            "hash": hash_hex,
                            "url": file_url,
                            "name": title,
                            "title": title,
                            "size": size,
                            "size_bytes": size,
                            "store": self.NAME,
                            "tag": all_tags,
                            "file_id": file_id,
                            "mime": mime_type,
                            "ext": ext,
                        }
                    )
                else:
                    # Free-form search: check whether the search terms match the title or
                    # FREEFORM tags. Do NOT implicitly match other namespace tags (except title:).
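                    # Illustrative (hypothetical tags): for the query "simple man", a file whose
                    # title is "simple man acoustic" or that carries the freeform tags "simple"
                    # and "man" matches, while a file tagged only "artist:simple man" does not.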
                    freeform_tags = [
                        t for t in all_tags if isinstance(t, str) and t and (":" not in t)
                    ]
                    searchable_text = (title + " " + " ".join(freeform_tags)).lower()

                    match = True
                    if query_lower != "*" and search_terms:
                        for term in search_terms:
                            if term not in searchable_text:
                                match = False
                                break

                    if match:
                        file_url = f"{self.URL.rstrip('/')}/get_files/file?hash={hash_hex}"
                        results.append(
                            {
                                "hash": hash_hex,
                                "url": file_url,
                                "name": title,
                                "title": title,
                                "size": size,
                                "size_bytes": size,
                                "store": self.NAME,
                                "tag": all_tags,
                                "file_id": file_id,
                                "mime": mime_type,
                                "ext": ext,
                            }
                        )

            debug(f"{prefix} {len(results)} result(s)")

            if ext_filter:
                wanted = ext_filter
                filtered: list[dict[str, Any]] = []
                for item in results:
                    try:
                        if _normalize_ext_filter(str(item.get("ext") or "")) == wanted:
                            filtered.append(item)
                    except Exception:
                        continue
                results = filtered

            return results[:limit]

        except Exception as exc:
            log(f"❌ Hydrus search failed: {exc}", file=sys.stderr)
            import traceback

            traceback.print_exc(file=sys.stderr)
            raise

    def get_file(self, file_hash: str, **kwargs: Any) -> Path | str | None:
        """Return a browser URL for the file.

        IMPORTANT: this method must be side-effect free (do not auto-open a browser).
        Only explicit user actions (e.g. the get-file cmdlet) should open files.
        """
        debug(f"{self._log_prefix()} get_file: start hash={file_hash[:12]}...")

        # Build a browser URL with the access key.
        base_url = str(self.URL).rstrip("/")
        access_key = str(self.API)
        browser_url = (
            f"{base_url}/get_files/file?hash={file_hash}&Hydrus-Client-API-Access-Key={access_key}"
        )
        debug(f"{self._log_prefix()} get_file: url={browser_url}")
        return browser_url

    def delete_file(self, file_identifier: str, **kwargs: Any) -> bool:
        """Delete a file from Hydrus, then clear the deletion record.

        This is used by the delete-file cmdlet when the item belongs to a
        HydrusNetwork store.
        """
        try:
            client = self._client
            if client is None:
                debug(f"{self._log_prefix()} delete_file: client unavailable")
                return False

            file_hash = str(file_identifier or "").strip().lower()
            if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash):
                debug(
                    f"{self._log_prefix()} delete_file: invalid file hash '{file_identifier}'"
                )
                return False

            reason = kwargs.get("reason")
            reason_text = (
                str(reason).strip() if isinstance(reason, str) and reason.strip() else None
            )

            # 1) Delete the file
            client.delete_files([file_hash], reason=reason_text)

            # 2) Clear the deletion record (best-effort)
            try:
                client.clear_file_deletion_record([file_hash])
            except Exception as exc:
                debug(
                    f"{self._log_prefix()} delete_file: clear_file_deletion_record failed: {exc}"
                )

            return True
        except Exception as exc:
            debug(f"{self._log_prefix()} delete_file failed: {exc}")
            return False

    def get_metadata(self, file_hash: str, **kwargs: Any) -> Optional[Dict[str, Any]]:
        """Get metadata for a file from Hydrus by hash.

        Args:
            file_hash: SHA256 hash of the file (64-char hex string)

        Returns:
            Dict with metadata fields or None if not found
        """
        try:
            client = self._client
            if not client:
                debug(f"{self._log_prefix()} get_metadata: client unavailable")
                return None

            # Fetch file metadata with the fields we need for CLI display.
            payload = client.fetch_file_metadata(
                hashes=[file_hash],
                include_service_keys_to_tags=True,
                include_file_url=True,
                include_duration=True,
                include_size=True,
                include_mime=True,
            )
            if not payload or not payload.get("metadata"):
                return None

            meta = payload["metadata"][0]
            # Hydrus can return placeholder metadata rows for unknown hashes.
            if not isinstance(meta, dict) or meta.get("file_id") is None:
                return None

            # Extract the title from tags.
            title = f"Hydrus_{file_hash[:12]}"
            tags_payload = meta.get("tags", {})
            if isinstance(tags_payload, dict):
                for service_data in tags_payload.values():
                    if isinstance(service_data, dict):
                        display_tags = service_data.get("display_tags", {})
                        if isinstance(display_tags, dict):
                            current_tags = display_tags.get("0", [])
                            if isinstance(current_tags, list):
                                for tag in current_tags:
                                    if str(tag).lower().startswith("title:"):
                                        title = tag.split(":", 1)[1].strip()
                                        break
                    if title != f"Hydrus_{file_hash[:12]}":
                        break

            # Hydrus may return mime as an int enum, or sometimes a human label.
            mime_val = meta.get("mime")
            filetype_human = (
                meta.get("filetype_human")
                or meta.get("mime_human")
                or meta.get("mime_string")
            )

            # Determine ext: prefer the Hydrus metadata ext, then filetype_human (when it
            # looks like an ext), then the title suffix, then the file path suffix.
            ext = str(meta.get("ext") or "").strip().lstrip(".")
            if not ext:
                ft = str(filetype_human or "").strip().lstrip(".").lower()
                if ft and ft != "unknown filetype" and ft.isalnum() and len(ft) <= 8:
                    # Treat simple labels like "mp4", "m4a", "webm" as extensions.
                    ext = ft
            if not ext and isinstance(title, str) and "." in title:
                try:
                    ext = Path(title).suffix.lstrip(".")
                except Exception:
                    ext = ""
            if not ext:
                try:
                    path_payload = client.get_file_path(file_hash)
                    if isinstance(path_payload, dict):
                        p = path_payload.get("path")
                        if isinstance(p, str) and p.strip():
                            ext = Path(p.strip()).suffix.lstrip(".")
                except Exception:
                    ext = ""

            # If the extension is still unknown, attempt a best-effort lookup from MIME.
            def _mime_from_ext(ext_value: str) -> str:
                ext_clean = str(ext_value or "").strip().lstrip(".").lower()
                if not ext_clean:
                    return ""
                try:
                    for category in mime_maps.values():
                        info = category.get(ext_clean)
                        if isinstance(info, dict):
                            mimes = info.get("mimes")
                            if isinstance(mimes, list) and mimes:
                                first = mimes[0]
                                return str(first)
                except Exception:
                    return ""
                return ""

            # Normalize to a MIME string for CLI output.
            # Avoid passing through human labels like "unknown filetype".
            mime_type = ""
            if isinstance(mime_val, str):
                candidate = mime_val.strip()
                if "/" in candidate and candidate.lower() != "unknown filetype":
                    mime_type = candidate
            if not mime_type and isinstance(filetype_human, str):
                candidate = filetype_human.strip()
                if "/" in candidate and candidate.lower() != "unknown filetype":
                    mime_type = candidate
            if not mime_type:
                mime_type = _mime_from_ext(ext)

            # Normalize size/duration to stable scalar types.
            size_val = meta.get("size")
            if size_val is None:
                size_val = meta.get("size_bytes")
            try:
                size_int: int | None = int(size_val) if size_val is not None else None
            except Exception:
                size_int = None

            dur_val = meta.get("duration")
            if dur_val is None:
                dur_val = meta.get("duration_ms")
            try:
                dur_int: int | None = int(dur_val) if dur_val is not None else None
            except Exception:
                dur_int = None

            raw_urls = meta.get("known_urls") or meta.get("urls") or meta.get("url") or []
            url_list: list[str] = []
            if isinstance(raw_urls, str):
                s = raw_urls.strip()
                url_list = [s] if s else []
            elif isinstance(raw_urls, list):
                url_list = [
                    str(u).strip() for u in raw_urls if isinstance(u, str) and str(u).strip()
                ]

            return {
                "hash": file_hash,
                "title": title,
                "ext": ext,
                "size": size_int,
                "mime": mime_type,
                # Keep raw fields available for troubleshooting/other callers.
"hydrus_mime": mime_val, "filetype_human": filetype_human, "duration_ms": dur_int, "url": url_list, } except Exception as exc: debug(f"{self._log_prefix()} get_metadata failed: {exc}") return None def get_tag(self, file_identifier: str, **kwargs: Any) -> Tuple[List[str], str]: """Get tags for a file from Hydrus by hash. Args: file_identifier: File hash (SHA256 hex string) **kwargs: Optional service_name parameter Returns: Tuple of (tags_list, source_description) where source is always "hydrus" """ try: from API import HydrusNetwork as hydrus_wrapper file_hash = str(file_identifier or "").strip().lower() if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash): debug( f"{self._log_prefix()} get_tags: invalid file hash '{file_identifier}'" ) return [], "unknown" # Get Hydrus client and service info client = self._client if not client: debug(f"{self._log_prefix()} get_tags: client unavailable") return [], "unknown" # Fetch file metadata payload = client.fetch_file_metadata( hashes=[file_hash], include_service_keys_to_tags=True, include_file_url=False ) items = payload.get("metadata") if isinstance(payload, dict) else None if not isinstance(items, list) or not items: debug( f"{self._log_prefix()} get_tags: no metadata for hash {file_hash}" ) return [], "unknown" meta = items[0] if isinstance(items[0], dict) else None if not isinstance(meta, dict) or meta.get("file_id") is None: debug( f"{self._log_prefix()} get_tags: invalid metadata for hash {file_hash}" ) return [], "unknown" # Extract tags using service name service_name = "my tags" service_key = hydrus_wrapper.get_tag_service_key(client, service_name) # Extract tags from metadata tags = self._extract_tags_from_hydrus_meta(meta, service_key, service_name) return [ str(t).strip().lower() for t in tags if isinstance(t, str) and t.strip() ], "hydrus" except Exception as exc: debug(f"{self._log_prefix()} get_tags failed: {exc}") return [], "unknown" def add_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool: """Add tags to a Hydrus file.""" try: client = self._client if client is None: debug(f"{self._log_prefix()} add_tag: client unavailable") return False file_hash = str(file_identifier or "").strip().lower() if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash): debug( f"{self._log_prefix()} add_tag: invalid file hash '{file_identifier}'" ) return False service_name = kwargs.get("service_name") or "my tags" incoming_tags = [ str(t).strip().lower() for t in (tags or []) if isinstance(t, str) and str(t).strip() ] if not incoming_tags: return True try: existing_tags, _src = self.get_tag(file_hash) except Exception: existing_tags = [] from metadata import compute_namespaced_tag_overwrite tags_to_remove, tags_to_add, _merged = compute_namespaced_tag_overwrite( existing_tags, incoming_tags ) if not tags_to_add and not tags_to_remove: return True did_any = False if tags_to_remove: try: client.delete_tag(file_hash, tags_to_remove, service_name) did_any = True except Exception as exc: debug(f"{self._log_prefix()} add_tag: delete_tag failed: {exc}") if tags_to_add: try: client.add_tag(file_hash, tags_to_add, service_name) did_any = True except Exception as exc: debug(f"{self._log_prefix()} add_tag: add_tag failed: {exc}") return did_any except Exception as exc: debug(f"{self._log_prefix()} add_tag failed: {exc}") return False def delete_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool: """Delete tags from a Hydrus file.""" try: client = self._client if client 
                debug(f"{self._log_prefix()} delete_tag: client unavailable")
                return False

            file_hash = str(file_identifier or "").strip().lower()
            if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash):
                debug(
                    f"{self._log_prefix()} delete_tag: invalid file hash '{file_identifier}'"
                )
                return False

            service_name = kwargs.get("service_name") or "my tags"
            raw_list = list(tags) if isinstance(tags, (list, tuple)) else [str(tags)]
            tag_list = [
                str(t).strip().lower() for t in raw_list if isinstance(t, str) and str(t).strip()
            ]
            if not tag_list:
                return False

            client.delete_tag(file_hash, tag_list, service_name)
            return True
        except Exception as exc:
            debug(f"{self._log_prefix()} delete_tag failed: {exc}")
            return False

    def get_url(self, file_identifier: str, **kwargs: Any) -> List[str]:
        """Get the known URLs for a Hydrus file."""
        try:
            client = self._client
            file_hash = str(file_identifier or "").strip().lower()
            if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash):
                return []

            payload = client.fetch_file_metadata(
                hashes=[file_hash], include_file_url=False
            )
            items = payload.get("metadata") if isinstance(payload, dict) else None
            if not isinstance(items, list) or not items:
                return []

            meta = items[0] if isinstance(items[0], dict) else {}
            raw_urls: Any = meta.get("known_urls") or meta.get("urls") or meta.get("url") or []
            if isinstance(raw_urls, str):
                val = raw_urls.strip()
                return [val] if val else []
            if isinstance(raw_urls, list):
                out: list[str] = []
                for u in raw_urls:
                    if not isinstance(u, str):
                        continue
                    u = u.strip()
                    if u:
                        out.append(u)
                return out
            return []
        except Exception as exc:
            debug(f"{self._log_prefix()} get_url failed: {exc}")
            return []

    def add_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
        """Associate one or more URLs with a Hydrus file."""
        try:
            client = self._client
            if client is None:
                debug(f"{self._log_prefix()} add_url: client unavailable")
                return False
            for u in url:
                client.associate_url(file_identifier, u)
            return True
        except Exception as exc:
            debug(f"{self._log_prefix()} add_url failed: {exc}")
            return False

    def add_url_bulk(self, items: List[tuple[str, List[str]]], **kwargs: Any) -> bool:
        """Bulk-associate URLs with Hydrus files.

        This is a best-effort convenience wrapper used by cmdlets to batch URL
        associations. Hydrus' client API is still called per (hash, url) pair, but this
        consolidates the cmdlet-level control flow so URL association can be deferred
        until the end.
""" try: client = self._client if client is None: debug(f"{self._log_prefix()} add_url_bulk: client unavailable") return False any_success = False for file_identifier, urls in items or []: h = str(file_identifier or "").strip().lower() if len(h) != 64: continue for u in urls or []: s = str(u or "").strip() if not s: continue try: client.associate_url(h, s) any_success = True except Exception: continue return any_success except Exception as exc: debug(f"{self._log_prefix()} add_url_bulk failed: {exc}") return False def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool: """Delete one or more url from a Hydrus file.""" try: client = self._client if client is None: debug(f"{self._log_prefix()} delete_url: client unavailable") return False for u in url: client.delete_url(file_identifier, u) return True except Exception as exc: debug(f"{self._log_prefix()} delete_url failed: {exc}") return False def get_note(self, file_identifier: str, **kwargs: Any) -> Dict[str, str]: """Get notes for a Hydrus file (default note service only).""" try: client = self._client if client is None: debug(f"{self._log_prefix()} get_note: client unavailable") return {} file_hash = str(file_identifier or "").strip().lower() if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash): return {} payload = client.fetch_file_metadata(hashes=[file_hash], include_notes=True) items = payload.get("metadata") if isinstance(payload, dict) else None if not isinstance(items, list) or not items: return {} meta = items[0] if isinstance(items[0], dict) else None if not isinstance(meta, dict): return {} notes_payload = meta.get("notes") if isinstance(notes_payload, dict): return { str(k): str(v or "") for k, v in notes_payload.items() if str(k).strip() } return {} except Exception as exc: debug(f"{self._log_prefix()} get_note failed: {exc}") return {} def set_note( self, file_identifier: str, name: str, text: str, **kwargs: Any ) -> bool: """Set a named note for a Hydrus file (default note service only).""" try: client = self._client if client is None: debug(f"{self._log_prefix()} set_note: client unavailable") return False file_hash = str(file_identifier or "").strip().lower() if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash): return False note_name = str(name or "").strip() if not note_name: return False note_text = str(text or "") client.set_notes(file_hash, { note_name: note_text }) return True except Exception as exc: debug(f"{self._log_prefix()} set_note failed: {exc}") return False def delete_note(self, file_identifier: str, name: str, **kwargs: Any) -> bool: """Delete a named note for a Hydrus file (default note service only).""" try: client = self._client if client is None: debug(f"{self._log_prefix()} delete_note: client unavailable") return False file_hash = str(file_identifier or "").strip().lower() if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash): return False note_name = str(name or "").strip() if not note_name: return False client.delete_notes(file_hash, [note_name]) return True except Exception as exc: debug(f"{self._log_prefix()} delete_note failed: {exc}") return False @staticmethod def _extract_tags_from_hydrus_meta( meta: Dict[str, Any], service_key: Optional[str], service_name: str ) -> List[str]: """Extract current tags from Hydrus metadata dict. Prefers display_tags (includes siblings/parents, excludes deleted). Falls back to storage_tags status '0' (current). 
""" tags_payload = meta.get("tags") if not isinstance(tags_payload, dict): return [] svc_data = None if service_key: svc_data = tags_payload.get(service_key) if not isinstance(svc_data, dict): return [] # Prefer display_tags (Hydrus computes siblings/parents) display = svc_data.get("display_tags") if isinstance(display, list) and display: return [ str(t) for t in display if isinstance(t, (str, bytes)) and str(t).strip() ] # Fallback to storage_tags status '0' (current) storage = svc_data.get("storage_tags") if isinstance(storage, dict): current_list = storage.get("0") or storage.get(0) if isinstance(current_list, list): return [ str(t) for t in current_list if isinstance(t, (str, bytes)) and str(t).strip() ] return []