from __future__ import annotations import re import sys import tempfile import shutil from pathlib import Path from typing import Any, Dict, List, Optional, Tuple from urllib.parse import quote import httpx from SYS.logger import debug, log from SYS.utils_constant import mime_maps from Store._base import Store _HYDRUS_INIT_CHECK_CACHE: dict[tuple[str, str], tuple[bool, Optional[str]]] = {} class HydrusNetwork(Store): """File storage backend for Hydrus client. Each instance represents a specific Hydrus client connection. Maintains its own HydrusClient. """ def _log_prefix(self) -> str: store_name = getattr(self, "NAME", None) or "unknown" return f"[hydrusnetwork:{store_name}]" def __new__(cls, *args: Any, **kwargs: Any) -> "HydrusNetwork": instance = super().__new__(cls) name = kwargs.get("NAME") api = kwargs.get("API") url = kwargs.get("URL") if name is not None: setattr(instance, "NAME", str(name)) if api is not None: setattr(instance, "API", str(api)) if url is not None: setattr(instance, "URL", str(url)) return instance setattr(__new__, "keys", ("NAME", "API", "URL")) def __init__( self, instance_name: Optional[str] = None, api_key: Optional[str] = None, url: Optional[str] = None, *, NAME: Optional[str] = None, API: Optional[str] = None, URL: Optional[str] = None, ) -> None: """Initialize Hydrus storage backend. Args: instance_name: Name of this Hydrus instance (e.g., 'home', 'work') api_key: Hydrus Client API access key url: Hydrus client URL (e.g., 'http://192.168.1.230:45869') """ from API.HydrusNetwork import HydrusNetwork as HydrusClient if instance_name is None and NAME is not None: instance_name = str(NAME) if api_key is None and API is not None: api_key = str(API) if url is None and URL is not None: url = str(URL) if not instance_name or not api_key or not url: raise ValueError("HydrusNetwork requires NAME, API, and URL") self.NAME = instance_name self.API = api_key self.URL = url.rstrip("/") # Total count (best-effort, used for startup diagnostics) self.total_count: Optional[int] = None # Self health-check: validate the URL is reachable and the access key is accepted. # This MUST NOT attempt to acquire a session key. cache_key = (self.URL, self.API) cached = _HYDRUS_INIT_CHECK_CACHE.get(cache_key) if cached is not None: ok, err = cached if not ok: raise RuntimeError( f"Hydrus '{self.NAME}' unavailable: {err or 'Unavailable'}" ) else: api_version_url = f"{self.URL}/api_version" verify_key_url = f"{self.URL}/verify_access_key" try: with httpx.Client(timeout=5.0, verify=False, follow_redirects=True) as client: version_resp = client.get(api_version_url) version_resp.raise_for_status() version_payload = version_resp.json() if not isinstance(version_payload, dict): raise RuntimeError( "Hydrus /api_version returned an unexpected response" ) verify_resp = client.get( verify_key_url, headers={ "Hydrus-Client-API-Access-Key": self.API }, ) verify_resp.raise_for_status() verify_payload = verify_resp.json() if not isinstance(verify_payload, dict): raise RuntimeError( "Hydrus /verify_access_key returned an unexpected response" ) _HYDRUS_INIT_CHECK_CACHE[cache_key] = (True, None) except Exception as exc: err = str(exc) _HYDRUS_INIT_CHECK_CACHE[cache_key] = (False, err) raise RuntimeError(f"Hydrus '{self.NAME}' unavailable: {err}") from exc # Create a persistent client for this instance (auth via access key by default). self._client = HydrusClient( url=self.URL, access_key=self.API, instance_name=self.NAME ) self._service_key_cache: Dict[str, Optional[str]] = {} # Best-effort total count (used for startup diagnostics). Avoid heavy payloads. # Some Hydrus setups appear to return no count via the CBOR client for this endpoint, # so prefer a direct JSON request with a short timeout. # NOTE: Disabled to avoid unnecessary API call during init; count will be retrieved on first search/list if needed. # try: # self.get_total_count(refresh=True) # except Exception: # pass def _get_service_key(self, service_name: str, *, refresh: bool = False) -> Optional[str]: """Resolve (and cache) the Hydrus service key for the given service name.""" normalized = str(service_name or "my tags").strip() if not normalized: normalized = "my tags" cache_key = normalized.lower() if not refresh and cache_key in self._service_key_cache: return self._service_key_cache[cache_key] client = self._client if client is None: self._service_key_cache[cache_key] = None return None try: from API import HydrusNetwork as hydrus_wrapper resolved = hydrus_wrapper.get_tag_service_key(client, normalized) except Exception: resolved = None self._service_key_cache[cache_key] = resolved return resolved def get_total_count(self, *, refresh: bool = False) -> Optional[int]: """Best-effort total file count for this Hydrus instance. Intended for diagnostics (e.g., REPL startup checks). This should be fast, and it MUST NOT raise. """ if self.total_count is not None and not refresh: return self.total_count # 1) Prefer a direct JSON request (fast + avoids CBOR edge cases). try: import json as _json url = f"{self.URL}/get_files/search_files" params = { "tags": _json.dumps(["system:everything"]), "return_hashes": "false", "return_file_ids": "false", "return_file_count": "true", } headers = { "Hydrus-Client-API-Access-Key": self.API, "Accept": "application/json", } with httpx.Client(timeout=5.0, verify=False, follow_redirects=True) as client: resp = client.get(url, params=params, headers=headers) resp.raise_for_status() payload = resp.json() count_val = None if isinstance(payload, dict): count_val = payload.get("file_count") if count_val is None: count_val = payload.get("file_count_inclusive") if count_val is None: count_val = payload.get("num_files") if isinstance(count_val, int): self.total_count = count_val return self.total_count except Exception as exc: debug( f"{self._log_prefix()} total count (json) unavailable: {exc}", file=sys.stderr ) # 2) Fallback to the API client (CBOR). try: payload = self._client.search_files( tags=["system:everything"], return_hashes=False, return_file_ids=False, return_file_count=True, ) count_val = None if isinstance(payload, dict): count_val = payload.get("file_count") if count_val is None: count_val = payload.get("file_count_inclusive") if count_val is None: count_val = payload.get("num_files") if isinstance(count_val, int): self.total_count = count_val return self.total_count except Exception as exc: debug( f"{self._log_prefix()} total count (client) unavailable: {exc}", file=sys.stderr ) return self.total_count def name(self) -> str: return self.NAME def get_name(self) -> str: return self.NAME def set_relationship(self, alt_hash: str, king_hash: str, kind: str = "alt") -> bool: """Persist a relationship via the Hydrus client API for this backend instance.""" try: alt_norm = str(alt_hash or "").strip().lower() king_norm = str(king_hash or "").strip().lower() if len(alt_norm) != 64 or len(king_norm) != 64 or alt_norm == king_norm: return False client = getattr(self, "_client", None) if client is None or not hasattr(client, "set_relationship"): return False client.set_relationship(alt_norm, king_norm, str(kind or "alt")) return True except Exception: return False def add_file(self, file_path: Path, **kwargs: Any) -> str: """Upload file to Hydrus with full metadata support. Args: file_path: Path to the file to upload tag: Optional list of tag values to add url: Optional list of url to associate with the file title: Optional title (will be added as 'title:value' tag) Returns: File hash from Hydrus Raises: Exception: If upload fails """ from SYS.utils import sha256_file tag_list = kwargs.get("tag", []) url = kwargs.get("url", []) title = kwargs.get("title") # Add title to tags if provided and not already present if title: title_tag = f"title:{title}".strip().lower() if not any(str(candidate).lower().startswith("title:") for candidate in tag_list): tag_list = [title_tag] + list(tag_list) # Hydrus is lowercase-only tags; normalize here for consistency. tag_list = [ str(t).strip().lower() for t in (tag_list or []) if isinstance(t, str) and str(t).strip() ] try: # Compute file hash file_hash = sha256_file(file_path) debug(f"{self._log_prefix()} file hash: {file_hash}") # Use persistent client with session key client = self._client if client is None: raise Exception("Hydrus client unavailable") # Check if file already exists in Hydrus file_exists = False try: metadata = client.fetch_file_metadata( hashes=[file_hash], include_service_keys_to_tags=False, include_file_url=True, include_duration=False, include_size=False, include_mime=False, ) if metadata and isinstance(metadata, dict): metas = metadata.get("metadata", []) if isinstance(metas, list) and metas: # Hydrus returns placeholder rows for unknown hashes. # Only treat as a real duplicate if it has a concrete file_id. for meta in metas: if isinstance(meta, dict) and meta.get("file_id") is not None: file_exists = True break if file_exists: debug( f"{self._log_prefix()} Duplicate detected - file already in Hydrus with hash: {file_hash}" ) except Exception: pass # If Hydrus reports an existing file, it may be in trash. Best-effort restore it to 'my files'. # This keeps behavior aligned with user expectation: "use API only" and ensure it lands in my files. if file_exists: try: client.undelete_files([file_hash]) except Exception: pass # Upload file if not already present if not file_exists: debug( f"{self._log_prefix()} Uploading: {file_path.name}" ) response = client.add_file(file_path) # Extract hash from response hydrus_hash: Optional[str] = None if isinstance(response, dict): hydrus_hash = response.get("hash") or response.get("file_hash") if not hydrus_hash: hashes = response.get("hashes") if isinstance(hashes, list) and hashes: hydrus_hash = hashes[0] if not hydrus_hash: raise Exception(f"Hydrus response missing file hash: {response}") file_hash = hydrus_hash debug(f"{self._log_prefix()} hash: {file_hash}") # Add tags if provided (both for new and existing files) if tag_list: try: # Use default tag service service_name = "my tags" except Exception: service_name = "my tags" try: debug( f"{self._log_prefix()} Adding {len(tag_list)} tag(s): {tag_list}" ) client.add_tag(file_hash, tag_list, service_name) debug( f"{self._log_prefix()} Tags added via '{service_name}'" ) except Exception as exc: log( f"{self._log_prefix()} ⚠️ Failed to add tags: {exc}", file=sys.stderr ) # Associate url if provided (both for new and existing files) if url: debug( f"{self._log_prefix()} Associating {len(url)} URL(s) with file" ) for url in url: if url: try: client.associate_url(file_hash, str(url)) debug(f"{self._log_prefix()} Associated URL: {url}") except Exception as exc: log( f"{self._log_prefix()} ⚠️ Failed to associate URL {url}: {exc}", file=sys.stderr, ) return file_hash except Exception as exc: log(f"{self._log_prefix()} ❌ upload failed: {exc}", file=sys.stderr) raise def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]: """Search Hydrus database for files matching query. Args: query: Search query (tags, filenames, hashes, etc.) limit: Maximum number of results to return (default: 100) Returns: List of dicts with 'name', 'hash', 'size', 'tags' fields Example: results = storage["hydrus"].search("artist:john_doe music") results = storage["hydrus"].search("Simple Man") """ limit = kwargs.get("limit", 100) try: client = self._client if client is None: raise Exception("Hydrus client unavailable") prefix = self._log_prefix() debug(f"{prefix} Searching for: {query}") def _extract_urls(meta_obj: Any) -> list[str]: if not isinstance(meta_obj, dict): return [] raw = meta_obj.get("url") if raw is None: raw = meta_obj.get("urls") if isinstance(raw, str): val = raw.strip() return [val] if val else [] if isinstance(raw, list): out: list[str] = [] for item in raw: if not isinstance(item, str): continue s = item.strip() if s: out.append(s) return out return [] def _iter_url_filtered_metadata( url_value: str | None, want_any: bool, fetch_limit: int ) -> list[dict[str, Any]]: """Best-effort URL search by scanning Hydrus metadata with include_file_url=True.""" # First try a fast system predicate if Hydrus supports it. candidate_file_ids: list[int] = [] try: if want_any: predicate = "system:has url" url_search = client.search_files( tags=[predicate], return_hashes=False, return_file_ids=True, return_file_count=False, ) ids = url_search.get("file_ids", []) if isinstance(url_search, dict) else [] if isinstance(ids, list): candidate_file_ids = [ int(x) for x in ids if isinstance(x, (int, float, str)) and str(x).strip().isdigit() ] except Exception: candidate_file_ids = [] if not candidate_file_ids: # Fallback: scan from system:everything and filter by URL substring. everything = client.search_files( tags=["system:everything"], return_hashes=False, return_file_ids=True, return_file_count=False, ) ids = everything.get("file_ids", []) if isinstance(everything, dict) else [] if isinstance(ids, list): candidate_file_ids = [ int(x) for x in ids if isinstance(x, (int, float)) ] if not candidate_file_ids: return [] needle = (url_value or "").strip().lower() chunk_size = 200 out: list[dict[str, Any]] = [] for start in range(0, len(candidate_file_ids), chunk_size): if len(out) >= fetch_limit: break chunk = candidate_file_ids[start:start + chunk_size] try: payload = client.fetch_file_metadata( file_ids=chunk, include_file_url=True, include_service_keys_to_tags=True, include_duration=True, include_size=True, include_mime=True, ) except Exception: continue metas = payload.get("metadata", []) if isinstance(payload, dict) else [] if not isinstance(metas, list): continue for meta in metas: if not isinstance(meta, dict): continue urls = _extract_urls(meta) if not urls: continue if want_any: out.append(meta) if len(out) >= fetch_limit: break continue if not needle: continue if any(needle in u.lower() for u in urls): out.append(meta) if len(out) >= fetch_limit: break return out query_lower = query.lower().strip() # Support `ext:` anywhere in the query. We filter results by the # Hydrus metadata extension field. def _normalize_ext_filter(value: str) -> str: v = str(value or "").strip().lower().lstrip(".") v = "".join(ch for ch in v if ch.isalnum()) return v ext_filter: str | None = None ext_only: bool = False try: m = re.search(r"\bext:([^\s,]+)", query_lower) if not m: m = re.search(r"\bextension:([^\s,]+)", query_lower) if m: ext_filter = _normalize_ext_filter(m.group(1)) or None query_lower = re.sub( r"\s*\b(?:ext|extension):[^\s,]+", " ", query_lower ) query_lower = re.sub(r"\s{2,}", " ", query_lower).strip().strip(",") query = query_lower if ext_filter and not query_lower: query = "*" query_lower = "*" ext_only = True except Exception: ext_filter = None ext_only = False # Split into meaningful terms for AND logic. # Avoid punctuation tokens like '-' that would make matching brittle. search_terms = [t for t in re.findall(r"[a-z0-9]+", query_lower) if t] # Special case: url:* and url: metadata_list: list[dict[str, Any]] | None = None if ":" in query_lower and not query_lower.startswith(":"): namespace, pattern = query_lower.split(":", 1) namespace = namespace.strip().lower() pattern = pattern.strip() if namespace == "url": if not pattern or pattern == "*": metadata_list = _iter_url_filtered_metadata( None, want_any=True, fetch_limit=int(limit) if limit else 100 ) else: # Fast-path: exact URL via /add_urls/get_url_files when a full URL is provided. try: if pattern.startswith("http://") or pattern.startswith( "https://"): from API.HydrusNetwork import HydrusRequestSpec spec = HydrusRequestSpec( method="GET", endpoint="/add_urls/get_url_files", query={ "url": pattern }, ) response = client._perform_request( spec ) # type: ignore[attr-defined] hashes: list[str] = [] file_ids: list[int] = [] if isinstance(response, dict): raw_hashes = response.get("hashes") or response.get( "file_hashes" ) if isinstance(raw_hashes, list): hashes = [ str(h).strip() for h in raw_hashes if isinstance(h, str) and str(h).strip() ] raw_ids = response.get("file_ids") if isinstance(raw_ids, list): for item in raw_ids: try: file_ids.append(int(item)) except (TypeError, ValueError): continue if file_ids: payload = client.fetch_file_metadata( file_ids=file_ids, include_file_url=True, include_service_keys_to_tags=True, include_duration=True, include_size=True, include_mime=True, ) metas = ( payload.get("metadata", []) if isinstance(payload, dict) else [] ) if isinstance(metas, list): metadata_list = [ m for m in metas if isinstance(m, dict) ] elif hashes: payload = client.fetch_file_metadata( hashes=hashes, include_file_url=True, include_service_keys_to_tags=True, include_duration=True, include_size=True, include_mime=True, ) metas = ( payload.get("metadata", []) if isinstance(payload, dict) else [] ) if isinstance(metas, list): metadata_list = [ m for m in metas if isinstance(m, dict) ] except Exception: metadata_list = None # Fallback: substring scan if metadata_list is None: metadata_list = _iter_url_filtered_metadata( pattern, want_any=False, fetch_limit=int(limit) if limit else 100 ) # Parse the query into tags # "*" means "match all" - use system:everything tag in Hydrus # If query has explicit namespace, use it as a tag search. # If query is free-form, search BOTH: # - title:*term* (title: is the only namespace searched implicitly) # - *term* (freeform tags; we will filter out other namespace matches client-side) tags: list[str] = [] freeform_union_search: bool = False title_predicates: list[str] = [] freeform_predicates: list[str] = [] if query.strip() == "*": tags = ["system:everything"] elif ":" in query_lower: tags = [query_lower] else: freeform_union_search = True if search_terms: # Hydrus supports wildcard matching primarily as a prefix (e.g., tag*). # Use per-term prefix matching for both title: and freeform tags. title_predicates = [f"title:{term}*" for term in search_terms] freeform_predicates = [f"{term}*" for term in search_terms] else: # If we can't extract alnum terms, fall back to the raw query text. title_predicates = [f"title:{query_lower}*"] freeform_predicates = [f"{query_lower}*"] # Search files with the tags (unless url: search already produced metadata) results = [] def _extract_search_ids(payload: Any) -> tuple[list[int], list[str]]: if not isinstance(payload, dict): return [], [] raw_ids = payload.get("file_ids", []) raw_hashes = payload.get("hashes", []) ids_out: list[int] = [] hashes_out: list[str] = [] if isinstance(raw_ids, list): for item in raw_ids: try: ids_out.append(int(item)) except (TypeError, ValueError): continue if isinstance(raw_hashes, list): hashes_out = [ str(h).strip() for h in raw_hashes if isinstance(h, str) and str(h).strip() ] return ids_out, hashes_out if metadata_list is None: file_ids: list[int] = [] hashes: list[str] = [] if freeform_union_search: if not title_predicates and not freeform_predicates: debug(f"{prefix} 0 result(s)") return [] payloads: list[Any] = [] try: payloads.append( client.search_files( tags=title_predicates, return_hashes=True, return_file_ids=True, ) ) except Exception: pass try: payloads.append( client.search_files( tags=freeform_predicates, return_hashes=True, return_file_ids=True, ) ) except Exception: pass id_set: set[int] = set() hash_set: set[str] = set() for payload in payloads: ids_part, hashes_part = _extract_search_ids(payload) for fid in ids_part: id_set.add(fid) for hh in hashes_part: hash_set.add(hh) file_ids = list(id_set) hashes = list(hash_set) else: if not tags: debug(f"{prefix} 0 result(s)") return [] search_result = client.search_files( tags=tags, return_hashes=True, return_file_ids=True ) file_ids, hashes = _extract_search_ids(search_result) # Fast path: ext-only search. Avoid fetching metadata for an unbounded # system:everything result set; fetch in chunks until we have enough. if ext_only and ext_filter: results: list[dict[str, Any]] = [] if not file_ids and not hashes: debug(f"{prefix} 0 result(s)") return [] # Prefer file_ids if available. if file_ids: chunk_size = 200 for start in range(0, len(file_ids), chunk_size): if len(results) >= limit: break chunk = file_ids[start:start + chunk_size] try: payload = client.fetch_file_metadata( file_ids=chunk, include_service_keys_to_tags=True, include_file_url=True, include_duration=True, include_size=True, include_mime=True, ) except Exception: continue metas = payload.get("metadata", []) if isinstance(payload, dict) else [] if not isinstance(metas, list): continue for meta in metas: if len(results) >= limit: break if not isinstance(meta, dict): continue mime_type = meta.get("mime") ext = str(meta.get("ext") or "").strip().lstrip(".") if not ext and mime_type: for category in mime_maps.values(): for _ext_key, info in category.items(): if mime_type in info.get("mimes", []): ext = str(info.get("ext", "") ).strip().lstrip(".") break if ext: break if _normalize_ext_filter(ext) != ext_filter: continue file_id = meta.get("file_id") hash_hex = meta.get("hash") size = meta.get("size", 0) tags_set = meta.get("tags", {}) all_tags: list[str] = [] title = f"Hydrus File {file_id}" if isinstance(tags_set, dict): def _collect(tag_list: Any) -> None: nonlocal title if not isinstance(tag_list, list): return for tag in tag_list: tag_text = str(tag) if tag else "" if not tag_text: continue tag_l = tag_text.strip().lower() if not tag_l: continue all_tags.append(tag_l) if (tag_l.startswith("title:") and title == f"Hydrus File {file_id}"): title = tag_l.split(":", 1)[1].strip() for _service_name, service_tags in tags_set.items(): if not isinstance(service_tags, dict): continue storage_tags = service_tags.get( "storage_tags", {} ) if isinstance(storage_tags, dict): for tag_list in storage_tags.values(): _collect(tag_list) display_tags = service_tags.get( "display_tags", [] ) _collect(display_tags) # Unique tags all_tags = sorted(list(set(all_tags))) # Use known URLs (source URLs) from Hydrus if available (matches get-url cmdlet) item_url = meta.get("known_urls") or meta.get("urls") or meta.get("url") or [] if not item_url: item_url = meta.get("file_url") or f"{self.URL.rstrip('/')}/view_file?hash={hash_hex}" results.append( { "hash": hash_hex, "url": item_url, "name": title, "title": title, "size": size, "size_bytes": size, "store": self.NAME, "tag": all_tags, "file_id": file_id, "mime": mime_type, "ext": ext, } ) debug(f"{prefix} {len(results)} result(s)") return results[:limit] # If we only got hashes, fall back to the normal flow below. if not file_ids and not hashes: debug(f"{prefix} 0 result(s)") return [] if file_ids: metadata = client.fetch_file_metadata( file_ids=file_ids, include_service_keys_to_tags=True, include_file_url=True, include_duration=True, include_size=True, include_mime=True, ) metadata_list = metadata.get("metadata", []) elif hashes: metadata = client.fetch_file_metadata( hashes=hashes, include_service_keys_to_tags=True, include_file_url=True, include_duration=True, include_size=True, include_mime=True, ) metadata_list = metadata.get("metadata", []) else: metadata_list = [] # If our free-text searches produce nothing (or nothing survived downstream filtering), fallback to scanning. if (not metadata_list) and (query_lower != "*") and (":" not in query_lower): try: search_result = client.search_files( tags=["system:everything"], return_hashes=True, return_file_ids=True, ) file_ids, hashes = _extract_search_ids(search_result) if file_ids: metadata = client.fetch_file_metadata( file_ids=file_ids, include_service_keys_to_tags=True, include_file_url=True, include_duration=True, include_size=True, include_mime=True, ) metadata_list = metadata.get("metadata", []) elif hashes: metadata = client.fetch_file_metadata( hashes=hashes, include_service_keys_to_tags=True, include_file_url=True, include_duration=True, include_size=True, include_mime=True, ) metadata_list = metadata.get("metadata", []) except Exception: pass if not isinstance(metadata_list, list): metadata_list = [] for meta in metadata_list: if len(results) >= limit: break file_id = meta.get("file_id") hash_hex = meta.get("hash") size = meta.get("size", 0) # Get tags for this file and extract title tags_set = meta.get("tags", {}) all_tags = [] title = f"Hydrus File {file_id}" # Default fallback all_tags_str = "" # For substring matching # debug(f"[HydrusBackend.search] Processing file_id={file_id}, tags type={type(tags_set)}") if isinstance(tags_set, dict): # Collect both storage_tags and display_tags to capture siblings/parents and ensure title: is seen def _collect(tag_list: Any) -> None: nonlocal title, all_tags_str if not isinstance(tag_list, list): return for tag in tag_list: tag_text = str(tag) if tag else "" if not tag_text: continue tag_l = tag_text.strip().lower() if not tag_l: continue all_tags.append(tag_l) all_tags_str += " " + tag_l if tag_l.startswith("title:" ) and title == f"Hydrus File {file_id}": title = tag_l.split(":", 1)[1].strip() for _service_name, service_tags in tags_set.items(): if not isinstance(service_tags, dict): continue storage_tags = service_tags.get("storage_tags", {}) if isinstance(storage_tags, dict): for tag_list in storage_tags.values(): _collect(tag_list) display_tags = service_tags.get("display_tags", []) _collect(display_tags) # Also consider top-level flattened tags payload if provided (Hydrus API sometimes includes it) top_level_tags = meta.get("tags_flat", []) or meta.get("tags", []) _collect(top_level_tags) # Unique tags all_tags = sorted(list(set(all_tags))) # Prefer Hydrus-provided extension (e.g. ".webm"); fall back to MIME map. mime_type = meta.get("mime") ext = str(meta.get("ext") or "").strip().lstrip(".") if not ext and mime_type: for category in mime_maps.values(): for _ext_key, info in category.items(): if mime_type in info.get("mimes", []): ext = str(info.get("ext", "")).strip().lstrip(".") break if ext: break # Filter results based on query type # If user provided explicit namespace (has ':'), don't do substring filtering # Just include what the tag search returned has_namespace = ":" in query_lower # Use known URLs (source URLs) from Hydrus if available (matches get-url cmdlet) item_url = meta.get("known_urls") or meta.get("urls") or meta.get("url") or [] if not item_url: item_url = meta.get("file_url") or f"{self.URL.rstrip('/')}/view_file?hash={hash_hex}" if has_namespace: # Explicit namespace search - already filtered by Hydrus tag search # Include this result as-is results.append( { "hash": hash_hex, "url": item_url, "name": title, "title": title, "size": size, "size_bytes": size, "store": self.NAME, "tag": all_tags, "file_id": file_id, "mime": mime_type, "ext": ext, } ) else: # Free-form search: check if search terms match title or FREEFORM tags. # Do NOT implicitly match other namespace tags (except title:). freeform_tags = [ t for t in all_tags if isinstance(t, str) and t and (":" not in t) ] searchable_text = (title + " " + " ".join(freeform_tags)).lower() match = True if query_lower != "*" and search_terms: for term in search_terms: if term not in searchable_text: match = False break if match: results.append( { "hash": hash_hex, "url": item_url, "name": title, "title": title, "size": size, "size_bytes": size, "store": self.NAME, "tag": all_tags, "file_id": file_id, "mime": mime_type, "ext": ext, } ) debug(f"{prefix} {len(results)} result(s)") if ext_filter: wanted = ext_filter filtered: list[dict[str, Any]] = [] for item in results: try: if _normalize_ext_filter(str(item.get("ext") or "")) == wanted: filtered.append(item) except Exception: continue results = filtered return results[:limit] except Exception as exc: log(f"❌ Hydrus search failed: {exc}", file=sys.stderr) import traceback traceback.print_exc(file=sys.stderr) raise def get_file(self, file_hash: str, **kwargs: Any) -> Path | str | None: """Return the local file system path if available, else a browser URL. IMPORTANT: this method must be side-effect free (do not auto-open a browser). Only explicit user actions (e.g. the get-file cmdlet) should open files. """ debug(f"{self._log_prefix()} get_file: start hash={file_hash[:12]}...") # Try to get the local disk path if possible (works if Hydrus is on same machine) server_path = None try: path_res = self._client.get_file_path(file_hash) if isinstance(path_res, dict) and "path" in path_res: server_path = path_res["path"] if server_path: local_path = Path(server_path) if local_path.exists(): debug(f"{self._log_prefix()} get_file: found local path: {local_path}") return local_path except Exception as e: debug(f"{self._log_prefix()} get_file: could not resolve path from API: {e}") # If we found a path on the server but it's not locally accessible, # return it as a string so it can be displayed in metadata panels. if server_path: debug(f"{self._log_prefix()} get_file: returning server path (not local): {server_path}") return server_path # Fallback to browser URL with access key base_url = str(self.URL).rstrip("/") access_key = str(self.API) browser_url = ( f"{base_url}/get_files/file?hash={file_hash}&Hydrus-Client-API-Access-Key={access_key}" ) debug(f"{self._log_prefix()} get_file: falling back to url={browser_url}") return browser_url def download_to_temp( self, file_hash: str, *, temp_root: Optional[Path] = None, ) -> Optional[Path]: """Download a Hydrus file to a temporary path for downstream uploads.""" try: client = self._client if client is None: return None h = str(file_hash or "").strip().lower() if len(h) != 64 or not all(ch in "0123456789abcdef" for ch in h): return None created_tmp = False base_tmp = Path(temp_root) if temp_root is not None else Path( tempfile.mkdtemp(prefix="hydrus-file-") ) if temp_root is None: created_tmp = True base_tmp.mkdir(parents=True, exist_ok=True) def _safe_filename(raw: str) -> str: cleaned = re.sub(r"[\\/:*?\"<>|]", "_", str(raw or "")).strip() if not cleaned: return h cleaned = cleaned.strip(". ") or h return cleaned # Prefer ext/title from metadata when available. fname = h ext_val = "" try: meta = self.get_metadata(h) or {} if isinstance(meta, dict): title_val = str(meta.get("title") or "").strip() if title_val: fname = _safe_filename(title_val) ext_val = str(meta.get("ext") or "").strip().lstrip(".") except Exception: pass if not fname: fname = h if ext_val and not fname.lower().endswith(f".{ext_val.lower()}"): fname = f"{fname}.{ext_val}" try: file_url = client.file_url(h) except Exception: file_url = f"{self.URL.rstrip('/')}/get_files/file?hash={quote(h)}" dest_path = base_tmp / fname with httpx.stream( "GET", file_url, headers={"Hydrus-Client-API-Access-Key": self.API}, follow_redirects=True, timeout=60.0, verify=False, ) as resp: resp.raise_for_status() with dest_path.open("wb") as fh: for chunk in resp.iter_bytes(): if chunk: fh.write(chunk) if dest_path.exists(): return dest_path if created_tmp: try: shutil.rmtree(base_tmp, ignore_errors=True) except Exception: pass return None except Exception as exc: log(f"{self._log_prefix()} download_to_temp failed: {exc}", file=sys.stderr) try: if temp_root is None and "base_tmp" in locals(): shutil.rmtree(base_tmp, ignore_errors=True) # type: ignore[arg-type] except Exception: pass return None def delete_file(self, file_identifier: str, **kwargs: Any) -> bool: """Delete a file from Hydrus, then clear the deletion record. This is used by the delete-file cmdlet when the item belongs to a HydrusNetwork store. """ try: client = self._client if client is None: debug(f"{self._log_prefix()} delete_file: client unavailable") return False file_hash = str(file_identifier or "").strip().lower() if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash): debug( f"{self._log_prefix()} delete_file: invalid file hash '{file_identifier}'" ) return False reason = kwargs.get("reason") reason_text = ( str(reason).strip() if isinstance(reason, str) and reason.strip() else None ) # 1) Delete file client.delete_files([file_hash], reason=reason_text) # 2) Clear deletion record (best-effort) try: client.clear_file_deletion_record([file_hash]) except Exception as exc: debug( f"{self._log_prefix()} delete_file: clear_file_deletion_record failed: {exc}" ) return True except Exception as exc: debug(f"{self._log_prefix()} delete_file failed: {exc}") return False def get_metadata(self, file_hash: str, **kwargs: Any) -> Optional[Dict[str, Any]]: """Get metadata for a file from Hydrus by hash. Args: file_hash: SHA256 hash of the file (64-char hex string) Returns: Dict with metadata fields or None if not found """ try: client = self._client if not client: debug(f"{self._log_prefix()} get_metadata: client unavailable") return None # Fetch file metadata with the fields we need for CLI display. payload = client.fetch_file_metadata( hashes=[file_hash], include_service_keys_to_tags=True, include_file_url=True, include_duration=True, include_size=True, include_mime=True, ) if not payload or not payload.get("metadata"): return None meta = payload["metadata"][0] # Hydrus can return placeholder metadata rows for unknown hashes. if not isinstance(meta, dict) or meta.get("file_id") is None: return None # Extract title from tags title = f"Hydrus_{file_hash[:12]}" tags_payload = meta.get("tags", {}) if isinstance(tags_payload, dict): for service_data in tags_payload.values(): if isinstance(service_data, dict): display_tags = service_data.get("display_tags", {}) if isinstance(display_tags, dict): current_tags = display_tags.get("0", []) if isinstance(current_tags, list): for tag in current_tags: if str(tag).lower().startswith("title:"): title = tag.split(":", 1)[1].strip() break if title != f"Hydrus_{file_hash[:12]}": break # Hydrus may return mime as an int enum, or sometimes a human label. mime_val = meta.get("mime") filetype_human = ( meta.get("filetype_human") or meta.get("mime_human") or meta.get("mime_string") ) # Determine ext: prefer Hydrus metadata ext, then filetype_human (when it looks like an ext), # then title suffix, then file path suffix. ext = str(meta.get("ext") or "").strip().lstrip(".") if not ext: ft = str(filetype_human or "").strip().lstrip(".").lower() if ft and ft != "unknown filetype" and ft.isalnum() and len(ft) <= 8: # Treat simple labels like "mp4", "m4a", "webm" as extensions. ext = ft if not ext and isinstance(title, str) and "." in title: try: ext = Path(title).suffix.lstrip(".") except Exception: ext = "" if not ext: try: path_payload = client.get_file_path(file_hash) if isinstance(path_payload, dict): p = path_payload.get("path") if isinstance(p, str) and p.strip(): ext = Path(p.strip()).suffix.lstrip(".") except Exception: ext = "" # If extension is still unknown, attempt a best-effort lookup from MIME. def _mime_from_ext(ext_value: str) -> str: ext_clean = str(ext_value or "").strip().lstrip(".").lower() if not ext_clean: return "" try: for category in mime_maps.values(): info = category.get(ext_clean) if isinstance(info, dict): mimes = info.get("mimes") if isinstance(mimes, list) and mimes: first = mimes[0] return str(first) except Exception: return "" return "" # Normalize to a MIME string for CLI output. # Avoid passing through human labels like "unknown filetype". mime_type = "" if isinstance(mime_val, str): candidate = mime_val.strip() if "/" in candidate and candidate.lower() != "unknown filetype": mime_type = candidate if not mime_type and isinstance(filetype_human, str): candidate = filetype_human.strip() if "/" in candidate and candidate.lower() != "unknown filetype": mime_type = candidate if not mime_type: mime_type = _mime_from_ext(ext) # Normalize size/duration to stable scalar types. size_val = meta.get("size") if size_val is None: size_val = meta.get("size_bytes") try: size_int: int | None = int(size_val) if size_val is not None else None except Exception: size_int = None dur_val = meta.get("duration") if dur_val is None: dur_val = meta.get("duration_ms") try: dur_int: int | None = int(dur_val) if dur_val is not None else None except Exception: dur_int = None raw_urls = meta.get("known_urls") or meta.get("urls") or meta.get("url" ) or [] url_list: list[str] = [] if isinstance(raw_urls, str): s = raw_urls.strip() url_list = [s] if s else [] elif isinstance(raw_urls, list): url_list = [ str(u).strip() for u in raw_urls if isinstance(u, str) and str(u).strip() ] return { "hash": file_hash, "title": title, "ext": ext, "size": size_int, "mime": mime_type, # Keep raw fields available for troubleshooting/other callers. "hydrus_mime": mime_val, "filetype_human": filetype_human, "duration_ms": dur_int, "url": url_list, } except Exception as exc: debug(f"{self._log_prefix()} get_metadata failed: {exc}") return None def get_tag(self, file_identifier: str, **kwargs: Any) -> Tuple[List[str], str]: """Get tags for a file from Hydrus by hash. Args: file_identifier: File hash (SHA256 hex string) **kwargs: Optional service_name parameter Returns: Tuple of (tags_list, source_description) where source is always "hydrus" """ try: file_hash = str(file_identifier or "").strip().lower() if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash): debug( f"{self._log_prefix()} get_tags: invalid file hash '{file_identifier}'" ) return [], "unknown" # Get Hydrus client and service info client = self._client if not client: debug(f"{self._log_prefix()} get_tags: client unavailable") return [], "unknown" # Fetch file metadata payload = client.fetch_file_metadata( hashes=[file_hash], include_service_keys_to_tags=True, include_file_url=True ) items = payload.get("metadata") if isinstance(payload, dict) else None if not isinstance(items, list) or not items: debug( f"{self._log_prefix()} get_tags: no metadata for hash {file_hash}" ) return [], "unknown" meta = items[0] if isinstance(items[0], dict) else None if not isinstance(meta, dict) or meta.get("file_id") is None: debug( f"{self._log_prefix()} get_tags: invalid metadata for hash {file_hash}" ) return [], "unknown" service_name = kwargs.get("service_name") or "my tags" service_key = self._get_service_key(service_name) # Extract tags from metadata tags = self._extract_tags_from_hydrus_meta(meta, service_key, service_name) return [ str(t).strip().lower() for t in tags if isinstance(t, str) and t.strip() ], "hydrus" except Exception as exc: debug(f"{self._log_prefix()} get_tags failed: {exc}") return [], "unknown" def add_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool: """Add tags to a Hydrus file.""" try: client = self._client if client is None: debug(f"{self._log_prefix()} add_tag: client unavailable") return False file_hash = str(file_identifier or "").strip().lower() if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash): debug( f"{self._log_prefix()} add_tag: invalid file hash '{file_identifier}'" ) return False service_name = kwargs.get("service_name") or "my tags" incoming_tags = [ str(t).strip().lower() for t in (tags or []) if isinstance(t, str) and str(t).strip() ] if not incoming_tags: return True try: existing_tags, _src = self.get_tag(file_hash) except Exception: existing_tags = [] from SYS.metadata import compute_namespaced_tag_overwrite tags_to_remove, tags_to_add, _merged = compute_namespaced_tag_overwrite( existing_tags, incoming_tags ) if not tags_to_add and not tags_to_remove: return True service_key: Optional[str] = None service_key = self._get_service_key(service_name) mutate_success = False if service_key: try: client.mutate_tags_by_key( file_hash, service_key, add_tags=tags_to_add, remove_tags=tags_to_remove, ) mutate_success = True except Exception as exc: debug( f"{self._log_prefix()} add_tag: mutate_tags_by_key failed: {exc}" ) did_any = False if not mutate_success: if tags_to_remove: try: client.delete_tag(file_hash, tags_to_remove, service_name) did_any = True except Exception as exc: debug( f"{self._log_prefix()} add_tag: delete_tag failed: {exc}" ) if tags_to_add: try: client.add_tag(file_hash, tags_to_add, service_name) did_any = True except Exception as exc: debug( f"{self._log_prefix()} add_tag: add_tag failed: {exc}" ) else: did_any = bool(tags_to_add or tags_to_remove) return did_any except Exception as exc: debug(f"{self._log_prefix()} add_tag failed: {exc}") return False def delete_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool: """Delete tags from a Hydrus file.""" try: client = self._client if client is None: debug(f"{self._log_prefix()} delete_tag: client unavailable") return False file_hash = str(file_identifier or "").strip().lower() if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash): debug( f"{self._log_prefix()} delete_tag: invalid file hash '{file_identifier}'" ) return False service_name = kwargs.get("service_name") or "my tags" raw_list = list(tags) if isinstance(tags, (list, tuple)) else [str(tags)] tag_list = [ str(t).strip().lower() for t in raw_list if isinstance(t, str) and str(t).strip() ] if not tag_list: return False client.delete_tag(file_hash, tag_list, service_name) return True except Exception as exc: debug(f"{self._log_prefix()} delete_tag failed: {exc}") return False def get_url(self, file_identifier: str, **kwargs: Any) -> List[str]: """Get known url for a Hydrus file.""" try: client = self._client file_hash = str(file_identifier or "").strip().lower() if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash): return [] payload = client.fetch_file_metadata( hashes=[file_hash], include_file_url=True ) items = payload.get("metadata") if isinstance(payload, dict) else None if not isinstance(items, list) or not items: return [] meta = items[0] if isinstance(items[0], dict) else {} raw_urls: Any = meta.get("known_urls" ) or meta.get("urls") or meta.get("url") or [] if isinstance(raw_urls, str): val = raw_urls.strip() return [val] if val else [] if isinstance(raw_urls, list): out: list[str] = [] for u in raw_urls: if not isinstance(u, str): continue u = u.strip() if u: out.append(u) return out return [] except Exception as exc: debug(f"{self._log_prefix()} get_url failed: {exc}") return [] def get_url_info(self, url: str, **kwargs: Any) -> dict[str, Any] | None: """Return Hydrus URL info for a single URL (Hydrus-only helper). Uses: GET /add_urls/get_url_info """ try: client = self._client if client is None: return None u = str(url or "").strip() if not u: return None try: return client.get_url_info(u) # type: ignore[attr-defined] except Exception: from API.HydrusNetwork import HydrusRequestSpec spec = HydrusRequestSpec( method="GET", endpoint="/add_urls/get_url_info", query={ "url": u }, ) response = client._perform_request(spec) # type: ignore[attr-defined] return response if isinstance(response, dict) else None except Exception as exc: debug(f"{self._log_prefix()} get_url_info failed: {exc}") return None def add_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool: """Associate one or more url with a Hydrus file.""" try: client = self._client if client is None: debug(f"{self._log_prefix()} add_url: client unavailable") return False for u in url: client.associate_url(file_identifier, u) return True except Exception as exc: debug(f"{self._log_prefix()} add_url failed: {exc}") return False def add_url_bulk(self, items: List[tuple[str, List[str]]], **kwargs: Any) -> bool: """Bulk associate urls with Hydrus files. This is a best-effort convenience wrapper used by cmdlets to batch url associations. Hydrus' client API is still called per (hash,url) pair, but this consolidates the cmdlet-level control flow so url association can be deferred until the end. """ try: client = self._client if client is None: debug(f"{self._log_prefix()} add_url_bulk: client unavailable") return False any_success = False for file_identifier, urls in items or []: h = str(file_identifier or "").strip().lower() if len(h) != 64: continue for u in urls or []: s = str(u or "").strip() if not s: continue try: client.associate_url(h, s) any_success = True except Exception: continue return any_success except Exception as exc: debug(f"{self._log_prefix()} add_url_bulk failed: {exc}") return False def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool: """Delete one or more url from a Hydrus file.""" try: client = self._client if client is None: debug(f"{self._log_prefix()} delete_url: client unavailable") return False for u in url: client.delete_url(file_identifier, u) return True except Exception as exc: debug(f"{self._log_prefix()} delete_url failed: {exc}") return False def get_note(self, file_identifier: str, **kwargs: Any) -> Dict[str, str]: """Get notes for a Hydrus file (default note service only).""" try: client = self._client if client is None: debug(f"{self._log_prefix()} get_note: client unavailable") return {} file_hash = str(file_identifier or "").strip().lower() if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash): return {} payload = client.fetch_file_metadata(hashes=[file_hash], include_notes=True) items = payload.get("metadata") if isinstance(payload, dict) else None if not isinstance(items, list) or not items: return {} meta = items[0] if isinstance(items[0], dict) else None if not isinstance(meta, dict): return {} notes_payload = meta.get("notes") if isinstance(notes_payload, dict): return { str(k): str(v or "") for k, v in notes_payload.items() if str(k).strip() } return {} except Exception as exc: debug(f"{self._log_prefix()} get_note failed: {exc}") return {} def set_note( self, file_identifier: str, name: str, text: str, **kwargs: Any ) -> bool: """Set a named note for a Hydrus file (default note service only).""" try: client = self._client if client is None: debug(f"{self._log_prefix()} set_note: client unavailable") return False file_hash = str(file_identifier or "").strip().lower() if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash): return False note_name = str(name or "").strip() if not note_name: return False note_text = str(text or "") client.set_notes(file_hash, { note_name: note_text }) return True except Exception as exc: debug(f"{self._log_prefix()} set_note failed: {exc}") return False def delete_note(self, file_identifier: str, name: str, **kwargs: Any) -> bool: """Delete a named note for a Hydrus file (default note service only).""" try: client = self._client if client is None: debug(f"{self._log_prefix()} delete_note: client unavailable") return False file_hash = str(file_identifier or "").strip().lower() if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash): return False note_name = str(name or "").strip() if not note_name: return False client.delete_notes(file_hash, [note_name]) return True except Exception as exc: debug(f"{self._log_prefix()} delete_note failed: {exc}") return False @staticmethod def _extract_tags_from_hydrus_meta( meta: Dict[str, Any], service_key: Optional[str], service_name: str ) -> List[str]: """Extract current tags from Hydrus metadata dict. Prefers display_tags (includes siblings/parents, excludes deleted). Falls back to storage_tags status '0' (current). """ tags_payload = meta.get("tags") if not isinstance(tags_payload, dict): return [] svc_data = None if service_key: svc_data = tags_payload.get(service_key) if not isinstance(svc_data, dict): return [] # Prefer display_tags (Hydrus computes siblings/parents) display = svc_data.get("display_tags") if isinstance(display, list) and display: return [ str(t) for t in display if isinstance(t, (str, bytes)) and str(t).strip() ] # Fallback to storage_tags status '0' (current) storage = svc_data.get("storage_tags") if isinstance(storage, dict): current_list = storage.get("0") or storage.get(0) if isinstance(current_list, list): return [ str(t) for t in current_list if isinstance(t, (str, bytes)) and str(t).strip() ] return []