Files
Medios-Macina/Store/HydrusNetwork.py
2025-12-16 23:23:43 -08:00

1087 lines
47 KiB
Python
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from __future__ import annotations
import re
import sys
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
import httpx
from SYS.logger import debug, log
from SYS.utils_constant import mime_maps
from Store._base import Store
# Process-wide memo of the constructor's health check, keyed by (URL, API key).
# Each value is (ok, error_message). Failed checks are stored too, so a later
# instance built with the same pair raises immediately instead of re-probing.
_HYDRUS_INIT_CHECK_CACHE: dict[tuple[str, str], tuple[bool, Optional[str]]] = {}
class HydrusNetwork(Store):
"""File storage backend for Hydrus client.
Each instance represents a specific Hydrus client connection.
Maintains its own HydrusClient.
"""
def _log_prefix(self) -> str:
    """Return the ``[hydrusnetwork:<name>]`` tag used to prefix log lines.

    ``<name>`` is the instance's ``NAME`` attribute, or ``unknown`` when the
    attribute is missing or falsy.
    """
    label = getattr(self, "NAME", None)
    if not label:
        label = "unknown"
    return f"[hydrusnetwork:{label}]"
def __new__(cls, *args: Any, **kwargs: Any) -> "HydrusNetwork":
    """Allocate the instance and pre-seed NAME/API/URL from keyword arguments.

    Only keyword arguments are inspected; positional arguments are ignored
    here. Every one of the three keywords that is present and not None is
    stored on the new instance as a string attribute of the same name.
    """
    obj = super().__new__(cls)
    for key in ("NAME", "API", "URL"):
        value = kwargs.get(key)
        if value is not None:
            setattr(obj, key, str(value))
    return obj


# Attach the accepted keyword names to the constructor function itself.
# NOTE(review): presumably read by whatever loads store configs — confirm.
setattr(__new__, "keys", ("NAME", "API", "URL"))
def __init__(
    self,
    instance_name: Optional[str] = None,
    api_key: Optional[str] = None,
    url: Optional[str] = None,
    *,
    NAME: Optional[str] = None,
    API: Optional[str] = None,
    URL: Optional[str] = None,
) -> None:
    """Initialize the Hydrus storage backend and verify it is reachable.

    Args:
        instance_name: Name of this Hydrus instance (e.g., 'home', 'work').
        api_key: Hydrus Client API access key.
        url: Hydrus client URL (e.g., 'http://192.168.1.230:45869').
        NAME: Keyword alias for ``instance_name``; used only when that is None.
        API: Keyword alias for ``api_key``; used only when that is None.
        URL: Keyword alias for ``url``; used only when that is None.

    Raises:
        ValueError: If the name, access key or URL is missing or empty.
        RuntimeError: If the health check fails now, or failed earlier in this
            process for the same (URL, access key) pair.
    """
    from API.HydrusNetwork import HydrusNetwork as HydrusClient

    # Keyword aliases only fill the gaps left by the lower-case arguments.
    instance_name = str(NAME) if instance_name is None and NAME is not None else instance_name
    api_key = str(API) if api_key is None and API is not None else api_key
    url = str(URL) if url is None and URL is not None else url

    if not (instance_name and api_key and url):
        raise ValueError("HydrusNetwork requires NAME, API, and URL")

    self.NAME = instance_name
    self.API = api_key
    self.URL = url.rstrip("/")

    # Total count (best-effort, used for startup diagnostics).
    self.total_count: Optional[int] = None

    # Self health-check: validate the URL is reachable and the access key is accepted.
    # This MUST NOT attempt to acquire a session key.
    cache_key = (self.URL, self.API)
    previous = _HYDRUS_INIT_CHECK_CACHE.get(cache_key)
    if previous is None:
        api_version_url = f"{self.URL}/api_version"
        verify_key_url = f"{self.URL}/verify_access_key"
        try:
            with httpx.Client(timeout=5.0, verify=False, follow_redirects=True) as http:
                version_resp = http.get(api_version_url)
                version_resp.raise_for_status()
                if not isinstance(version_resp.json(), dict):
                    raise RuntimeError("Hydrus /api_version returned an unexpected response")

                verify_resp = http.get(
                    verify_key_url,
                    headers={"Hydrus-Client-API-Access-Key": self.API},
                )
                verify_resp.raise_for_status()
                if not isinstance(verify_resp.json(), dict):
                    raise RuntimeError("Hydrus /verify_access_key returned an unexpected response")

            _HYDRUS_INIT_CHECK_CACHE[cache_key] = (True, None)
        except Exception as exc:
            # Remember the failure so later instances fail fast for this pair.
            reason = str(exc)
            _HYDRUS_INIT_CHECK_CACHE[cache_key] = (False, reason)
            raise RuntimeError(f"Hydrus '{self.NAME}' unavailable: {reason}") from exc
    else:
        healthy, reason = previous
        if not healthy:
            raise RuntimeError(f"Hydrus '{self.NAME}' unavailable: {reason or 'Unavailable'}")

    # Create a persistent client for this instance (auth via access key by default).
    self._client = HydrusClient(url=self.URL, access_key=self.API, instance_name=self.NAME)

    # Best-effort total count (fast on Hydrus side; does not fetch IDs/hashes).
    try:
        payload = self._client.search_files(
            tags=["system:everything"],
            return_hashes=False,
            return_file_ids=False,
            return_file_count=True,
        )
        count_val = None
        if isinstance(payload, dict):
            # The first key carrying a non-None value wins.
            for count_key in ("file_count", "file_count_inclusive", "num_files"):
                count_val = payload.get(count_key)
                if count_val is not None:
                    break
        if isinstance(count_val, int):
            self.total_count = count_val
    except Exception as exc:
        debug(f"{self._log_prefix()} total count unavailable: {exc}", file=sys.stderr)
def name(self) -> str:
    """Return this store's configured instance name (the ``NAME`` attribute)."""
    configured_name = self.NAME
    return configured_name
def get_name(self) -> str:
    """Return this store's configured instance name (the ``NAME`` attribute)."""
    configured_name = self.NAME
    return configured_name
def add_file(self, file_path: Path, **kwargs: Any) -> str:
    """Upload a file to Hydrus, then attach tags and URLs to it.

    If Hydrus already knows the file's SHA-256 hash the upload is skipped and
    an undelete is attempted instead (the file may be in the trash). Tags and
    URLs are applied to new and pre-existing files alike; failures while
    tagging or associating URLs are logged and do not abort the call.

    Args:
        file_path: Path to the file to upload.
        tag: (keyword) Tag or list of tags to add. ``None`` means no tags.
        url: (keyword) URL or list of URLs to associate. ``None`` means none.
        title: (keyword) Optional title; added as a ``title:<value>`` tag
            unless a ``title:`` tag is already present.

    Returns:
        File hash reported by Hydrus (or the locally computed hash when the
        file already existed).

    Raises:
        Exception: If hashing or the upload fails, or the client is unavailable.
    """
    from SYS.utils import sha256_file

    def _as_list(value: Any) -> list:
        """Turn None / a single string / any iterable into a list."""
        if value is None:
            return []
        if isinstance(value, (str, bytes)):
            # A bare string must stay one item, not be split into characters.
            return [value]
        return list(value)

    tag_list = _as_list(kwargs.get("tag"))
    url_list = _as_list(kwargs.get("url"))
    title = kwargs.get("title")

    # Add title to tags if provided and not already present.
    if title:
        title_tag = f"title:{title}"
        if not any(str(candidate).lower().startswith("title:") for candidate in tag_list):
            tag_list = [title_tag] + tag_list

    try:
        # Compute file hash.
        file_hash = sha256_file(file_path)
        debug(f"{self._log_prefix()} file hash: {file_hash}")

        client = self._client
        if client is None:
            raise Exception("Hydrus client unavailable")

        # Check if file already exists in Hydrus.
        file_exists = False
        try:
            metadata = client.fetch_file_metadata(
                hashes=[file_hash],
                include_service_keys_to_tags=False,
                include_file_url=False,
                include_duration=False,
                include_size=False,
                include_mime=False,
            )
            if metadata and isinstance(metadata, dict):
                metas = metadata.get("metadata", [])
                if isinstance(metas, list) and metas:
                    # Hydrus returns placeholder rows for unknown hashes.
                    # Only treat as a real duplicate if it has a concrete file_id.
                    file_exists = any(
                        isinstance(meta, dict) and meta.get("file_id") is not None
                        for meta in metas
                    )
            if file_exists:
                log(
                    f" Duplicate detected - file already in Hydrus with hash: {file_hash}",
                    file=sys.stderr,
                )
        except Exception as exc:
            # Best-effort probe: on failure carry on with what is known so far.
            debug(f"{self._log_prefix()} duplicate check failed: {exc}")

        if file_exists:
            # The existing file may be in trash. Best-effort restore it to 'my files'.
            try:
                client.undelete_files([file_hash])
            except Exception as exc:
                debug(f"{self._log_prefix()} undelete failed: {exc}")
        else:
            log(f"{self._log_prefix()} Uploading: {file_path.name}", file=sys.stderr)
            response = client.add_file(file_path)

            # Extract hash from response.
            hydrus_hash: Optional[str] = None
            if isinstance(response, dict):
                hydrus_hash = response.get("hash") or response.get("file_hash")
                if not hydrus_hash:
                    hashes = response.get("hashes")
                    if isinstance(hashes, list) and hashes:
                        hydrus_hash = hashes[0]

            if not hydrus_hash:
                raise Exception(f"Hydrus response missing file hash: {response}")

            file_hash = hydrus_hash
            log(f"{self._log_prefix()} hash: {file_hash}", file=sys.stderr)

        # Add tags if provided (both for new and existing files).
        if tag_list:
            service_name = "my tags"  # default tag service
            try:
                debug(f"{self._log_prefix()} Adding {len(tag_list)} tag(s): {tag_list}")
                client.add_tag(file_hash, tag_list, service_name)
                log(f"{self._log_prefix()} Tags added via '{service_name}'", file=sys.stderr)
            except Exception as exc:
                log(f"{self._log_prefix()} ⚠️ Failed to add tags: {exc}", file=sys.stderr)

        # Associate url if provided (both for new and existing files).
        if url_list:
            log(f"{self._log_prefix()} Associating {len(url_list)} URL(s) with file", file=sys.stderr)
            for single_url in url_list:
                if not single_url:
                    continue
                try:
                    client.associate_url(file_hash, str(single_url))
                    debug(f"{self._log_prefix()} Associated URL: {single_url}")
                except Exception as exc:
                    log(f"{self._log_prefix()} ⚠️ Failed to associate URL {single_url}: {exc}", file=sys.stderr)

        return file_hash

    except Exception as exc:
        log(f"{self._log_prefix()} ❌ upload failed: {exc}", file=sys.stderr)
        raise
def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]:
    """Search Hydrus for files matching ``query``.

    Query forms:
        ``*``                 every file.
        ``url:`` / ``url:*``  files whose metadata carries at least one URL.
        ``url:<value>``       exact lookup through ``/add_urls/get_url_files``
                              when ``<value>`` is a full http(s) URL; otherwise,
                              or when that yields nothing, a case-insensitive
                              substring scan over file URLs.
        ``namespace:value``   sent to Hydrus (lower-cased) as a single tag;
                              results are returned without further filtering.
        free text             fetches ``system:everything`` and keeps files
                              whose title/tags contain every term as a whole word.

    Args:
        query: Search expression as described above.
        limit: (keyword) Maximum number of results. Defaults to 100; ``None``
            is treated as the default and numeric strings are accepted.

    Returns:
        List of dicts with keys ``hash``, ``url``, ``name``, ``title``,
        ``size``, ``size_bytes``, ``store``, ``tag``, ``file_id``, ``mime``
        and ``ext``.

    Raises:
        Exception: Any failure is logged to stderr with a traceback and re-raised.

    Example:
        results = storage["hydrus"].search("artist:john_doe music")
        results = storage["hydrus"].search("Simple Man")
    """
    try:
        raw_limit = kwargs.get("limit", 100)
        # Normalise once so every later comparison and slice sees an int.
        limit = 100 if raw_limit is None else int(raw_limit)
        # The URL scans treat a zero limit as "use the default".
        url_fetch_limit = limit if limit else 100

        client = self._client
        if client is None:
            raise Exception("Hydrus client unavailable")

        prefix = self._log_prefix()
        debug(f"{prefix} Searching for: {query}")

        # Metadata options shared by every URL-oriented fetch below.
        full_metadata: Dict[str, Any] = {
            "include_file_url": True,
            "include_service_keys_to_tags": True,
            "include_duration": True,
            "include_size": True,
            "include_mime": True,
        }

        def _extract_urls(meta_obj: Any) -> list:
            """Return the non-empty, stripped URL strings found on a metadata row."""
            if not isinstance(meta_obj, dict):
                return []
            raw = meta_obj.get("url")
            if raw is None:
                raw = meta_obj.get("urls")
            if isinstance(raw, str):
                val = raw.strip()
                return [val] if val else []
            if isinstance(raw, list):
                return [item.strip() for item in raw if isinstance(item, str) and item.strip()]
            return []

        def _iter_url_filtered_metadata(url_value: Optional[str], want_any: bool, fetch_limit: int) -> list:
            """Best-effort URL search by scanning Hydrus metadata with include_file_url=True."""
            # First try a fast system predicate if Hydrus supports it.
            candidate_file_ids: list = []
            try:
                if want_any:
                    predicate = "system:has url"
                    url_search = client.search_files(
                        tags=[predicate],
                        return_hashes=False,
                        return_file_ids=True,
                        return_file_count=False,
                    )
                    ids = url_search.get("file_ids", []) if isinstance(url_search, dict) else []
                    if isinstance(ids, list):
                        candidate_file_ids = [
                            int(x)
                            for x in ids
                            if isinstance(x, (int, float, str)) and str(x).strip().isdigit()
                        ]
            except Exception:
                candidate_file_ids = []

            if not candidate_file_ids:
                # Fallback: scan from system:everything and filter by URL substring.
                everything = client.search_files(
                    tags=["system:everything"],
                    return_hashes=False,
                    return_file_ids=True,
                    return_file_count=False,
                )
                ids = everything.get("file_ids", []) if isinstance(everything, dict) else []
                if isinstance(ids, list):
                    candidate_file_ids = [int(x) for x in ids if isinstance(x, (int, float))]

            if not candidate_file_ids:
                return []

            needle = (url_value or "").strip().lower()
            chunk_size = 200
            out: list = []

            for start in range(0, len(candidate_file_ids), chunk_size):
                if len(out) >= fetch_limit:
                    break
                chunk = candidate_file_ids[start : start + chunk_size]
                try:
                    payload = client.fetch_file_metadata(file_ids=chunk, **full_metadata)
                except Exception:
                    # One bad chunk should not abort the whole scan.
                    continue

                metas = payload.get("metadata", []) if isinstance(payload, dict) else []
                if not isinstance(metas, list):
                    continue

                for meta in metas:
                    if not isinstance(meta, dict):
                        continue
                    urls = _extract_urls(meta)
                    if not urls:
                        continue
                    if want_any or (needle and any(needle in u.lower() for u in urls)):
                        out.append(meta)
                        if len(out) >= fetch_limit:
                            break

            return out

        def _rows(payload: Any) -> Optional[list]:
            """Return the dict rows of a metadata payload, or None if malformed."""
            metas = payload.get("metadata", []) if isinstance(payload, dict) else []
            if isinstance(metas, list):
                return [m for m in metas if isinstance(m, dict)]
            return None

        stripped_query = query.strip()
        query_lower = stripped_query.lower()

        # Special case: url:* and url:<value>
        metadata_list: Optional[list] = None
        if ":" in query_lower and not query_lower.startswith(":"):
            # Split the original-case query: URL paths can be case-sensitive.
            namespace, _, raw_pattern = stripped_query.partition(":")
            if namespace.strip().lower() == "url":
                pattern = raw_pattern.strip()
                if not pattern or pattern == "*":
                    metadata_list = _iter_url_filtered_metadata(None, want_any=True, fetch_limit=url_fetch_limit)
                else:
                    # Fast-path: exact URL via /add_urls/get_url_files when a full URL is provided.
                    try:
                        if pattern.lower().startswith(("http://", "https://")):
                            from API.HydrusNetwork import HydrusRequestSpec

                            spec = HydrusRequestSpec(method="GET", endpoint="/add_urls/get_url_files", query={"url": pattern})
                            response = client._perform_request(spec)  # type: ignore[attr-defined]
                            hashes: list = []
                            file_ids: list = []
                            if isinstance(response, dict):
                                raw_hashes = response.get("hashes") or response.get("file_hashes")
                                if isinstance(raw_hashes, list):
                                    hashes = [str(h).strip() for h in raw_hashes if isinstance(h, str) and str(h).strip()]

                                # Hydrus reports matches under "url_file_statuses";
                                # status 2 means the file is already in the database.
                                statuses = response.get("url_file_statuses")
                                if isinstance(statuses, list):
                                    for entry in statuses:
                                        if not isinstance(entry, dict) or entry.get("status") != 2:
                                            continue
                                        entry_hash = entry.get("hash")
                                        if isinstance(entry_hash, str) and entry_hash.strip():
                                            cleaned = entry_hash.strip()
                                            if cleaned not in hashes:
                                                hashes.append(cleaned)

                                raw_ids = response.get("file_ids")
                                if isinstance(raw_ids, list):
                                    for item in raw_ids:
                                        try:
                                            file_ids.append(int(item))
                                        except (TypeError, ValueError):
                                            continue

                            if file_ids:
                                metadata_list = _rows(client.fetch_file_metadata(file_ids=file_ids, **full_metadata))
                            elif hashes:
                                metadata_list = _rows(client.fetch_file_metadata(hashes=hashes, **full_metadata))
                    except Exception:
                        metadata_list = None

                    # Fallback: substring scan
                    if metadata_list is None:
                        metadata_list = _iter_url_filtered_metadata(pattern, want_any=False, fetch_limit=url_fetch_limit)

        # "*" and namespace-less queries fetch everything (the latter is then
        # filtered locally); an explicit namespace is used as a tag search.
        has_namespace = ":" in query_lower
        if stripped_query == "*" or not has_namespace:
            tags = ["system:everything"]
        else:
            tags = [query_lower]

        # Whole-word matchers for free-form queries, compiled once up front.
        # Terms are split on commas/whitespace and must ALL match (AND logic).
        term_patterns: list = []
        if not has_namespace and query_lower != "*":
            search_terms = set(query_lower.replace(',', ' ').split())
            term_patterns = [re.compile(r'\b' + re.escape(term) + r'\b') for term in search_terms]

        # Search files with the tags (unless url: search already produced metadata)
        results: list = []
        if metadata_list is None:
            search_result = client.search_files(
                tags=tags,
                return_hashes=True,
                return_file_ids=True
            )
            found = search_result if isinstance(search_result, dict) else {}
            file_ids = found.get("file_ids", [])
            hashes = found.get("hashes", [])

            if not file_ids and not hashes:
                debug(f"{prefix} 0 result(s)")
                return []

            if file_ids:
                metadata = client.fetch_file_metadata(file_ids=file_ids)
            else:
                metadata = client.fetch_file_metadata(hashes=hashes)
            metadata_list = metadata.get("metadata", []) if isinstance(metadata, dict) else []

        if not isinstance(metadata_list, list):
            metadata_list = []

        for meta in metadata_list:
            if len(results) >= limit:
                break
            if not isinstance(meta, dict):
                # Skip malformed rows instead of failing the whole search.
                continue

            file_id = meta.get("file_id")
            hash_hex = meta.get("hash")
            size = meta.get("size", 0)

            # Get tags for this file and extract title
            tags_set = meta.get("tags", {})
            all_tags: list = []
            default_title = f"Hydrus File {file_id}"  # Default fallback
            title = default_title

            if isinstance(tags_set, dict):
                # Gather candidate tag lists in order: per service the storage
                # buckets then display_tags, and finally any top-level payload.
                tag_groups: list = []
                for service_tags in tags_set.values():
                    if not isinstance(service_tags, dict):
                        continue
                    storage_tags = service_tags.get("storage_tags", {})
                    if isinstance(storage_tags, dict):
                        tag_groups.extend(storage_tags.values())
                    tag_groups.append(service_tags.get("display_tags", []))
                tag_groups.append(meta.get("tags_flat", []) or meta.get("tags", []))

                for group in tag_groups:
                    if not isinstance(group, list):
                        continue
                    for tag in group:
                        tag_text = str(tag) if tag else ""
                        if not tag_text:
                            continue
                        all_tags.append(tag_text)
                        # The first title: tag encountered wins.
                        if title == default_title and tag_text.lower().startswith("title:"):
                            title = tag_text.split(":", 1)[1].strip()

            # Prefer Hydrus-provided extension (e.g. ".webm"); fall back to MIME map.
            mime_type = meta.get("mime")
            ext = str(meta.get("ext") or "").strip().lstrip('.')
            if not ext and mime_type:
                for category in mime_maps.values():
                    for info in category.values():
                        if mime_type in info.get("mimes", []):
                            ext = str(info.get("ext", "")).strip().lstrip('.')
                            break
                    if ext:
                        break

            # Explicit-namespace results were already filtered by Hydrus.
            # Free-form results must contain every term in title or tags.
            if not has_namespace:
                all_tags_str = "".join(" " + tag_text.lower() for tag_text in all_tags)
                searchable_text = (title + " " + all_tags_str).lower()
                if not all(matcher.search(searchable_text) for matcher in term_patterns):
                    continue

            file_url = f"{self.URL.rstrip('/')}/get_files/file?hash={hash_hex}"
            results.append({
                "hash": hash_hex,
                "url": file_url,
                "name": title,
                "title": title,
                "size": size,
                "size_bytes": size,
                "store": self.NAME,
                "tag": all_tags,
                "file_id": file_id,
                "mime": mime_type,
                "ext": ext,
            })

        debug(f"{prefix} {len(results)} result(s)")
        return results[:limit]

    except Exception as exc:
        log(f"❌ Hydrus search failed: {exc}", file=sys.stderr)
        import traceback
        traceback.print_exc(file=sys.stderr)
        raise
def get_file(self, file_hash: str, **kwargs: Any) -> Path | str | None:
    """Return a browser URL for the file.

    IMPORTANT: this method must be side-effect free (do not auto-open a browser).
    Only explicit user actions (e.g. the get-file cmdlet) should open files.

    Args:
        file_hash: Hash of the file to address.

    Returns:
        URL of the Hydrus ``/get_files/file`` endpoint for ``file_hash``, with
        the access key appended as a query parameter.
    """
    debug(f"{self._log_prefix()} get_file: start hash={file_hash[:12]}...")

    # Build browser URL with access key
    base_url = str(self.URL).rstrip('/')
    access_key = str(self.API)
    file_endpoint = f"{base_url}/get_files/file?hash={file_hash}"
    browser_url = f"{file_endpoint}&Hydrus-Client-API-Access-Key={access_key}"

    # Log with the key masked so the credential never lands in debug output.
    debug(f"{self._log_prefix()} get_file: url={file_endpoint}&Hydrus-Client-API-Access-Key=<redacted>")
    return browser_url
def get_metadata(self, file_hash: str, **kwargs: Any) -> Optional[Dict[str, Any]]:
    """Fetch and normalise Hydrus metadata for one file.

    Args:
        file_hash: SHA256 hash of the file (64-char hex string).

    Returns:
        Dict with ``hash``, ``title``, ``ext``, ``size``, ``mime``,
        ``hydrus_mime``, ``filetype_human``, ``duration_ms`` and ``url``, or
        None when the client is missing, Hydrus has no real row for the hash,
        or any error occurs (errors are reported through ``debug``).
    """
    try:
        client = self._client
        if not client:
            debug(f"{self._log_prefix()} get_metadata: client unavailable")
            return None

        # Fetch file metadata with the fields we need for CLI display.
        payload = client.fetch_file_metadata(
            hashes=[file_hash],
            include_service_keys_to_tags=True,
            include_file_url=True,
            include_duration=True,
            include_size=True,
            include_mime=True,
        )
        if not payload or not payload.get("metadata"):
            return None

        meta = payload["metadata"][0]

        # Hydrus can return placeholder metadata rows for unknown hashes.
        if not isinstance(meta, dict) or meta.get("file_id") is None:
            return None

        # Title: first "title:" tag among the current ("0") display tags of a
        # service; the scan stops at the first service that supplied one.
        placeholder_title = f"Hydrus_{file_hash[:12]}"
        title = placeholder_title
        tags_payload = meta.get("tags", {})
        if isinstance(tags_payload, dict):
            for service_data in tags_payload.values():
                if title != placeholder_title:
                    break
                if not isinstance(service_data, dict):
                    continue
                display_tags = service_data.get("display_tags", {})
                if not isinstance(display_tags, dict):
                    continue
                current_tags = display_tags.get("0", [])
                if not isinstance(current_tags, list):
                    continue
                for tag in current_tags:
                    if str(tag).lower().startswith("title:"):
                        title = tag.split(":", 1)[1].strip()
                        break

        # Hydrus may return mime as an int enum, or sometimes a human label.
        mime_val = meta.get("mime")
        filetype_human = meta.get("filetype_human") or meta.get("mime_human") or meta.get("mime_string")

        # Extension sources, in order: metadata ext, a short alphanumeric
        # filetype label, the title's suffix, the on-disk path's suffix.
        ext = str(meta.get("ext") or "").strip().lstrip(".")
        if not ext:
            label = str(filetype_human or "").strip().lstrip(".").lower()
            # Treat simple labels like "mp4", "m4a", "webm" as extensions.
            if label and label != "unknown filetype" and label.isalnum() and len(label) <= 8:
                ext = label
        if not ext and isinstance(title, str) and "." in title:
            try:
                ext = Path(title).suffix.lstrip(".")
            except Exception:
                ext = ""
        if not ext:
            try:
                path_payload = client.get_file_path(file_hash)
                if isinstance(path_payload, dict):
                    disk_path = path_payload.get("path")
                    if isinstance(disk_path, str) and disk_path.strip():
                        ext = Path(disk_path.strip()).suffix.lstrip(".")
            except Exception:
                ext = ""

        def _mime_from_ext(ext_value: str) -> str:
            """Return the first MIME listed for ``ext_value`` in mime_maps, or ''."""
            ext_clean = str(ext_value or "").strip().lstrip(".").lower()
            if not ext_clean:
                return ""
            try:
                for category in mime_maps.values():
                    info = category.get(ext_clean)
                    if not isinstance(info, dict):
                        continue
                    mimes = info.get("mimes")
                    if isinstance(mimes, list) and mimes:
                        return str(mimes[0])
            except Exception:
                return ""
            return ""

        def _int_or_none(value: Any) -> Optional[int]:
            """Coerce to int; None and unconvertible values become None."""
            try:
                return int(value) if value is not None else None
            except Exception:
                return None

        # Normalize to a MIME string for CLI output.
        # Avoid passing through human labels like "unknown filetype".
        mime_type = ""
        for source in (mime_val, filetype_human):
            if not isinstance(source, str):
                continue
            candidate = source.strip()
            if "/" in candidate and candidate.lower() != "unknown filetype":
                mime_type = candidate
                break
        if not mime_type:
            mime_type = _mime_from_ext(ext)

        # Normalize size/duration to stable scalar types.
        size_val = meta.get("size")
        if size_val is None:
            size_val = meta.get("size_bytes")
        size_int = _int_or_none(size_val)

        dur_val = meta.get("duration")
        if dur_val is None:
            dur_val = meta.get("duration_ms")
        dur_int = _int_or_none(dur_val)

        raw_urls = (
            meta.get("known_urls")
            or meta.get("urls")
            or meta.get("url")
            or []
        )
        url_list: list = []
        if isinstance(raw_urls, str):
            single = raw_urls.strip()
            if single:
                url_list = [single]
        elif isinstance(raw_urls, list):
            url_list = [str(u).strip() for u in raw_urls if isinstance(u, str) and str(u).strip()]

        return {
            "hash": file_hash,
            "title": title,
            "ext": ext,
            "size": size_int,
            "mime": mime_type,
            # Keep raw fields available for troubleshooting/other callers.
            "hydrus_mime": mime_val,
            "filetype_human": filetype_human,
            "duration_ms": dur_int,
            "url": url_list,
        }

    except Exception as exc:
        debug(f"{self._log_prefix()} get_metadata failed: {exc}")
        return None
def get_tag(self, file_identifier: str, **kwargs: Any) -> Tuple[List[str], str]:
    """Get tags for a file from Hydrus by hash.

    Args:
        file_identifier: File hash (SHA256 hex string, case-insensitive).
        service_name: (keyword) Tag service to read; defaults to "my tags".

    Returns:
        Tuple of (tags_list, source). ``source`` is "hydrus" on success and
        "unknown" (with an empty list) when the hash is invalid, the client is
        unavailable, Hydrus has no real row for the hash, or an error occurs.
    """
    try:
        from API import HydrusNetwork as hydrus_wrapper

        file_hash = str(file_identifier or "").strip().lower()
        if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash):
            debug(f"{self._log_prefix()} get_tags: invalid file hash '{file_identifier}'")
            return [], "unknown"

        client = self._client
        if not client:
            debug(f"{self._log_prefix()} get_tags: client unavailable")
            return [], "unknown"

        # Fetch file metadata
        payload = client.fetch_file_metadata(
            hashes=[file_hash],
            include_service_keys_to_tags=True,
            include_file_url=False
        )

        items = payload.get("metadata") if isinstance(payload, dict) else None
        if not isinstance(items, list) or not items:
            debug(f"{self._log_prefix()} get_tags: no metadata for hash {file_hash}")
            return [], "unknown"

        # Placeholder rows (file_id is None) mean Hydrus does not have the file.
        meta = items[0] if isinstance(items[0], dict) else None
        if not isinstance(meta, dict) or meta.get("file_id") is None:
            debug(f"{self._log_prefix()} get_tags: invalid metadata for hash {file_hash}")
            return [], "unknown"

        # Honour the caller's service, matching add_tag/delete_tag.
        service_name = kwargs.get("service_name") or "my tags"
        service_key = hydrus_wrapper.get_tag_service_key(client, service_name)

        # Extract tags from metadata
        tags = self._extract_tags_from_hydrus_meta(meta, service_key, service_name)

        return tags, "hydrus"

    except Exception as exc:
        debug(f"{self._log_prefix()} get_tags failed: {exc}")
        return [], "unknown"
def add_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool:
    """Add tags to a Hydrus file.

    Args:
        file_identifier: File hash (SHA256 hex string, case-insensitive).
        tags: Tags to add. A single string is treated as one tag; ``None`` or
            an empty collection means there is nothing to add.
        service_name: (keyword) Target tag service; defaults to "my tags".

    Returns:
        True when the request was sent; False when the client is unavailable,
        the hash is invalid, there are no tags, or the client call raised.
    """
    try:
        client = self._client
        if client is None:
            debug(f"{self._log_prefix()} add_tag: client unavailable")
            return False

        file_hash = str(file_identifier or "").strip().lower()
        if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash):
            debug(f"{self._log_prefix()} add_tag: invalid file hash '{file_identifier}'")
            return False

        service_name = kwargs.get("service_name") or "my tags"

        # Normalise to a list without stringifying None or whole containers.
        if tags is None:
            tag_list: list = []
        elif isinstance(tags, (list, tuple, set, frozenset)):
            tag_list = list(tags)
        else:
            tag_list = [str(tags)]
        if not tag_list:
            return False

        client.add_tag(file_hash, tag_list, service_name)
        return True
    except Exception as exc:
        debug(f"{self._log_prefix()} add_tag failed: {exc}")
        return False
def delete_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool:
    """Delete tags from a Hydrus file.

    Args:
        file_identifier: File hash (SHA256 hex string, case-insensitive).
        tags: Tags to remove. A single string is treated as one tag; ``None``
            or an empty collection means there is nothing to remove.
        service_name: (keyword) Target tag service; defaults to "my tags".

    Returns:
        True when the request was sent; False when the client is unavailable,
        the hash is invalid, there are no tags, or the client call raised.
    """
    try:
        client = self._client
        if client is None:
            debug(f"{self._log_prefix()} delete_tag: client unavailable")
            return False

        file_hash = str(file_identifier or "").strip().lower()
        if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash):
            debug(f"{self._log_prefix()} delete_tag: invalid file hash '{file_identifier}'")
            return False

        service_name = kwargs.get("service_name") or "my tags"

        # Normalise to a list without stringifying None or whole containers.
        if tags is None:
            tag_list: list = []
        elif isinstance(tags, (list, tuple, set, frozenset)):
            tag_list = list(tags)
        else:
            tag_list = [str(tags)]
        if not tag_list:
            return False

        client.delete_tag(file_hash, tag_list, service_name)
        return True
    except Exception as exc:
        debug(f"{self._log_prefix()} delete_tag failed: {exc}")
        return False
def get_url(self, file_identifier: str, **kwargs: Any) -> List[str]:
    """Get the known URLs for a Hydrus file.

    Args:
        file_identifier: File hash (SHA256 hex string, case-insensitive).

    Returns:
        Stripped, non-empty URL strings taken from the first of ``known_urls``,
        ``urls`` or ``url`` that is present and truthy on the metadata row.
        Empty list when the client is unavailable, the hash is invalid, no
        metadata comes back, or an error occurs.
    """
    try:
        client = self._client
        if client is None:
            debug(f"{self._log_prefix()} get_url: client unavailable")
            return []

        file_hash = str(file_identifier or "").strip().lower()
        if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash):
            return []

        # This method exists to read URL fields, so ask for them, as the
        # other URL-reading call sites in this class do.
        payload = client.fetch_file_metadata(hashes=[file_hash], include_file_url=True)
        items = payload.get("metadata") if isinstance(payload, dict) else None
        if not isinstance(items, list) or not items:
            return []
        meta = items[0] if isinstance(items[0], dict) else {}

        raw_urls: Any = (
            meta.get("known_urls")
            or meta.get("urls")
            or meta.get("url")
            or []
        )
        if isinstance(raw_urls, str):
            val = raw_urls.strip()
            return [val] if val else []
        if isinstance(raw_urls, list):
            return [u.strip() for u in raw_urls if isinstance(u, str) and u.strip()]
        return []
    except Exception as exc:
        debug(f"{self._log_prefix()} get_url failed: {exc}")
        return []
def add_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
    """Associate one or more URLs with a Hydrus file.

    Args:
        file_identifier: Hash of the target file, passed to the client as-is.
        url: URLs to associate. A single string is treated as one URL.

    Returns:
        True when every association call completed; False when the client is
        unavailable or any call raised.
    """
    try:
        client = self._client
        if client is None:
            debug(f"{self._log_prefix()} add_url: client unavailable")
            return False
        # A bare string would otherwise be iterated character by character.
        candidates = [url] if isinstance(url, str) else url
        for u in candidates:
            client.associate_url(file_identifier, u)
        return True
    except Exception as exc:
        debug(f"{self._log_prefix()} add_url failed: {exc}")
        return False
def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
    """Delete one or more URLs from a Hydrus file.

    Args:
        file_identifier: Hash of the target file, passed to the client as-is.
        url: URLs to remove. A single string is treated as one URL.

    Returns:
        True when every delete call completed; False when the client is
        unavailable or any call raised.
    """
    try:
        client = self._client
        if client is None:
            debug(f"{self._log_prefix()} delete_url: client unavailable")
            return False
        # A bare string would otherwise be iterated character by character.
        candidates = [url] if isinstance(url, str) else url
        for u in candidates:
            client.delete_url(file_identifier, u)
        return True
    except Exception as exc:
        debug(f"{self._log_prefix()} delete_url failed: {exc}")
        return False
def get_note(self, file_identifier: str, **kwargs: Any) -> Dict[str, str]:
    """Return the notes of a Hydrus file as a ``{name: text}`` mapping.

    Names and texts are stringified; entries whose name is blank are dropped
    and falsy texts become ``""``. An empty dict is returned when the client
    is unavailable, the hash is invalid, no usable metadata row comes back,
    or an error occurs.
    """
    try:
        client = self._client
        if client is None:
            debug(f"{self._log_prefix()} get_note: client unavailable")
            return {}

        file_hash = str(file_identifier or "").strip().lower()
        is_sha256_hex = len(file_hash) == 64 and set(file_hash) <= set("0123456789abcdef")
        if not is_sha256_hex:
            return {}

        payload = client.fetch_file_metadata(hashes=[file_hash], include_notes=True)
        rows = payload.get("metadata") if isinstance(payload, dict) else None
        if not isinstance(rows, list) or not rows:
            return {}

        first_row = rows[0]
        if not isinstance(first_row, dict):
            return {}

        notes_payload = first_row.get("notes")
        if not isinstance(notes_payload, dict):
            return {}

        notes: Dict[str, str] = {}
        for note_name, note_text in notes_payload.items():
            if str(note_name).strip():
                notes[str(note_name)] = str(note_text or "")
        return notes
    except Exception as exc:
        debug(f"{self._log_prefix()} get_note failed: {exc}")
        return {}
def set_note(self, file_identifier: str, name: str, text: str, **kwargs: Any) -> bool:
    """Create or overwrite one named note on a Hydrus file.

    The name is stripped and must be non-empty; a falsy ``text`` is stored as
    an empty string. Returns True once the client call completes, and False
    when the client is unavailable, the hash or name is invalid, or the call
    raised.
    """
    try:
        client = self._client
        if client is None:
            debug(f"{self._log_prefix()} set_note: client unavailable")
            return False

        file_hash = str(file_identifier or "").strip().lower()
        is_sha256_hex = len(file_hash) == 64 and set(file_hash) <= set("0123456789abcdef")
        note_name = str(name or "").strip()
        if not is_sha256_hex or not note_name:
            return False

        client.set_notes(file_hash, {note_name: str(text or "")})
        return True
    except Exception as exc:
        debug(f"{self._log_prefix()} set_note failed: {exc}")
        return False
def delete_note(self, file_identifier: str, name: str, **kwargs: Any) -> bool:
    """Remove one named note from a Hydrus file.

    The name is stripped and must be non-empty. Returns True once the client
    call completes, and False when the client is unavailable, the hash or
    name is invalid, or the call raised.
    """
    try:
        client = self._client
        if client is None:
            debug(f"{self._log_prefix()} delete_note: client unavailable")
            return False

        file_hash = str(file_identifier or "").strip().lower()
        is_sha256_hex = len(file_hash) == 64 and set(file_hash) <= set("0123456789abcdef")
        note_name = str(name or "").strip()
        if not is_sha256_hex or not note_name:
            return False

        client.delete_notes(file_hash, [note_name])
        return True
    except Exception as exc:
        debug(f"{self._log_prefix()} delete_note failed: {exc}")
        return False
@staticmethod
def _extract_tags_from_hydrus_meta(
    meta: Dict[str, Any],
    service_key: Optional[str],
    service_name: str
) -> List[str]:
    """Extract the current tags of one tag service from a Hydrus metadata row.

    Prefers ``display_tags`` and falls back to ``storage_tags``. Each bucket
    may be a plain list of tags or a dict keyed by status, in which case
    status ``"0"`` (current) is used.

    Args:
        meta: Metadata row; its ``tags`` entry is expected to map service
            keys to per-service tag data.
        service_key: Key of the service to read. Without it nothing is returned.
        service_name: Accepted for signature compatibility; not used here.

    Returns:
        Non-blank tags as strings, or an empty list when nothing applies.
    """
    tags_payload = meta.get("tags")
    if not isinstance(tags_payload, dict):
        return []

    svc_data = tags_payload.get(service_key) if service_key else None
    if not isinstance(svc_data, dict):
        return []

    def _current(bucket: Any) -> list:
        """Return the current-tag list from a list- or dict-shaped bucket."""
        if isinstance(bucket, list):
            return bucket
        if isinstance(bucket, dict):
            current = bucket.get("0") or bucket.get(0)
            if isinstance(current, list):
                return current
        return []

    def _clean(values: list) -> List[str]:
        """Keep string-like, non-blank entries."""
        return [str(t) for t in values if isinstance(t, (str, bytes)) and str(t).strip()]

    # Prefer display_tags; fall back to storage_tags when it yields nothing.
    display = _current(svc_data.get("display_tags"))
    if display:
        return _clean(display)

    return _clean(_current(svc_data.get("storage_tags")))