Files
Medios-Macina/Store/HydrusNetwork.py
Nose fcdd507d00
Some checks failed
smoke-mm / Install & smoke test mm --help (push) Has been cancelled
df
2025-12-27 14:50:59 -08:00

1488 lines
65 KiB
Python
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

from __future__ import annotations
import re
import sys
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
import httpx
from SYS.logger import debug, log
from SYS.utils_constant import mime_maps
from Store._base import Store
_HYDRUS_INIT_CHECK_CACHE: dict[tuple[str, str], tuple[bool, Optional[str]]] = {}
class HydrusNetwork(Store):
"""File storage backend for Hydrus client.
Each instance represents a specific Hydrus client connection.
Maintains its own HydrusClient.
"""
def _log_prefix(self) -> str:
    """Return the bracketed tag that prefixes this store's log lines.

    The tag embeds the instance's ``NAME`` attribute; ``"unknown"`` is used
    when the attribute is missing or empty.
    """
    store_name = getattr(self, "NAME", None)
    if not store_name:
        store_name = "unknown"
    return f"[hydrusnetwork:{store_name}]"
def __new__(cls, *args: Any, **kwargs: Any) -> "HydrusNetwork":
    """Allocate the instance and pre-populate its config attributes.

    Each of the ``NAME``, ``API`` and ``URL`` keyword arguments that is
    present and not None is stored on the new object as a string before
    ``__init__`` runs. Positional arguments are accepted but not inspected
    here.
    """
    instance = super().__new__(cls)
    for key in ("NAME", "API", "URL"):
        value = kwargs.get(key)
        if value is not None:
            setattr(instance, key, str(value))
    return instance


# Expose the config key names as an attribute of the constructor itself.
# NOTE(review): presumably read by whatever builds stores from config — confirm against the caller.
setattr(__new__, "keys", ("NAME", "API", "URL"))
def __init__(
    self,
    instance_name: Optional[str] = None,
    api_key: Optional[str] = None,
    url: Optional[str] = None,
    *,
    NAME: Optional[str] = None,
    API: Optional[str] = None,
    URL: Optional[str] = None,
) -> None:
    """Bind this store to one Hydrus client and verify it is usable.

    Args:
        instance_name: Name of this Hydrus instance (e.g., 'home', 'work').
        api_key: Hydrus Client API access key.
        url: Hydrus client URL (e.g., 'http://192.168.1.230:45869').
        NAME: Keyword alias for ``instance_name``; used only when
            ``instance_name`` is None.
        API: Keyword alias for ``api_key``; used only when ``api_key`` is None.
        URL: Keyword alias for ``url``; used only when ``url`` is None.

    Raises:
        ValueError: If the name, access key or URL is missing or empty.
        RuntimeError: If the health check fails, or a failure was already
            recorded in this process for the same URL/key pair.
    """
    from API.HydrusNetwork import HydrusNetwork as HydrusClient

    # The upper-case keywords only fill in values the caller did not pass positionally.
    if NAME is not None and instance_name is None:
        instance_name = str(NAME)
    if API is not None and api_key is None:
        api_key = str(API)
    if URL is not None and url is None:
        url = str(URL)

    if not (instance_name and api_key and url):
        raise ValueError("HydrusNetwork requires NAME, API, and URL")

    self.NAME = instance_name
    self.API = api_key
    self.URL = url.rstrip("/")

    # Total count (best-effort, used for startup diagnostics)
    self.total_count: Optional[int] = None

    # Self health-check: validate the URL is reachable and the access key is accepted.
    # This MUST NOT attempt to acquire a session key.
    # The outcome (success or failure) is remembered per (URL, key) for the life of the process.
    cache_key = (self.URL, self.API)
    cached = _HYDRUS_INIT_CHECK_CACHE.get(cache_key)
    if cached is None:
        try:
            with httpx.Client(timeout=5.0, verify=False, follow_redirects=True) as client:
                # Step 1: the server answers /api_version with a JSON object.
                version_resp = client.get(f"{self.URL}/api_version")
                version_resp.raise_for_status()
                if not isinstance(version_resp.json(), dict):
                    raise RuntimeError("Hydrus /api_version returned an unexpected response")

                # Step 2: the access key is accepted by /verify_access_key.
                verify_resp = client.get(
                    f"{self.URL}/verify_access_key",
                    headers={"Hydrus-Client-API-Access-Key": self.API},
                )
                verify_resp.raise_for_status()
                if not isinstance(verify_resp.json(), dict):
                    raise RuntimeError("Hydrus /verify_access_key returned an unexpected response")
            _HYDRUS_INIT_CHECK_CACHE[cache_key] = (True, None)
        except Exception as exc:
            err = str(exc)
            _HYDRUS_INIT_CHECK_CACHE[cache_key] = (False, err)
            raise RuntimeError(f"Hydrus '{self.NAME}' unavailable: {err}") from exc
    else:
        ok, err = cached
        if not ok:
            raise RuntimeError(f"Hydrus '{self.NAME}' unavailable: {err or 'Unavailable'}")

    # Create a persistent client for this instance (auth via access key by default).
    self._client = HydrusClient(url=self.URL, access_key=self.API, instance_name=self.NAME)

    # Best-effort total count (used for startup diagnostics); a failure here never blocks construction.
    try:
        self.get_total_count(refresh=True)
    except Exception:
        pass
def get_total_count(self, *, refresh: bool = False) -> Optional[int]:
    """Return a best-effort total file count for this Hydrus instance.

    Intended for diagnostics (e.g., REPL startup checks). This should be fast,
    and it MUST NOT raise.

    Args:
        refresh: When False, a previously stored count is returned without
            contacting Hydrus. When True, the count is queried again.

    Returns:
        The stored or freshly fetched count, or whatever ``self.total_count``
        already holds (possibly None) when both lookups fail.
    """
    if not refresh and self.total_count is not None:
        return self.total_count

    def _pick_count(payload: Any) -> Any:
        # First non-None value among the count keys this method recognizes.
        if not isinstance(payload, dict):
            return None
        for key in ("file_count", "file_count_inclusive", "num_files"):
            found = payload.get(key)
            if found is not None:
                return found
        return None

    # 1) Prefer a direct JSON request (fast + avoids CBOR edge cases).
    try:
        import json as _json

        request_headers = {
            "Hydrus-Client-API-Access-Key": self.API,
            "Accept": "application/json",
        }
        request_params = {
            "tags": _json.dumps(["system:everything"]),
            "return_hashes": "false",
            "return_file_ids": "false",
            "return_file_count": "true",
        }
        with httpx.Client(timeout=5.0, verify=False, follow_redirects=True) as client:
            resp = client.get(
                f"{self.URL}/get_files/search_files",
                params=request_params,
                headers=request_headers,
            )
            resp.raise_for_status()
            json_payload = resp.json()
        json_count = _pick_count(json_payload)
        if isinstance(json_count, int):
            self.total_count = json_count
            return self.total_count
    except Exception as exc:
        debug(f"{self._log_prefix()} total count (json) unavailable: {exc}", file=sys.stderr)

    # 2) Fallback to the API client (CBOR).
    try:
        client_payload = self._client.search_files(
            tags=["system:everything"],
            return_hashes=False,
            return_file_ids=False,
            return_file_count=True,
        )
        client_count = _pick_count(client_payload)
        if isinstance(client_count, int):
            self.total_count = client_count
            return self.total_count
    except Exception as exc:
        debug(f"{self._log_prefix()} total count (client) unavailable: {exc}", file=sys.stderr)

    return self.total_count
def name(self) -> str:
    """Return this store's instance name (the ``NAME`` attribute)."""
    return self.NAME
def get_name(self) -> str:
    """Return this store's instance name (the ``NAME`` attribute)."""
    return self.NAME
def add_file(self, file_path: Path, **kwargs: Any) -> str:
    """Upload a file to Hydrus (or reuse the existing copy) and attach metadata.

    If Hydrus already has a real row for the file's hash, the upload is skipped
    and an undelete is attempted instead. Tags and URLs are then applied to
    both new and existing files.

    Args:
        file_path: Path to the file to upload.
        tag: Optional tag value(s) to add. A single string is treated as one
            tag; None or an empty value means no tags.
        url: Optional URL(s) to associate with the file. A single string is
            treated as one URL; None or an empty value means no URLs.
        title: Optional title (added as a 'title:value' tag unless a
            'title:' tag is already supplied).

    Returns:
        File hash from Hydrus.

    Raises:
        Exception: If hashing fails, the client is unavailable, the upload
            fails, or the upload response carries no hash. Failures while
            checking for duplicates, undeleting, tagging or associating URLs
            are logged and do not raise.
    """
    from SYS.utils import sha256_file

    def _as_list(value: Any) -> list:
        # Accept None/empty, a lone string, or any iterable of values.
        if not value:
            return []
        if isinstance(value, str):
            return [value]
        return list(value)

    title = kwargs.get("title")
    url_arg = kwargs.get("url")

    # Hydrus is lowercase-only tags; normalize here for consistency.
    # Normalizing first also makes the title check below safe for tag=None.
    tag_list = [
        t.strip().lower()
        for t in _as_list(kwargs.get("tag"))
        if isinstance(t, str) and t.strip()
    ]

    # Add title to tags if provided and not already present.
    if title:
        title_tag = f"title:{title}".strip().lower()
        if not any(t.startswith("title:") for t in tag_list):
            tag_list.insert(0, title_tag)

    try:
        # Compute file hash
        file_hash = sha256_file(file_path)
        debug(f"{self._log_prefix()} file hash: {file_hash}")

        client = self._client
        if client is None:
            raise Exception("Hydrus client unavailable")

        # Check if file already exists in Hydrus (best-effort: a failed check means "upload").
        file_exists = False
        try:
            metadata = client.fetch_file_metadata(
                hashes=[file_hash],
                include_service_keys_to_tags=False,
                include_file_url=False,
                include_duration=False,
                include_size=False,
                include_mime=False,
            )
            metas = metadata.get("metadata", []) if isinstance(metadata, dict) else []
            if isinstance(metas, list):
                # Hydrus returns placeholder rows for unknown hashes.
                # Only treat as a real duplicate if it has a concrete file_id.
                file_exists = any(
                    isinstance(meta, dict) and meta.get("file_id") is not None
                    for meta in metas
                )
            if file_exists:
                log(
                    f" Duplicate detected - file already in Hydrus with hash: {file_hash}",
                    file=sys.stderr,
                )
        except Exception as exc:
            debug(f"{self._log_prefix()} duplicate check failed: {exc}")

        if file_exists:
            # If Hydrus reports an existing file, it may be in trash. Best-effort restore it to 'my files'.
            try:
                client.undelete_files([file_hash])
            except Exception as exc:
                debug(f"{self._log_prefix()} undelete failed: {exc}")
        else:
            # Upload file if not already present
            log(f"{self._log_prefix()} Uploading: {file_path.name}", file=sys.stderr)
            response = client.add_file(file_path)

            # Extract hash from response
            hydrus_hash: Optional[str] = None
            if isinstance(response, dict):
                hydrus_hash = response.get("hash") or response.get("file_hash")
                if not hydrus_hash:
                    hashes = response.get("hashes")
                    if isinstance(hashes, list) and hashes:
                        hydrus_hash = hashes[0]
            if not hydrus_hash:
                raise Exception(f"Hydrus response missing file hash: {response}")
            file_hash = hydrus_hash
            log(f"{self._log_prefix()} hash: {file_hash}", file=sys.stderr)

        # Add tags if provided (both for new and existing files)
        if tag_list:
            service_name = "my tags"  # default tag service
            try:
                debug(f"{self._log_prefix()} Adding {len(tag_list)} tag(s): {tag_list}")
                client.add_tag(file_hash, tag_list, service_name)
                log(f"{self._log_prefix()} Tags added via '{service_name}'", file=sys.stderr)
            except Exception as exc:
                log(f"{self._log_prefix()} ⚠️ Failed to add tags: {exc}", file=sys.stderr)

        # Associate url if provided (both for new and existing files)
        url_list = _as_list(url_arg)
        if url_list:
            log(f"{self._log_prefix()} Associating {len(url_list)} URL(s) with file", file=sys.stderr)
            for single_url in url_list:
                if not single_url:
                    continue
                try:
                    client.associate_url(file_hash, str(single_url))
                    debug(f"{self._log_prefix()} Associated URL: {single_url}")
                except Exception as exc:
                    log(
                        f"{self._log_prefix()} ⚠️ Failed to associate URL {single_url}: {exc}",
                        file=sys.stderr,
                    )

        return file_hash
    except Exception as exc:
        log(f"{self._log_prefix()} ❌ upload failed: {exc}", file=sys.stderr)
        raise
def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]:
    """Search Hydrus for files matching ``query`` and return display-ready rows.

    Query forms handled here:
        - ``*``: everything (sent to Hydrus as ``system:everything``).
        - ``url:*`` / ``url:<value>``: files with any URL / a matching URL.
        - ``<namespace>:<value>``: sent to Hydrus as a single tag predicate.
        - free text: each alphanumeric term is searched as ``title:term*`` and
          as ``term*``; the union is then filtered here so that every term
          appears in the title or in a tag without a namespace.
        - ``ext:<value>`` / ``extension:<value>`` anywhere in the query:
          removed from the query text and applied as an extension filter.

    Args:
        query: Search query string.
        limit: Maximum number of results to return (default: 100)

    Returns:
        List of dicts with 'hash', 'url', 'name', 'title', 'size',
        'size_bytes', 'store', 'tag', 'file_id', 'mime' and 'ext' fields.

    Raises:
        Exception: Any failure is logged with a traceback and re-raised.

    Example:
        results = storage["hydrus"].search("artist:john_doe music")
        results = storage["hydrus"].search("Simple Man")
    """
    limit = kwargs.get("limit", 100)
    try:
        client = self._client
        if client is None:
            raise Exception("Hydrus client unavailable")
        prefix = self._log_prefix()
        debug(f"{prefix} Searching for: {query}")

        # Return the non-empty URL strings stored under a metadata row's "url" or "urls" key.
        # NOTE(review): get_metadata in this class also reads "known_urls"; this helper does not — confirm
        # which key the client actually returns.
        def _extract_urls(meta_obj: Any) -> list[str]:
            if not isinstance(meta_obj, dict):
                return []
            raw = meta_obj.get("url")
            if raw is None:
                raw = meta_obj.get("urls")
            if isinstance(raw, str):
                val = raw.strip()
                return [val] if val else []
            if isinstance(raw, list):
                out: list[str] = []
                for item in raw:
                    if not isinstance(item, str):
                        continue
                    s = item.strip()
                    if s:
                        out.append(s)
                return out
            return []

        def _iter_url_filtered_metadata(url_value: str | None, want_any: bool, fetch_limit: int) -> list[dict[str, Any]]:
            """Best-effort URL search by scanning Hydrus metadata with include_file_url=True."""
            # First try a fast system predicate if Hydrus supports it.
            candidate_file_ids: list[int] = []
            try:
                if want_any:
                    predicate = "system:has url"
                    url_search = client.search_files(
                        tags=[predicate],
                        return_hashes=False,
                        return_file_ids=True,
                        return_file_count=False,
                    )
                    ids = url_search.get("file_ids", []) if isinstance(url_search, dict) else []
                    if isinstance(ids, list):
                        candidate_file_ids = [int(x) for x in ids if isinstance(x, (int, float, str)) and str(x).strip().isdigit()]
            except Exception:
                candidate_file_ids = []
            if not candidate_file_ids:
                # Fallback: scan from system:everything and filter by URL substring.
                everything = client.search_files(
                    tags=["system:everything"],
                    return_hashes=False,
                    return_file_ids=True,
                    return_file_count=False,
                )
                ids = everything.get("file_ids", []) if isinstance(everything, dict) else []
                if isinstance(ids, list):
                    candidate_file_ids = [int(x) for x in ids if isinstance(x, (int, float))]
            if not candidate_file_ids:
                return []
            needle = (url_value or "").strip().lower()
            # Metadata is fetched in fixed-size chunks; scanning stops once fetch_limit rows are collected.
            chunk_size = 200
            out: list[dict[str, Any]] = []
            for start in range(0, len(candidate_file_ids), chunk_size):
                if len(out) >= fetch_limit:
                    break
                chunk = candidate_file_ids[start : start + chunk_size]
                try:
                    payload = client.fetch_file_metadata(
                        file_ids=chunk,
                        include_file_url=True,
                        include_service_keys_to_tags=True,
                        include_duration=True,
                        include_size=True,
                        include_mime=True,
                    )
                except Exception:
                    # A failed chunk is skipped; the remaining chunks are still scanned.
                    continue
                metas = payload.get("metadata", []) if isinstance(payload, dict) else []
                if not isinstance(metas, list):
                    continue
                for meta in metas:
                    if not isinstance(meta, dict):
                        continue
                    urls = _extract_urls(meta)
                    if not urls:
                        continue
                    if want_any:
                        # "Any URL" mode: having at least one URL is enough.
                        out.append(meta)
                        if len(out) >= fetch_limit:
                            break
                        continue
                    if not needle:
                        continue
                    # Case-insensitive substring match against each URL.
                    if any(needle in u.lower() for u in urls):
                        out.append(meta)
                        if len(out) >= fetch_limit:
                            break
            return out

        query_lower = query.lower().strip()

        # Support `ext:<value>` anywhere in the query. We filter results by the
        # Hydrus metadata extension field.
        def _normalize_ext_filter(value: str) -> str:
            # Lowercase, drop leading dots, keep alphanumeric characters only.
            v = str(value or "").strip().lower().lstrip('.')
            v = "".join(ch for ch in v if ch.isalnum())
            return v

        ext_filter: str | None = None
        ext_only: bool = False
        try:
            m = re.search(r"\bext:([^\s,]+)", query_lower)
            if not m:
                m = re.search(r"\bextension:([^\s,]+)", query_lower)
            if m:
                ext_filter = _normalize_ext_filter(m.group(1)) or None
                # Strip every ext:/extension: token from the query, then tidy whitespace and commas.
                query_lower = re.sub(r"\s*\b(?:ext|extension):[^\s,]+", " ", query_lower)
                query_lower = re.sub(r"\s{2,}", " ", query_lower).strip().strip(',')
                query = query_lower
                if ext_filter and not query_lower:
                    # Nothing but the extension filter was given: search everything and filter by ext.
                    query = "*"
                    query_lower = "*"
                    ext_only = True
        except Exception:
            ext_filter = None
            ext_only = False

        # Split into meaningful terms for AND logic.
        # Avoid punctuation tokens like '-' that would make matching brittle.
        search_terms = [t for t in re.findall(r"[a-z0-9]+", query_lower) if t]

        # Special case: url:* and url:<value>
        # metadata_list stays None unless the url: path below produced rows.
        metadata_list: list[dict[str, Any]] | None = None
        if ":" in query_lower and not query_lower.startswith(":"):
            namespace, pattern = query_lower.split(":", 1)
            namespace = namespace.strip().lower()
            pattern = pattern.strip()
            if namespace == "url":
                if not pattern or pattern == "*":
                    metadata_list = _iter_url_filtered_metadata(None, want_any=True, fetch_limit=int(limit) if limit else 100)
                else:
                    # Fast-path: exact URL via /add_urls/get_url_files when a full URL is provided.
                    try:
                        if pattern.startswith("http://") or pattern.startswith("https://"):
                            from API.HydrusNetwork import HydrusRequestSpec
                            spec = HydrusRequestSpec(method="GET", endpoint="/add_urls/get_url_files", query={"url": pattern})
                            response = client._perform_request(spec)  # type: ignore[attr-defined]
                            hashes: list[str] = []
                            file_ids: list[int] = []
                            if isinstance(response, dict):
                                raw_hashes = response.get("hashes") or response.get("file_hashes")
                                if isinstance(raw_hashes, list):
                                    hashes = [str(h).strip() for h in raw_hashes if isinstance(h, str) and str(h).strip()]
                                raw_ids = response.get("file_ids")
                                if isinstance(raw_ids, list):
                                    for item in raw_ids:
                                        try:
                                            file_ids.append(int(item))
                                        except (TypeError, ValueError):
                                            continue
                            # File ids are preferred over hashes when both are present.
                            if file_ids:
                                payload = client.fetch_file_metadata(
                                    file_ids=file_ids,
                                    include_file_url=True,
                                    include_service_keys_to_tags=True,
                                    include_duration=True,
                                    include_size=True,
                                    include_mime=True,
                                )
                                metas = payload.get("metadata", []) if isinstance(payload, dict) else []
                                if isinstance(metas, list):
                                    metadata_list = [m for m in metas if isinstance(m, dict)]
                            elif hashes:
                                payload = client.fetch_file_metadata(
                                    hashes=hashes,
                                    include_file_url=True,
                                    include_service_keys_to_tags=True,
                                    include_duration=True,
                                    include_size=True,
                                    include_mime=True,
                                )
                                metas = payload.get("metadata", []) if isinstance(payload, dict) else []
                                if isinstance(metas, list):
                                    metadata_list = [m for m in metas if isinstance(m, dict)]
                    except Exception:
                        metadata_list = None
                    # Fallback: substring scan
                    if metadata_list is None:
                        metadata_list = _iter_url_filtered_metadata(pattern, want_any=False, fetch_limit=int(limit) if limit else 100)

        # Parse the query into tags
        # "*" means "match all" - use system:everything tag in Hydrus
        # If query has explicit namespace, use it as a tag search.
        # If query is free-form, search BOTH:
        # - title:*term* (title: is the only namespace searched implicitly)
        # - *term* (freeform tags; we will filter out other namespace matches client-side)
        tags: list[str] = []
        freeform_union_search: bool = False
        title_predicates: list[str] = []
        freeform_predicates: list[str] = []
        if query.strip() == "*":
            tags = ["system:everything"]
        elif ':' in query_lower:
            tags = [query_lower]
        else:
            freeform_union_search = True
            if search_terms:
                # Hydrus supports wildcard matching primarily as a prefix (e.g., tag*).
                # Use per-term prefix matching for both title: and freeform tags.
                title_predicates = [f"title:{term}*" for term in search_terms]
                freeform_predicates = [f"{term}*" for term in search_terms]
            else:
                # If we can't extract alnum terms, fall back to the raw query text.
                title_predicates = [f"title:{query_lower}*"]
                freeform_predicates = [f"{query_lower}*"]

        # Search files with the tags (unless url: search already produced metadata)
        results = []

        # Pull (file_ids, hashes) out of a search_files payload, skipping entries that are not usable.
        def _extract_search_ids(payload: Any) -> tuple[list[int], list[str]]:
            if not isinstance(payload, dict):
                return [], []
            raw_ids = payload.get("file_ids", [])
            raw_hashes = payload.get("hashes", [])
            ids_out: list[int] = []
            hashes_out: list[str] = []
            if isinstance(raw_ids, list):
                for item in raw_ids:
                    try:
                        ids_out.append(int(item))
                    except (TypeError, ValueError):
                        continue
            if isinstance(raw_hashes, list):
                hashes_out = [str(h).strip() for h in raw_hashes if isinstance(h, str) and str(h).strip()]
            return ids_out, hashes_out

        if metadata_list is None:
            file_ids: list[int] = []
            hashes: list[str] = []
            if freeform_union_search:
                if not title_predicates and not freeform_predicates:
                    debug(f"{prefix} 0 result(s)")
                    return []
                # Run the title: search and the freeform search separately; either may fail independently.
                payloads: list[Any] = []
                try:
                    payloads.append(
                        client.search_files(
                            tags=title_predicates,
                            return_hashes=True,
                            return_file_ids=True,
                        )
                    )
                except Exception:
                    pass
                try:
                    payloads.append(
                        client.search_files(
                            tags=freeform_predicates,
                            return_hashes=True,
                            return_file_ids=True,
                        )
                    )
                except Exception:
                    pass
                # Union of both result sets; sets remove duplicates (result order is not preserved).
                id_set: set[int] = set()
                hash_set: set[str] = set()
                for payload in payloads:
                    ids_part, hashes_part = _extract_search_ids(payload)
                    for fid in ids_part:
                        id_set.add(fid)
                    for hh in hashes_part:
                        hash_set.add(hh)
                file_ids = list(id_set)
                hashes = list(hash_set)
            else:
                if not tags:
                    debug(f"{prefix} 0 result(s)")
                    return []
                search_result = client.search_files(
                    tags=tags,
                    return_hashes=True,
                    return_file_ids=True
                )
                file_ids, hashes = _extract_search_ids(search_result)

            # Fast path: ext-only search. Avoid fetching metadata for an unbounded
            # system:everything result set; fetch in chunks until we have enough.
            if ext_only and ext_filter:
                results: list[dict[str, Any]] = []
                if not file_ids and not hashes:
                    debug(f"{prefix} 0 result(s)")
                    return []
                # Prefer file_ids if available.
                if file_ids:
                    chunk_size = 200
                    for start in range(0, len(file_ids), chunk_size):
                        # NOTE(review): limit is compared as-is here but coerced with int() in the url: path;
                        # a caller passing limit=None would fail this comparison — confirm intended.
                        if len(results) >= limit:
                            break
                        chunk = file_ids[start : start + chunk_size]
                        try:
                            payload = client.fetch_file_metadata(
                                file_ids=chunk,
                                include_service_keys_to_tags=True,
                                include_file_url=False,
                                include_duration=True,
                                include_size=True,
                                include_mime=True,
                            )
                        except Exception:
                            continue
                        metas = payload.get("metadata", []) if isinstance(payload, dict) else []
                        if not isinstance(metas, list):
                            continue
                        for meta in metas:
                            if len(results) >= limit:
                                break
                            if not isinstance(meta, dict):
                                continue
                            mime_type = meta.get("mime")
                            ext = str(meta.get("ext") or "").strip().lstrip('.')
                            if not ext and mime_type:
                                # No ext in the row: look the MIME type up in mime_maps.
                                for category in mime_maps.values():
                                    for _ext_key, info in category.items():
                                        if mime_type in info.get("mimes", []):
                                            ext = str(info.get("ext", "")).strip().lstrip('.')
                                            break
                                    if ext:
                                        break
                            if _normalize_ext_filter(ext) != ext_filter:
                                continue
                            file_id = meta.get("file_id")
                            hash_hex = meta.get("hash")
                            size = meta.get("size", 0)
                            tags_set = meta.get("tags", {})
                            all_tags: list[str] = []
                            title = f"Hydrus File {file_id}"
                            if isinstance(tags_set, dict):
                                # Append lowercased tags to all_tags; the first title: tag seen replaces the
                                # placeholder title.
                                def _collect(tag_list: Any) -> None:
                                    nonlocal title
                                    if not isinstance(tag_list, list):
                                        return
                                    for tag in tag_list:
                                        tag_text = str(tag) if tag else ""
                                        if not tag_text:
                                            continue
                                        tag_l = tag_text.strip().lower()
                                        if not tag_l:
                                            continue
                                        all_tags.append(tag_l)
                                        if tag_l.startswith("title:") and title == f"Hydrus File {file_id}":
                                            title = tag_l.split(":", 1)[1].strip()

                                for _service_name, service_tags in tags_set.items():
                                    if not isinstance(service_tags, dict):
                                        continue
                                    storage_tags = service_tags.get("storage_tags", {})
                                    if isinstance(storage_tags, dict):
                                        for tag_list in storage_tags.values():
                                            _collect(tag_list)
                                    display_tags = service_tags.get("display_tags", [])
                                    _collect(display_tags)
                            file_url = f"{self.URL.rstrip('/')}/get_files/file?hash={hash_hex}"
                            results.append(
                                {
                                    "hash": hash_hex,
                                    "url": file_url,
                                    "name": title,
                                    "title": title,
                                    "size": size,
                                    "size_bytes": size,
                                    "store": self.NAME,
                                    "tag": all_tags,
                                    "file_id": file_id,
                                    "mime": mime_type,
                                    "ext": ext,
                                }
                            )
                    debug(f"{prefix} {len(results)} result(s)")
                    return results[:limit]
                # If we only got hashes, fall back to the normal flow below.

            if not file_ids and not hashes:
                debug(f"{prefix} 0 result(s)")
                return []
            # Fetch metadata for every match in one call, preferring file ids over hashes.
            if file_ids:
                metadata = client.fetch_file_metadata(
                    file_ids=file_ids,
                    include_service_keys_to_tags=True,
                    include_file_url=False,
                    include_duration=True,
                    include_size=True,
                    include_mime=True,
                )
                metadata_list = metadata.get("metadata", [])
            elif hashes:
                metadata = client.fetch_file_metadata(
                    hashes=hashes,
                    include_service_keys_to_tags=True,
                    include_file_url=False,
                    include_duration=True,
                    include_size=True,
                    include_mime=True,
                )
                metadata_list = metadata.get("metadata", [])
            else:
                metadata_list = []

        # If our free-text searches produce nothing (or nothing survived downstream filtering), fallback to scanning.
        if (not metadata_list) and (query_lower != "*") and (":" not in query_lower):
            try:
                search_result = client.search_files(
                    tags=["system:everything"],
                    return_hashes=True,
                    return_file_ids=True,
                )
                file_ids, hashes = _extract_search_ids(search_result)
                if file_ids:
                    metadata = client.fetch_file_metadata(
                        file_ids=file_ids,
                        include_service_keys_to_tags=True,
                        include_file_url=False,
                        include_duration=True,
                        include_size=True,
                        include_mime=True,
                    )
                    metadata_list = metadata.get("metadata", [])
                elif hashes:
                    metadata = client.fetch_file_metadata(
                        hashes=hashes,
                        include_service_keys_to_tags=True,
                        include_file_url=False,
                        include_duration=True,
                        include_size=True,
                        include_mime=True,
                    )
                    metadata_list = metadata.get("metadata", [])
            except Exception:
                pass
        if not isinstance(metadata_list, list):
            metadata_list = []

        # Turn each metadata row into a result dict, applying the free-form term filter where needed.
        for meta in metadata_list:
            if len(results) >= limit:
                break
            file_id = meta.get("file_id")
            hash_hex = meta.get("hash")
            size = meta.get("size", 0)
            # Get tags for this file and extract title
            tags_set = meta.get("tags", {})
            all_tags = []
            title = f"Hydrus File {file_id}"  # Default fallback
            all_tags_str = ""  # For substring matching
            # debug(f"[HydrusBackend.search] Processing file_id={file_id}, tags type={type(tags_set)}")
            if isinstance(tags_set, dict):
                # Collect both storage_tags and display_tags to capture siblings/parents and ensure title: is seen
                def _collect(tag_list: Any) -> None:
                    nonlocal title, all_tags_str
                    if not isinstance(tag_list, list):
                        return
                    for tag in tag_list:
                        tag_text = str(tag) if tag else ""
                        if not tag_text:
                            continue
                        tag_l = tag_text.strip().lower()
                        if not tag_l:
                            continue
                        all_tags.append(tag_l)
                        all_tags_str += " " + tag_l
                        if tag_l.startswith("title:") and title == f"Hydrus File {file_id}":
                            title = tag_l.split(":", 1)[1].strip()

                for _service_name, service_tags in tags_set.items():
                    if not isinstance(service_tags, dict):
                        continue
                    storage_tags = service_tags.get("storage_tags", {})
                    if isinstance(storage_tags, dict):
                        for tag_list in storage_tags.values():
                            _collect(tag_list)
                    # NOTE(review): _collect only accepts a list, while get_metadata in this class reads
                    # display_tags as a dict keyed by "0" — confirm the shape the client returns.
                    display_tags = service_tags.get("display_tags", [])
                    _collect(display_tags)
                # Also consider top-level flattened tags payload if provided (Hydrus API sometimes includes it)
                top_level_tags = meta.get("tags_flat", []) or meta.get("tags", [])
                _collect(top_level_tags)
            # Prefer Hydrus-provided extension (e.g. ".webm"); fall back to MIME map.
            mime_type = meta.get("mime")
            ext = str(meta.get("ext") or "").strip().lstrip('.')
            if not ext and mime_type:
                for category in mime_maps.values():
                    for _ext_key, info in category.items():
                        if mime_type in info.get("mimes", []):
                            ext = str(info.get("ext", "")).strip().lstrip('.')
                            break
                    if ext:
                        break
            # Filter results based on query type
            # If user provided explicit namespace (has ':'), don't do substring filtering
            # Just include what the tag search returned
            has_namespace = ':' in query_lower
            if has_namespace:
                # Explicit namespace search - already filtered by Hydrus tag search
                # Include this result as-is
                file_url = f"{self.URL.rstrip('/')}/get_files/file?hash={hash_hex}"
                results.append({
                    "hash": hash_hex,
                    "url": file_url,
                    "name": title,
                    "title": title,
                    "size": size,
                    "size_bytes": size,
                    "store": self.NAME,
                    "tag": all_tags,
                    "file_id": file_id,
                    "mime": mime_type,
                    "ext": ext,
                })
            else:
                # Free-form search: check if search terms match title or FREEFORM tags.
                # Do NOT implicitly match other namespace tags (except title:).
                freeform_tags = [t for t in all_tags if isinstance(t, str) and t and (":" not in t)]
                searchable_text = (title + " " + " ".join(freeform_tags)).lower()
                # Every term must appear as a substring (AND semantics); "*" matches everything.
                match = True
                if query_lower != "*" and search_terms:
                    for term in search_terms:
                        if term not in searchable_text:
                            match = False
                            break
                if match:
                    file_url = f"{self.URL.rstrip('/')}/get_files/file?hash={hash_hex}"
                    results.append({
                        "hash": hash_hex,
                        "url": file_url,
                        "name": title,
                        "title": title,
                        "size": size,
                        "size_bytes": size,
                        "store": self.NAME,
                        "tag": all_tags,
                        "file_id": file_id,
                        "mime": mime_type,
                        "ext": ext,
                    })
        debug(f"{prefix} {len(results)} result(s)")
        # Apply the ext:/extension: filter last, to whatever rows were collected above.
        if ext_filter:
            wanted = ext_filter
            filtered: list[dict[str, Any]] = []
            for item in results:
                try:
                    if _normalize_ext_filter(str(item.get("ext") or "")) == wanted:
                        filtered.append(item)
                except Exception:
                    continue
            results = filtered
        return results[:limit]
    except Exception as exc:
        log(f"❌ Hydrus search failed: {exc}", file=sys.stderr)
        import traceback
        traceback.print_exc(file=sys.stderr)
        raise
def get_file(self, file_hash: str, **kwargs: Any) -> Path | str | None:
    """Return a browser URL for the file.

    IMPORTANT: this method must be side-effect free (do not auto-open a browser).
    Only explicit user actions (e.g. the get-file cmdlet) should open files.

    Args:
        file_hash: Hash of the file to address in the URL.
        **kwargs: Accepted for interface compatibility; not used.

    Returns:
        URL string of the form
        ``<URL>/get_files/file?hash=<hash>&Hydrus-Client-API-Access-Key=<key>``.
    """
    debug(f"{self._log_prefix()} get_file: start hash={file_hash[:12]}...")

    # Build browser URL with access key
    base_url = str(self.URL).rstrip('/')
    access_key = str(self.API)
    file_url = f"{base_url}/get_files/file?hash={file_hash}"
    browser_url = f"{file_url}&Hydrus-Client-API-Access-Key={access_key}"

    # The access key is a credential: log the URL with the key masked so it
    # never ends up in debug output. The returned URL is unchanged.
    debug(f"{self._log_prefix()} get_file: url={file_url}&Hydrus-Client-API-Access-Key=<redacted>")
    return browser_url
def delete_file(self, file_identifier: str, **kwargs: Any) -> bool:
    """Delete a file from Hydrus, then try to clear its deletion record.

    This is used by the delete-file cmdlet when the item belongs to a HydrusNetwork store.

    Args:
        file_identifier: SHA256 hash of the file; surrounding whitespace and
            letter case are normalized before validation.
        reason: Optional deletion reason (keyword argument). Only a non-blank
            string is forwarded; anything else is sent as None.

    Returns:
        True once the delete call succeeded (even if clearing the deletion
        record failed); False when the client is unavailable, the hash is
        not 64 lowercase hex characters after normalization, or the delete
        call raised.
    """
    hex_digits = "0123456789abcdef"
    try:
        client = self._client
        if client is None:
            debug(f"{self._log_prefix()} delete_file: client unavailable")
            return False

        file_hash = str(file_identifier or "").strip().lower()
        is_sha256 = len(file_hash) == 64 and all(ch in hex_digits for ch in file_hash)
        if not is_sha256:
            debug(f"{self._log_prefix()} delete_file: invalid file hash '{file_identifier}'")
            return False

        reason = kwargs.get("reason")
        reason_text = None
        if isinstance(reason, str) and reason.strip():
            reason_text = str(reason).strip()

        # 1) Delete file
        client.delete_files([file_hash], reason=reason_text)

        # 2) Clear deletion record (best-effort)
        try:
            client.clear_file_deletion_record([file_hash])
        except Exception as exc:
            debug(f"{self._log_prefix()} delete_file: clear_file_deletion_record failed: {exc}")
        return True
    except Exception as exc:
        debug(f"{self._log_prefix()} delete_file failed: {exc}")
        return False
def get_metadata(self, file_hash: str, **kwargs: Any) -> Optional[Dict[str, Any]]:
    """Look up one file by hash and return a flat metadata dict for display.

    Args:
        file_hash: SHA256 hash of the file (64-char hex string).
        **kwargs: Accepted for interface compatibility; not used.

    Returns:
        Dict with 'hash', 'title', 'ext', 'size', 'mime', 'hydrus_mime',
        'filetype_human', 'duration_ms' and 'url' keys, or None when the
        client is unavailable, Hydrus has no real row for the hash, or any
        error occurs (errors are only reported through debug output).
    """

    def _int_or_none(value: Any) -> Optional[int]:
        # Normalize to a stable scalar: int when convertible, otherwise None.
        if value is None:
            return None
        try:
            return int(value)
        except Exception:
            return None

    def _mime_from_ext(ext_value: str) -> str:
        # Best-effort: first MIME string listed for this extension in mime_maps, "" when unknown.
        ext_clean = str(ext_value or "").strip().lstrip(".").lower()
        if ext_clean:
            try:
                for category in mime_maps.values():
                    info = category.get(ext_clean)
                    if not isinstance(info, dict):
                        continue
                    mimes = info.get("mimes")
                    if isinstance(mimes, list) and mimes:
                        return str(mimes[0])
            except Exception:
                return ""
        return ""

    try:
        client = self._client
        if not client:
            debug(f"{self._log_prefix()} get_metadata: client unavailable")
            return None

        # Fetch file metadata with the fields we need for CLI display.
        payload = client.fetch_file_metadata(
            hashes=[file_hash],
            include_service_keys_to_tags=True,
            include_file_url=True,
            include_duration=True,
            include_size=True,
            include_mime=True,
        )
        if not (payload and payload.get("metadata")):
            return None
        meta = payload["metadata"][0]
        # Hydrus can return placeholder metadata rows for unknown hashes.
        if not isinstance(meta, dict) or meta.get("file_id") is None:
            return None

        # Extract title from tags: first "title:" tag under display_tags["0"] of any service.
        fallback_title = f"Hydrus_{file_hash[:12]}"
        title = fallback_title
        tags_payload = meta.get("tags", {})
        if isinstance(tags_payload, dict):
            for service_data in tags_payload.values():
                if not isinstance(service_data, dict):
                    continue
                display_tags = service_data.get("display_tags", {})
                if not isinstance(display_tags, dict):
                    continue
                current_tags = display_tags.get("0", [])
                if not isinstance(current_tags, list):
                    continue
                for tag in current_tags:
                    if str(tag).lower().startswith("title:"):
                        title = tag.split(":", 1)[1].strip()
                        break
                if title != fallback_title:
                    break

        # Hydrus may return mime as an int enum, or sometimes a human label.
        mime_val = meta.get("mime")
        filetype_human = meta.get("filetype_human") or meta.get("mime_human") or meta.get("mime_string")

        # Determine ext: prefer Hydrus metadata ext, then filetype_human (when it looks like an ext),
        # then title suffix, then file path suffix.
        ext = str(meta.get("ext") or "").strip().lstrip(".")
        if not ext:
            ft = str(filetype_human or "").strip().lstrip(".").lower()
            # Treat simple labels like "mp4", "m4a", "webm" as extensions.
            looks_like_ext = bool(ft) and ft != "unknown filetype" and ft.isalnum() and len(ft) <= 8
            if looks_like_ext:
                ext = ft
        if not ext and isinstance(title, str) and "." in title:
            try:
                ext = Path(title).suffix.lstrip(".")
            except Exception:
                ext = ""
        if not ext:
            try:
                path_payload = client.get_file_path(file_hash)
                stored_path = path_payload.get("path") if isinstance(path_payload, dict) else None
                if isinstance(stored_path, str) and stored_path.strip():
                    ext = Path(stored_path.strip()).suffix.lstrip(".")
            except Exception:
                ext = ""

        # Normalize to a MIME string for CLI output.
        # Avoid passing through human labels like "unknown filetype".
        mime_type = ""
        for raw_label in (mime_val, filetype_human):
            if mime_type:
                break
            if isinstance(raw_label, str):
                candidate = raw_label.strip()
                if "/" in candidate and candidate.lower() != "unknown filetype":
                    mime_type = candidate
        if not mime_type:
            mime_type = _mime_from_ext(ext)

        # Normalize size/duration to stable scalar types.
        size_val = meta.get("size")
        if size_val is None:
            size_val = meta.get("size_bytes")
        dur_val = meta.get("duration")
        if dur_val is None:
            dur_val = meta.get("duration_ms")

        raw_urls = meta.get("known_urls") or meta.get("urls") or meta.get("url") or []
        if isinstance(raw_urls, str):
            single = raw_urls.strip()
            url_list = [single] if single else []
        elif isinstance(raw_urls, list):
            url_list = [str(u).strip() for u in raw_urls if isinstance(u, str) and str(u).strip()]
        else:
            url_list = []

        return {
            "hash": file_hash,
            "title": title,
            "ext": ext,
            "size": _int_or_none(size_val),
            "mime": mime_type,
            # Keep raw fields available for troubleshooting/other callers.
            "hydrus_mime": mime_val,
            "filetype_human": filetype_human,
            "duration_ms": _int_or_none(dur_val),
            "url": url_list,
        }
    except Exception as exc:
        debug(f"{self._log_prefix()} get_metadata failed: {exc}")
        return None
def get_tag(self, file_identifier: str, **kwargs: Any) -> Tuple[List[str], str]:
    """Get tags for a file from Hydrus by hash.

    Args:
        file_identifier: File hash (SHA256 hex string); surrounding whitespace
            and letter case are ignored.
        **kwargs: Optional ``service_name`` naming the tag service to read.
            Defaults to "my tags", matching ``add_tag`` / ``delete_tag``.

    Returns:
        Tuple of (tags_list, source_description). On success the tags are
        stripped and lowercased and the source is "hydrus". On any failure
        (invalid hash, no client, no metadata, exception) the result is
        ``([], "unknown")``.
    """
    try:
        from API import HydrusNetwork as hydrus_wrapper

        file_hash = str(file_identifier or "").strip().lower()
        if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash):
            debug(f"{self._log_prefix()} get_tags: invalid file hash '{file_identifier}'")
            return [], "unknown"

        # Get Hydrus client and service info
        client = self._client
        if not client:
            debug(f"{self._log_prefix()} get_tags: client unavailable")
            return [], "unknown"

        # Fetch file metadata
        payload = client.fetch_file_metadata(
            hashes=[file_hash],
            include_service_keys_to_tags=True,
            include_file_url=False
        )
        items = payload.get("metadata") if isinstance(payload, dict) else None
        if not isinstance(items, list) or not items:
            debug(f"{self._log_prefix()} get_tags: no metadata for hash {file_hash}")
            return [], "unknown"

        meta = items[0] if isinstance(items[0], dict) else None
        # A missing file_id is treated as "no usable metadata for this hash".
        if not isinstance(meta, dict) or meta.get("file_id") is None:
            debug(f"{self._log_prefix()} get_tags: invalid metadata for hash {file_hash}")
            return [], "unknown"

        # Honour the caller-selected tag service instead of always reading the
        # default one; the fallback keeps the previous behaviour.
        service_name = kwargs.get("service_name") or "my tags"
        service_key = hydrus_wrapper.get_tag_service_key(client, service_name)

        # Extract tags from metadata
        tags = self._extract_tags_from_hydrus_meta(meta, service_key, service_name)
        return [str(t).strip().lower() for t in tags if isinstance(t, str) and t.strip()], "hydrus"
    except Exception as exc:
        debug(f"{self._log_prefix()} get_tags failed: {exc}")
        return [], "unknown"
def add_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool:
"""Add tags to a Hydrus file.
"""
try:
client = self._client
if client is None:
debug(f"{self._log_prefix()} add_tag: client unavailable")
return False
file_hash = str(file_identifier or "").strip().lower()
if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash):
debug(f"{self._log_prefix()} add_tag: invalid file hash '{file_identifier}'")
return False
service_name = kwargs.get("service_name") or "my tags"
incoming_tags = [str(t).strip().lower() for t in (tags or []) if isinstance(t, str) and str(t).strip()]
if not incoming_tags:
return True
try:
existing_tags, _src = self.get_tag(file_hash)
except Exception:
existing_tags = []
from metadata import compute_namespaced_tag_overwrite
tags_to_remove, tags_to_add, _merged = compute_namespaced_tag_overwrite(existing_tags, incoming_tags)
if not tags_to_add and not tags_to_remove:
return True
did_any = False
if tags_to_remove:
try:
client.delete_tag(file_hash, tags_to_remove, service_name)
did_any = True
except Exception as exc:
debug(f"{self._log_prefix()} add_tag: delete_tag failed: {exc}")
if tags_to_add:
try:
client.add_tag(file_hash, tags_to_add, service_name)
did_any = True
except Exception as exc:
debug(f"{self._log_prefix()} add_tag: add_tag failed: {exc}")
return did_any
except Exception as exc:
debug(f"{self._log_prefix()} add_tag failed: {exc}")
return False
def delete_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool:
"""Delete tags from a Hydrus file.
"""
try:
client = self._client
if client is None:
debug(f"{self._log_prefix()} delete_tag: client unavailable")
return False
file_hash = str(file_identifier or "").strip().lower()
if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash):
debug(f"{self._log_prefix()} delete_tag: invalid file hash '{file_identifier}'")
return False
service_name = kwargs.get("service_name") or "my tags"
raw_list = list(tags) if isinstance(tags, (list, tuple)) else [str(tags)]
tag_list = [str(t).strip().lower() for t in raw_list if isinstance(t, str) and str(t).strip()]
if not tag_list:
return False
client.delete_tag(file_hash, tag_list, service_name)
return True
except Exception as exc:
debug(f"{self._log_prefix()} delete_tag failed: {exc}")
return False
def get_url(self, file_identifier: str, **kwargs: Any) -> List[str]:
"""Get known url for a Hydrus file.
"""
try:
client = self._client
if client is None:
debug(f"{self._log_prefix()} get_url: client unavailable")
return []
file_hash = str(file_identifier or "").strip().lower()
if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash):
return []
payload = client.fetch_file_metadata(hashes=[file_hash], include_file_url=False)
items = payload.get("metadata") if isinstance(payload, dict) else None
if not isinstance(items, list) or not items:
return []
meta = items[0] if isinstance(items[0], dict) else {}
raw_urls: Any = (
meta.get("known_urls")
or meta.get("urls")
or meta.get("url")
or []
)
if isinstance(raw_urls, str):
val = raw_urls.strip()
return [val] if val else []
if isinstance(raw_urls, list):
out: list[str] = []
for u in raw_urls:
if not isinstance(u, str):
continue
u = u.strip()
if u:
out.append(u)
return out
return []
except Exception as exc:
debug(f"{self._log_prefix()} get_url failed: {exc}")
return []
def add_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
"""Associate one or more url with a Hydrus file.
"""
try:
client = self._client
if client is None:
debug(f"{self._log_prefix()} add_url: client unavailable")
return False
for u in url:
client.associate_url(file_identifier, u)
return True
except Exception as exc:
debug(f"{self._log_prefix()} add_url failed: {exc}")
return False
def add_url_bulk(self, items: List[tuple[str, List[str]]], **kwargs: Any) -> bool:
"""Bulk associate urls with Hydrus files.
This is a best-effort convenience wrapper used by cmdlets to batch url associations.
Hydrus' client API is still called per (hash,url) pair, but this consolidates the
cmdlet-level control flow so url association can be deferred until the end.
"""
try:
client = self._client
if client is None:
debug(f"{self._log_prefix()} add_url_bulk: client unavailable")
return False
any_success = False
for file_identifier, urls in (items or []):
h = str(file_identifier or "").strip().lower()
if len(h) != 64:
continue
for u in (urls or []):
s = str(u or "").strip()
if not s:
continue
try:
client.associate_url(h, s)
any_success = True
except Exception:
continue
return any_success
except Exception as exc:
debug(f"{self._log_prefix()} add_url_bulk failed: {exc}")
return False
def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
"""Delete one or more url from a Hydrus file.
"""
try:
client = self._client
if client is None:
debug(f"{self._log_prefix()} delete_url: client unavailable")
return False
for u in url:
client.delete_url(file_identifier, u)
return True
except Exception as exc:
debug(f"{self._log_prefix()} delete_url failed: {exc}")
return False
def get_note(self, file_identifier: str, **kwargs: Any) -> Dict[str, str]:
"""Get notes for a Hydrus file (default note service only)."""
try:
client = self._client
if client is None:
debug(f"{self._log_prefix()} get_note: client unavailable")
return {}
file_hash = str(file_identifier or "").strip().lower()
if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash):
return {}
payload = client.fetch_file_metadata(hashes=[file_hash], include_notes=True)
items = payload.get("metadata") if isinstance(payload, dict) else None
if not isinstance(items, list) or not items:
return {}
meta = items[0] if isinstance(items[0], dict) else None
if not isinstance(meta, dict):
return {}
notes_payload = meta.get("notes")
if isinstance(notes_payload, dict):
return {str(k): str(v or "") for k, v in notes_payload.items() if str(k).strip()}
return {}
except Exception as exc:
debug(f"{self._log_prefix()} get_note failed: {exc}")
return {}
def set_note(self, file_identifier: str, name: str, text: str, **kwargs: Any) -> bool:
"""Set a named note for a Hydrus file (default note service only)."""
try:
client = self._client
if client is None:
debug(f"{self._log_prefix()} set_note: client unavailable")
return False
file_hash = str(file_identifier or "").strip().lower()
if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash):
return False
note_name = str(name or "").strip()
if not note_name:
return False
note_text = str(text or "")
client.set_notes(file_hash, {note_name: note_text})
return True
except Exception as exc:
debug(f"{self._log_prefix()} set_note failed: {exc}")
return False
def delete_note(self, file_identifier: str, name: str, **kwargs: Any) -> bool:
"""Delete a named note for a Hydrus file (default note service only)."""
try:
client = self._client
if client is None:
debug(f"{self._log_prefix()} delete_note: client unavailable")
return False
file_hash = str(file_identifier or "").strip().lower()
if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash):
return False
note_name = str(name or "").strip()
if not note_name:
return False
client.delete_notes(file_hash, [note_name])
return True
except Exception as exc:
debug(f"{self._log_prefix()} delete_note failed: {exc}")
return False
@staticmethod
def _extract_tags_from_hydrus_meta(
meta: Dict[str, Any],
service_key: Optional[str],
service_name: str
) -> List[str]:
"""Extract current tags from Hydrus metadata dict.
Prefers display_tags (includes siblings/parents, excludes deleted).
Falls back to storage_tags status '0' (current).
"""
tags_payload = meta.get("tags")
if not isinstance(tags_payload, dict):
return []
svc_data = None
if service_key:
svc_data = tags_payload.get(service_key)
if not isinstance(svc_data, dict):
return []
# Prefer display_tags (Hydrus computes siblings/parents)
display = svc_data.get("display_tags")
if isinstance(display, list) and display:
return [str(t) for t in display if isinstance(t, (str, bytes)) and str(t).strip()]
# Fallback to storage_tags status '0' (current)
storage = svc_data.get("storage_tags")
if isinstance(storage, dict):
current_list = storage.get("0") or storage.get(0)
if isinstance(current_list, list):
return [str(t) for t in current_list if isinstance(t, (str, bytes)) and str(t).strip()]
return []