Files
Medios-Macina/Store/HydrusNetwork.py

1669 lines
69 KiB
Python
Raw Normal View History

2025-12-11 19:04:02 -08:00
from __future__ import annotations
import re
import sys
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple
2025-12-13 12:09:50 -08:00
import httpx
2025-12-11 19:04:02 -08:00
from SYS.logger import debug, log
from SYS.utils_constant import mime_maps
2025-12-11 23:21:45 -08:00
from Store._base import Store
2025-12-11 19:04:02 -08:00
_HYDRUS_INIT_CHECK_CACHE: dict[tuple[str,
str],
tuple[bool,
Optional[str]]] = {}
2025-12-13 12:09:50 -08:00
2025-12-11 23:21:45 -08:00
class HydrusNetwork(Store):
2025-12-11 19:04:02 -08:00
"""File storage backend for Hydrus client.
2025-12-29 17:05:03 -08:00
2025-12-11 19:04:02 -08:00
Each instance represents a specific Hydrus client connection.
2025-12-13 12:09:50 -08:00
Maintains its own HydrusClient.
2025-12-11 19:04:02 -08:00
"""
2025-12-13 00:18:30 -08:00
2025-12-16 23:23:43 -08:00
def _log_prefix(self) -> str:
    """Return a log prefix like ``[hydrusnetwork:<NAME>]`` for this instance."""
    label = getattr(self, "NAME", None) or "unknown"
    return f"[hydrusnetwork:{label}]"
2025-12-13 00:18:30 -08:00
def __new__(cls, *args: Any, **kwargs: Any) -> "HydrusNetwork":
    """Pre-seed NAME/API/URL from kwargs so they exist before __init__ runs."""
    instance = super().__new__(cls)
    for attr in ("NAME", "API", "URL"):
        value = kwargs.get(attr)
        if value is not None:
            setattr(instance, attr, str(value))
    return instance

# Advertise the recognized config keys on the constructor (introspected elsewhere).
setattr(__new__, "keys", ("NAME", "API", "URL"))
2025-12-29 17:05:03 -08:00
2025-12-13 00:18:30 -08:00
def __init__(
    self,
    instance_name: Optional[str] = None,
    api_key: Optional[str] = None,
    url: Optional[str] = None,
    *,
    NAME: Optional[str] = None,
    API: Optional[str] = None,
    URL: Optional[str] = None,
) -> None:
    """Initialize Hydrus storage backend.

    Args:
        instance_name: Name of this Hydrus instance (e.g., 'home', 'work')
        api_key: Hydrus Client API access key
        url: Hydrus client URL (e.g., 'http://192.168.1.230:45869')

    The keyword-only NAME/API/URL aliases fill in whichever positional
    parameter was omitted.

    Raises:
        ValueError: if name, key, or URL is still missing after alias resolution.
        RuntimeError: if the Hydrus endpoint is unreachable or rejects the key.
    """
    from API.HydrusNetwork import HydrusNetwork as HydrusClient

    # Resolve keyword aliases onto the positional parameters.
    if instance_name is None and NAME is not None:
        instance_name = str(NAME)
    if api_key is None and API is not None:
        api_key = str(API)
    if url is None and URL is not None:
        url = str(URL)
    if not instance_name or not api_key or not url:
        raise ValueError("HydrusNetwork requires NAME, API, and URL")

    self.NAME = instance_name
    self.API = api_key
    self.URL = url.rstrip("/")
    # Total count (best-effort, used for startup diagnostics)
    self.total_count: Optional[int] = None

    # Self health-check: confirm the URL is reachable and the access key is
    # accepted. This MUST NOT attempt to acquire a session key. Outcomes are
    # memoized per (URL, key) so repeat constructions skip the network probe.
    cache_key = (self.URL, self.API)
    cached = _HYDRUS_INIT_CHECK_CACHE.get(cache_key)
    if cached is not None:
        ok, err = cached
        if not ok:
            raise RuntimeError(
                f"Hydrus '{self.NAME}' unavailable: {err or 'Unavailable'}"
            )
    else:
        endpoint_version = f"{self.URL}/api_version"
        endpoint_verify = f"{self.URL}/verify_access_key"
        try:
            # verify=False: Hydrus installs commonly use self-signed certs.
            with httpx.Client(timeout=5.0, verify=False, follow_redirects=True) as probe:
                resp_version = probe.get(endpoint_version)
                resp_version.raise_for_status()
                if not isinstance(resp_version.json(), dict):
                    raise RuntimeError(
                        "Hydrus /api_version returned an unexpected response"
                    )
                resp_verify = probe.get(
                    endpoint_verify,
                    headers={"Hydrus-Client-API-Access-Key": self.API},
                )
                resp_verify.raise_for_status()
                if not isinstance(resp_verify.json(), dict):
                    raise RuntimeError(
                        "Hydrus /verify_access_key returned an unexpected response"
                    )
            _HYDRUS_INIT_CHECK_CACHE[cache_key] = (True, None)
        except Exception as exc:
            err = str(exc)
            _HYDRUS_INIT_CHECK_CACHE[cache_key] = (False, err)
            raise RuntimeError(f"Hydrus '{self.NAME}' unavailable: {err}") from exc

    # Create a persistent client for this instance (auth via access key by default).
    self._client = HydrusClient(
        url=self.URL,
        access_key=self.API,
        instance_name=self.NAME
    )

    # Best-effort total count (used for startup diagnostics). Avoid heavy payloads.
    # Some Hydrus setups appear to return no count via the CBOR client for this
    # endpoint, so get_total_count prefers a direct JSON request internally.
    try:
        self.get_total_count(refresh=True)
    except Exception:
        # Diagnostics only; never block construction on the count.
        pass
def get_total_count(self, *, refresh: bool = False) -> Optional[int]:
    """Best-effort total file count for this Hydrus instance.

    Intended for diagnostics (e.g., REPL startup checks). This should be fast,
    and it MUST NOT raise.

    Args:
        refresh: when True, bypass the cached value and re-query Hydrus.

    Returns:
        The file count, or None if it could not be determined.
    """
    if self.total_count is not None and not refresh:
        return self.total_count

    def _extract_count(payload: Any) -> Optional[int]:
        # Different Hydrus builds report the count under different keys;
        # accept the first integer among the known candidates.
        # (Previously this lookup chain was duplicated in both branches.)
        if not isinstance(payload, dict):
            return None
        for key in ("file_count", "file_count_inclusive", "num_files"):
            value = payload.get(key)
            if isinstance(value, int):
                return value
        return None

    # 1) Prefer a direct JSON request (fast + avoids CBOR edge cases).
    try:
        import json as _json

        url = f"{self.URL}/get_files/search_files"
        params = {
            "tags": _json.dumps(["system:everything"]),
            "return_hashes": "false",
            "return_file_ids": "false",
            "return_file_count": "true",
        }
        headers = {
            "Hydrus-Client-API-Access-Key": self.API,
            "Accept": "application/json",
        }
        with httpx.Client(timeout=5.0,
                          verify=False,
                          follow_redirects=True) as client:
            resp = client.get(url, params=params, headers=headers)
            resp.raise_for_status()
            count_val = _extract_count(resp.json())
            if count_val is not None:
                self.total_count = count_val
                return self.total_count
    except Exception as exc:
        debug(
            f"{self._log_prefix()} total count (json) unavailable: {exc}",
            file=sys.stderr
        )

    # 2) Fallback to the API client (CBOR).
    try:
        payload = self._client.search_files(
            tags=["system:everything"],
            return_hashes=False,
            return_file_ids=False,
            return_file_count=True,
        )
        count_val = _extract_count(payload)
        if count_val is not None:
            self.total_count = count_val
        return self.total_count
    except Exception as exc:
        debug(
            f"{self._log_prefix()} total count (client) unavailable: {exc}",
            file=sys.stderr
        )
    return self.total_count
2025-12-13 00:18:30 -08:00
2025-12-11 19:04:02 -08:00
def name(self) -> str:
    """Return this store's configured instance name (Store interface)."""
    return self.NAME

def get_name(self) -> str:
    """Alias of ``name()``; kept for callers expecting getter-style access."""
    return self.NAME
2025-12-11 19:04:02 -08:00
def add_file(self, file_path: Path, **kwargs: Any) -> str:
    """Upload file to Hydrus with full metadata support.

    Args:
        file_path: Path to the file to upload
        tag: Optional list of tag values to add
        url: Optional list of url to associate with the file
        title: Optional title (will be added as 'title:value' tag)

    Returns:
        File hash from Hydrus

    Raises:
        Exception: If upload fails
    """
    from SYS.utils import sha256_file

    tag_list = kwargs.get("tag", [])
    urls = kwargs.get("url", [])
    title = kwargs.get("title")

    # Add title to tags if provided and not already present
    if title:
        title_tag = f"title:{title}".strip().lower()
        if not any(str(candidate).lower().startswith("title:")
                   for candidate in tag_list):
            tag_list = [title_tag] + list(tag_list)

    # Hydrus is lowercase-only tags; normalize here for consistency.
    tag_list = [
        str(t).strip().lower() for t in (tag_list or [])
        if isinstance(t, str) and str(t).strip()
    ]

    try:
        # Compute file hash
        file_hash = sha256_file(file_path)
        debug(f"{self._log_prefix()} file hash: {file_hash}")

        # Use persistent client with session key
        client = self._client
        if client is None:
            raise Exception("Hydrus client unavailable")

        # Check if file already exists in Hydrus
        file_exists = False
        try:
            metadata = client.fetch_file_metadata(
                hashes=[file_hash],
                include_service_keys_to_tags=False,
                include_file_url=False,
                include_duration=False,
                include_size=False,
                include_mime=False,
            )
            if metadata and isinstance(metadata, dict):
                metas = metadata.get("metadata", [])
                if isinstance(metas, list) and metas:
                    # Hydrus returns placeholder rows for unknown hashes.
                    # Only treat as a real duplicate if it has a concrete file_id.
                    for meta in metas:
                        if isinstance(meta, dict) and meta.get("file_id") is not None:
                            file_exists = True
                            break
                if file_exists:
                    log(
                        f" Duplicate detected - file already in Hydrus with hash: {file_hash}",
                        file=sys.stderr,
                    )
        except Exception:
            # Best-effort duplicate check; on failure just attempt the upload.
            pass

        # If Hydrus reports an existing file, it may be in trash. Best-effort restore it to 'my files'.
        # This keeps behavior aligned with user expectation: "use API only" and ensure it lands in my files.
        if file_exists:
            try:
                client.undelete_files([file_hash])
            except Exception:
                pass

        # Upload file if not already present
        if not file_exists:
            log(
                f"{self._log_prefix()} Uploading: {file_path.name}",
                file=sys.stderr
            )
            response = client.add_file(file_path)
            # Extract hash from response (key name varies across versions).
            hydrus_hash: Optional[str] = None
            if isinstance(response, dict):
                hydrus_hash = response.get("hash") or response.get("file_hash")
                if not hydrus_hash:
                    hashes = response.get("hashes")
                    if isinstance(hashes, list) and hashes:
                        hydrus_hash = hashes[0]
            if not hydrus_hash:
                raise Exception(f"Hydrus response missing file hash: {response}")
            file_hash = hydrus_hash
            log(f"{self._log_prefix()} hash: {file_hash}", file=sys.stderr)

        # Add tags if provided (both for new and existing files)
        if tag_list:
            # FIX: previously this constant was wrapped in a no-op try/except
            # that assigned the same value on both paths.
            service_name = "my tags"
            try:
                debug(
                    f"{self._log_prefix()} Adding {len(tag_list)} tag(s): {tag_list}"
                )
                client.add_tag(file_hash, tag_list, service_name)
                log(
                    f"{self._log_prefix()} Tags added via '{service_name}'",
                    file=sys.stderr
                )
            except Exception as exc:
                log(
                    f"{self._log_prefix()} ⚠️ Failed to add tags: {exc}",
                    file=sys.stderr
                )

        # Associate url if provided (both for new and existing files)
        if urls:
            log(
                f"{self._log_prefix()} Associating {len(urls)} URL(s) with file",
                file=sys.stderr
            )
            # FIX: previously `for url in url:` clobbered the list with its
            # last element; use a distinct loop variable.
            for url_value in urls:
                if url_value:
                    try:
                        client.associate_url(file_hash, str(url_value))
                        debug(f"{self._log_prefix()} Associated URL: {url_value}")
                    except Exception as exc:
                        log(
                            f"{self._log_prefix()} ⚠️ Failed to associate URL {url_value}: {exc}",
                            file=sys.stderr,
                        )
        return file_hash
    except Exception as exc:
        log(f"{self._log_prefix()} ❌ upload failed: {exc}", file=sys.stderr)
        raise
2025-12-11 23:21:45 -08:00
def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]:
    """Search Hydrus database for files matching query.

    Args:
        query: Search query (tags, filenames, hashes, etc.)
        limit: Maximum number of results to return (default: 100)

    Returns:
        List of dicts with 'name', 'hash', 'size', 'tags' fields

    Example:
        results = storage["hydrus"].search("artist:john_doe music")
        results = storage["hydrus"].search("Simple Man")
    """
    limit = kwargs.get("limit", 100)
    try:
        client = self._client
        if client is None:
            raise Exception("Hydrus client unavailable")
        prefix = self._log_prefix()
        debug(f"{prefix} Searching for: {query}")
def _extract_urls(meta_obj: Any) -> list[str]:
if not isinstance(meta_obj, dict):
return []
raw = meta_obj.get("url")
if raw is None:
raw = meta_obj.get("urls")
if isinstance(raw, str):
val = raw.strip()
return [val] if val else []
if isinstance(raw, list):
out: list[str] = []
for item in raw:
if not isinstance(item, str):
continue
s = item.strip()
if s:
out.append(s)
return out
return []
2025-12-29 17:05:03 -08:00
def _iter_url_filtered_metadata(
    url_value: str | None,
    want_any: bool,
    fetch_limit: int
) -> list[dict[str, Any]]:
    """Best-effort URL search by scanning Hydrus metadata with include_file_url=True."""
    # First try a fast system predicate if Hydrus supports it.
    candidate_file_ids: list[int] = []
    try:
        if want_any:
            predicate = "system:has url"
            url_search = client.search_files(
                tags=[predicate],
                return_hashes=False,
                return_file_ids=True,
                return_file_count=False,
            )
            ids = url_search.get("file_ids", []) if isinstance(url_search, dict) else []
            if isinstance(ids, list):
                # Accept numeric strings too; the client may return either form.
                candidate_file_ids = [
                    int(x) for x in ids
                    if isinstance(x, (int, float, str)) and str(x).strip().isdigit()
                ]
    except Exception:
        # Predicate unsupported or request failed; fall through to full scan.
        candidate_file_ids = []
    if not candidate_file_ids:
        # Fallback: scan from system:everything and filter by URL substring.
        everything = client.search_files(
            tags=["system:everything"],
            return_hashes=False,
            return_file_ids=True,
            return_file_count=False,
        )
        ids = everything.get("file_ids", []) if isinstance(everything, dict) else []
        if isinstance(ids, list):
            candidate_file_ids = [
                int(x) for x in ids if isinstance(x, (int, float))
            ]
    if not candidate_file_ids:
        return []
    needle = (url_value or "").strip().lower()
    # Fetch metadata in fixed-size chunks to bound each request's payload.
    chunk_size = 200
    out: list[dict[str, Any]] = []
    for start in range(0, len(candidate_file_ids), chunk_size):
        if len(out) >= fetch_limit:
            break
        chunk = candidate_file_ids[start:start + chunk_size]
        try:
            payload = client.fetch_file_metadata(
                file_ids=chunk,
                include_file_url=True,
                include_service_keys_to_tags=True,
                include_duration=True,
                include_size=True,
                include_mime=True,
            )
        except Exception:
            # Best-effort scan: a failed chunk is skipped, not fatal.
            continue
        metas = payload.get("metadata", []) if isinstance(payload, dict) else []
        if not isinstance(metas, list):
            continue
        for meta in metas:
            if not isinstance(meta, dict):
                continue
            urls = _extract_urls(meta)
            if not urls:
                continue
            if want_any:
                # url:* mode - any URL at all qualifies.
                out.append(meta)
                if len(out) >= fetch_limit:
                    break
                continue
            if not needle:
                continue
            # url:<value> mode - case-insensitive substring match.
            if any(needle in u.lower() for u in urls):
                out.append(meta)
                if len(out) >= fetch_limit:
                    break
    return out

query_lower = query.lower().strip()
# Support `ext:<value>` anywhere in the query. We filter results by the
# Hydrus metadata extension field.
def _normalize_ext_filter(value: str) -> str:
2025-12-29 17:05:03 -08:00
v = str(value or "").strip().lower().lstrip(".")
2025-12-20 23:57:44 -08:00
v = "".join(ch for ch in v if ch.isalnum())
return v
ext_filter: str | None = None
ext_only: bool = False
try:
    # Pull an ext:/extension: token out of the query, normalize it, and
    # strip it from the query text before further parsing.
    m = re.search(r"\bext:([^\s,]+)", query_lower)
    if not m:
        m = re.search(r"\bextension:([^\s,]+)", query_lower)
    if m:
        ext_filter = _normalize_ext_filter(m.group(1)) or None
        query_lower = re.sub(
            r"\s*\b(?:ext|extension):[^\s,]+",
            " ",
            query_lower
        )
        query_lower = re.sub(r"\s{2,}", " ", query_lower).strip().strip(",")
        query = query_lower
        if ext_filter and not query_lower:
            # Query was ONLY an ext filter: match everything, filter by ext.
            query = "*"
            query_lower = "*"
            ext_only = True
except Exception:
    ext_filter = None
    ext_only = False
# Split into meaningful terms for AND logic.
# Avoid punctuation tokens like '-' that would make matching brittle.
search_terms = [t for t in re.findall(r"[a-z0-9]+", query_lower) if t]
# Special case: url:* and url:<value>
metadata_list: list[dict[str, Any]] | None = None
if ":" in query_lower and not query_lower.startswith(":"):
    namespace, pattern = query_lower.split(":", 1)
    namespace = namespace.strip().lower()
    pattern = pattern.strip()
    if namespace == "url":
        if not pattern or pattern == "*":
            # url:* -> any file that has at least one URL.
            metadata_list = _iter_url_filtered_metadata(
                None,
                want_any=True,
                fetch_limit=int(limit) if limit else 100
            )
        else:
            # Fast-path: exact URL via /add_urls/get_url_files when a full URL is provided.
            try:
                if pattern.startswith("http://") or pattern.startswith("https://"):
                    from API.HydrusNetwork import HydrusRequestSpec

                    spec = HydrusRequestSpec(
                        method="GET",
                        endpoint="/add_urls/get_url_files",
                        query={
                            "url": pattern
                        },
                    )
                    response = client._perform_request(
                        spec
                    )  # type: ignore[attr-defined]
                    hashes: list[str] = []
                    file_ids: list[int] = []
                    if isinstance(response, dict):
                        raw_hashes = response.get("hashes") or response.get(
                            "file_hashes"
                        )
                        if isinstance(raw_hashes, list):
                            hashes = [
                                str(h).strip() for h in raw_hashes
                                if isinstance(h, str) and str(h).strip()
                            ]
                        raw_ids = response.get("file_ids")
                        if isinstance(raw_ids, list):
                            for item in raw_ids:
                                try:
                                    file_ids.append(int(item))
                                except (TypeError, ValueError):
                                    continue
                    # Resolve the matched ids/hashes to full metadata rows.
                    if file_ids:
                        payload = client.fetch_file_metadata(
                            file_ids=file_ids,
                            include_file_url=True,
                            include_service_keys_to_tags=True,
                            include_duration=True,
                            include_size=True,
                            include_mime=True,
                        )
                        metas = (
                            payload.get("metadata", []) if isinstance(payload, dict) else []
                        )
                        if isinstance(metas, list):
                            metadata_list = [
                                m for m in metas if isinstance(m, dict)
                            ]
                    elif hashes:
                        payload = client.fetch_file_metadata(
                            hashes=hashes,
                            include_file_url=True,
                            include_service_keys_to_tags=True,
                            include_duration=True,
                            include_size=True,
                            include_mime=True,
                        )
                        metas = (
                            payload.get("metadata", []) if isinstance(payload, dict) else []
                        )
                        if isinstance(metas, list):
                            metadata_list = [
                                m for m in metas if isinstance(m, dict)
                            ]
            except Exception:
                metadata_list = None
            # Fallback: substring scan
            if metadata_list is None:
                metadata_list = _iter_url_filtered_metadata(
                    pattern,
                    want_any=False,
                    fetch_limit=int(limit) if limit else 100
                )

# Parse the query into tags
# "*" means "match all" - use system:everything tag in Hydrus
# If query has explicit namespace, use it as a tag search.
# If query is free-form, search BOTH:
#   - title:*term* (title: is the only namespace searched implicitly)
#   - *term* (freeform tags; we will filter out other namespace matches client-side)
tags: list[str] = []
freeform_union_search: bool = False
title_predicates: list[str] = []
freeform_predicates: list[str] = []
if query.strip() == "*":
    tags = ["system:everything"]
elif ":" in query_lower:
    tags = [query_lower]
else:
    freeform_union_search = True
    if search_terms:
        # Hydrus supports wildcard matching primarily as a prefix (e.g., tag*).
        # Use per-term prefix matching for both title: and freeform tags.
        title_predicates = [f"title:{term}*" for term in search_terms]
        freeform_predicates = [f"{term}*" for term in search_terms]
    else:
        # If we can't extract alnum terms, fall back to the raw query text.
        title_predicates = [f"title:{query_lower}*"]
        freeform_predicates = [f"{query_lower}*"]

# Search files with the tags (unless url: search already produced metadata)
results = []
2025-12-20 23:57:44 -08:00
def _extract_search_ids(payload: Any) -> tuple[list[int], list[str]]:
if not isinstance(payload, dict):
return [], []
raw_ids = payload.get("file_ids", [])
raw_hashes = payload.get("hashes", [])
ids_out: list[int] = []
hashes_out: list[str] = []
if isinstance(raw_ids, list):
for item in raw_ids:
try:
ids_out.append(int(item))
except (TypeError, ValueError):
continue
if isinstance(raw_hashes, list):
2025-12-29 17:05:03 -08:00
hashes_out = [
str(h).strip() for h in raw_hashes
if isinstance(h, str) and str(h).strip()
2025-12-29 17:05:03 -08:00
]
2025-12-20 23:57:44 -08:00
return ids_out, hashes_out
2025-12-14 00:53:52 -08:00
if metadata_list is None:
    file_ids: list[int] = []
    hashes: list[str] = []
    if freeform_union_search:
        if not title_predicates and not freeform_predicates:
            debug(f"{prefix} 0 result(s)")
            return []
        # Run the title: search and the freeform search independently and
        # union the results; either leg is allowed to fail on its own.
        payloads: list[Any] = []
        try:
            payloads.append(
                client.search_files(
                    tags=title_predicates,
                    return_hashes=True,
                    return_file_ids=True,
                )
            )
        except Exception:
            pass
        try:
            payloads.append(
                client.search_files(
                    tags=freeform_predicates,
                    return_hashes=True,
                    return_file_ids=True,
                )
            )
        except Exception:
            pass
        # Sets deduplicate ids/hashes that appear in both legs.
        id_set: set[int] = set()
        hash_set: set[str] = set()
        for payload in payloads:
            ids_part, hashes_part = _extract_search_ids(payload)
            for fid in ids_part:
                id_set.add(fid)
            for hh in hashes_part:
                hash_set.add(hh)
        file_ids = list(id_set)
        hashes = list(hash_set)
    else:
        if not tags:
            debug(f"{prefix} 0 result(s)")
            return []
        search_result = client.search_files(
            tags=tags,
            return_hashes=True,
            return_file_ids=True
        )
        file_ids, hashes = _extract_search_ids(search_result)
    # Fast path: ext-only search. Avoid fetching metadata for an unbounded
    # system:everything result set; fetch in chunks until we have enough.
    if ext_only and ext_filter:
        results: list[dict[str, Any]] = []
        if not file_ids and not hashes:
            debug(f"{prefix} 0 result(s)")
            return []
        # Prefer file_ids if available.
        if file_ids:
            chunk_size = 200
            for start in range(0, len(file_ids), chunk_size):
                if len(results) >= limit:
                    break
                chunk = file_ids[start:start + chunk_size]
                try:
                    payload = client.fetch_file_metadata(
                        file_ids=chunk,
                        include_service_keys_to_tags=True,
                        include_file_url=False,
                        include_duration=True,
                        include_size=True,
                        include_mime=True,
                    )
                except Exception:
                    continue
                metas = payload.get("metadata", []) if isinstance(payload, dict) else []
                if not isinstance(metas, list):
                    continue
                for meta in metas:
                    if len(results) >= limit:
                        break
                    if not isinstance(meta, dict):
                        continue
                    mime_type = meta.get("mime")
                    # Prefer the Hydrus-provided extension; fall back to the
                    # project's MIME -> extension mapping table.
                    ext = str(meta.get("ext") or "").strip().lstrip(".")
                    if not ext and mime_type:
                        for category in mime_maps.values():
                            for _ext_key, info in category.items():
                                if mime_type in info.get("mimes", []):
                                    ext = str(info.get("ext", "")).strip().lstrip(".")
                                    break
                            if ext:
                                break
                    if _normalize_ext_filter(ext) != ext_filter:
                        continue

                    file_id = meta.get("file_id")
                    hash_hex = meta.get("hash")
                    size = meta.get("size", 0)
                    tags_set = meta.get("tags", {})
                    all_tags: list[str] = []
                    title = f"Hydrus File {file_id}"
                    if isinstance(tags_set, dict):

                        def _collect(tag_list: Any) -> None:
                            # Accumulate normalized tags; first title: tag wins.
                            nonlocal title
                            if not isinstance(tag_list, list):
                                return
                            for tag in tag_list:
                                tag_text = str(tag) if tag else ""
                                if not tag_text:
                                    continue
                                tag_l = tag_text.strip().lower()
                                if not tag_l:
                                    continue
                                all_tags.append(tag_l)
                                if (tag_l.startswith("title:") and title
                                        == f"Hydrus File {file_id}"):
                                    title = tag_l.split(":", 1)[1].strip()

                        for _service_name, service_tags in tags_set.items():
                            if not isinstance(service_tags, dict):
                                continue
                            storage_tags = service_tags.get(
                                "storage_tags",
                                {}
                            )
                            if isinstance(storage_tags, dict):
                                for tag_list in storage_tags.values():
                                    _collect(tag_list)
                            display_tags = service_tags.get(
                                "display_tags",
                                []
                            )
                            _collect(display_tags)
                    file_url = f"{self.URL.rstrip('/')}/get_files/file?hash={hash_hex}"
                    results.append(
                        {
                            "hash": hash_hex,
                            "url": file_url,
                            "name": title,
                            "title": title,
                            "size": size,
                            "size_bytes": size,
                            "store": self.NAME,
                            "tag": all_tags,
                            "file_id": file_id,
                            "mime": mime_type,
                            "ext": ext,
                        }
                    )
            debug(f"{prefix} {len(results)} result(s)")
            return results[:limit]
        # If we only got hashes, fall back to the normal flow below.
    if not file_ids and not hashes:
        debug(f"{prefix} 0 result(s)")
        return []
    if file_ids:
        metadata = client.fetch_file_metadata(
            file_ids=file_ids,
            include_service_keys_to_tags=True,
            include_file_url=False,
            include_duration=True,
            include_size=True,
            include_mime=True,
        )
        metadata_list = metadata.get("metadata", [])
    elif hashes:
        metadata = client.fetch_file_metadata(
            hashes=hashes,
            include_service_keys_to_tags=True,
            include_file_url=False,
            include_duration=True,
            include_size=True,
            include_mime=True,
        )
        metadata_list = metadata.get("metadata", [])
    else:
        metadata_list = []
    # If our free-text searches produce nothing (or nothing survived downstream filtering), fallback to scanning.
    if (not metadata_list) and (query_lower != "*") and (":" not in query_lower):
        try:
            search_result = client.search_files(
                tags=["system:everything"],
                return_hashes=True,
                return_file_ids=True,
            )
            file_ids, hashes = _extract_search_ids(search_result)
            if file_ids:
                metadata = client.fetch_file_metadata(
                    file_ids=file_ids,
                    include_service_keys_to_tags=True,
                    include_file_url=False,
                    include_duration=True,
                    include_size=True,
                    include_mime=True,
                )
                metadata_list = metadata.get("metadata", [])
            elif hashes:
                metadata = client.fetch_file_metadata(
                    hashes=hashes,
                    include_service_keys_to_tags=True,
                    include_file_url=False,
                    include_duration=True,
                    include_size=True,
                    include_mime=True,
                )
                metadata_list = metadata.get("metadata", [])
        except Exception:
            pass
if not isinstance(metadata_list, list):
    metadata_list = []
for meta in metadata_list:
    if len(results) >= limit:
        break
    file_id = meta.get("file_id")
    hash_hex = meta.get("hash")
    size = meta.get("size", 0)
    # Get tags for this file and extract title
    tags_set = meta.get("tags", {})
    all_tags = []
    title = f"Hydrus File {file_id}"  # Default fallback
    all_tags_str = ""  # For substring matching
    if isinstance(tags_set, dict):
        # Collect both storage_tags and display_tags to capture siblings/parents and ensure title: is seen
        def _collect(tag_list: Any) -> None:
            nonlocal title, all_tags_str
            if not isinstance(tag_list, list):
                return
            for tag in tag_list:
                tag_text = str(tag) if tag else ""
                if not tag_text:
                    continue
                tag_l = tag_text.strip().lower()
                if not tag_l:
                    continue
                all_tags.append(tag_l)
                all_tags_str += " " + tag_l
                if tag_l.startswith("title:") and title == f"Hydrus File {file_id}":
                    title = tag_l.split(":", 1)[1].strip()

        for _service_name, service_tags in tags_set.items():
            if not isinstance(service_tags, dict):
                continue
            storage_tags = service_tags.get("storage_tags", {})
            if isinstance(storage_tags, dict):
                for tag_list in storage_tags.values():
                    _collect(tag_list)
            display_tags = service_tags.get("display_tags", [])
            _collect(display_tags)
        # Also consider top-level flattened tags payload if provided (Hydrus API sometimes includes it)
        # NOTE(review): placement inside the isinstance guard keeps _collect in
        # scope; the original paste's indentation is ambiguous here - confirm.
        top_level_tags = meta.get("tags_flat", []) or meta.get("tags", [])
        _collect(top_level_tags)
    # Prefer Hydrus-provided extension (e.g. ".webm"); fall back to MIME map.
    mime_type = meta.get("mime")
    ext = str(meta.get("ext") or "").strip().lstrip(".")
    if not ext and mime_type:
        for category in mime_maps.values():
            for _ext_key, info in category.items():
                if mime_type in info.get("mimes", []):
                    ext = str(info.get("ext", "")).strip().lstrip(".")
                    break
            if ext:
                break
    # Filter results based on query type
    # If user provided explicit namespace (has ':'), don't do substring filtering
    # Just include what the tag search returned
    has_namespace = ":" in query_lower
    if has_namespace:
        # Explicit namespace search - already filtered by Hydrus tag search
        # Include this result as-is
        file_url = f"{self.URL.rstrip('/')}/get_files/file?hash={hash_hex}"
        results.append(
            {
                "hash": hash_hex,
                "url": file_url,
                "name": title,
                "title": title,
                "size": size,
                "size_bytes": size,
                "store": self.NAME,
                "tag": all_tags,
                "file_id": file_id,
                "mime": mime_type,
                "ext": ext,
            }
        )
    else:
        # Free-form search: check if search terms match title or FREEFORM tags.
        # Do NOT implicitly match other namespace tags (except title:).
        freeform_tags = [
            t for t in all_tags
            if isinstance(t, str) and t and (":" not in t)
        ]
        searchable_text = (title + " " + " ".join(freeform_tags)).lower()
        match = True
        if query_lower != "*" and search_terms:
            # AND semantics: every term must appear somewhere in the text.
            for term in search_terms:
                if term not in searchable_text:
                    match = False
                    break
        if match:
            file_url = f"{self.URL.rstrip('/')}/get_files/file?hash={hash_hex}"
            results.append(
                {
                    "hash": hash_hex,
                    "url": file_url,
                    "name": title,
                    "title": title,
                    "size": size,
                    "size_bytes": size,
                    "store": self.NAME,
                    "tag": all_tags,
                    "file_id": file_id,
                    "mime": mime_type,
                    "ext": ext,
                }
            )
debug(f"{prefix} {len(results)} result(s)")
# Apply the ext: filter (if any) to the assembled results.
if ext_filter:
    wanted = ext_filter
    filtered: list[dict[str, Any]] = []
    for item in results:
        try:
            if _normalize_ext_filter(str(item.get("ext") or "")) == wanted:
                filtered.append(item)
        except Exception:
            continue
    results = filtered
return results[:limit]
except Exception as exc:
    # Surface the failure to the caller after logging a full traceback.
    log(f"❌ Hydrus search failed: {exc}", file=sys.stderr)
    import traceback

    traceback.print_exc(file=sys.stderr)
    raise
def get_file(self, file_hash: str, **kwargs: Any) -> Path | str | None:
2025-12-16 01:45:01 -08:00
"""Return a browser URL for the file.
IMPORTANT: this method must be side-effect free (do not auto-open a browser).
Only explicit user actions (e.g. the get-file cmdlet) should open files.
"""
2025-12-29 17:05:03 -08:00
2025-12-16 23:23:43 -08:00
debug(f"{self._log_prefix()} get_file: start hash={file_hash[:12]}...")
2025-12-29 17:05:03 -08:00
2025-12-11 19:04:02 -08:00
# Build browser URL with access key
2025-12-29 17:05:03 -08:00
base_url = str(self.URL).rstrip("/")
2025-12-13 00:18:30 -08:00
access_key = str(self.API)
2025-12-29 17:05:03 -08:00
browser_url = (
f"{base_url}/get_files/file?hash={file_hash}&Hydrus-Client-API-Access-Key={access_key}"
)
2025-12-16 23:23:43 -08:00
debug(f"{self._log_prefix()} get_file: url={browser_url}")
2025-12-11 19:04:02 -08:00
return browser_url
2025-12-27 14:50:59 -08:00
def delete_file(self, file_identifier: str, **kwargs: Any) -> bool:
"""Delete a file from Hydrus, then clear the deletion record.
This is used by the delete-file cmdlet when the item belongs to a HydrusNetwork store.
"""
try:
client = self._client
if client is None:
debug(f"{self._log_prefix()} delete_file: client unavailable")
return False
file_hash = str(file_identifier or "").strip().lower()
if len(file_hash) != 64 or not all(ch in "0123456789abcdef"
for ch in file_hash):
debug(
f"{self._log_prefix()} delete_file: invalid file hash '{file_identifier}'"
)
2025-12-27 14:50:59 -08:00
return False
reason = kwargs.get("reason")
2025-12-29 17:05:03 -08:00
reason_text = (
str(reason).strip() if isinstance(reason,
str) and reason.strip() else None
2025-12-29 17:05:03 -08:00
)
2025-12-27 14:50:59 -08:00
# 1) Delete file
client.delete_files([file_hash], reason=reason_text)
# 2) Clear deletion record (best-effort)
try:
client.clear_file_deletion_record([file_hash])
except Exception as exc:
debug(
f"{self._log_prefix()} delete_file: clear_file_deletion_record failed: {exc}"
)
2025-12-27 14:50:59 -08:00
return True
except Exception as exc:
debug(f"{self._log_prefix()} delete_file failed: {exc}")
return False
2025-12-11 19:04:02 -08:00
    def get_metadata(self, file_hash: str, **kwargs: Any) -> Optional[Dict[str, Any]]:
        """Get metadata for a file from Hydrus by hash.

        Args:
            file_hash: SHA256 hash of the file (64-char hex string)

        Returns:
            Dict with normalized metadata fields (hash, title, ext, size,
            mime, hydrus_mime, filetype_human, duration_ms, url) or None
            if the client is unavailable, the hash is unknown, or any
            error occurs (errors are logged via debug, never raised).
        """
        try:
            client = self._client
            if not client:
                debug(f"{self._log_prefix()} get_metadata: client unavailable")
                return None

            # Fetch file metadata with the fields we need for CLI display.
            payload = client.fetch_file_metadata(
                hashes=[file_hash],
                include_service_keys_to_tags=True,
                include_file_url=True,
                include_duration=True,
                include_size=True,
                include_mime=True,
            )

            if not payload or not payload.get("metadata"):
                return None

            meta = payload["metadata"][0]
            # Hydrus can return placeholder metadata rows for unknown hashes.
            if not isinstance(meta, dict) or meta.get("file_id") is None:
                return None

            # Extract title from tags. Falls back to a synthetic name derived
            # from the hash prefix when no "title:" tag is present.
            title = f"Hydrus_{file_hash[:12]}"
            tags_payload = meta.get("tags", {})
            if isinstance(tags_payload, dict):
                # Scan every tag service; first "title:" tag found wins.
                for service_data in tags_payload.values():
                    if isinstance(service_data, dict):
                        display_tags = service_data.get("display_tags", {})
                        if isinstance(display_tags, dict):
                            # NOTE(review): key "0" presumably means status
                            # "current" per Hydrus API conventions — confirm.
                            current_tags = display_tags.get("0", [])
                            if isinstance(current_tags, list):
                                for tag in current_tags:
                                    if str(tag).lower().startswith("title:"):
                                        title = tag.split(":", 1)[1].strip()
                                        break
                    if title != f"Hydrus_{file_hash[:12]}":
                        break

            # Hydrus may return mime as an int enum, or sometimes a human label.
            mime_val = meta.get("mime")
            filetype_human = (
                meta.get("filetype_human") or meta.get("mime_human")
                or meta.get("mime_string")
            )
            # Determine ext: prefer Hydrus metadata ext, then filetype_human
            # (when it looks like an ext), then title suffix, then file path suffix.
            ext = str(meta.get("ext") or "").strip().lstrip(".")
            if not ext:
                ft = str(filetype_human or "").strip().lstrip(".").lower()
                if ft and ft != "unknown filetype" and ft.isalnum() and len(ft) <= 8:
                    # Treat simple labels like "mp4", "m4a", "webm" as extensions.
                    ext = ft
            if not ext and isinstance(title, str) and "." in title:
                try:
                    ext = Path(title).suffix.lstrip(".")
                except Exception:
                    ext = ""
            if not ext:
                # Last resort: ask Hydrus for the file's on-disk path and use
                # its suffix. Best-effort only.
                try:
                    path_payload = client.get_file_path(file_hash)
                    if isinstance(path_payload, dict):
                        p = path_payload.get("path")
                        if isinstance(p, str) and p.strip():
                            ext = Path(p.strip()).suffix.lstrip(".")
                except Exception:
                    ext = ""

            # If extension is still unknown, attempt a best-effort lookup from MIME.
            def _mime_from_ext(ext_value: str) -> str:
                # Reverse lookup: first MIME listed for this extension in the
                # project's mime_maps table, or "" when unmapped.
                ext_clean = str(ext_value or "").strip().lstrip(".").lower()
                if not ext_clean:
                    return ""
                try:
                    for category in mime_maps.values():
                        info = category.get(ext_clean)
                        if isinstance(info, dict):
                            mimes = info.get("mimes")
                            if isinstance(mimes, list) and mimes:
                                first = mimes[0]
                                return str(first)
                except Exception:
                    return ""
                return ""

            # Normalize to a MIME string for CLI output.
            # Avoid passing through human labels like "unknown filetype".
            mime_type = ""
            if isinstance(mime_val, str):
                candidate = mime_val.strip()
                if "/" in candidate and candidate.lower() != "unknown filetype":
                    mime_type = candidate
            if not mime_type and isinstance(filetype_human, str):
                candidate = filetype_human.strip()
                if "/" in candidate and candidate.lower() != "unknown filetype":
                    mime_type = candidate
            if not mime_type:
                mime_type = _mime_from_ext(ext)

            # Normalize size/duration to stable scalar types.
            size_val = meta.get("size")
            if size_val is None:
                size_val = meta.get("size_bytes")
            try:
                size_int: int | None = int(size_val) if size_val is not None else None
            except Exception:
                size_int = None
            dur_val = meta.get("duration")
            if dur_val is None:
                dur_val = meta.get("duration_ms")
            try:
                dur_int: int | None = int(dur_val) if dur_val is not None else None
            except Exception:
                dur_int = None

            # Known URLs may arrive under several keys and as str or list;
            # normalize to a stripped, non-empty list of strings.
            raw_urls = meta.get("known_urls") or meta.get("urls") or meta.get("url") or []
            url_list: list[str] = []
            if isinstance(raw_urls, str):
                s = raw_urls.strip()
                url_list = [s] if s else []
            elif isinstance(raw_urls, list):
                url_list = [
                    str(u).strip() for u in raw_urls
                    if isinstance(u, str) and str(u).strip()
                ]

            return {
                "hash": file_hash,
                "title": title,
                "ext": ext,
                "size": size_int,
                "mime": mime_type,
                # Keep raw fields available for troubleshooting/other callers.
                "hydrus_mime": mime_val,
                "filetype_human": filetype_human,
                "duration_ms": dur_int,
                "url": url_list,
            }

        except Exception as exc:
            debug(f"{self._log_prefix()} get_metadata failed: {exc}")
            return None
def get_tag(self, file_identifier: str, **kwargs: Any) -> Tuple[List[str], str]:
"""Get tags for a file from Hydrus by hash.
2025-12-29 17:05:03 -08:00
2025-12-11 19:04:02 -08:00
Args:
file_identifier: File hash (SHA256 hex string)
**kwargs: Optional service_name parameter
2025-12-29 17:05:03 -08:00
2025-12-11 19:04:02 -08:00
Returns:
Tuple of (tags_list, source_description)
where source is always "hydrus"
"""
try:
from API import HydrusNetwork as hydrus_wrapper
2025-12-12 21:55:38 -08:00
file_hash = str(file_identifier or "").strip().lower()
if len(file_hash) != 64 or not all(ch in "0123456789abcdef"
for ch in file_hash):
debug(
f"{self._log_prefix()} get_tags: invalid file hash '{file_identifier}'"
)
2025-12-12 21:55:38 -08:00
return [], "unknown"
2025-12-29 17:05:03 -08:00
2025-12-11 19:04:02 -08:00
# Get Hydrus client and service info
client = self._client
if not client:
2025-12-16 23:23:43 -08:00
debug(f"{self._log_prefix()} get_tags: client unavailable")
2025-12-11 19:04:02 -08:00
return [], "unknown"
2025-12-29 17:05:03 -08:00
2025-12-11 19:04:02 -08:00
# Fetch file metadata
payload = client.fetch_file_metadata(
hashes=[file_hash],
include_service_keys_to_tags=True,
include_file_url=False
2025-12-11 19:04:02 -08:00
)
2025-12-29 17:05:03 -08:00
2025-12-11 19:04:02 -08:00
items = payload.get("metadata") if isinstance(payload, dict) else None
if not isinstance(items, list) or not items:
debug(
f"{self._log_prefix()} get_tags: no metadata for hash {file_hash}"
)
2025-12-11 19:04:02 -08:00
return [], "unknown"
2025-12-29 17:05:03 -08:00
2025-12-11 19:04:02 -08:00
meta = items[0] if isinstance(items[0], dict) else None
if not isinstance(meta, dict) or meta.get("file_id") is None:
debug(
f"{self._log_prefix()} get_tags: invalid metadata for hash {file_hash}"
)
2025-12-11 19:04:02 -08:00
return [], "unknown"
2025-12-29 17:05:03 -08:00
2025-12-11 19:04:02 -08:00
# Extract tags using service name
service_name = "my tags"
service_key = hydrus_wrapper.get_tag_service_key(client, service_name)
2025-12-29 17:05:03 -08:00
2025-12-11 19:04:02 -08:00
# Extract tags from metadata
tags = self._extract_tags_from_hydrus_meta(meta, service_key, service_name)
2025-12-20 23:57:44 -08:00
2025-12-29 17:05:03 -08:00
return [
str(t).strip().lower() for t in tags if isinstance(t, str) and t.strip()
], "hydrus"
2025-12-11 19:04:02 -08:00
except Exception as exc:
2025-12-16 23:23:43 -08:00
debug(f"{self._log_prefix()} get_tags failed: {exc}")
2025-12-11 19:04:02 -08:00
return [], "unknown"
def add_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool:
2025-12-29 17:05:03 -08:00
"""Add tags to a Hydrus file."""
2025-12-11 19:04:02 -08:00
try:
client = self._client
if client is None:
2025-12-16 23:23:43 -08:00
debug(f"{self._log_prefix()} add_tag: client unavailable")
2025-12-11 19:04:02 -08:00
return False
2025-12-12 21:55:38 -08:00
file_hash = str(file_identifier or "").strip().lower()
if len(file_hash) != 64 or not all(ch in "0123456789abcdef"
for ch in file_hash):
debug(
f"{self._log_prefix()} add_tag: invalid file hash '{file_identifier}'"
)
2025-12-12 21:55:38 -08:00
return False
2025-12-11 19:04:02 -08:00
service_name = kwargs.get("service_name") or "my tags"
2025-12-20 23:57:44 -08:00
2025-12-29 17:05:03 -08:00
incoming_tags = [
str(t).strip().lower() for t in (tags or [])
2025-12-29 17:05:03 -08:00
if isinstance(t, str) and str(t).strip()
]
2025-12-20 23:57:44 -08:00
if not incoming_tags:
return True
try:
existing_tags, _src = self.get_tag(file_hash)
except Exception:
existing_tags = []
2025-12-29 23:40:50 -08:00
from SYS.metadata import compute_namespaced_tag_overwrite
2025-12-20 23:57:44 -08:00
2025-12-29 17:05:03 -08:00
tags_to_remove, tags_to_add, _merged = compute_namespaced_tag_overwrite(
existing_tags, incoming_tags
)
2025-12-20 23:57:44 -08:00
if not tags_to_add and not tags_to_remove:
return True
did_any = False
if tags_to_remove:
try:
client.delete_tag(file_hash, tags_to_remove, service_name)
did_any = True
except Exception as exc:
debug(f"{self._log_prefix()} add_tag: delete_tag failed: {exc}")
if tags_to_add:
try:
client.add_tag(file_hash, tags_to_add, service_name)
did_any = True
except Exception as exc:
debug(f"{self._log_prefix()} add_tag: add_tag failed: {exc}")
return did_any
2025-12-11 19:04:02 -08:00
except Exception as exc:
2025-12-16 23:23:43 -08:00
debug(f"{self._log_prefix()} add_tag failed: {exc}")
2025-12-11 19:04:02 -08:00
return False
def delete_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool:
2025-12-29 17:05:03 -08:00
"""Delete tags from a Hydrus file."""
2025-12-11 19:04:02 -08:00
try:
client = self._client
if client is None:
2025-12-16 23:23:43 -08:00
debug(f"{self._log_prefix()} delete_tag: client unavailable")
2025-12-11 19:04:02 -08:00
return False
2025-12-12 21:55:38 -08:00
file_hash = str(file_identifier or "").strip().lower()
if len(file_hash) != 64 or not all(ch in "0123456789abcdef"
for ch in file_hash):
debug(
f"{self._log_prefix()} delete_tag: invalid file hash '{file_identifier}'"
)
2025-12-12 21:55:38 -08:00
return False
2025-12-11 19:04:02 -08:00
service_name = kwargs.get("service_name") or "my tags"
2025-12-20 23:57:44 -08:00
raw_list = list(tags) if isinstance(tags, (list, tuple)) else [str(tags)]
2025-12-29 17:05:03 -08:00
tag_list = [
str(t).strip().lower() for t in raw_list
if isinstance(t, str) and str(t).strip()
2025-12-29 17:05:03 -08:00
]
2025-12-11 19:04:02 -08:00
if not tag_list:
return False
2025-12-12 21:55:38 -08:00
client.delete_tag(file_hash, tag_list, service_name)
2025-12-11 19:04:02 -08:00
return True
except Exception as exc:
2025-12-16 23:23:43 -08:00
debug(f"{self._log_prefix()} delete_tag failed: {exc}")
2025-12-11 19:04:02 -08:00
return False
def get_url(self, file_identifier: str, **kwargs: Any) -> List[str]:
2025-12-29 17:05:03 -08:00
"""Get known url for a Hydrus file."""
2025-12-11 19:04:02 -08:00
try:
client = self._client
2025-12-12 21:55:38 -08:00
file_hash = str(file_identifier or "").strip().lower()
if len(file_hash) != 64 or not all(ch in "0123456789abcdef"
for ch in file_hash):
2025-12-12 21:55:38 -08:00
return []
payload = client.fetch_file_metadata(
hashes=[file_hash],
include_file_url=False
)
2025-12-11 19:04:02 -08:00
items = payload.get("metadata") if isinstance(payload, dict) else None
if not isinstance(items, list) or not items:
return []
meta = items[0] if isinstance(items[0],
dict) else {}
2025-12-16 01:45:01 -08:00
raw_urls: Any = meta.get("known_urls"
) or meta.get("urls") or meta.get("url") or []
2025-12-16 01:45:01 -08:00
if isinstance(raw_urls, str):
val = raw_urls.strip()
return [val] if val else []
if isinstance(raw_urls, list):
out: list[str] = []
for u in raw_urls:
if not isinstance(u, str):
continue
u = u.strip()
if u:
out.append(u)
return out
return []
2025-12-11 19:04:02 -08:00
except Exception as exc:
2025-12-16 23:23:43 -08:00
debug(f"{self._log_prefix()} get_url failed: {exc}")
2025-12-11 19:04:02 -08:00
return []
def add_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
2025-12-29 17:05:03 -08:00
"""Associate one or more url with a Hydrus file."""
2025-12-11 19:04:02 -08:00
try:
client = self._client
if client is None:
2025-12-16 23:23:43 -08:00
debug(f"{self._log_prefix()} add_url: client unavailable")
2025-12-11 19:04:02 -08:00
return False
for u in url:
client.associate_url(file_identifier, u)
return True
except Exception as exc:
2025-12-16 23:23:43 -08:00
debug(f"{self._log_prefix()} add_url failed: {exc}")
2025-12-11 19:04:02 -08:00
return False
2025-12-20 23:57:44 -08:00
def add_url_bulk(self, items: List[tuple[str, List[str]]], **kwargs: Any) -> bool:
"""Bulk associate urls with Hydrus files.
This is a best-effort convenience wrapper used by cmdlets to batch url associations.
Hydrus' client API is still called per (hash,url) pair, but this consolidates the
cmdlet-level control flow so url association can be deferred until the end.
"""
try:
client = self._client
if client is None:
debug(f"{self._log_prefix()} add_url_bulk: client unavailable")
return False
any_success = False
2025-12-29 17:05:03 -08:00
for file_identifier, urls in items or []:
2025-12-20 23:57:44 -08:00
h = str(file_identifier or "").strip().lower()
if len(h) != 64:
continue
2025-12-29 17:05:03 -08:00
for u in urls or []:
2025-12-20 23:57:44 -08:00
s = str(u or "").strip()
if not s:
continue
try:
client.associate_url(h, s)
any_success = True
except Exception:
continue
return any_success
except Exception as exc:
debug(f"{self._log_prefix()} add_url_bulk failed: {exc}")
return False
2025-12-11 19:04:02 -08:00
def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
2025-12-29 17:05:03 -08:00
"""Delete one or more url from a Hydrus file."""
2025-12-11 19:04:02 -08:00
try:
client = self._client
if client is None:
2025-12-16 23:23:43 -08:00
debug(f"{self._log_prefix()} delete_url: client unavailable")
2025-12-11 19:04:02 -08:00
return False
for u in url:
client.delete_url(file_identifier, u)
return True
except Exception as exc:
2025-12-16 23:23:43 -08:00
debug(f"{self._log_prefix()} delete_url failed: {exc}")
2025-12-11 19:04:02 -08:00
return False
2025-12-12 21:55:38 -08:00
def get_note(self, file_identifier: str, **kwargs: Any) -> Dict[str, str]:
"""Get notes for a Hydrus file (default note service only)."""
try:
client = self._client
if client is None:
2025-12-16 23:23:43 -08:00
debug(f"{self._log_prefix()} get_note: client unavailable")
2025-12-12 21:55:38 -08:00
return {}
file_hash = str(file_identifier or "").strip().lower()
if len(file_hash) != 64 or not all(ch in "0123456789abcdef"
for ch in file_hash):
2025-12-12 21:55:38 -08:00
return {}
payload = client.fetch_file_metadata(hashes=[file_hash], include_notes=True)
items = payload.get("metadata") if isinstance(payload, dict) else None
if not isinstance(items, list) or not items:
return {}
meta = items[0] if isinstance(items[0], dict) else None
if not isinstance(meta, dict):
return {}
notes_payload = meta.get("notes")
if isinstance(notes_payload, dict):
return {
str(k): str(v or "")
for k, v in notes_payload.items() if str(k).strip()
}
2025-12-12 21:55:38 -08:00
return {}
except Exception as exc:
2025-12-16 23:23:43 -08:00
debug(f"{self._log_prefix()} get_note failed: {exc}")
2025-12-12 21:55:38 -08:00
return {}
def set_note(
self,
file_identifier: str,
name: str,
text: str,
**kwargs: Any
) -> bool:
2025-12-12 21:55:38 -08:00
"""Set a named note for a Hydrus file (default note service only)."""
try:
client = self._client
if client is None:
2025-12-16 23:23:43 -08:00
debug(f"{self._log_prefix()} set_note: client unavailable")
2025-12-12 21:55:38 -08:00
return False
file_hash = str(file_identifier or "").strip().lower()
if len(file_hash) != 64 or not all(ch in "0123456789abcdef"
for ch in file_hash):
2025-12-12 21:55:38 -08:00
return False
note_name = str(name or "").strip()
if not note_name:
return False
note_text = str(text or "")
client.set_notes(file_hash,
{
note_name: note_text
})
2025-12-12 21:55:38 -08:00
return True
except Exception as exc:
2025-12-16 23:23:43 -08:00
debug(f"{self._log_prefix()} set_note failed: {exc}")
2025-12-12 21:55:38 -08:00
return False
def delete_note(self, file_identifier: str, name: str, **kwargs: Any) -> bool:
"""Delete a named note for a Hydrus file (default note service only)."""
try:
client = self._client
if client is None:
2025-12-16 23:23:43 -08:00
debug(f"{self._log_prefix()} delete_note: client unavailable")
2025-12-12 21:55:38 -08:00
return False
file_hash = str(file_identifier or "").strip().lower()
if len(file_hash) != 64 or not all(ch in "0123456789abcdef"
for ch in file_hash):
2025-12-12 21:55:38 -08:00
return False
note_name = str(name or "").strip()
if not note_name:
return False
client.delete_notes(file_hash, [note_name])
return True
except Exception as exc:
2025-12-16 23:23:43 -08:00
debug(f"{self._log_prefix()} delete_note failed: {exc}")
2025-12-12 21:55:38 -08:00
return False
2025-12-11 19:04:02 -08:00
@staticmethod
def _extract_tags_from_hydrus_meta(
meta: Dict[str,
Any],
service_key: Optional[str],
service_name: str
2025-12-11 19:04:02 -08:00
) -> List[str]:
"""Extract current tags from Hydrus metadata dict.
2025-12-29 17:05:03 -08:00
2025-12-11 19:04:02 -08:00
Prefers display_tags (includes siblings/parents, excludes deleted).
Falls back to storage_tags status '0' (current).
"""
tags_payload = meta.get("tags")
if not isinstance(tags_payload, dict):
return []
2025-12-29 17:05:03 -08:00
2025-12-11 19:04:02 -08:00
svc_data = None
if service_key:
svc_data = tags_payload.get(service_key)
if not isinstance(svc_data, dict):
return []
2025-12-29 17:05:03 -08:00
2025-12-11 19:04:02 -08:00
# Prefer display_tags (Hydrus computes siblings/parents)
display = svc_data.get("display_tags")
if isinstance(display, list) and display:
return [
str(t) for t in display
if isinstance(t, (str, bytes)) and str(t).strip()
]
2025-12-29 17:05:03 -08:00
2025-12-11 19:04:02 -08:00
# Fallback to storage_tags status '0' (current)
storage = svc_data.get("storage_tags")
if isinstance(storage, dict):
current_list = storage.get("0") or storage.get(0)
if isinstance(current_list, list):
2025-12-29 17:05:03 -08:00
return [
str(t) for t in current_list
if isinstance(t, (str, bytes)) and str(t).strip()
2025-12-29 17:05:03 -08:00
]
2025-12-11 19:04:02 -08:00
return []