1087 lines
47 KiB
Python
1087 lines
47 KiB
Python
from __future__ import annotations
|
||
|
||
import re
|
||
import sys
|
||
from pathlib import Path
|
||
from typing import Any, Dict, List, Optional, Tuple
|
||
|
||
import httpx
|
||
|
||
from SYS.logger import debug, log
|
||
from SYS.utils_constant import mime_maps
|
||
|
||
from Store._base import Store
|
||
|
||
|
||
# Process-wide memo of the startup health check performed in HydrusNetwork.__init__.
# Key: (base URL without trailing slash, access key).
# Value: (ok, error_message) -- error_message is None when the check succeeded.
_HYDRUS_INIT_CHECK_CACHE: dict[tuple[str, str], tuple[bool, Optional[str]]] = {}
|
||
|
||
|
||
class HydrusNetwork(Store):
|
||
"""File storage backend for Hydrus client.
|
||
|
||
Each instance represents a specific Hydrus client connection.
|
||
Maintains its own HydrusClient.
|
||
"""
|
||
|
||
def _log_prefix(self) -> str:
|
||
store_name = getattr(self, "NAME", None) or "unknown"
|
||
return f"[hydrusnetwork:{store_name}]"
|
||
|
||
def __new__(cls, *args: Any, **kwargs: Any) -> "HydrusNetwork":
    """Allocate the instance and pre-seed NAME / API / URL from keyword arguments.

    Only the upper-case keyword forms are read here; positional arguments are
    ignored. Each keyword that is present and not None is stored on the new
    instance as a string.
    """
    instance = super().__new__(cls)
    for key in ("NAME", "API", "URL"):
        value = kwargs.get(key)
        if value is not None:
            setattr(instance, key, str(value))
    return instance

# Attach the recognised keyword names to the function as a ``keys`` attribute
# (presumably read by whatever instantiates stores -- confirm against the caller).
setattr(__new__, "keys", ("NAME", "API", "URL"))
|
||
|
||
def __init__(
    self,
    instance_name: Optional[str] = None,
    api_key: Optional[str] = None,
    url: Optional[str] = None,
    *,
    NAME: Optional[str] = None,
    API: Optional[str] = None,
    URL: Optional[str] = None,
) -> None:
    """Set up a Hydrus storage backend and verify that it is usable.

    Args:
        instance_name: Name of this Hydrus instance (e.g., 'home', 'work').
        api_key: Hydrus Client API access key.
        url: Hydrus client URL (e.g., 'http://192.168.1.230:45869').
        NAME, API, URL: Keyword-only fallbacks, used only when the matching
            positional argument is None.

    Raises:
        ValueError: If name, access key or URL is missing/empty.
        RuntimeError: If the health check (GET /api_version followed by
            GET /verify_access_key) fails, or a failure was already recorded
            for the same (URL, access key) pair in this process.
    """
    from API.HydrusNetwork import HydrusNetwork as HydrusClient

    # Fall back to the keyword-only spellings where the positional one is absent.
    if instance_name is None and NAME is not None:
        instance_name = str(NAME)
    if api_key is None and API is not None:
        api_key = str(API)
    if url is None and URL is not None:
        url = str(URL)

    if not (instance_name and api_key and url):
        raise ValueError("HydrusNetwork requires NAME, API, and URL")

    self.NAME = instance_name
    self.API = api_key
    self.URL = url.rstrip("/")

    # Total count (best-effort, used for startup diagnostics)
    self.total_count: Optional[int] = None

    # Self health-check: validate the URL is reachable and the access key is accepted.
    # This MUST NOT attempt to acquire a session key.
    cache_key = (self.URL, self.API)
    previous = _HYDRUS_INIT_CHECK_CACHE.get(cache_key)
    if previous is None:
        api_version_url = f"{self.URL}/api_version"
        verify_key_url = f"{self.URL}/verify_access_key"
        try:
            with httpx.Client(timeout=5.0, verify=False, follow_redirects=True) as client:
                version_resp = client.get(api_version_url)
                version_resp.raise_for_status()
                if not isinstance(version_resp.json(), dict):
                    raise RuntimeError("Hydrus /api_version returned an unexpected response")

                verify_resp = client.get(
                    verify_key_url,
                    headers={"Hydrus-Client-API-Access-Key": self.API},
                )
                verify_resp.raise_for_status()
                if not isinstance(verify_resp.json(), dict):
                    raise RuntimeError("Hydrus /verify_access_key returned an unexpected response")

            _HYDRUS_INIT_CHECK_CACHE[cache_key] = (True, None)
        except Exception as exc:
            # Remember the failure so later instances with the same URL/key fail fast.
            err = str(exc)
            _HYDRUS_INIT_CHECK_CACHE[cache_key] = (False, err)
            raise RuntimeError(f"Hydrus '{self.NAME}' unavailable: {err}") from exc
    else:
        ok, err = previous
        if not ok:
            raise RuntimeError(f"Hydrus '{self.NAME}' unavailable: {err or 'Unavailable'}")

    # Create a persistent client for this instance (auth via access key by default).
    self._client = HydrusClient(url=self.URL, access_key=self.API, instance_name=self.NAME)

    # Best-effort total count: ask only for the count, not for IDs/hashes.
    try:
        payload = self._client.search_files(
            tags=["system:everything"],
            return_hashes=False,
            return_file_ids=False,
            return_file_count=True,
        )
        count_val = None
        if isinstance(payload, dict):
            # Accept the first of the known count field names that is present.
            for field in ("file_count", "file_count_inclusive", "num_files"):
                count_val = payload.get(field)
                if count_val is not None:
                    break
        if isinstance(count_val, int):
            self.total_count = count_val
    except Exception as exc:
        debug(f"{self._log_prefix()} total count unavailable: {exc}", file=sys.stderr)
|
||
|
||
def name(self) -> str:
    """Return the configured instance name (the ``NAME`` attribute)."""
    store_name = self.NAME
    return store_name
|
||
|
||
def get_name(self) -> str:
    """Return the configured instance name (same value as ``name()``)."""
    store_name = self.NAME
    return store_name
|
||
|
||
def add_file(self, file_path: Path, **kwargs: Any) -> str:
    """Upload a file to Hydrus (or reuse the existing copy) and attach metadata.

    Args:
        file_path: Path to the file to upload.
        tag: Optional tag or iterable of tags to add (service "my tags").
        url: Optional URL or iterable of URLs to associate with the file.
        title: Optional title; added as a 'title:<value>' tag unless the
            supplied tags already contain a 'title:' tag.

    Returns:
        File hash: the locally computed SHA-256 when the file already exists
        in Hydrus, otherwise the hash reported by the upload response.

    Raises:
        Exception: Re-raised after logging when hashing or the upload fails,
            or when the upload response carries no hash. Failures while
            checking for duplicates, undeleting, tagging or associating URLs
            are logged and do not abort the call.
    """
    from SYS.utils import sha256_file

    def _as_list(value: Any) -> list:
        # A bare string is ONE item; iterating it would yield single characters.
        if value is None:
            return []
        if isinstance(value, (str, bytes)):
            return [value] if value else []
        try:
            return list(value)
        except TypeError:
            return [value]

    tag_list = _as_list(kwargs.get("tag"))
    url_list = _as_list(kwargs.get("url"))
    title = kwargs.get("title")

    # Add title to tags if provided and not already present
    if title:
        has_title = any(str(candidate).lower().startswith("title:") for candidate in tag_list)
        if not has_title:
            tag_list = [f"title:{title}"] + tag_list

    try:
        # Compute file hash
        file_hash = sha256_file(file_path)
        debug(f"{self._log_prefix()} file hash: {file_hash}")

        client = self._client
        if client is None:
            raise Exception("Hydrus client unavailable")

        # Check if file already exists in Hydrus
        file_exists = False
        try:
            metadata = client.fetch_file_metadata(
                hashes=[file_hash],
                include_service_keys_to_tags=False,
                include_file_url=False,
                include_duration=False,
                include_size=False,
                include_mime=False,
            )
            metas = metadata.get("metadata", []) if isinstance(metadata, dict) else []
            if isinstance(metas, list):
                # Hydrus returns placeholder rows for unknown hashes.
                # Only treat as a real duplicate if it has a concrete file_id.
                file_exists = any(
                    isinstance(meta, dict) and meta.get("file_id") is not None
                    for meta in metas
                )
            if file_exists:
                log(
                    f"ℹ️ Duplicate detected - file already in Hydrus with hash: {file_hash}",
                    file=sys.stderr,
                )
        except Exception as exc:
            # Best-effort: if the lookup fails we fall through to a normal upload.
            debug(f"{self._log_prefix()} duplicate check failed: {exc}")

        if file_exists:
            # The existing file may be in trash. Best-effort restore it to 'my files'.
            try:
                client.undelete_files([file_hash])
            except Exception as exc:
                debug(f"{self._log_prefix()} undelete failed: {exc}")
        else:
            # Upload file if not already present
            log(f"{self._log_prefix()} Uploading: {file_path.name}", file=sys.stderr)
            response = client.add_file(file_path)

            # Extract hash from response
            hydrus_hash: Optional[str] = None
            if isinstance(response, dict):
                hydrus_hash = response.get("hash") or response.get("file_hash")
                if not hydrus_hash:
                    hashes = response.get("hashes")
                    if isinstance(hashes, list) and hashes:
                        hydrus_hash = hashes[0]

            if not hydrus_hash:
                raise Exception(f"Hydrus response missing file hash: {response}")

            file_hash = hydrus_hash
            log(f"{self._log_prefix()} hash: {file_hash}", file=sys.stderr)

        # Add tags if provided (both for new and existing files)
        if tag_list:
            service_name = "my tags"  # default tag service
            try:
                debug(f"{self._log_prefix()} Adding {len(tag_list)} tag(s): {tag_list}")
                client.add_tag(file_hash, tag_list, service_name)
                log(f"{self._log_prefix()} Tags added via '{service_name}'", file=sys.stderr)
            except Exception as exc:
                log(f"{self._log_prefix()} ⚠️ Failed to add tags: {exc}", file=sys.stderr)

        # Associate url if provided (both for new and existing files)
        if url_list:
            log(f"{self._log_prefix()} Associating {len(url_list)} URL(s) with file", file=sys.stderr)
            for entry in url_list:
                if not entry:
                    continue
                try:
                    client.associate_url(file_hash, str(entry))
                    debug(f"{self._log_prefix()} Associated URL: {entry}")
                except Exception as exc:
                    log(f"{self._log_prefix()} ⚠️ Failed to associate URL {entry}: {exc}", file=sys.stderr)

        return file_hash

    except Exception as exc:
        log(f"{self._log_prefix()} ❌ upload failed: {exc}", file=sys.stderr)
        raise
|
||
|
||
def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]:
    """Search this Hydrus instance and return normalised result rows.

    Query forms:
        ``*``               every file (``system:everything``).
        ``url:`` / ``url:*``  files that have at least one URL.
        ``url:<text>``      exact lookup through ``/add_urls/get_url_files``
                            when ``<text>`` is a full http(s) URL; otherwise,
                            or when that lookup yields nothing, a
                            case-insensitive substring scan over file URLs.
        ``namespace:value`` sent to Hydrus as one lower-cased tag.
        anything else       free-form: all files are fetched and a file is
                            kept only when every comma/space separated term
                            matches its title or tags as a whole word.

    Args:
        query: Search query.
        limit: Maximum number of results (default 100). ``None`` or a value
            that cannot be converted to ``int`` falls back to 100.

    Returns:
        List of dicts with the keys 'hash', 'url', 'name', 'title', 'size',
        'size_bytes', 'store', 'tag', 'file_id', 'mime' and 'ext'.

    Raises:
        Exception: Any failure is logged (with traceback) and re-raised.

    Example:
        results = storage["hydrus"].search("artist:john_doe music")
        results = storage["hydrus"].search("Simple Man")
    """
    # Normalise the limit once so comparisons/slicing below cannot raise TypeError.
    raw_limit = kwargs.get("limit", 100)
    try:
        limit = int(raw_limit) if raw_limit is not None else 100
    except (TypeError, ValueError):
        limit = 100
    fetch_limit = limit if limit else 100

    # Metadata fields requested for URL-based lookups.
    full_meta_flags = {
        "include_file_url": True,
        "include_service_keys_to_tags": True,
        "include_duration": True,
        "include_size": True,
        "include_mime": True,
    }

    try:
        client = self._client
        if client is None:
            raise Exception("Hydrus client unavailable")

        prefix = self._log_prefix()
        debug(f"{prefix} Searching for: {query}")

        def _extract_urls(meta_obj: Any) -> list[str]:
            """Return the non-empty URL strings found under 'url' or 'urls'."""
            if not isinstance(meta_obj, dict):
                return []
            raw = meta_obj.get("url")
            if raw is None:
                raw = meta_obj.get("urls")
            if isinstance(raw, str):
                val = raw.strip()
                return [val] if val else []
            if isinstance(raw, list):
                return [s for s in (item.strip() for item in raw if isinstance(item, str)) if s]
            return []

        def _ids_from(search_payload: Any, allow_numeric_strings: bool) -> list[int]:
            """Pull integer file ids out of a search_files response."""
            ids = search_payload.get("file_ids", []) if isinstance(search_payload, dict) else []
            if not isinstance(ids, list):
                return []
            if allow_numeric_strings:
                return [
                    int(x) for x in ids
                    if isinstance(x, (int, float, str)) and str(x).strip().isdigit()
                ]
            return [int(x) for x in ids if isinstance(x, (int, float))]

        def _iter_url_filtered_metadata(url_value: str | None, want_any: bool, fetch_limit: int) -> list[dict[str, Any]]:
            """Best-effort URL search by scanning Hydrus metadata with include_file_url=True."""
            # First try a fast system predicate if Hydrus supports it.
            candidate_file_ids: list[int] = []
            try:
                if want_any:
                    url_search = client.search_files(
                        tags=["system:has url"],
                        return_hashes=False,
                        return_file_ids=True,
                        return_file_count=False,
                    )
                    candidate_file_ids = _ids_from(url_search, allow_numeric_strings=True)
            except Exception:
                candidate_file_ids = []

            if not candidate_file_ids:
                # Fallback: scan from system:everything and filter by URL substring.
                everything = client.search_files(
                    tags=["system:everything"],
                    return_hashes=False,
                    return_file_ids=True,
                    return_file_count=False,
                )
                candidate_file_ids = _ids_from(everything, allow_numeric_strings=False)

            if not candidate_file_ids:
                return []

            needle = (url_value or "").strip().lower()
            chunk_size = 200
            out: list[dict[str, Any]] = []

            for start in range(0, len(candidate_file_ids), chunk_size):
                if len(out) >= fetch_limit:
                    break
                chunk = candidate_file_ids[start : start + chunk_size]
                try:
                    payload = client.fetch_file_metadata(file_ids=chunk, **full_meta_flags)
                except Exception:
                    # Skip a failing chunk; the remaining chunks may still succeed.
                    continue

                metas = payload.get("metadata", []) if isinstance(payload, dict) else []
                if not isinstance(metas, list):
                    continue

                for meta in metas:
                    if not isinstance(meta, dict):
                        continue
                    urls = _extract_urls(meta)
                    if not urls:
                        continue
                    if want_any:
                        keep = True
                    else:
                        keep = bool(needle) and any(needle in u.lower() for u in urls)
                    if keep:
                        out.append(meta)
                        if len(out) >= fetch_limit:
                            break

            return out

        def _lookup_exact_url(url_text: str) -> list[dict[str, Any]] | None:
            """Resolve a full URL via /add_urls/get_url_files; None means 'no answer'."""
            from API.HydrusNetwork import HydrusRequestSpec

            spec = HydrusRequestSpec(method="GET", endpoint="/add_urls/get_url_files", query={"url": url_text})
            response = client._perform_request(spec)  # type: ignore[attr-defined]
            if not isinstance(response, dict):
                return None

            hashes: list[str] = []
            raw_hashes = response.get("hashes") or response.get("file_hashes")
            if isinstance(raw_hashes, list):
                hashes = [h.strip() for h in raw_hashes if isinstance(h, str) and h.strip()]

            file_ids: list[int] = []
            raw_ids = response.get("file_ids")
            if isinstance(raw_ids, list):
                for item in raw_ids:
                    try:
                        file_ids.append(int(item))
                    except (TypeError, ValueError):
                        continue

            if file_ids:
                payload = client.fetch_file_metadata(file_ids=file_ids, **full_meta_flags)
            elif hashes:
                payload = client.fetch_file_metadata(hashes=hashes, **full_meta_flags)
            else:
                return None

            metas = payload.get("metadata", []) if isinstance(payload, dict) else []
            if isinstance(metas, list):
                return [m for m in metas if isinstance(m, dict)]
            return None

        def _append_tags(bucket: list[str], candidates: Any) -> None:
            """Append the non-empty string form of each entry of a tag list."""
            if not isinstance(candidates, list):
                return
            for tag in candidates:
                tag_text = str(tag) if tag else ""
                if tag_text:
                    bucket.append(tag_text)

        query_lower = query.lower().strip()

        # Special case: url:* and url:<value>
        metadata_list: list[dict[str, Any]] | None = None
        if ":" in query_lower and not query_lower.startswith(":"):
            namespace, pattern = query_lower.split(":", 1)
            namespace = namespace.strip()
            pattern = pattern.strip()
            if namespace == "url":
                if not pattern or pattern == "*":
                    metadata_list = _iter_url_filtered_metadata(None, want_any=True, fetch_limit=fetch_limit)
                else:
                    # Fast-path: exact URL lookup when a full URL is provided.
                    # URL paths can be case-sensitive, so send the caller's
                    # original casing rather than the lower-cased pattern.
                    if pattern.startswith(("http://", "https://")):
                        exact_url = query.strip().split(":", 1)[1].strip()
                        try:
                            metadata_list = _lookup_exact_url(exact_url)
                        except Exception:
                            metadata_list = None

                    # Fallback: substring scan
                    if metadata_list is None:
                        metadata_list = _iter_url_filtered_metadata(pattern, want_any=False, fetch_limit=fetch_limit)

        # "*" and queries without a namespace both start from every file;
        # an explicit namespace (e.g. "creator:john") is used as the tag itself.
        has_namespace = ":" in query_lower
        if query.strip() == "*" or not has_namespace:
            tags = ["system:everything"]
        else:
            tags = [query_lower]

        # Free-form terms (comma or space separated, AND logic), compiled once
        # as whole-word patterns instead of once per file.
        term_patterns: list[re.Pattern[str]] = []
        if not has_namespace and query_lower != "*":
            search_terms = set(query_lower.replace(",", " ").split())
            term_patterns = [re.compile(r"\b" + re.escape(term) + r"\b") for term in search_terms]

        # Search files with the tags (unless url: search already produced metadata)
        if metadata_list is None:
            search_result = client.search_files(
                tags=tags,
                return_hashes=True,
                return_file_ids=True,
            )
            found = search_result if isinstance(search_result, dict) else {}
            file_ids = found.get("file_ids", [])
            hashes = found.get("hashes", [])

            if not file_ids and not hashes:
                debug(f"{prefix} 0 result(s)")
                return []

            if file_ids:
                metadata = client.fetch_file_metadata(file_ids=file_ids)
            else:
                metadata = client.fetch_file_metadata(hashes=hashes)
            metadata_list = metadata.get("metadata", []) if isinstance(metadata, dict) else []

        if not isinstance(metadata_list, list):
            metadata_list = []

        base_url = self.URL.rstrip("/")
        results: list[Dict[str, Any]] = []

        for meta in metadata_list:
            if len(results) >= limit:
                break
            if not isinstance(meta, dict):
                # Malformed row: nothing to read from it.
                continue

            file_id = meta.get("file_id")
            hash_hex = meta.get("hash")
            size = meta.get("size", 0)

            # Gather tags from every service's storage_tags and display_tags.
            all_tags: list[str] = []
            tags_set = meta.get("tags", {})
            if isinstance(tags_set, dict):
                for service_tags in tags_set.values():
                    if not isinstance(service_tags, dict):
                        continue
                    storage_tags = service_tags.get("storage_tags", {})
                    if isinstance(storage_tags, dict):
                        for status_tags in storage_tags.values():
                            _append_tags(all_tags, status_tags)
                    # NOTE(review): only a list-shaped display_tags is collected
                    # here, while get_metadata() reads display_tags as a dict
                    # keyed by "0" -- confirm which shape the API returns.
                    _append_tags(all_tags, service_tags.get("display_tags", []))

            # Also consider a top-level flattened tags payload if provided.
            _append_tags(all_tags, meta.get("tags_flat", []) or meta.get("tags", []))

            # Title: value of the first 'title:' tag, else a file-id fallback.
            title = f"Hydrus File {file_id}"
            for tag_text in all_tags:
                if tag_text.lower().startswith("title:"):
                    title = tag_text.split(":", 1)[1].strip()
                    break

            # Prefer Hydrus-provided extension (e.g. ".webm"); fall back to MIME map.
            mime_type = meta.get("mime")
            ext = str(meta.get("ext") or "").strip().lstrip(".")
            if not ext and mime_type:
                for category in mime_maps.values():
                    for info in category.values():
                        if mime_type in info.get("mimes", []):
                            ext = str(info.get("ext", "")).strip().lstrip(".")
                            break
                    if ext:
                        break

            # Namespace searches were already filtered by Hydrus; free-form
            # searches require every term to appear in the title or tags.
            if term_patterns:
                all_tags_str = "".join(" " + tag_text.lower() for tag_text in all_tags)
                searchable_text = (title + " " + all_tags_str).lower()
                if not all(p.search(searchable_text) for p in term_patterns):
                    continue

            results.append({
                "hash": hash_hex,
                "url": f"{base_url}/get_files/file?hash={hash_hex}",
                "name": title,
                "title": title,
                "size": size,
                "size_bytes": size,
                "store": self.NAME,
                "tag": all_tags,
                "file_id": file_id,
                "mime": mime_type,
                "ext": ext,
            })

        debug(f"{prefix} {len(results)} result(s)")
        return results[:limit]

    except Exception as exc:
        log(f"❌ Hydrus search failed: {exc}", file=sys.stderr)
        import traceback
        traceback.print_exc(file=sys.stderr)
        raise
|
||
|
||
def get_file(self, file_hash: str, **kwargs: Any) -> Path | str | None:
    """Return a browser URL for the file.

    The URL points at ``/get_files/file`` on this instance and carries the
    file hash and the API access key as query parameters.

    IMPORTANT: this method must be side-effect free (do not auto-open a browser).
    Only explicit user actions (e.g. the get-file cmdlet) should open files.

    Args:
        file_hash: Hash of the file to address.

    Returns:
        The URL string.
    """
    debug(f"{self._log_prefix()} get_file: start hash={file_hash[:12]}...")

    # Build browser URL with access key
    base_url = str(self.URL).rstrip('/')
    access_key = str(self.API)
    file_endpoint = f"{base_url}/get_files/file?hash={file_hash}"
    browser_url = f"{file_endpoint}&Hydrus-Client-API-Access-Key={access_key}"

    # The access key is a credential: log the URL with the key redacted.
    debug(f"{self._log_prefix()} get_file: url={file_endpoint}&Hydrus-Client-API-Access-Key=<redacted>")
    return browser_url
|
||
|
||
def get_metadata(self, file_hash: str, **kwargs: Any) -> Optional[Dict[str, Any]]:
    """Fetch one file's metadata from Hydrus and normalise it for display.

    Args:
        file_hash: SHA256 hash of the file (64-char hex string).

    Returns:
        Dict with 'hash', 'title', 'ext', 'size', 'mime', 'hydrus_mime',
        'filetype_human', 'duration_ms' and 'url', or None when the client is
        unavailable, Hydrus has no real row for the hash, or any error occurs.
    """
    try:
        client = self._client
        if not client:
            debug(f"{self._log_prefix()} get_metadata: client unavailable")
            return None

        # Fetch file metadata with the fields we need for CLI display.
        payload = client.fetch_file_metadata(
            hashes=[file_hash],
            include_service_keys_to_tags=True,
            include_file_url=True,
            include_duration=True,
            include_size=True,
            include_mime=True,
        )
        if not payload or not payload.get("metadata"):
            return None

        meta = payload["metadata"][0]
        # Hydrus can return placeholder metadata rows for unknown hashes.
        if not isinstance(meta, dict) or meta.get("file_id") is None:
            return None

        # Title: first 'title:' tag among display_tags["0"] of any service.
        fallback_title = f"Hydrus_{file_hash[:12]}"
        title = fallback_title
        tags_payload = meta.get("tags", {})
        if isinstance(tags_payload, dict):
            for service_data in tags_payload.values():
                if isinstance(service_data, dict):
                    display_tags = service_data.get("display_tags", {})
                    current_tags = display_tags.get("0", []) if isinstance(display_tags, dict) else None
                    if isinstance(current_tags, list):
                        for tag in current_tags:
                            if str(tag).lower().startswith("title:"):
                                title = tag.split(":", 1)[1].strip()
                                break
                if title != fallback_title:
                    break

        # Hydrus may return mime as an int enum, or sometimes a human label.
        mime_val = meta.get("mime")
        filetype_human = meta.get("filetype_human") or meta.get("mime_human") or meta.get("mime_string")

        # Extension, in order of preference: metadata 'ext', a short
        # alphanumeric filetype label, the title's suffix, the file path's suffix.
        ext = str(meta.get("ext") or "").strip().lstrip(".")
        if not ext:
            label = str(filetype_human or "").strip().lstrip(".").lower()
            # Treat simple labels like "mp4", "m4a", "webm" as extensions.
            if label and label != "unknown filetype" and label.isalnum() and len(label) <= 8:
                ext = label
        if not ext and isinstance(title, str) and "." in title:
            try:
                ext = Path(title).suffix.lstrip(".")
            except Exception:
                ext = ""
        if not ext:
            try:
                path_payload = client.get_file_path(file_hash)
                if isinstance(path_payload, dict):
                    location = path_payload.get("path")
                    if isinstance(location, str) and location.strip():
                        ext = Path(location.strip()).suffix.lstrip(".")
            except Exception:
                ext = ""

        def _mime_from_ext(ext_value: str) -> str:
            """Map an extension to the first MIME string listed for it in mime_maps."""
            ext_clean = str(ext_value or "").strip().lstrip(".").lower()
            if not ext_clean:
                return ""
            try:
                for category in mime_maps.values():
                    info = category.get(ext_clean)
                    if not isinstance(info, dict):
                        continue
                    mimes = info.get("mimes")
                    if isinstance(mimes, list) and mimes:
                        return str(mimes[0])
            except Exception:
                return ""
            return ""

        # Normalize to a MIME string for CLI output: take the first candidate
        # that looks like "type/subtype"; human labels such as
        # "unknown filetype" are never passed through.
        mime_type = ""
        for source in (mime_val, filetype_human):
            if mime_type:
                break
            if isinstance(source, str):
                candidate = source.strip()
                if "/" in candidate and candidate.lower() != "unknown filetype":
                    mime_type = candidate
        if not mime_type:
            mime_type = _mime_from_ext(ext)

        def _to_int(value: Any) -> int | None:
            """Convert to int; None or unconvertible values become None."""
            try:
                return int(value) if value is not None else None
            except Exception:
                return None

        # Normalize size/duration to stable scalar types.
        size_val = meta.get("size")
        if size_val is None:
            size_val = meta.get("size_bytes")
        size_int = _to_int(size_val)

        dur_val = meta.get("duration")
        if dur_val is None:
            dur_val = meta.get("duration_ms")
        dur_int = _to_int(dur_val)

        raw_urls = meta.get("known_urls") or meta.get("urls") or meta.get("url") or []
        url_list: list[str] = []
        if isinstance(raw_urls, str):
            single = raw_urls.strip()
            if single:
                url_list = [single]
        elif isinstance(raw_urls, list):
            url_list = [str(u).strip() for u in raw_urls if isinstance(u, str) and str(u).strip()]

        return {
            "hash": file_hash,
            "title": title,
            "ext": ext,
            "size": size_int,
            "mime": mime_type,
            # Keep raw fields available for troubleshooting/other callers.
            "hydrus_mime": mime_val,
            "filetype_human": filetype_human,
            "duration_ms": dur_int,
            "url": url_list,
        }

    except Exception as exc:
        debug(f"{self._log_prefix()} get_metadata failed: {exc}")
        return None
|
||
|
||
def get_tag(self, file_identifier: str, **kwargs: Any) -> Tuple[List[str], str]:
    """Get tags for a file from Hydrus by hash.

    Args:
        file_identifier: File hash (SHA256 hex string, 64 characters;
            surrounding whitespace and upper-case are tolerated).
        **kwargs: Optional ``service_name`` selecting the tag service;
            defaults to "my tags" (same default as add_tag/delete_tag).

    Returns:
        Tuple of (tags_list, source_description). The source is "hydrus" on
        success; on any failure (invalid hash, client unavailable, no
        metadata, or an exception) the result is ``([], "unknown")``.
    """
    try:
        from API import HydrusNetwork as hydrus_wrapper

        file_hash = str(file_identifier or "").strip().lower()
        if len(file_hash) != 64 or not all(ch in "0123456789abcdef" for ch in file_hash):
            debug(f"{self._log_prefix()} get_tags: invalid file hash '{file_identifier}'")
            return [], "unknown"

        # Get Hydrus client and service info
        client = self._client
        if not client:
            debug(f"{self._log_prefix()} get_tags: client unavailable")
            return [], "unknown"

        # Fetch file metadata
        payload = client.fetch_file_metadata(
            hashes=[file_hash],
            include_service_keys_to_tags=True,
            include_file_url=False,
        )

        items = payload.get("metadata") if isinstance(payload, dict) else None
        if not isinstance(items, list) or not items:
            debug(f"{self._log_prefix()} get_tags: no metadata for hash {file_hash}")
            return [], "unknown"

        # A row without file_id is not treated as a real file.
        meta = items[0] if isinstance(items[0], dict) else None
        if not isinstance(meta, dict) or meta.get("file_id") is None:
            debug(f"{self._log_prefix()} get_tags: invalid metadata for hash {file_hash}")
            return [], "unknown"

        # Honour the documented service_name keyword instead of hard-coding it.
        service_name = kwargs.get("service_name") or "my tags"
        service_key = hydrus_wrapper.get_tag_service_key(client, service_name)

        # Extract tags from metadata
        tags = self._extract_tags_from_hydrus_meta(meta, service_key, service_name)

        return tags, "hydrus"

    except Exception as exc:
        debug(f"{self._log_prefix()} get_tags failed: {exc}")
        return [], "unknown"
|
||
|
||
def add_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool:
    """Add tags to a Hydrus file.

    Args:
        file_identifier: SHA256 hex hash of the file (64 characters).
        tags: List/tuple of tags; any other value is wrapped as one string tag.
        **kwargs: Optional ``service_name`` (default "my tags").

    Returns:
        True when the client call completed; False for a missing client, an
        invalid hash, an empty tag list, or any exception.
    """
    try:
        client = self._client
        if client is None:
            debug(f"{self._log_prefix()} add_tag: client unavailable")
            return False

        file_hash = str(file_identifier or "").strip().lower()
        looks_like_sha256 = len(file_hash) == 64 and set(file_hash) <= set("0123456789abcdef")
        if not looks_like_sha256:
            debug(f"{self._log_prefix()} add_tag: invalid file hash '{file_identifier}'")
            return False

        service_name = kwargs.get("service_name") or "my tags"

        # Ensure tags is a list
        if isinstance(tags, (list, tuple)):
            tag_list = list(tags)
        else:
            tag_list = [str(tags)]
        if not tag_list:
            return False

        client.add_tag(file_hash, tag_list, service_name)
        return True
    except Exception as exc:
        debug(f"{self._log_prefix()} add_tag failed: {exc}")
        return False
|
||
|
||
def delete_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool:
    """Delete tags from a Hydrus file.

    Args:
        file_identifier: SHA256 hex hash of the file (64 characters).
        tags: List/tuple of tags; any other value is wrapped as one string tag.
        **kwargs: Optional ``service_name`` (default "my tags").

    Returns:
        True when the client call completed; False for a missing client, an
        invalid hash, an empty tag list, or any exception.
    """
    try:
        client = self._client
        if client is None:
            debug(f"{self._log_prefix()} delete_tag: client unavailable")
            return False

        file_hash = str(file_identifier or "").strip().lower()
        looks_like_sha256 = len(file_hash) == 64 and set(file_hash) <= set("0123456789abcdef")
        if not looks_like_sha256:
            debug(f"{self._log_prefix()} delete_tag: invalid file hash '{file_identifier}'")
            return False

        service_name = kwargs.get("service_name") or "my tags"

        if isinstance(tags, (list, tuple)):
            tag_list = list(tags)
        else:
            tag_list = [str(tags)]
        if not tag_list:
            return False

        client.delete_tag(file_hash, tag_list, service_name)
        return True
    except Exception as exc:
        debug(f"{self._log_prefix()} delete_tag failed: {exc}")
        return False
|
||
|
||
def get_url(self, file_identifier: str, **kwargs: Any) -> List[str]:
    """Get known url for a Hydrus file.

    Args:
        file_identifier: SHA256 hex hash of the file (64 characters).

    Returns:
        Stripped, non-empty URL strings taken from the first metadata row's
        'known_urls', 'urls' or 'url' field (first non-empty one wins);
        an empty list for a missing client, invalid hash, no metadata, or
        any exception.
    """
    try:
        client = self._client
        if client is None:
            debug(f"{self._log_prefix()} get_url: client unavailable")
            return []

        file_hash = str(file_identifier or "").strip().lower()
        looks_like_sha256 = len(file_hash) == 64 and set(file_hash) <= set("0123456789abcdef")
        if not looks_like_sha256:
            return []

        # NOTE(review): this asks for include_file_url=False yet reads URL
        # fields below, whereas get_metadata() requests include_file_url=True
        # -- confirm the flag does not suppress the URL fields.
        payload = client.fetch_file_metadata(hashes=[file_hash], include_file_url=False)
        items = payload.get("metadata") if isinstance(payload, dict) else None
        if not isinstance(items, list) or not items:
            return []
        first = items[0]
        meta = first if isinstance(first, dict) else {}

        raw_urls: Any = meta.get("known_urls") or meta.get("urls") or meta.get("url") or []
        if isinstance(raw_urls, str):
            single = raw_urls.strip()
            return [single] if single else []
        if isinstance(raw_urls, list):
            stripped = (entry.strip() for entry in raw_urls if isinstance(entry, str))
            return [entry for entry in stripped if entry]
        return []
    except Exception as exc:
        debug(f"{self._log_prefix()} get_url failed: {exc}")
        return []
|
||
|
||
def add_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
    """Associate one or more URLs with a Hydrus file.

    Args:
        file_identifier: Identifier of the target file; passed unchanged to
            ``client.associate_url``.
        url: URLs to associate. A single string is accepted as well and is
            treated as a one-element list.

    Returns:
        True when every association call completed without raising (this
        includes the case where there was nothing to associate); False when
        the client is unavailable or any call raised.
    """
    try:
        client = self._client
        if client is None:
            debug(f"{self._log_prefix()} add_url: client unavailable")
            return False

        # A bare string would otherwise be iterated character by character,
        # issuing one API call per character.
        candidates = [url] if isinstance(url, str) else list(url)

        for entry in candidates:
            if not isinstance(entry, str):
                continue
            cleaned = entry.strip()
            if cleaned:
                client.associate_url(file_identifier, cleaned)
        return True
    except Exception as exc:
        debug(f"{self._log_prefix()} add_url failed: {exc}")
        return False
|
||
|
||
def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
    """Remove one or more URLs from a Hydrus file.

    Args:
        file_identifier: Identifier of the target file; passed unchanged to
            ``client.delete_url``.
        url: URLs to remove. A single string is accepted as well and is
            treated as a one-element list.

    Returns:
        True when every removal call completed without raising (this
        includes the case where there was nothing to remove); False when
        the client is unavailable or any call raised.
    """
    try:
        client = self._client
        if client is None:
            debug(f"{self._log_prefix()} delete_url: client unavailable")
            return False

        # A bare string would otherwise be iterated character by character,
        # issuing one API call per character.
        candidates = [url] if isinstance(url, str) else list(url)

        for entry in candidates:
            if not isinstance(entry, str):
                continue
            cleaned = entry.strip()
            if cleaned:
                client.delete_url(file_identifier, cleaned)
        return True
    except Exception as exc:
        debug(f"{self._log_prefix()} delete_url failed: {exc}")
        return False
|
||
|
||
def get_note(self, file_identifier: str, **kwargs: Any) -> Dict[str, str]:
    """Fetch the notes attached to a Hydrus file as a name -> text mapping.

    The identifier is trimmed and lower-cased and must then be exactly 64
    hexadecimal characters; otherwise an empty dict is returned without
    querying the client. Note names and texts are converted with ``str``;
    a falsy text becomes ``""`` and entries whose name is blank are dropped.

    Any exception is logged through ``debug`` and results in ``{}``.
    """
    try:
        hydrus = self._client
        if hydrus is None:
            debug(f"{self._log_prefix()} get_note: client unavailable")
            return {}

        digest = str(file_identifier or "").strip().lower()
        is_hex64 = len(digest) == 64 and all(
            c in "0123456789abcdef" for c in digest
        )
        if not is_hex64:
            return {}

        response = hydrus.fetch_file_metadata(hashes=[digest], include_notes=True)
        entries = response.get("metadata") if isinstance(response, dict) else None
        if not (isinstance(entries, list) and entries):
            return {}

        record = entries[0]
        if not isinstance(record, dict):
            return {}

        raw_notes = record.get("notes")
        if not isinstance(raw_notes, dict):
            return {}

        notes: Dict[str, str] = {}
        for title, body in raw_notes.items():
            label = str(title)
            if label.strip():
                notes[label] = str(body or "")
        return notes
    except Exception as exc:
        debug(f"{self._log_prefix()} get_note failed: {exc}")
        return {}
|
||
|
||
def set_note(self, file_identifier: str, name: str, text: str, **kwargs: Any) -> bool:
    """Write a single named note onto a Hydrus file.

    The identifier is trimmed and lower-cased and must then be exactly 64
    hexadecimal characters, and the stripped note name must be non-empty;
    otherwise False is returned without calling the client. A falsy
    ``text`` is stored as an empty string.

    Returns True once ``client.set_notes`` has returned; any exception is
    logged through ``debug`` and results in False.
    """
    try:
        hydrus = self._client
        if hydrus is None:
            debug(f"{self._log_prefix()} set_note: client unavailable")
            return False

        digest = str(file_identifier or "").strip().lower()
        is_hex64 = len(digest) == 64 and all(
            c in "0123456789abcdef" for c in digest
        )
        if not is_hex64:
            return False

        title = str(name or "").strip()
        if title == "":
            return False

        hydrus.set_notes(digest, {title: str(text or "")})
        return True
    except Exception as exc:
        debug(f"{self._log_prefix()} set_note failed: {exc}")
        return False
|
||
|
||
def delete_note(self, file_identifier: str, name: str, **kwargs: Any) -> bool:
    """Remove a single named note from a Hydrus file.

    The identifier is trimmed and lower-cased and must then be exactly 64
    hexadecimal characters, and the stripped note name must be non-empty;
    otherwise False is returned without calling the client.

    Returns True once ``client.delete_notes`` has returned; any exception
    is logged through ``debug`` and results in False.
    """
    try:
        hydrus = self._client
        if hydrus is None:
            debug(f"{self._log_prefix()} delete_note: client unavailable")
            return False

        digest = str(file_identifier or "").strip().lower()
        is_hex64 = len(digest) == 64 and all(
            c in "0123456789abcdef" for c in digest
        )
        if not is_hex64:
            return False

        title = str(name or "").strip()
        if title == "":
            return False

        hydrus.delete_notes(digest, [title])
        return True
    except Exception as exc:
        debug(f"{self._log_prefix()} delete_note failed: {exc}")
        return False
|
||
|
||
@staticmethod
def _extract_tags_from_hydrus_meta(
    meta: Dict[str, Any],
    service_key: Optional[str],
    service_name: str
) -> List[str]:
    """Extract the current tags for one tag service from a Hydrus metadata dict.

    Prefers display_tags (includes siblings/parents, excludes deleted) and
    falls back to storage_tags. In both cases the tags may be given either
    as a plain list or as a dict keyed by status, where status '0' (or the
    integer 0) holds the current tags.

    Args:
        meta: One file's metadata entry; its ``tags`` value is expected to
            map service keys to per-service dicts.
        service_key: Key of the tag service to read. When it is empty or
            has no dict entry in ``meta["tags"]``, the service is looked up
            by ``service_name`` instead.
        service_name: Service name matched (trimmed, case-insensitively)
            against each per-service ``name`` field during the fallback.

    Returns:
        The non-blank current tags, or an empty list when the service or
        its tags cannot be found.
    """

    def _current(bucket: Any) -> List[str]:
        # Status-keyed form: pick the "current" status bucket.
        if isinstance(bucket, dict):
            bucket = bucket.get("0") or bucket.get(0)
        if not isinstance(bucket, list):
            return []
        cleaned: List[str] = []
        for tag in bucket:
            # str(b"x") would yield "b'x'", so decode bytes explicitly.
            if isinstance(tag, bytes):
                tag = tag.decode("utf-8", errors="replace")
            if isinstance(tag, str) and tag.strip():
                cleaned.append(tag)
        return cleaned

    tags_payload = meta.get("tags")
    if not isinstance(tags_payload, dict):
        return []

    svc_data = tags_payload.get(service_key) if service_key else None

    if not isinstance(svc_data, dict):
        # No usable entry under the key: fall back to matching by name.
        wanted = str(service_name or "").strip().lower()
        svc_data = None
        if wanted:
            for entry in tags_payload.values():
                if not isinstance(entry, dict):
                    continue
                if str(entry.get("name") or "").strip().lower() == wanted:
                    svc_data = entry
                    break
        if svc_data is None:
            return []

    # Prefer display_tags (Hydrus computes siblings/parents)
    display_tags = _current(svc_data.get("display_tags"))
    if display_tags:
        return display_tags

    # Fallback to storage_tags status '0' (current)
    return _current(svc_data.get("storage_tags"))
|