2025-12-11 19:04:02 -08:00
|
|
|
from __future__ import annotations
|
|
|
|
|
|
|
|
|
|
import re
|
|
|
|
|
import sys
|
2026-01-04 02:23:50 -08:00
|
|
|
import tempfile
|
|
|
|
|
import shutil
|
2025-12-11 19:04:02 -08:00
|
|
|
from pathlib import Path
|
|
|
|
|
from typing import Any, Dict, List, Optional, Tuple
|
|
|
|
|
|
2026-01-04 02:23:50 -08:00
|
|
|
from urllib.parse import quote
|
|
|
|
|
|
2025-12-13 12:09:50 -08:00
|
|
|
import httpx
|
|
|
|
|
|
2025-12-11 19:04:02 -08:00
|
|
|
from SYS.logger import debug, log
|
|
|
|
|
from SYS.utils_constant import mime_maps
|
|
|
|
|
|
2025-12-11 23:21:45 -08:00
|
|
|
from Store._base import Store
|
2025-12-11 19:04:02 -08:00
|
|
|
|
2025-12-29 18:42:02 -08:00
|
|
|
_HYDRUS_INIT_CHECK_CACHE: dict[tuple[str,
|
|
|
|
|
str],
|
|
|
|
|
tuple[bool,
|
|
|
|
|
Optional[str]]] = {}
|
2025-12-13 12:09:50 -08:00
|
|
|
|
|
|
|
|
|
2025-12-11 23:21:45 -08:00
|
|
|
class HydrusNetwork(Store):
|
2025-12-11 19:04:02 -08:00
|
|
|
"""File storage backend for Hydrus client.
|
2025-12-29 17:05:03 -08:00
|
|
|
|
2025-12-11 19:04:02 -08:00
|
|
|
Each instance represents a specific Hydrus client connection.
|
2025-12-13 12:09:50 -08:00
|
|
|
Maintains its own HydrusClient.
|
2025-12-11 19:04:02 -08:00
|
|
|
"""
|
2025-12-13 00:18:30 -08:00
|
|
|
|
2025-12-16 23:23:43 -08:00
|
|
|
def _log_prefix(self) -> str:
|
|
|
|
|
store_name = getattr(self, "NAME", None) or "unknown"
|
|
|
|
|
return f"[hydrusnetwork:{store_name}]"
|
|
|
|
|
|
2025-12-13 00:18:30 -08:00
|
|
|
def __new__(cls, *args: Any, **kwargs: Any) -> "HydrusNetwork":
|
|
|
|
|
instance = super().__new__(cls)
|
|
|
|
|
name = kwargs.get("NAME")
|
|
|
|
|
api = kwargs.get("API")
|
|
|
|
|
url = kwargs.get("URL")
|
|
|
|
|
if name is not None:
|
|
|
|
|
setattr(instance, "NAME", str(name))
|
|
|
|
|
if api is not None:
|
|
|
|
|
setattr(instance, "API", str(api))
|
|
|
|
|
if url is not None:
|
|
|
|
|
setattr(instance, "URL", str(url))
|
|
|
|
|
return instance
|
|
|
|
|
|
|
|
|
|
setattr(__new__, "keys", ("NAME", "API", "URL"))
|
2025-12-29 17:05:03 -08:00
|
|
|
|
2025-12-13 00:18:30 -08:00
|
|
|
def __init__(
|
|
|
|
|
self,
|
|
|
|
|
instance_name: Optional[str] = None,
|
|
|
|
|
api_key: Optional[str] = None,
|
|
|
|
|
url: Optional[str] = None,
|
|
|
|
|
*,
|
|
|
|
|
NAME: Optional[str] = None,
|
|
|
|
|
API: Optional[str] = None,
|
|
|
|
|
URL: Optional[str] = None,
|
|
|
|
|
) -> None:
|
2025-12-11 19:04:02 -08:00
|
|
|
"""Initialize Hydrus storage backend.
|
2025-12-29 17:05:03 -08:00
|
|
|
|
2025-12-11 19:04:02 -08:00
|
|
|
Args:
|
|
|
|
|
instance_name: Name of this Hydrus instance (e.g., 'home', 'work')
|
|
|
|
|
api_key: Hydrus Client API access key
|
|
|
|
|
url: Hydrus client URL (e.g., 'http://192.168.1.230:45869')
|
|
|
|
|
"""
|
2025-12-11 23:21:45 -08:00
|
|
|
from API.HydrusNetwork import HydrusNetwork as HydrusClient
|
2025-12-13 00:18:30 -08:00
|
|
|
|
|
|
|
|
if instance_name is None and NAME is not None:
|
|
|
|
|
instance_name = str(NAME)
|
|
|
|
|
if api_key is None and API is not None:
|
|
|
|
|
api_key = str(API)
|
|
|
|
|
if url is None and URL is not None:
|
|
|
|
|
url = str(URL)
|
|
|
|
|
|
|
|
|
|
if not instance_name or not api_key or not url:
|
|
|
|
|
raise ValueError("HydrusNetwork requires NAME, API, and URL")
|
2025-12-29 17:05:03 -08:00
|
|
|
|
2025-12-13 00:18:30 -08:00
|
|
|
self.NAME = instance_name
|
|
|
|
|
self.API = api_key
|
2025-12-13 12:09:50 -08:00
|
|
|
self.URL = url.rstrip("/")
|
|
|
|
|
|
|
|
|
|
# Total count (best-effort, used for startup diagnostics)
|
|
|
|
|
self.total_count: Optional[int] = None
|
|
|
|
|
|
|
|
|
|
# Self health-check: validate the URL is reachable and the access key is accepted.
|
|
|
|
|
# This MUST NOT attempt to acquire a session key.
|
|
|
|
|
cache_key = (self.URL, self.API)
|
|
|
|
|
cached = _HYDRUS_INIT_CHECK_CACHE.get(cache_key)
|
|
|
|
|
if cached is not None:
|
|
|
|
|
ok, err = cached
|
|
|
|
|
if not ok:
|
2025-12-29 18:42:02 -08:00
|
|
|
raise RuntimeError(
|
|
|
|
|
f"Hydrus '{self.NAME}' unavailable: {err or 'Unavailable'}"
|
|
|
|
|
)
|
2025-12-13 12:09:50 -08:00
|
|
|
else:
|
|
|
|
|
api_version_url = f"{self.URL}/api_version"
|
|
|
|
|
verify_key_url = f"{self.URL}/verify_access_key"
|
|
|
|
|
try:
|
2025-12-29 18:42:02 -08:00
|
|
|
with httpx.Client(timeout=5.0,
|
|
|
|
|
verify=False,
|
|
|
|
|
follow_redirects=True) as client:
|
2025-12-13 12:09:50 -08:00
|
|
|
version_resp = client.get(api_version_url)
|
|
|
|
|
version_resp.raise_for_status()
|
|
|
|
|
version_payload = version_resp.json()
|
|
|
|
|
if not isinstance(version_payload, dict):
|
2025-12-29 18:42:02 -08:00
|
|
|
raise RuntimeError(
|
|
|
|
|
"Hydrus /api_version returned an unexpected response"
|
|
|
|
|
)
|
2025-12-13 12:09:50 -08:00
|
|
|
|
|
|
|
|
verify_resp = client.get(
|
|
|
|
|
verify_key_url,
|
2025-12-29 18:42:02 -08:00
|
|
|
headers={
|
|
|
|
|
"Hydrus-Client-API-Access-Key": self.API
|
|
|
|
|
},
|
2025-12-13 12:09:50 -08:00
|
|
|
)
|
|
|
|
|
verify_resp.raise_for_status()
|
|
|
|
|
verify_payload = verify_resp.json()
|
|
|
|
|
if not isinstance(verify_payload, dict):
|
2025-12-29 17:05:03 -08:00
|
|
|
raise RuntimeError(
|
|
|
|
|
"Hydrus /verify_access_key returned an unexpected response"
|
|
|
|
|
)
|
2025-12-13 12:09:50 -08:00
|
|
|
|
|
|
|
|
_HYDRUS_INIT_CHECK_CACHE[cache_key] = (True, None)
|
|
|
|
|
except Exception as exc:
|
|
|
|
|
err = str(exc)
|
|
|
|
|
_HYDRUS_INIT_CHECK_CACHE[cache_key] = (False, err)
|
|
|
|
|
raise RuntimeError(f"Hydrus '{self.NAME}' unavailable: {err}") from exc
|
2025-12-11 19:04:02 -08:00
|
|
|
|
2025-12-13 12:09:50 -08:00
|
|
|
# Create a persistent client for this instance (auth via access key by default).
|
2025-12-29 18:42:02 -08:00
|
|
|
self._client = HydrusClient(
|
|
|
|
|
url=self.URL,
|
|
|
|
|
access_key=self.API,
|
|
|
|
|
instance_name=self.NAME
|
|
|
|
|
)
|
2025-12-13 12:09:50 -08:00
|
|
|
|
2025-12-17 03:16:41 -08:00
|
|
|
# Best-effort total count (used for startup diagnostics). Avoid heavy payloads.
|
|
|
|
|
# Some Hydrus setups appear to return no count via the CBOR client for this endpoint,
|
|
|
|
|
# so prefer a direct JSON request with a short timeout.
|
|
|
|
|
try:
|
|
|
|
|
self.get_total_count(refresh=True)
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
def get_total_count(self, *, refresh: bool = False) -> Optional[int]:
|
|
|
|
|
"""Best-effort total file count for this Hydrus instance.
|
|
|
|
|
|
|
|
|
|
Intended for diagnostics (e.g., REPL startup checks). This should be fast,
|
|
|
|
|
and it MUST NOT raise.
|
|
|
|
|
"""
|
|
|
|
|
if self.total_count is not None and not refresh:
|
|
|
|
|
return self.total_count
|
|
|
|
|
|
|
|
|
|
# 1) Prefer a direct JSON request (fast + avoids CBOR edge cases).
|
|
|
|
|
try:
|
|
|
|
|
import json as _json
|
|
|
|
|
|
|
|
|
|
url = f"{self.URL}/get_files/search_files"
|
|
|
|
|
params = {
|
|
|
|
|
"tags": _json.dumps(["system:everything"]),
|
|
|
|
|
"return_hashes": "false",
|
|
|
|
|
"return_file_ids": "false",
|
|
|
|
|
"return_file_count": "true",
|
|
|
|
|
}
|
|
|
|
|
headers = {
|
|
|
|
|
"Hydrus-Client-API-Access-Key": self.API,
|
|
|
|
|
"Accept": "application/json",
|
|
|
|
|
}
|
2025-12-29 18:42:02 -08:00
|
|
|
with httpx.Client(timeout=5.0,
|
|
|
|
|
verify=False,
|
|
|
|
|
follow_redirects=True) as client:
|
2025-12-17 03:16:41 -08:00
|
|
|
resp = client.get(url, params=params, headers=headers)
|
|
|
|
|
resp.raise_for_status()
|
|
|
|
|
payload = resp.json()
|
|
|
|
|
|
|
|
|
|
count_val = None
|
|
|
|
|
if isinstance(payload, dict):
|
|
|
|
|
count_val = payload.get("file_count")
|
|
|
|
|
if count_val is None:
|
|
|
|
|
count_val = payload.get("file_count_inclusive")
|
|
|
|
|
if count_val is None:
|
|
|
|
|
count_val = payload.get("num_files")
|
|
|
|
|
if isinstance(count_val, int):
|
|
|
|
|
self.total_count = count_val
|
|
|
|
|
return self.total_count
|
|
|
|
|
except Exception as exc:
|
2025-12-29 18:42:02 -08:00
|
|
|
debug(
|
|
|
|
|
f"{self._log_prefix()} total count (json) unavailable: {exc}",
|
|
|
|
|
file=sys.stderr
|
|
|
|
|
)
|
2025-12-17 03:16:41 -08:00
|
|
|
|
|
|
|
|
# 2) Fallback to the API client (CBOR).
|
2025-12-13 00:18:30 -08:00
|
|
|
try:
|
2025-12-13 12:09:50 -08:00
|
|
|
payload = self._client.search_files(
|
|
|
|
|
tags=["system:everything"],
|
|
|
|
|
return_hashes=False,
|
|
|
|
|
return_file_ids=False,
|
|
|
|
|
return_file_count=True,
|
|
|
|
|
)
|
|
|
|
|
count_val = None
|
|
|
|
|
if isinstance(payload, dict):
|
|
|
|
|
count_val = payload.get("file_count")
|
|
|
|
|
if count_val is None:
|
|
|
|
|
count_val = payload.get("file_count_inclusive")
|
|
|
|
|
if count_val is None:
|
|
|
|
|
count_val = payload.get("num_files")
|
|
|
|
|
if isinstance(count_val, int):
|
|
|
|
|
self.total_count = count_val
|
2025-12-17 03:16:41 -08:00
|
|
|
return self.total_count
|
2025-12-13 00:18:30 -08:00
|
|
|
except Exception as exc:
|
2025-12-29 18:42:02 -08:00
|
|
|
debug(
|
|
|
|
|
f"{self._log_prefix()} total count (client) unavailable: {exc}",
|
|
|
|
|
file=sys.stderr
|
|
|
|
|
)
|
2025-12-17 03:16:41 -08:00
|
|
|
|
|
|
|
|
return self.total_count
|
2025-12-13 00:18:30 -08:00
|
|
|
|
2025-12-11 19:04:02 -08:00
|
|
|
def name(self) -> str:
|
2025-12-13 00:18:30 -08:00
|
|
|
return self.NAME
|
2025-12-11 19:04:02 -08:00
|
|
|
|
|
|
|
|
def get_name(self) -> str:
|
2025-12-13 00:18:30 -08:00
|
|
|
return self.NAME
|
2025-12-11 19:04:02 -08:00
|
|
|
|
2026-01-02 02:28:59 -08:00
|
|
|
def set_relationship(self, alt_hash: str, king_hash: str, kind: str = "alt") -> bool:
|
|
|
|
|
"""Persist a relationship via the Hydrus client API for this backend instance."""
|
|
|
|
|
try:
|
|
|
|
|
alt_norm = str(alt_hash or "").strip().lower()
|
|
|
|
|
king_norm = str(king_hash or "").strip().lower()
|
|
|
|
|
if len(alt_norm) != 64 or len(king_norm) != 64 or alt_norm == king_norm:
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
client = getattr(self, "_client", None)
|
|
|
|
|
if client is None or not hasattr(client, "set_relationship"):
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
client.set_relationship(alt_norm, king_norm, str(kind or "alt"))
|
|
|
|
|
return True
|
|
|
|
|
except Exception:
|
|
|
|
|
return False
|
|
|
|
|
|
2025-12-11 19:04:02 -08:00
|
|
|
def add_file(self, file_path: Path, **kwargs: Any) -> str:
|
|
|
|
|
"""Upload file to Hydrus with full metadata support.
|
2025-12-29 17:05:03 -08:00
|
|
|
|
2025-12-11 19:04:02 -08:00
|
|
|
Args:
|
|
|
|
|
file_path: Path to the file to upload
|
2025-12-11 23:21:45 -08:00
|
|
|
tag: Optional list of tag values to add
|
2025-12-11 19:04:02 -08:00
|
|
|
url: Optional list of url to associate with the file
|
|
|
|
|
title: Optional title (will be added as 'title:value' tag)
|
2025-12-29 17:05:03 -08:00
|
|
|
|
2025-12-11 19:04:02 -08:00
|
|
|
Returns:
|
|
|
|
|
File hash from Hydrus
|
2025-12-29 17:05:03 -08:00
|
|
|
|
2025-12-11 19:04:02 -08:00
|
|
|
Raises:
|
|
|
|
|
Exception: If upload fails
|
|
|
|
|
"""
|
|
|
|
|
from SYS.utils import sha256_file
|
|
|
|
|
|
2025-12-11 23:21:45 -08:00
|
|
|
tag_list = kwargs.get("tag", [])
|
2025-12-11 19:04:02 -08:00
|
|
|
url = kwargs.get("url", [])
|
|
|
|
|
title = kwargs.get("title")
|
2025-12-29 17:05:03 -08:00
|
|
|
|
2025-12-11 19:04:02 -08:00
|
|
|
# Add title to tags if provided and not already present
|
|
|
|
|
if title:
|
2025-12-20 23:57:44 -08:00
|
|
|
title_tag = f"title:{title}".strip().lower()
|
2025-12-29 18:42:02 -08:00
|
|
|
if not any(str(candidate).lower().startswith("title:")
|
|
|
|
|
for candidate in tag_list):
|
2025-12-11 23:21:45 -08:00
|
|
|
tag_list = [title_tag] + list(tag_list)
|
2025-12-11 19:04:02 -08:00
|
|
|
|
2025-12-20 23:57:44 -08:00
|
|
|
# Hydrus is lowercase-only tags; normalize here for consistency.
|
2025-12-29 17:05:03 -08:00
|
|
|
tag_list = [
|
2025-12-29 18:42:02 -08:00
|
|
|
str(t).strip().lower() for t in (tag_list or [])
|
2025-12-29 17:05:03 -08:00
|
|
|
if isinstance(t, str) and str(t).strip()
|
|
|
|
|
]
|
2025-12-20 23:57:44 -08:00
|
|
|
|
2025-12-11 19:04:02 -08:00
|
|
|
try:
|
|
|
|
|
# Compute file hash
|
|
|
|
|
file_hash = sha256_file(file_path)
|
2025-12-16 23:23:43 -08:00
|
|
|
debug(f"{self._log_prefix()} file hash: {file_hash}")
|
2025-12-11 19:04:02 -08:00
|
|
|
|
|
|
|
|
# Use persistent client with session key
|
|
|
|
|
client = self._client
|
|
|
|
|
if client is None:
|
|
|
|
|
raise Exception("Hydrus client unavailable")
|
|
|
|
|
|
|
|
|
|
# Check if file already exists in Hydrus
|
|
|
|
|
file_exists = False
|
|
|
|
|
try:
|
2025-12-16 23:23:43 -08:00
|
|
|
metadata = client.fetch_file_metadata(
|
|
|
|
|
hashes=[file_hash],
|
|
|
|
|
include_service_keys_to_tags=False,
|
|
|
|
|
include_file_url=False,
|
|
|
|
|
include_duration=False,
|
|
|
|
|
include_size=False,
|
|
|
|
|
include_mime=False,
|
|
|
|
|
)
|
2025-12-11 19:04:02 -08:00
|
|
|
if metadata and isinstance(metadata, dict):
|
2025-12-16 23:23:43 -08:00
|
|
|
metas = metadata.get("metadata", [])
|
|
|
|
|
if isinstance(metas, list) and metas:
|
|
|
|
|
# Hydrus returns placeholder rows for unknown hashes.
|
|
|
|
|
# Only treat as a real duplicate if it has a concrete file_id.
|
|
|
|
|
for meta in metas:
|
2025-12-29 18:42:02 -08:00
|
|
|
if isinstance(meta,
|
|
|
|
|
dict) and meta.get("file_id") is not None:
|
2025-12-16 23:23:43 -08:00
|
|
|
file_exists = True
|
|
|
|
|
break
|
|
|
|
|
if file_exists:
|
2026-01-02 02:28:59 -08:00
|
|
|
debug(
|
|
|
|
|
f"{self._log_prefix()} Duplicate detected - file already in Hydrus with hash: {file_hash}"
|
2025-12-16 23:23:43 -08:00
|
|
|
)
|
2025-12-11 19:04:02 -08:00
|
|
|
except Exception:
|
|
|
|
|
pass
|
|
|
|
|
|
2025-12-16 23:23:43 -08:00
|
|
|
# If Hydrus reports an existing file, it may be in trash. Best-effort restore it to 'my files'.
|
|
|
|
|
# This keeps behavior aligned with user expectation: "use API only" and ensure it lands in my files.
|
|
|
|
|
if file_exists:
|
|
|
|
|
try:
|
|
|
|
|
client.undelete_files([file_hash])
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
|
|
|
|
|
2025-12-11 19:04:02 -08:00
|
|
|
# Upload file if not already present
|
|
|
|
|
if not file_exists:
|
2026-01-02 02:28:59 -08:00
|
|
|
debug(
|
|
|
|
|
f"{self._log_prefix()} Uploading: {file_path.name}"
|
2025-12-29 18:42:02 -08:00
|
|
|
)
|
2025-12-11 19:04:02 -08:00
|
|
|
response = client.add_file(file_path)
|
|
|
|
|
|
|
|
|
|
# Extract hash from response
|
|
|
|
|
hydrus_hash: Optional[str] = None
|
|
|
|
|
if isinstance(response, dict):
|
|
|
|
|
hydrus_hash = response.get("hash") or response.get("file_hash")
|
|
|
|
|
if not hydrus_hash:
|
|
|
|
|
hashes = response.get("hashes")
|
|
|
|
|
if isinstance(hashes, list) and hashes:
|
|
|
|
|
hydrus_hash = hashes[0]
|
|
|
|
|
|
|
|
|
|
if not hydrus_hash:
|
|
|
|
|
raise Exception(f"Hydrus response missing file hash: {response}")
|
|
|
|
|
|
|
|
|
|
file_hash = hydrus_hash
|
2026-01-02 02:28:59 -08:00
|
|
|
debug(f"{self._log_prefix()} hash: {file_hash}")
|
2025-12-11 19:04:02 -08:00
|
|
|
|
|
|
|
|
# Add tags if provided (both for new and existing files)
|
2025-12-11 23:21:45 -08:00
|
|
|
if tag_list:
|
2025-12-11 19:04:02 -08:00
|
|
|
try:
|
|
|
|
|
# Use default tag service
|
|
|
|
|
service_name = "my tags"
|
|
|
|
|
except Exception:
|
|
|
|
|
service_name = "my tags"
|
|
|
|
|
|
|
|
|
|
try:
|
2025-12-29 18:42:02 -08:00
|
|
|
debug(
|
|
|
|
|
f"{self._log_prefix()} Adding {len(tag_list)} tag(s): {tag_list}"
|
|
|
|
|
)
|
2025-12-11 23:21:45 -08:00
|
|
|
client.add_tag(file_hash, tag_list, service_name)
|
2026-01-02 02:28:59 -08:00
|
|
|
debug(
|
|
|
|
|
f"{self._log_prefix()} Tags added via '{service_name}'"
|
2025-12-29 18:42:02 -08:00
|
|
|
)
|
2025-12-11 19:04:02 -08:00
|
|
|
except Exception as exc:
|
2025-12-29 18:42:02 -08:00
|
|
|
log(
|
|
|
|
|
f"{self._log_prefix()} ⚠️ Failed to add tags: {exc}",
|
|
|
|
|
file=sys.stderr
|
|
|
|
|
)
|
2025-12-11 19:04:02 -08:00
|
|
|
|
|
|
|
|
# Associate url if provided (both for new and existing files)
|
|
|
|
|
if url:
|
2026-01-02 02:28:59 -08:00
|
|
|
debug(
|
|
|
|
|
f"{self._log_prefix()} Associating {len(url)} URL(s) with file"
|
2025-12-29 17:05:03 -08:00
|
|
|
)
|
2025-12-11 19:04:02 -08:00
|
|
|
for url in url:
|
|
|
|
|
if url:
|
|
|
|
|
try:
|
|
|
|
|
client.associate_url(file_hash, str(url))
|
2025-12-16 23:23:43 -08:00
|
|
|
debug(f"{self._log_prefix()} Associated URL: {url}")
|
2025-12-11 19:04:02 -08:00
|
|
|
except Exception as exc:
|
2025-12-29 17:05:03 -08:00
|
|
|
log(
|
|
|
|
|
f"{self._log_prefix()} ⚠️ Failed to associate URL {url}: {exc}",
|
|
|
|
|
file=sys.stderr,
|
|
|
|
|
)
|
2025-12-11 19:04:02 -08:00
|
|
|
|
|
|
|
|
return file_hash
|
|
|
|
|
|
|
|
|
|
except Exception as exc:
|
2025-12-16 23:23:43 -08:00
|
|
|
log(f"{self._log_prefix()} ❌ upload failed: {exc}", file=sys.stderr)
|
2025-12-11 19:04:02 -08:00
|
|
|
raise
|
|
|
|
|
|
2025-12-11 23:21:45 -08:00
|
|
|
def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]:
|
2025-12-11 19:04:02 -08:00
|
|
|
"""Search Hydrus database for files matching query.
|
2025-12-29 17:05:03 -08:00
|
|
|
|
2025-12-11 19:04:02 -08:00
|
|
|
Args:
|
|
|
|
|
query: Search query (tags, filenames, hashes, etc.)
|
|
|
|
|
limit: Maximum number of results to return (default: 100)
|
2025-12-29 17:05:03 -08:00
|
|
|
|
2025-12-11 19:04:02 -08:00
|
|
|
Returns:
|
|
|
|
|
List of dicts with 'name', 'hash', 'size', 'tags' fields
|
2025-12-29 17:05:03 -08:00
|
|
|
|
2025-12-11 19:04:02 -08:00
|
|
|
Example:
|
|
|
|
|
results = storage["hydrus"].search("artist:john_doe music")
|
|
|
|
|
results = storage["hydrus"].search("Simple Man")
|
|
|
|
|
"""
|
|
|
|
|
limit = kwargs.get("limit", 100)
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
client = self._client
|
|
|
|
|
if client is None:
|
|
|
|
|
raise Exception("Hydrus client unavailable")
|
|
|
|
|
|
2025-12-16 23:23:43 -08:00
|
|
|
prefix = self._log_prefix()
|
|
|
|
|
debug(f"{prefix} Searching for: {query}")
|
2025-12-11 19:04:02 -08:00
|
|
|
|
2025-12-14 00:53:52 -08:00
|
|
|
def _extract_urls(meta_obj: Any) -> list[str]:
|
|
|
|
|
if not isinstance(meta_obj, dict):
|
|
|
|
|
return []
|
|
|
|
|
raw = meta_obj.get("url")
|
|
|
|
|
if raw is None:
|
|
|
|
|
raw = meta_obj.get("urls")
|
|
|
|
|
if isinstance(raw, str):
|
|
|
|
|
val = raw.strip()
|
|
|
|
|
return [val] if val else []
|
|
|
|
|
if isinstance(raw, list):
|
|
|
|
|
out: list[str] = []
|
|
|
|
|
for item in raw:
|
|
|
|
|
if not isinstance(item, str):
|
|
|
|
|
continue
|
|
|
|
|
s = item.strip()
|
|
|
|
|
if s:
|
|
|
|
|
out.append(s)
|
|
|
|
|
return out
|
|
|
|
|
return []
|
|
|
|
|
|
2025-12-29 17:05:03 -08:00
|
|
|
def _iter_url_filtered_metadata(
|
2025-12-29 18:42:02 -08:00
|
|
|
url_value: str | None,
|
|
|
|
|
want_any: bool,
|
|
|
|
|
fetch_limit: int
|
|
|
|
|
) -> list[dict[str,
|
|
|
|
|
Any]]:
|
2025-12-14 00:53:52 -08:00
|
|
|
"""Best-effort URL search by scanning Hydrus metadata with include_file_url=True."""
|
|
|
|
|
|
|
|
|
|
# First try a fast system predicate if Hydrus supports it.
|
|
|
|
|
candidate_file_ids: list[int] = []
|
|
|
|
|
try:
|
|
|
|
|
if want_any:
|
|
|
|
|
predicate = "system:has url"
|
|
|
|
|
url_search = client.search_files(
|
|
|
|
|
tags=[predicate],
|
|
|
|
|
return_hashes=False,
|
|
|
|
|
return_file_ids=True,
|
|
|
|
|
return_file_count=False,
|
|
|
|
|
)
|
2025-12-29 18:42:02 -08:00
|
|
|
ids = url_search.get("file_ids",
|
|
|
|
|
[]) if isinstance(url_search,
|
|
|
|
|
dict) else []
|
2025-12-14 00:53:52 -08:00
|
|
|
if isinstance(ids, list):
|
2025-12-29 17:05:03 -08:00
|
|
|
candidate_file_ids = [
|
2025-12-29 18:42:02 -08:00
|
|
|
int(x) for x in ids
|
|
|
|
|
if isinstance(x, (int, float,
|
|
|
|
|
str)) and str(x).strip().isdigit()
|
2025-12-29 17:05:03 -08:00
|
|
|
]
|
2025-12-14 00:53:52 -08:00
|
|
|
except Exception:
|
|
|
|
|
candidate_file_ids = []
|
|
|
|
|
|
|
|
|
|
if not candidate_file_ids:
|
|
|
|
|
# Fallback: scan from system:everything and filter by URL substring.
|
|
|
|
|
everything = client.search_files(
|
|
|
|
|
tags=["system:everything"],
|
|
|
|
|
return_hashes=False,
|
|
|
|
|
return_file_ids=True,
|
|
|
|
|
return_file_count=False,
|
|
|
|
|
)
|
2025-12-29 18:42:02 -08:00
|
|
|
ids = everything.get("file_ids",
|
|
|
|
|
[]) if isinstance(everything,
|
|
|
|
|
dict) else []
|
2025-12-14 00:53:52 -08:00
|
|
|
if isinstance(ids, list):
|
2025-12-29 18:42:02 -08:00
|
|
|
candidate_file_ids = [
|
|
|
|
|
int(x) for x in ids if isinstance(x, (int, float))
|
|
|
|
|
]
|
2025-12-14 00:53:52 -08:00
|
|
|
|
|
|
|
|
if not candidate_file_ids:
|
|
|
|
|
return []
|
|
|
|
|
|
|
|
|
|
needle = (url_value or "").strip().lower()
|
|
|
|
|
chunk_size = 200
|
|
|
|
|
out: list[dict[str, Any]] = []
|
|
|
|
|
|
|
|
|
|
for start in range(0, len(candidate_file_ids), chunk_size):
|
|
|
|
|
if len(out) >= fetch_limit:
|
|
|
|
|
break
|
2025-12-29 18:42:02 -08:00
|
|
|
chunk = candidate_file_ids[start:start + chunk_size]
|
2025-12-14 00:53:52 -08:00
|
|
|
try:
|
|
|
|
|
payload = client.fetch_file_metadata(
|
|
|
|
|
file_ids=chunk,
|
|
|
|
|
include_file_url=True,
|
|
|
|
|
include_service_keys_to_tags=True,
|
|
|
|
|
include_duration=True,
|
|
|
|
|
include_size=True,
|
|
|
|
|
include_mime=True,
|
|
|
|
|
)
|
|
|
|
|
except Exception:
|
|
|
|
|
continue
|
|
|
|
|
|
2025-12-29 18:42:02 -08:00
|
|
|
metas = payload.get("metadata",
|
|
|
|
|
[]) if isinstance(payload,
|
|
|
|
|
dict) else []
|
2025-12-14 00:53:52 -08:00
|
|
|
if not isinstance(metas, list):
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
for meta in metas:
|
|
|
|
|
if not isinstance(meta, dict):
|
|
|
|
|
continue
|
|
|
|
|
urls = _extract_urls(meta)
|
|
|
|
|
if not urls:
|
|
|
|
|
continue
|
|
|
|
|
if want_any:
|
|
|
|
|
out.append(meta)
|
|
|
|
|
if len(out) >= fetch_limit:
|
|
|
|
|
break
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
if not needle:
|
|
|
|
|
continue
|
|
|
|
|
if any(needle in u.lower() for u in urls):
|
|
|
|
|
out.append(meta)
|
|
|
|
|
if len(out) >= fetch_limit:
|
|
|
|
|
break
|
|
|
|
|
|
|
|
|
|
return out
|
|
|
|
|
|
|
|
|
|
query_lower = query.lower().strip()
|
|
|
|
|
|
2025-12-20 23:57:44 -08:00
|
|
|
# Support `ext:<value>` anywhere in the query. We filter results by the
|
|
|
|
|
# Hydrus metadata extension field.
|
|
|
|
|
def _normalize_ext_filter(value: str) -> str:
|
2025-12-29 17:05:03 -08:00
|
|
|
v = str(value or "").strip().lower().lstrip(".")
|
2025-12-20 23:57:44 -08:00
|
|
|
v = "".join(ch for ch in v if ch.isalnum())
|
|
|
|
|
return v
|
|
|
|
|
|
|
|
|
|
ext_filter: str | None = None
|
|
|
|
|
ext_only: bool = False
|
|
|
|
|
try:
|
|
|
|
|
m = re.search(r"\bext:([^\s,]+)", query_lower)
|
|
|
|
|
if not m:
|
|
|
|
|
m = re.search(r"\bextension:([^\s,]+)", query_lower)
|
|
|
|
|
if m:
|
|
|
|
|
ext_filter = _normalize_ext_filter(m.group(1)) or None
|
2025-12-29 18:42:02 -08:00
|
|
|
query_lower = re.sub(
|
|
|
|
|
r"\s*\b(?:ext|extension):[^\s,]+",
|
|
|
|
|
" ",
|
|
|
|
|
query_lower
|
|
|
|
|
)
|
2025-12-29 17:05:03 -08:00
|
|
|
query_lower = re.sub(r"\s{2,}", " ", query_lower).strip().strip(",")
|
2025-12-20 23:57:44 -08:00
|
|
|
query = query_lower
|
|
|
|
|
if ext_filter and not query_lower:
|
|
|
|
|
query = "*"
|
|
|
|
|
query_lower = "*"
|
|
|
|
|
ext_only = True
|
|
|
|
|
except Exception:
|
|
|
|
|
ext_filter = None
|
|
|
|
|
ext_only = False
|
|
|
|
|
|
|
|
|
|
# Split into meaningful terms for AND logic.
|
|
|
|
|
# Avoid punctuation tokens like '-' that would make matching brittle.
|
|
|
|
|
search_terms = [t for t in re.findall(r"[a-z0-9]+", query_lower) if t]
|
|
|
|
|
|
2025-12-14 00:53:52 -08:00
|
|
|
# Special case: url:* and url:<value>
|
|
|
|
|
metadata_list: list[dict[str, Any]] | None = None
|
|
|
|
|
if ":" in query_lower and not query_lower.startswith(":"):
|
|
|
|
|
namespace, pattern = query_lower.split(":", 1)
|
|
|
|
|
namespace = namespace.strip().lower()
|
|
|
|
|
pattern = pattern.strip()
|
|
|
|
|
if namespace == "url":
|
|
|
|
|
if not pattern or pattern == "*":
|
2025-12-29 17:05:03 -08:00
|
|
|
metadata_list = _iter_url_filtered_metadata(
|
2025-12-29 18:42:02 -08:00
|
|
|
None,
|
|
|
|
|
want_any=True,
|
|
|
|
|
fetch_limit=int(limit) if limit else 100
|
2025-12-29 17:05:03 -08:00
|
|
|
)
|
2025-12-14 00:53:52 -08:00
|
|
|
else:
|
2025-12-16 01:45:01 -08:00
|
|
|
# Fast-path: exact URL via /add_urls/get_url_files when a full URL is provided.
|
2025-12-14 00:53:52 -08:00
|
|
|
try:
|
2025-12-29 18:42:02 -08:00
|
|
|
if pattern.startswith("http://") or pattern.startswith(
|
|
|
|
|
"https://"):
|
2025-12-14 00:53:52 -08:00
|
|
|
from API.HydrusNetwork import HydrusRequestSpec
|
|
|
|
|
|
2025-12-29 17:05:03 -08:00
|
|
|
spec = HydrusRequestSpec(
|
|
|
|
|
method="GET",
|
|
|
|
|
endpoint="/add_urls/get_url_files",
|
2025-12-29 18:42:02 -08:00
|
|
|
query={
|
|
|
|
|
"url": pattern
|
|
|
|
|
},
|
2025-12-29 17:05:03 -08:00
|
|
|
)
|
2025-12-29 18:42:02 -08:00
|
|
|
response = client._perform_request(
|
|
|
|
|
spec
|
|
|
|
|
) # type: ignore[attr-defined]
|
2025-12-14 00:53:52 -08:00
|
|
|
hashes: list[str] = []
|
|
|
|
|
file_ids: list[int] = []
|
|
|
|
|
if isinstance(response, dict):
|
2025-12-29 17:05:03 -08:00
|
|
|
raw_hashes = response.get("hashes") or response.get(
|
|
|
|
|
"file_hashes"
|
|
|
|
|
)
|
2025-12-14 00:53:52 -08:00
|
|
|
if isinstance(raw_hashes, list):
|
2025-12-29 17:05:03 -08:00
|
|
|
hashes = [
|
2025-12-29 18:42:02 -08:00
|
|
|
str(h).strip() for h in raw_hashes
|
2025-12-29 17:05:03 -08:00
|
|
|
if isinstance(h, str) and str(h).strip()
|
|
|
|
|
]
|
2025-12-14 00:53:52 -08:00
|
|
|
raw_ids = response.get("file_ids")
|
|
|
|
|
if isinstance(raw_ids, list):
|
|
|
|
|
for item in raw_ids:
|
|
|
|
|
try:
|
|
|
|
|
file_ids.append(int(item))
|
|
|
|
|
except (TypeError, ValueError):
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
if file_ids:
|
|
|
|
|
payload = client.fetch_file_metadata(
|
|
|
|
|
file_ids=file_ids,
|
|
|
|
|
include_file_url=True,
|
|
|
|
|
include_service_keys_to_tags=True,
|
|
|
|
|
include_duration=True,
|
|
|
|
|
include_size=True,
|
|
|
|
|
include_mime=True,
|
|
|
|
|
)
|
2025-12-29 17:05:03 -08:00
|
|
|
metas = (
|
2025-12-29 18:42:02 -08:00
|
|
|
payload.get("metadata",
|
|
|
|
|
[]) if isinstance(payload,
|
|
|
|
|
dict) else []
|
2025-12-29 17:05:03 -08:00
|
|
|
)
|
2025-12-14 00:53:52 -08:00
|
|
|
if isinstance(metas, list):
|
2025-12-29 18:42:02 -08:00
|
|
|
metadata_list = [
|
|
|
|
|
m for m in metas if isinstance(m, dict)
|
|
|
|
|
]
|
2025-12-14 00:53:52 -08:00
|
|
|
elif hashes:
|
|
|
|
|
payload = client.fetch_file_metadata(
|
|
|
|
|
hashes=hashes,
|
|
|
|
|
include_file_url=True,
|
|
|
|
|
include_service_keys_to_tags=True,
|
|
|
|
|
include_duration=True,
|
|
|
|
|
include_size=True,
|
|
|
|
|
include_mime=True,
|
|
|
|
|
)
|
2025-12-29 17:05:03 -08:00
|
|
|
metas = (
|
2025-12-29 18:42:02 -08:00
|
|
|
payload.get("metadata",
|
|
|
|
|
[]) if isinstance(payload,
|
|
|
|
|
dict) else []
|
2025-12-29 17:05:03 -08:00
|
|
|
)
|
2025-12-14 00:53:52 -08:00
|
|
|
if isinstance(metas, list):
|
2025-12-29 18:42:02 -08:00
|
|
|
metadata_list = [
|
|
|
|
|
m for m in metas if isinstance(m, dict)
|
|
|
|
|
]
|
2025-12-14 00:53:52 -08:00
|
|
|
except Exception:
|
|
|
|
|
metadata_list = None
|
|
|
|
|
|
|
|
|
|
# Fallback: substring scan
|
|
|
|
|
if metadata_list is None:
|
2025-12-29 17:05:03 -08:00
|
|
|
metadata_list = _iter_url_filtered_metadata(
|
2025-12-29 18:42:02 -08:00
|
|
|
pattern,
|
|
|
|
|
want_any=False,
|
|
|
|
|
fetch_limit=int(limit) if limit else 100
|
2025-12-29 17:05:03 -08:00
|
|
|
)
|
2025-12-14 00:53:52 -08:00
|
|
|
|
2025-12-11 19:04:02 -08:00
|
|
|
# Parse the query into tags
|
|
|
|
|
# "*" means "match all" - use system:everything tag in Hydrus
|
2025-12-20 23:57:44 -08:00
|
|
|
# If query has explicit namespace, use it as a tag search.
|
|
|
|
|
# If query is free-form, search BOTH:
|
|
|
|
|
# - title:*term* (title: is the only namespace searched implicitly)
|
|
|
|
|
# - *term* (freeform tags; we will filter out other namespace matches client-side)
|
|
|
|
|
tags: list[str] = []
|
|
|
|
|
freeform_union_search: bool = False
|
|
|
|
|
title_predicates: list[str] = []
|
|
|
|
|
freeform_predicates: list[str] = []
|
|
|
|
|
|
2025-12-11 19:04:02 -08:00
|
|
|
if query.strip() == "*":
|
|
|
|
|
tags = ["system:everything"]
|
2025-12-29 17:05:03 -08:00
|
|
|
elif ":" in query_lower:
|
2025-12-20 23:57:44 -08:00
|
|
|
tags = [query_lower]
|
2025-12-11 19:04:02 -08:00
|
|
|
else:
|
2025-12-20 23:57:44 -08:00
|
|
|
freeform_union_search = True
|
|
|
|
|
if search_terms:
|
|
|
|
|
# Hydrus supports wildcard matching primarily as a prefix (e.g., tag*).
|
|
|
|
|
# Use per-term prefix matching for both title: and freeform tags.
|
|
|
|
|
title_predicates = [f"title:{term}*" for term in search_terms]
|
|
|
|
|
freeform_predicates = [f"{term}*" for term in search_terms]
|
2025-12-11 19:04:02 -08:00
|
|
|
else:
|
2025-12-20 23:57:44 -08:00
|
|
|
# If we can't extract alnum terms, fall back to the raw query text.
|
|
|
|
|
title_predicates = [f"title:{query_lower}*"]
|
|
|
|
|
freeform_predicates = [f"{query_lower}*"]
|
2025-12-29 17:05:03 -08:00
|
|
|
|
2025-12-14 00:53:52 -08:00
|
|
|
# Search files with the tags (unless url: search already produced metadata)
|
2025-12-11 19:04:02 -08:00
|
|
|
results = []
|
2025-12-20 23:57:44 -08:00
|
|
|
|
|
|
|
|
def _extract_search_ids(payload: Any) -> tuple[list[int], list[str]]:
|
|
|
|
|
if not isinstance(payload, dict):
|
|
|
|
|
return [], []
|
|
|
|
|
raw_ids = payload.get("file_ids", [])
|
|
|
|
|
raw_hashes = payload.get("hashes", [])
|
|
|
|
|
ids_out: list[int] = []
|
|
|
|
|
hashes_out: list[str] = []
|
|
|
|
|
if isinstance(raw_ids, list):
|
|
|
|
|
for item in raw_ids:
|
|
|
|
|
try:
|
|
|
|
|
ids_out.append(int(item))
|
|
|
|
|
except (TypeError, ValueError):
|
|
|
|
|
continue
|
|
|
|
|
if isinstance(raw_hashes, list):
|
2025-12-29 17:05:03 -08:00
|
|
|
hashes_out = [
|
2025-12-29 18:42:02 -08:00
|
|
|
str(h).strip() for h in raw_hashes
|
|
|
|
|
if isinstance(h, str) and str(h).strip()
|
2025-12-29 17:05:03 -08:00
|
|
|
]
|
2025-12-20 23:57:44 -08:00
|
|
|
return ids_out, hashes_out
|
2025-12-14 00:53:52 -08:00
|
|
|
|
|
|
|
|
if metadata_list is None:
|
2025-12-20 23:57:44 -08:00
|
|
|
file_ids: list[int] = []
|
|
|
|
|
hashes: list[str] = []
|
|
|
|
|
|
|
|
|
|
if freeform_union_search:
|
|
|
|
|
if not title_predicates and not freeform_predicates:
|
|
|
|
|
debug(f"{prefix} 0 result(s)")
|
|
|
|
|
return []
|
|
|
|
|
|
|
|
|
|
payloads: list[Any] = []
|
|
|
|
|
try:
|
|
|
|
|
payloads.append(
|
|
|
|
|
client.search_files(
|
|
|
|
|
tags=title_predicates,
|
|
|
|
|
return_hashes=True,
|
|
|
|
|
return_file_ids=True,
|
|
|
|
|
)
|
|
|
|
|
)
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
try:
|
|
|
|
|
payloads.append(
|
|
|
|
|
client.search_files(
|
|
|
|
|
tags=freeform_predicates,
|
|
|
|
|
return_hashes=True,
|
|
|
|
|
return_file_ids=True,
|
|
|
|
|
)
|
|
|
|
|
)
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
|
|
|
|
|
|
|
|
|
id_set: set[int] = set()
|
|
|
|
|
hash_set: set[str] = set()
|
|
|
|
|
for payload in payloads:
|
|
|
|
|
ids_part, hashes_part = _extract_search_ids(payload)
|
|
|
|
|
for fid in ids_part:
|
|
|
|
|
id_set.add(fid)
|
|
|
|
|
for hh in hashes_part:
|
|
|
|
|
hash_set.add(hh)
|
|
|
|
|
file_ids = list(id_set)
|
|
|
|
|
hashes = list(hash_set)
|
|
|
|
|
else:
|
|
|
|
|
if not tags:
|
|
|
|
|
debug(f"{prefix} 0 result(s)")
|
|
|
|
|
return []
|
|
|
|
|
|
|
|
|
|
search_result = client.search_files(
|
2025-12-29 18:42:02 -08:00
|
|
|
tags=tags,
|
|
|
|
|
return_hashes=True,
|
|
|
|
|
return_file_ids=True
|
2025-12-20 23:57:44 -08:00
|
|
|
)
|
|
|
|
|
file_ids, hashes = _extract_search_ids(search_result)
|
|
|
|
|
|
|
|
|
|
# Fast path: ext-only search. Avoid fetching metadata for an unbounded
|
|
|
|
|
# system:everything result set; fetch in chunks until we have enough.
|
|
|
|
|
if ext_only and ext_filter:
|
|
|
|
|
results: list[dict[str, Any]] = []
|
|
|
|
|
if not file_ids and not hashes:
|
|
|
|
|
debug(f"{prefix} 0 result(s)")
|
|
|
|
|
return []
|
|
|
|
|
|
|
|
|
|
# Prefer file_ids if available.
|
|
|
|
|
if file_ids:
|
|
|
|
|
chunk_size = 200
|
|
|
|
|
for start in range(0, len(file_ids), chunk_size):
|
|
|
|
|
if len(results) >= limit:
|
|
|
|
|
break
|
2025-12-29 18:42:02 -08:00
|
|
|
chunk = file_ids[start:start + chunk_size]
|
2025-12-20 23:57:44 -08:00
|
|
|
try:
|
|
|
|
|
payload = client.fetch_file_metadata(
|
|
|
|
|
file_ids=chunk,
|
|
|
|
|
include_service_keys_to_tags=True,
|
|
|
|
|
include_file_url=False,
|
|
|
|
|
include_duration=True,
|
|
|
|
|
include_size=True,
|
|
|
|
|
include_mime=True,
|
|
|
|
|
)
|
|
|
|
|
except Exception:
|
|
|
|
|
continue
|
2025-12-29 18:42:02 -08:00
|
|
|
metas = payload.get("metadata",
|
|
|
|
|
[]) if isinstance(payload,
|
|
|
|
|
dict) else []
|
2025-12-20 23:57:44 -08:00
|
|
|
if not isinstance(metas, list):
|
|
|
|
|
continue
|
|
|
|
|
for meta in metas:
|
|
|
|
|
if len(results) >= limit:
|
|
|
|
|
break
|
|
|
|
|
if not isinstance(meta, dict):
|
|
|
|
|
continue
|
|
|
|
|
mime_type = meta.get("mime")
|
2025-12-29 17:05:03 -08:00
|
|
|
ext = str(meta.get("ext") or "").strip().lstrip(".")
|
2025-12-20 23:57:44 -08:00
|
|
|
if not ext and mime_type:
|
|
|
|
|
for category in mime_maps.values():
|
|
|
|
|
for _ext_key, info in category.items():
|
|
|
|
|
if mime_type in info.get("mimes", []):
|
2025-12-29 18:42:02 -08:00
|
|
|
ext = str(info.get("ext",
|
|
|
|
|
"")
|
|
|
|
|
).strip().lstrip(".")
|
2025-12-20 23:57:44 -08:00
|
|
|
break
|
|
|
|
|
if ext:
|
|
|
|
|
break
|
|
|
|
|
if _normalize_ext_filter(ext) != ext_filter:
|
|
|
|
|
continue
|
2025-12-14 00:53:52 -08:00
|
|
|
|
2025-12-20 23:57:44 -08:00
|
|
|
file_id = meta.get("file_id")
|
|
|
|
|
hash_hex = meta.get("hash")
|
|
|
|
|
size = meta.get("size", 0)
|
|
|
|
|
|
2025-12-29 18:42:02 -08:00
|
|
|
tags_set = meta.get("tags",
|
|
|
|
|
{})
|
2025-12-20 23:57:44 -08:00
|
|
|
all_tags: list[str] = []
|
|
|
|
|
title = f"Hydrus File {file_id}"
|
|
|
|
|
if isinstance(tags_set, dict):
|
2025-12-29 17:05:03 -08:00
|
|
|
|
2025-12-20 23:57:44 -08:00
|
|
|
def _collect(tag_list: Any) -> None:
|
|
|
|
|
nonlocal title
|
|
|
|
|
if not isinstance(tag_list, list):
|
|
|
|
|
return
|
|
|
|
|
for tag in tag_list:
|
|
|
|
|
tag_text = str(tag) if tag else ""
|
|
|
|
|
if not tag_text:
|
|
|
|
|
continue
|
|
|
|
|
tag_l = tag_text.strip().lower()
|
|
|
|
|
if not tag_l:
|
|
|
|
|
continue
|
|
|
|
|
all_tags.append(tag_l)
|
2025-12-29 18:42:02 -08:00
|
|
|
if (tag_l.startswith("title:") and title
|
|
|
|
|
== f"Hydrus File {file_id}"):
|
2025-12-20 23:57:44 -08:00
|
|
|
title = tag_l.split(":", 1)[1].strip()
|
|
|
|
|
|
|
|
|
|
for _service_name, service_tags in tags_set.items():
|
|
|
|
|
if not isinstance(service_tags, dict):
|
|
|
|
|
continue
|
2025-12-29 18:42:02 -08:00
|
|
|
storage_tags = service_tags.get(
|
|
|
|
|
"storage_tags",
|
|
|
|
|
{}
|
|
|
|
|
)
|
2025-12-20 23:57:44 -08:00
|
|
|
if isinstance(storage_tags, dict):
|
|
|
|
|
for tag_list in storage_tags.values():
|
|
|
|
|
_collect(tag_list)
|
2025-12-29 18:42:02 -08:00
|
|
|
display_tags = service_tags.get(
|
|
|
|
|
"display_tags",
|
|
|
|
|
[]
|
|
|
|
|
)
|
2025-12-20 23:57:44 -08:00
|
|
|
_collect(display_tags)
|
|
|
|
|
|
|
|
|
|
file_url = f"{self.URL.rstrip('/')}/get_files/file?hash={hash_hex}"
|
|
|
|
|
results.append(
|
|
|
|
|
{
|
|
|
|
|
"hash": hash_hex,
|
|
|
|
|
"url": file_url,
|
|
|
|
|
"name": title,
|
|
|
|
|
"title": title,
|
|
|
|
|
"size": size,
|
|
|
|
|
"size_bytes": size,
|
|
|
|
|
"store": self.NAME,
|
|
|
|
|
"tag": all_tags,
|
|
|
|
|
"file_id": file_id,
|
|
|
|
|
"mime": mime_type,
|
|
|
|
|
"ext": ext,
|
|
|
|
|
}
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
debug(f"{prefix} {len(results)} result(s)")
|
|
|
|
|
return results[:limit]
|
|
|
|
|
|
|
|
|
|
# If we only got hashes, fall back to the normal flow below.
|
2025-12-14 00:53:52 -08:00
|
|
|
|
|
|
|
|
if not file_ids and not hashes:
|
2025-12-16 23:23:43 -08:00
|
|
|
debug(f"{prefix} 0 result(s)")
|
2025-12-14 00:53:52 -08:00
|
|
|
return []
|
|
|
|
|
|
|
|
|
|
if file_ids:
|
2025-12-20 23:57:44 -08:00
|
|
|
metadata = client.fetch_file_metadata(
|
|
|
|
|
file_ids=file_ids,
|
|
|
|
|
include_service_keys_to_tags=True,
|
|
|
|
|
include_file_url=False,
|
|
|
|
|
include_duration=True,
|
|
|
|
|
include_size=True,
|
|
|
|
|
include_mime=True,
|
|
|
|
|
)
|
2025-12-14 00:53:52 -08:00
|
|
|
metadata_list = metadata.get("metadata", [])
|
|
|
|
|
elif hashes:
|
2025-12-20 23:57:44 -08:00
|
|
|
metadata = client.fetch_file_metadata(
|
|
|
|
|
hashes=hashes,
|
|
|
|
|
include_service_keys_to_tags=True,
|
|
|
|
|
include_file_url=False,
|
|
|
|
|
include_duration=True,
|
|
|
|
|
include_size=True,
|
|
|
|
|
include_mime=True,
|
|
|
|
|
)
|
2025-12-14 00:53:52 -08:00
|
|
|
metadata_list = metadata.get("metadata", [])
|
|
|
|
|
else:
|
|
|
|
|
metadata_list = []
|
|
|
|
|
|
2025-12-20 23:57:44 -08:00
|
|
|
# If our free-text searches produce nothing (or nothing survived downstream filtering), fallback to scanning.
|
2025-12-29 18:42:02 -08:00
|
|
|
if (not metadata_list) and (query_lower
|
|
|
|
|
!= "*") and (":" not in query_lower):
|
2025-12-20 23:57:44 -08:00
|
|
|
try:
|
|
|
|
|
search_result = client.search_files(
|
|
|
|
|
tags=["system:everything"],
|
|
|
|
|
return_hashes=True,
|
|
|
|
|
return_file_ids=True,
|
|
|
|
|
)
|
|
|
|
|
file_ids, hashes = _extract_search_ids(search_result)
|
|
|
|
|
|
|
|
|
|
if file_ids:
|
|
|
|
|
metadata = client.fetch_file_metadata(
|
|
|
|
|
file_ids=file_ids,
|
|
|
|
|
include_service_keys_to_tags=True,
|
|
|
|
|
include_file_url=False,
|
|
|
|
|
include_duration=True,
|
|
|
|
|
include_size=True,
|
|
|
|
|
include_mime=True,
|
|
|
|
|
)
|
|
|
|
|
metadata_list = metadata.get("metadata", [])
|
|
|
|
|
elif hashes:
|
|
|
|
|
metadata = client.fetch_file_metadata(
|
|
|
|
|
hashes=hashes,
|
|
|
|
|
include_service_keys_to_tags=True,
|
|
|
|
|
include_file_url=False,
|
|
|
|
|
include_duration=True,
|
|
|
|
|
include_size=True,
|
|
|
|
|
include_mime=True,
|
|
|
|
|
)
|
|
|
|
|
metadata_list = metadata.get("metadata", [])
|
|
|
|
|
except Exception:
|
|
|
|
|
pass
|
|
|
|
|
|
2025-12-14 00:53:52 -08:00
|
|
|
if not isinstance(metadata_list, list):
|
|
|
|
|
metadata_list = []
|
2025-12-16 01:45:01 -08:00
|
|
|
|
|
|
|
|
for meta in metadata_list:
|
2025-12-29 17:05:03 -08:00
|
|
|
if len(results) >= limit:
|
|
|
|
|
break
|
|
|
|
|
|
|
|
|
|
file_id = meta.get("file_id")
|
|
|
|
|
hash_hex = meta.get("hash")
|
|
|
|
|
size = meta.get("size", 0)
|
|
|
|
|
|
|
|
|
|
# Get tags for this file and extract title
|
2025-12-29 18:42:02 -08:00
|
|
|
tags_set = meta.get("tags",
|
|
|
|
|
{})
|
2025-12-29 17:05:03 -08:00
|
|
|
all_tags = []
|
|
|
|
|
title = f"Hydrus File {file_id}" # Default fallback
|
|
|
|
|
all_tags_str = "" # For substring matching
|
|
|
|
|
|
|
|
|
|
# debug(f"[HydrusBackend.search] Processing file_id={file_id}, tags type={type(tags_set)}")
|
|
|
|
|
|
|
|
|
|
if isinstance(tags_set, dict):
|
|
|
|
|
# Collect both storage_tags and display_tags to capture siblings/parents and ensure title: is seen
|
|
|
|
|
def _collect(tag_list: Any) -> None:
|
|
|
|
|
nonlocal title, all_tags_str
|
|
|
|
|
if not isinstance(tag_list, list):
|
|
|
|
|
return
|
|
|
|
|
for tag in tag_list:
|
|
|
|
|
tag_text = str(tag) if tag else ""
|
|
|
|
|
if not tag_text:
|
|
|
|
|
continue
|
|
|
|
|
tag_l = tag_text.strip().lower()
|
|
|
|
|
if not tag_l:
|
2025-12-11 19:04:02 -08:00
|
|
|
continue
|
2025-12-29 17:05:03 -08:00
|
|
|
all_tags.append(tag_l)
|
|
|
|
|
all_tags_str += " " + tag_l
|
2025-12-29 18:42:02 -08:00
|
|
|
if tag_l.startswith("title:"
|
|
|
|
|
) and title == f"Hydrus File {file_id}":
|
2025-12-29 17:05:03 -08:00
|
|
|
title = tag_l.split(":", 1)[1].strip()
|
2025-12-11 19:04:02 -08:00
|
|
|
|
2025-12-29 17:05:03 -08:00
|
|
|
for _service_name, service_tags in tags_set.items():
|
|
|
|
|
if not isinstance(service_tags, dict):
|
|
|
|
|
continue
|
2025-12-11 19:04:02 -08:00
|
|
|
|
2025-12-29 18:42:02 -08:00
|
|
|
storage_tags = service_tags.get("storage_tags",
|
|
|
|
|
{})
|
2025-12-29 17:05:03 -08:00
|
|
|
if isinstance(storage_tags, dict):
|
|
|
|
|
for tag_list in storage_tags.values():
|
|
|
|
|
_collect(tag_list)
|
|
|
|
|
|
|
|
|
|
display_tags = service_tags.get("display_tags", [])
|
|
|
|
|
_collect(display_tags)
|
|
|
|
|
|
|
|
|
|
# Also consider top-level flattened tags payload if provided (Hydrus API sometimes includes it)
|
|
|
|
|
top_level_tags = meta.get("tags_flat", []) or meta.get("tags", [])
|
|
|
|
|
_collect(top_level_tags)
|
|
|
|
|
|
|
|
|
|
# Prefer Hydrus-provided extension (e.g. ".webm"); fall back to MIME map.
|
|
|
|
|
mime_type = meta.get("mime")
|
|
|
|
|
ext = str(meta.get("ext") or "").strip().lstrip(".")
|
|
|
|
|
if not ext and mime_type:
|
|
|
|
|
for category in mime_maps.values():
|
|
|
|
|
for _ext_key, info in category.items():
|
|
|
|
|
if mime_type in info.get("mimes", []):
|
|
|
|
|
ext = str(info.get("ext", "")).strip().lstrip(".")
|
|
|
|
|
break
|
|
|
|
|
if ext:
|
|
|
|
|
break
|
|
|
|
|
|
|
|
|
|
# Filter results based on query type
|
|
|
|
|
# If user provided explicit namespace (has ':'), don't do substring filtering
|
|
|
|
|
# Just include what the tag search returned
|
|
|
|
|
has_namespace = ":" in query_lower
|
|
|
|
|
|
|
|
|
|
if has_namespace:
|
|
|
|
|
# Explicit namespace search - already filtered by Hydrus tag search
|
|
|
|
|
# Include this result as-is
|
|
|
|
|
file_url = f"{self.URL.rstrip('/')}/get_files/file?hash={hash_hex}"
|
|
|
|
|
results.append(
|
|
|
|
|
{
|
2025-12-11 19:04:02 -08:00
|
|
|
"hash": hash_hex,
|
|
|
|
|
"url": file_url,
|
|
|
|
|
"name": title,
|
|
|
|
|
"title": title,
|
|
|
|
|
"size": size,
|
|
|
|
|
"size_bytes": size,
|
2025-12-13 00:18:30 -08:00
|
|
|
"store": self.NAME,
|
2025-12-29 17:05:03 -08:00
|
|
|
"tag": all_tags,
|
2025-12-11 19:04:02 -08:00
|
|
|
"file_id": file_id,
|
|
|
|
|
"mime": mime_type,
|
|
|
|
|
"ext": ext,
|
2025-12-29 17:05:03 -08:00
|
|
|
}
|
|
|
|
|
)
|
|
|
|
|
else:
|
|
|
|
|
# Free-form search: check if search terms match title or FREEFORM tags.
|
|
|
|
|
# Do NOT implicitly match other namespace tags (except title:).
|
|
|
|
|
freeform_tags = [
|
2025-12-29 18:42:02 -08:00
|
|
|
t for t in all_tags
|
|
|
|
|
if isinstance(t, str) and t and (":" not in t)
|
2025-12-29 17:05:03 -08:00
|
|
|
]
|
|
|
|
|
searchable_text = (title + " " + " ".join(freeform_tags)).lower()
|
|
|
|
|
|
|
|
|
|
match = True
|
|
|
|
|
if query_lower != "*" and search_terms:
|
|
|
|
|
for term in search_terms:
|
|
|
|
|
if term not in searchable_text:
|
|
|
|
|
match = False
|
|
|
|
|
break
|
|
|
|
|
|
|
|
|
|
if match:
|
|
|
|
|
file_url = f"{self.URL.rstrip('/')}/get_files/file?hash={hash_hex}"
|
|
|
|
|
results.append(
|
|
|
|
|
{
|
2025-12-11 19:04:02 -08:00
|
|
|
"hash": hash_hex,
|
|
|
|
|
"url": file_url,
|
|
|
|
|
"name": title,
|
|
|
|
|
"title": title,
|
|
|
|
|
"size": size,
|
|
|
|
|
"size_bytes": size,
|
2025-12-13 00:18:30 -08:00
|
|
|
"store": self.NAME,
|
2025-12-11 23:21:45 -08:00
|
|
|
"tag": all_tags,
|
2025-12-11 19:04:02 -08:00
|
|
|
"file_id": file_id,
|
|
|
|
|
"mime": mime_type,
|
|
|
|
|
"ext": ext,
|
2025-12-29 17:05:03 -08:00
|
|
|
}
|
|
|
|
|
)
|
|
|
|
|
|
2025-12-16 23:23:43 -08:00
|
|
|
debug(f"{prefix} {len(results)} result(s)")
|
2025-12-20 23:57:44 -08:00
|
|
|
if ext_filter:
|
|
|
|
|
wanted = ext_filter
|
|
|
|
|
filtered: list[dict[str, Any]] = []
|
|
|
|
|
for item in results:
|
|
|
|
|
try:
|
|
|
|
|
if _normalize_ext_filter(str(item.get("ext") or "")) == wanted:
|
|
|
|
|
filtered.append(item)
|
|
|
|
|
except Exception:
|
|
|
|
|
continue
|
|
|
|
|
results = filtered
|
|
|
|
|
|
2025-12-11 19:04:02 -08:00
|
|
|
return results[:limit]
|
|
|
|
|
|
|
|
|
|
except Exception as exc:
|
|
|
|
|
log(f"❌ Hydrus search failed: {exc}", file=sys.stderr)
|
|
|
|
|
import traceback
|
2025-12-29 17:05:03 -08:00
|
|
|
|
2025-12-11 19:04:02 -08:00
|
|
|
traceback.print_exc(file=sys.stderr)
|
|
|
|
|
raise
|
|
|
|
|
|
|
|
|
|
def get_file(self, file_hash: str, **kwargs: Any) -> Path | str | None:
    """Return a browser URL for the file.

    IMPORTANT: this method must be side-effect free (do not auto-open a browser).
    Only explicit user actions (e.g. the get-file cmdlet) should open files.

    Args:
        file_hash: SHA256 hash of the file (64-char hex string).
        **kwargs: Unused; accepted for interface compatibility.

    Returns:
        A direct-download URL string that authenticates via query parameter.
    """
    prefix = self._log_prefix()
    debug(f"{prefix} get_file: start hash={file_hash[:12]}...")

    # Compose the direct-file endpoint, embedding the access key so the URL
    # works when pasted straight into a browser.
    endpoint = str(self.URL).rstrip("/") + "/get_files/file"
    key = str(self.API)
    browser_url = f"{endpoint}?hash={file_hash}&Hydrus-Client-API-Access-Key={key}"

    debug(f"{prefix} get_file: url={browser_url}")
    return browser_url
|
|
|
|
|
|
2026-01-04 02:23:50 -08:00
|
|
|
def download_to_temp(
    self,
    file_hash: str,
    *,
    temp_root: Optional[Path] = None,
) -> Optional[Path]:
    """Download a Hydrus file to a temporary path for downstream uploads.

    Args:
        file_hash: SHA256 hash of the file (64-char hex string).
        temp_root: Optional existing directory to download into. When omitted,
            a fresh ``hydrus-file-*`` directory is created (and removed again
            on failure); a caller-supplied directory is never deleted here.

    Returns:
        Path to the downloaded file, or None on any failure (invalid hash,
        missing client, HTTP error, ...). Never raises.
    """
    try:
        client = self._client
        if client is None:
            return None

        # Normalize and validate: only a 64-char lowercase hex SHA256 is accepted.
        h = str(file_hash or "").strip().lower()
        if len(h) != 64 or not all(ch in "0123456789abcdef" for ch in h):
            return None

        # Track whether WE created the temp dir, so cleanup on failure only
        # removes directories this call owns.
        created_tmp = False
        base_tmp = Path(temp_root) if temp_root is not None else Path(
            tempfile.mkdtemp(prefix="hydrus-file-")
        )
        if temp_root is None:
            created_tmp = True
        base_tmp.mkdir(parents=True, exist_ok=True)

        def _safe_filename(raw: str) -> str:
            # Strip filesystem-hostile characters; fall back to the hash when
            # nothing usable remains.
            cleaned = re.sub(r"[\\/:*?\"<>|]", "_", str(raw or "")).strip()
            if not cleaned:
                return h
            cleaned = cleaned.strip(". ") or h
            return cleaned

        # Prefer ext/title from metadata when available.
        fname = h
        ext_val = ""
        try:
            meta = self.get_metadata(h) or {}
            if isinstance(meta, dict):
                title_val = str(meta.get("title") or "").strip()
                if title_val:
                    fname = _safe_filename(title_val)
                ext_val = str(meta.get("ext") or "").strip().lstrip(".")
        except Exception:
            # Metadata lookup is best-effort; the hash alone is a valid name.
            pass

        if not fname:
            fname = h
        # Append the extension only when the name doesn't already carry it.
        if ext_val and not fname.lower().endswith(f".{ext_val.lower()}"):
            fname = f"{fname}.{ext_val}"

        # Resolve the download URL via the client helper, falling back to the
        # raw endpoint built from self.URL.
        try:
            file_url = client.file_url(h)
        except Exception:
            file_url = f"{self.URL.rstrip('/')}/get_files/file?hash={quote(h)}"

        dest_path = base_tmp / fname
        # NOTE(review): verify=False disables TLS certificate verification —
        # presumably to tolerate self-signed Hydrus instances; confirm this is
        # intentional before exposing to untrusted networks.
        with httpx.stream(
            "GET",
            file_url,
            headers={"Hydrus-Client-API-Access-Key": self.API},
            follow_redirects=True,
            timeout=60.0,
            verify=False,
        ) as resp:
            resp.raise_for_status()
            with dest_path.open("wb") as fh:
                for chunk in resp.iter_bytes():
                    if chunk:
                        fh.write(chunk)

        if dest_path.exists():
            return dest_path

        # Download produced no file: remove the directory we created.
        if created_tmp:
            try:
                shutil.rmtree(base_tmp, ignore_errors=True)
            except Exception:
                pass
        return None
    except Exception as exc:
        log(f"{self._log_prefix()} download_to_temp failed: {exc}", file=sys.stderr)
        # Best-effort cleanup; base_tmp may not exist yet if we failed early,
        # hence the locals() check.
        try:
            if temp_root is None and "base_tmp" in locals():
                shutil.rmtree(base_tmp, ignore_errors=True)  # type: ignore[arg-type]
        except Exception:
            pass
        return None
|
|
|
|
|
|
2025-12-27 14:50:59 -08:00
|
|
|
def delete_file(self, file_identifier: str, **kwargs: Any) -> bool:
|
|
|
|
|
"""Delete a file from Hydrus, then clear the deletion record.
|
|
|
|
|
|
|
|
|
|
This is used by the delete-file cmdlet when the item belongs to a HydrusNetwork store.
|
|
|
|
|
"""
|
|
|
|
|
try:
|
|
|
|
|
client = self._client
|
|
|
|
|
if client is None:
|
|
|
|
|
debug(f"{self._log_prefix()} delete_file: client unavailable")
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
file_hash = str(file_identifier or "").strip().lower()
|
2025-12-29 18:42:02 -08:00
|
|
|
if len(file_hash) != 64 or not all(ch in "0123456789abcdef"
|
|
|
|
|
for ch in file_hash):
|
|
|
|
|
debug(
|
|
|
|
|
f"{self._log_prefix()} delete_file: invalid file hash '{file_identifier}'"
|
|
|
|
|
)
|
2025-12-27 14:50:59 -08:00
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
reason = kwargs.get("reason")
|
2025-12-29 17:05:03 -08:00
|
|
|
reason_text = (
|
2025-12-29 18:42:02 -08:00
|
|
|
str(reason).strip() if isinstance(reason,
|
|
|
|
|
str) and reason.strip() else None
|
2025-12-29 17:05:03 -08:00
|
|
|
)
|
2025-12-27 14:50:59 -08:00
|
|
|
|
|
|
|
|
# 1) Delete file
|
|
|
|
|
client.delete_files([file_hash], reason=reason_text)
|
|
|
|
|
|
|
|
|
|
# 2) Clear deletion record (best-effort)
|
|
|
|
|
try:
|
|
|
|
|
client.clear_file_deletion_record([file_hash])
|
|
|
|
|
except Exception as exc:
|
2025-12-29 18:42:02 -08:00
|
|
|
debug(
|
|
|
|
|
f"{self._log_prefix()} delete_file: clear_file_deletion_record failed: {exc}"
|
|
|
|
|
)
|
2025-12-27 14:50:59 -08:00
|
|
|
|
|
|
|
|
return True
|
|
|
|
|
except Exception as exc:
|
|
|
|
|
debug(f"{self._log_prefix()} delete_file failed: {exc}")
|
|
|
|
|
return False
|
|
|
|
|
|
2025-12-11 19:04:02 -08:00
|
|
|
def get_metadata(self, file_hash: str, **kwargs: Any) -> Optional[Dict[str, Any]]:
    """Get metadata for a file from Hydrus by hash.

    Args:
        file_hash: SHA256 hash of the file (64-char hex string)
        **kwargs: Unused; accepted for interface compatibility.

    Returns:
        Dict with metadata fields (hash/title/ext/size/mime plus raw Hydrus
        fields) or None if not found. Never raises.
    """
    try:
        client = self._client
        if not client:
            debug(f"{self._log_prefix()} get_metadata: client unavailable")
            return None

        # Fetch file metadata with the fields we need for CLI display.
        payload = client.fetch_file_metadata(
            hashes=[file_hash],
            include_service_keys_to_tags=True,
            include_file_url=True,
            include_duration=True,
            include_size=True,
            include_mime=True,
        )

        if not payload or not payload.get("metadata"):
            return None

        meta = payload["metadata"][0]

        # Hydrus can return placeholder metadata rows for unknown hashes.
        if not isinstance(meta, dict) or meta.get("file_id") is None:
            return None

        # Extract title from tags: scan each tag service's current ("0")
        # display tags for the first "title:" namespace tag.
        title = f"Hydrus_{file_hash[:12]}"
        tags_payload = meta.get("tags",
                                {})
        if isinstance(tags_payload, dict):
            for service_data in tags_payload.values():
                if isinstance(service_data, dict):
                    display_tags = service_data.get("display_tags",
                                                    {})
                    if isinstance(display_tags, dict):
                        current_tags = display_tags.get("0", [])
                        if isinstance(current_tags, list):
                            for tag in current_tags:
                                if str(tag).lower().startswith("title:"):
                                    title = tag.split(":", 1)[1].strip()
                                    break
                # Stop scanning services once a title was found.
                if title != f"Hydrus_{file_hash[:12]}":
                    break

        # Hydrus may return mime as an int enum, or sometimes a human label.
        mime_val = meta.get("mime")
        filetype_human = (
            meta.get("filetype_human") or meta.get("mime_human")
            or meta.get("mime_string")
        )

        # Determine ext: prefer Hydrus metadata ext, then filetype_human (when it looks like an ext),
        # then title suffix, then file path suffix.
        ext = str(meta.get("ext") or "").strip().lstrip(".")
        if not ext:
            ft = str(filetype_human or "").strip().lstrip(".").lower()
            if ft and ft != "unknown filetype" and ft.isalnum() and len(ft) <= 8:
                # Treat simple labels like "mp4", "m4a", "webm" as extensions.
                ext = ft
        if not ext and isinstance(title, str) and "." in title:
            try:
                ext = Path(title).suffix.lstrip(".")
            except Exception:
                ext = ""
        if not ext:
            # Last resort: ask Hydrus for the on-disk path and use its suffix.
            try:
                path_payload = client.get_file_path(file_hash)
                if isinstance(path_payload, dict):
                    p = path_payload.get("path")
                    if isinstance(p, str) and p.strip():
                        ext = Path(p.strip()).suffix.lstrip(".")
            except Exception:
                ext = ""

        # If extension is still unknown, attempt a best-effort lookup from MIME.
        def _mime_from_ext(ext_value: str) -> str:
            # Reverse lookup: first MIME listed for this extension in mime_maps.
            ext_clean = str(ext_value or "").strip().lstrip(".").lower()
            if not ext_clean:
                return ""
            try:
                for category in mime_maps.values():
                    info = category.get(ext_clean)
                    if isinstance(info, dict):
                        mimes = info.get("mimes")
                        if isinstance(mimes, list) and mimes:
                            first = mimes[0]
                            return str(first)
            except Exception:
                return ""
            return ""

        # Normalize to a MIME string for CLI output.
        # Avoid passing through human labels like "unknown filetype".
        mime_type = ""
        if isinstance(mime_val, str):
            candidate = mime_val.strip()
            if "/" in candidate and candidate.lower() != "unknown filetype":
                mime_type = candidate
        if not mime_type and isinstance(filetype_human, str):
            candidate = filetype_human.strip()
            if "/" in candidate and candidate.lower() != "unknown filetype":
                mime_type = candidate
        if not mime_type:
            mime_type = _mime_from_ext(ext)

        # Normalize size/duration to stable scalar types.
        size_val = meta.get("size")
        if size_val is None:
            size_val = meta.get("size_bytes")
        try:
            size_int: int | None = int(size_val) if size_val is not None else None
        except Exception:
            size_int = None

        dur_val = meta.get("duration")
        if dur_val is None:
            dur_val = meta.get("duration_ms")
        try:
            dur_int: int | None = int(dur_val) if dur_val is not None else None
        except Exception:
            dur_int = None

        # Known URLs have appeared under several keys across Hydrus versions.
        raw_urls = meta.get("known_urls") or meta.get("urls") or meta.get("url"
                                                                          ) or []
        url_list: list[str] = []
        if isinstance(raw_urls, str):
            s = raw_urls.strip()
            url_list = [s] if s else []
        elif isinstance(raw_urls, list):
            url_list = [
                str(u).strip() for u in raw_urls
                if isinstance(u, str) and str(u).strip()
            ]

        return {
            "hash": file_hash,
            "title": title,
            "ext": ext,
            "size": size_int,
            "mime": mime_type,
            # Keep raw fields available for troubleshooting/other callers.
            "hydrus_mime": mime_val,
            "filetype_human": filetype_human,
            "duration_ms": dur_int,
            "url": url_list,
        }

    except Exception as exc:
        debug(f"{self._log_prefix()} get_metadata failed: {exc}")
        return None
|
|
|
|
|
|
|
|
|
|
def get_tag(self, file_identifier: str, **kwargs: Any) -> Tuple[List[str], str]:
    """Get tags for a file from Hydrus by hash.

    Args:
        file_identifier: File hash (SHA256 hex string)
        **kwargs: Optional service_name parameter

    Returns:
        Tuple of (tags_list, source_description)
        where source is always "hydrus" on success and "unknown" on failure.
    """
    try:
        from API import HydrusNetwork as hydrus_wrapper

        file_hash = str(file_identifier or "").strip().lower()
        hash_ok = len(file_hash) == 64 and all(
            ch in "0123456789abcdef" for ch in file_hash
        )
        if not hash_ok:
            debug(
                f"{self._log_prefix()} get_tags: invalid file hash '{file_identifier}'"
            )
            return [], "unknown"

        # Get Hydrus client and service info
        client = self._client
        if not client:
            debug(f"{self._log_prefix()} get_tags: client unavailable")
            return [], "unknown"

        # Fetch per-service tag data for this single hash.
        payload = client.fetch_file_metadata(
            hashes=[file_hash],
            include_service_keys_to_tags=True,
            include_file_url=False
        )

        items = payload.get("metadata") if isinstance(payload, dict) else None
        if not isinstance(items, list) or not items:
            debug(
                f"{self._log_prefix()} get_tags: no metadata for hash {file_hash}"
            )
            return [], "unknown"

        meta = items[0] if isinstance(items[0], dict) else None
        if not isinstance(meta, dict) or meta.get("file_id") is None:
            debug(
                f"{self._log_prefix()} get_tags: invalid metadata for hash {file_hash}"
            )
            return [], "unknown"

        # Resolve the "my tags" service key, then pull tags from the row.
        service_name = "my tags"
        service_key = hydrus_wrapper.get_tag_service_key(client, service_name)
        extracted = self._extract_tags_from_hydrus_meta(meta, service_key, service_name)

        # Normalize: non-empty strings only, trimmed and lowercased.
        normalized: List[str] = []
        for raw in extracted:
            if isinstance(raw, str) and raw.strip():
                normalized.append(str(raw).strip().lower())
        return normalized, "hydrus"

    except Exception as exc:
        debug(f"{self._log_prefix()} get_tags failed: {exc}")
        return [], "unknown"
|
|
|
|
|
|
|
|
|
|
def add_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool:
    """Add tags to a Hydrus file.

    Incoming tags are merged with the file's existing tags using
    namespace-overwrite semantics (see compute_namespaced_tag_overwrite),
    then applied atomically via mutate_tags_by_key when possible, falling
    back to separate delete/add calls otherwise.

    Args:
        file_identifier: SHA256 hash of the file (64-char hex string).
        tags: Tags to apply; non-string/empty entries are ignored.
        **kwargs: Optional ``service_name`` (defaults to "my tags").

    Returns:
        True when nothing needed changing or at least one mutation was sent;
        False on invalid input or total failure. Never raises.
    """
    try:
        client = self._client
        if client is None:
            debug(f"{self._log_prefix()} add_tag: client unavailable")
            return False

        # Normalize and validate the hash: 64-char lowercase hex only.
        file_hash = str(file_identifier or "").strip().lower()
        if len(file_hash) != 64 or not all(ch in "0123456789abcdef"
                                           for ch in file_hash):
            debug(
                f"{self._log_prefix()} add_tag: invalid file hash '{file_identifier}'"
            )
            return False
        service_name = kwargs.get("service_name") or "my tags"

        # Keep only non-empty string tags, trimmed and lowercased.
        incoming_tags = [
            str(t).strip().lower() for t in (tags or [])
            if isinstance(t, str) and str(t).strip()
        ]
        if not incoming_tags:
            # Nothing to apply counts as success.
            return True

        # Existing tags are needed to compute the namespace overwrite diff;
        # failure here degrades to treating the file as untagged.
        try:
            existing_tags, _src = self.get_tag(file_hash)
        except Exception:
            existing_tags = []

        from SYS.metadata import compute_namespaced_tag_overwrite

        # Diff existing vs incoming: same-namespace tags get replaced.
        tags_to_remove, tags_to_add, _merged = compute_namespaced_tag_overwrite(
            existing_tags, incoming_tags
        )

        if not tags_to_add and not tags_to_remove:
            # Already in the desired state.
            return True

        # Resolve the tag service key (best-effort; None triggers fallback path).
        service_key: Optional[str] = None
        try:
            from API import HydrusNetwork as hydrus_wrapper

            service_key = hydrus_wrapper.get_tag_service_key(
                client, service_name
            )
        except Exception:
            service_key = None

        # Preferred path: one combined add+remove mutation by service key.
        mutate_success = False
        if service_key:
            try:
                client.mutate_tags_by_key(
                    file_hash,
                    service_key,
                    add_tags=tags_to_add,
                    remove_tags=tags_to_remove,
                )
                mutate_success = True
            except Exception as exc:
                debug(
                    f"{self._log_prefix()} add_tag: mutate_tags_by_key failed: {exc}"
                )

        # Fallback path: separate delete/add calls by service name; either
        # succeeding counts as progress.
        did_any = False
        if not mutate_success:
            if tags_to_remove:
                try:
                    client.delete_tag(file_hash, tags_to_remove, service_name)
                    did_any = True
                except Exception as exc:
                    debug(
                        f"{self._log_prefix()} add_tag: delete_tag failed: {exc}"
                    )
            if tags_to_add:
                try:
                    client.add_tag(file_hash, tags_to_add, service_name)
                    did_any = True
                except Exception as exc:
                    debug(
                        f"{self._log_prefix()} add_tag: add_tag failed: {exc}"
                    )
        else:
            did_any = bool(tags_to_add or tags_to_remove)

        return did_any
    except Exception as exc:
        debug(f"{self._log_prefix()} add_tag failed: {exc}")
        return False
|
|
|
|
|
|
|
|
|
|
def delete_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool:
|
2025-12-29 17:05:03 -08:00
|
|
|
"""Delete tags from a Hydrus file."""
|
2025-12-11 19:04:02 -08:00
|
|
|
try:
|
|
|
|
|
client = self._client
|
|
|
|
|
if client is None:
|
2025-12-16 23:23:43 -08:00
|
|
|
debug(f"{self._log_prefix()} delete_tag: client unavailable")
|
2025-12-11 19:04:02 -08:00
|
|
|
return False
|
2025-12-12 21:55:38 -08:00
|
|
|
|
|
|
|
|
file_hash = str(file_identifier or "").strip().lower()
|
2025-12-29 18:42:02 -08:00
|
|
|
if len(file_hash) != 64 or not all(ch in "0123456789abcdef"
|
|
|
|
|
for ch in file_hash):
|
|
|
|
|
debug(
|
|
|
|
|
f"{self._log_prefix()} delete_tag: invalid file hash '{file_identifier}'"
|
|
|
|
|
)
|
2025-12-12 21:55:38 -08:00
|
|
|
return False
|
2025-12-11 19:04:02 -08:00
|
|
|
service_name = kwargs.get("service_name") or "my tags"
|
2025-12-20 23:57:44 -08:00
|
|
|
raw_list = list(tags) if isinstance(tags, (list, tuple)) else [str(tags)]
|
2025-12-29 17:05:03 -08:00
|
|
|
tag_list = [
|
2025-12-29 18:42:02 -08:00
|
|
|
str(t).strip().lower() for t in raw_list
|
|
|
|
|
if isinstance(t, str) and str(t).strip()
|
2025-12-29 17:05:03 -08:00
|
|
|
]
|
2025-12-11 19:04:02 -08:00
|
|
|
if not tag_list:
|
|
|
|
|
return False
|
2025-12-12 21:55:38 -08:00
|
|
|
client.delete_tag(file_hash, tag_list, service_name)
|
2025-12-11 19:04:02 -08:00
|
|
|
return True
|
|
|
|
|
except Exception as exc:
|
2025-12-16 23:23:43 -08:00
|
|
|
debug(f"{self._log_prefix()} delete_tag failed: {exc}")
|
2025-12-11 19:04:02 -08:00
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
def get_url(self, file_identifier: str, **kwargs: Any) -> List[str]:
|
2025-12-29 17:05:03 -08:00
|
|
|
"""Get known url for a Hydrus file."""
|
2025-12-11 19:04:02 -08:00
|
|
|
try:
|
|
|
|
|
client = self._client
|
2025-12-12 21:55:38 -08:00
|
|
|
|
|
|
|
|
file_hash = str(file_identifier or "").strip().lower()
|
2025-12-29 18:42:02 -08:00
|
|
|
if len(file_hash) != 64 or not all(ch in "0123456789abcdef"
|
|
|
|
|
for ch in file_hash):
|
2025-12-12 21:55:38 -08:00
|
|
|
return []
|
|
|
|
|
|
2025-12-29 18:42:02 -08:00
|
|
|
payload = client.fetch_file_metadata(
|
|
|
|
|
hashes=[file_hash],
|
|
|
|
|
include_file_url=False
|
|
|
|
|
)
|
2025-12-11 19:04:02 -08:00
|
|
|
items = payload.get("metadata") if isinstance(payload, dict) else None
|
|
|
|
|
if not isinstance(items, list) or not items:
|
|
|
|
|
return []
|
2025-12-29 18:42:02 -08:00
|
|
|
meta = items[0] if isinstance(items[0],
|
|
|
|
|
dict) else {}
|
2025-12-16 01:45:01 -08:00
|
|
|
|
2025-12-29 18:42:02 -08:00
|
|
|
raw_urls: Any = meta.get("known_urls"
|
|
|
|
|
) or meta.get("urls") or meta.get("url") or []
|
2025-12-16 01:45:01 -08:00
|
|
|
if isinstance(raw_urls, str):
|
|
|
|
|
val = raw_urls.strip()
|
|
|
|
|
return [val] if val else []
|
|
|
|
|
if isinstance(raw_urls, list):
|
|
|
|
|
out: list[str] = []
|
|
|
|
|
for u in raw_urls:
|
|
|
|
|
if not isinstance(u, str):
|
|
|
|
|
continue
|
|
|
|
|
u = u.strip()
|
|
|
|
|
if u:
|
|
|
|
|
out.append(u)
|
|
|
|
|
return out
|
|
|
|
|
return []
|
2025-12-11 19:04:02 -08:00
|
|
|
except Exception as exc:
|
2025-12-16 23:23:43 -08:00
|
|
|
debug(f"{self._log_prefix()} get_url failed: {exc}")
|
2025-12-11 19:04:02 -08:00
|
|
|
return []
|
|
|
|
|
|
2025-12-30 05:48:01 -08:00
|
|
|
def get_url_info(self, url: str, **kwargs: Any) -> dict[str, Any] | None:
|
|
|
|
|
"""Return Hydrus URL info for a single URL (Hydrus-only helper).
|
|
|
|
|
|
|
|
|
|
Uses: GET /add_urls/get_url_info
|
|
|
|
|
"""
|
|
|
|
|
try:
|
|
|
|
|
client = self._client
|
|
|
|
|
if client is None:
|
|
|
|
|
return None
|
|
|
|
|
u = str(url or "").strip()
|
|
|
|
|
if not u:
|
|
|
|
|
return None
|
|
|
|
|
try:
|
|
|
|
|
return client.get_url_info(u) # type: ignore[attr-defined]
|
|
|
|
|
except Exception:
|
|
|
|
|
from API.HydrusNetwork import HydrusRequestSpec
|
|
|
|
|
|
|
|
|
|
spec = HydrusRequestSpec(
|
|
|
|
|
method="GET",
|
|
|
|
|
endpoint="/add_urls/get_url_info",
|
|
|
|
|
query={
|
|
|
|
|
"url": u
|
|
|
|
|
},
|
|
|
|
|
)
|
|
|
|
|
response = client._perform_request(spec) # type: ignore[attr-defined]
|
|
|
|
|
return response if isinstance(response, dict) else None
|
|
|
|
|
except Exception as exc:
|
|
|
|
|
debug(f"{self._log_prefix()} get_url_info failed: {exc}")
|
|
|
|
|
return None
|
|
|
|
|
|
2025-12-11 19:04:02 -08:00
|
|
|
def add_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
|
2025-12-29 17:05:03 -08:00
|
|
|
"""Associate one or more url with a Hydrus file."""
|
2025-12-11 19:04:02 -08:00
|
|
|
try:
|
|
|
|
|
client = self._client
|
|
|
|
|
if client is None:
|
2025-12-16 23:23:43 -08:00
|
|
|
debug(f"{self._log_prefix()} add_url: client unavailable")
|
2025-12-11 19:04:02 -08:00
|
|
|
return False
|
|
|
|
|
for u in url:
|
|
|
|
|
client.associate_url(file_identifier, u)
|
|
|
|
|
return True
|
|
|
|
|
except Exception as exc:
|
2025-12-16 23:23:43 -08:00
|
|
|
debug(f"{self._log_prefix()} add_url failed: {exc}")
|
2025-12-11 19:04:02 -08:00
|
|
|
return False
|
|
|
|
|
|
2025-12-20 23:57:44 -08:00
|
|
|
def add_url_bulk(self, items: List[tuple[str, List[str]]], **kwargs: Any) -> bool:
|
|
|
|
|
"""Bulk associate urls with Hydrus files.
|
|
|
|
|
|
|
|
|
|
This is a best-effort convenience wrapper used by cmdlets to batch url associations.
|
|
|
|
|
Hydrus' client API is still called per (hash,url) pair, but this consolidates the
|
|
|
|
|
cmdlet-level control flow so url association can be deferred until the end.
|
|
|
|
|
"""
|
|
|
|
|
try:
|
|
|
|
|
client = self._client
|
|
|
|
|
if client is None:
|
|
|
|
|
debug(f"{self._log_prefix()} add_url_bulk: client unavailable")
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
any_success = False
|
2025-12-29 17:05:03 -08:00
|
|
|
for file_identifier, urls in items or []:
|
2025-12-20 23:57:44 -08:00
|
|
|
h = str(file_identifier or "").strip().lower()
|
|
|
|
|
if len(h) != 64:
|
|
|
|
|
continue
|
2025-12-29 17:05:03 -08:00
|
|
|
for u in urls or []:
|
2025-12-20 23:57:44 -08:00
|
|
|
s = str(u or "").strip()
|
|
|
|
|
if not s:
|
|
|
|
|
continue
|
|
|
|
|
try:
|
|
|
|
|
client.associate_url(h, s)
|
|
|
|
|
any_success = True
|
|
|
|
|
except Exception:
|
|
|
|
|
continue
|
|
|
|
|
return any_success
|
|
|
|
|
except Exception as exc:
|
|
|
|
|
debug(f"{self._log_prefix()} add_url_bulk failed: {exc}")
|
|
|
|
|
return False
|
|
|
|
|
|
2025-12-11 19:04:02 -08:00
|
|
|
def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
|
2025-12-29 17:05:03 -08:00
|
|
|
"""Delete one or more url from a Hydrus file."""
|
2025-12-11 19:04:02 -08:00
|
|
|
try:
|
|
|
|
|
client = self._client
|
|
|
|
|
if client is None:
|
2025-12-16 23:23:43 -08:00
|
|
|
debug(f"{self._log_prefix()} delete_url: client unavailable")
|
2025-12-11 19:04:02 -08:00
|
|
|
return False
|
|
|
|
|
for u in url:
|
|
|
|
|
client.delete_url(file_identifier, u)
|
|
|
|
|
return True
|
|
|
|
|
except Exception as exc:
|
2025-12-16 23:23:43 -08:00
|
|
|
debug(f"{self._log_prefix()} delete_url failed: {exc}")
|
2025-12-11 19:04:02 -08:00
|
|
|
return False
|
|
|
|
|
|
2025-12-12 21:55:38 -08:00
|
|
|
def get_note(self, file_identifier: str, **kwargs: Any) -> Dict[str, str]:
|
|
|
|
|
"""Get notes for a Hydrus file (default note service only)."""
|
|
|
|
|
try:
|
|
|
|
|
client = self._client
|
|
|
|
|
if client is None:
|
2025-12-16 23:23:43 -08:00
|
|
|
debug(f"{self._log_prefix()} get_note: client unavailable")
|
2025-12-12 21:55:38 -08:00
|
|
|
return {}
|
|
|
|
|
|
|
|
|
|
file_hash = str(file_identifier or "").strip().lower()
|
2025-12-29 18:42:02 -08:00
|
|
|
if len(file_hash) != 64 or not all(ch in "0123456789abcdef"
|
|
|
|
|
for ch in file_hash):
|
2025-12-12 21:55:38 -08:00
|
|
|
return {}
|
|
|
|
|
|
|
|
|
|
payload = client.fetch_file_metadata(hashes=[file_hash], include_notes=True)
|
|
|
|
|
items = payload.get("metadata") if isinstance(payload, dict) else None
|
|
|
|
|
if not isinstance(items, list) or not items:
|
|
|
|
|
return {}
|
|
|
|
|
meta = items[0] if isinstance(items[0], dict) else None
|
|
|
|
|
if not isinstance(meta, dict):
|
|
|
|
|
return {}
|
|
|
|
|
|
|
|
|
|
notes_payload = meta.get("notes")
|
|
|
|
|
if isinstance(notes_payload, dict):
|
2025-12-29 18:42:02 -08:00
|
|
|
return {
|
|
|
|
|
str(k): str(v or "")
|
|
|
|
|
for k, v in notes_payload.items() if str(k).strip()
|
|
|
|
|
}
|
2025-12-12 21:55:38 -08:00
|
|
|
|
|
|
|
|
return {}
|
|
|
|
|
except Exception as exc:
|
2025-12-16 23:23:43 -08:00
|
|
|
debug(f"{self._log_prefix()} get_note failed: {exc}")
|
2025-12-12 21:55:38 -08:00
|
|
|
return {}
|
|
|
|
|
|
2025-12-29 18:42:02 -08:00
|
|
|
def set_note(
|
|
|
|
|
self,
|
|
|
|
|
file_identifier: str,
|
|
|
|
|
name: str,
|
|
|
|
|
text: str,
|
|
|
|
|
**kwargs: Any
|
|
|
|
|
) -> bool:
|
2025-12-12 21:55:38 -08:00
|
|
|
"""Set a named note for a Hydrus file (default note service only)."""
|
|
|
|
|
try:
|
|
|
|
|
client = self._client
|
|
|
|
|
if client is None:
|
2025-12-16 23:23:43 -08:00
|
|
|
debug(f"{self._log_prefix()} set_note: client unavailable")
|
2025-12-12 21:55:38 -08:00
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
file_hash = str(file_identifier or "").strip().lower()
|
2025-12-29 18:42:02 -08:00
|
|
|
if len(file_hash) != 64 or not all(ch in "0123456789abcdef"
|
|
|
|
|
for ch in file_hash):
|
2025-12-12 21:55:38 -08:00
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
note_name = str(name or "").strip()
|
|
|
|
|
if not note_name:
|
|
|
|
|
return False
|
|
|
|
|
note_text = str(text or "")
|
|
|
|
|
|
2025-12-29 18:42:02 -08:00
|
|
|
client.set_notes(file_hash,
|
|
|
|
|
{
|
|
|
|
|
note_name: note_text
|
|
|
|
|
})
|
2025-12-12 21:55:38 -08:00
|
|
|
return True
|
|
|
|
|
except Exception as exc:
|
2025-12-16 23:23:43 -08:00
|
|
|
debug(f"{self._log_prefix()} set_note failed: {exc}")
|
2025-12-12 21:55:38 -08:00
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
def delete_note(self, file_identifier: str, name: str, **kwargs: Any) -> bool:
|
|
|
|
|
"""Delete a named note for a Hydrus file (default note service only)."""
|
|
|
|
|
try:
|
|
|
|
|
client = self._client
|
|
|
|
|
if client is None:
|
2025-12-16 23:23:43 -08:00
|
|
|
debug(f"{self._log_prefix()} delete_note: client unavailable")
|
2025-12-12 21:55:38 -08:00
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
file_hash = str(file_identifier or "").strip().lower()
|
2025-12-29 18:42:02 -08:00
|
|
|
if len(file_hash) != 64 or not all(ch in "0123456789abcdef"
|
|
|
|
|
for ch in file_hash):
|
2025-12-12 21:55:38 -08:00
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
note_name = str(name or "").strip()
|
|
|
|
|
if not note_name:
|
|
|
|
|
return False
|
|
|
|
|
|
|
|
|
|
client.delete_notes(file_hash, [note_name])
|
|
|
|
|
return True
|
|
|
|
|
except Exception as exc:
|
2025-12-16 23:23:43 -08:00
|
|
|
debug(f"{self._log_prefix()} delete_note failed: {exc}")
|
2025-12-12 21:55:38 -08:00
|
|
|
return False
|
|
|
|
|
|
2025-12-11 19:04:02 -08:00
|
|
|
@staticmethod
|
|
|
|
|
def _extract_tags_from_hydrus_meta(
|
2025-12-29 18:42:02 -08:00
|
|
|
meta: Dict[str,
|
|
|
|
|
Any],
|
|
|
|
|
service_key: Optional[str],
|
|
|
|
|
service_name: str
|
2025-12-11 19:04:02 -08:00
|
|
|
) -> List[str]:
|
|
|
|
|
"""Extract current tags from Hydrus metadata dict.
|
2025-12-29 17:05:03 -08:00
|
|
|
|
2025-12-11 19:04:02 -08:00
|
|
|
Prefers display_tags (includes siblings/parents, excludes deleted).
|
|
|
|
|
Falls back to storage_tags status '0' (current).
|
|
|
|
|
"""
|
|
|
|
|
tags_payload = meta.get("tags")
|
|
|
|
|
if not isinstance(tags_payload, dict):
|
|
|
|
|
return []
|
2025-12-29 17:05:03 -08:00
|
|
|
|
2025-12-11 19:04:02 -08:00
|
|
|
svc_data = None
|
|
|
|
|
if service_key:
|
|
|
|
|
svc_data = tags_payload.get(service_key)
|
|
|
|
|
if not isinstance(svc_data, dict):
|
|
|
|
|
return []
|
2025-12-29 17:05:03 -08:00
|
|
|
|
2025-12-11 19:04:02 -08:00
|
|
|
# Prefer display_tags (Hydrus computes siblings/parents)
|
|
|
|
|
display = svc_data.get("display_tags")
|
|
|
|
|
if isinstance(display, list) and display:
|
2025-12-29 18:42:02 -08:00
|
|
|
return [
|
|
|
|
|
str(t) for t in display
|
|
|
|
|
if isinstance(t, (str, bytes)) and str(t).strip()
|
|
|
|
|
]
|
2025-12-29 17:05:03 -08:00
|
|
|
|
2025-12-11 19:04:02 -08:00
|
|
|
# Fallback to storage_tags status '0' (current)
|
|
|
|
|
storage = svc_data.get("storage_tags")
|
|
|
|
|
if isinstance(storage, dict):
|
|
|
|
|
current_list = storage.get("0") or storage.get(0)
|
|
|
|
|
if isinstance(current_list, list):
|
2025-12-29 17:05:03 -08:00
|
|
|
return [
|
2025-12-29 18:42:02 -08:00
|
|
|
str(t) for t in current_list
|
|
|
|
|
if isinstance(t, (str, bytes)) and str(t).strip()
|
2025-12-29 17:05:03 -08:00
|
|
|
]
|
|
|
|
|
|
2025-12-11 19:04:02 -08:00
|
|
|
return []
|