Files
Medios-Macina/Store/HydrusNetwork.py

2426 lines
100 KiB
Python
Raw Normal View History

2025-12-11 19:04:02 -08:00
from __future__ import annotations
import re
import sys
2026-01-04 02:23:50 -08:00
import tempfile
import shutil
2025-12-11 19:04:02 -08:00
from pathlib import Path
2026-01-24 09:11:05 -08:00
from typing import Any, Dict, List, Literal, Optional, Sequence, Tuple
2025-12-11 19:04:02 -08:00
2026-01-04 02:23:50 -08:00
from urllib.parse import quote
2025-12-13 12:09:50 -08:00
import httpx
2026-02-11 19:06:38 -08:00
from API.httpx_shared import get_shared_httpx_client
2025-12-13 12:09:50 -08:00
2025-12-11 19:04:02 -08:00
from SYS.logger import debug, log
from SYS.utils_constant import mime_maps
2026-02-07 14:58:13 -08:00
# Every file extension (without the leading dot) that mime_maps knows about.
_KNOWN_EXTS = {
    str(entry["ext"]).strip().lstrip(".")
    for group in mime_maps.values()
    for entry in group.values()
    if isinstance(entry, dict) and entry.get("ext")
}
def _resolve_ext_from_meta(meta: Dict[str, Any], mime_type: Optional[str]) -> str:
    """Derive a bare file extension (no dot) from a Hydrus metadata row.

    Resolution order: explicit ext-like keys, then human-readable filetype
    strings, then a mime-type lookup against mime_maps. Unrecognised values
    and the 'ebook' / 'unknown filetype' placeholders are discarded.
    """
    candidate = ""
    for key in ("ext", "file_ext", "extension", "file_extension"):
        value = meta.get(key)
        if value:
            candidate = str(value).strip().lstrip(".")
            break

    # Discard extensions we do not recognise, and the 'ebook' placeholder.
    if candidate and candidate not in _KNOWN_EXTS:
        candidate = ""
    if candidate.lower() == "ebook":
        candidate = ""

    if not candidate:
        # Try human-readable filetype descriptions next.
        human = (
            meta.get("filetype_human")
            or meta.get("mime_human")
            or meta.get("mime_string")
            or meta.get("filetype")
        )
        described = str(human or "").strip().lstrip(".").lower()
        if described and described != "unknown filetype":
            if described.isalnum() and len(described) <= 8:
                candidate = described
            else:
                try:
                    for token in re.findall(r"[a-z0-9]+", described):
                        if token in _KNOWN_EXTS:
                            candidate = token
                            break
                except Exception:
                    pass
        if not candidate:
            # A missing/malformed mime_type argument falls back to metadata fields.
            if not mime_type or not isinstance(mime_type, str) or "/" not in mime_type:
                mime_type = (
                    meta.get("mime_string")
                    or meta.get("mime_human")
                    or meta.get("filetype_mime")
                    or mime_type
                )

    if not candidate and mime_type:
        # Normalise e.g. "image/png; charset=..." down to "image/png".
        try:
            mime_type = str(mime_type).split(";", 1)[0].strip().lower()
        except Exception:
            mime_type = str(mime_type)
        for group in mime_maps.values():
            for entry in group.values():
                if mime_type in entry.get("mimes", []):
                    candidate = str(entry.get("ext", "")).strip().lstrip(".")
                    break
            if candidate:
                break
    return candidate
2025-12-11 23:21:45 -08:00
from Store._base import Store
2025-12-11 19:04:02 -08:00
_HYDRUS_INIT_CHECK_CACHE: dict[tuple[str,
str],
tuple[bool,
Optional[str]]] = {}
2025-12-13 12:09:50 -08:00
2025-12-11 23:21:45 -08:00
class HydrusNetwork(Store):
2025-12-11 19:04:02 -08:00
"""File storage backend for Hydrus client.
2025-12-29 17:05:03 -08:00
2025-12-11 19:04:02 -08:00
Each instance represents a specific Hydrus client connection.
2025-12-13 12:09:50 -08:00
Maintains its own HydrusClient.
2025-12-11 19:04:02 -08:00
"""
2025-12-13 00:18:30 -08:00
2026-01-11 03:24:49 -08:00
@classmethod
def config_schema(cls) -> List[Dict[str, Any]]:
    """Describe the configuration fields needed to register this store."""
    name_field: Dict[str, Any] = {
        "key": "NAME",
        "label": "Store Name",
        "default": "",
        "placeholder": "e.g. home_hydrus",
        "required": True,
    }
    url_field: Dict[str, Any] = {
        "key": "URL",
        "label": "Hydrus URL",
        "default": "http://127.0.0.1:45869",
        "placeholder": "http://127.0.0.1:45869",
        "required": True,
    }
    api_field: Dict[str, Any] = {
        "key": "API",
        "label": "API Key",
        "default": "",
        "required": True,
        "secret": True,
    }
    return [name_field, url_field, api_field]
@property
def is_remote(self) -> bool:
    """Hydrus is reached over its HTTP API, so this store is always remote."""
    return True

@property
def prefer_defer_tags(self) -> bool:
    """Always True: callers should prefer deferring tag writes for this store."""
    return True
def _log_prefix(self) -> str:
store_name = getattr(self, "NAME", None) or "unknown"
return f"[hydrusnetwork:{store_name}]"
2026-02-07 14:58:13 -08:00
def _append_access_key(self, url: str) -> str:
if not url:
return url
if "access_key=" in url:
return url
if not getattr(self, "API", None):
return url
separator = "&" if "?" in url else "?"
return f"{url}{separator}access_key={quote(str(self.API))}"
2025-12-13 00:18:30 -08:00
def __new__(cls, *args: Any, **kwargs: Any) -> "HydrusNetwork":
    """Create the instance and bind NAME/API/URL string attributes from kwargs
    before __init__ runs. Missing keys are simply left unset."""
    instance = super().__new__(cls)
    for attr in ("NAME", "API", "URL"):
        value = kwargs.get(attr)
        if value is not None:
            setattr(instance, attr, str(value))
    return instance
def __init__(
    self,
    instance_name: Optional[str] = None,
    api_key: Optional[str] = None,
    url: Optional[str] = None,
    *,
    NAME: Optional[str] = None,
    API: Optional[str] = None,
    URL: Optional[str] = None,
) -> None:
    """Initialize Hydrus storage backend.

    Args:
        instance_name: Name of this Hydrus instance (e.g., 'home', 'work')
        api_key: Hydrus Client API access key
        url: Hydrus client URL (e.g., 'http://192.168.1.230:45869')
        NAME / API / URL: keyword aliases for the three parameters above.

    Raises:
        ValueError: if name, key, or url is missing.
        RuntimeError: if the endpoint is unreachable or rejects the key.
    """
    from API.HydrusNetwork import HydrusNetwork as HydrusClient

    # Keyword aliases only apply when the positional form was not given.
    if instance_name is None and NAME is not None:
        instance_name = str(NAME)
    if api_key is None and API is not None:
        api_key = str(API)
    if url is None and URL is not None:
        url = str(URL)
    if not instance_name or not api_key or not url:
        raise ValueError("HydrusNetwork requires NAME, API, and URL")

    self.NAME = instance_name
    self.API = api_key
    self.URL = url.rstrip("/")
    # Total count (best-effort, used for startup diagnostics).
    self.total_count: Optional[int] = None

    # Self health-check: the URL must answer /api_version and accept our
    # access key on /verify_access_key. This MUST NOT acquire a session key.
    # Results are memoized per (URL, key) so repeat constructions are cheap.
    cache_key = (self.URL, self.API)
    cached = _HYDRUS_INIT_CHECK_CACHE.get(cache_key)
    if cached is not None:
        ok, err = cached
        if not ok:
            raise RuntimeError(
                f"Hydrus '{self.NAME}' unavailable: {err or 'Unavailable'}"
            )
    else:
        try:
            http = get_shared_httpx_client(timeout=5.0, verify_ssl=False)

            version_resp = http.get(f"{self.URL}/api_version", follow_redirects=True)
            version_resp.raise_for_status()
            if not isinstance(version_resp.json(), dict):
                raise RuntimeError(
                    "Hydrus /api_version returned an unexpected response"
                )

            verify_resp = http.get(
                f"{self.URL}/verify_access_key",
                headers={"Hydrus-Client-API-Access-Key": self.API},
                follow_redirects=True,
            )
            verify_resp.raise_for_status()
            if not isinstance(verify_resp.json(), dict):
                raise RuntimeError(
                    "Hydrus /verify_access_key returned an unexpected response"
                )

            _HYDRUS_INIT_CHECK_CACHE[cache_key] = (True, None)
        except Exception as exc:
            message = str(exc)
            _HYDRUS_INIT_CHECK_CACHE[cache_key] = (False, message)
            raise RuntimeError(f"Hydrus '{self.NAME}' unavailable: {message}") from exc

    # Persistent client for this instance (auth via access key by default).
    self._client = HydrusClient(
        url=self.URL,
        access_key=self.API,
        instance_name=self.NAME,
    )

    self._service_key_cache: Dict[str, Optional[str]] = {}

    # NOTE: the total-count probe is intentionally NOT run here to avoid an
    # extra API call during init; the count is fetched lazily on first
    # search/list via get_total_count() if needed.
def _get_service_key(self, service_name: str, *, refresh: bool = False) -> Optional[str]:
"""Resolve (and cache) the Hydrus service key for the given service name."""
normalized = str(service_name or "my tags").strip()
if not normalized:
normalized = "my tags"
cache_key = normalized.lower()
if not refresh and cache_key in self._service_key_cache:
return self._service_key_cache[cache_key]
client = self._client
if client is None:
self._service_key_cache[cache_key] = None
return None
try:
from API import HydrusNetwork as hydrus_wrapper
resolved = hydrus_wrapper.get_tag_service_key(client, normalized)
except Exception:
resolved = None
self._service_key_cache[cache_key] = resolved
return resolved
2025-12-17 03:16:41 -08:00
def get_total_count(self, *, refresh: bool = False) -> Optional[int]:
    """Best-effort total file count for this Hydrus instance.

    Intended for diagnostics (e.g., REPL startup checks). This should be fast,
    and it MUST NOT raise.
    """
    if self.total_count is not None and not refresh:
        return self.total_count

    def _pick_count(payload: Any) -> Optional[int]:
        # Different Hydrus versions expose the count under different keys.
        if not isinstance(payload, dict):
            return None
        for key in ("file_count", "file_count_inclusive", "num_files"):
            value = payload.get(key)
            if value is not None:
                return value if isinstance(value, int) else None
        return None

    # 1) Prefer a direct JSON request (fast + avoids CBOR edge cases).
    try:
        import json as _json
        params = {
            "tags": _json.dumps(["system:everything"]),
            "return_hashes": "false",
            "return_file_ids": "false",
            "return_file_count": "true",
        }
        headers = {
            "Hydrus-Client-API-Access-Key": self.API,
            "Accept": "application/json",
        }
        http = get_shared_httpx_client(timeout=5.0, verify_ssl=False)
        resp = http.get(
            f"{self.URL}/get_files/search_files",
            params=params,
            headers=headers,
            follow_redirects=True,
        )
        resp.raise_for_status()
        count = _pick_count(resp.json())
        if count is not None:
            self.total_count = count
            return self.total_count
    except Exception as exc:
        debug(
            f"{self._log_prefix()} total count (json) unavailable: {exc}",
            file=sys.stderr
        )

    # 2) Fallback to the API client (CBOR).
    try:
        payload = self._client.search_files(
            tags=["system:everything"],
            return_hashes=False,
            return_file_ids=False,
            return_file_count=True,
        )
        count = _pick_count(payload)
        if count is not None:
            self.total_count = count
        return self.total_count
    except Exception as exc:
        debug(
            f"{self._log_prefix()} total count (client) unavailable: {exc}",
            file=sys.stderr
        )
    return self.total_count
def name(self) -> str:
    """Return the configured store name."""
    return self.NAME

def get_name(self) -> str:
    """Alias of name() for callers expecting an explicit getter."""
    return self.NAME
def set_relationship(self, alt_hash: str, king_hash: str, kind: str = "alt") -> bool:
    """Persist a relationship via the Hydrus client API for this backend instance.

    Both hashes must be distinct 64-char hex digests (normalised to lowercase).
    Returns True on success, False on any validation or client failure; this
    method never raises.
    """
    try:
        alt_norm = str(alt_hash or "").strip().lower()
        king_norm = str(king_hash or "").strip().lower()
        valid = (
            len(alt_norm) == 64
            and len(king_norm) == 64
            and alt_norm != king_norm
        )
        if not valid:
            return False
        client = getattr(self, "_client", None)
        if client is None or not hasattr(client, "set_relationship"):
            return False
        client.set_relationship(alt_norm, king_norm, str(kind or "alt"))
        return True
    except Exception:
        return False
@staticmethod
def _has_current_file_service(meta: Dict[str, Any]) -> bool:
services = meta.get("file_services")
if not isinstance(services, dict):
return False
current = services.get("current")
if isinstance(current, dict):
return any(bool(v) for v in current.values())
if isinstance(current, list):
return len(current) > 0
return False
2025-12-11 19:04:02 -08:00
def add_file(self, file_path: Path, **kwargs: Any) -> str:
    """Upload file to Hydrus with full metadata support.

    Args:
        file_path: Path to the file to upload
        tag: Optional list of tag values to add
        url: Optional list of url to associate with the file
        title: Optional title (will be added as 'title:value' tag)
        hash / file_hash: Optional precomputed SHA-256 hex digest, used to
            avoid re-hashing the file on disk.

    Returns:
        File hash from Hydrus

    Raises:
        Exception: If upload fails
    """
    from SYS.utils import sha256_file

    tag_list = kwargs.get("tag", [])
    urls = kwargs.get("url", [])
    title = kwargs.get("title")

    # Add title to tags if provided and not already present
    if title:
        title_tag = f"title:{title}".strip().lower()
        if not any(str(candidate).lower().startswith("title:")
                   for candidate in tag_list):
            tag_list = [title_tag] + list(tag_list)

    # Hydrus is lowercase-only tags; normalize here for consistency.
    tag_list = [
        str(t).strip().lower() for t in (tag_list or [])
        if isinstance(t, str) and str(t).strip()
    ]

    def _meta_is_current(meta: Any) -> bool:
        """True when a metadata row describes a real file in a current file service.

        Hydrus returns placeholder rows for unknown hashes, and some
        deployments return a file_id even for trashed/missing files, so:
        - prefer the file_services.current signal when present;
        - otherwise only trust rows that look like a real file (size > 0).
        """
        if not isinstance(meta, dict):
            return False
        if meta.get("file_id") is None:
            return False
        if isinstance(meta.get("file_services"), dict):
            return self._has_current_file_service(meta)
        size_val = meta.get("size")
        if size_val is None:
            size_val = meta.get("size_bytes")
        try:
            size_int = int(size_val) if size_val is not None else 0
        except Exception:
            size_int = 0
        return size_int > 0

    try:
        # Compute file hash (or use hint from kwargs to avoid redundant IO)
        file_hash = kwargs.get("hash") or kwargs.get("file_hash")
        if not file_hash:
            file_hash = sha256_file(file_path)
        debug(f"{self._log_prefix()} file hash: {file_hash}")

        # Use persistent client with session key
        client = self._client
        if client is None:
            raise Exception("Hydrus client unavailable")

        # Duplicate check: only a row in a current file service counts.
        file_exists = False
        try:
            metadata = client.fetch_file_metadata(
                hashes=[file_hash],
                include_service_keys_to_tags=False,
                include_file_services=True,
                include_is_trashed=True,
                include_file_url=True,
                include_duration=False,
                include_size=True,
                include_mime=True,
            )
            if metadata and isinstance(metadata, dict):
                metas = metadata.get("metadata", [])
                if isinstance(metas, list):
                    file_exists = any(_meta_is_current(m) for m in metas)
            if file_exists:
                debug(
                    f"{self._log_prefix()} Duplicate detected - file already in Hydrus with hash: {file_hash}"
                )
        except Exception as exc:
            debug(f"{self._log_prefix()} metadata fetch failed: {exc}")

        # An existing file may be in trash: best-effort restore it, then
        # re-check it is actually in a current file service; if not, we
        # fall through to a fresh upload.
        if file_exists:
            try:
                client.undelete_files([file_hash])
            except Exception:
                pass
            try:
                metadata2 = client.fetch_file_metadata(
                    hashes=[file_hash],
                    include_service_keys_to_tags=False,
                    include_file_services=True,
                    include_is_trashed=True,
                    include_file_url=False,
                    include_duration=False,
                    include_size=False,
                    include_mime=False,
                )
                metas2 = metadata2.get("metadata", []) if isinstance(metadata2, dict) else []
                if isinstance(metas2, list) and metas2:
                    if not any(_meta_is_current(m) for m in metas2):
                        file_exists = False
            except Exception:
                # If re-check fails, keep prior behavior (avoid forcing uploads in unknown states)
                pass

        # Upload file if not already present
        if not file_exists:
            debug(
                f"{self._log_prefix()} Uploading: {file_path.name}"
            )
            response = client.add_file(file_path)
            # Extract hash from response
            hydrus_hash: Optional[str] = None
            if isinstance(response, dict):
                hydrus_hash = response.get("hash") or response.get("file_hash")
                if not hydrus_hash:
                    hashes = response.get("hashes")
                    if isinstance(hashes, list) and hashes:
                        hydrus_hash = hashes[0]
            if isinstance(hydrus_hash, (bytes, bytearray)):
                try:
                    hydrus_hash = bytes(hydrus_hash).hex()
                except Exception:
                    hydrus_hash = None
            if hydrus_hash:
                try:
                    hydrus_hash = str(hydrus_hash).strip().lower()
                except Exception:
                    hydrus_hash = None
            if not hydrus_hash or len(str(hydrus_hash)) != 64:
                debug(
                    f"{self._log_prefix()} Hydrus response hash missing/invalid; using precomputed hash"
                )
                hydrus_hash = file_hash
            if not hydrus_hash:
                raise Exception(f"Hydrus response missing file hash: {response}")
            file_hash = hydrus_hash
            debug(f"{self._log_prefix()} hash: {file_hash}")

        # Add tags if provided (both for new and existing files)
        if tag_list:
            # Default tag service (the old try/except here was dead code:
            # both branches assigned the same value).
            service_name = "my tags"
            try:
                debug(
                    f"{self._log_prefix()} Adding {len(tag_list)} tag(s): {tag_list}"
                )
                client.add_tag(file_hash, tag_list, service_name)
                debug(
                    f"{self._log_prefix()} Tags added via '{service_name}'"
                )
            except Exception as exc:
                log(
                    f"{self._log_prefix()} ⚠️ Failed to add tags: {exc}",
                    file=sys.stderr
                )

        # Associate urls if provided (both for new and existing files)
        if urls:
            debug(
                f"{self._log_prefix()} Associating {len(urls)} URL(s) with file"
            )
            # NOTE: previously `for url in url:` shadowed the list variable.
            for candidate_url in urls:
                if not candidate_url:
                    continue
                try:
                    client.associate_url(file_hash, str(candidate_url))
                    debug(f"{self._log_prefix()} Associated URL: {candidate_url}")
                except Exception as exc:
                    log(
                        f"{self._log_prefix()} ⚠️ Failed to associate URL {candidate_url}: {exc}",
                        file=sys.stderr,
                    )
        return file_hash
    except Exception as exc:
        log(f"{self._log_prefix()} ❌ upload failed: {exc}", file=sys.stderr)
        raise
def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]:
2025-12-11 19:04:02 -08:00
"""Search Hydrus database for files matching query.
2025-12-29 17:05:03 -08:00
2025-12-11 19:04:02 -08:00
Args:
query: Search query (tags, filenames, hashes, etc.)
limit: Maximum number of results to return (default: 100)
2025-12-29 17:05:03 -08:00
2025-12-11 19:04:02 -08:00
Returns:
List of dicts with 'name', 'hash', 'size', 'tags' fields
2025-12-29 17:05:03 -08:00
2025-12-11 19:04:02 -08:00
Example:
results = storage["hydrus"].search("artist:john_doe music")
results = storage["hydrus"].search("Simple Man")
"""
limit = kwargs.get("limit", 100)
2026-01-24 01:38:12 -08:00
minimal = bool(kwargs.get("minimal", False))
2026-01-24 09:11:05 -08:00
url_only = bool(kwargs.get("url_only", False))
2025-12-11 19:04:02 -08:00
try:
client = self._client
if client is None:
raise Exception("Hydrus client unavailable")
2025-12-16 23:23:43 -08:00
prefix = self._log_prefix()
debug(f"{prefix} Searching for: {query}")
2025-12-11 19:04:02 -08:00
2025-12-14 00:53:52 -08:00
def _extract_urls(meta_obj: Any) -> list[str]:
if not isinstance(meta_obj, dict):
return []
2026-01-16 01:47:00 -08:00
raw = meta_obj.get("known_urls")
if raw is None:
raw = meta_obj.get("url")
2025-12-14 00:53:52 -08:00
if raw is None:
raw = meta_obj.get("urls")
if isinstance(raw, str):
val = raw.strip()
return [val] if val else []
if isinstance(raw, list):
out: list[str] = []
for item in raw:
if not isinstance(item, str):
continue
s = item.strip()
if s:
out.append(s)
return out
return []
2026-01-16 01:47:00 -08:00
def _extract_search_ids(payload: Any) -> tuple[list[int], list[str]]:
if not isinstance(payload, dict):
return [], []
raw_ids = payload.get("file_ids", [])
raw_hashes = payload.get("hashes", [])
ids_out: list[int] = []
hashes_out: list[str] = []
if isinstance(raw_ids, list):
for item in raw_ids:
try:
if isinstance(item, (int, float)):
ids_out.append(int(item))
continue
if isinstance(item, str) and item.strip().isdigit():
ids_out.append(int(item.strip()))
except Exception:
continue
if isinstance(raw_hashes, list):
for item in raw_hashes:
try:
candidate = str(item or "").strip().lower()
if candidate:
hashes_out.append(candidate)
except Exception:
continue
return ids_out, hashes_out
2025-12-29 17:05:03 -08:00
def _iter_url_filtered_metadata(
url_value: str | None,
want_any: bool,
2026-01-16 01:47:00 -08:00
fetch_limit: int,
2026-01-17 21:32:44 -08:00
scan_limit: int | None = None,
needles: Optional[Sequence[str]] = None,
2026-01-24 01:38:12 -08:00
*,
minimal: bool = False,
2026-01-16 01:47:00 -08:00
) -> list[dict[str, Any]]:
2025-12-14 00:53:52 -08:00
"""Best-effort URL search by scanning Hydrus metadata with include_file_url=True."""
candidate_file_ids: list[int] = []
2026-01-16 01:47:00 -08:00
candidate_hashes: list[str] = []
seen_file_ids: set[int] = set()
seen_hashes: set[str] = set()
def _add_candidates(ids: list[int], hashes: list[str]) -> None:
for fid in ids:
if fid in seen_file_ids:
continue
seen_file_ids.add(fid)
candidate_file_ids.append(fid)
for hh in hashes:
if hh in seen_hashes:
continue
seen_hashes.add(hh)
candidate_hashes.append(hh)
predicate_supported = getattr(self, "_has_url_predicate", None)
if predicate_supported is not False:
try:
2025-12-14 00:53:52 -08:00
predicate = "system:has url"
url_search = client.search_files(
tags=[predicate],
2026-01-16 01:47:00 -08:00
return_hashes=True,
return_file_ids=False,
2025-12-14 00:53:52 -08:00
return_file_count=False,
)
2026-01-16 01:47:00 -08:00
ids, hashes = _extract_search_ids(url_search)
_add_candidates(ids, hashes)
self._has_url_predicate = True
except Exception as exc:
try:
from API.HydrusNetwork import HydrusRequestError
if isinstance(exc, HydrusRequestError) and getattr(exc, "status", None) == 400:
self._has_url_predicate = False
except Exception:
pass
2025-12-14 00:53:52 -08:00
2026-01-16 01:47:00 -08:00
if not candidate_file_ids and not candidate_hashes:
2025-12-14 00:53:52 -08:00
everything = client.search_files(
tags=["system:everything"],
2026-01-16 01:47:00 -08:00
return_hashes=True,
return_file_ids=False,
2025-12-14 00:53:52 -08:00
return_file_count=False,
)
2026-01-16 01:47:00 -08:00
ids, hashes = _extract_search_ids(everything)
_add_candidates(ids, hashes)
if not candidate_file_ids and not candidate_hashes:
2025-12-14 00:53:52 -08:00
return []
2026-01-17 21:32:44 -08:00
needle_list: list[str] = []
if isinstance(needles, (list, tuple, set)):
for item in needles:
text = str(item or "").strip().lower()
if text and text not in needle_list:
needle_list.append(text)
if not needle_list:
needle = (url_value or "").strip().lower()
if needle:
needle_list = [needle]
2025-12-14 00:53:52 -08:00
chunk_size = 200
out: list[dict[str, Any]] = []
2026-01-16 01:47:00 -08:00
if scan_limit is None:
2025-12-14 00:53:52 -08:00
try:
2026-01-17 21:32:44 -08:00
if not want_any and needle_list:
if len(needle_list) > 1:
scan_limit = max(int(fetch_limit) * 20, 2000)
else:
scan_limit = max(200, min(int(fetch_limit), 400))
2026-01-16 01:47:00 -08:00
else:
scan_limit = max(int(fetch_limit) * 5, 1000)
2025-12-14 00:53:52 -08:00
except Exception:
2026-01-17 21:32:44 -08:00
scan_limit = 400 if (not want_any and needle_list) else 1000
2026-01-16 01:47:00 -08:00
if scan_limit is not None:
scan_limit = min(int(scan_limit), 10000)
scanned = 0
def _process_source(items: list[Any], kind: str) -> None:
nonlocal scanned
for start in range(0, len(items), chunk_size):
if len(out) >= fetch_limit:
return
if scan_limit is not None and scanned >= scan_limit:
return
chunk = items[start:start + chunk_size]
if scan_limit is not None:
remaining = scan_limit - scanned
if remaining <= 0:
return
if len(chunk) > remaining:
chunk = chunk[:remaining]
scanned += len(chunk)
try:
if kind == "hashes":
payload = client.fetch_file_metadata(
hashes=chunk,
include_file_url=True,
2026-01-24 01:38:12 -08:00
include_service_keys_to_tags=not minimal,
include_duration=not minimal,
include_size=not minimal,
include_mime=not minimal,
2026-01-16 01:47:00 -08:00
)
else:
payload = client.fetch_file_metadata(
file_ids=chunk,
include_file_url=True,
2026-01-24 01:38:12 -08:00
include_service_keys_to_tags=not minimal,
include_duration=not minimal,
include_size=not minimal,
include_mime=not minimal,
2026-01-16 01:47:00 -08:00
)
except Exception:
2025-12-14 00:53:52 -08:00
continue
2026-01-16 01:47:00 -08:00
metas = payload.get("metadata",
[]) if isinstance(payload,
dict) else []
if not isinstance(metas, list):
2025-12-14 00:53:52 -08:00
continue
2026-01-16 01:47:00 -08:00
for meta in metas:
2025-12-14 00:53:52 -08:00
if len(out) >= fetch_limit:
break
2026-01-16 01:47:00 -08:00
if not isinstance(meta, dict):
continue
urls = _extract_urls(meta)
if not urls:
continue
if want_any:
out.append(meta)
continue
2026-01-17 21:32:44 -08:00
if not needle_list:
2026-01-16 01:47:00 -08:00
continue
2026-01-17 21:32:44 -08:00
if any(any(n in u.lower() for n in needle_list) for u in urls):
2026-01-16 01:47:00 -08:00
out.append(meta)
continue
sources: list[tuple[str, list[Any]]] = []
if candidate_hashes:
sources.append(("hashes", candidate_hashes))
elif candidate_file_ids:
sources.append(("file_ids", candidate_file_ids))
for kind, items in sources:
if len(out) >= fetch_limit:
break
_process_source(items, kind)
2025-12-14 00:53:52 -08:00
return out
2026-01-24 09:11:05 -08:00
def _search_url_query_metadata(
url_query: str,
fetch_limit: int,
*,
minimal: bool = False,
) -> list[dict[str, Any]]:
"""Run a strict url:<pattern> search without falling back to system predicates."""
if not url_query:
return []
try:
payload = client.search_files(
tags=[url_query],
return_hashes=True,
return_file_ids=True,
)
except Exception:
return []
candidate_ids, candidate_hashes = _extract_search_ids(payload)
if not candidate_ids and not candidate_hashes:
return []
metas_out: list[dict[str, Any]] = []
chunk_size = 200
def _fetch_chunk(kind: Literal["file_ids", "hashes"], values: list[Any]) -> None:
nonlocal metas_out
if not values or len(metas_out) >= fetch_limit:
return
for start in range(0, len(values), chunk_size):
if len(metas_out) >= fetch_limit:
break
remaining = fetch_limit - len(metas_out)
if remaining <= 0:
break
end = start + min(chunk_size, remaining)
chunk = values[start:end]
if not chunk:
continue
try:
if kind == "file_ids":
metadata = client.fetch_file_metadata(
file_ids=chunk,
include_file_url=True,
include_service_keys_to_tags=False,
include_duration=False,
include_size=not minimal,
include_mime=False,
)
else:
metadata = client.fetch_file_metadata(
hashes=chunk,
include_file_url=True,
include_service_keys_to_tags=False,
include_duration=False,
include_size=not minimal,
include_mime=False,
)
except Exception:
continue
fetched = metadata.get("metadata", []) if isinstance(metadata, dict) else []
if not isinstance(fetched, list):
continue
for meta in fetched:
if len(metas_out) >= fetch_limit:
break
if not isinstance(meta, dict):
continue
metas_out.append(meta)
if candidate_ids:
_fetch_chunk("file_ids", candidate_ids)
if len(metas_out) < fetch_limit and candidate_hashes:
_fetch_chunk("hashes", candidate_hashes)
return metas_out[:fetch_limit]
2025-12-14 00:53:52 -08:00
query_lower = query.lower().strip()
2025-12-20 23:57:44 -08:00
# Support `ext:<value>` anywhere in the query. We filter results by the
# Hydrus metadata extension field.
def _normalize_ext_filter(value: str) -> str:
2025-12-29 17:05:03 -08:00
v = str(value or "").strip().lower().lstrip(".")
2025-12-20 23:57:44 -08:00
v = "".join(ch for ch in v if ch.isalnum())
return v
ext_filter: str | None = None
ext_only: bool = False
try:
m = re.search(r"\bext:([^\s,]+)", query_lower)
if not m:
m = re.search(r"\bextension:([^\s,]+)", query_lower)
if m:
ext_filter = _normalize_ext_filter(m.group(1)) or None
query_lower = re.sub(
r"\s*\b(?:ext|extension):[^\s,]+",
" ",
query_lower
)
2025-12-29 17:05:03 -08:00
query_lower = re.sub(r"\s{2,}", " ", query_lower).strip().strip(",")
2025-12-20 23:57:44 -08:00
query = query_lower
if ext_filter and not query_lower:
query = "*"
query_lower = "*"
ext_only = True
except Exception:
ext_filter = None
ext_only = False
# Split into meaningful terms for AND logic.
# Avoid punctuation tokens like '-' that would make matching brittle.
search_terms = [t for t in re.findall(r"[a-z0-9]+", query_lower) if t]
2025-12-14 00:53:52 -08:00
# Special case: url:* and url:<value>
metadata_list: list[dict[str, Any]] | None = None
2026-01-17 21:32:44 -08:00
pattern_hint_raw = kwargs.get("pattern_hint")
pattern_hints: list[str] = []
if isinstance(pattern_hint_raw, (list, tuple, set)):
for item in pattern_hint_raw:
text = str(item or "").strip().lower()
if text and text not in pattern_hints:
pattern_hints.append(text)
elif isinstance(pattern_hint_raw, str):
text = pattern_hint_raw.strip().lower()
if text:
pattern_hints.append(text)
pattern_hint = pattern_hints[0] if pattern_hints else ""
2026-01-19 06:24:09 -08:00
hashes: list[str] = []
file_ids: list[int] = []
2025-12-14 00:53:52 -08:00
if ":" in query_lower and not query_lower.startswith(":"):
namespace, pattern = query_lower.split(":", 1)
namespace = namespace.strip().lower()
pattern = pattern.strip()
if namespace == "url":
2026-01-24 09:11:05 -08:00
try:
fetch_limit_raw = int(limit) if limit else 100
except Exception:
fetch_limit_raw = 100
if url_only:
metadata_list = _search_url_query_metadata(
query_lower,
fetch_limit_raw,
minimal=minimal,
)
2025-12-14 00:53:52 -08:00
else:
2026-01-24 09:11:05 -08:00
if not pattern or pattern == "*":
if pattern_hints:
metadata_list = _iter_url_filtered_metadata(
None,
want_any=False,
fetch_limit=fetch_limit_raw,
needles=pattern_hints,
minimal=minimal,
2025-12-29 17:05:03 -08:00
)
2026-01-24 09:11:05 -08:00
else:
metadata_list = _iter_url_filtered_metadata(
None,
want_any=True,
fetch_limit=fetch_limit_raw,
minimal=minimal,
)
else:
def _clean_url_search_token(value: str | None) -> str:
token = str(value or "").strip().lower()
if not token:
return ""
return token.replace("*", "").replace("?", "")
# Fast-path: exact URL via /add_urls/get_url_files when a full URL is provided.
try:
if pattern.startswith("http://") or pattern.startswith(
"https://"):
from API.HydrusNetwork import HydrusRequestSpec
spec = HydrusRequestSpec(
method="GET",
endpoint="/add_urls/get_url_files",
query={
"url": pattern
},
2025-12-29 17:05:03 -08:00
)
2026-01-24 09:11:05 -08:00
response = client._perform_request(
spec
) # type: ignore[attr-defined]
hashes = []
file_ids = []
if isinstance(response, dict):
raw_hashes = response.get("hashes") or response.get(
"file_hashes"
)
if isinstance(raw_hashes, list):
hashes = [
str(h).strip() for h in raw_hashes
if isinstance(h, str) and str(h).strip()
]
raw_ids = response.get("file_ids")
if isinstance(raw_ids, list):
for item in raw_ids:
try:
file_ids.append(int(item))
except (TypeError, ValueError):
continue
if file_ids:
payload = client.fetch_file_metadata(
file_ids=file_ids,
include_file_url=True,
include_service_keys_to_tags=not minimal,
include_duration=not minimal,
include_size=not minimal,
include_mime=not minimal,
)
metas = (
payload.get("metadata",
[]) if isinstance(payload,
dict) else []
)
if isinstance(metas, list):
metadata_list = [
m for m in metas if isinstance(m, dict)
]
elif hashes:
payload = client.fetch_file_metadata(
hashes=hashes,
include_file_url=True,
include_service_keys_to_tags=not minimal,
include_duration=not minimal,
include_size=not minimal,
include_mime=not minimal,
)
metas = (
payload.get("metadata",
[]) if isinstance(payload,
dict) else []
)
if isinstance(metas, list):
metadata_list = [
m for m in metas if isinstance(m, dict)
]
except Exception:
metadata_list = None
# Fallback: substring scan
if metadata_list is None:
search_token = _clean_url_search_token(pattern_hint or pattern)
scan_limit_override: int | None = None
if search_token:
is_domain_only = ("://" not in search_token and "/" not in search_token)
if is_domain_only:
try:
scan_limit_override = max(fetch_limit_raw * 20, 2000)
except Exception:
scan_limit_override = 2000
metadata_list = _iter_url_filtered_metadata(
search_token,
want_any=False,
fetch_limit=fetch_limit_raw,
scan_limit=scan_limit_override,
needles=pattern_hints if pattern_hints else None,
minimal=minimal,
)
2026-01-23 19:21:06 -08:00
elif namespace == "system":
normalized_system_predicate = pattern.strip()
if normalized_system_predicate == "has url":
try:
fetch_limit = int(limit) if limit else 100
except Exception:
fetch_limit = 100
metadata_list = _iter_url_filtered_metadata(
None,
want_any=not bool(pattern_hints),
fetch_limit=fetch_limit,
needles=pattern_hints if pattern_hints else None,
2026-01-24 01:38:12 -08:00
minimal=minimal,
2026-01-23 19:21:06 -08:00
)
2025-12-14 00:53:52 -08:00
2025-12-11 19:04:02 -08:00
# Parse the query into tags
# "*" means "match all" - use system:everything tag in Hydrus
2025-12-20 23:57:44 -08:00
# If query has explicit namespace, use it as a tag search.
# If query is free-form, search BOTH:
# - title:*term* (title: is the only namespace searched implicitly)
# - *term* (freeform tags; we will filter out other namespace matches client-side)
tags: list[str] = []
freeform_union_search: bool = False
title_predicates: list[str] = []
freeform_predicates: list[str] = []
2025-12-11 19:04:02 -08:00
if query.strip() == "*":
tags = ["system:everything"]
2025-12-29 17:05:03 -08:00
elif ":" in query_lower:
2025-12-20 23:57:44 -08:00
tags = [query_lower]
2025-12-11 19:04:02 -08:00
else:
2025-12-20 23:57:44 -08:00
freeform_union_search = True
if search_terms:
# Hydrus supports wildcard matching primarily as a prefix (e.g., tag*).
# Use per-term prefix matching for both title: and freeform tags.
title_predicates = [f"title:{term}*" for term in search_terms]
freeform_predicates = [f"{term}*" for term in search_terms]
2025-12-11 19:04:02 -08:00
else:
2025-12-20 23:57:44 -08:00
# If we can't extract alnum terms, fall back to the raw query text.
title_predicates = [f"title:{query_lower}*"]
freeform_predicates = [f"{query_lower}*"]
2025-12-29 17:05:03 -08:00
2025-12-14 00:53:52 -08:00
# Search files with the tags (unless url: search already produced metadata)
2026-01-19 06:24:09 -08:00
results: list[dict[str, Any]] = []
2025-12-20 23:57:44 -08:00
2025-12-14 00:53:52 -08:00
if metadata_list is None:
2026-01-19 06:24:09 -08:00
file_ids = []
hashes = []
2025-12-20 23:57:44 -08:00
if freeform_union_search:
if not title_predicates and not freeform_predicates:
debug(f"{prefix} 0 result(s)")
return []
payloads: list[Any] = []
try:
payloads.append(
client.search_files(
tags=title_predicates,
return_hashes=True,
return_file_ids=True,
)
)
except Exception:
pass
2026-02-08 01:35:44 -08:00
# Extra pass: match a full title phrase when the query includes
# spaces or punctuation (e.g., "i've been down").
2025-12-20 23:57:44 -08:00
try:
2026-02-08 01:35:44 -08:00
if query_lower and query_lower != "*" and "*" not in query_lower:
if any(ch in query_lower for ch in (" ", "'", "-", "_")):
payloads.append(
client.search_files(
tags=[f"title:{query_lower}*"],
return_hashes=True,
return_file_ids=True,
)
)
except Exception:
pass
try:
title_ids, title_hashes = _extract_search_ids(
payloads[0] if payloads else None
2025-12-20 23:57:44 -08:00
)
2026-02-08 01:35:44 -08:00
# Optimization: for single-term queries, skip the freeform query
# to avoid duplicate requests.
single_term = bool(search_terms and len(search_terms) == 1)
if not single_term:
payloads.append(
client.search_files(
tags=freeform_predicates,
return_hashes=True,
return_file_ids=True,
)
)
2025-12-20 23:57:44 -08:00
except Exception:
pass
id_set: set[int] = set()
hash_set: set[str] = set()
for payload in payloads:
ids_part, hashes_part = _extract_search_ids(payload)
for fid in ids_part:
id_set.add(fid)
for hh in hashes_part:
hash_set.add(hh)
file_ids = list(id_set)
hashes = list(hash_set)
else:
if not tags:
debug(f"{prefix} 0 result(s)")
return []
search_result = client.search_files(
tags=tags,
return_hashes=True,
return_file_ids=True
2025-12-20 23:57:44 -08:00
)
file_ids, hashes = _extract_search_ids(search_result)
# Fast path: ext-only search. Avoid fetching metadata for an unbounded
# system:everything result set; fetch in chunks until we have enough.
if ext_only and ext_filter:
2026-01-19 06:24:09 -08:00
results = []
2025-12-20 23:57:44 -08:00
if not file_ids and not hashes:
debug(f"{prefix} 0 result(s)")
return []
# Prefer file_ids if available.
if file_ids:
chunk_size = 200
for start in range(0, len(file_ids), chunk_size):
if len(results) >= limit:
break
chunk = file_ids[start:start + chunk_size]
2025-12-20 23:57:44 -08:00
try:
payload = client.fetch_file_metadata(
file_ids=chunk,
include_service_keys_to_tags=True,
2026-01-11 02:26:39 -08:00
include_file_url=True,
2025-12-20 23:57:44 -08:00
include_duration=True,
include_size=True,
include_mime=True,
)
except Exception:
continue
metas = payload.get("metadata",
[]) if isinstance(payload,
dict) else []
2025-12-20 23:57:44 -08:00
if not isinstance(metas, list):
continue
for meta in metas:
if len(results) >= limit:
break
if not isinstance(meta, dict):
continue
mime_type = meta.get("mime")
2026-02-07 14:58:13 -08:00
ext = _resolve_ext_from_meta(meta, mime_type)
2025-12-20 23:57:44 -08:00
if _normalize_ext_filter(ext) != ext_filter:
continue
2025-12-14 00:53:52 -08:00
2025-12-20 23:57:44 -08:00
file_id = meta.get("file_id")
hash_hex = meta.get("hash")
2026-02-11 20:25:22 -08:00
size_val = meta.get("size")
if size_val is None:
size_val = meta.get("size_bytes")
try:
size = int(size_val) if size_val is not None else 0
except Exception:
size = 0
2025-12-20 23:57:44 -08:00
2026-02-11 20:25:22 -08:00
title, all_tags = self._extract_title_and_tags(meta, file_id)
2026-01-11 02:26:39 -08:00
# Use known URLs (source URLs) from Hydrus if available (matches get-url cmdlet)
item_url = meta.get("known_urls") or meta.get("urls") or meta.get("url") or []
if not item_url:
item_url = meta.get("file_url") or f"{self.URL.rstrip('/')}/view_file?hash={hash_hex}"
2026-02-07 14:58:13 -08:00
if isinstance(item_url, str) and "/view_file" in item_url:
item_url = self._append_access_key(item_url)
2026-01-11 02:26:39 -08:00
2025-12-20 23:57:44 -08:00
results.append(
{
"hash": hash_hex,
2026-01-11 02:26:39 -08:00
"url": item_url,
2025-12-20 23:57:44 -08:00
"name": title,
"title": title,
"size": size,
"size_bytes": size,
"store": self.NAME,
"tag": all_tags,
"file_id": file_id,
"mime": mime_type,
2026-02-07 14:58:13 -08:00
"ext": _resolve_ext_from_meta(meta, mime_type),
2025-12-20 23:57:44 -08:00
}
)
debug(f"{prefix} {len(results)} result(s)")
return results[:limit]
# If we only got hashes, fall back to the normal flow below.
2025-12-14 00:53:52 -08:00
if not file_ids and not hashes:
2025-12-16 23:23:43 -08:00
debug(f"{prefix} 0 result(s)")
2025-12-14 00:53:52 -08:00
return []
if file_ids:
2025-12-20 23:57:44 -08:00
metadata = client.fetch_file_metadata(
file_ids=file_ids,
include_service_keys_to_tags=True,
2026-01-11 02:26:39 -08:00
include_file_url=True,
2025-12-20 23:57:44 -08:00
include_duration=True,
include_size=True,
include_mime=True,
)
2025-12-14 00:53:52 -08:00
metadata_list = metadata.get("metadata", [])
elif hashes:
2025-12-20 23:57:44 -08:00
metadata = client.fetch_file_metadata(
hashes=hashes,
include_service_keys_to_tags=True,
2026-01-11 02:26:39 -08:00
include_file_url=True,
2025-12-20 23:57:44 -08:00
include_duration=True,
include_size=True,
include_mime=True,
)
2025-12-14 00:53:52 -08:00
metadata_list = metadata.get("metadata", [])
else:
metadata_list = []
2025-12-20 23:57:44 -08:00
# If our free-text searches produce nothing (or nothing survived downstream filtering), fallback to scanning.
if (not metadata_list) and (query_lower
!= "*") and (":" not in query_lower):
2025-12-20 23:57:44 -08:00
try:
search_result = client.search_files(
tags=["system:everything"],
return_hashes=True,
return_file_ids=True,
)
file_ids, hashes = _extract_search_ids(search_result)
if file_ids:
metadata = client.fetch_file_metadata(
file_ids=file_ids,
include_service_keys_to_tags=True,
2026-01-11 02:26:39 -08:00
include_file_url=True,
2025-12-20 23:57:44 -08:00
include_duration=True,
include_size=True,
include_mime=True,
)
metadata_list = metadata.get("metadata", [])
elif hashes:
metadata = client.fetch_file_metadata(
hashes=hashes,
include_service_keys_to_tags=True,
2026-01-11 02:26:39 -08:00
include_file_url=True,
2025-12-20 23:57:44 -08:00
include_duration=True,
include_size=True,
include_mime=True,
)
metadata_list = metadata.get("metadata", [])
except Exception:
pass
2025-12-14 00:53:52 -08:00
if not isinstance(metadata_list, list):
metadata_list = []
2025-12-16 01:45:01 -08:00
for meta in metadata_list:
2025-12-29 17:05:03 -08:00
if len(results) >= limit:
break
file_id = meta.get("file_id")
hash_hex = meta.get("hash")
2026-02-11 20:25:22 -08:00
size_val = meta.get("size")
if size_val is None:
size_val = meta.get("size_bytes")
try:
size = int(size_val) if size_val is not None else 0
except Exception:
size = 0
2025-12-29 17:05:03 -08:00
2026-02-11 20:25:22 -08:00
title, all_tags = self._extract_title_and_tags(meta, file_id)
2025-12-29 17:05:03 -08:00
# Prefer Hydrus-provided extension (e.g. ".webm"); fall back to MIME map.
mime_type = meta.get("mime")
2026-02-07 14:58:13 -08:00
ext = _resolve_ext_from_meta(meta, mime_type)
2025-12-29 17:05:03 -08:00
# Filter results based on query type
# If user provided explicit namespace (has ':'), don't do substring filtering
# Just include what the tag search returned
has_namespace = ":" in query_lower
2026-01-11 02:26:39 -08:00
# Use known URLs (source URLs) from Hydrus if available (matches get-url cmdlet)
item_url = meta.get("known_urls") or meta.get("urls") or meta.get("url") or []
if not item_url:
item_url = meta.get("file_url") or f"{self.URL.rstrip('/')}/view_file?hash={hash_hex}"
2026-02-07 14:58:13 -08:00
if isinstance(item_url, str) and "/view_file" in item_url:
item_url = self._append_access_key(item_url)
2026-01-11 02:26:39 -08:00
2025-12-29 17:05:03 -08:00
if has_namespace:
# Explicit namespace search - already filtered by Hydrus tag search
# Include this result as-is
results.append(
{
2025-12-11 19:04:02 -08:00
"hash": hash_hex,
2026-01-11 02:26:39 -08:00
"url": item_url,
2025-12-11 19:04:02 -08:00
"name": title,
"title": title,
"size": size,
"size_bytes": size,
2025-12-13 00:18:30 -08:00
"store": self.NAME,
2025-12-29 17:05:03 -08:00
"tag": all_tags,
2025-12-11 19:04:02 -08:00
"file_id": file_id,
"mime": mime_type,
"ext": ext,
2025-12-29 17:05:03 -08:00
}
)
else:
# Free-form search: check if search terms match title or FREEFORM tags.
# Do NOT implicitly match other namespace tags (except title:).
freeform_tags = [
t for t in all_tags
if isinstance(t, str) and t and (":" not in t)
2025-12-29 17:05:03 -08:00
]
searchable_text = (title + " " + " ".join(freeform_tags)).lower()
match = True
if query_lower != "*" and search_terms:
for term in search_terms:
if term not in searchable_text:
match = False
break
if match:
results.append(
{
2025-12-11 19:04:02 -08:00
"hash": hash_hex,
2026-01-11 02:26:39 -08:00
"url": item_url,
2025-12-11 19:04:02 -08:00
"name": title,
"title": title,
"size": size,
"size_bytes": size,
2025-12-13 00:18:30 -08:00
"store": self.NAME,
2025-12-11 23:21:45 -08:00
"tag": all_tags,
2025-12-11 19:04:02 -08:00
"file_id": file_id,
"mime": mime_type,
"ext": ext,
2025-12-29 17:05:03 -08:00
}
)
2025-12-16 23:23:43 -08:00
debug(f"{prefix} {len(results)} result(s)")
2025-12-20 23:57:44 -08:00
if ext_filter:
wanted = ext_filter
filtered: list[dict[str, Any]] = []
for item in results:
try:
if _normalize_ext_filter(str(item.get("ext") or "")) == wanted:
filtered.append(item)
except Exception:
continue
results = filtered
2025-12-11 19:04:02 -08:00
return results[:limit]
except Exception as exc:
log(f"❌ Hydrus search failed: {exc}", file=sys.stderr)
import traceback
2025-12-29 17:05:03 -08:00
2025-12-11 19:04:02 -08:00
traceback.print_exc(file=sys.stderr)
raise
def get_file(self, file_hash: str, **kwargs: Any) -> Path | str | None:
    """Return the local file system path if available, else a browser URL.

    IMPORTANT: this method must be side-effect free (do not auto-open a browser).
    Only explicit user actions (e.g. the get-file cmdlet) should open files.

    Args:
        file_hash: SHA256 hash (hex) of the file; normalized to lowercase.
        **kwargs: ``url=True`` forces the HTTP URL even when a local path
            exists (used by the 'get-file' cmdlet for interactive viewing).

    Returns:
        A local ``Path`` when the Hydrus file store is reachable on this
        machine, otherwise an authenticated ``/get_files/file`` URL.
    """
    file_hash = str(file_hash or "").strip().lower()
    debug(f"{self._log_prefix()} get_file(hash={file_hash[:12]}..., url={kwargs.get('url')})")

    def _browser_url() -> str:
        # Direct-download endpoint; the access key authenticates the request.
        base_url = str(self.URL).rstrip("/")
        access_key = str(self.API)
        return (
            f"{base_url}/get_files/file?hash={file_hash}&Hydrus-Client-API-Access-Key={access_key}"
        )

    # If 'url=True' is passed, we preference the browser URL even if a local
    # path is available.
    if kwargs.get("url"):
        browser_url = _browser_url()
        debug(f"{self._log_prefix()} get_file: returning browser URL per request: {browser_url}")
        return browser_url

    # Try to get the local disk path if possible (works if Hydrus is on same machine)
    server_path = None
    try:
        path_res = self._client.get_file_path(file_hash)
        if isinstance(path_res, dict) and "path" in path_res:
            server_path = path_res["path"]
        if server_path:
            local_path = Path(server_path)
            if local_path.exists():
                debug(f"{self._log_prefix()} get_file: found local path: {local_path}")
                return local_path
    except Exception as e:
        debug(f"{self._log_prefix()} get_file: could not resolve path from API: {e}")

    # If we found a path on the server but it's not locally accessible,
    # keep it for logging but continue to the browser URL fallback so the UI
    # can still open the file via the Hydrus web UI.
    if server_path:
        debug(
            f"{self._log_prefix()} get_file: server path not locally accessible, falling back to HTTP: {server_path}"
        )

    # Fallback to browser URL with access key
    browser_url = _browser_url()
    debug(f"{self._log_prefix()} get_file: falling back to url={browser_url}")
    return browser_url
2026-01-04 02:23:50 -08:00
def download_to_temp(
    self,
    file_hash: str,
    *,
    temp_root: Optional[Path] = None,
) -> Optional[Path]:
    """Download a Hydrus file to a temporary path for downstream uploads.

    Args:
        file_hash: SHA256 hash (64-char hex) of the file to download.
        temp_root: Optional directory to download into. When omitted, a
            fresh ``hydrus-file-*`` temp directory is created and removed
            again on failure; a caller-supplied root is never deleted.

    Returns:
        Path to the downloaded file, or ``None`` on any failure.
    """
    created_tmp = False
    base_tmp: Optional[Path] = None
    dest_path: Optional[Path] = None
    try:
        client = self._client
        if client is None:
            return None
        h = str(file_hash or "").strip().lower()
        # Only full SHA256 hex digests are valid Hydrus file identifiers.
        if len(h) != 64 or not all(ch in "0123456789abcdef" for ch in h):
            return None
        if temp_root is not None:
            base_tmp = Path(temp_root)
        else:
            base_tmp = Path(tempfile.mkdtemp(prefix="hydrus-file-"))
            created_tmp = True
        base_tmp.mkdir(parents=True, exist_ok=True)

        def _safe_filename(raw: str) -> str:
            # Replace characters that are invalid in Windows filenames.
            cleaned = re.sub(r"[\\/:*?\"<>|]", "_", str(raw or "")).strip()
            if not cleaned:
                return h
            return cleaned.strip(". ") or h

        # Prefer ext/title from metadata when available.
        fname = h
        ext_val = ""
        try:
            meta = self.get_metadata(h) or {}
            if isinstance(meta, dict):
                title_val = str(meta.get("title") or "").strip()
                if title_val:
                    fname = _safe_filename(title_val)
                ext_val = str(meta.get("ext") or "").strip().lstrip(".")
        except Exception:
            pass
        if not fname:
            fname = h
        if ext_val and not fname.lower().endswith(f".{ext_val.lower()}"):
            fname = f"{fname}.{ext_val}"
        try:
            file_url = client.file_url(h)
        except Exception:
            file_url = f"{self.URL.rstrip('/')}/get_files/file?hash={quote(h)}"
        dest_path = base_tmp / fname
        # Stream to disk so large files never have to fit in memory.
        stream_client = get_shared_httpx_client(timeout=60.0, verify_ssl=False)
        with stream_client.stream(
            "GET",
            file_url,
            headers={"Hydrus-Client-API-Access-Key": self.API},
            follow_redirects=True,
            timeout=60.0,
        ) as resp:
            resp.raise_for_status()
            with dest_path.open("wb") as fh:
                for chunk in resp.iter_bytes():
                    if chunk:
                        fh.write(chunk)
        if dest_path.exists():
            return dest_path
        if created_tmp:
            try:
                shutil.rmtree(base_tmp, ignore_errors=True)
            except Exception:
                pass
        return None
    except Exception as exc:
        log(f"{self._log_prefix()} download_to_temp failed: {exc}", file=sys.stderr)
        # Best-effort cleanup: drop a partially written file inside a
        # caller-supplied root, and remove any temp dir this call created.
        try:
            if dest_path is not None and temp_root is not None and dest_path.exists():
                dest_path.unlink()
        except Exception:
            pass
        try:
            if created_tmp and base_tmp is not None:
                shutil.rmtree(base_tmp, ignore_errors=True)
        except Exception:
            pass
        return None
2025-12-27 14:50:59 -08:00
def delete_file(self, file_identifier: str, **kwargs: Any) -> bool:
    """Delete a file from Hydrus, then clear the deletion record.

    This is used by the delete-file cmdlet when the item belongs to a
    HydrusNetwork store.
    """
    try:
        client = self._client
        if client is None:
            debug(f"{self._log_prefix()} delete_file: client unavailable")
            return False
        file_hash = str(file_identifier or "").strip().lower()
        is_sha256_hex = len(file_hash) == 64 and all(
            ch in "0123456789abcdef" for ch in file_hash
        )
        if not is_sha256_hex:
            debug(
                f"{self._log_prefix()} delete_file: invalid file hash '{file_identifier}'"
            )
            return False
        raw_reason = kwargs.get("reason")
        reason_text = None
        if isinstance(raw_reason, str) and raw_reason.strip():
            reason_text = str(raw_reason).strip()
        # 1) Delete file
        client.delete_files([file_hash], reason=reason_text)
        # 2) Clear deletion record (best-effort)
        try:
            client.clear_file_deletion_record([file_hash])
        except Exception as exc:
            debug(
                f"{self._log_prefix()} delete_file: clear_file_deletion_record failed: {exc}"
            )
        return True
    except Exception as exc:
        debug(f"{self._log_prefix()} delete_file failed: {exc}")
        return False
2025-12-11 19:04:02 -08:00
def get_metadata(self, file_hash: str, **kwargs: Any) -> Optional[Dict[str, Any]]:
    """Get metadata for a file from Hydrus by hash.

    Args:
        file_hash: SHA256 hash of the file (64-char hex string)

    Returns:
        Dict with metadata fields or None if not found
    """
    try:
        client = self._client
        if not client:
            debug(f"{self._log_prefix()} get_metadata: client unavailable")
            return None

        # Fetch file metadata with the fields we need for CLI display.
        response = client.fetch_file_metadata(
            hashes=[file_hash],
            include_service_keys_to_tags=True,
            include_file_url=True,
            include_duration=True,
            include_size=True,
            include_mime=True,
        )
        if not response or not response.get("metadata"):
            return None
        row = response["metadata"][0]
        # Hydrus can return placeholder metadata rows for unknown hashes.
        if not isinstance(row, dict) or row.get("file_id") is None:
            return None

        # Derive a display title from the first non-empty "title:" tag.
        title = f"Hydrus_{file_hash[:12]}"
        for candidate_tag in self._extract_tags_from_hydrus_meta(
            row,
            service_key=None,
            service_name="my tags",
        ):
            tag_text = str(candidate_tag or "").strip()
            if not tag_text:
                continue
            if tag_text.lower().startswith("title:"):
                value = tag_text.split(":", 1)[1].strip()
                if value:
                    title = value
                    break

        # Hydrus may return mime as an int enum, or sometimes a human label.
        raw_mime = row.get("mime")
        filetype_human = (
            row.get("filetype_human") or row.get("mime_human")
            or row.get("mime_string")
        )

        # Determine ext: prefer Hydrus metadata ext, then filetype_human (when
        # it looks like an ext), then title suffix, then file path suffix.
        ext = str(row.get("ext") or "").strip().lstrip(".")
        if not ext:
            label = str(filetype_human or "").strip().lstrip(".").lower()
            # Treat simple labels like "mp4", "m4a", "webm" as extensions.
            if label and label != "unknown filetype" and label.isalnum() and len(label) <= 8:
                ext = label
        if not ext and isinstance(title, str) and "." in title:
            try:
                ext = Path(title).suffix.lstrip(".")
            except Exception:
                ext = ""
        if not ext:
            try:
                path_payload = client.get_file_path(file_hash)
                if isinstance(path_payload, dict):
                    p = path_payload.get("path")
                    if isinstance(p, str) and p.strip():
                        ext = Path(p.strip()).suffix.lstrip(".")
            except Exception:
                ext = ""

        def _lookup_mime(ext_value: str) -> str:
            # Best-effort reverse lookup: extension -> first known MIME type.
            ext_clean = str(ext_value or "").strip().lstrip(".").lower()
            if not ext_clean:
                return ""
            try:
                for category in mime_maps.values():
                    info = category.get(ext_clean)
                    if isinstance(info, dict):
                        mimes = info.get("mimes")
                        if isinstance(mimes, list) and mimes:
                            return str(mimes[0])
            except Exception:
                return ""
            return ""

        # Normalize to a MIME string for CLI output.
        # Avoid passing through human labels like "unknown filetype".
        mime_type = ""
        for source in (raw_mime, filetype_human):
            if mime_type:
                break
            if isinstance(source, str):
                candidate = source.strip()
                if "/" in candidate and candidate.lower() != "unknown filetype":
                    mime_type = candidate
        if not mime_type:
            mime_type = _lookup_mime(ext)

        # Normalize size/duration to stable scalar types.
        raw_size = row.get("size")
        if raw_size is None:
            raw_size = row.get("size_bytes")
        try:
            size_int = int(raw_size) if raw_size is not None else 0
        except Exception:
            size_int = 0
        raw_duration = row.get("duration")
        if raw_duration is None:
            raw_duration = row.get("duration_ms")
        try:
            dur_int: int | None = int(raw_duration) if raw_duration is not None else None
        except Exception:
            dur_int = None

        raw_urls = row.get("known_urls") or row.get("urls") or row.get("url") or []
        if isinstance(raw_urls, str):
            stripped = raw_urls.strip()
            url_list = [stripped] if stripped else []
        elif isinstance(raw_urls, list):
            url_list = [
                str(u).strip() for u in raw_urls
                if isinstance(u, str) and str(u).strip()
            ]
        else:
            url_list = []

        return {
            "hash": file_hash,
            "title": title,
            "ext": ext,
            "size": size_int,
            "mime": mime_type,
            # Keep raw fields available for troubleshooting/other callers.
            "hydrus_mime": raw_mime,
            "filetype_human": filetype_human,
            "duration_ms": dur_int,
            "url": url_list,
        }
    except Exception as exc:
        debug(f"{self._log_prefix()} get_metadata failed: {exc}")
        return None
def get_tag(self, file_identifier: str, **kwargs: Any) -> Tuple[List[str], str]:
    """Get tags for a file from Hydrus by hash.

    Args:
        file_identifier: File hash (SHA256 hex string)
        **kwargs: Optional service_name parameter

    Returns:
        Tuple of (tags_list, source_description)
        where source is always "hydrus"
    """
    try:
        file_hash = str(file_identifier or "").strip().lower()
        hex_digits = "0123456789abcdef"
        if len(file_hash) != 64 or any(ch not in hex_digits for ch in file_hash):
            debug(
                f"{self._log_prefix()} get_tags: invalid file hash '{file_identifier}'"
            )
            return [], "unknown"

        # Get Hydrus client and service info
        client = self._client
        if not client:
            debug(f"{self._log_prefix()} get_tags: client unavailable")
            return [], "unknown"

        # Fetch file metadata
        payload = client.fetch_file_metadata(
            hashes=[file_hash],
            include_service_keys_to_tags=True,
            include_file_url=True
        )
        rows = payload.get("metadata") if isinstance(payload, dict) else None
        if not isinstance(rows, list) or not rows:
            debug(
                f"{self._log_prefix()} get_tags: no metadata for hash {file_hash}"
            )
            return [], "unknown"
        meta = rows[0] if isinstance(rows[0], dict) else None
        if not isinstance(meta, dict) or meta.get("file_id") is None:
            debug(
                f"{self._log_prefix()} get_tags: invalid metadata for hash {file_hash}"
            )
            return [], "unknown"

        service_name = kwargs.get("service_name") or "my tags"
        service_key = self._get_service_key(service_name)

        # Extract tags from metadata, normalized to lowercase.
        raw_tags = self._extract_tags_from_hydrus_meta(meta, service_key, service_name)
        normalized = [
            str(t).strip().lower() for t in raw_tags
            if isinstance(t, str) and t.strip()
        ]
        return normalized, "hydrus"
    except Exception as exc:
        debug(f"{self._log_prefix()} get_tags failed: {exc}")
        return [], "unknown"
def add_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool:
    """Add tags to a Hydrus file.

    Incoming tags are lower-cased, and namespaced tags overwrite existing
    tags sharing the same namespace (via compute_namespaced_tag_overwrite),
    so e.g. a new ``title:`` replaces the old one.

    Args:
        file_identifier: SHA256 hash (64-char hex) of the file.
        tags: Tags to add; non-string/blank entries are ignored.
        **kwargs: Optional ``service_name`` (default "my tags") and
            ``existing_tags`` (skips a round-trip fetching current tags).

    Returns:
        True when the tag state was updated (or no change was needed),
        False when every mutation attempt failed.
    """
    try:
        client = self._client
        if client is None:
            debug(f"{self._log_prefix()} add_tag: client unavailable")
            return False
        file_hash = str(file_identifier or "").strip().lower()
        if len(file_hash) != 64 or not all(ch in "0123456789abcdef"
                                           for ch in file_hash):
            debug(
                f"{self._log_prefix()} add_tag: invalid file hash '{file_identifier}'"
            )
            return False
        service_name = kwargs.get("service_name") or "my tags"
        incoming_tags = [
            str(t).strip().lower() for t in (tags or [])
            if isinstance(t, str) and str(t).strip()
        ]
        if not incoming_tags:
            return True
        # Resolve the file's current tags unless the caller supplied them.
        existing_tags = kwargs.get("existing_tags")
        if existing_tags is None:
            try:
                existing_tags, _src = self.get_tag(file_hash)
            except Exception:
                existing_tags = []
        if isinstance(existing_tags, (list, tuple, set)):
            existing_tags = [
                str(t).strip().lower() for t in existing_tags
                if isinstance(t, str) and str(t).strip()
            ]
        else:
            existing_tags = []
        from SYS.metadata import compute_namespaced_tag_overwrite

        tags_to_remove, tags_to_add, _merged = compute_namespaced_tag_overwrite(
            existing_tags, incoming_tags
        )
        if not tags_to_add and not tags_to_remove:
            return True
        # Preferred path: a single atomic mutation keyed by service key.
        service_key: Optional[str] = self._get_service_key(service_name)
        if service_key:
            try:
                client.mutate_tags_by_key(
                    file_hash,
                    service_key,
                    add_tags=tags_to_add,
                    remove_tags=tags_to_remove,
                )
                return True
            except Exception as exc:
                debug(
                    f"{self._log_prefix()} add_tag: mutate_tags_by_key failed: {exc}"
                )
        # Fallback: separate best-effort delete/add calls by service name.
        did_any = False
        if tags_to_remove:
            try:
                client.delete_tag(file_hash, tags_to_remove, service_name)
                did_any = True
            except Exception as exc:
                debug(
                    f"{self._log_prefix()} add_tag: delete_tag failed: {exc}"
                )
        if tags_to_add:
            try:
                client.add_tag(file_hash, tags_to_add, service_name)
                did_any = True
            except Exception as exc:
                debug(
                    f"{self._log_prefix()} add_tag: add_tag failed: {exc}"
                )
        return did_any
    except Exception as exc:
        debug(f"{self._log_prefix()} add_tag failed: {exc}")
        return False
def delete_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool:
    """Delete tags from a Hydrus file."""
    try:
        client = self._client
        if client is None:
            debug(f"{self._log_prefix()} delete_tag: client unavailable")
            return False
        file_hash = str(file_identifier or "").strip().lower()
        is_sha256_hex = len(file_hash) == 64 and all(
            ch in "0123456789abcdef" for ch in file_hash
        )
        if not is_sha256_hex:
            debug(
                f"{self._log_prefix()} delete_tag: invalid file hash '{file_identifier}'"
            )
            return False
        service_name = kwargs.get("service_name") or "my tags"
        if isinstance(tags, (list, tuple)):
            raw_list = list(tags)
        else:
            raw_list = [str(tags)]
        # Keep only non-blank string tags, normalized to lowercase.
        tag_list = []
        for t in raw_list:
            if isinstance(t, str) and str(t).strip():
                tag_list.append(str(t).strip().lower())
        if not tag_list:
            return False
        client.delete_tag(file_hash, tag_list, service_name)
        return True
    except Exception as exc:
        debug(f"{self._log_prefix()} delete_tag failed: {exc}")
        return False
def get_url(self, file_identifier: str, **kwargs: Any) -> List[str]:
2025-12-29 17:05:03 -08:00
"""Get known url for a Hydrus file."""
2025-12-11 19:04:02 -08:00
try:
client = self._client
2025-12-12 21:55:38 -08:00
file_hash = str(file_identifier or "").strip().lower()
if len(file_hash) != 64 or not all(ch in "0123456789abcdef"
for ch in file_hash):
2025-12-12 21:55:38 -08:00
return []
payload = client.fetch_file_metadata(
hashes=[file_hash],
2026-01-11 02:26:39 -08:00
include_file_url=True
)
2025-12-11 19:04:02 -08:00
items = payload.get("metadata") if isinstance(payload, dict) else None
if not isinstance(items, list) or not items:
return []
meta = items[0] if isinstance(items[0],
dict) else {}
2025-12-16 01:45:01 -08:00
raw_urls: Any = meta.get("known_urls"
) or meta.get("urls") or meta.get("url") or []
2026-01-12 04:05:52 -08:00
def _is_url(s: Any) -> bool:
if not isinstance(s, str):
return False
v = s.strip().lower()
return bool(v and ("://" in v or v.startswith(("magnet:", "torrent:"))))
2025-12-16 01:45:01 -08:00
if isinstance(raw_urls, str):
val = raw_urls.strip()
2026-01-12 04:05:52 -08:00
return [val] if _is_url(val) else []
2025-12-16 01:45:01 -08:00
if isinstance(raw_urls, list):
out: list[str] = []
for u in raw_urls:
if not isinstance(u, str):
continue
u = u.strip()
2026-01-12 04:05:52 -08:00
if u and _is_url(u):
2025-12-16 01:45:01 -08:00
out.append(u)
return out
return []
2025-12-11 19:04:02 -08:00
except Exception as exc:
2025-12-16 23:23:43 -08:00
debug(f"{self._log_prefix()} get_url failed: {exc}")
2025-12-11 19:04:02 -08:00
return []
2025-12-30 05:48:01 -08:00
def get_url_info(self, url: str, **kwargs: Any) -> dict[str, Any] | None:
"""Return Hydrus URL info for a single URL (Hydrus-only helper).
Uses: GET /add_urls/get_url_info
"""
try:
client = self._client
if client is None:
return None
u = str(url or "").strip()
if not u:
return None
try:
return client.get_url_info(u) # type: ignore[attr-defined]
except Exception:
from API.HydrusNetwork import HydrusRequestSpec
spec = HydrusRequestSpec(
method="GET",
endpoint="/add_urls/get_url_info",
query={
"url": u
},
)
response = client._perform_request(spec) # type: ignore[attr-defined]
return response if isinstance(response, dict) else None
except Exception as exc:
debug(f"{self._log_prefix()} get_url_info failed: {exc}")
return None
2025-12-11 19:04:02 -08:00
def add_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
2025-12-29 17:05:03 -08:00
"""Associate one or more url with a Hydrus file."""
2025-12-11 19:04:02 -08:00
try:
client = self._client
if client is None:
2025-12-16 23:23:43 -08:00
debug(f"{self._log_prefix()} add_url: client unavailable")
2025-12-11 19:04:02 -08:00
return False
for u in url:
client.associate_url(file_identifier, u)
return True
except Exception as exc:
2025-12-16 23:23:43 -08:00
debug(f"{self._log_prefix()} add_url failed: {exc}")
2025-12-11 19:04:02 -08:00
return False
2025-12-20 23:57:44 -08:00
def add_url_bulk(self, items: List[tuple[str, List[str]]], **kwargs: Any) -> bool:
"""Bulk associate urls with Hydrus files.
This is a best-effort convenience wrapper used by cmdlets to batch url associations.
Hydrus' client API is still called per (hash,url) pair, but this consolidates the
cmdlet-level control flow so url association can be deferred until the end.
"""
try:
client = self._client
if client is None:
debug(f"{self._log_prefix()} add_url_bulk: client unavailable")
return False
any_success = False
2025-12-29 17:05:03 -08:00
for file_identifier, urls in items or []:
2025-12-20 23:57:44 -08:00
h = str(file_identifier or "").strip().lower()
if len(h) != 64:
continue
2025-12-29 17:05:03 -08:00
for u in urls or []:
2025-12-20 23:57:44 -08:00
s = str(u or "").strip()
if not s:
continue
try:
client.associate_url(h, s)
any_success = True
except Exception:
continue
return any_success
except Exception as exc:
debug(f"{self._log_prefix()} add_url_bulk failed: {exc}")
return False
2026-01-19 03:14:30 -08:00
def add_tags_bulk(self, items: List[tuple[str, List[str]]], *, service_name: str | None = None) -> bool:
"""Bulk add tags to multiple Hydrus files.
Groups files by identical tag-sets and uses the Hydrus `mutate_tags_by_key`
call (when a service key is available) to reduce the number of API calls.
Falls back to per-hash `add_tag` calls if necessary.
"""
try:
client = self._client
if client is None:
debug(f"{self._log_prefix()} add_tags_bulk: client unavailable")
return False
# Group by canonical tag set (sorted tuple) to batch identical additions
buckets: dict[tuple[str, ...], list[str]] = {}
for file_identifier, tags in items or []:
h = str(file_identifier or "").strip().lower()
if len(h) != 64:
continue
tlist = [str(t).strip().lower() for t in (tags or []) if isinstance(t, str) and str(t).strip()]
if not tlist:
continue
key = tuple(sorted(tlist))
buckets.setdefault(key, []).append(h)
if not buckets:
return False
svc = service_name or "my tags"
service_key = self._get_service_key(svc)
any_success = False
for tag_tuple, hashes in buckets.items():
try:
if service_key:
# Mutate tags for many hashes in a single request
2026-02-11 20:25:22 -08:00
client.mutate_tags_by_key(hash=hashes, service_key=service_key, add_tags=list(tag_tuple))
2026-01-19 03:14:30 -08:00
any_success = True
continue
except Exception as exc:
debug(f"{self._log_prefix()} add_tags_bulk mutate failed for tags {tag_tuple}: {exc}")
# Fallback: apply per-hash add_tag
for h in hashes:
try:
client.add_tag(h, list(tag_tuple), svc)
any_success = True
except Exception:
continue
return any_success
except Exception as exc:
debug(f"{self._log_prefix()} add_tags_bulk failed: {exc}")
return False
2025-12-11 19:04:02 -08:00
def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
2025-12-29 17:05:03 -08:00
"""Delete one or more url from a Hydrus file."""
2025-12-11 19:04:02 -08:00
try:
client = self._client
if client is None:
2025-12-16 23:23:43 -08:00
debug(f"{self._log_prefix()} delete_url: client unavailable")
2025-12-11 19:04:02 -08:00
return False
for u in url:
client.delete_url(file_identifier, u)
return True
except Exception as exc:
2025-12-16 23:23:43 -08:00
debug(f"{self._log_prefix()} delete_url failed: {exc}")
2025-12-11 19:04:02 -08:00
return False
2025-12-12 21:55:38 -08:00
def get_note(self, file_identifier: str, **kwargs: Any) -> Dict[str, str]:
"""Get notes for a Hydrus file (default note service only)."""
try:
client = self._client
if client is None:
2025-12-16 23:23:43 -08:00
debug(f"{self._log_prefix()} get_note: client unavailable")
2025-12-12 21:55:38 -08:00
return {}
file_hash = str(file_identifier or "").strip().lower()
if len(file_hash) != 64 or not all(ch in "0123456789abcdef"
for ch in file_hash):
2025-12-12 21:55:38 -08:00
return {}
payload = client.fetch_file_metadata(hashes=[file_hash], include_notes=True)
items = payload.get("metadata") if isinstance(payload, dict) else None
if not isinstance(items, list) or not items:
return {}
meta = items[0] if isinstance(items[0], dict) else None
if not isinstance(meta, dict):
return {}
notes_payload = meta.get("notes")
if isinstance(notes_payload, dict):
return {
str(k): str(v or "")
for k, v in notes_payload.items() if str(k).strip()
}
2025-12-12 21:55:38 -08:00
return {}
except Exception as exc:
2025-12-16 23:23:43 -08:00
debug(f"{self._log_prefix()} get_note failed: {exc}")
2025-12-12 21:55:38 -08:00
return {}
def set_note(
self,
file_identifier: str,
name: str,
text: str,
**kwargs: Any
) -> bool:
2025-12-12 21:55:38 -08:00
"""Set a named note for a Hydrus file (default note service only)."""
try:
client = self._client
if client is None:
2025-12-16 23:23:43 -08:00
debug(f"{self._log_prefix()} set_note: client unavailable")
2025-12-12 21:55:38 -08:00
return False
file_hash = str(file_identifier or "").strip().lower()
if len(file_hash) != 64 or not all(ch in "0123456789abcdef"
for ch in file_hash):
2025-12-12 21:55:38 -08:00
return False
note_name = str(name or "").strip()
if not note_name:
return False
note_text = str(text or "")
client.set_notes(file_hash,
{
note_name: note_text
})
2025-12-12 21:55:38 -08:00
return True
except Exception as exc:
2025-12-16 23:23:43 -08:00
debug(f"{self._log_prefix()} set_note failed: {exc}")
2025-12-12 21:55:38 -08:00
return False
def delete_note(self, file_identifier: str, name: str, **kwargs: Any) -> bool:
"""Delete a named note for a Hydrus file (default note service only)."""
try:
client = self._client
if client is None:
2025-12-16 23:23:43 -08:00
debug(f"{self._log_prefix()} delete_note: client unavailable")
2025-12-12 21:55:38 -08:00
return False
file_hash = str(file_identifier or "").strip().lower()
if len(file_hash) != 64 or not all(ch in "0123456789abcdef"
for ch in file_hash):
2025-12-12 21:55:38 -08:00
return False
note_name = str(name or "").strip()
if not note_name:
return False
client.delete_notes(file_hash, [note_name])
return True
except Exception as exc:
2025-12-16 23:23:43 -08:00
debug(f"{self._log_prefix()} delete_note failed: {exc}")
2025-12-12 21:55:38 -08:00
return False
2025-12-11 19:04:02 -08:00
    @staticmethod
    def _extract_tags_from_hydrus_meta(
        meta: Dict[str, Any],
        service_key: Optional[str],
        service_name: str
    ) -> List[str]:
        """Extract current tags from Hydrus metadata dict.

        Prefers display_tags (includes siblings/parents, excludes deleted).
        Falls back to storage_tags status '0' (current).

        Lookup cascade (each stage only runs if earlier ones found nothing,
        except the statuses-map stage which always runs when present):
          1. service data keyed directly by `service_key`
          2. service data whose embedded name matches `service_name`
          3. the "service_keys_to_statuses_to_tags" map, filtered to the
             requested service when a key/name match is known
          4. every service in the payload
          5. a top-level "tags_flat" list, always appended if present
        Results are de-duplicated case-insensitively, first occurrence wins.
        """
        tags_payload = meta.get("tags")
        if not isinstance(tags_payload, dict):
            return []

        desired_service_name = str(service_name or "").strip().lower()
        desired_service_key = str(service_key).strip() if service_key is not None else ""

        def _append_tag(out: List[str], value: Any) -> None:
            # Coerce bytes/str to a stripped string; drop anything else.
            text = ""
            if isinstance(value, bytes):
                try:
                    text = value.decode("utf-8", errors="ignore")
                except Exception:
                    text = str(value)
            elif isinstance(value, str):
                text = value
            if not text:
                return
            cleaned = text.strip()
            if cleaned:
                out.append(cleaned)

        def _collect_current(container: Any, out: List[str]) -> None:
            # A container is either a plain list of tags, or a dict of
            # status -> tags where status "0"/0 means "current".
            if isinstance(container, list):
                for tag in container:
                    _append_tag(out, tag)
                return
            if isinstance(container, dict):
                current = container.get("0")
                if current is None:
                    current = container.get(0)
                if isinstance(current, list):
                    for tag in current:
                        _append_tag(out, tag)

        def _collect_service_data(service_data: Any, out: List[str]) -> None:
            # Pull both display tags (preferred) and storage tags from one
            # service entry; several historical key spellings are tried.
            if not isinstance(service_data, dict):
                return
            display = (
                service_data.get("display_tags")
                or service_data.get("display_friendly_tags")
                or service_data.get("display")
            )
            _collect_current(display, out)

            storage = (
                service_data.get("storage_tags")
                or service_data.get("statuses_to_tags")
                or service_data.get("tags")
            )
            _collect_current(storage, out)

        collected: List[str] = []
        # Stage 1: direct lookup by service key.
        if desired_service_key:
            _collect_service_data(tags_payload.get(desired_service_key), collected)
        # Stage 2: match a service entry by its embedded name.
        if not collected and desired_service_name:
            for maybe_service in tags_payload.values():
                if not isinstance(maybe_service, dict):
                    continue
                svc_name = str(
                    maybe_service.get("service_name")
                    or maybe_service.get("name")
                    or ""
                ).strip().lower()
                if svc_name and svc_name == desired_service_name:
                    _collect_service_data(maybe_service, collected)
        # Stage 3: the explicit key->status->tags map (newer API shape).
        names_map = tags_payload.get("service_keys_to_names")
        statuses_map = tags_payload.get("service_keys_to_statuses_to_tags")
        if isinstance(statuses_map, dict):
            keys_to_collect: List[str] = []
            if desired_service_key:
                keys_to_collect.append(desired_service_key)
            if desired_service_name and isinstance(names_map, dict):
                for raw_key, raw_name in names_map.items():
                    if str(raw_name or "").strip().lower() == desired_service_name:
                        keys_to_collect.append(str(raw_key))
            # Empty filter set means "no known match" -> take every key.
            keys_filter = {k for k in keys_to_collect if k}
            for raw_key, status_payload in statuses_map.items():
                raw_key_text = str(raw_key)
                if keys_filter and raw_key_text not in keys_filter:
                    continue
                _collect_current(status_payload, collected)
        # Stage 4: last resort — scan every service entry.
        if not collected:
            for maybe_service in tags_payload.values():
                _collect_service_data(maybe_service, collected)
        # Stage 5: a flat top-level tag list, if the payload carries one.
        top_level_tags = meta.get("tags_flat")
        if isinstance(top_level_tags, list):
            _collect_current(top_level_tags, collected)
        # De-duplicate case-insensitively while preserving first occurrence
        # (and its original casing).
        deduped: List[str] = []
        seen: set[str] = set()
        for tag in collected:
            key = str(tag).strip().lower()
            if not key or key in seen:
                continue
            seen.add(key)
            deduped.append(tag)
        return deduped
@staticmethod
def _extract_title_and_tags(meta: Dict[str, Any], file_id: Any) -> Tuple[str, List[str]]:
title = f"Hydrus File {file_id}"
tags = HydrusNetwork._extract_tags_from_hydrus_meta(
meta,
service_key=None,
service_name="my tags",
)
2025-12-29 17:05:03 -08:00
2026-02-11 20:25:22 -08:00
normalized_tags: List[str] = []
seen: set[str] = set()
for raw_tag in tags:
text = str(raw_tag or "").strip().lower()
if not text or text in seen:
continue
seen.add(text)
normalized_tags.append(text)
if text.startswith("title:") and title == f"Hydrus File {file_id}":
value = text.split(":", 1)[1].strip()
if value:
title = value
return title, normalized_tags