2426 lines
100 KiB
Python
2426 lines
100 KiB
Python
from __future__ import annotations
|
|
|
|
import re
|
|
import sys
|
|
import tempfile
|
|
import shutil
|
|
from pathlib import Path
|
|
from typing import Any, Dict, List, Literal, Optional, Sequence, Tuple
|
|
|
|
from urllib.parse import quote
|
|
|
|
import httpx
|
|
from API.httpx_shared import get_shared_httpx_client
|
|
|
|
from SYS.logger import debug, log
|
|
from SYS.utils_constant import mime_maps
|
|
|
|
# Every extension declared anywhere in mime_maps, with dots stripped,
# used to validate extension hints coming from Hydrus metadata.
_KNOWN_EXTS = {
    str(entry.get("ext") or "").strip().lstrip(".")
    for bucket in mime_maps.values()
    for entry in bucket.values()
    if isinstance(entry, dict) and entry.get("ext")
}
def _resolve_ext_from_meta(meta: Dict[str, Any], mime_type: Optional[str]) -> str:
    """Derive a file extension from a Hydrus metadata row.

    Resolution order: explicit extension fields, then a human-readable
    filetype label, then a MIME-type lookup against mime_maps.  Returns ""
    when nothing usable is found.
    """
    candidate = ""

    # 1) Explicit extension fields, first hit wins.
    for field in ("ext", "file_ext", "extension", "file_extension"):
        value = meta.get(field)
        if value:
            candidate = str(value).strip().lstrip(".")
            break
    # Reject extensions we don't recognize, and the placeholder "ebook".
    if candidate and candidate not in _KNOWN_EXTS:
        candidate = ""
    if candidate.lower() == "ebook":
        candidate = ""

    # 2) Fall back to a human-readable filetype label.
    if not candidate:
        human = (
            meta.get("filetype_human")
            or meta.get("mime_human")
            or meta.get("mime_string")
            or meta.get("filetype")
        )
        label = str(human or "").strip().lstrip(".").lower()
        if label and label != "unknown filetype":
            if label.isalnum() and len(label) <= 8:
                # Short alphanumeric labels are accepted verbatim (e.g. "mp3").
                candidate = label
            else:
                try:
                    # Otherwise pick the first known-extension token in the label.
                    for word in re.findall(r"[a-z0-9]+", label):
                        if word in _KNOWN_EXTS:
                            candidate = word
                            break
                except Exception:
                    pass

    # 3) Normalize / repair the MIME type if the caller's value is unusable.
    if not candidate:
        if not mime_type or not isinstance(mime_type, str) or "/" not in mime_type:
            mime_type = (
                meta.get("mime_string")
                or meta.get("mime_human")
                or meta.get("filetype_mime")
                or mime_type
            )

    # 4) Map the MIME type through mime_maps.
    if not candidate and mime_type:
        try:
            mime_type = str(mime_type).split(";", 1)[0].strip().lower()
        except Exception:
            mime_type = str(mime_type)
        for bucket in mime_maps.values():
            for info in bucket.values():
                if mime_type in info.get("mimes", []):
                    candidate = str(info.get("ext", "")).strip().lstrip(".")
                    break
            if candidate:
                break
    return candidate
|
from Store._base import Store
|
|
|
|
_HYDRUS_INIT_CHECK_CACHE: dict[tuple[str,
|
|
str],
|
|
tuple[bool,
|
|
Optional[str]]] = {}
|
|
|
|
|
|
class HydrusNetwork(Store):
    """File storage backend for Hydrus client.

    Each instance represents a specific Hydrus client connection.
    Maintains its own HydrusClient.

    Required configuration (see config_schema): NAME, URL, API.
    """
@classmethod
|
|
def config_schema(cls) -> List[Dict[str, Any]]:
|
|
return [
|
|
{
|
|
"key": "NAME",
|
|
"label": "Store Name",
|
|
"default": "",
|
|
"placeholder": "e.g. home_hydrus",
|
|
"required": True
|
|
},
|
|
{
|
|
"key": "URL",
|
|
"label": "Hydrus URL",
|
|
"default": "http://127.0.0.1:45869",
|
|
"placeholder": "http://127.0.0.1:45869",
|
|
"required": True
|
|
},
|
|
{
|
|
"key": "API",
|
|
"label": "API Key",
|
|
"default": "",
|
|
"required": True,
|
|
"secret": True
|
|
}
|
|
]
|
|
|
|
    @property
    def is_remote(self) -> bool:
        # Hydrus is reached over HTTP, so this store is always remote.
        return True
    @property
    def prefer_defer_tags(self) -> bool:
        # Prefer deferring tag writes (they are applied after upload via add_tag).
        return True
def _log_prefix(self) -> str:
|
|
store_name = getattr(self, "NAME", None) or "unknown"
|
|
return f"[hydrusnetwork:{store_name}]"
|
|
|
|
def _append_access_key(self, url: str) -> str:
|
|
if not url:
|
|
return url
|
|
if "access_key=" in url:
|
|
return url
|
|
if not getattr(self, "API", None):
|
|
return url
|
|
separator = "&" if "?" in url else "?"
|
|
return f"{url}{separator}access_key={quote(str(self.API))}"
|
|
|
|
def __new__(cls, *args: Any, **kwargs: Any) -> "HydrusNetwork":
|
|
instance = super().__new__(cls)
|
|
name = kwargs.get("NAME")
|
|
api = kwargs.get("API")
|
|
url = kwargs.get("URL")
|
|
if name is not None:
|
|
setattr(instance, "NAME", str(name))
|
|
if api is not None:
|
|
setattr(instance, "API", str(api))
|
|
if url is not None:
|
|
setattr(instance, "URL", str(url))
|
|
return instance
|
|
|
|
    def __init__(
        self,
        instance_name: Optional[str] = None,
        api_key: Optional[str] = None,
        url: Optional[str] = None,
        *,
        NAME: Optional[str] = None,
        API: Optional[str] = None,
        URL: Optional[str] = None,
    ) -> None:
        """Initialize Hydrus storage backend.

        Args:
            instance_name: Name of this Hydrus instance (e.g., 'home', 'work')
            api_key: Hydrus Client API access key
            url: Hydrus client URL (e.g., 'http://192.168.1.230:45869')
            NAME: Keyword alias for instance_name (config-style construction)
            API: Keyword alias for api_key
            URL: Keyword alias for url

        Raises:
            ValueError: If NAME, API, or URL is missing.
            RuntimeError: If the Hydrus endpoint is unreachable or the access
                key is rejected (also raised from the module-level failure cache).
        """
        from API.HydrusNetwork import HydrusNetwork as HydrusClient

        # Accept config-style keyword aliases for the positional parameters.
        if instance_name is None and NAME is not None:
            instance_name = str(NAME)
        if api_key is None and API is not None:
            api_key = str(API)
        if url is None and URL is not None:
            url = str(URL)

        if not instance_name or not api_key or not url:
            raise ValueError("HydrusNetwork requires NAME, API, and URL")

        self.NAME = instance_name
        self.API = api_key
        # Normalize URL so endpoint paths can be appended safely.
        self.URL = url.rstrip("/")

        # Total count (best-effort, used for startup diagnostics)
        self.total_count: Optional[int] = None

        # Self health-check: validate the URL is reachable and the access key is accepted.
        # This MUST NOT attempt to acquire a session key.
        cache_key = (self.URL, self.API)
        cached = _HYDRUS_INIT_CHECK_CACHE.get(cache_key)
        if cached is not None:
            # A previous construction already probed this endpoint; reuse its verdict.
            ok, err = cached
            if not ok:
                raise RuntimeError(
                    f"Hydrus '{self.NAME}' unavailable: {err or 'Unavailable'}"
                )
        else:
            api_version_url = f"{self.URL}/api_version"
            verify_key_url = f"{self.URL}/verify_access_key"
            try:
                client = get_shared_httpx_client(timeout=5.0, verify_ssl=False)
                version_resp = client.get(api_version_url, follow_redirects=True)
                version_resp.raise_for_status()
                version_payload = version_resp.json()
                if not isinstance(version_payload, dict):
                    raise RuntimeError(
                        "Hydrus /api_version returned an unexpected response"
                    )

                verify_resp = client.get(
                    verify_key_url,
                    headers={
                        "Hydrus-Client-API-Access-Key": self.API
                    },
                    follow_redirects=True,
                )
                verify_resp.raise_for_status()
                verify_payload = verify_resp.json()
                if not isinstance(verify_payload, dict):
                    raise RuntimeError(
                        "Hydrus /verify_access_key returned an unexpected response"
                    )

                _HYDRUS_INIT_CHECK_CACHE[cache_key] = (True, None)
            except Exception as exc:
                # Record the failure so later constructions fail fast without re-probing.
                err = str(exc)
                _HYDRUS_INIT_CHECK_CACHE[cache_key] = (False, err)
                raise RuntimeError(f"Hydrus '{self.NAME}' unavailable: {err}") from exc

        # Create a persistent client for this instance (auth via access key by default).
        self._client = HydrusClient(
            url=self.URL,
            access_key=self.API,
            instance_name=self.NAME
        )

        # Per-instance cache for _get_service_key lookups (name -> service key).
        self._service_key_cache: Dict[str, Optional[str]] = {}

        # Best-effort total count (used for startup diagnostics). Avoid heavy payloads.
        # Some Hydrus setups appear to return no count via the CBOR client for this endpoint,
        # so prefer a direct JSON request with a short timeout.
        # NOTE: Disabled to avoid unnecessary API call during init; count will be retrieved on first search/list if needed.
        # try:
        #     self.get_total_count(refresh=True)
        # except Exception:
        #     pass
def _get_service_key(self, service_name: str, *, refresh: bool = False) -> Optional[str]:
|
|
"""Resolve (and cache) the Hydrus service key for the given service name."""
|
|
normalized = str(service_name or "my tags").strip()
|
|
if not normalized:
|
|
normalized = "my tags"
|
|
cache_key = normalized.lower()
|
|
if not refresh and cache_key in self._service_key_cache:
|
|
return self._service_key_cache[cache_key]
|
|
|
|
client = self._client
|
|
if client is None:
|
|
self._service_key_cache[cache_key] = None
|
|
return None
|
|
|
|
try:
|
|
from API import HydrusNetwork as hydrus_wrapper
|
|
|
|
resolved = hydrus_wrapper.get_tag_service_key(client, normalized)
|
|
except Exception:
|
|
resolved = None
|
|
|
|
self._service_key_cache[cache_key] = resolved
|
|
return resolved
|
|
|
|
def get_total_count(self, *, refresh: bool = False) -> Optional[int]:
|
|
"""Best-effort total file count for this Hydrus instance.
|
|
|
|
Intended for diagnostics (e.g., REPL startup checks). This should be fast,
|
|
and it MUST NOT raise.
|
|
"""
|
|
if self.total_count is not None and not refresh:
|
|
return self.total_count
|
|
|
|
# 1) Prefer a direct JSON request (fast + avoids CBOR edge cases).
|
|
try:
|
|
import json as _json
|
|
|
|
url = f"{self.URL}/get_files/search_files"
|
|
params = {
|
|
"tags": _json.dumps(["system:everything"]),
|
|
"return_hashes": "false",
|
|
"return_file_ids": "false",
|
|
"return_file_count": "true",
|
|
}
|
|
headers = {
|
|
"Hydrus-Client-API-Access-Key": self.API,
|
|
"Accept": "application/json",
|
|
}
|
|
client = get_shared_httpx_client(timeout=5.0, verify_ssl=False)
|
|
resp = client.get(url, params=params, headers=headers, follow_redirects=True)
|
|
resp.raise_for_status()
|
|
payload = resp.json()
|
|
|
|
count_val = None
|
|
if isinstance(payload, dict):
|
|
count_val = payload.get("file_count")
|
|
if count_val is None:
|
|
count_val = payload.get("file_count_inclusive")
|
|
if count_val is None:
|
|
count_val = payload.get("num_files")
|
|
if isinstance(count_val, int):
|
|
self.total_count = count_val
|
|
return self.total_count
|
|
except Exception as exc:
|
|
debug(
|
|
f"{self._log_prefix()} total count (json) unavailable: {exc}",
|
|
file=sys.stderr
|
|
)
|
|
|
|
# 2) Fallback to the API client (CBOR).
|
|
try:
|
|
payload = self._client.search_files(
|
|
tags=["system:everything"],
|
|
return_hashes=False,
|
|
return_file_ids=False,
|
|
return_file_count=True,
|
|
)
|
|
count_val = None
|
|
if isinstance(payload, dict):
|
|
count_val = payload.get("file_count")
|
|
if count_val is None:
|
|
count_val = payload.get("file_count_inclusive")
|
|
if count_val is None:
|
|
count_val = payload.get("num_files")
|
|
if isinstance(count_val, int):
|
|
self.total_count = count_val
|
|
return self.total_count
|
|
except Exception as exc:
|
|
debug(
|
|
f"{self._log_prefix()} total count (client) unavailable: {exc}",
|
|
file=sys.stderr
|
|
)
|
|
|
|
return self.total_count
|
|
|
|
    def name(self) -> str:
        """Return the configured store name (NAME)."""
        return self.NAME
    def get_name(self) -> str:
        """Return the configured store name (alias of name())."""
        return self.NAME
def set_relationship(self, alt_hash: str, king_hash: str, kind: str = "alt") -> bool:
|
|
"""Persist a relationship via the Hydrus client API for this backend instance."""
|
|
try:
|
|
alt_norm = str(alt_hash or "").strip().lower()
|
|
king_norm = str(king_hash or "").strip().lower()
|
|
if len(alt_norm) != 64 or len(king_norm) != 64 or alt_norm == king_norm:
|
|
return False
|
|
|
|
client = getattr(self, "_client", None)
|
|
if client is None or not hasattr(client, "set_relationship"):
|
|
return False
|
|
|
|
client.set_relationship(alt_norm, king_norm, str(kind or "alt"))
|
|
return True
|
|
except Exception:
|
|
return False
|
|
|
|
@staticmethod
|
|
def _has_current_file_service(meta: Dict[str, Any]) -> bool:
|
|
services = meta.get("file_services")
|
|
if not isinstance(services, dict):
|
|
return False
|
|
current = services.get("current")
|
|
if isinstance(current, dict):
|
|
return any(bool(v) for v in current.values())
|
|
if isinstance(current, list):
|
|
return len(current) > 0
|
|
return False
|
|
|
|
def add_file(self, file_path: Path, **kwargs: Any) -> str:
|
|
"""Upload file to Hydrus with full metadata support.
|
|
|
|
Args:
|
|
file_path: Path to the file to upload
|
|
tag: Optional list of tag values to add
|
|
url: Optional list of url to associate with the file
|
|
title: Optional title (will be added as 'title:value' tag)
|
|
|
|
Returns:
|
|
File hash from Hydrus
|
|
|
|
Raises:
|
|
Exception: If upload fails
|
|
"""
|
|
from SYS.utils import sha256_file
|
|
|
|
tag_list = kwargs.get("tag", [])
|
|
url = kwargs.get("url", [])
|
|
title = kwargs.get("title")
|
|
|
|
# Add title to tags if provided and not already present
|
|
if title:
|
|
title_tag = f"title:{title}".strip().lower()
|
|
if not any(str(candidate).lower().startswith("title:")
|
|
for candidate in tag_list):
|
|
tag_list = [title_tag] + list(tag_list)
|
|
|
|
# Hydrus is lowercase-only tags; normalize here for consistency.
|
|
tag_list = [
|
|
str(t).strip().lower() for t in (tag_list or [])
|
|
if isinstance(t, str) and str(t).strip()
|
|
]
|
|
|
|
try:
|
|
# Compute file hash (or use hint from kwargs to avoid redundant IO)
|
|
file_hash = kwargs.get("hash") or kwargs.get("file_hash")
|
|
if not file_hash:
|
|
file_hash = sha256_file(file_path)
|
|
|
|
debug(f"{self._log_prefix()} file hash: {file_hash}")
|
|
|
|
# Use persistent client with session key
|
|
client = self._client
|
|
if client is None:
|
|
raise Exception("Hydrus client unavailable")
|
|
|
|
# Check if file already exists in Hydrus.
|
|
# IMPORTANT: some Hydrus deployments can return a metadata record (file_id)
|
|
# even when the file is not in any current file service (e.g. trashed/missing).
|
|
# Only treat as a real duplicate if it is in a current file service.
|
|
file_exists = False
|
|
try:
|
|
metadata = client.fetch_file_metadata(
|
|
hashes=[file_hash],
|
|
include_service_keys_to_tags=False,
|
|
include_file_services=True,
|
|
include_is_trashed=True,
|
|
include_file_url=True,
|
|
include_duration=False,
|
|
include_size=True,
|
|
include_mime=True,
|
|
)
|
|
if metadata and isinstance(metadata, dict):
|
|
metas = metadata.get("metadata", [])
|
|
if isinstance(metas, list) and metas:
|
|
# Hydrus returns placeholder rows for unknown hashes.
|
|
# Only treat as a real duplicate if it has a concrete file_id AND
|
|
# appears in a current file service.
|
|
for meta in metas:
|
|
if not isinstance(meta, dict):
|
|
continue
|
|
if meta.get("file_id") is None:
|
|
continue
|
|
# Preferred: use file_services.current.
|
|
if isinstance(meta.get("file_services"), dict):
|
|
if self._has_current_file_service(meta):
|
|
file_exists = True
|
|
break
|
|
continue
|
|
|
|
# Fallback: if Hydrus doesn't return file_services, only treat as
|
|
# existing when the metadata looks like a real file (non-zero size).
|
|
size_val = meta.get("size")
|
|
if size_val is None:
|
|
size_val = meta.get("size_bytes")
|
|
try:
|
|
size_int = int(size_val) if size_val is not None else 0
|
|
except Exception:
|
|
size_int = 0
|
|
if size_int > 0:
|
|
file_exists = True
|
|
break
|
|
if file_exists:
|
|
debug(
|
|
f"{self._log_prefix()} Duplicate detected - file already in Hydrus with hash: {file_hash}"
|
|
)
|
|
except Exception as exc:
|
|
debug(f"{self._log_prefix()} metadata fetch failed: {exc}")
|
|
|
|
# If Hydrus reports an existing file, it may be in trash. Best-effort restore it to 'my files'.
|
|
# Then re-check that it is actually in a current file service; if not, we'll proceed to upload.
|
|
if file_exists:
|
|
try:
|
|
client.undelete_files([file_hash])
|
|
except Exception:
|
|
pass
|
|
|
|
try:
|
|
metadata2 = client.fetch_file_metadata(
|
|
hashes=[file_hash],
|
|
include_service_keys_to_tags=False,
|
|
include_file_services=True,
|
|
include_is_trashed=True,
|
|
include_file_url=False,
|
|
include_duration=False,
|
|
include_size=False,
|
|
include_mime=False,
|
|
)
|
|
metas2 = metadata2.get("metadata", []) if isinstance(metadata2, dict) else []
|
|
if isinstance(metas2, list) and metas2:
|
|
still_current = False
|
|
for meta in metas2:
|
|
if not isinstance(meta, dict):
|
|
continue
|
|
if meta.get("file_id") is None:
|
|
continue
|
|
if isinstance(meta.get("file_services"), dict):
|
|
if self._has_current_file_service(meta):
|
|
still_current = True
|
|
break
|
|
continue
|
|
|
|
size_val = meta.get("size")
|
|
if size_val is None:
|
|
size_val = meta.get("size_bytes")
|
|
try:
|
|
size_int = int(size_val) if size_val is not None else 0
|
|
except Exception:
|
|
size_int = 0
|
|
if size_int > 0:
|
|
still_current = True
|
|
break
|
|
if not still_current:
|
|
file_exists = False
|
|
except Exception:
|
|
# If re-check fails, keep prior behavior (avoid forcing uploads in unknown states)
|
|
pass
|
|
|
|
# Upload file if not already present
|
|
if not file_exists:
|
|
debug(
|
|
f"{self._log_prefix()} Uploading: {file_path.name}"
|
|
)
|
|
response = client.add_file(file_path)
|
|
|
|
# Extract hash from response
|
|
hydrus_hash: Optional[str] = None
|
|
if isinstance(response, dict):
|
|
hydrus_hash = response.get("hash") or response.get("file_hash")
|
|
if not hydrus_hash:
|
|
hashes = response.get("hashes")
|
|
if isinstance(hashes, list) and hashes:
|
|
hydrus_hash = hashes[0]
|
|
|
|
if isinstance(hydrus_hash, (bytes, bytearray)):
|
|
try:
|
|
hydrus_hash = bytes(hydrus_hash).hex()
|
|
except Exception:
|
|
hydrus_hash = None
|
|
|
|
if hydrus_hash:
|
|
try:
|
|
hydrus_hash = str(hydrus_hash).strip().lower()
|
|
except Exception:
|
|
hydrus_hash = None
|
|
|
|
if not hydrus_hash or len(str(hydrus_hash)) != 64:
|
|
debug(
|
|
f"{self._log_prefix()} Hydrus response hash missing/invalid; using precomputed hash"
|
|
)
|
|
hydrus_hash = file_hash
|
|
|
|
if not hydrus_hash:
|
|
raise Exception(f"Hydrus response missing file hash: {response}")
|
|
|
|
file_hash = hydrus_hash
|
|
debug(f"{self._log_prefix()} hash: {file_hash}")
|
|
|
|
# Add tags if provided (both for new and existing files)
|
|
if tag_list:
|
|
try:
|
|
# Use default tag service
|
|
service_name = "my tags"
|
|
except Exception:
|
|
service_name = "my tags"
|
|
|
|
try:
|
|
debug(
|
|
f"{self._log_prefix()} Adding {len(tag_list)} tag(s): {tag_list}"
|
|
)
|
|
client.add_tag(file_hash, tag_list, service_name)
|
|
debug(
|
|
f"{self._log_prefix()} Tags added via '{service_name}'"
|
|
)
|
|
except Exception as exc:
|
|
log(
|
|
f"{self._log_prefix()} ⚠️ Failed to add tags: {exc}",
|
|
file=sys.stderr
|
|
)
|
|
|
|
# Associate url if provided (both for new and existing files)
|
|
if url:
|
|
debug(
|
|
f"{self._log_prefix()} Associating {len(url)} URL(s) with file"
|
|
)
|
|
for url in url:
|
|
if url:
|
|
try:
|
|
client.associate_url(file_hash, str(url))
|
|
debug(f"{self._log_prefix()} Associated URL: {url}")
|
|
except Exception as exc:
|
|
log(
|
|
f"{self._log_prefix()} ⚠️ Failed to associate URL {url}: {exc}",
|
|
file=sys.stderr,
|
|
)
|
|
|
|
return file_hash
|
|
|
|
except Exception as exc:
|
|
log(f"{self._log_prefix()} ❌ upload failed: {exc}", file=sys.stderr)
|
|
raise
|
|
|
|
def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]:
|
|
"""Search Hydrus database for files matching query.
|
|
|
|
Args:
|
|
query: Search query (tags, filenames, hashes, etc.)
|
|
limit: Maximum number of results to return (default: 100)
|
|
|
|
Returns:
|
|
List of dicts with 'name', 'hash', 'size', 'tags' fields
|
|
|
|
Example:
|
|
results = storage["hydrus"].search("artist:john_doe music")
|
|
results = storage["hydrus"].search("Simple Man")
|
|
"""
|
|
limit = kwargs.get("limit", 100)
|
|
minimal = bool(kwargs.get("minimal", False))
|
|
url_only = bool(kwargs.get("url_only", False))
|
|
|
|
try:
|
|
client = self._client
|
|
if client is None:
|
|
raise Exception("Hydrus client unavailable")
|
|
|
|
prefix = self._log_prefix()
|
|
debug(f"{prefix} Searching for: {query}")
|
|
|
|
def _extract_urls(meta_obj: Any) -> list[str]:
|
|
if not isinstance(meta_obj, dict):
|
|
return []
|
|
raw = meta_obj.get("known_urls")
|
|
if raw is None:
|
|
raw = meta_obj.get("url")
|
|
if raw is None:
|
|
raw = meta_obj.get("urls")
|
|
if isinstance(raw, str):
|
|
val = raw.strip()
|
|
return [val] if val else []
|
|
if isinstance(raw, list):
|
|
out: list[str] = []
|
|
for item in raw:
|
|
if not isinstance(item, str):
|
|
continue
|
|
s = item.strip()
|
|
if s:
|
|
out.append(s)
|
|
return out
|
|
return []
|
|
|
|
def _extract_search_ids(payload: Any) -> tuple[list[int], list[str]]:
|
|
if not isinstance(payload, dict):
|
|
return [], []
|
|
raw_ids = payload.get("file_ids", [])
|
|
raw_hashes = payload.get("hashes", [])
|
|
ids_out: list[int] = []
|
|
hashes_out: list[str] = []
|
|
if isinstance(raw_ids, list):
|
|
for item in raw_ids:
|
|
try:
|
|
if isinstance(item, (int, float)):
|
|
ids_out.append(int(item))
|
|
continue
|
|
if isinstance(item, str) and item.strip().isdigit():
|
|
ids_out.append(int(item.strip()))
|
|
except Exception:
|
|
continue
|
|
if isinstance(raw_hashes, list):
|
|
for item in raw_hashes:
|
|
try:
|
|
candidate = str(item or "").strip().lower()
|
|
if candidate:
|
|
hashes_out.append(candidate)
|
|
except Exception:
|
|
continue
|
|
return ids_out, hashes_out
|
|
|
|
def _iter_url_filtered_metadata(
|
|
url_value: str | None,
|
|
want_any: bool,
|
|
fetch_limit: int,
|
|
scan_limit: int | None = None,
|
|
needles: Optional[Sequence[str]] = None,
|
|
*,
|
|
minimal: bool = False,
|
|
) -> list[dict[str, Any]]:
|
|
"""Best-effort URL search by scanning Hydrus metadata with include_file_url=True."""
|
|
|
|
candidate_file_ids: list[int] = []
|
|
candidate_hashes: list[str] = []
|
|
seen_file_ids: set[int] = set()
|
|
seen_hashes: set[str] = set()
|
|
|
|
def _add_candidates(ids: list[int], hashes: list[str]) -> None:
|
|
for fid in ids:
|
|
if fid in seen_file_ids:
|
|
continue
|
|
seen_file_ids.add(fid)
|
|
candidate_file_ids.append(fid)
|
|
for hh in hashes:
|
|
if hh in seen_hashes:
|
|
continue
|
|
seen_hashes.add(hh)
|
|
candidate_hashes.append(hh)
|
|
|
|
predicate_supported = getattr(self, "_has_url_predicate", None)
|
|
if predicate_supported is not False:
|
|
try:
|
|
predicate = "system:has url"
|
|
url_search = client.search_files(
|
|
tags=[predicate],
|
|
return_hashes=True,
|
|
return_file_ids=False,
|
|
return_file_count=False,
|
|
)
|
|
ids, hashes = _extract_search_ids(url_search)
|
|
_add_candidates(ids, hashes)
|
|
self._has_url_predicate = True
|
|
except Exception as exc:
|
|
try:
|
|
from API.HydrusNetwork import HydrusRequestError
|
|
|
|
if isinstance(exc, HydrusRequestError) and getattr(exc, "status", None) == 400:
|
|
self._has_url_predicate = False
|
|
except Exception:
|
|
pass
|
|
|
|
if not candidate_file_ids and not candidate_hashes:
|
|
everything = client.search_files(
|
|
tags=["system:everything"],
|
|
return_hashes=True,
|
|
return_file_ids=False,
|
|
return_file_count=False,
|
|
)
|
|
ids, hashes = _extract_search_ids(everything)
|
|
_add_candidates(ids, hashes)
|
|
|
|
if not candidate_file_ids and not candidate_hashes:
|
|
return []
|
|
|
|
needle_list: list[str] = []
|
|
if isinstance(needles, (list, tuple, set)):
|
|
for item in needles:
|
|
text = str(item or "").strip().lower()
|
|
if text and text not in needle_list:
|
|
needle_list.append(text)
|
|
if not needle_list:
|
|
needle = (url_value or "").strip().lower()
|
|
if needle:
|
|
needle_list = [needle]
|
|
chunk_size = 200
|
|
out: list[dict[str, Any]] = []
|
|
if scan_limit is None:
|
|
try:
|
|
if not want_any and needle_list:
|
|
if len(needle_list) > 1:
|
|
scan_limit = max(int(fetch_limit) * 20, 2000)
|
|
else:
|
|
scan_limit = max(200, min(int(fetch_limit), 400))
|
|
else:
|
|
scan_limit = max(int(fetch_limit) * 5, 1000)
|
|
except Exception:
|
|
scan_limit = 400 if (not want_any and needle_list) else 1000
|
|
if scan_limit is not None:
|
|
scan_limit = min(int(scan_limit), 10000)
|
|
scanned = 0
|
|
|
|
def _process_source(items: list[Any], kind: str) -> None:
|
|
nonlocal scanned
|
|
for start in range(0, len(items), chunk_size):
|
|
if len(out) >= fetch_limit:
|
|
return
|
|
if scan_limit is not None and scanned >= scan_limit:
|
|
return
|
|
chunk = items[start:start + chunk_size]
|
|
if scan_limit is not None:
|
|
remaining = scan_limit - scanned
|
|
if remaining <= 0:
|
|
return
|
|
if len(chunk) > remaining:
|
|
chunk = chunk[:remaining]
|
|
scanned += len(chunk)
|
|
try:
|
|
if kind == "hashes":
|
|
payload = client.fetch_file_metadata(
|
|
hashes=chunk,
|
|
include_file_url=True,
|
|
include_service_keys_to_tags=not minimal,
|
|
include_duration=not minimal,
|
|
include_size=not minimal,
|
|
include_mime=not minimal,
|
|
)
|
|
else:
|
|
payload = client.fetch_file_metadata(
|
|
file_ids=chunk,
|
|
include_file_url=True,
|
|
include_service_keys_to_tags=not minimal,
|
|
include_duration=not minimal,
|
|
include_size=not minimal,
|
|
include_mime=not minimal,
|
|
)
|
|
except Exception:
|
|
continue
|
|
|
|
metas = payload.get("metadata",
|
|
[]) if isinstance(payload,
|
|
dict) else []
|
|
if not isinstance(metas, list):
|
|
continue
|
|
|
|
for meta in metas:
|
|
if len(out) >= fetch_limit:
|
|
break
|
|
if not isinstance(meta, dict):
|
|
continue
|
|
urls = _extract_urls(meta)
|
|
if not urls:
|
|
continue
|
|
if want_any:
|
|
out.append(meta)
|
|
continue
|
|
if not needle_list:
|
|
continue
|
|
if any(any(n in u.lower() for n in needle_list) for u in urls):
|
|
out.append(meta)
|
|
continue
|
|
|
|
sources: list[tuple[str, list[Any]]] = []
|
|
if candidate_hashes:
|
|
sources.append(("hashes", candidate_hashes))
|
|
elif candidate_file_ids:
|
|
sources.append(("file_ids", candidate_file_ids))
|
|
|
|
for kind, items in sources:
|
|
if len(out) >= fetch_limit:
|
|
break
|
|
_process_source(items, kind)
|
|
|
|
return out
|
|
|
|
def _search_url_query_metadata(
|
|
url_query: str,
|
|
fetch_limit: int,
|
|
*,
|
|
minimal: bool = False,
|
|
) -> list[dict[str, Any]]:
|
|
"""Run a strict url:<pattern> search without falling back to system predicates."""
|
|
|
|
if not url_query:
|
|
return []
|
|
|
|
try:
|
|
payload = client.search_files(
|
|
tags=[url_query],
|
|
return_hashes=True,
|
|
return_file_ids=True,
|
|
)
|
|
except Exception:
|
|
return []
|
|
|
|
candidate_ids, candidate_hashes = _extract_search_ids(payload)
|
|
if not candidate_ids and not candidate_hashes:
|
|
return []
|
|
|
|
metas_out: list[dict[str, Any]] = []
|
|
chunk_size = 200
|
|
|
|
def _fetch_chunk(kind: Literal["file_ids", "hashes"], values: list[Any]) -> None:
|
|
nonlocal metas_out
|
|
if not values or len(metas_out) >= fetch_limit:
|
|
return
|
|
for start in range(0, len(values), chunk_size):
|
|
if len(metas_out) >= fetch_limit:
|
|
break
|
|
remaining = fetch_limit - len(metas_out)
|
|
if remaining <= 0:
|
|
break
|
|
end = start + min(chunk_size, remaining)
|
|
chunk = values[start:end]
|
|
if not chunk:
|
|
continue
|
|
try:
|
|
if kind == "file_ids":
|
|
metadata = client.fetch_file_metadata(
|
|
file_ids=chunk,
|
|
include_file_url=True,
|
|
include_service_keys_to_tags=False,
|
|
include_duration=False,
|
|
include_size=not minimal,
|
|
include_mime=False,
|
|
)
|
|
else:
|
|
metadata = client.fetch_file_metadata(
|
|
hashes=chunk,
|
|
include_file_url=True,
|
|
include_service_keys_to_tags=False,
|
|
include_duration=False,
|
|
include_size=not minimal,
|
|
include_mime=False,
|
|
)
|
|
except Exception:
|
|
continue
|
|
|
|
fetched = metadata.get("metadata", []) if isinstance(metadata, dict) else []
|
|
if not isinstance(fetched, list):
|
|
continue
|
|
for meta in fetched:
|
|
if len(metas_out) >= fetch_limit:
|
|
break
|
|
if not isinstance(meta, dict):
|
|
continue
|
|
metas_out.append(meta)
|
|
|
|
if candidate_ids:
|
|
_fetch_chunk("file_ids", candidate_ids)
|
|
if len(metas_out) < fetch_limit and candidate_hashes:
|
|
_fetch_chunk("hashes", candidate_hashes)
|
|
|
|
return metas_out[:fetch_limit]
|
|
|
|
query_lower = query.lower().strip()
|
|
|
|
# Support `ext:<value>` anywhere in the query. We filter results by the
|
|
# Hydrus metadata extension field.
|
|
def _normalize_ext_filter(value: str) -> str:
|
|
v = str(value or "").strip().lower().lstrip(".")
|
|
v = "".join(ch for ch in v if ch.isalnum())
|
|
return v
|
|
|
|
ext_filter: str | None = None
|
|
ext_only: bool = False
|
|
try:
|
|
m = re.search(r"\bext:([^\s,]+)", query_lower)
|
|
if not m:
|
|
m = re.search(r"\bextension:([^\s,]+)", query_lower)
|
|
if m:
|
|
ext_filter = _normalize_ext_filter(m.group(1)) or None
|
|
query_lower = re.sub(
|
|
r"\s*\b(?:ext|extension):[^\s,]+",
|
|
" ",
|
|
query_lower
|
|
)
|
|
query_lower = re.sub(r"\s{2,}", " ", query_lower).strip().strip(",")
|
|
query = query_lower
|
|
if ext_filter and not query_lower:
|
|
query = "*"
|
|
query_lower = "*"
|
|
ext_only = True
|
|
except Exception:
|
|
ext_filter = None
|
|
ext_only = False
|
|
|
|
# Split into meaningful terms for AND logic.
|
|
# Avoid punctuation tokens like '-' that would make matching brittle.
|
|
search_terms = [t for t in re.findall(r"[a-z0-9]+", query_lower) if t]
|
|
|
|
# Special case: url:* and url:<value>
|
|
metadata_list: list[dict[str, Any]] | None = None
|
|
pattern_hint_raw = kwargs.get("pattern_hint")
|
|
pattern_hints: list[str] = []
|
|
if isinstance(pattern_hint_raw, (list, tuple, set)):
|
|
for item in pattern_hint_raw:
|
|
text = str(item or "").strip().lower()
|
|
if text and text not in pattern_hints:
|
|
pattern_hints.append(text)
|
|
elif isinstance(pattern_hint_raw, str):
|
|
text = pattern_hint_raw.strip().lower()
|
|
if text:
|
|
pattern_hints.append(text)
|
|
pattern_hint = pattern_hints[0] if pattern_hints else ""
|
|
|
|
hashes: list[str] = []
|
|
file_ids: list[int] = []
|
|
|
|
if ":" in query_lower and not query_lower.startswith(":"):
|
|
namespace, pattern = query_lower.split(":", 1)
|
|
namespace = namespace.strip().lower()
|
|
pattern = pattern.strip()
|
|
if namespace == "url":
|
|
try:
|
|
fetch_limit_raw = int(limit) if limit else 100
|
|
except Exception:
|
|
fetch_limit_raw = 100
|
|
if url_only:
|
|
metadata_list = _search_url_query_metadata(
|
|
query_lower,
|
|
fetch_limit_raw,
|
|
minimal=minimal,
|
|
)
|
|
else:
|
|
if not pattern or pattern == "*":
|
|
if pattern_hints:
|
|
metadata_list = _iter_url_filtered_metadata(
|
|
None,
|
|
want_any=False,
|
|
fetch_limit=fetch_limit_raw,
|
|
needles=pattern_hints,
|
|
minimal=minimal,
|
|
)
|
|
else:
|
|
metadata_list = _iter_url_filtered_metadata(
|
|
None,
|
|
want_any=True,
|
|
fetch_limit=fetch_limit_raw,
|
|
minimal=minimal,
|
|
)
|
|
else:
|
|
def _clean_url_search_token(value: str | None) -> str:
|
|
token = str(value or "").strip().lower()
|
|
if not token:
|
|
return ""
|
|
return token.replace("*", "").replace("?", "")
|
|
|
|
# Fast-path: exact URL via /add_urls/get_url_files when a full URL is provided.
|
|
try:
|
|
if pattern.startswith("http://") or pattern.startswith(
|
|
"https://"):
|
|
from API.HydrusNetwork import HydrusRequestSpec
|
|
|
|
spec = HydrusRequestSpec(
|
|
method="GET",
|
|
endpoint="/add_urls/get_url_files",
|
|
query={
|
|
"url": pattern
|
|
},
|
|
)
|
|
response = client._perform_request(
|
|
spec
|
|
) # type: ignore[attr-defined]
|
|
hashes = []
|
|
file_ids = []
|
|
if isinstance(response, dict):
|
|
raw_hashes = response.get("hashes") or response.get(
|
|
"file_hashes"
|
|
)
|
|
if isinstance(raw_hashes, list):
|
|
hashes = [
|
|
str(h).strip() for h in raw_hashes
|
|
if isinstance(h, str) and str(h).strip()
|
|
]
|
|
raw_ids = response.get("file_ids")
|
|
if isinstance(raw_ids, list):
|
|
for item in raw_ids:
|
|
try:
|
|
file_ids.append(int(item))
|
|
except (TypeError, ValueError):
|
|
continue
|
|
|
|
if file_ids:
|
|
payload = client.fetch_file_metadata(
|
|
file_ids=file_ids,
|
|
include_file_url=True,
|
|
include_service_keys_to_tags=not minimal,
|
|
include_duration=not minimal,
|
|
include_size=not minimal,
|
|
include_mime=not minimal,
|
|
)
|
|
metas = (
|
|
payload.get("metadata",
|
|
[]) if isinstance(payload,
|
|
dict) else []
|
|
)
|
|
if isinstance(metas, list):
|
|
metadata_list = [
|
|
m for m in metas if isinstance(m, dict)
|
|
]
|
|
elif hashes:
|
|
payload = client.fetch_file_metadata(
|
|
hashes=hashes,
|
|
include_file_url=True,
|
|
include_service_keys_to_tags=not minimal,
|
|
include_duration=not minimal,
|
|
include_size=not minimal,
|
|
include_mime=not minimal,
|
|
)
|
|
metas = (
|
|
payload.get("metadata",
|
|
[]) if isinstance(payload,
|
|
dict) else []
|
|
)
|
|
if isinstance(metas, list):
|
|
metadata_list = [
|
|
m for m in metas if isinstance(m, dict)
|
|
]
|
|
except Exception:
|
|
metadata_list = None
|
|
|
|
# Fallback: substring scan
|
|
if metadata_list is None:
|
|
search_token = _clean_url_search_token(pattern_hint or pattern)
|
|
scan_limit_override: int | None = None
|
|
if search_token:
|
|
is_domain_only = ("://" not in search_token and "/" not in search_token)
|
|
if is_domain_only:
|
|
try:
|
|
scan_limit_override = max(fetch_limit_raw * 20, 2000)
|
|
except Exception:
|
|
scan_limit_override = 2000
|
|
metadata_list = _iter_url_filtered_metadata(
|
|
search_token,
|
|
want_any=False,
|
|
fetch_limit=fetch_limit_raw,
|
|
scan_limit=scan_limit_override,
|
|
needles=pattern_hints if pattern_hints else None,
|
|
minimal=minimal,
|
|
)
|
|
elif namespace == "system":
|
|
normalized_system_predicate = pattern.strip()
|
|
if normalized_system_predicate == "has url":
|
|
try:
|
|
fetch_limit = int(limit) if limit else 100
|
|
except Exception:
|
|
fetch_limit = 100
|
|
metadata_list = _iter_url_filtered_metadata(
|
|
None,
|
|
want_any=not bool(pattern_hints),
|
|
fetch_limit=fetch_limit,
|
|
needles=pattern_hints if pattern_hints else None,
|
|
minimal=minimal,
|
|
)
|
|
|
|
# Parse the query into tags
|
|
# "*" means "match all" - use system:everything tag in Hydrus
|
|
# If query has explicit namespace, use it as a tag search.
|
|
# If query is free-form, search BOTH:
|
|
# - title:*term* (title: is the only namespace searched implicitly)
|
|
# - *term* (freeform tags; we will filter out other namespace matches client-side)
|
|
tags: list[str] = []
|
|
freeform_union_search: bool = False
|
|
title_predicates: list[str] = []
|
|
freeform_predicates: list[str] = []
|
|
|
|
if query.strip() == "*":
|
|
tags = ["system:everything"]
|
|
elif ":" in query_lower:
|
|
tags = [query_lower]
|
|
else:
|
|
freeform_union_search = True
|
|
if search_terms:
|
|
# Hydrus supports wildcard matching primarily as a prefix (e.g., tag*).
|
|
# Use per-term prefix matching for both title: and freeform tags.
|
|
title_predicates = [f"title:{term}*" for term in search_terms]
|
|
freeform_predicates = [f"{term}*" for term in search_terms]
|
|
else:
|
|
# If we can't extract alnum terms, fall back to the raw query text.
|
|
title_predicates = [f"title:{query_lower}*"]
|
|
freeform_predicates = [f"{query_lower}*"]
|
|
|
|
# Search files with the tags (unless url: search already produced metadata)
|
|
results: list[dict[str, Any]] = []
|
|
|
|
if metadata_list is None:
|
|
file_ids = []
|
|
hashes = []
|
|
|
|
if freeform_union_search:
|
|
if not title_predicates and not freeform_predicates:
|
|
debug(f"{prefix} 0 result(s)")
|
|
return []
|
|
|
|
payloads: list[Any] = []
|
|
try:
|
|
payloads.append(
|
|
client.search_files(
|
|
tags=title_predicates,
|
|
return_hashes=True,
|
|
return_file_ids=True,
|
|
)
|
|
)
|
|
except Exception:
|
|
pass
|
|
|
|
# Extra pass: match a full title phrase when the query includes
|
|
# spaces or punctuation (e.g., "i've been down").
|
|
try:
|
|
if query_lower and query_lower != "*" and "*" not in query_lower:
|
|
if any(ch in query_lower for ch in (" ", "'", "-", "_")):
|
|
payloads.append(
|
|
client.search_files(
|
|
tags=[f"title:{query_lower}*"],
|
|
return_hashes=True,
|
|
return_file_ids=True,
|
|
)
|
|
)
|
|
except Exception:
|
|
pass
|
|
|
|
try:
|
|
title_ids, title_hashes = _extract_search_ids(
|
|
payloads[0] if payloads else None
|
|
)
|
|
# Optimization: for single-term queries, skip the freeform query
|
|
# to avoid duplicate requests.
|
|
single_term = bool(search_terms and len(search_terms) == 1)
|
|
if not single_term:
|
|
payloads.append(
|
|
client.search_files(
|
|
tags=freeform_predicates,
|
|
return_hashes=True,
|
|
return_file_ids=True,
|
|
)
|
|
)
|
|
except Exception:
|
|
pass
|
|
|
|
id_set: set[int] = set()
|
|
hash_set: set[str] = set()
|
|
for payload in payloads:
|
|
ids_part, hashes_part = _extract_search_ids(payload)
|
|
for fid in ids_part:
|
|
id_set.add(fid)
|
|
for hh in hashes_part:
|
|
hash_set.add(hh)
|
|
file_ids = list(id_set)
|
|
hashes = list(hash_set)
|
|
else:
|
|
if not tags:
|
|
debug(f"{prefix} 0 result(s)")
|
|
return []
|
|
|
|
search_result = client.search_files(
|
|
tags=tags,
|
|
return_hashes=True,
|
|
return_file_ids=True
|
|
)
|
|
file_ids, hashes = _extract_search_ids(search_result)
|
|
|
|
# Fast path: ext-only search. Avoid fetching metadata for an unbounded
|
|
# system:everything result set; fetch in chunks until we have enough.
|
|
if ext_only and ext_filter:
|
|
results = []
|
|
if not file_ids and not hashes:
|
|
debug(f"{prefix} 0 result(s)")
|
|
return []
|
|
|
|
# Prefer file_ids if available.
|
|
if file_ids:
|
|
chunk_size = 200
|
|
for start in range(0, len(file_ids), chunk_size):
|
|
if len(results) >= limit:
|
|
break
|
|
chunk = file_ids[start:start + chunk_size]
|
|
try:
|
|
payload = client.fetch_file_metadata(
|
|
file_ids=chunk,
|
|
include_service_keys_to_tags=True,
|
|
include_file_url=True,
|
|
include_duration=True,
|
|
include_size=True,
|
|
include_mime=True,
|
|
)
|
|
except Exception:
|
|
continue
|
|
metas = payload.get("metadata",
|
|
[]) if isinstance(payload,
|
|
dict) else []
|
|
if not isinstance(metas, list):
|
|
continue
|
|
for meta in metas:
|
|
if len(results) >= limit:
|
|
break
|
|
if not isinstance(meta, dict):
|
|
continue
|
|
mime_type = meta.get("mime")
|
|
ext = _resolve_ext_from_meta(meta, mime_type)
|
|
if _normalize_ext_filter(ext) != ext_filter:
|
|
continue
|
|
|
|
file_id = meta.get("file_id")
|
|
hash_hex = meta.get("hash")
|
|
size_val = meta.get("size")
|
|
if size_val is None:
|
|
size_val = meta.get("size_bytes")
|
|
try:
|
|
size = int(size_val) if size_val is not None else 0
|
|
except Exception:
|
|
size = 0
|
|
|
|
title, all_tags = self._extract_title_and_tags(meta, file_id)
|
|
|
|
# Use known URLs (source URLs) from Hydrus if available (matches get-url cmdlet)
|
|
item_url = meta.get("known_urls") or meta.get("urls") or meta.get("url") or []
|
|
if not item_url:
|
|
item_url = meta.get("file_url") or f"{self.URL.rstrip('/')}/view_file?hash={hash_hex}"
|
|
if isinstance(item_url, str) and "/view_file" in item_url:
|
|
item_url = self._append_access_key(item_url)
|
|
|
|
results.append(
|
|
{
|
|
"hash": hash_hex,
|
|
"url": item_url,
|
|
"name": title,
|
|
"title": title,
|
|
"size": size,
|
|
"size_bytes": size,
|
|
"store": self.NAME,
|
|
"tag": all_tags,
|
|
"file_id": file_id,
|
|
"mime": mime_type,
|
|
"ext": _resolve_ext_from_meta(meta, mime_type),
|
|
}
|
|
)
|
|
|
|
debug(f"{prefix} {len(results)} result(s)")
|
|
return results[:limit]
|
|
|
|
# If we only got hashes, fall back to the normal flow below.
|
|
|
|
if not file_ids and not hashes:
|
|
debug(f"{prefix} 0 result(s)")
|
|
return []
|
|
|
|
if file_ids:
|
|
metadata = client.fetch_file_metadata(
|
|
file_ids=file_ids,
|
|
include_service_keys_to_tags=True,
|
|
include_file_url=True,
|
|
include_duration=True,
|
|
include_size=True,
|
|
include_mime=True,
|
|
)
|
|
metadata_list = metadata.get("metadata", [])
|
|
elif hashes:
|
|
metadata = client.fetch_file_metadata(
|
|
hashes=hashes,
|
|
include_service_keys_to_tags=True,
|
|
include_file_url=True,
|
|
include_duration=True,
|
|
include_size=True,
|
|
include_mime=True,
|
|
)
|
|
metadata_list = metadata.get("metadata", [])
|
|
else:
|
|
metadata_list = []
|
|
|
|
# If our free-text searches produce nothing (or nothing survived downstream filtering), fallback to scanning.
|
|
if (not metadata_list) and (query_lower
|
|
!= "*") and (":" not in query_lower):
|
|
try:
|
|
search_result = client.search_files(
|
|
tags=["system:everything"],
|
|
return_hashes=True,
|
|
return_file_ids=True,
|
|
)
|
|
file_ids, hashes = _extract_search_ids(search_result)
|
|
|
|
if file_ids:
|
|
metadata = client.fetch_file_metadata(
|
|
file_ids=file_ids,
|
|
include_service_keys_to_tags=True,
|
|
include_file_url=True,
|
|
include_duration=True,
|
|
include_size=True,
|
|
include_mime=True,
|
|
)
|
|
metadata_list = metadata.get("metadata", [])
|
|
elif hashes:
|
|
metadata = client.fetch_file_metadata(
|
|
hashes=hashes,
|
|
include_service_keys_to_tags=True,
|
|
include_file_url=True,
|
|
include_duration=True,
|
|
include_size=True,
|
|
include_mime=True,
|
|
)
|
|
metadata_list = metadata.get("metadata", [])
|
|
except Exception:
|
|
pass
|
|
|
|
if not isinstance(metadata_list, list):
|
|
metadata_list = []
|
|
|
|
for meta in metadata_list:
|
|
if len(results) >= limit:
|
|
break
|
|
|
|
file_id = meta.get("file_id")
|
|
hash_hex = meta.get("hash")
|
|
size_val = meta.get("size")
|
|
if size_val is None:
|
|
size_val = meta.get("size_bytes")
|
|
try:
|
|
size = int(size_val) if size_val is not None else 0
|
|
except Exception:
|
|
size = 0
|
|
|
|
title, all_tags = self._extract_title_and_tags(meta, file_id)
|
|
|
|
# Prefer Hydrus-provided extension (e.g. ".webm"); fall back to MIME map.
|
|
mime_type = meta.get("mime")
|
|
ext = _resolve_ext_from_meta(meta, mime_type)
|
|
|
|
# Filter results based on query type
|
|
# If user provided explicit namespace (has ':'), don't do substring filtering
|
|
# Just include what the tag search returned
|
|
has_namespace = ":" in query_lower
|
|
|
|
# Use known URLs (source URLs) from Hydrus if available (matches get-url cmdlet)
|
|
item_url = meta.get("known_urls") or meta.get("urls") or meta.get("url") or []
|
|
if not item_url:
|
|
item_url = meta.get("file_url") or f"{self.URL.rstrip('/')}/view_file?hash={hash_hex}"
|
|
if isinstance(item_url, str) and "/view_file" in item_url:
|
|
item_url = self._append_access_key(item_url)
|
|
|
|
if has_namespace:
|
|
# Explicit namespace search - already filtered by Hydrus tag search
|
|
# Include this result as-is
|
|
results.append(
|
|
{
|
|
"hash": hash_hex,
|
|
"url": item_url,
|
|
"name": title,
|
|
"title": title,
|
|
"size": size,
|
|
"size_bytes": size,
|
|
"store": self.NAME,
|
|
"tag": all_tags,
|
|
"file_id": file_id,
|
|
"mime": mime_type,
|
|
"ext": ext,
|
|
}
|
|
)
|
|
else:
|
|
# Free-form search: check if search terms match title or FREEFORM tags.
|
|
# Do NOT implicitly match other namespace tags (except title:).
|
|
freeform_tags = [
|
|
t for t in all_tags
|
|
if isinstance(t, str) and t and (":" not in t)
|
|
]
|
|
searchable_text = (title + " " + " ".join(freeform_tags)).lower()
|
|
|
|
match = True
|
|
if query_lower != "*" and search_terms:
|
|
for term in search_terms:
|
|
if term not in searchable_text:
|
|
match = False
|
|
break
|
|
|
|
if match:
|
|
results.append(
|
|
{
|
|
"hash": hash_hex,
|
|
"url": item_url,
|
|
"name": title,
|
|
"title": title,
|
|
"size": size,
|
|
"size_bytes": size,
|
|
"store": self.NAME,
|
|
"tag": all_tags,
|
|
"file_id": file_id,
|
|
"mime": mime_type,
|
|
"ext": ext,
|
|
}
|
|
)
|
|
|
|
debug(f"{prefix} {len(results)} result(s)")
|
|
if ext_filter:
|
|
wanted = ext_filter
|
|
filtered: list[dict[str, Any]] = []
|
|
for item in results:
|
|
try:
|
|
if _normalize_ext_filter(str(item.get("ext") or "")) == wanted:
|
|
filtered.append(item)
|
|
except Exception:
|
|
continue
|
|
results = filtered
|
|
|
|
return results[:limit]
|
|
|
|
except Exception as exc:
|
|
log(f"❌ Hydrus search failed: {exc}", file=sys.stderr)
|
|
import traceback
|
|
|
|
traceback.print_exc(file=sys.stderr)
|
|
raise
|
|
|
|
def get_file(self, file_hash: str, **kwargs: Any) -> Path | str | None:
    """Return the local file system path if available, else a browser URL.

    IMPORTANT: this method must be side-effect free (do not auto-open a browser).
    Only explicit user actions (e.g. the get-file cmdlet) should open files.

    Args:
        file_hash: SHA256 hash of the file (hex string; normalized here).
        **kwargs: ``url=True`` forces the browser URL even when a local
            path would be available.

    Returns:
        A local ``Path`` when the Hydrus file store is reachable on this
        machine, otherwise an authenticated ``/get_files/file`` URL string.
    """
    file_hash = str(file_hash or "").strip().lower()
    debug(f"{self._log_prefix()} get_file(hash={file_hash[:12]}..., url={kwargs.get('url')})")

    def _browser_url() -> str:
        # Single place that builds the authenticated web URL
        # (this string was previously duplicated in two branches).
        base_url = str(self.URL).rstrip("/")
        access_key = str(self.API)
        return (
            f"{base_url}/get_files/file?hash={file_hash}&Hydrus-Client-API-Access-Key={access_key}"
        )

    # If 'url=True' is passed, we preference the browser URL even if a local path is available.
    # This is typically used by the 'get-file' cmdlet for interactive viewing.
    if kwargs.get("url"):
        browser_url = _browser_url()
        debug(f"{self._log_prefix()} get_file: returning browser URL per request: {browser_url}")
        return browser_url

    # Try to get the local disk path if possible (works if Hydrus is on same machine)
    server_path = None
    try:
        path_res = self._client.get_file_path(file_hash)
        if isinstance(path_res, dict) and "path" in path_res:
            server_path = path_res["path"]
        if server_path:
            local_path = Path(server_path)
            if local_path.exists():
                debug(f"{self._log_prefix()} get_file: found local path: {local_path}")
                return local_path
    except Exception as e:
        debug(f"{self._log_prefix()} get_file: could not resolve path from API: {e}")

    # If we found a path on the server but it's not locally accessible,
    # keep it for logging but continue to the browser URL fallback so the UI
    # can still open the file via the Hydrus web UI.
    if server_path:
        debug(
            f"{self._log_prefix()} get_file: server path not locally accessible, falling back to HTTP: {server_path}"
        )

    # Fallback to browser URL with access key
    browser_url = _browser_url()
    debug(f"{self._log_prefix()} get_file: falling back to url={browser_url}")
    return browser_url
|
|
|
|
def download_to_temp(
    self,
    file_hash: str,
    *,
    temp_root: Optional[Path] = None,
) -> Optional[Path]:
    """Download a Hydrus file to a temporary path for downstream uploads.

    Args:
        file_hash: SHA256 hash of the file (64-char lowercase hex).
        temp_root: Optional directory to download into. When omitted, a
            fresh ``hydrus-file-*`` temp directory is created and is removed
            again if the download does not produce a file.

    Returns:
        Path to the downloaded file, or None on any failure (invalid hash,
        missing client, HTTP error, ...).
    """

    try:
        client = self._client
        if client is None:
            return None

        # Only accept a well-formed SHA256 hex digest.
        h = str(file_hash or "").strip().lower()
        if len(h) != 64 or not all(ch in "0123456789abcdef" for ch in h):
            return None

        # Track whether we created the temp dir, so we only delete
        # directories we own on failure.
        created_tmp = False
        base_tmp = Path(temp_root) if temp_root is not None else Path(
            tempfile.mkdtemp(prefix="hydrus-file-")
        )
        if temp_root is None:
            created_tmp = True
        # exist_ok covers the mkdtemp case; creates caller-supplied dirs.
        base_tmp.mkdir(parents=True, exist_ok=True)

        def _safe_filename(raw: str) -> str:
            # Strip filesystem-hostile characters; fall back to the hash
            # when nothing usable remains.
            cleaned = re.sub(r"[\\/:*?\"<>|]", "_", str(raw or "")).strip()
            if not cleaned:
                return h
            cleaned = cleaned.strip(". ") or h
            return cleaned

        # Prefer ext/title from metadata when available.
        fname = h
        ext_val = ""
        try:
            meta = self.get_metadata(h) or {}
            if isinstance(meta, dict):
                title_val = str(meta.get("title") or "").strip()
                if title_val:
                    fname = _safe_filename(title_val)
                ext_val = str(meta.get("ext") or "").strip().lstrip(".")
        except Exception:
            # Metadata is a nicety; the hash alone is a valid filename.
            pass

        if not fname:
            fname = h
        # Append the extension unless the title already carries it.
        if ext_val and not fname.lower().endswith(f".{ext_val.lower()}"):
            fname = f"{fname}.{ext_val}"

        # Resolve the download URL via the client; fall back to building
        # the /get_files/file URL by hand.
        try:
            file_url = client.file_url(h)
        except Exception:
            file_url = f"{self.URL.rstrip('/')}/get_files/file?hash={quote(h)}"

        dest_path = base_tmp / fname
        # Stream the body to disk in chunks to avoid buffering large files
        # in memory.
        stream_client = get_shared_httpx_client(timeout=60.0, verify_ssl=False)
        with stream_client.stream(
            "GET",
            file_url,
            headers={"Hydrus-Client-API-Access-Key": self.API},
            follow_redirects=True,
            timeout=60.0,
        ) as resp:
            resp.raise_for_status()
            with dest_path.open("wb") as fh:
                for chunk in resp.iter_bytes():
                    if chunk:
                        fh.write(chunk)

        if dest_path.exists():
            return dest_path

        # Download produced nothing; clean up only dirs we created.
        if created_tmp:
            try:
                shutil.rmtree(base_tmp, ignore_errors=True)
            except Exception:
                pass
        return None
    except Exception as exc:
        log(f"{self._log_prefix()} download_to_temp failed: {exc}", file=sys.stderr)
        # Best-effort cleanup: base_tmp may not exist yet if we failed early.
        try:
            if temp_root is None and "base_tmp" in locals():
                shutil.rmtree(base_tmp, ignore_errors=True)  # type: ignore[arg-type]
        except Exception:
            pass
        return None
|
|
|
|
def delete_file(self, file_identifier: str, **kwargs: Any) -> bool:
    """Delete a file from Hydrus, then clear the deletion record.

    This is used by the delete-file cmdlet when the item belongs to a HydrusNetwork store.
    """
    try:
        client = self._client
        if client is None:
            debug(f"{self._log_prefix()} delete_file: client unavailable")
            return False

        # Normalize and validate the identifier as a SHA256 hex digest.
        file_hash = str(file_identifier or "").strip().lower()
        hash_is_valid = (
            len(file_hash) == 64
            and set(file_hash) <= set("0123456789abcdef")
        )
        if not hash_is_valid:
            debug(
                f"{self._log_prefix()} delete_file: invalid file hash '{file_identifier}'"
            )
            return False

        # Optional deletion reason: keep only a non-empty stripped string.
        raw_reason = kwargs.get("reason")
        reason_text = None
        if isinstance(raw_reason, str):
            stripped = raw_reason.strip()
            if stripped:
                reason_text = stripped

        # 1) Delete file
        client.delete_files([file_hash], reason=reason_text)

        # 2) Clear deletion record (best-effort)
        try:
            client.clear_file_deletion_record([file_hash])
        except Exception as exc:
            debug(
                f"{self._log_prefix()} delete_file: clear_file_deletion_record failed: {exc}"
            )

        return True
    except Exception as exc:
        debug(f"{self._log_prefix()} delete_file failed: {exc}")
        return False
|
|
|
|
def get_metadata(self, file_hash: str, **kwargs: Any) -> Optional[Dict[str, Any]]:
    """Get metadata for a file from Hydrus by hash.

    Args:
        file_hash: SHA256 hash of the file (64-char hex string)

    Returns:
        Dict with metadata fields or None if not found. Keys: ``hash``,
        ``title``, ``ext``, ``size``, ``mime``, ``hydrus_mime``,
        ``filetype_human``, ``duration_ms``, ``url``.
    """
    try:
        client = self._client
        if not client:
            debug(f"{self._log_prefix()} get_metadata: client unavailable")
            return None

        # Fetch file metadata with the fields we need for CLI display.
        payload = client.fetch_file_metadata(
            hashes=[file_hash],
            include_service_keys_to_tags=True,
            include_file_url=True,
            include_duration=True,
            include_size=True,
            include_mime=True,
        )

        if not payload or not payload.get("metadata"):
            return None

        meta = payload["metadata"][0]

        # Hydrus can return placeholder metadata rows for unknown hashes.
        if not isinstance(meta, dict) or meta.get("file_id") is None:
            return None

        # Extract title from tags; default to a hash-derived placeholder.
        title = f"Hydrus_{file_hash[:12]}"
        extracted_tags = self._extract_tags_from_hydrus_meta(
            meta,
            service_key=None,
            service_name="my tags",
        )
        # First non-empty "title:" tag wins.
        for raw_tag in extracted_tags:
            tag_text = str(raw_tag or "").strip()
            if not tag_text:
                continue
            if tag_text.lower().startswith("title:"):
                value = tag_text.split(":", 1)[1].strip()
                if value:
                    title = value
                    break

        # Hydrus may return mime as an int enum, or sometimes a human label.
        mime_val = meta.get("mime")
        filetype_human = (
            meta.get("filetype_human") or meta.get("mime_human")
            or meta.get("mime_string")
        )

        # Determine ext: prefer Hydrus metadata ext, then filetype_human (when it looks like an ext),
        # then title suffix, then file path suffix.
        ext = str(meta.get("ext") or "").strip().lstrip(".")
        if not ext:
            ft = str(filetype_human or "").strip().lstrip(".").lower()
            if ft and ft != "unknown filetype" and ft.isalnum() and len(ft) <= 8:
                # Treat simple labels like "mp4", "m4a", "webm" as extensions.
                ext = ft
        if not ext and isinstance(title, str) and "." in title:
            # Fall back to the title's file suffix, if any.
            try:
                ext = Path(title).suffix.lstrip(".")
            except Exception:
                ext = ""
        if not ext:
            # Last resort: ask Hydrus for the on-disk path and use its suffix.
            try:
                path_payload = client.get_file_path(file_hash)
                if isinstance(path_payload, dict):
                    p = path_payload.get("path")
                    if isinstance(p, str) and p.strip():
                        ext = Path(p.strip()).suffix.lstrip(".")
            except Exception:
                ext = ""

        # If extension is still unknown, attempt a best-effort lookup from MIME.
        def _mime_from_ext(ext_value: str) -> str:
            # Reverse-map an extension to the first MIME listed in mime_maps.
            ext_clean = str(ext_value or "").strip().lstrip(".").lower()
            if not ext_clean:
                return ""
            try:
                for category in mime_maps.values():
                    info = category.get(ext_clean)
                    if isinstance(info, dict):
                        mimes = info.get("mimes")
                        if isinstance(mimes, list) and mimes:
                            first = mimes[0]
                            return str(first)
            except Exception:
                return ""
            return ""

        # Normalize to a MIME string for CLI output.
        # Avoid passing through human labels like "unknown filetype".
        mime_type = ""
        if isinstance(mime_val, str):
            candidate = mime_val.strip()
            if "/" in candidate and candidate.lower() != "unknown filetype":
                mime_type = candidate
        if not mime_type and isinstance(filetype_human, str):
            candidate = filetype_human.strip()
            if "/" in candidate and candidate.lower() != "unknown filetype":
                mime_type = candidate
        if not mime_type:
            mime_type = _mime_from_ext(ext)

        # Normalize size/duration to stable scalar types.
        size_val = meta.get("size")
        if size_val is None:
            size_val = meta.get("size_bytes")
        try:
            size_int: int | None = int(size_val) if size_val is not None else 0
        except Exception:
            size_int = 0

        dur_val = meta.get("duration")
        if dur_val is None:
            dur_val = meta.get("duration_ms")
        try:
            dur_int: int | None = int(dur_val) if dur_val is not None else None
        except Exception:
            dur_int = None

        # Known URLs may arrive under several keys and as str or list;
        # normalize to a list of non-empty strings.
        raw_urls = meta.get("known_urls") or meta.get("urls") or meta.get("url"
                                                                          ) or []
        url_list: list[str] = []
        if isinstance(raw_urls, str):
            s = raw_urls.strip()
            url_list = [s] if s else []
        elif isinstance(raw_urls, list):
            url_list = [
                str(u).strip() for u in raw_urls
                if isinstance(u, str) and str(u).strip()
            ]

        return {
            "hash": file_hash,
            "title": title,
            "ext": ext,
            "size": size_int,
            "mime": mime_type,
            # Keep raw fields available for troubleshooting/other callers.
            "hydrus_mime": mime_val,
            "filetype_human": filetype_human,
            "duration_ms": dur_int,
            "url": url_list,
        }

    except Exception as exc:
        debug(f"{self._log_prefix()} get_metadata failed: {exc}")
        return None
|
|
|
|
def get_tag(self, file_identifier: str, **kwargs: Any) -> Tuple[List[str], str]:
    """Get tags for a file from Hydrus by hash.

    Args:
        file_identifier: File hash (SHA256 hex string)
        **kwargs: Optional service_name parameter

    Returns:
        Tuple of (tags_list, source_description)
        where source is always "hydrus"
    """
    try:
        # Validate the identifier before doing any network work.
        file_hash = str(file_identifier or "").strip().lower()
        if len(file_hash) != 64 or any(
            ch not in "0123456789abcdef" for ch in file_hash
        ):
            debug(
                f"{self._log_prefix()} get_tags: invalid file hash '{file_identifier}'"
            )
            return [], "unknown"

        # Get Hydrus client and service info
        client = self._client
        if not client:
            debug(f"{self._log_prefix()} get_tags: client unavailable")
            return [], "unknown"

        # Fetch file metadata
        payload = client.fetch_file_metadata(
            hashes=[file_hash],
            include_service_keys_to_tags=True,
            include_file_url=True
        )

        items = payload.get("metadata") if isinstance(payload, dict) else None
        if not isinstance(items, list) or not items:
            debug(
                f"{self._log_prefix()} get_tags: no metadata for hash {file_hash}"
            )
            return [], "unknown"

        meta = items[0] if isinstance(items[0], dict) else None
        if not isinstance(meta, dict) or meta.get("file_id") is None:
            debug(
                f"{self._log_prefix()} get_tags: invalid metadata for hash {file_hash}"
            )
            return [], "unknown"

        service_name = kwargs.get("service_name") or "my tags"
        service_key = self._get_service_key(service_name)

        # Extract tags from metadata, then normalize to lowercase strings.
        raw_tags = self._extract_tags_from_hydrus_meta(meta, service_key, service_name)

        normalized = [
            str(t).strip().lower()
            for t in raw_tags
            if isinstance(t, str) and t.strip()
        ]
        return normalized, "hydrus"

    except Exception as exc:
        debug(f"{self._log_prefix()} get_tags failed: {exc}")
        return [], "unknown"
|
|
|
|
def add_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool:
    """Add tags to a Hydrus file.

    Computes a namespaced "overwrite" merge against the file's existing tags
    (via compute_namespaced_tag_overwrite), then applies the resulting
    add/remove sets in a single mutate call when a service key is available,
    falling back to separate delete/add calls otherwise.

    Args:
        file_identifier: SHA256 hash of the file (64-char hex string).
        tags: Tags to add; normalized to lowercase, blanks dropped.
        **kwargs: ``service_name`` (default "my tags"); ``existing_tags`` to
            skip the extra get_tag round-trip when the caller already has them.

    Returns:
        True when the desired tag state was reached (including no-op),
        False on failure.
    """
    try:
        client = self._client
        if client is None:
            debug(f"{self._log_prefix()} add_tag: client unavailable")
            return False

        file_hash = str(file_identifier or "").strip().lower()
        if len(file_hash) != 64 or not all(ch in "0123456789abcdef"
                                           for ch in file_hash):
            debug(
                f"{self._log_prefix()} add_tag: invalid file hash '{file_identifier}'"
            )
            return False
        service_name = kwargs.get("service_name") or "my tags"

        incoming_tags = [
            str(t).strip().lower() for t in (tags or [])
            if isinstance(t, str) and str(t).strip()
        ]
        if not incoming_tags:
            # Nothing to add; treat as success.
            return True

        # Prefer caller-supplied existing tags to avoid a metadata fetch.
        existing_tags = kwargs.get("existing_tags")
        if existing_tags is None:
            try:
                existing_tags, _src = self.get_tag(file_hash)
            except Exception:
                existing_tags = []
        if isinstance(existing_tags, (list, tuple, set)):
            existing_tags = [
                str(t).strip().lower() for t in existing_tags
                if isinstance(t, str) and str(t).strip()
            ]
        else:
            existing_tags = []

        from SYS.metadata import compute_namespaced_tag_overwrite

        tags_to_remove, tags_to_add, _merged = compute_namespaced_tag_overwrite(
            existing_tags, incoming_tags
        )

        if not tags_to_add and not tags_to_remove:
            # Already in the desired state.
            return True

        service_key = self._get_service_key(service_name)

        # Preferred path: one call that adds and removes together.
        mutate_success = False
        if service_key:
            try:
                client.mutate_tags_by_key(
                    file_hash,
                    service_key,
                    add_tags=tags_to_add,
                    remove_tags=tags_to_remove,
                )
                mutate_success = True
            except Exception as exc:
                debug(
                    f"{self._log_prefix()} add_tag: mutate_tags_by_key failed: {exc}"
                )

        # Fallback path: per-operation calls addressed by service name.
        did_any = False
        if not mutate_success:
            if tags_to_remove:
                try:
                    client.delete_tag(file_hash, tags_to_remove, service_name)
                    did_any = True
                except Exception as exc:
                    debug(
                        f"{self._log_prefix()} add_tag: delete_tag failed: {exc}"
                    )
            if tags_to_add:
                try:
                    client.add_tag(file_hash, tags_to_add, service_name)
                    did_any = True
                except Exception as exc:
                    debug(
                        f"{self._log_prefix()} add_tag: add_tag failed: {exc}"
                    )
        else:
            did_any = bool(tags_to_add or tags_to_remove)

        return did_any
    except Exception as exc:
        debug(f"{self._log_prefix()} add_tag failed: {exc}")
        return False
|
|
|
|
def delete_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool:
    """Delete tags from a Hydrus file."""
    try:
        client = self._client
        if client is None:
            debug(f"{self._log_prefix()} delete_tag: client unavailable")
            return False

        file_hash = str(file_identifier or "").strip().lower()
        valid_hash = len(file_hash) == 64 and all(
            ch in "0123456789abcdef" for ch in file_hash
        )
        if not valid_hash:
            debug(
                f"{self._log_prefix()} delete_tag: invalid file hash '{file_identifier}'"
            )
            return False

        service_name = kwargs.get("service_name") or "my tags"

        # Accept either a sequence of tags or a single tag value.
        if isinstance(tags, (list, tuple)):
            raw_list = list(tags)
        else:
            raw_list = [str(tags)]

        # Normalize: keep non-empty strings, lowercased.
        tag_list: list[str] = []
        for candidate in raw_list:
            if isinstance(candidate, str) and candidate.strip():
                tag_list.append(candidate.strip().lower())

        if not tag_list:
            return False

        client.delete_tag(file_hash, tag_list, service_name)
        return True
    except Exception as exc:
        debug(f"{self._log_prefix()} delete_tag failed: {exc}")
        return False
|
|
|
|
def get_url(self, file_identifier: str, **kwargs: Any) -> List[str]:
    """Get known url for a Hydrus file.

    Args:
        file_identifier: SHA256 hash of the file (64-char hex string).

    Returns:
        List of URL-looking strings (scheme://... plus magnet:/torrent:
        pseudo-URLs) known to Hydrus for this file; [] on any failure.
    """
    try:
        client = self._client
        # Consistent with sibling methods: fail fast with a debug note
        # instead of letting the AttributeError below be silently swallowed.
        if client is None:
            debug(f"{self._log_prefix()} get_url: client unavailable")
            return []

        file_hash = str(file_identifier or "").strip().lower()
        if len(file_hash) != 64 or not all(ch in "0123456789abcdef"
                                           for ch in file_hash):
            return []

        payload = client.fetch_file_metadata(
            hashes=[file_hash],
            include_file_url=True
        )
        items = payload.get("metadata") if isinstance(payload, dict) else None
        if not isinstance(items, list) or not items:
            return []
        meta = items[0] if isinstance(items[0], dict) else {}

        # URLs may arrive under several keys and as a str or a list.
        raw_urls: Any = meta.get("known_urls") or meta.get("urls") or meta.get("url") or []

        def _is_url(s: Any) -> bool:
            # Accept scheme://... plus magnet/torrent pseudo-URLs.
            if not isinstance(s, str):
                return False
            v = s.strip().lower()
            return bool(v and ("://" in v or v.startswith(("magnet:", "torrent:"))))

        if isinstance(raw_urls, str):
            val = raw_urls.strip()
            return [val] if _is_url(val) else []
        if isinstance(raw_urls, list):
            out: list[str] = []
            for u in raw_urls:
                if not isinstance(u, str):
                    continue
                u = u.strip()
                if u and _is_url(u):
                    out.append(u)
            return out
        return []
    except Exception as exc:
        debug(f"{self._log_prefix()} get_url failed: {exc}")
        return []
|
|
|
|
def get_url_info(self, url: str, **kwargs: Any) -> dict[str, Any] | None:
    """Return Hydrus URL info for a single URL (Hydrus-only helper).

    Uses: GET /add_urls/get_url_info
    """
    try:
        client = self._client
        if client is None:
            return None
        target = str(url or "").strip()
        if not target:
            return None
        try:
            return client.get_url_info(target)  # type: ignore[attr-defined]
        except Exception:
            # Client object may not expose get_url_info(); fall back to
            # issuing the raw API request ourselves.
            from API.HydrusNetwork import HydrusRequestSpec

            spec = HydrusRequestSpec(
                method="GET",
                endpoint="/add_urls/get_url_info",
                query={"url": target},
            )
            result = client._perform_request(spec)  # type: ignore[attr-defined]
            if isinstance(result, dict):
                return result
            return None
    except Exception as exc:
        debug(f"{self._log_prefix()} get_url_info failed: {exc}")
        return None
|
|
|
|
def add_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
    """Associate one or more url with a Hydrus file."""
    try:
        client = self._client
        if client is not None:
            # Hydrus takes one URL per call; any failure aborts the batch
            # and is reported via the except below.
            for candidate in url:
                client.associate_url(file_identifier, candidate)
            return True
        debug(f"{self._log_prefix()} add_url: client unavailable")
        return False
    except Exception as exc:
        debug(f"{self._log_prefix()} add_url failed: {exc}")
        return False
|
|
|
|
def add_url_bulk(self, items: List[tuple[str, List[str]]], **kwargs: Any) -> bool:
    """Bulk associate urls with Hydrus files.

    Best-effort convenience wrapper used by cmdlets to batch url
    associations.  Hydrus' client API is still called once per
    (hash, url) pair, but consolidating the control flow here lets
    cmdlets defer url association until the end.

    Returns True when at least one association succeeded.
    """
    try:
        hydrus = self._client
        if hydrus is None:
            debug(f"{self._log_prefix()} add_url_bulk: client unavailable")
            return False

        succeeded = False
        for identifier, url_list in (items or []):
            sha256 = str(identifier or "").strip().lower()
            # Hydrus file hashes are 64 hex chars; skip anything else.
            if len(sha256) != 64:
                continue
            for raw in (url_list or []):
                candidate = str(raw or "").strip()
                if not candidate:
                    continue
                try:
                    hydrus.associate_url(sha256, candidate)
                except Exception:
                    continue
                succeeded = True
        return succeeded
    except Exception as exc:
        debug(f"{self._log_prefix()} add_url_bulk failed: {exc}")
        return False
|
|
|
|
def add_tags_bulk(self, items: List[tuple[str, List[str]]], *, service_name: str | None = None) -> bool:
|
|
"""Bulk add tags to multiple Hydrus files.
|
|
|
|
Groups files by identical tag-sets and uses the Hydrus `mutate_tags_by_key`
|
|
call (when a service key is available) to reduce the number of API calls.
|
|
Falls back to per-hash `add_tag` calls if necessary.
|
|
"""
|
|
try:
|
|
client = self._client
|
|
if client is None:
|
|
debug(f"{self._log_prefix()} add_tags_bulk: client unavailable")
|
|
return False
|
|
|
|
# Group by canonical tag set (sorted tuple) to batch identical additions
|
|
buckets: dict[tuple[str, ...], list[str]] = {}
|
|
for file_identifier, tags in items or []:
|
|
h = str(file_identifier or "").strip().lower()
|
|
if len(h) != 64:
|
|
continue
|
|
tlist = [str(t).strip().lower() for t in (tags or []) if isinstance(t, str) and str(t).strip()]
|
|
if not tlist:
|
|
continue
|
|
key = tuple(sorted(tlist))
|
|
buckets.setdefault(key, []).append(h)
|
|
|
|
if not buckets:
|
|
return False
|
|
|
|
svc = service_name or "my tags"
|
|
service_key = self._get_service_key(svc)
|
|
any_success = False
|
|
|
|
for tag_tuple, hashes in buckets.items():
|
|
try:
|
|
if service_key:
|
|
# Mutate tags for many hashes in a single request
|
|
client.mutate_tags_by_key(hash=hashes, service_key=service_key, add_tags=list(tag_tuple))
|
|
any_success = True
|
|
continue
|
|
except Exception as exc:
|
|
debug(f"{self._log_prefix()} add_tags_bulk mutate failed for tags {tag_tuple}: {exc}")
|
|
|
|
# Fallback: apply per-hash add_tag
|
|
for h in hashes:
|
|
try:
|
|
client.add_tag(h, list(tag_tuple), svc)
|
|
any_success = True
|
|
except Exception:
|
|
continue
|
|
|
|
return any_success
|
|
except Exception as exc:
|
|
debug(f"{self._log_prefix()} add_tags_bulk failed: {exc}")
|
|
return False
|
|
|
|
def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
    """Delete one or more url from a Hydrus file.

    Returns True when every deletion call completed; False on any failure.
    """
    try:
        hydrus = self._client
        if hydrus is None:
            debug(f"{self._log_prefix()} delete_url: client unavailable")
            return False
        # One API call per URL; any raised error aborts and reports False.
        for candidate in url:
            hydrus.delete_url(file_identifier, candidate)
    except Exception as exc:
        debug(f"{self._log_prefix()} delete_url failed: {exc}")
        return False
    return True
|
|
|
|
def get_note(self, file_identifier: str, **kwargs: Any) -> Dict[str, str]:
    """Get notes for a Hydrus file (default note service only).

    Returns a mapping of note name -> note text; empty dict on any failure
    or when the identifier is not a 64-char hex SHA-256.
    """
    try:
        hydrus = self._client
        if hydrus is None:
            debug(f"{self._log_prefix()} get_note: client unavailable")
            return {}

        sha256 = str(file_identifier or "").strip().lower()
        # Only a full 64-char hex digest identifies a Hydrus file.
        if len(sha256) != 64 or any(ch not in "0123456789abcdef" for ch in sha256):
            return {}

        payload = hydrus.fetch_file_metadata(hashes=[sha256], include_notes=True)
        records = payload.get("metadata") if isinstance(payload, dict) else None
        if not (isinstance(records, list) and records):
            return {}
        first = records[0]
        if not isinstance(first, dict):
            return {}

        notes = first.get("notes")
        if not isinstance(notes, dict):
            return {}
        # Drop blank note names; coerce missing text to "".
        return {str(k): str(v or "") for k, v in notes.items() if str(k).strip()}
    except Exception as exc:
        debug(f"{self._log_prefix()} get_note failed: {exc}")
        return {}
|
|
|
|
def set_note(
    self,
    file_identifier: str,
    name: str,
    text: str,
    **kwargs: Any
) -> bool:
    """Set a named note for a Hydrus file (default note service only).

    Returns True on success; False on bad input, missing client, or error.
    """
    try:
        hydrus = self._client
        if hydrus is None:
            debug(f"{self._log_prefix()} set_note: client unavailable")
            return False

        sha256 = str(file_identifier or "").strip().lower()
        # Only a full 64-char hex digest identifies a Hydrus file.
        if len(sha256) != 64 or any(ch not in "0123456789abcdef" for ch in sha256):
            return False

        note_name = str(name or "").strip()
        if not note_name:
            return False

        hydrus.set_notes(sha256, {note_name: str(text or "")})
        return True
    except Exception as exc:
        debug(f"{self._log_prefix()} set_note failed: {exc}")
        return False
|
|
|
|
def delete_note(self, file_identifier: str, name: str, **kwargs: Any) -> bool:
    """Delete a named note for a Hydrus file (default note service only).

    Returns True on success; False on bad input, missing client, or error.
    """
    try:
        hydrus = self._client
        if hydrus is None:
            debug(f"{self._log_prefix()} delete_note: client unavailable")
            return False

        sha256 = str(file_identifier or "").strip().lower()
        # Only a full 64-char hex digest identifies a Hydrus file.
        if len(sha256) != 64 or any(ch not in "0123456789abcdef" for ch in sha256):
            return False

        note_name = str(name or "").strip()
        if not note_name:
            return False

        hydrus.delete_notes(sha256, [note_name])
        return True
    except Exception as exc:
        debug(f"{self._log_prefix()} delete_note failed: {exc}")
        return False
|
|
|
|
@staticmethod
def _extract_tags_from_hydrus_meta(
    meta: Dict[str, Any],
    service_key: Optional[str],
    service_name: str
) -> List[str]:
    """Extract current tags from Hydrus metadata dict.

    Prefers display_tags (includes siblings/parents, excludes deleted).
    Falls back to storage_tags status '0' (current).

    Args:
        meta: One Hydrus file-metadata record (its "tags" entry is read;
            "tags_flat" is also honoured when present).
        service_key: Exact tag-service key to match first; may be None.
        service_name: Tag-service display name to match (case-insensitive).

    Returns:
        Tags de-duplicated case-insensitively, first occurrence preserved.
    """
    tags_payload = meta.get("tags")
    if not isinstance(tags_payload, dict):
        return []

    desired_service_name = str(service_name or "").strip().lower()
    desired_service_key = str(service_key).strip() if service_key is not None else ""

    def _append_tag(out: List[str], value: Any) -> None:
        # Accept str or bytes tag values; anything else is silently dropped.
        text = ""
        if isinstance(value, bytes):
            try:
                text = value.decode("utf-8", errors="ignore")
            except Exception:
                text = str(value)
        elif isinstance(value, str):
            text = value
        if not text:
            return
        cleaned = text.strip()
        if cleaned:
            out.append(cleaned)

    def _collect_current(container: Any, out: List[str]) -> None:
        # A container is either a flat list of tags, or a statuses dict
        # whose key "0" (or int 0) holds the *current* tags.
        if isinstance(container, list):
            for tag in container:
                _append_tag(out, tag)
            return
        if isinstance(container, dict):
            current = container.get("0")
            if current is None:
                current = container.get(0)
            if isinstance(current, list):
                for tag in current:
                    _append_tag(out, tag)

    def _collect_service_data(service_data: Any, out: List[str]) -> None:
        # Pull display tags first (siblings/parents applied), then storage.
        if not isinstance(service_data, dict):
            return

        display = (
            service_data.get("display_tags")
            or service_data.get("display_friendly_tags")
            or service_data.get("display")
        )
        _collect_current(display, out)

        storage = (
            service_data.get("storage_tags")
            or service_data.get("statuses_to_tags")
            or service_data.get("tags")
        )
        _collect_current(storage, out)

    collected: List[str] = []

    # Pass 1: exact service-key match, when the caller supplied one.
    if desired_service_key:
        _collect_service_data(tags_payload.get(desired_service_key), collected)

    # Pass 2: match services by display name across the payload's values.
    if not collected and desired_service_name:
        for maybe_service in tags_payload.values():
            if not isinstance(maybe_service, dict):
                continue
            svc_name = str(
                maybe_service.get("service_name")
                or maybe_service.get("name")
                or ""
            ).strip().lower()
            if svc_name and svc_name == desired_service_name:
                _collect_service_data(maybe_service, collected)

    # Pass 3 (always runs): the "service_keys_to_statuses_to_tags" shape,
    # optionally filtered down to the desired key/name.
    names_map = tags_payload.get("service_keys_to_names")
    statuses_map = tags_payload.get("service_keys_to_statuses_to_tags")
    if isinstance(statuses_map, dict):
        keys_to_collect: List[str] = []
        if desired_service_key:
            keys_to_collect.append(desired_service_key)
        if desired_service_name and isinstance(names_map, dict):
            for raw_key, raw_name in names_map.items():
                if str(raw_name or "").strip().lower() == desired_service_name:
                    keys_to_collect.append(str(raw_key))
        keys_filter = {k for k in keys_to_collect if k}

        for raw_key, status_payload in statuses_map.items():
            raw_key_text = str(raw_key)
            # An empty filter means "take every service".
            if keys_filter and raw_key_text not in keys_filter:
                continue
            _collect_current(status_payload, collected)

    # Pass 4: last resort — sweep every service entry in the payload.
    if not collected:
        for maybe_service in tags_payload.values():
            _collect_service_data(maybe_service, collected)

    # Some callers pre-flatten tags into "tags_flat"; include those too.
    top_level_tags = meta.get("tags_flat")
    if isinstance(top_level_tags, list):
        _collect_current(top_level_tags, collected)

    # De-duplicate case-insensitively, preserving first-seen casing/order.
    deduped: List[str] = []
    seen: set[str] = set()
    for tag in collected:
        key = str(tag).strip().lower()
        if not key or key in seen:
            continue
        seen.add(key)
        deduped.append(tag)
    return deduped
|
|
|
|
@staticmethod
def _extract_title_and_tags(meta: Dict[str, Any], file_id: Any) -> Tuple[str, List[str]]:
    """Derive a display title and normalized tag list from Hydrus metadata.

    Tags are lower-cased, stripped and de-duplicated (order preserved).
    The first "title:" namespace tag with a non-empty value supplies the
    title; otherwise a generic "Hydrus File <id>" placeholder is returned.
    """
    default_title = f"Hydrus File {file_id}"
    title = default_title
    raw_tags = HydrusNetwork._extract_tags_from_hydrus_meta(
        meta,
        service_key=None,
        service_name="my tags",
    )

    seen: set[str] = set()
    normalized_tags: List[str] = []
    for candidate in raw_tags:
        tag = str(candidate or "").strip().lower()
        if not tag or tag in seen:
            continue
        seen.add(tag)
        normalized_tags.append(tag)
        # Only the first usable "title:" tag may replace the placeholder.
        if title == default_title and tag.startswith("title:"):
            value = tag.split(":", 1)[1].strip()
            if value:
                title = value

    return title, normalized_tags
|