Files
Medios-Macina/Store/HydrusNetwork.py

2426 lines
100 KiB
Python
Raw Normal View History

2025-12-11 19:04:02 -08:00
from __future__ import annotations
import re
import sys
2026-01-04 02:23:50 -08:00
import tempfile
import shutil
2025-12-11 19:04:02 -08:00
from pathlib import Path
2026-01-24 09:11:05 -08:00
from typing import Any, Dict, List, Literal, Optional, Sequence, Tuple
2025-12-11 19:04:02 -08:00
2026-01-04 02:23:50 -08:00
from urllib.parse import quote
2025-12-13 12:09:50 -08:00
import httpx
2026-02-11 19:06:38 -08:00
from API.httpx_shared import get_shared_httpx_client
2025-12-13 12:09:50 -08:00
2025-12-11 19:04:02 -08:00
from SYS.logger import debug, log
from SYS.utils_constant import mime_maps
2026-02-07 14:58:13 -08:00
# Every file extension (without the leading dot) that mime_maps knows about.
_KNOWN_EXTS = {
    str(entry["ext"]).strip().lstrip(".")
    for group in mime_maps.values()
    for entry in group.values()
    if isinstance(entry, dict) and entry.get("ext")
}
def _resolve_ext_from_meta(meta: Dict[str, Any], mime_type: Optional[str]) -> str:
    """Derive a bare file extension (no dot) from a Hydrus metadata row.

    Resolution order: explicit ext-like keys, then human-readable filetype
    strings, then a mime-type lookup against mime_maps. Unrecognised values
    and the 'ebook' / 'unknown filetype' placeholders are discarded.
    """
    candidate = ""
    for key in ("ext", "file_ext", "extension", "file_extension"):
        value = meta.get(key)
        if value:
            candidate = str(value).strip().lstrip(".")
            break

    # Discard extensions we do not recognise, and the 'ebook' placeholder.
    if candidate and candidate not in _KNOWN_EXTS:
        candidate = ""
    if candidate.lower() == "ebook":
        candidate = ""

    if not candidate:
        # Try human-readable filetype descriptions next.
        human = (
            meta.get("filetype_human")
            or meta.get("mime_human")
            or meta.get("mime_string")
            or meta.get("filetype")
        )
        described = str(human or "").strip().lstrip(".").lower()
        if described and described != "unknown filetype":
            if described.isalnum() and len(described) <= 8:
                candidate = described
            else:
                try:
                    for token in re.findall(r"[a-z0-9]+", described):
                        if token in _KNOWN_EXTS:
                            candidate = token
                            break
                except Exception:
                    pass
        if not candidate:
            # A missing/malformed mime_type argument falls back to metadata fields.
            if not mime_type or not isinstance(mime_type, str) or "/" not in mime_type:
                mime_type = (
                    meta.get("mime_string")
                    or meta.get("mime_human")
                    or meta.get("filetype_mime")
                    or mime_type
                )

    if not candidate and mime_type:
        # Normalise e.g. "image/png; charset=..." down to "image/png".
        try:
            mime_type = str(mime_type).split(";", 1)[0].strip().lower()
        except Exception:
            mime_type = str(mime_type)
        for group in mime_maps.values():
            for entry in group.values():
                if mime_type in entry.get("mimes", []):
                    candidate = str(entry.get("ext", "")).strip().lstrip(".")
                    break
            if candidate:
                break
    return candidate
2025-12-11 23:21:45 -08:00
from Store._base import Store
2025-12-11 19:04:02 -08:00
_HYDRUS_INIT_CHECK_CACHE: dict[tuple[str,
str],
tuple[bool,
Optional[str]]] = {}
2025-12-13 12:09:50 -08:00
2025-12-11 23:21:45 -08:00
class HydrusNetwork(Store):
2025-12-11 19:04:02 -08:00
"""File storage backend for Hydrus client.
2025-12-29 17:05:03 -08:00
2025-12-11 19:04:02 -08:00
Each instance represents a specific Hydrus client connection.
2025-12-13 12:09:50 -08:00
Maintains its own HydrusClient.
2025-12-11 19:04:02 -08:00
"""
2025-12-13 00:18:30 -08:00
2026-01-11 03:24:49 -08:00
@classmethod
def config_schema(cls) -> List[Dict[str, Any]]:
    """Describe the configuration fields needed to register this store."""
    name_field: Dict[str, Any] = {
        "key": "NAME",
        "label": "Store Name",
        "default": "",
        "placeholder": "e.g. home_hydrus",
        "required": True,
    }
    url_field: Dict[str, Any] = {
        "key": "URL",
        "label": "Hydrus URL",
        "default": "http://127.0.0.1:45869",
        "placeholder": "http://127.0.0.1:45869",
        "required": True,
    }
    api_field: Dict[str, Any] = {
        "key": "API",
        "label": "API Key",
        "default": "",
        "required": True,
        "secret": True,
    }
    return [name_field, url_field, api_field]
@property
def is_remote(self) -> bool:
    """Hydrus is reached over its HTTP API, so this store is always remote."""
    return True

@property
def prefer_defer_tags(self) -> bool:
    """Always True: callers should prefer deferring tag writes for this store."""
    return True
def _log_prefix(self) -> str:
store_name = getattr(self, "NAME", None) or "unknown"
return f"[hydrusnetwork:{store_name}]"
2026-02-07 14:58:13 -08:00
def _append_access_key(self, url: str) -> str:
if not url:
return url
if "access_key=" in url:
return url
if not getattr(self, "API", None):
return url
separator = "&" if "?" in url else "?"
return f"{url}{separator}access_key={quote(str(self.API))}"
2025-12-13 00:18:30 -08:00
def __new__(cls, *args: Any, **kwargs: Any) -> "HydrusNetwork":
    """Create the instance and bind NAME/API/URL string attributes from kwargs
    before __init__ runs. Missing keys are simply left unset."""
    instance = super().__new__(cls)
    for attr in ("NAME", "API", "URL"):
        value = kwargs.get(attr)
        if value is not None:
            setattr(instance, attr, str(value))
    return instance
def __init__(
    self,
    instance_name: Optional[str] = None,
    api_key: Optional[str] = None,
    url: Optional[str] = None,
    *,
    NAME: Optional[str] = None,
    API: Optional[str] = None,
    URL: Optional[str] = None,
) -> None:
    """Initialize Hydrus storage backend.

    Args:
        instance_name: Name of this Hydrus instance (e.g., 'home', 'work')
        api_key: Hydrus Client API access key
        url: Hydrus client URL (e.g., 'http://192.168.1.230:45869')
        NAME / API / URL: keyword aliases for the three parameters above.

    Raises:
        ValueError: if name, key, or url is missing.
        RuntimeError: if the endpoint is unreachable or rejects the key.
    """
    from API.HydrusNetwork import HydrusNetwork as HydrusClient

    # Keyword aliases only apply when the positional form was not given.
    if instance_name is None and NAME is not None:
        instance_name = str(NAME)
    if api_key is None and API is not None:
        api_key = str(API)
    if url is None and URL is not None:
        url = str(URL)
    if not instance_name or not api_key or not url:
        raise ValueError("HydrusNetwork requires NAME, API, and URL")

    self.NAME = instance_name
    self.API = api_key
    self.URL = url.rstrip("/")
    # Total count (best-effort, used for startup diagnostics).
    self.total_count: Optional[int] = None

    # Self health-check: the URL must answer /api_version and accept our
    # access key on /verify_access_key. This MUST NOT acquire a session key.
    # Results are memoized per (URL, key) so repeat constructions are cheap.
    cache_key = (self.URL, self.API)
    cached = _HYDRUS_INIT_CHECK_CACHE.get(cache_key)
    if cached is not None:
        ok, err = cached
        if not ok:
            raise RuntimeError(
                f"Hydrus '{self.NAME}' unavailable: {err or 'Unavailable'}"
            )
    else:
        try:
            http = get_shared_httpx_client(timeout=5.0, verify_ssl=False)

            version_resp = http.get(f"{self.URL}/api_version", follow_redirects=True)
            version_resp.raise_for_status()
            if not isinstance(version_resp.json(), dict):
                raise RuntimeError(
                    "Hydrus /api_version returned an unexpected response"
                )

            verify_resp = http.get(
                f"{self.URL}/verify_access_key",
                headers={"Hydrus-Client-API-Access-Key": self.API},
                follow_redirects=True,
            )
            verify_resp.raise_for_status()
            if not isinstance(verify_resp.json(), dict):
                raise RuntimeError(
                    "Hydrus /verify_access_key returned an unexpected response"
                )

            _HYDRUS_INIT_CHECK_CACHE[cache_key] = (True, None)
        except Exception as exc:
            message = str(exc)
            _HYDRUS_INIT_CHECK_CACHE[cache_key] = (False, message)
            raise RuntimeError(f"Hydrus '{self.NAME}' unavailable: {message}") from exc

    # Persistent client for this instance (auth via access key by default).
    self._client = HydrusClient(
        url=self.URL,
        access_key=self.API,
        instance_name=self.NAME,
    )

    self._service_key_cache: Dict[str, Optional[str]] = {}

    # NOTE: the total-count probe is intentionally NOT run here to avoid an
    # extra API call during init; the count is fetched lazily on first
    # search/list via get_total_count() if needed.
def _get_service_key(self, service_name: str, *, refresh: bool = False) -> Optional[str]:
"""Resolve (and cache) the Hydrus service key for the given service name."""
normalized = str(service_name or "my tags").strip()
if not normalized:
normalized = "my tags"
cache_key = normalized.lower()
if not refresh and cache_key in self._service_key_cache:
return self._service_key_cache[cache_key]
client = self._client
if client is None:
self._service_key_cache[cache_key] = None
return None
try:
from API import HydrusNetwork as hydrus_wrapper
resolved = hydrus_wrapper.get_tag_service_key(client, normalized)
except Exception:
resolved = None
self._service_key_cache[cache_key] = resolved
return resolved
2025-12-17 03:16:41 -08:00
def get_total_count(self, *, refresh: bool = False) -> Optional[int]:
    """Best-effort total file count for this Hydrus instance.

    Intended for diagnostics (e.g., REPL startup checks). This should be fast,
    and it MUST NOT raise.
    """
    if self.total_count is not None and not refresh:
        return self.total_count

    def _pick_count(payload: Any) -> Optional[int]:
        # Different Hydrus versions expose the count under different keys.
        if not isinstance(payload, dict):
            return None
        for key in ("file_count", "file_count_inclusive", "num_files"):
            value = payload.get(key)
            if value is not None:
                return value if isinstance(value, int) else None
        return None

    # 1) Prefer a direct JSON request (fast + avoids CBOR edge cases).
    try:
        import json as _json
        params = {
            "tags": _json.dumps(["system:everything"]),
            "return_hashes": "false",
            "return_file_ids": "false",
            "return_file_count": "true",
        }
        headers = {
            "Hydrus-Client-API-Access-Key": self.API,
            "Accept": "application/json",
        }
        http = get_shared_httpx_client(timeout=5.0, verify_ssl=False)
        resp = http.get(
            f"{self.URL}/get_files/search_files",
            params=params,
            headers=headers,
            follow_redirects=True,
        )
        resp.raise_for_status()
        count = _pick_count(resp.json())
        if count is not None:
            self.total_count = count
            return self.total_count
    except Exception as exc:
        debug(
            f"{self._log_prefix()} total count (json) unavailable: {exc}",
            file=sys.stderr
        )

    # 2) Fallback to the API client (CBOR).
    try:
        payload = self._client.search_files(
            tags=["system:everything"],
            return_hashes=False,
            return_file_ids=False,
            return_file_count=True,
        )
        count = _pick_count(payload)
        if count is not None:
            self.total_count = count
        return self.total_count
    except Exception as exc:
        debug(
            f"{self._log_prefix()} total count (client) unavailable: {exc}",
            file=sys.stderr
        )
    return self.total_count
def name(self) -> str:
    """Return the configured store name."""
    return self.NAME

def get_name(self) -> str:
    """Alias of name() for callers expecting an explicit getter."""
    return self.NAME
def set_relationship(self, alt_hash: str, king_hash: str, kind: str = "alt") -> bool:
    """Persist a relationship via the Hydrus client API for this backend instance.

    Both hashes must be distinct 64-char hex digests (normalised to lowercase).
    Returns True on success, False on any validation or client failure; this
    method never raises.
    """
    try:
        alt_norm = str(alt_hash or "").strip().lower()
        king_norm = str(king_hash or "").strip().lower()
        valid = (
            len(alt_norm) == 64
            and len(king_norm) == 64
            and alt_norm != king_norm
        )
        if not valid:
            return False
        client = getattr(self, "_client", None)
        if client is None or not hasattr(client, "set_relationship"):
            return False
        client.set_relationship(alt_norm, king_norm, str(kind or "alt"))
        return True
    except Exception:
        return False
@staticmethod
def _has_current_file_service(meta: Dict[str, Any]) -> bool:
services = meta.get("file_services")
if not isinstance(services, dict):
return False
current = services.get("current")
if isinstance(current, dict):
return any(bool(v) for v in current.values())
if isinstance(current, list):
return len(current) > 0
return False
2025-12-11 19:04:02 -08:00
def add_file(self, file_path: Path, **kwargs: Any) -> str:
    """Upload file to Hydrus with full metadata support.

    Args:
        file_path: Path to the file to upload
        tag: Optional list of tag values to add
        url: Optional list of url to associate with the file
        title: Optional title (will be added as 'title:value' tag)
        hash / file_hash: Optional precomputed SHA-256 hex digest, used to
            avoid re-hashing the file on disk.

    Returns:
        File hash from Hydrus

    Raises:
        Exception: If upload fails
    """
    from SYS.utils import sha256_file

    tag_list = kwargs.get("tag", [])
    urls = kwargs.get("url", [])
    title = kwargs.get("title")

    # Add title to tags if provided and not already present
    if title:
        title_tag = f"title:{title}".strip().lower()
        if not any(str(candidate).lower().startswith("title:")
                   for candidate in tag_list):
            tag_list = [title_tag] + list(tag_list)

    # Hydrus is lowercase-only tags; normalize here for consistency.
    tag_list = [
        str(t).strip().lower() for t in (tag_list or [])
        if isinstance(t, str) and str(t).strip()
    ]

    def _meta_is_current(meta: Any) -> bool:
        """True when a metadata row describes a real file in a current file service.

        Hydrus returns placeholder rows for unknown hashes, and some
        deployments return a file_id even for trashed/missing files, so:
        - prefer the file_services.current signal when present;
        - otherwise only trust rows that look like a real file (size > 0).
        """
        if not isinstance(meta, dict):
            return False
        if meta.get("file_id") is None:
            return False
        if isinstance(meta.get("file_services"), dict):
            return self._has_current_file_service(meta)
        size_val = meta.get("size")
        if size_val is None:
            size_val = meta.get("size_bytes")
        try:
            size_int = int(size_val) if size_val is not None else 0
        except Exception:
            size_int = 0
        return size_int > 0

    try:
        # Compute file hash (or use hint from kwargs to avoid redundant IO)
        file_hash = kwargs.get("hash") or kwargs.get("file_hash")
        if not file_hash:
            file_hash = sha256_file(file_path)
        debug(f"{self._log_prefix()} file hash: {file_hash}")

        # Use persistent client with session key
        client = self._client
        if client is None:
            raise Exception("Hydrus client unavailable")

        # Duplicate check: only a row in a current file service counts.
        file_exists = False
        try:
            metadata = client.fetch_file_metadata(
                hashes=[file_hash],
                include_service_keys_to_tags=False,
                include_file_services=True,
                include_is_trashed=True,
                include_file_url=True,
                include_duration=False,
                include_size=True,
                include_mime=True,
            )
            if metadata and isinstance(metadata, dict):
                metas = metadata.get("metadata", [])
                if isinstance(metas, list):
                    file_exists = any(_meta_is_current(m) for m in metas)
            if file_exists:
                debug(
                    f"{self._log_prefix()} Duplicate detected - file already in Hydrus with hash: {file_hash}"
                )
        except Exception as exc:
            debug(f"{self._log_prefix()} metadata fetch failed: {exc}")

        # An existing file may be in trash: best-effort restore it, then
        # re-check it is actually in a current file service; if not, we
        # fall through to a fresh upload.
        if file_exists:
            try:
                client.undelete_files([file_hash])
            except Exception:
                pass
            try:
                metadata2 = client.fetch_file_metadata(
                    hashes=[file_hash],
                    include_service_keys_to_tags=False,
                    include_file_services=True,
                    include_is_trashed=True,
                    include_file_url=False,
                    include_duration=False,
                    include_size=False,
                    include_mime=False,
                )
                metas2 = metadata2.get("metadata", []) if isinstance(metadata2, dict) else []
                if isinstance(metas2, list) and metas2:
                    if not any(_meta_is_current(m) for m in metas2):
                        file_exists = False
            except Exception:
                # If re-check fails, keep prior behavior (avoid forcing uploads in unknown states)
                pass

        # Upload file if not already present
        if not file_exists:
            debug(
                f"{self._log_prefix()} Uploading: {file_path.name}"
            )
            response = client.add_file(file_path)
            # Extract hash from response
            hydrus_hash: Optional[str] = None
            if isinstance(response, dict):
                hydrus_hash = response.get("hash") or response.get("file_hash")
                if not hydrus_hash:
                    hashes = response.get("hashes")
                    if isinstance(hashes, list) and hashes:
                        hydrus_hash = hashes[0]
            if isinstance(hydrus_hash, (bytes, bytearray)):
                try:
                    hydrus_hash = bytes(hydrus_hash).hex()
                except Exception:
                    hydrus_hash = None
            if hydrus_hash:
                try:
                    hydrus_hash = str(hydrus_hash).strip().lower()
                except Exception:
                    hydrus_hash = None
            if not hydrus_hash or len(str(hydrus_hash)) != 64:
                debug(
                    f"{self._log_prefix()} Hydrus response hash missing/invalid; using precomputed hash"
                )
                hydrus_hash = file_hash
            if not hydrus_hash:
                raise Exception(f"Hydrus response missing file hash: {response}")
            file_hash = hydrus_hash
            debug(f"{self._log_prefix()} hash: {file_hash}")

        # Add tags if provided (both for new and existing files)
        if tag_list:
            # Default tag service (the old try/except here was dead code:
            # both branches assigned the same value).
            service_name = "my tags"
            try:
                debug(
                    f"{self._log_prefix()} Adding {len(tag_list)} tag(s): {tag_list}"
                )
                client.add_tag(file_hash, tag_list, service_name)
                debug(
                    f"{self._log_prefix()} Tags added via '{service_name}'"
                )
            except Exception as exc:
                log(
                    f"{self._log_prefix()} ⚠️ Failed to add tags: {exc}",
                    file=sys.stderr
                )

        # Associate urls if provided (both for new and existing files)
        if urls:
            debug(
                f"{self._log_prefix()} Associating {len(urls)} URL(s) with file"
            )
            # NOTE: previously `for url in url:` shadowed the list variable.
            for candidate_url in urls:
                if not candidate_url:
                    continue
                try:
                    client.associate_url(file_hash, str(candidate_url))
                    debug(f"{self._log_prefix()} Associated URL: {candidate_url}")
                except Exception as exc:
                    log(
                        f"{self._log_prefix()} ⚠️ Failed to associate URL {candidate_url}: {exc}",
                        file=sys.stderr,
                    )
        return file_hash
    except Exception as exc:
        log(f"{self._log_prefix()} ❌ upload failed: {exc}", file=sys.stderr)
        raise
def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]:
2025-12-11 19:04:02 -08:00
"""Search Hydrus database for files matching query.
2025-12-29 17:05:03 -08:00
2025-12-11 19:04:02 -08:00
Args:
query: Search query (tags, filenames, hashes, etc.)
limit: Maximum number of results to return (default: 100)
2025-12-29 17:05:03 -08:00
2025-12-11 19:04:02 -08:00
Returns:
List of dicts with 'name', 'hash', 'size', 'tags' fields
2025-12-29 17:05:03 -08:00
2025-12-11 19:04:02 -08:00
Example:
results = storage["hydrus"].search("artist:john_doe music")
results = storage["hydrus"].search("Simple Man")
"""
limit = kwargs.get("limit", 100)
2026-01-24 01:38:12 -08:00
minimal = bool(kwargs.get("minimal", False))
2026-01-24 09:11:05 -08:00
url_only = bool(kwargs.get("url_only", False))
2025-12-11 19:04:02 -08:00
try:
client = self._client
if client is None:
raise Exception("Hydrus client unavailable")
2025-12-16 23:23:43 -08:00
prefix = self._log_prefix()
debug(f"{prefix} Searching for: {query}")
2025-12-11 19:04:02 -08:00
2025-12-14 00:53:52 -08:00
def _extract_urls(meta_obj: Any) -> list[str]:
if not isinstance(meta_obj, dict):
return []
2026-01-16 01:47:00 -08:00
raw = meta_obj.get("known_urls")
if raw is None:
raw = meta_obj.get("url")
2025-12-14 00:53:52 -08:00
if raw is None:
raw = meta_obj.get("urls")
if isinstance(raw, str):
val = raw.strip()
return [val] if val else []
if isinstance(raw, list):
out: list[str] = []
for item in raw:
if not isinstance(item, str):
continue
s = item.strip()
if s:
out.append(s)
return out
return []
2026-01-16 01:47:00 -08:00
def _extract_search_ids(payload: Any) -> tuple[list[int], list[str]]:
if not isinstance(payload, dict):
return [], []
raw_ids = payload.get("file_ids", [])
raw_hashes = payload.get("hashes", [])
ids_out: list[int] = []
hashes_out: list[str] = []
if isinstance(raw_ids, list):
for item in raw_ids:
try:
if isinstance(item, (int, float)):
ids_out.append(int(item))
continue
if isinstance(item, str) and item.strip().isdigit():
ids_out.append(int(item.strip()))
except Exception:
continue
if isinstance(raw_hashes, list):
for item in raw_hashes:
try:
candidate = str(item or "").strip().lower()
if candidate:
hashes_out.append(candidate)
except Exception:
continue
return ids_out, hashes_out
2025-12-29 17:05:03 -08:00
def _iter_url_filtered_metadata(
url_value: str | None,
want_any: bool,
2026-01-16 01:47:00 -08:00
fetch_limit: int,
2026-01-17 21:32:44 -08:00
scan_limit: int | None = None,
needles: Optional[Sequence[str]] = None,
2026-01-24 01:38:12 -08:00
*,
minimal: bool = False,
2026-01-16 01:47:00 -08:00
) -> list[dict[str, Any]]:
2025-12-14 00:53:52 -08:00
"""Best-effort URL search by scanning Hydrus metadata with include_file_url=True."""
candidate_file_ids: list[int] = []
2026-01-16 01:47:00 -08:00
candidate_hashes: list[str] = []
seen_file_ids: set[int] = set()
seen_hashes: set[str] = set()
def _add_candidates(ids: list[int], hashes: list[str]) -> None:
for fid in ids:
if fid in seen_file_ids:
continue
seen_file_ids.add(fid)
candidate_file_ids.append(fid)
for hh in hashes:
if hh in seen_hashes:
continue
seen_hashes.add(hh)
candidate_hashes.append(hh)
predicate_supported = getattr(self, "_has_url_predicate", None)
if predicate_supported is not False:
try:
2025-12-14 00:53:52 -08:00
predicate = "system:has url"
url_search = client.search_files(
tags=[predicate],
2026-01-16 01:47:00 -08:00
return_hashes=True,
return_file_ids=False,
2025-12-14 00:53:52 -08:00
return_file_count=False,
)
2026-01-16 01:47:00 -08:00
ids, hashes = _extract_search_ids(url_search)
_add_candidates(ids, hashes)
self._has_url_predicate = True
except Exception as exc:
try:
from API.HydrusNetwork import HydrusRequestError
if isinstance(exc, HydrusRequestError) and getattr(exc, "status", None) == 400:
self._has_url_predicate = False
except Exception:
pass
2025-12-14 00:53:52 -08:00
2026-01-16 01:47:00 -08:00
if not candidate_file_ids and not candidate_hashes:
2025-12-14 00:53:52 -08:00
everything = client.search_files(
tags=["system:everything"],
2026-01-16 01:47:00 -08:00
return_hashes=True,
return_file_ids=False,
2025-12-14 00:53:52 -08:00
return_file_count=False,
)
2026-01-16 01:47:00 -08:00
ids, hashes = _extract_search_ids(everything)
_add_candidates(ids, hashes)
if not candidate_file_ids and not candidate_hashes:
2025-12-14 00:53:52 -08:00
return []
2026-01-17 21:32:44 -08:00
needle_list: list[str] = []
if isinstance(needles, (list, tuple, set)):
for item in needles:
text = str(item or "").strip().lower()
if text and text not in needle_list:
needle_list.append(text)
if not needle_list:
needle = (url_value or "").strip().lower()
if needle:
needle_list = [needle]
2025-12-14 00:53:52 -08:00
chunk_size = 200
out: list[dict[str, Any]] = []
2026-01-16 01:47:00 -08:00
if scan_limit is None:
2025-12-14 00:53:52 -08:00
try:
2026-01-17 21:32:44 -08:00
if not want_any and needle_list:
if len(needle_list) > 1:
scan_limit = max(int(fetch_limit) * 20, 2000)
else:
scan_limit = max(200, min(int(fetch_limit), 400))
2026-01-16 01:47:00 -08:00
else:
scan_limit = max(int(fetch_limit) * 5, 1000)
2025-12-14 00:53:52 -08:00
except Exception:
2026-01-17 21:32:44 -08:00
scan_limit = 400 if (not want_any and needle_list) else 1000
2026-01-16 01:47:00 -08:00
if scan_limit is not None:
scan_limit = min(int(scan_limit), 10000)
scanned = 0
def _process_source(items: list[Any], kind: str) -> None:
nonlocal scanned
for start in range(0, len(items), chunk_size):
if len(out) >= fetch_limit:
return
if scan_limit is not None and scanned >= scan_limit:
return
chunk = items[start:start + chunk_size]
if scan_limit is not None:
remaining = scan_limit - scanned
if remaining <= 0:
return
if len(chunk) > remaining:
chunk = chunk[:remaining]
scanned += len(chunk)
try:
if kind == "hashes":
payload = client.fetch_file_metadata(
hashes=chunk,
include_file_url=True,
2026-01-24 01:38:12 -08:00
include_service_keys_to_tags=not minimal,
include_duration=not minimal,
include_size=not minimal,
include_mime=not minimal,
2026-01-16 01:47:00 -08:00
)
else:
payload = client.fetch_file_metadata(
file_ids=chunk,
include_file_url=True,
2026-01-24 01:38:12 -08:00
include_service_keys_to_tags=not minimal,
include_duration=not minimal,
include_size=not minimal,
include_mime=not minimal,
2026-01-16 01:47:00 -08:00
)
except Exception:
2025-12-14 00:53:52 -08:00
continue
2026-01-16 01:47:00 -08:00
metas = payload.get("metadata",
[]) if isinstance(payload,
dict) else []
if not isinstance(metas, list):
2025-12-14 00:53:52 -08:00
continue
2026-01-16 01:47:00 -08:00
for meta in metas:
2025-12-14 00:53:52 -08:00
if len(out) >= fetch_limit:
break
2026-01-16 01:47:00 -08:00
if not isinstance(meta, dict):
continue
urls = _extract_urls(meta)
if not urls:
continue
if want_any:
out.append(meta)
continue
2026-01-17 21:32:44 -08:00
if not needle_list:
2026-01-16 01:47:00 -08:00
continue
2026-01-17 21:32:44 -08:00
if any(any(n in u.lower() for n in needle_list) for u in urls):
2026-01-16 01:47:00 -08:00
out.append(meta)
continue
sources: list[tuple[str, list[Any]]] = []
if candidate_hashes:
sources.append(("hashes", candidate_hashes))
elif candidate_file_ids:
sources.append(("file_ids", candidate_file_ids))
for kind, items in sources:
if len(out) >= fetch_limit:
break
_process_source(items, kind)
2025-12-14 00:53:52 -08:00
return out
2026-01-24 09:11:05 -08:00
def _search_url_query_metadata(
url_query: str,
fetch_limit: int,
*,
minimal: bool = False,
) -> list[dict[str, Any]]:
"""Run a strict url:<pattern> search without falling back to system predicates."""
if not url_query:
return []
try:
payload = client.search_files(
tags=[url_query],
return_hashes=True,
return_file_ids=True,
)
except Exception:
return []
candidate_ids, candidate_hashes = _extract_search_ids(payload)
if not candidate_ids and not candidate_hashes:
return []
metas_out: list[dict[str, Any]] = []
chunk_size = 200
def _fetch_chunk(kind: Literal["file_ids", "hashes"], values: list[Any]) -> None:
nonlocal metas_out
if not values or len(metas_out) >= fetch_limit:
return
for start in range(0, len(values), chunk_size):
if len(metas_out) >= fetch_limit:
break
remaining = fetch_limit - len(metas_out)
if remaining <= 0:
break
end = start + min(chunk_size, remaining)
chunk = values[start:end]
if not chunk:
continue
try:
if kind == "file_ids":
metadata = client.fetch_file_metadata(
file_ids=chunk,
include_file_url=True,
include_service_keys_to_tags=False,
include_duration=False,
include_size=not minimal,
include_mime=False,
)
else:
metadata = client.fetch_file_metadata(
hashes=chunk,
include_file_url=True,
include_service_keys_to_tags=False,
include_duration=False,
include_size=not minimal,
include_mime=False,
)
except Exception:
continue
fetched = metadata.get("metadata", []) if isinstance(metadata, dict) else []
if not isinstance(fetched, list):
continue
for meta in fetched:
if len(metas_out) >= fetch_limit:
break
if not isinstance(meta, dict):
continue
metas_out.append(meta)
if candidate_ids:
_fetch_chunk("file_ids", candidate_ids)
if len(metas_out) < fetch_limit and candidate_hashes:
_fetch_chunk("hashes", candidate_hashes)
return metas_out[:fetch_limit]
2025-12-14 00:53:52 -08:00
query_lower = query.lower().strip()
2025-12-20 23:57:44 -08:00
# Support `ext:<value>` anywhere in the query. We filter results by the
# Hydrus metadata extension field.
def _normalize_ext_filter(value: str) -> str:
2025-12-29 17:05:03 -08:00
v = str(value or "").strip().lower().lstrip(".")
2025-12-20 23:57:44 -08:00
v = "".join(ch for ch in v if ch.isalnum())
return v
ext_filter: str | None = None
ext_only: bool = False
try:
m = re.search(r"\bext:([^\s,]+)", query_lower)
if not m:
m = re.search(r"\bextension:([^\s,]+)", query_lower)
if m:
ext_filter = _normalize_ext_filter(m.group(1)) or None
query_lower = re.sub(
r"\s*\b(?:ext|extension):[^\s,]+",
" ",
query_lower
)
2025-12-29 17:05:03 -08:00
query_lower = re.sub(r"\s{2,}", " ", query_lower).strip().strip(",")
2025-12-20 23:57:44 -08:00
query = query_lower
if ext_filter and not query_lower:
query = "*"
query_lower = "*"
ext_only = True
except Exception:
ext_filter = None
ext_only = False
# Split into meaningful terms for AND logic.
# Avoid punctuation tokens like '-' that would make matching brittle.
search_terms = [t for t in re.findall(r"[a-z0-9]+", query_lower) if t]
2025-12-14 00:53:52 -08:00
# Special case: url:* and url:<value>
metadata_list: list[dict[str, Any]] | None = None
2026-01-17 21:32:44 -08:00
pattern_hint_raw = kwargs.get("pattern_hint")
pattern_hints: list[str] = []
if isinstance(pattern_hint_raw, (list, tuple, set)):
for item in pattern_hint_raw:
text = str(item or "").strip().lower()
if text and text not in pattern_hints:
pattern_hints.append(text)
elif isinstance(pattern_hint_raw, str):
text = pattern_hint_raw.strip().lower()
if text:
pattern_hints.append(text)
pattern_hint = pattern_hints[0] if pattern_hints else ""
2026-01-19 06:24:09 -08:00
hashes: list[str] = []
file_ids: list[int] = []
2025-12-14 00:53:52 -08:00
if ":" in query_lower and not query_lower.startswith(":"):
namespace, pattern = query_lower.split(":", 1)
namespace = namespace.strip().lower()
pattern = pattern.strip()
if namespace == "url":
2026-01-24 09:11:05 -08:00
try:
fetch_limit_raw = int(limit) if limit else 100
except Exception:
fetch_limit_raw = 100
if url_only:
metadata_list = _search_url_query_metadata(
query_lower,
fetch_limit_raw,
minimal=minimal,
)
2025-12-14 00:53:52 -08:00
else:
2026-01-24 09:11:05 -08:00
if not pattern or pattern == "*":
if pattern_hints:
metadata_list = _iter_url_filtered_metadata(
None,
want_any=False,
fetch_limit=fetch_limit_raw,
needles=pattern_hints,
minimal=minimal,
2025-12-29 17:05:03 -08:00
)
2026-01-24 09:11:05 -08:00
else:
metadata_list = _iter_url_filtered_metadata(
None,
want_any=True,
fetch_limit=fetch_limit_raw,
minimal=minimal,
)
else:
def _clean_url_search_token(value: str | None) -> str:
token = str(value or "").strip().lower()
if not token:
return ""
return token.replace("*", "").replace("?", "")
# Fast-path: exact URL via /add_urls/get_url_files when a full URL is provided.
try:
if pattern.startswith("http://") or pattern.startswith(
"https://"):
from API.HydrusNetwork import HydrusRequestSpec
spec = HydrusRequestSpec(
method="GET",
endpoint="/add_urls/get_url_files",
query={
"url": pattern
},
2025-12-29 17:05:03 -08:00
)
2026-01-24 09:11:05 -08:00
response = client._perform_request(
spec
) # type: ignore[attr-defined]
hashes = []
file_ids = []
if isinstance(response, dict):
raw_hashes = response.get("hashes") or response.get(
"file_hashes"
)
if isinstance(raw_hashes, list):
hashes = [
str(h).strip() for h in raw_hashes
if isinstance(h, str) and str(h).strip()
]
raw_ids = response.get("file_ids")
if isinstance(raw_ids, list):
for item in raw_ids:
try:
file_ids.append(int(item))
except (TypeError, ValueError):
continue
if file_ids:
payload = client.fetch_file_metadata(
file_ids=file_ids,
include_file_url=True,
include_service_keys_to_tags=not minimal,
include_duration=not minimal,
include_size=not minimal,
include_mime=not minimal,
)
metas = (
payload.get("metadata",
[]) if isinstance(payload,
dict) else []
)
if isinstance(metas, list):
metadata_list = [
m for m in metas if isinstance(m, dict)
]
elif hashes:
payload = client.fetch_file_metadata(
hashes=hashes,
include_file_url=True,
include_service_keys_to_tags=not minimal,
include_duration=not minimal,
include_size=not minimal,
include_mime=not minimal,
)
metas = (
payload.get("metadata",
[]) if isinstance(payload,
dict) else []
)
if isinstance(metas, list):
metadata_list = [
m for m in metas if isinstance(m, dict)
]
except Exception:
metadata_list = None
# Fallback: substring scan
if metadata_list is None:
search_token = _clean_url_search_token(pattern_hint or pattern)
scan_limit_override: int | None = None
if search_token:
is_domain_only = ("://" not in search_token and "/" not in search_token)
if is_domain_only:
try:
scan_limit_override = max(fetch_limit_raw * 20, 2000)
except Exception:
scan_limit_override = 2000
metadata_list = _iter_url_filtered_metadata(
search_token,
want_any=False,
fetch_limit=fetch_limit_raw,
scan_limit=scan_limit_override,
needles=pattern_hints if pattern_hints else None,
minimal=minimal,
)
2026-01-23 19:21:06 -08:00
elif namespace == "system":
normalized_system_predicate = pattern.strip()
if normalized_system_predicate == "has url":
try:
fetch_limit = int(limit) if limit else 100
except Exception:
fetch_limit = 100
metadata_list = _iter_url_filtered_metadata(
None,
want_any=not bool(pattern_hints),
fetch_limit=fetch_limit,
needles=pattern_hints if pattern_hints else None,
2026-01-24 01:38:12 -08:00
minimal=minimal,
2026-01-23 19:21:06 -08:00
)
2025-12-14 00:53:52 -08:00
2025-12-11 19:04:02 -08:00
# Parse the query into tags
# "*" means "match all" - use system:everything tag in Hydrus
2025-12-20 23:57:44 -08:00
# If query has explicit namespace, use it as a tag search.
# If query is free-form, search BOTH:
# - title:*term* (title: is the only namespace searched implicitly)
# - *term* (freeform tags; we will filter out other namespace matches client-side)
tags: list[str] = []
freeform_union_search: bool = False
title_predicates: list[str] = []
freeform_predicates: list[str] = []
2025-12-11 19:04:02 -08:00
if query.strip() == "*":
tags = ["system:everything"]
2025-12-29 17:05:03 -08:00
elif ":" in query_lower:
2025-12-20 23:57:44 -08:00
tags = [query_lower]
2025-12-11 19:04:02 -08:00
else:
2025-12-20 23:57:44 -08:00
freeform_union_search = True
if search_terms:
# Hydrus supports wildcard matching primarily as a prefix (e.g., tag*).
# Use per-term prefix matching for both title: and freeform tags.
title_predicates = [f"title:{term}*" for term in search_terms]
freeform_predicates = [f"{term}*" for term in search_terms]
2025-12-11 19:04:02 -08:00
else:
2025-12-20 23:57:44 -08:00
# If we can't extract alnum terms, fall back to the raw query text.
title_predicates = [f"title:{query_lower}*"]
freeform_predicates = [f"{query_lower}*"]
2025-12-29 17:05:03 -08:00
2025-12-14 00:53:52 -08:00
# Search files with the tags (unless url: search already produced metadata)
2026-01-19 06:24:09 -08:00
results: list[dict[str, Any]] = []
2025-12-20 23:57:44 -08:00
2025-12-14 00:53:52 -08:00
if metadata_list is None:
2026-01-19 06:24:09 -08:00
file_ids = []
hashes = []
2025-12-20 23:57:44 -08:00
if freeform_union_search:
if not title_predicates and not freeform_predicates:
debug(f"{prefix} 0 result(s)")
return []
payloads: list[Any] = []
try:
payloads.append(
client.search_files(
tags=title_predicates,
return_hashes=True,
return_file_ids=True,
)
)
except Exception:
pass
2026-02-08 01:35:44 -08:00
# Extra pass: match a full title phrase when the query includes
# spaces or punctuation (e.g., "i've been down").
2025-12-20 23:57:44 -08:00
try:
2026-02-08 01:35:44 -08:00
if query_lower and query_lower != "*" and "*" not in query_lower:
if any(ch in query_lower for ch in (" ", "'", "-", "_")):
payloads.append(
client.search_files(
tags=[f"title:{query_lower}*"],
return_hashes=True,
return_file_ids=True,
)
)
except Exception:
pass
try:
title_ids, title_hashes = _extract_search_ids(
payloads[0] if payloads else None
2025-12-20 23:57:44 -08:00
)
2026-02-08 01:35:44 -08:00
# Optimization: for single-term queries, skip the freeform query
# to avoid duplicate requests.
single_term = bool(search_terms and len(search_terms) == 1)
if not single_term:
payloads.append(
client.search_files(
tags=freeform_predicates,
return_hashes=True,
return_file_ids=True,
)
)
2025-12-20 23:57:44 -08:00
except Exception:
pass
id_set: set[int] = set()
hash_set: set[str] = set()
for payload in payloads:
ids_part, hashes_part = _extract_search_ids(payload)
for fid in ids_part:
id_set.add(fid)
for hh in hashes_part:
hash_set.add(hh)
file_ids = list(id_set)
hashes = list(hash_set)
else:
if not tags:
debug(f"{prefix} 0 result(s)")
return []
search_result = client.search_files(
tags=tags,
return_hashes=True,
return_file_ids=True
2025-12-20 23:57:44 -08:00
)
file_ids, hashes = _extract_search_ids(search_result)
# Fast path: ext-only search. Avoid fetching metadata for an unbounded
# system:everything result set; fetch in chunks until we have enough.
if ext_only and ext_filter:
2026-01-19 06:24:09 -08:00
results = []
2025-12-20 23:57:44 -08:00
if not file_ids and not hashes:
debug(f"{prefix} 0 result(s)")
return []
# Prefer file_ids if available.
if file_ids:
chunk_size = 200
for start in range(0, len(file_ids), chunk_size):
if len(results) >= limit:
break
chunk = file_ids[start:start + chunk_size]
2025-12-20 23:57:44 -08:00
try:
payload = client.fetch_file_metadata(
file_ids=chunk,
include_service_keys_to_tags=True,
2026-01-11 02:26:39 -08:00
include_file_url=True,
2025-12-20 23:57:44 -08:00
include_duration=True,
include_size=True,
include_mime=True,
)
except Exception:
continue
metas = payload.get("metadata",
[]) if isinstance(payload,
dict) else []
2025-12-20 23:57:44 -08:00
if not isinstance(metas, list):
continue
for meta in metas:
if len(results) >= limit:
break
if not isinstance(meta, dict):
continue
mime_type = meta.get("mime")
2026-02-07 14:58:13 -08:00
ext = _resolve_ext_from_meta(meta, mime_type)
2025-12-20 23:57:44 -08:00
if _normalize_ext_filter(ext) != ext_filter:
continue
2025-12-14 00:53:52 -08:00
2025-12-20 23:57:44 -08:00
file_id = meta.get("file_id")
hash_hex = meta.get("hash")
2026-02-11 20:25:22 -08:00
size_val = meta.get("size")
if size_val is None:
size_val = meta.get("size_bytes")
try:
size = int(size_val) if size_val is not None else 0
except Exception:
size = 0
2025-12-20 23:57:44 -08:00
2026-02-11 20:25:22 -08:00
title, all_tags = self._extract_title_and_tags(meta, file_id)
2026-01-11 02:26:39 -08:00
# Use known URLs (source URLs) from Hydrus if available (matches get-url cmdlet)
item_url = meta.get("known_urls") or meta.get("urls") or meta.get("url") or []
if not item_url:
item_url = meta.get("file_url") or f"{self.URL.rstrip('/')}/view_file?hash={hash_hex}"
2026-02-07 14:58:13 -08:00
if isinstance(item_url, str) and "/view_file" in item_url:
item_url = self._append_access_key(item_url)
2026-01-11 02:26:39 -08:00
2025-12-20 23:57:44 -08:00
results.append(
{
"hash": hash_hex,
2026-01-11 02:26:39 -08:00
"url": item_url,
2025-12-20 23:57:44 -08:00
"name": title,
"title": title,
"size": size,
"size_bytes": size,
"store": self.NAME,
"tag": all_tags,
"file_id": file_id,
"mime": mime_type,
2026-02-07 14:58:13 -08:00
"ext": _resolve_ext_from_meta(meta, mime_type),
2025-12-20 23:57:44 -08:00
}
)
debug(f"{prefix} {len(results)} result(s)")
return results[:limit]
# If we only got hashes, fall back to the normal flow below.
2025-12-14 00:53:52 -08:00
if not file_ids and not hashes:
2025-12-16 23:23:43 -08:00
debug(f"{prefix} 0 result(s)")
2025-12-14 00:53:52 -08:00
return []
if file_ids:
2025-12-20 23:57:44 -08:00
metadata = client.fetch_file_metadata(
file_ids=file_ids,
include_service_keys_to_tags=True,
2026-01-11 02:26:39 -08:00
include_file_url=True,
2025-12-20 23:57:44 -08:00
include_duration=True,
include_size=True,
include_mime=True,
)
2025-12-14 00:53:52 -08:00
metadata_list = metadata.get("metadata", [])
elif hashes:
2025-12-20 23:57:44 -08:00
metadata = client.fetch_file_metadata(
hashes=hashes,
include_service_keys_to_tags=True,
2026-01-11 02:26:39 -08:00
include_file_url=True,
2025-12-20 23:57:44 -08:00
include_duration=True,
include_size=True,
include_mime=True,
)
2025-12-14 00:53:52 -08:00
metadata_list = metadata.get("metadata", [])
else:
metadata_list = []
2025-12-20 23:57:44 -08:00
# If our free-text searches produce nothing (or nothing survived downstream filtering), fallback to scanning.
if (not metadata_list) and (query_lower
!= "*") and (":" not in query_lower):
2025-12-20 23:57:44 -08:00
try:
search_result = client.search_files(
tags=["system:everything"],
return_hashes=True,
return_file_ids=True,
)
file_ids, hashes = _extract_search_ids(search_result)
if file_ids:
metadata = client.fetch_file_metadata(
file_ids=file_ids,
include_service_keys_to_tags=True,
2026-01-11 02:26:39 -08:00
include_file_url=True,
2025-12-20 23:57:44 -08:00
include_duration=True,
include_size=True,
include_mime=True,
)
metadata_list = metadata.get("metadata", [])
elif hashes:
metadata = client.fetch_file_metadata(
hashes=hashes,
include_service_keys_to_tags=True,
2026-01-11 02:26:39 -08:00
include_file_url=True,
2025-12-20 23:57:44 -08:00
include_duration=True,
include_size=True,
include_mime=True,
)
metadata_list = metadata.get("metadata", [])
except Exception:
pass
2025-12-14 00:53:52 -08:00
if not isinstance(metadata_list, list):
metadata_list = []
2025-12-16 01:45:01 -08:00
for meta in metadata_list:
2025-12-29 17:05:03 -08:00
if len(results) >= limit:
break
file_id = meta.get("file_id")
hash_hex = meta.get("hash")
2026-02-11 20:25:22 -08:00
size_val = meta.get("size")
if size_val is None:
size_val = meta.get("size_bytes")
try:
size = int(size_val) if size_val is not None else 0
except Exception:
size = 0
2025-12-29 17:05:03 -08:00
2026-02-11 20:25:22 -08:00
title, all_tags = self._extract_title_and_tags(meta, file_id)
2025-12-29 17:05:03 -08:00
# Prefer Hydrus-provided extension (e.g. ".webm"); fall back to MIME map.
mime_type = meta.get("mime")
2026-02-07 14:58:13 -08:00
ext = _resolve_ext_from_meta(meta, mime_type)
2025-12-29 17:05:03 -08:00
# Filter results based on query type
# If user provided explicit namespace (has ':'), don't do substring filtering
# Just include what the tag search returned
has_namespace = ":" in query_lower
2026-01-11 02:26:39 -08:00
# Use known URLs (source URLs) from Hydrus if available (matches get-url cmdlet)
item_url = meta.get("known_urls") or meta.get("urls") or meta.get("url") or []
if not item_url:
item_url = meta.get("file_url") or f"{self.URL.rstrip('/')}/view_file?hash={hash_hex}"
2026-02-07 14:58:13 -08:00
if isinstance(item_url, str) and "/view_file" in item_url:
item_url = self._append_access_key(item_url)
2026-01-11 02:26:39 -08:00
2025-12-29 17:05:03 -08:00
if has_namespace:
# Explicit namespace search - already filtered by Hydrus tag search
# Include this result as-is
results.append(
{
2025-12-11 19:04:02 -08:00
"hash": hash_hex,
2026-01-11 02:26:39 -08:00
"url": item_url,
2025-12-11 19:04:02 -08:00
"name": title,
"title": title,
"size": size,
"size_bytes": size,
2025-12-13 00:18:30 -08:00
"store": self.NAME,
2025-12-29 17:05:03 -08:00
"tag": all_tags,
2025-12-11 19:04:02 -08:00
"file_id": file_id,
"mime": mime_type,
"ext": ext,
2025-12-29 17:05:03 -08:00
}
)
else:
# Free-form search: check if search terms match title or FREEFORM tags.
# Do NOT implicitly match other namespace tags (except title:).
freeform_tags = [
t for t in all_tags
if isinstance(t, str) and t and (":" not in t)
2025-12-29 17:05:03 -08:00
]
searchable_text = (title + " " + " ".join(freeform_tags)).lower()
match = True
if query_lower != "*" and search_terms:
for term in search_terms:
if term not in searchable_text:
match = False
break
if match:
results.append(
{
2025-12-11 19:04:02 -08:00
"hash": hash_hex,
2026-01-11 02:26:39 -08:00
"url": item_url,
2025-12-11 19:04:02 -08:00
"name": title,
"title": title,
"size": size,
"size_bytes": size,
2025-12-13 00:18:30 -08:00
"store": self.NAME,
2025-12-11 23:21:45 -08:00
"tag": all_tags,
2025-12-11 19:04:02 -08:00
"file_id": file_id,
"mime": mime_type,
"ext": ext,
2025-12-29 17:05:03 -08:00
}
)
2025-12-16 23:23:43 -08:00
debug(f"{prefix} {len(results)} result(s)")
2025-12-20 23:57:44 -08:00
if ext_filter:
wanted = ext_filter
filtered: list[dict[str, Any]] = []
for item in results:
try:
if _normalize_ext_filter(str(item.get("ext") or "")) == wanted:
filtered.append(item)
except Exception:
continue
results = filtered
2025-12-11 19:04:02 -08:00
return results[:limit]
except Exception as exc:
log(f"❌ Hydrus search failed: {exc}", file=sys.stderr)
import traceback
2025-12-29 17:05:03 -08:00
2025-12-11 19:04:02 -08:00
traceback.print_exc(file=sys.stderr)
raise
def get_file(self, file_hash: str, **kwargs: Any) -> Path | str | None:
    """Return the local file system path if available, else a browser URL.

    IMPORTANT: this method must be side-effect free (do not auto-open a browser).
    Only explicit user actions (e.g. the get-file cmdlet) should open files.

    Args:
        file_hash: SHA256 hash (hex) of the file; normalized to lowercase.
        **kwargs: ``url=True`` forces the HTTP URL even when a local path
            exists (used by the 'get-file' cmdlet for interactive viewing).

    Returns:
        A local ``Path`` when the Hydrus file store is reachable on this
        machine, otherwise an authenticated ``/get_files/file`` URL.
    """
    file_hash = str(file_hash or "").strip().lower()
    debug(f"{self._log_prefix()} get_file(hash={file_hash[:12]}..., url={kwargs.get('url')})")

    def _browser_url() -> str:
        # Direct-download endpoint; the access key authenticates the request.
        base_url = str(self.URL).rstrip("/")
        access_key = str(self.API)
        return (
            f"{base_url}/get_files/file?hash={file_hash}&Hydrus-Client-API-Access-Key={access_key}"
        )

    # If 'url=True' is passed, we preference the browser URL even if a local
    # path is available.
    if kwargs.get("url"):
        browser_url = _browser_url()
        debug(f"{self._log_prefix()} get_file: returning browser URL per request: {browser_url}")
        return browser_url

    # Try to get the local disk path if possible (works if Hydrus is on same machine)
    server_path = None
    try:
        path_res = self._client.get_file_path(file_hash)
        if isinstance(path_res, dict) and "path" in path_res:
            server_path = path_res["path"]
        if server_path:
            local_path = Path(server_path)
            if local_path.exists():
                debug(f"{self._log_prefix()} get_file: found local path: {local_path}")
                return local_path
    except Exception as e:
        debug(f"{self._log_prefix()} get_file: could not resolve path from API: {e}")

    # If we found a path on the server but it's not locally accessible,
    # keep it for logging but continue to the browser URL fallback so the UI
    # can still open the file via the Hydrus web UI.
    if server_path:
        debug(
            f"{self._log_prefix()} get_file: server path not locally accessible, falling back to HTTP: {server_path}"
        )

    # Fallback to browser URL with access key
    browser_url = _browser_url()
    debug(f"{self._log_prefix()} get_file: falling back to url={browser_url}")
    return browser_url
2026-01-04 02:23:50 -08:00
def download_to_temp(
    self,
    file_hash: str,
    *,
    temp_root: Optional[Path] = None,
) -> Optional[Path]:
    """Download a Hydrus file to a temporary path for downstream uploads.

    Args:
        file_hash: SHA256 hash (64-char hex) of the file to download.
        temp_root: Optional directory to download into. When omitted, a
            fresh ``hydrus-file-*`` temp directory is created and removed
            again on failure; a caller-supplied root is never deleted.

    Returns:
        Path to the downloaded file, or ``None`` on any failure.
    """
    created_tmp = False
    base_tmp: Optional[Path] = None
    dest_path: Optional[Path] = None
    try:
        client = self._client
        if client is None:
            return None
        h = str(file_hash or "").strip().lower()
        # Only full SHA256 hex digests are valid Hydrus file identifiers.
        if len(h) != 64 or not all(ch in "0123456789abcdef" for ch in h):
            return None
        if temp_root is not None:
            base_tmp = Path(temp_root)
        else:
            base_tmp = Path(tempfile.mkdtemp(prefix="hydrus-file-"))
            created_tmp = True
        base_tmp.mkdir(parents=True, exist_ok=True)

        def _safe_filename(raw: str) -> str:
            # Replace characters that are invalid in Windows filenames.
            cleaned = re.sub(r"[\\/:*?\"<>|]", "_", str(raw or "")).strip()
            if not cleaned:
                return h
            return cleaned.strip(". ") or h

        # Prefer ext/title from metadata when available.
        fname = h
        ext_val = ""
        try:
            meta = self.get_metadata(h) or {}
            if isinstance(meta, dict):
                title_val = str(meta.get("title") or "").strip()
                if title_val:
                    fname = _safe_filename(title_val)
                ext_val = str(meta.get("ext") or "").strip().lstrip(".")
        except Exception:
            pass
        if not fname:
            fname = h
        if ext_val and not fname.lower().endswith(f".{ext_val.lower()}"):
            fname = f"{fname}.{ext_val}"
        try:
            file_url = client.file_url(h)
        except Exception:
            file_url = f"{self.URL.rstrip('/')}/get_files/file?hash={quote(h)}"
        dest_path = base_tmp / fname
        # Stream to disk so large files never have to fit in memory.
        stream_client = get_shared_httpx_client(timeout=60.0, verify_ssl=False)
        with stream_client.stream(
            "GET",
            file_url,
            headers={"Hydrus-Client-API-Access-Key": self.API},
            follow_redirects=True,
            timeout=60.0,
        ) as resp:
            resp.raise_for_status()
            with dest_path.open("wb") as fh:
                for chunk in resp.iter_bytes():
                    if chunk:
                        fh.write(chunk)
        if dest_path.exists():
            return dest_path
        if created_tmp:
            try:
                shutil.rmtree(base_tmp, ignore_errors=True)
            except Exception:
                pass
        return None
    except Exception as exc:
        log(f"{self._log_prefix()} download_to_temp failed: {exc}", file=sys.stderr)
        # Best-effort cleanup: drop a partially written file inside a
        # caller-supplied root, and remove any temp dir this call created.
        try:
            if dest_path is not None and temp_root is not None and dest_path.exists():
                dest_path.unlink()
        except Exception:
            pass
        try:
            if created_tmp and base_tmp is not None:
                shutil.rmtree(base_tmp, ignore_errors=True)
        except Exception:
            pass
        return None
2025-12-27 14:50:59 -08:00
def delete_file(self, file_identifier: str, **kwargs: Any) -> bool:
    """Delete a file from Hydrus, then clear the deletion record.

    This is used by the delete-file cmdlet when the item belongs to a
    HydrusNetwork store.
    """
    try:
        client = self._client
        if client is None:
            debug(f"{self._log_prefix()} delete_file: client unavailable")
            return False
        file_hash = str(file_identifier or "").strip().lower()
        is_sha256_hex = len(file_hash) == 64 and all(
            ch in "0123456789abcdef" for ch in file_hash
        )
        if not is_sha256_hex:
            debug(
                f"{self._log_prefix()} delete_file: invalid file hash '{file_identifier}'"
            )
            return False
        raw_reason = kwargs.get("reason")
        reason_text = None
        if isinstance(raw_reason, str) and raw_reason.strip():
            reason_text = str(raw_reason).strip()
        # 1) Delete file
        client.delete_files([file_hash], reason=reason_text)
        # 2) Clear deletion record (best-effort)
        try:
            client.clear_file_deletion_record([file_hash])
        except Exception as exc:
            debug(
                f"{self._log_prefix()} delete_file: clear_file_deletion_record failed: {exc}"
            )
        return True
    except Exception as exc:
        debug(f"{self._log_prefix()} delete_file failed: {exc}")
        return False
2025-12-11 19:04:02 -08:00
def get_metadata(self, file_hash: str, **kwargs: Any) -> Optional[Dict[str, Any]]:
    """Get metadata for a file from Hydrus by hash.

    Args:
        file_hash: SHA256 hash of the file (64-char hex string)

    Returns:
        Dict with metadata fields or None if not found
    """
    try:
        client = self._client
        if not client:
            debug(f"{self._log_prefix()} get_metadata: client unavailable")
            return None

        # Fetch file metadata with the fields we need for CLI display.
        response = client.fetch_file_metadata(
            hashes=[file_hash],
            include_service_keys_to_tags=True,
            include_file_url=True,
            include_duration=True,
            include_size=True,
            include_mime=True,
        )
        if not response or not response.get("metadata"):
            return None
        row = response["metadata"][0]
        # Hydrus can return placeholder metadata rows for unknown hashes.
        if not isinstance(row, dict) or row.get("file_id") is None:
            return None

        # Derive a display title from the first non-empty "title:" tag.
        title = f"Hydrus_{file_hash[:12]}"
        for candidate_tag in self._extract_tags_from_hydrus_meta(
            row,
            service_key=None,
            service_name="my tags",
        ):
            tag_text = str(candidate_tag or "").strip()
            if not tag_text:
                continue
            if tag_text.lower().startswith("title:"):
                value = tag_text.split(":", 1)[1].strip()
                if value:
                    title = value
                    break

        # Hydrus may return mime as an int enum, or sometimes a human label.
        raw_mime = row.get("mime")
        filetype_human = (
            row.get("filetype_human") or row.get("mime_human")
            or row.get("mime_string")
        )

        # Determine ext: prefer Hydrus metadata ext, then filetype_human (when
        # it looks like an ext), then title suffix, then file path suffix.
        ext = str(row.get("ext") or "").strip().lstrip(".")
        if not ext:
            label = str(filetype_human or "").strip().lstrip(".").lower()
            # Treat simple labels like "mp4", "m4a", "webm" as extensions.
            if label and label != "unknown filetype" and label.isalnum() and len(label) <= 8:
                ext = label
        if not ext and isinstance(title, str) and "." in title:
            try:
                ext = Path(title).suffix.lstrip(".")
            except Exception:
                ext = ""
        if not ext:
            try:
                path_payload = client.get_file_path(file_hash)
                if isinstance(path_payload, dict):
                    p = path_payload.get("path")
                    if isinstance(p, str) and p.strip():
                        ext = Path(p.strip()).suffix.lstrip(".")
            except Exception:
                ext = ""

        def _lookup_mime(ext_value: str) -> str:
            # Best-effort reverse lookup: extension -> first known MIME type.
            ext_clean = str(ext_value or "").strip().lstrip(".").lower()
            if not ext_clean:
                return ""
            try:
                for category in mime_maps.values():
                    info = category.get(ext_clean)
                    if isinstance(info, dict):
                        mimes = info.get("mimes")
                        if isinstance(mimes, list) and mimes:
                            return str(mimes[0])
            except Exception:
                return ""
            return ""

        # Normalize to a MIME string for CLI output.
        # Avoid passing through human labels like "unknown filetype".
        mime_type = ""
        for source in (raw_mime, filetype_human):
            if mime_type:
                break
            if isinstance(source, str):
                candidate = source.strip()
                if "/" in candidate and candidate.lower() != "unknown filetype":
                    mime_type = candidate
        if not mime_type:
            mime_type = _lookup_mime(ext)

        # Normalize size/duration to stable scalar types.
        raw_size = row.get("size")
        if raw_size is None:
            raw_size = row.get("size_bytes")
        try:
            size_int = int(raw_size) if raw_size is not None else 0
        except Exception:
            size_int = 0
        raw_duration = row.get("duration")
        if raw_duration is None:
            raw_duration = row.get("duration_ms")
        try:
            dur_int: int | None = int(raw_duration) if raw_duration is not None else None
        except Exception:
            dur_int = None

        raw_urls = row.get("known_urls") or row.get("urls") or row.get("url") or []
        if isinstance(raw_urls, str):
            stripped = raw_urls.strip()
            url_list = [stripped] if stripped else []
        elif isinstance(raw_urls, list):
            url_list = [
                str(u).strip() for u in raw_urls
                if isinstance(u, str) and str(u).strip()
            ]
        else:
            url_list = []

        return {
            "hash": file_hash,
            "title": title,
            "ext": ext,
            "size": size_int,
            "mime": mime_type,
            # Keep raw fields available for troubleshooting/other callers.
            "hydrus_mime": raw_mime,
            "filetype_human": filetype_human,
            "duration_ms": dur_int,
            "url": url_list,
        }
    except Exception as exc:
        debug(f"{self._log_prefix()} get_metadata failed: {exc}")
        return None
def get_tag(self, file_identifier: str, **kwargs: Any) -> Tuple[List[str], str]:
    """Get tags for a file from Hydrus by hash.

    Args:
        file_identifier: File hash (SHA256 hex string)
        **kwargs: Optional service_name parameter

    Returns:
        Tuple of (tags_list, source_description)
        where source is always "hydrus"
    """
    try:
        file_hash = str(file_identifier or "").strip().lower()
        hex_digits = "0123456789abcdef"
        if len(file_hash) != 64 or any(ch not in hex_digits for ch in file_hash):
            debug(
                f"{self._log_prefix()} get_tags: invalid file hash '{file_identifier}'"
            )
            return [], "unknown"

        # Get Hydrus client and service info
        client = self._client
        if not client:
            debug(f"{self._log_prefix()} get_tags: client unavailable")
            return [], "unknown"

        # Fetch file metadata
        payload = client.fetch_file_metadata(
            hashes=[file_hash],
            include_service_keys_to_tags=True,
            include_file_url=True
        )
        rows = payload.get("metadata") if isinstance(payload, dict) else None
        if not isinstance(rows, list) or not rows:
            debug(
                f"{self._log_prefix()} get_tags: no metadata for hash {file_hash}"
            )
            return [], "unknown"
        meta = rows[0] if isinstance(rows[0], dict) else None
        if not isinstance(meta, dict) or meta.get("file_id") is None:
            debug(
                f"{self._log_prefix()} get_tags: invalid metadata for hash {file_hash}"
            )
            return [], "unknown"

        service_name = kwargs.get("service_name") or "my tags"
        service_key = self._get_service_key(service_name)

        # Extract tags from metadata, normalized to lowercase.
        raw_tags = self._extract_tags_from_hydrus_meta(meta, service_key, service_name)
        normalized = [
            str(t).strip().lower() for t in raw_tags
            if isinstance(t, str) and t.strip()
        ]
        return normalized, "hydrus"
    except Exception as exc:
        debug(f"{self._log_prefix()} get_tags failed: {exc}")
        return [], "unknown"
def add_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool:
    """Add tags to a Hydrus file.

    Incoming tags are lower-cased, and namespaced tags overwrite existing
    tags sharing the same namespace (via compute_namespaced_tag_overwrite),
    so e.g. a new ``title:`` replaces the old one.

    Args:
        file_identifier: SHA256 hash (64-char hex) of the file.
        tags: Tags to add; non-string/blank entries are ignored.
        **kwargs: Optional ``service_name`` (default "my tags") and
            ``existing_tags`` (skips a round-trip fetching current tags).

    Returns:
        True when the tag state was updated (or no change was needed),
        False when every mutation attempt failed.
    """
    try:
        client = self._client
        if client is None:
            debug(f"{self._log_prefix()} add_tag: client unavailable")
            return False
        file_hash = str(file_identifier or "").strip().lower()
        if len(file_hash) != 64 or not all(ch in "0123456789abcdef"
                                           for ch in file_hash):
            debug(
                f"{self._log_prefix()} add_tag: invalid file hash '{file_identifier}'"
            )
            return False
        service_name = kwargs.get("service_name") or "my tags"
        incoming_tags = [
            str(t).strip().lower() for t in (tags or [])
            if isinstance(t, str) and str(t).strip()
        ]
        if not incoming_tags:
            return True
        # Resolve the file's current tags unless the caller supplied them.
        existing_tags = kwargs.get("existing_tags")
        if existing_tags is None:
            try:
                existing_tags, _src = self.get_tag(file_hash)
            except Exception:
                existing_tags = []
        if isinstance(existing_tags, (list, tuple, set)):
            existing_tags = [
                str(t).strip().lower() for t in existing_tags
                if isinstance(t, str) and str(t).strip()
            ]
        else:
            existing_tags = []
        from SYS.metadata import compute_namespaced_tag_overwrite

        tags_to_remove, tags_to_add, _merged = compute_namespaced_tag_overwrite(
            existing_tags, incoming_tags
        )
        if not tags_to_add and not tags_to_remove:
            return True
        # Preferred path: a single atomic mutation keyed by service key.
        service_key: Optional[str] = self._get_service_key(service_name)
        if service_key:
            try:
                client.mutate_tags_by_key(
                    file_hash,
                    service_key,
                    add_tags=tags_to_add,
                    remove_tags=tags_to_remove,
                )
                return True
            except Exception as exc:
                debug(
                    f"{self._log_prefix()} add_tag: mutate_tags_by_key failed: {exc}"
                )
        # Fallback: separate best-effort delete/add calls by service name.
        did_any = False
        if tags_to_remove:
            try:
                client.delete_tag(file_hash, tags_to_remove, service_name)
                did_any = True
            except Exception as exc:
                debug(
                    f"{self._log_prefix()} add_tag: delete_tag failed: {exc}"
                )
        if tags_to_add:
            try:
                client.add_tag(file_hash, tags_to_add, service_name)
                did_any = True
            except Exception as exc:
                debug(
                    f"{self._log_prefix()} add_tag: add_tag failed: {exc}"
                )
        return did_any
    except Exception as exc:
        debug(f"{self._log_prefix()} add_tag failed: {exc}")
        return False
def delete_tag(self, file_identifier: str, tags: List[str], **kwargs: Any) -> bool:
    """Delete tags from a Hydrus file."""
    try:
        client = self._client
        if client is None:
            debug(f"{self._log_prefix()} delete_tag: client unavailable")
            return False
        file_hash = str(file_identifier or "").strip().lower()
        is_sha256_hex = len(file_hash) == 64 and all(
            ch in "0123456789abcdef" for ch in file_hash
        )
        if not is_sha256_hex:
            debug(
                f"{self._log_prefix()} delete_tag: invalid file hash '{file_identifier}'"
            )
            return False
        service_name = kwargs.get("service_name") or "my tags"
        if isinstance(tags, (list, tuple)):
            raw_list = list(tags)
        else:
            raw_list = [str(tags)]
        # Keep only non-blank string tags, normalized to lowercase.
        tag_list = []
        for t in raw_list:
            if isinstance(t, str) and str(t).strip():
                tag_list.append(str(t).strip().lower())
        if not tag_list:
            return False
        client.delete_tag(file_hash, tag_list, service_name)
        return True
    except Exception as exc:
        debug(f"{self._log_prefix()} delete_tag failed: {exc}")
        return False
def get_url(self, file_identifier: str, **kwargs: Any) -> List[str]:
2025-12-29 17:05:03 -08:00
"""Get known url for a Hydrus file."""
2025-12-11 19:04:02 -08:00
try:
client = self._client
2025-12-12 21:55:38 -08:00
file_hash = str(file_identifier or "").strip().lower()
if len(file_hash) != 64 or not all(ch in "0123456789abcdef"
for ch in file_hash):
2025-12-12 21:55:38 -08:00
return []
payload = client.fetch_file_metadata(
hashes=[file_hash],
2026-01-11 02:26:39 -08:00
include_file_url=True
)
2025-12-11 19:04:02 -08:00
items = payload.get("metadata") if isinstance(payload, dict) else None
if not isinstance(items, list) or not items:
return []
meta = items[0] if isinstance(items[0],
dict) else {}
2025-12-16 01:45:01 -08:00
raw_urls: Any = meta.get("known_urls"
) or meta.get("urls") or meta.get("url") or []
2026-01-12 04:05:52 -08:00
def _is_url(s: Any) -> bool:
if not isinstance(s, str):
return False
v = s.strip().lower()
return bool(v and ("://" in v or v.startswith(("magnet:", "torrent:"))))
2025-12-16 01:45:01 -08:00
if isinstance(raw_urls, str):
val = raw_urls.strip()
2026-01-12 04:05:52 -08:00
return [val] if _is_url(val) else []
2025-12-16 01:45:01 -08:00
if isinstance(raw_urls, list):
out: list[str] = []
for u in raw_urls:
if not isinstance(u, str):
continue
u = u.strip()
2026-01-12 04:05:52 -08:00
if u and _is_url(u):
2025-12-16 01:45:01 -08:00
out.append(u)
return out
return []
2025-12-11 19:04:02 -08:00
except Exception as exc:
2025-12-16 23:23:43 -08:00
debug(f"{self._log_prefix()} get_url failed: {exc}")
2025-12-11 19:04:02 -08:00
return []
2025-12-30 05:48:01 -08:00
def get_url_info(self, url: str, **kwargs: Any) -> dict[str, Any] | None:
"""Return Hydrus URL info for a single URL (Hydrus-only helper).
Uses: GET /add_urls/get_url_info
"""
try:
client = self._client
if client is None:
return None
u = str(url or "").strip()
if not u:
return None
try:
return client.get_url_info(u) # type: ignore[attr-defined]
except Exception:
from API.HydrusNetwork import HydrusRequestSpec
spec = HydrusRequestSpec(
method="GET",
endpoint="/add_urls/get_url_info",
query={
"url": u
},
)
response = client._perform_request(spec) # type: ignore[attr-defined]
return response if isinstance(response, dict) else None
except Exception as exc:
debug(f"{self._log_prefix()} get_url_info failed: {exc}")
return None
2025-12-11 19:04:02 -08:00
def add_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
2025-12-29 17:05:03 -08:00
"""Associate one or more url with a Hydrus file."""
2025-12-11 19:04:02 -08:00
try:
client = self._client
if client is None:
2025-12-16 23:23:43 -08:00
debug(f"{self._log_prefix()} add_url: client unavailable")
2025-12-11 19:04:02 -08:00
return False
for u in url:
client.associate_url(file_identifier, u)
return True
except Exception as exc:
2025-12-16 23:23:43 -08:00
debug(f"{self._log_prefix()} add_url failed: {exc}")
2025-12-11 19:04:02 -08:00
return False
2025-12-20 23:57:44 -08:00
def add_url_bulk(self, items: List[tuple[str, List[str]]], **kwargs: Any) -> bool:
"""Bulk associate urls with Hydrus files.
This is a best-effort convenience wrapper used by cmdlets to batch url associations.
Hydrus' client API is still called per (hash,url) pair, but this consolidates the
cmdlet-level control flow so url association can be deferred until the end.
"""
try:
client = self._client
if client is None:
debug(f"{self._log_prefix()} add_url_bulk: client unavailable")
return False
any_success = False
2025-12-29 17:05:03 -08:00
for file_identifier, urls in items or []:
2025-12-20 23:57:44 -08:00
h = str(file_identifier or "").strip().lower()
if len(h) != 64:
continue
2025-12-29 17:05:03 -08:00
for u in urls or []:
2025-12-20 23:57:44 -08:00
s = str(u or "").strip()
if not s:
continue
try:
client.associate_url(h, s)
any_success = True
except Exception:
continue
return any_success
except Exception as exc:
debug(f"{self._log_prefix()} add_url_bulk failed: {exc}")
return False
2026-01-19 03:14:30 -08:00
def add_tags_bulk(self, items: List[tuple[str, List[str]]], *, service_name: str | None = None) -> bool:
"""Bulk add tags to multiple Hydrus files.
Groups files by identical tag-sets and uses the Hydrus `mutate_tags_by_key`
call (when a service key is available) to reduce the number of API calls.
Falls back to per-hash `add_tag` calls if necessary.
"""
try:
client = self._client
if client is None:
debug(f"{self._log_prefix()} add_tags_bulk: client unavailable")
return False
# Group by canonical tag set (sorted tuple) to batch identical additions
buckets: dict[tuple[str, ...], list[str]] = {}
for file_identifier, tags in items or []:
h = str(file_identifier or "").strip().lower()
if len(h) != 64:
continue
tlist = [str(t).strip().lower() for t in (tags or []) if isinstance(t, str) and str(t).strip()]
if not tlist:
continue
key = tuple(sorted(tlist))
buckets.setdefault(key, []).append(h)
if not buckets:
return False
svc = service_name or "my tags"
service_key = self._get_service_key(svc)
any_success = False
for tag_tuple, hashes in buckets.items():
try:
if service_key:
# Mutate tags for many hashes in a single request
2026-02-11 20:25:22 -08:00
client.mutate_tags_by_key(hash=hashes, service_key=service_key, add_tags=list(tag_tuple))
2026-01-19 03:14:30 -08:00
any_success = True
continue
except Exception as exc:
debug(f"{self._log_prefix()} add_tags_bulk mutate failed for tags {tag_tuple}: {exc}")
# Fallback: apply per-hash add_tag
for h in hashes:
try:
client.add_tag(h, list(tag_tuple), svc)
any_success = True
except Exception:
continue
return any_success
except Exception as exc:
debug(f"{self._log_prefix()} add_tags_bulk failed: {exc}")
return False
2025-12-11 19:04:02 -08:00
def delete_url(self, file_identifier: str, url: List[str], **kwargs: Any) -> bool:
2025-12-29 17:05:03 -08:00
"""Delete one or more url from a Hydrus file."""
2025-12-11 19:04:02 -08:00
try:
client = self._client
if client is None:
2025-12-16 23:23:43 -08:00
debug(f"{self._log_prefix()} delete_url: client unavailable")
2025-12-11 19:04:02 -08:00
return False
for u in url:
client.delete_url(file_identifier, u)
return True
except Exception as exc:
2025-12-16 23:23:43 -08:00
debug(f"{self._log_prefix()} delete_url failed: {exc}")
2025-12-11 19:04:02 -08:00
return False
2025-12-12 21:55:38 -08:00
def get_note(self, file_identifier: str, **kwargs: Any) -> Dict[str, str]:
"""Get notes for a Hydrus file (default note service only)."""
try:
client = self._client
if client is None:
2025-12-16 23:23:43 -08:00
debug(f"{self._log_prefix()} get_note: client unavailable")
2025-12-12 21:55:38 -08:00
return {}
file_hash = str(file_identifier or "").strip().lower()
if len(file_hash) != 64 or not all(ch in "0123456789abcdef"
for ch in file_hash):
2025-12-12 21:55:38 -08:00
return {}
payload = client.fetch_file_metadata(hashes=[file_hash], include_notes=True)
items = payload.get("metadata") if isinstance(payload, dict) else None
if not isinstance(items, list) or not items:
return {}
meta = items[0] if isinstance(items[0], dict) else None
if not isinstance(meta, dict):
return {}
notes_payload = meta.get("notes")
if isinstance(notes_payload, dict):
return {
str(k): str(v or "")
for k, v in notes_payload.items() if str(k).strip()
}
2025-12-12 21:55:38 -08:00
return {}
except Exception as exc:
2025-12-16 23:23:43 -08:00
debug(f"{self._log_prefix()} get_note failed: {exc}")
2025-12-12 21:55:38 -08:00
return {}
def set_note(
self,
file_identifier: str,
name: str,
text: str,
**kwargs: Any
) -> bool:
2025-12-12 21:55:38 -08:00
"""Set a named note for a Hydrus file (default note service only)."""
try:
client = self._client
if client is None:
2025-12-16 23:23:43 -08:00
debug(f"{self._log_prefix()} set_note: client unavailable")
2025-12-12 21:55:38 -08:00
return False
file_hash = str(file_identifier or "").strip().lower()
if len(file_hash) != 64 or not all(ch in "0123456789abcdef"
for ch in file_hash):
2025-12-12 21:55:38 -08:00
return False
note_name = str(name or "").strip()
if not note_name:
return False
note_text = str(text or "")
client.set_notes(file_hash,
{
note_name: note_text
})
2025-12-12 21:55:38 -08:00
return True
except Exception as exc:
2025-12-16 23:23:43 -08:00
debug(f"{self._log_prefix()} set_note failed: {exc}")
2025-12-12 21:55:38 -08:00
return False
def delete_note(self, file_identifier: str, name: str, **kwargs: Any) -> bool:
"""Delete a named note for a Hydrus file (default note service only)."""
try:
client = self._client
if client is None:
2025-12-16 23:23:43 -08:00
debug(f"{self._log_prefix()} delete_note: client unavailable")
2025-12-12 21:55:38 -08:00
return False
file_hash = str(file_identifier or "").strip().lower()
if len(file_hash) != 64 or not all(ch in "0123456789abcdef"
for ch in file_hash):
2025-12-12 21:55:38 -08:00
return False
note_name = str(name or "").strip()
if not note_name:
return False
client.delete_notes(file_hash, [note_name])
return True
except Exception as exc:
2025-12-16 23:23:43 -08:00
debug(f"{self._log_prefix()} delete_note failed: {exc}")
2025-12-12 21:55:38 -08:00
return False
2025-12-11 19:04:02 -08:00
    @staticmethod
    def _extract_tags_from_hydrus_meta(
        meta: Dict[str, Any],
        service_key: Optional[str],
        service_name: str
    ) -> List[str]:
        """Extract current tags from Hydrus metadata dict.

        Prefers display_tags (includes siblings/parents, excludes deleted).
        Falls back to storage_tags status '0' (current).

        Lookup cascade (each stage only runs if earlier ones found nothing,
        except the statuses-map stage which always runs when present):
          1. service data keyed directly by `service_key`
          2. service data whose embedded name matches `service_name`
          3. the "service_keys_to_statuses_to_tags" map, filtered to the
             requested service when a key/name match is known
          4. every service in the payload
          5. a top-level "tags_flat" list, always appended if present
        Results are de-duplicated case-insensitively, first occurrence wins.
        """
        tags_payload = meta.get("tags")
        if not isinstance(tags_payload, dict):
            return []

        desired_service_name = str(service_name or "").strip().lower()
        desired_service_key = str(service_key).strip() if service_key is not None else ""

        def _append_tag(out: List[str], value: Any) -> None:
            # Coerce bytes/str to a stripped string; drop anything else.
            text = ""
            if isinstance(value, bytes):
                try:
                    text = value.decode("utf-8", errors="ignore")
                except Exception:
                    text = str(value)
            elif isinstance(value, str):
                text = value
            if not text:
                return
            cleaned = text.strip()
            if cleaned:
                out.append(cleaned)

        def _collect_current(container: Any, out: List[str]) -> None:
            # A container is either a plain list of tags, or a dict of
            # status -> tags where status "0"/0 means "current".
            if isinstance(container, list):
                for tag in container:
                    _append_tag(out, tag)
                return
            if isinstance(container, dict):
                current = container.get("0")
                if current is None:
                    current = container.get(0)
                if isinstance(current, list):
                    for tag in current:
                        _append_tag(out, tag)

        def _collect_service_data(service_data: Any, out: List[str]) -> None:
            # Pull both display tags (preferred) and storage tags from one
            # service entry; several historical key spellings are tried.
            if not isinstance(service_data, dict):
                return
            display = (
                service_data.get("display_tags")
                or service_data.get("display_friendly_tags")
                or service_data.get("display")
            )
            _collect_current(display, out)

            storage = (
                service_data.get("storage_tags")
                or service_data.get("statuses_to_tags")
                or service_data.get("tags")
            )
            _collect_current(storage, out)

        collected: List[str] = []
        # Stage 1: direct lookup by service key.
        if desired_service_key:
            _collect_service_data(tags_payload.get(desired_service_key), collected)
        # Stage 2: match a service entry by its embedded name.
        if not collected and desired_service_name:
            for maybe_service in tags_payload.values():
                if not isinstance(maybe_service, dict):
                    continue
                svc_name = str(
                    maybe_service.get("service_name")
                    or maybe_service.get("name")
                    or ""
                ).strip().lower()
                if svc_name and svc_name == desired_service_name:
                    _collect_service_data(maybe_service, collected)
        # Stage 3: the explicit key->status->tags map (newer API shape).
        names_map = tags_payload.get("service_keys_to_names")
        statuses_map = tags_payload.get("service_keys_to_statuses_to_tags")
        if isinstance(statuses_map, dict):
            keys_to_collect: List[str] = []
            if desired_service_key:
                keys_to_collect.append(desired_service_key)
            if desired_service_name and isinstance(names_map, dict):
                for raw_key, raw_name in names_map.items():
                    if str(raw_name or "").strip().lower() == desired_service_name:
                        keys_to_collect.append(str(raw_key))
            # Empty filter set means "no known match" -> take every key.
            keys_filter = {k for k in keys_to_collect if k}
            for raw_key, status_payload in statuses_map.items():
                raw_key_text = str(raw_key)
                if keys_filter and raw_key_text not in keys_filter:
                    continue
                _collect_current(status_payload, collected)
        # Stage 4: last resort — scan every service entry.
        if not collected:
            for maybe_service in tags_payload.values():
                _collect_service_data(maybe_service, collected)
        # Stage 5: a flat top-level tag list, if the payload carries one.
        top_level_tags = meta.get("tags_flat")
        if isinstance(top_level_tags, list):
            _collect_current(top_level_tags, collected)
        # De-duplicate case-insensitively while preserving first occurrence
        # (and its original casing).
        deduped: List[str] = []
        seen: set[str] = set()
        for tag in collected:
            key = str(tag).strip().lower()
            if not key or key in seen:
                continue
            seen.add(key)
            deduped.append(tag)
        return deduped
@staticmethod
def _extract_title_and_tags(meta: Dict[str, Any], file_id: Any) -> Tuple[str, List[str]]:
title = f"Hydrus File {file_id}"
tags = HydrusNetwork._extract_tags_from_hydrus_meta(
meta,
service_key=None,
service_name="my tags",
)
2025-12-29 17:05:03 -08:00
2026-02-11 20:25:22 -08:00
normalized_tags: List[str] = []
seen: set[str] = set()
for raw_tag in tags:
text = str(raw_tag or "").strip().lower()
if not text or text in seen:
continue
seen.add(text)
normalized_tags.append(text)
if text.startswith("title:") and title == f"Hydrus File {file_id}":
value = text.split(":", 1)[1].strip()
if value:
title = value
return title, normalized_tags