
"""File storage abstraction layer for uploading files to different services.
Supports multiple backend storage services (0x0.st, local directories, Hydrus, etc.)
with a unified interface.
Example:
storage = FileStorage()
# Upload to 0x0.st
url = storage["0x0"].upload(Path("file.mp3"))
# Copy to local directory
path = storage["local"].upload(Path("file.mp3"), location="/home/user/files")
# Upload to Hydrus
hash_result = storage["hydrus"].upload(file_path, config=config)
"""
from __future__ import annotations
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Any, Dict, Optional
import sys
import shutil
import requests
import re
from helper.logger import log, debug
from helper.utils_constant import mime_maps
from helper.utils import sha256_file
HEX_DIGITS = set("0123456789abcdef")
def _normalize_hex_hash(value: Optional[str]) -> Optional[str]:
"""Return a normalized 64-character lowercase hash or None."""
if value is None:
return None
try:
cleaned = ''.join(ch for ch in str(value).strip().lower() if ch in HEX_DIGITS)
except Exception:
return None
if len(cleaned) == 64:
return cleaned
return None
def _resolve_file_hash(candidate: Optional[str], path: Path) -> Optional[str]:
"""Return the given hash if valid, otherwise compute sha256 from disk."""
normalized = _normalize_hex_hash(candidate)
if normalized is not None:
return normalized
if not path.exists():
return None
try:
return sha256_file(path)
except Exception as exc:
debug(f"Failed to compute hash for {path}: {exc}")
return None
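# Illustrative behavior of the two helpers above (a sketch, not executed here):
#   _normalize_hex_hash(" ABC123... ")        -> 64-char lowercase hash or None
#   _resolve_file_hash(None, Path("a.mp3"))   -> sha256 computed from disk
#   _resolve_file_hash(valid_hash, path)      -> valid_hash (no disk access)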
class StorageBackend(ABC):
"""Abstract base class for file storage backends.
Backends can optionally support searching by implementing the search() method.
"""
@abstractmethod
def upload(self, file_path: Path, **kwargs: Any) -> str:
"""Upload a file and return a result identifier (URL, hash, path, etc.).
Args:
file_path: Path to the file to upload
**kwargs: Backend-specific options
Returns:
            Result identifier (e.g., hash for Hydrus, path for local, matrix:// URI for Matrix)
Raises:
Exception: If upload fails
"""
@abstractmethod
def get_name(self) -> str:
"""Get the unique name of this backend."""
def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]:
"""Search for files in backends that support it.
        This method is optional and only implemented by searchable backends
        (e.g., local, Hydrus, remote).
Args:
query: Search query string
**kwargs: Backend-specific search options
Returns:
List of search results, each as a dict with backend-specific fields.
Common fields: 'name', 'size', 'hash', 'url', 'id', etc.
Raises:
NotImplementedError: If backend doesn't support searching
Exception: If search fails
Example:
results = storage["hydrus"].search("music artist:john")
for result in results:
print(result['name'], result['hash'])
"""
raise NotImplementedError(f"{self.get_name()} backend does not support searching")
def supports_search(self) -> bool:
"""Check if this backend supports searching.
Returns:
True if search() is implemented, False otherwise
"""
return self.search.__func__ is not StorageBackend.search
class LocalStorageBackend(StorageBackend):
"""File storage backend for local file system copy."""
def __init__(self, location: Optional[str] = None) -> None:
"""Initialize local storage backend.
Args:
location: Default directory path for storage operations
"""
self._location = location
def get_name(self) -> str:
return "local"
def upload(self, file_path: Path, **kwargs: Any) -> str:
"""Copy or move file to a local directory.
Args:
file_path: Path to the file to upload
location: Destination directory path (uses default if not provided)
move: When True, move the file instead of copying (default: False)
Returns:
Absolute path to the copied/moved file
Raises:
ValueError: If location not provided and no default configured
Exception: If copy fails or duplicate detected
"""
from helper.utils import unique_path as utils_unique_path
from helper.utils import sha256_file
from helper.local_library import LocalLibraryDB
location = kwargs.get("location") or self._location
move_file = bool(kwargs.get("move"))
if not location:
raise ValueError("'location' parameter required for local storage (not configured)")
try:
# Compute file hash
file_hash = sha256_file(file_path)
debug(f"File hash: {file_hash}", file=sys.stderr)
dest_dir = Path(location).expanduser()
dest_dir.mkdir(parents=True, exist_ok=True)
# Check for duplicate files using LocalLibraryDB (fast - uses index)
try:
with LocalLibraryDB(dest_dir) as db:
existing_path = db.search_by_hash(file_hash)
if existing_path and existing_path.exists():
log(
f"✓ File already in local storage: {existing_path}",
file=sys.stderr,
)
return str(existing_path)
except Exception as exc:
log(f"⚠️ Could not check for duplicates in DB: {exc}", file=sys.stderr)
dest_file = dest_dir / file_path.name
dest_file = utils_unique_path(dest_file)
if move_file:
shutil.move(str(file_path), dest_file)
debug(f"Local move: {dest_file}", file=sys.stderr)
else:
shutil.copy2(file_path, dest_file)
debug(f"Local copy: {dest_file}", file=sys.stderr)
return str(dest_file)
except Exception as exc:
debug(f"Local copy failed: {exc}", file=sys.stderr)
raise
def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]:
"""Search local database for files by title tag or filename.
Args:
query: Search string supporting:
- Title tag search: "title:document" or just searches DB for matching title tags
- Tag namespace search: "creator:Mac*" matches tags in database
- Filename fallback: if query not in DB, searches filesystem
- "*" means "match all files"
location: Directory to search in (uses default if not provided)
recursive: Search subdirectories (default: True)
Returns:
List of dicts with 'name', 'path', 'size' fields
"""
from fnmatch import fnmatch
from helper.local_library import LocalLibraryDB
location = kwargs.get("location") or self._location
if not location:
raise ValueError("'location' parameter required for local search (not configured)")
limit = kwargs.get("limit")
try:
limit = int(limit) if limit is not None else None
except (TypeError, ValueError):
limit = None
if isinstance(limit, int) and limit <= 0:
limit = None
query_lower = query.lower()
match_all = query_lower == "*"
results = []
search_dir = Path(location).expanduser()
debug(f"Searching local storage at: {search_dir}")
# Support comma-separated AND queries (token1,token2,...). Each token must match.
tokens = [t.strip() for t in query.split(',') if t.strip()]
        # Require an explicit 'hash:' namespace for hash lookups to avoid
        # accidental filename matches (this single guard also covers the
        # single-token case, so no separate check is needed).
        if not match_all and _normalize_hex_hash(query_lower):
            debug("Hash queries require 'hash:' prefix for local search")
            return results
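        # Example: a bare "e3b0c442...<64 hex>" returns nothing here, while
        # "hash:e3b0c442...<64 hex>" is looked up against the file_hash column.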
def _create_entry(file_path: Path, tags: list[str], size_bytes: int | None, db_hash: Optional[str]) -> dict[str, Any]:
path_str = str(file_path)
entry = {
"name": file_path.stem,
"title": next((t.split(':', 1)[1] for t in tags if t.lower().startswith('title:')), file_path.stem),
"ext": file_path.suffix.lstrip('.'),
"path": path_str,
"target": path_str,
"origin": "local",
"size": size_bytes,
"size_bytes": size_bytes,
"tags": tags,
}
hash_value = _resolve_file_hash(db_hash, file_path)
if hash_value:
entry["hash"] = hash_value
entry["hash_hex"] = hash_value
entry["file_hash"] = hash_value
return entry
try:
if not search_dir.exists():
debug(f"Search directory does not exist: {search_dir}")
return results
# Try database search first (much faster than filesystem scan)
try:
with LocalLibraryDB(search_dir) as db:
cursor = db.connection.cursor()
# Check if query is a tag namespace search (format: "namespace:pattern")
if tokens and len(tokens) > 1:
# AND mode across comma-separated tokens
def _like_pattern(term: str) -> str:
return term.replace('*', '%').replace('?', '_')
def _ids_for_token(token: str, cursor) -> set[int]:
token = token.strip()
if not token:
return set()
# Namespaced token
if ':' in token and not token.startswith(':'):
namespace, pattern = token.split(':', 1)
namespace = namespace.strip().lower()
pattern = pattern.strip().lower()
if namespace == 'hash':
normalized_hash = _normalize_hex_hash(pattern)
if not normalized_hash:
return set()
cursor.execute(
"""
SELECT id FROM files
WHERE LOWER(file_hash) = ?
""",
(normalized_hash,)
)
return {row[0] for row in cursor.fetchall()}
if namespace == 'store':
# Local backend only serves local store
if pattern not in {'local', 'file', 'filesystem'}:
return set()
cursor.execute("SELECT id FROM files")
return {row[0] for row in cursor.fetchall()}
# Generic namespace match on tags
query_pattern = f"{namespace}:%"
cursor.execute(
"""
SELECT DISTINCT f.id, t.tag
FROM files f
JOIN tags t ON f.id = t.file_id
WHERE LOWER(t.tag) LIKE ?
""",
(query_pattern,)
)
matched: set[int] = set()
for file_id, tag_val in cursor.fetchall():
if not tag_val:
continue
tag_lower = str(tag_val).lower()
if not tag_lower.startswith(f"{namespace}:"):
continue
value = tag_lower[len(namespace)+1:]
if fnmatch(value, pattern):
matched.add(int(file_id))
return matched
# Bare token: match filename OR any tag (including title)
term = token.lower()
like_pattern = f"%{_like_pattern(term)}%"
ids: set[int] = set()
# Filename match
cursor.execute(
"""
SELECT DISTINCT id FROM files
WHERE LOWER(file_path) LIKE ?
""",
(like_pattern,)
)
ids.update(int(row[0]) for row in cursor.fetchall())
# Tag match (any namespace, including title)
cursor.execute(
"""
SELECT DISTINCT f.id
FROM files f
JOIN tags t ON f.id = t.file_id
WHERE LOWER(t.tag) LIKE ?
""",
(like_pattern,)
)
ids.update(int(row[0]) for row in cursor.fetchall())
return ids
try:
with LocalLibraryDB(search_dir) as db:
cursor = db.connection.cursor()
matching_ids: set[int] | None = None
for token in tokens:
ids = _ids_for_token(token, cursor)
matching_ids = ids if matching_ids is None else matching_ids & ids
if not matching_ids:
return results
if not matching_ids:
return results
# Fetch rows for matching IDs
placeholders = ",".join(["?"] * len(matching_ids))
fetch_sql = f"""
SELECT id, file_path, file_size, file_hash
FROM files
WHERE id IN ({placeholders})
ORDER BY file_path
LIMIT ?
"""
cursor.execute(fetch_sql, (*matching_ids, limit or len(matching_ids)))
rows = cursor.fetchall()
for file_id, file_path_str, size_bytes, file_hash in rows:
if not file_path_str:
continue
file_path = Path(file_path_str)
if not file_path.exists():
continue
if size_bytes is None:
try:
size_bytes = file_path.stat().st_size
except OSError:
size_bytes = None
cursor.execute(
"""
SELECT tag FROM tags WHERE file_id = ?
""",
(file_id,),
)
tags = [row[0] for row in cursor.fetchall()]
entry = _create_entry(file_path, tags, size_bytes, file_hash)
results.append(entry)
if limit is not None and len(results) >= limit:
return results
return results
except Exception as exc:
log(f"⚠️ AND search failed: {exc}", file=sys.stderr)
debug(f"AND search exception details: {exc}")
return []
if ":" in query and not query.startswith(":"):
namespace, pattern = query.split(":", 1)
namespace = namespace.strip().lower()
pattern = pattern.strip().lower()
debug(f"Performing namespace search: {namespace}:{pattern}")
# Special-case hash: lookups against file_hash column
if namespace == "hash":
normalized_hash = _normalize_hex_hash(pattern)
if not normalized_hash:
return results
cursor.execute(
"""
SELECT id, file_path, file_size, file_hash
FROM files
WHERE LOWER(file_hash) = ?
ORDER BY file_path
LIMIT ?
""",
(normalized_hash, limit or 1000),
)
for file_id, file_path_str, size_bytes, file_hash in cursor.fetchall():
if not file_path_str:
continue
file_path = Path(file_path_str)
if not file_path.exists():
continue
if size_bytes is None:
try:
size_bytes = file_path.stat().st_size
except OSError:
size_bytes = None
cursor.execute(
"""
SELECT tag FROM tags WHERE file_id = ?
""",
(file_id,),
)
all_tags = [row[0] for row in cursor.fetchall()]
entry = _create_entry(file_path, all_tags, size_bytes, file_hash)
results.append(entry)
if limit is not None and len(results) >= limit:
return results
return results
# Search for tags matching the namespace and pattern
query_pattern = f"{namespace}:%"
cursor.execute("""
SELECT DISTINCT f.id, f.file_path, f.file_size, f.file_hash
FROM files f
JOIN tags t ON f.id = t.file_id
WHERE LOWER(t.tag) LIKE ?
ORDER BY f.file_path
LIMIT ?
""", (query_pattern, limit or 1000))
rows = cursor.fetchall()
debug(f"Found {len(rows)} potential matches in DB")
# Filter results by pattern match
for file_id, file_path_str, size_bytes, file_hash in rows:
if not file_path_str:
continue
# Get the file's tags and check if any match the pattern
cursor.execute("""
SELECT DISTINCT tag FROM tags
WHERE file_id = ?
AND LOWER(tag) LIKE ?
""", (file_id, query_pattern))
tags = [row[0] for row in cursor.fetchall()]
# Check if any tag matches the pattern (case-insensitive wildcard)
for tag in tags:
tag_lower = tag.lower()
# Extract the value part after "namespace:"
if tag_lower.startswith(f"{namespace}:"):
value = tag_lower[len(namespace)+1:]
# Use fnmatch for wildcard matching
if fnmatch(value, pattern):
file_path = Path(file_path_str)
if file_path.exists():
if size_bytes is None:
size_bytes = file_path.stat().st_size
cursor.execute("""
SELECT tag FROM tags WHERE file_id = ?
""", (file_id,))
all_tags = [row[0] for row in cursor.fetchall()]
entry = _create_entry(file_path, all_tags, size_bytes, file_hash)
results.append(entry)
else:
debug(f"File missing on disk: {file_path}")
break # Don't add same file multiple times
if limit is not None and len(results) >= limit:
return results
elif not match_all:
# Search by filename or simple tags (namespace-agnostic for plain text)
# For plain text search, match:
# 1. Filenames containing the query
# 2. Simple tags (without namespace) containing the query
# NOTE: Does NOT match namespaced tags (e.g., "joe" won't match "channel:Joe Mullan")
# Use explicit namespace search for that (e.g., "channel:joe*")
# Split query into terms for AND logic
terms = [t.strip() for t in query_lower.replace(',', ' ').split() if t.strip()]
if not terms:
terms = [query_lower]
debug(f"Performing filename/tag search for terms: {terms}")
# Fetch more results than requested to allow for filtering
fetch_limit = (limit or 45) * 50
# 1. Filename search (AND logic)
conditions = ["LOWER(f.file_path) LIKE ?" for _ in terms]
params = [f"%{t}%" for t in terms]
where_clause = " AND ".join(conditions)
cursor.execute(f"""
SELECT DISTINCT f.id, f.file_path, f.file_size, f.file_hash
FROM files f
WHERE {where_clause}
ORDER BY f.file_path
LIMIT ?
""", (*params, fetch_limit))
rows = cursor.fetchall()
debug(f"Found {len(rows)} filename matches in DB (before whole-word filter)")
# Compile regex for whole word matching (only if single term, otherwise skip)
word_regex = None
if len(terms) == 1:
term = terms[0]
# Check if term contains wildcard characters
has_wildcard = '*' in term or '?' in term
if has_wildcard:
# Use fnmatch for wildcard patterns (e.g., "sie*" matches "SiebeliebenWohl...")
try:
from fnmatch import translate
word_regex = re.compile(translate(term), re.IGNORECASE)
except Exception:
word_regex = None
else:
# Use custom boundary that treats underscores as separators
# \b treats _ as a word character, so "foo_bar" wouldn't match "bar" with \b
try:
# Match if not preceded or followed by alphanumeric chars
pattern = r'(?<![a-zA-Z0-9])' + re.escape(term) + r'(?![a-zA-Z0-9])'
word_regex = re.compile(pattern, re.IGNORECASE)
except Exception:
word_regex = None
seen_files = set()
for file_id, file_path_str, size_bytes, file_hash in rows:
if not file_path_str or file_path_str in seen_files:
continue
# Apply whole word filter on filename if single term
if word_regex:
p = Path(file_path_str)
if not word_regex.search(p.name):
continue
seen_files.add(file_path_str)
file_path = Path(file_path_str)
if file_path.exists():
path_str = str(file_path)
if size_bytes is None:
size_bytes = file_path.stat().st_size
cursor.execute("""
SELECT tag FROM tags WHERE file_id = ?
""", (file_id,))
tags = [row[0] for row in cursor.fetchall()]
entry = _create_entry(file_path, tags, size_bytes, file_hash)
results.append(entry)
if limit is not None and len(results) >= limit:
return results
# Title-tag search: treat freeform terms as title namespace queries (AND across terms)
if terms:
title_hits: dict[int, dict[str, Any]] = {}
for term in terms:
cursor.execute(
"""
SELECT DISTINCT f.id, f.file_path, f.file_size, f.file_hash
FROM files f
JOIN tags t ON f.id = t.file_id
WHERE LOWER(t.tag) LIKE ?
ORDER BY f.file_path
LIMIT ?
""",
(f"title:%{term}%", fetch_limit),
)
for file_id, file_path_str, size_bytes, file_hash in cursor.fetchall():
if not file_path_str:
continue
entry = title_hits.get(file_id)
if entry:
entry["count"] += 1
if size_bytes is not None:
entry["size"] = size_bytes
else:
title_hits[file_id] = {
"path": file_path_str,
"size": size_bytes,
"hash": file_hash,
"count": 1,
}
if title_hits:
required = len(terms)
for file_id, info in title_hits.items():
if info.get("count") != required:
continue
file_path_str = info.get("path")
if not file_path_str or file_path_str in seen_files:
continue
file_path = Path(file_path_str)
if not file_path.exists():
continue
seen_files.add(file_path_str)
size_bytes = info.get("size")
if size_bytes is None:
try:
size_bytes = file_path.stat().st_size
except OSError:
size_bytes = None
cursor.execute(
"""
SELECT tag FROM tags WHERE file_id = ?
""",
(file_id,),
)
tags = [row[0] for row in cursor.fetchall()]
entry = _create_entry(file_path, tags, size_bytes, info.get("hash"))
results.append(entry)
if limit is not None and len(results) >= limit:
return results
# Also search for simple tags (without namespace) containing the query
# Only perform tag search if single term, or if we want to support multi-term tag search
# For now, fallback to single pattern search for tags if multiple terms
# (searching for a tag that contains "term1 term2" or "term1,term2")
# This is less useful for AND logic across multiple tags, but consistent with previous behavior
query_pattern = f"%{query_lower}%"
cursor.execute("""
SELECT DISTINCT f.id, f.file_path, f.file_size, f.file_hash
FROM files f
JOIN tags t ON f.id = t.file_id
WHERE LOWER(t.tag) LIKE ? AND LOWER(t.tag) NOT LIKE '%:%'
ORDER BY f.file_path
LIMIT ?
""", (query_pattern, limit or 1000))
tag_rows = cursor.fetchall()
for file_id, file_path_str, size_bytes, file_hash in tag_rows:
if not file_path_str or file_path_str in seen_files:
continue
seen_files.add(file_path_str)
file_path = Path(file_path_str)
if file_path.exists():
path_str = str(file_path)
if size_bytes is None:
size_bytes = file_path.stat().st_size
# Fetch tags for this file
cursor.execute("""
SELECT tag FROM tags WHERE file_id = ?
""", (file_id,))
tags = [row[0] for row in cursor.fetchall()]
entry = _create_entry(file_path, tags, size_bytes, file_hash)
results.append(entry)
if limit is not None and len(results) >= limit:
return results
else:
# Match all - get all files from database
cursor.execute("""
SELECT id, file_path, file_size, file_hash
FROM files
ORDER BY file_path
LIMIT ?
""", (limit or 1000,))
rows = cursor.fetchall()
for file_id, file_path_str, size_bytes, file_hash in rows:
if file_path_str:
file_path = Path(file_path_str)
if file_path.exists():
path_str = str(file_path)
if size_bytes is None:
size_bytes = file_path.stat().st_size
# Fetch tags for this file
cursor.execute("""
SELECT tag FROM tags WHERE file_id = ?
""", (file_id,))
tags = [row[0] for row in cursor.fetchall()]
entry = _create_entry(file_path, tags, size_bytes, file_hash)
results.append(entry)
if results:
debug(f"Returning {len(results)} results from DB")
else:
debug("No results found in DB")
return results
except Exception as e:
log(f"⚠️ Database search failed: {e}", file=sys.stderr)
debug(f"DB search exception details: {e}")
return []
except Exception as exc:
log(f"❌ Local search failed: {exc}", file=sys.stderr)
raise
class HydrusStorageBackend(StorageBackend):
"""File storage backend for Hydrus client."""
def __init__(self, config: Optional[Dict[str, Any]] = None) -> None:
"""Initialize Hydrus storage backend.
Args:
config: Configuration dict with Hydrus settings (HydrusNetwork section)
"""
self._config = config or {}
def get_name(self) -> str:
return "hydrus"
def upload(self, file_path: Path, **kwargs: Any) -> str:
"""Upload file to Hydrus.
Args:
file_path: Path to the file to upload
tags: Optional list of tags to add (uses default config if not provided)
config: Optional override for config (uses default if not provided)
Returns:
File hash from Hydrus
Raises:
Exception: If upload fails
"""
from helper import hydrus as hydrus_wrapper
from helper.utils import sha256_file
config = kwargs.get("config") or self._config
if not config:
raise ValueError("'config' parameter required for Hydrus storage (not configured)")
tags = kwargs.get("tags", [])
try:
# Compute file hash
file_hash = sha256_file(file_path)
debug(f"File hash: {file_hash}")
# Build Hydrus client
client = hydrus_wrapper.get_client(config)
if client is None:
raise Exception("Hydrus client unavailable")
# Check if file already exists in Hydrus
try:
metadata = client.fetch_file_metadata(hashes=[file_hash])
if metadata and isinstance(metadata, dict):
files = metadata.get("file_metadata", [])
if files:
log(
f" Duplicate detected - file already in Hydrus with hash: {file_hash}",
file=sys.stderr,
)
# Even if duplicate, we should add tags if provided
if tags:
try:
service_name = hydrus_wrapper.get_tag_service_name(config)
except Exception:
service_name = "my tags"
try:
debug(f"Adding {len(tags)} tag(s) to existing file in Hydrus: {tags}")
client.add_tags(file_hash, tags, service_name)
log(f"✅ Tags added to existing file via '{service_name}'", file=sys.stderr)
except Exception as exc:
log(f"⚠️ Failed to add tags to existing file: {exc}", file=sys.stderr)
return file_hash
except Exception:
pass
# Upload file to Hydrus
log(f"Uploading to Hydrus: {file_path.name}", file=sys.stderr)
response = client.add_file(file_path)
# Extract hash from response
hydrus_hash: Optional[str] = None
if isinstance(response, dict):
hydrus_hash = response.get("hash") or response.get("file_hash")
if not hydrus_hash:
hashes = response.get("hashes")
if isinstance(hashes, list) and hashes:
hydrus_hash = hashes[0]
if not hydrus_hash:
raise Exception(f"Hydrus response missing file hash: {response}")
file_hash = hydrus_hash
log(f"Hydrus: {file_hash}", file=sys.stderr)
# Add tags if provided
if tags:
try:
service_name = hydrus_wrapper.get_tag_service_name(config)
except Exception:
service_name = "my tags"
try:
debug(f"Adding {len(tags)} tag(s) to Hydrus: {tags}")
client.add_tags(file_hash, tags, service_name)
log(f"✅ Tags added via '{service_name}'", file=sys.stderr)
except Exception as exc:
log(f"⚠️ Failed to add tags: {exc}", file=sys.stderr)
return file_hash
except Exception as exc:
log(f"❌ Hydrus upload failed: {exc}", file=sys.stderr)
raise
def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]:
"""Search Hydrus database for files matching query.
Args:
query: Search query (tags, filenames, hashes, etc.)
limit: Maximum number of results to return (default: 100)
config: Optional override for config (uses default if not provided)
Returns:
List of dicts with 'name', 'hash', 'size', 'tags' fields
Example:
results = storage["hydrus"].search("artist:john_doe music")
results = storage["hydrus"].search("Simple Man")
"""
from helper import hydrus as hydrus_wrapper
config = kwargs.get("config") or self._config
if not config:
raise ValueError("'config' parameter required for Hydrus search (not configured)")
limit = kwargs.get("limit", 100)
try:
client = hydrus_wrapper.get_client(config)
if client is None:
raise Exception("Hydrus client unavailable")
debug(f"Searching Hydrus for: {query}")
# Parse the query into tags
# Handle both simple tags and complex queries
# "*" means "match all" - use system:everything tag in Hydrus
if query.strip() == "*":
# Use system:everything to match all files in Hydrus
tags = ["system:everything"]
else:
query_lower = query.lower().strip()
# If query doesn't have a namespace (no ':'), search all files and filter by title/tags
# If query has explicit namespace, use it as a tag search
if ':' not in query_lower:
# No namespace provided: search all files, then filter by title/tags containing the query
tags = ["system:everything"]
else:
# User provided explicit namespace (e.g., "creator:john" or "system:has_audio")
# Use it as a tag search
tags = [query_lower]
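            # Examples: "*" -> ["system:everything"]; "creator:john" -> ["creator:john"];
            # "simple man" -> ["system:everything"], then term-filtered against titles/tags below.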
if not tags:
debug(f"Found 0 result(s)")
return []
# Search files with the tags
search_result = client.search_files(
tags=tags,
return_hashes=True,
return_file_ids=True
)
# Extract file IDs from search result
file_ids = search_result.get("file_ids", [])
hashes = search_result.get("hashes", [])
if not file_ids and not hashes:
debug(f"Found 0 result(s)")
return []
# Fetch metadata for the found files
results = []
query_lower = query.lower().strip()
# Split by comma or space for AND logic
search_terms = set(query_lower.replace(',', ' ').split()) # For substring matching
if file_ids:
metadata = client.fetch_file_metadata(file_ids=file_ids)
metadata_list = metadata.get("metadata", [])
for meta in metadata_list:
if len(results) >= limit:
break
file_id = meta.get("file_id")
hash_hex = meta.get("hash")
size = meta.get("size", 0)
# Get tags for this file and extract title
tags_set = meta.get("tags", {})
all_tags = []
title = f"Hydrus File {file_id}" # Default fallback
all_tags_str = "" # For substring matching
# debug(f"[HydrusBackend.search] Processing file_id={file_id}, tags type={type(tags_set)}")
if isinstance(tags_set, dict):
# Collect both storage_tags and display_tags to capture siblings/parents and ensure title: is seen
def _collect(tag_list: Any) -> None:
nonlocal title, all_tags_str
if not isinstance(tag_list, list):
return
for tag in tag_list:
tag_text = str(tag) if tag else ""
if not tag_text:
continue
all_tags.append(tag_text)
all_tags_str += " " + tag_text.lower()
if tag_text.lower().startswith("title:") and title == f"Hydrus File {file_id}":
title = tag_text.split(":", 1)[1].strip()
for service_name, service_tags in tags_set.items():
if not isinstance(service_tags, dict):
continue
storage_tags = service_tags.get("storage_tags", {})
if isinstance(storage_tags, dict):
for tag_list in storage_tags.values():
_collect(tag_list)
display_tags = service_tags.get("display_tags", [])
_collect(display_tags)
# Also consider top-level flattened tags payload if provided (Hydrus API sometimes includes it)
top_level_tags = meta.get("tags_flat", []) or meta.get("tags", [])
_collect(top_level_tags)
# Resolve extension from MIME type
mime_type = meta.get("mime")
ext = ""
if mime_type:
for category in mime_maps.values():
for ext_key, info in category.items():
if mime_type in info.get("mimes", []):
ext = info.get("ext", "").lstrip('.')
break
if ext:
break
# Filter results based on query type
# If user provided explicit namespace (has ':'), don't do substring filtering
# Just include what the tag search returned
has_namespace = ':' in query_lower
if has_namespace:
# Explicit namespace search - already filtered by Hydrus tag search
# Include this result as-is
results.append({
"hash": hash_hex,
"hash_hex": hash_hex,
"target": hash_hex,
"name": title,
"title": title,
"size": size,
"size_bytes": size,
"origin": "hydrus",
"tags": all_tags,
"file_id": file_id,
"mime": mime_type,
"ext": ext,
})
else:
# Free-form search: check if search terms match the title or tags
# Match if ALL search terms are found in title or tags (AND logic)
# AND use whole word matching
# Combine title and tags for searching
searchable_text = (title + " " + all_tags_str).lower()
match = True
if query_lower != "*":
for term in search_terms:
# Regex for whole word: \bterm\b
# Escape term to handle special chars
pattern = r'\b' + re.escape(term) + r'\b'
if not re.search(pattern, searchable_text):
match = False
break
if match:
results.append({
"hash": hash_hex,
"hash_hex": hash_hex,
"target": hash_hex,
"name": title,
"title": title,
"size": size,
"size_bytes": size,
"origin": "hydrus",
"tags": all_tags,
"file_id": file_id,
"mime": mime_type,
"ext": ext,
})
debug(f"Found {len(results)} result(s)")
return results[:limit]
except Exception as exc:
log(f"❌ Hydrus search failed: {exc}", file=sys.stderr)
import traceback
traceback.print_exc(file=sys.stderr)
raise
class MatrixStorageBackend(StorageBackend):
"""File storage backend for Matrix (Element) chat rooms."""
def get_name(self) -> str:
return "matrix"
    def list_rooms(self, config: Dict[str, Any]) -> list[Dict[str, Any]]:
"""List joined rooms with their names."""
matrix_conf = config.get('storage', {}).get('matrix', {})
homeserver = matrix_conf.get('homeserver')
access_token = matrix_conf.get('access_token')
if not homeserver or not access_token:
return []
if not homeserver.startswith('http'):
homeserver = f"https://{homeserver}"
headers = {"Authorization": f"Bearer {access_token}"}
try:
# Get joined rooms
resp = requests.get(f"{homeserver}/_matrix/client/v3/joined_rooms", headers=headers, timeout=10)
if resp.status_code != 200:
return []
room_ids = resp.json().get('joined_rooms', [])
rooms = []
for rid in room_ids:
# Try to get room name
name = "Unknown Room"
try:
# Get state event for name
name_resp = requests.get(
f"{homeserver}/_matrix/client/v3/rooms/{rid}/state/m.room.name",
headers=headers,
timeout=2
)
if name_resp.status_code == 200:
name = name_resp.json().get('name', name)
else:
# Try canonical alias
alias_resp = requests.get(
f"{homeserver}/_matrix/client/v3/rooms/{rid}/state/m.room.canonical_alias",
headers=headers,
timeout=2
)
if alias_resp.status_code == 200:
name = alias_resp.json().get('alias', name)
except Exception:
pass
rooms.append({'id': rid, 'name': name})
return rooms
except Exception as e:
log(f"Error listing Matrix rooms: {e}", file=sys.stderr)
return []
def upload(self, file_path: Path, **kwargs: Any) -> str:
"""Upload file to Matrix room.
Requires 'config' in kwargs with 'storage.matrix' settings:
- homeserver: URL of homeserver (e.g. https://matrix.org)
- user_id: User ID (e.g. @user:matrix.org)
- access_token: Access token (preferred) OR password
- room_id: Room ID to upload to (e.g. !roomid:matrix.org)
"""
config = kwargs.get('config', {})
if not config:
raise ValueError("Config required for Matrix upload")
matrix_conf = config.get('storage', {}).get('matrix', {})
if not matrix_conf:
raise ValueError("Matrix storage not configured in config.json")
homeserver = matrix_conf.get('homeserver')
# user_id = matrix_conf.get('user_id') # Not strictly needed if we have token
access_token = matrix_conf.get('access_token')
room_id = matrix_conf.get('room_id')
if not homeserver:
raise ValueError("Matrix homeserver required")
# Ensure homeserver has protocol
if not homeserver.startswith('http'):
homeserver = f"https://{homeserver}"
# Login if no access token (optional implementation, for now assume token)
if not access_token:
raise ValueError("Matrix access_token required (login not yet implemented)")
# Handle room selection if not provided
if not room_id:
log("No room_id configured. Fetching joined rooms...", file=sys.stderr)
rooms = self.list_rooms(config)
if not rooms:
raise ValueError("No joined rooms found or failed to fetch rooms.")
from result_table import ResultTable
table = ResultTable("Matrix Rooms")
for i, room in enumerate(rooms):
row = table.add_row()
row.add_column("#", str(i + 1))
row.add_column("Name", room['name'])
row.add_column("ID", room['id'])
print(table)
# Simple interactive selection
try:
selection = input("Select room # to upload to: ")
idx = int(selection) - 1
if 0 <= idx < len(rooms):
room_id = rooms[idx]['id']
log(f"Selected room: {rooms[idx]['name']} ({room_id})", file=sys.stderr)
else:
raise ValueError("Invalid selection")
except Exception:
raise ValueError("Invalid room selection")
if not room_id:
raise ValueError("Matrix room_id required")
        # 1. Upload media (media repo API; "v3" is the current stable path)
        upload_url = f"{homeserver}/_matrix/media/v3/upload"
headers = {
"Authorization": f"Bearer {access_token}",
"Content-Type": "application/octet-stream" # Or guess mime type
}
import mimetypes
mime_type, _ = mimetypes.guess_type(file_path)
if mime_type:
headers["Content-Type"] = mime_type
filename = file_path.name
try:
with open(file_path, 'rb') as f:
resp = requests.post(upload_url, headers=headers, data=f, params={"filename": filename})
if resp.status_code != 200:
raise Exception(f"Matrix upload failed: {resp.text}")
content_uri = resp.json().get('content_uri')
if not content_uri:
raise Exception("No content_uri returned from Matrix upload")
            # 2. Send message event (client-server API v3; the spec defines this
            # as a PUT with a unique transaction ID)
            import time
            txn_id = str(int(time.time() * 1000))
            send_url = f"{homeserver}/_matrix/client/v3/rooms/{room_id}/send/m.room.message/{txn_id}"
# Determine msgtype
msgtype = "m.file"
if mime_type:
if mime_type.startswith("image/"): msgtype = "m.image"
elif mime_type.startswith("video/"): msgtype = "m.video"
elif mime_type.startswith("audio/"): msgtype = "m.audio"
payload = {
"msgtype": msgtype,
"body": filename,
"url": content_uri,
"info": {
"mimetype": mime_type,
"size": file_path.stat().st_size
}
}
            # Send the event as JSON with only the auth header, so the upload's
            # file Content-Type doesn't override application/json
            resp = requests.put(send_url, headers={"Authorization": f"Bearer {access_token}"}, json=payload)
if resp.status_code != 200:
raise Exception(f"Matrix send message failed: {resp.text}")
event_id = resp.json().get('event_id')
return f"matrix://{room_id}/{event_id}"
except Exception as e:
log(f"❌ Matrix upload error: {e}", file=sys.stderr)
raise
class RemoteStorageBackend(StorageBackend):
"""File storage backend for remote Android/network storage servers.
Connects to a remote storage server (e.g., running on Android phone)
via REST API. All operations are proxied to the remote server.
"""
    def __init__(self, server_url: str, timeout: int = 30, api_key: Optional[str] = None) -> None:
"""Initialize remote storage backend.
Args:
server_url: Base URL of remote storage server (e.g., http://192.168.1.100:5000)
timeout: Request timeout in seconds
api_key: Optional API key for authentication
"""
try:
import requests
except ImportError:
raise ImportError("requests library required for RemoteStorageBackend. Install with: pip install requests")
self.server_url = server_url.rstrip('/')
self.timeout = timeout
self.api_key = api_key
self._session = requests.Session()
# Add API key to default headers if provided
if self.api_key:
self._session.headers.update({'X-API-Key': self.api_key})
def get_name(self) -> str:
return "remote"
def _request(self, method: str, endpoint: str, **kwargs) -> Dict[str, Any]:
"""Make HTTP request to remote server."""
import requests
from urllib.parse import urljoin
url = urljoin(self.server_url, endpoint)
try:
response = self._session.request(
method,
url,
timeout=self.timeout,
**kwargs
)
if response.status_code == 404:
raise Exception(f"Remote resource not found: {endpoint}")
if response.status_code >= 400:
try:
error_data = response.json()
error_msg = error_data.get('error', response.text)
                except Exception:
error_msg = response.text
raise Exception(f"Remote server error {response.status_code}: {error_msg}")
return response.json()
except requests.exceptions.RequestException as e:
raise Exception(f"Connection to {self.server_url} failed: {e}")
def upload(self, file_path: Path, **kwargs: Any) -> str:
"""Upload file to remote storage.
Args:
file_path: Path to the file to upload
tags: Optional list of tags to add
urls: Optional list of known URLs
Returns:
Remote file hash
"""
from helper.utils import sha256_file
if not file_path.exists():
raise ValueError(f"File not found: {file_path}")
try:
# Index the file on remote server
data = {"path": str(file_path)}
tags = kwargs.get("tags", [])
if tags:
data["tags"] = tags
urls = kwargs.get("urls", [])
if urls:
data["urls"] = urls
result = self._request('POST', '/files/index', json=data)
file_hash = result.get('hash')
if file_hash:
log(f"✓ File indexed on remote storage: {file_hash}", file=sys.stderr)
return file_hash
else:
raise Exception("Remote server did not return file hash")
except Exception as exc:
debug(f"Remote upload failed: {exc}", file=sys.stderr)
raise
def search(self, query: str, **kwargs: Any) -> list[Dict[str, Any]]:
"""Search files on remote storage.
Args:
query: Search query
limit: Maximum results
Returns:
List of search results
"""
limit = kwargs.get("limit")
try:
limit = int(limit) if limit is not None else 100
except (TypeError, ValueError):
limit = 100
if limit <= 0:
limit = 100
try:
response = self._request('GET', '/files/search', params={
'q': query,
'limit': limit
})
files = response.get('files', [])
# Transform remote format to standard result format
results = []
for f in files:
results.append({
"name": f.get('name', '').split('/')[-1], # Get filename from path
"title": f.get('name', f.get('path', '')).split('/')[-1],
"ext": f.get('ext', ''),
"path": f.get('path', ''),
"target": f.get('path', ''),
"hash": f.get('hash', ''),
"origin": "remote",
"size": f.get('size', 0),
"size_bytes": f.get('size', 0),
"tags": f.get('tags', []),
})
debug(f"Remote search found {len(results)} results", file=sys.stderr)
return results
except Exception as exc:
log(f"❌ Remote search failed: {exc}", file=sys.stderr)
raise
class FileStorage:
    """Unified file storage interface supporting multiple backend services.
    Example:
        storage = FileStorage(config)
        # Upload to different backends (uses configured locations)
        local_path = storage["local"].upload(Path("file.mp3"))  # Uses config["Local"]["path"]
        hydrus_hash = storage["hydrus"].upload(Path("file.mp3"), tags=["music"])
        # Search with searchable backends
        results = storage["hydrus"].search("music")
        results = storage["local"].search("song")  # Uses config["Local"]["path"]
    """
def __init__(self, config: Optional[Dict[str, Any]] = None) -> None:
"""Initialize the file storage system with available backends.
Args:
config: Configuration dict with backend settings (Local.path, HydrusNetwork, Debrid, etc.)
"""
config = config or {}
# Extract backend-specific settings from config
from config import get_local_storage_path
local_path = get_local_storage_path(config)
local_path_str = str(local_path) if local_path else None
self._backends: Dict[str, StorageBackend] = {}
# Always include local backend (even if no default path configured)
# The location can be specified at upload time if not configured globally
self._backends["local"] = LocalStorageBackend(location=local_path_str)
# Include Hydrus backend (configuration optional)
self._backends["hydrus"] = HydrusStorageBackend(config=config)
# Include Matrix backend
self._backends["matrix"] = MatrixStorageBackend()
# Include remote storage backends from config (for Android/network servers)
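        # Expected config shape (illustrative):
        #   "remote_storages": [{"name": "phone", "url": "http://192.168.1.50:5000",
        #                        "timeout": 30, "api_key": "secret"}]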
remote_storages = config.get("remote_storages", [])
if isinstance(remote_storages, list):
for remote_config in remote_storages:
if isinstance(remote_config, dict):
name = remote_config.get("name", "remote")
url = remote_config.get("url")
timeout = remote_config.get("timeout", 30)
api_key = remote_config.get("api_key")
if url:
try:
backend = RemoteStorageBackend(url, timeout=timeout, api_key=api_key)
self._backends[name] = backend
auth_status = " (with auth)" if api_key else " (no auth)"
log(f"Registered remote storage backend: {name} -> {url}{auth_status}", file=sys.stderr)
except Exception as e:
log(f"Failed to register remote storage '{name}': {e}", file=sys.stderr)
def __getitem__(self, backend_name: str) -> StorageBackend:
"""Get a storage backend by name.
Args:
            backend_name: Name of the backend (e.g., 'local', 'hydrus', 'matrix')
Returns:
StorageBackend instance
Raises:
KeyError: If backend not found
"""
if backend_name not in self._backends:
raise KeyError(
f"Unknown storage backend: {backend_name}. "
f"Available: {list(self._backends.keys())}"
)
return self._backends[backend_name]
def register(self, backend: StorageBackend) -> None:
"""Register a custom storage backend.
Args:
backend: StorageBackend instance to register
"""
name = backend.get_name()
self._backends[name] = backend
log(f"Registered storage backend: {name}", file=sys.stderr)
def list_backends(self) -> list[str]:
"""Get list of available backend names.
Returns:
List of backend names
"""
return list(self._backends.keys())
def is_available(self, backend_name: str) -> bool:
"""Check if a backend is available.
Args:
backend_name: Name of the backend
Returns:
True if backend is registered
"""
return backend_name in self._backends
def list_searchable_backends(self) -> list[str]:
"""Get list of backends that support searching.
Returns:
List of searchable backend names
"""
return [
name for name, backend in self._backends.items()
if backend.supports_search()
]
def supports_search(self, backend_name: str) -> bool:
"""Check if a backend supports searching.
Args:
backend_name: Name of the backend
Returns:
True if backend supports search(), False otherwise
"""
if backend_name not in self._backends:
return False
return self._backends[backend_name].supports_search()