# Medios-Macina/cmdlets/search_store.py

"""Search-store cmdlet: Search for files in storage backends (Folder, Hydrus)."""
from __future__ import annotations

from typing import Any, Dict, Sequence, List, Optional, Tuple
from pathlib import Path
from dataclasses import dataclass, field
from collections import OrderedDict
import re
import json
import sys

from helper.logger import log, debug

from ._shared import Cmdlet, CmdletArg, get_origin, get_field, should_show_help
import pipeline as ctx

# Optional dependencies
try:
    import mutagen  # type: ignore
except ImportError:  # pragma: no cover
    mutagen = None  # type: ignore

try:
    from config import get_hydrus_url, resolve_output_dir
except Exception:  # pragma: no cover
    get_hydrus_url = None  # type: ignore
    resolve_output_dir = None  # type: ignore

try:
    from helper.hydrus import HydrusClient, HydrusRequestError
except ImportError:  # pragma: no cover
    HydrusClient = None  # type: ignore
    HydrusRequestError = RuntimeError  # type: ignore

try:
    from helper.utils import sha256_file
except ImportError:  # pragma: no cover
    sha256_file = None  # type: ignore

try:
    from helper.utils_constant import mime_maps
except ImportError:  # pragma: no cover
    mime_maps = {}  # type: ignore

@dataclass(slots=True)
class SearchRecord:
    path: str
    size_bytes: int | None = None
    duration_seconds: str | None = None
    tags: str | None = None
    hash_hex: str | None = None

    def as_dict(self) -> dict[str, str]:
        payload: dict[str, str] = {"path": self.path}
        if self.size_bytes is not None:
            payload["size"] = str(self.size_bytes)
        if self.duration_seconds:
            payload["duration"] = self.duration_seconds
        if self.tags:
            payload["tags"] = self.tags
        if self.hash_hex:
            payload["hash"] = self.hash_hex
        return payload


# Lower-cased origin names that count as storage results: only payloads whose
# origin is in this set get title/ext defaults filled in by
# Search_Store._ensure_storage_columns.
STORAGE_ORIGINS = {"local", "hydrus", "folder"}


class Search_Store(Cmdlet):
    """Class-based search-store cmdlet for searching storage backends."""

    def __init__(self) -> None:
        """Pass the cmdlet's name, help text and arguments to the base class, then call ``register()``."""
        arguments = [
            CmdletArg("query", description="Search query string"),
            CmdletArg("tag", description="Filter by tag (can be used multiple times)"),
            CmdletArg("size", description="Filter by size: >100MB, <50MB, =10MB"),
            CmdletArg("type", description="Filter by type: audio, video, image, document"),
            CmdletArg("duration", description="Filter by duration: >10:00, <1:30:00"),
            CmdletArg("limit", type="integer", description="Limit results (default: 100)"),
            CmdletArg("store", description="Search specific storage backend (e.g., 'home', 'test', or 'default')"),
        ]
        # Free-form help lines; the trailing entries are usage examples.
        details = [
            "Search across storage backends: Folder stores and Hydrus instances",
            "Use -store to search a specific backend by name",
            "Filter results by: tag, size, type, duration",
            "Results include hash for downstream commands (get-file, add-tag, etc.)",
            "Examples:",
            "search-store foo                          # Search all storage backends",
            "search-store -store home '*'              # Search 'home' Hydrus instance",
            "search-store -store test 'video'          # Search 'test' folder store",
            "search-store song -type audio             # Search for audio files",
            "search-store movie -tag action            # Search with tag filter",
        ]
        super().__init__(
            name="search-store",
            summary="Search storage backends (Folder, Hydrus) for files.",
            usage="search-store [query] [-tag TAG] [-size >100MB|<50MB] [-type audio|video|image] [-duration >10:00] [-store BACKEND]",
            arg=arguments,
            detail=details,
            exec=self.run,
        )
        self.register()

    # --- Helper methods -------------------------------------------------
    @staticmethod
    def _normalize_extension(ext_value: Any) -> str:
        """Sanitize extension strings to alphanumerics and cap at 5 chars."""
        ext = str(ext_value or "").strip().lstrip(".")
        for sep in (" ", "|", "(", "[", "{", ",", ";"):
            if sep in ext:
                ext = ext.split(sep, 1)[0]
                break
        if "." in ext:
            ext = ext.split(".")[-1]
        ext = "".join(ch for ch in ext if ch.isalnum())
        return ext[:5]

    def _ensure_storage_columns(self, payload: Dict[str, Any]) -> Dict[str, Any]:
        """Fill in ``title`` and ``ext`` on storage payloads that lack them.

        Payloads whose lower-cased origin is not in ``STORAGE_ORIGINS`` are
        returned untouched. Otherwise the dict is updated in place and
        returned: a missing ``title`` falls back to ``name``, ``target``,
        ``path`` and finally ``"Result"``; a missing ``ext`` is derived from
        the title's file suffix, or set to ``""`` when the title has none.
        """
        origin_name = str(get_origin(payload, "") or "").lower()
        if origin_name in STORAGE_ORIGINS:
            payload.setdefault(
                "title",
                payload.get("name") or payload.get("target") or payload.get("path") or "Result",
            )

            if "ext" not in payload:
                suffix = Path(str(payload.get("title", ""))).suffix
                payload["ext"] = self._normalize_extension(suffix.lstrip(".")) if suffix else ""

        # No display columns are built here on purpose: size formatting and
        # column layout (including max_columns) are left to result_table.
        return payload

    # --- Execution ------------------------------------------------------
    def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
        """Search storage backends for files and emit the matches.

        Args:
            result: Piped-in value; not used by this cmdlet.
            args: Command-line tokens. ``-store``/``-storage``, ``-tag``,
                ``-limit``, ``-type``, ``-size`` and ``-duration`` each consume
                the following token; every other token that does not start
                with ``-`` is joined into the query. A ``store:NAME`` term
                inside the query selects a backend when ``-store`` was not
                given and restricts results to that origin.
            config: Configuration mapping handed to the storage helpers.

        Returns:
            ``0`` when help was shown or the search ran (even with no
            matches); ``1`` when no query or library root is available, the
            chosen backend cannot search, or an unexpected error occurred.
        """
        if should_show_help(args):
            log(f"Cmdlet: {self.name}\nSummary: {self.summary}\nUsage: {self.usage}")
            return 0

        args_list = [str(arg) for arg in (args or [])]

        # Parse arguments
        query = ""
        # NOTE(review): tag and type filters are collected here, but nothing
        # further down applies them — confirm how they are meant to be used.
        tag_filters: List[str] = []
        type_filter: Optional[str] = None
        storage_backend: Optional[str] = None
        limit = 100
        searched_backends: List[str] = []

        i = 0
        while i < len(args_list):
            arg = args_list[i]
            low = arg.lower()
            has_value = i + 1 < len(args_list)
            if has_value and low in {"-store", "--store", "-storage", "--storage"}:
                storage_backend = args_list[i + 1]
                i += 2
            elif has_value and low in {"-tag", "--tag"}:
                tag_filters.append(args_list[i + 1])
                i += 2
            elif has_value and low in {"-limit", "--limit"}:
                try:
                    limit = int(args_list[i + 1])
                except ValueError:
                    limit = 100
                i += 2
            elif has_value and low in {"-type", "--type"}:
                type_filter = args_list[i + 1].lower()
                i += 2
            elif has_value and low in {"-size", "--size", "-duration", "--duration"}:
                # These flags appear in the usage text but are not applied by
                # this method. Their value must still be consumed, otherwise
                # e.g. ">100MB" would be appended to the search query.
                debug(f"[search-store] Ignoring unsupported filter {arg} {args_list[i + 1]}")
                i += 2
            elif not arg.startswith("-"):
                query = f"{query} {arg}".strip() if query else arg
                i += 1
            else:
                i += 1

        # A zero or negative limit would make the result slicing below drop
        # items from the end instead of capping the list.
        if limit <= 0:
            limit = 100

        store_filter: Optional[str] = None
        if query:
            match = re.search(r"\bstore:([^\s,]+)", query, flags=re.IGNORECASE)
            if match:
                store_filter = match.group(1).strip() or None
            # Same word boundary as the match above, so a term such as
            # "mystore:x" is neither treated as a filter nor cut in half.
            query = re.sub(r"\s*,?\s*\bstore:[^\s,]+", " ", query, flags=re.IGNORECASE)
            query = re.sub(r"\s{2,}", " ", query)
            query = query.strip().strip(',')

        if store_filter and not storage_backend:
            storage_backend = store_filter
        # Origins are compared lower-cased, so the filter has to be as well.
        wanted_origin = store_filter.lower() if store_filter else None

        if not query:
            log("Provide a search query", file=sys.stderr)
            return 1

        from helper.folder_store import FolderDB
        from config import get_local_storage_path
        import uuid
        worker_id = str(uuid.uuid4())
        library_root = get_local_storage_path(config or {})
        if not library_root:
            log("No library root configured", file=sys.stderr)
            return 1

        def _as_dict(obj: Any) -> Dict[str, Any]:
            """Return a dict view of a search hit (copying plain dicts)."""
            if isinstance(obj, dict):
                return dict(obj)
            if hasattr(obj, "to_dict") and callable(getattr(obj, "to_dict")):
                return obj.to_dict()  # type: ignore[arg-type]
            return {"title": str(obj)}

        def _format_storage_label(name: str) -> str:
            """Turn a backend/origin name into a display label."""
            clean = str(name or "").strip()
            if not clean:
                return "Unknown"
            return clean.replace("_", " ").title()

        # Use context manager to ensure database is always closed
        with FolderDB(library_root) as db:
            try:
                db.insert_worker(
                    worker_id,
                    "search-store",
                    title=f"Search: {query}",
                    description=f"Query: {query}",
                    pipe=ctx.get_current_command_text()
                )

                results_list = []
                import result_table
                import importlib
                importlib.reload(result_table)
                from result_table import ResultTable

                table_title = f"Search: {query}"
                if storage_backend:
                    table_title += f" [{storage_backend}]"

                table = ResultTable(table_title)

                from helper.store import FileStorage
                storage = FileStorage(config=config or {})

                backend_to_search = storage_backend or None
                if backend_to_search:
                    searched_backends.append(backend_to_search)
                    target_backend = storage[backend_to_search]
                    if not callable(getattr(target_backend, 'search_file', None)):
                        log(f"Backend '{backend_to_search}' does not support searching", file=sys.stderr)
                        db.update_worker_status(worker_id, 'error')
                        return 1
                    results = target_backend.search_file(query, limit=limit)
                else:
                    from helper.hydrus import is_hydrus_available
                    hydrus_available = is_hydrus_available(config or {})

                    all_results = []
                    for backend_name in storage.list_searchable_backends():
                        if backend_name.startswith("hydrus") and not hydrus_available:
                            continue
                        searched_backends.append(backend_name)
                        try:
                            backend_results = storage[backend_name].search_file(query, limit=limit - len(all_results))
                            if backend_results:
                                all_results.extend(backend_results)
                            if len(all_results) >= limit:
                                break
                        except Exception as exc:
                            # One failing backend must not abort the others.
                            log(f"Backend {backend_name} search failed: {exc}", file=sys.stderr)
                    results = all_results[:limit]

                # Apply the store: filter first so that the summary counts,
                # the table rows and the emitted items all describe the same
                # set of results.
                matched: List[Tuple[Any, Dict[str, Any]]] = []
                for item in results or []:
                    item_dict = _as_dict(item)
                    if wanted_origin and str(get_origin(item_dict) or "").lower() != wanted_origin:
                        continue
                    matched.append((item, item_dict))

                # Seed with every searched backend so backends without hits
                # still show up with a count of zero. Keys are lower-cased to
                # line up with the lower-cased origins counted below.
                storage_counts: OrderedDict[str, int] = OrderedDict(
                    (str(name).lower(), 0) for name in searched_backends
                )
                for item, _ in matched:
                    origin = get_origin(item)
                    if not origin:
                        continue
                    key = str(origin).lower()
                    storage_counts[key] = storage_counts.get(key, 0) + 1

                display_counts = OrderedDict(
                    (_format_storage_label(name), count) for name, count in storage_counts.items()
                )
                summary_line = table.set_storage_summary(display_counts, query, inline=True)
                if summary_line:
                    table.title = summary_line

                for _, item_dict in matched:
                    normalized = self._ensure_storage_columns(item_dict)

                    # Make hash/store available for downstream cmdlets without rerunning search.
                    # NOTE(review): "hash_hex" and "file_hash" as alternate
                    # key names are an assumption — confirm against backends.
                    hash_val = (
                        normalized.get("hash")
                        or normalized.get("hash_hex")
                        or normalized.get("file_hash")
                    )
                    store_val = normalized.get("store") or get_origin(item_dict)
                    if hash_val and not normalized.get("hash"):
                        normalized["hash"] = hash_val
                    if store_val and not normalized.get("store"):
                        normalized["store"] = store_val

                    table.add_result(normalized)

                    results_list.append(normalized)
                    ctx.emit(normalized)

                if results_list:
                    # Debug: Verify table rows match items list
                    debug(f"[search-store] Added {len(table.rows)} rows to table, {len(results_list)} items to results_list")
                    if len(table.rows) != len(results_list):
                        debug(f"[search-store] WARNING: Table/items mismatch! rows={len(table.rows)} items={len(results_list)}", file=sys.stderr)

                    ctx.set_last_result_table(table, results_list)
                else:
                    log("No results found", file=sys.stderr)

                # default=str is deliberate: this is only a log of what was
                # emitted, and a non-JSON value in a result must not turn an
                # already-delivered search into a failure.
                db.append_worker_stdout(worker_id, json.dumps(results_list, indent=2, default=str))

                db.update_worker_status(worker_id, 'completed')
                return 0

            except Exception as exc:
                log(f"Search failed: {exc}", file=sys.stderr)
                import traceback
                traceback.print_exc(file=sys.stderr)
                try:
                    db.update_worker_status(worker_id, 'error')
                except Exception as status_exc:
                    # Best effort only; the original failure is what matters.
                    debug(f"[search-store] Could not mark worker {worker_id} as failed: {status_exc}")
                return 1


CMDLET = Search_Store()