# Medios-Macina/cmdlets/add_tag.py

from __future__ import annotations

from typing import Any, Dict, List, Sequence, Optional
from pathlib import Path
import sys

from helper.logger import log

import models
import pipeline as ctx
from ._shared import normalize_result_input, filter_results_by_temp
from helper import hydrus as hydrus_wrapper
from helper.folder_store import write_sidecar, FolderDB
from ._shared import Cmdlet, CmdletArg, SharedArgs, normalize_hash, parse_tag_arguments, expand_tag_groups, parse_cmdlet_args, collapse_namespace_tags, should_show_help, get_field
from config import get_local_storage_path


class Add_Tag(Cmdlet):
    """Class-based add-tag cmdlet with Cmdlet metadata inheritance."""

    def __init__(self) -> None:
        """Declare the add-tag metadata (usage, arguments, help text) and register the cmdlet."""
        # Accepted arguments, in the order they are shown in help output.
        arguments = [
            SharedArgs.HASH,
            SharedArgs.STORE,
            CmdletArg("-duplicate", type="string", description="Copy existing tag values to new namespaces. Formats: title:album,artist (explicit) or title,album,artist (inferred)"),
            CmdletArg("-list", type="string", description="Load predefined tag lists from adjective.json. Comma-separated list names (e.g., -list philosophy,occult)."),
            CmdletArg("--all", type="flag", description="Include temporary files in tagging (by default, only tags non-temporary files)."),
            CmdletArg("tags", type="string", required=False, description="One or more tags to add. Comma- or space-separated. Can also use {list_name} syntax. If omitted, uses tags from pipeline payload.", variadic=True),
        ]

        # Long-form help lines.
        help_lines = [
            "- By default, only tags non-temporary files (from pipelines). Use --all to tag everything.",
            "- Without -hash and when the selection is a local file, tags are written to <file>.tags.",
            "- With a Hydrus hash, tags are sent to the 'my tags' service.",
            "- Multiple tags can be comma-separated or space-separated.",
            "- Use -list to include predefined tag lists from adjective.json: -list philosophy,occult",
            "- Tags can also reference lists with curly braces: add-tag {philosophy} \"other:tag\"",
            "- Use -duplicate to copy EXISTING tag values to new namespaces:",
            "  Explicit format: -duplicate title:album,artist (copies title: to album: and artist:)",
            "  Inferred format: -duplicate title,album,artist (first is source, rest are targets)",
            "- The source namespace must already exist in the file being tagged.",
            "- Target namespaces that already have a value are skipped (not overwritten).",
            "- You can also pass the target hash as a tag token: hash:<sha256>. This overrides -hash and is removed from the tag list.",
        ]

        super().__init__(
            name="add-tag",
            summary="Add a tag to a Hydrus file or write it to a local .tags sidecar.",
            usage="add-tag [-hash <sha256>] [-store <backend>] [-duplicate <format>] [-list <list>[,<list>...]] [--all] <tag>[,<tag>...]",
            arg=arguments,
            detail=help_lines,
            exec=self.run,
        )
        self.register()

    @staticmethod
    def _extract_title_tag(tags: List[str]) -> Optional[str]:
        """Return the value of the first title: tag if present."""
        for tag in tags:
            if isinstance(tag, str) and tag.lower().startswith("title:"):
                value = tag.split(":", 1)[1].strip()
                if value:
                    return value
        return None

    @staticmethod
    def _apply_title_to_result(res: Any, title_value: Optional[str]) -> None:
        """Update result object/dict title fields and columns in-place."""
        if not title_value:
            return
        if isinstance(res, models.PipeObject):
            res.title = title_value
            if hasattr(res, "columns") and isinstance(res.columns, list) and res.columns:
                label, *_ = res.columns[0]
                if str(label).lower() == "title":
                    res.columns[0] = (res.columns[0][0], title_value)
        elif isinstance(res, dict):
            res["title"] = title_value
            cols = res.get("columns")
            if isinstance(cols, list):
                updated = []
                changed = False
                for col in cols:
                    if isinstance(col, tuple) and len(col) == 2:
                        label, val = col
                        if str(label).lower() == "title":
                            updated.append((label, title_value))
                            changed = True
                        else:
                            updated.append(col)
                    else:
                        updated.append(col)
                if changed:
                    res["columns"] = updated

    @staticmethod
    def _matches_target(item: Any, hydrus_hash: Optional[str], file_hash: Optional[str], file_path: Optional[str]) -> bool:
        """Determine whether a result item refers to the given hash/path target."""
        hydrus_hash_l = hydrus_hash.lower() if hydrus_hash else None
        file_hash_l = file_hash.lower() if file_hash else None
        file_path_l = file_path.lower() if file_path else None

        def norm(val: Any) -> Optional[str]:
            return str(val).lower() if val is not None else None

        hash_fields = ["hydrus_hash", "hash", "hash_hex", "file_hash"]
        path_fields = ["path", "file_path", "target"]

        if isinstance(item, dict):
            hashes = [norm(item.get(field)) for field in hash_fields]
            paths = [norm(item.get(field)) for field in path_fields]
        else:
            hashes = [norm(get_field(item, field)) for field in hash_fields]
            paths = [norm(get_field(item, field)) for field in path_fields]

        if hydrus_hash_l and hydrus_hash_l in hashes:
            return True
        if file_hash_l and file_hash_l in hashes:
            return True
        if file_path_l and file_path_l in paths:
            return True
        return False

    @staticmethod
    def _update_item_title_fields(item: Any, new_title: str) -> None:
        """Write ``new_title`` into an item's title field and its title column, in place.

        For a ``models.PipeObject`` only the first column is inspected; for a
        dict every ``(label, value)`` 2-tuple whose label is "title"
        (case-insensitive) is rewritten. Other item types are left untouched.
        """
        if isinstance(item, models.PipeObject):
            item.title = new_title
            columns = getattr(item, "columns", None)
            if isinstance(columns, list) and columns:
                label, *_rest = columns[0]
                if str(label).lower() == "title":
                    columns[0] = (label, new_title)
            return

        if not isinstance(item, dict):
            return

        item["title"] = new_title
        columns = item.get("columns")
        if not isinstance(columns, list):
            return

        def is_title_cell(cell: Any) -> bool:
            return isinstance(cell, tuple) and len(cell) == 2 and str(cell[0]).lower() == "title"

        # Only replace the list when at least one title cell actually exists.
        if any(is_title_cell(cell) for cell in columns):
            item["columns"] = [
                (cell[0], new_title) if is_title_cell(cell) else cell
                for cell in columns
            ]

    def _refresh_result_table_title(self, new_title: str, hydrus_hash: Optional[str], file_hash: Optional[str], file_path: Optional[str]) -> None:
        """Push a changed title into the cached result table and install it as an overlay.

        Best-effort: any failure (no cached table, no matching item, import or
        pipeline error) leaves the current display untouched.
        """
        try:
            cached_table = ctx.get_last_result_table()
            cached_items = ctx.get_last_result_items()
            if not (cached_table and cached_items):
                return

            refreshed = []
            any_hit = False
            for entry in cached_items:
                refreshed.append(entry)
                try:
                    if not self._matches_target(entry, hydrus_hash, file_hash, file_path):
                        continue
                    self._update_item_title_fields(entry, new_title)
                except Exception:
                    # A malformed entry must not prevent the others from being refreshed.
                    continue
                any_hit = True

            if not any_hit:
                return

            # Local import to avoid a circular dependency; if it fails the refresh is abandoned.
            from result_table import ResultTable  # noqa: F401

            rebuilt = cached_table.copy_with_title(getattr(cached_table, "title", ""))
            for entry in refreshed:
                rebuilt.add_result(entry)

            ctx.set_last_result_table_overlay(rebuilt, refreshed)
        except Exception:
            pass

    def _refresh_tags_view(self, res: Any, hydrus_hash: Optional[str], file_hash: Optional[str], file_path: Optional[str], config: Dict[str, Any]) -> None:
        """Refresh the tag display by re-running get-tag for the item that was just tagged.

        The current result subject is preferred when it refers to the same
        hash/path; otherwise get-tag is run against ``res`` by hash. This is a
        best-effort UI refresh: every failure is swallowed so that tagging
        itself is never reported as failed because of it.

        Args:
            res: The result that was tagged.
            hydrus_hash: Hydrus hash of the tagged file, if any.
            file_hash: File hash of the tagged file, if any.
            file_path: Path of the tagged file, if any.
            config: Cmdlet configuration, passed through to get-tag.
        """
        try:
            from cmdlets import get_tag as get_tag_cmd  # type: ignore
        except Exception:
            return

        def store_name_of(obj: Any) -> Optional[str]:
            # Same field precedence that run() uses to find an item's storage backend.
            for field in ("store", "storage", "storage_source", "origin"):
                try:
                    value = get_field(obj, field)
                except Exception:
                    continue
                if value:
                    return str(value)
            return None

        target_hash = hydrus_hash or file_hash

        def build_args(obj: Any) -> List[str]:
            if not target_hash:
                return []
            call_args = ["-hash", target_hash]
            # -store must name a storage backend. The previous code passed the
            # hash here, which can never match a backend name.
            store_name = store_name_of(obj) or store_name_of(res)
            if store_name:
                call_args += ["-store", store_name]
            return call_args

        try:
            subject = ctx.get_last_result_subject()
            if subject and self._matches_target(subject, hydrus_hash, file_hash, file_path):
                get_tag_cmd._run(subject, build_args(subject), config)
                return
        except Exception:
            pass

        if target_hash:
            try:
                get_tag_cmd._run(res, build_args(res), config)
            except Exception:
                pass

    @staticmethod
    def _payload_tags(item: Any) -> List[Any]:
        """Return a copy of the tags carried by a pipeline item, or [] if it has none."""
        lookups = (
            lambda x: x.extra.get("tags") if isinstance(x, models.PipeObject) and isinstance(x.extra, dict) else None,
            lambda x: x.get("tags") if isinstance(x, dict) else None,
            lambda x: x.get("extra", {}).get("tags") if isinstance(x, dict) and isinstance(x.get("extra"), dict) else None,
            lambda x: getattr(x, "tags", None),
        )
        for lookup in lookups:
            try:
                found = lookup(item)
            except (AttributeError, TypeError, KeyError):
                continue
            if not found:
                continue
            if isinstance(found, str):
                return [found]
            # Copy so later appends (e.g. -list groups) never mutate the item's own tags.
            return list(found) if isinstance(found, list) else []
        return []

    @staticmethod
    def _parse_duplicate_spec(duplicate_arg: Optional[str]) -> tuple:
        """Parse a -duplicate value into ``(source_namespace, [target_namespaces])``.

        Accepts ``source:target1,target2`` or ``source,target1,target2``.
        Returns ``("", [])`` when the value is missing or has no usable
        source/targets. Surrounding whitespace is stripped from every name.
        """
        if not duplicate_arg:
            return "", []
        spec = str(duplicate_arg)
        if ":" in spec:
            pieces = spec.split(":")
            source, raw_targets = pieces[0], pieces[1].split(",")
        else:
            pieces = spec.split(",")
            source, raw_targets = pieces[0], pieces[1:]
        source = source.strip()
        targets = [name.strip() for name in raw_targets if name.strip()]
        if not source or not targets:
            return "", []
        return source, targets

    @staticmethod
    def _duplicate_tags(existing_tags: Sequence[Any], source_ns: str, targets: Sequence[str], pending: Sequence[Any]) -> List[str]:
        """Build the tags that copy ``source_ns`` values into each target namespace.

        Target namespaces that already hold a value on the item are skipped, as
        are tags already present on the item or in ``pending``. Namespace
        matching is case-insensitive; non-string tags are ignored.
        """
        text_tags = [t for t in existing_tags if isinstance(t, str)]
        occupied = {t.partition(":")[0].strip().lower() for t in text_tags if ":" in t}
        known = set(text_tags) | {t for t in pending if isinstance(t, str)}
        prefix = source_ns.lower() + ":"

        copies: List[str] = []
        for tag in text_tags:
            if not tag.lower().startswith(prefix):
                continue
            value = tag.split(":", 1)[1]
            for target_ns in targets:
                if target_ns.lower() in occupied:
                    continue  # documented: never overwrite an existing value
                candidate = f"{target_ns}:{value}"
                if candidate not in known:
                    known.add(candidate)
                    copies.append(candidate)
        return copies

    @staticmethod
    def _set_result_tags(res: Any, tags: List[Any]) -> None:
        """Store ``tags`` on a PipeObject (``tags`` and ``extra['tags']``) or on a result dict."""
        if isinstance(res, models.PipeObject):
            res.tags = tags
            if isinstance(res.extra, dict):
                res.extra['tags'] = tags
        elif isinstance(res, dict):
            res['tags'] = tags

    def _write_sidecar_fallback(self, res: Any, file_path: Any, file_hash: str, tags: List[Any], storage_source: Any) -> bool:
        """Write ``tags`` to a sidecar next to ``file_path`` when no backend handles the item.

        Returns True when the sidecar was written (the result's tags are then
        extended with ``tags``, order preserved and duplicates dropped); False
        when there is no path to write next to or the write failed.
        """
        if not file_path:
            log(f"[add_tag] Warning: Storage backend '{storage_source}' not found in config", file=sys.stderr)
            return False
        try:
            sidecar_path = write_sidecar(Path(file_path), tags, [], file_hash)
            log(f"[add_tag] Wrote {len(tags)} tag(s) to sidecar: {sidecar_path}", file=sys.stderr)
            current = get_field(res, "tags") or []
            self._set_result_tags(res, list(dict.fromkeys(list(current) + list(tags))))
        except Exception as exc:
            log(f"[add_tag] Warning: Failed to write sidecar for {file_path}: {exc}", file=sys.stderr)
            return False
        return True

    def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
        """Add tags to every eligible pipeline result (or to the file named by a hash).

        Flow:
          1. Parse arguments; drop temporary results unless ``--all`` is given.
          2. Collect tags from the command line, falling back to the first
             result's own tags, plus any ``-list`` groups; expand ``{group}``
             references. A ``hash:<sha256>`` token is removed from the tags and
             used in place of ``-hash``.
          3. With no piped input but a hash, tag a synthetic single result.
          4. Per result: add ``-duplicate`` copies computed from that result's
             own tags, then send the tags to Hydrus, to the named storage
             backend, or to a sidecar file when the backend is not registered.
          5. Refresh the cached table title / tag view when they changed, and
             emit the (possibly updated) result downstream.

        Args:
            result: Piped input, normalised with ``normalize_result_input``.
            args: Raw command-line arguments for this cmdlet.
            config: Application configuration passed to storage/Hydrus helpers.

        Returns:
            0 on success (including items skipped with a warning), 1 when there
            is nothing to tag or no tag to add.
        """
        if should_show_help(args):
            log(f"Cmdlet: {self.name}\nSummary: {self.summary}\nUsage: {self.usage}")
            return 0

        parsed = parse_cmdlet_args(args, self)
        include_temp = parsed.get("all", False)
        hash_override = normalize_hash(parsed.get("hash"))
        store_override = parsed.get("store") or parsed.get("storage")
        duplicate_arg = parsed.get("duplicate")

        # Piped results are filtered first because the tag fallback below reads
        # the first *eligible* result.
        results = normalize_result_input(result)
        had_piped_input = bool(results)
        if had_piped_input and not include_temp:
            results = filter_results_by_temp(results, include_temp=False)

        # Explicit tags; always work on a private list so that neither the
        # parsed arguments nor a result's own tag list is mutated below.
        raw_tags = parsed.get("tags") or []
        raw_tags = [raw_tags] if isinstance(raw_tags, str) else list(raw_tags)
        if not raw_tags and results:
            raw_tags = self._payload_tags(results[0])

        # -list names are turned into {name} group references.
        list_arg = parsed.get("list")
        if list_arg:
            for list_name in list_arg.split(','):
                list_name = list_name.strip()
                if list_name:
                    raw_tags.append(f"{{{list_name}}}")

        parsed_tags = expand_tag_groups(parse_tag_arguments(raw_tags))

        # A "hash:<sha256>" token selects the target and is not a tag itself.
        base_tags: List[Any] = []
        for tag in parsed_tags or []:
            if isinstance(tag, str) and tag.lower().startswith("hash:"):
                hash_text = tag.partition(":")[2].strip()
                token_hash = normalize_hash(hash_text) if hash_text else None
                if token_hash:
                    hash_override = token_hash  # documented: overrides -hash
                continue
            base_tags.append(tag)

        # No piped input: tag the file identified by hash alone.
        if not had_piped_input and hash_override:
            results = [{"hash": hash_override, "is_temp": False}]
            if store_override:
                results[0]["store"] = store_override
            if not include_temp:
                results = filter_results_by_temp(results, include_temp=False)

        if not results:
            log("No valid files to tag (all results were temporary; use --all to include temporary files)", file=sys.stderr)
            return 1

        dup_source, dup_targets = self._parse_duplicate_spec(duplicate_arg)
        if duplicate_arg and not dup_source:
            log(f"[add_tag] Warning: Could not parse -duplicate value '{duplicate_arg}'; expected source:target[,target...]", file=sys.stderr)

        # -duplicate on its own is a valid request; only fail when there is
        # neither an explicit tag nor a usable duplicate rule.
        if not base_tags and not dup_source:
            log("No tags provided to add", file=sys.stderr)
            return 1

        total_new_tags = 0
        total_modified = 0
        for res in results:
            file_path = get_field(res, "path")
            existing_tags = get_field(res, "tags") or []
            if not existing_tags:
                extra = get_field(res, "extra")
                existing_tags = (extra.get("tags") if isinstance(extra, dict) else None) or []
            file_hash = get_field(res, "hash") or get_field(res, "file_hash") or get_field(res, "hash_hex") or ""
            storage_source = get_field(res, "store") or get_field(res, "storage") or get_field(res, "storage_source") or get_field(res, "origin")

            # CLI values fill in what the result lacks *before* anything is
            # inferred, so an explicit -store is not shadowed by the 'local' guess.
            if hash_override and not file_hash:
                file_hash = hash_override
            if store_override and not storage_source:
                storage_source = store_override
            if not storage_source:
                if file_path:
                    storage_source = 'local'
                elif file_hash and file_hash != "unknown":
                    storage_source = 'hydrus'
            hydrus_hash = get_field(res, "hydrus_hash") or file_hash

            original_tags_lower = {str(t).lower() for t in existing_tags if isinstance(t, str)}
            original_title = self._extract_title_tag(list(existing_tags))

            if not file_path and not file_hash:
                log(f"[add_tag] Warning: Result has neither path nor hash available, skipping", file=sys.stderr)
                ctx.emit(res)
                continue

            # Per-item tag list: -duplicate copies depend on this item's tags
            # and must not leak into the next item.
            item_tags = list(base_tags)
            if dup_source:
                item_tags.extend(self._duplicate_tags(existing_tags, dup_source, dup_targets, item_tags))
            if not item_tags:
                log(f"[add_tag] Warning: Nothing to add for this item (no '{dup_source}:' tag to duplicate), skipping", file=sys.stderr)
                ctx.emit(res)
                continue

            removed_tags: List[Any] = []
            new_tags_added: List[Any] = []
            final_tags = list(existing_tags)

            store_key = str(storage_source).lower() if storage_source else ""
            if store_key == 'hydrus':
                target_hash = file_hash
                if target_hash:
                    try:
                        hydrus_client = hydrus_wrapper.get_client(config)
                        service_name = hydrus_wrapper.get_tag_service_name(config)

                        # A namespaced tag replaces older values in its
                        # namespace, but never a tag that is itself being added.
                        adding_lower = {t.lower() for t in item_tags if isinstance(t, str)}
                        for new_tag in item_tags:
                            if isinstance(new_tag, str) and ':' in new_tag:
                                ns_prefix = new_tag.split(':', 1)[0] + ':'
                                removed_tags.extend(
                                    t for t in existing_tags
                                    if isinstance(t, str) and t.startswith(ns_prefix) and t.lower() not in adding_lower
                                )

                        log(f"[add_tag] Adding {len(item_tags)} tag(s) to Hydrus file: {target_hash}", file=sys.stderr)
                        hydrus_client.add_tags(target_hash, item_tags, service_name)

                        if removed_tags:
                            hydrus_client.delete_tags(target_hash, sorted(set(removed_tags)), service_name)

                        total_new_tags += len(item_tags)
                        total_modified += 1
                        log(f"[add_tag] ✓ Added {len(item_tags)} tag(s) to Hydrus", file=sys.stderr)

                        # Best-effort: re-read the tags from the backend for accurate display.
                        try:
                            from helper.store import FileStorage
                            storage = FileStorage(config)
                            if storage and storage_source in storage.list_backends():
                                backend = storage[storage_source]
                                refreshed_tags, _ = backend.get_tag(target_hash)
                                if refreshed_tags is not None:
                                    final_tags = refreshed_tags
                                    new_tags_added = [t for t in refreshed_tags if t.lower() not in original_tags_lower]
                                    self._set_result_tags(res, refreshed_tags)
                        except Exception:
                            pass
                    except Exception as e:
                        log(f"[add_tag] Warning: Failed to add tags to Hydrus: {e}", file=sys.stderr)
                else:
                    log(f"[add_tag] Warning: No hash available for Hydrus file, skipping", file=sys.stderr)
            elif storage_source:
                # Any other named store: delegate to the registered backend, or
                # fall back to a sidecar file when no such backend exists.
                from helper.store import FileStorage
                storage = FileStorage(config)
                try:
                    if storage and storage_source in storage.list_backends():
                        backend = storage[storage_source]
                        if file_hash and backend.add_tag(file_hash, item_tags):
                            refreshed_tags, _ = backend.get_tag(file_hash)
                            if refreshed_tags:
                                self._set_result_tags(res, refreshed_tags)
                                self._apply_title_to_result(res, self._extract_title_tag(refreshed_tags))

                                new_tags_added = [t for t in refreshed_tags if t.lower() not in original_tags_lower]
                                total_new_tags += len(new_tags_added)
                                if new_tags_added:
                                    total_modified += 1

                                log(f"[add_tag] Added {len(new_tags_added)} new tag(s); {len(refreshed_tags)} total tag(s) stored in {storage_source}", file=sys.stderr)
                                final_tags = refreshed_tags
                        else:
                            log(f"[add_tag] Warning: Failed to add tags to {storage_source}", file=sys.stderr)
                    elif self._write_sidecar_fallback(res, file_path, file_hash, item_tags, storage_source):
                        total_new_tags += len(item_tags)
                        total_modified += 1
                except KeyError:
                    # storage[storage_source] raised KeyError - treat as an absent backend.
                    if self._write_sidecar_fallback(res, file_path, file_hash, item_tags, storage_source):
                        total_new_tags += len(item_tags)
                        total_modified += 1
            else:
                # Unknown storage: pass the item through without writing sidecars.
                ctx.emit(res)
                continue

            # If the title changed, refresh the cached result table so the display shows the new name.
            final_title = self._extract_title_tag(final_tags)
            if final_title and (not original_title or final_title.lower() != original_title.lower()):
                self._refresh_result_table_title(final_title, hydrus_hash or file_hash, file_hash, file_path)

            # If tags changed, refresh the tag view via get-tag.
            if new_tags_added or removed_tags:
                self._refresh_tags_view(res, hydrus_hash, file_hash, file_path, config)

            ctx.emit(res)

        log(f"[add_tag] Added {total_new_tags} new tag(s) across {len(results)} item(s); modified {total_modified} item(s)", file=sys.stderr)
        return 0


# Module-level cmdlet instance. Add_Tag.__init__ ends with self.register(), so
# instantiating here runs that registration at import time
# (presumably making "add-tag" discoverable - confirm in Cmdlet.register).
CMDLET = Add_Tag()