# Medios-Macina/cmdlets/get_relationship.py
from __future__ import annotations
from typing import Any, Dict, Sequence, List, Optional
import json
import sys
from pathlib import Path
from helper.logger import log
from . import register
import models
import pipeline as ctx
from helper import hydrus as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, normalize_hash, fmt_bytes
from helper.local_library import LocalLibraryDB
from config import get_local_storage_path
from result_table import ResultTable
CMDLET = Cmdlet(
name="get-relationship",
summary="Print relationships for the selected file (Hydrus or Local).",
usage="get-relationship [-hash <sha256>]",
args=[
CmdletArg("-hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
],
details=[
"- Lists relationship data as returned by Hydrus or Local DB.",
],
)
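# Illustrative usage (a sketch; assumes the interactive pipeline shell and the @N
# result-selection syntax referenced further down, neither of which is defined here):
#   get-relationship                    # relationships for the currently selected result
#   get-relationship -hash <sha256>     # target an explicit file hash instead
#   @3 | get-rel                        # pick row 3 of the last result table, then inspect it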
@register(["get-rel", "get-relationship", "get-relationships", "get-file-relationships"]) # aliases
def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
# Help
try:
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in _args):
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
return 0
except Exception:
pass
# Parse -hash override
override_hash: str | None = None
args_list = list(_args)
i = 0
while i < len(args_list):
a = args_list[i]
low = str(a).lower()
if low in {"-hash", "--hash", "hash"} and i + 1 < len(args_list):
override_hash = str(args_list[i + 1]).strip()
break
i += 1
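# e.g. "get-relationship -hash 0123abcd..." leaves override_hash == "0123abcd..." (illustrative value).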
# Handle @N selection which creates a list - extract the first item
if isinstance(result, list) and len(result) > 0:
result = result[0]
# Initialize results collection
found_relationships = [] # List of dicts: {hash, type, title, path, origin}
source_title = "Unknown"
def _add_relationship(entry: Dict[str, Any]) -> None:
"""Add relationship if not already present by hash or path."""
for existing in found_relationships:
if entry.get("hash") and str(existing.get("hash", "")).lower() == str(entry["hash"]).lower():
return
if entry.get("path") and str(existing.get("path", "")).lower() == str(entry["path"]).lower():
return
found_relationships.append(entry)
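# Illustrative entry accepted by _add_relationship (field names per the list above;
# values are made up for the example):
#   {"hash": "ab12cd34...", "type": "alt", "title": "Some Title",
#    "path": "/library/some_title.mp4", "origin": "local"}
# Entries whose hash or path matches an existing entry (case-insensitively) are dropped.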
# Check for local file first
file_path = None
if isinstance(result, dict):
file_path = result.get("file_path") or result.get("path")
source_title = result.get("title") or result.get("name") or "Unknown"
elif hasattr(result, "file_path"):
file_path = result.file_path
source_title = getattr(result, "title", "Unknown")
local_db_checked = False
if file_path and not override_hash:
try:
path_obj = Path(file_path)
if not source_title or source_title == "Unknown":
source_title = path_obj.name
print(f"\n[DEBUG] Starting get-relationship for: {path_obj.name}", file=sys.stderr)
print(f"[DEBUG] Path exists: {path_obj.exists()}", file=sys.stderr)
if path_obj.exists():
storage_path = get_local_storage_path(config)
print(f"[DEBUG] Storage path: {storage_path}", file=sys.stderr)
if storage_path:
with LocalLibraryDB(storage_path) as db:
metadata = db.get_metadata(path_obj)
print(f"[DEBUG] Metadata found: {metadata is not None}", file=sys.stderr)
if metadata and metadata.get("relationships"):
local_db_checked = True
rels = metadata["relationships"]
print(f"[DEBUG] Relationships dict: {rels}", file=sys.stderr)
if isinstance(rels, dict):
for rel_type, hashes in rels.items():
print(f"[DEBUG] Processing rel_type: {rel_type}, hashes: {hashes}", file=sys.stderr)
if hashes:
for h in hashes:
# h is now a file hash (not a path)
print(f"[DEBUG] Processing relationship hash: h={h}", file=sys.stderr)
# Resolve hash to file path
resolved_path = db.search_by_hash(h)
title = h[:16] + "..."
path = None
if resolved_path and resolved_path.exists():
path = str(resolved_path)
# Try to get title from tags
try:
tags = db.get_tags(resolved_path)
found_title = False
for t in tags:
if t.lower().startswith('title:'):
title = t[6:].strip()
found_title = True
break
if not found_title:
title = resolved_path.stem
except Exception:
title = resolved_path.stem
entry_type = "king" if rel_type.lower() == "alt" else rel_type
_add_relationship({
"hash": h,
"type": entry_type,
"title": title,
"path": path,
"origin": "local"
})
# RECURSIVE LOOKUP: If this is an "alt" relationship (meaning we're an alt pointing to a king),
# then we should look up the king's other alts to show siblings.
# NOTE: We only do this for "alt" relationships, not "king", to avoid duplicating
# the king's direct relationships with its alts.
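# Example (hypothetical): file B stores {"alt": [<hash of A>]}. A is then listed as B's
# "king", and A's own relationships plus reverse links are walked below so B's sibling
# alts show up in the same table.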
print(f"[DEBUG] Checking if recursive lookup needed: rel_type={rel_type}, path={path}", file=sys.stderr)
if rel_type.lower() == "alt" and path:
print(f"[DEBUG] 🔍 RECURSIVE LOOKUP TRIGGERED for parent: {path}", file=sys.stderr)
try:
parent_path_obj = Path(path)
print(f"[DEBUG] Parent path obj: {parent_path_obj}", file=sys.stderr)
# Also add the king/parent itself if not already in results
existing_parent = None
for r in found_relationships:
if str(r.get('hash', '')).lower() == str(path).lower() or str(r.get('path', '')).lower() == str(path).lower():
existing_parent = r
break
if not existing_parent:
parent_title = parent_path_obj.stem
try:
parent_tags = db.get_tags(parent_path_obj)
for t in parent_tags:
if t.lower().startswith('title:'):
parent_title = t[6:].strip()
break
except Exception:
pass
print(f"[DEBUG] Adding king/parent to results: {parent_title}", file=sys.stderr)
_add_relationship({
"hash": str(path),
"type": "king" if rel_type.lower() == "alt" else rel_type,
"title": parent_title,
"path": str(path),
"origin": "local"
})
else:
# If already in results, ensure it's marked as king if appropriate
if rel_type.lower() == "alt":
existing_parent['type'] = "king"
# 1. Check forward relationships from parent (siblings)
parent_metadata = db.get_metadata(parent_path_obj)
print(f"[DEBUG] 📖 Parent metadata: {parent_metadata is not None}", file=sys.stderr)
if parent_metadata:
print(f"[DEBUG] Parent metadata keys: {parent_metadata.keys()}", file=sys.stderr)
if parent_metadata and parent_metadata.get("relationships"):
parent_rels = parent_metadata["relationships"]
print(f"[DEBUG] 👑 Parent has relationships: {list(parent_rels.keys())}", file=sys.stderr)
if isinstance(parent_rels, dict):
for child_type, child_hashes in parent_rels.items():
print(f"[DEBUG] Type '{child_type}': {len(child_hashes) if child_hashes else 0} children", file=sys.stderr)
if child_hashes:
for child_h in child_hashes:
# child_h is now a HASH, not a path - resolve it
child_path_obj = db.search_by_hash(child_h)
print(f"[DEBUG] Resolved hash {child_h[:16]}... to: {child_path_obj}", file=sys.stderr)
if not child_path_obj:
# Hash doesn't resolve - skip it
print(f"[DEBUG] ⏭️ Hash doesn't resolve, skipping: {child_h}", file=sys.stderr)
continue
# Check if already added (case-insensitive hash/path check)
if any(str(r.get('hash', '')).lower() == str(child_h).lower() or str(r.get('path', '')).lower() == str(child_path_obj).lower() for r in found_relationships):
print(f"[DEBUG] ⏭️ Already in results: {child_h}", file=sys.stderr)
continue
# Now child_path_obj is a Path, so we can get tags
child_title = child_path_obj.stem
try:
child_tags = db.get_tags(child_path_obj)
for t in child_tags:
if t.lower().startswith('title:'):
child_title = t[6:].strip()
break
except Exception:
pass
print(f"[DEBUG] Adding sibling: {child_title}", file=sys.stderr)
_add_relationship({
"hash": child_h,
"type": f"alt" if child_type == "alt" else f"sibling ({child_type})",
"title": child_title,
"path": str(child_path_obj),
"origin": "local"
})
else:
print(f"[DEBUG] ⚠️ Parent has no relationships metadata", file=sys.stderr)
# 2. Check reverse relationships pointing TO parent (siblings via reverse lookup)
# This handles the case where siblings point to parent but parent doesn't point to siblings
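# find_files_pointing_to is assumed to return a list of dicts shaped like
#   {"path": "/library/other_alt.mp4", "type": "alt"}
# (inferred from the keys read below; the values here are illustrative).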
reverse_children = db.find_files_pointing_to(parent_path_obj)
print(f"[DEBUG] 🔄 Reverse lookup found {len(reverse_children)} children", file=sys.stderr)
for child in reverse_children:
child_path = child['path']
child_type = child['type']
print(f"[DEBUG] Reverse child: {child_path}, type: {child_type}", file=sys.stderr)
# Skip if already added (check by path/hash, case-insensitive)
if any(str(r.get('path', '')).lower() == str(child_path).lower() or str(r.get('hash', '')).lower() == str(child_path).lower() for r in found_relationships):
print(f"[DEBUG] ⏭️ Already in results: {child_path}", file=sys.stderr)
continue
child_path_obj = Path(child_path)
child_title = child_path_obj.stem
try:
child_tags = db.get_tags(child_path_obj)
for t in child_tags:
if t.lower().startswith('title:'):
child_title = t[6:].strip()
break
except Exception:
pass
print(f"[DEBUG] Adding reverse sibling: {child_title}", file=sys.stderr)
_add_relationship({
"hash": child_path,
"type": f"alt" if child_type == "alt" else f"sibling ({child_type})",
"title": child_title,
"path": child_path,
"origin": "local"
})
except Exception as e:
print(f"[DEBUG] ❌ Recursive lookup error: {e}", file=sys.stderr)
import traceback
traceback.print_exc(file=sys.stderr)
except Exception as e:
log(f"Recursive lookup error: {e}", file=sys.stderr)
# ALSO CHECK REVERSE RELATIONSHIPS FOR THE CURRENT FILE
# NOTE: This is now handled via recursive lookup above, which finds siblings through the parent.
# We keep this disabled to avoid adding the same relationships twice.
# If needed in future, can be re-enabled with better deduplication.
# for rev in reverse_rels:
# rev_path = rev['path']
# rev_type = rev['type']
#
# if any(r['hash'] == rev_path for r in found_relationships): continue
#
# rev_path_obj = Path(rev_path)
# rev_title = rev_path_obj.stem
# try:
# rev_tags = db.get_tags(rev_path_obj)
# for t in rev_tags:
# if t.lower().startswith('title:'):
# rev_title = t[6:].strip(); break
# except Exception: pass
#
# # If someone points to us as 'alt' or 'king', they are our 'child' or 'subject'
# # But we'll just list them with the relationship type they used
# found_relationships.append({
# "hash": rev_path,
# "type": f"reverse-{rev_type}", # e.g. reverse-alt
# "title": rev_title,
# "path": rev_path,
# "origin": "local"
# })
except Exception as e:
log(f"Error checking local relationships: {e}", file=sys.stderr)
# Local relationships (if any) have been collected above. Fall back to Hydrus only
# when a hash is available and the local DB did not already yield relationship
# metadata, so the same relationships are not gathered twice.
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(getattr(result, "hash_hex", None))
if not hash_hex:
# Try to get hash from dict
if isinstance(result, dict):
hash_hex = normalize_hash(result.get("hash") or result.get("file_hash"))
if hash_hex and not local_db_checked:
try:
client = hydrus_wrapper.get_client(config)
if client:
rel = client.get_file_relationships(hash_hex)
if rel:
file_rels = rel.get("file_relationships", {})
this_file_rels = file_rels.get(hash_hex)
if this_file_rels:
# Map Hydrus relationship IDs to names
# 0: potential duplicates, 1: false positives, 2: false positives (alternates),
# 3: duplicates, 4: alternatives, 8: king
# This mapping is approximate based on Hydrus API docs/behavior
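# Assumed (simplified) shape of the Hydrus get_file_relationships payload walked below
# (illustrative only, not an exhaustive schema):
#   {"file_relationships": {"<hash>": {"is_king": true, "king": "<hash>",
#                                      "0": ["<hash>", ...], "3": ["<hash>", ...]}}}
# Numeric keys are relationship-type ids (translated via rel_map); the named keys are
# metadata and are skipped.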
rel_map = {
"0": "potential duplicate",
"1": "false positive",
"2": "false positive",
"3": "duplicate",
"4": "alternative",
"8": "king"
}
for rel_type_id, hash_list in this_file_rels.items():
# Skip metadata keys
if rel_type_id in {"is_king", "king", "king_is_on_file_domain", "king_is_local"}:
continue
rel_name = rel_map.get(str(rel_type_id), f"type-{rel_type_id}")
if isinstance(hash_list, list):
for rel_hash in hash_list:
if isinstance(rel_hash, str) and rel_hash and rel_hash != hash_hex:
# Check if we already have this hash from local DB
if not any(r['hash'] == rel_hash for r in found_relationships):
found_relationships.append({
"hash": rel_hash,
"type": rel_name,
"title": rel_hash, # Can't resolve title easily without another API call
"path": None,
"origin": "hydrus"
})
except Exception as exc:
# Only log error if we didn't find local relationships either
if not found_relationships:
log(f"Hydrus relationships fetch failed: {exc}", file=sys.stderr)
if not found_relationships:
log("No relationships found.")
return 0
# Display results
table = ResultTable(f"Relationships: {source_title}")
# Sort by type, then title.
# Custom order: king first, then derivative, alternative, duplicate, then everything else.
def type_sort_key(item):
t = item['type'].lower()
if t == 'king':
return 0
elif t == 'derivative':
return 1
elif t == 'alternative':
return 2
elif t == 'duplicate':
return 3
else:
return 4
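# Equivalent table-driven sketch (hypothetical alternative, behaviour unchanged):
#   _TYPE_ORDER = {"king": 0, "derivative": 1, "alternative": 2, "duplicate": 3}
#   type_sort_key = lambda item: _TYPE_ORDER.get(item["type"].lower(), 4)
# Ties within a type are broken alphabetically by title in the sort below.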
found_relationships.sort(key=lambda x: (type_sort_key(x), x['title']))
pipeline_results = []
for i, item in enumerate(found_relationships):
row = table.add_row()
row.add_column("Type", item['type'].title())
row.add_column("Title", item['title'])
# row.add_column("Hash", item['hash'][:16] + "...") # User requested removal
row.add_column("Origin", item['origin'])
# Create result object for pipeline
res_obj = {
"title": item['title'],
"hash": item['hash'],
"file_hash": item['hash'],
"relationship_type": item['type'],
"origin": item['origin']
}
if item['path']:
res_obj["path"] = item['path']
res_obj["file_path"] = item['path']
res_obj["target"] = item['path']
else:
# If Hydrus, target is hash
res_obj["target"] = item['hash']
pipeline_results.append(res_obj)
# Set selection args for re-selecting this row from the result table:
# use the local path directly when available, otherwise select by hash.
if item['path']:
table.set_row_selection_args(i, [item['path']])
else:
table.set_row_selection_args(i, ["-hash", item['hash']])
ctx.set_last_result_table(table, pipeline_results)
print(table)
return 0