# Medios-Macina/cmdlets/get_relationship.py
from __future__ import annotations
from typing import Any, Dict, Sequence, List, Optional
import json
import sys
from pathlib import Path
from helper.logger import log
from . import register
import models
import pipeline as ctx
from helper import hydrus as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, normalize_hash, fmt_bytes
from helper.local_library import LocalLibraryDB
from config import get_local_storage_path
from result_table import ResultTable
CMDLET = Cmdlet(
name="get-relationship",
summary="Print relationships for the selected file (Hydrus or Local).",
usage="get-relationship [-hash <sha256>]",
args=[
CmdletArg("-hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
],
details=[
"- Lists relationship data as returned by Hydrus or Local DB.",
],
)
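# Illustrative usage (a sketch; assumes the interactive pipeline shell and the @N
# result-selection syntax referenced further down, neither of which is defined here):
#   get-relationship                    # relationships for the currently selected result
#   get-relationship -hash <sha256>     # target an explicit file hash instead
#   @3 | get-rel                        # pick row 3 of the last result table, then inspect it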
@register(["get-rel", "get-relationship", "get-relationships", "get-file-relationships"]) # aliases
def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
# Help
try:
if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in _args):
log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
return 0
except Exception:
pass
# Parse -hash override
override_hash: str | None = None
args_list = list(_args)
i = 0
while i < len(args_list):
a = args_list[i]
low = str(a).lower()
if low in {"-hash", "--hash", "hash"} and i + 1 < len(args_list):
override_hash = str(args_list[i + 1]).strip()
break
i += 1
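# e.g. "get-relationship -hash 0123abcd..." leaves override_hash == "0123abcd..." (illustrative value).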
# Handle @N selection which creates a list - extract the first item
if isinstance(result, list) and len(result) > 0:
result = result[0]
# Initialize results collection
found_relationships = [] # List of dicts: {hash, type, title, path, origin}
source_title = "Unknown"
def _add_relationship(entry: Dict[str, Any]) -> None:
"""Add relationship if not already present by hash or path."""
for existing in found_relationships:
if entry.get("hash") and str(existing.get("hash", "")).lower() == str(entry["hash"]).lower():
return
if entry.get("path") and str(existing.get("path", "")).lower() == str(entry["path"]).lower():
return
found_relationships.append(entry)
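# Illustrative entry accepted by _add_relationship (field names per the list above;
# values are made up for the example):
#   {"hash": "ab12cd34...", "type": "alt", "title": "Some Title",
#    "path": "/library/some_title.mp4", "origin": "local"}
# Entries whose hash or path matches an existing entry (case-insensitively) are dropped.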
# Check for local file first
file_path = None
if isinstance(result, dict):
file_path = result.get("file_path") or result.get("path")
source_title = result.get("title") or result.get("name") or "Unknown"
elif hasattr(result, "file_path"):
file_path = result.file_path
source_title = getattr(result, "title", "Unknown")
local_db_checked = False
if file_path and not override_hash:
try:
path_obj = Path(file_path)
if not source_title or source_title == "Unknown":
source_title = path_obj.name
print(f"\n[DEBUG] Starting get-relationship for: {path_obj.name}", file=sys.stderr)
print(f"[DEBUG] Path exists: {path_obj.exists()}", file=sys.stderr)
if path_obj.exists():
storage_path = get_local_storage_path(config)
print(f"[DEBUG] Storage path: {storage_path}", file=sys.stderr)
if storage_path:
with LocalLibraryDB(storage_path) as db:
metadata = db.get_metadata(path_obj)
print(f"[DEBUG] Metadata found: {metadata is not None}", file=sys.stderr)
if metadata and metadata.get("relationships"):
local_db_checked = True
rels = metadata["relationships"]
print(f"[DEBUG] Relationships dict: {rels}", file=sys.stderr)
if isinstance(rels, dict):
for rel_type, hashes in rels.items():
print(f"[DEBUG] Processing rel_type: {rel_type}, hashes: {hashes}", file=sys.stderr)
if hashes:
for h in hashes:
# h is now a file hash (not a path)
print(f"[DEBUG] Processing relationship hash: h={h}", file=sys.stderr)
# Resolve hash to file path
resolved_path = db.search_by_hash(h)
title = h[:16] + "..."
path = None
if resolved_path and resolved_path.exists():
path = str(resolved_path)
# Try to get title from tags
try:
tags = db.get_tags(resolved_path)
found_title = False
for t in tags:
if t.lower().startswith('title:'):
title = t[6:].strip()
found_title = True
break
if not found_title:
title = resolved_path.stem
except Exception:
title = resolved_path.stem
entry_type = "king" if rel_type.lower() == "alt" else rel_type
_add_relationship({
"hash": h,
"type": entry_type,
"title": title,
"path": path,
"origin": "local"
})
# RECURSIVE LOOKUP: If this is an "alt" relationship (meaning we're an alt pointing to a king),
# then we should look up the king's other alts to show siblings.
# NOTE: We only do this for "alt" relationships, not "king", to avoid duplicating
# the king's direct relationships with its alts.
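# Example (hypothetical): file B stores {"alt": [<hash of A>]}. A is then listed as B's
# "king", and A's own relationships plus reverse links are walked below so B's sibling
# alts show up in the same table.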
print(f"[DEBUG] Checking if recursive lookup needed: rel_type={rel_type}, path={path}", file=sys.stderr)
if rel_type.lower() == "alt" and path:
print(f"[DEBUG] 🔍 RECURSIVE LOOKUP TRIGGERED for parent: {path}", file=sys.stderr)
try:
parent_path_obj = Path(path)
print(f"[DEBUG] Parent path obj: {parent_path_obj}", file=sys.stderr)
# Also add the king/parent itself if not already in results
existing_parent = None
for r in found_relationships:
if str(r.get('hash', '')).lower() == str(path).lower() or str(r.get('path', '')).lower() == str(path).lower():
existing_parent = r
break
if not existing_parent:
parent_title = parent_path_obj.stem
try:
parent_tags = db.get_tags(parent_path_obj)
for t in parent_tags:
if t.lower().startswith('title:'):
parent_title = t[6:].strip()
break
except Exception:
pass
print(f"[DEBUG] Adding king/parent to results: {parent_title}", file=sys.stderr)
_add_relationship({
"hash": str(path),
"type": "king" if rel_type.lower() == "alt" else rel_type,
"title": parent_title,
"path": str(path),
"origin": "local"
})
else:
# If already in results, ensure it's marked as king if appropriate
if rel_type.lower() == "alt":
existing_parent['type'] = "king"
# 1. Check forward relationships from parent (siblings)
parent_metadata = db.get_metadata(parent_path_obj)
print(f"[DEBUG] 📖 Parent metadata: {parent_metadata is not None}", file=sys.stderr)
if parent_metadata:
print(f"[DEBUG] Parent metadata keys: {parent_metadata.keys()}", file=sys.stderr)
if parent_metadata and parent_metadata.get("relationships"):
parent_rels = parent_metadata["relationships"]
print(f"[DEBUG] 👑 Parent has relationships: {list(parent_rels.keys())}", file=sys.stderr)
if isinstance(parent_rels, dict):
for child_type, child_hashes in parent_rels.items():
print(f"[DEBUG] Type '{child_type}': {len(child_hashes) if child_hashes else 0} children", file=sys.stderr)
if child_hashes:
for child_h in child_hashes:
# child_h is now a HASH, not a path - resolve it
child_path_obj = db.search_by_hash(child_h)
print(f"[DEBUG] Resolved hash {child_h[:16]}... to: {child_path_obj}", file=sys.stderr)
if not child_path_obj:
# Hash doesn't resolve - skip it
print(f"[DEBUG] ⏭️ Hash doesn't resolve, skipping: {child_h}", file=sys.stderr)
continue
# Check if already added (case-insensitive hash/path check)
if any(str(r.get('hash', '')).lower() == str(child_h).lower() or str(r.get('path', '')).lower() == str(child_path_obj).lower() for r in found_relationships):
print(f"[DEBUG] ⏭️ Already in results: {child_h}", file=sys.stderr)
continue
# Now child_path_obj is a Path, so we can get tags
child_title = child_path_obj.stem
try:
child_tags = db.get_tags(child_path_obj)
for t in child_tags:
if t.lower().startswith('title:'):
child_title = t[6:].strip()
break
except Exception:
pass
print(f"[DEBUG] Adding sibling: {child_title}", file=sys.stderr)
_add_relationship({
"hash": child_h,
"type": f"alt" if child_type == "alt" else f"sibling ({child_type})",
"title": child_title,
"path": str(child_path_obj),
"origin": "local"
})
else:
print(f"[DEBUG] ⚠️ Parent has no relationships metadata", file=sys.stderr)
# 2. Check reverse relationships pointing TO parent (siblings via reverse lookup)
# This handles the case where siblings point to parent but parent doesn't point to siblings
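# find_files_pointing_to is assumed to return a list of dicts shaped like
#   {"path": "/library/other_alt.mp4", "type": "alt"}
# (inferred from the keys read below; the values here are illustrative).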
reverse_children = db.find_files_pointing_to(parent_path_obj)
print(f"[DEBUG] 🔄 Reverse lookup found {len(reverse_children)} children", file=sys.stderr)
for child in reverse_children:
child_path = child['path']
child_type = child['type']
print(f"[DEBUG] Reverse child: {child_path}, type: {child_type}", file=sys.stderr)
# Skip if already added (check by path/hash, case-insensitive)
if any(str(r.get('path', '')).lower() == str(child_path).lower() or str(r.get('hash', '')).lower() == str(child_path).lower() for r in found_relationships):
print(f"[DEBUG] ⏭️ Already in results: {child_path}", file=sys.stderr)
continue
child_path_obj = Path(child_path)
child_title = child_path_obj.stem
try:
child_tags = db.get_tags(child_path_obj)
for t in child_tags:
if t.lower().startswith('title:'):
child_title = t[6:].strip()
break
except Exception:
pass
print(f"[DEBUG] Adding reverse sibling: {child_title}", file=sys.stderr)
_add_relationship({
"hash": child_path,
"type": f"alt" if child_type == "alt" else f"sibling ({child_type})",
"title": child_title,
"path": child_path,
"origin": "local"
})
except Exception as e:
print(f"[DEBUG] ❌ Recursive lookup error: {e}", file=sys.stderr)
import traceback
traceback.print_exc(file=sys.stderr)
except Exception as e:
log(f"Recursive lookup error: {e}", file=sys.stderr)
# ALSO CHECK REVERSE RELATIONSHIPS FOR THE CURRENT FILE
# NOTE: This is now handled via recursive lookup above, which finds siblings through the parent.
# We keep this disabled to avoid adding the same relationships twice.
# If needed in future, can be re-enabled with better deduplication.
# for rev in reverse_rels:
# rev_path = rev['path']
# rev_type = rev['type']
#
# if any(r['hash'] == rev_path for r in found_relationships): continue
#
# rev_path_obj = Path(rev_path)
# rev_title = rev_path_obj.stem
# try:
# rev_tags = db.get_tags(rev_path_obj)
# for t in rev_tags:
# if t.lower().startswith('title:'):
# rev_title = t[6:].strip(); break
# except Exception: pass
#
# # If someone points to us as 'alt' or 'king', they are our 'child' or 'subject'
# # But we'll just list them with the relationship type they used
# found_relationships.append({
# "hash": rev_path,
# "type": f"reverse-{rev_type}", # e.g. reverse-alt
# "title": rev_title,
# "path": rev_path,
# "origin": "local"
# })
except Exception as e:
log(f"Error checking local relationships: {e}", file=sys.stderr)
# Local relationships (if any) have been collected above. Fall back to Hydrus only
# when a hash is available and the local DB did not already yield relationship
# metadata, so the same relationships are not gathered twice.
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(getattr(result, "hash_hex", None))
if not hash_hex:
# Try to get hash from dict
if isinstance(result, dict):
hash_hex = normalize_hash(result.get("hash") or result.get("file_hash"))
if hash_hex and not local_db_checked:
try:
client = hydrus_wrapper.get_client(config)
if client:
rel = client.get_file_relationships(hash_hex)
if rel:
file_rels = rel.get("file_relationships", {})
this_file_rels = file_rels.get(hash_hex)
if this_file_rels:
# Map Hydrus relationship IDs to names
# 0: potential duplicates, 1: false positives, 2: false positives (alternates),
# 3: duplicates, 4: alternatives, 8: king
# This mapping is approximate based on Hydrus API docs/behavior
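# Assumed (simplified) shape of the Hydrus get_file_relationships payload walked below
# (illustrative only, not an exhaustive schema):
#   {"file_relationships": {"<hash>": {"is_king": true, "king": "<hash>",
#                                      "0": ["<hash>", ...], "3": ["<hash>", ...]}}}
# Numeric keys are relationship-type ids (translated via rel_map); the named keys are
# metadata and are skipped.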
rel_map = {
"0": "potential duplicate",
"1": "false positive",
"2": "false positive",
"3": "duplicate",
"4": "alternative",
"8": "king"
}
for rel_type_id, hash_list in this_file_rels.items():
# Skip metadata keys
if rel_type_id in {"is_king", "king", "king_is_on_file_domain", "king_is_local"}:
continue
rel_name = rel_map.get(str(rel_type_id), f"type-{rel_type_id}")
if isinstance(hash_list, list):
for rel_hash in hash_list:
if isinstance(rel_hash, str) and rel_hash and rel_hash != hash_hex:
# Check if we already have this hash from local DB
if not any(r['hash'] == rel_hash for r in found_relationships):
found_relationships.append({
"hash": rel_hash,
"type": rel_name,
"title": rel_hash, # Can't resolve title easily without another API call
"path": None,
"origin": "hydrus"
})
except Exception as exc:
# Only log error if we didn't find local relationships either
if not found_relationships:
log(f"Hydrus relationships fetch failed: {exc}", file=sys.stderr)
if not found_relationships:
log("No relationships found.")
return 0
# Display results
table = ResultTable(f"Relationships: {source_title}")
# Sort by type, then title.
# Custom order: king first, then derivative, alternative, duplicate, then everything else.
def type_sort_key(item):
t = item['type'].lower()
if t == 'king':
return 0
elif t == 'derivative':
return 1
elif t == 'alternative':
return 2
elif t == 'duplicate':
return 3
else:
return 4
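# Equivalent table-driven sketch (hypothetical alternative, behaviour unchanged):
#   _TYPE_ORDER = {"king": 0, "derivative": 1, "alternative": 2, "duplicate": 3}
#   type_sort_key = lambda item: _TYPE_ORDER.get(item["type"].lower(), 4)
# Ties within a type are broken alphabetically by title in the sort below.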
found_relationships.sort(key=lambda x: (type_sort_key(x), x['title']))
pipeline_results = []
for i, item in enumerate(found_relationships):
row = table.add_row()
row.add_column("Type", item['type'].title())
row.add_column("Title", item['title'])
# row.add_column("Hash", item['hash'][:16] + "...") # User requested removal
row.add_column("Origin", item['origin'])
# Create result object for pipeline
res_obj = {
"title": item['title'],
"hash": item['hash'],
"file_hash": item['hash'],
"relationship_type": item['type'],
"origin": item['origin']
}
if item['path']:
res_obj["path"] = item['path']
res_obj["file_path"] = item['path']
res_obj["target"] = item['path']
else:
# If Hydrus, target is hash
res_obj["target"] = item['hash']
pipeline_results.append(res_obj)
# Set selection args for re-selecting this row from the result table:
# use the local path directly when available, otherwise select by hash.
if item['path']:
table.set_row_selection_args(i, [item['path']])
else:
table.set_row_selection_args(i, ["-hash", item['hash']])
ctx.set_last_result_table(table, pipeline_results)
print(table)
return 0