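"""get-relationship cmdlet.

Resolves and prints relationship data (king, alt, duplicate, ...) for the
selected file, reading from the local FolderDB first and falling back to the
Hydrus API when the local database yields nothing.
"""
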
from __future__ import annotations

from typing import Any, Dict, Sequence, List, Optional
import json
import sys
from pathlib import Path

from helper.logger import log

import models
import pipeline as ctx
from helper import hydrus as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, SharedArgs, normalize_hash, fmt_bytes, get_hash_for_operation, fetch_hydrus_metadata, should_show_help
from helper.folder_store import FolderDB
from config import get_local_storage_path
from result_table import ResultTable


CMDLET = Cmdlet(
    name="get-relationship",
    summary="Print relationships for the selected file (Hydrus or Local).",
    usage="get-relationship [-hash <sha256>]",
    alias=[
        "get-rel",
    ],
    arg=[
        SharedArgs.HASH,
    ],
    detail=[
        "- Lists relationship data as returned by Hydrus or Local DB.",
    ],
)
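
# Illustrative invocations (whatever produces the piped selection lives outside
# this module; only get-relationship / get-rel and -hash are defined here):
#
#   ... | @1 | get-relationship
#   get-relationship -hash <sha256>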


def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
    # Help
    if should_show_help(_args):
        log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
        return 0

    # Parse -hash override
    override_hash: str | None = None
    args_list = list(_args)
    i = 0
    while i < len(args_list):
        a = args_list[i]
        low = str(a).lower()
        if low in {"-hash", "--hash", "hash"} and i + 1 < len(args_list):
            override_hash = str(args_list[i + 1]).strip()
            break
        i += 1

    # Handle @N selection which creates a list - extract the first item
    if isinstance(result, list) and len(result) > 0:
        result = result[0]

    # Initialize results collection
    found_relationships = []  # List of dicts: {hash, type, title, path, origin}
    source_title = "Unknown"

    def _add_relationship(entry: Dict[str, Any]) -> None:
        """Add relationship if not already present by hash or path."""
        for existing in found_relationships:
            if entry.get("hash") and str(existing.get("hash", "")).lower() == str(entry["hash"]).lower():
                return
            if entry.get("path") and str(existing.get("path", "")).lower() == str(entry["path"]).lower():
                return
        found_relationships.append(entry)
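    # _add_relationship de-duplicates case-insensitively: an entry whose hash or
    # path matches one already collected (in any letter case) is silently dropped.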

    # Check for local file first
    file_path = None
    if isinstance(result, dict):
        file_path = result.get("file_path") or result.get("path")
        source_title = result.get("title") or result.get("name") or "Unknown"
    elif hasattr(result, "file_path"):
        file_path = result.file_path
        source_title = getattr(result, "title", "Unknown")

    local_db_checked = False

    if file_path and not override_hash:
        try:
            path_obj = Path(file_path)
            if not source_title or source_title == "Unknown":
                source_title = path_obj.name

            print(f"\n[DEBUG] Starting get-relationship for: {path_obj.name}", file=sys.stderr)
            print(f"[DEBUG] Path exists: {path_obj.exists()}", file=sys.stderr)

            if path_obj.exists():
                storage_path = get_local_storage_path(config)
                print(f"[DEBUG] Storage path: {storage_path}", file=sys.stderr)
                if storage_path:
                    with FolderDB(storage_path) as db:
                        file_hash = db.get_file_hash(path_obj)
                        metadata = db.get_metadata(file_hash) if file_hash else None
                        print(f"[DEBUG] Metadata found: {metadata is not None}", file=sys.stderr)
                        if metadata and metadata.get("relationships"):
                            local_db_checked = True
                            rels = metadata["relationships"]
                            print(f"[DEBUG] Relationships dict: {rels}", file=sys.stderr)
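                            # Assumed shape of the local relationships payload (inferred
                            # from the loop below, not a documented FolderDB contract):
                            #   {"alt": ["<sha256>", ...], "king": ["<sha256>", ...]}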
                            if isinstance(rels, dict):
                                for rel_type, hashes in rels.items():
                                    print(f"[DEBUG] Processing rel_type: {rel_type}, hashes: {hashes}", file=sys.stderr)
                                    if hashes:
                                        for h in hashes:
                                            # h is a file hash (not a path)
                                            print(f"[DEBUG] Processing relationship hash: h={h}", file=sys.stderr)
                                            # Resolve hash to file path
                                            resolved_path = db.search_hash(h)
                                            title = h[:16] + "..."
                                            path = None
                                            if resolved_path and resolved_path.exists():
                                                path = str(resolved_path)
                                                # Try to get title from tags
                                                try:
                                                    tags = db.get_tags(h)
                                                    found_title = False
                                                    for t in tags:
                                                        if t.lower().startswith('title:'):
                                                            title = t[6:].strip()
                                                            found_title = True
                                                            break
                                                    if not found_title:
                                                        title = resolved_path.stem
                                                except Exception:
                                                    title = resolved_path.stem

                                            entry_type = "king" if rel_type.lower() == "alt" else rel_type
                                            _add_relationship({
                                                "hash": h,
                                                "type": entry_type,
                                                "title": title,
                                                "path": path,
                                                "origin": "local"
                                            })

                                            # RECURSIVE LOOKUP: if this is an "alt" relationship (i.e. we are an alt
                                            # pointing to a king), look up the king's other alts to show siblings.
                                            # NOTE: this is only done for "alt" relationships, not "king", to avoid
                                            # duplicating the king's direct relationships with its alts.
                                            print(f"[DEBUG] Checking if recursive lookup needed: rel_type={rel_type}, path={path}", file=sys.stderr)
                                            if rel_type.lower() == "alt" and path:
                                                print(f"[DEBUG] 🔍 RECURSIVE LOOKUP TRIGGERED for parent: {path}", file=sys.stderr)
                                                try:
                                                    parent_path_obj = Path(path)
                                                    print(f"[DEBUG] Parent path obj: {parent_path_obj}", file=sys.stderr)

                                                    # Also add the king/parent itself if not already in results
                                                    existing_parent = None
                                                    for r in found_relationships:
                                                        if str(r.get('hash', '')).lower() == str(path).lower() or str(r.get('path', '')).lower() == str(path).lower():
                                                            existing_parent = r
                                                            break
                                                    if not existing_parent:
                                                        parent_title = parent_path_obj.stem
                                                        try:
                                                            parent_hash = db.get_file_hash(parent_path_obj)
                                                            if parent_hash:
                                                                parent_tags = db.get_tags(parent_hash)
                                                                for t in parent_tags:
                                                                    if t.lower().startswith('title:'):
                                                                        parent_title = t[6:].strip()
                                                                        break
                                                        except Exception:
                                                            pass

                                                        print(f"[DEBUG] ➕ Adding king/parent to results: {parent_title}", file=sys.stderr)
                                                        _add_relationship({
                                                            "hash": str(path),
                                                            "type": "king" if rel_type.lower() == "alt" else rel_type,
                                                            "title": parent_title,
                                                            "path": str(path),
                                                            "origin": "local"
                                                        })
                                                    else:
                                                        # If already in results, ensure it's marked as king if appropriate
                                                        if rel_type.lower() == "alt":
                                                            existing_parent['type'] = "king"

                                                    # 1. Check forward relationships from parent (siblings)
                                                    parent_hash = db.get_file_hash(parent_path_obj)
                                                    parent_metadata = db.get_metadata(parent_hash) if parent_hash else None
                                                    print(f"[DEBUG] 📖 Parent metadata: {parent_metadata is not None}", file=sys.stderr)
                                                    if parent_metadata:
                                                        print(f"[DEBUG] Parent metadata keys: {parent_metadata.keys()}", file=sys.stderr)
                                                    if parent_metadata and parent_metadata.get("relationships"):
                                                        parent_rels = parent_metadata["relationships"]
                                                        print(f"[DEBUG] 👑 Parent has relationships: {list(parent_rels.keys())}", file=sys.stderr)
                                                        if isinstance(parent_rels, dict):
                                                            for child_type, child_hashes in parent_rels.items():
                                                                print(f"[DEBUG] Type '{child_type}': {len(child_hashes) if child_hashes else 0} children", file=sys.stderr)
                                                                if child_hashes:
                                                                    for child_h in child_hashes:
                                                                        # child_h is a hash, not a path - resolve it
                                                                        child_path_obj = db.search_hash(child_h)
                                                                        print(f"[DEBUG] Resolved hash {child_h[:16]}... to: {child_path_obj}", file=sys.stderr)

                                                                        if not child_path_obj:
                                                                            # Hash doesn't resolve - skip it
                                                                            print(f"[DEBUG] ⏭️ Hash doesn't resolve, skipping: {child_h}", file=sys.stderr)
                                                                            continue

                                                                        # Check if already added (case-insensitive hash/path check)
                                                                        if any(str(r.get('hash', '')).lower() == str(child_h).lower() or str(r.get('path', '')).lower() == str(child_path_obj).lower() for r in found_relationships):
                                                                            print(f"[DEBUG] ⏭️ Already in results: {child_h}", file=sys.stderr)
                                                                            continue

                                                                        # Now child_path_obj is a Path, so we can get tags
                                                                        child_title = child_path_obj.stem
                                                                        try:
                                                                            child_hash = db.get_file_hash(child_path_obj)
                                                                            if child_hash:
                                                                                child_tags = db.get_tags(child_hash)
                                                                                for t in child_tags:
                                                                                    if t.lower().startswith('title:'):
                                                                                        child_title = t[6:].strip()
                                                                                        break
                                                                        except Exception:
                                                                            pass

                                                                        print(f"[DEBUG] ➕ Adding sibling: {child_title}", file=sys.stderr)
                                                                        _add_relationship({
                                                                            "hash": child_h,
                                                                            "type": "alt" if child_type == "alt" else f"sibling ({child_type})",
                                                                            "title": child_title,
                                                                            "path": str(child_path_obj),
                                                                            "origin": "local"
                                                                        })
                                                    else:
                                                        print("[DEBUG] ⚠️ Parent has no relationships metadata", file=sys.stderr)

                                                    # 2. Check reverse relationships pointing TO parent (siblings via reverse lookup).
                                                    # Handles the case where siblings point to the parent but the parent does not point back.
                                                    reverse_children = db.find_files_pointing_to(parent_path_obj)
                                                    print(f"[DEBUG] 🔄 Reverse lookup found {len(reverse_children)} children", file=sys.stderr)
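                                                    # find_files_pointing_to is assumed to return dicts shaped like
                                                    # {"path": "<file path>", "type": "alt"}; that shape is inferred
                                                    # from the lookups below, not from a documented FolderDB contract.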
                                                    for child in reverse_children:
                                                        child_path = child['path']
                                                        child_type = child['type']
                                                        print(f"[DEBUG] Reverse child: {child_path}, type: {child_type}", file=sys.stderr)

                                                        # Skip if already added (check by path/hash, case-insensitive)
                                                        if any(str(r.get('path', '')).lower() == str(child_path).lower() or str(r.get('hash', '')).lower() == str(child_path).lower() for r in found_relationships):
                                                            print(f"[DEBUG] ⏭️ Already in results: {child_path}", file=sys.stderr)
                                                            continue

                                                        child_path_obj = Path(child_path)
                                                        child_title = child_path_obj.stem
                                                        try:
                                                            child_hash = db.get_file_hash(child_path_obj)
                                                            if child_hash:
                                                                child_tags = db.get_tags(child_hash)
                                                                for t in child_tags:
                                                                    if t.lower().startswith('title:'):
                                                                        child_title = t[6:].strip()
                                                                        break
                                                        except Exception:
                                                            pass

                                                        print(f"[DEBUG] ➕ Adding reverse sibling: {child_title}", file=sys.stderr)
                                                        _add_relationship({
                                                            "hash": child_path,
                                                            "type": "alt" if child_type == "alt" else f"sibling ({child_type})",
                                                            "title": child_title,
                                                            "path": child_path,
                                                            "origin": "local"
                                                        })

                                                except Exception as e:
                                                    print(f"[DEBUG] ❌ Recursive lookup error: {e}", file=sys.stderr)
                                                    import traceback
                                                    traceback.print_exc(file=sys.stderr)
                                                    log(f"Recursive lookup error: {e}", file=sys.stderr)

            # ALSO CHECK REVERSE RELATIONSHIPS FOR THE CURRENT FILE
            # NOTE: This is handled via the recursive lookup above, which finds siblings through the parent.
            # It stays disabled to avoid adding the same relationships twice; if needed in future,
            # it can be re-enabled with better deduplication.
            # for rev in reverse_rels:
            #     rev_path = rev['path']
            #     rev_type = rev['type']
            #
            #     if any(r['hash'] == rev_path for r in found_relationships): continue
            #
            #     rev_path_obj = Path(rev_path)
            #     rev_title = rev_path_obj.stem
            #     try:
            #         rev_tags = db.get_tags(rev_path_obj)
            #         for t in rev_tags:
            #             if t.lower().startswith('title:'):
            #                 rev_title = t[6:].strip(); break
            #     except Exception: pass
            #
            #     # If someone points to us as 'alt' or 'king', they are our 'child' or 'subject',
            #     # but we just list them with the relationship type they used.
            #     found_relationships.append({
            #         "hash": rev_path,
            #         "type": f"reverse-{rev_type}",  # e.g. reverse-alt
            #         "title": rev_title,
            #         "path": rev_path,
            #         "origin": "local"
            #     })

        except Exception as e:
            log(f"Error checking local relationships: {e}", file=sys.stderr)

    # Fall back to Hydrus only when the local DB produced no relationship metadata;
    # locally found relationships are shown as-is and the Hydrus lookup is skipped.
    hash_hex = get_hash_for_operation(override_hash, result)

    if hash_hex and not local_db_checked:
        try:
            client = hydrus_wrapper.get_client(config)
            if client:
                rel = client.get_file_relationships(hash_hex)
                if rel:
                    file_rels = rel.get("file_relationships", {})
                    this_file_rels = file_rels.get(hash_hex)

                    if this_file_rels:
                        # Map Hydrus relationship IDs to names:
                        # 0: potential duplicates, 1: false positives, 2: false positives (alternates),
                        # 3: duplicates, 4: alternatives, 8: king.
                        # This mapping is approximate, based on Hydrus API docs/behaviour.
                        rel_map = {
                            "0": "potential duplicate",
                            "1": "false positive",
                            "2": "false positive",
                            "3": "duplicate",
                            "4": "alternative",
                            "8": "king"
                        }
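                        # For reference, the raw Hydrus response is expected to look roughly like
                        # (illustrative only; see the Hydrus Client API docs for the exact shape):
                        #   {"file_relationships": {"<hash>": {"is_king": true, "king": "<hash>",
                        #       "0": ["<hash>", ...], "3": ["<hash>", ...], ...}}}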

                        for rel_type_id, hash_list in this_file_rels.items():
                            # Skip metadata keys
                            if rel_type_id in {"is_king", "king", "king_is_on_file_domain", "king_is_local"}:
                                continue

                            rel_name = rel_map.get(str(rel_type_id), f"type-{rel_type_id}")

                            if isinstance(hash_list, list):
                                for rel_hash in hash_list:
                                    if isinstance(rel_hash, str) and rel_hash and rel_hash != hash_hex:
                                        # Check if we already have this hash from local DB
                                        if not any(r['hash'] == rel_hash for r in found_relationships):
                                            found_relationships.append({
                                                "hash": rel_hash,
                                                "type": rel_name,
                                                "title": rel_hash,  # Can't resolve title easily without another API call
                                                "path": None,
                                                "origin": "hydrus"
                                            })
        except Exception as exc:
            # Only log error if we didn't find local relationships either
            if not found_relationships:
                log(f"Hydrus relationships fetch failed: {exc}", file=sys.stderr)

    if not found_relationships:
        log("No relationships found.")
        return 0

    # Display results
    table = ResultTable(f"Relationships: {source_title}").init_command("get-relationship", [])

    # Sort by type then title
    # Custom sort order: king first, then derivative, then others
    def type_sort_key(item):
        t = item['type'].lower()
        if t == 'king':
            return 0
        elif t == 'derivative':
            return 1
        elif t == 'alternative':
            return 2
        elif t == 'duplicate':
            return 3
        else:
            return 4
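    # Example of the resulting order: "king" rows sort ahead of "alternative" and
    # "duplicate" rows, and rows of the same type fall back to title order (see the key below).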

    found_relationships.sort(key=lambda x: (type_sort_key(x), x['title']))

    pipeline_results = []

    for i, item in enumerate(found_relationships):
        row = table.add_row()
        row.add_column("Type", item['type'].title())
        row.add_column("Title", item['title'])
        # row.add_column("Hash", item['hash'][:16] + "...")  # User requested removal
        row.add_column("Origin", item['origin'])

        # Create result object for pipeline
        res_obj = {
            "title": item['title'],
            "hash": item['hash'],
            "file_hash": item['hash'],
            "relationship_type": item['type'],
            "origin": item['origin']
        }
        if item['path']:
            res_obj["path"] = item['path']
            res_obj["file_path"] = item['path']
            res_obj["target"] = item['path']
        else:
            # Hydrus-only entries have no local path, so the hash is the target
            res_obj["target"] = item['hash']

        pipeline_results.append(res_obj)

        # Set selection args: rows with a local path are selected by path,
        # hash-only rows via ["-hash", <sha256>]
        if item['path']:
            table.set_row_selection_args(i, [item['path']])
        else:
            table.set_row_selection_args(i, ["-hash", item['hash']])

    ctx.set_last_result_table(table, pipeline_results)
    print(table)

    return 0
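
# Note: downstream @N selection presumably re-uses the per-row selection args set
# above (a local path, or ["-hash", <sha256>]); that wiring lives outside this module.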