Files
Medios-Macina/cmdlets/get_relationship.py

426 lines
26 KiB
Python
Raw Normal View History

2025-11-25 20:09:33 -08:00
from __future__ import annotations
from typing import Any, Dict, Sequence, List, Optional
import json
import sys
2025-12-01 01:10:16 -08:00
from pathlib import Path
2025-11-25 20:09:33 -08:00
2025-12-11 19:04:02 -08:00
from SYS.logger import log
2025-11-25 20:09:33 -08:00
import models
import pipeline as ctx
2025-12-11 19:04:02 -08:00
from API import HydrusNetwork as hydrus_wrapper
2025-12-11 12:47:30 -08:00
from ._shared import Cmdlet, CmdletArg, SharedArgs, normalize_hash, fmt_bytes, get_hash_for_operation, fetch_hydrus_metadata, should_show_help
2025-12-11 19:04:02 -08:00
from API.folder import API_folder_store
2025-12-01 01:10:16 -08:00
from config import get_local_storage_path
from result_table import ResultTable
2025-11-25 20:09:33 -08:00
# Cmdlet descriptor consumed by the dispatcher: registers the command name,
# its alias, the accepted arguments (shared -hash), and the help/usage text
# that _run dumps when --help is requested.
CMDLET = Cmdlet(
    name="get-relationship",
    summary="Print relationships for the selected file (Hydrus or Local).",
    usage="get-relationship [-hash <sha256>]",
    alias=[
        "get-rel",
    ],
    arg=[
        SharedArgs.HASH,
    ],
    detail=[
        "- Lists relationship data as returned by Hydrus or Local DB.",
    ],
)
def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
    """Collect and display relationships for the selected file.

    Resolution order:
      1. If the piped-in ``result`` carries a local file path (and no ``-hash``
         override was given), query the local folder store: direct
         relationships, then — for "alt" entries — a recursive lookup through
         the king/parent to surface sibling alts, plus a reverse lookup of
         files pointing at that parent.
      2. Otherwise (or when the local DB had no relationship metadata), fall
         back to the Hydrus API using the resolved hash.

    Found entries are deduplicated by hash/path (case-insensitive), sorted
    (king first), rendered in a ResultTable, and registered with the pipeline
    context so rows are @N-selectable.

    Args:
        result: Piped-in selection — a dict, an object with ``file_path``,
            or a list (first element is used).
        _args: Raw cmdlet arguments; only ``-hash``/``--hash``/``hash`` is parsed.
        config: Global configuration (local storage path, Hydrus client settings).

    Returns:
        Always 0; failures are logged to stderr rather than raised.
    """
    # Help
    if should_show_help(_args):
        # NOTE(review): assumes Cmdlet is JSON-serializable (e.g. a dict
        # subclass or NamedTuple-like) — confirm, otherwise json.dumps raises
        # TypeError here.
        log(json.dumps(CMDLET, ensure_ascii=False, indent=2))
        return 0
    # Parse -hash override
    override_hash: str | None = None
    args_list = list(_args)
    i = 0
    while i < len(args_list):
        a = args_list[i]
        low = str(a).lower()
        if low in {"-hash", "--hash", "hash"} and i + 1 < len(args_list):
            override_hash = str(args_list[i + 1]).strip()
            break
        i += 1
    # Handle @N selection which creates a list - extract the first item
    if isinstance(result, list) and len(result) > 0:
        result = result[0]
    # Initialize results collection
    found_relationships: List[Dict[str, Any]] = []  # List of dicts: {hash, type, title, path, store}
    source_title = "Unknown"
    def _add_relationship(entry: Dict[str, Any]) -> None:
        """Add relationship if not already present by hash or path."""
        # Case-insensitive dedup against both keys: local entries sometimes
        # carry a path in the "hash" slot, so both fields are compared.
        for existing in found_relationships:
            if entry.get("hash") and str(existing.get("hash", "")).lower() == str(entry["hash"]).lower():
                return
            if entry.get("path") and str(existing.get("path", "")).lower() == str(entry["path"]).lower():
                return
        found_relationships.append(entry)
    # Check for local file first
    file_path = None
    if isinstance(result, dict):
        file_path = result.get("file_path") or result.get("path")
        source_title = result.get("title") or result.get("name") or "Unknown"
    elif hasattr(result, "file_path"):
        file_path = result.file_path
        source_title = getattr(result, "title", "Unknown")
    # Tracks whether the local DB actually produced relationship metadata;
    # when True the Hydrus fallback below is skipped.
    local_db_checked = False
    if file_path and not override_hash:
        try:
            path_obj = Path(file_path)
            if not source_title or source_title == "Unknown":
                source_title = path_obj.name
            print(f"\n[DEBUG] Starting get-relationship for: {path_obj.name}", file=sys.stderr)
            print(f"[DEBUG] Path exists: {path_obj.exists()}", file=sys.stderr)
            if path_obj.exists():
                storage_path = get_local_storage_path(config)
                print(f"[DEBUG] Storage path: {storage_path}", file=sys.stderr)
                if storage_path:
                    with API_folder_store(storage_path) as db:
                        file_hash = db.get_file_hash(path_obj)
                        metadata = db.get_metadata(file_hash) if file_hash else None
                        print(f"[DEBUG] Metadata found: {metadata is not None}", file=sys.stderr)
                        if metadata and metadata.get("relationships"):
                            local_db_checked = True
                            rels = metadata["relationships"]
                            print(f"[DEBUG] Relationships dict: {rels}", file=sys.stderr)
                            if isinstance(rels, dict):
                                # rels maps relationship type (e.g. "alt") -> list of file hashes.
                                for rel_type, hashes in rels.items():
                                    print(f"[DEBUG] Processing rel_type: {rel_type}, hashes: {hashes}", file=sys.stderr)
                                    if hashes:
                                        for h in hashes:
                                            # h is now a file hash (not a path)
                                            print(f"[DEBUG] Processing relationship hash: h={h}", file=sys.stderr)
                                            # Resolve hash to file path
                                            resolved_path = db.search_hash(h)
                                            # Fallback title when the hash cannot be resolved to a file.
                                            title = h[:16] + "..."
                                            path = None
                                            if resolved_path and resolved_path.exists():
                                                path = str(resolved_path)
                                                # Try to get title from tags
                                                try:
                                                    tags = db.get_tags(h)
                                                    found_title = False
                                                    for t in tags:
                                                        if t.lower().startswith('title:'):
                                                            title = t[6:].strip()
                                                            found_title = True
                                                            break
                                                    if not found_title:
                                                        title = resolved_path.stem
                                                except Exception:
                                                    title = resolved_path.stem
                                            # An "alt" entry points at this file's king/parent,
                                            # so it is surfaced with type "king".
                                            entry_type = "king" if rel_type.lower() == "alt" else rel_type
                                            _add_relationship({
                                                "hash": h,
                                                "type": entry_type,
                                                "title": title,
                                                "path": path,
                                                "store": "local"
                                            })
                                            # RECURSIVE LOOKUP: If this is an "alt" relationship (meaning we're an alt pointing to a king),
                                            # then we should look up the king's other alts to show siblings.
                                            # NOTE: We only do this for "alt" relationships, not "king", to avoid duplicating
                                            # the king's direct relationships with its alts.
                                            print(f"[DEBUG] Checking if recursive lookup needed: rel_type={rel_type}, path={path}", file=sys.stderr)
                                            if rel_type.lower() == "alt" and path:
                                                print(f"[DEBUG] 🔍 RECURSIVE LOOKUP TRIGGERED for parent: {path}", file=sys.stderr)
                                                try:
                                                    parent_path_obj = Path(path)
                                                    print(f"[DEBUG] Parent path obj: {parent_path_obj}", file=sys.stderr)
                                                    # Also add the king/parent itself if not already in results
                                                    existing_parent = None
                                                    for r in found_relationships:
                                                        if str(r.get('hash', '')).lower() == str(path).lower() or str(r.get('path', '')).lower() == str(path).lower():
                                                            existing_parent = r
                                                            break
                                                    if not existing_parent:
                                                        parent_title = parent_path_obj.stem
                                                        try:
                                                            parent_hash = db.get_file_hash(parent_path_obj)
                                                            if parent_hash:
                                                                parent_tags = db.get_tags(parent_hash)
                                                                for t in parent_tags:
                                                                    if t.lower().startswith('title:'):
                                                                        parent_title = t[6:].strip()
                                                                        break
                                                        except Exception:
                                                            pass
                                                        print(f"[DEBUG] Adding king/parent to results: {parent_title}", file=sys.stderr)
                                                        # NOTE(review): "hash" is filled with the path here
                                                        # (not a sha256); downstream dedup compares both
                                                        # fields, so this works, but it is inconsistent.
                                                        _add_relationship({
                                                            "hash": str(path),
                                                            "type": "king" if rel_type.lower() == "alt" else rel_type,
                                                            "title": parent_title,
                                                            "path": str(path),
                                                            "store": "local"
                                                        })
                                                    else:
                                                        # If already in results, ensure it's marked as king if appropriate
                                                        if rel_type.lower() == "alt":
                                                            existing_parent['type'] = "king"
                                                    # 1. Check forward relationships from parent (siblings)
                                                    parent_hash = db.get_file_hash(parent_path_obj)
                                                    parent_metadata = db.get_metadata(parent_hash) if parent_hash else None
                                                    print(f"[DEBUG] 📖 Parent metadata: {parent_metadata is not None}", file=sys.stderr)
                                                    if parent_metadata:
                                                        print(f"[DEBUG] Parent metadata keys: {parent_metadata.keys()}", file=sys.stderr)
                                                    if parent_metadata and parent_metadata.get("relationships"):
                                                        parent_rels = parent_metadata["relationships"]
                                                        print(f"[DEBUG] 👑 Parent has relationships: {list(parent_rels.keys())}", file=sys.stderr)
                                                        if isinstance(parent_rels, dict):
                                                            for child_type, child_hashes in parent_rels.items():
                                                                print(f"[DEBUG] Type '{child_type}': {len(child_hashes) if child_hashes else 0} children", file=sys.stderr)
                                                                if child_hashes:
                                                                    for child_h in child_hashes:
                                                                        # child_h is now a HASH, not a path - resolve it
                                                                        child_path_obj = db.search_hash(child_h)
                                                                        print(f"[DEBUG] Resolved hash {child_h[:16]}... to: {child_path_obj}", file=sys.stderr)
                                                                        if not child_path_obj:
                                                                            # Hash doesn't resolve - skip it
                                                                            print(f"[DEBUG] ⏭️ Hash doesn't resolve, skipping: {child_h}", file=sys.stderr)
                                                                            continue
                                                                        # Check if already added (case-insensitive hash/path check)
                                                                        if any(str(r.get('hash', '')).lower() == str(child_h).lower() or str(r.get('path', '')).lower() == str(child_path_obj).lower() for r in found_relationships):
                                                                            print(f"[DEBUG] ⏭️ Already in results: {child_h}", file=sys.stderr)
                                                                            continue
                                                                        # Now child_path_obj is a Path, so we can get tags
                                                                        child_title = child_path_obj.stem
                                                                        try:
                                                                            child_hash = db.get_file_hash(child_path_obj)
                                                                            if child_hash:
                                                                                child_tags = db.get_tags(child_hash)
                                                                                for t in child_tags:
                                                                                    if t.lower().startswith('title:'):
                                                                                        child_title = t[6:].strip()
                                                                                        break
                                                                        except Exception:
                                                                            pass
                                                                        print(f"[DEBUG] Adding sibling: {child_title}", file=sys.stderr)
                                                                        _add_relationship({
                                                                            "hash": child_h,
                                                                            "type": f"alt" if child_type == "alt" else f"sibling ({child_type})",
                                                                            "title": child_title,
                                                                            "path": str(child_path_obj),
                                                                            "store": "local"
                                                                        })
                                                    else:
                                                        print(f"[DEBUG] ⚠️ Parent has no relationships metadata", file=sys.stderr)
                                                    # 2. Check reverse relationships pointing TO parent (siblings via reverse lookup)
                                                    # This handles the case where siblings point to parent but parent doesn't point to siblings
                                                    reverse_children = db.find_files_pointing_to(parent_path_obj)
                                                    print(f"[DEBUG] 🔄 Reverse lookup found {len(reverse_children)} children", file=sys.stderr)
                                                    for child in reverse_children:
                                                        child_path = child['path']
                                                        child_type = child['type']
                                                        print(f"[DEBUG] Reverse child: {child_path}, type: {child_type}", file=sys.stderr)
                                                        # Skip if already added (check by path/hash, case-insensitive)
                                                        if any(str(r.get('path', '')).lower() == str(child_path).lower() or str(r.get('hash', '')).lower() == str(child_path).lower() for r in found_relationships):
                                                            print(f"[DEBUG] ⏭️ Already in results: {child_path}", file=sys.stderr)
                                                            continue
                                                        child_path_obj = Path(child_path)
                                                        child_title = child_path_obj.stem
                                                        try:
                                                            child_hash = db.get_file_hash(child_path_obj)
                                                            if child_hash:
                                                                child_tags = db.get_tags(child_hash)
                                                                for t in child_tags:
                                                                    if t.lower().startswith('title:'):
                                                                        child_title = t[6:].strip()
                                                                        break
                                                        except Exception:
                                                            pass
                                                        print(f"[DEBUG] Adding reverse sibling: {child_title}", file=sys.stderr)
                                                        _add_relationship({
                                                            "hash": child_path,
                                                            "type": f"alt" if child_type == "alt" else f"sibling ({child_type})",
                                                            "title": child_title,
                                                            "path": child_path,
                                                            "store": "local"
                                                        })
                                                except Exception as e:
                                                    print(f"[DEBUG] ❌ Recursive lookup error: {e}", file=sys.stderr)
                                                    import traceback
                                                    traceback.print_exc(file=sys.stderr)
                                                # NOTE(review): this second handler chains on the same try
                                                # as the one above; Exception is already caught there, so
                                                # this clause is unreachable — looks like a merge artifact.
                                                except Exception as e:
                                                    log(f"Recursive lookup error: {e}", file=sys.stderr)
            # ALSO CHECK REVERSE RELATIONSHIPS FOR THE CURRENT FILE
            # NOTE: This is now handled via recursive lookup above, which finds siblings through the parent.
            # We keep this disabled to avoid adding the same relationships twice.
            # If needed in future, can be re-enabled with better deduplication.
            # for rev in reverse_rels:
            #     rev_path = rev['path']
            #     rev_type = rev['type']
            #
            #     if any(r['hash'] == rev_path for r in found_relationships): continue
            #
            #     rev_path_obj = Path(rev_path)
            #     rev_title = rev_path_obj.stem
            #     try:
            #         rev_tags = db.get_tags(rev_path_obj)
            #         for t in rev_tags:
            #             if t.lower().startswith('title:'):
            #                 rev_title = t[6:].strip(); break
            #     except Exception: pass
            #
            #     # If someone points to us as 'alt' or 'king', they are our 'child' or 'subject'
            #     # But we'll just list them with the relationship type they used
            #     found_relationships.append({
            #         "hash": rev_path,
            #         "type": f"reverse-{rev_type}",  # e.g. reverse-alt
            #         "title": rev_title,
            #         "path": rev_path,
            #         "store": "local"
            #     })
        except Exception as e:
            # NOTE(review): log() here is SYS.logger.log with a file= kwarg —
            # confirm the custom logger accepts it (print-style signature).
            log(f"Error checking local relationships: {e}", file=sys.stderr)
    # If we found local relationships, we can stop or merge with Hydrus?
    # For now, if we found local ones, let's show them.
    # But if the file is also in Hydrus, we might want those too.
    # Let's try Hydrus if we have a hash.
    hash_hex = get_hash_for_operation(override_hash, result)
    if hash_hex and not local_db_checked:
        try:
            client = hydrus_wrapper.get_client(config)
            if client:
                rel = client.get_file_relationships(hash_hex)
                if rel:
                    file_rels = rel.get("file_relationships", {})
                    this_file_rels = file_rels.get(hash_hex)
                    if this_file_rels:
                        # Map Hydrus relationship IDs to names
                        # 0: potential duplicates, 1: false positives, 2: false positives (alternates),
                        # 3: duplicates, 4: alternatives, 8: king
                        # This mapping is approximate based on Hydrus API docs/behavior
                        rel_map = {
                            "0": "potential duplicate",
                            "1": "false positive",
                            "2": "false positive",
                            "3": "duplicate",
                            "4": "alternative",
                            "8": "king"
                        }
                        for rel_type_id, hash_list in this_file_rels.items():
                            # Skip metadata keys
                            if rel_type_id in {"is_king", "king", "king_is_on_file_domain", "king_is_local"}:
                                continue
                            rel_name = rel_map.get(str(rel_type_id), f"type-{rel_type_id}")
                            if isinstance(hash_list, list):
                                for rel_hash in hash_list:
                                    # Skip self-references and non-string entries.
                                    if isinstance(rel_hash, str) and rel_hash and rel_hash != hash_hex:
                                        # Check if we already have this hash from local DB
                                        if not any(r['hash'] == rel_hash for r in found_relationships):
                                            found_relationships.append({
                                                "hash": rel_hash,
                                                "type": rel_name,
                                                "title": rel_hash,  # Can't resolve title easily without another API call
                                                "path": None,
                                                "store": "hydrus"
                                            })
        except Exception as exc:
            # Only log error if we didn't find local relationships either
            if not found_relationships:
                log(f"Hydrus relationships fetch failed: {exc}", file=sys.stderr)
    if not found_relationships:
        log("No relationships found.")
        return 0
    # Display results
    table = ResultTable(f"Relationships: {source_title}").init_command("get-relationship", [])
    # Sort by type then title
    # Custom sort order: King first, then Derivative, then others
    def type_sort_key(item):
        # Lower value sorts first; unknown types sink to the bottom.
        t = item['type'].lower()
        if t == 'king':
            return 0
        elif t == 'derivative':
            return 1
        elif t == 'alternative':
            return 2
        elif t == 'duplicate':
            return 3
        else:
            return 4
    found_relationships.sort(key=lambda x: (type_sort_key(x), x['title']))
    pipeline_results = []
    for i, item in enumerate(found_relationships):
        row = table.add_row()
        row.add_column("Type", item['type'].title())
        row.add_column("Title", item['title'])
        # row.add_column("Hash", item['hash'][:16] + "...")  # User requested removal
        row.add_column("Store", item['store'])
        # Create result object for pipeline
        res_obj = {
            "title": item['title'],
            "hash": item['hash'],
            "file_hash": item['hash'],
            "relationship_type": item['type'],
            "store": item['store']
        }
        if item['path']:
            res_obj["path"] = item['path']
            res_obj["file_path"] = item['path']
            res_obj["target"] = item['path']
        else:
            # If Hydrus, target is hash
            res_obj["target"] = item['hash']
        pipeline_results.append(res_obj)
        # Set selection args
        # If it has a path, we can use it directly. If hash, maybe get-file -hash?
        if item['path']:
            table.set_row_selection_args(i, [item['path']])
        else:
            table.set_row_selection_args(i, ["-hash", item['hash']])
    ctx.set_last_result_table(table, pipeline_results)
    print(table)
    return 0