nose
2025-12-16 01:45:01 -08:00
parent a03eb0d1be
commit 9873280f0e
36 changed files with 4911 additions and 1225 deletions


@@ -10,10 +10,11 @@ from SYS.logger import log
import models
import pipeline as ctx
from API import HydrusNetwork as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, SharedArgs, normalize_hash, fmt_bytes, get_hash_for_operation, fetch_hydrus_metadata, should_show_help
from ._shared import Cmdlet, CmdletArg, SharedArgs, normalize_hash, fmt_bytes, get_hash_for_operation, fetch_hydrus_metadata, should_show_help, get_field
from API.folder import API_folder_store
from config import get_local_storage_path
from result_table import ResultTable
from Store import Store
CMDLET = Cmdlet(
name="get-relationship",
@@ -24,6 +25,7 @@ CMDLET = Cmdlet(
],
arg=[
SharedArgs.HASH,
SharedArgs.STORE,
],
detail=[
"- Lists relationship data as returned by Hydrus or Local DB.",
@@ -36,8 +38,9 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
return 0
# Parse -hash override
# Parse -hash and -store override
override_hash: str | None = None
override_store: str | None = None
args_list = list(_args)
i = 0
while i < len(args_list):
@@ -46,11 +49,20 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
if low in {"-hash", "--hash", "hash"} and i + 1 < len(args_list):
override_hash = str(args_list[i + 1]).strip()
break
if low in {"-store", "--store", "store"} and i + 1 < len(args_list):
override_store = str(args_list[i + 1]).strip()
i += 1
# Handle @N selection which creates a list - extract the first item
if isinstance(result, list) and len(result) > 0:
result = result[0]
# Handle @N selection which creates a list
# This cmdlet is single-subject; require disambiguation when multiple items are provided.
if isinstance(result, list):
if len(result) == 0:
result = None
elif len(result) > 1 and not override_hash:
log("get-relationship expects a single item; select one row (e.g. @1) or pass -hash", file=sys.stderr)
return 1
else:
result = result[0]
# Initialize results collection
found_relationships = [] # List of dicts: {hash, type, title, path, store}
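# --- Editor's note: shape of a found_relationships entry per the comment above
# --- (values illustrative):
#   {"hash": "f0e1d2...", "type": "king", "title": "Some Title",
#    "path": "C:/store/f0e1d2....mp4",  # or None when only a hash is known
#    "store": "local"}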
@@ -65,256 +77,170 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
return
found_relationships.append(entry)
# Check for local file first
file_path = None
if isinstance(result, dict):
file_path = result.get("file_path") or result.get("path")
source_title = result.get("title") or result.get("name") or "Unknown"
elif hasattr(result, "file_path"):
file_path = result.file_path
source_title = getattr(result, "title", "Unknown")
local_db_checked = False
if file_path and not override_hash:
try:
path_obj = Path(file_path)
if not source_title or source_title == "Unknown":
source_title = path_obj.name
print(f"\n[DEBUG] Starting get-relationship for: {path_obj.name}", file=sys.stderr)
print(f"[DEBUG] Path exists: {path_obj.exists()}", file=sys.stderr)
if path_obj.exists():
storage_path = get_local_storage_path(config)
print(f"[DEBUG] Storage path: {storage_path}", file=sys.stderr)
if storage_path:
with API_folder_store(storage_path) as db:
file_hash = db.get_file_hash(path_obj)
metadata = db.get_metadata(file_hash) if file_hash else None
print(f"[DEBUG] Metadata found: {metadata is not None}", file=sys.stderr)
if metadata and metadata.get("relationships"):
local_db_checked = True
rels = metadata["relationships"]
print(f"[DEBUG] Relationships dict: {rels}", file=sys.stderr)
if isinstance(rels, dict):
for rel_type, hashes in rels.items():
print(f"[DEBUG] Processing rel_type: {rel_type}, hashes: {hashes}", file=sys.stderr)
if hashes:
for h in hashes:
# h is now a file hash (not a path)
print(f"[DEBUG] Processing relationship hash: h={h}", file=sys.stderr)
# Resolve hash to file path
resolved_path = db.search_hash(h)
title = h[:16] + "..."
path = None
if resolved_path and resolved_path.exists():
path = str(resolved_path)
# Try to get title from tags
try:
tags = db.get_tags(h)
found_title = False
for t in tags:
if t.lower().startswith('title:'):
title = t[6:].strip()
found_title = True
break
if not found_title:
title = resolved_path.stem
except Exception:
title = resolved_path.stem
entry_type = "king" if rel_type.lower() == "alt" else rel_type
_add_relationship({
"hash": h,
"type": entry_type,
"title": title,
"path": path,
"store": "local"
})
# RECURSIVE LOOKUP: If this is an "alt" relationship (meaning we're an alt pointing to a king),
# then we should look up the king's other alts to show siblings.
# NOTE: We only do this for "alt" relationships, not "king", to avoid duplicating
# the king's direct relationships with its alts.
print(f"[DEBUG] Checking if recursive lookup needed: rel_type={rel_type}, path={path}", file=sys.stderr)
if rel_type.lower() == "alt" and path:
print(f"[DEBUG] 🔍 RECURSIVE LOOKUP TRIGGERED for parent: {path}", file=sys.stderr)
try:
parent_path_obj = Path(path)
print(f"[DEBUG] Parent path obj: {parent_path_obj}", file=sys.stderr)
# Also add the king/parent itself if not already in results
existing_parent = None
for r in found_relationships:
if str(r.get('hash', '')).lower() == str(path).lower() or str(r.get('path', '')).lower() == str(path).lower():
existing_parent = r
break
if not existing_parent:
parent_title = parent_path_obj.stem
try:
parent_hash = db.get_file_hash(parent_path_obj)
if parent_hash:
parent_tags = db.get_tags(parent_hash)
for t in parent_tags:
if t.lower().startswith('title:'):
parent_title = t[6:].strip()
break
except Exception:
pass
print(f"[DEBUG] Adding king/parent to results: {parent_title}", file=sys.stderr)
_add_relationship({
"hash": str(path),
"type": "king" if rel_type.lower() == "alt" else rel_type,
"title": parent_title,
"path": str(path),
"store": "local"
})
else:
# If already in results, ensure it's marked as king if appropriate
if rel_type.lower() == "alt":
existing_parent['type'] = "king"
# 1. Check forward relationships from parent (siblings)
parent_hash = db.get_file_hash(parent_path_obj)
parent_metadata = db.get_metadata(parent_hash) if parent_hash else None
print(f"[DEBUG] 📖 Parent metadata: {parent_metadata is not None}", file=sys.stderr)
if parent_metadata:
print(f"[DEBUG] Parent metadata keys: {parent_metadata.keys()}", file=sys.stderr)
if parent_metadata and parent_metadata.get("relationships"):
parent_rels = parent_metadata["relationships"]
print(f"[DEBUG] 👑 Parent has relationships: {list(parent_rels.keys())}", file=sys.stderr)
if isinstance(parent_rels, dict):
for child_type, child_hashes in parent_rels.items():
print(f"[DEBUG] Type '{child_type}': {len(child_hashes) if child_hashes else 0} children", file=sys.stderr)
if child_hashes:
for child_h in child_hashes:
# child_h is now a HASH, not a path - resolve it
child_path_obj = db.search_hash(child_h)
print(f"[DEBUG] Resolved hash {child_h[:16]}... to: {child_path_obj}", file=sys.stderr)
if not child_path_obj:
# Hash doesn't resolve - skip it
print(f"[DEBUG] ⏭️ Hash doesn't resolve, skipping: {child_h}", file=sys.stderr)
continue
# Check if already added (case-insensitive hash/path check)
if any(str(r.get('hash', '')).lower() == str(child_h).lower() or str(r.get('path', '')).lower() == str(child_path_obj).lower() for r in found_relationships):
print(f"[DEBUG] ⏭️ Already in results: {child_h}", file=sys.stderr)
continue
# Now child_path_obj is a Path, so we can get tags
child_title = child_path_obj.stem
try:
child_hash = db.get_file_hash(child_path_obj)
if child_hash:
child_tags = db.get_tags(child_hash)
for t in child_tags:
if t.lower().startswith('title:'):
child_title = t[6:].strip()
break
except Exception:
pass
print(f"[DEBUG] Adding sibling: {child_title}", file=sys.stderr)
_add_relationship({
"hash": child_h,
"type": f"alt" if child_type == "alt" else f"sibling ({child_type})",
"title": child_title,
"path": str(child_path_obj),
"store": "local"
})
else:
print(f"[DEBUG] ⚠️ Parent has no relationships metadata", file=sys.stderr)
# 2. Check reverse relationships pointing TO parent (siblings via reverse lookup)
# This handles the case where siblings point to the parent but the parent doesn't point back to them
reverse_children = db.find_files_pointing_to(parent_path_obj)
print(f"[DEBUG] 🔄 Reverse lookup found {len(reverse_children)} children", file=sys.stderr)
for child in reverse_children:
child_path = child['path']
child_type = child['type']
print(f"[DEBUG] Reverse child: {child_path}, type: {child_type}", file=sys.stderr)
# Skip if already added (check by path/hash, case-insensitive)
if any(str(r.get('path', '')).lower() == str(child_path).lower() or str(r.get('hash', '')).lower() == str(child_path).lower() for r in found_relationships):
print(f"[DEBUG] ⏭️ Already in results: {child_path}", file=sys.stderr)
continue
child_path_obj = Path(child_path)
child_title = child_path_obj.stem
try:
child_hash = db.get_file_hash(child_path_obj)
if child_hash:
child_tags = db.get_tags(child_hash)
for t in child_tags:
if t.lower().startswith('title:'):
child_title = t[6:].strip()
break
except Exception:
pass
print(f"[DEBUG] Adding reverse sibling: {child_title}", file=sys.stderr)
_add_relationship({
"hash": child_path,
"type": f"alt" if child_type == "alt" else f"sibling ({child_type})",
"title": child_title,
"path": child_path,
"store": "local"
})
except Exception as e:
print(f"[DEBUG] ❌ Recursive lookup error: {e}", file=sys.stderr)
import traceback
traceback.print_exc(file=sys.stderr)
except Exception as e:
log(f"Recursive lookup error: {e}", file=sys.stderr)
# Store/hash-first subject resolution
store_name: Optional[str] = override_store
if not store_name:
store_name = get_field(result, "store")
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(get_hash_for_operation(None, result))
if not source_title or source_title == "Unknown":
source_title = get_field(result, "title") or get_field(result, "name") or (hash_hex[:16] + "..." if hash_hex else "Unknown")
local_db_checked = False
if store_name and hash_hex:
try:
store = Store(config)
backend = store[str(store_name)]
# Folder store relationships
# IMPORTANT: only treat the Folder backend as a local DB store.
# Other backends may expose a location() method but are not SQLite folder stores.
if type(backend).__name__ == "Folder" and hasattr(backend, "location") and callable(getattr(backend, "location")):
storage_path = Path(str(backend.location()))
with API_folder_store(storage_path) as db:
local_db_checked = True
# Update source title from tags if possible
try:
tags = db.get_tags(hash_hex)
for t in tags:
if isinstance(t, str) and t.lower().startswith("title:"):
source_title = t[6:].strip()
break
except Exception:
pass
metadata = db.get_metadata(hash_hex)
rels = (metadata or {}).get("relationships")
king_hashes: list[str] = []
# Forward relationships
if isinstance(rels, dict):
for rel_type, hashes in rels.items():
if not isinstance(hashes, list):
continue
for related_hash in hashes:
related_hash = normalize_hash(str(related_hash))
if not related_hash or related_hash == hash_hex:
continue
entry_type = "king" if str(rel_type).lower() == "alt" else str(rel_type)
if entry_type == "king":
king_hashes.append(related_hash)
related_title = related_hash[:16] + "..."
try:
rel_tags = db.get_tags(related_hash)
for t in rel_tags:
if isinstance(t, str) and t.lower().startswith("title:"):
related_title = t[6:].strip()
break
except Exception:
pass
_add_relationship({
"hash": related_hash,
"type": entry_type,
"title": related_title,
"path": None,
"store": str(store_name),
})
# Reverse relationships (alts pointing to this hash)
try:
reverse_children = db.find_files_pointing_to_hash(hash_hex)
except Exception:
reverse_children = []
for child in reverse_children or []:
child_hash = normalize_hash(str(child.get("hash") or ""))
rel_type = str(child.get("type") or "").strip().lower()
if not child_hash or child_hash == hash_hex:
continue
child_title = child_hash[:16] + "..."
try:
child_tags = db.get_tags(child_hash)
for t in child_tags:
if isinstance(t, str) and t.lower().startswith("title:"):
child_title = t[6:].strip()
break
except Exception:
pass
entry_type = "alt" if rel_type == "alt" else (rel_type or "related")
_add_relationship({
"hash": child_hash,
"type": entry_type,
"title": child_title,
"path": None,
"store": str(store_name),
})
# Siblings (alts that share the same king)
for king_hash in king_hashes:
try:
siblings = db.find_files_pointing_to_hash(king_hash)
except Exception:
siblings = []
for sib in siblings or []:
sib_hash = normalize_hash(str(sib.get("hash") or ""))
sib_type = str(sib.get("type") or "").strip().lower()
if not sib_hash or sib_hash in {hash_hex, king_hash}:
continue
sib_title = sib_hash[:16] + "..."
try:
sib_tags = db.get_tags(sib_hash)
for t in sib_tags:
if isinstance(t, str) and t.lower().startswith("title:"):
sib_title = t[6:].strip()
break
except Exception:
pass
entry_type = "alt" if sib_type == "alt" else (sib_type or "related")
_add_relationship({
"hash": sib_hash,
"type": entry_type,
"title": sib_title,
"path": None,
"store": str(store_name),
})
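# --- Editor's sketch: the three passes above (forward rels, reverse rels,
# --- siblings via a shared king) in miniature. An in-memory dict stands in for
# --- the folder-store DB; names and data here are hypothetical, not project API.
_FORWARD = {                   # hash -> {rel_type: [related hashes]}
    "aaa": {"alt": ["kkk"]},   # "aaa" is an alt pointing at its king "kkk"
    "bbb": {"alt": ["kkk"]},   # "bbb" is a sibling alt of "aaa"
}

def _relationships_for(subject: str) -> list[dict]:
    found: list[dict] = []
    seen: set[str] = set()

    def _add(h: str, rel_type: str) -> None:
        if h and h != subject and h not in seen:
            seen.add(h)
            found.append({"hash": h, "type": rel_type})

    # 1. Forward: rels stored on the subject ("alt" points at the king).
    for rel_type, hashes in _FORWARD.get(subject, {}).items():
        for h in hashes:
            _add(h, "king" if rel_type == "alt" else rel_type)
    # 2. Reverse: files whose stored rels point back at the subject.
    for h, rels in _FORWARD.items():
        for rel_type, hashes in rels.items():
            if subject in hashes:
                _add(h, rel_type)
    # 3. Siblings: other alts sharing one of the subject's kings.
    for king in [r["hash"] for r in found if r["type"] == "king"]:
        for h, rels in _FORWARD.items():
            if king in rels.get("alt", []):
                _add(h, "alt")
    return found

# _relationships_for("aaa") -> [{'hash': 'kkk', 'type': 'king'},
#                               {'hash': 'bbb', 'type': 'alt'}]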
# ALSO CHECK REVERSE RELATIONSHIPS FOR THE CURRENT FILE
# NOTE: This is now handled via recursive lookup above, which finds siblings through the parent.
# We keep this disabled to avoid adding the same relationships twice.
# If needed in future, can be re-enabled with better deduplication.
# for rev in reverse_rels:
# rev_path = rev['path']
# rev_type = rev['type']
#
# if any(r['hash'] == rev_path for r in found_relationships): continue
#
# rev_path_obj = Path(rev_path)
# rev_title = rev_path_obj.stem
# try:
# rev_tags = db.get_tags(rev_path_obj)
# for t in rev_tags:
# if t.lower().startswith('title:'):
# rev_title = t[6:].strip(); break
# except Exception: pass
#
# # If someone points to us as 'alt' or 'king', they are our 'child' or 'subject'
# # But we'll just list them with the relationship type they used
# found_relationships.append({
# "hash": rev_path,
# "type": f"reverse-{rev_type}", # e.g. reverse-alt
# "title": rev_title,
# "path": rev_path,
# "store": "local"
# })
except Exception as e:
log(f"Error checking local relationships: {e}", file=sys.stderr)
log(f"Error checking store relationships: {e}", file=sys.stderr)
# Fall back to Hydrus when we have a hash and the local/folder store was not
# consulted; local results, when present, are shown on their own.
hash_hex = get_hash_for_operation(override_hash, result)
hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(get_hash_for_operation(None, result))
if hash_hex and not local_db_checked:
try:
client = hydrus_wrapper.get_client(config)
client = None
store_label = "hydrus"
if store_name:
# Store specified: do not fall back to a global/default Hydrus client.
store_label = str(store_name)
try:
store = Store(config)
backend = store[str(store_name)]
candidate = getattr(backend, "_client", None)
if candidate is not None and hasattr(candidate, "get_file_relationships"):
client = candidate
except Exception:
client = None
if client is None:
log(f"Hydrus client unavailable for store '{store_name}'", file=sys.stderr)
return 1
else:
client = hydrus_wrapper.get_client(config)
if client:
rel = client.get_file_relationships(hash_hex)
if rel:
@@ -322,38 +248,66 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
this_file_rels = file_rels.get(hash_hex)
if this_file_rels:
# Map Hydrus relationship IDs to names
# 0: potential duplicates, 1: false positives, 2: false positives (alternates),
# 3: duplicates, 4: alternatives, 8: king
# This mapping is approximate based on Hydrus API docs/behavior
# Map Hydrus relationship IDs to names.
# For /manage_file_relationships/get_file_relationships, the Hydrus docs define:
# 0=potential duplicates, 1=false positives, 3=alternates, 8=duplicates
# Additionally, this endpoint includes metadata keys like 'king'/'is_king'.
rel_map = {
"0": "potential duplicate",
"0": "potential",
"1": "false positive",
"2": "false positive",
"3": "duplicate",
"4": "alternative",
"8": "king"
"3": "alternate",
"8": "duplicate",
}
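# --- Editor's note: the response shape this code expects from
# --- /manage_file_relationships/get_file_relationships (keys per the Hydrus
# --- client API docs; hashes abbreviated and illustrative):
# {"file_relationships": {
#     "<subject hash>": {
#         "is_king": False,
#         "king": "<king hash>",
#         "king_is_on_file_domain": True,
#         "king_is_local": True,
#         "0": [],               # potential duplicates
#         "1": [],               # false positives
#         "3": ["<alt hash>"],   # alternates
#         "8": ["<dup hash>"],   # duplicates
#     }
# }}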
for rel_type_id, hash_list in this_file_rels.items():
# Skip metadata keys
if rel_type_id in {"is_king", "king", "king_is_on_file_domain", "king_is_local"}:
for rel_type_id, rel_value in this_file_rels.items():
key = str(rel_type_id)
# Handle metadata keys explicitly.
if key in {"is_king", "king_is_on_file_domain", "king_is_local"}:
continue
rel_name = rel_map.get(str(rel_type_id), f"type-{rel_type_id}")
if isinstance(hash_list, list):
for rel_hash in hash_list:
if isinstance(rel_hash, str) and rel_hash and rel_hash != hash_hex:
# Check if we already have this hash from local DB
if not any(r['hash'] == rel_hash for r in found_relationships):
found_relationships.append({
"hash": rel_hash,
"type": rel_name,
"title": rel_hash, # Can't resolve title easily without another API call
"path": None,
"store": "hydrus"
})
# Some Hydrus responses provide a direct king hash under the 'king' key.
if key == "king":
king_hash = normalize_hash(rel_value) if isinstance(rel_value, str) else None
if king_hash and king_hash != hash_hex:
if not any(str(r.get('hash', '')).lower() == king_hash for r in found_relationships):
found_relationships.append({
"hash": king_hash,
"type": "king",
"title": king_hash,
"path": None,
"store": store_label,
})
continue
rel_name = rel_map.get(key, f"type-{key}")
# The relationship value is typically a list of hashes.
if isinstance(rel_value, list):
for rel_hash in rel_value:
rel_hash_norm = normalize_hash(rel_hash) if isinstance(rel_hash, str) else None
if not rel_hash_norm or rel_hash_norm == hash_hex:
continue
if not any(str(r.get('hash', '')).lower() == rel_hash_norm for r in found_relationships):
found_relationships.append({
"hash": rel_hash_norm,
"type": rel_name,
"title": rel_hash_norm, # Can't resolve title easily without another API call
"path": None,
"store": store_label,
})
# Defensive: sometimes the API may return a single hash string.
elif isinstance(rel_value, str):
rel_hash_norm = normalize_hash(rel_value)
if rel_hash_norm and rel_hash_norm != hash_hex:
if not any(str(r.get('hash', '')).lower() == rel_hash_norm for r in found_relationships):
found_relationships.append({
"hash": rel_hash_norm,
"type": rel_name,
"title": rel_hash_norm,
"path": None,
"store": store_label,
})
except Exception as exc:
# Only log error if we didn't find local relationships either
if not found_relationships:
@@ -374,7 +328,7 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
return 0
elif t == 'derivative':
return 1
elif t == 'alternative':
elif t in {'alternative', 'alternate', 'alt'}:
return 2
elif t == 'duplicate':
return 3
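# --- Editor's sketch: the full rank helper implied by this hunk (the branch
# --- returning 0 is cut off above and presumably matches 'king'; name hypothetical):
def _type_rank(t: str) -> int:
    t = t.lower()
    if t == 'king':
        return 0
    elif t == 'derivative':
        return 1
    elif t in {'alternative', 'alternate', 'alt'}:
        return 2
    elif t == 'duplicate':
        return 3
    return 4  # everything else sorts last
# usage: found_relationships.sort(key=lambda r: _type_rank(r["type"]))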
@@ -400,22 +354,14 @@ def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
"relationship_type": item['type'],
"store": item['store']
}
if item['path']:
res_obj["path"] = item['path']
res_obj["file_path"] = item['path']
res_obj["target"] = item['path']
else:
# If Hydrus, target is hash
res_obj["target"] = item['hash']
# Target is always hash in store/hash-first mode
res_obj["target"] = item['hash']
pipeline_results.append(res_obj)
# Set selection args
# If it has a path, we can use it directly. If hash, maybe get-file -hash?
if item['path']:
table.set_row_selection_args(i, [item['path']])
else:
table.set_row_selection_args(i, ["-hash", item['hash']])
table.set_row_selection_args(i, ["-store", str(item['store']), "-hash", item['hash']])
ctx.set_last_result_table(table, pipeline_results)
print(table)
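# --- Editor's note: with selection args of the form ["-store", <store>, "-hash",
# --- <hash>], a relationship row can be re-selected and acted on without a local
# --- path (pipeline syntax illustrative, follow-up cmdlet hypothetical):
#   get-relationship -store local -hash <subject>
#   @2 | get-file      # would expand to: get-file -store local -hash <row-2 hash>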