# Source listing metadata (page residue kept as a comment so the module
# remains importable):
#   Medios-Macina/cmdlet/get_relationship.py
#   2025-12-16 23:23:43 -08:00
#   455 lines, 20 KiB, Python
from __future__ import annotations
from typing import Any, Dict, Sequence, List, Optional
import json
import sys
from pathlib import Path
from SYS.logger import log
import models
import pipeline as ctx
from API import HydrusNetwork as hydrus_wrapper
from . import _shared as sh
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
SharedArgs = sh.SharedArgs
normalize_hash = sh.normalize_hash
fmt_bytes = sh.fmt_bytes
get_hash_for_operation = sh.get_hash_for_operation
fetch_hydrus_metadata = sh.fetch_hydrus_metadata
should_show_help = sh.should_show_help
get_field = sh.get_field
from API.folder import API_folder_store
from config import get_local_storage_path
from result_table import ResultTable
from Store import Store
# Cmdlet registration metadata: canonical name, aliases, accepted arguments,
# and the help text surfaced by should_show_help() in _run().
CMDLET = Cmdlet(
    name="get-relationship",
    summary="Print relationships for the selected file (Hydrus or Local).",
    usage="get-relationship [-hash <sha256>]",
    alias=[
        "get-rel",
    ],
    arg=[
        SharedArgs.HASH,   # -hash <sha256>: explicit subject override
        SharedArgs.STORE,  # -store <name>: explicit backend override
    ],
    detail=[
        "- Lists relationship data as returned by Hydrus or Local DB.",
    ],
)
def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
    """Collect and print relationship rows for a single selected file.

    Resolution order:
      1. Parse ``-hash`` / ``-store`` overrides from ``_args``; otherwise take
         the store/hash from the piped ``result``.
      2. If the resolved store is the local ``Folder`` backend, read forward,
         reverse, and sibling relationships from its SQLite metadata DB.
      3. Only when the local DB was not consulted, query a Hydrus client's
         ``get_file_relationships`` endpoint.

    Args:
        result: Piped pipeline item (or list from an @N selection).
        _args: Raw cmdlet arguments.
        config: Global configuration dict used to construct Store/clients.

    Returns:
        0 on success (including "no relationships found"), 1 on usage errors
        or when a named store has no usable Hydrus client.
    """
    # Help
    if should_show_help(_args):
        log(f"Cmdlet: {CMDLET.name}\nSummary: {CMDLET.summary}\nUsage: {CMDLET.usage}")
        return 0

    # Parse -hash and -store overrides.
    # BUGFIX: the previous loop `break`ed as soon as -hash was consumed, so a
    # -store flag appearing *after* -hash was silently ignored. It also only
    # advanced by 1 after consuming a flag+value pair, re-inspecting the value
    # token on the next iteration. Consume pairs and keep scanning instead.
    override_hash: str | None = None
    override_store: str | None = None
    args_list = list(_args)
    i = 0
    while i < len(args_list):
        low = str(args_list[i]).lower()
        if low in {"-hash", "--hash", "hash"} and i + 1 < len(args_list):
            override_hash = str(args_list[i + 1]).strip()
            i += 2
            continue
        if low in {"-store", "--store", "store"} and i + 1 < len(args_list):
            override_store = str(args_list[i + 1]).strip()
            i += 2
            continue
        i += 1

    # @N selection yields a list. This cmdlet is single-subject; require
    # disambiguation when multiple items are provided without -hash.
    if isinstance(result, list):
        if len(result) == 0:
            result = None
        elif len(result) > 1 and not override_hash:
            log("get-relationship expects a single item; select one row (e.g. @1) or pass -hash", file=sys.stderr)
            return 1
        else:
            result = result[0]

    # Accumulated rows; each dict: {hash, type, title, path, store}.
    found_relationships: List[Dict[str, Any]] = []

    def _add_relationship(entry: Dict[str, Any]) -> None:
        """Add relationship if not already present by hash or path."""
        for existing in found_relationships:
            if entry.get("hash") and str(existing.get("hash", "")).lower() == str(entry["hash"]).lower():
                return
            if entry.get("path") and str(existing.get("path", "")).lower() == str(entry["path"]).lower():
                return
        found_relationships.append(entry)

    # Store/hash-first subject resolution: explicit overrides win, then the
    # piped result's fields.
    store_name: Optional[str] = override_store or get_field(result, "store")
    hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(get_hash_for_operation(None, result))
    source_title = get_field(result, "title") or get_field(result, "name") or (hash_hex[:16] + "..." if hash_hex else "Unknown")

    local_db_checked = False
    if store_name and hash_hex:
        try:
            store = Store(config)
            backend = store[str(store_name)]
            # IMPORTANT: only treat the Folder backend as a local DB store.
            # Other backends may expose a location() method but are not
            # SQLite folder stores.
            if type(backend).__name__ == "Folder" and hasattr(backend, "location") and callable(getattr(backend, "location")):
                storage_path = Path(str(backend.location()))
                with API_folder_store(storage_path) as db:
                    local_db_checked = True

                    def _title_from_tags(h: str, default: str) -> str:
                        """Best-effort 'title:' tag lookup in the local DB."""
                        try:
                            for t in db.get_tags(h):
                                if isinstance(t, str) and t.lower().startswith("title:"):
                                    return t[6:].strip()
                        except Exception:
                            pass
                        return default

                    # Update the subject's display title from its own tags.
                    source_title = _title_from_tags(hash_hex, source_title)

                    metadata = db.get_metadata(hash_hex)
                    rels = (metadata or {}).get("relationships")
                    king_hashes: list[str] = []

                    # Forward relationships stored on this file. In this
                    # schema an 'alt' link points at the group king.
                    if isinstance(rels, dict):
                        for rel_type, hashes in rels.items():
                            if not isinstance(hashes, list):
                                continue
                            for related_hash in hashes:
                                related_hash = normalize_hash(str(related_hash))
                                if not related_hash or related_hash == hash_hex:
                                    continue
                                entry_type = "king" if str(rel_type).lower() == "alt" else str(rel_type)
                                if entry_type == "king":
                                    king_hashes.append(related_hash)
                                _add_relationship({
                                    "hash": related_hash,
                                    "type": entry_type,
                                    "title": _title_from_tags(related_hash, related_hash[:16] + "..."),
                                    "path": None,
                                    "store": str(store_name),
                                })

                    # Reverse relationships (alts pointing to this hash).
                    try:
                        reverse_children = db.find_files_pointing_to_hash(hash_hex)
                    except Exception:
                        reverse_children = []
                    for child in reverse_children or []:
                        child_hash = normalize_hash(str(child.get("hash") or ""))
                        rel_type = str(child.get("type") or "").strip().lower()
                        if not child_hash or child_hash == hash_hex:
                            continue
                        entry_type = "alt" if rel_type == "alt" else (rel_type or "related")
                        _add_relationship({
                            "hash": child_hash,
                            "type": entry_type,
                            "title": _title_from_tags(child_hash, child_hash[:16] + "..."),
                            "path": None,
                            "store": str(store_name),
                        })

                    # Siblings: other alts that share the same king.
                    for king_hash in king_hashes:
                        try:
                            siblings = db.find_files_pointing_to_hash(king_hash)
                        except Exception:
                            siblings = []
                        for sib in siblings or []:
                            sib_hash = normalize_hash(str(sib.get("hash") or ""))
                            sib_type = str(sib.get("type") or "").strip().lower()
                            if not sib_hash or sib_hash in {hash_hex, king_hash}:
                                continue
                            entry_type = "alt" if sib_type == "alt" else (sib_type or "related")
                            _add_relationship({
                                "hash": sib_hash,
                                "type": entry_type,
                                "title": _title_from_tags(sib_hash, sib_hash[:16] + "..."),
                                "path": None,
                                "store": str(store_name),
                            })
        except Exception as e:
            log(f"Error checking store relationships: {e}", file=sys.stderr)

    # Hydrus pass: only when the local Folder DB was not consulted.
    # (hash_hex is unchanged since resolution above; no need to recompute it.)
    if hash_hex and not local_db_checked:
        try:
            client = None
            store_label = "hydrus"
            backend_obj = None
            if store_name:
                # Store specified: do not fall back to a global/default Hydrus client.
                store_label = str(store_name)
                try:
                    store = Store(config)
                    backend_obj = store[str(store_name)]
                    candidate = getattr(backend_obj, "_client", None)
                    if candidate is not None and hasattr(candidate, "get_file_relationships"):
                        client = candidate
                except Exception:
                    client = None
                if client is None:
                    log(f"Hydrus client unavailable for store '{store_name}'", file=sys.stderr)
                    return 1
            else:
                client = hydrus_wrapper.get_client(config)

            def _resolve_related_title(rel_hash: str) -> str:
                """Best-effort resolve a Hydrus hash to a human title.

                Preference order:
                  - title: tag from the backend (fast path)
                  - Hydrus metadata tags via fetch_hydrus_metadata
                  - fallback to short hash
                """
                h = normalize_hash(rel_hash)
                if not h:
                    return str(rel_hash)
                # Prefer backend tag extraction when available.
                if backend_obj is not None and hasattr(backend_obj, "get_tag"):
                    try:
                        tag_result = backend_obj.get_tag(h)
                        tags = tag_result[0] if isinstance(tag_result, tuple) and tag_result else tag_result
                        if isinstance(tags, list):
                            for t in tags:
                                if isinstance(t, str) and t.lower().startswith("title:"):
                                    val = t.split(":", 1)[1].strip()
                                    if val:
                                        return val
                    except Exception:
                        pass
                # Fallback: fetch minimal metadata and scan for a title tag.
                try:
                    meta, _ = fetch_hydrus_metadata(
                        config,
                        h,
                        store_name=store_label if store_name else None,
                        hydrus_client=client,
                        include_service_keys_to_tags=True,
                        include_file_url=False,
                        include_duration=False,
                        include_size=False,
                        include_mime=False,
                    )
                    if isinstance(meta, dict):
                        tags_payload = meta.get("tags")
                        tag_candidates: list[str] = []
                        if isinstance(tags_payload, dict):
                            for svc_data in tags_payload.values():
                                if not isinstance(svc_data, dict):
                                    continue
                                storage = svc_data.get("storage_tags")
                                if isinstance(storage, dict):
                                    for group in storage.values():
                                        if isinstance(group, list):
                                            tag_candidates.extend([str(x) for x in group if isinstance(x, str)])
                                display = svc_data.get("display_tags")
                                if isinstance(display, list):
                                    tag_candidates.extend([str(x) for x in display if isinstance(x, str)])
                        flat = meta.get("tags_flat")
                        if isinstance(flat, list):
                            tag_candidates.extend([str(x) for x in flat if isinstance(x, str)])
                        for t in tag_candidates:
                            if isinstance(t, str) and t.lower().startswith("title:"):
                                val = t.split(":", 1)[1].strip()
                                if val:
                                    return val
                except Exception:
                    pass
                return h[:16] + "..."

            if client:
                rel = client.get_file_relationships(hash_hex)
                if rel:
                    file_rels = rel.get("file_relationships", {})
                    this_file_rels = file_rels.get(hash_hex)
                    if this_file_rels:
                        # Map Hydrus relationship IDs to names. Per the Hydrus
                        # /manage_file_relationships/get_file_relationships docs:
                        # 0=potential duplicates, 1=false positives,
                        # 3=alternates, 8=duplicates. The payload also carries
                        # metadata keys like 'king'/'is_king'.
                        rel_map = {
                            "0": "potential",
                            "1": "false positive",
                            "3": "alternate",
                            "8": "duplicate",
                        }
                        for rel_type_id, rel_value in this_file_rels.items():
                            key = str(rel_type_id)
                            # Metadata keys are not relationships; skip them.
                            if key in {"is_king", "king_is_on_file_domain", "king_is_local"}:
                                continue
                            # Some Hydrus responses provide a direct king hash
                            # under the 'king' key.
                            if key == "king":
                                king_hash = normalize_hash(rel_value) if isinstance(rel_value, str) else None
                                if king_hash and king_hash != hash_hex:
                                    # Dedup check stays BEFORE title resolution to
                                    # avoid needless metadata fetches for dupes.
                                    if not any(str(r.get('hash', '')).lower() == king_hash for r in found_relationships):
                                        found_relationships.append({
                                            "hash": king_hash,
                                            "type": "king",
                                            "title": _resolve_related_title(king_hash),
                                            "path": None,
                                            "store": store_label,
                                        })
                                continue
                            rel_name = rel_map.get(key, f"type-{key}")
                            # The relationship value is typically a list of hashes.
                            if isinstance(rel_value, list):
                                for rel_hash in rel_value:
                                    rel_hash_norm = normalize_hash(rel_hash) if isinstance(rel_hash, str) else None
                                    if not rel_hash_norm or rel_hash_norm == hash_hex:
                                        continue
                                    if not any(str(r.get('hash', '')).lower() == rel_hash_norm for r in found_relationships):
                                        found_relationships.append({
                                            "hash": rel_hash_norm,
                                            "type": rel_name,
                                            "title": _resolve_related_title(rel_hash_norm),
                                            "path": None,
                                            "store": store_label,
                                        })
                            # Defensive: sometimes the API may return a single hash string.
                            elif isinstance(rel_value, str):
                                rel_hash_norm = normalize_hash(rel_value)
                                if rel_hash_norm and rel_hash_norm != hash_hex:
                                    if not any(str(r.get('hash', '')).lower() == rel_hash_norm for r in found_relationships):
                                        found_relationships.append({
                                            "hash": rel_hash_norm,
                                            "type": rel_name,
                                            "title": _resolve_related_title(rel_hash_norm),
                                            "path": None,
                                            "store": store_label,
                                        })
        except Exception as exc:
            # Only log the error if we didn't find local relationships either.
            if not found_relationships:
                log(f"Hydrus relationships fetch failed: {exc}", file=sys.stderr)

    if not found_relationships:
        log("No relationships found.")
        return 0

    # Display results.
    table = ResultTable(f"Relationships: {source_title}").init_command("get-relationship", [])

    # Custom sort order: king first, then derivative, alternates, duplicates,
    # then everything else; ties broken by title.
    def type_sort_key(item):
        t = item['type'].lower()
        if t == 'king':
            return 0
        elif t == 'derivative':
            return 1
        elif t in {'alternative', 'alternate', 'alt'}:
            return 2
        elif t == 'duplicate':
            return 3
        else:
            return 4

    found_relationships.sort(key=lambda x: (type_sort_key(x), x['title']))

    pipeline_results = []
    for i, item in enumerate(found_relationships):
        row = table.add_row()
        row.add_column("Type", item['type'].title())
        row.add_column("Title", item['title'])
        # row.add_column("Hash", item['hash'][:16] + "...")  # User requested removal
        row.add_column("Store", item['store'])
        # Result object emitted to the pipeline for downstream cmdlets.
        res_obj = {
            "title": item['title'],
            "hash": item['hash'],
            "file_hash": item['hash'],
            "relationship_type": item['type'],
            "store": item['store'],
        }
        # Target is always hash in store/hash-first mode.
        res_obj["target"] = item['hash']
        pipeline_results.append(res_obj)
        # @N re-selection re-runs against the same store/hash pair.
        table.set_row_selection_args(i, ["-store", str(item['store']), "-hash", item['hash']])

    ctx.set_last_result_table(table, pipeline_results)
    print(table)
    return 0
# Wire the implementation into the cmdlet descriptor and register it so the
# shell can dispatch "get-relationship" / "get-rel" to _run.
CMDLET.exec = _run
CMDLET.register()