This commit is contained in:
nose
2025-12-01 01:10:16 -08:00
parent 2b93edac10
commit 6b9ed7d4ab
17 changed files with 1644 additions and 470 deletions

View File

@@ -3,6 +3,7 @@ from __future__ import annotations
from typing import Any, Dict, Sequence, List, Optional
import json
import sys
from pathlib import Path
from helper.logger import log
@@ -11,16 +12,19 @@ import models
import pipeline as ctx
from helper import hydrus as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, normalize_hash, fmt_bytes
from helper.local_library import LocalLibraryDB
from config import get_local_storage_path
from result_table import ResultTable
# Cmdlet registration consumed by the command dispatcher / help system.
# NOTE(review): the rendered diff carried BOTH the pre- and post-change
# `summary=` and `details` lines; a repeated keyword argument is a
# SyntaxError, so only the post-change text is kept here.
CMDLET = Cmdlet(
    name="get-relationship",
    summary="Print relationships for the selected file (Hydrus or Local).",
    usage="get-relationship [-hash <sha256>]",
    args=[
        CmdletArg("-hash", description="Override the Hydrus file hash (SHA256) to target instead of the selected result."),
    ],
    details=[
        "- Lists relationship data as returned by Hydrus or Local DB.",
    ],
)
def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
    """Print relationships for the selected file, from the local DB or Hydrus.

    Resolution order:
      1. If the result carries a local file path (and no ``-hash`` override),
         look up relationships recorded in the local library DB.
      2. Otherwise (or additionally, when the local DB had nothing), query the
         Hydrus client API for file relationships keyed by SHA256 hash.

    Found relationships are rendered in a ``ResultTable`` and emitted as
    pipeline results so downstream cmdlets can act on them.

    Returns 0 on success (including "nothing found"), non-zero only on hard
    failures surfaced by the callees.
    """
    # NOTE(review): the diff hunk starts mid-function, so the original -hash
    # argument parsing is not visible here. Reconstructed minimally to match
    # the documented usage "get-relationship [-hash <sha256>]" — confirm
    # against the full file.
    override_hash: Optional[str] = None
    tokens = [str(a) for a in (_args or [])]
    i = 0
    while i < len(tokens):
        if tokens[i].lower() in {"-hash", "--hash"} and i + 1 < len(tokens):
            override_hash = tokens[i + 1]
            i += 2
        else:
            i += 1

    # Pipelines may hand us a one-element list; unwrap to the single result.
    if isinstance(result, list) and len(result) > 0:
        result = result[0]

    # Accumulated rows: each is {hash, type, title, path, origin}.
    found_relationships: List[Dict[str, Any]] = []
    source_title = "Unknown"

    # Prefer a local file path from the selected result when present.
    file_path = None
    if isinstance(result, dict):
        file_path = result.get("file_path") or result.get("path")
        source_title = result.get("title") or result.get("name") or "Unknown"
    elif hasattr(result, "file_path"):
        file_path = result.file_path
        source_title = getattr(result, "title", "Unknown")

    # True once the local DB actually yielded relationship data; used to
    # skip the Hydrus fallback below.
    local_db_checked = False
    if file_path and not override_hash:
        try:
            path_obj = Path(file_path)
            if not source_title or source_title == "Unknown":
                source_title = path_obj.name
            if path_obj.exists():
                storage_path = get_local_storage_path(config)
                if storage_path:
                    with LocalLibraryDB(storage_path) as db:
                        metadata = db.get_metadata(path_obj)
                        if metadata and metadata.get("relationships"):
                            local_db_checked = True
                            rels = metadata["relationships"]
                            if isinstance(rels, dict):
                                for rel_type, hashes in rels.items():
                                    if not hashes:
                                        continue
                                    for h in hashes:
                                        # Resolve the related hash to a local
                                        # file and a human-readable title when
                                        # possible; fall back to the raw hash.
                                        resolved_path = db.search_by_hash(h)
                                        title = h
                                        path = None
                                        if resolved_path:
                                            path = str(resolved_path)
                                            try:
                                                tags = db.get_tags(resolved_path)
                                                found_title = False
                                                for t in tags:
                                                    if t.lower().startswith('title:'):
                                                        title = t[6:].strip()
                                                        found_title = True
                                                        break
                                                if not found_title:
                                                    title = resolved_path.stem
                                            except Exception:
                                                title = resolved_path.stem
                                        found_relationships.append({
                                            "hash": h,
                                            "type": rel_type,
                                            "title": title,
                                            "path": path,
                                            "origin": "local",
                                        })
        except Exception as e:
            log(f"Error checking local relationships: {e}", file=sys.stderr)

    # Hydrus fallback: resolve a hash from the override, the result object,
    # or (for dict results) common hash keys.
    hash_hex = normalize_hash(override_hash) if override_hash else normalize_hash(getattr(result, "hash_hex", None))
    if not hash_hex and isinstance(result, dict):
        hash_hex = normalize_hash(result.get("hash") or result.get("file_hash"))

    if hash_hex and not local_db_checked:
        try:
            client = hydrus_wrapper.get_client(config)
            if client:
                rel = client.get_file_relationships(hash_hex)
                if rel:
                    file_rels = rel.get("file_relationships", {})
                    this_file_rels = file_rels.get(hash_hex)
                    if this_file_rels:
                        # Map Hydrus relationship type IDs to names.
                        # This mapping is approximate, based on Hydrus API
                        # docs/behavior — TODO confirm against the API version
                        # in use.
                        rel_map = {
                            "0": "potential duplicate",
                            "1": "false positive",
                            "2": "false positive",
                            "3": "duplicate",
                            "4": "alternative",
                            "8": "king",
                        }
                        for rel_type_id, hash_list in this_file_rels.items():
                            # Skip metadata keys that are not relationship lists.
                            if rel_type_id in {"is_king", "king", "king_is_on_file_domain", "king_is_local"}:
                                continue
                            rel_name = rel_map.get(str(rel_type_id), f"type-{rel_type_id}")
                            if isinstance(hash_list, list):
                                for rel_hash in hash_list:
                                    if isinstance(rel_hash, str) and rel_hash and rel_hash != hash_hex:
                                        # De-duplicate against rows already
                                        # found in the local DB.
                                        if not any(r['hash'] == rel_hash for r in found_relationships):
                                            found_relationships.append({
                                                "hash": rel_hash,
                                                "type": rel_name,
                                                # Title resolution would need
                                                # another API call; show hash.
                                                "title": rel_hash,
                                                "path": None,
                                                "origin": "hydrus",
                                            })
        except Exception as exc:
            # Only surface the Hydrus error if the local DB also came up empty.
            if not found_relationships:
                log(f"Hydrus relationships fetch failed: {exc}", file=sys.stderr)

    if not found_relationships:
        log("No relationships found.")
        return 0

    # Render the combined rows.
    table = ResultTable(f"Relationships: {source_title}")

    def type_sort_key(item: Dict[str, Any]) -> int:
        """Order rows: king, derivative, alternative, duplicate, then rest."""
        t = item['type'].lower()
        if t == 'king':
            return 0
        elif t == 'derivative':
            return 1
        elif t == 'alternative':
            return 2
        elif t == 'duplicate':
            return 3
        else:
            return 4

    found_relationships.sort(key=lambda x: (type_sort_key(x), x['title']))

    pipeline_results: List[Dict[str, Any]] = []
    for i, item in enumerate(found_relationships):
        row = table.add_row()
        row.add_column("Type", item['type'].title())
        row.add_column("Title", item['title'])
        row.add_column("Origin", item['origin'])

        # Dict emitted downstream; "target" lets the next cmdlet act on the
        # file path when we have one, otherwise on the hash.
        res_obj: Dict[str, Any] = {
            "title": item['title'],
            "hash": item['hash'],
            "file_hash": item['hash'],
            "relationship_type": item['type'],
            "origin": item['origin'],
        }
        if item['path']:
            res_obj["path"] = item['path']
            res_obj["file_path"] = item['path']
            res_obj["target"] = item['path']
        else:
            res_obj["target"] = item['hash']
        pipeline_results.append(res_obj)

        # Row selection: a local path can be opened directly; a Hydrus-only
        # row is re-selected by hash.
        if item['path']:
            table.set_row_selection_args(i, [item['path']])
        else:
            table.set_row_selection_args(i, ["-hash", item['hash']])

    ctx.set_last_result_table(table, pipeline_results)
    print(table)
    return 0