Files
Medios-Macina/cmdlets/get_metadata.py

247 lines
9.3 KiB
Python
Raw Normal View History

2025-11-25 20:09:33 -08:00
from __future__ import annotations
from typing import Any, Dict, Sequence, Optional
import json
import sys
from helper.logger import log
from pathlib import Path
import mimetypes
import os
from helper import hydrus as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, normalize_hash
# Flags that request the cmdlet's help output.
_HELP_FLAGS = frozenset({"-?", "/?", "--help", "-h", "help", "--cmdlet"})
# Flags whose following argument is taken as the hash override.
_HASH_FLAGS = frozenset({"-hash", "--hash", "hash"})
# Bytes read per iteration when hashing, so large media files are never
# loaded into memory in one piece.
_HASH_CHUNK_SIZE = 1024 * 1024


def _get_field(obj: Any, field: str, default: Any = None) -> Any:
    """Return *field* from a dict key or, for any other object, an attribute."""
    if isinstance(obj, dict):
        return obj.get(field, default)
    return getattr(obj, field, default)


def _fallback_label(value: Any) -> str:
    """Plain ``str()`` formatter used when the search_file helpers are unavailable."""
    return str(value) if value is not None else ""


def _parse_hash_override(args: Sequence[str]) -> Optional[str]:
    """Return the stripped value following the first hash flag, if any.

    A hash flag in the last position has no value and is ignored.
    """
    tokens = list(args)
    for index, token in enumerate(tokens[:-1]):
        if str(token).lower() in _HASH_FLAGS:
            return str(tokens[index + 1]).strip()
    return None


def _sha256_of_file(file_path: Path) -> Optional[str]:
    """Return the SHA-256 hex digest of *file_path*, or None if it cannot be read."""
    import hashlib

    digest = hashlib.sha256()
    try:
        with open(file_path, "rb") as handle:
            # Stream the file instead of read()-ing it whole.
            for chunk in iter(lambda: handle.read(_HASH_CHUNK_SIZE), b""):
                digest.update(chunk)
    except OSError:
        return None
    return digest.hexdigest()


def _probe_duration_seconds(file_path: Path) -> Optional[float]:
    """Ask ffprobe for the container duration; None when unavailable or unparsable."""
    import subprocess

    try:
        proc = subprocess.run(
            [
                "ffprobe", "-v", "error", "-select_streams", "v:0",
                "-show_entries", "format=duration",
                "-of", "default=noprint_wrappers=1:nokey=1",
                str(file_path),
            ],
            capture_output=True,
            text=True,
            timeout=5,
        )
    except (OSError, ValueError, subprocess.SubprocessError):
        # ffprobe missing, not executable, or timed out: duration is optional.
        return None
    output = proc.stdout.strip()
    if proc.returncode != 0 or not output:
        return None
    try:
        return float(output)
    except ValueError:
        return None


def _read_sidecar_urls(file_path: Path) -> list[str]:
    """Collect ``known_url:`` entries from the ``<file>.tags`` sidecar, if present."""
    prefix = "known_url:"
    found: list[str] = []
    sidecar_path = Path(str(file_path) + ".tags")
    if not sidecar_path.exists():
        return found
    try:
        with open(sidecar_path, "r", encoding="utf-8") as handle:
            for raw_line in handle:
                line = raw_line.strip()
                if line.startswith(prefix):
                    url_value = line[len(prefix):].strip()
                    if url_value:
                        found.append(url_value)
    except (OSError, UnicodeDecodeError):
        # Best effort: keep whatever was read before the failure.
        pass
    return found


def _show_local_metadata(result: Any, file_path: Path, override_hash: Optional[str]) -> None:
    """Log path, hash, MIME type, size, duration and known URLs of a local file."""
    # Prefer the explicit override, then the hash carried by the result,
    # and only hash the file contents when neither yields a value.
    if override_hash:
        hash_hex = normalize_hash(override_hash)
    else:
        hash_hex = normalize_hash(_get_field(result, "hash_hex", None))
    if not hash_hex:
        hash_hex = _sha256_of_file(file_path)

    mime_type, _ = mimetypes.guess_type(str(file_path))
    if not mime_type:
        mime_type = "unknown"

    try:
        file_size = file_path.stat().st_size
    except OSError:
        file_size = None

    duration_seconds = _probe_duration_seconds(file_path)

    # Formatting helpers live in the sibling search module; fall back to str().
    try:
        from .search_file import _format_size as _fmt_size
        from .search_file import _format_duration as _fmt_dur
    except Exception:
        _fmt_size = _fallback_label
        _fmt_dur = _fallback_label

    size_label = _fmt_size(file_size) if file_size is not None else ""
    dur_label = _fmt_dur(duration_seconds) if duration_seconds is not None else ""

    # Sidecar URLs take precedence; the result's own URL list is the fallback.
    urls = _read_sidecar_urls(file_path)
    if not urls:
        urls_from_result = _get_field(result, "known_urls", None) or _get_field(result, "urls", None)
        if isinstance(urls_from_result, list):
            urls.extend(str(u).strip() for u in urls_from_result if u)

    log(f"PATH: {file_path}")
    if hash_hex:
        log(f"HASH: {hash_hex}")
    if mime_type:
        log(f"MIME: {mime_type}")
    if size_label:
        log(f"Size: {size_label}")
    if dur_label:
        log(f"Duration: {dur_label}")
    if urls:
        log("URLs:")
        for url in urls:
            log(f" {url}")


def _show_hydrus_metadata(result: Any, override_hash: Optional[str], config: Dict[str, Any]) -> int:
    """Fetch and log metadata for a Hydrus file; return the cmdlet exit code."""
    if override_hash:
        hash_hex = normalize_hash(override_hash)
    else:
        hash_hex = normalize_hash(_get_field(result, "hash_hex", None))
    if not hash_hex:
        log("Selected result does not include a Hydrus hash or local path", file=sys.stderr)
        return 1

    try:
        client = hydrus_wrapper.get_client(config)
    except Exception as exc:
        log(f"Hydrus client unavailable: {exc}", file=sys.stderr)
        return 1
    if client is None:
        log("Hydrus client unavailable", file=sys.stderr)
        return 1

    try:
        payload = client.fetch_file_metadata(
            hashes=[hash_hex],
            include_service_keys_to_tags=False,
            include_file_urls=True,
            include_duration=True,
            include_size=True,
            include_mime=True,
        )
    except Exception as exc:
        log(f"Hydrus metadata fetch failed: {exc}", file=sys.stderr)
        return 1

    items = payload.get("metadata") if isinstance(payload, dict) else None
    if not isinstance(items, list) or not items:
        log("No metadata found.")
        return 0
    meta = items[0]
    if not isinstance(meta, dict):
        log("No metadata found.")
        return 0

    mime = meta.get("mime")
    size = meta.get("size") or meta.get("file_size")
    duration_value = meta.get("duration")
    # The duration may also sit one level down, under a nested "metadata" dict.
    inner = meta.get("metadata")
    if duration_value is None and isinstance(inner, dict):
        duration_value = inner.get("duration")

    try:
        from .search_file import _format_size as _fmt_size
        from .search_file import _format_duration as _fmt_dur
        from .search_file import _hydrus_duration_seconds as _dur_secs
    except Exception:
        _fmt_size = _fallback_label
        _fmt_dur = _fallback_label

        def _dur_secs(value: Any) -> Any:
            return value

    dur_seconds = _dur_secs(duration_value)
    dur_label = _fmt_dur(dur_seconds) if dur_seconds is not None else ""
    size_label = _fmt_size(size)

    log(f"PATH: hydrus://file/{hash_hex}")
    log(f"Hash: {hash_hex}")
    if mime:
        log(f"MIME: {mime}")
    if dur_label:
        log(f"Duration: {dur_label}")
    if size_label:
        log(f"Size: {size_label}")

    urls = meta.get("known_urls") or meta.get("urls")
    if isinstance(urls, list) and urls:
        log("URLs:")
        for url in urls:
            try:
                text = str(url).strip()
            except Exception:
                text = ""
            if text:
                log(f" {text}")
    return 0


def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
    """Print metadata for the selected result, from a local file or from Hydrus.

    Args:
        result: Selected item, either a dict or an object. The fields read are
            ``target``/``path`` (candidate local path), ``hash_hex`` and
            ``known_urls``/``urls``.
        _args: Command-line arguments. A help flag prints the cmdlet
            description as JSON; ``-hash <value>`` overrides the result's hash.
        config: Configuration passed to ``hydrus_wrapper.get_client``.

    Returns:
        0 when help or metadata was printed, or when Hydrus returned no
        metadata. 1 when no hash is available, the Hydrus client is
        unavailable, or the metadata fetch failed.
    """
    # Help
    try:
        if any(str(a).lower() in _HELP_FLAGS for a in _args):
            log(json.dumps(CMDLET.to_dict(), ensure_ascii=False, indent=2))
            return 0
    except Exception:
        # A failure while rendering help must not block the normal path.
        pass

    override_hash = _parse_hash_override(_args)

    # A string target/path that is not an http(s) URL is treated as a
    # candidate local file.
    local_path = _get_field(result, "target", None) or _get_field(result, "path", None)
    is_local = (
        bool(local_path)
        and isinstance(local_path, str)
        and not local_path.startswith(("http://", "https://"))
    )

    if is_local:
        try:
            file_path = Path(str(local_path))
            if file_path.exists() and file_path.is_file():
                _show_local_metadata(result, file_path, override_hash)
                return 0
        except Exception:
            # Deliberate best effort: any local failure falls through to Hydrus.
            pass

    return _show_hydrus_metadata(result, override_hash, config)
# Arguments accepted by the get-metadata cmdlet.
_GET_METADATA_ARGS = [
    CmdletArg(
        "hash",
        description="Override the Hydrus file hash (SHA256) to target instead of the selected result.",
    ),
]

# Bullet lines passed as the cmdlet's ``details``.
_GET_METADATA_DETAILS = [
    "- For local files: Shows path, hash (computed if needed), MIME type, size, duration, and known URLs from sidecar.",
    "- For Hydrus files: Shows path (hydrus://), hash, MIME, duration, size, and known URLs.",
    "- Automatically detects local vs Hydrus files.",
    "- Local file hashes are computed via SHA256 if not already available.",
]

# Cmdlet descriptor; `_run` dumps `CMDLET.to_dict()` as JSON when help is requested.
CMDLET = Cmdlet(
    name="get-metadata",
    summary="Print metadata for local or Hydrus files (hash, mime, duration, size, URLs).",
    usage="get-metadata [-hash <sha256>]",
    aliases=["meta"],
    args=_GET_METADATA_ARGS,
    details=_GET_METADATA_DETAILS,
)