# Source file: Medios-Macina/cmdlets/get_metadata.py
# (Python, 343 lines, 13 KiB; first revision 2025-11-25 20:09:33 -08:00)
from __future__ import annotations
from typing import Any, Dict, Sequence, Optional
import json
import sys
from helper.logger import log
from pathlib import Path
import mimetypes
import os
from helper import hydrus as hydrus_wrapper
# (blame timestamp) 2025-12-07 00:21:30 -08:00
from helper.local_library import LocalLibraryDB
# (blame timestamp) 2025-11-25 20:09:33 -08:00
from ._shared import Cmdlet, CmdletArg, normalize_hash
# (blame timestamp) 2025-12-07 00:21:30 -08:00
from config import get_local_storage_path
import pipeline as ctx
from result_table import ResultTable
def _extract_imported_ts(meta: Dict[str, Any]) -> Optional[int]:
    """Extract an imported timestamp from Hydrus metadata if available.

    Args:
        meta: One file-metadata entry. Anything that is not a dict yields
            ``None``.

    Returns:
        The top-level ``time_imported`` value when it is a usable number;
        otherwise the earliest import time found under
        ``file_services["current"]``; ``None`` when neither is present.
    """

    def _as_timestamp(value: Any) -> Optional[int]:
        # bool is an int subclass, but True/False are never real timestamps.
        if isinstance(value, bool) or not isinstance(value, (int, float)):
            return None
        try:
            return int(value)
        except (ValueError, OverflowError):
            # int() rejects NaN and +/-infinity.
            return None

    if not isinstance(meta, dict):
        return None

    # Prefer explicit time_imported if present.
    explicit = _as_timestamp(meta.get("time_imported"))
    if explicit is not None:
        return explicit

    file_services = meta.get("file_services")
    current = file_services.get("current") if isinstance(file_services, dict) else None
    if not isinstance(current, dict):
        return None

    candidates = []
    for entry in current.values():
        # The Hydrus Client API reports each current service as an object
        # that carries its own "time_imported"; a flat service -> number
        # mapping is still accepted for backward compatibility.
        if isinstance(entry, dict):
            entry = entry.get("time_imported")
        ts = _as_timestamp(entry)
        if ts is not None:
            candidates.append(ts)

    # The earliest import across services is when the file first arrived.
    return min(candidates) if candidates else None
def _format_imported(ts: Optional[int]) -> str:
    """Format a Unix timestamp as a ``YYYY-MM-DD HH:MM:SS`` string in UTC.

    Args:
        ts: Seconds since the epoch, or ``None``.

    Returns:
        The formatted timestamp, or ``""`` when *ts* is falsy (``None`` or
        ``0``) or cannot be converted.
    """
    if not ts:
        return ""
    try:
        import datetime as _dt

        # datetime.utcfromtimestamp() is deprecated since Python 3.12; the
        # timezone-aware call renders the same UTC wall-clock text.
        moment = _dt.datetime.fromtimestamp(ts, tz=_dt.timezone.utc)
        return moment.strftime("%Y-%m-%d %H:%M:%S")
    except (OverflowError, OSError, ValueError, TypeError):
        # Out-of-range or non-numeric input: show nothing rather than fail.
        return ""
def _build_table_row(
    title: str,
    origin: str,
    path: str,
    mime: str,
    size_bytes: Optional[int],
    dur_seconds: Optional[float],
    imported_ts: Optional[int],
    urls: list[str],
    hash_value: Optional[str],
    pages: Optional[int] = None,
) -> Dict[str, Any]:
    """Build the result row (raw fields plus display columns) for one file.

    Args:
        title: Display title; the row's ``"title"`` falls back to *path*
            when this is empty.
        origin: Store label shown in the "Store" column.
        path: Location of the file, stored under ``"path"``.
        mime: MIME type; a value starting with ``application/pdf`` switches
            the duration column to a page count.
        size_bytes: File size in bytes, or ``None``.
        dur_seconds: Duration in seconds (int or float), or ``None``.
        imported_ts: Import time as a Unix timestamp, or ``None``.
        urls: Known URLs, stored under ``"known_urls"``.
        hash_value: File hash, or ``None``.
        pages: Page count, or ``None``.

    Returns:
        A dict with the raw values and a ``"columns"`` list of
        ``(label, text)`` pairs.
    """

    def _whole_number(value: Any) -> Optional[int]:
        # int() raises on NaN/infinity; treat those as "unknown" instead of
        # letting a bad probe value abort the whole row.
        if not isinstance(value, (int, float)):
            return None
        try:
            return int(value)
        except (ValueError, OverflowError):
            return None

    size_mb = None
    if isinstance(size_bytes, int):
        try:
            # Whole mebibytes, truncated.
            size_mb = int(size_bytes / (1024 * 1024))
        except OverflowError:
            size_mb = None

    dur_int = _whole_number(dur_seconds)
    pages_int = _whole_number(pages)
    imported_label = _format_imported(imported_ts)

    # PDFs have no duration; reuse that column slot for the page count.
    if mime and mime.lower().startswith("application/pdf"):
        duration_label = "Pages"
        duration_value = str(pages_int) if pages_int is not None else ""
    else:
        duration_label = "Duration(s)"
        duration_value = str(dur_int) if dur_int is not None else ""

    columns = [
        ("Title", title or ""),
        ("Hash", hash_value or ""),
        ("MIME", mime or ""),
        ("Size(MB)", str(size_mb) if size_mb is not None else ""),
        (duration_label, duration_value),
        ("Imported", imported_label),
        ("Store", origin or ""),
    ]

    return {
        "title": title or path,
        "path": path,
        "origin": origin,
        "mime": mime,
        "size_bytes": size_bytes,
        "duration_seconds": dur_int,
        "pages": pages_int,
        "imported_ts": imported_ts,
        "imported": imported_label,
        "hash": hash_value,
        "known_urls": urls,
        "columns": columns,
    }
# (blame timestamp) 2025-11-25 20:09:33 -08:00
def _get_result_field(obj: Any, field: str, default: Any = None) -> Any:
    """Read *field* from a dict-style or attribute-style result object."""
    if isinstance(obj, dict):
        return obj.get(field, default)
    return getattr(obj, field, default)


def _parse_hash_override(args: Sequence[str]) -> Optional[str]:
    """Return the value following the first ``-hash``/``--hash``/``hash`` flag."""
    args_list = list(args)
    # A flag in the last position has no value to consume and is ignored.
    for idx, arg in enumerate(args_list[:-1]):
        if str(arg).lower() in {"-hash", "--hash", "hash"}:
            return str(args_list[idx + 1]).strip()
    return None


def _sha256_of_file(file_path: Path) -> Optional[str]:
    """Return the SHA-256 hex digest of *file_path*, or ``None`` if unreadable."""
    import hashlib

    digest = hashlib.sha256()
    try:
        with open(file_path, "rb") as handle:
            # Hash in 1 MiB chunks so large media files are never loaded
            # into memory in one piece.
            for chunk in iter(lambda: handle.read(1024 * 1024), b""):
                digest.update(chunk)
    except OSError:
        return None
    return digest.hexdigest()


def _probe_video_duration(file_path: Path) -> Optional[float]:
    """Ask ``ffprobe`` for the duration in seconds; ``None`` on any failure."""
    import subprocess

    try:
        proc = subprocess.run(
            [
                "ffprobe", "-v", "error",
                "-select_streams", "v:0",
                "-show_entries", "format=duration",
                "-of", "default=noprint_wrappers=1:nokey=1",
                str(file_path),
            ],
            capture_output=True,
            text=True,
            timeout=5,
        )
        output = proc.stdout.strip()
        if proc.returncode == 0 and output:
            return float(output)
    except (OSError, ValueError, subprocess.SubprocessError):
        # ffprobe missing, timed out, or printed something non-numeric.
        pass
    return None


def _read_sidecar_urls(file_path: Path) -> list[str]:
    """Collect ``known_url:`` entries from the ``<file>.tags`` sidecar, if any."""
    prefix = "known_url:"
    urls: list[str] = []
    sidecar_path = Path(str(file_path) + ".tags")
    if not sidecar_path.exists():
        return urls
    try:
        with open(sidecar_path, "r", encoding="utf-8") as handle:
            for line in handle:
                line = line.strip()
                if line.startswith(prefix):
                    url_value = line[len(prefix):].strip()
                    if url_value:
                        urls.append(url_value)
    except (OSError, UnicodeError):
        # Best effort: keep whatever was parsed before the read failed.
        pass
    return urls


def _local_imported_ts(db_metadata: Any) -> Optional[int]:
    """Derive an import timestamp from a local-library metadata record."""
    if not isinstance(db_metadata, dict):
        return None
    ts = db_metadata.get("time_imported") or db_metadata.get("time_added")
    if isinstance(ts, (int, float)):
        return int(ts)
    if isinstance(ts, str):
        try:
            import datetime as _dt

            # NOTE(review): a naive ISO string is interpreted in local time
            # here, while the table renders timestamps as UTC; confirm which
            # zone the library DB stores.
            return int(_dt.datetime.fromisoformat(ts).timestamp())
        except (ValueError, OverflowError, OSError):
            return None
    return None


def _local_metadata_row(
    file_path: Path,
    result: Any,
    config: Dict[str, Any],
    override_hash: Optional[str],
) -> Dict[str, Any]:
    """Gather metadata for an existing local file and build its table row."""
    # Hash: explicit override, then the selected result, then compute it.
    if override_hash:
        hash_hex = normalize_hash(override_hash)
    else:
        hash_hex = normalize_hash(_get_result_field(result, "hash_hex", None))
    if not hash_hex:
        hash_hex = _sha256_of_file(file_path)

    mime_type, _ = mimetypes.guess_type(str(file_path))
    if not mime_type:
        mime_type = "unknown"

    # Pull metadata from the local DB if available (imported time, duration, ...).
    db_metadata = None
    library_root = get_local_storage_path(config)
    if library_root:
        try:
            with LocalLibraryDB(library_root) as db:
                db_metadata = db.get_metadata(file_path) or None
        except Exception:
            # The library DB is optional; carry on with filesystem data only.
            db_metadata = None

    # File size: prefer the DB value, otherwise stat the file.
    file_size = None
    if isinstance(db_metadata, dict) and isinstance(db_metadata.get("size"), int):
        file_size = db_metadata.get("size")
    else:
        try:
            file_size = file_path.stat().st_size
        except OSError:
            file_size = None

    duration_seconds = None
    pages = None
    if isinstance(db_metadata, dict):
        if isinstance(db_metadata.get("duration"), (int, float)):
            duration_seconds = float(db_metadata.get("duration"))
        if isinstance(db_metadata.get("pages"), (int, float)):
            pages = int(db_metadata.get("pages"))
    if duration_seconds is None and mime_type.startswith("video"):
        duration_seconds = _probe_video_duration(file_path)

    # Known URLs: sidecar first, then whatever the selected result carries.
    urls = _read_sidecar_urls(file_path)
    if not urls:
        urls_from_result = (
            _get_result_field(result, "known_urls", None)
            or _get_result_field(result, "urls", None)
        )
        if isinstance(urls_from_result, list):
            urls.extend(str(u).strip() for u in urls_from_result if u)

    return _build_table_row(
        title=file_path.name,
        origin="local",
        path=str(file_path),
        mime=mime_type or "",
        size_bytes=int(file_size) if isinstance(file_size, int) else None,
        dur_seconds=duration_seconds,
        imported_ts=_local_imported_ts(db_metadata),
        urls=urls,
        hash_value=hash_hex,
        pages=pages,
    )


def _publish_row(row: Dict[str, Any], table_title: str, args: Sequence[str]) -> None:
    """Register *row* in a one-row result table and emit it to the pipeline."""
    table = ResultTable(table_title)
    table.set_source_command("get-metadata", list(args))
    table.add_result(row)
    ctx.set_last_result_table_overlay(table, [row], row)
    ctx.emit(row)


def _run(result: Any, _args: Sequence[str], config: Dict[str, Any]) -> int:
    """Show metadata for the selected result, from a local file or from Hydrus.

    A result whose ``target``/``path`` is a non-HTTP string naming an
    existing file is handled locally. Anything else, and any local failure,
    falls through to a Hydrus lookup by hash.

    Args:
        result: Selected pipeline item; a dict or an object with attributes.
        _args: Command-line arguments; supports help flags and
            ``-hash <value>``.
        config: Application configuration passed to the storage and Hydrus
            helpers.

    Returns:
        ``0`` when a row was emitted, help was printed, or Hydrus returned
        no metadata; ``1`` when no hash is available or Hydrus cannot be
        reached or queried.
    """
    # Help
    try:
        if any(str(a).lower() in {"-?", "/?", "--help", "-h", "help", "--cmdlet"} for a in _args):
            log(json.dumps(CMDLET.to_dict(), ensure_ascii=False, indent=2))
            return 0
    except Exception:
        # Help rendering is best effort; continue with the normal lookup.
        pass

    override_hash = _parse_hash_override(_args)

    # Try to determine if this is a local file or Hydrus file.
    local_path = _get_result_field(result, "target", None) or _get_result_field(result, "path", None)
    is_local = (
        isinstance(local_path, str)
        and bool(local_path)
        and not local_path.startswith(("http://", "https://"))
    )

    # LOCAL FILE PATH
    if is_local:
        try:
            file_path = Path(str(local_path))
            if file_path.exists() and file_path.is_file():
                row = _local_metadata_row(file_path, result, config, override_hash)
                _publish_row(row, file_path.name, _args)
                return 0
        except Exception as exc:
            # Still fall through to Hydrus, but report why the local path
            # failed so the cause is not masked by the "no hash" message.
            log(f"Local metadata lookup failed, falling back to Hydrus: {exc}", file=sys.stderr)

    # HYDRUS PATH
    # NOTE(review): rows built by _build_table_row store the hash under
    # "hash", while this lookup reads "hash_hex"; confirm which key upstream
    # results use.
    if override_hash:
        hash_hex = normalize_hash(override_hash)
    else:
        hash_hex = normalize_hash(_get_result_field(result, "hash_hex", None))
    if not hash_hex:
        log("Selected result does not include a Hydrus hash or local path", file=sys.stderr)
        return 1

    try:
        client = hydrus_wrapper.get_client(config)
    except Exception as exc:
        log(f"Hydrus client unavailable: {exc}", file=sys.stderr)
        return 1
    if client is None:
        log("Hydrus client unavailable", file=sys.stderr)
        return 1

    try:
        payload = client.fetch_file_metadata(
            hashes=[hash_hex],
            include_service_keys_to_tags=False,
            include_file_urls=True,
            include_duration=True,
            include_size=True,
            include_mime=True,
        )
    except Exception as exc:
        log(f"Hydrus metadata fetch failed: {exc}", file=sys.stderr)
        return 1

    items = payload.get("metadata") if isinstance(payload, dict) else None
    if not isinstance(items, list) or not items:
        log("No metadata found.")
        return 0
    meta = items[0]
    if not isinstance(meta, dict):
        log("No metadata found.")
        return 0

    mime = meta.get("mime")
    size = meta.get("size") or meta.get("file_size")
    duration_value = meta.get("duration")
    inner = meta.get("metadata")
    if duration_value is None and isinstance(inner, dict):
        duration_value = inner.get("duration")

    imported_ts = _extract_imported_ts(meta)

    try:
        from .search_file import _hydrus_duration_seconds as _dur_secs
    except Exception:
        # Converter unavailable: pass the raw duration value through.
        def _dur_secs(value: Any) -> Any:
            return value

    dur_seconds = _dur_secs(duration_value)

    urls = meta.get("known_urls") or meta.get("urls")
    urls = [str(u).strip() for u in urls] if isinstance(urls, list) else []

    row = _build_table_row(
        title=hash_hex,
        origin="hydrus",
        path=f"hydrus://file/{hash_hex}",
        mime=mime or "",
        size_bytes=int(size) if isinstance(size, int) else None,
        dur_seconds=int(dur_seconds) if isinstance(dur_seconds, (int, float)) else None,
        imported_ts=imported_ts,
        urls=urls,
        hash_value=hash_hex,
        pages=None,
    )
    _publish_row(row, hash_hex or "Metadata", _args)
    return 0
# Registration record for the get-metadata cmdlet: its name, alias, the single
# optional "hash" argument, and the help text rendered by the help flags.
CMDLET = Cmdlet(
    name="get-metadata",
    aliases=["meta"],
    summary="Print metadata for local or Hydrus files (hash, mime, duration, size, URLs).",
    usage="get-metadata [-hash <sha256>]",
    args=[
        CmdletArg(
            "hash",
            description="Override the Hydrus file hash (SHA256) to target instead of the selected result.",
        ),
    ],
    details=[
        "- For local files: Shows path, hash (computed if needed), MIME type, size, duration, and known URLs from sidecar.",
        "- For Hydrus files: Shows path (hydrus://), hash, MIME, duration, size, and known URLs.",
        "- Automatically detects local vs Hydrus files.",
        "- Local file hashes are computed via SHA256 if not already available.",
    ],
)