# Medios-Macina/cmdlet/get_metadata.py
from __future__ import annotations
from typing import Any, Dict, Sequence, Optional
import json
import sys
from SYS.logger import log
from pathlib import Path
from . import _shared as sh
Cmdlet = sh.Cmdlet
CmdletArg = sh.CmdletArg
SharedArgs = sh.SharedArgs
parse_cmdlet_args = sh.parse_cmdlet_args
get_field = sh.get_field
import pipeline as ctx
from result_table import ResultTable
class Get_Metadata(Cmdlet):
    """Class-based get-metadata cmdlet with self-registration.

    Looks up a file's metadata in a storage backend, keyed by its SHA-256
    hash, and emits a single result-table row with hash, MIME type, size,
    duration (or page count for PDFs), known URLs, and import timestamp.
    """

    def __init__(self) -> None:
        """Initialize the get-metadata cmdlet and register it with the CLI."""
        super().__init__(
            name="get-metadata",
            summary="Print metadata for files by hash and storage backend.",
            usage='get-metadata [-query "hash:<sha256>"] [-store <backend>]',
            alias=["meta"],
            arg=[
                SharedArgs.QUERY,
                SharedArgs.STORE,
            ],
            detail=[
                "- Retrieves metadata from storage backend using file hash as identifier.",
                "- Shows hash, MIME type, size, duration/pages, known url, and import timestamp.",
                "- Hash and store are taken from piped result or can be overridden with -query/-store flags.",
                "- All metadata is retrieved from the storage backend's database (single source of truth).",
            ],
            exec=self.run,
        )
        self.register()

    @staticmethod
    def _extract_imported_ts(meta: Dict[str, Any]) -> Optional[int]:
        """Extract an imported timestamp (unix seconds) from metadata if available.

        Accepts a numeric ``time_imported`` value or an ISO-8601 string;
        returns None when nothing usable is present.
        """
        if not isinstance(meta, dict):
            return None
        # Prefer explicit time_imported if present
        explicit = meta.get("time_imported")
        if isinstance(explicit, (int, float)):
            return int(explicit)

        # Try parsing string timestamps (ISO-8601, e.g. "2025-01-02T03:04:05")
        if isinstance(explicit, str):
            try:
                import datetime as _dt

                return int(_dt.datetime.fromisoformat(explicit).timestamp())
            except Exception:
                pass

        return None

    @staticmethod
    def _format_imported(ts: Optional[int]) -> str:
        """Format a unix timestamp as a UTC "YYYY-mm-dd HH:MM:SS" string.

        Returns "" for falsy timestamps (None or 0) or on conversion failure.
        """
        if not ts:
            return ""
        try:
            import datetime as _dt

            # datetime.utcfromtimestamp() is deprecated since Python 3.12;
            # an aware UTC datetime renders the same string.
            return _dt.datetime.fromtimestamp(ts, tz=_dt.timezone.utc).strftime(
                "%Y-%m-%d %H:%M:%S"
            )
        except Exception:
            return ""

    @staticmethod
    def _build_table_row(
        title: str,
        store: str,
        path: str,
        mime: str,
        size_bytes: Optional[int],
        dur_seconds: Optional[int],
        imported_ts: Optional[int],
        url: list[str],
        hash_value: Optional[str],
        pages: Optional[int] = None,
    ) -> Dict[str, Any]:
        """Build a table row dict with metadata fields.

        The returned dict carries machine-readable fields plus a ``columns``
        list of (label, value) pairs for display. For PDF MIME types the
        duration column is relabelled "Pages".
        """
        size_mb = None
        size_int: Optional[int] = None
        if size_bytes is not None:
            try:
                size_int = int(size_bytes)
            except Exception:
                size_int = None
        if isinstance(size_int, int):
            try:
                # Truncated (not rounded) megabytes for display.
                size_mb = int(size_int / (1024 * 1024))
            except Exception:
                size_mb = None
        # Non-numeric duration/pages (e.g. an unparseable string) become None.
        dur_int = int(dur_seconds) if isinstance(dur_seconds, (int, float)) else None
        pages_int = int(pages) if isinstance(pages, (int, float)) else None
        imported_label = Get_Metadata._format_imported(imported_ts)
        duration_label = "Duration(s)"
        duration_value = str(dur_int) if dur_int is not None else ""
        if mime and mime.lower().startswith("application/pdf"):
            duration_label = "Pages"
            duration_value = str(pages_int) if pages_int is not None else ""
        columns = [
            ("Title", title or ""),
            ("Hash", hash_value or ""),
            ("MIME", mime or ""),
            ("Size(MB)", str(size_mb) if size_mb is not None else ""),
            (duration_label, duration_value),
            ("Imported", imported_label),
            ("Store", store or ""),
        ]
        return {
            "title": title or path,
            "path": path,
            "store": store,
            "mime": mime,
            "size_bytes": size_int,
            "duration_seconds": dur_int,
            "pages": pages_int,
            "imported_ts": imported_ts,
            "imported": imported_label,
            "hash": hash_value,
            "url": url,
            "columns": columns,
        }

    @staticmethod
    def _add_table_body_row(table: ResultTable, row: Dict[str, Any]) -> None:
        """Add a single row to the ResultTable using the prepared columns."""
        columns = row.get("columns") if isinstance(row, dict) else None
        lookup: Dict[str, Any] = {}
        if isinstance(columns, list):
            for col in columns:
                if isinstance(col, tuple) and len(col) == 2:
                    label, value = col
                    lookup[str(label)] = value
        row_obj = table.add_row()
        row_obj.add_column("Hash", lookup.get("Hash", ""))
        row_obj.add_column("MIME", lookup.get("MIME", ""))
        row_obj.add_column("Size(MB)", lookup.get("Size(MB)", ""))
        # Third metric column is Duration(s) or Pages, whichever was built;
        # fall back to an empty Duration(s) cell so column count is stable.
        if "Duration(s)" in lookup:
            row_obj.add_column("Duration(s)", lookup.get("Duration(s)", ""))
        elif "Pages" in lookup:
            row_obj.add_column("Pages", lookup.get("Pages", ""))
        else:
            row_obj.add_column("Duration(s)", "")

    def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
        """Main execution entry point.

        Resolves the target hash and backend from -query/-store flags (or the
        piped result), fetches metadata from the backend, and emits one table
        row. Returns 0 on success, 1 on any error.
        """
        # Parse arguments
        parsed = parse_cmdlet_args(args, self)
        query_hash = sh.parse_single_hash_query(parsed.get("query"))
        if parsed.get("query") and not query_hash:
            # -query was supplied but contained no usable hash:<sha256> term.
            log('No hash available - use -query "hash:<sha256>"', file=sys.stderr)
            return 1

        # Get hash and store from parsed args or result
        file_hash = query_hash or get_field(result, "hash")
        storage_source = parsed.get("store") or get_field(result, "store")

        if not file_hash:
            log('No hash available - use -query "hash:<sha256>"', file=sys.stderr)
            return 1

        if not storage_source:
            log("No storage backend specified - use -store to specify", file=sys.stderr)
            return 1

        # Use storage backend to get metadata
        try:
            from Store import Store

            storage = Store(config)
            backend = storage[storage_source]

            # Get metadata from backend
            metadata = backend.get_metadata(file_hash)

            if not metadata:
                log(
                    f"No metadata found for hash {file_hash[:8]}... in {storage_source}",
                    file=sys.stderr,
                )
                return 1

            # Extract title from tags if available; default to truncated hash.
            title = get_field(result, "title") or file_hash[:16]
            if not get_field(result, "title"):
                # Try to get title from tags (best-effort)
                try:
                    tags, _ = backend.get_tag(file_hash)
                    for tag in tags:
                        if tag.lower().startswith("title:"):
                            title = tag.split(":", 1)[1]
                            break
                except Exception:
                    pass

            # Extract metadata fields; duration may live under several keys.
            mime_type = metadata.get("mime") or metadata.get("ext", "")
            file_size = metadata.get("size")
            duration_seconds = metadata.get("duration")
            if duration_seconds is None:
                duration_seconds = metadata.get("duration_seconds")
            if duration_seconds is None:
                duration_seconds = metadata.get("length")
            if duration_seconds is None and isinstance(metadata.get("duration_ms"), (int, float)):
                try:
                    duration_seconds = float(metadata["duration_ms"]) / 1000.0
                except Exception:
                    duration_seconds = None
            if isinstance(duration_seconds, str):
                s = duration_seconds.strip()
                if s:
                    try:
                        duration_seconds = float(s)
                    except ValueError:
                        if ":" in s:
                            # Clock-style durations: "MM:SS" or "HH:MM:SS"
                            parts = [p.strip() for p in s.split(":") if p.strip()]
                            if len(parts) in {2, 3} and all(p.isdigit() for p in parts):
                                nums = [int(p) for p in parts]
                                if len(nums) == 2:
                                    duration_seconds = float(nums[0] * 60 + nums[1])
                                else:
                                    duration_seconds = float(
                                        nums[0] * 3600 + nums[1] * 60 + nums[2]
                                    )
                else:
                    duration_seconds = None

            pages = metadata.get("pages")
            url = metadata.get("url") or []
            imported_ts = self._extract_imported_ts(metadata)

            # Normalize url: may be stored as a JSON-encoded list string.
            if isinstance(url, str):
                try:
                    url = json.loads(url)
                except (json.JSONDecodeError, TypeError):
                    url = []
            if not isinstance(url, list):
                url = []

            # Build display row
            row = self._build_table_row(
                title=title,
                store=storage_source,
                path=metadata.get("path", ""),
                mime=mime_type,
                size_bytes=file_size,
                dur_seconds=duration_seconds,
                imported_ts=imported_ts,
                url=url,
                hash_value=file_hash,
                pages=pages,
            )

            table_title = f"get-metadata: {title}" if title else "get-metadata"
            table = ResultTable(table_title).init_command(table_title, "get-metadata", list(args))
            self._add_table_body_row(table, row)
            ctx.set_last_result_table_overlay(table, [row], row)
            ctx.emit(row)
            return 0

        except KeyError:
            log(f"Storage backend '{storage_source}' not found", file=sys.stderr)
            return 1
        except Exception as exc:
            log(f"Failed to get metadata: {exc}", file=sys.stderr)
            return 1
2025-11-25 20:09:33 -08:00
2025-12-11 12:47:30 -08:00
# Module-level singleton; instantiating Get_Metadata self-registers the cmdlet.
CMDLET = Get_Metadata()