Files
Medios-Macina/cmdlets/get_url.py
2025-12-01 01:10:16 -08:00

140 lines
5.3 KiB
Python

from __future__ import annotations
from typing import Any, Dict, Sequence
import json
import sys
from pathlib import Path
from . import register
import models
import pipeline as ctx
from helper import hydrus as hydrus_wrapper
from ._shared import Cmdlet, CmdletArg, normalize_hash
from helper.logger import log
from config import get_local_storage_path
from helper.local_library import LocalLibraryDB
# Descriptor for the ``get-url`` cmdlet: its name, one-line summary, usage
# string, declared arguments and detail lines. ``get_urls`` serializes this
# object when help is requested.
CMDLET = Cmdlet(
    name="get-url",
    summary="List URLs associated with a file (Hydrus or Local).",
    usage="get-url [-hash <sha256>]",
    args=[
        # The only declared argument: an explicit hash that takes the place
        # of the hash carried by the selected result.
        CmdletArg(
            "-hash",
            description="Override the Hydrus file hash (SHA256) to target instead of the selected result.",
        ),
    ],
    details=[
        "- Prints the known URLs for the selected file.",
    ],
)
def _parse_hash_and_rest(args: Sequence[str]) -> tuple[str | None, list[str]]:
override_hash: str | None = None
rest: list[str] = []
i = 0
while i < len(args):
a = args[i]
low = str(a).lower()
if low in {"-hash", "--hash", "hash"} and i + 1 < len(args):
override_hash = str(args[i + 1]).strip()
i += 2
continue
rest.append(a)
i += 1
return override_hash, rest
def _get_field(obj: Any, field: str, default: Any = None) -> Any:
    """Read *field* from a dict (by key) or from any other object (by attribute)."""
    if isinstance(obj, dict):
        return obj.get(field, default)
    return getattr(obj, field, default)


def _as_url_list(value: Any) -> list[Any]:
    """Coerce a ``known_urls`` value into a list of entries.

    A bare string is treated as one URL; iterating it directly would yield
    single characters. Any other non-empty value is materialized with
    ``list()``, so a non-iterable value raises ``TypeError`` for the caller's
    error handling to report.
    """
    if not value:
        return []
    if isinstance(value, str):
        return [value]
    return list(value)


def _unique_urls(raw_urls: Sequence[Any]) -> list[str]:
    """Stringify and strip each entry, dropping blanks and repeats (order kept)."""
    seen: set[str] = set()
    unique: list[str] = []
    for raw in raw_urls:
        text = str(raw).strip()
        if text and text not in seen:
            seen.add(text)
            unique.append(text)
    return unique


@register(["get-url", "get-urls", "get_url"])  # aliases
def get_urls(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
    """Emit the known URLs of the selected file from the local library and Hydrus.

    Args:
        result: The selected item, either a dict or an object exposing
            ``file_path``/``path`` and ``hash_hex``. If a non-empty list is
            given, only its first element is used.
        args: Command-line style tokens. A help flag prints the cmdlet
            descriptor; ``-hash <value>`` overrides the hash taken from
            ``result`` and disables the local-library lookup.
        config: Configuration passed through to the storage-path and Hydrus
            client helpers.

    Returns:
        ``0`` when help was shown, URLs were emitted, or the lookup completed
        without finding any URL; ``1`` when the selection carries neither a
        file path nor a usable hash.
    """
    # Help: a help request always ends the call, even when the descriptor
    # cannot be rendered as JSON.
    help_flags = {"-?", "/?", "--help", "-h", "help", "--cmdlet"}
    if any(str(a).lower() in help_flags for a in args):
        try:
            # ``default=vars`` lets plain objects serialize via their
            # attribute dict; it is never called for natively serializable
            # values, so already-working output is unchanged.
            log(json.dumps(CMDLET, ensure_ascii=False, indent=2, default=vars))
        except (TypeError, ValueError) as exc:
            log(f"Unable to render help as JSON: {exc}", file=sys.stderr)
            log(f"usage: {_get_field(CMDLET, 'usage', 'get-url [-hash <sha256>]')}")
        return 0

    override_hash, _ = _parse_hash_and_rest(args)

    # Handle @N selection which creates a list - extract the first item
    if isinstance(result, list) and result:
        result = result[0]

    file_path = _get_field(result, "file_path") or _get_field(result, "path")
    raw_urls: list[Any] = []

    # 1. Try Local Library (skipped when an explicit hash override is given)
    if file_path and not override_hash:
        try:
            path_obj = Path(file_path)
            if path_obj.exists():
                storage_path = get_local_storage_path(config)
                if storage_path:
                    with LocalLibraryDB(storage_path) as db:
                        metadata = db.get_metadata(path_obj)
                        if metadata:
                            raw_urls.extend(_as_url_list(metadata.get("known_urls")))
        except Exception as e:
            # Best-effort lookup: report the problem and carry on to Hydrus.
            log(f"Error checking local library: {e}", file=sys.stderr)

    # 2. Try Hydrus. Both sources are consulted whenever a hash is available
    # and their results are merged.
    if override_hash:
        hash_hex = normalize_hash(override_hash)
    else:
        hash_hex = normalize_hash(_get_field(result, "hash_hex", None))

    if hash_hex:
        try:
            client = hydrus_wrapper.get_client(config)
            if client:
                payload = client.fetch_file_metadata(hashes=[hash_hex], include_file_urls=True)
                items = payload.get("metadata") if isinstance(payload, dict) else None
                if isinstance(items, list) and items and isinstance(items[0], dict):
                    raw_urls.extend(_as_url_list(items[0].get("known_urls")))
        except Exception as exc:
            # Only log error if we didn't find local URLs either
            if not raw_urls:
                log(f"Hydrus lookup failed: {exc}", file=sys.stderr)

    # Normalize before deciding what to report, so blank entries and repeats
    # (within one source or across both) never count as results.
    urls = _unique_urls(raw_urls)

    if urls:
        for text in urls:
            # Emit a dict rather than a bare string: 'title' is the display
            # text and 'url' the data, while the file context travels along
            # for downstream cmdlets (the original author's note names
            # delete-url as the intended consumer).
            ctx.emit(
                {
                    "title": text,  # Display as just the URL
                    "url": text,
                    "source_file": result,  # Pass the original file context
                    "file_path": file_path,
                    "hash_hex": hash_hex,
                }
            )
        return 0

    if not hash_hex and not file_path:
        log("Selected result does not include a file path or Hydrus hash", file=sys.stderr)
        return 1

    ctx.emit("No URLs found.")
    return 0