Files
Medios-Macina/cmdlets/add_tag.py
2025-12-11 12:47:30 -08:00

567 lines
28 KiB
Python

from __future__ import annotations
from typing import Any, Dict, List, Sequence, Optional
from pathlib import Path
import sys
from helper.logger import log
import models
import pipeline as ctx
from ._shared import normalize_result_input, filter_results_by_temp
from helper import hydrus as hydrus_wrapper
from helper.folder_store import write_sidecar, FolderDB
from ._shared import Cmdlet, CmdletArg, SharedArgs, normalize_hash, parse_tag_arguments, expand_tag_groups, parse_cmdlet_args, collapse_namespace_tags, should_show_help, get_field
from config import get_local_storage_path
class Add_Tag(Cmdlet):
    """Class-based add-tag cmdlet with Cmdlet metadata inheritance.

    Tags are routed per item by its storage source: ``hydrus`` items go
    through the Hydrus client, any other named source goes through the
    matching ``FileStorage`` backend, and an item whose source is not a
    registered backend falls back to a sidecar written via ``write_sidecar``.
    """

    def __init__(self) -> None:
        """Declare the cmdlet metadata and register the cmdlet."""
        super().__init__(
            name="add-tag",
            summary="Add a tag to a Hydrus file or write it to a local .tags sidecar.",
            usage="add-tag [-hash <sha256>] [-store <backend>] [-duplicate <format>] [-list <list>[,<list>...]] [--all] <tag>[,<tag>...]",
            arg=[
                SharedArgs.HASH,
                SharedArgs.STORE,
                CmdletArg(
                    "-duplicate",
                    type="string",
                    description="Copy existing tag values to new namespaces. Formats: title:album,artist (explicit) or title,album,artist (inferred)",
                ),
                CmdletArg(
                    "-list",
                    type="string",
                    description="Load predefined tag lists from adjective.json. Comma-separated list names (e.g., -list philosophy,occult).",
                ),
                CmdletArg(
                    "--all",
                    type="flag",
                    description="Include temporary files in tagging (by default, only tags non-temporary files).",
                ),
                CmdletArg(
                    "tags",
                    type="string",
                    required=False,
                    description="One or more tags to add. Comma- or space-separated. Can also use {list_name} syntax. If omitted, uses tags from pipeline payload.",
                    variadic=True,
                ),
            ],
            detail=[
                "- By default, only tags non-temporary files (from pipelines). Use --all to tag everything.",
                "- Without -hash and when the selection is a local file, tags are written to <file>.tags.",
                "- With a Hydrus hash, tags are sent to the 'my tags' service.",
                "- Multiple tags can be comma-separated or space-separated.",
                "- Use -list to include predefined tag lists from adjective.json: -list philosophy,occult",
                "- Tags can also reference lists with curly braces: add-tag {philosophy} \"other:tag\"",
                "- Use -duplicate to copy EXISTING tag values to new namespaces:",
                " Explicit format: -duplicate title:album,artist (copies title: to album: and artist:)",
                " Inferred format: -duplicate title,album,artist (first is source, rest are targets)",
                "- The source namespace must already exist in the file being tagged.",
                "- Target namespaces that already have a value are skipped (not overwritten).",
                "- You can also pass the target hash as a tag token: hash:<sha256>. This overrides -hash and is removed from the tag list.",
            ],
            exec=self.run,
        )
        self.register()

    # ------------------------------------------------------------------
    # Small pure helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _extract_title_tag(tags: List[str]) -> Optional[str]:
        """Return the value of the first non-empty ``title:`` tag, or None.

        The namespace match is case-insensitive; non-string entries and
        ``title:`` tags with a blank value are skipped.
        """
        for tag in tags or []:
            if not isinstance(tag, str) or not tag.lower().startswith("title:"):
                continue
            value = tag.split(":", 1)[1].strip()
            if value:
                return value
        return None

    @staticmethod
    def _apply_title_to_result(res: Any, title_value: Optional[str]) -> None:
        """Update result object/dict title fields and columns in-place.

        Does nothing when ``title_value`` is empty; otherwise behaves exactly
        like ``_update_item_title_fields``.
        """
        if not title_value:
            return
        Add_Tag._update_item_title_fields(res, title_value)

    @staticmethod
    def _matches_target(item: Any, hydrus_hash: Optional[str], file_hash: Optional[str], file_path: Optional[str]) -> bool:
        """Determine whether a result item refers to the given hash/path target.

        Comparison is case-insensitive. Either hash may match any of the
        item's hash-like fields; the path may match any of its path-like
        fields. Empty/None targets never match.
        """
        hash_fields = ["hydrus_hash", "hash", "hash_hex", "file_hash"]
        path_fields = ["path", "file_path", "target"]

        def norm(val: Any) -> Optional[str]:
            return str(val).lower() if val is not None else None

        def read(field: str) -> Any:
            # Dicts are read directly; everything else goes through get_field.
            if isinstance(item, dict):
                return item.get(field)
            return get_field(item, field)

        hashes = [norm(read(field)) for field in hash_fields]
        paths = [norm(read(field)) for field in path_fields]

        for wanted in (hydrus_hash, file_hash):
            if wanted and wanted.lower() in hashes:
                return True
        return bool(file_path) and file_path.lower() in paths

    @staticmethod
    def _update_item_title_fields(item: Any, new_title: str) -> None:
        """Mutate an item to reflect a new title in plain fields and columns.

        For a ``PipeObject`` only the first column is rewritten, and only if
        its label is "title". For a dict every 2-tuple column labelled
        "title" is rewritten; other entries are kept as they are.
        """
        if isinstance(item, models.PipeObject):
            item.title = new_title
            columns = getattr(item, "columns", None)
            if isinstance(columns, list) and columns:
                label, *_ = columns[0]
                if str(label).lower() == "title":
                    columns[0] = (label, new_title)
            return

        if not isinstance(item, dict):
            return

        item["title"] = new_title
        cols = item.get("columns")
        if not isinstance(cols, list):
            return

        def is_title_column(col: Any) -> bool:
            return isinstance(col, tuple) and len(col) == 2 and str(col[0]).lower() == "title"

        if any(is_title_column(col) for col in cols):
            item["columns"] = [(col[0], new_title) if is_title_column(col) else col for col in cols]

    @staticmethod
    def _set_result_tags(res: Any, tags: List[str]) -> None:
        """Store ``tags`` on a result so downstream cmdlets/UI see them.

        A ``PipeObject`` gets both ``tags`` and ``extra['tags']`` (the latter
        only when ``extra`` is a dict); a dict gets ``res['tags']``.
        """
        if isinstance(res, models.PipeObject):
            res.tags = tags
            if isinstance(res.extra, dict):
                res.extra['tags'] = tags
        elif isinstance(res, dict):
            res['tags'] = tags

    @staticmethod
    def _payload_tags(item: Any) -> List[str]:
        """Return a copy of the tags carried by a pipeline item, or ``[]``.

        Lookup order: ``PipeObject.extra['tags']``, ``dict['tags']``,
        ``dict['extra']['tags']``, then a plain ``tags`` attribute. The first
        non-empty hit wins; a string becomes a one-element list and any
        other non-list value is ignored.
        """
        candidates: List[Any] = []
        if isinstance(item, models.PipeObject):
            extra = getattr(item, "extra", None)
            if isinstance(extra, dict):
                candidates.append(extra.get("tags"))
        if isinstance(item, dict):
            candidates.append(item.get("tags"))
            extra = item.get("extra")
            if isinstance(extra, dict):
                candidates.append(extra.get("tags"))
        candidates.append(getattr(item, "tags", None))

        found = next((value for value in candidates if value), None)
        if isinstance(found, str):
            return [found]
        if isinstance(found, list):
            # Copy: the caller appends to this list and must not mutate the
            # piped result's own tag list.
            return list(found)
        return []

    @staticmethod
    def _duplicate_tags(duplicate_arg: str, existing_tags: Sequence[Any], pending_tags: Sequence[Any]) -> List[str]:
        """Build the tags produced by ``-duplicate`` for one item.

        Args:
            duplicate_arg: ``source:target1,target2`` (explicit) or
                ``source,target1,target2`` (inferred).
            existing_tags: Tags the item already has; the source values are
                read from here.
            pending_tags: Tags already queued for this item, used only to
                avoid emitting duplicates.

        Returns:
            New ``target:value`` tags. Target namespaces that already have a
            value on the item are skipped, as the cmdlet help promises.
        """
        colon_parts = duplicate_arg.split(':')
        if len(colon_parts) > 1:
            source_ns, raw_targets = colon_parts[0], colon_parts[1].split(',')
        else:
            source_ns, *raw_targets = duplicate_arg.split(',')
        source_ns = source_ns.strip()
        targets = [name.strip() for name in raw_targets if name.strip()]
        if not source_ns or not targets:
            return []

        text_tags = [t for t in existing_tags if isinstance(t, str)]
        occupied = {t.split(':', 1)[0].lower() for t in text_tags if ':' in t}
        known = {t.lower() for t in text_tags}
        known.update(t.lower() for t in pending_tags if isinstance(t, str))

        prefix = source_ns.lower() + ':'
        new_tags: List[str] = []
        for tag in text_tags:
            if not tag.lower().startswith(prefix):
                continue
            value = tag.split(':', 1)[1]
            for target_ns in targets:
                if target_ns.lower() in occupied:
                    continue  # never overwrite a namespace that has a value
                candidate = f"{target_ns}:{value}"
                if candidate.lower() in known:
                    continue
                known.add(candidate.lower())
                new_tags.append(candidate)
        return new_tags

    def _write_sidecar_fallback(self, res: Any, file_path: Any, file_hash: str, tags: List[str], storage_source: Any) -> bool:
        """Write ``tags`` to a sidecar when ``storage_source`` is not a backend.

        On success the result's tags are extended (order-preserving, without
        duplicates) and True is returned. Returns False, after logging a
        warning, when there is no path to write next to or the write fails.
        """
        if not file_path:
            log(f"[add_tag] Warning: Storage backend '{storage_source}' not found in config", file=sys.stderr)
            return False
        try:
            sidecar_path = write_sidecar(Path(file_path), tags, [], file_hash)
        except Exception as exc:
            log(f"[add_tag] Warning: Failed to write sidecar for {file_path}: {exc}", file=sys.stderr)
            return False
        log(f"[add_tag] Wrote {len(tags)} tag(s) to sidecar: {sidecar_path}", file=sys.stderr)

        current = get_field(res, "tags") or []
        if isinstance(current, str):
            current = [current]
        merged = list(dict.fromkeys(list(current) + list(tags)))
        self._set_result_tags(res, merged)
        return True

    # ------------------------------------------------------------------
    # UI refresh helpers (best-effort: a failure here never fails tagging)
    # ------------------------------------------------------------------

    def _refresh_result_table_title(self, new_title: str, hydrus_hash: Optional[str], file_hash: Optional[str], file_path: Optional[str]) -> None:
        """Refresh the cached result table with an updated title and redisplay it.

        Items of the last result table that match the target get the new
        title; if at least one matched, a copy of the table is rebuilt from
        the items and installed as the overlay. Nothing happens when there is
        no cached table, no cached items, or no match.
        """
        try:
            last_table = ctx.get_last_result_table()
            items = ctx.get_last_result_items()
            if not last_table or not items:
                return

            updated_items = []
            match_found = False
            for item in items:
                try:
                    if self._matches_target(item, hydrus_hash, file_hash, file_path):
                        self._update_item_title_fields(item, new_title)
                        match_found = True
                except Exception:
                    # One malformed cached item must not stop the others
                    # from being refreshed.
                    pass
                updated_items.append(item)
            if not match_found:
                return

            new_table = last_table.copy_with_title(getattr(last_table, "title", ""))
            for item in updated_items:
                new_table.add_result(item)
            ctx.set_last_result_table_overlay(new_table, updated_items)
        except Exception as exc:
            log(f"[add_tag] Warning: Failed to refresh result table: {exc}", file=sys.stderr)

    def _refresh_tags_view(self, res: Any, hydrus_hash: Optional[str], file_hash: Optional[str], file_path: Optional[str], config: Dict[str, Any], store: Optional[str] = None) -> None:
        """Refresh tag display via get-tag. Prefer current subject; fall back to direct hash refresh.

        Args:
            res: The result that was just tagged.
            hydrus_hash: Hydrus hash of the target, if any.
            file_hash: File hash of the target, if any.
            file_path: Path of the target, if any.
            config: Cmdlet configuration passed through to get-tag.
            store: Backend name to pass as ``-store``. When omitted, the
                result's own ``store`` field is used; if neither is known,
                ``-store`` is left out rather than guessed.
        """
        try:
            from cmdlets import get_tag as get_tag_cmd  # type: ignore
        except Exception:
            return

        target_hash = hydrus_hash or file_hash
        store_name = store or (get_field(res, "store") if res is not None else None)

        refresh_args: List[str] = []
        if target_hash:
            refresh_args = ["-hash", target_hash]
            if store_name:
                refresh_args += ["-store", str(store_name)]

        try:
            subject = ctx.get_last_result_subject()
            if subject and self._matches_target(subject, hydrus_hash, file_hash, file_path):
                get_tag_cmd._run(subject, refresh_args, config)
                return
        except Exception:
            # Fall through to the direct hash refresh below.
            pass

        if target_hash:
            try:
                get_tag_cmd._run(res, refresh_args, config)
            except Exception as exc:
                log(f"[add_tag] Warning: Failed to refresh tag view: {exc}", file=sys.stderr)

    # ------------------------------------------------------------------
    # Entry point
    # ------------------------------------------------------------------

    def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
        """Add a tag to a file with smart filtering for pipeline results.

        Args:
            result: Piped result(s); normalised with ``normalize_result_input``.
            args: Raw cmdlet arguments.
            config: Application configuration.

        Returns:
            0 on success (including the help request), 1 when there is
            nothing to tag or no tags were supplied.
        """
        if should_show_help(args):
            log(f"Cmdlet: {self.name}\nSummary: {self.summary}\nUsage: {self.usage}")
            return 0

        parsed = parse_cmdlet_args(args, self)
        include_temp = parsed.get("all", False)
        hash_override = normalize_hash(parsed.get("hash"))
        store_override = parsed.get("store") or parsed.get("storage")
        duplicate_arg = parsed.get("duplicate")

        results = normalize_result_input(result)
        has_piped_input = bool(results)
        if not include_temp:
            results = filter_results_by_temp(results, include_temp=False)

        # Collect raw tags: explicit arguments first, else the first result's
        # payload. Always work on a private list because -list appends to it.
        raw_tags = parsed.get("tags", [])
        if isinstance(raw_tags, str):
            raw_tags = [raw_tags]
        else:
            raw_tags = list(raw_tags or [])
        if not raw_tags and results:
            raw_tags = self._payload_tags(results[0])

        # -list names are converted to the {list} syntax and expanded below.
        list_arg = parsed.get("list")
        if list_arg:
            raw_tags.extend(f"{{{name.strip()}}}" for name in list_arg.split(',') if name.strip())

        tags_to_add = expand_tag_groups(parse_tag_arguments(raw_tags))

        # A "hash:<sha256>" token names the target; it is never stored as a tag.
        extracted_hash = None
        plain_tags: List[str] = []
        for tag in tags_to_add:
            if isinstance(tag, str) and tag.lower().startswith("hash:"):
                hash_val = tag.partition(":")[2].strip()
                if hash_val:
                    extracted_hash = normalize_hash(hash_val)
                continue
            plain_tags.append(tag)
        tags_to_add = plain_tags

        # The hash token takes the place of -hash, as documented in the help.
        if extracted_hash:
            hash_override = extracted_hash

        # With nothing piped in, an explicit hash stands in as a minimal result.
        if not has_piped_input and hash_override:
            synthetic: Dict[str, Any] = {"hash": hash_override, "is_temp": False}
            if store_override:
                synthetic["store"] = store_override
            results = [synthetic]

        if not results:
            log("No valid files to tag (all results were temporary; use --all to include temporary files)", file=sys.stderr)
            return 1

        # -duplicate derives its tags per item, so it is valid on its own.
        if not tags_to_add and not duplicate_arg:
            log("No tags provided to add", file=sys.stderr)
            return 1

        total_new_tags = 0
        total_modified = 0
        for res in results:
            file_path = get_field(res, "path")
            existing_tags = get_field(res, "tags") or []
            if not existing_tags:
                extra = get_field(res, "extra", {})
                existing_tags = (extra.get("tags") if isinstance(extra, dict) else None) or []
            file_hash = get_field(res, "hash") or get_field(res, "file_hash") or get_field(res, "hash_hex") or ""
            storage_source = get_field(res, "store") or get_field(res, "storage") or get_field(res, "storage_source") or get_field(res, "origin")

            # CLI values fill gaps left by the result; they are applied before
            # inference so an explicit -store is not shadowed by a guess.
            if hash_override and not file_hash:
                file_hash = hash_override
            if store_override and not storage_source:
                storage_source = store_override
            hydrus_hash = get_field(res, "hydrus_hash") or file_hash

            if not storage_source:
                if file_path:
                    storage_source = 'local'
                elif file_hash and file_hash != "unknown":
                    storage_source = 'hydrus'

            original_tags_lower = {str(t).lower() for t in existing_tags if isinstance(t, str)}
            original_title = self._extract_title_tag(list(existing_tags))

            if not file_path and not file_hash:
                log("[add_tag] Warning: Result has neither path nor hash available, skipping", file=sys.stderr)
                ctx.emit(res)
                continue

            # Per-item tag list: -duplicate results depend on this item's own
            # tags and must not leak into the following items.
            item_tags = list(tags_to_add)
            if duplicate_arg:
                item_tags.extend(self._duplicate_tags(duplicate_arg, existing_tags, item_tags))

            if not storage_source:
                # Unknown source: pass through without writing sidecars.
                ctx.emit(res)
                continue

            if not item_tags:
                log("[add_tag] Warning: No tags to add for this item (nothing to duplicate), skipping", file=sys.stderr)
                ctx.emit(res)
                continue

            removed_tags: List[str] = []
            new_tags_added: List[str] = []
            final_tags = list(existing_tags) if existing_tags else []

            if str(storage_source).lower() == 'hydrus':
                if not file_hash:
                    log("[add_tag] Warning: No hash available for Hydrus file, skipping", file=sys.stderr)
                else:
                    try:
                        hydrus_client = hydrus_wrapper.get_client(config)
                        service_name = hydrus_wrapper.get_tag_service_name(config)

                        # A namespaced tag replaces the other values of its
                        # namespace, except values that are themselves being added.
                        adding_lower = {t.lower() for t in item_tags if isinstance(t, str)}
                        namespaces = {t.split(':', 1)[0] for t in item_tags if isinstance(t, str) and ':' in t}
                        removed_tags = sorted({
                            t for t in existing_tags
                            if isinstance(t, str)
                            and ':' in t
                            and t.split(':', 1)[0] in namespaces
                            and t.lower() not in adding_lower
                        })

                        log(f"[add_tag] Adding {len(item_tags)} tag(s) to Hydrus file: {file_hash}", file=sys.stderr)
                        hydrus_client.add_tags(file_hash, item_tags, service_name)
                        if removed_tags:
                            hydrus_client.delete_tags(file_hash, removed_tags, service_name)

                        total_new_tags += len(item_tags)
                        total_modified += 1
                        log(f"[add_tag] ✓ Added {len(item_tags)} tag(s) to Hydrus", file=sys.stderr)

                        # Local estimate of the outcome, used for the display
                        # refresh when the backend cannot be re-read below.
                        new_tags_added = [t for t in item_tags if str(t).lower() not in original_tags_lower]
                        final_tags = [t for t in existing_tags if t not in removed_tags] + new_tags_added

                        # Best-effort: re-read the stored tags for accurate display.
                        try:
                            from helper.store import FileStorage
                            storage = FileStorage(config)
                            if storage and storage_source in storage.list_backends():
                                backend = storage[storage_source]
                                refreshed_tags, _ = backend.get_tag(file_hash)
                                if refreshed_tags is not None:
                                    final_tags = refreshed_tags
                                    new_tags_added = [t for t in refreshed_tags if str(t).lower() not in original_tags_lower]
                                    self._set_result_tags(res, refreshed_tags)
                                    self._apply_title_to_result(res, self._extract_title_tag(refreshed_tags))
                        except Exception as exc:
                            log(f"[add_tag] Warning: Could not refresh tags from Hydrus backend: {exc}", file=sys.stderr)
                    except Exception as e:
                        log(f"[add_tag] Warning: Failed to add tags to Hydrus: {e}", file=sys.stderr)
            else:
                # Folder-style storage: delegate to the registered backend, or
                # fall back to a sidecar when the name is not a backend.
                from helper.store import FileStorage
                storage = FileStorage(config)
                backend = None
                try:
                    if storage and storage_source in storage.list_backends():
                        backend = storage[storage_source]
                except KeyError:
                    backend = None  # treat a failed lookup as an absent backend

                if backend is None:
                    if self._write_sidecar_fallback(res, file_path, file_hash, item_tags, storage_source):
                        total_new_tags += len(item_tags)
                        total_modified += 1
                else:
                    try:
                        if file_hash and backend.add_tag(file_hash, item_tags):
                            # Re-read to get the merged tag set the backend stored.
                            refreshed_tags, _ = backend.get_tag(file_hash)
                            if refreshed_tags:
                                self._set_result_tags(res, refreshed_tags)
                                self._apply_title_to_result(res, self._extract_title_tag(refreshed_tags))
                                new_tags_added = [t for t in refreshed_tags if str(t).lower() not in original_tags_lower]
                                total_new_tags += len(new_tags_added)
                                if new_tags_added:
                                    total_modified += 1
                                log(f"[add_tag] Added {len(new_tags_added)} new tag(s); {len(refreshed_tags)} total tag(s) stored in {storage_source}", file=sys.stderr)
                                final_tags = refreshed_tags
                        else:
                            log(f"[add_tag] Warning: Failed to add tags to {storage_source}", file=sys.stderr)
                    except Exception as exc:
                        # Same policy as the Hydrus branch: one failing item
                        # is reported and the remaining items are still processed.
                        log(f"[add_tag] Warning: Failed to add tags to {storage_source}: {exc}", file=sys.stderr)

            # A changed title means the cached result table shows a stale name.
            final_title = self._extract_title_tag(final_tags)
            if final_title and (not original_title or final_title.lower() != original_title.lower()):
                self._refresh_result_table_title(final_title, hydrus_hash or file_hash, file_hash, file_path)

            if new_tags_added or removed_tags:
                self._refresh_tags_view(res, hydrus_hash, file_hash, file_path, config, store=storage_source)

            ctx.emit(res)

        log(f"[add_tag] Added {total_new_tags} new tag(s) across {len(results)} item(s); modified {total_modified} item(s)", file=sys.stderr)
        return 0
# Module-level cmdlet instance; constructing it runs Add_Tag.__init__, which
# calls self.register().
CMDLET = Add_Tag()