This commit is contained in:
nose
2025-12-11 12:47:30 -08:00
parent 6b05dc5552
commit 65d12411a2
92 changed files with 17447 additions and 14308 deletions


@@ -20,8 +20,8 @@ from typing import Any, Dict, List, Optional, Sequence, Tuple
import pipeline as ctx
from helper import hydrus
from helper.local_library import read_sidecar, write_sidecar, find_sidecar, LocalLibraryDB
from ._shared import normalize_hash, looks_like_hash, Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args
from helper.folder_store import read_sidecar, write_sidecar, find_sidecar, FolderDB
from ._shared import normalize_hash, looks_like_hash, Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args, get_field
from config import get_local_storage_path
@@ -71,33 +71,6 @@ class TagItem:
}
def _extract_my_tags_from_hydrus_meta(meta: Dict[str, Any], service_key: Optional[str], service_name: str) -> List[str]:
"""Extract current tags from Hydrus metadata dict.
Prefers display_tags (includes siblings/parents, excludes deleted).
Falls back to storage_tags status '0' (current).
"""
tags_payload = meta.get("tags")
if not isinstance(tags_payload, dict):
return []
svc_data = None
if service_key:
svc_data = tags_payload.get(service_key)
if not isinstance(svc_data, dict):
return []
# Prefer display_tags (Hydrus computes siblings/parents)
display = svc_data.get("display_tags")
if isinstance(display, list) and display:
return [str(t) for t in display if isinstance(t, (str, bytes)) and str(t).strip()]
# Fallback to storage_tags status '0' (current)
storage = svc_data.get("storage_tags")
if isinstance(storage, dict):
current_list = storage.get("0") or storage.get(0)
if isinstance(current_list, list):
return [str(t) for t in current_list if isinstance(t, (str, bytes)) and str(t).strip()]
return []
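For reference, a minimal sketch of the Hydrus metadata shape this removed helper consumed; the service key and tag values below are illustrative, not real Hydrus data:

meta = {
    "file_id": 123,
    "tags": {
        "6c6f63616c2074616773": {                      # tag service key (hypothetical)
            "display_tags": ["creator:someone", "title:example"],
            "storage_tags": {"0": ["creator:someone", "title:example"]},
        }
    },
}
# _extract_my_tags_from_hydrus_meta(meta, "6c6f63616c2074616773", "my tags")
# would return ["creator:someone", "title:example"]: display_tags is preferred,
# with storage_tags status "0" (current) as the fallback.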
def _emit_tags_as_table(
tags_list: List[str],
hash_hex: Optional[str],
@@ -316,12 +289,12 @@ def _read_sidecar_fallback(p: Path) -> tuple[Optional[str], List[str], List[str]
Format:
- Lines with "hash:" prefix: file hash
- Lines with "known_url:" or "url:" prefix: URLs
- Lines with "url:" or "url:" prefix: url
- Lines with "relationship:" prefix: ignored (internal relationships)
- Lines with "key:", "namespace:value" format: treated as namespace tags
- Plain lines without colons: freeform tags
Excluded namespaces (treated as metadata, not tags): hash, known_url, url, relationship
Excluded namespaces (treated as metadata, not tags): hash, url, relationship
"""
try:
raw = p.read_text(encoding="utf-8", errors="ignore")
@@ -332,7 +305,7 @@ def _read_sidecar_fallback(p: Path) -> tuple[Optional[str], List[str], List[str]
h: Optional[str] = None
# Namespaces to exclude from tags
excluded_namespaces = {"hash", "known_url", "url", "relationship"}
excluded_namespaces = {"hash", "url", "url", "relationship"}
for line in raw.splitlines():
s = line.strip()
@@ -344,7 +317,7 @@ def _read_sidecar_fallback(p: Path) -> tuple[Optional[str], List[str], List[str]
if low.startswith("hash:"):
h = s.split(":", 1)[1].strip() if ":" in s else h
# Check if this is a URL line
elif low.startswith("known_url:") or low.startswith("url:"):
elif low.startswith("url:") or low.startswith("url:"):
val = s.split(":", 1)[1].strip() if ":" in s else ""
if val:
u.append(val)
@@ -361,12 +334,12 @@ def _read_sidecar_fallback(p: Path) -> tuple[Optional[str], List[str], List[str]
return h, t, u
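To make the fallback sidecar format concrete, here is a hypothetical sidecar file and the tuple the parser would return (file name, hash, and tag values are made up):

# example.mp4.tags (hypothetical contents):
#   hash:0123abcd0123abcd0123abcd0123abcd0123abcd0123abcd0123abcd0123abcd
#   url:https://example.com/page
#   title:Example Title
#   creator:Someone
#   freeform tag
#
# _read_sidecar_fallback(Path("example.mp4.tags")) would return roughly:
#   ("0123abcd...0123abcd",                                           # hash
#    ["title:Example Title", "creator:Someone", "freeform tag"],      # tags
#    ["https://example.com/page"])                                    # URLs
# hash: and url: lines are treated as metadata rather than tags, and
# relationship: lines are ignored.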
def _write_sidecar(p: Path, media: Path, tag_list: List[str], known_urls: List[str], hash_in_sidecar: Optional[str]) -> Path:
def _write_sidecar(p: Path, media: Path, tag_list: List[str], url: List[str], hash_in_sidecar: Optional[str]) -> Path:
"""Write tags to sidecar file and handle title-based renaming.
Returns the new media path if renamed, otherwise returns the original media path.
"""
success = write_sidecar(media, tag_list, known_urls, hash_in_sidecar)
success = write_sidecar(media, tag_list, url, hash_in_sidecar)
if success:
_apply_result_updates_from_tags(None, tag_list)
# Check if we should rename the file based on title tag
@@ -381,8 +354,8 @@ def _write_sidecar(p: Path, media: Path, tag_list: List[str], known_urls: List[s
if hash_in_sidecar:
lines.append(f"hash:{hash_in_sidecar}")
lines.extend(ordered)
for u in known_urls:
lines.append(f"known_url:{u}")
for u in url:
lines.append(f"url:{u}")
try:
p.write_text("\n".join(lines) + "\n", encoding="utf-8")
# Check if we should rename the file based on title tag
@@ -414,16 +387,16 @@ def _emit_tag_payload(source: str, tags_list: List[str], *, hash_value: Optional
label = None
if store_label:
label = store_label
elif ctx._PIPE_ACTIVE:
elif ctx.get_stage_context() is not None:
label = "tags"
if label:
ctx.store_value(label, payload)
if ctx._PIPE_ACTIVE and label.lower() != "tags":
if ctx.get_stage_context() is not None and label.lower() != "tags":
ctx.store_value("tags", payload)
# Emit individual TagItem objects so they can be selected by bare index
# When in pipeline, emit individual TagItem objects
if ctx._PIPE_ACTIVE:
if ctx.get_stage_context() is not None:
for idx, tag_name in enumerate(tags_list, start=1):
tag_item = TagItem(
tag_name=tag_name,
@@ -1113,7 +1086,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Try local sidecar if no tags present on result
if not identifier_tags:
file_path = get_field(result, "target", None) or get_field(result, "path", None) or get_field(result, "file_path", None) or get_field(result, "filename", None)
file_path = get_field(result, "target", None) or get_field(result, "path", None) or get_field(result, "filename", None)
if isinstance(file_path, str) and file_path and not file_path.lower().startswith(("http://", "https://")):
try:
media_path = Path(str(file_path))
@@ -1226,103 +1199,35 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
emit_mode = emit_requested or bool(store_key)
store_label = (store_key.strip() if store_key and store_key.strip() else None)
# Check Hydrus availability
hydrus_available, _ = hydrus.is_available(config)
# Get hash and store from result
file_hash = hash_hex
storage_source = get_field(result, "store") or get_field(result, "storage") or get_field(result, "origin")
# Try to find path in result object
local_path = get_field(result, "target", None) or get_field(result, "path", None) or get_field(result, "file_path", None)
if not file_hash:
log("No hash available in result", file=sys.stderr)
return 1
# Determine if local file
is_local_file = False
media: Optional[Path] = None
if local_path and isinstance(local_path, str) and not local_path.startswith(("http://", "https://")):
is_local_file = True
try:
media = Path(str(local_path))
except Exception:
media = None
if not storage_source:
log("No storage backend specified in result", file=sys.stderr)
return 1
# Try Hydrus first (always prioritize if available and has hash)
use_hydrus = False
hydrus_meta = None # Cache the metadata from first fetch
client = None
if hash_hex and hydrus_available:
try:
client = hydrus.get_client(config)
payload = client.fetch_file_metadata(hashes=[str(hash_hex)], include_service_keys_to_tags=True, include_file_urls=False)
items = payload.get("metadata") if isinstance(payload, dict) else None
if isinstance(items, list) and items:
meta = items[0] if isinstance(items[0], dict) else None
# Only accept file if it has a valid file_id (not None)
if isinstance(meta, dict) and meta.get("file_id") is not None:
use_hydrus = True
hydrus_meta = meta # Cache for tag extraction
except Exception:
pass
# Get tags - try Hydrus first, fallback to sidecar
current = []
service_name = ""
service_key = None
source = "unknown"
if use_hydrus and hash_hex and hydrus_meta:
try:
# Use cached metadata from above, don't fetch again
service_name = hydrus.get_tag_service_name(config)
if client is None:
client = hydrus.get_client(config)
service_key = hydrus.get_tag_service_key(client, service_name)
current = _extract_my_tags_from_hydrus_meta(hydrus_meta, service_key, service_name)
source = "hydrus"
except Exception as exc:
log(f"Warning: Failed to extract tags from Hydrus: {exc}", file=sys.stderr)
# Fallback to local sidecar or local DB if no tags
if not current and is_local_file and media and media.exists():
try:
# First try local library DB
library_root = get_local_storage_path(config)
if library_root:
try:
with LocalLibraryDB(library_root) as db:
db_tags = db.get_tags(media)
if db_tags:
current = db_tags
source = "local_db"
except Exception as exc:
log(f"[get_tag] DB lookup failed, trying sidecar: {exc}", file=sys.stderr)
# Fall back to sidecar if DB didn't have tags
if not current:
sidecar_path = find_sidecar(media)
if sidecar_path and sidecar_path.exists():
try:
_, current, _ = read_sidecar(sidecar_path)
except Exception:
_, current, _ = _read_sidecar_fallback(sidecar_path)
if current:
source = "sidecar"
except Exception as exc:
log(f"Warning: Failed to load tags from local storage: {exc}", file=sys.stderr)
# Fallback to tags in the result object if Hydrus/local lookup returned nothing
if not current:
# Check if result has 'tags' attribute (PipeObject)
if hasattr(result, 'tags') and getattr(result, 'tags', None):
current = getattr(result, 'tags')
source = "pipeline_result"
# Check if result is a dict with 'tags' key
elif isinstance(result, dict) and 'tags' in result:
tags_val = result['tags']
if isinstance(tags_val, list):
current = tags_val
source = "pipeline_result"
source = "pipeline_result"
# Error if no tags found
if not current:
log("No tags found", file=sys.stderr)
# Get tags using storage backend
try:
from helper.store import FileStorage
storage = FileStorage(config)
backend = storage[storage_source]
current, source = backend.get_tag(file_hash, config=config)
if not current:
log("No tags found", file=sys.stderr)
return 1
service_name = ""
except KeyError:
log(f"Storage backend '{storage_source}' not found", file=sys.stderr)
return 1
except Exception as exc:
log(f"Failed to get tags: {exc}", file=sys.stderr)
return 1
# Always output to ResultTable (pipeline mode only)
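A minimal sketch of how the new backend lookup is exercised, assuming a backend named "hydrus" is registered with FileStorage (the store name and hash are illustrative):

from helper.store import FileStorage

storage = FileStorage(config)                 # config dict as passed to the cmdlet
backend = storage["hydrus"]                   # KeyError if the backend name is unknown
tags, source = backend.get_tag("0123abcd...", config=config)
# tags   -> list of tag strings (empty if the file has none)
# source -> label for where the tags came from, e.g. "hydrus" or "sidecar"

This replaces the hand-rolled Hydrus/sidecar/local-DB fallback chain above: each backend now decides how to resolve tags for a given hash.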
@@ -1383,33 +1288,106 @@ except Exception:
_SCRAPE_CHOICES = ["itunes", "openlibrary", "googlebooks", "google", "musicbrainz"]
CMDLET = Cmdlet(
name="get-tag",
summary="Get tags from Hydrus or local sidecar metadata",
usage="get-tag [-hash <sha256>] [--store <key>] [--emit] [-scrape <url|provider>]",
aliases=["tags"],
args=[
SharedArgs.HASH,
CmdletArg(
name="-store",
type="string",
description="Store result to this key for pipeline",
alias="store"
),
CmdletArg(
name="-emit",
type="flag",
description="Emit result without interactive prompt (quiet mode)",
alias="emit-only"
),
CmdletArg(
name="-scrape",
type="string",
description="Scrape metadata from URL or provider name (returns tags as JSON or table)",
required=False,
choices=_SCRAPE_CHOICES,
)
]
)
class Get_Tag(Cmdlet):
"""Class-based get-tag cmdlet with self-registration."""
def __init__(self) -> None:
"""Initialize get-tag cmdlet."""
super().__init__(
name="get-tag",
summary="Get tags from Hydrus or local sidecar metadata",
usage="get-tag [-hash <sha256>] [--store <key>] [--emit] [-scrape <url|provider>]",
alias=["tags"],
arg=[
SharedArgs.HASH,
CmdletArg(
name="-store",
type="string",
description="Store result to this key for pipeline",
alias="store"
),
CmdletArg(
name="-emit",
type="flag",
description="Emit result without interactive prompt (quiet mode)",
alias="emit-only"
),
CmdletArg(
name="-scrape",
type="string",
description="Scrape metadata from URL or provider name (returns tags as JSON or table)",
required=False,
choices=_SCRAPE_CHOICES,
)
],
detail=[
"- Retrieves tags for a file from:",
" Hydrus: Using file hash if available",
" Local: From sidecar files or local library database",
"- Options:",
" -hash: Override hash to look up in Hydrus",
" -store: Store result to key for downstream pipeline",
" -emit: Quiet mode (no interactive selection)",
" -scrape: Scrape metadata from URL or metadata provider",
],
exec=self.run,
)
self.register()
def run(self, result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
"""Execute get-tag cmdlet."""
# Parse arguments
parsed = parse_cmdlet_args(args, self)
# Get hash and store from parsed args or result
hash_override = parsed.get("hash")
file_hash = hash_override or get_field(result, "hash") or get_field(result, "file_hash") or get_field(result, "hash_hex")
storage_source = parsed.get("store") or get_field(result, "store") or get_field(result, "storage") or get_field(result, "origin")
if not file_hash:
log("No hash available in result", file=sys.stderr)
return 1
if not storage_source:
log("No storage backend specified in result", file=sys.stderr)
return 1
# Get tags using storage backend
try:
from helper.store import FileStorage
storage_obj = FileStorage(config)
backend = storage_obj[storage_source]
current, source = backend.get_tag(file_hash, config=config)
if not current:
log("No tags found", file=sys.stderr)
return 1
# Build table and emit
item_title = get_field(result, "title") or file_hash[:16]
_emit_tags_as_table(
tags_list=current,
hash_hex=file_hash,
source=source,
service_name="",
config=config,
item_title=item_title,
file_path=None,
subject=result,
)
return 0
except KeyError:
log(f"Storage backend '{storage_source}' not found", file=sys.stderr)
return 1
except Exception as exc:
log(f"Failed to get tags: {exc}", file=sys.stderr)
import traceback
traceback.print_exc(file=sys.stderr)
return 1
# Create and register the cmdlet
CMDLET = Get_Tag()
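A hypothetical direct invocation for testing; real calls go through the registered cmdlet pipeline, and this assumes get_field resolves keys on plain dict results:

config = {}                                   # normally the loaded application config
result = {"hash": "0123abcd...", "store": "hydrus", "title": "Example"}
exit_code = CMDLET.run(result, [], config)    # 0 on success, 1 if hash, backend, or tags are missing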