This commit is contained in:
nose
2025-12-06 00:10:19 -08:00
parent 5482ee5586
commit f29709d951
20 changed files with 1353 additions and 419 deletions

View File

@@ -21,7 +21,7 @@ from typing import Any, Dict, List, Optional, Sequence, Tuple
import pipeline as ctx
from helper import hydrus
from helper.local_library import read_sidecar, write_sidecar, find_sidecar, LocalLibraryDB
from ._shared import normalize_hash, Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args
from ._shared import normalize_hash, looks_like_hash, Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args
from config import get_local_storage_path
@@ -105,7 +105,8 @@ def _emit_tags_as_table(
service_name: Optional[str] = None,
config: Dict[str, Any] = None,
item_title: Optional[str] = None,
file_path: Optional[str] = None
file_path: Optional[str] = None,
subject: Optional[Any] = None,
) -> None:
"""Emit tags as TagItem objects and display via ResultTable.
@@ -144,9 +145,9 @@ def _emit_tags_as_table(
# Use overlay mode so it doesn't push the previous search to history stack
# This makes get-tag behave like a transient view
try:
ctx.set_last_result_table_overlay(table, tag_items)
ctx.set_last_result_table_overlay(table, tag_items, subject)
except AttributeError:
ctx.set_last_result_table(table, tag_items)
ctx.set_last_result_table(table, tag_items, subject)
# Note: CLI will handle displaying the table via ResultTable formatting
def _summarize_tags(tags_list: List[str], limit: int = 8) -> str:
"""Create a summary of tags for display."""
@@ -443,7 +444,10 @@ def _emit_tag_payload(source: str, tags_list: List[str], *, hash_value: Optional
def _extract_scrapable_identifiers(tags_list: List[str]) -> Dict[str, str]:
"""Extract scrapable identifiers from tags."""
identifiers = {}
scrapable_prefixes = {'openlibrary', 'isbn_10', 'isbn', 'musicbrainz', 'musicbrainzalbum', 'imdb', 'tmdb', 'tvdb'}
scrapable_prefixes = {
'openlibrary', 'isbn', 'isbn_10', 'isbn_13',
'musicbrainz', 'musicbrainzalbum', 'imdb', 'tmdb', 'tvdb'
}
for tag in tags_list:
if not isinstance(tag, str) or ':' not in tag:
@@ -453,9 +457,18 @@ def _extract_scrapable_identifiers(tags_list: List[str]) -> Dict[str, str]:
if len(parts) != 2:
continue
key = parts[0].strip().lower()
key_raw = parts[0].strip().lower()
key = key_raw.replace('-', '_')
if key == 'isbn10':
key = 'isbn_10'
elif key == 'isbn13':
key = 'isbn_13'
value = parts[1].strip()
# Normalize ISBN values by removing hyphens for API friendliness
if key.startswith('isbn'):
value = value.replace('-', '')
if key in scrapable_prefixes and value:
identifiers[key] = value
@@ -965,8 +978,8 @@ def _perform_scraping(tags_list: List[str]) -> List[str]:
if olid:
log(f"Scraping OpenLibrary: {olid}")
new_tags.extend(_scrape_openlibrary_metadata(olid))
elif 'isbn_10' in identifiers or 'isbn' in identifiers:
isbn = identifiers.get('isbn_10') or identifiers.get('isbn')
elif 'isbn_13' in identifiers or 'isbn_10' in identifiers or 'isbn' in identifiers:
isbn = identifiers.get('isbn_13') or identifiers.get('isbn_10') or identifiers.get('isbn')
if isbn:
log(f"Scraping ISBN: {isbn}")
new_tags.extend(_scrape_isbn_metadata(isbn))
@@ -991,13 +1004,13 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
Usage:
get-tag [-hash <sha256>] [--store <key>] [--emit]
get-tag -scrape <url>
get-tag -scrape <url|provider>
Options:
-hash <sha256>: Override hash to use instead of result's hash_hex
--store <key>: Store result to this key for pipeline
--emit: Emit result without interactive prompt (quiet mode)
-scrape <url>: Scrape metadata from URL (returns tags as JSON)
-scrape <url|provider>: Scrape metadata from URL or provider name (itunes, openlibrary, googlebooks)
"""
# Helper to get field from both dict and object
def get_field(obj: Any, field: str, default: Any = None) -> Any:
@@ -1008,13 +1021,26 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Parse arguments using shared parser
parsed_args = parse_cmdlet_args(args, CMDLET)
# Detect if -scrape flag was provided without a value (parse_cmdlet_args skips missing values)
scrape_flag_present = any(str(arg).lower() in {"-scrape", "--scrape"} for arg in args)
# Extract values
hash_override = normalize_hash(parsed_args.get("hash"))
hash_override_raw = parsed_args.get("hash")
hash_override = normalize_hash(hash_override_raw)
store_key = parsed_args.get("store")
emit_requested = parsed_args.get("emit", False)
scrape_url = parsed_args.get("scrape")
scrape_requested = scrape_url is not None
scrape_requested = scrape_flag_present or scrape_url is not None
if hash_override_raw is not None:
if not hash_override or not looks_like_hash(hash_override):
log("Invalid hash format: expected 64 hex characters", file=sys.stderr)
return 1
if scrape_requested and (not scrape_url or str(scrape_url).strip() == ""):
log("-scrape requires a URL or provider name", file=sys.stderr)
return 1
# Handle URL or provider scraping mode
if scrape_requested and scrape_url:
@@ -1041,18 +1067,51 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
log(f"Unknown metadata provider: {scrape_url}", file=sys.stderr)
return 1
# Determine query from title on the result or filename
# Prefer identifier tags (ISBN/OLID/etc.) when available; fallback to title/filename
identifier_tags: List[str] = []
result_tags = get_field(result, "tags", None)
if isinstance(result_tags, list):
identifier_tags = [str(t) for t in result_tags if isinstance(t, (str, bytes))]
# Try local sidecar if no tags present on result
if not identifier_tags:
file_path = get_field(result, "target", None) or get_field(result, "path", None) or get_field(result, "file_path", None) or get_field(result, "filename", None)
if isinstance(file_path, str) and file_path and not file_path.lower().startswith(("http://", "https://")):
try:
media_path = Path(str(file_path))
if media_path.exists():
tags_from_sidecar = read_sidecar(media_path)
if isinstance(tags_from_sidecar, list):
identifier_tags = [str(t) for t in tags_from_sidecar if isinstance(t, (str, bytes))]
except Exception:
pass
identifiers = _extract_scrapable_identifiers(identifier_tags)
identifier_query: Optional[str] = None
if identifiers:
if provider.name in {"openlibrary", "googlebooks", "google"}:
identifier_query = identifiers.get("isbn_13") or identifiers.get("isbn_10") or identifiers.get("isbn") or identifiers.get("openlibrary")
elif provider.name == "itunes":
identifier_query = identifiers.get("musicbrainz") or identifiers.get("musicbrainzalbum")
# Determine query from identifier first, else title on the result or filename
title_hint = get_field(result, "title", None) or get_field(result, "name", None)
if not title_hint:
file_path = get_field(result, "path", None) or get_field(result, "filename", None)
if file_path:
title_hint = Path(str(file_path)).stem
if not title_hint:
log("No title available to search for metadata", file=sys.stderr)
query_hint = identifier_query or title_hint
if not query_hint:
log("No title or identifier available to search for metadata", file=sys.stderr)
return 1
items = provider.search(title_hint, limit=10)
if identifier_query:
log(f"Using identifier for metadata search: {identifier_query}")
else:
log(f"Using title for metadata search: {query_hint}")
items = provider.search(query_hint, limit=10)
if not items:
log("No metadata results found", file=sys.stderr)
return 1
@@ -1212,11 +1271,46 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
# Always output to ResultTable (pipeline mode only)
# Extract title for table header
item_title = get_field(result, "title", None) or get_field(result, "name", None) or get_field(result, "filename", None)
# Build a subject payload representing the file whose tags are being shown
subject_origin = get_field(result, "origin", None) or get_field(result, "source", None) or source
subject_payload: Dict[str, Any] = {
"tags": list(current),
"title": item_title,
"name": item_title,
"origin": subject_origin,
"source": subject_origin,
"storage_source": subject_origin,
"service_name": service_name,
"extra": {
"tags": list(current),
"storage_source": subject_origin,
"hydrus_hash": hash_hex,
},
}
if hash_hex:
subject_payload.update({
"hash": hash_hex,
"hash_hex": hash_hex,
"file_hash": hash_hex,
"hydrus_hash": hash_hex,
})
if local_path:
try:
path_text = str(local_path)
subject_payload.update({
"file_path": path_text,
"path": path_text,
"target": path_text,
})
subject_payload["extra"]["file_path"] = path_text
except Exception:
pass
if source == "hydrus":
_emit_tags_as_table(current, hash_hex=hash_hex, source="hydrus", service_name=service_name, config=config, item_title=item_title)
_emit_tags_as_table(current, hash_hex=hash_hex, source="hydrus", service_name=service_name, config=config, item_title=item_title, subject=subject_payload)
else:
_emit_tags_as_table(current, hash_hex=hash_hex, source="local", service_name=None, config=config, item_title=item_title, file_path=str(local_path) if local_path else None)
_emit_tags_as_table(current, hash_hex=hash_hex, source="local", service_name=None, config=config, item_title=item_title, file_path=str(local_path) if local_path else None, subject=subject_payload)
# If emit requested or store key provided, emit payload
if emit_mode: