dfd

2025-12-06 00:10:19 -08:00
parent 5482ee5586
commit f29709d951
20 changed files with 1353 additions and 419 deletions
--- a/cmdlets/get_tag.py
+++ b/cmdlets/get_tag.py
@@ -21,7 +21,7 @@ from typing import Any, Dict, List, Optional, Sequence, Tuple
 import pipeline as ctx
 from helper import hydrus
 from helper.local_library import read_sidecar, write_sidecar, find_sidecar, LocalLibraryDB
-from ._shared import normalize_hash, Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args
+from ._shared import normalize_hash, looks_like_hash, Cmdlet, CmdletArg, SharedArgs, parse_cmdlet_args
 from config import get_local_storage_path


@@ -105,7 +105,8 @@ def _emit_tags_as_table(
 	service_name: Optional[str] = None,
 	config: Dict[str, Any] = None,
 	item_title: Optional[str] = None,
-	file_path: Optional[str] = None
+	file_path: Optional[str] = None,
+	subject: Optional[Any] = None,
 ) -> None:
 	"""Emit tags as TagItem objects and display via ResultTable.
 	
@@ -144,9 +145,9 @@ def _emit_tags_as_table(
 	# Use overlay mode so it doesn't push the previous search to history stack
 	# This makes get-tag behave like a transient view
 	try:
-		ctx.set_last_result_table_overlay(table, tag_items)
+		ctx.set_last_result_table_overlay(table, tag_items, subject)
 	except AttributeError:
-		ctx.set_last_result_table(table, tag_items)
+		ctx.set_last_result_table(table, tag_items, subject)
 	# Note: CLI will handle displaying the table via ResultTable formatting
 def _summarize_tags(tags_list: List[str], limit: int = 8) -> str:
 	"""Create a summary of tags for display."""
@@ -443,7 +444,10 @@ def _emit_tag_payload(source: str, tags_list: List[str], *, hash_value: Optional
 def _extract_scrapable_identifiers(tags_list: List[str]) -> Dict[str, str]:
 	"""Extract scrapable identifiers from tags."""
 	identifiers = {}
-	scrapable_prefixes = {'openlibrary', 'isbn_10', 'isbn', 'musicbrainz', 'musicbrainzalbum', 'imdb', 'tmdb', 'tvdb'}
+	scrapable_prefixes = {
+		'openlibrary', 'isbn', 'isbn_10', 'isbn_13',
+		'musicbrainz', 'musicbrainzalbum', 'imdb', 'tmdb', 'tvdb'
+	}
 	
 	for tag in tags_list:
 		if not isinstance(tag, str) or ':' not in tag:
@@ -453,9 +457,18 @@ def _extract_scrapable_identifiers(tags_list: List[str]) -> Dict[str, str]:
 		if len(parts) != 2:
 			continue
 		
-		key = parts[0].strip().lower()
+		key_raw = parts[0].strip().lower()
+		key = key_raw.replace('-', '_')
+		if key == 'isbn10':
+			key = 'isbn_10'
+		elif key == 'isbn13':
+			key = 'isbn_13'
 		value = parts[1].strip()
 		
+		# Normalize ISBN values by removing hyphens for API friendliness
+		if key.startswith('isbn'):
+			value = value.replace('-', '')
+		
 		if key in scrapable_prefixes and value:
 			identifiers[key] = value
 	
@@ -965,8 +978,8 @@ def _perform_scraping(tags_list: List[str]) -> List[str]:
 		if olid:
 			log(f"Scraping OpenLibrary: {olid}")
 			new_tags.extend(_scrape_openlibrary_metadata(olid))
-	elif 'isbn_10' in identifiers or 'isbn' in identifiers:
-		isbn = identifiers.get('isbn_10') or identifiers.get('isbn')
+	elif 'isbn_13' in identifiers or 'isbn_10' in identifiers or 'isbn' in identifiers:
+		isbn = identifiers.get('isbn_13') or identifiers.get('isbn_10') or identifiers.get('isbn')
 		if isbn:
 			log(f"Scraping ISBN: {isbn}")
 			new_tags.extend(_scrape_isbn_metadata(isbn))
@@ -991,13 +1004,13 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
 	
 	Usage: 
 		get-tag [-hash <sha256>] [--store <key>] [--emit]
-		get-tag -scrape <url>
+		get-tag -scrape <url|provider>
 	
 	Options:
 		-hash <sha256>: Override hash to use instead of result's hash_hex
 		--store <key>: Store result to this key for pipeline
 		--emit: Emit result without interactive prompt (quiet mode)
-		-scrape <url>: Scrape metadata from URL (returns tags as JSON)
+		-scrape <url|provider>: Scrape metadata from URL or provider name (itunes, openlibrary, googlebooks)
 	"""
 	# Helper to get field from both dict and object
 	def get_field(obj: Any, field: str, default: Any = None) -> Any:
@@ -1008,13 +1021,26 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
 	
 	# Parse arguments using shared parser
 	parsed_args = parse_cmdlet_args(args, CMDLET)
-	
+
+	# Detect if -scrape flag was provided without a value (parse_cmdlet_args skips missing values)
+	scrape_flag_present = any(str(arg).lower() in {"-scrape", "--scrape"} for arg in args)
+
 	# Extract values
-	hash_override = normalize_hash(parsed_args.get("hash"))
+	hash_override_raw = parsed_args.get("hash")
+	hash_override = normalize_hash(hash_override_raw)
 	store_key = parsed_args.get("store")
 	emit_requested = parsed_args.get("emit", False)
 	scrape_url = parsed_args.get("scrape")
-	scrape_requested = scrape_url is not None
+	scrape_requested = scrape_flag_present or scrape_url is not None
+
+	if hash_override_raw is not None:
+		if not hash_override or not looks_like_hash(hash_override):
+			log("Invalid hash format: expected 64 hex characters", file=sys.stderr)
+			return 1
+
+	if scrape_requested and (not scrape_url or str(scrape_url).strip() == ""):
+		log("-scrape requires a URL or provider name", file=sys.stderr)
+		return 1
 	
 	# Handle URL or provider scraping mode
 	if scrape_requested and scrape_url:
@@ -1041,18 +1067,51 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
 			log(f"Unknown metadata provider: {scrape_url}", file=sys.stderr)
 			return 1
 		
-		# Determine query from title on the result or filename
+		# Prefer identifier tags (ISBN/OLID/etc.) when available; fallback to title/filename
+		identifier_tags: List[str] = []
+		result_tags = get_field(result, "tags", None)
+		if isinstance(result_tags, list):
+			identifier_tags = [str(t) for t in result_tags if isinstance(t, (str, bytes))]
+		
+		# Try local sidecar if no tags present on result
+		if not identifier_tags:
+			file_path = get_field(result, "target", None) or get_field(result, "path", None) or get_field(result, "file_path", None) or get_field(result, "filename", None)
+			if isinstance(file_path, str) and file_path and not file_path.lower().startswith(("http://", "https://")):
+				try:
+					media_path = Path(str(file_path))
+					if media_path.exists():
+						tags_from_sidecar = read_sidecar(media_path)
+						if isinstance(tags_from_sidecar, list):
+							identifier_tags = [str(t) for t in tags_from_sidecar if isinstance(t, (str, bytes))]
+				except Exception:
+					pass
+		
+		identifiers = _extract_scrapable_identifiers(identifier_tags)
+		identifier_query: Optional[str] = None
+		if identifiers:
+			if provider.name in {"openlibrary", "googlebooks", "google"}:
+				identifier_query = identifiers.get("isbn_13") or identifiers.get("isbn_10") or identifiers.get("isbn") or identifiers.get("openlibrary")
+			elif provider.name == "itunes":
+				identifier_query = identifiers.get("musicbrainz") or identifiers.get("musicbrainzalbum")
+		
+		# Determine query from identifier first, else title on the result or filename
 		title_hint = get_field(result, "title", None) or get_field(result, "name", None)
 		if not title_hint:
 			file_path = get_field(result, "path", None) or get_field(result, "filename", None)
 			if file_path:
 				title_hint = Path(str(file_path)).stem
 		
-		if not title_hint:
-			log("No title available to search for metadata", file=sys.stderr)
+		query_hint = identifier_query or title_hint
+		if not query_hint:
+			log("No title or identifier available to search for metadata", file=sys.stderr)
 			return 1
 		
-		items = provider.search(title_hint, limit=10)
+		if identifier_query:
+			log(f"Using identifier for metadata search: {identifier_query}")
+		else:
+			log(f"Using title for metadata search: {query_hint}")
+
+		items = provider.search(query_hint, limit=10)
 		if not items:
 			log("No metadata results found", file=sys.stderr)
 			return 1
@@ -1212,11 +1271,46 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
 	# Always output to ResultTable (pipeline mode only)
 	# Extract title for table header
 	item_title = get_field(result, "title", None) or get_field(result, "name", None) or get_field(result, "filename", None)
+
+	# Build a subject payload representing the file whose tags are being shown
+	subject_origin = get_field(result, "origin", None) or get_field(result, "source", None) or source
+	subject_payload: Dict[str, Any] = {
+		"tags": list(current),
+		"title": item_title,
+		"name": item_title,
+		"origin": subject_origin,
+		"source": subject_origin,
+		"storage_source": subject_origin,
+		"service_name": service_name,
+		"extra": {
+			"tags": list(current),
+			"storage_source": subject_origin,
+			"hydrus_hash": hash_hex,
+		},
+	}
+	if hash_hex:
+		subject_payload.update({
+			"hash": hash_hex,
+			"hash_hex": hash_hex,
+			"file_hash": hash_hex,
+			"hydrus_hash": hash_hex,
+		})
+	if local_path:
+		try:
+			path_text = str(local_path)
+			subject_payload.update({
+				"file_path": path_text,
+				"path": path_text,
+				"target": path_text,
+			})
+			subject_payload["extra"]["file_path"] = path_text
+		except Exception:
+			pass
 	
 	if source == "hydrus":
-		_emit_tags_as_table(current, hash_hex=hash_hex, source="hydrus", service_name=service_name, config=config, item_title=item_title)
+		_emit_tags_as_table(current, hash_hex=hash_hex, source="hydrus", service_name=service_name, config=config, item_title=item_title, subject=subject_payload)
 	else:
-		_emit_tags_as_table(current, hash_hex=hash_hex, source="local", service_name=None, config=config, item_title=item_title, file_path=str(local_path) if local_path else None)
+		_emit_tags_as_table(current, hash_hex=hash_hex, source="local", service_name=None, config=config, item_title=item_title, file_path=str(local_path) if local_path else None, subject=subject_payload)
 	
 	# If emit requested or store key provided, emit payload
 	if emit_mode: