d

2025-12-05 03:42:57 -08:00
parent 5e4df11dbf
commit 5482ee5586
20 changed files with 911 additions and 223 deletions
--- a/cmdlets/get_tag.py
+++ b/cmdlets/get_tag.py
@@ -13,6 +13,7 @@ from __future__ import annotations
 import sys

 from helper.logger import log
+from helper.metadata_search import get_metadata_provider
 import subprocess
 from pathlib import Path
 from typing import Any, Dict, List, Optional, Sequence, Tuple
@@ -1015,33 +1016,82 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
 	scrape_url = parsed_args.get("scrape")
 	scrape_requested = scrape_url is not None
 	
-	# Handle URL scraping mode
+	# Handle URL or provider scraping mode
 	if scrape_requested and scrape_url:
 		import json as json_module
-		# Don't print debug message - output should be JSON only for programmatic consumption
-		# logger.debug(f"Scraping URL: {scrape_url}")
-		title, tags, formats, playlist_items = _scrape_url_metadata(scrape_url)
+
+		if scrape_url.startswith("http://") or scrape_url.startswith("https://"):
+			# URL scraping (existing behavior)
+			title, tags, formats, playlist_items = _scrape_url_metadata(scrape_url)
+			if not tags:
+				log("No tags extracted from URL", file=sys.stderr)
+				return 1
+			output = {
+				"title": title,
+				"tags": tags,
+				"formats": [(label, fmt_id) for label, fmt_id in formats],
+				"playlist_items": playlist_items,
+			}
+			print(json_module.dumps(output, ensure_ascii=False))
+			return 0
 		
-		if not tags:
-			log("No tags extracted from URL", file=sys.stderr)
+		# Provider scraping (e.g., itunes)
+		provider = get_metadata_provider(scrape_url, config)
+		if provider is None:
+			log(f"Unknown metadata provider: {scrape_url}", file=sys.stderr)
 			return 1
 		
-		# Build result object
-		# result_obj = TagItem("url_scrape", tag_index=0, hash_hex=None, source="url", service_name=None)
-		# result_obj.title = title or "URL Content"
+		# Determine query from title on the result or filename
+		title_hint = get_field(result, "title", None) or get_field(result, "name", None)
+		if not title_hint:
+			file_path = get_field(result, "path", None) or get_field(result, "filename", None)
+			if file_path:
+				title_hint = Path(str(file_path)).stem
 		
-		# Emit tags as JSON for pipeline consumption (output should be pure JSON on stdout)
-		output = {
-			"title": title,
-			"tags": tags,
-			"formats": [(label, fmt_id) for label, fmt_id in formats],
-			"playlist_items": playlist_items,
-		}
+		if not title_hint:
+			log("No title available to search for metadata", file=sys.stderr)
+			return 1
 		
-		# Use print() directly to stdout for JSON output (NOT log() which adds prefix)
-		# This ensures the output is capturable by the download modal and other pipelines
-		# The modal filters for lines starting with '{' so the prefix breaks parsing
-		print(json_module.dumps(output, ensure_ascii=False))
+		items = provider.search(title_hint, limit=10)
+		if not items:
+			log("No metadata results found", file=sys.stderr)
+			return 1
+		
+		from result_table import ResultTable
+		table = ResultTable(f"Metadata: {provider.name}")
+		table.set_source_command("get-tag", [])
+		selection_payload = []
+		hash_for_payload = normalize_hash(hash_override) or normalize_hash(get_field(result, "hash_hex", None))
+		for idx, item in enumerate(items):
+			tags = provider.to_tags(item)
+			row = table.add_row()
+			row.add_column("Title", item.get("title", ""))
+			row.add_column("Artist", item.get("artist", ""))
+			row.add_column("Album", item.get("album", ""))
+			row.add_column("Year", item.get("year", ""))
+			payload = {
+				"tags": tags,
+				"provider": provider.name,
+				"title": item.get("title"),
+				"artist": item.get("artist"),
+				"album": item.get("album"),
+				"year": item.get("year"),
+				"extra": {
+					"tags": tags,
+					"provider": provider.name,
+					"hydrus_hash": hash_for_payload,
+					"storage_source": get_field(result, "source", None) or get_field(result, "origin", None),
+				},
+				"file_hash": hash_for_payload,
+			}
+			selection_payload.append(payload)
+			table.set_row_selection_args(idx, [str(idx + 1)])
+
+		ctx.set_last_result_table_overlay(table, selection_payload)
+		ctx.set_current_stage_table(table)
+		# Preserve items for @ selection and downstream pipes without emitting duplicates
+		ctx.set_last_result_items_only(selection_payload)
+		print(table)
 		return 0
 	
 	# If -scrape was requested but no URL, that's an error
@@ -1178,7 +1228,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
 CMDLET = Cmdlet(
    name="get-tag",
    summary="Get tags from Hydrus or local sidecar metadata",
-    usage="get-tag [-hash <sha256>] [--store <key>] [--emit] [-scrape <url>]",
+	usage="get-tag [-hash <sha256>] [--store <key>] [--emit] [-scrape <url|provider>]",
    aliases=["tags"],
    args=[
        SharedArgs.HASH,
@@ -1197,7 +1247,7 @@ CMDLET = Cmdlet(
        CmdletArg(
            name="-scrape",
            type="string",
-            description="Scrape metadata from URL (returns tags as JSON)",
+			description="Scrape metadata from URL or provider name (returns tags as JSON or table)",
            required=False
        )
    ]