d
This commit is contained in:
@@ -13,6 +13,7 @@ from __future__ import annotations
|
||||
import sys
|
||||
|
||||
from helper.logger import log
|
||||
from helper.metadata_search import get_metadata_provider
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Optional, Sequence, Tuple
|
||||
@@ -1015,33 +1016,82 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
scrape_url = parsed_args.get("scrape")
|
||||
scrape_requested = scrape_url is not None
|
||||
|
||||
# Handle URL scraping mode
|
||||
# Handle URL or provider scraping mode
|
||||
if scrape_requested and scrape_url:
|
||||
import json as json_module
|
||||
# Don't print debug message - output should be JSON only for programmatic consumption
|
||||
# logger.debug(f"Scraping URL: {scrape_url}")
|
||||
title, tags, formats, playlist_items = _scrape_url_metadata(scrape_url)
|
||||
|
||||
if scrape_url.startswith("http://") or scrape_url.startswith("https://"):
|
||||
# URL scraping (existing behavior)
|
||||
title, tags, formats, playlist_items = _scrape_url_metadata(scrape_url)
|
||||
if not tags:
|
||||
log("No tags extracted from URL", file=sys.stderr)
|
||||
return 1
|
||||
output = {
|
||||
"title": title,
|
||||
"tags": tags,
|
||||
"formats": [(label, fmt_id) for label, fmt_id in formats],
|
||||
"playlist_items": playlist_items,
|
||||
}
|
||||
print(json_module.dumps(output, ensure_ascii=False))
|
||||
return 0
|
||||
|
||||
if not tags:
|
||||
log("No tags extracted from URL", file=sys.stderr)
|
||||
# Provider scraping (e.g., itunes)
|
||||
provider = get_metadata_provider(scrape_url, config)
|
||||
if provider is None:
|
||||
log(f"Unknown metadata provider: {scrape_url}", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Build result object
|
||||
# result_obj = TagItem("url_scrape", tag_index=0, hash_hex=None, source="url", service_name=None)
|
||||
# result_obj.title = title or "URL Content"
|
||||
# Determine query from title on the result or filename
|
||||
title_hint = get_field(result, "title", None) or get_field(result, "name", None)
|
||||
if not title_hint:
|
||||
file_path = get_field(result, "path", None) or get_field(result, "filename", None)
|
||||
if file_path:
|
||||
title_hint = Path(str(file_path)).stem
|
||||
|
||||
# Emit tags as JSON for pipeline consumption (output should be pure JSON on stdout)
|
||||
output = {
|
||||
"title": title,
|
||||
"tags": tags,
|
||||
"formats": [(label, fmt_id) for label, fmt_id in formats],
|
||||
"playlist_items": playlist_items,
|
||||
}
|
||||
if not title_hint:
|
||||
log("No title available to search for metadata", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
# Use print() directly to stdout for JSON output (NOT log() which adds prefix)
|
||||
# This ensures the output is capturable by the download modal and other pipelines
|
||||
# The modal filters for lines starting with '{' so the prefix breaks parsing
|
||||
print(json_module.dumps(output, ensure_ascii=False))
|
||||
items = provider.search(title_hint, limit=10)
|
||||
if not items:
|
||||
log("No metadata results found", file=sys.stderr)
|
||||
return 1
|
||||
|
||||
from result_table import ResultTable
|
||||
table = ResultTable(f"Metadata: {provider.name}")
|
||||
table.set_source_command("get-tag", [])
|
||||
selection_payload = []
|
||||
hash_for_payload = normalize_hash(hash_override) or normalize_hash(get_field(result, "hash_hex", None))
|
||||
for idx, item in enumerate(items):
|
||||
tags = provider.to_tags(item)
|
||||
row = table.add_row()
|
||||
row.add_column("Title", item.get("title", ""))
|
||||
row.add_column("Artist", item.get("artist", ""))
|
||||
row.add_column("Album", item.get("album", ""))
|
||||
row.add_column("Year", item.get("year", ""))
|
||||
payload = {
|
||||
"tags": tags,
|
||||
"provider": provider.name,
|
||||
"title": item.get("title"),
|
||||
"artist": item.get("artist"),
|
||||
"album": item.get("album"),
|
||||
"year": item.get("year"),
|
||||
"extra": {
|
||||
"tags": tags,
|
||||
"provider": provider.name,
|
||||
"hydrus_hash": hash_for_payload,
|
||||
"storage_source": get_field(result, "source", None) or get_field(result, "origin", None),
|
||||
},
|
||||
"file_hash": hash_for_payload,
|
||||
}
|
||||
selection_payload.append(payload)
|
||||
table.set_row_selection_args(idx, [str(idx + 1)])
|
||||
|
||||
ctx.set_last_result_table_overlay(table, selection_payload)
|
||||
ctx.set_current_stage_table(table)
|
||||
# Preserve items for @ selection and downstream pipes without emitting duplicates
|
||||
ctx.set_last_result_items_only(selection_payload)
|
||||
print(table)
|
||||
return 0
|
||||
|
||||
# If -scrape was requested but no URL, that's an error
|
||||
@@ -1178,7 +1228,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
|
||||
CMDLET = Cmdlet(
|
||||
name="get-tag",
|
||||
summary="Get tags from Hydrus or local sidecar metadata",
|
||||
usage="get-tag [-hash <sha256>] [--store <key>] [--emit] [-scrape <url>]",
|
||||
usage="get-tag [-hash <sha256>] [--store <key>] [--emit] [-scrape <url|provider>]",
|
||||
aliases=["tags"],
|
||||
args=[
|
||||
SharedArgs.HASH,
|
||||
@@ -1197,7 +1247,7 @@ CMDLET = Cmdlet(
|
||||
CmdletArg(
|
||||
name="-scrape",
|
||||
type="string",
|
||||
description="Scrape metadata from URL (returns tags as JSON)",
|
||||
description="Scrape metadata from URL or provider name (returns tags as JSON or table)",
|
||||
required=False
|
||||
)
|
||||
]
|
||||
|
||||
Reference in New Issue
Block a user