This commit is contained in:
nose
2025-12-05 03:42:57 -08:00
parent 5e4df11dbf
commit 5482ee5586
20 changed files with 911 additions and 223 deletions

View File

@@ -13,6 +13,7 @@ from __future__ import annotations
import sys
from helper.logger import log
from helper.metadata_search import get_metadata_provider
import subprocess
from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence, Tuple
@@ -1015,33 +1016,82 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
scrape_url = parsed_args.get("scrape")
scrape_requested = scrape_url is not None
# Handle URL scraping mode
# Handle URL or provider scraping mode
if scrape_requested and scrape_url:
import json as json_module
# Don't print debug message - output should be JSON only for programmatic consumption
# logger.debug(f"Scraping URL: {scrape_url}")
title, tags, formats, playlist_items = _scrape_url_metadata(scrape_url)
if scrape_url.startswith("http://") or scrape_url.startswith("https://"):
# URL scraping (existing behavior)
title, tags, formats, playlist_items = _scrape_url_metadata(scrape_url)
if not tags:
log("No tags extracted from URL", file=sys.stderr)
return 1
output = {
"title": title,
"tags": tags,
"formats": [(label, fmt_id) for label, fmt_id in formats],
"playlist_items": playlist_items,
}
print(json_module.dumps(output, ensure_ascii=False))
return 0
if not tags:
log("No tags extracted from URL", file=sys.stderr)
# Provider scraping (e.g., itunes)
provider = get_metadata_provider(scrape_url, config)
if provider is None:
log(f"Unknown metadata provider: {scrape_url}", file=sys.stderr)
return 1
# Build result object
# result_obj = TagItem("url_scrape", tag_index=0, hash_hex=None, source="url", service_name=None)
# result_obj.title = title or "URL Content"
# Determine query from title on the result or filename
title_hint = get_field(result, "title", None) or get_field(result, "name", None)
if not title_hint:
file_path = get_field(result, "path", None) or get_field(result, "filename", None)
if file_path:
title_hint = Path(str(file_path)).stem
# Emit tags as JSON for pipeline consumption (output should be pure JSON on stdout)
output = {
"title": title,
"tags": tags,
"formats": [(label, fmt_id) for label, fmt_id in formats],
"playlist_items": playlist_items,
}
if not title_hint:
log("No title available to search for metadata", file=sys.stderr)
return 1
# Use print() directly to stdout for JSON output (NOT log() which adds prefix)
# This ensures the output is capturable by the download modal and other pipelines
# The modal filters for lines starting with '{' so the prefix breaks parsing
print(json_module.dumps(output, ensure_ascii=False))
items = provider.search(title_hint, limit=10)
if not items:
log("No metadata results found", file=sys.stderr)
return 1
from result_table import ResultTable
table = ResultTable(f"Metadata: {provider.name}")
table.set_source_command("get-tag", [])
selection_payload = []
hash_for_payload = normalize_hash(hash_override) or normalize_hash(get_field(result, "hash_hex", None))
for idx, item in enumerate(items):
tags = provider.to_tags(item)
row = table.add_row()
row.add_column("Title", item.get("title", ""))
row.add_column("Artist", item.get("artist", ""))
row.add_column("Album", item.get("album", ""))
row.add_column("Year", item.get("year", ""))
payload = {
"tags": tags,
"provider": provider.name,
"title": item.get("title"),
"artist": item.get("artist"),
"album": item.get("album"),
"year": item.get("year"),
"extra": {
"tags": tags,
"provider": provider.name,
"hydrus_hash": hash_for_payload,
"storage_source": get_field(result, "source", None) or get_field(result, "origin", None),
},
"file_hash": hash_for_payload,
}
selection_payload.append(payload)
table.set_row_selection_args(idx, [str(idx + 1)])
ctx.set_last_result_table_overlay(table, selection_payload)
ctx.set_current_stage_table(table)
# Preserve items for @ selection and downstream pipes without emitting duplicates
ctx.set_last_result_items_only(selection_payload)
print(table)
return 0
# If -scrape was requested but no URL or provider name was supplied, that's an error
@@ -1178,7 +1228,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
CMDLET = Cmdlet(
name="get-tag",
summary="Get tags from Hydrus or local sidecar metadata",
usage="get-tag [-hash <sha256>] [--store <key>] [--emit] [-scrape <url>]",
usage="get-tag [-hash <sha256>] [--store <key>] [--emit] [-scrape <url|provider>]",
aliases=["tags"],
args=[
SharedArgs.HASH,
@@ -1197,7 +1247,7 @@ CMDLET = Cmdlet(
CmdletArg(
name="-scrape",
type="string",
description="Scrape metadata from URL (returns tags as JSON)",
description="Scrape metadata from URL or provider name (returns tags as JSON or table)",
required=False
)
]