This commit is contained in:
2025-12-31 05:17:37 -08:00
parent 3bbaa28fb4
commit e8842ceded
10 changed files with 1255 additions and 29 deletions

View File

@@ -2,9 +2,13 @@
from __future__ import annotations
import base64
import hashlib
import json
import re
import shutil
import sys
import tempfile
from collections.abc import Iterable as IterableABC
from SYS.logger import log
@@ -53,14 +57,14 @@ class CmdletArg:
"""Resolve/process the argument value using the handler if available.
Args:
value: The raw argument value to process
value: The raw argument value to process
Returns:
Processed value from handler, or original value if no handler
Processed value from handler, or original value if no handler
Example:
# For STORAGE arg with a handler
storage_path = SharedArgs.STORAGE.resolve('local') # Returns Path.home() / "Videos"
# For STORAGE arg with a handler
storage_path = SharedArgs.STORAGE.resolve('local') # Returns Path.home() / "Videos"
"""
if self.handler is not None and callable(self.handler):
return self.handler(value)
@@ -2435,3 +2439,224 @@ def register_url_with_local_library(
return True # url already existed
except Exception:
return False
def _hifi_log(message: str) -> None:
    """Write a diagnostic line to stderr; logging failures are deliberately ignored."""
    try:
        log(message, file=sys.stderr)
    except Exception:
        # Diagnostics are best-effort and must never break manifest resolution.
        pass


def _tidal_fetch_track_details(item: Any, metadata: dict) -> None:
    """Fetch track details from the proxy and merge them into *metadata*.

    Used when the item only carries a track id (e.g. piped straight from the
    HIFI search table) and therefore has no ``manifest`` yet. The call is
    best-effort: any failure leaves *metadata* untouched.
    """
    if metadata.get("_tidal_track_details_fetched"):
        return

    track_id = metadata.get("trackId") or metadata.get("id")
    if track_id is None:
        # Fall back to parsing the id out of a hifi://track/<id> style path.
        try:
            if isinstance(item, dict):
                candidate_path = item.get("path") or item.get("url")
            else:
                candidate_path = getattr(item, "path", None) or getattr(item, "url", None)
        except Exception:
            candidate_path = None
        if candidate_path:
            match = re.search(
                r"hifi:(?://)?track[\\/](\d+)",
                str(candidate_path),
                flags=re.IGNORECASE,
            )
            if match:
                track_id = match.group(1)
    if track_id is None:
        return

    try:
        track_int = int(track_id)
    except (TypeError, ValueError, OverflowError):
        return
    if track_int <= 0:
        return

    try:
        import httpx

        resp = httpx.get(
            "https://tidal-api.binimum.org/track/",
            params={"id": str(track_int)},
            timeout=10.0,
        )
        resp.raise_for_status()
        payload = resp.json()
    except Exception:
        # Network/proxy problems are not fatal; the caller simply finds no manifest.
        return

    data = payload.get("data") if isinstance(payload, dict) else None
    if isinstance(data, dict) and data:
        metadata.update(data)
        metadata["_tidal_track_details_fetched"] = True


def _tidal_decode_manifest(raw_manifest: Any) -> Optional[bytes]:
    """Turn the ``manifest`` metadata value into raw bytes.

    The value is normally base64. A value that already is raw XML/JSON is
    returned unchanged: ``<``, ``{`` and ``[`` are not base64 characters, so the
    check is unambiguous, and it must happen before whitespace is removed or the
    document would be corrupted.
    """
    raw_text = str(raw_manifest or "")
    stripped = raw_text.strip()
    try:
        if stripped.startswith(("<", "{", "[")):
            return stripped.encode("utf-8")

        compact = "".join(raw_text.split())
        if not compact:
            return None
        # Strict decoding first; the lenient pass tolerates stray characters.
        for strict in (True, False):
            try:
                return base64.b64decode(compact, validate=strict)
            except ValueError:  # binascii.Error is a ValueError subclass
                continue
        return compact.encode("utf-8")
    except UnicodeError:
        return None


def _tidal_resolve_json_manifest(manifest_bytes: bytes, metadata: dict) -> Optional[str]:
    """Pick the first usable stream URL out of a JSON manifest.

    On success the URL is cached in ``metadata["_tidal_manifest_url"]``; on
    failure the reason is stored in ``metadata["_tidal_manifest_error"]``.
    """
    track_label = metadata.get("trackId") or metadata.get("id")
    try:
        payload = json.loads(manifest_bytes.decode("utf-8", errors="ignore"))
    except (ValueError, RecursionError) as exc:
        metadata["_tidal_manifest_error"] = f"Failed to parse JSON manifest: {exc}"
        _hifi_log(f"[hifi] Failed to parse JSON manifest for track {track_label}: {exc}")
        return None

    urls = payload.get("urls") if isinstance(payload, dict) else None
    if isinstance(urls, str):
        # A bare string must not be iterated character by character.
        urls = [urls]
    elif not isinstance(urls, (list, tuple)):
        urls = []

    selected_url = next(
        (url.strip() for url in urls if isinstance(url, str) and url.strip()),
        None,
    )
    if selected_url:
        metadata["_tidal_manifest_url"] = selected_url
        _hifi_log(f"[hifi] Resolved JSON manifest for track {track_label} to {selected_url}")
        return selected_url

    metadata["_tidal_manifest_error"] = "JSON manifest contained no urls"
    _hifi_log(f"[hifi] JSON manifest for track {track_label} had no playable urls")
    return None


def resolve_tidal_manifest_path(item: Any) -> Optional[str]:
    """Persist the Tidal manifest from search results and return a usable source.

    Args:
        item: A result dict or object exposing ``full_metadata`` / ``metadata``
            (and optionally ``path`` / ``url`` / ``extra``).

    Returns:
        * the cached ``_tidal_manifest_path`` if that file still exists,
        * else the cached ``_tidal_manifest_url``,
        * else, for a JSON manifest, the first non-empty entry of ``urls``,
        * else, for a DASH MPD manifest, the path of the ``.mpd`` file written
          under ``<tempdir>/medeia/hifi``,
        * ``None`` when no metadata/manifest is available, the manifest is
          neither JSON nor MPD XML, or the file cannot be written.

    Side effects:
        May perform one HTTP request to fetch track details when only a track
        id is known, and records ``_tidal_manifest_path``,
        ``_tidal_manifest_url`` or ``_tidal_manifest_error`` in the metadata.
    """
    if isinstance(item, dict):
        metadata = item.get("full_metadata") or item.get("metadata")
    else:
        metadata = getattr(item, "full_metadata", None) or getattr(item, "metadata", None)
    if not isinstance(metadata, dict):
        return None

    existing_path = metadata.get("_tidal_manifest_path")
    if existing_path:
        try:
            resolved = Path(str(existing_path))
            if resolved.is_file():
                return str(resolved)
        except (OSError, ValueError):
            # Unusable cached path: fall through and resolve again.
            pass

    existing_url = metadata.get("_tidal_manifest_url")
    if isinstance(existing_url, str) and existing_url.strip():
        return existing_url.strip()

    raw_manifest = metadata.get("manifest")
    if not raw_manifest:
        _tidal_fetch_track_details(item, metadata)
        raw_manifest = metadata.get("manifest")
    if not raw_manifest:
        return None

    manifest_bytes = _tidal_decode_manifest(raw_manifest)
    if not manifest_bytes:
        return None

    head = manifest_bytes[:1024].lstrip()
    if head.startswith((b"{", b"[")):
        return _tidal_resolve_json_manifest(manifest_bytes, metadata)

    track_id = metadata.get("trackId") or metadata.get("id")
    looks_like_mpd = head.startswith((b"<?xml", b"<MPD")) or b"<MPD" in head
    if not looks_like_mpd:
        manifest_mime = str(metadata.get("manifestMimeType") or "").strip().lower()
        metadata["_tidal_manifest_error"] = (
            f"Decoded manifest is not an MPD XML (mime: {manifest_mime or 'unknown'})"
        )
        _hifi_log(
            f"[hifi] Decoded manifest is not an MPD XML for track {track_id} (mime {manifest_mime or 'unknown'})"
        )
        return None

    # Build a filesystem-safe, deterministic file name so repeated calls for the
    # same manifest reuse the same file.
    manifest_hash = str(metadata.get("manifestHash") or "").strip()
    identifier = manifest_hash or hashlib.sha256(manifest_bytes).hexdigest()
    identifier_safe = re.sub(r"[^A-Za-z0-9_-]+", "_", identifier)[:64]
    if not identifier_safe:
        identifier_safe = hashlib.sha256(manifest_bytes).hexdigest()[:12]

    track_safe = "tidal"
    if track_id is not None:
        track_safe = re.sub(r"[^A-Za-z0-9_-]+", "_", str(track_id))[:32] or "tidal"

    # Persist as .mpd for DASH manifests.
    manifest_dir = Path(tempfile.gettempdir()) / "medeia" / "hifi"
    target_path = manifest_dir / f"hifi-{track_safe}-{identifier_safe[:24]}.mpd"
    try:
        manifest_dir.mkdir(parents=True, exist_ok=True)
        target_path.write_bytes(manifest_bytes)
    except OSError:
        return None

    metadata["_tidal_manifest_path"] = str(target_path)
    if not isinstance(item, dict):
        # Objects may carry a separate ``extra`` dict; mirror the path there too.
        try:
            extra = getattr(item, "extra", None)
        except Exception:
            extra = None
        if isinstance(extra, dict):
            extra["_tidal_manifest_path"] = str(target_path)
    return str(target_path)

View File

@@ -1,6 +1,6 @@
from __future__ import annotations
from typing import Any, Dict, Optional, Sequence, Tuple, List
from typing import Any, Dict, Optional, Sequence, Tuple, List, Union
from pathlib import Path
import sys
import shutil
@@ -582,6 +582,82 @@ class Add_File(Cmdlet):
failures += 1
continue
# If we got a hifi://track/<id> placeholder, resolve it to a decoded MPD first.
try:
if isinstance(media_path_or_url, Path):
mp_url = str(media_path_or_url)
if mp_url.lower().startswith("hifi:"):
manifest_path = sh.resolve_tidal_manifest_path(item)
if not manifest_path:
try:
meta = getattr(item, "full_metadata", None)
if isinstance(meta, dict) and meta.get("_tidal_manifest_error"):
log(str(meta.get("_tidal_manifest_error")), file=sys.stderr)
except Exception:
pass
log("HIFI selection has no playable DASH MPD manifest.", file=sys.stderr)
failures += 1
continue
media_path_or_url = Path(manifest_path)
pipe_obj.path = str(media_path_or_url)
elif isinstance(media_path_or_url, str):
if str(media_path_or_url).strip().lower().startswith("hifi:"):
manifest_path = sh.resolve_tidal_manifest_path(item)
if not manifest_path:
try:
meta = getattr(item, "full_metadata", None)
if isinstance(meta, dict) and meta.get("_tidal_manifest_error"):
log(str(meta.get("_tidal_manifest_error")), file=sys.stderr)
except Exception:
pass
log("HIFI selection has no playable DASH MPD manifest.", file=sys.stderr)
failures += 1
continue
media_path_or_url = Path(manifest_path)
pipe_obj.path = str(media_path_or_url)
except Exception:
pass
manifest_source: Optional[Union[str, Path]] = None
tidal_metadata = None
try:
if isinstance(item, dict):
tidal_metadata = item.get("full_metadata") or item.get("metadata")
else:
tidal_metadata = (
getattr(item, "full_metadata", None)
or getattr(item, "metadata", None)
)
except Exception:
tidal_metadata = None
if not tidal_metadata and isinstance(pipe_obj.extra, dict):
tidal_metadata = pipe_obj.extra.get("full_metadata") or pipe_obj.extra.get("metadata")
if isinstance(tidal_metadata, dict):
manifest_source = (
tidal_metadata.get("_tidal_manifest_path")
or tidal_metadata.get("_tidal_manifest_url")
)
if not manifest_source:
if isinstance(media_path_or_url, Path):
manifest_source = media_path_or_url
elif isinstance(media_path_or_url, str):
if media_path_or_url.lower().endswith(".mpd"):
manifest_source = media_path_or_url
if manifest_source:
downloaded, tmp_dir = self._download_manifest_with_ffmpeg(manifest_source)
if downloaded is None:
failures += 1
continue
media_path_or_url = str(downloaded)
pipe_obj.path = str(downloaded)
pipe_obj.is_temp = True
delete_after_item = True
if tmp_dir is not None:
temp_dir_to_cleanup = tmp_dir
is_url_target = isinstance(
media_path_or_url,
str
@@ -2016,10 +2092,159 @@ class Add_File(Cmdlet):
# Call download-media with the URL in args
return dl_cmdlet.run(None, dl_args, config)
@staticmethod
def _download_manifest_with_ffmpeg(source: Union[str, Path]) -> Tuple[Optional[Path], Optional[Path]]:
    """Run ffmpeg on the manifest or stream URL and return a local file path for ingestion.

    Args:
        source: A local manifest path, a ``file://`` URL, or a remote stream URL.

    Returns:
        ``(media_path, tmp_dir)`` on success, where ``media_path`` lives inside
        ``tmp_dir`` and the caller is responsible for removing ``tmp_dir``.
        ``(None, None)`` on any failure; the temporary directory is removed
        here in that case so nothing is left behind.
    """
    import subprocess
    from urllib.parse import urlparse
    from urllib.request import url2pathname

    ffmpeg_bin = shutil.which("ffmpeg")
    if not ffmpeg_bin:
        log("ffmpeg not found on PATH; cannot download HIFI manifest.", file=sys.stderr)
        return None, None

    input_target = ""
    if isinstance(source, Path):
        input_target = str(source)
    elif isinstance(source, str):
        input_target = source.strip()
        if input_target.lower().startswith("file://"):
            # url2pathname unquotes the path and keeps it absolute on POSIX
            # while still producing a drive path on Windows.
            try:
                input_target = url2pathname(urlparse(input_target).path or "")
            except (OSError, ValueError):
                pass
    # Validate before creating the temp dir so an empty input leaks nothing.
    if not input_target:
        return None, None

    tmp_dir = Path(tempfile.mkdtemp(prefix="medeia_hifi_mpd_"))
    stream_mp4 = tmp_dir / "stream.mp4"

    def _discard() -> Tuple[Optional[Path], Optional[Path]]:
        # Failure path: drop the temp dir so callers have nothing to clean up.
        shutil.rmtree(tmp_dir, ignore_errors=True)
        return None, None

    try:
        subprocess.run(
            [
                ffmpeg_bin,
                "-hide_banner",
                "-loglevel",
                "error",
                "-y",
                "-protocol_whitelist",
                "file,https,tcp,tls,crypto,data",
                "-i",
                input_target,
                "-c",
                "copy",
                str(stream_mp4),
            ],
            check=True,
            capture_output=True,
            text=True,
        )
    except subprocess.CalledProcessError as exc:
        err = (exc.stderr or "").strip()
        if err:
            log(f"ffmpeg manifest download failed: {err}", file=sys.stderr)
        else:
            log(f"ffmpeg manifest download failed (exit {exc.returncode})", file=sys.stderr)
        return _discard()
    except Exception as exc:
        log(f"ffmpeg manifest download failed: {exc}", file=sys.stderr)
        return _discard()

    # Probe the first audio stream's codec to choose a fitting container.
    codec = None
    ffprobe_bin = shutil.which("ffprobe")
    if ffprobe_bin:
        try:
            probe = subprocess.run(
                [
                    ffprobe_bin,
                    "-v",
                    "error",
                    "-select_streams",
                    "a:0",
                    "-show_entries",
                    "stream=codec_name",
                    "-of",
                    "default=nw=1:nk=1",
                    str(stream_mp4),
                ],
                capture_output=True,
                text=True,
                check=True,
            )
            codec = (probe.stdout or "").strip().lower() or None
        except Exception:
            codec = None

    # Unknown or unprobed codecs fall back to Matroska audio.
    ext = {"flac": "flac", "aac": "m4a", "mp3": "mp3", "opus": "opus"}.get(codec, "mka")
    audio_out = tmp_dir / f"audio.{ext}"
    try:
        subprocess.run(
            [
                ffmpeg_bin,
                "-hide_banner",
                "-loglevel",
                "error",
                "-y",
                "-i",
                str(stream_mp4),
                "-vn",
                "-c:a",
                "copy",
                str(audio_out),
            ],
            check=True,
            capture_output=True,
            text=True,
        )
        if audio_out.exists():
            return audio_out, tmp_dir
    except subprocess.CalledProcessError as exc:
        err = (exc.stderr or "").strip()
        if err:
            log(f"ffmpeg audio extract failed: {err}", file=sys.stderr)
    except Exception as exc:
        log(f"ffmpeg audio extract failed: {exc}", file=sys.stderr)

    # Audio-only extraction failed; the remuxed stream is still ingestible.
    if stream_mp4.exists():
        return stream_mp4, tmp_dir
    return _discard()
@staticmethod
def _get_url(result: Any, pipe_obj: models.PipeObject) -> List[str]:
from SYS.metadata import normalize_urls
# If this is a HIFI selection, we only support the decoded MPD (never tidal.com URLs).
is_hifi = False
try:
if isinstance(result, dict):
is_hifi = str(result.get("table") or result.get("provider") or "").strip().lower().startswith("hifi")
else:
is_hifi = str(getattr(result, "table", "") or getattr(result, "provider", "")).strip().lower().startswith("hifi")
except Exception:
is_hifi = False
try:
if not is_hifi:
is_hifi = str(getattr(pipe_obj, "path", "") or "").strip().lower().startswith("hifi:")
except Exception:
pass
# Prefer explicit PipeObject.url if present
urls: List[str] = []
try:
@@ -2043,6 +2268,13 @@ class Add_File(Cmdlet):
if not urls:
urls = normalize_urls(extract_url_from_result(result))
# If this is a Tidal/HIFI selection with a decodable manifest, do NOT fall back to
# tidal.com track URLs. The only supported target is the decoded local MPD.
manifest_path = sh.resolve_tidal_manifest_path(result)
if manifest_path:
return [manifest_path]
if is_hifi:
return []
return urls
@staticmethod

View File

@@ -1,12 +1,13 @@
from __future__ import annotations
from pathlib import Path
from typing import Any, Dict, Optional, Sequence
from typing import Any, Dict, List, Optional, Sequence
import sys
from SYS.logger import log
from SYS import pipeline as ctx
from SYS.result_table import ResultTable
from . import _shared as sh
Cmdlet = sh.Cmdlet
@@ -99,6 +100,8 @@ class Get_Note(Cmdlet):
store_registry = Store(config)
any_notes = False
display_items: List[Dict[str, Any]] = []
note_table: Optional[ResultTable] = None
for res in results:
if not isinstance(res, dict):
@@ -145,6 +148,13 @@ class Get_Note(Cmdlet):
continue
any_notes = True
if note_table is None:
note_table = (
ResultTable("note")
.set_table("note")
.set_value_case("preserve")
.set_preserve_order(True)
)
# Emit each note as its own row so CLI renders a proper note table
for k in sorted(notes.keys(), key=lambda x: str(x).lower()):
v = notes.get(k)
@@ -152,23 +162,27 @@ class Get_Note(Cmdlet):
# Keep payload small for IPC/pipes.
raw_text = raw_text[:999]
preview = " ".join(raw_text.replace("\r", "").split("\n"))
ctx.emit(
{
"store": store_name,
"hash": resolved_hash,
"note_name": str(k),
"note_text": raw_text,
"columns": [
("Name",
str(k)),
("Text",
preview.strip()),
],
}
)
payload: Dict[str, Any] = {
"store": store_name,
"hash": resolved_hash,
"note_name": str(k),
"note_text": raw_text,
"columns": [
("Name",
str(k)),
("Text",
preview.strip()),
],
}
display_items.append(payload)
if note_table is not None:
note_table.add_result(payload)
ctx.emit(payload)
if not any_notes:
ctx.emit("No notes found.")
elif note_table is not None:
ctx.set_last_result_table(note_table, display_items, subject=result)
return 0

View File

@@ -1118,7 +1118,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
-query "hash:<sha256>": Override hash to use instead of result's hash
--store <key>: Store result to this key for pipeline
--emit: Emit result without interactive prompt (quiet mode)
-scrape <url|provider>: Scrape metadata from URL or provider name (itunes, openlibrary, googlebooks)
-scrape <url|provider>: Scrape metadata from URL or provider name (itunes, openlibrary, googlebooks, imdb)
"""
args_list = [str(arg) for arg in (args or [])]
raw_args = list(args_list)
@@ -1367,7 +1367,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
print(json_module.dumps(output, ensure_ascii=False))
return 0
# Provider scraping (e.g., itunes)
# Provider scraping (e.g., itunes, imdb)
provider = get_metadata_provider(scrape_url, config)
if provider is None:
log(f"Unknown metadata provider: {scrape_url}", file=sys.stderr)
@@ -1447,6 +1447,8 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
identifiers.get("isbn_13") or identifiers.get("isbn_10")
or identifiers.get("isbn") or identifiers.get("openlibrary")
)
elif provider.name == "imdb":
identifier_query = identifiers.get("imdb")
elif provider.name == "itunes":
identifier_query = identifiers.get("musicbrainz") or identifiers.get(
"musicbrainzalbum"
@@ -1557,6 +1559,7 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
from SYS.result_table import ResultTable
table = ResultTable(f"Metadata: {provider.name}")
table.set_table(f"metadata.{provider.name}")
table.set_source_command("get-tag", [])
selection_payload = []
hash_for_payload = normalize_hash(hash_override) or normalize_hash(
@@ -1601,10 +1604,10 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
selection_payload.append(payload)
table.set_row_selection_args(idx, [str(idx + 1)])
# Store an overlay so that a subsequent `@N` selects from THIS metadata table,
# not from the previous searchable table.
ctx.set_last_result_table_overlay(table, selection_payload)
ctx.set_current_stage_table(table)
# Preserve items for @ selection and downstream pipes without emitting duplicates
ctx.set_last_result_items_only(selection_payload)
return 0
# If -scrape was requested but no URL, that's an error
@@ -1653,6 +1656,11 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
path=str(subject_path) if subject_path else None,
subject=result,
)
_emit_tag_payload(
str(result_provider),
[str(t) for t in result_tags if t is not None],
hash_value=file_hash,
)
return 0
# Apply tags to the store backend (no sidecar writing here).
@@ -1716,6 +1724,12 @@ def _run(result: Any, args: Sequence[str], config: Dict[str, Any]) -> int:
},
},
)
_emit_tag_payload(
str(store_name),
list(updated_tags),
hash_value=file_hash,
extra={"applied_provider": str(result_provider)},
)
return 0
hash_from_result = normalize_hash(get_field(result, "hash", None))
@@ -1825,7 +1839,14 @@ _SCRAPE_CHOICES = []
try:
_SCRAPE_CHOICES = sorted(list_metadata_providers().keys())
except Exception:
_SCRAPE_CHOICES = ["itunes", "openlibrary", "googlebooks", "google", "musicbrainz"]
_SCRAPE_CHOICES = [
"itunes",
"openlibrary",
"googlebooks",
"google",
"musicbrainz",
"imdb",
]
# Special scrape mode: pull tags from an item's URL via yt-dlp (no download)
if "ytdlp" not in _SCRAPE_CHOICES:

View File

@@ -62,7 +62,7 @@ class search_file(Cmdlet):
"provider",
type="string",
description=
"External provider name: bandcamp, libgen, soulseek, youtube, alldebrid, loc, internetarchive",
"External provider name: bandcamp, libgen, soulseek, youtube, alldebrid, loc, internetarchive, hifi",
),
CmdletArg(
"open",