Medios-Macina/MPV/pipeline_helper.py

"""Persistent MPV pipeline helper.

This process connects to MPV's IPC server, observes a user-data property for
pipeline execution requests, runs the pipeline in-process, and posts results
back to MPV via user-data properties.

Why:
- Avoid spawning a new Python process for every MPV action.
- Enable MPV Lua scripts to trigger any cmdlet pipeline cheaply.

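Protocol (user-data properties):
- Request:  user-data/medeia-pipeline-request (JSON string)
  {"id": "...", "pipeline": "...", "seeds": [...]}  (seeds optional)
  or {"id": "...", "op": "...", "data": {...}} for helper-only operations
- Response: user-data/medeia-pipeline-response (JSON string)
  {"id": "...", "success": bool, "stdout": "...", "stderr": "...", "error": "...",
   "table": {...} or null}  (plus "choices" for some ops and "log_path" on failure)
- Ready:    user-data/medeia-pipeline-ready (unix timestamp string, refreshed as a heartbeat)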

This helper is intentionally minimal: one request at a time, last-write-wins.
"""
from __future__ import annotations

# Version stamp of this helper; it is written to the helper log at startup.
MEDEIA_MPV_HELPER_VERSION = "2025-12-19"

import argparse
import json
import os
import sys
import tempfile
import time
import logging
import re
import hashlib
from pathlib import Path
from typing import Any, Dict, Optional


def _repo_root() -> Path:
    return Path(__file__).resolve().parent.parent


# Make repo-local packages importable even when mpv starts us from another cwd.
# This has to run before the project imports that follow it, which is why those
# imports carry `noqa: E402`.
_ROOT = str(_repo_root())
if _ROOT not in sys.path:
    sys.path.insert(0, _ROOT)


from MPV.mpv_ipc import MPVIPCClient  # noqa: E402
from config import load_config  # noqa: E402
from SYS.logger import set_debug, debug, set_thread_stream  # noqa: E402


# mpv user-data property observed by this helper for incoming JSON requests.
REQUEST_PROP = "user-data/medeia-pipeline-request"
# mpv user-data property this helper sets to the JSON response of a request.
RESPONSE_PROP = "user-data/medeia-pipeline-response"
# mpv user-data property this helper refreshes with a unix timestamp (heartbeat).
READY_PROP = "user-data/medeia-pipeline-ready"

# Observer id passed to `observe_property` for REQUEST_PROP; property-change
# events are matched against it in the main loop.
OBS_ID_REQUEST = 1001


def _run_pipeline(pipeline_text: str, *, seeds: Any = None) -> Dict[str, Any]:
    # Import after sys.path fix.
    from TUI.pipeline_runner import PipelineExecutor  # noqa: WPS433

    def _table_to_payload(table: Any) -> Optional[Dict[str, Any]]:
        if table is None:
            return None
        try:
            title = getattr(table, "title", "")
        except Exception:
            title = ""

        rows_payload = []
        try:
            rows = getattr(table, "rows", None)
        except Exception:
            rows = None
        if isinstance(rows, list):
            for r in rows:
                cols_payload = []
                try:
                    cols = getattr(r, "columns", None)
                except Exception:
                    cols = None
                if isinstance(cols, list):
                    for c in cols:
                        try:
                            cols_payload.append(
                                {
                                    "name": getattr(c, "name", ""),
                                    "value": getattr(c, "value", ""),
                                }
                            )
                        except Exception:
                            continue

                sel_args = None
                try:
                    sel = getattr(r, "selection_args", None)
                    if isinstance(sel, list):
                        sel_args = [str(x) for x in sel]
                except Exception:
                    sel_args = None

                rows_payload.append({"columns": cols_payload, "selection_args": sel_args})

        # Only return JSON-serializable data (Lua only needs title + rows).
        return {"title": str(title or ""), "rows": rows_payload}

    executor = PipelineExecutor()
    result = executor.run_pipeline(pipeline_text, seeds=seeds)

    table_payload = None
    try:
        table_payload = _table_to_payload(getattr(result, "result_table", None))
    except Exception:
        table_payload = None

    return {
        "success": bool(result.success),
        "stdout": result.stdout or "",
        "stderr": result.stderr or "",
        "error": result.error,
        "table": table_payload,
    }


def _run_op(op: str, data: Any) -> Dict[str, Any]:
    """Run a helper-only operation.

    These are NOT cmdlets and are not available via CLI pipelines. They exist
    solely so MPV Lua can query lightweight metadata (e.g., autocomplete lists)
    without inventing user-facing commands.
    """
    op_name = str(op or "").strip().lower()

    # Provide store backend choices using the same source as CLI/Typer autocomplete.
    if op_name in {"store-choices", "store_choices", "get-store-choices", "get_store_choices"}:
        # Preferred: call the same choice function used by the CLI completer.
        try:
            from CLI import get_store_choices  # noqa: WPS433

            backends = get_store_choices()
            choices = sorted({str(n) for n in (backends or []) if str(n).strip()})
        except Exception:
            # Fallback: direct Store registry enumeration using loaded config.
            try:
                cfg = load_config() or {}
            except Exception:
                cfg = {}
            try:
                from Store import Store  # noqa: WPS433

                storage = Store(cfg, suppress_debug=True)
                backends = storage.list_backends() or []
                choices = sorted({str(n) for n in backends if str(n).strip()})
            except Exception as exc:
                return {
                    "success": False,
                    "stdout": "",
                    "stderr": "",
                    "error": f"{type(exc).__name__}: {exc}",
                    "table": None,
                    "choices": [],
                }

        return {
            "success": True,
            "stdout": "",
            "stderr": "",
            "error": None,
            "table": None,
            "choices": choices,
        }

    # Provide yt-dlp format list for a URL (for MPV "Change format" menu).
    # Returns a ResultTable-like payload so the Lua UI can render without running cmdlets.
    if op_name in {"ytdlp-formats", "ytdlp_formats", "ytdl-formats", "ytdl_formats"}:
        try:
            url = None
            if isinstance(data, dict):
                url = data.get("url")
            url = str(url or "").strip()
            if not url:
                return {
                    "success": False,
                    "stdout": "",
                    "stderr": "",
                    "error": "Missing url",
                    "table": None,
                }

            # Fast gate: only for streaming URLs yt-dlp knows about.
            try:
                from SYS.download import is_url_supported_by_ytdlp  # noqa: WPS433

                if not is_url_supported_by_ytdlp(url):
                    return {
                        "success": False,
                        "stdout": "",
                        "stderr": "",
                        "error": "URL not supported by yt-dlp",
                        "table": None,
                    }
            except Exception:
                # If probing support fails, still attempt extraction and let yt-dlp decide.
                pass

            try:
                import yt_dlp  # type: ignore
            except Exception as exc:
                return {
                    "success": False,
                    "stdout": "",
                    "stderr": "",
                    "error": f"yt-dlp module not available: {type(exc).__name__}: {exc}",
                    "table": None,
                }

            cookiefile = None
            try:
                from tool.ytdlp import YtDlpTool  # noqa: WPS433

                cfg = load_config() or {}
                cookie_path = YtDlpTool(cfg).resolve_cookiefile()
                if cookie_path is not None:
                    cookiefile = str(cookie_path)
            except Exception:
                cookiefile = None

            ydl_opts: Dict[str, Any] = {
                "quiet": True,
                "no_warnings": True,
                "socket_timeout": 20,
                "retries": 2,
                "skip_download": True,
                # Avoid accidentally expanding huge playlists on load.
                "noplaylist": True,
                "noprogress": True,
            }
            if cookiefile:
                ydl_opts["cookiefile"] = cookiefile

            def _format_bytes(n: Any) -> str:
                try:
                    v = float(n)
                except Exception:
                    return ""
                if v <= 0:
                    return ""
                units = ["B", "KB", "MB", "GB", "TB"]
                i = 0
                while v >= 1024 and i < len(units) - 1:
                    v /= 1024.0
                    i += 1
                if i == 0:
                    return f"{int(v)} {units[i]}"
                return f"{v:.1f} {units[i]}"

            with yt_dlp.YoutubeDL(ydl_opts) as ydl:  # type: ignore[attr-defined]
                info = ydl.extract_info(url, download=False)

            # Debug: dump a short summary of the format list to the helper log.
            try:
                formats_any = info.get("formats") if isinstance(info, dict) else None
                count = len(formats_any) if isinstance(formats_any, list) else 0
                _append_helper_log(f"[ytdlp-formats] extracted formats count={count} url={url}")

                if isinstance(formats_any, list) and formats_any:
                    limit = 60
                    for i, f in enumerate(formats_any[:limit], start=1):
                        if not isinstance(f, dict):
                            continue
                        fid = str(f.get("format_id") or "")
                        ext = str(f.get("ext") or "")
                        note = f.get("format_note") or f.get("format") or ""
                        vcodec = str(f.get("vcodec") or "")
                        acodec = str(f.get("acodec") or "")
                        size = f.get("filesize") or f.get("filesize_approx")
                        res = str(f.get("resolution") or "")
                        if not res:
                            try:
                                w = f.get("width")
                                h = f.get("height")
                                if w and h:
                                    res = f"{int(w)}x{int(h)}"
                                elif h:
                                    res = f"{int(h)}p"
                            except Exception:
                                res = ""
                        _append_helper_log(
                            f"[ytdlp-format {i:02d}] id={fid} ext={ext} res={res} note={note} codecs={vcodec}/{acodec} size={size}"
                        )
                    if count > limit:
                        _append_helper_log(f"[ytdlp-formats] (truncated; total={count})")
            except Exception:
                pass

            # Optional: dump the full extracted JSON for inspection.
            try:
                dump = os.environ.get("MEDEIA_MPV_YTDLP_DUMP", "").strip()
                if dump and dump != "0" and isinstance(info, dict):
                    h = hashlib.sha1(url.encode("utf-8", errors="replace")).hexdigest()[:10]
                    out_path = _repo_root() / "Log" / f"ytdlp-probe-{h}.json"
                    out_path.write_text(json.dumps(info, ensure_ascii=False, indent=2), encoding="utf-8", errors="replace")
                    _append_helper_log(f"[ytdlp-formats] wrote probe json: {out_path}")
            except Exception:
                pass

            if not isinstance(info, dict):
                return {
                    "success": False,
                    "stdout": "",
                    "stderr": "",
                    "error": "yt-dlp returned non-dict info",
                    "table": None,
                }

            formats = info.get("formats")
            if not isinstance(formats, list) or not formats:
                return {
                    "success": True,
                    "stdout": "",
                    "stderr": "",
                    "error": None,
                    "table": {"title": "Formats", "rows": []},
                }

            rows = []
            for fmt in formats:
                if not isinstance(fmt, dict):
                    continue
                format_id = str(fmt.get("format_id") or "").strip()
                if not format_id:
                    continue

                # Prefer human-ish resolution.
                resolution = str(fmt.get("resolution") or "").strip()
                if not resolution:
                    w = fmt.get("width")
                    h = fmt.get("height")
                    try:
                        if w and h:
                            resolution = f"{int(w)}x{int(h)}"
                        elif h:
                            resolution = f"{int(h)}p"
                    except Exception:
                        resolution = ""

                ext = str(fmt.get("ext") or "").strip()
                size = _format_bytes(fmt.get("filesize") or fmt.get("filesize_approx"))

                # Build selection args compatible with MPV Lua picker.
                selection_args = ["-format", format_id]

                rows.append(
                    {
                        "columns": [
                            {"name": "ID", "value": format_id},
                            {"name": "Resolution", "value": resolution or ""},
                            {"name": "Ext", "value": ext or ""},
                            {"name": "Size", "value": size or ""},
                        ],
                        "selection_args": selection_args,
                    }
                )

            return {
                "success": True,
                "stdout": "",
                "stderr": "",
                "error": None,
                "table": {"title": "Formats", "rows": rows},
            }
        except Exception as exc:
            return {
                "success": False,
                "stdout": "",
                "stderr": "",
                "error": f"{type(exc).__name__}: {exc}",
                "table": None,
            }

    return {
        "success": False,
        "stdout": "",
        "stderr": "",
        "error": f"Unknown op: {op_name}",
        "table": None,
    }


def _helper_log_path() -> Path:
    try:
        d = _repo_root() / "Log"
        d.mkdir(parents=True, exist_ok=True)
        return d / "medeia-mpv-helper.log"
    except Exception:
        return Path(tempfile.gettempdir()) / "medeia-mpv-helper.log"


def _append_helper_log(text: str) -> None:
    payload = (text or "").rstrip()
    if not payload:
        return
    try:
        path = _helper_log_path()
        path.parent.mkdir(parents=True, exist_ok=True)
        with open(path, "a", encoding="utf-8", errors="replace") as fh:
            fh.write(payload + "\n")
    except Exception:
        return


def _acquire_ipc_lock(ipc_path: str) -> Optional[Any]:
    """Best-effort singleton lock per IPC path.

    Multiple helpers subscribing to mpv log-message events causes duplicated
    log output. Use a tiny file lock to ensure one helper per mpv instance.
    """
    try:
        safe = re.sub(r"[^a-zA-Z0-9_.-]+", "_", str(ipc_path or ""))
        if not safe:
            safe = "mpv"
        # Keep lock files out of the repo's Log/ directory to avoid clutter.
        lock_dir = Path(tempfile.gettempdir()) / "medeia-mpv-helper"
        lock_dir.mkdir(parents=True, exist_ok=True)
        lock_path = lock_dir / f"medeia-mpv-helper-{safe}.lock"
        fh = open(lock_path, "a+", encoding="utf-8", errors="replace")

        if os.name == "nt":
            try:
                import msvcrt  # type: ignore

                fh.seek(0)
                msvcrt.locking(fh.fileno(), msvcrt.LK_NBLCK, 1)
            except Exception:
                try:
                    fh.close()
                except Exception:
                    pass
                return None
        else:
            try:
                import fcntl  # type: ignore

                fcntl.flock(fh.fileno(), fcntl.LOCK_EX | fcntl.LOCK_NB)
            except Exception:
                try:
                    fh.close()
                except Exception:
                    pass
                return None

        return fh
    except Exception:
        return None


def _parse_request(data: Any) -> Optional[Dict[str, Any]]:
    if data is None:
        return None
    if isinstance(data, str):
        text = data.strip()
        if not text:
            return None
        try:
            obj = json.loads(text)
        except Exception:
            return None
        return obj if isinstance(obj, dict) else None
    if isinstance(data, dict):
        return data
    return None


def main(argv: Optional[list[str]] = None) -> int:
    """Run the helper: connect to mpv's IPC server and serve pipeline requests.

    Args:
        argv: Command-line arguments. ``--ipc`` (required) is the mpv IPC
            server path and ``--timeout`` the connect timeout in seconds.
            ``None`` lets argparse read ``sys.argv``.

    Returns:
        Process exit code: ``0`` when mpv closes the IPC connection or another
        helper already holds the lock for this IPC path, ``2`` when connecting
        timed out, ``3`` when the request property could not be observed.
    """
    parser = argparse.ArgumentParser(prog="mpv-pipeline-helper")
    parser.add_argument("--ipc", required=True, help="mpv --input-ipc-server path")
    parser.add_argument("--timeout", type=float, default=15.0)
    args = parser.parse_args(argv)

    # Load config once and configure logging similar to CLI.pipeline.
    try:
        cfg = load_config() or {}
    except Exception:
        cfg = {}

    # Bound up front so the log filter in the event loop can never hit an
    # unbound name if the setup below fails early.
    debug_enabled = False
    try:
        debug_enabled = bool(isinstance(cfg, dict) and cfg.get("debug", False))
        set_debug(debug_enabled)

        if debug_enabled:
            logging.basicConfig(
                level=logging.DEBUG,
                format='[%(name)s] %(levelname)s: %(message)s',
                stream=sys.stderr,
            )
            for noisy in ("httpx", "httpcore", "httpcore.http11", "httpcore.connection"):
                try:
                    logging.getLogger(noisy).setLevel(logging.WARNING)
                except Exception:
                    pass
    except Exception:
        pass

    # Ensure all in-process cmdlets that talk to MPV pick up the exact IPC server
    # path used by this helper (which comes from the running MPV instance).
    os.environ["MEDEIA_MPV_IPC"] = str(args.ipc)

    # Ensure single helper instance per ipc. The handle is kept in a local for
    # the lifetime of main() so the lock stays held.
    _lock_fh = _acquire_ipc_lock(str(args.ipc))
    if _lock_fh is None:
        _append_helper_log(f"[helper] another instance already holds lock for ipc={args.ipc}; exiting")
        return 0

    try:
        _append_helper_log(f"[helper] version={MEDEIA_MPV_HELPER_VERSION} started ipc={args.ipc}")
        debug(f"[mpv-helper] logging to: {_helper_log_path()}")
    except Exception:
        pass

    # Route SYS.logger output into the helper log file so diagnostics are not
    # lost in mpv's console/terminal output.
    try:
        class _HelperLogStream:
            """File-like sink that forwards complete lines to the helper log."""

            def __init__(self) -> None:
                # Holds a trailing partial line until its newline arrives.
                self._pending = ""

            def write(self, s: str) -> int:
                if not s:
                    return 0
                text = self._pending + str(s)
                lines = text.splitlines(keepends=True)
                if lines and not lines[-1].endswith(("\n", "\r")):
                    self._pending = lines[-1]
                    lines = lines[:-1]
                else:
                    self._pending = ""
                for line in lines:
                    payload = line.rstrip("\r\n")
                    if payload:
                        _append_helper_log("[py] " + payload)
                return len(s)

            def flush(self) -> None:
                if self._pending:
                    _append_helper_log("[py] " + self._pending.rstrip("\r\n"))
                    self._pending = ""

        set_thread_stream(_HelperLogStream())
    except Exception:
        pass

    # Prefer a stable repo-local log folder for discoverability.
    error_log_dir = _repo_root() / "Log"
    try:
        error_log_dir.mkdir(parents=True, exist_ok=True)
    except Exception:
        error_log_dir = Path(tempfile.gettempdir())
    last_error_log = error_log_dir / "medeia-mpv-pipeline-last-error.log"

    def _write_error_log(text: str, *, req_id: str) -> Optional[str]:
        """Persist failure details; return the path written, or None if empty."""
        try:
            error_log_dir.mkdir(parents=True, exist_ok=True)
        except Exception:
            pass

        payload = (text or "").strip()
        if not payload:
            return None

        # The id comes from the request JSON; reduce it to filename-safe
        # characters so it cannot add path separators or characters the
        # filesystem rejects.
        safe_id = re.sub(r"[^a-zA-Z0-9_.-]+", "_", str(req_id or ""))[:80] or "unknown"
        stamped: Optional[Path] = error_log_dir / f"medeia-mpv-pipeline-error-{safe_id}.log"
        try:
            stamped.write_text(payload, encoding="utf-8", errors="replace")
        except Exception:
            stamped = None

        try:
            last_error_log.write_text(payload, encoding="utf-8", errors="replace")
        except Exception:
            pass

        return str(stamped) if stamped else str(last_error_log)

    # Connect to mpv's JSON IPC. On Windows, the pipe can exist but reject opens
    # briefly during startup; also mpv may create the IPC server slightly after
    # the Lua script launches us. Retry until timeout.
    # Monotonic clock: a wall-clock adjustment must not shorten or extend the wait.
    connect_deadline = time.monotonic() + max(0.5, float(args.timeout))
    last_connect_error: Optional[str] = None

    client = MPVIPCClient(socket_path=args.ipc, timeout=0.5, silent=True)
    while True:
        try:
            if client.connect():
                break
        except Exception as exc:
            last_connect_error = f"{type(exc).__name__}: {exc}"

        if time.monotonic() > connect_deadline:
            _append_helper_log(f"[helper] failed to connect ipc={args.ipc} error={last_connect_error or 'timeout'}")
            return 2

        # Keep trying.
        time.sleep(0.10)

    # Mark ready ASAP and keep it fresh.
    # Use a unix timestamp so the Lua side can treat it as a heartbeat.
    last_ready_ts: float = 0.0

    def _touch_ready() -> None:
        """Refresh READY_PROP with the current unix time (throttled)."""
        nonlocal last_ready_ts
        now = time.time()
        # Throttle updates to reduce IPC chatter.
        if (now - last_ready_ts) < 0.75:
            return
        try:
            client.send_command_no_wait(["set_property", READY_PROP, str(int(now))])
            last_ready_ts = now
        except Exception:
            return

    _touch_ready()

    # Mirror mpv's own log messages into our helper log file so debugging does
    # not depend on the mpv on-screen console or mpv's log-file.
    try:
        # IMPORTANT: mpv debug logs can be extremely chatty (especially ytdl_hook)
        # and can starve request handling. Default to warn unless explicitly overridden.
        level = os.environ.get("MEDEIA_MPV_HELPER_MPVLOG", "").strip() or "warn"
        client.send_command_no_wait(["request_log_messages", level])
        _append_helper_log(f"[helper] requested mpv log messages level={level}")
    except Exception:
        pass

    # De-dup/throttle mpv log-message lines (mpv and yt-dlp can be very chatty).
    last_mpv_line: Optional[str] = None
    last_mpv_count: int = 0
    last_mpv_ts: float = 0.0

    def _flush_mpv_repeat() -> None:
        """Log how often the last mpv line repeated, then reset the counter."""
        nonlocal last_mpv_line, last_mpv_count
        if last_mpv_line and last_mpv_count > 1:
            _append_helper_log(f"[mpv] (previous line repeated {last_mpv_count}x)")
        last_mpv_line = None
        last_mpv_count = 0

    # Observe request property changes.
    try:
        client.send_command_no_wait(["observe_property", OBS_ID_REQUEST, REQUEST_PROP, "string"])
    except Exception:
        return 3

    last_seen_id: Optional[str] = None

    try:
        _append_helper_log(f"[helper] connected to ipc={args.ipc}")
    except Exception:
        pass

    while True:
        # Keep READY fresh on every iteration (Lua may clear it on timeouts).
        # Refreshing only when idle would let the heartbeat go stale while mpv
        # streams log messages; the call is throttled, so this stays cheap.
        _touch_ready()

        msg = client.read_message(timeout=0.25)
        if msg is None:
            time.sleep(0.02)
            continue

        if msg.get("event") == "__eof__":
            try:
                _flush_mpv_repeat()
            except Exception:
                pass
            return 0

        if msg.get("event") == "log-message":
            try:
                msg_level = str(msg.get("level") or "")
                prefix = str(msg.get("prefix") or "")
                text = str(msg.get("text") or "").rstrip()

                if not text:
                    continue

                # Filter excessive noise unless debug is enabled.
                if not debug_enabled:
                    lower_prefix = prefix.lower()
                    if "quic" in lower_prefix and "DEBUG:" in text:
                        continue
                    # Suppress progress-bar style lines (keep true errors).
                    if ("ETA" in text or "%" in text) and ("ERROR:" not in text and "WARNING:" not in text):
                        # Typical yt-dlp progress bar line.
                        if text.lstrip().startswith("["):
                            continue

                line = f"[mpv {msg_level}] {prefix} {text}".strip()

                now = time.time()
                if last_mpv_line == line and (now - last_mpv_ts) < 2.0:
                    # Same line again within 2s: count it instead of logging it.
                    last_mpv_count += 1
                    last_mpv_ts = now
                    continue

                _flush_mpv_repeat()
                last_mpv_line = line
                last_mpv_count = 1
                last_mpv_ts = now
                _append_helper_log(line)
            except Exception:
                pass
            continue

        if msg.get("event") != "property-change":
            continue

        if msg.get("id") != OBS_ID_REQUEST:
            continue

        raw = msg.get("data")
        req = _parse_request(raw)
        if not req:
            try:
                if isinstance(raw, str) and raw.strip():
                    snippet = raw.strip().replace("\r", "").replace("\n", " ")
                    if len(snippet) > 220:
                        snippet = snippet[:220] + "…"
                    _append_helper_log(f"[request-raw] could not parse request json: {snippet}")
            except Exception:
                pass
            continue

        req_id = str(req.get("id") or "")
        op = str(req.get("op") or "").strip()
        data = req.get("data")
        pipeline_text = str(req.get("pipeline") or "").strip()
        seeds = req.get("seeds")

        if not req_id:
            continue

        # The same id seen twice in a row is a repeat notification, not a new request.
        if last_seen_id == req_id:
            continue
        last_seen_id = req_id

        try:
            label = pipeline_text or (f"op={op}" if op else "(empty)")
            _append_helper_log(f"\n[request {req_id}] {label}")
        except Exception:
            pass

        # Neither an op nor a pipeline: nothing to run and nothing to answer.
        if not op and not pipeline_text:
            continue

        try:
            if op:
                run = _run_op(op, data)
            else:
                run = _run_pipeline(pipeline_text, seeds=seeds)

            resp = {
                "id": req_id,
                "success": bool(run.get("success")),
                "stdout": run.get("stdout", ""),
                "stderr": run.get("stderr", ""),
                "error": run.get("error"),
                "table": run.get("table"),
            }
            if "choices" in run:
                resp["choices"] = run.get("choices")
        except Exception as exc:
            resp = {
                "id": req_id,
                "success": False,
                "stdout": "",
                "stderr": "",
                "error": f"{type(exc).__name__}: {exc}",
                "table": None,
            }

        # Persist helper output for debugging MPV menu interactions.
        try:
            if resp.get("stdout"):
                _append_helper_log("[stdout]\n" + str(resp.get("stdout")))
            if resp.get("stderr"):
                _append_helper_log("[stderr]\n" + str(resp.get("stderr")))
            if resp.get("error"):
                _append_helper_log("[error]\n" + str(resp.get("error")))
        except Exception:
            pass

        if not resp.get("success"):
            details = ""
            if resp.get("error"):
                details += str(resp.get("error"))
            if resp.get("stderr"):
                details = (details + "\n" if details else "") + str(resp.get("stderr"))
            log_path = _write_error_log(details, req_id=req_id)
            if log_path:
                resp["log_path"] = log_path

        # Serialize separately from sending: an unserializable value must still
        # produce *some* response, otherwise Lua waits until its timeout.
        try:
            payload = json.dumps(resp, ensure_ascii=False)
        except (TypeError, ValueError, RecursionError) as exc:
            _append_helper_log(
                f"[helper] response for request {req_id} not JSON-serializable: {type(exc).__name__}: {exc}"
            )
            try:
                payload = json.dumps(resp, ensure_ascii=False, default=str)
            except Exception:
                payload = json.dumps(
                    {
                        "id": req_id,
                        "success": False,
                        "stdout": "",
                        "stderr": "",
                        "error": f"{type(exc).__name__}: {exc}",
                        "table": None,
                    },
                    ensure_ascii=False,
                )

        try:
            # IMPORTANT: don't wait for a response here; waiting would consume
            # async events and can drop/skip property-change notifications.
            client.send_command_no_wait(["set_property", RESPONSE_PROP, payload])
        except Exception:
            # If posting results fails, there's nothing more useful to do.
            pass


# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())