h
This commit is contained in:
460
SYS/cli_parsing.py
Normal file
460
SYS/cli_parsing.py
Normal file
@@ -0,0 +1,460 @@
|
||||
"""CLI parsing helpers moved out of `CLI.py`.
|
||||
|
||||
Contains selection parsing and the REPL lexer so `CLI.py` can be smaller and
|
||||
these pure helpers are easier to test.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple
|
||||
|
||||
# Prompt-toolkit lexer types are optional at import time; fall back to lightweight
|
||||
# stubs if prompt_toolkit is not available so imports remain safe for testing.
|
||||
try:
|
||||
from prompt_toolkit.document import Document
|
||||
from prompt_toolkit.lexers import Lexer
|
||||
except Exception: # pragma: no cover - optional dependency
|
||||
Document = object # type: ignore
|
||||
|
||||
class Lexer: # simple fallback base
|
||||
pass
|
||||
|
||||
|
||||
class SelectionSyntax:
    """Parses @ selection syntax into 1-based indices."""

    _RANGE_RE = re.compile(r"^[0-9\-]+$")

    @staticmethod
    def parse(token: str) -> Optional[Set[int]]:
        """Return 1-based indices or None when not a concrete selection.

        Concrete selections:
          - @2
          - @2-5
          - @{1,3,5}
          - @2,5,7-9

        Special (non-concrete) selectors return None:
          - @* (select all)
          - @.. (history prev)
          - @,, (history next)
        """
        if not token or not token.startswith("@"):
            return None

        body = token[1:].strip()
        if body in (".", ",", "*"):
            # History / select-all markers carry no concrete indices.
            return None

        # @{1,3,5} is equivalent to @1,3,5 — drop a symmetric brace wrapper.
        if body.startswith("{") and body.endswith("}"):
            body = body[1:-1].strip()

        picked: Set[int] = set()
        for chunk in (c.strip() for c in body.split(",")):
            if not chunk:
                continue

            if "-" in chunk:
                # Inclusive range "lo-hi"; both endpoints must be positive ints.
                lo_text, _, hi_text = chunk.partition("-")
                lo_text, hi_text = lo_text.strip(), hi_text.strip()
                if not lo_text or not hi_text:
                    return None
                try:
                    lo, hi = int(lo_text), int(hi_text)
                except ValueError:
                    return None
                if lo <= 0 or hi <= 0 or lo > hi:
                    return None
                picked.update(range(lo, hi + 1))
            else:
                # Single positive index.
                try:
                    idx = int(chunk)
                except ValueError:
                    return None
                if idx <= 0:
                    return None
                picked.add(idx)

        return picked or None
class SelectionFilterSyntax:
    """Parses and applies @"COL:filter" selection filters.

    Notes:
    - CLI tokenization (shlex) strips quotes, so a user input of `@"TITLE:foo"`
      arrives as `@TITLE:foo`. We support both forms.
    - Filters apply to the *current selectable table items* (in-memory), not to
      provider searches.
    """

    # Leading comparator of a filter expression. Two-character operators come
    # first in the alternation so ">=" wins over ">".
    _OP_RE = re.compile(r"^(>=|<=|!=|==|>|<|=)\s*(.+)$")
    # One "<number><unit>" chunk of a 1h2m3s-style duration (case-insensitive).
    _DUR_TOKEN_RE = re.compile(r"(?i)(\d+)\s*([hms])")

    @staticmethod
    def parse(token: str) -> Optional[List[Tuple[str, str]]]:
        """Return list of (column, raw_expression) or None when not a filter token.

        Supported forms:
          - @COL:expr        single condition
          - @A:x,B:y         multiple AND-ed conditions
          - @"COL:expr"      quoted variant (quotes are stripped)
          - @text            shorthand for a Title-contains filter
        """
        if not token or not str(token).startswith("@"):
            return None

        if token.strip() == "@*":
            return None

        # If this is a concrete numeric selection (@2, @1-3, @{1,3}), do not
        # treat it as a filter; SelectionSyntax owns those tokens.
        try:
            if SelectionSyntax.parse(str(token)) is not None:
                return None
        except Exception:
            pass

        raw = str(token)[1:].strip()
        if not raw:
            return None

        # If quotes survived tokenization, strip a single symmetric wrapper.
        if len(raw) >= 2 and raw[0] == raw[-1] and raw[0] in ('"', "'"):
            raw = raw[1:-1].strip()

        # Shorthand: @"foo" means Title contains "foo".
        if ":" not in raw:
            if raw:
                return [("Title", raw)]
            return None

        parts = [p.strip() for p in raw.split(",") if p.strip()]
        conditions: List[Tuple[str, str]] = []
        for part in parts:
            # Every comma-separated part must carry its own COL:expr pair.
            if ":" not in part:
                return None
            col, expr = part.split(":", 1)
            col = str(col or "").strip()
            expr = str(expr or "").strip()
            if not col:
                return None
            conditions.append((col, expr))

        return conditions if conditions else None

    @staticmethod
    def _norm_key(text: str) -> str:
        """Lower-case *text* and collapse whitespace runs to single spaces."""
        return re.sub(r"\s+", " ", str(text or "").strip().lower())

    @staticmethod
    def _item_column_map(item: Any) -> Dict[str, str]:
        """Build a {normalized column name: display text} map for *item*.

        Accepts either a dict (optional "columns" list of (name, value) pairs
        plus direct keys, which take precedence) or an object exposing a
        ``columns`` attribute and common scalar attributes.
        """
        out: Dict[str, str] = {}

        def _set(k: Any, v: Any) -> None:
            # Store a stringified value under the normalized key; silently
            # skip anything unset, unnamed, or unstringifiable.
            key = SelectionFilterSyntax._norm_key(str(k or ""))
            if not key:
                return
            if v is None:
                return
            try:
                if isinstance(v, (list, tuple, set)):
                    text = ", ".join(str(x) for x in v if x is not None)
                else:
                    text = str(v)
            except Exception:
                return
            out[key] = text

        if isinstance(item, dict):
            # Display columns (primary UX surface)
            cols = item.get("columns")
            if isinstance(cols, list):
                for pair in cols:
                    try:
                        if isinstance(pair, (list, tuple)) and len(pair) == 2:
                            _set(pair[0], pair[1])
                    except Exception:
                        continue
            # Direct keys as fallback (set last, so they win on collisions)
            for k, v in item.items():
                if k == "columns":
                    continue
                _set(k, v)
        else:
            cols = getattr(item, "columns", None)
            if isinstance(cols, list):
                for pair in cols:
                    try:
                        if isinstance(pair, (list, tuple)) and len(pair) == 2:
                            _set(pair[0], pair[1])
                    except Exception:
                        continue
            for k in ("title", "path", "detail", "provider", "store", "table"):
                try:
                    _set(k, getattr(item, k, None))
                except Exception:
                    pass

        return out

    @staticmethod
    def _parse_duration_seconds(text: str) -> Optional[int]:
        """Parse *text* as a duration and return whole seconds, or None.

        Accepted forms: plain seconds ("90"), clock ("3:05" or "1:02:03"),
        and unit tokens ("1h2m3s" — any combination, any order).
        """
        s = str(text or "").strip()
        if not s:
            return None

        if s.isdigit():
            try:
                return max(0, int(s))
            except Exception:
                return None

        # clock format: M:SS or H:MM:SS
        if ":" in s:
            parts = [p.strip() for p in s.split(":")]
            if len(parts) == 2 and all(p.isdigit() for p in parts):
                m, sec = parts
                return max(0, int(m) * 60 + int(sec))
            if len(parts) == 3 and all(p.isdigit() for p in parts):
                h, m, sec = parts
                return max(0, int(h) * 3600 + int(m) * 60 + int(sec))

        # token format: 1h2m3s (tokens can appear in any combination)
        total = 0
        found = False
        for m in SelectionFilterSyntax._DUR_TOKEN_RE.finditer(s):
            found = True
            n = int(m.group(1))
            unit = m.group(2).lower()
            if unit == "h":
                total += n * 3600
            elif unit == "m":
                total += n * 60
            elif unit == "s":
                total += n
        if found:
            return max(0, int(total))

        return None

    @staticmethod
    def _parse_float(text: str) -> Optional[float]:
        """Parse *text* as a float (thousands separators allowed), or None."""
        s = str(text or "").strip()
        if not s:
            return None
        s = s.replace(",", "")
        try:
            return float(s)
        except Exception:
            return None

    @staticmethod
    def _parse_op(expr: str) -> Tuple[Optional[str], str]:
        """Split *expr* into (operator, operand).

        Returns (None, expr) when no leading comparator is present; callers
        treat that as a substring-contains test.
        """
        text = str(expr or "").strip()
        if not text:
            return None, ""
        m = SelectionFilterSyntax._OP_RE.match(text)
        if not m:
            return None, text
        return m.group(1), str(m.group(2) or "").strip()

    @staticmethod
    def matches(item: Any, conditions: List[Tuple[str, str]]) -> bool:
        """Return True when *item* satisfies every (column, expression) pair.

        Without an operator, the expression is a case-insensitive substring
        test. With an operator, both sides are compared as durations when
        either looks time-like, then as numbers, then (for =/!= only) as
        case-insensitive strings.
        """
        colmap = SelectionFilterSyntax._item_column_map(item)

        for col, expr in conditions:
            key = SelectionFilterSyntax._norm_key(col)
            # _item_column_map already stores normalized keys, so one lookup
            # covers every spelling of the column name. (The old per-key
            # "alias" branch repeated the identical lookup and was a no-op.)
            actual = colmap.get(key)
            if actual is None:
                return False

            op, rhs = SelectionFilterSyntax._parse_op(expr)
            left_text = str(actual or "").strip()
            right_text = str(rhs or "").strip()

            if op is None:
                # No comparator: case-insensitive containment.
                if not right_text:
                    return False
                if right_text.lower() not in left_text.lower():
                    return False
                continue

            # Comparator: try duration parsing first when it looks time-like.
            prefer_duration = (
                key == "duration"
                or any(ch in right_text for ch in (":", "h", "m", "s"))
                or any(ch in left_text for ch in (":", "h", "m", "s"))
            )

            left_num: Optional[float] = None
            right_num: Optional[float] = None

            if prefer_duration:
                ldur = SelectionFilterSyntax._parse_duration_seconds(left_text)
                rdur = SelectionFilterSyntax._parse_duration_seconds(right_text)
                if ldur is not None and rdur is not None:
                    left_num = float(ldur)
                    right_num = float(rdur)

            if left_num is None or right_num is None:
                left_num = SelectionFilterSyntax._parse_float(left_text)
                right_num = SelectionFilterSyntax._parse_float(right_text)

            if left_num is not None and right_num is not None:
                # Numeric comparison path.
                if op in ("=", "=="):
                    if not (left_num == right_num):
                        return False
                elif op == "!=":
                    if not (left_num != right_num):
                        return False
                elif op == ">":
                    if not (left_num > right_num):
                        return False
                elif op == ">=":
                    if not (left_num >= right_num):
                        return False
                elif op == "<":
                    if not (left_num < right_num):
                        return False
                elif op == "<=":
                    if not (left_num <= right_num):
                        return False
                else:
                    return False
                continue

            # Fallback to string equality for =/!= when numeric parsing fails.
            if op in ("=", "=="):
                if left_text.lower() != right_text.lower():
                    return False
            elif op == "!=":
                if left_text.lower() == right_text.lower():
                    return False
            else:
                return False

        return True
class MedeiaLexer(Lexer):
    """Prompt-toolkit lexer that highlights the REPL pipeline grammar.

    Emits token classes: ``cmdlet``, ``pipe``, ``argument``, ``value``,
    ``string``, ``selection_at`` and ``selection_range``.
    """

    # All patterns are compiled once at class creation instead of inside
    # get_line/_emit_keyed_value, which previously re-looked them up for
    # every rendered line and every word.

    # Splits a line into whitespace / pipe / quoted string / bare word.
    _TOKEN_RE = re.compile(
        r"""
        (\s+) |                                   # 1. Whitespace
        (\|) |                                    # 2. Pipe
        ("(?:[^"\\]|\\.)*"|'(?:[^'\\]|\\.)*') |   # 3. Quoted string
        ([^\s\|]+)                                # 4. Word
        """,
        re.VERBOSE,
    )
    # `key:` prefix of a keyed value spec (e.g. clip:..., item:...).
    _KEY_PREFIX_RE = re.compile(r"^([A-Za-z_][A-Za-z0-9_-]*:)(.*)$")
    # Windows drive paths (e.g. C:\foo or D:/bar) must not be treated as keys.
    _DRIVE_RE = re.compile(r"^[A-Za-z]:[\\/]")
    # Concrete @-selection payloads: digits, ranges, '*' and commas.
    _SELECTION_RE = re.compile(r"[0-9\-\*,]+")

    def lex_document(self, document: "Document"):  # type: ignore[override]

        def get_line(lineno: int):
            line = document.lines[lineno]
            tokens: List[tuple[str, str]] = []
            # The first word of the line (and of each pipe segment) is a cmdlet.
            is_cmdlet = True

            def _emit_keyed_value(word: str) -> bool:
                """Emit `key:` prefixes (comma-separated) as argument tokens.

                Designed for values like:
                    clip:3m4s-3m14s,1h22m-1h33m,item:2-3

                Avoids special-casing URLs (://) and Windows drive paths (C:\\...).
                Returns True if it handled the token.
                """
                if not word or ":" not in word:
                    return False
                # Avoid URLs and common scheme patterns.
                if "://" in word:
                    return False
                # Avoid Windows drive paths (e.g., C:\\foo or D:/bar)
                if MedeiaLexer._DRIVE_RE.match(word):
                    return False

                parts = word.split(",")
                handled_any = False
                for i, part in enumerate(parts):
                    if i > 0:
                        # Re-emit the comma that split() consumed.
                        tokens.append(("class:value", ","))
                    if part == "":
                        continue
                    m = MedeiaLexer._KEY_PREFIX_RE.match(part)
                    if m:
                        tokens.append(("class:argument", m.group(1)))
                        if m.group(2):
                            tokens.append(("class:value", m.group(2)))
                        handled_any = True
                    else:
                        tokens.append(("class:value", part))
                        handled_any = True

                return handled_any

            for match in MedeiaLexer._TOKEN_RE.finditer(line):
                ws, pipe, quote, word = match.groups()
                if ws:
                    tokens.append(("", ws))
                    continue
                if pipe:
                    tokens.append(("class:pipe", pipe))
                    is_cmdlet = True
                    continue
                if quote:
                    # If the quoted token contains a keyed spec (clip:/item:/hash:),
                    # highlight the `key:` portion in argument-blue even inside quotes.
                    if len(quote) >= 2 and quote[0] == quote[-1] and quote[0] in ('"', "'"):
                        q = quote[0]
                        inner = quote[1:-1]
                        start_index = len(tokens)
                        if _emit_keyed_value(inner):
                            # _emit_keyed_value already appended tokens for inner; insert opening quote
                            # before that chunk, then add the closing quote.
                            tokens.insert(start_index, ("class:string", q))
                            tokens.append(("class:string", q))
                            is_cmdlet = False
                            continue

                    tokens.append(("class:string", quote))
                    is_cmdlet = False
                    continue
                if not word:
                    continue

                if word.startswith("@"):  # selection tokens
                    rest = word[1:]
                    if rest and MedeiaLexer._SELECTION_RE.fullmatch(rest):
                        tokens.append(("class:selection_at", "@"))
                        tokens.append(("class:selection_range", rest))
                        is_cmdlet = False
                        continue
                    if rest == "":
                        tokens.append(("class:selection_at", "@"))
                        is_cmdlet = False
                        continue

                if is_cmdlet:
                    tokens.append(("class:cmdlet", word))
                    is_cmdlet = False
                elif word.startswith("-"):
                    tokens.append(("class:argument", word))
                else:
                    if not _emit_keyed_value(word):
                        tokens.append(("class:value", word))

            return tokens

        return get_line
Reference in New Issue
Block a user