Files
Medios-Macina/SYS/cli_parsing.py
2026-01-31 19:57:09 -08:00

475 lines
16 KiB
Python

"""CLI parsing helpers moved out of `CLI.py`.
Contains selection parsing and the REPL lexer so `CLI.py` can be smaller and
these pure helpers are easier to test.
"""
from __future__ import annotations
import re
from typing import Any, Callable, Dict, List, Optional, Set, Tuple
from SYS.logger import debug
# Prompt-toolkit lexer types are optional at import time; fall back to lightweight
# stubs if prompt_toolkit is not available so imports remain safe for testing.
try:
from prompt_toolkit.document import Document
from prompt_toolkit.lexers import Lexer as _PTK_Lexer
except Exception: # pragma: no cover - optional dependency
Document = object # type: ignore
# Fallback to a simple object when prompt_toolkit is not available
_PTK_Lexer = object # type: ignore
# Expose a stable name used by the rest of the module
Lexer = _PTK_Lexer
# Pre-compiled regexes for the lexer (avoid recompiling on every call)
TOKEN_PATTERN = re.compile(
r"""
(\s+) | # 1. Whitespace
(\|) | # 2. Pipe
("(?:[^"\\]|\\.)*"|'(?:[^'\\]|\\.)*') | # 3. Quoted string
([^\s\|]+) # 4. Word
""",
re.VERBOSE,
)
KEY_PREFIX_RE = re.compile(r"^([A-Za-z_][A-Za-z0-9_-]*:)(.*)$")
SELECTION_RANGE_RE = re.compile(r"^[0-9\-\*,]+$")
DRIVE_RE = re.compile(r"^[A-Za-z]:[\\/]")
class SelectionSyntax:
    """Parses @ selection syntax into 1-based indices."""

    _RANGE_RE = re.compile(r"^[0-9\-]+$")

    @staticmethod
    def parse(token: str) -> Optional[Set[int]]:
        """Return 1-based indices or None when not a concrete selection.

        Concrete selections:
          - @2
          - @2-5
          - @{1,3,5}
          - @2,5,7-9
        Special (non-concrete) selectors return None:
          - @* (select all)
          - @.. (history prev)
          - @,, (history next)
        """
        if not token or not token.startswith("@"):
            return None
        body = token[1:].strip()
        # Special single-character selectors are handled elsewhere.
        if body in (".", ",", "*"):
            return None
        # Allow an optional {...} wrapper around the whole selection.
        if body.startswith("{") and body.endswith("}"):
            body = body[1:-1].strip()
        picked: Set[int] = set()
        for chunk in body.split(","):
            chunk = chunk.strip()
            if not chunk:
                continue
            if "-" in chunk:
                # Range "lo-hi": both ends required, positive, and ordered.
                lo_text, _, hi_text = chunk.partition("-")
                lo_text = lo_text.strip()
                hi_text = hi_text.strip()
                if not lo_text or not hi_text:
                    return None
                try:
                    lo = int(lo_text)
                    hi = int(hi_text)
                except ValueError:
                    return None
                if lo < 1 or hi < 1 or lo > hi:
                    return None
                picked.update(range(lo, hi + 1))
            else:
                # Single 1-based index.
                try:
                    num = int(chunk)
                except ValueError:
                    return None
                if num < 1:
                    return None
                picked.add(num)
        return picked or None
class SelectionFilterSyntax:
    """Parses and applies @"COL:filter" selection filters.

    Notes:
    - CLI tokenization (shlex) strips quotes, so a user input of `@"TITLE:foo"`
      arrives as `@TITLE:foo`. We support both forms.
    - Filters apply to the *current selectable table items* (in-memory), not to
      provider searches.
    """

    # Leading comparison operator, e.g. ">=10" -> (">=", "10").
    _OP_RE = re.compile(r"^(>=|<=|!=|==|>|<|=)\s*(.+)$")
    # Duration tokens like "1h", "22m", "3s" (case-insensitive, any combination).
    _DUR_TOKEN_RE = re.compile(r"(?i)(\d+)\s*([hms])")
    # Numeric comparators used by `matches`; keys mirror the _OP_RE alternatives.
    _NUM_OPS: Dict[str, Callable[[float, float], bool]] = {
        "=": lambda a, b: a == b,
        "==": lambda a, b: a == b,
        "!=": lambda a, b: a != b,
        ">": lambda a, b: a > b,
        ">=": lambda a, b: a >= b,
        "<": lambda a, b: a < b,
        "<=": lambda a, b: a <= b,
    }

    @staticmethod
    def parse(token: str) -> Optional[List[Tuple[str, str]]]:
        """Return list of (column, raw_expression) or None when not a filter token.

        Non-filter tokens: anything not @-prefixed, "@*", concrete numeric
        selections (@2, @1-3, @{1,3}), and empty selectors.
        """
        if not token or not str(token).startswith("@"):
            return None
        if token.strip() == "@*":
            return None
        # Concrete numeric selections are selections, not filters.
        try:
            if SelectionSyntax.parse(str(token)) is not None:
                return None
        except Exception as exc:
            debug("SelectionSyntax.parse failed during filter detection: %s", exc, exc_info=True)
        raw = str(token)[1:].strip()
        if not raw:
            return None
        # If quotes survived tokenization, strip a single symmetric wrapper.
        if len(raw) >= 2 and raw[0] == raw[-1] and raw[0] in ('"', "'"):
            raw = raw[1:-1].strip()
        # Shorthand: @"foo" means Title contains "foo".
        if ":" not in raw:
            if raw:
                return [("Title", raw)]
            return None
        parts = [p.strip() for p in raw.split(",") if p.strip()]
        conditions: List[Tuple[str, str]] = []
        for part in parts:
            if ":" not in part:
                return None
            col, expr = part.split(":", 1)
            col = str(col or "").strip()
            expr = str(expr or "").strip()
            if not col:
                return None
            conditions.append((col, expr))
        return conditions if conditions else None

    @staticmethod
    def _norm_key(text: str) -> str:
        """Lower-case *text* and collapse internal whitespace to single spaces."""
        return re.sub(r"\s+", " ", str(text or "").strip().lower())

    @staticmethod
    def _item_column_map(item: Any) -> Dict[str, str]:
        """Build a {normalized column name -> display text} map for *item*.

        Supports dict items (a "columns" list of (name, value) pairs plus the
        remaining direct keys) and arbitrary objects (a "columns" attribute
        plus a fixed set of common attributes).
        """
        out: Dict[str, str] = {}

        def _set(k: Any, v: Any) -> None:
            # Skip unusable keys/None values; stringify collections as "a, b, c".
            key = SelectionFilterSyntax._norm_key(str(k or ""))
            if not key:
                return
            if v is None:
                return
            try:
                if isinstance(v, (list, tuple, set)):
                    text = ", ".join(str(x) for x in v if x is not None)
                else:
                    text = str(v)
            except Exception:
                return
            out[key] = text

        if isinstance(item, dict):
            # Display columns (primary UX surface)
            cols = item.get("columns")
            if isinstance(cols, list):
                for pair in cols:
                    try:
                        if isinstance(pair, (list, tuple)) and len(pair) == 2:
                            _set(pair[0], pair[1])
                    except Exception:
                        continue
            # Direct keys as fallback
            for k, v in item.items():
                if k == "columns":
                    continue
                _set(k, v)
        else:
            cols = getattr(item, "columns", None)
            if isinstance(cols, list):
                for pair in cols:
                    try:
                        if isinstance(pair, (list, tuple)) and len(pair) == 2:
                            _set(pair[0], pair[1])
                    except Exception:
                        continue
            # Common attributes on result-item objects.
            for k in ("title", "path", "detail", "provider", "store", "table"):
                try:
                    _set(k, getattr(item, k, None))
                except Exception as exc:
                    debug("SelectionFilterSyntax: failed to _set attribute %s on item: %s", k, exc, exc_info=True)
        return out

    @staticmethod
    def _parse_duration_seconds(text: str) -> Optional[int]:
        """Parse *text* as a duration in whole seconds.

        Accepts plain seconds ("90"), clock format ("M:SS" / "H:MM:SS"), and
        token format ("1h2m3s", tokens in any combination). Returns None when
        *text* is not duration-like.
        """
        s = str(text or "").strip()
        if not s:
            return None
        if s.isdigit():
            try:
                return max(0, int(s))
            except Exception:
                return None
        # clock format: M:SS or H:MM:SS
        if ":" in s:
            parts = [p.strip() for p in s.split(":")]
            if len(parts) == 2 and all(p.isdigit() for p in parts):
                m_str, sec_str = parts
                return max(0, int(m_str) * 60 + int(sec_str))
            if len(parts) == 3 and all(p.isdigit() for p in parts):
                h_str, m_str, sec_str = parts
                return max(0, int(h_str) * 3600 + int(m_str) * 60 + int(sec_str))
        # token format: 1h2m3s (tokens can appear in any combination)
        total = 0
        found = False
        for match in SelectionFilterSyntax._DUR_TOKEN_RE.finditer(s):
            found = True
            n = int(match.group(1))
            unit = match.group(2).lower()
            if unit == "h":
                total += n * 3600
            elif unit == "m":
                total += n * 60
            elif unit == "s":
                total += n
        if found:
            return max(0, int(total))
        return None

    @staticmethod
    def _parse_float(text: str) -> Optional[float]:
        """Parse *text* as a float; thousands-separator commas are stripped."""
        s = str(text or "").strip()
        if not s:
            return None
        s = s.replace(",", "")
        try:
            return float(s)
        except Exception:
            return None

    @staticmethod
    def _parse_op(expr: str) -> Tuple[Optional[str], str]:
        """Split a leading comparison operator off *expr*.

        Returns (op, rhs); op is None for operator-less (containment)
        expressions, in which case rhs is the whole trimmed expression.
        """
        text = str(expr or "").strip()
        if not text:
            return None, ""
        m = SelectionFilterSyntax._OP_RE.match(text)
        if not m:
            return None, text
        return m.group(1), str(m.group(2) or "").strip()

    @staticmethod
    def matches(item: Any, conditions: List[Tuple[str, str]]) -> bool:
        """Return True when *item* satisfies every condition (AND semantics).

        Without an operator a condition is a case-insensitive substring match.
        With an operator, values are compared numerically (durations parsed
        first when either side looks time-like), falling back to
        case-insensitive string equality for =/==/!= only.
        """
        colmap = SelectionFilterSyntax._item_column_map(item)
        for col, expr in conditions:
            key = SelectionFilterSyntax._norm_key(col)
            actual = colmap.get(key)
            # NOTE(review): a former "alias" branch here re-fetched the exact
            # same key (colmap.get("duration") when key == "duration"), which
            # was a no-op and has been removed. Add real alias mappings here
            # if needed (e.g. "length" -> "duration").
            if actual is None:
                return False
            op, rhs = SelectionFilterSyntax._parse_op(expr)
            left_text = str(actual or "").strip()
            right_text = str(rhs or "").strip()
            if op is None:
                # Substring containment, case-insensitive.
                if not right_text:
                    return False
                if right_text.lower() not in left_text.lower():
                    return False
                continue
            # Comparator: try duration parsing first when it looks time-like.
            prefer_duration = (
                key == "duration"
                or any(ch in right_text for ch in (":", "h", "m", "s"))
                or any(ch in left_text for ch in (":", "h", "m", "s"))
            )
            left_num: Optional[float] = None
            right_num: Optional[float] = None
            if prefer_duration:
                ldur = SelectionFilterSyntax._parse_duration_seconds(left_text)
                rdur = SelectionFilterSyntax._parse_duration_seconds(right_text)
                if ldur is not None and rdur is not None:
                    left_num = float(ldur)
                    right_num = float(rdur)
            if left_num is None or right_num is None:
                left_num = SelectionFilterSyntax._parse_float(left_text)
                right_num = SelectionFilterSyntax._parse_float(right_text)
            if left_num is not None and right_num is not None:
                cmp = SelectionFilterSyntax._NUM_OPS.get(op)
                if cmp is None or not cmp(left_num, right_num):
                    return False
                continue
            # Fallback to string equality for =/!= when numeric parsing fails.
            if op in ("=", "=="):
                if left_text.lower() != right_text.lower():
                    return False
            elif op == "!=":
                if left_text.lower() == right_text.lower():
                    return False
            else:
                return False
        return True
class MedeiaLexer(Lexer):
    """prompt-toolkit lexer that colorizes REPL input: cmdlets, pipes, quoted
    strings, @-selections, `-flags`, and `key:value` argument specs."""

    def lex_document(self, document: "Document") -> Callable[[int], List[Tuple[str, str]]]:  # type: ignore[override]
        def get_line(lineno: int) -> List[Tuple[str, str]]:
            """Return token list for a single input line (used by prompt-toolkit)."""
            line = document.lines[lineno]
            tokens: List[Tuple[str, str]] = []
            # Using TOKEN_PATTERN precompiled at module scope.
            # The first word on the line (and after each pipe) is a cmdlet name.
            is_cmdlet = True

            def _emit_keyed_value(word: str) -> bool:
                """Emit `key:` prefixes (comma-separated) as argument tokens.

                Designed for values like:
                    clip:3m4s-3m14s,1h22m-1h33m,item:2-3
                Avoids special-casing URLs (://) and Windows drive paths (C:\\...).
                Returns True if it handled the token.
                """
                if not word or ":" not in word:
                    return False
                # Avoid URLs and common scheme patterns.
                if "://" in word:
                    return False
                # Avoid Windows drive paths (e.g., C:\\foo or D:/bar)
                if DRIVE_RE.match(word):
                    return False
                parts = word.split(",")
                handled_any = False
                for i, part in enumerate(parts):
                    if i > 0:
                        # Re-emit the comma separator consumed by split().
                        tokens.append(("class:value", ","))
                    if part == "":
                        continue
                    m = KEY_PREFIX_RE.match(part)
                    if m:
                        # `key:` prefix in argument color, remainder as value.
                        tokens.append(("class:argument", m.group(1)))
                        if m.group(2):
                            tokens.append(("class:value", m.group(2)))
                        handled_any = True
                    else:
                        tokens.append(("class:value", part))
                        handled_any = True
                return handled_any

            for match in TOKEN_PATTERN.finditer(line):
                ws, pipe, quote, word = match.groups()
                if ws:
                    tokens.append(("", ws))
                    continue
                if pipe:
                    tokens.append(("class:pipe", pipe))
                    # The next word after a pipe starts a new cmdlet.
                    is_cmdlet = True
                    continue
                if quote:
                    # If the quoted token contains a keyed spec (clip:/item:/hash:),
                    # highlight the `key:` portion in argument-blue even inside quotes.
                    if len(quote) >= 2 and quote[0] == quote[-1] and quote[0] in ('"', "'"):
                        q = quote[0]
                        inner = quote[1:-1]
                        start_index = len(tokens)
                        if _emit_keyed_value(inner):
                            # Wrap the tokens just emitted for `inner` in the
                            # original quote characters.
                            tokens.insert(start_index, ("class:string", q))
                            tokens.append(("class:string", q))
                            is_cmdlet = False
                            continue
                    tokens.append(("class:string", quote))
                    is_cmdlet = False
                    continue
                if not word:
                    continue
                if word.startswith("@"):  # selection tokens
                    rest = word[1:]
                    if rest and SELECTION_RANGE_RE.fullmatch(rest):
                        tokens.append(("class:selection_at", "@"))
                        tokens.append(("class:selection_range", rest))
                        is_cmdlet = False
                        continue
                    if rest and ":" in rest:
                        tokens.append(("class:selection_at", "@"))
                        tokens.append(("class:selection_filter", rest))
                        is_cmdlet = False
                        continue
                    if rest == "":
                        tokens.append(("class:selection_at", "@"))
                        is_cmdlet = False
                        continue
                if is_cmdlet:
                    tokens.append(("class:cmdlet", word))
                    is_cmdlet = False
                elif word.startswith("-"):
                    tokens.append(("class:argument", word))
                else:
                    if not _emit_keyed_value(word):
                        tokens.append(("class:value", word))
            return tokens

        return get_line