h
This commit is contained in:
460
SYS/cli_parsing.py
Normal file
460
SYS/cli_parsing.py
Normal file
@@ -0,0 +1,460 @@
|
||||
"""CLI parsing helpers moved out of `CLI.py`.
|
||||
|
||||
Contains selection parsing and the REPL lexer so `CLI.py` can be smaller and
|
||||
these pure helpers are easier to test.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
from typing import Any, Dict, List, Optional, Set, Tuple
|
||||
|
||||
# Prompt-toolkit lexer types are optional at import time; fall back to lightweight
|
||||
# stubs if prompt_toolkit is not available so imports remain safe for testing.
|
||||
try:
|
||||
from prompt_toolkit.document import Document
|
||||
from prompt_toolkit.lexers import Lexer
|
||||
except Exception: # pragma: no cover - optional dependency
|
||||
Document = object # type: ignore
|
||||
|
||||
class Lexer: # simple fallback base
|
||||
pass
|
||||
|
||||
|
||||
class SelectionSyntax:
    """Parses @ selection syntax into 1-based indices."""

    _RANGE_RE = re.compile(r"^[0-9\-]+$")

    @staticmethod
    def parse(token: str) -> Optional[Set[int]]:
        """Return 1-based indices or None when not a concrete selection.

        Concrete selections:
          - @2
          - @2-5
          - @{1,3,5}
          - @2,5,7-9

        Special (non-concrete) selectors return None:
          - @* (select all)
          - @.. (history prev)
          - @,, (history next)
        """
        if not token or not token.startswith("@"):
            return None

        body = token[1:].strip()
        if body in (".", ",", "*"):
            # History / select-all markers carry no concrete indices.
            return None

        # @{1,3,5} is equivalent to @1,3,5 — drop a symmetric brace wrapper.
        if body.startswith("{") and body.endswith("}"):
            body = body[1:-1].strip()

        picked: Set[int] = set()
        for chunk in (c.strip() for c in body.split(",")):
            if not chunk:
                continue

            if "-" in chunk:
                # Inclusive range "lo-hi"; both endpoints must be positive ints.
                lo_text, _, hi_text = chunk.partition("-")
                lo_text, hi_text = lo_text.strip(), hi_text.strip()
                if not lo_text or not hi_text:
                    return None
                try:
                    lo, hi = int(lo_text), int(hi_text)
                except ValueError:
                    return None
                if lo <= 0 or hi <= 0 or lo > hi:
                    return None
                picked.update(range(lo, hi + 1))
            else:
                # Single positive index.
                try:
                    idx = int(chunk)
                except ValueError:
                    return None
                if idx <= 0:
                    return None
                picked.add(idx)

        return picked or None
class SelectionFilterSyntax:
    """Parses and applies @"COL:filter" selection filters.

    Notes:
    - CLI tokenization (shlex) strips quotes, so a user input of `@"TITLE:foo"`
      arrives as `@TITLE:foo`. We support both forms.
    - Filters apply to the *current selectable table items* (in-memory), not to
      provider searches.
    """

    # Leading comparator of a filter expression. Two-character operators come
    # first in the alternation so ">=" wins over ">".
    _OP_RE = re.compile(r"^(>=|<=|!=|==|>|<|=)\s*(.+)$")
    # One "<number><unit>" chunk of a 1h2m3s-style duration (case-insensitive).
    _DUR_TOKEN_RE = re.compile(r"(?i)(\d+)\s*([hms])")

    @staticmethod
    def parse(token: str) -> Optional[List[Tuple[str, str]]]:
        """Return list of (column, raw_expression) or None when not a filter token.

        Supported forms:
          - @COL:expr        single condition
          - @A:x,B:y         multiple AND-ed conditions
          - @"COL:expr"      quoted variant (quotes are stripped)
          - @text            shorthand for a Title-contains filter
        """
        if not token or not str(token).startswith("@"):
            return None

        if token.strip() == "@*":
            return None

        # If this is a concrete numeric selection (@2, @1-3, @{1,3}), do not
        # treat it as a filter; SelectionSyntax owns those tokens.
        try:
            if SelectionSyntax.parse(str(token)) is not None:
                return None
        except Exception:
            pass

        raw = str(token)[1:].strip()
        if not raw:
            return None

        # If quotes survived tokenization, strip a single symmetric wrapper.
        if len(raw) >= 2 and raw[0] == raw[-1] and raw[0] in ('"', "'"):
            raw = raw[1:-1].strip()

        # Shorthand: @"foo" means Title contains "foo".
        if ":" not in raw:
            if raw:
                return [("Title", raw)]
            return None

        parts = [p.strip() for p in raw.split(",") if p.strip()]
        conditions: List[Tuple[str, str]] = []
        for part in parts:
            # Every comma-separated part must carry its own COL:expr pair.
            if ":" not in part:
                return None
            col, expr = part.split(":", 1)
            col = str(col or "").strip()
            expr = str(expr or "").strip()
            if not col:
                return None
            conditions.append((col, expr))

        return conditions if conditions else None

    @staticmethod
    def _norm_key(text: str) -> str:
        """Lower-case *text* and collapse whitespace runs to single spaces."""
        return re.sub(r"\s+", " ", str(text or "").strip().lower())

    @staticmethod
    def _item_column_map(item: Any) -> Dict[str, str]:
        """Build a {normalized column name: display text} map for *item*.

        Accepts either a dict (optional "columns" list of (name, value) pairs
        plus direct keys, which take precedence) or an object exposing a
        ``columns`` attribute and common scalar attributes.
        """
        out: Dict[str, str] = {}

        def _set(k: Any, v: Any) -> None:
            # Store a stringified value under the normalized key; silently
            # skip anything unset, unnamed, or unstringifiable.
            key = SelectionFilterSyntax._norm_key(str(k or ""))
            if not key:
                return
            if v is None:
                return
            try:
                if isinstance(v, (list, tuple, set)):
                    text = ", ".join(str(x) for x in v if x is not None)
                else:
                    text = str(v)
            except Exception:
                return
            out[key] = text

        if isinstance(item, dict):
            # Display columns (primary UX surface)
            cols = item.get("columns")
            if isinstance(cols, list):
                for pair in cols:
                    try:
                        if isinstance(pair, (list, tuple)) and len(pair) == 2:
                            _set(pair[0], pair[1])
                    except Exception:
                        continue
            # Direct keys as fallback (set last, so they win on collisions)
            for k, v in item.items():
                if k == "columns":
                    continue
                _set(k, v)
        else:
            cols = getattr(item, "columns", None)
            if isinstance(cols, list):
                for pair in cols:
                    try:
                        if isinstance(pair, (list, tuple)) and len(pair) == 2:
                            _set(pair[0], pair[1])
                    except Exception:
                        continue
            for k in ("title", "path", "detail", "provider", "store", "table"):
                try:
                    _set(k, getattr(item, k, None))
                except Exception:
                    pass

        return out

    @staticmethod
    def _parse_duration_seconds(text: str) -> Optional[int]:
        """Parse *text* as a duration and return whole seconds, or None.

        Accepted forms: plain seconds ("90"), clock ("3:05" or "1:02:03"),
        and unit tokens ("1h2m3s" — any combination, any order).
        """
        s = str(text or "").strip()
        if not s:
            return None

        if s.isdigit():
            try:
                return max(0, int(s))
            except Exception:
                return None

        # clock format: M:SS or H:MM:SS
        if ":" in s:
            parts = [p.strip() for p in s.split(":")]
            if len(parts) == 2 and all(p.isdigit() for p in parts):
                m, sec = parts
                return max(0, int(m) * 60 + int(sec))
            if len(parts) == 3 and all(p.isdigit() for p in parts):
                h, m, sec = parts
                return max(0, int(h) * 3600 + int(m) * 60 + int(sec))

        # token format: 1h2m3s (tokens can appear in any combination)
        total = 0
        found = False
        for m in SelectionFilterSyntax._DUR_TOKEN_RE.finditer(s):
            found = True
            n = int(m.group(1))
            unit = m.group(2).lower()
            if unit == "h":
                total += n * 3600
            elif unit == "m":
                total += n * 60
            elif unit == "s":
                total += n
        if found:
            return max(0, int(total))

        return None

    @staticmethod
    def _parse_float(text: str) -> Optional[float]:
        """Parse *text* as a float (thousands separators allowed), or None."""
        s = str(text or "").strip()
        if not s:
            return None
        s = s.replace(",", "")
        try:
            return float(s)
        except Exception:
            return None

    @staticmethod
    def _parse_op(expr: str) -> Tuple[Optional[str], str]:
        """Split *expr* into (operator, operand).

        Returns (None, expr) when no leading comparator is present; callers
        treat that as a substring-contains test.
        """
        text = str(expr or "").strip()
        if not text:
            return None, ""
        m = SelectionFilterSyntax._OP_RE.match(text)
        if not m:
            return None, text
        return m.group(1), str(m.group(2) or "").strip()

    @staticmethod
    def matches(item: Any, conditions: List[Tuple[str, str]]) -> bool:
        """Return True when *item* satisfies every (column, expression) pair.

        Without an operator, the expression is a case-insensitive substring
        test. With an operator, both sides are compared as durations when
        either looks time-like, then as numbers, then (for =/!= only) as
        case-insensitive strings.
        """
        colmap = SelectionFilterSyntax._item_column_map(item)

        for col, expr in conditions:
            key = SelectionFilterSyntax._norm_key(col)
            # _item_column_map already stores normalized keys, so one lookup
            # covers every spelling of the column name. (The old per-key
            # "alias" branch repeated the identical lookup and was a no-op.)
            actual = colmap.get(key)
            if actual is None:
                return False

            op, rhs = SelectionFilterSyntax._parse_op(expr)
            left_text = str(actual or "").strip()
            right_text = str(rhs or "").strip()

            if op is None:
                # No comparator: case-insensitive containment.
                if not right_text:
                    return False
                if right_text.lower() not in left_text.lower():
                    return False
                continue

            # Comparator: try duration parsing first when it looks time-like.
            prefer_duration = (
                key == "duration"
                or any(ch in right_text for ch in (":", "h", "m", "s"))
                or any(ch in left_text for ch in (":", "h", "m", "s"))
            )

            left_num: Optional[float] = None
            right_num: Optional[float] = None

            if prefer_duration:
                ldur = SelectionFilterSyntax._parse_duration_seconds(left_text)
                rdur = SelectionFilterSyntax._parse_duration_seconds(right_text)
                if ldur is not None and rdur is not None:
                    left_num = float(ldur)
                    right_num = float(rdur)

            if left_num is None or right_num is None:
                left_num = SelectionFilterSyntax._parse_float(left_text)
                right_num = SelectionFilterSyntax._parse_float(right_text)

            if left_num is not None and right_num is not None:
                # Numeric comparison path.
                if op in ("=", "=="):
                    if not (left_num == right_num):
                        return False
                elif op == "!=":
                    if not (left_num != right_num):
                        return False
                elif op == ">":
                    if not (left_num > right_num):
                        return False
                elif op == ">=":
                    if not (left_num >= right_num):
                        return False
                elif op == "<":
                    if not (left_num < right_num):
                        return False
                elif op == "<=":
                    if not (left_num <= right_num):
                        return False
                else:
                    return False
                continue

            # Fallback to string equality for =/!= when numeric parsing fails.
            if op in ("=", "=="):
                if left_text.lower() != right_text.lower():
                    return False
            elif op == "!=":
                if left_text.lower() == right_text.lower():
                    return False
            else:
                return False

        return True
class MedeiaLexer(Lexer):
    """Prompt-toolkit lexer that highlights the REPL pipeline grammar.

    Emits token classes: ``cmdlet``, ``pipe``, ``argument``, ``value``,
    ``string``, ``selection_at`` and ``selection_range``.
    """

    # All patterns are compiled once at class creation instead of inside
    # get_line/_emit_keyed_value, which previously re-looked them up for
    # every rendered line and every word.

    # Splits a line into whitespace / pipe / quoted string / bare word.
    _TOKEN_RE = re.compile(
        r"""
        (\s+) |                                   # 1. Whitespace
        (\|) |                                    # 2. Pipe
        ("(?:[^"\\]|\\.)*"|'(?:[^'\\]|\\.)*') |   # 3. Quoted string
        ([^\s\|]+)                                # 4. Word
        """,
        re.VERBOSE,
    )
    # `key:` prefix of a keyed value spec (e.g. clip:..., item:...).
    _KEY_PREFIX_RE = re.compile(r"^([A-Za-z_][A-Za-z0-9_-]*:)(.*)$")
    # Windows drive paths (e.g. C:\foo or D:/bar) must not be treated as keys.
    _DRIVE_RE = re.compile(r"^[A-Za-z]:[\\/]")
    # Concrete @-selection payloads: digits, ranges, '*' and commas.
    _SELECTION_RE = re.compile(r"[0-9\-\*,]+")

    def lex_document(self, document: "Document"):  # type: ignore[override]

        def get_line(lineno: int):
            line = document.lines[lineno]
            tokens: List[tuple[str, str]] = []
            # The first word of the line (and of each pipe segment) is a cmdlet.
            is_cmdlet = True

            def _emit_keyed_value(word: str) -> bool:
                """Emit `key:` prefixes (comma-separated) as argument tokens.

                Designed for values like:
                    clip:3m4s-3m14s,1h22m-1h33m,item:2-3

                Avoids special-casing URLs (://) and Windows drive paths (C:\\...).
                Returns True if it handled the token.
                """
                if not word or ":" not in word:
                    return False
                # Avoid URLs and common scheme patterns.
                if "://" in word:
                    return False
                # Avoid Windows drive paths (e.g., C:\\foo or D:/bar)
                if MedeiaLexer._DRIVE_RE.match(word):
                    return False

                parts = word.split(",")
                handled_any = False
                for i, part in enumerate(parts):
                    if i > 0:
                        # Re-emit the comma that split() consumed.
                        tokens.append(("class:value", ","))
                    if part == "":
                        continue
                    m = MedeiaLexer._KEY_PREFIX_RE.match(part)
                    if m:
                        tokens.append(("class:argument", m.group(1)))
                        if m.group(2):
                            tokens.append(("class:value", m.group(2)))
                        handled_any = True
                    else:
                        tokens.append(("class:value", part))
                        handled_any = True

                return handled_any

            for match in MedeiaLexer._TOKEN_RE.finditer(line):
                ws, pipe, quote, word = match.groups()
                if ws:
                    tokens.append(("", ws))
                    continue
                if pipe:
                    tokens.append(("class:pipe", pipe))
                    is_cmdlet = True
                    continue
                if quote:
                    # If the quoted token contains a keyed spec (clip:/item:/hash:),
                    # highlight the `key:` portion in argument-blue even inside quotes.
                    if len(quote) >= 2 and quote[0] == quote[-1] and quote[0] in ('"', "'"):
                        q = quote[0]
                        inner = quote[1:-1]
                        start_index = len(tokens)
                        if _emit_keyed_value(inner):
                            # _emit_keyed_value already appended tokens for inner; insert opening quote
                            # before that chunk, then add the closing quote.
                            tokens.insert(start_index, ("class:string", q))
                            tokens.append(("class:string", q))
                            is_cmdlet = False
                            continue

                    tokens.append(("class:string", quote))
                    is_cmdlet = False
                    continue
                if not word:
                    continue

                if word.startswith("@"):  # selection tokens
                    rest = word[1:]
                    if rest and MedeiaLexer._SELECTION_RE.fullmatch(rest):
                        tokens.append(("class:selection_at", "@"))
                        tokens.append(("class:selection_range", rest))
                        is_cmdlet = False
                        continue
                    if rest == "":
                        tokens.append(("class:selection_at", "@"))
                        is_cmdlet = False
                        continue

                if is_cmdlet:
                    tokens.append(("class:cmdlet", word))
                    is_cmdlet = False
                elif word.startswith("-"):
                    tokens.append(("class:argument", word))
                else:
                    if not _emit_keyed_value(word):
                        tokens.append(("class:value", word))

            return tokens

        return get_line
Reference in New Issue
Block a user