This commit is contained in:
2026-01-20 03:33:11 -08:00
parent 1f65f9de2a
commit 1e2054189b
4 changed files with 143 additions and 115 deletions

View File

@@ -22,6 +22,20 @@ except Exception: # pragma: no cover - optional dependency
# Expose a stable name used by the rest of the module
Lexer = _PTK_Lexer
# Pre-compiled regexes for the lexer (avoid recompiling on every call).

# Splits a command line into its lexical pieces.  Exactly one of the four
# groups is non-None for every match.
TOKEN_PATTERN = re.compile(
    r"""
    (\s+) | # 1. Whitespace
    (\|) | # 2. Pipe
    ("(?:[^"\\]|\\.)*"|'(?:[^'\\]|\\.)*') | # 3. Quoted string
    ([^\s\|]+) # 4. Word
    """,
    re.VERBOSE,
)

# `key:rest` -- group 1 is the identifier-like key including its colon,
# group 2 is whatever follows (possibly empty).
KEY_PREFIX_RE = re.compile(r"^([A-Za-z_][A-Za-z0-9_-]*:)(.*)$")

# Body of an `@` selection: digits, dashes, asterisks and commas only.
# Anchored with \Z rather than $ so that `.match()` rejects a trailing
# newline, exactly like `re.fullmatch` on the unanchored character class.
SELECTION_RANGE_RE = re.compile(r"^[0-9\-\*,]+\Z")

# Leading Windows drive specifier such as `C:\` or `D:/`.
DRIVE_RE = re.compile(r"^[A-Za-z]:[\\/]")
class SelectionSyntax:
"""Parses @ selection syntax into 1-based indices."""
@@ -349,114 +363,3 @@ class SelectionFilterSyntax:
return True
class MedeiaLexer(Lexer):
    """Split command lines into ``(style, text)`` fragments for highlighting.

    Each line is scanned left to right into whitespace, pipes (``|``), quoted
    strings and bare words.  A bare word in command position (start of the
    line or right after a pipe, before any string or selection token) is
    styled ``class:cmdlet``.  Later words become ``class:argument`` when they
    start with ``-``, ``class:selection_at`` / ``class:selection_range`` for
    ``@`` selections, and otherwise ``class:value`` with any ``key:`` prefixes
    picked out as ``class:argument``.

    All regexes come from the module-level pre-compiled constants
    (``TOKEN_PATTERN``, ``KEY_PREFIX_RE``, ``SELECTION_RANGE_RE``,
    ``DRIVE_RE``) so nothing is recompiled per line or per word.
    """

    @staticmethod
    def _emit_keyed_value(tokens: "List[tuple[str, str]]", word: str) -> bool:
        """Append `key:` prefixes (comma-separated) as argument tokens.

        Designed for values like:
            clip:3m4s-3m14s,1h22m-1h33m,item:2-3

        URLs (anything containing ``://``) and Windows drive paths
        (``C:\\...`` / ``D:/...``) are left alone.

        Args:
            tokens: Output list; fragments are appended in place.
            word: The unquoted text to split.

        Returns:
            True if fragments for ``word`` were appended, False if ``word``
            was not handled (in which case ``tokens`` is untouched).
        """
        if not word or ":" not in word:
            return False
        # Avoid URLs and common scheme patterns.
        if "://" in word:
            return False
        # Avoid Windows drive paths (e.g., C:\foo or D:/bar).
        if DRIVE_RE.match(word):
            return False

        handled_any = False
        for index, part in enumerate(word.split(",")):
            if index > 0:
                # Re-emit the separator that split() consumed.
                tokens.append(("class:value", ","))
            if part == "":
                continue
            keyed = KEY_PREFIX_RE.match(part)
            if keyed:
                tokens.append(("class:argument", keyed.group(1)))
                if keyed.group(2):
                    tokens.append(("class:value", keyed.group(2)))
            else:
                tokens.append(("class:value", part))
            handled_any = True
        return handled_any

    def _lex_line(self, line: str) -> "List[tuple[str, str]]":
        """Return the ``(style, text)`` fragments for a single line."""
        tokens: List[tuple[str, str]] = []
        # True while the next bare word should be styled as a cmdlet.
        is_cmdlet = True

        for match in TOKEN_PATTERN.finditer(line):
            ws, pipe, quote, word = match.groups()

            if ws:
                tokens.append(("", ws))
                continue

            if pipe:
                tokens.append(("class:pipe", pipe))
                # A pipe starts a new pipeline stage.
                is_cmdlet = True
                continue

            if quote:
                is_cmdlet = False
                # If the quoted token contains a keyed spec (clip:/item:/hash:),
                # highlight the `key:` portion in argument-blue even inside quotes.
                if len(quote) >= 2 and quote[0] == quote[-1] and quote[0] in ('"', "'"):
                    q = quote[0]
                    inner_tokens: List[tuple[str, str]] = []
                    if self._emit_keyed_value(inner_tokens, quote[1:-1]):
                        # Wrap the inner fragments in separately styled quotes.
                        tokens.append(("class:string", q))
                        tokens.extend(inner_tokens)
                        tokens.append(("class:string", q))
                        continue
                tokens.append(("class:string", quote))
                continue

            if not word:
                continue

            if word.startswith("@"):  # selection tokens
                rest = word[1:]
                if not rest:
                    tokens.append(("class:selection_at", "@"))
                    is_cmdlet = False
                    continue
                if SELECTION_RANGE_RE.fullmatch(rest):
                    tokens.append(("class:selection_at", "@"))
                    tokens.append(("class:selection_range", rest))
                    is_cmdlet = False
                    continue
                # Any other `@word` falls through and is treated as a plain word.

            if is_cmdlet:
                tokens.append(("class:cmdlet", word))
                is_cmdlet = False
            elif word.startswith("-"):
                tokens.append(("class:argument", word))
            elif not self._emit_keyed_value(tokens, word):
                tokens.append(("class:value", word))

        return tokens

    def lex_document(self, document: "Document"):  # type: ignore[override]
        """Return a callable mapping a line number to that line's fragments.

        The line text is read from ``document.lines`` each time the returned
        callable is invoked.
        """

        def get_line(lineno: int):
            return self._lex_line(document.lines[lineno])

        return get_line