"""Lightweight syntax validation and query parsing for CLI pipeline input."""

from __future__ import annotations

import re
from dataclasses import dataclass
from typing import Any, Dict, Optional


@dataclass(frozen=True)
class SyntaxErrorDetail:
    """A user-facing description of a pipeline syntax error."""

    message: str
    expected: Optional[str] = None


def _split_pipeline_stages(text: str) -> list[str]:
    """Split a pipeline command into stage strings on unquoted '|' characters."""
    raw = str(text or "")
    if not raw:
        return []

    stages: list[str] = []
    buf: list[str] = []
    quote: Optional[str] = None
    escaped = False

    for ch in raw:
        if escaped:
            buf.append(ch)
            escaped = False
            continue

        if ch == "\\" and quote is not None:
            buf.append(ch)
            escaped = True
            continue

        if ch in ('"', "'"):
            if quote is None:
                quote = ch
            elif quote == ch:
                quote = None
            buf.append(ch)
            continue

        if ch == "|" and quote is None:
            stage = "".join(buf).strip()
            if stage:
                stages.append(stage)
            buf = []
            continue

        buf.append(ch)

    tail = "".join(buf).strip()
    if tail:
        stages.append(tail)
    return stages
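

# Illustrative behaviour (a sketch based on the splitter above; "search-file" is a
# hypothetical command name, not defined in this module):
#
#   _split_pipeline_stages('search-file "a|b" | add-file | add-note')
#   -> ['search-file "a|b"', 'add-file', 'add-note']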


def _tokenize_stage(stage_text: str) -> list[str]:
    """Tokenize a stage string (best-effort)."""
    import shlex

    text = str(stage_text or "").strip()
    if not text:
        return []
    try:
        return shlex.split(text)
    except Exception:
        # Fall back to plain whitespace splitting when shlex rejects the input
        # (e.g. unbalanced quotes).
        return text.split()
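

# Illustrative behaviour (a sketch; the flag and value are hypothetical): shlex handles
# quoting, and inputs shlex rejects fall back to whitespace splitting.
#
#   _tokenize_stage('add-note -query "store:books hash:abc"')
#   -> ['add-note', '-query', 'store:books hash:abc']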


def _has_flag(tokens: list[str], *flags: str) -> bool:
    """Return True if any token matches one of *flags* (case-insensitive, including '-flag=value')."""
    want = {str(f).strip().lower() for f in flags if str(f).strip()}
    if not want:
        return False
    for tok in tokens:
        low = str(tok).strip().lower()
        if low in want:
            return True
        # Support -arg=value
        if "=" in low:
            head = low.split("=", 1)[0].strip()
            if head in want:
                return True
    return False


def _get_flag_value(tokens: list[str], *flags: str) -> Optional[str]:
    """Return the value for a flag from tokenized args.

    Supports:
    - -flag value
    - --flag value
    - -flag=value
    - --flag=value
    """
    want = {str(f).strip().lower() for f in flags if str(f).strip()}
    if not want:
        return None
    for idx, tok in enumerate(tokens):
        low = str(tok).strip().lower()
        if "=" in low:
            head = low.split("=", 1)[0]
            if head.strip() in want:
                # Return the value with its original casing.
                return tok.split("=", 1)[1]
        if low in want and idx + 1 < len(tokens):
            return tokens[idx + 1]
    return None
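

# Illustrative behaviour of the flag helpers (a sketch; token values are hypothetical).
# Matching is case-insensitive and accepts both '-flag value' and '-flag=value' forms;
# _get_flag_value returns the value with its original casing.
#
#   _has_flag(['add-note', '-Hash', 'abc'], '-hash', '--hash')            -> True
#   _get_flag_value(['add-note', '--query=store:books hash:abc'],
#                   '-query', '--query')                                  -> 'store:books hash:abc'
#   _get_flag_value(['add-file', '-store', 'Books'], '-store', '--store') -> 'Books'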


def _validate_add_note_requires_add_file_order(raw: str) -> Optional[SyntaxErrorDetail]:
    """Enforce: add-note in piped mode must occur after add-file.

    Rationale: add-note requires a known (store, hash) target; piping before add-file
    means the item likely has no hash yet.
    """
    stages = _split_pipeline_stages(raw)
    if len(stages) <= 1:
        return None

    parsed: list[tuple[str, list[str]]] = []
    for stage in stages:
        tokens = _tokenize_stage(stage)
        if not tokens:
            continue
        cmd = str(tokens[0]).replace("_", "-").strip().lower()
        parsed.append((cmd, tokens))

    add_file_positions = [i for i, (cmd, _toks) in enumerate(parsed) if cmd == "add-file"]
    if not add_file_positions:
        return None

    for i, (cmd, tokens) in enumerate(parsed):
        if cmd != "add-note":
            continue

        # If add-note occurs before any add-file stage, it must be explicitly targeted.
        if any(pos > i for pos in add_file_positions):
            has_hash = _has_flag(tokens, "-hash", "--hash")
            has_store = _has_flag(tokens, "-store", "--store")

            # Also accept explicit targeting via -query "store:<store> hash:<sha256> ...".
            query_val = _get_flag_value(tokens, "-query", "--query")
            has_store_hash_in_query = False
            if query_val:
                try:
                    parsed_q = parse_query(str(query_val))
                    q_hash = get_field(parsed_q, "hash") or get_field(parsed_q, "sha256")
                    q_store = get_field(parsed_q, "store")
                    has_store_hash_in_query = bool(
                        str(q_hash or "").strip() and str(q_store or "").strip()
                    )
                except Exception:
                    has_store_hash_in_query = False

            if (has_hash and has_store) or has_store_hash_in_query:
                continue

            return SyntaxErrorDetail(
                "Pipeline error: 'add-note' must come after 'add-file' when used with piped input. "
                "Move 'add-note' after 'add-file', or call it with explicit targeting: "
                'add-note -query "store:<store> hash:<sha256> title:<title>,text:<text>".'
            )

    return None
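

# Illustrative pipelines (a sketch; flag spellings other than -hash/-store/-query are
# hypothetical):
#
#   'add-file -path x | add-note -title y'   -> allowed (add-note comes after add-file)
#   'add-note -title y | add-file -path x'   -> rejected (no explicit target before add-file)
#   'add-note -query "store:books hash:abc" | add-file -path x'
#                                            -> allowed (explicit store/hash target)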


def validate_pipeline_text(text: str) -> Optional[SyntaxErrorDetail]:
    """Validate raw CLI input before tokenization/execution.

    This is intentionally lightweight and focuses on user-facing syntax issues:
    - Unbalanced single/double quotes
    - Dangling or empty pipeline stages (|)

    Returns:
        None if valid, otherwise a SyntaxErrorDetail describing the issue.
    """
    if text is None:
        return SyntaxErrorDetail("Empty command")

    raw = text.strip()
    if not raw:
        return SyntaxErrorDetail("Empty command")

    in_single = False
    in_double = False
    escaped = False
    last_pipe_outside_quotes: Optional[int] = None

    for idx, ch in enumerate(raw):
        if escaped:
            escaped = False
            continue

        if ch == "\\" and (in_single or in_double):
            escaped = True
            continue

        if ch == '"' and not in_single:
            in_double = not in_double
            continue

        if ch == "'" and not in_double:
            in_single = not in_single
            continue

        if ch == "|" and not in_single and not in_double:
            # Record pipe locations to catch empty stages/dangling pipe.
            if last_pipe_outside_quotes is not None and last_pipe_outside_quotes == idx - 1:
                return SyntaxErrorDetail("Syntax error: empty pipeline stage (found '||').")
            last_pipe_outside_quotes = idx

    if in_double:
        return SyntaxErrorDetail('Syntax error: missing closing ".', expected='"')
    if in_single:
        return SyntaxErrorDetail("Syntax error: missing closing '.", expected="'")

    # Dangling pipe at end / pipe as first non-space character
    if raw.startswith("|"):
        return SyntaxErrorDetail("Syntax error: pipeline cannot start with '|'.")
    if raw.endswith("|"):
        return SyntaxErrorDetail("Syntax error: pipeline cannot end with '|'.")

    # Empty stage like "cmd1 | | cmd2" (spaces between pipes)
    if "|" in raw:
        # Simple pass: look for pipes that have only whitespace between them.
        # We only check outside quotes by re-scanning and counting non-space chars between pipes.
        in_single = False
        in_double = False
        escaped = False
        seen_nonspace_since_pipe = True  # start true to allow leading command
        for ch in raw:
            if escaped:
                escaped = False
                continue
            if ch == "\\" and (in_single or in_double):
                escaped = True
                continue
            if ch == '"' and not in_single:
                in_double = not in_double
                continue
            if ch == "'" and not in_double:
                in_single = not in_single
                continue
            if ch == "|" and not in_single and not in_double:
                if not seen_nonspace_since_pipe:
                    return SyntaxErrorDetail(
                        "Syntax error: empty pipeline stage (use a command between '|')."
                    )
                seen_nonspace_since_pipe = False
                continue
            if not in_single and not in_double and not ch.isspace():
                seen_nonspace_since_pipe = True

    # Semantic rules (still lightweight; no cmdlet imports)
    semantic_error = _validate_add_note_requires_add_file_order(raw)
    if semantic_error is not None:
        return semantic_error

    return None
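

# Illustrative results (a sketch based on the checks above; command names are hypothetical):
#
#   validate_pipeline_text('search-file "morals')     -> missing closing " error
#   validate_pipeline_text('add-file | | add-note')   -> empty pipeline stage error
#   validate_pipeline_text('add-file -path x')        -> None (valid)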


def parse_query(query: str) -> Dict[str, Any]:
    """Parse a query string into field:value pairs and free text.

    Supports syntax like:
    - isbn:0557677203
    - author:"Albert Pike"
    - title:"Morals and Dogma" year:2010
    - Mixed with free text: Morals isbn:0557677203

    Returns:
        Dict with keys:
        - fields: Dict[str, str]
        - text: str
        - raw: str
    """
    result: Dict[str, Any] = {
        "fields": {},
        "text": "",
        "raw": query,
    }

    if not query or not query.strip():
        return result

    raw = query.strip()
    remaining_parts: list[str] = []

    # Match field:value where value is either a quoted string or a non-space token.
    pattern = r'(\w+):(?:"([^"]*)"|(\S+))'

    pos = 0
    for match in re.finditer(pattern, raw):
        if match.start() > pos:
            before_text = raw[pos : match.start()].strip()
            if before_text:
                remaining_parts.append(before_text)

        field_name = (match.group(1) or "").lower()
        field_value = match.group(2) if match.group(2) is not None else match.group(3)
        if field_name:
            result["fields"][field_name] = field_value

        pos = match.end()

    if pos < len(raw):
        remaining_text = raw[pos:].strip()
        if remaining_text:
            remaining_parts.append(remaining_text)

    result["text"] = " ".join(remaining_parts)
    return result
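

# Illustrative parse (a sketch; values taken from the docstring examples above):
#
#   parse_query('Morals author:"Albert Pike" isbn:0557677203')
#   -> {"fields": {"author": "Albert Pike", "isbn": "0557677203"},
#       "text": "Morals",
#       "raw": 'Morals author:"Albert Pike" isbn:0557677203'}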


def get_field(
    parsed_query: Dict[str, Any], field_name: str, default: Optional[str] = None
) -> Optional[str]:
    """Get a field value from a parsed query."""
    return parsed_query.get("fields", {}).get((field_name or "").lower(), default)


def get_free_text(parsed_query: Dict[str, Any]) -> str:
    """Get the free-text portion of a parsed query."""
    return str(parsed_query.get("text", "") or "")
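

# Illustrative accessor usage (a sketch; continues the parse_query example above):
#
#   q = parse_query('author:"Albert Pike" Morals')
#   get_field(q, "Author")   -> "Albert Pike"   (field lookup is case-insensitive)
#   get_free_text(q)         -> "Morals"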