syntax v2
introduce a new, more ergonomic syntax for haku

not all features are implemented just yet. still missing:

- custom tags (non-True/False)
- color literals
- lists
Parent: a3e5e8bd10
Commit: 2595bf0d82
21 changed files with 2844 additions and 1062 deletions
crates/haku/src/parser.rs (new file, 607 lines)
@@ -0,0 +1,607 @@
use core::cell::Cell;

use alloc::vec::Vec;

use crate::{
    ast::{Ast, NodeAllocError, NodeId, NodeKind},
    diagnostic::Diagnostic,
    source::Span,
    token::{Lexis, TokenKind, TokenKindSet},
};

#[derive(Debug, Clone, Copy)]
pub struct ParserLimits {
    pub max_events: usize,
}

pub struct Parser<'a> {
    tokens: &'a Lexis,
    events: Vec<Event>,
    position: u32,
    fuel: Cell<u32>,
    pub diagnostics: Vec<Diagnostic>,
}

#[derive(Debug)]
enum Event {
    Open { kind: NodeKind },
    Close,
    Advance,
}

struct Open {
    index: Option<usize>,
}

struct Closed {
    index: Option<usize>,
}

impl<'a> Parser<'a> {
    const FUEL: u32 = 256;

    pub fn new(input: &'a Lexis, limits: &ParserLimits) -> Self {
        assert!(limits.max_events < u32::MAX as usize);

        Self {
            tokens: input,
            events: Vec::with_capacity(limits.max_events),
            position: 0,
            diagnostics: Vec::with_capacity(16),
            fuel: Cell::new(Self::FUEL),
        }
    }

    fn event(&mut self, event: Event) -> Option<usize> {
        if self.events.len() < self.events.capacity() {
            let index = self.events.len();
            self.events.push(event);
            Some(index)
        } else {
            None
        }
    }

    fn open(&mut self) -> Open {
        Open {
            index: self.event(Event::Open {
                kind: NodeKind::Error,
            }),
        }
    }

    fn open_before(&mut self, closed: Closed) -> Open {
        if let Some(index) = closed.index {
            if self.events.len() < self.events.capacity() {
                self.events.insert(
                    index,
                    Event::Open {
                        kind: NodeKind::Error,
                    },
                );
                return Open { index: Some(index) };
            }
        }
        Open { index: None }
    }

    fn close(&mut self, open: Open, kind: NodeKind) -> Closed {
        if let Some(index) = open.index {
            self.events[index] = Event::Open { kind };
            self.event(Event::Close);
            Closed { index: Some(index) }
        } else {
            Closed { index: None }
        }
    }

    fn is_eof(&self) -> bool {
        self.peek() == TokenKind::Eof
    }

    fn advance(&mut self) {
        if !self.is_eof() {
            self.position += 1;
            self.event(Event::Advance);
            self.fuel.set(Self::FUEL);
        }
    }

    #[track_caller]
    fn peek(&self) -> TokenKind {
        assert_ne!(self.fuel.get(), 0, "parser is stuck");
        self.fuel.set(self.fuel.get() - 1);

        self.tokens.kind(self.position)
    }

    fn span(&self) -> Span {
        self.tokens.span(self.position)
    }

    fn emit(&mut self, diagnostic: Diagnostic) {
        if self.diagnostics.len() < self.diagnostics.capacity() {
            self.diagnostics.push(diagnostic);
        }
    }

    fn advance_with_error(&mut self) -> Closed {
        let opened = self.open();
        self.advance();
        self.close(opened, NodeKind::Error)
    }

    fn optional_newline(&mut self) -> bool {
        if self.peek() == TokenKind::Newline {
            self.advance();
            true
        } else {
            false
        }
    }

    pub fn into_ast(self, ast: &mut Ast) -> Result<(NodeId, Vec<Diagnostic>), NodeAllocError> {
        let mut token = 0;
        let mut events = self.events;
        let mut stack = Vec::new();

        struct StackEntry {
            node_id: NodeId,
            // TODO: This should probably be optimized to use a shared stack.
            children: Vec<NodeId>,
        }

        // Remove the last Close to keep a single node on the stack.
        assert!(matches!(events.pop(), Some(Event::Close)));

        for event in events {
            match event {
                Event::Open { kind } => {
                    stack.push(StackEntry {
                        node_id: ast.alloc(kind, self.tokens.span(token))?,
                        children: Vec::new(),
                    });
                }
                Event::Close => {
                    let end_span = self.tokens.span(token.saturating_sub(1));
                    let stack_entry = stack.pop().unwrap();
                    ast.alloc_children(stack_entry.node_id, &stack_entry.children);
                    ast.extend_span(stack_entry.node_id, end_span.end);
                    stack.last_mut().unwrap().children.push(stack_entry.node_id);
                }
                Event::Advance => {
                    let span = self.tokens.span(token);
                    let node_id = ast.alloc(NodeKind::Token, span)?;
                    stack
                        .last_mut()
                        .expect("advance() may only be used in an open node")
                        .children
                        .push(node_id);
                    token += 1;
                }
            }
        }

        if stack.len() != 1 {
            // This means we had too many events emitted and they are no longer balanced.
            return Err(NodeAllocError);
        }
        // assert_eq!(token, self.tokens.len());

        let end_span = self.tokens.span(token.saturating_sub(1));
        let stack_entry = stack.pop().unwrap();
        ast.alloc_children(stack_entry.node_id, &stack_entry.children);
        ast.extend_span(stack_entry.node_id, end_span.end);

        Ok((stack_entry.node_id, self.diagnostics))
    }
}
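
The parsing functions below never build a tree directly; they only append `Open`, `Advance`, and `Close` events, and `into_ast` replays that flat stream with an explicit stack (which is also what lets `open_before` retroactively wrap an already-parsed left-hand side). The following standalone sketch is an illustration only, not part of the crate: it replays the same kind of event stream with strings in place of `NodeKind`, `Ast`, and spans, and inlines token text where the real `into_ast` allocates `Token` nodes. The event sequence is the one a parse of `1 + 2` records.

enum ToyEvent {
    Open(&'static str), // start a node of the given kind
    Advance,            // attach the next token to the current node
    Close,              // finish the current node
}

fn replay(events: &[ToyEvent], tokens: &[&str]) -> String {
    let mut token = 0;
    let mut stack: Vec<String> = Vec::new();
    let mut root = String::new();
    for event in events {
        match event {
            ToyEvent::Open(kind) => stack.push(format!("({kind}")),
            ToyEvent::Advance => {
                // The real parser allocates a Token node here; the toy just
                // pastes the token text into the current node.
                let top = stack.last_mut().unwrap();
                top.push(' ');
                top.push_str(tokens[token]);
                token += 1;
            }
            ToyEvent::Close => {
                let node = stack.pop().unwrap() + ")";
                match stack.last_mut() {
                    Some(parent) => {
                        parent.push(' ');
                        parent.push_str(&node);
                    }
                    None => root = node,
                }
            }
        }
    }
    root
}

fn main() {
    use ToyEvent::*;
    // Events recorded while parsing `1 + 2`: prefix() emits the Number,
    // open_before() wraps it in a Binary node, and infix_binary() adds the Op
    // and the right-hand side.
    let events = [
        Open("Binary"),
        Open("Number"), Advance, Close,
        Open("Op"), Advance, Close,
        Open("Number"), Advance, Close,
        Close,
    ];
    assert_eq!(
        replay(&events, &["1", "+", "2"]),
        "(Binary (Number 1) (Op +) (Number 2))"
    );
}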

impl<'a> core::fmt::Debug for Parser<'a> {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        f.debug_struct("Parser")
            .field("events", &self.events)
            .finish_non_exhaustive()
    }
}

enum Tighter {
    Left,
    Right,
}

fn tighter(left: TokenKind, right: TokenKind) -> Tighter {
    fn tightness(kind: TokenKind) -> Option<usize> {
        match kind {
            TokenKind::Equal => Some(0),
            TokenKind::EqualEqual
            | TokenKind::NotEqual
            | TokenKind::Less
            | TokenKind::LessEqual
            | TokenKind::Greater
            | TokenKind::GreaterEqual => Some(1),
            TokenKind::Plus | TokenKind::Minus => Some(2),
            TokenKind::Star | TokenKind::Slash => Some(3),
            _ if PREFIX_TOKENS.contains(kind) => Some(4),
            _ => None,
        }
    }

    let Some(right_tightness) = tightness(right) else {
        return Tighter::Left;
    };
    let Some(left_tightness) = tightness(left) else {
        assert!(left == TokenKind::Eof);
        return Tighter::Right;
    };

    if right_tightness > left_tightness {
        Tighter::Right
    } else {
        Tighter::Left
    }
}

fn precedence_parse(p: &mut Parser, left: TokenKind) {
    let mut lhs = prefix(p);

    loop {
        let right = p.peek();
        match tighter(left, right) {
            Tighter::Left => break,
            Tighter::Right => {
                let o = p.open_before(lhs);
                let kind = infix(p, right);
                lhs = p.close(o, kind);
            }
        }
    }
}
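
`precedence_parse` is a Pratt-style loop: it parses one prefix expression, then keeps folding the following operator into the left-hand side for as long as that operator binds tighter than the one that invoked the current call. A minimal standalone sketch of just that comparison, using a toy character-based table instead of `tightness` above (illustration only; the real table also gives `PREFIX_TOKENS` the highest tightness, which is what makes function application bind tighter than any operator):

// Toy binding-power table mirroring the shape of `tightness` (illustration only).
fn toy_tightness(op: char) -> u32 {
    match op {
        '=' => 0,
        '<' | '>' => 1,
        '+' | '-' => 2,
        '*' | '/' => 3,
        _ => panic!("not an operator"),
    }
}

// `precedence_parse(p, left)` keeps extending the left-hand side only while
// this returns true for the operator it peeks at.
fn binds_tighter(right: char, left_context: char) -> bool {
    toy_tightness(right) > toy_tightness(left_context)
}

fn main() {
    // While parsing the right operand of `+`, a following `*` binds tighter,
    // so `1 + 2 * 3` groups as `1 + (2 * 3)`.
    assert!(binds_tighter('*', '+'));
    // While parsing the right operand of `*`, a following `+` does not bind
    // tighter, so `1 * 2 + 3` groups as `(1 * 2) + 3`.
    assert!(!binds_tighter('+', '*'));
    // Equal tightness resolves to Tighter::Left, so operators of the same
    // level associate to the left: `1 - 2 + 3` groups as `(1 - 2) + 3`.
    assert!(!binds_tighter('+', '-'));
}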

fn one(p: &mut Parser, kind: NodeKind) -> Closed {
    let o = p.open();
    p.advance();
    p.close(o, kind)
}

fn list(p: &mut Parser) -> Closed {
    let o = p.open();
    let lspan = p.span();
    p.advance(); // [
    p.optional_newline();

    loop {
        match p.peek() {
            TokenKind::Eof => {
                p.emit(Diagnostic::error(lspan, "missing `]` to close this list"));
                break;
            }

            TokenKind::RBrack => {
                p.advance();
                break;
            }

            _ => (),
        }

        expr(p);

        match p.peek() {
            TokenKind::Comma | TokenKind::Newline => {
                p.advance();
                continue;
            }

            TokenKind::RBrack => {
                p.advance();
                break;
            }

            _ => {
                let span = p.span();
                p.emit(Diagnostic::error(
                    span,
                    "comma `,` or new line expected after list element",
                ));
                p.advance_with_error();
            }
        }
    }

    p.close(o, NodeKind::List)
}

fn unary(p: &mut Parser) -> Closed {
    let o = p.open();

    let op = p.open();
    p.advance();
    p.close(op, NodeKind::Op);

    prefix(p);

    p.close(o, NodeKind::Unary)
}

fn paren(p: &mut Parser) -> Closed {
    let o = p.open();
    let lspan = p.span();
    p.advance(); // (
    if p.peek() == TokenKind::RParen {
        p.advance(); // )
        p.close(o, NodeKind::ParenEmpty)
    } else {
        p.optional_newline();
        expr(p);
        p.optional_newline();
        if p.peek() != TokenKind::RParen {
            p.emit(Diagnostic::error(lspan, "missing closing parenthesis `)`"));
            p.advance_with_error()
        } else {
            p.advance();
            p.close(o, NodeKind::Paren)
        }
    }
}

fn param(p: &mut Parser) {
    let o = p.open();

    if let TokenKind::Ident | TokenKind::Underscore = p.peek() {
        p.advance();
    } else {
        let span = p.span();
        p.emit(Diagnostic::error(
            span,
            "parameter names must be identifiers or `_`",
        ));
        p.advance_with_error();
    }

    p.close(o, NodeKind::Param);
}

fn lambda(p: &mut Parser) -> Closed {
    let o = p.open();
    p.advance(); // backslash

    let params = p.open();
    loop {
        param(p);
        match p.peek() {
            TokenKind::Comma => {
                p.advance();
                continue;
            }

            TokenKind::RArrow => break,

            _ => {
                let span = p.span();
                p.emit(Diagnostic::error(
                    span,
                    "`,` or `->` expected after function parameter",
                ));
                p.advance_with_error();
                break;
            }
        }
    }
    p.close(params, NodeKind::Params);

    // NOTE: Can be false if there are some stray tokens.
    // We prefer to bail early and let the rest of the program parse.
    if p.peek() == TokenKind::RArrow {
        p.advance();
        p.optional_newline();
        expr(p);
    }

    p.close(o, NodeKind::Lambda)
}

fn if_expr(p: &mut Parser) -> Closed {
    let o = p.open();

    p.advance(); // if
    if p.peek() != TokenKind::LParen {
        let span = p.span();
        p.emit(Diagnostic::error(
            span,
            "the condition in an `if` expression must be surrounded with parentheses",
        ));
        // NOTE: Don't advance, it's more likely the programmer expected no parentheses to be needed.
    }
    p.advance();
    expr(p); // Condition
    if p.peek() != TokenKind::RParen {
        let span = p.span();
        p.emit(Diagnostic::error(
            span,
            "missing closing parenthesis after `if` condition",
        ));
    }
    p.advance();
    p.optional_newline();

    expr(p); // True branch
    p.optional_newline();

    if p.peek() != TokenKind::Else {
        let span = p.span();
        p.emit(Diagnostic::error(
            span,
            "`if` expression is missing an `else` clause",
        ));
    }
    p.advance();
    p.optional_newline();

    expr(p); // False branch

    p.close(o, NodeKind::If)
}

fn let_expr(p: &mut Parser) -> Closed {
    let o = p.open();

    p.advance(); // let

    if p.peek() == TokenKind::Ident {
        let ident = p.open();
        p.advance();
        p.close(ident, NodeKind::Ident);
    } else {
        let span = p.span();
        p.emit(Diagnostic::error(span, "`let` variable name expected"));
        p.advance_with_error();
    }

    if p.peek() == TokenKind::Equal {
        p.advance();
    } else {
        let span = p.span();
        p.emit(Diagnostic::error(span, "`=` expected after variable name"));
        p.advance_with_error();
    }

    expr(p);

    if p.peek() == TokenKind::Newline {
        p.advance();
    } else {
        let span = p.span();
        p.emit(Diagnostic::error(
            span,
            "new line expected after `let` expression",
        ));
        p.advance_with_error();
    }

    expr(p);

    p.close(o, NodeKind::Let)
}

const PREFIX_TOKENS: TokenKindSet = TokenKindSet::new(&[
    TokenKind::Ident,
    TokenKind::Tag,
    TokenKind::Number,
    TokenKind::Color,
    // NOTE: This is ambiguous in function calls.
    // In that case, the infix operator takes precedence (because the `match` arms for the infix op
    // come first.)
    TokenKind::Minus,
    TokenKind::Not,
    TokenKind::LParen,
    TokenKind::Backslash,
    TokenKind::If,
    TokenKind::Let,
    TokenKind::LBrack,
]);

fn prefix(p: &mut Parser) -> Closed {
    match p.peek() {
        TokenKind::Ident => one(p, NodeKind::Ident),
        TokenKind::Tag => one(p, NodeKind::Tag),
        TokenKind::Number => one(p, NodeKind::Number),
        TokenKind::Color => one(p, NodeKind::Color),
        TokenKind::LBrack => list(p),

        TokenKind::Minus | TokenKind::Not => unary(p),
        TokenKind::LParen => paren(p),
        TokenKind::Backslash => lambda(p),
        TokenKind::If => if_expr(p),
        TokenKind::Let => let_expr(p),

        _ => {
            assert!(
                !PREFIX_TOKENS.contains(p.peek()),
                "{:?} found in PREFIX_TOKENS",
                p.peek()
            );

            let span = p.span();
            p.emit(Diagnostic::error(
                span,
                "an expression was expected, but this token does not start one",
            ));
            p.advance_with_error()
        }
    }
}

fn infix(p: &mut Parser, op: TokenKind) -> NodeKind {
    match op {
        TokenKind::Plus
        | TokenKind::Minus
        | TokenKind::Star
        | TokenKind::Slash
        | TokenKind::EqualEqual
        | TokenKind::NotEqual
        | TokenKind::Less
        | TokenKind::LessEqual
        | TokenKind::Greater
        | TokenKind::GreaterEqual
        | TokenKind::Equal => infix_binary(p, op),

        _ if PREFIX_TOKENS.contains(op) => infix_call(p),

        _ => panic!("unhandled infix operator {op:?}"),
    }
}

fn infix_binary(p: &mut Parser, op: TokenKind) -> NodeKind {
    let o = p.open();
    p.advance();
    p.close(o, NodeKind::Op);

    if p.peek() == TokenKind::Newline {
        p.advance();
    }

    precedence_parse(p, op);
    NodeKind::Binary
}

fn infix_call(p: &mut Parser) -> NodeKind {
    while PREFIX_TOKENS.contains(p.peek()) {
        prefix(p);
    }

    NodeKind::Call
}
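
Function calls have no dedicated syntax: when the token after an expression can itself start a prefix expression, `infix` falls through to `infix_call`, which keeps parsing arguments until it sees something that cannot start one, so `f x y` becomes a single `Call` node. Because the binary-operator arms in `infix` are matched first, a `-` directly after the callee is read as subtraction (`f -x` is `f - x`), exactly as the NOTE on `PREFIX_TOKENS` warns, and a negated first argument needs parentheses: `f (-x)`. Once the argument loop has started, `-` begins a unary argument, since `infix_call` only checks `PREFIX_TOKENS`. A standalone sketch of that dispatch order, with a toy token type in place of `TokenKind` (illustration only):

#[derive(Clone, Copy)]
enum ToyToken {
    Ident,
    Number,
    Minus,
    Star,
    Newline,
}

// Mirrors PREFIX_TOKENS: tokens that can start a prefix expression.
fn starts_prefix(t: ToyToken) -> bool {
    matches!(t, ToyToken::Ident | ToyToken::Number | ToyToken::Minus)
}

// Mirrors the arm order in `infix`: binary operators win over the
// "starts a prefix expression, so this is a call argument" fallback.
fn classify(t: ToyToken) -> &'static str {
    match t {
        ToyToken::Minus | ToyToken::Star => "binary operator",
        _ if starts_prefix(t) => "call argument",
        _ => "ends the expression",
    }
}

fn main() {
    assert_eq!(classify(ToyToken::Minus), "binary operator"); // `f -x` is `f - x`
    assert_eq!(classify(ToyToken::Number), "call argument");  // `f 1` is a call
    assert_eq!(classify(ToyToken::Ident), "call argument");   // `f x` is a call
    assert_eq!(classify(ToyToken::Newline), "ends the expression");
}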

pub fn expr(p: &mut Parser) {
    precedence_parse(p, TokenKind::Eof)
}

pub fn toplevel(p: &mut Parser) {
    let o = p.open();
    p.optional_newline();
    while p.peek() != TokenKind::Eof {
        expr(p);

        match p.peek() {
            TokenKind::Newline => {
                p.advance();
                continue;
            }

            TokenKind::Eof => break,

            _ => {
                let span = p.span();
                p.emit(Diagnostic::error(
                    span,
                    "newline expected after toplevel expression",
                ))
            }
        }
    }
    p.close(o, NodeKind::Toplevel);
}

#[cfg(test)]
mod tests;
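
For context, a sketch of how this parser might be driven end to end. Only `ParserLimits`, `Parser::new`, `toplevel`, and `into_ast` are taken from this file; the crate paths, the lexer entry points, the `Lexis`/`Ast` constructors, the `lexis` field, and the capacity numbers are assumptions about the rest of the crate and may not match its actual API. The embedded program sticks to constructs this parser accepts today: newline-separated `let` bindings, a `\x -> ...` lambda, a parenthesized `if (...) ... else ...`, and parentheses around a negated call argument.

// Hypothetical driver: everything marked "assumed" is a guess about the
// surrounding crate, not something this diff defines.
use haku::{
    ast::Ast,                                 // assumed constructor below
    lexer::{lex, Lexer},                      // assumed lexer module
    parser::{toplevel, Parser, ParserLimits}, // defined in this file
    source::SourceCode,                       // assumed source wrapper
    token::Lexis,
};

fn main() {
    // A small program in the v2 syntax accepted by this parser.
    let code = r#"
let scale = 4
let wave = \x -> if (x < 0) -x else x
wave (-scale)
"#;

    // Assumed setup: produce a `Lexis` for `Parser::new` to consume.
    let source = SourceCode::unlimited_len(code); // assumed
    let mut lexer = Lexer::new(Lexis::new(1024), source); // assumed
    if lex(&mut lexer).is_err() {
        panic!("token limit exceeded"); // `lex` and its error type are assumed
    }

    // The API below is the one introduced in this file.
    let mut parser = Parser::new(&lexer.lexis, &ParserLimits { max_events: 1024 });
    toplevel(&mut parser);

    let mut ast = Ast::new(1024); // assumed constructor
    let Ok((root, diagnostics)) = parser.into_ast(&mut ast) else {
        panic!("ran out of AST nodes");
    };
    assert!(diagnostics.is_empty(), "the program above should parse cleanly");
    let _ = root; // hand the root node to later compilation stages
}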