overhaul operator precedence

all arithmetic operators (+, -, *, /) now share a single precedence level and are evaluated left to right.
to make an operator bind tighter, remove the spaces around it:

- 2 + 2 * 2 = 8
- 2 + 2*2 = 6
This commit is contained in:
りき萌 2025-09-01 21:13:32 +02:00
parent 09f2292e62
commit b52c1b26c9
4 changed files with 144 additions and 43 deletions

View file

@ -7,7 +7,7 @@ use crate::{
ast::{Ast, NodeAllocError, NodeId, NodeKind},
diagnostic::Diagnostic,
source::Span,
token::{Lexis, TokenKind, TokenKindSet},
token::{Lexis, Spaces, TokenKind, TokenKindSet},
};
#[derive(Debug, Clone, Copy)]
@ -132,6 +132,11 @@ impl<'a> Parser<'a> {
self.tokens.kind(self.position)
}
/// Returns the result of `self.peek()` paired with the `Spaces` value that the token store
/// holds for the parser's current position.
///
/// Takes `&self` and only reads `self.position`, so the parser does not advance.
// NOTE(review): `Spaces` presumably records whether whitespace sits on either side of the
// token (it is later queried via `spaces.pair()`) — confirm against the `token` module.
// `#[track_caller]` lets caller-location-aware code inside report this method's call site.
#[track_caller]
fn peek_with_spaces(&self) -> (TokenKind, Spaces) {
(self.peek(), self.tokens.spaces(self.position))
}
/// Returns the `Span` that the token store holds for the parser's current position.
///
/// Read-only: the parser position is not changed.
fn span(&self) -> Span {
self.tokens.span(self.position)
}
@ -298,33 +303,61 @@ impl fmt::Display for IntoAstError {
// Empty impl: no `Error` methods are overridden, so the trait's provided defaults apply.
impl Error for IntoAstError {}
/// Outcome of comparing two neighbouring operators: tells the precedence parser which of the
/// two binds more tightly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Tighter {
/// The left operator wins; the precedence parser stops extending the current expression.
Left,
/// The right operator wins; the precedence parser wraps the expression parsed so far and
/// continues with that operator.
Right,
}
fn tighter(left: TokenKind, right: TokenKind) -> Tighter {
fn tightness(kind: TokenKind) -> Option<usize> {
match kind {
TokenKind::Equal | TokenKind::Colon => Some(0),
fn tighter(left: (TokenKind, Spaces), right: (TokenKind, Spaces)) -> Tighter {
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
enum Spacing {
Loose,
Call,
Tight,
}
fn tightness((kind, spaces): (TokenKind, Spaces)) -> Option<(Spacing, usize)> {
let spacing = match kind {
// There are a few types of operators which are independent of tightness.
// For : and =, it does not matter if they're spelled one way or the other, because
// there is only one way to use them (at the beginning of the expression).
TokenKind::Colon | TokenKind::Equal => Spacing::Loose,
// For calls, there is a special intermediate level, such that they can sit between
// loose operators and tight operators.
_ if PREFIX_TOKENS.contains(kind) => Spacing::Call,
// For everything else, the usual rules apply.
_ => match spaces.pair() {
(false, false) => Spacing::Tight,
(true, true) => Spacing::Loose,
_ => return None, // not a valid infix operator
},
};
let index = match kind {
TokenKind::Equal | TokenKind::Colon => 0,
// 1: reserved for `and` and `or`
TokenKind::EqualEqual
| TokenKind::NotEqual
| TokenKind::Less
| TokenKind::LessEqual
| TokenKind::Greater
| TokenKind::GreaterEqual => Some(1),
TokenKind::Plus | TokenKind::Minus => Some(2),
TokenKind::Star | TokenKind::Slash => Some(3),
_ if PREFIX_TOKENS.contains(kind) => Some(4),
_ => None,
}
| TokenKind::GreaterEqual => 2,
TokenKind::Plus | TokenKind::Minus | TokenKind::Star | TokenKind::Slash => 3,
// 4: reserved for `.`
_ if PREFIX_TOKENS.contains(kind) => 5,
_ => return None, // not an infix operator
};
Some((spacing, index))
}
let Some(right_tightness) = tightness(right) else {
return Tighter::Left;
};
let Some(left_tightness) = tightness(left) else {
assert!(left == TokenKind::Eof);
assert!(left.0 == TokenKind::Eof);
return Tighter::Right;
};
@ -335,12 +368,13 @@ fn tighter(left: TokenKind, right: TokenKind) -> Tighter {
}
}
fn precedence_parse(p: &mut Parser, left: TokenKind) {
fn precedence_parse(p: &mut Parser, left: (TokenKind, Spaces)) {
let mut lhs = prefix(p);
loop {
let right = p.peek();
match tighter(left, right) {
let right = p.peek_with_spaces();
let tighter = tighter(left, right);
match tighter {
Tighter::Left => break,
Tighter::Right => {
let o = p.open_before(lhs);
@ -536,14 +570,12 @@ fn if_expr(p: &mut Parser) -> Closed {
p.close(o, NodeKind::If)
}
// NOTE: This must be synchronised with the match expression in prefix().
const PREFIX_TOKENS: TokenKindSet = TokenKindSet::new(&[
TokenKind::Ident,
TokenKind::Tag,
TokenKind::Number,
TokenKind::Color,
// NOTE: This is ambiguous in function calls.
// In that case, the infix operator takes precedence (because the `match` arms for the infix op
// come first.)
TokenKind::Minus,
TokenKind::Not,
TokenKind::LParen,
@ -582,8 +614,8 @@ fn prefix(p: &mut Parser) -> Closed {
}
}
fn infix(p: &mut Parser, op: TokenKind) -> NodeKind {
match op {
fn infix(p: &mut Parser, op: (TokenKind, Spaces)) -> NodeKind {
match op.0 {
TokenKind::Plus
| TokenKind::Minus
| TokenKind::Star
@ -598,13 +630,13 @@ fn infix(p: &mut Parser, op: TokenKind) -> NodeKind {
TokenKind::Equal => infix_let(p, op),
_ if PREFIX_TOKENS.contains(op) => infix_call(p),
_ if PREFIX_TOKENS.contains(op.0) => infix_call(p, op),
_ => panic!("unhandled infix operator {op:?}"),
}
}
fn infix_binary(p: &mut Parser, op: TokenKind) -> NodeKind {
fn infix_binary(p: &mut Parser, op: (TokenKind, Spaces)) -> NodeKind {
let o = p.open();
p.advance();
p.close(o, NodeKind::Op);
@ -617,15 +649,16 @@ fn infix_binary(p: &mut Parser, op: TokenKind) -> NodeKind {
NodeKind::Binary
}
// NOTE(review): this span is a rendered diff hunk whose +/- markers were lost. The two
// signatures, and the `prefix(p);` / `precedence_parse(p, arg);` lines, look like before/after
// pairs of the same change — confirm against the repository before treating this as
// compilable code.
/// Parses the arguments of a call expression and reports the node kind `NodeKind::Call`.
///
/// Arguments are consumed for as long as the next token belongs to `PREFIX_TOKENS`.
fn infix_call(p: &mut Parser) -> NodeKind {
fn infix_call(p: &mut Parser, mut arg: (TokenKind, Spaces)) -> NodeKind {
while PREFIX_TOKENS.contains(p.peek()) {
prefix(p);
// Each argument goes through the precedence parser with `arg` as its left-hand operator.
precedence_parse(p, arg);
// Refresh `arg` from the token that now follows, ready for the next loop iteration.
arg = p.peek_with_spaces();
}
NodeKind::Call
}
fn infix_let(p: &mut Parser, op: TokenKind) -> NodeKind {
fn infix_let(p: &mut Parser, op: (TokenKind, Spaces)) -> NodeKind {
p.advance();
if p.peek() == TokenKind::Newline {
@ -651,7 +684,7 @@ fn infix_let(p: &mut Parser, op: TokenKind) -> NodeKind {
}
// NOTE(review): this span is a rendered diff hunk whose +/- markers were lost. The two
// `precedence_parse` calls look like the before/after versions of one line — confirm
// against the repository before treating this as compilable code.
/// Parses one expression by entering the precedence parser with `TokenKind::Eof` as the
/// left-hand operator.
pub fn expr(p: &mut Parser) {
precedence_parse(p, TokenKind::Eof)
// The `Eof` sentinel is paired with `Spaces::new(true, false)` in this version.
precedence_parse(p, (TokenKind::Eof, Spaces::new(true, false)))
}
pub fn toplevel(p: &mut Parser) {