overhaul operator precedence

Arithmetic operators now all share the same precedence.
To make an operator bind tighter, remove the spaces around it:

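- 2 + 2 * 2 = 8 (both operators are spaced, so this groups as (2 + 2) * 2)
- 2 + 2*2 = 6 (the unspaced `*` binds tighter, so this groups as 2 + (2*2))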
This commit is contained in:
りき萌 2025-09-01 21:13:32 +02:00
parent 09f2292e62
commit b52c1b26c9
4 changed files with 144 additions and 43 deletions

View file

@ -3,7 +3,7 @@ use alloc::vec::Vec;
use crate::{ use crate::{
diagnostic::Diagnostic, diagnostic::Diagnostic,
source::{SourceCode, Span}, source::{SourceCode, Span},
token::{Lexis, TokenAllocError, TokenKind}, token::{Lexis, Spaces, TokenAllocError, TokenKind},
}; };
pub struct Lexer<'a> { pub struct Lexer<'a> {
@ -132,7 +132,8 @@ fn color(l: &mut Lexer<'_>) -> TokenKind {
TokenKind::Color TokenKind::Color
} }
fn whitespace_and_comments(l: &mut Lexer<'_>) { fn whitespace_and_comments(l: &mut Lexer<'_>) -> bool {
let mut matched = false;
loop { loop {
match l.current() { match l.current() {
'-' => { '-' => {
@ -142,6 +143,7 @@ fn whitespace_and_comments(l: &mut Lexer<'_>) {
while l.current() != '\n' && l.current() != '\0' { while l.current() != '\n' && l.current() != '\0' {
l.advance(); l.advance();
} }
matched = true;
} else { } else {
// An unfortunate little bit of backtracking here; // An unfortunate little bit of backtracking here;
// This seems like the simplest possible solution though. // This seems like the simplest possible solution though.
@ -153,14 +155,18 @@ fn whitespace_and_comments(l: &mut Lexer<'_>) {
} }
} }
' ' | '\r' | '\t' => l.advance(), ' ' | '\r' | '\t' => {
l.advance();
matched = true
}
_ => break, _ => break,
} }
} }
matched
} }
fn newline(l: &mut Lexer<'_>) -> (TokenKind, Span) { fn newline(l: &mut Lexer<'_>, has_left_space: bool) -> (TokenKind, Span, bool) {
let start = l.position; let start = l.position;
l.advance(); // skip the initial newline l.advance(); // skip the initial newline
let end = l.position; let end = l.position;
@ -177,11 +183,11 @@ fn newline(l: &mut Lexer<'_>) -> (TokenKind, Span) {
} }
} }
(TokenKind::Newline, Span::new(start, end)) (TokenKind::Newline, Span::new(start, end), has_left_space)
} }
fn token(l: &mut Lexer<'_>) -> (TokenKind, Span) { fn token(l: &mut Lexer<'_>) -> (TokenKind, Span, bool) {
whitespace_and_comments(l); let has_left_space = whitespace_and_comments(l);
let start = l.position; let start = l.position;
let kind = match l.current() { let kind = match l.current() {
@ -203,7 +209,7 @@ fn token(l: &mut Lexer<'_>) -> (TokenKind, Span) {
'<' => one_or_two(l, TokenKind::Less, '=', TokenKind::LessEqual), '<' => one_or_two(l, TokenKind::Less, '=', TokenKind::LessEqual),
'>' => one_or_two(l, TokenKind::Greater, '=', TokenKind::GreaterEqual), '>' => one_or_two(l, TokenKind::Greater, '=', TokenKind::GreaterEqual),
'\n' => return newline(l), '\n' => return newline(l, has_left_space),
'(' => one(l, TokenKind::LParen), '(' => one(l, TokenKind::LParen),
')' => one(l, TokenKind::RParen), ')' => one(l, TokenKind::RParen),
'[' => one(l, TokenKind::LBrack), '[' => one(l, TokenKind::LBrack),
@ -222,13 +228,22 @@ fn token(l: &mut Lexer<'_>) -> (TokenKind, Span) {
} }
}; };
let end = l.position; let end = l.position;
(kind, Span::new(start, end)) (kind, Span::new(start, end), has_left_space)
} }
pub fn lex(l: &mut Lexer<'_>) -> Result<(), TokenAllocError> { pub fn lex(l: &mut Lexer<'_>) -> Result<(), TokenAllocError> {
loop { loop {
let (kind, span) = token(l); let (kind, span, has_left_space) = token(l);
l.lexis.push(kind, span)?;
if !l.lexis.is_empty() {
let prev = l.lexis.len() - 1;
let spaces = l.lexis.spaces(prev);
l.lexis
.set_spaces(prev, Spaces::new(spaces.left(), has_left_space));
}
let spaces = Spaces::new(has_left_space, false);
l.lexis.push(kind, spaces, span)?;
if kind == TokenKind::Eof { if kind == TokenKind::Eof {
break; break;
} }

View file

@ -7,7 +7,7 @@ use crate::{
ast::{Ast, NodeAllocError, NodeId, NodeKind}, ast::{Ast, NodeAllocError, NodeId, NodeKind},
diagnostic::Diagnostic, diagnostic::Diagnostic,
source::Span, source::Span,
token::{Lexis, TokenKind, TokenKindSet}, token::{Lexis, Spaces, TokenKind, TokenKindSet},
}; };
#[derive(Debug, Clone, Copy)] #[derive(Debug, Clone, Copy)]
@ -132,6 +132,11 @@ impl<'a> Parser<'a> {
self.tokens.kind(self.position) self.tokens.kind(self.position)
} }
/// Returns the result of `self.peek()` paired with the [`Spaces`] that the token
/// list stores at the parser's current position.
#[track_caller]
fn peek_with_spaces(&self) -> (TokenKind, Spaces) {
    let kind = self.peek();
    let spaces = self.tokens.spaces(self.position);
    (kind, spaces)
}
fn span(&self) -> Span { fn span(&self) -> Span {
self.tokens.span(self.position) self.tokens.span(self.position)
} }
@ -298,33 +303,61 @@ impl fmt::Display for IntoAstError {
impl Error for IntoAstError {} impl Error for IntoAstError {}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Tighter { enum Tighter {
Left, Left,
Right, Right,
} }
fn tighter(left: TokenKind, right: TokenKind) -> Tighter { fn tighter(left: (TokenKind, Spaces), right: (TokenKind, Spaces)) -> Tighter {
fn tightness(kind: TokenKind) -> Option<usize> { #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
match kind { enum Spacing {
TokenKind::Equal | TokenKind::Colon => Some(0), Loose,
Call,
Tight,
}
fn tightness((kind, spaces): (TokenKind, Spaces)) -> Option<(Spacing, usize)> {
let spacing = match kind {
// There are a few types of operators which are independent of tightness.
// For : and =, it does not matter if they're spelled one way or the other, because
// there is only one way to use them (at the beginning of the expression).
TokenKind::Colon | TokenKind::Equal => Spacing::Loose,
// For calls, there is a special intermediate level, such that they can sit between
// loose operators and tight operators.
_ if PREFIX_TOKENS.contains(kind) => Spacing::Call,
// For everything else, the usual rules apply.
_ => match spaces.pair() {
(false, false) => Spacing::Tight,
(true, true) => Spacing::Loose,
_ => return None, // not a valid infix operator
},
};
let index = match kind {
TokenKind::Equal | TokenKind::Colon => 0,
// 1: reserved for `and` and `or`
TokenKind::EqualEqual TokenKind::EqualEqual
| TokenKind::NotEqual | TokenKind::NotEqual
| TokenKind::Less | TokenKind::Less
| TokenKind::LessEqual | TokenKind::LessEqual
| TokenKind::Greater | TokenKind::Greater
| TokenKind::GreaterEqual => Some(1), | TokenKind::GreaterEqual => 2,
TokenKind::Plus | TokenKind::Minus => Some(2), TokenKind::Plus | TokenKind::Minus | TokenKind::Star | TokenKind::Slash => 3,
TokenKind::Star | TokenKind::Slash => Some(3), // 4: reserve for `.`
_ if PREFIX_TOKENS.contains(kind) => Some(4), _ if PREFIX_TOKENS.contains(kind) => 5,
_ => None, _ => return None, // not an infix operator
} };
Some((spacing, index))
} }
let Some(right_tightness) = tightness(right) else { let Some(right_tightness) = tightness(right) else {
return Tighter::Left; return Tighter::Left;
}; };
let Some(left_tightness) = tightness(left) else { let Some(left_tightness) = tightness(left) else {
assert!(left == TokenKind::Eof); assert!(left.0 == TokenKind::Eof);
return Tighter::Right; return Tighter::Right;
}; };
@ -335,12 +368,13 @@ fn tighter(left: TokenKind, right: TokenKind) -> Tighter {
} }
} }
fn precedence_parse(p: &mut Parser, left: TokenKind) { fn precedence_parse(p: &mut Parser, left: (TokenKind, Spaces)) {
let mut lhs = prefix(p); let mut lhs = prefix(p);
loop { loop {
let right = p.peek(); let right = p.peek_with_spaces();
match tighter(left, right) { let tighter = tighter(left, right);
match tighter {
Tighter::Left => break, Tighter::Left => break,
Tighter::Right => { Tighter::Right => {
let o = p.open_before(lhs); let o = p.open_before(lhs);
@ -536,14 +570,12 @@ fn if_expr(p: &mut Parser) -> Closed {
p.close(o, NodeKind::If) p.close(o, NodeKind::If)
} }
// NOTE: This must be synchronised with the match expression in prefix().
const PREFIX_TOKENS: TokenKindSet = TokenKindSet::new(&[ const PREFIX_TOKENS: TokenKindSet = TokenKindSet::new(&[
TokenKind::Ident, TokenKind::Ident,
TokenKind::Tag, TokenKind::Tag,
TokenKind::Number, TokenKind::Number,
TokenKind::Color, TokenKind::Color,
// NOTE: This is ambiguous in function calls.
// In that case, the infix operator takes precedence (because the `match` arms for the infix op
// come first.)
TokenKind::Minus, TokenKind::Minus,
TokenKind::Not, TokenKind::Not,
TokenKind::LParen, TokenKind::LParen,
@ -582,8 +614,8 @@ fn prefix(p: &mut Parser) -> Closed {
} }
} }
fn infix(p: &mut Parser, op: TokenKind) -> NodeKind { fn infix(p: &mut Parser, op: (TokenKind, Spaces)) -> NodeKind {
match op { match op.0 {
TokenKind::Plus TokenKind::Plus
| TokenKind::Minus | TokenKind::Minus
| TokenKind::Star | TokenKind::Star
@ -598,13 +630,13 @@ fn infix(p: &mut Parser, op: TokenKind) -> NodeKind {
TokenKind::Equal => infix_let(p, op), TokenKind::Equal => infix_let(p, op),
_ if PREFIX_TOKENS.contains(op) => infix_call(p), _ if PREFIX_TOKENS.contains(op.0) => infix_call(p, op),
_ => panic!("unhandled infix operator {op:?}"), _ => panic!("unhandled infix operator {op:?}"),
} }
} }
fn infix_binary(p: &mut Parser, op: TokenKind) -> NodeKind { fn infix_binary(p: &mut Parser, op: (TokenKind, Spaces)) -> NodeKind {
let o = p.open(); let o = p.open();
p.advance(); p.advance();
p.close(o, NodeKind::Op); p.close(o, NodeKind::Op);
@ -617,15 +649,16 @@ fn infix_binary(p: &mut Parser, op: TokenKind) -> NodeKind {
NodeKind::Binary NodeKind::Binary
} }
fn infix_call(p: &mut Parser) -> NodeKind { fn infix_call(p: &mut Parser, mut arg: (TokenKind, Spaces)) -> NodeKind {
while PREFIX_TOKENS.contains(p.peek()) { while PREFIX_TOKENS.contains(p.peek()) {
prefix(p); precedence_parse(p, arg);
arg = p.peek_with_spaces();
} }
NodeKind::Call NodeKind::Call
} }
fn infix_let(p: &mut Parser, op: TokenKind) -> NodeKind { fn infix_let(p: &mut Parser, op: (TokenKind, Spaces)) -> NodeKind {
p.advance(); p.advance();
if p.peek() == TokenKind::Newline { if p.peek() == TokenKind::Newline {
@ -651,7 +684,7 @@ fn infix_let(p: &mut Parser, op: TokenKind) -> NodeKind {
} }
pub fn expr(p: &mut Parser) { pub fn expr(p: &mut Parser) {
precedence_parse(p, TokenKind::Eof) precedence_parse(p, (TokenKind::Eof, Spaces::new(true, false)))
} }
pub fn toplevel(p: &mut Parser) { pub fn toplevel(p: &mut Parser) {

View file

@ -1,4 +1,7 @@
use core::{error::Error, fmt::Display}; use core::{
error::Error,
fmt::{self, Display},
};
use alloc::vec::Vec; use alloc::vec::Vec;
@ -49,10 +52,16 @@ pub enum TokenKind {
Error, Error,
} }
/// Compact pair of "left" and "right" spacing flags attached to a token.
///
/// Both flags are packed into the low two bits of a single byte. The lexer sets
/// "left" when it skipped whitespace or a comment before the token, and later
/// fills in "right" from the following token's "left" flag.
#[derive(Clone, Copy, PartialEq, Eq)]
pub struct Spaces {
value: u8, // 0b10 = left, 0b01 = right
}
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct Lexis { pub struct Lexis {
pub kinds: Vec<TokenKind>, kinds: Vec<TokenKind>,
pub spans: Vec<Span>, spaces: Vec<Spaces>,
spans: Vec<Span>,
} }
impl Lexis { impl Lexis {
@ -61,6 +70,7 @@ impl Lexis {
Self { Self {
kinds: Vec::with_capacity(capacity), kinds: Vec::with_capacity(capacity),
spaces: Vec::with_capacity(capacity),
spans: Vec::with_capacity(capacity), spans: Vec::with_capacity(capacity),
} }
} }
@ -73,12 +83,18 @@ impl Lexis {
self.len() == 0 self.len() == 0
} }
pub fn push(&mut self, kind: TokenKind, span: Span) -> Result<(), TokenAllocError> { pub fn push(
&mut self,
kind: TokenKind,
spaces: Spaces,
span: Span,
) -> Result<(), TokenAllocError> {
if self.kinds.len() >= self.kinds.capacity() { if self.kinds.len() >= self.kinds.capacity() {
return Err(TokenAllocError); return Err(TokenAllocError);
} }
self.kinds.push(kind); self.kinds.push(kind);
self.spaces.push(spaces);
self.spans.push(span); self.spans.push(span);
Ok(()) Ok(())
@ -88,11 +104,48 @@ impl Lexis {
self.kinds[position as usize] self.kinds[position as usize]
} }
/// Returns the spacing flags stored for the token at `position`.
///
/// Indexes the `spaces` vector directly, so an out-of-range `position` panics.
pub fn spaces(&self, position: u32) -> Spaces {
    let index = position as usize;
    self.spaces[index]
}
/// Overwrites the spacing flags stored for the token at `position`.
///
/// Indexes the `spaces` vector directly, so an out-of-range `position` panics.
pub fn set_spaces(&mut self, position: u32, spaces: Spaces) {
    let slot = &mut self.spaces[position as usize];
    *slot = spaces;
}
pub fn span(&self, position: u32) -> Span { pub fn span(&self, position: u32) -> Span {
self.spans[position as usize] self.spans[position as usize]
} }
} }
impl Spaces {
    /// Bit that records the "left" flag.
    const LEFT_BIT: u8 = 0b10;
    /// Bit that records the "right" flag.
    const RIGHT_BIT: u8 = 0b01;

    /// Packs the two flags into a single byte: `left` goes into bit 1 and
    /// `right` into bit 0.
    pub fn new(left: bool, right: bool) -> Self {
        let mut value = 0;
        if left {
            value |= Self::LEFT_BIT;
        }
        if right {
            value |= Self::RIGHT_BIT;
        }
        Self { value }
    }

    /// Returns `true` if the "left" flag is set.
    pub fn left(self) -> bool {
        (self.value & Self::LEFT_BIT) != 0
    }

    /// Returns `true` if the "right" flag is set.
    pub fn right(self) -> bool {
        (self.value & Self::RIGHT_BIT) != 0
    }

    /// Returns both flags as a `(left, right)` tuple.
    pub fn pair(self) -> (bool, bool) {
        (self.left(), self.right())
    }
}
impl fmt::Debug for Spaces {
    /// Formats as `Spaces(<left>, <right>)`, showing the decoded flags rather
    /// than the packed byte.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let (left, right) = self.pair();
        let mut tuple = f.debug_tuple("Spaces");
        tuple.field(&left);
        tuple.field(&right);
        tuple.finish()
    }
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)] #[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct TokenAllocError; pub struct TokenAllocError;

View file

@ -76,9 +76,9 @@ wavelength: 1
withDotter \\d -> withDotter \\d ->
pi = 3.14159265 pi = 3.14159265
a = (sin (d Num * wavelength / pi) + 1) / 2 a = sin (d Num * wavelength / pi) + 1 / 2
range = maxThickness - minThickness range = maxThickness - minThickness
thickness = minThickness + a * range thickness = a * range + minThickness
stroke thickness color (line (d From) (d To)) stroke thickness color (line (d From) (d To))
`.trim(), `.trim(),
}, },