From b52c1b26c9aa6c77d901196d2eee7125d5955d6f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E3=83=AA=E3=82=AD=E8=90=8C?=
Date: Mon, 1 Sep 2025 21:13:32 +0200
Subject: [PATCH] overhaul operator precedence

arithmetic operators now have the same precedence. if you want them to
bind tighter, you remove the spaces around them:

- 2 + 2 * 2 = 8
- 2 + 2*2 = 6
---
 crates/haku/src/lexer.rs  | 37 ++++++++++++-----
 crates/haku/src/parser.rs | 85 +++++++++++++++++++++++++++------------
 crates/haku/src/token.rs  | 61 ++++++++++++++++++++++++++--
 static/brush-box.js       |  4 +-
 4 files changed, 144 insertions(+), 43 deletions(-)
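
A standalone sketch of how the new rule decides the two examples above.
It is illustrative only: the real comparison lives in tighter() in
parser.rs below, which also has a Call level between Loose and Tight,
and binding_power here is a made-up helper, not an API from this patch.

    // Binding power is a (spacing, index) pair compared lexicographically;
    // all four arithmetic operators share one index, so only spacing decides.
    #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
    enum Spacing {
        Loose, // spaces on both sides:  2 + 2
        Tight, // no spaces:             2+2
    }

    fn binding_power(spaced: bool) -> (Spacing, usize) {
        (if spaced { Spacing::Loose } else { Spacing::Tight }, 3)
    }

    fn main() {
        // 2 + 2 * 2: both operators are loose and equally tight, so the
        // parser folds left to right: (2 + 2) * 2 = 8.
        assert!(binding_power(true) <= binding_power(true));

        // 2 + 2*2: the unspaced * compares greater than the spaced +,
        // so it binds tighter: 2 + (2*2) = 6.
        assert!(binding_power(false) > binding_power(true));
    }
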
diff --git a/crates/haku/src/lexer.rs b/crates/haku/src/lexer.rs
index d577937..f3e4ce2 100644
--- a/crates/haku/src/lexer.rs
+++ b/crates/haku/src/lexer.rs
@@ -3,7 +3,7 @@ use alloc::vec::Vec;
 use crate::{
     diagnostic::Diagnostic,
     source::{SourceCode, Span},
-    token::{Lexis, TokenAllocError, TokenKind},
+    token::{Lexis, Spaces, TokenAllocError, TokenKind},
 };
 
 pub struct Lexer<'a> {
@@ -132,7 +132,8 @@ fn color(l: &mut Lexer<'_>) -> TokenKind {
     TokenKind::Color
 }
 
-fn whitespace_and_comments(l: &mut Lexer<'_>) {
+fn whitespace_and_comments(l: &mut Lexer<'_>) -> bool {
+    let mut matched = false;
     loop {
         match l.current() {
             '-' => {
@@ -142,6 +143,7 @@ fn whitespace_and_comments(l: &mut Lexer<'_>) {
                     while l.current() != '\n' && l.current() != '\0' {
                         l.advance();
                     }
+                    matched = true;
                 } else {
                     // An unfortunate little bit of backtracking here;
                     // This seems like the simplest possible solution though.
@@ -153,14 +155,18 @@ fn whitespace_and_comments(l: &mut Lexer<'_>) {
                 }
             }
 
-            ' ' | '\r' | '\t' => l.advance(),
+            ' ' | '\r' | '\t' => {
+                l.advance();
+                matched = true
+            }
 
             _ => break,
         }
     }
+    matched
 }
 
-fn newline(l: &mut Lexer<'_>) -> (TokenKind, Span) {
+fn newline(l: &mut Lexer<'_>, has_left_space: bool) -> (TokenKind, Span, bool) {
     let start = l.position;
     l.advance(); // skip the initial newline
     let end = l.position;
@@ -177,11 +183,11 @@ fn newline(l: &mut Lexer<'_>) -> (TokenKind, Span) {
         }
     }
 
-    (TokenKind::Newline, Span::new(start, end))
+    (TokenKind::Newline, Span::new(start, end), has_left_space)
 }
 
-fn token(l: &mut Lexer<'_>) -> (TokenKind, Span) {
-    whitespace_and_comments(l);
+fn token(l: &mut Lexer<'_>) -> (TokenKind, Span, bool) {
+    let has_left_space = whitespace_and_comments(l);
     let start = l.position;
 
     let kind = match l.current() {
@@ -203,7 +209,7 @@ fn token(l: &mut Lexer<'_>) -> (TokenKind, Span) {
         '<' => one_or_two(l, TokenKind::Less, '=', TokenKind::LessEqual),
         '>' => one_or_two(l, TokenKind::Greater, '=', TokenKind::GreaterEqual),
 
-        '\n' => return newline(l),
+        '\n' => return newline(l, has_left_space),
         '(' => one(l, TokenKind::LParen),
         ')' => one(l, TokenKind::RParen),
         '[' => one(l, TokenKind::LBrack),
@@ -222,13 +228,22 @@ fn token(l: &mut Lexer<'_>) -> (TokenKind, Span) {
         }
     };
     let end = l.position;
-    (kind, Span::new(start, end))
+    (kind, Span::new(start, end), has_left_space)
 }
 
 pub fn lex(l: &mut Lexer<'_>) -> Result<(), TokenAllocError> {
     loop {
-        let (kind, span) = token(l);
-        l.lexis.push(kind, span)?;
+        let (kind, span, has_left_space) = token(l);
+
+        if !l.lexis.is_empty() {
+            let prev = l.lexis.len() - 1;
+            let spaces = l.lexis.spaces(prev);
+            l.lexis
+                .set_spaces(prev, Spaces::new(spaces.left(), has_left_space));
+        }
+        let spaces = Spaces::new(has_left_space, false);
+        l.lexis.push(kind, spaces, span)?;
+
         if kind == TokenKind::Eof {
             break;
         }
diff --git a/crates/haku/src/parser.rs b/crates/haku/src/parser.rs
index 76c1ba1..a8e0cee 100644
--- a/crates/haku/src/parser.rs
+++ b/crates/haku/src/parser.rs
@@ -7,7 +7,7 @@ use crate::{
     ast::{Ast, NodeAllocError, NodeId, NodeKind},
     diagnostic::Diagnostic,
     source::Span,
-    token::{Lexis, TokenKind, TokenKindSet},
+    token::{Lexis, Spaces, TokenKind, TokenKindSet},
 };
 
 #[derive(Debug, Clone, Copy)]
@@ -132,6 +132,11 @@ impl<'a> Parser<'a> {
         self.tokens.kind(self.position)
     }
 
+    #[track_caller]
+    fn peek_with_spaces(&self) -> (TokenKind, Spaces) {
+        (self.peek(), self.tokens.spaces(self.position))
+    }
+
     fn span(&self) -> Span {
         self.tokens.span(self.position)
     }
@@ -298,33 +303,61 @@ impl fmt::Display for IntoAstError {
 
 impl Error for IntoAstError {}
 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
 enum Tighter {
     Left,
     Right,
 }
 
-fn tighter(left: TokenKind, right: TokenKind) -> Tighter {
-    fn tightness(kind: TokenKind) -> Option<usize> {
-        match kind {
-            TokenKind::Equal | TokenKind::Colon => Some(0),
+fn tighter(left: (TokenKind, Spaces), right: (TokenKind, Spaces)) -> Tighter {
+    #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
+    enum Spacing {
+        Loose,
+        Call,
+        Tight,
+    }
+
+    fn tightness((kind, spaces): (TokenKind, Spaces)) -> Option<(Spacing, usize)> {
+        let spacing = match kind {
+            // There are a few types of operators which are independent of tightness.
+
+            // For : and =, it does not matter if they're spelled one way or the other, because
+            // there is only one way to use them (at the beginning of the expression).
+            TokenKind::Colon | TokenKind::Equal => Spacing::Loose,
+
+            // For calls, there is a special intermediate level, such that they can sit between
+            // loose operators and tight operators.
+            _ if PREFIX_TOKENS.contains(kind) => Spacing::Call,
+
+            // For everything else, the usual rules apply.
+            _ => match spaces.pair() {
+                (false, false) => Spacing::Tight,
+                (true, true) => Spacing::Loose,
+                _ => return None, // not a valid infix operator
+            },
+        };
+        let index = match kind {
+            TokenKind::Equal | TokenKind::Colon => 0,
+            // 1: reserved for `and` and `or`
             TokenKind::EqualEqual
             | TokenKind::NotEqual
             | TokenKind::Less
             | TokenKind::LessEqual
             | TokenKind::Greater
-            | TokenKind::GreaterEqual => Some(1),
-            TokenKind::Plus | TokenKind::Minus => Some(2),
-            TokenKind::Star | TokenKind::Slash => Some(3),
-            _ if PREFIX_TOKENS.contains(kind) => Some(4),
-            _ => None,
-        }
+            | TokenKind::GreaterEqual => 2,
+            TokenKind::Plus | TokenKind::Minus | TokenKind::Star | TokenKind::Slash => 3,
+            // 4: reserve for `.`
+            _ if PREFIX_TOKENS.contains(kind) => 5,
+            _ => return None, // not an infix operator
+        };
+        Some((spacing, index))
     }
 
     let Some(right_tightness) = tightness(right) else {
         return Tighter::Left;
     };
     let Some(left_tightness) = tightness(left) else {
-        assert!(left == TokenKind::Eof);
+        assert!(left.0 == TokenKind::Eof);
         return Tighter::Right;
     };
 
@@ -335,12 +368,13 @@ fn tighter(left: TokenKind, right: TokenKind) -> Tighter {
     }
 }
 
-fn precedence_parse(p: &mut Parser, left: TokenKind) {
+fn precedence_parse(p: &mut Parser, left: (TokenKind, Spaces)) {
     let mut lhs = prefix(p);
 
     loop {
-        let right = p.peek();
-        match tighter(left, right) {
+        let right = p.peek_with_spaces();
+        let tighter = tighter(left, right);
+        match tighter {
             Tighter::Left => break,
             Tighter::Right => {
                 let o = p.open_before(lhs);
@@ -536,14 +570,12 @@ fn if_expr(p: &mut Parser) -> Closed {
     p.close(o, NodeKind::If)
 }
 
+// NOTE: This must be synchronised with the match expression in prefix().
 const PREFIX_TOKENS: TokenKindSet = TokenKindSet::new(&[
     TokenKind::Ident,
     TokenKind::Tag,
     TokenKind::Number,
     TokenKind::Color,
-    // NOTE: This is ambiguous in function calls.
-    // In that case, the infix operator takes precedence (because the `match` arms for the infix op
-    // come first.)
     TokenKind::Minus,
     TokenKind::Not,
     TokenKind::LParen,
@@ -582,8 +614,8 @@ fn prefix(p: &mut Parser) -> Closed {
     }
 }
 
-fn infix(p: &mut Parser, op: TokenKind) -> NodeKind {
-    match op {
+fn infix(p: &mut Parser, op: (TokenKind, Spaces)) -> NodeKind {
+    match op.0 {
         TokenKind::Plus
         | TokenKind::Minus
         | TokenKind::Star
@@ -598,13 +630,13 @@ fn infix(p: &mut Parser, op: TokenKind) -> NodeKind {
 
         TokenKind::Equal => infix_let(p, op),
 
-        _ if PREFIX_TOKENS.contains(op) => infix_call(p),
+        _ if PREFIX_TOKENS.contains(op.0) => infix_call(p, op),
 
         _ => panic!("unhandled infix operator {op:?}"),
     }
 }
 
-fn infix_binary(p: &mut Parser, op: TokenKind) -> NodeKind {
+fn infix_binary(p: &mut Parser, op: (TokenKind, Spaces)) -> NodeKind {
     let o = p.open();
     p.advance();
     p.close(o, NodeKind::Op);
@@ -617,15 +649,16 @@ fn infix_binary(p: &mut Parser, op: TokenKind) -> NodeKind {
     NodeKind::Binary
 }
 
-fn infix_call(p: &mut Parser) -> NodeKind {
+fn infix_call(p: &mut Parser, mut arg: (TokenKind, Spaces)) -> NodeKind {
     while PREFIX_TOKENS.contains(p.peek()) {
-        prefix(p);
+        precedence_parse(p, arg);
+        arg = p.peek_with_spaces();
     }
 
     NodeKind::Call
 }
 
-fn infix_let(p: &mut Parser, op: TokenKind) -> NodeKind {
+fn infix_let(p: &mut Parser, op: (TokenKind, Spaces)) -> NodeKind {
     p.advance();
 
     if p.peek() == TokenKind::Newline {
@@ -651,7 +684,7 @@ fn infix_let(p: &mut Parser, op: TokenKind) -> NodeKind {
 }
 
 pub fn expr(p: &mut Parser) {
-    precedence_parse(p, TokenKind::Eof)
+    precedence_parse(p, (TokenKind::Eof, Spaces::new(true, false)))
 }
 
 pub fn toplevel(p: &mut Parser) {
diff --git a/crates/haku/src/token.rs b/crates/haku/src/token.rs
index 5a0ba1f..c613408 100644
--- a/crates/haku/src/token.rs
+++ b/crates/haku/src/token.rs
@@ -1,4 +1,7 @@
-use core::{error::Error, fmt::Display};
+use core::{
+    error::Error,
+    fmt::{self, Display},
+};
 
 use alloc::vec::Vec;
 
@@ -49,10 +52,16 @@ pub enum TokenKind {
     Error,
 }
 
+#[derive(Clone, Copy, PartialEq, Eq)]
+pub struct Spaces {
+    value: u8, // 0b10 = left, 0b01 = right
+}
+
 #[derive(Debug, Clone)]
 pub struct Lexis {
-    pub kinds: Vec<TokenKind>,
-    pub spans: Vec<Span>,
+    kinds: Vec<TokenKind>,
+    spaces: Vec<Spaces>,
+    spans: Vec<Span>,
 }
 
 impl Lexis {
@@ -61,6 +70,7 @@ impl Lexis {
 
         Self {
             kinds: Vec::with_capacity(capacity),
+            spaces: Vec::with_capacity(capacity),
             spans: Vec::with_capacity(capacity),
         }
     }
@@ -73,12 +83,18 @@ impl Lexis {
         self.len() == 0
     }
 
-    pub fn push(&mut self, kind: TokenKind, span: Span) -> Result<(), TokenAllocError> {
+    pub fn push(
+        &mut self,
+        kind: TokenKind,
+        spaces: Spaces,
+        span: Span,
+    ) -> Result<(), TokenAllocError> {
         if self.kinds.len() >= self.kinds.capacity() {
             return Err(TokenAllocError);
         }
 
         self.kinds.push(kind);
+        self.spaces.push(spaces);
         self.spans.push(span);
 
         Ok(())
@@ -88,11 +104,48 @@ impl Lexis {
         self.kinds[position as usize]
     }
 
+    pub fn spaces(&self, position: u32) -> Spaces {
+        self.spaces[position as usize]
+    }
+
+    pub fn set_spaces(&mut self, position: u32, spaces: Spaces) {
+        self.spaces[position as usize] = spaces;
+    }
+
     pub fn span(&self, position: u32) -> Span {
         self.spans[position as usize]
     }
 }
 
+impl Spaces {
+    pub fn new(left: bool, right: bool) -> Self {
+        Self {
+            value: (left as u8) << 1 | right as u8,
+        }
+    }
+
+    pub fn left(self) -> bool {
+        (self.value & 0b10) == 0b10
+    }
+
+    pub fn right(self) -> bool {
+        (self.value & 0b01) == 0b01
+    }
+
+    pub fn pair(self) -> (bool, bool) {
+        (self.left(), self.right())
+    }
+}
+
+impl fmt::Debug for Spaces {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_tuple("Spaces")
+            .field(&self.left())
+            .field(&self.right())
+            .finish()
+    }
+}
+
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub struct TokenAllocError;
 
diff --git a/static/brush-box.js b/static/brush-box.js
index 2ff5fed..dfcc850 100644
--- a/static/brush-box.js
+++ b/static/brush-box.js
@@ -76,9 +76,9 @@ wavelength: 1
 
 withDotter \\d ->
   pi = 3.14159265
-  a = (sin (d Num * wavelength / pi) + 1) / 2
+  a = sin (d Num * wavelength / pi) + 1 / 2
   range = maxThickness - minThickness
-  thickness = minThickness + a * range
+  thickness = a * range + minThickness
   stroke thickness color (line (d From) (d To))
 `.trim(),
     },
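
The spacing flags themselves are just two bits per token. Below is a
standalone sketch (not part of the patch) of the packing behind the new
Spaces struct in token.rs; main is only for illustration. The real lexer
fills the right-hand bit in after the fact, once the next token's leading
whitespace is known, which is what Lexis::set_spaces is for in lex() above.

    // Same layout as token::Spaces: 0b10 = space on the left, 0b01 = on the right.
    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
    struct Spaces {
        value: u8,
    }

    impl Spaces {
        fn new(left: bool, right: bool) -> Self {
            Self { value: (left as u8) << 1 | right as u8 }
        }
        fn left(self) -> bool { (self.value & 0b10) == 0b10 }
        fn right(self) -> bool { (self.value & 0b01) == 0b01 }
        fn pair(self) -> (bool, bool) { (self.left(), self.right()) }
    }

    fn main() {
        // When a token is pushed, only its left-hand space is known; the
        // right-hand bit gets back-patched when the next token arrives.
        let mut plus = Spaces::new(true, false); // after lexing "2 +"
        plus = Spaces::new(plus.left(), true);   // after lexing "2 + 2"
        assert_eq!(plus.pair(), (true, true));   // a loose operator

        let star = Spaces::new(false, false);    // "2*2"
        assert_eq!(star.pair(), (false, false)); // a tight operator
    }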