From b52c1b26c9aa6c77d901196d2eee7125d5955d6f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E3=83=AA=E3=82=AD=E8=90=8C?=
Date: Mon, 1 Sep 2025 21:13:32 +0200
Subject: [PATCH] overhaul operator precedence

arithmetic operators now have the same precedence. if you want them to
bind tighter, you remove the spaces around them:

- 2 + 2 * 2 = 8
- 2 + 2*2 = 6
---
 crates/haku/src/lexer.rs  | 37 ++++++++++++-----
 crates/haku/src/parser.rs | 85 +++++++++++++++++++++++++++------------
 crates/haku/src/token.rs  | 61 ++++++++++++++++++++++++++--
 static/brush-box.js       |  4 +-
 4 files changed, 144 insertions(+), 43 deletions(-)
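
A standalone sketch of how the new rule decides the two examples above.
It is illustrative only: the real comparison lives in tighter() in
parser.rs below, which also has a Call level between Loose and Tight,
and binding_power here is a made-up helper, not an API from this patch.

    // Binding power is a (spacing, index) pair compared lexicographically;
    // all four arithmetic operators share one index, so only spacing decides.
    #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
    enum Spacing {
        Loose, // spaces on both sides:  2 + 2
        Tight, // no spaces:             2+2
    }

    fn binding_power(spaced: bool) -> (Spacing, usize) {
        (if spaced { Spacing::Loose } else { Spacing::Tight }, 3)
    }

    fn main() {
        // 2 + 2 * 2: both operators are loose and equally tight, so the
        // parser folds left to right: (2 + 2) * 2 = 8.
        assert!(binding_power(true) <= binding_power(true));

        // 2 + 2*2: the unspaced * compares greater than the spaced +,
        // so it binds tighter: 2 + (2*2) = 6.
        assert!(binding_power(false) > binding_power(true));
    }
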
diff --git a/crates/haku/src/lexer.rs b/crates/haku/src/lexer.rs
index d577937..f3e4ce2 100644
--- a/crates/haku/src/lexer.rs
+++ b/crates/haku/src/lexer.rs
@@ -3,7 +3,7 @@ use alloc::vec::Vec;
 use crate::{
     diagnostic::Diagnostic,
     source::{SourceCode, Span},
-    token::{Lexis, TokenAllocError, TokenKind},
+    token::{Lexis, Spaces, TokenAllocError, TokenKind},
 };
 
 pub struct Lexer<'a> {
@@ -132,7 +132,8 @@ fn color(l: &mut Lexer<'_>) -> TokenKind {
     TokenKind::Color
 }
 
-fn whitespace_and_comments(l: &mut Lexer<'_>) {
+fn whitespace_and_comments(l: &mut Lexer<'_>) -> bool {
+    let mut matched = false;
     loop {
         match l.current() {
             '-' => {
@@ -142,6 +143,7 @@ fn whitespace_and_comments(l: &mut Lexer<'_>) {
                     while l.current() != '\n' && l.current() != '\0' {
                         l.advance();
                     }
+                    matched = true;
                 } else {
                     // An unfortunate little bit of backtracking here;
                     // This seems like the simplest possible solution though.
@@ -153,14 +155,18 @@ fn whitespace_and_comments(l: &mut Lexer<'_>) {
                 }
             }
 
-            ' ' | '\r' | '\t' => l.advance(),
+            ' ' | '\r' | '\t' => {
+                l.advance();
+                matched = true
+            }
 
             _ => break,
         }
     }
+    matched
 }
 
-fn newline(l: &mut Lexer<'_>) -> (TokenKind, Span) {
+fn newline(l: &mut Lexer<'_>, has_left_space: bool) -> (TokenKind, Span, bool) {
     let start = l.position;
     l.advance(); // skip the initial newline
     let end = l.position;
@@ -177,11 +183,11 @@ fn newline(l: &mut Lexer<'_>) -> (TokenKind, Span) {
         }
     }
 
-    (TokenKind::Newline, Span::new(start, end))
+    (TokenKind::Newline, Span::new(start, end), has_left_space)
 }
 
-fn token(l: &mut Lexer<'_>) -> (TokenKind, Span) {
-    whitespace_and_comments(l);
+fn token(l: &mut Lexer<'_>) -> (TokenKind, Span, bool) {
+    let has_left_space = whitespace_and_comments(l);
     let start = l.position;
 
     let kind = match l.current() {
@@ -203,7 +209,7 @@ fn token(l: &mut Lexer<'_>) -> (TokenKind, Span) {
         '<' => one_or_two(l, TokenKind::Less, '=', TokenKind::LessEqual),
         '>' => one_or_two(l, TokenKind::Greater, '=', TokenKind::GreaterEqual),
 
-        '\n' => return newline(l),
+        '\n' => return newline(l, has_left_space),
         '(' => one(l, TokenKind::LParen),
         ')' => one(l, TokenKind::RParen),
         '[' => one(l, TokenKind::LBrack),
@@ -222,13 +228,22 @@ fn token(l: &mut Lexer<'_>) -> (TokenKind, Span) {
         }
     };
     let end = l.position;
-    (kind, Span::new(start, end))
+    (kind, Span::new(start, end), has_left_space)
 }
 
 pub fn lex(l: &mut Lexer<'_>) -> Result<(), TokenAllocError> {
     loop {
-        let (kind, span) = token(l);
-        l.lexis.push(kind, span)?;
+        let (kind, span, has_left_space) = token(l);
+
+        if !l.lexis.is_empty() {
+            let prev = l.lexis.len() - 1;
+            let spaces = l.lexis.spaces(prev);
+            l.lexis
+                .set_spaces(prev, Spaces::new(spaces.left(), has_left_space));
+        }
+        let spaces = Spaces::new(has_left_space, false);
+        l.lexis.push(kind, spaces, span)?;
+
         if kind == TokenKind::Eof {
             break;
         }
diff --git a/crates/haku/src/parser.rs b/crates/haku/src/parser.rs
index 76c1ba1..a8e0cee 100644
--- a/crates/haku/src/parser.rs
+++ b/crates/haku/src/parser.rs
@@ -7,7 +7,7 @@ use crate::{
     ast::{Ast, NodeAllocError, NodeId, NodeKind},
     diagnostic::Diagnostic,
     source::Span,
-    token::{Lexis, TokenKind, TokenKindSet},
+    token::{Lexis, Spaces, TokenKind, TokenKindSet},
 };
 
 #[derive(Debug, Clone, Copy)]
@@ -132,6 +132,11 @@ impl<'a> Parser<'a> {
         self.tokens.kind(self.position)
     }
 
+    #[track_caller]
+    fn peek_with_spaces(&self) -> (TokenKind, Spaces) {
+        (self.peek(), self.tokens.spaces(self.position))
+    }
+
     fn span(&self) -> Span {
         self.tokens.span(self.position)
     }
@@ -298,33 +303,61 @@ impl fmt::Display for IntoAstError {
 
 impl Error for IntoAstError {}
 
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
 enum Tighter {
     Left,
     Right,
 }
 
-fn tighter(left: TokenKind, right: TokenKind) -> Tighter {
-    fn tightness(kind: TokenKind) -> Option<usize> {
-        match kind {
-            TokenKind::Equal | TokenKind::Colon => Some(0),
+fn tighter(left: (TokenKind, Spaces), right: (TokenKind, Spaces)) -> Tighter {
+    #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
+    enum Spacing {
+        Loose,
+        Call,
+        Tight,
+    }
+
+    fn tightness((kind, spaces): (TokenKind, Spaces)) -> Option<(Spacing, usize)> {
+        let spacing = match kind {
+            // There are a few types of operators which are independent of tightness.
+
+            // For : and =, it does not matter if they're spelled one way or the other, because
+            // there is only one way to use them (at the beginning of the expression).
+            TokenKind::Colon | TokenKind::Equal => Spacing::Loose,
+
+            // For calls, there is a special intermediate level, such that they can sit between
+            // loose operators and tight operators.
+            _ if PREFIX_TOKENS.contains(kind) => Spacing::Call,
+
+            // For everything else, the usual rules apply.
+            _ => match spaces.pair() {
+                (false, false) => Spacing::Tight,
+                (true, true) => Spacing::Loose,
+                _ => return None, // not a valid infix operator
+            },
+        };
+        let index = match kind {
+            TokenKind::Equal | TokenKind::Colon => 0,
+            // 1: reserved for `and` and `or`
             TokenKind::EqualEqual
             | TokenKind::NotEqual
             | TokenKind::Less
             | TokenKind::LessEqual
             | TokenKind::Greater
-            | TokenKind::GreaterEqual => Some(1),
-            TokenKind::Plus | TokenKind::Minus => Some(2),
-            TokenKind::Star | TokenKind::Slash => Some(3),
-            _ if PREFIX_TOKENS.contains(kind) => Some(4),
-            _ => None,
-        }
+            | TokenKind::GreaterEqual => 2,
+            TokenKind::Plus | TokenKind::Minus | TokenKind::Star | TokenKind::Slash => 3,
+            // 4: reserve for `.`
+            _ if PREFIX_TOKENS.contains(kind) => 5,
+            _ => return None, // not an infix operator
+        };
+        Some((spacing, index))
     }
 
     let Some(right_tightness) = tightness(right) else {
         return Tighter::Left;
     };
     let Some(left_tightness) = tightness(left) else {
-        assert!(left == TokenKind::Eof);
+        assert!(left.0 == TokenKind::Eof);
         return Tighter::Right;
     };
 
@@ -335,12 +368,13 @@ fn tighter(left: TokenKind, right: TokenKind) -> Tighter {
     }
 }
 
-fn precedence_parse(p: &mut Parser, left: TokenKind) {
+fn precedence_parse(p: &mut Parser, left: (TokenKind, Spaces)) {
     let mut lhs = prefix(p);
 
     loop {
-        let right = p.peek();
-        match tighter(left, right) {
+        let right = p.peek_with_spaces();
+        let tighter = tighter(left, right);
+        match tighter {
             Tighter::Left => break,
             Tighter::Right => {
                 let o = p.open_before(lhs);
@@ -536,14 +570,12 @@ fn if_expr(p: &mut Parser) -> Closed {
     p.close(o, NodeKind::If)
 }
 
+// NOTE: This must be synchronised with the match expression in prefix().
 const PREFIX_TOKENS: TokenKindSet = TokenKindSet::new(&[
     TokenKind::Ident,
     TokenKind::Tag,
     TokenKind::Number,
     TokenKind::Color,
-    // NOTE: This is ambiguous in function calls.
-    // In that case, the infix operator takes precedence (because the `match` arms for the infix op
-    // come first.)
     TokenKind::Minus,
     TokenKind::Not,
     TokenKind::LParen,
@@ -582,8 +614,8 @@ fn prefix(p: &mut Parser) -> Closed {
     }
 }
 
-fn infix(p: &mut Parser, op: TokenKind) -> NodeKind {
-    match op {
+fn infix(p: &mut Parser, op: (TokenKind, Spaces)) -> NodeKind {
+    match op.0 {
         TokenKind::Plus
         | TokenKind::Minus
         | TokenKind::Star
@@ -598,13 +630,13 @@ fn infix(p: &mut Parser, op: TokenKind) -> NodeKind {
 
         TokenKind::Equal => infix_let(p, op),
 
-        _ if PREFIX_TOKENS.contains(op) => infix_call(p),
+        _ if PREFIX_TOKENS.contains(op.0) => infix_call(p, op),
 
         _ => panic!("unhandled infix operator {op:?}"),
     }
 }
 
-fn infix_binary(p: &mut Parser, op: TokenKind) -> NodeKind {
+fn infix_binary(p: &mut Parser, op: (TokenKind, Spaces)) -> NodeKind {
     let o = p.open();
     p.advance();
     p.close(o, NodeKind::Op);
@@ -617,15 +649,16 @@ fn infix_binary(p: &mut Parser, op: TokenKind) -> NodeKind {
     NodeKind::Binary
 }
 
-fn infix_call(p: &mut Parser) -> NodeKind {
+fn infix_call(p: &mut Parser, mut arg: (TokenKind, Spaces)) -> NodeKind {
     while PREFIX_TOKENS.contains(p.peek()) {
-        prefix(p);
+        precedence_parse(p, arg);
+        arg = p.peek_with_spaces();
     }
 
     NodeKind::Call
 }
 
-fn infix_let(p: &mut Parser, op: TokenKind) -> NodeKind {
+fn infix_let(p: &mut Parser, op: (TokenKind, Spaces)) -> NodeKind {
     p.advance();
 
     if p.peek() == TokenKind::Newline {
@@ -651,7 +684,7 @@ fn infix_let(p: &mut Parser, op: TokenKind) -> NodeKind {
 }
 
 pub fn expr(p: &mut Parser) {
-    precedence_parse(p, TokenKind::Eof)
+    precedence_parse(p, (TokenKind::Eof, Spaces::new(true, false)))
 }
 
 pub fn toplevel(p: &mut Parser) {
diff --git a/crates/haku/src/token.rs b/crates/haku/src/token.rs
index 5a0ba1f..c613408 100644
--- a/crates/haku/src/token.rs
+++ b/crates/haku/src/token.rs
@@ -1,4 +1,7 @@
-use core::{error::Error, fmt::Display};
+use core::{
+    error::Error,
+    fmt::{self, Display},
+};
 
 use alloc::vec::Vec;
 
@@ -49,10 +52,16 @@ pub enum TokenKind {
     Error,
 }
 
+#[derive(Clone, Copy, PartialEq, Eq)]
+pub struct Spaces {
+    value: u8, // 0b10 = left, 0b01 = right
+}
+
 #[derive(Debug, Clone)]
 pub struct Lexis {
-    pub kinds: Vec<TokenKind>,
-    pub spans: Vec<Span>,
+    kinds: Vec<TokenKind>,
+    spaces: Vec<Spaces>,
+    spans: Vec<Span>,
 }
 
 impl Lexis {
@@ -61,6 +70,7 @@ impl Lexis {
 
         Self {
             kinds: Vec::with_capacity(capacity),
+            spaces: Vec::with_capacity(capacity),
             spans: Vec::with_capacity(capacity),
         }
     }
@@ -73,12 +83,18 @@ impl Lexis {
         self.len() == 0
     }
 
-    pub fn push(&mut self, kind: TokenKind, span: Span) -> Result<(), TokenAllocError> {
+    pub fn push(
+        &mut self,
+        kind: TokenKind,
+        spaces: Spaces,
+        span: Span,
+    ) -> Result<(), TokenAllocError> {
         if self.kinds.len() >= self.kinds.capacity() {
             return Err(TokenAllocError);
         }
 
         self.kinds.push(kind);
+        self.spaces.push(spaces);
         self.spans.push(span);
 
         Ok(())
@@ -88,11 +104,48 @@ impl Lexis {
         self.kinds[position as usize]
     }
 
+    pub fn spaces(&self, position: u32) -> Spaces {
+        self.spaces[position as usize]
+    }
+
+    pub fn set_spaces(&mut self, position: u32, spaces: Spaces) {
+        self.spaces[position as usize] = spaces;
+    }
+
     pub fn span(&self, position: u32) -> Span {
         self.spans[position as usize]
     }
 }
 
+impl Spaces {
+    pub fn new(left: bool, right: bool) -> Self {
+        Self {
+            value: (left as u8) << 1 | right as u8,
+        }
+    }
+
+    pub fn left(self) -> bool {
+        (self.value & 0b10) == 0b10
+    }
+
+    pub fn right(self) -> bool {
+        (self.value & 0b01) == 0b01
+    }
+
+    pub fn pair(self) -> (bool, bool) {
+        (self.left(), self.right())
+    }
+}
+
+impl fmt::Debug for Spaces {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_tuple("Spaces")
+            .field(&self.left())
+            .field(&self.right())
+            .finish()
+    }
+}
+
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub struct TokenAllocError;
 
diff --git a/static/brush-box.js b/static/brush-box.js
index 2ff5fed..dfcc850 100644
--- a/static/brush-box.js
+++ b/static/brush-box.js
@@ -76,9 +76,9 @@ wavelength: 1
 
 withDotter \\d ->
   pi = 3.14159265
-  a = (sin (d Num * wavelength / pi) + 1) / 2
+  a = sin (d Num * wavelength / pi) + 1 / 2
   range = maxThickness - minThickness
-  thickness = minThickness + a * range
+  thickness = a * range + minThickness
   stroke thickness color (line (d From) (d To))
 `.trim(),
     },
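
The spacing flags themselves are just two bits per token. Below is a
standalone sketch (not part of the patch) of the packing behind the new
Spaces struct in token.rs; main is only for illustration. The real lexer
fills the right-hand bit in after the fact, once the next token's leading
whitespace is known, which is what Lexis::set_spaces is for in lex() above.

    // Same layout as token::Spaces: 0b10 = space on the left, 0b01 = on the right.
    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
    struct Spaces {
        value: u8,
    }

    impl Spaces {
        fn new(left: bool, right: bool) -> Self {
            Self { value: (left as u8) << 1 | right as u8 }
        }
        fn left(self) -> bool { (self.value & 0b10) == 0b10 }
        fn right(self) -> bool { (self.value & 0b01) == 0b01 }
        fn pair(self) -> (bool, bool) { (self.left(), self.right()) }
    }

    fn main() {
        // When a token is pushed, only its left-hand space is known; the
        // right-hand bit gets back-patched when the next token arrives.
        let mut plus = Spaces::new(true, false); // after lexing "2 +"
        plus = Spaces::new(plus.left(), true);   // after lexing "2 + 2"
        assert_eq!(plus.pair(), (true, true));   // a loose operator

        let star = Spaces::new(false, false);    // "2*2"
        assert_eq!(star.pair(), (false, false)); // a tight operator
    }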