fix a few bugs with the new precedence rules

りき萌 2025-09-02 20:02:48 +02:00
parent 29a80854a4
commit 9808d3227f
3 changed files with 113 additions and 59 deletions


@@ -432,22 +432,23 @@ unsafe extern "C" fn haku_compile_brush(
    );
    debug!("compiling: {closure_spec:?}");
    // debug!("bytecode: {:?}", chunk.bytecode);
    // {
    //     let mut cursor = 0_usize;
    //     for info in &chunk.span_info {
    //         let slice = &chunk.bytecode[cursor..cursor + info.len as usize];
    //         debug!(
    //             "{:?} | 0x{:x} {:?} | {:?}",
    //             info.span,
    //             cursor,
    //             slice,
    //             info.span.slice(src.code),
    //         );
    //         cursor += info.len as usize;
    //     }
    // }
    /*
    debug!("bytecode: {:?}", chunk.bytecode);
    {
        let mut cursor = 0_usize;
        for info in &chunk.span_info {
            let slice = &chunk.bytecode[cursor..cursor + info.len as usize];
            debug!(
                "{:?} | 0x{:x} {:?} | {:?}",
                info.span,
                cursor,
                slice,
                info.span.slice(src.code),
            );
            cursor += info.len as usize;
        }
    }
    // */
    instance.compile_result2 = Some(CompileResult {
        defs_string: instance.defs.serialize_defs(),
        tags_string: instance.defs.serialize_tags(),


@@ -329,7 +329,7 @@ fn tighter(left: (TokenKind, Spaces), right: (TokenKind, Spaces)) -> Tighter {
        | TokenKind::GreaterEqual => 2,
        TokenKind::Plus | TokenKind::Minus | TokenKind::Star | TokenKind::Slash => 3,
        // 4: reserve for `.`
        _ if PREFIX_TOKENS.contains(kind) => 5,
        _ if is_prefix_token((kind, spaces)) => 5,
        _ => return None, // not an infix operator
    };
    Some(match kind {
@@ -341,11 +341,11 @@ fn tighter(left: (TokenKind, Spaces), right: (TokenKind, Spaces)) -> Tighter {
        // For unary -, we treat it as having Tight spacing rather than Call, else it would
        // be allowed to begin function calls.
        TokenKind::Minus if spaces.pair() == (true, false) => Tightness::Tight(index),
        TokenKind::Minus if !spaces.are_balanced() => Tightness::Tight(index),
        // For calls, there is a special intermediate level, such that they can sit between
        // loose operators and tight operators.
        _ if PREFIX_TOKENS.contains(kind) => Tightness::Call,
        _ if is_prefix_token((kind, spaces)) => Tightness::Call,
        // For everything else, the usual rules apply.
        _ => match spaces.pair() {
@@ -366,7 +366,7 @@ fn tighter(left: (TokenKind, Spaces), right: (TokenKind, Spaces)) -> Tighter {
    // When we're inside a call, subsequent arguments must not be slurped into the current
    // expression, as it would result in calls being parsed as (vec (1 (-1))), which is not correct.
    if left_tightness == Tightness::Call {
    if left_tightness == Tightness::Call && right.0 == TokenKind::Minus && !right.1.are_balanced() {
        return Tighter::Left;
    }
@@ -579,13 +579,15 @@ fn if_expr(p: &mut Parser) -> Closed {
    p.close(o, NodeKind::If)
}

// TODO: There is a lot of special casing around `-` being both a prefix and an infix token.
// Maybe there's a way to simplify it?
// NOTE: This must be synchronised with the match expression in prefix().
const PREFIX_TOKENS: TokenKindSet = TokenKindSet::new(&[
    TokenKind::Ident,
    TokenKind::Tag,
    TokenKind::Number,
    TokenKind::Color,
    TokenKind::Minus,
    TokenKind::Not,
    TokenKind::LParen,
    TokenKind::Backslash,
@@ -593,6 +595,10 @@ const PREFIX_TOKENS: TokenKindSet = TokenKindSet::new(&[
    TokenKind::LBrack,
]);

fn is_prefix_token((kind, spaces): (TokenKind, Spaces)) -> bool {
    PREFIX_TOKENS.contains(kind) || (kind == TokenKind::Minus && !spaces.are_balanced())
}

fn prefix(p: &mut Parser) -> Closed {
    let (kind, spaces) = p.peek_with_spaces();
    match kind {
@@ -602,7 +608,7 @@ fn prefix(p: &mut Parser) -> Closed {
        TokenKind::Color => one(p, NodeKind::Color),
        TokenKind::LBrack => list(p),
        TokenKind::Minus if !spaces.right() => unary(p),
        TokenKind::Minus if spaces.pair() == (true, false) => unary(p),
        TokenKind::Not => unary(p),
        TokenKind::LParen => paren(p),
        TokenKind::Backslash => lambda(p),
@@ -619,9 +625,9 @@ fn prefix(p: &mut Parser) -> Closed {
        _ => {
            assert!(
                !PREFIX_TOKENS.contains(p.peek()),
                "{:?} found in PREFIX_TOKENS",
                p.peek()
                !is_prefix_token(p.peek_with_spaces()),
                "{:?} is a prefix token but is not handled in prefix()",
                p.peek_with_spaces()
            );
            let span = p.span();
@@ -651,7 +657,7 @@ fn infix(p: &mut Parser, op: (TokenKind, Spaces)) -> NodeKind {
        TokenKind::Equal => infix_let(p, op),
        _ if PREFIX_TOKENS.contains(op.0) => infix_call(p, op),
        _ if is_prefix_token(op) => infix_call(p, op),
        _ => panic!("unhandled infix operator {op:?}"),
    }
@@ -671,7 +677,7 @@ fn infix_binary(p: &mut Parser, op: (TokenKind, Spaces)) -> NodeKind {
}

fn infix_call(p: &mut Parser, mut arg: (TokenKind, Spaces)) -> NodeKind {
    while PREFIX_TOKENS.contains(p.peek()) {
    while is_prefix_token(p.peek_with_spaces()) {
        precedence_parse(p, arg);
        arg = p.peek_with_spaces();
    }


@@ -87,7 +87,7 @@ If you want to draw multiple scribbles, you can wrap them into a list, which we
withDotter \d ->
[
stroke 8 #F00 (d To + vec 4 0)
stroke 8 #00F (d To + vec (-4) 0)
stroke 8 #00F (d To + vec -4 0)
]
```
@@ -109,25 +109,15 @@ withDotter \d ->
[
[
stroke 8 #F00 (d To + vec 4 0)
stroke 8 #00F (d To + vec (-4) 0)
stroke 8 #00F (d To + vec -4 0)
]
[
stroke 8 #FF0 (d To + vec 0 4)
stroke 8 #0FF (d To + vec 0 (-4))
stroke 8 #0FF (d To + vec 0 -4)
]
]
```
::: aside
Another weird thing: when negating a number, you have to put it in parentheses.
This is because haku does not see your spaces---`vec -4`, `vec - 4`, and `vec-4` all mean the same thing!
In this case, it will always choose the 2nd interpretation---vec minus four.
So to make it interpret our minus four as, well, _minus four_, we need to enclose it in parentheses.
:::
This might seem useless, but it's a really useful property in computer programs.
It essentially means you can snap pieces together like Lego bricks!
@@ -186,7 +176,7 @@ haku vectors however are a little more constrained, because they always contain
We call these four numbers X, Y, Z, and W respectively.
Four is a useful number of dimensions to have, because it lets us do 3D math---which technically isn't built into haku, but if you want it, it's there.
For most practical purposes, we'll only be using the first _two_ of the four dimensions though---X and Y.
This is because the wall is a 2D space---it's a flat surface with no depth.
It's important to know though that vectors don't mean much _by themselves_---rakugaki just chooses them to represent points on the wall, but in a flat 2D space, all points need to be relative to some _origin_---the vector `(0, 0)`.
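
For example (a tiny sketch, with the names serving only as labels):

```haku
origin: vec 0 0 -- the origin itself: the point all other points are relative to
tenRight: vec 10 0 -- a point 10 pixels to the right of the origin
```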
@@ -205,7 +195,7 @@ withDotter \d ->
stroke 8 #000 (d To + vec 10 0) -- moved 10 pixels rightwards
```
Also note how the `d To` expression is parenthesized.
Also note how the `d To + vec 10 0` expression is parenthesized.
This is because otherwise, its individual parts would be interpreted as separate arguments to `stroke`, which is not what we want!
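
Here's a sketch of the difference; the commented-out line shows the parse we _don't_ want, so it is not working code:

```haku
withDotter \d ->
  stroke 8 #000 (d To + vec 10 0) -- the parentheses make the sum a single argument
  -- stroke 8 #000 d To + vec 10 0 -- without them, d, To, and the rest become separate arguments
```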
Anyways, with all that, we let haku mix all the ingredients together, and get a black dot under the cursor.
@@ -249,16 +239,72 @@ haku also supports other kinds of shapes: circles and rectangles.
```haku
withDotter \d ->
[
stroke 8 #F00 (circle (d To + vec (-16) 0) 16)
stroke 8 #00F (rect (d To + vec 0 (-16)) 32 32)
stroke 8 #F00 (circle (d To + vec -16 0) 16)
stroke 8 #00F (rect (d To + vec 0 -16) (vec 32 32))
]
```
- `circle`s are made up of an X position, Y position, and radius.
- `rect`s are made up of the (X and Y) position of their top-left corner, and a size (width and height).\
Our example produces a square, because the rectangle's width and height are equal!
## Math in haku
While haku is based entirely in pure math, it is important to note that haku is _not_ math notation!
It is a textual programming language, and its rules for the order of operations differ from those of math.
::: aside
If you've programmed in any other language, you might find those rules alien.
But I promise you, they make sense in the context of the rest of the language!
:::
In traditional math notation, the conventional order of operations is:
1. Parentheses
2. Exponentiation
3. Multiplication and division
4. Addition and subtraction
haku does not have an exponentiation operator.
That purpose is served by the function `pow`.
It does, however, have parentheses, multiplication, division, addition, and subtraction.
Unlike in math notation, addition, subtraction, multiplication, and division are _all_ calculated from left to right---multiplication and division do not take precedence over addition and subtraction.
So for the expression `2 + 2 * 2`, the result is `8`, and not `6`!
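
Written out as definitions (with `a` and `b` as purely illustrative names):

```haku
a: 2 + 2 * 2 -- left to right: (2 + 2) * 2, so a is 8
b: 2 + (2 * 2) -- the parentheses go first, so b is 6
```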
Since this can be inconvenient at times, there is a way to work around that.
haku has a distinction between _tight_ and _loose_ operators, where tight operators always take precedence over loose ones in the order of operations.
Remove the spaces around the `*` multiplication operator, like `2 + 2*2`, and the result is now `6` again---because we made `*` tight!
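
Here's that distinction at work on some made-up numbers:

```haku
looseMath: 10 - 2 * 3 -- all loose, so left to right: (10 - 2) * 3, which is 24
tightMath: 10 - 2*3 -- the tight 2*3 goes first: 10 - 6, which is 4
```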
This is convenient when representing fractions.
If you want a constant like half-π, the way to write it is `1/2*pi`---and order of operations will never mess you up, as long as you keep it tight without spaces!
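
For instance, a couple of tight constants (assuming `pi` is available, as above):

```haku
halfPi: 1/2*pi -- tight all the way through, so it's never reordered
threeQuarters: 3/4 -- a plain tight fraction
```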
The same thing happens with functions.
For example, if you wanted to calculate the sine of `1/2*pi*x`, as long as you write it as `sin 1/2*pi*x`, keeping the whole argument free of spaces, you won't have to wrap it in parentheses.
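
A sketch of both spellings side by side, where `t` is a hypothetical input used purely for illustration:

```haku
t: 0.25
tightSine: sin 1/2*pi*t -- the whole tight argument, no parentheses needed
looseSine: sin (1 / 2 * pi * t) -- the loose spelling of the same thing needs them
```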
Within a single tight or loose expression, there is still an order of operations.
In fact, here's the full order of operations in haku for reference:
1. Tight
    1. Arithmetic: `+`, `-`, `*`, `/`
    1. Comparisons: `==`, `!=`, `<`, `<=`, `>`, `>=`
1. Function calls
1. Loose
    1. Arithmetic
    1. Comparisons
1. Variables: `:`, `=`
Naturally, you can still use parentheses when the loose-tight distinction is not enough.
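
Here's a small sketch that combines a few of those levels (the names are only labels):

```haku
comparison: 1 + 2*3 > 5 -- tight 2*3 first, then loose 1 + 6, then the comparison: True
call: sin 0 + 1 -- calls bind tighter than loose arithmetic: (sin 0) + 1, which is 1
```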
## Programming in haku
So far we've been using haku solely to describe data.
@@ -270,7 +316,7 @@ Remember that example from before?
withDotter \d ->
[
stroke 8 #F00 (d To + vec 4 0)
stroke 8 #00F (d To + vec (-4) 0)
stroke 8 #00F (d To + vec -4 0)
]
```
@@ -281,7 +327,7 @@ If we wanted to change the size of the points, we'd need to first update the str
withDotter \d ->
[
stroke 4 #F00 (d To + vec 4 0)
stroke 4 #00F (d To + vec (-4) 0)
stroke 4 #00F (d To + vec -4 0)
---
]
```
@@ -294,8 +340,8 @@ So we also have to update their positions.
[
stroke 4 #F00 (d To + vec 2 0)
---
stroke 4 #00F (d To + vec (-2) 0)
--
stroke 4 #00F (d To + vec -2 0)
--
]
```
@@ -322,7 +368,7 @@ thickness: 4
withDotter \d ->
[
stroke thickness #F00 (d To + vec 2 0)
stroke thickness #00F (d To + vec (-2) 0)
stroke thickness #00F (d To + vec -2 0)
---------
]
```
@@ -355,7 +401,7 @@ xOffset: 2
withDotter \d ->
[
stroke thickness #F00 (d To + vec xOffset 0)
stroke thickness #00F (d To + vec (-xOffset) 0)
stroke thickness #00F (d To + vec -xOffset 0)
---------
]
```
@@ -371,7 +417,7 @@ Uppercase names are special values we call _tags_.
Tags are values which represent names.
For example, the `To` in `d To` is a tag.
It represents the name of the piece of data we're extracting from `d`.
It is the name of the piece of data we're extracting from `d`.
There are also two special tags, `True` and `False`, which represent [Boolean](https://en.wikipedia.org/wiki/Boolean_algebra) truth and falsehood.
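
For instance, a one-line sketch; it assumes, as you'd expect, that comparisons produce `True` or `False`:

```haku
isWide: 10 > 4 -- a comparison yields one of the tags True or False
```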
@@ -388,7 +434,7 @@ xOffset: 2
withDotter \d ->
[
stroke thickness #F00 (d To + vec xOffset 0)
stroke thickness #00F (d To + vec (-xOffset) 0)
stroke thickness #00F (d To + vec -xOffset 0)
]
```
@@ -402,7 +448,7 @@ xOffset: thickness / 2
withDotter \d ->
[
stroke thickness #F00 (d To + vec xOffset 0)
stroke thickness #00F (d To + vec (-xOffset) 0)
stroke thickness #00F (d To + vec -xOffset 0)
]
```
@@ -564,6 +610,7 @@ Seriously, 64 is my limit.
I wonder if there's any way we could automate this?
### The Ouroboros
You know the drill by now.
@@ -587,7 +634,7 @@ splat: \d, radius ->
airbrush: \d, size ->
[
splat d size
airbrush d (size - 8)
airbrush d size-8
]
withDotter \d ->
@@ -649,7 +696,7 @@ airbrush: \d, size ->
if (size > 0)
[
splat d size
airbrush d (size - 8)
airbrush d size-8
]
else
[]
@@ -675,8 +722,8 @@ airbrush: \d, size ->
if (size > 0)
[
splat d size
airbrush d (size - 1)
---
airbrush d size-1
---
]
else
[]
@@ -696,7 +743,7 @@ airbrush: \d, size ->
if (size > 0)
[
splat d size
airbrush d (size - 1)
airbrush d size-1
]
else
[]