From 9808d3227f5831fe2898f3a40ea0c6aae175a429 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E3=83=AA=E3=82=AD=E8=90=8C?= Date: Tue, 2 Sep 2025 20:02:48 +0200 Subject: [PATCH] fix a few bugs with the new precedence rules --- crates/haku-wasm/src/lib.rs | 33 +++++------ crates/haku/src/parser.rs | 28 +++++---- docs/rkgk.dj | 111 +++++++++++++++++++++++++----------- 3 files changed, 113 insertions(+), 59 deletions(-) diff --git a/crates/haku-wasm/src/lib.rs b/crates/haku-wasm/src/lib.rs index 85c0635..5030c58 100644 --- a/crates/haku-wasm/src/lib.rs +++ b/crates/haku-wasm/src/lib.rs @@ -432,22 +432,23 @@ unsafe extern "C" fn haku_compile_brush( ); debug!("compiling: {closure_spec:?}"); - // debug!("bytecode: {:?}", chunk.bytecode); - // { - // let mut cursor = 0_usize; - // for info in &chunk.span_info { - // let slice = &chunk.bytecode[cursor..cursor + info.len as usize]; - // debug!( - // "{:?} | 0x{:x} {:?} | {:?}", - // info.span, - // cursor, - // slice, - // info.span.slice(src.code), - // ); - // cursor += info.len as usize; - // } - // } - + /* + debug!("bytecode: {:?}", chunk.bytecode); + { + let mut cursor = 0_usize; + for info in &chunk.span_info { + let slice = &chunk.bytecode[cursor..cursor + info.len as usize]; + debug!( + "{:?} | 0x{:x} {:?} | {:?}", + info.span, + cursor, + slice, + info.span.slice(src.code), + ); + cursor += info.len as usize; + } + } + // */ instance.compile_result2 = Some(CompileResult { defs_string: instance.defs.serialize_defs(), tags_string: instance.defs.serialize_tags(), diff --git a/crates/haku/src/parser.rs b/crates/haku/src/parser.rs index 5d02a5a..3a99adf 100644 --- a/crates/haku/src/parser.rs +++ b/crates/haku/src/parser.rs @@ -329,7 +329,7 @@ fn tighter(left: (TokenKind, Spaces), right: (TokenKind, Spaces)) -> Tighter { | TokenKind::GreaterEqual => 2, TokenKind::Plus | TokenKind::Minus | TokenKind::Star | TokenKind::Slash => 3, // 4: reserve for `.` - _ if PREFIX_TOKENS.contains(kind) => 5, + _ if is_prefix_token((kind, 
spaces)) => 5, _ => return None, // not an infix operator }; Some(match kind { @@ -341,11 +341,11 @@ fn tighter(left: (TokenKind, Spaces), right: (TokenKind, Spaces)) -> Tighter { // For unary -, we treat it as having Tight spacing rather than Call, else it would // be allowed to begin function calls. - TokenKind::Minus if spaces.pair() == (true, false) => Tightness::Tight(index), + TokenKind::Minus if !spaces.are_balanced() => Tightness::Tight(index), // For calls, there is a special intermediate level, such that they can sit between // loose operators and tight operators. - _ if PREFIX_TOKENS.contains(kind) => Tightness::Call, + _ if is_prefix_token((kind, spaces)) => Tightness::Call, // For everything else, the usual rules apply. _ => match spaces.pair() { @@ -366,7 +366,7 @@ fn tighter(left: (TokenKind, Spaces), right: (TokenKind, Spaces)) -> Tighter { // When we're inside a call, subsequent arguments must not be slurped into the current // expression, as it would result in calls being parsed as (vec (1 (-1))), which is not correct. - if left_tightness == Tightness::Call { + if left_tightness == Tightness::Call && right.0 == TokenKind::Minus && !right.1.are_balanced() { return Tighter::Left; } @@ -579,13 +579,15 @@ fn if_expr(p: &mut Parser) -> Closed { p.close(o, NodeKind::If) } +// TODO: There is a lot of special casing around `-` being both a prefix and an infix token. +// Maybe there's a way to simplify it? + // NOTE: This must be synchronised with the match expression in prefix(). 
const PREFIX_TOKENS: TokenKindSet = TokenKindSet::new(&[ TokenKind::Ident, TokenKind::Tag, TokenKind::Number, TokenKind::Color, - TokenKind::Minus, TokenKind::Not, TokenKind::LParen, TokenKind::Backslash, @@ -593,6 +595,10 @@ const PREFIX_TOKENS: TokenKindSet = TokenKindSet::new(&[ TokenKind::LBrack, ]); +fn is_prefix_token((kind, spaces): (TokenKind, Spaces)) -> bool { + PREFIX_TOKENS.contains(kind) || (kind == TokenKind::Minus && !spaces.are_balanced()) +} + fn prefix(p: &mut Parser) -> Closed { let (kind, spaces) = p.peek_with_spaces(); match kind { @@ -602,7 +608,7 @@ fn prefix(p: &mut Parser) -> Closed { TokenKind::Color => one(p, NodeKind::Color), TokenKind::LBrack => list(p), - TokenKind::Minus if !spaces.right() => unary(p), + TokenKind::Minus if spaces.pair() == (true, false) => unary(p), TokenKind::Not => unary(p), TokenKind::LParen => paren(p), TokenKind::Backslash => lambda(p), @@ -619,9 +625,9 @@ fn prefix(p: &mut Parser) -> Closed { _ => { assert!( - !PREFIX_TOKENS.contains(p.peek()), - "{:?} found in PREFIX_TOKENS", - p.peek() + !is_prefix_token(p.peek_with_spaces()), + "{:?} is a prefix token, but prefix() has no match arm for it", + p.peek_with_spaces() ); let span = p.span(); @@ -651,7 +657,7 @@ fn infix(p: &mut Parser, op: (TokenKind, Spaces)) -> NodeKind { TokenKind::Equal => infix_let(p, op), - _ if PREFIX_TOKENS.contains(op.0) => infix_call(p, op), + _ if is_prefix_token(op) => infix_call(p, op), _ => panic!("unhandled infix operator {op:?}"), } @@ -671,7 +677,7 @@ fn infix_binary(p: &mut Parser, op: (TokenKind, Spaces)) -> NodeKind { } fn infix_call(p: &mut Parser, mut arg: (TokenKind, Spaces)) -> NodeKind { - while PREFIX_TOKENS.contains(p.peek()) { + while is_prefix_token(p.peek_with_spaces()) { precedence_parse(p, arg); arg = p.peek_with_spaces(); } diff --git a/docs/rkgk.dj b/docs/rkgk.dj index 7335554..e91e98c 100644 --- a/docs/rkgk.dj +++ b/docs/rkgk.dj @@ -87,7 +87,7 @@ If you want to draw multiple scribbles, you can wrap them into a list, which we withDotter \d -> [ 
stroke 8 #F00 (d To + vec 4 0) - stroke 8 #00F (d To + vec (-4) 0) + stroke 8 #00F (d To + vec -4 0) ] ``` @@ -109,25 +109,15 @@ withDotter \d -> [ [ stroke 8 #F00 (d To + vec 4 0) - stroke 8 #00F (d To + vec (-4) 0) + stroke 8 #00F (d To + vec -4 0) ] [ stroke 8 #FF0 (d To + vec 0 4) - stroke 8 #0FF (d To + vec 0 (-4)) + stroke 8 #0FF (d To + vec 0 -4) ] ] ``` -::: aside - -Another weird thing: when negating a number, you have to put it in parentheses. - -This is because haku does not see your spaces---`vec -4`, `vec - 4`, and `vec-4` all mean the same thing! -In this case, it will always choose the 2nd interpretation---vec minus four. -So to make it interpret our minus four as, well, _minus four_, we need to enclose it in parentheses. - -::: - This might seem useless, but it's a really useful property in computer programs. It essentially means you can snap pieces together like Lego bricks! @@ -186,7 +176,7 @@ haku vectors however are a little more constrained, because they always contain We call these four numbers X, Y, Z, and W respectively. Four is a useful number of dimensions to have, because it lets us do 3D math---which technically isn't built into haku, but if you want it, it's there. -For most practical purposes, we'll only be using the first _two_ of the four dimensions though---X and Y. +For most practical purposes, we'll only be using the first _two_ of the four dimensions though---X and Y. This is because the wall is a 2D space---it's a flat surface with no depth. It's important to know though that vectors don't mean much _by themselves_---rakugaki just chooses them to represent points on the wall, but in a flat 2D space, all points need to be relative to some _origin_---the vector `(0, 0)`. @@ -205,7 +195,7 @@ withDotter \d -> stroke 8 #000 (d To + vec 10 0) -- moved 10 pixels rightwards ``` -Also note how the `d To` expression is parenthesized. +Also note how the `d To + vec 10 0` expression is parenthesized. 
This is because otherwise, its individual parts would be interpreted as separate arguments to `stroke`, which is not what we want! Anyways, with all that, we let haku mix all the ingredients together, and get a black dot under the cursor. @@ -249,16 +239,72 @@ haku also supports other kinds of shapes: circles and rectangles. ```haku withDotter \d -> [ - stroke 8 #F00 (circle (d To + vec (-16) 0) 16) - stroke 8 #00F (rect (d To + vec 0 (-16)) 32 32) + stroke 8 #F00 (circle (d To + vec -16 0) 16) + stroke 8 #00F (rect (d To + vec 0 -16) (vec 32 32)) ] ``` - `circle`s are made up of an X position, Y position, and radius. - + - `rect`s are made up of the (X and Y) position of their top-left corner, and a size (width and height).\ Our example produces a square, because the rectangle's width and height are equal! + +## Math in haku + +While haku is based entirely on pure math, it is important to note that haku is _not_ math notation! +It is a textual programming language, and has different rules concerning order of operations than math. + +::: aside + +If you've programmed in any other language, you might find those rules alien. +But I promise you, they make sense in the context of the rest of the language! + +::: + +In traditional math notation, the conventional order of operations is: + +1. Parentheses +2. Exponentiation +3. Multiplication and division +4. Addition and subtraction + +haku does not have an exponentiation operator. +That purpose is served by the function `pow`. +It does however have parentheses, multiplication, division, addition, and subtraction. + +Unlike in math notation, addition, subtraction, multiplication, and division are _all_ calculated from left to right---multiplication and division do not take precedence over addition and subtraction. +So for the expression `2 + 2 * 2`, the result is `8`, and not `6`! + +Since this can be inconvenient at times, there is a way to work around that. 
+haku has a distinction between _tight_ and _loose_ operators, where tight operators always take precedence over loose ones in the order of operations. + +Remove the spaces around the `*` multiplication operator, like `2 + 2*2`, and the result is now `6` again---because we made `*` tight! + +This is convenient when representing fractions. +If you want a constant like half-π, the way to write it is `1/2*pi`---and order of operations will never mess you up, as long as you keep it tight without spaces! + +The same thing happens with functions. +For example, if you want to calculate the sine of `1/2*pi*x`, you won't have to wrap the argument in parentheses as long as you write the whole argument without spaces: `sin 1/2*pi*x`. + +Inside a single whole tight or loose expression, there is still an order of operations. +In fact, here's the full order of operations in haku for reference: + +1. Tight + + 1. Arithmetic: `+`, `-`, `*`, `/` + 1. Comparisons: `==`, `!=`, `<`, `<=`, `>`, `>=` + +1. Function calls +1. Loose + + 1. Arithmetic + 1. Comparisons + 1. Variables: `:`, `=` + +Naturally, you can still use parentheses when the loose-tight distinction is not enough. + + ## Programming in haku So far we've been using haku solely to describe data. @@ -270,7 +316,7 @@ Remember that example from before? withDotter \d -> [ stroke 8 #F00 (d To + vec 4 0) - stroke 8 #00F (d To + vec (-4) 0) + stroke 8 #00F (d To + vec -4 0) ] ``` @@ -281,7 +327,7 @@ If we wanted to change the size of the points, we'd need to first update the str withDotter \d -> [ stroke 4 #F00 (d To + vec 4 0) - stroke 4 #00F (d To + vec (-4) 0) + stroke 4 #00F (d To + vec -4 0) --- ] ``` @@ -294,8 +340,8 @@ So we also have to update their positions. 
[ stroke 4 #F00 (d To + vec 2 0) --- - stroke 4 #00F (d To + vec (-2) 0) - -- + stroke 4 #00F (d To + vec -2 0) + -- ] ``` @@ -322,7 +368,7 @@ thickness: 4 withDotter \d -> [ stroke thickness #F00 (d To + vec 2 0) - stroke thickness #00F (d To + vec (-2) 0) + stroke thickness #00F (d To + vec -2 0) --------- ] ``` @@ -355,7 +401,7 @@ xOffset: 2 withDotter \d -> [ stroke thickness #F00 (d To + vec xOffset 0) - stroke thickness #00F (d To + vec (-xOffset) 0) + stroke thickness #00F (d To + vec -xOffset 0) --------- ] ``` @@ -371,7 +417,7 @@ Uppercase names are special values we call _tags_. Tags are values which represent names. For example, the `To` in `d To` is a tag. -It represents the name of the piece of data we're extracting from `d`. +It is the name of the piece of data we're extracting from `d`. There are also two special tags, `True` and `False`, which represent [Boolean](https://en.wikipedia.org/wiki/Boolean_algebra) truth and falsehood. @@ -388,7 +434,7 @@ xOffset: 2 withDotter \d -> [ stroke thickness #F00 (d To + vec xOffset 0) - stroke thickness #00F (d To + vec (-xOffset) 0) + stroke thickness #00F (d To + vec -xOffset 0) ] ``` @@ -402,7 +448,7 @@ xOffset: thickness / 2 withDotter \d -> [ stroke thickness #F00 (d To + vec xOffset 0) - stroke thickness #00F (d To + vec (-xOffset) 0) + stroke thickness #00F (d To + vec -xOffset 0) ] ``` @@ -564,6 +610,7 @@ Seriously, 64 is my limit. I wonder if there's any way we could automate this? + ### The Ouroboros You know the drill by now. 
@@ -587,7 +634,7 @@ splat: \d, radius -> airbrush: \d, size -> [ splat d size - airbrush d (size - 8) + airbrush d size-8 ] withDotter \d -> @@ -649,7 +696,7 @@ airbrush: \d, size -> if (size > 0) [ splat d size - airbrush d (size - 8) + airbrush d size-8 ] else [] @@ -675,8 +722,8 @@ airbrush: \d, size -> if (size > 0) [ splat d size - airbrush d (size - 1) - --- + airbrush d size-1 + --- ] else [] @@ -696,7 +743,7 @@ airbrush: \d, size -> if (size > 0) [ splat d size - airbrush d (size - 1) + airbrush d size-1 ] else []