parser debugging utilities + fixed `paren` sometimes producing unbalanced events

parser events now have a `from` field which records the source location where the event was emitted.
this location is pretty-printed when using `Debug`.
liquidex 2024-09-01 09:35:26 +02:00
parent c5b2eb376f
commit 084012d65f
7 changed files with 116 additions and 58 deletions


@@ -2,16 +2,16 @@
 extern crate alloc;

-use core::{alloc::Layout, num::Saturating, slice};
+use core::{alloc::Layout, slice};

 use alloc::{boxed::Box, vec::Vec};

 use haku::{
     ast::Ast,
     bytecode::{Chunk, Defs, DefsImage},
-    compiler::{compile_expr, CompileError, Compiler, Source},
+    compiler::{compile_expr, ClosureSpec, CompileError, Compiler, Source},
     diagnostic::Diagnostic,
     lexer::{lex, Lexer},
-    parser::{self, Parser},
+    parser::{self, IntoAstError, Parser},
     render::{
         tiny_skia::{Pixmap, PremultipliedColorU8},
         Renderer, RendererLimits,
@@ -19,7 +19,7 @@ use haku::{
     source::SourceCode,
     system::{ChunkId, System, SystemImage},
     token::Lexis,
-    value::{BytecodeLoc, Closure, FunctionName, Ref, Value},
+    value::{Closure, Ref, Value},
     vm::{Exception, Vm, VmImage, VmLimits},
 };
 use log::{debug, info};
@@ -220,6 +220,7 @@ enum StatusCode {
     SourceCodeTooLong,
     TooManyTokens,
     TooManyAstNodes,
+    ParserUnbalancedEvents,
     ChunkTooBig,
     DiagnosticsEmitted,
     TooManyChunks,
@@ -253,6 +254,7 @@ extern "C" fn haku_status_string(code: StatusCode) -> *const i8 {
         StatusCode::SourceCodeTooLong => c"source code is too long",
         StatusCode::TooManyTokens => c"source code has too many tokens",
         StatusCode::TooManyAstNodes => c"source code has too many AST nodes",
+        StatusCode::ParserUnbalancedEvents => c"parser produced unbalanced events",
         StatusCode::ChunkTooBig => c"compiled bytecode is too large",
         StatusCode::DiagnosticsEmitted => c"diagnostics were emitted",
         StatusCode::TooManyChunks => c"too many registered bytecode chunks",
@@ -267,7 +269,7 @@ extern "C" fn haku_status_string(code: StatusCode) -> *const i8 {
 enum BrushState {
     #[default]
     Default,
-    Ready(ChunkId),
+    Ready(ChunkId, ClosureSpec),
 }

 #[derive(Debug, Default)]
@@ -348,8 +350,10 @@ unsafe extern "C" fn haku_compile_brush(
         },
     );
     parser::toplevel(&mut parser);
-    let Ok((root, mut parser_diagnostics)) = parser.into_ast(&mut ast) else {
-        return StatusCode::TooManyAstNodes;
+    let (root, mut parser_diagnostics) = match parser.into_ast(&mut ast) {
+        Ok((r, d)) => (r, d),
+        Err(IntoAstError::NodeAlloc(_)) => return StatusCode::TooManyAstNodes,
+        Err(IntoAstError::UnbalancedEvents) => return StatusCode::ParserUnbalancedEvents,
     };

     let src = Source {
@@ -365,6 +369,7 @@ unsafe extern "C" fn haku_compile_brush(
             CompileError::Emit => return StatusCode::ChunkTooBig,
         }
     }
+    let closure_spec = compiler.closure_spec();

     let mut diagnostics = lexer.diagnostics;
     diagnostics.append(&mut parser_diagnostics);
@@ -378,7 +383,7 @@ unsafe extern "C" fn haku_compile_brush(
         Ok(chunk_id) => chunk_id,
         Err(_) => return StatusCode::TooManyChunks,
     };
-    brush.state = BrushState::Ready(chunk_id);
+    brush.state = BrushState::Ready(chunk_id, closure_spec);

     info!("brush compiled into {chunk_id:?}");
@@ -421,22 +426,17 @@ unsafe extern "C" fn haku_eval_brush(instance: *mut Instance, brush: *const Brus
     let instance = &mut *instance;
     let brush = &*brush;

-    let BrushState::Ready(chunk_id) = brush.state else {
+    let BrushState::Ready(chunk_id, closure_spec) = brush.state else {
         panic!("brush is not compiled and ready to be used");
     };

     debug!("applying defs");
     instance.vm.apply_defs(&instance.defs);

-    let Ok(closure_id) = instance.vm.create_ref(Ref::Closure(Closure {
-        start: BytecodeLoc {
-            chunk_id,
-            offset: 0,
-        },
-        name: FunctionName::Anonymous,
-        param_count: 0,
-        captures: Vec::new(),
-    })) else {
+    let Ok(closure_id) = instance
+        .vm
+        .create_ref(Ref::Closure(Closure::chunk(chunk_id, closure_spec)))
+    else {
         return StatusCode::OutOfRefSlots;
     };


@@ -1,4 +1,4 @@
-use core::cell::Cell;
+use core::{cell::Cell, error::Error, fmt};

 use alloc::vec::Vec;
@@ -22,8 +22,15 @@ pub struct Parser<'a> {
     pub diagnostics: Vec<Diagnostic>,
 }

+struct Event {
+    kind: EventKind,
+    #[cfg(debug_assertions)]
+    from: &'static core::panic::Location<'static>,
+}
+
 #[derive(Debug)]
-enum Event {
+enum EventKind {
     Open { kind: NodeKind },
     Close,
     Advance,
@@ -52,32 +59,35 @@ impl<'a> Parser<'a> {
         }
     }

-    fn event(&mut self, event: Event) -> Option<usize> {
+    #[track_caller]
+    fn event(&mut self, event: EventKind) -> Option<usize> {
         if self.events.len() < self.events.capacity() {
             let index = self.events.len();
-            self.events.push(event);
+            self.events.push(Event::new(event));
             Some(index)
         } else {
             None
         }
     }

+    #[track_caller]
     fn open(&mut self) -> Open {
         Open {
-            index: self.event(Event::Open {
+            index: self.event(EventKind::Open {
                 kind: NodeKind::Error,
             }),
         }
     }

+    #[track_caller]
     fn open_before(&mut self, closed: Closed) -> Open {
         if let Some(index) = closed.index {
             if self.events.len() < self.events.capacity() {
                 self.events.insert(
                     index,
-                    Event::Open {
+                    Event::new(EventKind::Open {
                         kind: NodeKind::Error,
-                    },
+                    }),
                 );
                 return Open { index: Some(index) };
             }
@@ -85,10 +95,11 @@ impl<'a> Parser<'a> {
         Open { index: None }
     }

+    #[track_caller]
     fn close(&mut self, open: Open, kind: NodeKind) -> Closed {
         if let Some(index) = open.index {
-            self.events[index] = Event::Open { kind };
-            self.event(Event::Close);
+            self.events[index].kind = EventKind::Open { kind };
+            self.event(EventKind::Close);
             Closed { index: Some(index) }
         } else {
             Closed { index: None }
@@ -102,7 +113,7 @@ impl<'a> Parser<'a> {
     fn advance(&mut self) {
         if !self.is_eof() {
             self.position += 1;
-            self.event(Event::Advance);
+            self.event(EventKind::Advance);
             self.fuel.set(Self::FUEL);
         }
     }
@@ -125,6 +136,7 @@ impl<'a> Parser<'a> {
         }
     }

+    #[track_caller]
     fn advance_with_error(&mut self) -> Closed {
         let opened = self.open();
         self.advance();
@@ -140,7 +152,7 @@ impl<'a> Parser<'a> {
         }
     }

-    pub fn into_ast(self, ast: &mut Ast) -> Result<(NodeId, Vec<Diagnostic>), NodeAllocError> {
+    pub fn into_ast(self, ast: &mut Ast) -> Result<(NodeId, Vec<Diagnostic>), IntoAstError> {
         let mut token = 0;
         let mut events = self.events;
         let mut stack = Vec::new();
@@ -152,24 +164,30 @@ impl<'a> Parser<'a> {
         }

         // Remove the last Close to keep a single node on the stack.
-        assert!(matches!(events.pop(), Some(Event::Close)));
+        assert!(matches!(
+            events.pop(),
+            Some(Event {
+                kind: EventKind::Close,
+                ..
+            })
+        ));

         for event in events {
-            match event {
-                Event::Open { kind } => {
+            match event.kind {
+                EventKind::Open { kind } => {
                     stack.push(StackEntry {
                         node_id: ast.alloc(kind, self.tokens.span(token))?,
                         children: Vec::new(),
                     });
                 }
-                Event::Close => {
+                EventKind::Close => {
                     let end_span = self.tokens.span(token.saturating_sub(1));
                     let stack_entry = stack.pop().unwrap();
                     ast.alloc_children(stack_entry.node_id, &stack_entry.children);
                     ast.extend_span(stack_entry.node_id, end_span.end);
                     stack.last_mut().unwrap().children.push(stack_entry.node_id);
                 }
-                Event::Advance => {
+                EventKind::Advance => {
                     let span = self.tokens.span(token);
                     let node_id = ast.alloc(NodeKind::Token, span)?;
                     stack
@@ -184,7 +202,7 @@ impl<'a> Parser<'a> {
         if stack.len() != 1 {
             // This means we had too many events emitted and they are no longer balanced.
-            return Err(NodeAllocError);
+            return Err(IntoAstError::UnbalancedEvents);
         }

         // assert_eq!(token, self.tokens.len());
@@ -197,14 +215,60 @@ impl<'a> Parser<'a> {
     }
 }

-impl<'a> core::fmt::Debug for Parser<'a> {
-    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+impl<'a> fmt::Debug for Parser<'a> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         f.debug_struct("Parser")
             .field("events", &self.events)
             .finish_non_exhaustive()
     }
 }

+impl Event {
+    #[track_caller]
+    fn new(kind: EventKind) -> Event {
+        Event {
+            kind,
+            from: core::panic::Location::caller(),
+        }
+    }
+}
+
+impl fmt::Debug for Event {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(
+            f,
+            "{:?} @ {}:{}:{}",
+            self.kind,
+            self.from.file(),
+            self.from.line(),
+            self.from.column()
+        )
+    }
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum IntoAstError {
+    NodeAlloc(NodeAllocError),
+    UnbalancedEvents,
+}
+
+impl From<NodeAllocError> for IntoAstError {
+    fn from(value: NodeAllocError) -> Self {
+        Self::NodeAlloc(value)
+    }
+}
+
+impl fmt::Display for IntoAstError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            IntoAstError::NodeAlloc(e) => fmt::Display::fmt(e, f),
+            IntoAstError::UnbalancedEvents => f.write_str("parser produced unbalanced events"),
+        }
+    }
+}
+
+impl Error for IntoAstError {}
+
 enum Tighter {
     Left,
     Right,
@@ -337,11 +401,11 @@ fn paren(p: &mut Parser) -> Closed {
         p.optional_newline();
         if p.peek() != TokenKind::RParen {
             p.emit(Diagnostic::error(lspan, "missing closing parenthesis `)`"));
-            p.advance_with_error()
+            p.advance_with_error();
         } else {
             p.advance();
-            p.close(o, NodeKind::Paren)
         }
+        p.close(o, NodeKind::Paren)
     }
 }


@@ -18,7 +18,10 @@ fn parse(s: &str, f: fn(&mut Parser)) -> (Ast, NodeId) {
     f(&mut parser);
     if !parser.diagnostics.is_empty() {
-        panic!("parser emitted diagnostics: {:#?}", parser.diagnostics);
+        panic!(
+            "parser: {parser:#?}\nemitted diagnostics: {:#?}",
+            parser.diagnostics
+        );
     }

     let mut ast = Ast::new(1024);


@@ -9,7 +9,7 @@ use haku::{
     source::SourceCode,
     system::System,
     token::Lexis,
-    value::{BytecodeLoc, Closure, FunctionName, Ref, RefId, Value},
+    value::{Closure, Ref, RefId, Value},
     vm::{Vm, VmLimits},
 };


@@ -1,7 +1,6 @@
 use std::{
     collections::{HashSet, VecDeque},
     sync::Arc,
-    time::Duration,
 };

 use axum::{
@@ -21,7 +20,6 @@ use serde::{Deserialize, Serialize};
 use tokio::{
     select,
     sync::{mpsc, oneshot},
-    time::interval,
 };
 use tracing::{error, instrument};


@@ -7,14 +7,14 @@ use eyre::{bail, Context, OptionExt};
 use haku::{
     ast::Ast,
     bytecode::{Chunk, Defs, DefsImage},
-    compiler::{Compiler, Source},
+    compiler::{ClosureSpec, Compiler, Source},
     lexer::{lex, Lexer},
     parser::{self, Parser, ParserLimits},
     render::{tiny_skia::Pixmap, Renderer, RendererLimits},
     source::SourceCode,
     system::{ChunkId, System, SystemImage},
     token::Lexis,
-    value::{BytecodeLoc, Closure, FunctionName, Ref, Value},
+    value::{Closure, Ref, Value},
     vm::{Vm, VmImage, VmLimits},
 };
 use serde::{Deserialize, Serialize};
@@ -52,7 +52,7 @@ pub struct Haku {
     vm: Vm,
     vm_image: VmImage,

-    brush: Option<ChunkId>,
+    brush: Option<(ChunkId, ClosureSpec)>,
 }

 impl Haku {
@@ -121,6 +121,7 @@ impl Haku {
         let mut compiler = Compiler::new(&mut self.defs, &mut chunk);
         haku::compiler::compile_expr(&mut compiler, &src, root)
             .context("failed to compile the chunk")?;
+        let closure_spec = compiler.closure_spec();

         if !lexer.diagnostics.is_empty()
             || !parser_diagnostics.is_empty()
@@ -130,13 +131,13 @@
         }

         let chunk_id = self.system.add_chunk(chunk).context("too many chunks")?;
-        self.brush = Some(chunk_id);
+        self.brush = Some((chunk_id, closure_spec));

         Ok(())
     }

     pub fn eval_brush(&mut self) -> eyre::Result<Value> {
-        let brush = self
+        let (chunk_id, closure_spec) = self
             .brush
             .ok_or_eyre("brush is not compiled and ready to be used")?;
@@ -144,15 +145,7 @@
         let closure_id = self
             .vm
-            .create_ref(Ref::Closure(Closure {
-                start: BytecodeLoc {
-                    chunk_id: brush,
-                    offset: 0,
-                },
-                name: FunctionName::Anonymous,
-                param_count: 0,
-                captures: vec![],
-            }))
+            .create_ref(Ref::Closure(Closure::chunk(chunk_id, closure_spec)))
             .context("not enough ref slots to create initial closure")?;

         let scribble = self


@@ -15,7 +15,7 @@ use handlebars::Handlebars;
 use serde::Serialize;
 use tokio::{fs, net::TcpListener};
 use tower_http::services::{ServeDir, ServeFile};
-use tracing::{info, info_span, instrument};
+use tracing::{info, instrument};
 use walkdir::WalkDir;

 mod api;