parser debugging utilities + fixed `paren` sometimes producing unbalanced events

parser events now have a `from` field which records the source location where the event was emitted.
this location is pretty-printed when using `Debug`.
liquidex 2024-09-01 09:35:26 +02:00
parent c5b2eb376f
commit 084012d65f
7 changed files with 116 additions and 58 deletions


@@ -2,16 +2,16 @@
 extern crate alloc;

-use core::{alloc::Layout, num::Saturating, slice};
+use core::{alloc::Layout, slice};

 use alloc::{boxed::Box, vec::Vec};

 use haku::{
     ast::Ast,
     bytecode::{Chunk, Defs, DefsImage},
-    compiler::{compile_expr, CompileError, Compiler, Source},
+    compiler::{compile_expr, ClosureSpec, CompileError, Compiler, Source},
     diagnostic::Diagnostic,
     lexer::{lex, Lexer},
-    parser::{self, Parser},
+    parser::{self, IntoAstError, Parser},
     render::{
         tiny_skia::{Pixmap, PremultipliedColorU8},
         Renderer, RendererLimits,
@@ -19,7 +19,7 @@ use haku::{
     source::SourceCode,
     system::{ChunkId, System, SystemImage},
     token::Lexis,
-    value::{BytecodeLoc, Closure, FunctionName, Ref, Value},
+    value::{Closure, Ref, Value},
     vm::{Exception, Vm, VmImage, VmLimits},
 };
 use log::{debug, info};
@@ -220,6 +220,7 @@ enum StatusCode {
     SourceCodeTooLong,
     TooManyTokens,
     TooManyAstNodes,
+    ParserUnbalancedEvents,
     ChunkTooBig,
     DiagnosticsEmitted,
     TooManyChunks,
@@ -253,6 +254,7 @@ extern "C" fn haku_status_string(code: StatusCode) -> *const i8 {
         StatusCode::SourceCodeTooLong => c"source code is too long",
         StatusCode::TooManyTokens => c"source code has too many tokens",
         StatusCode::TooManyAstNodes => c"source code has too many AST nodes",
+        StatusCode::ParserUnbalancedEvents => c"parser produced unbalanced events",
         StatusCode::ChunkTooBig => c"compiled bytecode is too large",
         StatusCode::DiagnosticsEmitted => c"diagnostics were emitted",
         StatusCode::TooManyChunks => c"too many registered bytecode chunks",
@@ -267,7 +269,7 @@ extern "C" fn haku_status_string(code: StatusCode) -> *const i8 {
 enum BrushState {
     #[default]
     Default,
-    Ready(ChunkId),
+    Ready(ChunkId, ClosureSpec),
 }

 #[derive(Debug, Default)]
@@ -348,8 +350,10 @@ unsafe extern "C" fn haku_compile_brush(
         },
     );
     parser::toplevel(&mut parser);
-    let Ok((root, mut parser_diagnostics)) = parser.into_ast(&mut ast) else {
-        return StatusCode::TooManyAstNodes;
+    let (root, mut parser_diagnostics) = match parser.into_ast(&mut ast) {
+        Ok((r, d)) => (r, d),
+        Err(IntoAstError::NodeAlloc(_)) => return StatusCode::TooManyAstNodes,
+        Err(IntoAstError::UnbalancedEvents) => return StatusCode::ParserUnbalancedEvents,
     };

     let src = Source {
@@ -365,6 +369,7 @@ unsafe extern "C" fn haku_compile_brush(
             CompileError::Emit => return StatusCode::ChunkTooBig,
         }
     }
+    let closure_spec = compiler.closure_spec();

     let mut diagnostics = lexer.diagnostics;
     diagnostics.append(&mut parser_diagnostics);
@@ -378,7 +383,7 @@ unsafe extern "C" fn haku_compile_brush(
         Ok(chunk_id) => chunk_id,
         Err(_) => return StatusCode::TooManyChunks,
     };
-    brush.state = BrushState::Ready(chunk_id);
+    brush.state = BrushState::Ready(chunk_id, closure_spec);

     info!("brush compiled into {chunk_id:?}");
@@ -421,22 +426,17 @@ unsafe extern "C" fn haku_eval_brush(instance: *mut Instance, brush: *const Brus
     let instance = &mut *instance;
     let brush = &*brush;

-    let BrushState::Ready(chunk_id) = brush.state else {
+    let BrushState::Ready(chunk_id, closure_spec) = brush.state else {
         panic!("brush is not compiled and ready to be used");
     };

     debug!("applying defs");
     instance.vm.apply_defs(&instance.defs);

-    let Ok(closure_id) = instance.vm.create_ref(Ref::Closure(Closure {
-        start: BytecodeLoc {
-            chunk_id,
-            offset: 0,
-        },
-        name: FunctionName::Anonymous,
-        param_count: 0,
-        captures: Vec::new(),
-    })) else {
+    let Ok(closure_id) = instance
+        .vm
+        .create_ref(Ref::Closure(Closure::chunk(chunk_id, closure_spec)))
+    else {
         return StatusCode::OutOfRefSlots;
     };


@@ -1,4 +1,4 @@
-use core::cell::Cell;
+use core::{cell::Cell, error::Error, fmt};

 use alloc::vec::Vec;
@@ -22,8 +22,15 @@ pub struct Parser<'a> {
     pub diagnostics: Vec<Diagnostic>,
 }

+struct Event {
+    kind: EventKind,
+    #[cfg(debug_assertions)]
+    from: &'static core::panic::Location<'static>,
+}
+
 #[derive(Debug)]
-enum Event {
+enum EventKind {
     Open { kind: NodeKind },
     Close,
     Advance,
@@ -52,32 +59,35 @@ impl<'a> Parser<'a> {
         }
     }

-    fn event(&mut self, event: Event) -> Option<usize> {
+    #[track_caller]
+    fn event(&mut self, event: EventKind) -> Option<usize> {
         if self.events.len() < self.events.capacity() {
             let index = self.events.len();
-            self.events.push(event);
+            self.events.push(Event::new(event));
             Some(index)
         } else {
             None
         }
     }

+    #[track_caller]
     fn open(&mut self) -> Open {
         Open {
-            index: self.event(Event::Open {
+            index: self.event(EventKind::Open {
                 kind: NodeKind::Error,
             }),
         }
     }

+    #[track_caller]
     fn open_before(&mut self, closed: Closed) -> Open {
         if let Some(index) = closed.index {
             if self.events.len() < self.events.capacity() {
                 self.events.insert(
                     index,
-                    Event::Open {
+                    Event::new(EventKind::Open {
                         kind: NodeKind::Error,
-                    },
+                    }),
                 );
                 return Open { index: Some(index) };
             }
@@ -85,10 +95,11 @@ impl<'a> Parser<'a> {
         Open { index: None }
     }

+    #[track_caller]
     fn close(&mut self, open: Open, kind: NodeKind) -> Closed {
         if let Some(index) = open.index {
-            self.events[index] = Event::Open { kind };
-            self.event(Event::Close);
+            self.events[index].kind = EventKind::Open { kind };
+            self.event(EventKind::Close);
             Closed { index: Some(index) }
         } else {
             Closed { index: None }
@@ -102,7 +113,7 @@ impl<'a> Parser<'a> {
     fn advance(&mut self) {
         if !self.is_eof() {
             self.position += 1;
-            self.event(Event::Advance);
+            self.event(EventKind::Advance);
             self.fuel.set(Self::FUEL);
         }
     }
@@ -125,6 +136,7 @@ impl<'a> Parser<'a> {
         }
     }

+    #[track_caller]
     fn advance_with_error(&mut self) -> Closed {
         let opened = self.open();
         self.advance();
@@ -140,7 +152,7 @@ impl<'a> Parser<'a> {
         }
     }

-    pub fn into_ast(self, ast: &mut Ast) -> Result<(NodeId, Vec<Diagnostic>), NodeAllocError> {
+    pub fn into_ast(self, ast: &mut Ast) -> Result<(NodeId, Vec<Diagnostic>), IntoAstError> {
         let mut token = 0;
         let mut events = self.events;
         let mut stack = Vec::new();
@@ -152,24 +164,30 @@ impl<'a> Parser<'a> {
         }

         // Remove the last Close to keep a single node on the stack.
-        assert!(matches!(events.pop(), Some(Event::Close)));
+        assert!(matches!(
+            events.pop(),
+            Some(Event {
+                kind: EventKind::Close,
+                ..
+            })
+        ));

         for event in events {
-            match event {
-                Event::Open { kind } => {
+            match event.kind {
+                EventKind::Open { kind } => {
                     stack.push(StackEntry {
                         node_id: ast.alloc(kind, self.tokens.span(token))?,
                         children: Vec::new(),
                     });
                 }
-                Event::Close => {
+                EventKind::Close => {
                     let end_span = self.tokens.span(token.saturating_sub(1));
                     let stack_entry = stack.pop().unwrap();
                     ast.alloc_children(stack_entry.node_id, &stack_entry.children);
                     ast.extend_span(stack_entry.node_id, end_span.end);
                     stack.last_mut().unwrap().children.push(stack_entry.node_id);
                 }
-                Event::Advance => {
+                EventKind::Advance => {
                     let span = self.tokens.span(token);
                     let node_id = ast.alloc(NodeKind::Token, span)?;
                     stack
@@ -184,7 +202,7 @@ impl<'a> Parser<'a> {
         if stack.len() != 1 {
             // This means we had too many events emitted and they are no longer balanced.
-            return Err(NodeAllocError);
+            return Err(IntoAstError::UnbalancedEvents);
         }

         // assert_eq!(token, self.tokens.len());
@@ -197,14 +215,60 @@ impl<'a> Parser<'a> {
     }
 }

-impl<'a> core::fmt::Debug for Parser<'a> {
-    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+impl<'a> fmt::Debug for Parser<'a> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
         f.debug_struct("Parser")
             .field("events", &self.events)
             .finish_non_exhaustive()
     }
 }

+impl Event {
+    #[track_caller]
+    fn new(kind: EventKind) -> Event {
+        Event {
+            kind,
+            from: core::panic::Location::caller(),
+        }
+    }
+}
+
+impl fmt::Debug for Event {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(
+            f,
+            "{:?} @ {}:{}:{}",
+            self.kind,
+            self.from.file(),
+            self.from.line(),
+            self.from.column()
+        )
+    }
+}
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum IntoAstError {
+    NodeAlloc(NodeAllocError),
+    UnbalancedEvents,
+}
+
+impl From<NodeAllocError> for IntoAstError {
+    fn from(value: NodeAllocError) -> Self {
+        Self::NodeAlloc(value)
+    }
+}
+
+impl fmt::Display for IntoAstError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            IntoAstError::NodeAlloc(e) => fmt::Display::fmt(e, f),
+            IntoAstError::UnbalancedEvents => f.write_str("parser produced unbalanced events"),
+        }
+    }
+}
+
+impl Error for IntoAstError {}
+
 enum Tighter {
     Left,
     Right,
@@ -337,11 +401,11 @@ fn paren(p: &mut Parser) -> Closed {
         p.optional_newline();
         if p.peek() != TokenKind::RParen {
             p.emit(Diagnostic::error(lspan, "missing closing parenthesis `)`"));
-            p.advance_with_error()
+            p.advance_with_error();
         } else {
             p.advance();
-            p.close(o, NodeKind::Paren)
         }
+        p.close(o, NodeKind::Paren)
     }
 }


@@ -18,7 +18,10 @@ fn parse(s: &str, f: fn(&mut Parser)) -> (Ast, NodeId) {
     f(&mut parser);
     if !parser.diagnostics.is_empty() {
-        panic!("parser emitted diagnostics: {:#?}", parser.diagnostics);
+        panic!(
+            "parser: {parser:#?}\nemitted diagnostics: {:#?}",
+            parser.diagnostics
+        );
     }

     let mut ast = Ast::new(1024);


@@ -9,7 +9,7 @@ use haku::{
     source::SourceCode,
     system::System,
     token::Lexis,
-    value::{BytecodeLoc, Closure, FunctionName, Ref, RefId, Value},
+    value::{Closure, Ref, RefId, Value},
     vm::{Vm, VmLimits},
 };


@@ -1,7 +1,6 @@
 use std::{
     collections::{HashSet, VecDeque},
     sync::Arc,
-    time::Duration,
 };

 use axum::{
@@ -21,7 +20,6 @@ use serde::{Deserialize, Serialize};
 use tokio::{
     select,
     sync::{mpsc, oneshot},
-    time::interval,
 };
 use tracing::{error, instrument};


@@ -7,14 +7,14 @@ use eyre::{bail, Context, OptionExt};
 use haku::{
     ast::Ast,
     bytecode::{Chunk, Defs, DefsImage},
-    compiler::{Compiler, Source},
+    compiler::{ClosureSpec, Compiler, Source},
     lexer::{lex, Lexer},
     parser::{self, Parser, ParserLimits},
     render::{tiny_skia::Pixmap, Renderer, RendererLimits},
     source::SourceCode,
     system::{ChunkId, System, SystemImage},
     token::Lexis,
-    value::{BytecodeLoc, Closure, FunctionName, Ref, Value},
+    value::{Closure, Ref, Value},
     vm::{Vm, VmImage, VmLimits},
 };
 use serde::{Deserialize, Serialize};
@@ -52,7 +52,7 @@ pub struct Haku {
     vm: Vm,
     vm_image: VmImage,

-    brush: Option<ChunkId>,
+    brush: Option<(ChunkId, ClosureSpec)>,
 }

 impl Haku {
@@ -121,6 +121,7 @@ impl Haku {
         let mut compiler = Compiler::new(&mut self.defs, &mut chunk);
         haku::compiler::compile_expr(&mut compiler, &src, root)
             .context("failed to compile the chunk")?;
+        let closure_spec = compiler.closure_spec();

         if !lexer.diagnostics.is_empty()
             || !parser_diagnostics.is_empty()
@@ -130,13 +131,13 @@
         }

         let chunk_id = self.system.add_chunk(chunk).context("too many chunks")?;
-        self.brush = Some(chunk_id);
+        self.brush = Some((chunk_id, closure_spec));

         Ok(())
     }

     pub fn eval_brush(&mut self) -> eyre::Result<Value> {
-        let brush = self
+        let (chunk_id, closure_spec) = self
             .brush
             .ok_or_eyre("brush is not compiled and ready to be used")?;
@@ -144,15 +145,7 @@
         let closure_id = self
             .vm
-            .create_ref(Ref::Closure(Closure {
-                start: BytecodeLoc {
-                    chunk_id: brush,
-                    offset: 0,
-                },
-                name: FunctionName::Anonymous,
-                param_count: 0,
-                captures: vec![],
-            }))
+            .create_ref(Ref::Closure(Closure::chunk(chunk_id, closure_spec)))
             .context("not enough ref slots to create initial closure")?;

         let scribble = self


@@ -15,7 +15,7 @@ use handlebars::Handlebars;
 use serde::Serialize;
 use tokio::{fs, net::TcpListener};
 use tower_http::services::{ServeDir, ServeFile};
-use tracing::{info, info_span, instrument};
+use tracing::{info, instrument};
 use walkdir::WalkDir;

 mod api;