syntax v2

introduce a new, more ergonomic syntax for haku
not all features are implemented just yet. still missing:

- custom tags (non-True/False)
- color literals
- lists
This commit is contained in:
リキ萌 2024-08-27 20:43:14 +02:00
parent a3e5e8bd10
commit 2595bf0d82
21 changed files with 2844 additions and 1062 deletions

View file

@ -1,71 +1,31 @@
// NOTE: This is a very bad CLI.
// Sorry!
// NOTE: This is a very bad CLI. I only use it for debugging haku with LLDB.
// Sorry that it doesn't actually do anything!
use std::{error::Error, fmt::Display, io::BufRead};
use haku::{
bytecode::{Chunk, Defs},
compiler::{compile_expr, Compiler, Source},
sexp::{parse_toplevel, Ast, Parser, SourceCode},
system::System,
value::{BytecodeLoc, Closure, FunctionName, Ref, Value},
vm::{Vm, VmLimits},
ast::{dump::dump, Ast},
lexer::{lex, Lexer},
parser::{expr, Parser, ParserLimits},
source::SourceCode,
token::Lexis,
value::Value,
};
fn eval(code: &str) -> Result<Value, Box<dyn Error>> {
let mut system = System::new(1);
let ast = Ast::new(1024);
let code = SourceCode::unlimited_len(code);
let mut parser = Parser::new(ast, code);
let root = parse_toplevel(&mut parser);
let ast = parser.ast;
let src = Source {
code,
ast: &ast,
system: &system,
};
let mut lexer = Lexer::new(Lexis::new(1024), code);
lex(&mut lexer).expect("too many tokens");
let mut defs = Defs::new(256);
let mut chunk = Chunk::new(65536).unwrap();
let mut compiler = Compiler::new(&mut defs, &mut chunk);
compile_expr(&mut compiler, &src, root)?;
let diagnostics = compiler.diagnostics;
let defs = compiler.defs;
println!("{chunk:?}");
let mut parser = Parser::new(&lexer.lexis, &ParserLimits { max_events: 1024 });
expr(&mut parser);
for diagnostic in &diagnostics {
eprintln!(
"{}..{}: {}",
diagnostic.span.start, diagnostic.span.end, diagnostic.message
);
}
let mut ast = Ast::new(1024);
let (root, _) = parser.into_ast(&mut ast).unwrap();
if !diagnostics.is_empty() {
return Err(Box::new(DiagnosticsEmitted));
}
eprintln!("{}", dump(&ast, root, Some(code)));
let mut vm = Vm::new(
defs,
&VmLimits {
stack_capacity: 256,
call_stack_capacity: 256,
ref_capacity: 256,
fuel: 32768,
memory: 1024,
},
);
let chunk_id = system.add_chunk(chunk)?;
let closure = vm.create_ref(Ref::Closure(Closure {
start: BytecodeLoc {
chunk_id,
offset: 0,
},
name: FunctionName::Anonymous,
param_count: 0,
captures: Vec::new(),
}))?;
Ok(vm.run(&system, closure)?)
Ok(Value::Nil)
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]

View file

@ -2,18 +2,23 @@
extern crate alloc;
use core::{alloc::Layout, slice};
use core::{alloc::Layout, num::Saturating, slice};
use alloc::{boxed::Box, vec::Vec};
use haku::{
ast::Ast,
bytecode::{Chunk, Defs, DefsImage},
compiler::{compile_expr, CompileError, Compiler, Diagnostic, Source},
compiler::{compile_expr, CompileError, Compiler, Source},
diagnostic::Diagnostic,
lexer::{lex, Lexer},
parser::{self, Parser},
render::{
tiny_skia::{Pixmap, PremultipliedColorU8},
Renderer, RendererLimits,
},
sexp::{parse_toplevel, Ast, Parser, SourceCode},
source::SourceCode,
system::{ChunkId, System, SystemImage},
token::Lexis,
value::{BytecodeLoc, Closure, FunctionName, Ref, Value},
vm::{Exception, Vm, VmImage, VmLimits},
};
@ -41,6 +46,8 @@ struct Limits {
max_source_code_len: usize,
max_chunks: usize,
max_defs: usize,
max_tokens: usize,
max_parser_events: usize,
ast_capacity: usize,
chunk_capacity: usize,
stack_capacity: usize,
@ -58,6 +65,8 @@ impl Default for Limits {
max_source_code_len: 65536,
max_chunks: 2,
max_defs: 256,
max_tokens: 1024,
max_parser_events: 1024,
ast_capacity: 1024,
chunk_capacity: 65536,
stack_capacity: 1024,
@ -101,6 +110,8 @@ macro_rules! limit_setter {
limit_setter!(max_source_code_len);
limit_setter!(max_chunks);
limit_setter!(max_defs);
limit_setter!(max_tokens);
limit_setter!(max_parser_events);
limit_setter!(ast_capacity);
limit_setter!(chunk_capacity);
limit_setter!(stack_capacity);
@ -207,6 +218,8 @@ unsafe extern "C" fn haku_exception_message_len(instance: *const Instance) -> u3
enum StatusCode {
Ok,
SourceCodeTooLong,
TooManyTokens,
TooManyAstNodes,
ChunkTooBig,
DiagnosticsEmitted,
TooManyChunks,
@ -238,6 +251,8 @@ extern "C" fn haku_status_string(code: StatusCode) -> *const i8 {
match code {
StatusCode::Ok => c"ok",
StatusCode::SourceCodeTooLong => c"source code is too long",
StatusCode::TooManyTokens => c"source code has too many tokens",
StatusCode::TooManyAstNodes => c"source code has too many AST nodes",
StatusCode::ChunkTooBig => c"compiled bytecode is too large",
StatusCode::DiagnosticsEmitted => c"diagnostics were emitted",
StatusCode::TooManyChunks => c"too many registered bytecode chunks",
@ -281,22 +296,22 @@ unsafe extern "C" fn haku_num_diagnostics(brush: *const Brush) -> u32 {
#[no_mangle]
unsafe extern "C" fn haku_diagnostic_start(brush: *const Brush, index: u32) -> u32 {
(*brush).diagnostics[index as usize].span.start as u32
(*brush).diagnostics[index as usize].span().start
}
#[no_mangle]
unsafe extern "C" fn haku_diagnostic_end(brush: *const Brush, index: u32) -> u32 {
(*brush).diagnostics[index as usize].span.end as u32
(*brush).diagnostics[index as usize].span().end
}
#[no_mangle]
unsafe extern "C" fn haku_diagnostic_message(brush: *const Brush, index: u32) -> *const u8 {
(*brush).diagnostics[index as usize].message.as_ptr()
(*brush).diagnostics[index as usize].message().as_ptr()
}
#[no_mangle]
unsafe extern "C" fn haku_diagnostic_message_len(brush: *const Brush, index: u32) -> u32 {
(*brush).diagnostics[index as usize].message.len() as u32
(*brush).diagnostics[index as usize].message().len() as u32
}
#[no_mangle]
@ -315,15 +330,27 @@ unsafe extern "C" fn haku_compile_brush(
let code = core::str::from_utf8(slice::from_raw_parts(code, code_len as usize))
.expect("invalid UTF-8");
let code = match SourceCode::limited_len(code, instance.limits.max_source_code_len) {
Some(code) => code,
None => return StatusCode::SourceCodeTooLong,
let Some(code) = SourceCode::limited_len(code, instance.limits.max_source_code_len as u32)
else {
return StatusCode::SourceCodeTooLong;
};
let ast = Ast::new(instance.limits.ast_capacity);
let mut parser = Parser::new(ast, code);
let root = parse_toplevel(&mut parser);
let ast = parser.ast;
let mut lexer = Lexer::new(Lexis::new(instance.limits.max_tokens), code);
if lex(&mut lexer).is_err() {
return StatusCode::TooManyTokens;
};
let mut ast = Ast::new(instance.limits.ast_capacity);
let mut parser = Parser::new(
&lexer.lexis,
&haku::parser::ParserLimits {
max_events: instance.limits.max_parser_events,
},
);
parser::toplevel(&mut parser);
let Ok((root, mut parser_diagnostics)) = parser.into_ast(&mut ast) else {
return StatusCode::TooManyAstNodes;
};
let src = Source {
code,
@ -339,8 +366,11 @@ unsafe extern "C" fn haku_compile_brush(
}
}
if !compiler.diagnostics.is_empty() {
brush.diagnostics = compiler.diagnostics;
let mut diagnostics = lexer.diagnostics;
diagnostics.append(&mut parser_diagnostics);
diagnostics.append(&mut compiler.diagnostics);
if !diagnostics.is_empty() {
brush.diagnostics = diagnostics;
return StatusCode::DiagnosticsEmitted;
}

125
crates/haku/src/ast.rs Normal file
View file

@ -0,0 +1,125 @@
use core::{error::Error, fmt::Display};
use alloc::vec::Vec;
use crate::source::Span;
pub mod dump;
pub mod walk;
/// Handle to a node stored in an [`Ast`].
///
/// Wraps the node's index into the AST's per-node arrays.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct NodeId(u32);
impl NodeId {
/// ID 0. [`Ast::new`] always allocates a [`NodeKind::Nil`] node first, so this ID refers to
/// that nil node in every AST.
pub const NIL: NodeId = NodeId(0);
}
/// The kind of a node in the [`Ast`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NodeKind {
// The placeholder node allocated by `Ast::new` at node ID 0.
Nil,
// Trivia; `ast::dump` prints the source text covered by these nodes.
Token,
Ident,
// The compiler currently accepts only the `True` and `False` tags.
Tag,
Number,
// The compiler currently reports color literals as not implemented.
Color,
// The compiler currently reports list literals as not implemented.
List,
// The operator inside a `Unary` or `Binary` node.
Op,
Unary,
Binary,
Call,
// `()`; the compiler treats this as the nil literal.
ParenEmpty,
Paren,
Lambda,
// The parameter list of a `Lambda`.
Params,
// A single parameter inside `Params`.
Param,
If,
Let,
Toplevel,
// A node that failed to parse; the compiler ignores these.
Error,
}
/// A node's kind together with the span it is associated with.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Node {
pub span: Span,
pub kind: NodeKind,
}
/// Syntax tree storage.
///
/// Node data is split across parallel vectors that are all indexed by the `u32` inside a
/// [`NodeId`].
#[derive(Debug, Clone)]
pub struct Ast {
// Kind of each node.
kinds: Vec<NodeKind>,
// Span of each node.
spans: Vec<Span>,
// For each node, the `(start, end)` range into `children` that holds its children.
// `(0, 0)` until `alloc_children` is called for the node.
children_spans: Vec<(u32, u32)>,
// Child lists of all nodes, stored back to back.
children: Vec<NodeId>,
}
impl Ast {
    /// Creates an AST whose per-node storage is reserved for `capacity` nodes.
    ///
    /// A `Nil` node is allocated right away, so node ID 0 always exists and is the nil node.
    ///
    /// # Panics
    ///
    /// Panics if `capacity` is zero or larger than `u32::MAX`.
    pub fn new(capacity: usize) -> Self {
        assert!(capacity >= 1, "there must be space for at least a nil node");
        assert!(capacity <= u32::MAX as usize);

        let mut this = Self {
            kinds: Vec::with_capacity(capacity),
            spans: Vec::with_capacity(capacity),
            children_spans: Vec::with_capacity(capacity),
            children: Vec::new(),
        };
        this.alloc(NodeKind::Nil, Span::new(0, 0)).unwrap();
        this
    }

    /// Allocates a new childless node of the given kind and span.
    ///
    /// # Errors
    ///
    /// Returns [`NodeAllocError`] once the reserved node storage is full; the AST never grows
    /// past the capacity reserved up front.
    pub fn alloc(&mut self, kind: NodeKind, span: Span) -> Result<NodeId, NodeAllocError> {
        let id = self.kinds.len();
        if id < self.kinds.capacity() {
            self.kinds.push(kind);
            self.spans.push(span);
            // Children are attached later through `alloc_children`.
            self.children_spans.push((0, 0));
            Ok(NodeId(id as u32))
        } else {
            Err(NodeAllocError)
        }
    }

    /// Stores `children` as the child list of `for_node`, replacing any previous child range.
    ///
    /// NOTE: This never produces a `NodeAllocError`, because there can more or less only ever be
    /// as many children for nodes as there are nodes.
    pub fn alloc_children(&mut self, for_node: NodeId, children: &[NodeId]) {
        let first = self.children.len() as u32;
        self.children.extend_from_slice(children);
        let past_last = self.children.len() as u32;
        self.children_spans[for_node.0 as usize] = (first, past_last);
    }

    /// Overwrites the end of `in_node`'s span with `end`.
    pub fn extend_span(&mut self, in_node: NodeId, end: u32) {
        let span = &mut self.spans[in_node.0 as usize];
        span.end = end;
    }

    /// Returns the kind of the node `id`.
    pub fn kind(&self, id: NodeId) -> NodeKind {
        let index = id.0 as usize;
        self.kinds[index]
    }

    /// Returns the span of the node `id`.
    pub fn span(&self, id: NodeId) -> Span {
        let index = id.0 as usize;
        self.spans[index]
    }

    /// Returns the children of the node `id`, in the order they were passed to
    /// [`alloc_children`][`Self::alloc_children`].
    pub fn children(&self, id: NodeId) -> &[NodeId] {
        let (first, past_last) = self.children_spans[id.0 as usize];
        let range = first as usize..past_last as usize;
        &self.children[range]
    }
}
/// Error returned when an AST has no room left for another node.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct NodeAllocError;

impl Display for NodeAllocError {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        write!(f, "too many nodes")
    }
}

impl Error for NodeAllocError {}

View file

@ -0,0 +1,34 @@
use alloc::string::String;
use core::fmt::Write;
use crate::{ast::NodeKind, source::SourceCode};
use super::{Ast, NodeId};
/// Renders the subtree rooted at `node` as text, one node per line.
///
/// Each line holds the node's kind and span, prefixed with one indent step per level of depth.
/// When `code` is provided, `Token` nodes additionally show the source text their span covers.
/// The returned string has no trailing newline.
pub fn dump(ast: &Ast, node: NodeId, code: Option<&SourceCode>) -> String {
    fn visit(ast: &Ast, id: NodeId, code: Option<&SourceCode>, out: &mut String, indent: usize) {
        for _ in 0..indent {
            out.push_str(" ");
        }

        let kind = ast.kind(id);
        write!(out, "{:?} @ {:?}", kind, ast.span(id)).unwrap();
        if let (Some(code), NodeKind::Token) = (code, kind) {
            write!(out, " {:?}", ast.span(id).slice(code)).unwrap();
        }
        out.push('\n');

        for &child in ast.children(id) {
            visit(ast, child, code, out, indent + 1);
        }
    }

    let mut out = String::new();
    visit(ast, node, code, &mut out, 0);
    // Every line, including the last, ends with a newline; drop the final one.
    out.pop();
    out
}

View file

@ -0,0 +1,73 @@
use super::{Ast, NodeId, NodeKind};
impl Ast {
    /// Returns the first direct child of `parent` whose kind is `kind`, if there is one.
    pub fn child(&self, parent: NodeId, kind: NodeKind) -> Option<NodeId> {
        self.children(parent)
            .iter()
            .copied()
            .find(|&candidate| self.kind(candidate) == kind)
    }

    /// Starts a [`Walk`] over the direct children of `parent`, positioned at the first child.
    pub fn walk(&self, parent: NodeId) -> Walk<'_> {
        let index = 0;
        Walk {
            ast: self,
            parent,
            index,
        }
    }
}
/// An iterator over a node's children, with convenience methods for accessing those children.
///
/// Created by [`Ast::walk`].
#[derive(Clone)]
pub struct Walk<'a> {
// The AST the children are read from.
ast: &'a Ast,
// The node whose children are being iterated.
parent: NodeId,
// Position of the next child to yield within `parent`'s child list.
index: usize,
}
impl<'a> Walk<'a> {
    /// Advances to the next child that is not a `Nil`, `Error`, or `Token` node and returns it.
    ///
    /// Returns `None` once the remaining children are exhausted.
    pub fn node(&mut self) -> Option<NodeId> {
        // Copy the shared reference out so the closure does not borrow `self`.
        let ast = self.ast;
        self.find(|&id| {
            !matches!(
                ast.kind(id),
                NodeKind::Nil | NodeKind::Token | NodeKind::Error
            )
        })
    }

    /// Advances to the next [`node`][`Self::node`] of the given kind, skipping over any
    /// other nodes on the way.
    pub fn node_of(&mut self, kind: NodeKind) -> Option<NodeId> {
        let ast = self.ast;
        loop {
            let id = self.node()?;
            if ast.kind(id) == kind {
                return Some(id);
            }
        }
    }

    /// Finds the first remaining child of the given kind, starting at the current position.
    /// This does not advance the iterator.
    pub fn get(&self, kind: NodeKind) -> Option<NodeId> {
        self.ast
            .children(self.parent)
            .iter()
            .skip(self.index)
            .copied()
            .find(|&id| self.ast.kind(id) == kind)
    }
}
impl<'a> Iterator for Walk<'a> {
    type Item = NodeId;

    /// Yields the parent's children one by one, in stored order.
    fn next(&mut self) -> Option<Self::Item> {
        let child = self.ast.children(self.parent).get(self.index).copied()?;
        // Only move forward when a child was actually yielded.
        self.index += 1;
        Some(child)
    }
}

View file

@ -17,6 +17,8 @@ pub enum Opcode {
// Duplicate existing values.
/// Push a value relative to the bottom of the current stack window.
Local, // (index: u8)
/// Set the value of a value relative to the bottom of the current stack window.
SetLocal, // (index: u8)
/// Push a captured value.
Capture, // (index: u8)
/// Get the value of a definition.
@ -24,12 +26,8 @@ pub enum Opcode {
/// Set the value of a definition.
SetDef, // (index: u16)
/// Drop `number` values from the stack.
/// <!-- OwO -->
DropLet, // (number: u8)
// Create literal functions.
Function, // (params: u8, then: u16), at `then`: (capture_count: u8, captures: [(source: u8, index: u8); capture_count])
Function, // (params: u8, then: u16), at `then`: (local_count: u8, capture_count: u8, captures: [(source: u8, index: u8); capture_count])
// Control flow.
Jump, // (offset: u16)

View file

@ -6,9 +6,11 @@ use core::{
use alloc::vec::Vec;
use crate::{
ast::{Ast, NodeId, NodeKind},
bytecode::{Chunk, DefError, Defs, EmitError, Opcode, CAPTURE_CAPTURE, CAPTURE_LOCAL},
sexp::{Ast, NodeId, NodeKind, SourceCode, Span},
system::System,
diagnostic::Diagnostic,
source::SourceCode,
system::{System, SystemFnArity},
};
pub struct Source<'a> {
@ -17,12 +19,6 @@ pub struct Source<'a> {
pub system: &'a System,
}
#[derive(Debug, Clone, Copy)]
pub struct Diagnostic {
pub span: Span,
pub message: &'static str,
}
#[derive(Debug, Clone, Copy)]
struct Local<'a> {
name: &'a str,
@ -46,6 +42,11 @@ pub struct Compiler<'a, 'b> {
scopes: Vec<Scope<'a>>,
}
/// Summary of a compiled closure, as produced by `Compiler::closure_spec`.
#[derive(Debug, Clone, Copy)]
pub struct ClosureSpec {
// Number of locals in the compiler's innermost scope when the spec was taken.
// `closure_spec` stores 0 here if that count does not fit in a `u8`.
pub(crate) local_count: u8,
}
impl<'a, 'b> Compiler<'a, 'b> {
pub fn new(defs: &'a mut Defs, chunk: &'b mut Chunk) -> Self {
Self {
@ -59,20 +60,24 @@ impl<'a, 'b> Compiler<'a, 'b> {
}
}
pub fn diagnose(&mut self, diagnostic: Diagnostic) {
if self.diagnostics.len() >= self.diagnostics.capacity() {
return;
}
if self.diagnostics.len() == self.diagnostics.capacity() - 1 {
self.diagnostics.push(Diagnostic {
span: Span::new(0, 0),
message: "too many diagnostics emitted, stopping", // hello clangd!
})
} else {
fn emit(&mut self, diagnostic: Diagnostic) {
if self.diagnostics.len() < self.diagnostics.capacity() {
self.diagnostics.push(diagnostic);
}
}
/// Describes the closure being compiled, based on the innermost scope.
///
/// `local_count` is the number of locals in that scope, or 0 if the count does not fit
/// in a `u8`.
///
/// # Panics
///
/// Panics if the compiler has no scopes.
pub fn closure_spec(&self) -> ClosureSpec {
    let innermost = self.scopes.last().unwrap();
    let local_count = u8::try_from(innermost.locals.len()).unwrap_or_default();
    ClosureSpec { local_count }
}
}
type CompileResult<T = ()> = Result<T, CompileError>;
@ -82,27 +87,51 @@ pub fn compile_expr<'a>(
src: &Source<'a>,
node_id: NodeId,
) -> CompileResult {
let node = src.ast.get(node_id);
match node.kind {
NodeKind::Eof => unreachable!("eof node should never be emitted"),
match src.ast.kind(node_id) {
// The nil node is special, as it inhabits node ID 0.
NodeKind::Nil => {
unreachable!("Nil node should never be emitted (ParenEmpty is used for nil literals)")
}
// Tokens are trivia and should never be emitted---they're only useful for error reporting.
NodeKind::Token => unreachable!("Token node should never be emitted"),
// Op nodes are only used to provide a searching anchor for the operator in Unary and Binary.
NodeKind::Op => unreachable!("Op node should never be emitted"),
// Params nodes are only used to provide a searching anchor for Lambda parameters.
NodeKind::Params => unreachable!("Param node should never be emitted"),
// Param nodes are only used to provide a searching anchor for identifiers in Params nodes,
// as they may also contain commas and other trivia.
NodeKind::Param => unreachable!("Param node should never be emitted"),
NodeKind::Color => unsupported(c, src, node_id, "color literals are not implemented yet"),
NodeKind::Nil => compile_nil(c),
NodeKind::Ident => compile_ident(c, src, node_id),
NodeKind::Number => compile_number(c, src, node_id),
NodeKind::List(_, _) => compile_list(c, src, node_id),
NodeKind::Toplevel(_) => compile_toplevel(c, src, node_id),
NodeKind::Tag => compile_tag(c, src, node_id),
NodeKind::List => unsupported(c, src, node_id, "list literals are not implemented yet"),
NodeKind::Error(message) => {
c.diagnose(Diagnostic {
span: node.span,
message,
});
Ok(())
}
NodeKind::Unary => compile_unary(c, src, node_id),
NodeKind::Binary => compile_binary(c, src, node_id),
NodeKind::Call => compile_call(c, src, node_id),
NodeKind::Paren => compile_paren(c, src, node_id),
NodeKind::ParenEmpty => compile_nil(c),
NodeKind::Lambda => compile_lambda(c, src, node_id),
NodeKind::If => compile_if(c, src, node_id),
NodeKind::Let => compile_let(c, src, node_id),
NodeKind::Toplevel => compile_toplevel(c, src, node_id),
// Error nodes are ignored, because for each error node an appropriate parser
// diagnostic is emitted anyways.
NodeKind::Error => Ok(()),
}
}
fn compile_nil(c: &mut Compiler<'_, '_>) -> CompileResult {
fn unsupported(c: &mut Compiler, src: &Source, node_id: NodeId, message: &str) -> CompileResult {
c.emit(Diagnostic::error(src.ast.span(node_id), message));
Ok(())
}
fn compile_nil(c: &mut Compiler) -> CompileResult {
c.chunk.emit_opcode(Opcode::Nil)?;
Ok(())
@ -144,48 +173,39 @@ fn find_variable(
}
fn compile_ident<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, node_id: NodeId) -> CompileResult {
let ident = src.ast.get(node_id);
let name = ident.span.slice(src.code);
let span = src.ast.span(node_id);
let name = span.slice(src.code);
match name {
"false" => _ = c.chunk.emit_opcode(Opcode::False)?,
"true" => _ = c.chunk.emit_opcode(Opcode::True)?,
_ => match find_variable(c, name, c.scopes.len() - 1) {
Ok(Some(Variable::Local(index))) => {
c.chunk.emit_opcode(Opcode::Local)?;
c.chunk.emit_u8(index)?;
match find_variable(c, name, c.scopes.len() - 1) {
Ok(Some(Variable::Local(index))) => {
c.chunk.emit_opcode(Opcode::Local)?;
c.chunk.emit_u8(index)?;
}
Ok(Some(Variable::Captured(index))) => {
c.chunk.emit_opcode(Opcode::Capture)?;
c.chunk.emit_u8(index)?;
}
Ok(None) => {
if let Some(def_id) = c.defs.get(name) {
c.chunk.emit_opcode(Opcode::Def)?;
c.chunk.emit_u16(def_id.to_u16())?;
} else {
c.emit(Diagnostic::error(span, "undefined variable"));
}
Ok(Some(Variable::Captured(index))) => {
c.chunk.emit_opcode(Opcode::Capture)?;
c.chunk.emit_u8(index)?;
}
Ok(None) => {
if let Some(def_id) = c.defs.get(name) {
c.chunk.emit_opcode(Opcode::Def)?;
c.chunk.emit_u16(def_id.to_u16())?;
} else {
c.diagnose(Diagnostic {
span: ident.span,
message: "undefined variable",
});
}
}
Err(CaptureError) => {
c.diagnose(Diagnostic {
span: ident.span,
message: "too many variables captured from outer functions in this scope",
});
}
},
}
}
Err(CaptureError) => {
c.emit(Diagnostic::error(
span,
"too many variables captured from outer functions in this scope",
));
}
};
Ok(())
}
fn compile_number(c: &mut Compiler<'_, '_>, src: &Source<'_>, node_id: NodeId) -> CompileResult {
let node = src.ast.get(node_id);
let literal = node.span.slice(src.code);
let literal = src.ast.span(node_id).slice(src.code);
let float: f32 = literal
.parse()
.expect("the parser should've gotten us a string parsable by the stdlib");
@ -196,48 +216,130 @@ fn compile_number(c: &mut Compiler<'_, '_>, src: &Source<'_>, node_id: NodeId) -
Ok(())
}
fn compile_list<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, node_id: NodeId) -> CompileResult {
let NodeKind::List(function_id, args) = src.ast.get(node_id).kind else {
unreachable!("compile_list expects a List");
};
fn compile_tag(c: &mut Compiler<'_, '_>, src: &Source, node_id: NodeId) -> CompileResult {
let tag = src.ast.span(node_id).slice(src.code);
let function = src.ast.get(function_id);
let name = function.span.slice(src.code);
if function.kind == NodeKind::Ident {
match name {
"fn" => return compile_fn(c, src, args),
"if" => return compile_if(c, src, args),
"let" => return compile_let(c, src, args),
_ => (),
};
match tag {
"False" => {
c.chunk.emit_opcode(Opcode::False)?;
}
"True" => {
c.chunk.emit_opcode(Opcode::True)?;
}
_ => {
c.emit(Diagnostic::error(src.ast.span(node_id), "uppercased identifiers are reserved for future use; please start your identifiers with a lowercase letter instead"));
}
}
Ok(())
}
fn compile_unary<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, node_id: NodeId) -> CompileResult {
let mut walk = src.ast.walk(node_id);
let Some(op) = walk.node() else { return Ok(()) };
let Some(expr) = walk.node() else {
return Ok(());
};
if src.ast.kind(op) != NodeKind::Op {
return Ok(());
}
let name = src.ast.span(op).slice(src.code);
compile_expr(c, src, expr)?;
if let Some(index) = (src.system.resolve_fn)(SystemFnArity::Unary, name) {
let argument_count = 1;
c.chunk.emit_opcode(Opcode::System)?;
c.chunk.emit_u8(index)?;
c.chunk.emit_u8(argument_count)?;
} else {
c.emit(Diagnostic::error(
src.ast.span(op),
"this unary operator is currently unimplemented",
));
}
Ok(())
}
/// Compiles a `Binary` node: the left operand, then the right operand, then a call to the
/// system function that implements the operator.
///
/// The node's first three non-trivia children are taken as the left operand, the operator,
/// and the right operand. If any of them is missing, or the middle one is not an `Op` node,
/// nothing is emitted.
///
/// Two cases produce a diagnostic instead of a system call:
/// - the `=` operator, which is rejected here before any operand is compiled;
/// - an operator the system cannot resolve as a binary function. Both operands have
///   already been emitted at that point.
fn compile_binary<'a>(
    c: &mut Compiler<'a, '_>,
    src: &Source<'a>,
    node_id: NodeId,
) -> CompileResult {
    let mut walk = src.ast.walk(node_id);
    let Some(left) = walk.node() else {
        return Ok(());
    };
    let Some(op) = walk.node() else {
        return Ok(());
    };
    let Some(right) = walk.node() else {
        return Ok(());
    };

    if src.ast.kind(op) != NodeKind::Op {
        return Ok(());
    }
    let name = src.ast.span(op).slice(src.code);

    if name == "=" {
        c.emit(Diagnostic::error(
            src.ast.span(op),
            "defs `a = b` may only appear at the top level",
        ));
        return Ok(());
    }

    compile_expr(c, src, left)?;
    compile_expr(c, src, right)?;

    if let Some(index) = (src.system.resolve_fn)(SystemFnArity::Binary, name) {
        let argument_count = 2;
        c.chunk.emit_opcode(Opcode::System)?;
        c.chunk.emit_u8(index)?;
        c.chunk.emit_u8(argument_count)?;
    } else {
        // This message used to say "unary", copy-pasted from `compile_unary`, which
        // misdescribed the operator the user actually wrote.
        c.emit(Diagnostic::error(
            src.ast.span(op),
            "this binary operator is currently unimplemented",
        ));
    }

    Ok(())
}
fn compile_call<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, node_id: NodeId) -> CompileResult {
let mut walk = src.ast.walk(node_id);
let Some(func) = walk.node() else {
return Ok(());
};
let name = src.ast.span(func).slice(src.code);
let mut argument_count = 0;
let mut args = args;
while let NodeKind::List(head, tail) = src.ast.get(args).kind {
compile_expr(c, src, head)?;
while let Some(arg) = walk.node() {
compile_expr(c, src, arg)?;
argument_count += 1;
args = tail;
}
let argument_count = u8::try_from(argument_count).unwrap_or_else(|_| {
c.diagnose(Diagnostic {
span: src.ast.get(args).span,
message: "function call has too many arguments",
});
c.emit(Diagnostic::error(
src.ast.span(node_id),
"function call has too many arguments",
));
0
});
if let (NodeKind::Ident, Some(index)) = (function.kind, (src.system.resolve_fn)(name)) {
if let (NodeKind::Ident, Some(index)) = (
src.ast.kind(func),
(src.system.resolve_fn)(SystemFnArity::Nary, name),
) {
c.chunk.emit_opcode(Opcode::System)?;
c.chunk.emit_u8(index)?;
c.chunk.emit_u8(argument_count)?;
} else {
// This is a bit of an oddity: we only emit the function expression _after_ the arguments,
// but since the language is effectless this doesn't matter in practice.
// It makes for less code in the compiler and the VM.
compile_expr(c, src, function_id)?;
// It makes for a bit less code in the VM, since there's no need to find the function
// down the stack - it's always on top.
compile_expr(c, src, func)?;
c.chunk.emit_opcode(Opcode::Call)?;
c.chunk.emit_u8(argument_count)?;
}
@ -245,67 +347,28 @@ fn compile_list<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, node_id: NodeId)
Ok(())
}
struct WalkList {
current: NodeId,
ok: bool,
}
impl WalkList {
fn new(start: NodeId) -> Self {
Self {
current: start,
ok: true,
}
}
fn expect_arg(
&mut self,
c: &mut Compiler<'_, '_>,
src: &Source<'_>,
message: &'static str,
) -> NodeId {
if !self.ok {
return NodeId::NIL;
}
if let NodeKind::List(expr, tail) = src.ast.get(self.current).kind {
self.current = tail;
expr
} else {
c.diagnose(Diagnostic {
span: src.ast.get(self.current).span,
message,
});
self.ok = false;
NodeId::NIL
}
}
fn expect_nil(&mut self, c: &mut Compiler<'_, '_>, src: &Source<'_>, message: &'static str) {
if src.ast.get(self.current).kind != NodeKind::Nil {
c.diagnose(Diagnostic {
span: src.ast.get(self.current).span,
message,
});
// NOTE: Don't set self.ok to false, since this is not a fatal error.
// The nodes returned previously are valid and therefore it's safe to operate on them.
// Just having extra arguments shouldn't inhibit emitting additional diagnostics in
// the expression.
}
}
}
fn compile_if<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, args: NodeId) -> CompileResult {
let mut list = WalkList::new(args);
let condition = list.expect_arg(c, src, "missing `if` condition");
let if_true = list.expect_arg(c, src, "missing `if` true branch");
let if_false = list.expect_arg(c, src, "missing `if` false branch");
list.expect_nil(c, src, "extra arguments after `if` false branch");
if !list.ok {
fn compile_paren<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, node_id: NodeId) -> CompileResult {
let Some(inner) = src.ast.walk(node_id).node() else {
return Ok(());
}
};
compile_expr(c, src, inner)?;
Ok(())
}
fn compile_if<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, node_id: NodeId) -> CompileResult {
let mut walk = src.ast.walk(node_id);
let Some(condition) = walk.node() else {
return Ok(());
};
let Some(if_true) = walk.node() else {
return Ok(());
};
let Some(if_false) = walk.node() else {
return Ok(());
};
compile_expr(c, src, condition)?;
@ -328,113 +391,70 @@ fn compile_if<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, args: NodeId) -> C
Ok(())
}
fn compile_let<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, args: NodeId) -> CompileResult {
let mut list = WalkList::new(args);
fn compile_let<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, node_id: NodeId) -> CompileResult {
let mut walk = src.ast.walk(node_id);
let binding_list = list.expect_arg(c, src, "missing `let` binding list ((x 1) (y 2) ...)");
let expr = list.expect_arg(c, src, "missing expression to `let` names into");
list.expect_nil(c, src, "extra arguments after `let` expression");
if !list.ok {
let Some(ident) = walk.node() else {
return Ok(());
}
// NOTE: Our `let` behaves like `let*` from Lisps.
// This is because this is generally the more intuitive behaviour with how variable declarations
// work in traditional imperative languages.
// We do not offer an alternative to Lisp `let` to be as minimal as possible.
let mut current = binding_list;
let mut local_count: usize = 0;
while let NodeKind::List(head, tail) = src.ast.get(current).kind {
if !matches!(src.ast.get(head).kind, NodeKind::List(_, _)) {
c.diagnose(Diagnostic {
span: src.ast.get(head).span,
message: "`let` binding expected, like (x 1)",
});
current = tail;
continue;
}
let mut list = WalkList::new(head);
let ident = list.expect_arg(c, src, "binding name expected");
let value = list.expect_arg(c, src, "binding value expected");
list.expect_nil(c, src, "extra expressions after `let` binding value");
if src.ast.get(ident).kind != NodeKind::Ident {
c.diagnose(Diagnostic {
span: src.ast.get(ident).span,
message: "binding name must be an identifier",
});
}
// NOTE: Compile expression _before_ putting the value into scope.
// This is so that the variable cannot refer to itself, as it is yet to be declared.
compile_expr(c, src, value)?;
let name = src.ast.get(ident).span.slice(src.code);
let scope = c.scopes.last_mut().unwrap();
if scope.locals.len() >= u8::MAX as usize {
c.diagnose(Diagnostic {
span: src.ast.get(ident).span,
message: "too many names bound in this function at a single time",
});
} else {
scope.locals.push(Local { name });
}
local_count += 1;
current = tail;
}
};
let Some(expr) = walk.node() else {
return Ok(());
};
let Some(then) = walk.node() else {
return Ok(());
};
compile_expr(c, src, expr)?;
let name = src.ast.span(ident).slice(src.code);
let scope = c.scopes.last_mut().unwrap();
scope
.locals
.resize_with(scope.locals.len() - local_count, || unreachable!());
let index = if scope.locals.len() >= u8::MAX as usize {
c.emit(Diagnostic::error(
src.ast.span(ident),
"too many names bound in this function at a single time",
));
// NOTE: If we reach more than 255 locals declared in our `let`, we should've gotten
// a diagnostic emitted in the `while` loop beforehand.
let local_count = u8::try_from(local_count).unwrap_or(0);
c.chunk.emit_opcode(Opcode::DropLet)?;
c.chunk.emit_u8(local_count)?;
// Don't emit the expression, because it will most likely contain errors due to this
// `let` failing.
return Ok(());
} else {
let index = scope.locals.len();
scope.locals.push(Local { name });
index as u8
};
c.chunk.emit_opcode(Opcode::SetLocal)?;
c.chunk.emit_u8(index)?;
compile_expr(c, src, then)?;
Ok(())
}
fn compile_fn<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, args: NodeId) -> CompileResult {
let mut list = WalkList::new(args);
let param_list = list.expect_arg(c, src, "missing function parameters");
let body = list.expect_arg(c, src, "missing function body");
list.expect_nil(c, src, "extra arguments after function body");
if !list.ok {
fn compile_lambda<'a>(
c: &mut Compiler<'a, '_>,
src: &Source<'a>,
node_id: NodeId,
) -> CompileResult {
let mut walk = src.ast.walk(node_id);
let Some(params) = walk.node() else {
return Ok(());
}
};
let Some(body) = walk.node() else {
return Ok(());
};
let mut locals = Vec::new();
let mut current = param_list;
while let NodeKind::List(ident, tail) = src.ast.get(current).kind {
if let NodeKind::Ident = src.ast.get(ident).kind {
locals.push(Local {
name: src.ast.get(ident).span.slice(src.code),
})
} else {
c.diagnose(Diagnostic {
span: src.ast.get(ident).span,
message: "function parameters must be identifiers",
})
}
current = tail;
let mut params_walk = src.ast.walk(params);
while let Some(param) = params_walk.node() {
locals.push(Local {
name: src.ast.span(param).slice(src.code),
});
}
let param_count = u8::try_from(locals.len()).unwrap_or_else(|_| {
c.diagnose(Diagnostic {
span: src.ast.get(param_list).span,
message: "too many function parameters",
});
c.emit(Diagnostic::error(
src.ast.span(params),
"too many function parameters",
));
0
});
@ -453,13 +473,21 @@ fn compile_fn<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, args: NodeId) -> C
c.chunk.patch_u16(after_offset, after);
let scope = c.scopes.pop().unwrap();
let capture_count = u8::try_from(scope.captures.len()).unwrap_or_else(|_| {
c.diagnose(Diagnostic {
span: src.ast.get(body).span,
message: "function refers to too many variables from the outer function",
});
let local_count = u8::try_from(scope.locals.len()).unwrap_or_else(|_| {
c.emit(Diagnostic::error(
src.ast.span(body),
"function contains too many local variables",
));
0
});
let capture_count = u8::try_from(scope.captures.len()).unwrap_or_else(|_| {
c.emit(Diagnostic::error(
src.ast.span(body),
"function refers to too many variables from its outer functions",
));
0
});
c.chunk.emit_u8(local_count)?;
c.chunk.emit_u8(capture_count)?;
for capture in scope.captures {
match capture {
@ -484,31 +512,27 @@ fn compile_toplevel<'a>(
src: &Source<'a>,
node_id: NodeId,
) -> CompileResult {
let NodeKind::Toplevel(mut current) = src.ast.get(node_id).kind else {
unreachable!("compile_toplevel expects a Toplevel");
};
def_prepass(c, src, node_id)?;
def_prepass(c, src, current)?;
let mut walk = src.ast.walk(node_id);
let mut result_expr = None;
while let Some(toplevel_expr) = walk.node() {
if let Some(result_expr) = result_expr {
// TODO: This diagnostic should show you the expression after the result.
c.emit(Diagnostic::error(
src.ast.span(result_expr),
"the result value must be the last thing in the program",
));
}
let mut had_result = false;
while let NodeKind::List(expr, tail) = src.ast.get(current).kind {
match compile_toplevel_expr(c, src, expr)? {
match compile_toplevel_expr(c, src, toplevel_expr)? {
ToplevelExpr::Def => (),
ToplevelExpr::Result => had_result = true,
ToplevelExpr::Result if result_expr.is_none() => result_expr = Some(toplevel_expr),
ToplevelExpr::Result => (),
}
if had_result && src.ast.get(tail).kind != NodeKind::Nil {
c.diagnose(Diagnostic {
span: src.ast.get(tail).span,
message: "result value may not be followed by anything else",
});
break;
}
current = tail;
}
if !had_result {
if result_expr.is_none() {
c.chunk.emit_opcode(Opcode::Nil)?;
}
c.chunk.emit_opcode(Opcode::Return)?;
@ -516,36 +540,28 @@ fn compile_toplevel<'a>(
Ok(())
}
fn def_prepass<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, node_id: NodeId) -> CompileResult {
fn def_prepass<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, toplevel: NodeId) -> CompileResult {
let mut walk = src.ast.walk(toplevel);
// This is a bit of a pattern matching tapeworm, but Rust unfortunately doesn't have `if let`
// chains yet to make this more readable.
let mut current = node_id;
while let NodeKind::List(expr, tail) = src.ast.get(current).kind {
if let NodeKind::List(head_id, tail_id) = src.ast.get(expr).kind {
let head = src.ast.get(head_id);
let name = head.span.slice(src.code);
if head.kind == NodeKind::Ident && name == "def" {
if let NodeKind::List(ident_id, _) = src.ast.get(tail_id).kind {
let ident = src.ast.get(ident_id);
if ident.kind == NodeKind::Ident {
let name = ident.span.slice(src.code);
match c.defs.add(name) {
Ok(_) => (),
Err(DefError::Exists) => c.diagnose(Diagnostic {
span: ident.span,
message: "redefinitions of defs are not allowed",
}),
Err(DefError::OutOfSpace) => c.diagnose(Diagnostic {
span: ident.span,
message: "too many defs",
}),
}
while let Some(binary) = walk.node_of(NodeKind::Binary) {
let mut binary_walk = src.ast.walk(binary);
if let (Some(ident), Some(op)) = (binary_walk.node(), binary_walk.get(NodeKind::Op)) {
if src.ast.span(op).slice(src.code) == "=" {
let name = src.ast.span(ident).slice(src.code);
match c.defs.add(name) {
Ok(_) => (),
Err(DefError::Exists) => c.emit(Diagnostic::error(
src.ast.span(ident),
"a def with this name already exists",
)),
Err(DefError::OutOfSpace) => {
c.emit(Diagnostic::error(src.ast.span(binary), "too many defs"))
}
}
}
}
current = tail;
}
Ok(())
@ -562,14 +578,10 @@ fn compile_toplevel_expr<'a>(
src: &Source<'a>,
node_id: NodeId,
) -> CompileResult<ToplevelExpr> {
let node = src.ast.get(node_id);
if let NodeKind::List(head_id, tail_id) = node.kind {
let head = src.ast.get(head_id);
if head.kind == NodeKind::Ident {
let name = head.span.slice(src.code);
if name == "def" {
compile_def(c, src, tail_id)?;
if src.ast.kind(node_id) == NodeKind::Binary {
if let Some(op) = src.ast.walk(node_id).get(NodeKind::Op) {
if src.ast.span(op).slice(src.code) == "=" {
compile_def(c, src, node_id)?;
return Ok(ToplevelExpr::Def);
}
}
@ -579,24 +591,32 @@ fn compile_toplevel_expr<'a>(
Ok(ToplevelExpr::Result)
}
fn compile_def<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, args: NodeId) -> CompileResult {
let mut list = WalkList::new(args);
let ident = list.expect_arg(c, src, "missing definition name");
let value = list.expect_arg(c, src, "missing definition value");
list.expect_nil(c, src, "extra arguments after definition");
if !list.ok {
fn compile_def<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, node_id: NodeId) -> CompileResult {
let mut walk = src.ast.walk(node_id);
let Some(left) = walk.node() else {
return Ok(());
};
let Some(_op) = walk.node() else {
return Ok(());
};
let Some(right) = walk.node() else {
return Ok(());
};
if src.ast.kind(left) != NodeKind::Ident {
c.emit(Diagnostic::error(
src.ast.span(left),
"def name (identifier) expected",
));
}
let name = src.ast.get(ident).span.slice(src.code);
let name = src.ast.span(left).slice(src.code);
// NOTE: def_prepass collects all definitions beforehand.
// In case a def ends up not existing, that means we ran out of space for defs - so emit a
// zero def instead.
let def_id = c.defs.get(name).unwrap_or_default();
compile_expr(c, src, value)?;
compile_expr(c, src, right)?;
c.chunk.emit_opcode(Opcode::SetDef)?;
c.chunk.emit_u16(def_id.to_u16())?;

View file

@ -0,0 +1,26 @@
use alloc::string::String;
use crate::source::Span;
/// A message attached to a region of the source code.
#[derive(Debug, Clone)]
pub struct Diagnostic {
    span: Span,
    message: String,
}

impl Diagnostic {
    /// Creates a diagnostic for `span`, converting `message` into an owned string.
    pub fn error(span: Span, message: impl Into<String>) -> Self {
        let message: String = message.into();
        Self { span, message }
    }

    /// Returns the source region this diagnostic points at.
    pub fn span(&self) -> Span {
        self.span
    }

    /// Returns the text of the diagnostic.
    pub fn message(&self) -> &str {
        self.message.as_str()
    }
}

237
crates/haku/src/lexer.rs Normal file
View file

@ -0,0 +1,237 @@
use alloc::vec::Vec;
use crate::{
diagnostic::Diagnostic,
source::{SourceCode, Span},
token::{Lexis, TokenAllocError, TokenKind},
};
/// State of the lexer: the input being scanned, the current byte position within it,
/// and the tokens and diagnostics produced so far.
pub struct Lexer<'a> {
// Token storage that `lex` pushes every produced token into.
pub lexis: Lexis,
// Diagnostics reported while lexing.
pub diagnostics: Vec<Diagnostic>,
// Source code being lexed.
input: &'a SourceCode,
// Byte offset of the current character within `input`.
position: u32,
}
impl<'a> Lexer<'a> {
    /// Maximum number of diagnostics the lexer keeps; anything past this is dropped.
    const MAX_DIAGNOSTICS: usize = 16;

    /// Creates a lexer that reads `input` and stores its tokens in `lexis`.
    pub fn new(lexis: Lexis, input: &'a SourceCode) -> Self {
        Self {
            lexis,
            // NOTE: This must reserve space up front. `emit` refuses to grow the list, so an
            // empty `Vec::new()` here would silently discard every diagnostic.
            diagnostics: Vec::with_capacity(Self::MAX_DIAGNOSTICS),
            input,
            position: 0,
        }
    }

    /// Returns the character at the current position, or `'\0'` at the end of input.
    fn current(&self) -> char {
        self.input[self.position as usize..]
            .chars()
            .next()
            .unwrap_or('\0')
    }

    /// Moves past the current character (by its UTF-8 length in bytes).
    fn advance(&mut self) {
        self.position += self.current().len_utf8() as u32;
    }

    /// Records a diagnostic, unless the diagnostic limit has already been reached.
    fn emit(&mut self, diagnostic: Diagnostic) {
        if self.diagnostics.len() < Self::MAX_DIAGNOSTICS {
            self.diagnostics.push(diagnostic);
        }
    }
}
/// Consumes a single character and yields `kind` for it.
fn one(l: &mut Lexer<'_>, kind: TokenKind) -> TokenKind {
    l.advance(); // the one character making up the token
    kind
}
/// Consumes one character, plus a second one if it is `c2`.
/// Yields `kind2` for the two-character form and `kind1` otherwise.
fn one_or_two(l: &mut Lexer<'_>, kind1: TokenKind, c2: char, kind2: TokenKind) -> TokenKind {
    l.advance();
    if l.current() != c2 {
        return kind1;
    }
    l.advance();
    kind2
}
/// Returns whether `c` may appear in an identifier or tag: ASCII letters, ASCII digits, or `_`.
fn is_ident_char(c: char) -> bool {
    c == '_' || c.is_ascii_alphanumeric()
}
/// Lexes an identifier-shaped word and classifies it as `_`, a keyword, or a plain identifier.
fn ident(l: &mut Lexer<'_>) -> TokenKind {
    let start = l.position;
    loop {
        if !is_ident_char(l.current()) {
            break;
        }
        l.advance();
    }

    let word = Span::new(start, l.position).slice(l.input);
    match word {
        "_" => TokenKind::Underscore,
        "and" => TokenKind::And,
        "or" => TokenKind::Or,
        "if" => TokenKind::If,
        "else" => TokenKind::Else,
        "let" => TokenKind::Let,
        _ => TokenKind::Ident,
    }
}
/// Lexes a tag by consuming identifier characters for as long as they continue.
fn tag(l: &mut Lexer<'_>) -> TokenKind {
    loop {
        if !is_ident_char(l.current()) {
            break TokenKind::Tag;
        }
        l.advance();
    }
}
/// Lexes a number: a run of digits, optionally followed by `.` and another run of digits.
///
/// NOTE: A `Number` token is not guaranteed to be parsable as a number. A missing fractional
/// part only produces a diagnostic; the token is emitted regardless.
fn number(l: &mut Lexer<'_>) -> TokenKind {
    // Integer part.
    while l.current().is_ascii_digit() {
        l.advance();
    }

    if l.current() != '.' {
        return TokenKind::Number;
    }

    // Fractional part.
    let dot = l.position;
    l.advance();
    if !l.current().is_ascii_digit() {
        let dot_span = Span::new(dot, l.position);
        l.emit(Diagnostic::error(
            dot_span,
            "there must be at least a single digit after the decimal point",
        ));
    }
    while l.current().is_ascii_digit() {
        l.advance();
    }

    TokenKind::Number
}
// NOTE: You shouldn't expect that the color literals produced by the lexer are parsable.
fn color(l: &mut Lexer<'_>) -> TokenKind {
let hash = l.position;
l.advance(); // #
if !l.current().is_ascii_hexdigit() {
l.emit(Diagnostic::error(
Span::new(hash, l.position),
"hex digits expected after `#` (color literal)",
));
}
let start = l.position;
while l.current().is_ascii_hexdigit() {
l.advance();
}
let len = l.position - start;
if !matches!(len, 3 | 4 | 6 | 8) {
l.emit(Diagnostic::error(Span::new(hash, l.position), "incorrect number of digits in color literal (must be #RGB, #RGBA, #RRGGBB, or #RRGGBBAA)"));
}
TokenKind::Color
}
/// Skips spaces, tabs, carriage returns, and `--` line comments.
///
/// Newlines are NOT skipped, because they are tokens. A comment is skipped up to, but not
/// including, the newline that ends it.
fn whitespace_and_comments(l: &mut Lexer<'_>) {
    loop {
        match l.current() {
            '-' => {
                let position = l.position;
                l.advance();
                if l.current() == '-' {
                    // The comment ends at the newline, or at the end of input (`'\0'`) when the
                    // last line has no trailing newline. Without the end-of-input check this
                    // loop would advance past the end of the source and panic.
                    while !matches!(l.current(), '\n' | '\0') {
                        l.advance();
                    }
                } else {
                    // An unfortunate little bit of backtracking here;
                    // This seems like the simplest possible solution though.
                    // We don't treat comments as a separate token to simplify the parsing phase,
                    // and because of this, handling this at the "real" token level would complicate
                    // things quite a bit.
                    l.position = position;
                    break;
                }
            }
            ' ' | '\r' | '\t' => l.advance(),
            _ => break,
        }
    }
}
/// Lexes a newline token, collapsing any following blank lines (and the whitespace and comments
/// on them) into it. The returned span covers only the first newline character.
fn newline(l: &mut Lexer<'_>) -> (TokenKind, Span) {
    let start = l.position;
    l.advance(); // skip the initial newline
    let span = Span::new(start, l.position);

    // Swallow additional newlines so that only one token is produced.
    // These are deliberately left out of the span computed above.
    whitespace_and_comments(l);
    while l.current() == '\n' {
        l.advance();
        whitespace_and_comments(l);
    }

    (TokenKind::Newline, span)
}
/// Lexes the next token, skipping any whitespace and comments in front of it.
/// Unknown characters produce a diagnostic and an `Error` token.
fn token(l: &mut Lexer<'_>) -> (TokenKind, Span) {
    whitespace_and_comments(l);

    let start = l.position;
    let c = l.current();

    // Newlines compute their own span, because they swallow the blank lines after them.
    if c == '\n' {
        return newline(l);
    }

    let kind = match c {
        '\0' => TokenKind::Eof,

        // NOTE: Order matters here. Numbers and tags take priority over identifiers.
        _ if c.is_ascii_uppercase() => tag(l),
        _ if c.is_ascii_digit() => number(l),
        _ if is_ident_char(c) => ident(l),

        '#' => color(l),

        '+' => one(l, TokenKind::Plus),
        '-' => one_or_two(l, TokenKind::Minus, '>', TokenKind::RArrow),
        '*' => one(l, TokenKind::Star),
        '/' => one(l, TokenKind::Slash),
        '=' => one_or_two(l, TokenKind::Equal, '=', TokenKind::EqualEqual),
        '!' => one_or_two(l, TokenKind::Not, '=', TokenKind::NotEqual),
        '<' => one_or_two(l, TokenKind::Less, '=', TokenKind::LessEqual),
        '>' => one_or_two(l, TokenKind::Greater, '=', TokenKind::GreaterEqual),

        '(' => one(l, TokenKind::LParen),
        ')' => one(l, TokenKind::RParen),
        '[' => one(l, TokenKind::LBrack),
        ']' => one(l, TokenKind::RBrack),
        ',' => one(l, TokenKind::Comma),
        '\\' => one(l, TokenKind::Backslash),

        _ => {
            l.advance();
            let unexpected = Span::new(start, l.position);
            l.emit(Diagnostic::error(unexpected, "unexpected character"));
            TokenKind::Error
        }
    };

    (kind, Span::new(start, l.position))
}
/// Lexes the whole input, pushing every token (including the final `Eof`) into `l.lexis`.
///
/// # Errors
///
/// Returns an error as soon as `l.lexis` refuses to store another token.
pub fn lex(l: &mut Lexer<'_>) -> Result<(), TokenAllocError> {
    loop {
        let (kind, span) = token(l);
        l.lexis.push(kind, span)?;
        if kind == TokenKind::Eof {
            return Ok(());
        }
    }
}

View file

@ -2,10 +2,15 @@
extern crate alloc;
pub mod ast;
pub mod bytecode;
pub mod compiler;
pub mod diagnostic;
pub mod lexer;
pub mod parser;
pub mod render;
pub mod sexp;
pub mod source;
pub mod system;
pub mod token;
pub mod value;
pub mod vm;

607
crates/haku/src/parser.rs Normal file
View file

@ -0,0 +1,607 @@
use core::cell::Cell;
use alloc::vec::Vec;
use crate::{
ast::{Ast, NodeAllocError, NodeId, NodeKind},
diagnostic::Diagnostic,
source::Span,
token::{Lexis, TokenKind, TokenKindSet},
};
/// Resource limits applied to a `Parser`.
#[derive(Debug, Clone, Copy)]
pub struct ParserLimits {
// Capacity reserved for parser events; events that do not fit are dropped.
pub max_events: usize,
}
/// Event-based parser: parsing functions record `Open`/`Close`/`Advance` events, which
/// `into_ast` later turns into AST nodes.
pub struct Parser<'a> {
// Tokens being parsed.
tokens: &'a Lexis,
// Events recorded so far, bounded by `ParserLimits::max_events`.
events: Vec<Event>,
// Index of the current token within `tokens`.
position: u32,
// Decremented by every `peek` and reset by every successful `advance`;
// `peek` panics with "parser is stuck" once it reaches zero.
fuel: Cell<u32>,
// Diagnostics reported while parsing.
pub diagnostics: Vec<Diagnostic>,
}
/// A single step recorded by the parser, replayed by `Parser::into_ast` to build the tree.
#[derive(Debug)]
enum Event {
// Starts a node of the given kind; following events belong to it until the matching `Close`.
Open { kind: NodeKind },
// Ends the most recently opened node.
Close,
// Consumes one token, which becomes a `Token` child of the currently open node.
Advance,
}
/// Handle to a node that has been opened but not closed yet.
struct Open {
// Index of the node's `Open` event, or `None` if the event could not be recorded.
index: Option<usize>,
}
/// Handle to a node that has been closed; can be wrapped in a new parent via `open_before`.
struct Closed {
// Index of the node's `Open` event, or `None` if the event could not be recorded.
index: Option<usize>,
}
impl<'a> Parser<'a> {
    /// Number of `peek`s allowed without a successful `advance` in between.
    const FUEL: u32 = 256;

    /// Creates a parser over `input`, reserving room for `limits.max_events` events.
    ///
    /// # Panics
    ///
    /// Panics if `limits.max_events` is not less than `u32::MAX`.
    pub fn new(input: &'a Lexis, limits: &ParserLimits) -> Self {
        assert!(limits.max_events < u32::MAX as usize);

        Self {
            tokens: input,
            events: Vec::with_capacity(limits.max_events),
            position: 0,
            diagnostics: Vec::with_capacity(16),
            fuel: Cell::new(Self::FUEL),
        }
    }

    /// Records an event and returns its index, or `None` if the event list is full.
    fn event(&mut self, event: Event) -> Option<usize> {
        if self.events.len() < self.events.capacity() {
            let index = self.events.len();
            self.events.push(event);
            Some(index)
        } else {
            None
        }
    }

    /// Opens a new node. Its kind is a placeholder (`Error`) until `close` sets the real one.
    fn open(&mut self) -> Open {
        Open {
            index: self.event(Event::Open {
                kind: NodeKind::Error,
            }),
        }
    }

    /// Opens a new node that starts right before an already closed node, making the closed
    /// node its first child.
    fn open_before(&mut self, closed: Closed) -> Open {
        if let Some(index) = closed.index {
            if self.events.len() < self.events.capacity() {
                self.events.insert(
                    index,
                    Event::Open {
                        kind: NodeKind::Error,
                    },
                );
                return Open { index: Some(index) };
            }
        }
        Open { index: None }
    }

    /// Closes a node, assigning its final kind.
    fn close(&mut self, open: Open, kind: NodeKind) -> Closed {
        if let Some(index) = open.index {
            self.events[index] = Event::Open { kind };
            self.event(Event::Close);
            Closed { index: Some(index) }
        } else {
            Closed { index: None }
        }
    }

    fn is_eof(&self) -> bool {
        self.peek() == TokenKind::Eof
    }

    /// Consumes the current token and refills the fuel. Does nothing at the end of input.
    fn advance(&mut self) {
        if !self.is_eof() {
            self.position += 1;
            self.event(Event::Advance);
            self.fuel.set(Self::FUEL);
        }
    }

    /// Returns the kind of the current token, burning one unit of fuel.
    ///
    /// # Panics
    ///
    /// Panics if the fuel has run out, which means the parser is looping without consuming
    /// any tokens.
    #[track_caller]
    fn peek(&self) -> TokenKind {
        assert_ne!(self.fuel.get(), 0, "parser is stuck");
        self.fuel.set(self.fuel.get() - 1);

        self.tokens.kind(self.position)
    }

    /// Returns the span of the current token.
    fn span(&self) -> Span {
        self.tokens.span(self.position)
    }

    /// Records a diagnostic, unless the diagnostic list is already at capacity.
    fn emit(&mut self, diagnostic: Diagnostic) {
        if self.diagnostics.len() < self.diagnostics.capacity() {
            self.diagnostics.push(diagnostic);
        }
    }

    /// Consumes the current token, wrapping it in an `Error` node.
    fn advance_with_error(&mut self) -> Closed {
        let opened = self.open();
        self.advance();
        self.close(opened, NodeKind::Error)
    }

    /// Consumes the current token if it is a newline. Returns whether it did.
    fn optional_newline(&mut self) -> bool {
        if self.peek() == TokenKind::Newline {
            self.advance();
            true
        } else {
            false
        }
    }

    /// Replays the recorded events into `ast`, returning the root node and the diagnostics.
    ///
    /// # Errors
    ///
    /// Returns an error if `ast` runs out of space, or if the recorded events are unbalanced,
    /// which happens when more events were produced than `ParserLimits::max_events` allows.
    ///
    /// # Panics
    ///
    /// Panics if an `Advance` event occurs outside of any open node.
    pub fn into_ast(self, ast: &mut Ast) -> Result<(NodeId, Vec<Diagnostic>), NodeAllocError> {
        let mut token = 0;
        let mut events = self.events;
        let mut stack = Vec::new();

        struct StackEntry {
            node_id: NodeId,
            // TODO: This should probably be optimized to use a shared stack.
            children: Vec<NodeId>,
        }

        // Remove the last Close to keep a single node on the stack.
        // If the last event is not a Close (or there are no events at all), the event list was
        // cut short by the event limit; report that as an error rather than panicking.
        if !matches!(events.pop(), Some(Event::Close)) {
            return Err(NodeAllocError);
        }

        for event in events {
            match event {
                Event::Open { kind } => {
                    stack.push(StackEntry {
                        node_id: ast.alloc(kind, self.tokens.span(token))?,
                        children: Vec::new(),
                    });
                }
                Event::Close => {
                    let end_span = self.tokens.span(token.saturating_sub(1));
                    // A Close without a matching Open, or without a parent to attach to, means
                    // the events are unbalanced.
                    let stack_entry = stack.pop().ok_or(NodeAllocError)?;
                    let parent = stack.last_mut().ok_or(NodeAllocError)?;
                    ast.alloc_children(stack_entry.node_id, &stack_entry.children);
                    ast.extend_span(stack_entry.node_id, end_span.end);
                    parent.children.push(stack_entry.node_id);
                }
                Event::Advance => {
                    let span = self.tokens.span(token);
                    let node_id = ast.alloc(NodeKind::Token, span)?;
                    stack
                        .last_mut()
                        .expect("advance() may only be used in an open node")
                        .children
                        .push(node_id);
                    token += 1;
                }
            }
        }

        if stack.len() != 1 {
            // This means we had too many events emitted and they are no longer balanced.
            return Err(NodeAllocError);
        }
        // assert_eq!(token, self.tokens.len());

        let end_span = self.tokens.span(token.saturating_sub(1));
        let stack_entry = stack.pop().ok_or(NodeAllocError)?;
        ast.alloc_children(stack_entry.node_id, &stack_entry.children);
        ast.extend_span(stack_entry.node_id, end_span.end);

        Ok((stack_entry.node_id, self.diagnostics))
    }
}
/// Debug output shows only the recorded events; the remaining fields are elided.
impl<'a> core::fmt::Debug for Parser<'a> {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        let mut debug = f.debug_struct("Parser");
        debug.field("events", &self.events);
        debug.finish_non_exhaustive()
    }
}
/// Result of comparing two operators in `tighter`: which of them binds more tightly.
enum Tighter {
// The operator on the left binds at least as tightly; the current operand is complete.
Left,
// The operator on the right binds more tightly; it takes over the current operand.
Right,
}
/// Decides whether the operator `left` or the upcoming operator `right` binds more tightly.
///
/// `left` must be either an operator or `Eof`; `Eof` stands for "no operator on the left".
/// Equal tightness resolves to `Left`.
fn tighter(left: TokenKind, right: TokenKind) -> Tighter {
    /// Binding strength of an operator token; `None` for tokens that are not operators.
    fn tightness(kind: TokenKind) -> Option<usize> {
        match kind {
            TokenKind::Equal => Some(0),
            TokenKind::EqualEqual
            | TokenKind::NotEqual
            | TokenKind::Less
            | TokenKind::LessEqual
            | TokenKind::Greater
            | TokenKind::GreaterEqual => Some(1),
            TokenKind::Plus | TokenKind::Minus => Some(2),
            TokenKind::Star | TokenKind::Slash => Some(3),
            // Function application: any token that starts an expression acts as an operator.
            _ if PREFIX_TOKENS.contains(kind) => Some(4),
            _ => None,
        }
    }

    match (tightness(left), tightness(right)) {
        // `right` is not an operator at all, so there is nothing to continue with.
        (_, None) => Tighter::Left,
        (None, Some(_)) => {
            assert!(left == TokenKind::Eof);
            Tighter::Right
        }
        (Some(left_tightness), Some(right_tightness)) if right_tightness > left_tightness => {
            Tighter::Right
        }
        (Some(_), Some(_)) => Tighter::Left,
    }
}
/// Parses an operand, then keeps folding in operators for as long as they bind more tightly
/// than `left`, the operator to the left of the operand.
fn precedence_parse(p: &mut Parser, left: TokenKind) {
    let mut lhs = prefix(p);

    loop {
        let right = p.peek();
        if let Tighter::Left = tighter(left, right) {
            break;
        }

        // Wrap everything parsed so far in a new node that also holds the operator and its
        // right-hand side.
        let wrapper = p.open_before(lhs);
        let kind = infix(p, right);
        lhs = p.close(wrapper, kind);
    }
}
/// Parses a node of the given kind that consists of a single token.
fn one(p: &mut Parser, kind: NodeKind) -> Closed {
    let node = p.open();
    p.advance();
    p.close(node, kind)
}
/// Parses a list literal: `[`, then elements separated by commas or newlines, then `]`.
fn list(p: &mut Parser) -> Closed {
    let o = p.open();
    let lspan = p.span();
    p.advance(); // [
    p.optional_newline();

    loop {
        // Either the list ends here, or another element follows.
        let before_element = p.peek();
        if before_element == TokenKind::Eof {
            p.emit(Diagnostic::error(lspan, "missing `]` to close this list"));
            break;
        }
        if before_element == TokenKind::RBrack {
            p.advance();
            break;
        }

        expr(p);

        let after_element = p.peek();
        if matches!(after_element, TokenKind::Comma | TokenKind::Newline) {
            p.advance();
        } else if after_element == TokenKind::RBrack {
            p.advance();
            break;
        } else {
            let span = p.span();
            p.emit(Diagnostic::error(
                span,
                "comma `,` or new line expected after list element",
            ));
            p.advance_with_error();
        }
    }

    p.close(o, NodeKind::List)
}
/// Parses a unary operation: an operator token wrapped in an `Op` node, followed by its operand.
fn unary(p: &mut Parser) -> Closed {
    let node = p.open();

    let operator = p.open();
    p.advance();
    p.close(operator, NodeKind::Op);

    prefix(p); // operand

    p.close(node, NodeKind::Unary)
}
/// Parses `()` as `ParenEmpty`, or a parenthesized expression as `Paren`.
///
/// Newlines are permitted directly inside the parentheses. If the closing `)` is missing, a
/// diagnostic pointing at the opening `(` is emitted, the offending token is wrapped in an
/// `Error` node, and the `Paren` node is still closed so that the event stream stays balanced.
fn paren(p: &mut Parser) -> Closed {
    let o = p.open();
    let lspan = p.span();
    p.advance(); // (

    if p.peek() == TokenKind::RParen {
        p.advance(); // )
        return p.close(o, NodeKind::ParenEmpty);
    }

    p.optional_newline();
    expr(p);
    p.optional_newline();

    if p.peek() == TokenKind::RParen {
        p.advance(); // )
    } else {
        p.emit(Diagnostic::error(lspan, "missing closing parenthesis `)`"));
        p.advance_with_error();
    }

    // NOTE: `o` must be closed on the error path too. Leaving it open produces an `Open` event
    // without a matching `Close`, which makes `Parser::into_ast` fail for the whole program.
    p.close(o, NodeKind::Paren)
}
/// Parses a single function parameter, which must be an identifier or `_`.
/// Anything else is reported and wrapped in an `Error` node inside the `Param`.
fn param(p: &mut Parser) {
    let o = p.open();

    match p.peek() {
        TokenKind::Ident | TokenKind::Underscore => p.advance(),
        _ => {
            let span = p.span();
            p.emit(Diagnostic::error(
                span,
                "parameter names must be identifiers or `_`",
            ));
            p.advance_with_error();
        }
    }

    p.close(o, NodeKind::Param);
}
/// Parses a lambda: `\`, comma-separated parameters, `->`, and the body expression.
fn lambda(p: &mut Parser) -> Closed {
    let o = p.open();
    p.advance(); // backslash

    let params = p.open();
    loop {
        param(p);

        let next = p.peek();
        if next == TokenKind::Comma {
            p.advance();
            continue;
        }
        if next != TokenKind::RArrow {
            let span = p.span();
            p.emit(Diagnostic::error(
                span,
                "`,` or `->` expected after function parameter",
            ));
            p.advance_with_error();
        }
        break;
    }
    p.close(params, NodeKind::Params);

    // NOTE: Can be false if there are some stray tokens.
    // We prefer to bail early and let the rest of the program parse.
    if p.peek() == TokenKind::RArrow {
        p.advance();
        p.optional_newline();
        expr(p);
    }

    p.close(o, NodeKind::Lambda)
}
/// Parses `if (condition) true_branch else false_branch`.
///
/// Newlines are permitted after the condition, before `else`, and after `else`.
/// Each missing piece of punctuation is reported with a diagnostic, after which parsing
/// continues.
fn if_expr(p: &mut Parser) -> Closed {
    let o = p.open();

    p.advance(); // if
    if p.peek() == TokenKind::LParen {
        p.advance(); // (
    } else {
        let span = p.span();
        p.emit(Diagnostic::error(
            span,
            "the condition in an `if` expression must be surrounded with parentheses",
        ));
        // NOTE: Don't advance, it's more likely the programmer expected no parentheses to be needed.
        // The current token is therefore parsed as the start of the condition.
    }
    expr(p); // Condition
    if p.peek() != TokenKind::RParen {
        let span = p.span();
        p.emit(Diagnostic::error(
            span,
            "missing closing parenthesis after `if` condition",
        ));
    }
    p.advance();
    p.optional_newline();

    expr(p); // True branch
    p.optional_newline();

    if p.peek() != TokenKind::Else {
        let span = p.span();
        p.emit(Diagnostic::error(
            span,
            "`if` expression is missing an `else` clause",
        ));
    }
    p.advance();
    p.optional_newline();

    expr(p); // False branch

    p.close(o, NodeKind::If)
}
/// Parses `let name = value`, a newline, and then the expression that follows the binding.
/// Each piece that is missing is reported and replaced with an `Error` node.
fn let_expr(p: &mut Parser) -> Closed {
    let o = p.open();
    p.advance(); // let

    // Variable name
    if p.peek() != TokenKind::Ident {
        let span = p.span();
        p.emit(Diagnostic::error(span, "`let` variable name expected"));
        p.advance_with_error();
    } else {
        let ident = p.open();
        p.advance();
        p.close(ident, NodeKind::Ident);
    }

    // =
    if p.peek() != TokenKind::Equal {
        let span = p.span();
        p.emit(Diagnostic::error(span, "`=` expected after variable name"));
        p.advance_with_error();
    } else {
        p.advance();
    }

    expr(p); // Value

    // The newline separating the binding from the expression after it
    if p.peek() != TokenKind::Newline {
        let span = p.span();
        p.emit(Diagnostic::error(
            span,
            "new line expected after `let` expression",
        ));
        p.advance_with_error();
    } else {
        p.advance();
    }

    expr(p); // Expression after the binding

    p.close(o, NodeKind::Let)
}
// Tokens that can start an expression.
// `prefix` must have a dedicated arm for every token listed here; its fallback arm asserts this.
// `tighter` and `infix` also use this set to recognize function calls (application by
// juxtaposition).
const PREFIX_TOKENS: TokenKindSet = TokenKindSet::new(&[
TokenKind::Ident,
TokenKind::Tag,
TokenKind::Number,
TokenKind::Color,
// NOTE: This is ambiguous in function calls.
// In that case, the infix operator takes precedence (because the `match` arms for the infix op
// come first.)
TokenKind::Minus,
TokenKind::Not,
TokenKind::LParen,
TokenKind::Backslash,
TokenKind::If,
TokenKind::Let,
TokenKind::LBrack,
]);
/// Parses an expression that starts at the current token, without any trailing operators.
/// A token that cannot start an expression is reported and wrapped in an `Error` node.
fn prefix(p: &mut Parser) -> Closed {
    let token = p.peek();

    // Single-token expressions.
    let single = match token {
        TokenKind::Ident => Some(NodeKind::Ident),
        TokenKind::Tag => Some(NodeKind::Tag),
        TokenKind::Number => Some(NodeKind::Number),
        TokenKind::Color => Some(NodeKind::Color),
        _ => None,
    };
    if let Some(kind) = single {
        return one(p, kind);
    }

    match token {
        TokenKind::LBrack => list(p),
        TokenKind::Minus | TokenKind::Not => unary(p),
        TokenKind::LParen => paren(p),
        TokenKind::Backslash => lambda(p),
        TokenKind::If => if_expr(p),
        TokenKind::Let => let_expr(p),

        _ => {
            // Every token in PREFIX_TOKENS needs its own arm above.
            assert!(
                !PREFIX_TOKENS.contains(p.peek()),
                "{:?} found in PREFIX_TOKENS",
                p.peek()
            );

            let span = p.span();
            p.emit(Diagnostic::error(
                span,
                "an expression was expected, but this token does not start one",
            ));
            p.advance_with_error()
        }
    }
}
/// Parses what follows a left-hand side, given the upcoming token `op`, and returns the kind
/// of node the whole expression forms.
///
/// # Panics
///
/// Panics if `op` is neither a binary operator nor a token that can start an expression.
fn infix(p: &mut Parser, op: TokenKind) -> NodeKind {
    let is_binary_operator = matches!(
        op,
        TokenKind::Plus
            | TokenKind::Minus
            | TokenKind::Star
            | TokenKind::Slash
            | TokenKind::EqualEqual
            | TokenKind::NotEqual
            | TokenKind::Less
            | TokenKind::LessEqual
            | TokenKind::Greater
            | TokenKind::GreaterEqual
            | TokenKind::Equal
    );

    if is_binary_operator {
        infix_binary(p, op)
    } else if PREFIX_TOKENS.contains(op) {
        infix_call(p)
    } else {
        panic!("unhandled infix operator {op:?}")
    }
}
/// Parses a binary operator (wrapped in an `Op` node) and its right-hand side.
/// A newline directly after the operator is permitted.
fn infix_binary(p: &mut Parser, op: TokenKind) -> NodeKind {
    let operator = p.open();
    p.advance();
    p.close(operator, NodeKind::Op);

    p.optional_newline();

    precedence_parse(p, op);
    NodeKind::Binary
}
/// Parses function call arguments: operands, for as long as the next token can start one.
fn infix_call(p: &mut Parser) -> NodeKind {
    loop {
        if !PREFIX_TOKENS.contains(p.peek()) {
            break NodeKind::Call;
        }
        prefix(p);
    }
}
/// Parses a full expression.
pub fn expr(p: &mut Parser) {
    // `Eof` stands for "no operator on the left", so any operator may follow the operand.
    precedence_parse(p, TokenKind::Eof);
}
/// Parses a whole program: newline-separated expressions wrapped in a `Toplevel` node.
pub fn toplevel(p: &mut Parser) {
    let o = p.open();
    p.optional_newline();

    while p.peek() != TokenKind::Eof {
        expr(p);

        let next = p.peek();
        if next == TokenKind::Newline {
            p.advance();
        } else if next == TokenKind::Eof {
            break;
        } else {
            let span = p.span();
            p.emit(Diagnostic::error(
                span,
                "newline expected after toplevel expression",
            ));
        }
    }

    p.close(o, NodeKind::Toplevel);
}
#[cfg(test)]
mod tests;

View file

@ -0,0 +1,912 @@
use alloc::{format, string::String};
use crate::{
ast::{dump::dump, Ast, NodeId},
lexer::{lex, Lexer},
parser::expr,
source::SourceCode,
token::Lexis,
};
use super::{toplevel, Parser, ParserLimits};
/// Lexes `s`, runs the parsing function `f` over it, and builds the AST.
/// Panics if there are too many tokens, if the parser emits diagnostics, or if building the
/// AST fails.
fn parse(s: &str, f: fn(&mut Parser)) -> (Ast, NodeId) {
    let source = SourceCode::unlimited_len(s);
    let mut lexer = Lexer::new(Lexis::new(1024), source);
    lex(&mut lexer).expect("too many tokens");

    let limits = ParserLimits { max_events: 1024 };
    let mut parser = Parser::new(&lexer.lexis, &limits);
    f(&mut parser);
    assert!(
        parser.diagnostics.is_empty(),
        "parser emitted diagnostics: {:#?}",
        parser.diagnostics
    );

    let mut ast = Ast::new(1024);
    let (root, _) = parser.into_ast(&mut ast).unwrap();
    (ast, root)
}
/// Parses `s` with `f` and returns the AST dump, prefixed with a newline.
fn ast(s: &str, f: fn(&mut Parser)) -> String {
    let (tree, root) = parse(s, f);
    let dumped = dump(&tree, root, None);
    // The extra newline is mostly so that it's easier to make the string literals look nice.
    format!("\n{dumped}")
}
/// Asserts that parsing `s` with `f` yields an AST whose dump equals `ast_s`.
#[track_caller]
fn assert_ast_eq(s: &str, f: fn(&mut Parser), ast_s: &str) {
    let got = ast(s, f);
    assert!(
        ast_s == got,
        "AST mismatch. expected:\n{ast_s}\n\ngot:\n{got}\n"
    );
}
// Single-token literals: a number, a tag, an identifier, and color literals with 3, 4, 6 and 8
// hex digits. Each must parse to one node wrapping one token.
#[test]
fn one_literals() {
assert_ast_eq(
"1",
expr,
"
Number @ 0..1
Token @ 0..1",
);
assert_ast_eq(
"ExampleTag123",
expr,
"
Tag @ 0..13
Token @ 0..13",
);
assert_ast_eq(
"example_ident123",
expr,
"
Ident @ 0..16
Token @ 0..16",
);
assert_ast_eq(
"#000",
expr,
"
Color @ 0..4
Token @ 0..4",
);
assert_ast_eq(
"#000F",
expr,
"
Color @ 0..5
Token @ 0..5",
);
assert_ast_eq(
"#058EF0",
expr,
"
Color @ 0..7
Token @ 0..7",
);
assert_ast_eq(
"#058EF0FF",
expr,
"
Color @ 0..9
Token @ 0..9",
);
}
// List literals: empty, one element, comma-separated elements, and newline-separated elements.
#[test]
fn list() {
assert_ast_eq(
"[]",
expr,
"
List @ 0..2
Token @ 0..1
Token @ 1..2",
);
assert_ast_eq(
"[1]",
expr,
"
List @ 0..3
Token @ 0..1
Number @ 1..2
Token @ 1..2
Token @ 2..3",
);
assert_ast_eq(
"[1, 2]",
expr,
"
List @ 0..6
Token @ 0..1
Number @ 1..2
Token @ 1..2
Token @ 2..3
Number @ 4..5
Token @ 4..5
Token @ 5..6",
);
assert_ast_eq(
"[
1
2
]",
expr,
"
List @ 0..42
Token @ 0..1
Token @ 1..2
Number @ 15..16
Token @ 15..16
Token @ 16..17
Number @ 30..31
Token @ 30..31
Token @ 31..32
Token @ 41..42",
);
}
// Unary `-` and `!` each produce a Unary node holding an Op and the operand.
#[test]
fn unary() {
assert_ast_eq(
"-1",
expr,
"
Unary @ 0..2
Op @ 0..1
Token @ 0..1
Number @ 1..2
Token @ 1..2",
);
assert_ast_eq(
"!1",
expr,
"
Unary @ 0..2
Op @ 0..1
Token @ 0..1
Number @ 1..2
Token @ 1..2",
);
}
// Every binary operator on its own (`+ - * / < > == != <= >= =`) produces a Binary node
// holding the left operand, an Op, and the right operand.
#[test]
fn binary_single() {
assert_ast_eq(
"1 + 1",
expr,
"
Binary @ 0..5
Number @ 0..1
Token @ 0..1
Op @ 2..3
Token @ 2..3
Number @ 4..5
Token @ 4..5",
);
assert_ast_eq(
"1 - 1",
expr,
"
Binary @ 0..5
Number @ 0..1
Token @ 0..1
Op @ 2..3
Token @ 2..3
Number @ 4..5
Token @ 4..5",
);
assert_ast_eq(
"1 * 1",
expr,
"
Binary @ 0..5
Number @ 0..1
Token @ 0..1
Op @ 2..3
Token @ 2..3
Number @ 4..5
Token @ 4..5",
);
assert_ast_eq(
"1 / 1",
expr,
"
Binary @ 0..5
Number @ 0..1
Token @ 0..1
Op @ 2..3
Token @ 2..3
Number @ 4..5
Token @ 4..5",
);
assert_ast_eq(
"1 < 1",
expr,
"
Binary @ 0..5
Number @ 0..1
Token @ 0..1
Op @ 2..3
Token @ 2..3
Number @ 4..5
Token @ 4..5",
);
assert_ast_eq(
"1 > 1",
expr,
"
Binary @ 0..5
Number @ 0..1
Token @ 0..1
Op @ 2..3
Token @ 2..3
Number @ 4..5
Token @ 4..5",
);
assert_ast_eq(
"1 == 1",
expr,
"
Binary @ 0..6
Number @ 0..1
Token @ 0..1
Op @ 2..4
Token @ 2..4
Number @ 5..6
Token @ 5..6",
);
assert_ast_eq(
"1 != 1",
expr,
"
Binary @ 0..6
Number @ 0..1
Token @ 0..1
Op @ 2..4
Token @ 2..4
Number @ 5..6
Token @ 5..6",
);
assert_ast_eq(
"1 <= 1",
expr,
"
Binary @ 0..6
Number @ 0..1
Token @ 0..1
Op @ 2..4
Token @ 2..4
Number @ 5..6
Token @ 5..6",
);
assert_ast_eq(
"1 >= 1",
expr,
"
Binary @ 0..6
Number @ 0..1
Token @ 0..1
Op @ 2..4
Token @ 2..4
Number @ 5..6
Token @ 5..6",
);
assert_ast_eq(
"1 = 1",
expr,
"
Binary @ 0..5
Number @ 0..1
Token @ 0..1
Op @ 2..3
Token @ 2..3
Number @ 4..5
Token @ 4..5",
);
}
// Operator precedence and associativity: equal precedence nests to the left, `*` binds
// tighter than `+`, and `+` binds tighter than `<`.
#[test]
fn binary_precedence() {
assert_ast_eq(
"1 + 1 + 1",
expr,
"
Binary @ 0..9
Binary @ 0..5
Number @ 0..1
Token @ 0..1
Op @ 2..3
Token @ 2..3
Number @ 4..5
Token @ 4..5
Op @ 6..7
Token @ 6..7
Number @ 8..9
Token @ 8..9",
);
assert_ast_eq(
"1 * 1 + 1",
expr,
"
Binary @ 0..9
Binary @ 0..5
Number @ 0..1
Token @ 0..1
Op @ 2..3
Token @ 2..3
Number @ 4..5
Token @ 4..5
Op @ 6..7
Token @ 6..7
Number @ 8..9
Token @ 8..9",
);
assert_ast_eq(
"1 + 1 * 1",
expr,
"
Binary @ 0..9
Number @ 0..1
Token @ 0..1
Op @ 2..3
Token @ 2..3
Binary @ 4..9
Number @ 4..5
Token @ 4..5
Op @ 6..7
Token @ 6..7
Number @ 8..9
Token @ 8..9",
);
assert_ast_eq(
"1 < 1 + 1",
expr,
"
Binary @ 0..9
Number @ 0..1
Token @ 0..1
Op @ 2..3
Token @ 2..3
Binary @ 4..9
Number @ 4..5
Token @ 4..5
Op @ 6..7
Token @ 6..7
Number @ 8..9
Token @ 8..9",
);
assert_ast_eq(
"1 + 1 < 1",
expr,
"
Binary @ 0..9
Binary @ 0..5
Number @ 0..1
Token @ 0..1
Op @ 2..3
Token @ 2..3
Number @ 4..5
Token @ 4..5
Op @ 6..7
Token @ 6..7
Number @ 8..9
Token @ 8..9",
);
assert_ast_eq(
"1 + 1 * 1 < 1",
expr,
"
Binary @ 0..13
Binary @ 0..9
Number @ 0..1
Token @ 0..1
Op @ 2..3
Token @ 2..3
Binary @ 4..9
Number @ 4..5
Token @ 4..5
Op @ 6..7
Token @ 6..7
Number @ 8..9
Token @ 8..9
Op @ 10..11
Token @ 10..11
Number @ 12..13
Token @ 12..13",
);
assert_ast_eq(
"1 * 1 + 1 < 1",
expr,
"
Binary @ 0..13
Binary @ 0..9
Binary @ 0..5
Number @ 0..1
Token @ 0..1
Op @ 2..3
Token @ 2..3
Number @ 4..5
Token @ 4..5
Op @ 6..7
Token @ 6..7
Number @ 8..9
Token @ 8..9
Op @ 10..11
Token @ 10..11
Number @ 12..13
Token @ 12..13",
);
}
// A binary expression may continue on the next line after the operator; the newline shows up
// as a plain Token inside the Binary node.
#[test]
fn binary_cont() {
assert_ast_eq(
"1 +
1",
expr,
"
Binary @ 0..16
Number @ 0..1
Token @ 0..1
Op @ 2..3
Token @ 2..3
Token @ 3..4
Number @ 15..16
Token @ 15..16",
);
assert_ast_eq(
"1 +
1",
expr,
"
Binary @ 0..17
Number @ 0..1
Token @ 0..1
Op @ 2..3
Token @ 2..3
Token @ 3..4
Number @ 16..17
Token @ 16..17",
);
}
// `()` parses to a ParenEmpty node holding the two parenthesis tokens.
#[test]
fn paren_empty() {
assert_ast_eq(
"()",
expr,
"
ParenEmpty @ 0..2
Token @ 0..1
Token @ 1..2",
);
}
// Parenthesized expressions: a lone operand, grouping that overrides precedence on either
// side of an operator, and newlines inside the parentheses.
#[test]
fn paren() {
assert_ast_eq(
"(1)",
expr,
"
Paren @ 0..3
Token @ 0..1
Number @ 1..2
Token @ 1..2
Token @ 2..3",
);
assert_ast_eq(
"(1 + 1) * 1",
expr,
"
Binary @ 0..11
Paren @ 0..7
Token @ 0..1
Binary @ 1..6
Number @ 1..2
Token @ 1..2
Op @ 3..4
Token @ 3..4
Number @ 5..6
Token @ 5..6
Token @ 6..7
Op @ 8..9
Token @ 8..9
Number @ 10..11
Token @ 10..11",
);
assert_ast_eq(
"1 * (1 + 1)",
expr,
"
Binary @ 0..11
Number @ 0..1
Token @ 0..1
Op @ 2..3
Token @ 2..3
Paren @ 4..11
Token @ 4..5
Binary @ 5..10
Number @ 5..6
Token @ 5..6
Op @ 7..8
Token @ 7..8
Number @ 9..10
Token @ 9..10
Token @ 10..11",
);
assert_ast_eq(
"(
1 +
1
)",
expr,
"
Paren @ 0..47
Token @ 0..1
Token @ 1..2
Binary @ 15..33
Number @ 15..16
Token @ 15..16
Op @ 17..18
Token @ 17..18
Token @ 18..19
Number @ 32..33
Token @ 32..33
Token @ 36..37
Token @ 46..47",
);
}
// Function calls by juxtaposition: all arguments land in one Call node, and a call binds
// tighter than `+`.
#[test]
fn infix_call() {
assert_ast_eq(
"f x y",
toplevel,
"
Toplevel @ 0..5
Call @ 0..5
Ident @ 0..1
Token @ 0..1
Ident @ 2..3
Token @ 2..3
Ident @ 4..5
Token @ 4..5",
);
assert_ast_eq(
"sin 1 + cos 2",
toplevel,
"
Toplevel @ 0..13
Binary @ 0..13
Call @ 0..5
Ident @ 0..3
Token @ 0..3
Number @ 4..5
Token @ 4..5
Op @ 6..7
Token @ 6..7
Call @ 8..13
Ident @ 8..11
Token @ 8..11
Number @ 12..13
Token @ 12..13",
);
}
// `-` after a callee is parsed as the binary operator; passing a negated argument requires
// parentheses.
#[test]
fn infix_call_unary_arg() {
assert_ast_eq(
// NOTE: The whitespace here is misleading.
// This is a binary `-`.
"f -1",
toplevel,
"
Toplevel @ 0..4
Binary @ 0..4
Ident @ 0..1
Token @ 0..1
Op @ 2..3
Token @ 2..3
Number @ 3..4
Token @ 3..4",
);
assert_ast_eq(
"f (-1)",
toplevel,
"
Toplevel @ 0..6
Call @ 0..6
Ident @ 0..1
Token @ 0..1
Paren @ 2..6
Token @ 2..3
Unary @ 3..5
Op @ 3..4
Token @ 3..4
Number @ 4..5
Token @ 4..5
Token @ 5..6",
);
}
// Lambdas: `_` and named parameters, multiple parameters, a body on the next line, and
// lambdas passed as the last argument of a call (nested, on one line and across lines).
#[test]
fn lambda() {
assert_ast_eq(
r#" \_ -> () "#,
toplevel,
"
Toplevel @ 1..9
Lambda @ 1..9
Token @ 1..2
Params @ 2..3
Param @ 2..3
Token @ 2..3
Token @ 4..6
ParenEmpty @ 7..9
Token @ 7..8
Token @ 8..9",
);
assert_ast_eq(
r#" \x -> x "#,
toplevel,
"
Toplevel @ 1..8
Lambda @ 1..8
Token @ 1..2
Params @ 2..3
Param @ 2..3
Token @ 2..3
Token @ 4..6
Ident @ 7..8
Token @ 7..8",
);
assert_ast_eq(
r#" \x, y -> x + y "#,
toplevel,
"
Toplevel @ 1..15
Lambda @ 1..15
Token @ 1..2
Params @ 2..6
Param @ 2..3
Token @ 2..3
Token @ 3..4
Param @ 5..6
Token @ 5..6
Token @ 7..9
Binary @ 10..15
Ident @ 10..11
Token @ 10..11
Op @ 12..13
Token @ 12..13
Ident @ 14..15
Token @ 14..15",
);
assert_ast_eq(
r#" \x, y ->
x + y "#,
toplevel,
"
Toplevel @ 1..29
Lambda @ 1..29
Token @ 1..2
Params @ 2..6
Param @ 2..3
Token @ 2..3
Token @ 3..4
Param @ 5..6
Token @ 5..6
Token @ 7..9
Token @ 9..10
Binary @ 24..29
Ident @ 24..25
Token @ 24..25
Op @ 26..27
Token @ 26..27
Ident @ 28..29
Token @ 28..29",
);
assert_ast_eq(
r#" f \x -> g \y -> x + y "#,
toplevel,
"
Toplevel @ 1..22
Call @ 1..22
Ident @ 1..2
Token @ 1..2
Lambda @ 3..22
Token @ 3..4
Params @ 4..5
Param @ 4..5
Token @ 4..5
Token @ 6..8
Call @ 9..22
Ident @ 9..10
Token @ 9..10
Lambda @ 11..22
Token @ 11..12
Params @ 12..13
Param @ 12..13
Token @ 12..13
Token @ 14..16
Binary @ 17..22
Ident @ 17..18
Token @ 17..18
Op @ 19..20
Token @ 19..20
Ident @ 21..22
Token @ 21..22",
);
assert_ast_eq(
r#" f \x ->
g \y ->
x + y "#,
toplevel,
"
Toplevel @ 1..48
Call @ 1..48
Ident @ 1..2
Token @ 1..2
Lambda @ 3..48
Token @ 3..4
Params @ 4..5
Param @ 4..5
Token @ 4..5
Token @ 6..8
Token @ 8..9
Call @ 21..48
Ident @ 21..22
Token @ 21..22
Lambda @ 23..48
Token @ 23..24
Params @ 24..25
Param @ 24..25
Token @ 24..25
Token @ 26..28
Token @ 28..29
Binary @ 43..48
Ident @ 43..44
Token @ 43..44
Op @ 45..46
Token @ 45..46
Ident @ 47..48
Token @ 47..48",
);
}
// `if` expressions, written on one line and with each branch on its own line.
#[test]
fn if_expr() {
assert_ast_eq(
r#" if (true) 1 else 2 "#,
toplevel,
"
Toplevel @ 1..19
If @ 1..19
Token @ 1..3
Token @ 4..5
Ident @ 5..9
Token @ 5..9
Token @ 9..10
Number @ 11..12
Token @ 11..12
Token @ 13..17
Number @ 18..19
Token @ 18..19",
);
assert_ast_eq(
r#" if (true)
1
else
2 "#,
toplevel,
"
Toplevel @ 1..63
If @ 1..63
Token @ 1..3
Token @ 4..5
Ident @ 5..9
Token @ 5..9
Token @ 9..10
Token @ 10..11
Number @ 27..28
Token @ 27..28
Token @ 28..29
Token @ 41..45
Token @ 45..46
Number @ 62..63
Token @ 62..63",
);
}
// `let` expressions: a single binding followed by its body, and two chained bindings where
// the second `let` is the body of the first.
#[test]
fn let_expr() {
assert_ast_eq(
r#" let x = 1
x "#,
toplevel,
"
Toplevel @ 1..24
Let @ 1..24
Token @ 1..4
Ident @ 5..6
Token @ 5..6
Token @ 7..8
Number @ 9..10
Token @ 9..10
Token @ 10..11
Ident @ 23..24
Token @ 23..24",
);
assert_ast_eq(
r#" let x = 1
let y = 2
x + y "#,
toplevel,
"
Toplevel @ 1..50
Let @ 1..50
Token @ 1..4
Ident @ 5..6
Token @ 5..6
Token @ 7..8
Number @ 9..10
Token @ 9..10
Token @ 10..11
Let @ 23..50
Token @ 23..26
Ident @ 27..28
Token @ 27..28
Token @ 29..30
Number @ 31..32
Token @ 31..32
Token @ 32..33
Binary @ 45..50
Ident @ 45..46
Token @ 45..46
Op @ 47..48
Token @ 47..48
Ident @ 49..50
Token @ 49..50",
)
}

View file

@ -1,510 +0,0 @@
use core::{cell::Cell, fmt, ops::Deref};
use alloc::vec::Vec;
/// A half-open byte range `start..end` within a piece of source code.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Span {
    pub start: usize,
    pub end: usize,
}

impl Span {
    /// Creates a span covering the bytes `start..end`.
    pub fn new(start: usize, end: usize) -> Self {
        Span { start, end }
    }

    /// Returns the text this span covers within `source`.
    ///
    /// Panics if the range is out of bounds or does not fall on character boundaries.
    pub fn slice<'a>(&self, source: &'a SourceCode) -> &'a str {
        let Span { start, end } = *self;
        &source.code[start..end]
    }
}
/// A source code string whose length has been checked against a limit.
///
/// The type does not enforce one exact limit. It only records that the string was checked (or
/// explicitly exempted from the check), so that overly long input cannot stall the parser for
/// an unexpected amount of time.
#[derive(Debug, PartialEq, Eq)]
#[repr(transparent)]
pub struct SourceCode {
    code: str,
}

impl SourceCode {
    /// Wraps `code`, or returns `None` if it is longer than `max_len` bytes.
    pub fn limited_len(code: &str, max_len: usize) -> Option<&Self> {
        (code.len() <= max_len).then(|| Self::unlimited_len(code))
    }

    /// Wraps `code` without checking its length.
    pub fn unlimited_len(code: &str) -> &Self {
        // SAFETY: SourceCode is a `#[repr(transparent)]` wrapper around `str`, so both types have
        // the same layout and pointer metadata. The resulting reference borrows `code` for the
        // same lifetime.
        unsafe { &*(code as *const str as *const SourceCode) }
    }
}

/// Gives read-only access to the wrapped string.
impl Deref for SourceCode {
    type Target = str;

    fn deref(&self) -> &str {
        &self.code
    }
}
/// Index of a node inside an [`Ast`]'s node list.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct NodeId(usize);

impl NodeId {
    /// ID of slot 0, where `Ast::new` stores the shared `Nil` node.
    pub const NIL: Self = Self(0);
}
/// What a [`Node`] represents, together with any links to child nodes.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NodeKind {
/// The empty list; `Ast::write` prints it as `()`.
Nil,
/// Produced by `parse_expr` when it is called at the end of input; printed as `<eof>`.
Eof,
// Atoms
/// Identifier; its text is the node's span within the source.
Ident,
/// Number literal; its text is the node's span within the source.
Number,
/// List cell: the first ID is the element stored in this cell, the second is the rest of the
/// list (`NodeId::NIL` terminates it).
List(NodeId, NodeId),
/// Root of a `parse_toplevel` parse; the ID is the head of the list of parsed expressions.
Toplevel(NodeId),
/// A parse error with its message.
Error(&'static str),
}
/// A single AST node: what it is and which part of the source it came from.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Node {
/// Source range the node was parsed from.
pub span: Span,
/// The node's kind, including links to its children (if any).
pub kind: NodeKind,
}
/// Storage for all the nodes of a parse; nodes refer to each other by [`NodeId`].
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Ast {
/// The nodes, indexed by `NodeId`. `Ast::new` puts a `Nil` node at index 0.
pub nodes: Vec<Node>,
}
/// Controls how much detail `Ast::write` prints.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AstWriteMode {
/// Print the nodes only.
Compact,
/// Additionally print `@start..end` after every node.
Spans,
}
impl Ast {
    /// Creates an AST with room for `capacity` nodes and stores the shared `Nil` node in slot 0
    /// (the slot `NodeId::NIL` refers to).
    ///
    /// # Panics
    ///
    /// Panics if `capacity` is 0, because the `Nil` node would not fit.
    pub fn new(capacity: usize) -> Self {
        assert!(capacity >= 1, "there must be space for at least a nil node");

        let mut ast = Self {
            nodes: Vec::with_capacity(capacity),
        };

        ast.alloc(Node {
            span: Span::new(0, 0),
            kind: NodeKind::Nil,
        })
        .unwrap();

        ast
    }

    /// Appends `node` and returns its ID.
    ///
    /// # Errors
    ///
    /// Returns [`NodeAllocError`] instead of growing the node list once it has reached its
    /// capacity.
    pub fn alloc(&mut self, node: Node) -> Result<NodeId, NodeAllocError> {
        if self.nodes.len() >= self.nodes.capacity() {
            return Err(NodeAllocError);
        }

        let index = self.nodes.len();
        self.nodes.push(node);
        Ok(NodeId(index))
    }

    /// Returns the node with the given ID. Panics if the ID is out of range.
    pub fn get(&self, node_id: NodeId) -> &Node {
        &self.nodes[node_id.0]
    }

    /// Returns the node with the given ID for modification. Panics if the ID is out of range.
    pub fn get_mut(&mut self, node_id: NodeId) -> &mut Node {
        &mut self.nodes[node_id.0]
    }

    /// Writes the subtree rooted at `node_id` to `w` as S-expression text.
    ///
    /// `source` must be the source code the AST was parsed from, since identifier and number
    /// text is read back from it through the nodes' spans. With [`AstWriteMode::Spans`] every
    /// node is followed by `@start..end`.
    ///
    /// # Errors
    ///
    /// Propagates any error reported by `w`.
    pub fn write(
        &self,
        source: &SourceCode,
        node_id: NodeId,
        w: &mut dyn fmt::Write,
        mode: AstWriteMode,
    ) -> fmt::Result {
        // Writes the elements of a list separated by `sep_element`. A tail that is neither `Nil`
        // nor another list cell is written after `sep_tail` (dotted pair notation).
        #[allow(clippy::too_many_arguments)] // internal helper; bundling the arguments buys nothing
        fn write_list(
            ast: &Ast,
            source: &SourceCode,
            w: &mut dyn fmt::Write,
            mode: AstWriteMode,
            mut head: NodeId,
            mut tail: NodeId,
            sep_element: &str,
            sep_tail: &str,
        ) -> fmt::Result {
            loop {
                write_rec(ast, source, w, mode, head)?;

                match ast.get(tail).kind {
                    NodeKind::Nil => break,
                    NodeKind::List(head2, tail2) => {
                        w.write_str(sep_element)?;
                        (head, tail) = (head2, tail2);
                    }
                    _ => {
                        w.write_str(sep_tail)?;
                        write_rec(ast, source, w, mode, tail)?;
                        break;
                    }
                }
            }
            Ok(())
        }

        // NOTE: Separated out to a separate function in case we ever want to introduce auto-indentation.
        fn write_rec(
            ast: &Ast,
            source: &SourceCode,
            w: &mut dyn fmt::Write,
            mode: AstWriteMode,
            node_id: NodeId,
        ) -> fmt::Result {
            let node = ast.get(node_id);
            match &node.kind {
                NodeKind::Nil => write!(w, "()")?,
                NodeKind::Eof => write!(w, "<eof>")?,
                NodeKind::Ident | NodeKind::Number => write!(w, "{}", node.span.slice(source))?,

                NodeKind::List(head, tail) => {
                    w.write_char('(')?;
                    write_list(ast, source, w, mode, *head, *tail, " ", " . ")?;
                    w.write_char(')')?;
                }

                NodeKind::Toplevel(list) => match ast.get(*list).kind {
                    NodeKind::List(head, tail) => {
                        write_list(ast, source, w, mode, head, tail, "\n", " . ")?;
                    }
                    // A program without any expressions keeps `NodeId::NIL` as its child;
                    // there is nothing to print for it.
                    NodeKind::Nil => {}
                    // The child can also be a non-list node, e.g. the parser's shared error node
                    // when the very first list cell could not be allocated. Print it as-is
                    // rather than panicking.
                    _ => write_rec(ast, source, w, mode, *list)?,
                },

                NodeKind::Error(message) => write!(w, "#error({message})")?,
            }

            if mode == AstWriteMode::Spans {
                write!(w, "@{}..{}", node.span.start, node.span.end)?;
            }

            Ok(())
        }

        write_rec(self, source, w, mode, node_id)?;

        Ok(())
    }
}
/// Returned by `Ast::alloc` when the AST has no room left for another node.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct NodeAllocError;
/// Parser state shared by the `parse_*` functions.
pub struct Parser<'a> {
/// The AST that parsed nodes are allocated into.
pub ast: Ast,
/// The source code being parsed.
input: &'a SourceCode,
/// Byte offset of the current character within `input`.
position: usize,
/// Remaining number of `current` calls allowed before the next `advance`; when it runs out,
/// `current` panics with "parser is stuck".
fuel: Cell<usize>,
/// ID of the "program is too big" error node allocated up front in `Parser::new`; `alloc`
/// returns it whenever the AST is full.
alloc_error: NodeId,
}
impl<'a> Parser<'a> {
    /// How many times `current` may be called between two successful `advance` calls before the
    /// parser is considered stuck.
    const FUEL: usize = 256;

    /// Creates a parser over `input` that allocates its nodes into `ast`.
    ///
    /// # Panics
    ///
    /// Panics if `ast` has no room for the "program is too big" error node, which is allocated
    /// up front so that it can be handed out once the AST is full.
    pub fn new(mut ast: Ast, input: &'a SourceCode) -> Self {
        let alloc_error = ast
            .alloc(Node {
                span: Span::new(0, 0),
                kind: NodeKind::Error("program is too big"),
            })
            .expect("there is not enough space in the arena for an error node");

        Self {
            ast,
            input,
            position: 0,
            fuel: Cell::new(Self::FUEL),
            alloc_error,
        }
    }

    /// Returns the character at the current position, or `'\0'` at the end of input.
    ///
    /// Every call uses up one unit of fuel.
    ///
    /// # Panics
    ///
    /// Panics with "parser is stuck" when the fuel has run out, i.e. when the parser kept
    /// looking at the input without ever advancing.
    #[track_caller]
    pub fn current(&self) -> char {
        assert_ne!(self.fuel.get(), 0, "parser is stuck");
        self.fuel.set(self.fuel.get() - 1);

        self.input[self.position..].chars().next().unwrap_or('\0')
    }

    /// Moves past the current character and refills the fuel.
    ///
    /// At the end of input this does nothing. Previously it would move `position` one byte past
    /// the end, after which every `current` call panicked on an out-of-range slice. The fuel is
    /// deliberately not refilled in that case, so a loop that keeps calling `advance` at the end
    /// of input is still reported as a stuck parser.
    pub fn advance(&mut self) {
        // Go through `current` so that the stuck-parser check stays in effect.
        let c = self.current();
        if self.position < self.input.len() {
            self.position += c.len_utf8();
            self.fuel.set(Self::FUEL);
        }
    }

    /// Allocates `expr` in the AST, returning the shared "program is too big" error node if the
    /// AST is full.
    pub fn alloc(&mut self, expr: Node) -> NodeId {
        self.ast.alloc(expr).unwrap_or(self.alloc_error)
    }
}
/// Skips spaces, tabs, newlines, and `;` comments (which run until the end of the line or the
/// end of input), stopping at the first character that is neither.
pub fn skip_whitespace_and_comments(p: &mut Parser<'_>) {
    loop {
        let c = p.current();
        if matches!(c, ' ' | '\t' | '\n') {
            p.advance();
        } else if c == ';' {
            // The newline that ends the comment is consumed by the next iteration.
            while p.current() != '\n' && p.current() != '\0' {
                p.advance();
            }
        } else {
            return;
        }
    }
}
/// Returns whether `c` is one of the ASCII digits `0` through `9`.
fn is_decimal_digit(c: char) -> bool {
    matches!(c, '0'..='9')
}
/// Consumes a number literal: a run of digits, optionally followed by `.` and at least one more
/// digit. A `.` that is not followed by a digit results in an error kind.
pub fn parse_number(p: &mut Parser<'_>) -> NodeKind {
    fn skip_digits(p: &mut Parser<'_>) {
        while is_decimal_digit(p.current()) {
            p.advance();
        }
    }

    skip_digits(p);
    if p.current() != '.' {
        return NodeKind::Number;
    }

    p.advance(); // the decimal point
    if !is_decimal_digit(p.current()) {
        return NodeKind::Error("missing digits after decimal point '.' in number literal");
    }
    skip_digits(p);

    NodeKind::Number
}
/// Returns whether `c` may appear in an identifier.
fn is_ident(c: char) -> bool {
    // The identifier character set is kept small on purpose, so that it is easy to expand in the
    // future. Rationale:
    // - letters and digits are pretty obvious.
    // - '-' and '_' can be used as identifier separators, whichever you prefer.
    // - '+', '-', '*', '/', '^' are for arithmetic.
    // - '=', '!', '<', '>' are for comparison.
    // - '\' is for builtin string constants, such as \n.
    // For other operators, it's generally clearer to use words (such as `and` and `or`.)
    c.is_ascii_alphanumeric()
        || matches!(c, '-' | '_' | '+' | '*' | '/' | '\\' | '^' | '!' | '=' | '<' | '>')
}
/// Consumes identifier characters for as long as there are any and reports an identifier.
pub fn parse_ident(p: &mut Parser<'_>) -> NodeKind {
    loop {
        if !is_ident(p.current()) {
            break NodeKind::Ident;
        }
        p.advance();
    }
}
/// Builder for a chain of `NodeKind::List` cells.
struct List {
    /// First cell of the chain, or `NodeId::NIL` while the list is empty.
    head: NodeId,
    /// Last cell of the chain; this is where the next element gets linked in.
    tail: NodeId,
}

impl List {
    fn new() -> Self {
        List {
            head: NodeId::NIL,
            tail: NodeId::NIL,
        }
    }

    /// Allocates a new cell holding `node` and links it to the end of the list.
    fn append(&mut self, p: &mut Parser<'_>, node: NodeId) {
        let node_span = p.ast.get(node).span;
        let cell = p.alloc(Node {
            span: node_span,
            kind: NodeKind::List(node, NodeId::NIL),
        });

        // First element: the new cell is both ends of the list.
        if self.head == NodeId::NIL {
            self.head = cell;
            self.tail = cell;
            return;
        }

        // Otherwise point the previous last cell at the new one and stretch its span to cover
        // it. If the tail is not a list cell, the list is left untouched.
        let last = p.ast.get_mut(self.tail);
        if let NodeKind::List(element, _) = last.kind {
            last.span = Span::new(last.span.start, node_span.end);
            last.kind = NodeKind::List(element, cell);
            self.tail = cell;
        }
    }
}
/// Parses a parenthesized list. The parser must be positioned at the opening `(`.
///
/// Returns the first cell of the list (or a fresh `Nil` node for `()`), with its span set to
/// cover everything from `(` to `)`. If the input ends before the closing `)`, an error node is
/// returned instead.
pub fn parse_list(p: &mut Parser<'_>) -> NodeId {
    // This could've been a lot simpler if Rust supported tail recursion.

    let open = p.position;

    p.advance(); // skip past opening parenthesis
    skip_whitespace_and_comments(p);

    let mut elements = List::new();
    loop {
        if p.current() == ')' {
            break;
        }
        if p.current() == '\0' {
            let span = Span::new(open, p.position);
            return p.alloc(Node {
                span,
                kind: NodeKind::Error("missing ')' to close '('"),
            });
        }

        let element = parse_expr(p);
        skip_whitespace_and_comments(p);
        elements.append(p, element);
    }
    p.advance(); // skip past closing parenthesis

    // An empty list gets a Nil node of its own, because the span assignment below must not
    // modify the initial Nil with ID 0.
    let list_id = if elements.head == NodeId::NIL {
        p.alloc(Node {
            span: Span::new(0, 0),
            kind: NodeKind::Nil,
        })
    } else {
        elements.head
    };

    let whole = Span::new(open, p.position);
    p.ast.get_mut(list_id).span = whole;

    list_id
}
/// Parses a single expression, choosing what to parse from the current character: end of input,
/// a number, an identifier, or a list. Any other character is consumed and reported as an error
/// node.
pub fn parse_expr(p: &mut Parser<'_>) -> NodeId {
    let start = p.position;
    let first = p.current();

    let kind = if first == '\0' {
        NodeKind::Eof
    } else if is_decimal_digit(first) {
        parse_number(p)
    } else if is_ident(first) {
        // NOTE: Digits were already handled above, so an identifier can never start with one.
        parse_ident(p)
    } else if first == '(' {
        // Lists allocate their own nodes.
        return parse_list(p);
    } else {
        p.advance();
        NodeKind::Error("unexpected character")
    };

    let span = Span::new(start, p.position);
    p.alloc(Node { span, kind })
}
/// Parses expressions until the end of input and returns a `Toplevel` node that links to the
/// list of them. The node's span runs from the position parsing started at to where it ended.
pub fn parse_toplevel(p: &mut Parser<'_>) -> NodeId {
    let start = p.position;
    let mut exprs = List::new();

    skip_whitespace_and_comments(p);
    loop {
        if p.current() == '\0' {
            break;
        }

        let expr = parse_expr(p);
        skip_whitespace_and_comments(p);
        exprs.append(p, expr);
    }

    let span = Span::new(start, p.position);
    p.alloc(Node {
        span,
        kind: NodeKind::Toplevel(exprs.head),
    })
}
#[cfg(test)]
mod tests {
use core::error::Error;
use alloc::{boxed::Box, string::String};
use super::*;
// Runs parse function `f` over `source` with a 16-node AST, writes the resulting node with
// `AstWriteMode::Spans`, and compares the text against `expected`.
#[track_caller]
fn parse(
f: fn(&mut Parser<'_>) -> NodeId,
source: &str,
expected: &str,
) -> Result<(), Box<dyn Error>> {
let ast = Ast::new(16);
let code = SourceCode::unlimited_len(source);
let mut p = Parser::new(ast, code);
let node = f(&mut p);
let ast = p.ast;
let mut s = String::new();
ast.write(code, node, &mut s, AstWriteMode::Spans)?;
assert_eq!(s, expected);
Ok(())
}
// Integers and numbers with a fractional part.
#[test]
fn parse_number() -> Result<(), Box<dyn Error>> {
parse(parse_expr, "123", "123@0..3")?;
parse(parse_expr, "123.456", "123.456@0..7")?;
Ok(())
}
// Identifiers made of letters, digits, and operator characters.
#[test]
fn parse_ident() -> Result<(), Box<dyn Error>> {
parse(parse_expr, "abc", "abc@0..3")?;
parse(parse_expr, "abcABC_01234", "abcABC_01234@0..12")?;
parse(parse_expr, "+-*/\\^!=<>", "+-*/\\^!=<>@0..10")?;
Ok(())
}
// Empty, flat, and nested lists.
#[test]
fn parse_list() -> Result<(), Box<dyn Error>> {
parse(parse_expr, "()", "()@0..2")?;
parse(parse_expr, "(a a)", "(a@1..2 a@3..4)@0..5")?;
parse(parse_expr, "(a a a)", "(a@1..2 a@3..4 a@5..6)@0..7")?;
parse(parse_expr, "(() ())", "(()@1..3 ()@4..6)@0..7")?;
parse(
parse_expr,
"(nestedy (nest OwO))",
"(nestedy@1..8 (nest@10..14 OwO@15..18)@9..19)@0..20",
)?;
Ok(())
}
// Lists that do not fit into the 16-node AST used by `parse`: only the first seven elements are
// kept, and the list ends in the "program is too big" error node, no matter how many more
// elements follow.
#[test]
fn oom() -> Result<(), Box<dyn Error>> {
parse(parse_expr, "(a a a a a a a a)", "(a@1..2 a@3..4 a@5..6 a@7..8 a@9..10 a@11..12 a@13..14 . #error(program is too big)@0..0)@0..17")?;
parse(parse_expr, "(a a a a a a a a a)", "(a@1..2 a@3..4 a@5..6 a@7..8 a@9..10 a@11..12 a@13..14 . #error(program is too big)@0..0)@0..19")?;
parse(parse_expr, "(a a a a a a a a a a)", "(a@1..2 a@3..4 a@5..6 a@7..8 a@9..10 a@11..12 a@13..14 . #error(program is too big)@0..0)@0..21")?;
parse(parse_expr, "(a a a a a a a a a a a)", "(a@1..2 a@3..4 a@5..6 a@7..8 a@9..10 a@11..12 a@13..14 . #error(program is too big)@0..0)@0..23")?;
Ok(())
}
// Several expressions in a row; they are written one per line, followed by the span of the
// toplevel node itself.
#[test]
fn toplevel() -> Result<(), Box<dyn Error>> {
parse(
parse_toplevel,
r#"
(hello world)
(abc)
"#,
"(hello@18..23 world@24..29)@17..30\n(abc@48..51)@47..52@0..65",
)?;
Ok(())
}
}

55
crates/haku/src/source.rs Normal file
View file

@ -0,0 +1,55 @@
use core::{fmt, ops::Deref};
/// Half-open byte range `start..end` into a [`SourceCode`].
#[derive(Clone, Copy, PartialEq, Eq)]
pub struct Span {
    pub start: u32,
    pub end: u32,
}

impl Span {
    /// Creates a span covering the bytes `start..end`.
    pub fn new(start: u32, end: u32) -> Self {
        Span { start, end }
    }

    /// Returns the text this span covers within `source`.
    ///
    /// # Panics
    ///
    /// Panics if the range is out of bounds for `source` or does not fall on character
    /// boundaries, like any `str` range index.
    pub fn slice<'a>(&self, source: &'a SourceCode) -> &'a str {
        let range = self.start as usize..self.end as usize;
        &source.code[range]
    }
}

/// Spans are printed compactly as `start..end`, also in pretty-printed (`{:#?}`) output.
impl fmt::Debug for Span {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let Span { start, end } = self;
        write!(f, "{start}..{end}")
    }
}

/// Source code string with a verified size limit.
///
/// This type does not enforce an exact size limit by itself - it only records that the string
/// was checked not to be longer than intended, so that the parser is not stalled for an
/// unexpected amount of time.
#[derive(Debug, PartialEq, Eq)]
#[repr(transparent)]
pub struct SourceCode {
    code: str,
}

impl SourceCode {
    /// Wraps `code` if it is at most `max_len` bytes long; returns `None` otherwise.
    pub fn limited_len(code: &str, max_len: u32) -> Option<&Self> {
        let fits = code.len() <= max_len as usize;
        fits.then(|| Self::unlimited_len(code))
    }

    /// Wraps `code` without checking its length.
    pub fn unlimited_len(code: &str) -> &Self {
        // SAFETY: SourceCode is a `#[repr(transparent)]` wrapper around str, so a pointer to one
        // can be reinterpreted as a pointer to the other.
        unsafe { &*(code as *const str as *const SourceCode) }
    }
}

impl Deref for SourceCode {
    type Target = str;

    fn deref(&self) -> &str {
        &self.code
    }
}

View file

@ -16,10 +16,17 @@ pub type SystemFn = fn(&mut Vm, FnArgs) -> Result<Value, Exception>;
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ChunkId(u32);
/// Arity class a system function is registered under.
///
/// System functions are resolved by the pair of arity and name, so the same name can refer to
/// different functions: `"-"` is registered both as `Binary` (`sub`) and as `Unary` (`neg`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SystemFnArity {
/// Registered for unary operators such as `-` and `!`.
Unary,
/// Registered for binary operators such as `+` and `==`.
Binary,
/// Registered for functions looked up by a plain name, such as `vec` or `stroke`.
Nary,
}
#[derive(Debug, Clone)]
pub struct System {
/// Resolves a system function name to an index into `fn`s.
pub resolve_fn: fn(&str) -> Option<u8>,
pub resolve_fn: fn(SystemFnArity, &str) -> Option<u8>,
pub fns: [Option<SystemFn>; 256],
pub chunks: Vec<Chunk>,
}
@ -30,7 +37,7 @@ pub struct SystemImage {
}
macro_rules! def_fns {
($($index:tt $name:tt => $fnref:expr),* $(,)?) => {
($($index:tt $arity:tt $name:tt => $fnref:expr),* $(,)?) => {
pub(crate) fn init_fns(system: &mut System) {
$(
debug_assert!(system.fns[$index].is_none());
@ -38,9 +45,9 @@ macro_rules! def_fns {
)*
}
pub(crate) fn resolve(name: &str) -> Option<u8> {
match name {
$($name => Some($index),)*
pub(crate) fn resolve(arity: SystemFnArity, name: &str) -> Option<u8> {
match (arity, name){
$((SystemFnArity::$arity, $name) => Some($index),)*
_ => None,
}
}
@ -106,43 +113,44 @@ pub mod fns {
vm::{Exception, FnArgs, Vm},
};
use super::System;
use super::{System, SystemFnArity};
impl System {
def_fns! {
0x00 "+" => add,
0x01 "-" => sub,
0x02 "*" => mul,
0x03 "/" => div,
0x00 Binary "+" => add,
0x01 Binary "-" => sub,
0x02 Binary "*" => mul,
0x03 Binary "/" => div,
0x04 Unary "-" => neg,
0x40 "not" => not,
0x41 "=" => eq,
0x42 "<>" => neq,
0x43 "<" => lt,
0x44 "<=" => leq,
0x45 ">" => gt,
0x46 ">=" => geq,
0x40 Unary "!" => not,
0x41 Binary "==" => eq,
0x42 Binary "!=" => neq,
0x43 Binary "<" => lt,
0x44 Binary "<=" => leq,
0x45 Binary ">" => gt,
0x46 Binary ">=" => geq,
0x80 "vec" => vec,
0x81 ".x" => vec_x,
0x82 ".y" => vec_y,
0x83 ".z" => vec_z,
0x84 ".w" => vec_w,
0x80 Nary "vec" => vec,
0x81 Nary "vecX" => vec_x,
0x82 Nary "vecY" => vec_y,
0x83 Nary "vecZ" => vec_z,
0x84 Nary "vecW" => vec_w,
0x85 "rgba" => rgba,
0x86 ".r" => rgba_r,
0x87 ".g" => rgba_g,
0x88 ".b" => rgba_b,
0x89 ".a" => rgba_a,
0x85 Nary "rgba" => rgba,
0x86 Nary "rgbaR" => rgba_r,
0x87 Nary "rgbaG" => rgba_g,
0x88 Nary "rgbaB" => rgba_b,
0x89 Nary "rgbaA" => rgba_a,
0x90 "list" => list,
0x90 Nary "list" => list,
0xc0 "to-shape" => to_shape_f,
0xc1 "line" => line,
0xc2 "rect" => rect,
0xc3 "circle" => circle,
0xe0 "stroke" => stroke,
0xe1 "fill" => fill,
0xc0 Nary "toShape" => to_shape_f,
0xc1 Nary "line" => line,
0xc2 Nary "rect" => rect,
0xc3 Nary "circle" => circle,
0xe0 Nary "stroke" => stroke,
0xe1 Nary "fill" => fill,
}
}
@ -196,6 +204,11 @@ pub mod fns {
Ok(Value::Number(result))
}
/// Unary `-`: negates the number passed as the first argument.
///
/// Fails with the "`-` can only work with numbers" message when `get_number` rejects the
/// argument.
pub fn neg(vm: &mut Vm, args: FnArgs) -> Result<Value, Exception> {
    let operand = args.get_number(vm, 0, "`-` can only work with numbers")?;
    let negated = -operand;
    Ok(Value::Number(negated))
}
pub fn not(vm: &mut Vm, args: FnArgs) -> Result<Value, Exception> {
if args.num() != 1 {
return Err(vm.create_exception("(not) expects a single argument to negate"));

143
crates/haku/src/token.rs Normal file
View file

@ -0,0 +1,143 @@
use core::{error::Error, fmt::Display};
use alloc::vec::Vec;
use crate::source::Span;
/// The kind of a token stored in a [`Lexis`].
///
/// `TokenKindSet` uses the discriminants as bit positions, so the variants must keep their
/// default, consecutive numbering.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TokenKind {
Eof,
Ident,
Tag,
Number,
Color,
// Operators
Plus,
Minus,
Star,
Slash,
EqualEqual,
NotEqual,
Less,
LessEqual,
Greater,
GreaterEqual,
Not,
// Punctuation
Newline,
LParen,
RParen,
LBrack,
RBrack,
Comma,
Equal,
Backslash,
RArrow,
// Keywords
Underscore,
And,
Or,
If,
Else,
Let,
// NOTE: This must be kept last for TokenKindSet to work correctly - it derives the size of its
// bit set from this variant's discriminant.
Error,
}
/// The tokens of a source file, stored as two parallel lists: the token at position `i` has
/// kind `kinds[i]` and span `spans[i]`.
#[derive(Debug, Clone)]
pub struct Lexis {
    pub kinds: Vec<TokenKind>,
    pub spans: Vec<Span>,
}

impl Lexis {
    /// Creates an empty token list with room for `capacity` tokens.
    ///
    /// # Panics
    ///
    /// Panics if `capacity` is not below `u32::MAX`, because token positions are `u32`s.
    pub fn new(capacity: usize) -> Self {
        assert!(capacity < u32::MAX as usize);

        let kinds = Vec::with_capacity(capacity);
        let spans = Vec::with_capacity(capacity);
        Self { kinds, spans }
    }

    /// Number of tokens stored so far.
    pub fn len(&self) -> u32 {
        self.kinds.len() as u32
    }

    /// Whether no tokens have been stored yet.
    pub fn is_empty(&self) -> bool {
        matches!(self.len(), 0)
    }

    /// Appends a token.
    ///
    /// # Errors
    ///
    /// Returns [`TokenAllocError`] instead of growing the lists once `kinds` has reached its
    /// capacity.
    pub fn push(&mut self, kind: TokenKind, span: Span) -> Result<(), TokenAllocError> {
        let has_room = self.kinds.len() < self.kinds.capacity();
        if !has_room {
            return Err(TokenAllocError);
        }

        self.kinds.push(kind);
        self.spans.push(span);

        Ok(())
    }

    /// Kind of the token at `position`. Panics if there is no such token.
    pub fn kind(&self, position: u32) -> TokenKind {
        let index = position as usize;
        self.kinds[index]
    }

    /// Span of the token at `position`. Panics if there is no such token.
    pub fn span(&self, position: u32) -> Span {
        let index = position as usize;
        self.spans[index]
    }
}
/// Error for a token list that has no room left for another token; displayed as
/// "too many tokens".
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct TokenAllocError;

impl Display for TokenAllocError {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        write!(f, "too many tokens")
    }
}

impl Error for TokenAllocError {}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct TokenKindSet {
bits: [u32; Self::WORDS],
}
impl TokenKindSet {
const WORDS: usize = ((TokenKind::Error as u32 + u32::BITS - 1) / (u32::BITS)) as usize;
const fn word(kind: TokenKind) -> usize {
(kind as u32 / u32::BITS) as usize
}
const fn bit(kind: TokenKind) -> u32 {
1 << (kind as u32 % u32::BITS)
}
pub const fn new(elems: &[TokenKind]) -> Self {
let mut set = Self {
bits: [0; Self::WORDS],
};
let mut i = 0;
while i < elems.len() {
set = set.include(elems[i]);
i += 1;
}
set
}
pub const fn include(mut self, kind: TokenKind) -> Self {
self.bits[Self::word(kind)] |= Self::bit(kind);
self
}
pub fn contains(&self, kind: TokenKind) -> bool {
self.bits[Self::word(kind)] & Self::bit(kind) != 0
}
}

View file

@ -1,6 +1,6 @@
use alloc::vec::Vec;
use crate::system::ChunkId;
use crate::{compiler::ClosureSpec, system::ChunkId};
// TODO: Probably needs some pretty hardcore space optimization.
// Maybe when we have static typing.
@ -156,9 +156,25 @@ pub struct Closure {
pub start: BytecodeLoc,
pub name: FunctionName,
pub param_count: u8,
pub local_count: u8,
pub captures: Vec<Value>,
}
impl Closure {
    /// Creates the closure for running chunk `chunk_id` from its beginning (offset 0).
    ///
    /// The closure is anonymous, takes no parameters, and captures nothing; the number of
    /// locals is taken from `spec`.
    pub fn chunk(chunk_id: ChunkId, spec: ClosureSpec) -> Self {
        let start = BytecodeLoc {
            chunk_id,
            offset: 0,
        };

        Self {
            start,
            name: FunctionName::Anonymous,
            param_count: 0,
            local_count: spec.local_count,
            captures: Vec::new(),
        }
    }
}
#[derive(Debug, Clone, PartialEq)]
pub struct List {
pub elements: Vec<Value>,

View file

@ -123,8 +123,9 @@ impl Vm {
fn push(&mut self, value: Value) -> Result<(), Exception> {
if self.stack.len() >= self.stack.capacity() {
// TODO: can this error message be made clearer?
return Err(self.create_exception("too many local variables"));
return Err(self.create_exception(
"too many temporary values (local variables and expression operands)",
));
}
self.stack.push(value);
Ok(())
@ -136,6 +137,14 @@ impl Vm {
})
}
/// Returns a mutable reference to the stack slot at `index`, or a "corrupted bytecode"
/// exception if the stack has no such slot.
fn get_mut(&mut self, index: usize) -> Result<&mut Value, Exception> {
    // The bounds check and the borrow are kept separate on purpose: returning the reference
    // from one arm of a `match` on `stack.get_mut(index)` would keep the stack borrowed while
    // the other arm creates the exception.
    if index >= self.stack.len() {
        return Err(
            self.create_exception("corrupted bytecode (set local variable out of bounds)")
        );
    }

    Ok(&mut self.stack[index])
}
fn pop(&mut self) -> Result<Value, Exception> {
self.stack
.pop()
@ -168,6 +177,11 @@ impl Vm {
let mut bottom = self.stack.len();
let mut fuel = self.fuel;
let init_bottom = bottom;
for _ in 0..closure.local_count {
self.push(Value::Nil)?;
}
#[allow(unused)]
let closure = (); // Do not use `closure` after this! Use `get_ref` on `closure_id` instead.
@ -200,6 +214,12 @@ impl Vm {
self.push(value)?;
}
Opcode::SetLocal => {
let index = chunk.read_u8(&mut pc)? as usize;
let new_value = self.pop()?;
*self.get_mut(index)? = new_value;
}
Opcode::Capture => {
let index = chunk.read_u8(&mut pc)? as usize;
let closure = self.get_ref(closure_id).as_closure().unwrap();
@ -226,26 +246,14 @@ impl Vm {
}
}
Opcode::DropLet => {
let count = chunk.read_u8(&mut pc)? as usize;
if count != 0 {
let new_len = self.stack.len().checked_sub(count).ok_or_else(|| {
self.create_exception(
"corrupted bytecode (Drop tried to drop too many values off the stack)",
)
})?;
let value = self.pop()?;
self.stack.resize_with(new_len, || unreachable!());
self.push(value)?;
}
}
Opcode::Function => {
let param_count = chunk.read_u8(&mut pc)?;
let then = chunk.read_u16(&mut pc)? as usize;
let body = pc;
pc = then;
let local_count = chunk.read_u8(&mut pc)?;
let capture_count = chunk.read_u8(&mut pc)? as usize;
let mut captures = Vec::with_capacity(capture_count);
for _ in 0..capture_count {
@ -272,6 +280,7 @@ impl Vm {
},
name: FunctionName::Anonymous,
param_count,
local_count,
captures,
}))?;
self.push(Value::Ref(id))?;
@ -327,6 +336,11 @@ impl Vm {
)
})?;
// NOTE: Locals are only pushed _after_ we do any stack calculations.
for _ in 0..closure.local_count {
self.push(Value::Nil)?;
}
self.push_call(frame)?;
}
@ -381,10 +395,13 @@ impl Vm {
}
}
Ok(self
let result = self
.stack
.pop()
.expect("there should be a result at the top of the stack"))
.expect("there should be a result at the top of the stack");
self.stack.resize_with(init_bottom, || unreachable!());
Ok(result)
}
fn store_context(&mut self, context: Context) {

View file

@ -1,10 +1,14 @@
use std::error::Error;
use haku::{
ast::{dump::dump, Ast},
bytecode::{Chunk, Defs},
compiler::{compile_expr, Compiler, Source},
sexp::{self, Ast, Parser, SourceCode},
lexer::{lex, Lexer},
parser::{self, Parser, ParserLimits},
source::SourceCode,
system::System,
token::Lexis,
value::{BytecodeLoc, Closure, FunctionName, Ref, RefId, Value},
vm::{Vm, VmLimits},
};
@ -12,11 +16,16 @@ use haku::{
fn eval(code: &str) -> Result<Value, Box<dyn Error>> {
let mut system = System::new(1);
let ast = Ast::new(1024);
let code = SourceCode::unlimited_len(code);
let mut parser = Parser::new(ast, code);
let root = sexp::parse_toplevel(&mut parser);
let ast = parser.ast;
let mut lexer = Lexer::new(Lexis::new(1024), code);
lex(&mut lexer)?;
let mut ast = Ast::new(1024);
let mut parser = Parser::new(&lexer.lexis, &ParserLimits { max_events: 1024 });
parser::toplevel(&mut parser);
let (root, mut parser_diagnostics) = parser.into_ast(&mut ast)?;
println!("{}", dump(&ast, root, Some(code)));
let src = Source {
code,
ast: &ast,
@ -27,21 +36,29 @@ fn eval(code: &str) -> Result<Value, Box<dyn Error>> {
let mut chunk = Chunk::new(65536).unwrap();
let mut compiler = Compiler::new(&mut defs, &mut chunk);
compile_expr(&mut compiler, &src, root)?;
let closure_spec = compiler.closure_spec();
let defs = compiler.defs;
for diagnostic in &compiler.diagnostics {
let mut diagnostics = lexer.diagnostics;
diagnostics.append(&mut parser_diagnostics);
diagnostics.append(&mut compiler.diagnostics);
for diagnostic in &diagnostics {
println!(
"{}..{}: {}",
diagnostic.span.start, diagnostic.span.end, diagnostic.message
"{}..{} {:?}: {}",
diagnostic.span().start,
diagnostic.span().end,
diagnostic.span().slice(code),
diagnostic.message()
);
}
if !compiler.diagnostics.is_empty() {
panic!("compiler diagnostics were emitted")
if !diagnostics.is_empty() {
panic!("diagnostics were emitted")
}
let limits = VmLimits {
stack_capacity: 256,
stack_capacity: 1024,
call_stack_capacity: 256,
ref_capacity: 256,
fuel: 32768,
@ -50,16 +67,9 @@ fn eval(code: &str) -> Result<Value, Box<dyn Error>> {
let mut vm = Vm::new(defs, &limits);
let chunk_id = system.add_chunk(chunk)?;
println!("bytecode: {:?}", system.chunk(chunk_id));
println!("closure spec: {closure_spec:?}");
let closure = vm.create_ref(Ref::Closure(Closure {
start: BytecodeLoc {
chunk_id,
offset: 0,
},
name: FunctionName::Anonymous,
param_count: 0,
captures: Vec::new(),
}))?;
let closure = vm.create_ref(Ref::Closure(Closure::chunk(chunk_id, closure_spec)))?;
let result = vm.run(&system, closure)?;
println!("used fuel: {}", limits.fuel - vm.remaining_fuel());
@ -87,49 +97,52 @@ fn literal_number() {
#[test]
fn literal_bool() {
assert_eq!(eval("false").unwrap(), Value::False);
assert_eq!(eval("true").unwrap(), Value::True);
assert_eq!(eval("False").unwrap(), Value::False);
assert_eq!(eval("True").unwrap(), Value::True);
}
#[test]
fn function_nil() {
assert_eq!(eval("(fn () ())").unwrap(), Value::Ref(RefId::from_u32(1)));
assert_eq!(
eval(r#" \_ -> () "#).unwrap(),
Value::Ref(RefId::from_u32(1))
);
}
#[test]
fn function_nil_call() {
assert_eq!(eval("((fn () ()))").unwrap(), Value::Nil);
assert_eq!(eval(r#"(\_ -> ()) ()"#).unwrap(), Value::Nil);
}
#[test]
fn function_arithmetic() {
expect_number("((fn (x) (+ x 2)) 2)", 4.0, 0.0001);
expect_number(r#"(\x -> x + 2) 2"#, 4.0, 0.0001);
}
#[test]
fn function_let() {
expect_number("((fn (add-two) (add-two 2)) (fn (x) (+ x 2)))", 4.0, 0.0001);
expect_number(r#"(\addTwo -> addTwo 2) \x -> x + 2"#, 4.0, 0.0001);
}
#[test]
fn function_closure() {
expect_number("(((fn (x) (fn (y) (+ x y))) 2) 2)", 4.0, 0.0001);
expect_number(r#"((\x -> \y -> x + y) 2) 2"#, 4.0, 0.0001);
}
#[test]
fn if_literal() {
expect_number("(if 1 1 2)", 1.0, 0.0001);
expect_number("(if () 1 2)", 2.0, 0.0001);
expect_number("(if false 1 2)", 2.0, 0.0001);
expect_number("(if true 1 2)", 1.0, 0.0001);
expect_number("if (1) 1 else 2", 1.0, 0.0001);
expect_number("if (()) 1 else 2", 2.0, 0.0001);
expect_number("if (False) 1 else 2", 2.0, 0.0001);
expect_number("if (True) 1 else 2", 1.0, 0.0001);
}
#[test]
fn def_simple() {
let code = r#"
(def x 1)
(def y 2)
(+ x y)
x = 1
y = 2
x + y
"#;
expect_number(code, 3.0, 0.0001);
}
@ -137,13 +150,13 @@ fn def_simple() {
#[test]
fn def_fib_recursive() {
let code = r#"
(def fib
(fn (n)
(if (< n 2)
n
(+ (fib (- n 1)) (fib (- n 2))))))
(fib 10)
fib = \n ->
if (n < 2)
n
else
fib (n - 1) + fib (n - 2)
fib 10
"#;
expect_number(code, 55.0, 0.0001);
}
@ -151,27 +164,30 @@ fn def_fib_recursive() {
#[test]
fn def_mutually_recursive() {
let code = r#"
(def f
(fn (x)
(if (< x 10)
(g (+ x 1))
x)))
f = \x ->
if (x < 10)
g (x + 1)
else
x
(def g
(fn (x)
(if (< x 10)
(f (* x 2))
x)))
g = \x ->
if (x < 10)
f (x * 2)
else
x
(f 0)
f 0
"#;
expect_number(code, 14.0, 0.0001);
}
#[test]
fn def_botsbuildbots() {
let result = eval("(def botsbuildbots (fn () (botsbuildbots))) (botsbuildbots)");
if let Err(error) = result {
let code = r#"
botsbuildbots = \_ -> botsbuildbots ()
botsbuildbots ()
"#;
if let Err(error) = eval(code) {
assert_eq!(
error.to_string(),
"Exception {\n message: \"too much recursion\",\n}"
@ -184,8 +200,8 @@ fn def_botsbuildbots() {
#[test]
fn let_single() {
let code = r#"
(let ((x 1))
(+ x 1))
let x = 1
x + 1
"#;
expect_number(code, 2.0, 0.0001);
}
@ -193,9 +209,9 @@ fn let_single() {
#[test]
fn let_many() {
let code = r#"
(let ((x 1)
(y 2))
(+ x y))
let x = 1
let y = 2
x + y
"#;
expect_number(code, 3.0, 0.0001);
}
@ -203,9 +219,9 @@ fn let_many() {
#[test]
fn let_sequence() {
let code = r#"
(let ((x 1)
(y (+ x 1)))
(+ x y))
let x = 1
let y = x + 1
x + y
"#;
expect_number(code, 3.0, 0.0001);
}
@ -213,59 +229,40 @@ fn let_sequence() {
#[test]
fn let_subexpr() {
let code = r#"
(+
(let ((x 1)
(y 2))
(* x y)))
(let x = 1
let y = 2
x * y) + 2
"#;
expect_number(code, 2.0, 0.0001);
expect_number(code, 4.0, 0.0001);
}
#[test]
fn let_empty() {
fn let_subexpr_two() {
let code = r#"
(let () 1)
"#;
expect_number(code, 1.0, 0.0001);
}
#[test]
fn let_subexpr_empty() {
let code = r#"
(+ (let () 1) (let () 1))
"#;
expect_number(code, 2.0, 0.0001);
}
#[test]
fn let_subexpr_many() {
let code = r#"
(+
(let ((x 1)
(y 2))
(* x y))
(let () 1)
(let ((x 1)) x))
(let x = 1
2) +
(let x = 1
x)
"#;
expect_number(code, 3.0, 0.0001);
}
#[test]
fn system_arithmetic() {
expect_number("(+ 1 2 3 4)", 10.0, 0.0001);
expect_number("(+ (* 2 1) 1 (/ 6 2) (- 10 3))", 13.0, 0.0001);
fn let_subexpr_many() {
let code = r#"
(let x = 1
let y = 2
x * y) +
(let x = 1
2) +
(let x = 1
x)
"#;
expect_number(code, 5.0, 0.0001);
}
#[test]
fn practical_fib_recursive() {
let code = r#"
((fn (fib)
(fib fib 10))
(fn (fib n)
(if (< n 2)
n
(+ (fib fib (- n 1)) (fib fib (- n 2))))))
"#;
expect_number(code, 55.0, 0.0001);
fn system_arithmetic() {
expect_number("1 + 2 + 3 + 4", 10.0, 0.0001);
expect_number("(2 * 1) + 1 + (6 / 2) + (10 - 3)", 13.0, 0.0001);
}

View file

@ -5,11 +5,15 @@
use eyre::{bail, Context, OptionExt};
use haku::{
ast::Ast,
bytecode::{Chunk, Defs, DefsImage},
compiler::{Compiler, Source},
lexer::{lex, Lexer},
parser::{self, Parser, ParserLimits},
render::{tiny_skia::Pixmap, Renderer, RendererLimits},
sexp::{Ast, Parser, SourceCode},
source::SourceCode,
system::{ChunkId, System, SystemImage},
token::Lexis,
value::{BytecodeLoc, Closure, FunctionName, Ref, Value},
vm::{Vm, VmImage, VmLimits},
};
@ -22,9 +26,11 @@ use crate::schema::Vec2;
// because we do some dynamic typing magic over on the JavaScript side to automatically call all
// the appropriate functions for setting these limits on the client side.
pub struct Limits {
pub max_source_code_len: usize,
pub max_source_code_len: u32,
pub max_chunks: usize,
pub max_defs: usize,
pub max_tokens: usize,
pub max_parser_events: usize,
pub ast_capacity: usize,
pub chunk_capacity: usize,
pub stack_capacity: usize,
@ -88,12 +94,21 @@ impl Haku {
pub fn set_brush(&mut self, code: &str) -> eyre::Result<()> {
self.reset();
let ast = Ast::new(self.limits.ast_capacity);
let code = SourceCode::limited_len(code, self.limits.max_source_code_len)
.ok_or_eyre("source code is too long")?;
let mut parser = Parser::new(ast, code);
let root = haku::sexp::parse_toplevel(&mut parser);
let ast = parser.ast;
let mut lexer = Lexer::new(Lexis::new(self.limits.max_tokens), code);
lex(&mut lexer)?;
let mut parser = Parser::new(
&lexer.lexis,
&ParserLimits {
max_events: self.limits.max_parser_events,
},
);
parser::toplevel(&mut parser);
let mut ast = Ast::new(self.limits.ast_capacity);
let (root, parser_diagnostics) = parser.into_ast(&mut ast)?;
let src = Source {
code,
@ -107,7 +122,10 @@ impl Haku {
haku::compiler::compile_expr(&mut compiler, &src, root)
.context("failed to compile the chunk")?;
if !compiler.diagnostics.is_empty() {
if !lexer.diagnostics.is_empty()
|| !parser_diagnostics.is_empty()
|| !compiler.diagnostics.is_empty()
{
bail!("diagnostics were emitted");
}

View file

@ -61,6 +61,12 @@ max_chunks = 2
# Maximum amount of defs across all source code chunks.
max_defs = 256
# Maximum amount of tokens a single chunk can have.
max_tokens = 4096
# Maximum amount of events that the parser may emit in a single chunk.
max_parser_events = 4096
# Maximum amount of AST nodes in a single parse.
ast_capacity = 4096