syntax v2
Introduce a new, more ergonomic syntax for haku. Not all features are implemented just yet. Still missing: custom tags (non-True/False), color literals, and lists.
This commit is contained in:
parent
a3e5e8bd10
commit
2595bf0d82
21 changed files with 2844 additions and 1062 deletions
|
@ -1,71 +1,31 @@
|
||||||
// NOTE: This is a very bad CLI.
|
// NOTE: This is a very bad CLI. I only use it for debugging haku with LLDB.
|
||||||
// Sorry!
|
// Sorry that it doesn't actually do anything!
|
||||||
|
|
||||||
use std::{error::Error, fmt::Display, io::BufRead};
|
use std::{error::Error, fmt::Display, io::BufRead};
|
||||||
|
|
||||||
use haku::{
|
use haku::{
|
||||||
bytecode::{Chunk, Defs},
|
ast::{dump::dump, Ast},
|
||||||
compiler::{compile_expr, Compiler, Source},
|
lexer::{lex, Lexer},
|
||||||
sexp::{parse_toplevel, Ast, Parser, SourceCode},
|
parser::{expr, Parser, ParserLimits},
|
||||||
system::System,
|
source::SourceCode,
|
||||||
value::{BytecodeLoc, Closure, FunctionName, Ref, Value},
|
token::Lexis,
|
||||||
vm::{Vm, VmLimits},
|
value::Value,
|
||||||
};
|
};
|
||||||
|
|
||||||
fn eval(code: &str) -> Result<Value, Box<dyn Error>> {
|
fn eval(code: &str) -> Result<Value, Box<dyn Error>> {
|
||||||
let mut system = System::new(1);
|
|
||||||
|
|
||||||
let ast = Ast::new(1024);
|
|
||||||
let code = SourceCode::unlimited_len(code);
|
let code = SourceCode::unlimited_len(code);
|
||||||
let mut parser = Parser::new(ast, code);
|
let mut lexer = Lexer::new(Lexis::new(1024), code);
|
||||||
let root = parse_toplevel(&mut parser);
|
lex(&mut lexer).expect("too many tokens");
|
||||||
let ast = parser.ast;
|
|
||||||
let src = Source {
|
|
||||||
code,
|
|
||||||
ast: &ast,
|
|
||||||
system: &system,
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut defs = Defs::new(256);
|
let mut parser = Parser::new(&lexer.lexis, &ParserLimits { max_events: 1024 });
|
||||||
let mut chunk = Chunk::new(65536).unwrap();
|
expr(&mut parser);
|
||||||
let mut compiler = Compiler::new(&mut defs, &mut chunk);
|
|
||||||
compile_expr(&mut compiler, &src, root)?;
|
|
||||||
let diagnostics = compiler.diagnostics;
|
|
||||||
let defs = compiler.defs;
|
|
||||||
println!("{chunk:?}");
|
|
||||||
|
|
||||||
for diagnostic in &diagnostics {
|
let mut ast = Ast::new(1024);
|
||||||
eprintln!(
|
let (root, _) = parser.into_ast(&mut ast).unwrap();
|
||||||
"{}..{}: {}",
|
|
||||||
diagnostic.span.start, diagnostic.span.end, diagnostic.message
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
if !diagnostics.is_empty() {
|
eprintln!("{}", dump(&ast, root, Some(code)));
|
||||||
return Err(Box::new(DiagnosticsEmitted));
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut vm = Vm::new(
|
Ok(Value::Nil)
|
||||||
defs,
|
|
||||||
&VmLimits {
|
|
||||||
stack_capacity: 256,
|
|
||||||
call_stack_capacity: 256,
|
|
||||||
ref_capacity: 256,
|
|
||||||
fuel: 32768,
|
|
||||||
memory: 1024,
|
|
||||||
},
|
|
||||||
);
|
|
||||||
let chunk_id = system.add_chunk(chunk)?;
|
|
||||||
let closure = vm.create_ref(Ref::Closure(Closure {
|
|
||||||
start: BytecodeLoc {
|
|
||||||
chunk_id,
|
|
||||||
offset: 0,
|
|
||||||
},
|
|
||||||
name: FunctionName::Anonymous,
|
|
||||||
param_count: 0,
|
|
||||||
captures: Vec::new(),
|
|
||||||
}))?;
|
|
||||||
Ok(vm.run(&system, closure)?)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
||||||
|
|
|
@ -2,18 +2,23 @@
|
||||||
|
|
||||||
extern crate alloc;
|
extern crate alloc;
|
||||||
|
|
||||||
use core::{alloc::Layout, slice};
|
use core::{alloc::Layout, num::Saturating, slice};
|
||||||
|
|
||||||
use alloc::{boxed::Box, vec::Vec};
|
use alloc::{boxed::Box, vec::Vec};
|
||||||
use haku::{
|
use haku::{
|
||||||
|
ast::Ast,
|
||||||
bytecode::{Chunk, Defs, DefsImage},
|
bytecode::{Chunk, Defs, DefsImage},
|
||||||
compiler::{compile_expr, CompileError, Compiler, Diagnostic, Source},
|
compiler::{compile_expr, CompileError, Compiler, Source},
|
||||||
|
diagnostic::Diagnostic,
|
||||||
|
lexer::{lex, Lexer},
|
||||||
|
parser::{self, Parser},
|
||||||
render::{
|
render::{
|
||||||
tiny_skia::{Pixmap, PremultipliedColorU8},
|
tiny_skia::{Pixmap, PremultipliedColorU8},
|
||||||
Renderer, RendererLimits,
|
Renderer, RendererLimits,
|
||||||
},
|
},
|
||||||
sexp::{parse_toplevel, Ast, Parser, SourceCode},
|
source::SourceCode,
|
||||||
system::{ChunkId, System, SystemImage},
|
system::{ChunkId, System, SystemImage},
|
||||||
|
token::Lexis,
|
||||||
value::{BytecodeLoc, Closure, FunctionName, Ref, Value},
|
value::{BytecodeLoc, Closure, FunctionName, Ref, Value},
|
||||||
vm::{Exception, Vm, VmImage, VmLimits},
|
vm::{Exception, Vm, VmImage, VmLimits},
|
||||||
};
|
};
|
||||||
|
@ -41,6 +46,8 @@ struct Limits {
|
||||||
max_source_code_len: usize,
|
max_source_code_len: usize,
|
||||||
max_chunks: usize,
|
max_chunks: usize,
|
||||||
max_defs: usize,
|
max_defs: usize,
|
||||||
|
max_tokens: usize,
|
||||||
|
max_parser_events: usize,
|
||||||
ast_capacity: usize,
|
ast_capacity: usize,
|
||||||
chunk_capacity: usize,
|
chunk_capacity: usize,
|
||||||
stack_capacity: usize,
|
stack_capacity: usize,
|
||||||
|
@ -58,6 +65,8 @@ impl Default for Limits {
|
||||||
max_source_code_len: 65536,
|
max_source_code_len: 65536,
|
||||||
max_chunks: 2,
|
max_chunks: 2,
|
||||||
max_defs: 256,
|
max_defs: 256,
|
||||||
|
max_tokens: 1024,
|
||||||
|
max_parser_events: 1024,
|
||||||
ast_capacity: 1024,
|
ast_capacity: 1024,
|
||||||
chunk_capacity: 65536,
|
chunk_capacity: 65536,
|
||||||
stack_capacity: 1024,
|
stack_capacity: 1024,
|
||||||
|
@ -101,6 +110,8 @@ macro_rules! limit_setter {
|
||||||
limit_setter!(max_source_code_len);
|
limit_setter!(max_source_code_len);
|
||||||
limit_setter!(max_chunks);
|
limit_setter!(max_chunks);
|
||||||
limit_setter!(max_defs);
|
limit_setter!(max_defs);
|
||||||
|
limit_setter!(max_tokens);
|
||||||
|
limit_setter!(max_parser_events);
|
||||||
limit_setter!(ast_capacity);
|
limit_setter!(ast_capacity);
|
||||||
limit_setter!(chunk_capacity);
|
limit_setter!(chunk_capacity);
|
||||||
limit_setter!(stack_capacity);
|
limit_setter!(stack_capacity);
|
||||||
|
@ -207,6 +218,8 @@ unsafe extern "C" fn haku_exception_message_len(instance: *const Instance) -> u3
|
||||||
enum StatusCode {
|
enum StatusCode {
|
||||||
Ok,
|
Ok,
|
||||||
SourceCodeTooLong,
|
SourceCodeTooLong,
|
||||||
|
TooManyTokens,
|
||||||
|
TooManyAstNodes,
|
||||||
ChunkTooBig,
|
ChunkTooBig,
|
||||||
DiagnosticsEmitted,
|
DiagnosticsEmitted,
|
||||||
TooManyChunks,
|
TooManyChunks,
|
||||||
|
@ -238,6 +251,8 @@ extern "C" fn haku_status_string(code: StatusCode) -> *const i8 {
|
||||||
match code {
|
match code {
|
||||||
StatusCode::Ok => c"ok",
|
StatusCode::Ok => c"ok",
|
||||||
StatusCode::SourceCodeTooLong => c"source code is too long",
|
StatusCode::SourceCodeTooLong => c"source code is too long",
|
||||||
|
StatusCode::TooManyTokens => c"source code has too many tokens",
|
||||||
|
StatusCode::TooManyAstNodes => c"source code has too many AST nodes",
|
||||||
StatusCode::ChunkTooBig => c"compiled bytecode is too large",
|
StatusCode::ChunkTooBig => c"compiled bytecode is too large",
|
||||||
StatusCode::DiagnosticsEmitted => c"diagnostics were emitted",
|
StatusCode::DiagnosticsEmitted => c"diagnostics were emitted",
|
||||||
StatusCode::TooManyChunks => c"too many registered bytecode chunks",
|
StatusCode::TooManyChunks => c"too many registered bytecode chunks",
|
||||||
|
@ -281,22 +296,22 @@ unsafe extern "C" fn haku_num_diagnostics(brush: *const Brush) -> u32 {
|
||||||
|
|
||||||
#[no_mangle]
|
#[no_mangle]
|
||||||
unsafe extern "C" fn haku_diagnostic_start(brush: *const Brush, index: u32) -> u32 {
|
unsafe extern "C" fn haku_diagnostic_start(brush: *const Brush, index: u32) -> u32 {
|
||||||
(*brush).diagnostics[index as usize].span.start as u32
|
(*brush).diagnostics[index as usize].span().start
|
||||||
}
|
}
|
||||||
|
|
||||||
#[no_mangle]
|
#[no_mangle]
|
||||||
unsafe extern "C" fn haku_diagnostic_end(brush: *const Brush, index: u32) -> u32 {
|
unsafe extern "C" fn haku_diagnostic_end(brush: *const Brush, index: u32) -> u32 {
|
||||||
(*brush).diagnostics[index as usize].span.end as u32
|
(*brush).diagnostics[index as usize].span().end
|
||||||
}
|
}
|
||||||
|
|
||||||
#[no_mangle]
|
#[no_mangle]
|
||||||
unsafe extern "C" fn haku_diagnostic_message(brush: *const Brush, index: u32) -> *const u8 {
|
unsafe extern "C" fn haku_diagnostic_message(brush: *const Brush, index: u32) -> *const u8 {
|
||||||
(*brush).diagnostics[index as usize].message.as_ptr()
|
(*brush).diagnostics[index as usize].message().as_ptr()
|
||||||
}
|
}
|
||||||
|
|
||||||
#[no_mangle]
|
#[no_mangle]
|
||||||
unsafe extern "C" fn haku_diagnostic_message_len(brush: *const Brush, index: u32) -> u32 {
|
unsafe extern "C" fn haku_diagnostic_message_len(brush: *const Brush, index: u32) -> u32 {
|
||||||
(*brush).diagnostics[index as usize].message.len() as u32
|
(*brush).diagnostics[index as usize].message().len() as u32
|
||||||
}
|
}
|
||||||
|
|
||||||
#[no_mangle]
|
#[no_mangle]
|
||||||
|
@ -315,15 +330,27 @@ unsafe extern "C" fn haku_compile_brush(
|
||||||
|
|
||||||
let code = core::str::from_utf8(slice::from_raw_parts(code, code_len as usize))
|
let code = core::str::from_utf8(slice::from_raw_parts(code, code_len as usize))
|
||||||
.expect("invalid UTF-8");
|
.expect("invalid UTF-8");
|
||||||
let code = match SourceCode::limited_len(code, instance.limits.max_source_code_len) {
|
let Some(code) = SourceCode::limited_len(code, instance.limits.max_source_code_len as u32)
|
||||||
Some(code) => code,
|
else {
|
||||||
None => return StatusCode::SourceCodeTooLong,
|
return StatusCode::SourceCodeTooLong;
|
||||||
};
|
};
|
||||||
|
|
||||||
let ast = Ast::new(instance.limits.ast_capacity);
|
let mut lexer = Lexer::new(Lexis::new(instance.limits.max_tokens), code);
|
||||||
let mut parser = Parser::new(ast, code);
|
if lex(&mut lexer).is_err() {
|
||||||
let root = parse_toplevel(&mut parser);
|
return StatusCode::TooManyTokens;
|
||||||
let ast = parser.ast;
|
};
|
||||||
|
|
||||||
|
let mut ast = Ast::new(instance.limits.ast_capacity);
|
||||||
|
let mut parser = Parser::new(
|
||||||
|
&lexer.lexis,
|
||||||
|
&haku::parser::ParserLimits {
|
||||||
|
max_events: instance.limits.max_parser_events,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
parser::toplevel(&mut parser);
|
||||||
|
let Ok((root, mut parser_diagnostics)) = parser.into_ast(&mut ast) else {
|
||||||
|
return StatusCode::TooManyAstNodes;
|
||||||
|
};
|
||||||
|
|
||||||
let src = Source {
|
let src = Source {
|
||||||
code,
|
code,
|
||||||
|
@ -339,8 +366,11 @@ unsafe extern "C" fn haku_compile_brush(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if !compiler.diagnostics.is_empty() {
|
let mut diagnostics = lexer.diagnostics;
|
||||||
brush.diagnostics = compiler.diagnostics;
|
diagnostics.append(&mut parser_diagnostics);
|
||||||
|
diagnostics.append(&mut compiler.diagnostics);
|
||||||
|
if !diagnostics.is_empty() {
|
||||||
|
brush.diagnostics = diagnostics;
|
||||||
return StatusCode::DiagnosticsEmitted;
|
return StatusCode::DiagnosticsEmitted;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
125
crates/haku/src/ast.rs
Normal file
125
crates/haku/src/ast.rs
Normal file
|
@ -0,0 +1,125 @@
|
||||||
|
use core::{error::Error, fmt::Display};
|
||||||
|
|
||||||
|
use alloc::vec::Vec;
|
||||||
|
|
||||||
|
use crate::source::Span;
|
||||||
|
|
||||||
|
pub mod dump;
|
||||||
|
pub mod walk;
|
||||||
|
|
||||||
|
/// Handle identifying a single node stored inside an [`Ast`].
///
/// The wrapped value is the node's index in the `Ast`'s internal tables.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct NodeId(u32);

impl NodeId {
    /// ID of the nil node, which always occupies index 0.
    pub const NIL: Self = Self(0);
}
|
||||||
|
|
||||||
|
/// The syntactic category of a node in the [`Ast`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NodeKind {
    /// The special node that inhabits node ID 0.
    Nil,

    /// A raw token kept in the tree as trivia.
    Token,

    /// An identifier.
    Ident,
    /// A tag, such as `True` or `False`.
    Tag,
    /// A number literal.
    Number,
    /// A color literal.
    Color,
    /// A list literal.
    List,

    /// The operator inside a `Unary` or `Binary` node.
    Op,
    /// A unary operator applied to an expression.
    Unary,
    /// A binary operator applied to two expressions.
    Binary,
    /// A function call.
    Call,
    /// Empty parentheses.
    ParenEmpty,
    /// A parenthesized expression.
    Paren,
    /// A lambda (function literal).
    Lambda,
    /// The parameter list of a `Lambda`.
    Params,
    /// A single parameter inside `Params`.
    Param,
    /// An `if` expression.
    If,
    /// A `let` expression.
    Let,

    /// The root of a whole program.
    Toplevel,

    /// A placeholder for source text that failed to parse.
    Error,
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||||
|
pub struct Node {
|
||||||
|
pub span: Span,
|
||||||
|
pub kind: NodeKind,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct Ast {
|
||||||
|
kinds: Vec<NodeKind>,
|
||||||
|
spans: Vec<Span>,
|
||||||
|
children_spans: Vec<(u32, u32)>,
|
||||||
|
children: Vec<NodeId>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Ast {
|
||||||
|
pub fn new(capacity: usize) -> Self {
|
||||||
|
assert!(capacity >= 1, "there must be space for at least a nil node");
|
||||||
|
assert!(capacity <= u32::MAX as usize);
|
||||||
|
|
||||||
|
let mut ast = Self {
|
||||||
|
kinds: Vec::with_capacity(capacity),
|
||||||
|
spans: Vec::with_capacity(capacity),
|
||||||
|
children_spans: Vec::with_capacity(capacity),
|
||||||
|
children: Vec::new(),
|
||||||
|
};
|
||||||
|
|
||||||
|
ast.alloc(NodeKind::Nil, Span::new(0, 0)).unwrap();
|
||||||
|
|
||||||
|
ast
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn alloc(&mut self, kind: NodeKind, span: Span) -> Result<NodeId, NodeAllocError> {
|
||||||
|
if self.kinds.len() >= self.kinds.capacity() {
|
||||||
|
return Err(NodeAllocError);
|
||||||
|
}
|
||||||
|
|
||||||
|
let index = self.kinds.len() as u32;
|
||||||
|
self.kinds.push(kind);
|
||||||
|
self.spans.push(span);
|
||||||
|
self.children_spans.push((0, 0));
|
||||||
|
Ok(NodeId(index))
|
||||||
|
}
|
||||||
|
|
||||||
|
// NOTE: This never produces a NodeAllocError, because there can more or less only ever be as many children for
|
||||||
|
// nodes as there are nodes.
|
||||||
|
pub fn alloc_children(&mut self, for_node: NodeId, children: &[NodeId]) {
|
||||||
|
let start = self.children.len();
|
||||||
|
self.children.extend_from_slice(children);
|
||||||
|
let end = self.children.len();
|
||||||
|
self.children_spans[for_node.0 as usize] = (start as u32, end as u32);
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn extend_span(&mut self, in_node: NodeId, end: u32) {
|
||||||
|
self.spans[in_node.0 as usize].end = end;
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn kind(&self, id: NodeId) -> NodeKind {
|
||||||
|
self.kinds[id.0 as usize]
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn span(&self, id: NodeId) -> Span {
|
||||||
|
self.spans[id.0 as usize]
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn children(&self, id: NodeId) -> &[NodeId] {
|
||||||
|
let (start, end) = self.children_spans[id.0 as usize];
|
||||||
|
&self.children[start as usize..end as usize]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Error returned when an [`Ast`] has no room left for another node.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct NodeAllocError;

impl Display for NodeAllocError {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        write!(f, "too many nodes")
    }
}

impl Error for NodeAllocError {}
|
34
crates/haku/src/ast/dump.rs
Normal file
34
crates/haku/src/ast/dump.rs
Normal file
|
@ -0,0 +1,34 @@
|
||||||
|
use alloc::string::String;
|
||||||
|
use core::fmt::Write;
|
||||||
|
|
||||||
|
use crate::{ast::NodeKind, source::SourceCode};
|
||||||
|
|
||||||
|
use super::{Ast, NodeId};
|
||||||
|
|
||||||
|
pub fn dump(ast: &Ast, node: NodeId, code: Option<&SourceCode>) -> String {
|
||||||
|
let mut result = String::new();
|
||||||
|
|
||||||
|
fn rec(ast: &Ast, node: NodeId, code: Option<&SourceCode>, result: &mut String, depth: usize) {
|
||||||
|
for _ in 0..depth {
|
||||||
|
result.push_str(" ");
|
||||||
|
}
|
||||||
|
|
||||||
|
write!(result, "{:?} @ {:?}", ast.kind(node), ast.span(node)).unwrap();
|
||||||
|
if let Some(code) = code {
|
||||||
|
if ast.kind(node) == NodeKind::Token {
|
||||||
|
write!(result, " {:?}", ast.span(node).slice(code)).unwrap();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
writeln!(result).unwrap();
|
||||||
|
for &child in ast.children(node) {
|
||||||
|
rec(ast, child, code, result, depth + 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
rec(ast, node, code, &mut result, 0);
|
||||||
|
|
||||||
|
// Remove the trailing newline.
|
||||||
|
result.pop();
|
||||||
|
|
||||||
|
result
|
||||||
|
}
|
73
crates/haku/src/ast/walk.rs
Normal file
73
crates/haku/src/ast/walk.rs
Normal file
|
@ -0,0 +1,73 @@
|
||||||
|
use super::{Ast, NodeId, NodeKind};
|
||||||
|
|
||||||
|
impl Ast {
|
||||||
|
pub fn child(&self, parent: NodeId, kind: NodeKind) -> Option<NodeId> {
|
||||||
|
self.children(parent)
|
||||||
|
.iter()
|
||||||
|
.find(|&&child| self.kind(child) == kind)
|
||||||
|
.copied()
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn walk(&self, parent: NodeId) -> Walk<'_> {
|
||||||
|
Walk {
|
||||||
|
ast: self,
|
||||||
|
parent,
|
||||||
|
index: 0,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// An iterator over a node's children, with convenience methods for accessing those children.
|
||||||
|
#[derive(Clone)]
|
||||||
|
pub struct Walk<'a> {
|
||||||
|
ast: &'a Ast,
|
||||||
|
parent: NodeId,
|
||||||
|
index: usize,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> Walk<'a> {
|
||||||
|
/// Walk to the first non-Nil, non-Error, non-Token node.
|
||||||
|
pub fn node(&mut self) -> Option<NodeId> {
|
||||||
|
while let Some(id) = self.next() {
|
||||||
|
if !matches!(
|
||||||
|
self.ast.kind(id),
|
||||||
|
NodeKind::Nil | NodeKind::Token | NodeKind::Error
|
||||||
|
) {
|
||||||
|
return Some(id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Walk to the next [`node`][`Self::node`] of the given kind.
|
||||||
|
pub fn node_of(&mut self, kind: NodeKind) -> Option<NodeId> {
|
||||||
|
while let Some(id) = self.node() {
|
||||||
|
if self.ast.kind(id) == kind {
|
||||||
|
return Some(id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Find the first node of the given kind. This does not advance the iterator.
|
||||||
|
pub fn get(&self, kind: NodeKind) -> Option<NodeId> {
|
||||||
|
self.clone().find(|&id| self.ast.kind(id) == kind)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> Iterator for Walk<'a> {
|
||||||
|
type Item = NodeId;
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
let children = self.ast.children(self.parent);
|
||||||
|
if self.index < children.len() {
|
||||||
|
let index = self.index;
|
||||||
|
self.index += 1;
|
||||||
|
Some(children[index])
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -17,6 +17,8 @@ pub enum Opcode {
|
||||||
// Duplicate existing values.
|
// Duplicate existing values.
|
||||||
/// Push a value relative to the bottom of the current stack window.
|
/// Push a value relative to the bottom of the current stack window.
|
||||||
Local, // (index: u8)
|
Local, // (index: u8)
|
||||||
|
/// Set the value of a value relative to the bottom of the current stack window.
|
||||||
|
SetLocal, // (index: u8)
|
||||||
/// Push a captured value.
|
/// Push a captured value.
|
||||||
Capture, // (index: u8)
|
Capture, // (index: u8)
|
||||||
/// Get the value of a definition.
|
/// Get the value of a definition.
|
||||||
|
@ -24,12 +26,8 @@ pub enum Opcode {
|
||||||
/// Set the value of a definition.
|
/// Set the value of a definition.
|
||||||
SetDef, // (index: u16)
|
SetDef, // (index: u16)
|
||||||
|
|
||||||
/// Drop `number` values from the stack.
|
|
||||||
/// <!-- OwO -->
|
|
||||||
DropLet, // (number: u8)
|
|
||||||
|
|
||||||
// Create literal functions.
|
// Create literal functions.
|
||||||
Function, // (params: u8, then: u16), at `then`: (capture_count: u8, captures: [(source: u8, index: u8); capture_count])
|
Function, // (params: u8, then: u16), at `then`: (local_count: u8, capture_count: u8, captures: [(source: u8, index: u8); capture_count])
|
||||||
|
|
||||||
// Control flow.
|
// Control flow.
|
||||||
Jump, // (offset: u16)
|
Jump, // (offset: u16)
|
||||||
|
|
|
@ -6,9 +6,11 @@ use core::{
|
||||||
use alloc::vec::Vec;
|
use alloc::vec::Vec;
|
||||||
|
|
||||||
use crate::{
|
use crate::{
|
||||||
|
ast::{Ast, NodeId, NodeKind},
|
||||||
bytecode::{Chunk, DefError, Defs, EmitError, Opcode, CAPTURE_CAPTURE, CAPTURE_LOCAL},
|
bytecode::{Chunk, DefError, Defs, EmitError, Opcode, CAPTURE_CAPTURE, CAPTURE_LOCAL},
|
||||||
sexp::{Ast, NodeId, NodeKind, SourceCode, Span},
|
diagnostic::Diagnostic,
|
||||||
system::System,
|
source::SourceCode,
|
||||||
|
system::{System, SystemFnArity},
|
||||||
};
|
};
|
||||||
|
|
||||||
pub struct Source<'a> {
|
pub struct Source<'a> {
|
||||||
|
@ -17,12 +19,6 @@ pub struct Source<'a> {
|
||||||
pub system: &'a System,
|
pub system: &'a System,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy)]
|
|
||||||
pub struct Diagnostic {
|
|
||||||
pub span: Span,
|
|
||||||
pub message: &'static str,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy)]
|
#[derive(Debug, Clone, Copy)]
|
||||||
struct Local<'a> {
|
struct Local<'a> {
|
||||||
name: &'a str,
|
name: &'a str,
|
||||||
|
@ -46,6 +42,11 @@ pub struct Compiler<'a, 'b> {
|
||||||
scopes: Vec<Scope<'a>>,
|
scopes: Vec<Scope<'a>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Summary of a compiled closure, as reported by the compiler.
#[derive(Debug, Clone, Copy)]
pub struct ClosureSpec {
    /// Number of local variables the closure declares.
    pub(crate) local_count: u8,
}
|
||||||
|
|
||||||
impl<'a, 'b> Compiler<'a, 'b> {
|
impl<'a, 'b> Compiler<'a, 'b> {
|
||||||
pub fn new(defs: &'a mut Defs, chunk: &'b mut Chunk) -> Self {
|
pub fn new(defs: &'a mut Defs, chunk: &'b mut Chunk) -> Self {
|
||||||
Self {
|
Self {
|
||||||
|
@ -59,18 +60,22 @@ impl<'a, 'b> Compiler<'a, 'b> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn diagnose(&mut self, diagnostic: Diagnostic) {
|
fn emit(&mut self, diagnostic: Diagnostic) {
|
||||||
if self.diagnostics.len() >= self.diagnostics.capacity() {
|
if self.diagnostics.len() < self.diagnostics.capacity() {
|
||||||
return;
|
self.diagnostics.push(diagnostic);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if self.diagnostics.len() == self.diagnostics.capacity() - 1 {
|
pub fn closure_spec(&self) -> ClosureSpec {
|
||||||
self.diagnostics.push(Diagnostic {
|
ClosureSpec {
|
||||||
span: Span::new(0, 0),
|
local_count: self
|
||||||
message: "too many diagnostics emitted, stopping", // hello clangd!
|
.scopes
|
||||||
})
|
.last()
|
||||||
} else {
|
.unwrap()
|
||||||
self.diagnostics.push(diagnostic);
|
.locals
|
||||||
|
.len()
|
||||||
|
.try_into()
|
||||||
|
.unwrap_or_default(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -82,27 +87,51 @@ pub fn compile_expr<'a>(
|
||||||
src: &Source<'a>,
|
src: &Source<'a>,
|
||||||
node_id: NodeId,
|
node_id: NodeId,
|
||||||
) -> CompileResult {
|
) -> CompileResult {
|
||||||
let node = src.ast.get(node_id);
|
match src.ast.kind(node_id) {
|
||||||
match node.kind {
|
// The nil node is special, as it inhabits node ID 0.
|
||||||
NodeKind::Eof => unreachable!("eof node should never be emitted"),
|
NodeKind::Nil => {
|
||||||
|
unreachable!("Nil node should never be emitted (ParenEmpty is used for nil literals)")
|
||||||
|
}
|
||||||
|
// Tokens are trivia and should never be emitted---they're only useful for error reporting.
|
||||||
|
NodeKind::Token => unreachable!("Token node should never be emitted"),
|
||||||
|
// Op nodes are only used to provide a searching anchor for the operator in Unary and Binary.
|
||||||
|
NodeKind::Op => unreachable!("Op node should never be emitted"),
|
||||||
|
// Params nodes are only used to provide a searching anchor for Lambda parameters.
|
||||||
|
NodeKind::Params => unreachable!("Param node should never be emitted"),
|
||||||
|
// Param nodes are only used to provide a searching anchor for identifiers in Params nodes,
|
||||||
|
// as they may also contain commas and other trivia.
|
||||||
|
NodeKind::Param => unreachable!("Param node should never be emitted"),
|
||||||
|
|
||||||
|
NodeKind::Color => unsupported(c, src, node_id, "color literals are not implemented yet"),
|
||||||
|
|
||||||
NodeKind::Nil => compile_nil(c),
|
|
||||||
NodeKind::Ident => compile_ident(c, src, node_id),
|
NodeKind::Ident => compile_ident(c, src, node_id),
|
||||||
NodeKind::Number => compile_number(c, src, node_id),
|
NodeKind::Number => compile_number(c, src, node_id),
|
||||||
NodeKind::List(_, _) => compile_list(c, src, node_id),
|
NodeKind::Tag => compile_tag(c, src, node_id),
|
||||||
NodeKind::Toplevel(_) => compile_toplevel(c, src, node_id),
|
NodeKind::List => unsupported(c, src, node_id, "list literals are not implemented yet"),
|
||||||
|
|
||||||
NodeKind::Error(message) => {
|
NodeKind::Unary => compile_unary(c, src, node_id),
|
||||||
c.diagnose(Diagnostic {
|
NodeKind::Binary => compile_binary(c, src, node_id),
|
||||||
span: node.span,
|
NodeKind::Call => compile_call(c, src, node_id),
|
||||||
message,
|
NodeKind::Paren => compile_paren(c, src, node_id),
|
||||||
});
|
NodeKind::ParenEmpty => compile_nil(c),
|
||||||
Ok(())
|
NodeKind::Lambda => compile_lambda(c, src, node_id),
|
||||||
}
|
NodeKind::If => compile_if(c, src, node_id),
|
||||||
|
NodeKind::Let => compile_let(c, src, node_id),
|
||||||
|
|
||||||
|
NodeKind::Toplevel => compile_toplevel(c, src, node_id),
|
||||||
|
|
||||||
|
// Error nodes are ignored, because for each error node an appropriate parser
|
||||||
|
// diagnostic is emitted anyways.
|
||||||
|
NodeKind::Error => Ok(()),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn compile_nil(c: &mut Compiler<'_, '_>) -> CompileResult {
|
fn unsupported(c: &mut Compiler, src: &Source, node_id: NodeId, message: &str) -> CompileResult {
|
||||||
|
c.emit(Diagnostic::error(src.ast.span(node_id), message));
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn compile_nil(c: &mut Compiler) -> CompileResult {
|
||||||
c.chunk.emit_opcode(Opcode::Nil)?;
|
c.chunk.emit_opcode(Opcode::Nil)?;
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
|
@ -144,13 +173,10 @@ fn find_variable(
|
||||||
}
|
}
|
||||||
|
|
||||||
fn compile_ident<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, node_id: NodeId) -> CompileResult {
|
fn compile_ident<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, node_id: NodeId) -> CompileResult {
|
||||||
let ident = src.ast.get(node_id);
|
let span = src.ast.span(node_id);
|
||||||
let name = ident.span.slice(src.code);
|
let name = span.slice(src.code);
|
||||||
|
|
||||||
match name {
|
match find_variable(c, name, c.scopes.len() - 1) {
|
||||||
"false" => _ = c.chunk.emit_opcode(Opcode::False)?,
|
|
||||||
"true" => _ = c.chunk.emit_opcode(Opcode::True)?,
|
|
||||||
_ => match find_variable(c, name, c.scopes.len() - 1) {
|
|
||||||
Ok(Some(Variable::Local(index))) => {
|
Ok(Some(Variable::Local(index))) => {
|
||||||
c.chunk.emit_opcode(Opcode::Local)?;
|
c.chunk.emit_opcode(Opcode::Local)?;
|
||||||
c.chunk.emit_u8(index)?;
|
c.chunk.emit_u8(index)?;
|
||||||
|
@ -164,28 +190,22 @@ fn compile_ident<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, node_id: NodeId
|
||||||
c.chunk.emit_opcode(Opcode::Def)?;
|
c.chunk.emit_opcode(Opcode::Def)?;
|
||||||
c.chunk.emit_u16(def_id.to_u16())?;
|
c.chunk.emit_u16(def_id.to_u16())?;
|
||||||
} else {
|
} else {
|
||||||
c.diagnose(Diagnostic {
|
c.emit(Diagnostic::error(span, "undefined variable"));
|
||||||
span: ident.span,
|
|
||||||
message: "undefined variable",
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Err(CaptureError) => {
|
Err(CaptureError) => {
|
||||||
c.diagnose(Diagnostic {
|
c.emit(Diagnostic::error(
|
||||||
span: ident.span,
|
span,
|
||||||
message: "too many variables captured from outer functions in this scope",
|
"too many variables captured from outer functions in this scope",
|
||||||
});
|
));
|
||||||
}
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
|
};
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn compile_number(c: &mut Compiler<'_, '_>, src: &Source<'_>, node_id: NodeId) -> CompileResult {
|
fn compile_number(c: &mut Compiler<'_, '_>, src: &Source<'_>, node_id: NodeId) -> CompileResult {
|
||||||
let node = src.ast.get(node_id);
|
let literal = src.ast.span(node_id).slice(src.code);
|
||||||
|
|
||||||
let literal = node.span.slice(src.code);
|
|
||||||
let float: f32 = literal
|
let float: f32 = literal
|
||||||
.parse()
|
.parse()
|
||||||
.expect("the parser should've gotten us a string parsable by the stdlib");
|
.expect("the parser should've gotten us a string parsable by the stdlib");
|
||||||
|
@ -196,48 +216,130 @@ fn compile_number(c: &mut Compiler<'_, '_>, src: &Source<'_>, node_id: NodeId) -
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn compile_list<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, node_id: NodeId) -> CompileResult {
|
fn compile_tag(c: &mut Compiler<'_, '_>, src: &Source, node_id: NodeId) -> CompileResult {
|
||||||
let NodeKind::List(function_id, args) = src.ast.get(node_id).kind else {
|
let tag = src.ast.span(node_id).slice(src.code);
|
||||||
unreachable!("compile_list expects a List");
|
|
||||||
};
|
|
||||||
|
|
||||||
let function = src.ast.get(function_id);
|
match tag {
|
||||||
let name = function.span.slice(src.code);
|
"False" => {
|
||||||
|
c.chunk.emit_opcode(Opcode::False)?;
|
||||||
if function.kind == NodeKind::Ident {
|
}
|
||||||
match name {
|
"True" => {
|
||||||
"fn" => return compile_fn(c, src, args),
|
c.chunk.emit_opcode(Opcode::True)?;
|
||||||
"if" => return compile_if(c, src, args),
|
}
|
||||||
"let" => return compile_let(c, src, args),
|
_ => {
|
||||||
_ => (),
|
c.emit(Diagnostic::error(src.ast.span(node_id), "uppercased identifiers are reserved for future use; please start your identifiers with a lowercase letter instead"));
|
||||||
};
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn compile_unary<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, node_id: NodeId) -> CompileResult {
|
||||||
|
let mut walk = src.ast.walk(node_id);
|
||||||
|
let Some(op) = walk.node() else { return Ok(()) };
|
||||||
|
let Some(expr) = walk.node() else {
|
||||||
|
return Ok(());
|
||||||
|
};
|
||||||
|
|
||||||
|
if src.ast.kind(op) != NodeKind::Op {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
let name = src.ast.span(op).slice(src.code);
|
||||||
|
|
||||||
|
compile_expr(c, src, expr)?;
|
||||||
|
if let Some(index) = (src.system.resolve_fn)(SystemFnArity::Unary, name) {
|
||||||
|
let argument_count = 1;
|
||||||
|
c.chunk.emit_opcode(Opcode::System)?;
|
||||||
|
c.chunk.emit_u8(index)?;
|
||||||
|
c.chunk.emit_u8(argument_count)?;
|
||||||
|
} else {
|
||||||
|
c.emit(Diagnostic::error(
|
||||||
|
src.ast.span(op),
|
||||||
|
"this unary operator is currently unimplemented",
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn compile_binary<'a>(
|
||||||
|
c: &mut Compiler<'a, '_>,
|
||||||
|
src: &Source<'a>,
|
||||||
|
node_id: NodeId,
|
||||||
|
) -> CompileResult {
|
||||||
|
let mut walk = src.ast.walk(node_id);
|
||||||
|
let Some(left) = walk.node() else {
|
||||||
|
return Ok(());
|
||||||
|
};
|
||||||
|
let Some(op) = walk.node() else { return Ok(()) };
|
||||||
|
let Some(right) = walk.node() else {
|
||||||
|
return Ok(());
|
||||||
|
};
|
||||||
|
|
||||||
|
if src.ast.kind(op) != NodeKind::Op {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
let name = src.ast.span(op).slice(src.code);
|
||||||
|
|
||||||
|
if name == "=" {
|
||||||
|
c.emit(Diagnostic::error(
|
||||||
|
src.ast.span(op),
|
||||||
|
"defs `a = b` may only appear at the top level",
|
||||||
|
));
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
|
||||||
|
compile_expr(c, src, left)?;
|
||||||
|
compile_expr(c, src, right)?;
|
||||||
|
if let Some(index) = (src.system.resolve_fn)(SystemFnArity::Binary, name) {
|
||||||
|
let argument_count = 2;
|
||||||
|
c.chunk.emit_opcode(Opcode::System)?;
|
||||||
|
c.chunk.emit_u8(index)?;
|
||||||
|
c.chunk.emit_u8(argument_count)?;
|
||||||
|
} else {
|
||||||
|
c.emit(Diagnostic::error(
|
||||||
|
src.ast.span(op),
|
||||||
|
"this unary operator is currently unimplemented",
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn compile_call<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, node_id: NodeId) -> CompileResult {
|
||||||
|
let mut walk = src.ast.walk(node_id);
|
||||||
|
let Some(func) = walk.node() else {
|
||||||
|
return Ok(());
|
||||||
|
};
|
||||||
|
let name = src.ast.span(func).slice(src.code);
|
||||||
|
|
||||||
let mut argument_count = 0;
|
let mut argument_count = 0;
|
||||||
let mut args = args;
|
while let Some(arg) = walk.node() {
|
||||||
while let NodeKind::List(head, tail) = src.ast.get(args).kind {
|
compile_expr(c, src, arg)?;
|
||||||
compile_expr(c, src, head)?;
|
|
||||||
argument_count += 1;
|
argument_count += 1;
|
||||||
args = tail;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let argument_count = u8::try_from(argument_count).unwrap_or_else(|_| {
|
let argument_count = u8::try_from(argument_count).unwrap_or_else(|_| {
|
||||||
c.diagnose(Diagnostic {
|
c.emit(Diagnostic::error(
|
||||||
span: src.ast.get(args).span,
|
src.ast.span(node_id),
|
||||||
message: "function call has too many arguments",
|
"function call has too many arguments",
|
||||||
});
|
));
|
||||||
0
|
0
|
||||||
});
|
});
|
||||||
|
|
||||||
if let (NodeKind::Ident, Some(index)) = (function.kind, (src.system.resolve_fn)(name)) {
|
if let (NodeKind::Ident, Some(index)) = (
|
||||||
|
src.ast.kind(func),
|
||||||
|
(src.system.resolve_fn)(SystemFnArity::Nary, name),
|
||||||
|
) {
|
||||||
c.chunk.emit_opcode(Opcode::System)?;
|
c.chunk.emit_opcode(Opcode::System)?;
|
||||||
c.chunk.emit_u8(index)?;
|
c.chunk.emit_u8(index)?;
|
||||||
c.chunk.emit_u8(argument_count)?;
|
c.chunk.emit_u8(argument_count)?;
|
||||||
} else {
|
} else {
|
||||||
// This is a bit of an oddity: we only emit the function expression _after_ the arguments,
|
// This is a bit of an oddity: we only emit the function expression _after_ the arguments,
|
||||||
// but since the language is effectless this doesn't matter in practice.
|
// but since the language is effectless this doesn't matter in practice.
|
||||||
// It makes for less code in the compiler and the VM.
|
// It makes for a bit less code in the VM, since there's no need to find the function
|
||||||
compile_expr(c, src, function_id)?;
|
// down the stack - it's always on top.
|
||||||
|
compile_expr(c, src, func)?;
|
||||||
c.chunk.emit_opcode(Opcode::Call)?;
|
c.chunk.emit_opcode(Opcode::Call)?;
|
||||||
c.chunk.emit_u8(argument_count)?;
|
c.chunk.emit_u8(argument_count)?;
|
||||||
}
|
}
|
||||||
|
@ -245,67 +347,28 @@ fn compile_list<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, node_id: NodeId)
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
struct WalkList {
|
fn compile_paren<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, node_id: NodeId) -> CompileResult {
|
||||||
current: NodeId,
|
let Some(inner) = src.ast.walk(node_id).node() else {
|
||||||
ok: bool,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl WalkList {
|
|
||||||
fn new(start: NodeId) -> Self {
|
|
||||||
Self {
|
|
||||||
current: start,
|
|
||||||
ok: true,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn expect_arg(
|
|
||||||
&mut self,
|
|
||||||
c: &mut Compiler<'_, '_>,
|
|
||||||
src: &Source<'_>,
|
|
||||||
message: &'static str,
|
|
||||||
) -> NodeId {
|
|
||||||
if !self.ok {
|
|
||||||
return NodeId::NIL;
|
|
||||||
}
|
|
||||||
|
|
||||||
if let NodeKind::List(expr, tail) = src.ast.get(self.current).kind {
|
|
||||||
self.current = tail;
|
|
||||||
expr
|
|
||||||
} else {
|
|
||||||
c.diagnose(Diagnostic {
|
|
||||||
span: src.ast.get(self.current).span,
|
|
||||||
message,
|
|
||||||
});
|
|
||||||
self.ok = false;
|
|
||||||
NodeId::NIL
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn expect_nil(&mut self, c: &mut Compiler<'_, '_>, src: &Source<'_>, message: &'static str) {
|
|
||||||
if src.ast.get(self.current).kind != NodeKind::Nil {
|
|
||||||
c.diagnose(Diagnostic {
|
|
||||||
span: src.ast.get(self.current).span,
|
|
||||||
message,
|
|
||||||
});
|
|
||||||
// NOTE: Don't set self.ok to false, since this is not a fatal error.
|
|
||||||
// The nodes returned previously are valid and therefore it's safe to operate on them.
|
|
||||||
// Just having extra arguments shouldn't inhibit emitting additional diagnostics in
|
|
||||||
// the expression.
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn compile_if<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, args: NodeId) -> CompileResult {
|
|
||||||
let mut list = WalkList::new(args);
|
|
||||||
|
|
||||||
let condition = list.expect_arg(c, src, "missing `if` condition");
|
|
||||||
let if_true = list.expect_arg(c, src, "missing `if` true branch");
|
|
||||||
let if_false = list.expect_arg(c, src, "missing `if` false branch");
|
|
||||||
list.expect_nil(c, src, "extra arguments after `if` false branch");
|
|
||||||
|
|
||||||
if !list.ok {
|
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
};
|
||||||
|
|
||||||
|
compile_expr(c, src, inner)?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn compile_if<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, node_id: NodeId) -> CompileResult {
|
||||||
|
let mut walk = src.ast.walk(node_id);
|
||||||
|
|
||||||
|
let Some(condition) = walk.node() else {
|
||||||
|
return Ok(());
|
||||||
|
};
|
||||||
|
let Some(if_true) = walk.node() else {
|
||||||
|
return Ok(());
|
||||||
|
};
|
||||||
|
let Some(if_false) = walk.node() else {
|
||||||
|
return Ok(());
|
||||||
|
};
|
||||||
|
|
||||||
compile_expr(c, src, condition)?;
|
compile_expr(c, src, condition)?;
|
||||||
|
|
||||||
|
@ -328,113 +391,70 @@ fn compile_if<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, args: NodeId) -> C
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn compile_let<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, args: NodeId) -> CompileResult {
|
fn compile_let<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, node_id: NodeId) -> CompileResult {
|
||||||
let mut list = WalkList::new(args);
|
let mut walk = src.ast.walk(node_id);
|
||||||
|
|
||||||
let binding_list = list.expect_arg(c, src, "missing `let` binding list ((x 1) (y 2) ...)");
|
let Some(ident) = walk.node() else {
|
||||||
let expr = list.expect_arg(c, src, "missing expression to `let` names into");
|
|
||||||
list.expect_nil(c, src, "extra arguments after `let` expression");
|
|
||||||
|
|
||||||
if !list.ok {
|
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
};
|
||||||
|
let Some(expr) = walk.node() else {
|
||||||
// NOTE: Our `let` behaves like `let*` from Lisps.
|
return Ok(());
|
||||||
// This is because this is generally the more intuitive behaviour with how variable declarations
|
};
|
||||||
// work in traditional imperative languages.
|
let Some(then) = walk.node() else {
|
||||||
// We do not offer an alternative to Lisp `let` to be as minimal as possible.
|
return Ok(());
|
||||||
|
};
|
||||||
let mut current = binding_list;
|
|
||||||
let mut local_count: usize = 0;
|
|
||||||
while let NodeKind::List(head, tail) = src.ast.get(current).kind {
|
|
||||||
if !matches!(src.ast.get(head).kind, NodeKind::List(_, _)) {
|
|
||||||
c.diagnose(Diagnostic {
|
|
||||||
span: src.ast.get(head).span,
|
|
||||||
message: "`let` binding expected, like (x 1)",
|
|
||||||
});
|
|
||||||
current = tail;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut list = WalkList::new(head);
|
|
||||||
let ident = list.expect_arg(c, src, "binding name expected");
|
|
||||||
let value = list.expect_arg(c, src, "binding value expected");
|
|
||||||
list.expect_nil(c, src, "extra expressions after `let` binding value");
|
|
||||||
|
|
||||||
if src.ast.get(ident).kind != NodeKind::Ident {
|
|
||||||
c.diagnose(Diagnostic {
|
|
||||||
span: src.ast.get(ident).span,
|
|
||||||
message: "binding name must be an identifier",
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
// NOTE: Compile expression _before_ putting the value into scope.
|
|
||||||
// This is so that the variable cannot refer to itself, as it is yet to be declared.
|
|
||||||
compile_expr(c, src, value)?;
|
|
||||||
|
|
||||||
let name = src.ast.get(ident).span.slice(src.code);
|
|
||||||
let scope = c.scopes.last_mut().unwrap();
|
|
||||||
if scope.locals.len() >= u8::MAX as usize {
|
|
||||||
c.diagnose(Diagnostic {
|
|
||||||
span: src.ast.get(ident).span,
|
|
||||||
message: "too many names bound in this function at a single time",
|
|
||||||
});
|
|
||||||
} else {
|
|
||||||
scope.locals.push(Local { name });
|
|
||||||
}
|
|
||||||
|
|
||||||
local_count += 1;
|
|
||||||
current = tail;
|
|
||||||
}
|
|
||||||
|
|
||||||
compile_expr(c, src, expr)?;
|
compile_expr(c, src, expr)?;
|
||||||
|
let name = src.ast.span(ident).slice(src.code);
|
||||||
let scope = c.scopes.last_mut().unwrap();
|
let scope = c.scopes.last_mut().unwrap();
|
||||||
scope
|
let index = if scope.locals.len() >= u8::MAX as usize {
|
||||||
.locals
|
c.emit(Diagnostic::error(
|
||||||
.resize_with(scope.locals.len() - local_count, || unreachable!());
|
src.ast.span(ident),
|
||||||
|
"too many names bound in this function at a single time",
|
||||||
|
));
|
||||||
|
|
||||||
// NOTE: If we reach more than 255 locals declared in our `let`, we should've gotten
|
// Don't emit the expression, because it will most likely contain errors due to this
|
||||||
// a diagnostic emitted in the `while` loop beforehand.
|
// `let` failing.
|
||||||
let local_count = u8::try_from(local_count).unwrap_or(0);
|
return Ok(());
|
||||||
c.chunk.emit_opcode(Opcode::DropLet)?;
|
} else {
|
||||||
c.chunk.emit_u8(local_count)?;
|
let index = scope.locals.len();
|
||||||
|
scope.locals.push(Local { name });
|
||||||
|
index as u8
|
||||||
|
};
|
||||||
|
c.chunk.emit_opcode(Opcode::SetLocal)?;
|
||||||
|
c.chunk.emit_u8(index)?;
|
||||||
|
|
||||||
|
compile_expr(c, src, then)?;
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn compile_fn<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, args: NodeId) -> CompileResult {
|
fn compile_lambda<'a>(
|
||||||
let mut list = WalkList::new(args);
|
c: &mut Compiler<'a, '_>,
|
||||||
|
src: &Source<'a>,
|
||||||
let param_list = list.expect_arg(c, src, "missing function parameters");
|
node_id: NodeId,
|
||||||
let body = list.expect_arg(c, src, "missing function body");
|
) -> CompileResult {
|
||||||
list.expect_nil(c, src, "extra arguments after function body");
|
let mut walk = src.ast.walk(node_id);
|
||||||
|
let Some(params) = walk.node() else {
|
||||||
if !list.ok {
|
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
};
|
||||||
|
let Some(body) = walk.node() else {
|
||||||
|
return Ok(());
|
||||||
|
};
|
||||||
|
|
||||||
let mut locals = Vec::new();
|
let mut locals = Vec::new();
|
||||||
let mut current = param_list;
|
let mut params_walk = src.ast.walk(params);
|
||||||
while let NodeKind::List(ident, tail) = src.ast.get(current).kind {
|
while let Some(param) = params_walk.node() {
|
||||||
if let NodeKind::Ident = src.ast.get(ident).kind {
|
|
||||||
locals.push(Local {
|
locals.push(Local {
|
||||||
name: src.ast.get(ident).span.slice(src.code),
|
name: src.ast.span(param).slice(src.code),
|
||||||
})
|
});
|
||||||
} else {
|
|
||||||
c.diagnose(Diagnostic {
|
|
||||||
span: src.ast.get(ident).span,
|
|
||||||
message: "function parameters must be identifiers",
|
|
||||||
})
|
|
||||||
}
|
|
||||||
current = tail;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let param_count = u8::try_from(locals.len()).unwrap_or_else(|_| {
|
let param_count = u8::try_from(locals.len()).unwrap_or_else(|_| {
|
||||||
c.diagnose(Diagnostic {
|
c.emit(Diagnostic::error(
|
||||||
span: src.ast.get(param_list).span,
|
src.ast.span(params),
|
||||||
message: "too many function parameters",
|
"too many function parameters",
|
||||||
});
|
));
|
||||||
0
|
0
|
||||||
});
|
});
|
||||||
|
|
||||||
|
@ -453,13 +473,21 @@ fn compile_fn<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, args: NodeId) -> C
|
||||||
c.chunk.patch_u16(after_offset, after);
|
c.chunk.patch_u16(after_offset, after);
|
||||||
|
|
||||||
let scope = c.scopes.pop().unwrap();
|
let scope = c.scopes.pop().unwrap();
|
||||||
let capture_count = u8::try_from(scope.captures.len()).unwrap_or_else(|_| {
|
let local_count = u8::try_from(scope.locals.len()).unwrap_or_else(|_| {
|
||||||
c.diagnose(Diagnostic {
|
c.emit(Diagnostic::error(
|
||||||
span: src.ast.get(body).span,
|
src.ast.span(body),
|
||||||
message: "function refers to too many variables from the outer function",
|
"function contains too many local variables",
|
||||||
});
|
));
|
||||||
0
|
0
|
||||||
});
|
});
|
||||||
|
let capture_count = u8::try_from(scope.captures.len()).unwrap_or_else(|_| {
|
||||||
|
c.emit(Diagnostic::error(
|
||||||
|
src.ast.span(body),
|
||||||
|
"function refers to too many variables from its outer functions",
|
||||||
|
));
|
||||||
|
0
|
||||||
|
});
|
||||||
|
c.chunk.emit_u8(local_count)?;
|
||||||
c.chunk.emit_u8(capture_count)?;
|
c.chunk.emit_u8(capture_count)?;
|
||||||
for capture in scope.captures {
|
for capture in scope.captures {
|
||||||
match capture {
|
match capture {
|
||||||
|
@ -484,31 +512,27 @@ fn compile_toplevel<'a>(
|
||||||
src: &Source<'a>,
|
src: &Source<'a>,
|
||||||
node_id: NodeId,
|
node_id: NodeId,
|
||||||
) -> CompileResult {
|
) -> CompileResult {
|
||||||
let NodeKind::Toplevel(mut current) = src.ast.get(node_id).kind else {
|
def_prepass(c, src, node_id)?;
|
||||||
unreachable!("compile_toplevel expects a Toplevel");
|
|
||||||
};
|
|
||||||
|
|
||||||
def_prepass(c, src, current)?;
|
let mut walk = src.ast.walk(node_id);
|
||||||
|
let mut result_expr = None;
|
||||||
|
while let Some(toplevel_expr) = walk.node() {
|
||||||
|
if let Some(result_expr) = result_expr {
|
||||||
|
// TODO: This diagnostic should show you the expression after the result.
|
||||||
|
c.emit(Diagnostic::error(
|
||||||
|
src.ast.span(result_expr),
|
||||||
|
"the result value must be the last thing in the program",
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
let mut had_result = false;
|
match compile_toplevel_expr(c, src, toplevel_expr)? {
|
||||||
while let NodeKind::List(expr, tail) = src.ast.get(current).kind {
|
|
||||||
match compile_toplevel_expr(c, src, expr)? {
|
|
||||||
ToplevelExpr::Def => (),
|
ToplevelExpr::Def => (),
|
||||||
ToplevelExpr::Result => had_result = true,
|
ToplevelExpr::Result if result_expr.is_none() => result_expr = Some(toplevel_expr),
|
||||||
|
ToplevelExpr::Result => (),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if had_result && src.ast.get(tail).kind != NodeKind::Nil {
|
if result_expr.is_none() {
|
||||||
c.diagnose(Diagnostic {
|
|
||||||
span: src.ast.get(tail).span,
|
|
||||||
message: "result value may not be followed by anything else",
|
|
||||||
});
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
current = tail;
|
|
||||||
}
|
|
||||||
|
|
||||||
if !had_result {
|
|
||||||
c.chunk.emit_opcode(Opcode::Nil)?;
|
c.chunk.emit_opcode(Opcode::Nil)?;
|
||||||
}
|
}
|
||||||
c.chunk.emit_opcode(Opcode::Return)?;
|
c.chunk.emit_opcode(Opcode::Return)?;
|
||||||
|
@ -516,38 +540,30 @@ fn compile_toplevel<'a>(
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn def_prepass<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, node_id: NodeId) -> CompileResult {
|
fn def_prepass<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, toplevel: NodeId) -> CompileResult {
|
||||||
|
let mut walk = src.ast.walk(toplevel);
|
||||||
|
|
||||||
// This is a bit of a pattern matching tapeworm, but Rust unfortunately doesn't have `if let`
|
// This is a bit of a pattern matching tapeworm, but Rust unfortunately doesn't have `if let`
|
||||||
// chains yet to make this more readable.
|
// chains yet to make this more readable.
|
||||||
let mut current = node_id;
|
while let Some(binary) = walk.node_of(NodeKind::Binary) {
|
||||||
while let NodeKind::List(expr, tail) = src.ast.get(current).kind {
|
let mut binary_walk = src.ast.walk(binary);
|
||||||
if let NodeKind::List(head_id, tail_id) = src.ast.get(expr).kind {
|
if let (Some(ident), Some(op)) = (binary_walk.node(), binary_walk.get(NodeKind::Op)) {
|
||||||
let head = src.ast.get(head_id);
|
if src.ast.span(op).slice(src.code) == "=" {
|
||||||
let name = head.span.slice(src.code);
|
let name = src.ast.span(ident).slice(src.code);
|
||||||
if head.kind == NodeKind::Ident && name == "def" {
|
|
||||||
if let NodeKind::List(ident_id, _) = src.ast.get(tail_id).kind {
|
|
||||||
let ident = src.ast.get(ident_id);
|
|
||||||
if ident.kind == NodeKind::Ident {
|
|
||||||
let name = ident.span.slice(src.code);
|
|
||||||
match c.defs.add(name) {
|
match c.defs.add(name) {
|
||||||
Ok(_) => (),
|
Ok(_) => (),
|
||||||
Err(DefError::Exists) => c.diagnose(Diagnostic {
|
Err(DefError::Exists) => c.emit(Diagnostic::error(
|
||||||
span: ident.span,
|
src.ast.span(ident),
|
||||||
message: "redefinitions of defs are not allowed",
|
"a def with this name already exists",
|
||||||
}),
|
)),
|
||||||
Err(DefError::OutOfSpace) => c.diagnose(Diagnostic {
|
Err(DefError::OutOfSpace) => {
|
||||||
span: ident.span,
|
c.emit(Diagnostic::error(src.ast.span(binary), "too many defs"))
|
||||||
message: "too many defs",
|
|
||||||
}),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
current = tail;
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -562,14 +578,10 @@ fn compile_toplevel_expr<'a>(
|
||||||
src: &Source<'a>,
|
src: &Source<'a>,
|
||||||
node_id: NodeId,
|
node_id: NodeId,
|
||||||
) -> CompileResult<ToplevelExpr> {
|
) -> CompileResult<ToplevelExpr> {
|
||||||
let node = src.ast.get(node_id);
|
if src.ast.kind(node_id) == NodeKind::Binary {
|
||||||
|
if let Some(op) = src.ast.walk(node_id).get(NodeKind::Op) {
|
||||||
if let NodeKind::List(head_id, tail_id) = node.kind {
|
if src.ast.span(op).slice(src.code) == "=" {
|
||||||
let head = src.ast.get(head_id);
|
compile_def(c, src, node_id)?;
|
||||||
if head.kind == NodeKind::Ident {
|
|
||||||
let name = head.span.slice(src.code);
|
|
||||||
if name == "def" {
|
|
||||||
compile_def(c, src, tail_id)?;
|
|
||||||
return Ok(ToplevelExpr::Def);
|
return Ok(ToplevelExpr::Def);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -579,24 +591,32 @@ fn compile_toplevel_expr<'a>(
|
||||||
Ok(ToplevelExpr::Result)
|
Ok(ToplevelExpr::Result)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn compile_def<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, args: NodeId) -> CompileResult {
|
fn compile_def<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, node_id: NodeId) -> CompileResult {
|
||||||
let mut list = WalkList::new(args);
|
let mut walk = src.ast.walk(node_id);
|
||||||
|
let Some(left) = walk.node() else {
|
||||||
let ident = list.expect_arg(c, src, "missing definition name");
|
|
||||||
let value = list.expect_arg(c, src, "missing definition value");
|
|
||||||
list.expect_nil(c, src, "extra arguments after definition");
|
|
||||||
|
|
||||||
if !list.ok {
|
|
||||||
return Ok(());
|
return Ok(());
|
||||||
|
};
|
||||||
|
let Some(_op) = walk.node() else {
|
||||||
|
return Ok(());
|
||||||
|
};
|
||||||
|
let Some(right) = walk.node() else {
|
||||||
|
return Ok(());
|
||||||
|
};
|
||||||
|
|
||||||
|
if src.ast.kind(left) != NodeKind::Ident {
|
||||||
|
c.emit(Diagnostic::error(
|
||||||
|
src.ast.span(left),
|
||||||
|
"def name (identifier) expected",
|
||||||
|
));
|
||||||
}
|
}
|
||||||
|
|
||||||
let name = src.ast.get(ident).span.slice(src.code);
|
let name = src.ast.span(left).slice(src.code);
|
||||||
// NOTE: def_prepass collects all definitions beforehand.
|
// NOTE: def_prepass collects all definitions beforehand.
|
||||||
// In case a def ends up not existing, that means we ran out of space for defs - so emit a
|
// In case a def ends up not existing, that means we ran out of space for defs - so emit a
|
||||||
// zero def instead.
|
// zero def instead.
|
||||||
let def_id = c.defs.get(name).unwrap_or_default();
|
let def_id = c.defs.get(name).unwrap_or_default();
|
||||||
|
|
||||||
compile_expr(c, src, value)?;
|
compile_expr(c, src, right)?;
|
||||||
c.chunk.emit_opcode(Opcode::SetDef)?;
|
c.chunk.emit_opcode(Opcode::SetDef)?;
|
||||||
c.chunk.emit_u16(def_id.to_u16())?;
|
c.chunk.emit_u16(def_id.to_u16())?;
|
||||||
|
|
||||||
|
|
26
crates/haku/src/diagnostic.rs
Normal file
26
crates/haku/src/diagnostic.rs
Normal file
|
@ -0,0 +1,26 @@
|
||||||
|
use alloc::string::String;
|
||||||
|
|
||||||
|
use crate::source::Span;
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct Diagnostic {
|
||||||
|
span: Span,
|
||||||
|
message: String,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Diagnostic {
|
||||||
|
pub fn error(span: Span, message: impl Into<String>) -> Self {
|
||||||
|
Self {
|
||||||
|
span,
|
||||||
|
message: message.into(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn span(&self) -> Span {
|
||||||
|
self.span
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn message(&self) -> &str {
|
||||||
|
&self.message
|
||||||
|
}
|
||||||
|
}
|
237
crates/haku/src/lexer.rs
Normal file
237
crates/haku/src/lexer.rs
Normal file
|
@ -0,0 +1,237 @@
|
||||||
|
use alloc::vec::Vec;
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
diagnostic::Diagnostic,
|
||||||
|
source::{SourceCode, Span},
|
||||||
|
token::{Lexis, TokenAllocError, TokenKind},
|
||||||
|
};
|
||||||
|
|
||||||
|
pub struct Lexer<'a> {
|
||||||
|
pub lexis: Lexis,
|
||||||
|
pub diagnostics: Vec<Diagnostic>,
|
||||||
|
input: &'a SourceCode,
|
||||||
|
position: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> Lexer<'a> {
|
||||||
|
pub fn new(lexis: Lexis, input: &'a SourceCode) -> Self {
|
||||||
|
Self {
|
||||||
|
lexis,
|
||||||
|
diagnostics: Vec::new(),
|
||||||
|
input,
|
||||||
|
position: 0,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn current(&self) -> char {
|
||||||
|
self.input[self.position as usize..]
|
||||||
|
.chars()
|
||||||
|
.next()
|
||||||
|
.unwrap_or('\0')
|
||||||
|
}
|
||||||
|
|
||||||
|
fn advance(&mut self) {
|
||||||
|
self.position += self.current().len_utf8() as u32;
|
||||||
|
}
|
||||||
|
|
||||||
|
fn emit(&mut self, diagnostic: Diagnostic) {
|
||||||
|
if self.diagnostics.len() < self.diagnostics.capacity() {
|
||||||
|
self.diagnostics.push(diagnostic);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Consumes a single character and yields `kind` as the resulting token kind.
fn one(l: &mut Lexer<'_>, kind: TokenKind) -> TokenKind {
    // The caller has already decided what this character is; all that's left is to skip it.
    l.advance();
    kind
}
|
||||||
|
|
||||||
|
/// Lexes a token that is either one or two characters long.
///
/// The first character is always consumed. If the character after it is `c2`, that one is
/// consumed as well and the result is `kind2`; otherwise the result is `kind1`.
fn one_or_two(l: &mut Lexer<'_>, kind1: TokenKind, c2: char, kind2: TokenKind) -> TokenKind {
    l.advance();
    if l.current() != c2 {
        return kind1;
    }
    l.advance();
    kind2
}
|
||||||
|
|
||||||
|
/// Returns whether `c` may appear in an identifier: an ASCII letter, an ASCII digit, or `_`.
fn is_ident_char(c: char) -> bool {
    c == '_' || c.is_ascii_alphanumeric()
}
|
||||||
|
|
||||||
|
fn ident(l: &mut Lexer<'_>) -> TokenKind {
|
||||||
|
let start = l.position;
|
||||||
|
while is_ident_char(l.current()) {
|
||||||
|
l.advance();
|
||||||
|
}
|
||||||
|
let end = l.position;
|
||||||
|
|
||||||
|
match Span::new(start, end).slice(l.input) {
|
||||||
|
"_" => TokenKind::Underscore,
|
||||||
|
"and" => TokenKind::And,
|
||||||
|
"or" => TokenKind::Or,
|
||||||
|
"if" => TokenKind::If,
|
||||||
|
"else" => TokenKind::Else,
|
||||||
|
"let" => TokenKind::Let,
|
||||||
|
_ => TokenKind::Ident,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn tag(l: &mut Lexer<'_>) -> TokenKind {
|
||||||
|
while is_ident_char(l.current()) {
|
||||||
|
l.advance();
|
||||||
|
}
|
||||||
|
TokenKind::Tag
|
||||||
|
}
|
||||||
|
|
||||||
|
// NOTE: You shouldn't expect that the numbers produced by the lexer are parsable.
|
||||||
|
fn number(l: &mut Lexer<'_>) -> TokenKind {
|
||||||
|
while l.current().is_ascii_digit() {
|
||||||
|
l.advance();
|
||||||
|
}
|
||||||
|
|
||||||
|
if l.current() == '.' {
|
||||||
|
let dot = l.position;
|
||||||
|
l.advance();
|
||||||
|
if !l.current().is_ascii_digit() {
|
||||||
|
l.emit(Diagnostic::error(
|
||||||
|
Span::new(dot, l.position),
|
||||||
|
"there must be at least a single digit after the decimal point",
|
||||||
|
));
|
||||||
|
}
|
||||||
|
while l.current().is_ascii_digit() {
|
||||||
|
l.advance();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TokenKind::Number
|
||||||
|
}
|
||||||
|
|
||||||
|
// NOTE: You shouldn't expect that the color literals produced by the lexer are parsable.
|
||||||
|
fn color(l: &mut Lexer<'_>) -> TokenKind {
|
||||||
|
let hash = l.position;
|
||||||
|
l.advance(); // #
|
||||||
|
|
||||||
|
if !l.current().is_ascii_hexdigit() {
|
||||||
|
l.emit(Diagnostic::error(
|
||||||
|
Span::new(hash, l.position),
|
||||||
|
"hex digits expected after `#` (color literal)",
|
||||||
|
));
|
||||||
|
}
|
||||||
|
|
||||||
|
let start = l.position;
|
||||||
|
while l.current().is_ascii_hexdigit() {
|
||||||
|
l.advance();
|
||||||
|
}
|
||||||
|
let len = l.position - start;
|
||||||
|
|
||||||
|
if !matches!(len, 3 | 4 | 6 | 8) {
|
||||||
|
l.emit(Diagnostic::error(Span::new(hash, l.position), "incorrect number of digits in color literal (must be #RGB, #RGBA, #RRGGBB, or #RRGGBBAA)"));
|
||||||
|
}
|
||||||
|
|
||||||
|
TokenKind::Color
|
||||||
|
}
|
||||||
|
|
||||||
|
fn whitespace_and_comments(l: &mut Lexer<'_>) {
|
||||||
|
loop {
|
||||||
|
match l.current() {
|
||||||
|
'-' => {
|
||||||
|
let position = l.position;
|
||||||
|
l.advance();
|
||||||
|
if l.current() == '-' {
|
||||||
|
while l.current() != '\n' {
|
||||||
|
l.advance();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// An unfortunate little bit of backtracking here;
|
||||||
|
// This seems like the simplest possible solution though.
|
||||||
|
// We don't treat comments as a separate token to simplify the parsing phase,
|
||||||
|
// and because of this, handling this at the "real" token level would complicate
|
||||||
|
// things quite a bit.
|
||||||
|
l.position = position;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
' ' | '\r' | '\t' => l.advance(),
|
||||||
|
|
||||||
|
_ => break,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn newline(l: &mut Lexer<'_>) -> (TokenKind, Span) {
|
||||||
|
let start = l.position;
|
||||||
|
l.advance(); // skip the initial newline
|
||||||
|
let end = l.position;
|
||||||
|
|
||||||
|
// Skip additional newlines after this one, to only produce one token.
|
||||||
|
// These do not count into this newline's span though.
|
||||||
|
loop {
|
||||||
|
whitespace_and_comments(l);
|
||||||
|
if l.current() == '\n' {
|
||||||
|
l.advance();
|
||||||
|
continue;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
(TokenKind::Newline, Span::new(start, end))
|
||||||
|
}
|
||||||
|
|
||||||
|
fn token(l: &mut Lexer<'_>) -> (TokenKind, Span) {
|
||||||
|
whitespace_and_comments(l);
|
||||||
|
|
||||||
|
let start = l.position;
|
||||||
|
let kind = match l.current() {
|
||||||
|
'\0' => TokenKind::Eof,
|
||||||
|
|
||||||
|
// NOTE: Order matters here. Numbers and tags take priority over identifers.
|
||||||
|
c if c.is_ascii_uppercase() => tag(l),
|
||||||
|
c if c.is_ascii_digit() => number(l),
|
||||||
|
c if is_ident_char(c) => ident(l),
|
||||||
|
|
||||||
|
'#' => color(l),
|
||||||
|
|
||||||
|
'+' => one(l, TokenKind::Plus),
|
||||||
|
'-' => one_or_two(l, TokenKind::Minus, '>', TokenKind::RArrow),
|
||||||
|
'*' => one(l, TokenKind::Star),
|
||||||
|
'/' => one(l, TokenKind::Slash),
|
||||||
|
'=' => one_or_two(l, TokenKind::Equal, '=', TokenKind::EqualEqual),
|
||||||
|
'!' => one_or_two(l, TokenKind::Not, '=', TokenKind::NotEqual),
|
||||||
|
'<' => one_or_two(l, TokenKind::Less, '=', TokenKind::LessEqual),
|
||||||
|
'>' => one_or_two(l, TokenKind::Greater, '=', TokenKind::GreaterEqual),
|
||||||
|
|
||||||
|
'\n' => return newline(l),
|
||||||
|
'(' => one(l, TokenKind::LParen),
|
||||||
|
')' => one(l, TokenKind::RParen),
|
||||||
|
'[' => one(l, TokenKind::LBrack),
|
||||||
|
']' => one(l, TokenKind::RBrack),
|
||||||
|
',' => one(l, TokenKind::Comma),
|
||||||
|
'\\' => one(l, TokenKind::Backslash),
|
||||||
|
|
||||||
|
_ => {
|
||||||
|
l.advance();
|
||||||
|
l.emit(Diagnostic::error(
|
||||||
|
Span::new(start, l.position),
|
||||||
|
"unexpected character",
|
||||||
|
));
|
||||||
|
TokenKind::Error
|
||||||
|
}
|
||||||
|
};
|
||||||
|
let end = l.position;
|
||||||
|
(kind, Span::new(start, end))
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn lex(l: &mut Lexer<'_>) -> Result<(), TokenAllocError> {
|
||||||
|
loop {
|
||||||
|
let (kind, span) = token(l);
|
||||||
|
l.lexis.push(kind, span)?;
|
||||||
|
if kind == TokenKind::Eof {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
|
@ -2,10 +2,15 @@
|
||||||
|
|
||||||
extern crate alloc;
|
extern crate alloc;
|
||||||
|
|
||||||
|
pub mod ast;
|
||||||
pub mod bytecode;
|
pub mod bytecode;
|
||||||
pub mod compiler;
|
pub mod compiler;
|
||||||
|
pub mod diagnostic;
|
||||||
|
pub mod lexer;
|
||||||
|
pub mod parser;
|
||||||
pub mod render;
|
pub mod render;
|
||||||
pub mod sexp;
|
pub mod source;
|
||||||
pub mod system;
|
pub mod system;
|
||||||
|
pub mod token;
|
||||||
pub mod value;
|
pub mod value;
|
||||||
pub mod vm;
|
pub mod vm;
|
||||||
|
|
607
crates/haku/src/parser.rs
Normal file
607
crates/haku/src/parser.rs
Normal file
|
@ -0,0 +1,607 @@
|
||||||
|
use core::cell::Cell;
|
||||||
|
|
||||||
|
use alloc::vec::Vec;
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
ast::{Ast, NodeAllocError, NodeId, NodeKind},
|
||||||
|
diagnostic::Diagnostic,
|
||||||
|
source::Span,
|
||||||
|
token::{Lexis, TokenKind, TokenKindSet},
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Resource limits imposed on a [`Parser`].
#[derive(Debug, Clone, Copy)]
pub struct ParserLimits {
    /// Capacity of the parser's event buffer. Must be less than `u32::MAX`.
    pub max_events: usize,
}
|
||||||
|
|
||||||
|
pub struct Parser<'a> {
|
||||||
|
tokens: &'a Lexis,
|
||||||
|
events: Vec<Event>,
|
||||||
|
position: u32,
|
||||||
|
fuel: Cell<u32>,
|
||||||
|
pub diagnostics: Vec<Diagnostic>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
enum Event {
|
||||||
|
Open { kind: NodeKind },
|
||||||
|
Close,
|
||||||
|
Advance,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Handle to a node that has been opened, but not closed yet.
struct Open {
    /// Index of the node's `Event::Open`, or `None` if the event could not be recorded.
    index: Option<usize>,
}
|
||||||
|
|
||||||
|
/// Handle to a node that has been closed.
struct Closed {
    /// Index of the node's `Event::Open`, or `None` if the node was never recorded.
    index: Option<usize>,
}
|
||||||
|
|
||||||
|
impl<'a> Parser<'a> {
|
||||||
|
const FUEL: u32 = 256;
|
||||||
|
|
||||||
|
pub fn new(input: &'a Lexis, limits: &ParserLimits) -> Self {
|
||||||
|
assert!(limits.max_events < u32::MAX as usize);
|
||||||
|
|
||||||
|
Self {
|
||||||
|
tokens: input,
|
||||||
|
events: Vec::with_capacity(limits.max_events),
|
||||||
|
position: 0,
|
||||||
|
diagnostics: Vec::with_capacity(16),
|
||||||
|
fuel: Cell::new(Self::FUEL),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Records `event` and returns its index, or returns `None` without recording anything if
/// the event buffer is already filled to capacity.
fn event(&mut self, event: Event) -> Option<usize> {
    let index = self.events.len();
    if index >= self.events.capacity() {
        return None;
    }
    self.events.push(event);
    Some(index)
}
|
||||||
|
|
||||||
|
fn open(&mut self) -> Open {
|
||||||
|
Open {
|
||||||
|
index: self.event(Event::Open {
|
||||||
|
kind: NodeKind::Error,
|
||||||
|
}),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn open_before(&mut self, closed: Closed) -> Open {
|
||||||
|
if let Some(index) = closed.index {
|
||||||
|
if self.events.len() < self.events.capacity() {
|
||||||
|
self.events.insert(
|
||||||
|
index,
|
||||||
|
Event::Open {
|
||||||
|
kind: NodeKind::Error,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
return Open { index: Some(index) };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Open { index: None }
|
||||||
|
}
|
||||||
|
|
||||||
|
fn close(&mut self, open: Open, kind: NodeKind) -> Closed {
|
||||||
|
if let Some(index) = open.index {
|
||||||
|
self.events[index] = Event::Open { kind };
|
||||||
|
self.event(Event::Close);
|
||||||
|
Closed { index: Some(index) }
|
||||||
|
} else {
|
||||||
|
Closed { index: None }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_eof(&self) -> bool {
|
||||||
|
self.peek() == TokenKind::Eof
|
||||||
|
}
|
||||||
|
|
||||||
|
fn advance(&mut self) {
|
||||||
|
if !self.is_eof() {
|
||||||
|
self.position += 1;
|
||||||
|
self.event(Event::Advance);
|
||||||
|
self.fuel.set(Self::FUEL);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[track_caller]
|
||||||
|
fn peek(&self) -> TokenKind {
|
||||||
|
assert_ne!(self.fuel.get(), 0, "parser is stuck");
|
||||||
|
self.fuel.set(self.fuel.get() - 1);
|
||||||
|
|
||||||
|
self.tokens.kind(self.position)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn span(&self) -> Span {
|
||||||
|
self.tokens.span(self.position)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn emit(&mut self, diagnostic: Diagnostic) {
|
||||||
|
if self.diagnostics.len() < self.diagnostics.capacity() {
|
||||||
|
self.diagnostics.push(diagnostic);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn advance_with_error(&mut self) -> Closed {
|
||||||
|
let opened = self.open();
|
||||||
|
self.advance();
|
||||||
|
self.close(opened, NodeKind::Error)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn optional_newline(&mut self) -> bool {
|
||||||
|
if self.peek() == TokenKind::Newline {
|
||||||
|
self.advance();
|
||||||
|
true
|
||||||
|
} else {
|
||||||
|
false
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn into_ast(self, ast: &mut Ast) -> Result<(NodeId, Vec<Diagnostic>), NodeAllocError> {
|
||||||
|
let mut token = 0;
|
||||||
|
let mut events = self.events;
|
||||||
|
let mut stack = Vec::new();
|
||||||
|
|
||||||
|
struct StackEntry {
|
||||||
|
node_id: NodeId,
|
||||||
|
// TODO: This should probably be optimized to use a shared stack.
|
||||||
|
children: Vec<NodeId>,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Remove the last Close to keep a single node on the stack.
|
||||||
|
assert!(matches!(events.pop(), Some(Event::Close)));
|
||||||
|
|
||||||
|
for event in events {
|
||||||
|
match event {
|
||||||
|
Event::Open { kind } => {
|
||||||
|
stack.push(StackEntry {
|
||||||
|
node_id: ast.alloc(kind, self.tokens.span(token))?,
|
||||||
|
children: Vec::new(),
|
||||||
|
});
|
||||||
|
}
|
||||||
|
Event::Close => {
|
||||||
|
let end_span = self.tokens.span(token.saturating_sub(1));
|
||||||
|
let stack_entry = stack.pop().unwrap();
|
||||||
|
ast.alloc_children(stack_entry.node_id, &stack_entry.children);
|
||||||
|
ast.extend_span(stack_entry.node_id, end_span.end);
|
||||||
|
stack.last_mut().unwrap().children.push(stack_entry.node_id);
|
||||||
|
}
|
||||||
|
Event::Advance => {
|
||||||
|
let span = self.tokens.span(token);
|
||||||
|
let node_id = ast.alloc(NodeKind::Token, span)?;
|
||||||
|
stack
|
||||||
|
.last_mut()
|
||||||
|
.expect("advance() may only be used in an open node")
|
||||||
|
.children
|
||||||
|
.push(node_id);
|
||||||
|
token += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if stack.len() != 1 {
|
||||||
|
// This means we had too many events emitted and they are no longer balanced.
|
||||||
|
return Err(NodeAllocError);
|
||||||
|
}
|
||||||
|
// assert_eq!(token, self.tokens.len());
|
||||||
|
|
||||||
|
let end_span = self.tokens.span(token.saturating_sub(1));
|
||||||
|
let stack_entry = stack.pop().unwrap();
|
||||||
|
ast.alloc_children(stack_entry.node_id, &stack_entry.children);
|
||||||
|
ast.extend_span(stack_entry.node_id, end_span.end);
|
||||||
|
|
||||||
|
Ok((stack_entry.node_id, self.diagnostics))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> core::fmt::Debug for Parser<'a> {
|
||||||
|
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
|
||||||
|
f.debug_struct("Parser")
|
||||||
|
.field("events", &self.events)
|
||||||
|
.finish_non_exhaustive()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Result of comparing two operators with `tighter`: which side binds more tightly.
enum Tighter {
    /// The operator on the left wins; the current precedence level stops here.
    Left,
    /// The operator on the right wins; parsing continues with it.
    Right,
}
|
||||||
|
|
||||||
|
fn tighter(left: TokenKind, right: TokenKind) -> Tighter {
|
||||||
|
fn tightness(kind: TokenKind) -> Option<usize> {
|
||||||
|
match kind {
|
||||||
|
TokenKind::Equal => Some(0),
|
||||||
|
TokenKind::EqualEqual
|
||||||
|
| TokenKind::NotEqual
|
||||||
|
| TokenKind::Less
|
||||||
|
| TokenKind::LessEqual
|
||||||
|
| TokenKind::Greater
|
||||||
|
| TokenKind::GreaterEqual => Some(1),
|
||||||
|
TokenKind::Plus | TokenKind::Minus => Some(2),
|
||||||
|
TokenKind::Star | TokenKind::Slash => Some(3),
|
||||||
|
_ if PREFIX_TOKENS.contains(kind) => Some(4),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let Some(right_tightness) = tightness(right) else {
|
||||||
|
return Tighter::Left;
|
||||||
|
};
|
||||||
|
let Some(left_tightness) = tightness(left) else {
|
||||||
|
assert!(left == TokenKind::Eof);
|
||||||
|
return Tighter::Right;
|
||||||
|
};
|
||||||
|
|
||||||
|
if right_tightness > left_tightness {
|
||||||
|
Tighter::Right
|
||||||
|
} else {
|
||||||
|
Tighter::Left
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn precedence_parse(p: &mut Parser, left: TokenKind) {
|
||||||
|
let mut lhs = prefix(p);
|
||||||
|
|
||||||
|
loop {
|
||||||
|
let right = p.peek();
|
||||||
|
match tighter(left, right) {
|
||||||
|
Tighter::Left => break,
|
||||||
|
Tighter::Right => {
|
||||||
|
let o = p.open_before(lhs);
|
||||||
|
let kind = infix(p, right);
|
||||||
|
lhs = p.close(o, kind);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Wraps the current token in a node of the given `kind`.
fn one(p: &mut Parser, kind: NodeKind) -> Closed {
    let node = p.open();
    p.advance();
    p.close(node, kind)
}
|
||||||
|
|
||||||
|
fn list(p: &mut Parser) -> Closed {
|
||||||
|
let o = p.open();
|
||||||
|
let lspan = p.span();
|
||||||
|
p.advance(); // [
|
||||||
|
p.optional_newline();
|
||||||
|
|
||||||
|
loop {
|
||||||
|
match p.peek() {
|
||||||
|
TokenKind::Eof => {
|
||||||
|
p.emit(Diagnostic::error(lspan, "missing `]` to close this list"));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
TokenKind::RBrack => {
|
||||||
|
p.advance();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
_ => (),
|
||||||
|
}
|
||||||
|
|
||||||
|
expr(p);
|
||||||
|
|
||||||
|
match p.peek() {
|
||||||
|
TokenKind::Comma | TokenKind::Newline => {
|
||||||
|
p.advance();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
TokenKind::RBrack => {
|
||||||
|
p.advance();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
_ => {
|
||||||
|
let span = p.span();
|
||||||
|
p.emit(Diagnostic::error(
|
||||||
|
span,
|
||||||
|
"comma `,` or new line expected after list element",
|
||||||
|
));
|
||||||
|
p.advance_with_error();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
p.close(o, NodeKind::List)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn unary(p: &mut Parser) -> Closed {
|
||||||
|
let o = p.open();
|
||||||
|
|
||||||
|
let op = p.open();
|
||||||
|
p.advance();
|
||||||
|
p.close(op, NodeKind::Op);
|
||||||
|
|
||||||
|
prefix(p);
|
||||||
|
|
||||||
|
p.close(o, NodeKind::Unary)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn paren(p: &mut Parser) -> Closed {
|
||||||
|
let o = p.open();
|
||||||
|
let lspan = p.span();
|
||||||
|
p.advance(); // (
|
||||||
|
if p.peek() == TokenKind::RParen {
|
||||||
|
p.advance(); // )
|
||||||
|
p.close(o, NodeKind::ParenEmpty)
|
||||||
|
} else {
|
||||||
|
p.optional_newline();
|
||||||
|
expr(p);
|
||||||
|
p.optional_newline();
|
||||||
|
if p.peek() != TokenKind::RParen {
|
||||||
|
p.emit(Diagnostic::error(lspan, "missing closing parenthesis `)`"));
|
||||||
|
p.advance_with_error()
|
||||||
|
} else {
|
||||||
|
p.advance();
|
||||||
|
p.close(o, NodeKind::Paren)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn param(p: &mut Parser) {
|
||||||
|
let o = p.open();
|
||||||
|
|
||||||
|
if let TokenKind::Ident | TokenKind::Underscore = p.peek() {
|
||||||
|
p.advance();
|
||||||
|
} else {
|
||||||
|
let span = p.span();
|
||||||
|
p.emit(Diagnostic::error(
|
||||||
|
span,
|
||||||
|
"parameter names must be identifiers or `_`",
|
||||||
|
));
|
||||||
|
p.advance_with_error();
|
||||||
|
}
|
||||||
|
|
||||||
|
p.close(o, NodeKind::Param);
|
||||||
|
}
|
||||||
|
|
||||||
|
fn lambda(p: &mut Parser) -> Closed {
|
||||||
|
let o = p.open();
|
||||||
|
p.advance(); // backslash
|
||||||
|
|
||||||
|
let params = p.open();
|
||||||
|
loop {
|
||||||
|
param(p);
|
||||||
|
match p.peek() {
|
||||||
|
TokenKind::Comma => {
|
||||||
|
p.advance();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
TokenKind::RArrow => break,
|
||||||
|
|
||||||
|
_ => {
|
||||||
|
let span = p.span();
|
||||||
|
p.emit(Diagnostic::error(
|
||||||
|
span,
|
||||||
|
"`,` or `->` expected after function parameter",
|
||||||
|
));
|
||||||
|
p.advance_with_error();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
p.close(params, NodeKind::Params);
|
||||||
|
|
||||||
|
// NOTE: Can be false if there are some stray tokens.
|
||||||
|
// We prefer to bail early and let the rest of the program parse.
|
||||||
|
if p.peek() == TokenKind::RArrow {
|
||||||
|
p.advance();
|
||||||
|
p.optional_newline();
|
||||||
|
expr(p);
|
||||||
|
}
|
||||||
|
|
||||||
|
p.close(o, NodeKind::Lambda)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn if_expr(p: &mut Parser) -> Closed {
|
||||||
|
let o = p.open();
|
||||||
|
|
||||||
|
p.advance(); // if
|
||||||
|
if p.peek() != TokenKind::LParen {
|
||||||
|
let span = p.span();
|
||||||
|
p.emit(Diagnostic::error(
|
||||||
|
span,
|
||||||
|
"the condition in an `if` expression must be surrounded with parentheses",
|
||||||
|
));
|
||||||
|
// NOTE: Don't advance, it's more likely the programmer expected no parentheses to be needed.
|
||||||
|
}
|
||||||
|
p.advance();
|
||||||
|
expr(p); // Condition
|
||||||
|
if p.peek() != TokenKind::RParen {
|
||||||
|
let span = p.span();
|
||||||
|
p.emit(Diagnostic::error(
|
||||||
|
span,
|
||||||
|
"missing closing parenthesis after `if` condition",
|
||||||
|
));
|
||||||
|
}
|
||||||
|
p.advance();
|
||||||
|
p.optional_newline();
|
||||||
|
|
||||||
|
expr(p); // True branch
|
||||||
|
p.optional_newline();
|
||||||
|
|
||||||
|
if p.peek() != TokenKind::Else {
|
||||||
|
let span = p.span();
|
||||||
|
p.emit(Diagnostic::error(
|
||||||
|
span,
|
||||||
|
"`if` expression is missing an `else` clause",
|
||||||
|
));
|
||||||
|
}
|
||||||
|
p.advance();
|
||||||
|
p.optional_newline();
|
||||||
|
|
||||||
|
expr(p); // False branch
|
||||||
|
|
||||||
|
p.close(o, NodeKind::If)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn let_expr(p: &mut Parser) -> Closed {
|
||||||
|
let o = p.open();
|
||||||
|
|
||||||
|
p.advance(); // let
|
||||||
|
|
||||||
|
if p.peek() == TokenKind::Ident {
|
||||||
|
let ident = p.open();
|
||||||
|
p.advance();
|
||||||
|
p.close(ident, NodeKind::Ident);
|
||||||
|
} else {
|
||||||
|
let span = p.span();
|
||||||
|
p.emit(Diagnostic::error(span, "`let` variable name expected"));
|
||||||
|
p.advance_with_error();
|
||||||
|
}
|
||||||
|
|
||||||
|
if p.peek() == TokenKind::Equal {
|
||||||
|
p.advance();
|
||||||
|
} else {
|
||||||
|
let span = p.span();
|
||||||
|
p.emit(Diagnostic::error(span, "`=` expected after variable name"));
|
||||||
|
p.advance_with_error();
|
||||||
|
}
|
||||||
|
|
||||||
|
expr(p);
|
||||||
|
|
||||||
|
if p.peek() == TokenKind::Newline {
|
||||||
|
p.advance();
|
||||||
|
} else {
|
||||||
|
let span = p.span();
|
||||||
|
p.emit(Diagnostic::error(
|
||||||
|
span,
|
||||||
|
"new line expected after `let` expression",
|
||||||
|
));
|
||||||
|
p.advance_with_error();
|
||||||
|
}
|
||||||
|
|
||||||
|
expr(p);
|
||||||
|
|
||||||
|
p.close(o, NodeKind::Let)
|
||||||
|
}
|
||||||
|
|
||||||
|
const PREFIX_TOKENS: TokenKindSet = TokenKindSet::new(&[
|
||||||
|
TokenKind::Ident,
|
||||||
|
TokenKind::Tag,
|
||||||
|
TokenKind::Number,
|
||||||
|
TokenKind::Color,
|
||||||
|
// NOTE: This is ambiguous in function calls.
|
||||||
|
// In that case, the infix operator takes precedence (because the `match` arms for the infix op
|
||||||
|
// come first.)
|
||||||
|
TokenKind::Minus,
|
||||||
|
TokenKind::Not,
|
||||||
|
TokenKind::LParen,
|
||||||
|
TokenKind::Backslash,
|
||||||
|
TokenKind::If,
|
||||||
|
TokenKind::Let,
|
||||||
|
TokenKind::LBrack,
|
||||||
|
]);
|
||||||
|
|
||||||
|
fn prefix(p: &mut Parser) -> Closed {
|
||||||
|
match p.peek() {
|
||||||
|
TokenKind::Ident => one(p, NodeKind::Ident),
|
||||||
|
TokenKind::Tag => one(p, NodeKind::Tag),
|
||||||
|
TokenKind::Number => one(p, NodeKind::Number),
|
||||||
|
TokenKind::Color => one(p, NodeKind::Color),
|
||||||
|
TokenKind::LBrack => list(p),
|
||||||
|
|
||||||
|
TokenKind::Minus | TokenKind::Not => unary(p),
|
||||||
|
TokenKind::LParen => paren(p),
|
||||||
|
TokenKind::Backslash => lambda(p),
|
||||||
|
TokenKind::If => if_expr(p),
|
||||||
|
TokenKind::Let => let_expr(p),
|
||||||
|
|
||||||
|
_ => {
|
||||||
|
assert!(
|
||||||
|
!PREFIX_TOKENS.contains(p.peek()),
|
||||||
|
"{:?} found in PREFIX_TOKENS",
|
||||||
|
p.peek()
|
||||||
|
);
|
||||||
|
|
||||||
|
let span = p.span();
|
||||||
|
p.emit(Diagnostic::error(
|
||||||
|
span,
|
||||||
|
"an expression was expected, but this token does not start one",
|
||||||
|
));
|
||||||
|
p.advance_with_error()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn infix(p: &mut Parser, op: TokenKind) -> NodeKind {
|
||||||
|
match op {
|
||||||
|
TokenKind::Plus
|
||||||
|
| TokenKind::Minus
|
||||||
|
| TokenKind::Star
|
||||||
|
| TokenKind::Slash
|
||||||
|
| TokenKind::EqualEqual
|
||||||
|
| TokenKind::NotEqual
|
||||||
|
| TokenKind::Less
|
||||||
|
| TokenKind::LessEqual
|
||||||
|
| TokenKind::Greater
|
||||||
|
| TokenKind::GreaterEqual
|
||||||
|
| TokenKind::Equal => infix_binary(p, op),
|
||||||
|
|
||||||
|
_ if PREFIX_TOKENS.contains(op) => infix_call(p),
|
||||||
|
|
||||||
|
_ => panic!("unhandled infix operator {op:?}"),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn infix_binary(p: &mut Parser, op: TokenKind) -> NodeKind {
|
||||||
|
let o = p.open();
|
||||||
|
p.advance();
|
||||||
|
p.close(o, NodeKind::Op);
|
||||||
|
|
||||||
|
if p.peek() == TokenKind::Newline {
|
||||||
|
p.advance();
|
||||||
|
}
|
||||||
|
|
||||||
|
precedence_parse(p, op);
|
||||||
|
NodeKind::Binary
|
||||||
|
}
|
||||||
|
|
||||||
|
fn infix_call(p: &mut Parser) -> NodeKind {
|
||||||
|
while PREFIX_TOKENS.contains(p.peek()) {
|
||||||
|
prefix(p);
|
||||||
|
}
|
||||||
|
|
||||||
|
NodeKind::Call
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn expr(p: &mut Parser) {
|
||||||
|
precedence_parse(p, TokenKind::Eof)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn toplevel(p: &mut Parser) {
|
||||||
|
let o = p.open();
|
||||||
|
p.optional_newline();
|
||||||
|
while p.peek() != TokenKind::Eof {
|
||||||
|
expr(p);
|
||||||
|
|
||||||
|
match p.peek() {
|
||||||
|
TokenKind::Newline => {
|
||||||
|
p.advance();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
TokenKind::Eof => break,
|
||||||
|
|
||||||
|
_ => {
|
||||||
|
let span = p.span();
|
||||||
|
p.emit(Diagnostic::error(
|
||||||
|
span,
|
||||||
|
"newline expected after toplevel expression",
|
||||||
|
))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
p.close(o, NodeKind::Toplevel);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
|
||||||
|
mod tests;
|
912
crates/haku/src/parser/tests.rs
Normal file
912
crates/haku/src/parser/tests.rs
Normal file
|
@ -0,0 +1,912 @@
|
||||||
|
use alloc::{format, string::String};
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
ast::{dump::dump, Ast, NodeId},
|
||||||
|
lexer::{lex, Lexer},
|
||||||
|
parser::expr,
|
||||||
|
source::SourceCode,
|
||||||
|
token::Lexis,
|
||||||
|
};
|
||||||
|
|
||||||
|
use super::{toplevel, Parser, ParserLimits};
|
||||||
|
|
||||||
|
fn parse(s: &str, f: fn(&mut Parser)) -> (Ast, NodeId) {
|
||||||
|
let mut lexer = Lexer::new(Lexis::new(1024), SourceCode::unlimited_len(s));
|
||||||
|
lex(&mut lexer).expect("too many tokens");
|
||||||
|
|
||||||
|
let mut parser = Parser::new(&lexer.lexis, &ParserLimits { max_events: 1024 });
|
||||||
|
f(&mut parser);
|
||||||
|
|
||||||
|
if !parser.diagnostics.is_empty() {
|
||||||
|
panic!("parser emitted diagnostics: {:#?}", parser.diagnostics);
|
||||||
|
}
|
||||||
|
|
||||||
|
let mut ast = Ast::new(1024);
|
||||||
|
let (root, _) = parser.into_ast(&mut ast).unwrap();
|
||||||
|
(ast, root)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn ast(s: &str, f: fn(&mut Parser)) -> String {
|
||||||
|
let (ast, root) = parse(s, f);
|
||||||
|
// The extra newline is mostly so that it's easier to make the string literals look nice.
|
||||||
|
format!("\n{}", dump(&ast, root, None))
|
||||||
|
}
|
||||||
|
|
||||||
|
#[track_caller]
|
||||||
|
fn assert_ast_eq(s: &str, f: fn(&mut Parser), ast_s: &str) {
|
||||||
|
let got = ast(s, f);
|
||||||
|
if ast_s != got {
|
||||||
|
panic!("AST mismatch. expected:\n{ast_s}\n\ngot:\n{got}\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn one_literals() {
|
||||||
|
assert_ast_eq(
|
||||||
|
"1",
|
||||||
|
expr,
|
||||||
|
"
|
||||||
|
Number @ 0..1
|
||||||
|
Token @ 0..1",
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_ast_eq(
|
||||||
|
"ExampleTag123",
|
||||||
|
expr,
|
||||||
|
"
|
||||||
|
Tag @ 0..13
|
||||||
|
Token @ 0..13",
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_ast_eq(
|
||||||
|
"example_ident123",
|
||||||
|
expr,
|
||||||
|
"
|
||||||
|
Ident @ 0..16
|
||||||
|
Token @ 0..16",
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_ast_eq(
|
||||||
|
"#000",
|
||||||
|
expr,
|
||||||
|
"
|
||||||
|
Color @ 0..4
|
||||||
|
Token @ 0..4",
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_ast_eq(
|
||||||
|
"#000F",
|
||||||
|
expr,
|
||||||
|
"
|
||||||
|
Color @ 0..5
|
||||||
|
Token @ 0..5",
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_ast_eq(
|
||||||
|
"#058EF0",
|
||||||
|
expr,
|
||||||
|
"
|
||||||
|
Color @ 0..7
|
||||||
|
Token @ 0..7",
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_ast_eq(
|
||||||
|
"#058EF0FF",
|
||||||
|
expr,
|
||||||
|
"
|
||||||
|
Color @ 0..9
|
||||||
|
Token @ 0..9",
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn list() {
|
||||||
|
assert_ast_eq(
|
||||||
|
"[]",
|
||||||
|
expr,
|
||||||
|
"
|
||||||
|
List @ 0..2
|
||||||
|
Token @ 0..1
|
||||||
|
Token @ 1..2",
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_ast_eq(
|
||||||
|
"[1]",
|
||||||
|
expr,
|
||||||
|
"
|
||||||
|
List @ 0..3
|
||||||
|
Token @ 0..1
|
||||||
|
Number @ 1..2
|
||||||
|
Token @ 1..2
|
||||||
|
Token @ 2..3",
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_ast_eq(
|
||||||
|
"[1, 2]",
|
||||||
|
expr,
|
||||||
|
"
|
||||||
|
List @ 0..6
|
||||||
|
Token @ 0..1
|
||||||
|
Number @ 1..2
|
||||||
|
Token @ 1..2
|
||||||
|
Token @ 2..3
|
||||||
|
Number @ 4..5
|
||||||
|
Token @ 4..5
|
||||||
|
Token @ 5..6",
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_ast_eq(
|
||||||
|
"[
|
||||||
|
1
|
||||||
|
2
|
||||||
|
]",
|
||||||
|
expr,
|
||||||
|
"
|
||||||
|
List @ 0..42
|
||||||
|
Token @ 0..1
|
||||||
|
Token @ 1..2
|
||||||
|
Number @ 15..16
|
||||||
|
Token @ 15..16
|
||||||
|
Token @ 16..17
|
||||||
|
Number @ 30..31
|
||||||
|
Token @ 30..31
|
||||||
|
Token @ 31..32
|
||||||
|
Token @ 41..42",
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn unary() {
|
||||||
|
assert_ast_eq(
|
||||||
|
"-1",
|
||||||
|
expr,
|
||||||
|
"
|
||||||
|
Unary @ 0..2
|
||||||
|
Op @ 0..1
|
||||||
|
Token @ 0..1
|
||||||
|
Number @ 1..2
|
||||||
|
Token @ 1..2",
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_ast_eq(
|
||||||
|
"!1",
|
||||||
|
expr,
|
||||||
|
"
|
||||||
|
Unary @ 0..2
|
||||||
|
Op @ 0..1
|
||||||
|
Token @ 0..1
|
||||||
|
Number @ 1..2
|
||||||
|
Token @ 1..2",
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn binary_single() {
|
||||||
|
assert_ast_eq(
|
||||||
|
"1 + 1",
|
||||||
|
expr,
|
||||||
|
"
|
||||||
|
Binary @ 0..5
|
||||||
|
Number @ 0..1
|
||||||
|
Token @ 0..1
|
||||||
|
Op @ 2..3
|
||||||
|
Token @ 2..3
|
||||||
|
Number @ 4..5
|
||||||
|
Token @ 4..5",
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_ast_eq(
|
||||||
|
"1 - 1",
|
||||||
|
expr,
|
||||||
|
"
|
||||||
|
Binary @ 0..5
|
||||||
|
Number @ 0..1
|
||||||
|
Token @ 0..1
|
||||||
|
Op @ 2..3
|
||||||
|
Token @ 2..3
|
||||||
|
Number @ 4..5
|
||||||
|
Token @ 4..5",
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_ast_eq(
|
||||||
|
"1 * 1",
|
||||||
|
expr,
|
||||||
|
"
|
||||||
|
Binary @ 0..5
|
||||||
|
Number @ 0..1
|
||||||
|
Token @ 0..1
|
||||||
|
Op @ 2..3
|
||||||
|
Token @ 2..3
|
||||||
|
Number @ 4..5
|
||||||
|
Token @ 4..5",
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_ast_eq(
|
||||||
|
"1 / 1",
|
||||||
|
expr,
|
||||||
|
"
|
||||||
|
Binary @ 0..5
|
||||||
|
Number @ 0..1
|
||||||
|
Token @ 0..1
|
||||||
|
Op @ 2..3
|
||||||
|
Token @ 2..3
|
||||||
|
Number @ 4..5
|
||||||
|
Token @ 4..5",
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_ast_eq(
|
||||||
|
"1 < 1",
|
||||||
|
expr,
|
||||||
|
"
|
||||||
|
Binary @ 0..5
|
||||||
|
Number @ 0..1
|
||||||
|
Token @ 0..1
|
||||||
|
Op @ 2..3
|
||||||
|
Token @ 2..3
|
||||||
|
Number @ 4..5
|
||||||
|
Token @ 4..5",
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_ast_eq(
|
||||||
|
"1 > 1",
|
||||||
|
expr,
|
||||||
|
"
|
||||||
|
Binary @ 0..5
|
||||||
|
Number @ 0..1
|
||||||
|
Token @ 0..1
|
||||||
|
Op @ 2..3
|
||||||
|
Token @ 2..3
|
||||||
|
Number @ 4..5
|
||||||
|
Token @ 4..5",
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_ast_eq(
|
||||||
|
"1 == 1",
|
||||||
|
expr,
|
||||||
|
"
|
||||||
|
Binary @ 0..6
|
||||||
|
Number @ 0..1
|
||||||
|
Token @ 0..1
|
||||||
|
Op @ 2..4
|
||||||
|
Token @ 2..4
|
||||||
|
Number @ 5..6
|
||||||
|
Token @ 5..6",
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_ast_eq(
|
||||||
|
"1 != 1",
|
||||||
|
expr,
|
||||||
|
"
|
||||||
|
Binary @ 0..6
|
||||||
|
Number @ 0..1
|
||||||
|
Token @ 0..1
|
||||||
|
Op @ 2..4
|
||||||
|
Token @ 2..4
|
||||||
|
Number @ 5..6
|
||||||
|
Token @ 5..6",
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_ast_eq(
|
||||||
|
"1 <= 1",
|
||||||
|
expr,
|
||||||
|
"
|
||||||
|
Binary @ 0..6
|
||||||
|
Number @ 0..1
|
||||||
|
Token @ 0..1
|
||||||
|
Op @ 2..4
|
||||||
|
Token @ 2..4
|
||||||
|
Number @ 5..6
|
||||||
|
Token @ 5..6",
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_ast_eq(
|
||||||
|
"1 >= 1",
|
||||||
|
expr,
|
||||||
|
"
|
||||||
|
Binary @ 0..6
|
||||||
|
Number @ 0..1
|
||||||
|
Token @ 0..1
|
||||||
|
Op @ 2..4
|
||||||
|
Token @ 2..4
|
||||||
|
Number @ 5..6
|
||||||
|
Token @ 5..6",
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_ast_eq(
|
||||||
|
"1 = 1",
|
||||||
|
expr,
|
||||||
|
"
|
||||||
|
Binary @ 0..5
|
||||||
|
Number @ 0..1
|
||||||
|
Token @ 0..1
|
||||||
|
Op @ 2..3
|
||||||
|
Token @ 2..3
|
||||||
|
Number @ 4..5
|
||||||
|
Token @ 4..5",
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn binary_precedence() {
|
||||||
|
assert_ast_eq(
|
||||||
|
"1 + 1 + 1",
|
||||||
|
expr,
|
||||||
|
"
|
||||||
|
Binary @ 0..9
|
||||||
|
Binary @ 0..5
|
||||||
|
Number @ 0..1
|
||||||
|
Token @ 0..1
|
||||||
|
Op @ 2..3
|
||||||
|
Token @ 2..3
|
||||||
|
Number @ 4..5
|
||||||
|
Token @ 4..5
|
||||||
|
Op @ 6..7
|
||||||
|
Token @ 6..7
|
||||||
|
Number @ 8..9
|
||||||
|
Token @ 8..9",
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_ast_eq(
|
||||||
|
"1 * 1 + 1",
|
||||||
|
expr,
|
||||||
|
"
|
||||||
|
Binary @ 0..9
|
||||||
|
Binary @ 0..5
|
||||||
|
Number @ 0..1
|
||||||
|
Token @ 0..1
|
||||||
|
Op @ 2..3
|
||||||
|
Token @ 2..3
|
||||||
|
Number @ 4..5
|
||||||
|
Token @ 4..5
|
||||||
|
Op @ 6..7
|
||||||
|
Token @ 6..7
|
||||||
|
Number @ 8..9
|
||||||
|
Token @ 8..9",
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_ast_eq(
|
||||||
|
"1 + 1 * 1",
|
||||||
|
expr,
|
||||||
|
"
|
||||||
|
Binary @ 0..9
|
||||||
|
Number @ 0..1
|
||||||
|
Token @ 0..1
|
||||||
|
Op @ 2..3
|
||||||
|
Token @ 2..3
|
||||||
|
Binary @ 4..9
|
||||||
|
Number @ 4..5
|
||||||
|
Token @ 4..5
|
||||||
|
Op @ 6..7
|
||||||
|
Token @ 6..7
|
||||||
|
Number @ 8..9
|
||||||
|
Token @ 8..9",
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_ast_eq(
|
||||||
|
"1 < 1 + 1",
|
||||||
|
expr,
|
||||||
|
"
|
||||||
|
Binary @ 0..9
|
||||||
|
Number @ 0..1
|
||||||
|
Token @ 0..1
|
||||||
|
Op @ 2..3
|
||||||
|
Token @ 2..3
|
||||||
|
Binary @ 4..9
|
||||||
|
Number @ 4..5
|
||||||
|
Token @ 4..5
|
||||||
|
Op @ 6..7
|
||||||
|
Token @ 6..7
|
||||||
|
Number @ 8..9
|
||||||
|
Token @ 8..9",
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_ast_eq(
|
||||||
|
"1 + 1 < 1",
|
||||||
|
expr,
|
||||||
|
"
|
||||||
|
Binary @ 0..9
|
||||||
|
Binary @ 0..5
|
||||||
|
Number @ 0..1
|
||||||
|
Token @ 0..1
|
||||||
|
Op @ 2..3
|
||||||
|
Token @ 2..3
|
||||||
|
Number @ 4..5
|
||||||
|
Token @ 4..5
|
||||||
|
Op @ 6..7
|
||||||
|
Token @ 6..7
|
||||||
|
Number @ 8..9
|
||||||
|
Token @ 8..9",
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_ast_eq(
|
||||||
|
"1 + 1 * 1 < 1",
|
||||||
|
expr,
|
||||||
|
"
|
||||||
|
Binary @ 0..13
|
||||||
|
Binary @ 0..9
|
||||||
|
Number @ 0..1
|
||||||
|
Token @ 0..1
|
||||||
|
Op @ 2..3
|
||||||
|
Token @ 2..3
|
||||||
|
Binary @ 4..9
|
||||||
|
Number @ 4..5
|
||||||
|
Token @ 4..5
|
||||||
|
Op @ 6..7
|
||||||
|
Token @ 6..7
|
||||||
|
Number @ 8..9
|
||||||
|
Token @ 8..9
|
||||||
|
Op @ 10..11
|
||||||
|
Token @ 10..11
|
||||||
|
Number @ 12..13
|
||||||
|
Token @ 12..13",
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_ast_eq(
|
||||||
|
"1 * 1 + 1 < 1",
|
||||||
|
expr,
|
||||||
|
"
|
||||||
|
Binary @ 0..13
|
||||||
|
Binary @ 0..9
|
||||||
|
Binary @ 0..5
|
||||||
|
Number @ 0..1
|
||||||
|
Token @ 0..1
|
||||||
|
Op @ 2..3
|
||||||
|
Token @ 2..3
|
||||||
|
Number @ 4..5
|
||||||
|
Token @ 4..5
|
||||||
|
Op @ 6..7
|
||||||
|
Token @ 6..7
|
||||||
|
Number @ 8..9
|
||||||
|
Token @ 8..9
|
||||||
|
Op @ 10..11
|
||||||
|
Token @ 10..11
|
||||||
|
Number @ 12..13
|
||||||
|
Token @ 12..13",
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn binary_cont() {
|
||||||
|
assert_ast_eq(
|
||||||
|
"1 +
|
||||||
|
1",
|
||||||
|
expr,
|
||||||
|
"
|
||||||
|
Binary @ 0..16
|
||||||
|
Number @ 0..1
|
||||||
|
Token @ 0..1
|
||||||
|
Op @ 2..3
|
||||||
|
Token @ 2..3
|
||||||
|
Token @ 3..4
|
||||||
|
Number @ 15..16
|
||||||
|
Token @ 15..16",
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_ast_eq(
|
||||||
|
"1 +
|
||||||
|
|
||||||
|
1",
|
||||||
|
expr,
|
||||||
|
"
|
||||||
|
Binary @ 0..17
|
||||||
|
Number @ 0..1
|
||||||
|
Token @ 0..1
|
||||||
|
Op @ 2..3
|
||||||
|
Token @ 2..3
|
||||||
|
Token @ 3..4
|
||||||
|
Number @ 16..17
|
||||||
|
Token @ 16..17",
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn paren_empty() {
|
||||||
|
assert_ast_eq(
|
||||||
|
"()",
|
||||||
|
expr,
|
||||||
|
"
|
||||||
|
ParenEmpty @ 0..2
|
||||||
|
Token @ 0..1
|
||||||
|
Token @ 1..2",
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn paren() {
|
||||||
|
assert_ast_eq(
|
||||||
|
"(1)",
|
||||||
|
expr,
|
||||||
|
"
|
||||||
|
Paren @ 0..3
|
||||||
|
Token @ 0..1
|
||||||
|
Number @ 1..2
|
||||||
|
Token @ 1..2
|
||||||
|
Token @ 2..3",
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_ast_eq(
|
||||||
|
"(1 + 1) * 1",
|
||||||
|
expr,
|
||||||
|
"
|
||||||
|
Binary @ 0..11
|
||||||
|
Paren @ 0..7
|
||||||
|
Token @ 0..1
|
||||||
|
Binary @ 1..6
|
||||||
|
Number @ 1..2
|
||||||
|
Token @ 1..2
|
||||||
|
Op @ 3..4
|
||||||
|
Token @ 3..4
|
||||||
|
Number @ 5..6
|
||||||
|
Token @ 5..6
|
||||||
|
Token @ 6..7
|
||||||
|
Op @ 8..9
|
||||||
|
Token @ 8..9
|
||||||
|
Number @ 10..11
|
||||||
|
Token @ 10..11",
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_ast_eq(
|
||||||
|
"1 * (1 + 1)",
|
||||||
|
expr,
|
||||||
|
"
|
||||||
|
Binary @ 0..11
|
||||||
|
Number @ 0..1
|
||||||
|
Token @ 0..1
|
||||||
|
Op @ 2..3
|
||||||
|
Token @ 2..3
|
||||||
|
Paren @ 4..11
|
||||||
|
Token @ 4..5
|
||||||
|
Binary @ 5..10
|
||||||
|
Number @ 5..6
|
||||||
|
Token @ 5..6
|
||||||
|
Op @ 7..8
|
||||||
|
Token @ 7..8
|
||||||
|
Number @ 9..10
|
||||||
|
Token @ 9..10
|
||||||
|
Token @ 10..11",
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_ast_eq(
|
||||||
|
"(
|
||||||
|
1 +
|
||||||
|
1
|
||||||
|
)",
|
||||||
|
expr,
|
||||||
|
"
|
||||||
|
Paren @ 0..47
|
||||||
|
Token @ 0..1
|
||||||
|
Token @ 1..2
|
||||||
|
Binary @ 15..33
|
||||||
|
Number @ 15..16
|
||||||
|
Token @ 15..16
|
||||||
|
Op @ 17..18
|
||||||
|
Token @ 17..18
|
||||||
|
Token @ 18..19
|
||||||
|
Number @ 32..33
|
||||||
|
Token @ 32..33
|
||||||
|
Token @ 36..37
|
||||||
|
Token @ 46..47",
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn infix_call() {
|
||||||
|
assert_ast_eq(
|
||||||
|
"f x y",
|
||||||
|
toplevel,
|
||||||
|
"
|
||||||
|
Toplevel @ 0..5
|
||||||
|
Call @ 0..5
|
||||||
|
Ident @ 0..1
|
||||||
|
Token @ 0..1
|
||||||
|
Ident @ 2..3
|
||||||
|
Token @ 2..3
|
||||||
|
Ident @ 4..5
|
||||||
|
Token @ 4..5",
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_ast_eq(
|
||||||
|
"sin 1 + cos 2",
|
||||||
|
toplevel,
|
||||||
|
"
|
||||||
|
Toplevel @ 0..13
|
||||||
|
Binary @ 0..13
|
||||||
|
Call @ 0..5
|
||||||
|
Ident @ 0..3
|
||||||
|
Token @ 0..3
|
||||||
|
Number @ 4..5
|
||||||
|
Token @ 4..5
|
||||||
|
Op @ 6..7
|
||||||
|
Token @ 6..7
|
||||||
|
Call @ 8..13
|
||||||
|
Ident @ 8..11
|
||||||
|
Token @ 8..11
|
||||||
|
Number @ 12..13
|
||||||
|
Token @ 12..13",
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn infix_call_unary_arg() {
|
||||||
|
assert_ast_eq(
|
||||||
|
// NOTE: The whitespace here is misleading.
|
||||||
|
// This is a binary `-`.
|
||||||
|
"f -1",
|
||||||
|
toplevel,
|
||||||
|
"
|
||||||
|
Toplevel @ 0..4
|
||||||
|
Binary @ 0..4
|
||||||
|
Ident @ 0..1
|
||||||
|
Token @ 0..1
|
||||||
|
Op @ 2..3
|
||||||
|
Token @ 2..3
|
||||||
|
Number @ 3..4
|
||||||
|
Token @ 3..4",
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_ast_eq(
|
||||||
|
"f (-1)",
|
||||||
|
toplevel,
|
||||||
|
"
|
||||||
|
Toplevel @ 0..6
|
||||||
|
Call @ 0..6
|
||||||
|
Ident @ 0..1
|
||||||
|
Token @ 0..1
|
||||||
|
Paren @ 2..6
|
||||||
|
Token @ 2..3
|
||||||
|
Unary @ 3..5
|
||||||
|
Op @ 3..4
|
||||||
|
Token @ 3..4
|
||||||
|
Number @ 4..5
|
||||||
|
Token @ 4..5
|
||||||
|
Token @ 5..6",
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn lambda() {
|
||||||
|
assert_ast_eq(
|
||||||
|
r#" \_ -> () "#,
|
||||||
|
toplevel,
|
||||||
|
"
|
||||||
|
Toplevel @ 1..9
|
||||||
|
Lambda @ 1..9
|
||||||
|
Token @ 1..2
|
||||||
|
Params @ 2..3
|
||||||
|
Param @ 2..3
|
||||||
|
Token @ 2..3
|
||||||
|
Token @ 4..6
|
||||||
|
ParenEmpty @ 7..9
|
||||||
|
Token @ 7..8
|
||||||
|
Token @ 8..9",
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_ast_eq(
|
||||||
|
r#" \x -> x "#,
|
||||||
|
toplevel,
|
||||||
|
"
|
||||||
|
Toplevel @ 1..8
|
||||||
|
Lambda @ 1..8
|
||||||
|
Token @ 1..2
|
||||||
|
Params @ 2..3
|
||||||
|
Param @ 2..3
|
||||||
|
Token @ 2..3
|
||||||
|
Token @ 4..6
|
||||||
|
Ident @ 7..8
|
||||||
|
Token @ 7..8",
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_ast_eq(
|
||||||
|
r#" \x, y -> x + y "#,
|
||||||
|
toplevel,
|
||||||
|
"
|
||||||
|
Toplevel @ 1..15
|
||||||
|
Lambda @ 1..15
|
||||||
|
Token @ 1..2
|
||||||
|
Params @ 2..6
|
||||||
|
Param @ 2..3
|
||||||
|
Token @ 2..3
|
||||||
|
Token @ 3..4
|
||||||
|
Param @ 5..6
|
||||||
|
Token @ 5..6
|
||||||
|
Token @ 7..9
|
||||||
|
Binary @ 10..15
|
||||||
|
Ident @ 10..11
|
||||||
|
Token @ 10..11
|
||||||
|
Op @ 12..13
|
||||||
|
Token @ 12..13
|
||||||
|
Ident @ 14..15
|
||||||
|
Token @ 14..15",
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_ast_eq(
|
||||||
|
r#" \x, y ->
|
||||||
|
x + y "#,
|
||||||
|
toplevel,
|
||||||
|
"
|
||||||
|
Toplevel @ 1..29
|
||||||
|
Lambda @ 1..29
|
||||||
|
Token @ 1..2
|
||||||
|
Params @ 2..6
|
||||||
|
Param @ 2..3
|
||||||
|
Token @ 2..3
|
||||||
|
Token @ 3..4
|
||||||
|
Param @ 5..6
|
||||||
|
Token @ 5..6
|
||||||
|
Token @ 7..9
|
||||||
|
Token @ 9..10
|
||||||
|
Binary @ 24..29
|
||||||
|
Ident @ 24..25
|
||||||
|
Token @ 24..25
|
||||||
|
Op @ 26..27
|
||||||
|
Token @ 26..27
|
||||||
|
Ident @ 28..29
|
||||||
|
Token @ 28..29",
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_ast_eq(
|
||||||
|
r#" f \x -> g \y -> x + y "#,
|
||||||
|
toplevel,
|
||||||
|
"
|
||||||
|
Toplevel @ 1..22
|
||||||
|
Call @ 1..22
|
||||||
|
Ident @ 1..2
|
||||||
|
Token @ 1..2
|
||||||
|
Lambda @ 3..22
|
||||||
|
Token @ 3..4
|
||||||
|
Params @ 4..5
|
||||||
|
Param @ 4..5
|
||||||
|
Token @ 4..5
|
||||||
|
Token @ 6..8
|
||||||
|
Call @ 9..22
|
||||||
|
Ident @ 9..10
|
||||||
|
Token @ 9..10
|
||||||
|
Lambda @ 11..22
|
||||||
|
Token @ 11..12
|
||||||
|
Params @ 12..13
|
||||||
|
Param @ 12..13
|
||||||
|
Token @ 12..13
|
||||||
|
Token @ 14..16
|
||||||
|
Binary @ 17..22
|
||||||
|
Ident @ 17..18
|
||||||
|
Token @ 17..18
|
||||||
|
Op @ 19..20
|
||||||
|
Token @ 19..20
|
||||||
|
Ident @ 21..22
|
||||||
|
Token @ 21..22",
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_ast_eq(
|
||||||
|
r#" f \x ->
|
||||||
|
g \y ->
|
||||||
|
x + y "#,
|
||||||
|
toplevel,
|
||||||
|
"
|
||||||
|
Toplevel @ 1..48
|
||||||
|
Call @ 1..48
|
||||||
|
Ident @ 1..2
|
||||||
|
Token @ 1..2
|
||||||
|
Lambda @ 3..48
|
||||||
|
Token @ 3..4
|
||||||
|
Params @ 4..5
|
||||||
|
Param @ 4..5
|
||||||
|
Token @ 4..5
|
||||||
|
Token @ 6..8
|
||||||
|
Token @ 8..9
|
||||||
|
Call @ 21..48
|
||||||
|
Ident @ 21..22
|
||||||
|
Token @ 21..22
|
||||||
|
Lambda @ 23..48
|
||||||
|
Token @ 23..24
|
||||||
|
Params @ 24..25
|
||||||
|
Param @ 24..25
|
||||||
|
Token @ 24..25
|
||||||
|
Token @ 26..28
|
||||||
|
Token @ 28..29
|
||||||
|
Binary @ 43..48
|
||||||
|
Ident @ 43..44
|
||||||
|
Token @ 43..44
|
||||||
|
Op @ 45..46
|
||||||
|
Token @ 45..46
|
||||||
|
Ident @ 47..48
|
||||||
|
Token @ 47..48",
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn if_expr() {
|
||||||
|
assert_ast_eq(
|
||||||
|
r#" if (true) 1 else 2 "#,
|
||||||
|
toplevel,
|
||||||
|
"
|
||||||
|
Toplevel @ 1..19
|
||||||
|
If @ 1..19
|
||||||
|
Token @ 1..3
|
||||||
|
Token @ 4..5
|
||||||
|
Ident @ 5..9
|
||||||
|
Token @ 5..9
|
||||||
|
Token @ 9..10
|
||||||
|
Number @ 11..12
|
||||||
|
Token @ 11..12
|
||||||
|
Token @ 13..17
|
||||||
|
Number @ 18..19
|
||||||
|
Token @ 18..19",
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_ast_eq(
|
||||||
|
r#" if (true)
|
||||||
|
1
|
||||||
|
else
|
||||||
|
2 "#,
|
||||||
|
toplevel,
|
||||||
|
"
|
||||||
|
Toplevel @ 1..63
|
||||||
|
If @ 1..63
|
||||||
|
Token @ 1..3
|
||||||
|
Token @ 4..5
|
||||||
|
Ident @ 5..9
|
||||||
|
Token @ 5..9
|
||||||
|
Token @ 9..10
|
||||||
|
Token @ 10..11
|
||||||
|
Number @ 27..28
|
||||||
|
Token @ 27..28
|
||||||
|
Token @ 28..29
|
||||||
|
Token @ 41..45
|
||||||
|
Token @ 45..46
|
||||||
|
Number @ 62..63
|
||||||
|
Token @ 62..63",
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn let_expr() {
|
||||||
|
assert_ast_eq(
|
||||||
|
r#" let x = 1
|
||||||
|
x "#,
|
||||||
|
toplevel,
|
||||||
|
"
|
||||||
|
Toplevel @ 1..24
|
||||||
|
Let @ 1..24
|
||||||
|
Token @ 1..4
|
||||||
|
Ident @ 5..6
|
||||||
|
Token @ 5..6
|
||||||
|
Token @ 7..8
|
||||||
|
Number @ 9..10
|
||||||
|
Token @ 9..10
|
||||||
|
Token @ 10..11
|
||||||
|
Ident @ 23..24
|
||||||
|
Token @ 23..24",
|
||||||
|
);
|
||||||
|
|
||||||
|
assert_ast_eq(
|
||||||
|
r#" let x = 1
|
||||||
|
let y = 2
|
||||||
|
x + y "#,
|
||||||
|
toplevel,
|
||||||
|
"
|
||||||
|
Toplevel @ 1..50
|
||||||
|
Let @ 1..50
|
||||||
|
Token @ 1..4
|
||||||
|
Ident @ 5..6
|
||||||
|
Token @ 5..6
|
||||||
|
Token @ 7..8
|
||||||
|
Number @ 9..10
|
||||||
|
Token @ 9..10
|
||||||
|
Token @ 10..11
|
||||||
|
Let @ 23..50
|
||||||
|
Token @ 23..26
|
||||||
|
Ident @ 27..28
|
||||||
|
Token @ 27..28
|
||||||
|
Token @ 29..30
|
||||||
|
Number @ 31..32
|
||||||
|
Token @ 31..32
|
||||||
|
Token @ 32..33
|
||||||
|
Binary @ 45..50
|
||||||
|
Ident @ 45..46
|
||||||
|
Token @ 45..46
|
||||||
|
Op @ 47..48
|
||||||
|
Token @ 47..48
|
||||||
|
Ident @ 49..50
|
||||||
|
Token @ 49..50",
|
||||||
|
)
|
||||||
|
}
|
|
@ -1,510 +0,0 @@
|
||||||
use core::{cell::Cell, fmt, ops::Deref};
|
|
||||||
|
|
||||||
use alloc::vec::Vec;
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
|
||||||
pub struct Span {
|
|
||||||
pub start: usize,
|
|
||||||
pub end: usize,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Span {
|
|
||||||
pub fn new(start: usize, end: usize) -> Self {
|
|
||||||
Self { start, end }
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn slice<'a>(&self, source: &'a SourceCode) -> &'a str {
|
|
||||||
&source.code[self.start..self.end]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Source code string with a verified size limit.
|
|
||||||
/// An exact size limit is not enforced by this type - it only ensures the string isn't longer than
|
|
||||||
/// intended, to not stall the parser for an unexpected amount of time.
|
|
||||||
#[derive(Debug, PartialEq, Eq)]
|
|
||||||
#[repr(transparent)]
|
|
||||||
pub struct SourceCode {
|
|
||||||
code: str,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl SourceCode {
|
|
||||||
pub fn limited_len(code: &str, max_len: usize) -> Option<&Self> {
|
|
||||||
if code.len() <= max_len {
|
|
||||||
Some(Self::unlimited_len(code))
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn unlimited_len(code: &str) -> &Self {
|
|
||||||
// SAFETY: SourceCode is a transparent wrapper around str, so converting between them is safe.
|
|
||||||
unsafe { core::mem::transmute(code) }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Deref for SourceCode {
|
|
||||||
type Target = str;
|
|
||||||
|
|
||||||
fn deref(&self) -> &Self::Target {
|
|
||||||
&self.code
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
|
||||||
pub struct NodeId(usize);
|
|
||||||
|
|
||||||
impl NodeId {
|
|
||||||
pub const NIL: NodeId = NodeId(0);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
|
||||||
pub enum NodeKind {
|
|
||||||
Nil,
|
|
||||||
Eof,
|
|
||||||
|
|
||||||
// Atoms
|
|
||||||
Ident,
|
|
||||||
Number,
|
|
||||||
|
|
||||||
List(NodeId, NodeId),
|
|
||||||
Toplevel(NodeId),
|
|
||||||
|
|
||||||
Error(&'static str),
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
|
||||||
pub struct Node {
|
|
||||||
pub span: Span,
|
|
||||||
pub kind: NodeKind,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
|
||||||
pub struct Ast {
|
|
||||||
pub nodes: Vec<Node>,
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
|
||||||
pub enum AstWriteMode {
|
|
||||||
Compact,
|
|
||||||
Spans,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Ast {
|
|
||||||
pub fn new(capacity: usize) -> Self {
|
|
||||||
assert!(capacity >= 1, "there must be space for at least a nil node");
|
|
||||||
|
|
||||||
let mut ast = Self {
|
|
||||||
nodes: Vec::with_capacity(capacity),
|
|
||||||
};
|
|
||||||
|
|
||||||
ast.alloc(Node {
|
|
||||||
span: Span::new(0, 0),
|
|
||||||
kind: NodeKind::Nil,
|
|
||||||
})
|
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
ast
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn alloc(&mut self, node: Node) -> Result<NodeId, NodeAllocError> {
|
|
||||||
if self.nodes.len() >= self.nodes.capacity() {
|
|
||||||
return Err(NodeAllocError);
|
|
||||||
}
|
|
||||||
|
|
||||||
let index = self.nodes.len();
|
|
||||||
self.nodes.push(node);
|
|
||||||
Ok(NodeId(index))
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn get(&self, node_id: NodeId) -> &Node {
|
|
||||||
&self.nodes[node_id.0]
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn get_mut(&mut self, node_id: NodeId) -> &mut Node {
|
|
||||||
&mut self.nodes[node_id.0]
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn write(
|
|
||||||
&self,
|
|
||||||
source: &SourceCode,
|
|
||||||
node_id: NodeId,
|
|
||||||
w: &mut dyn fmt::Write,
|
|
||||||
mode: AstWriteMode,
|
|
||||||
) -> fmt::Result {
|
|
||||||
#[allow(clippy::too_many_arguments)]
|
|
||||||
fn write_list(
|
|
||||||
ast: &Ast,
|
|
||||||
source: &SourceCode,
|
|
||||||
w: &mut dyn fmt::Write,
|
|
||||||
mode: AstWriteMode,
|
|
||||||
mut head: NodeId,
|
|
||||||
mut tail: NodeId,
|
|
||||||
sep_element: &str,
|
|
||||||
sep_tail: &str,
|
|
||||||
) -> fmt::Result {
|
|
||||||
loop {
|
|
||||||
write_rec(ast, source, w, mode, head)?;
|
|
||||||
match ast.get(tail).kind {
|
|
||||||
NodeKind::Nil => break,
|
|
||||||
NodeKind::List(head2, tail2) => {
|
|
||||||
w.write_str(sep_element)?;
|
|
||||||
(head, tail) = (head2, tail2);
|
|
||||||
}
|
|
||||||
_ => {
|
|
||||||
w.write_str(sep_tail)?;
|
|
||||||
write_rec(ast, source, w, mode, tail)?;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
// NOTE: Separated out to a separate function in case we ever want to introduce auto-indentation.
|
|
||||||
fn write_rec(
|
|
||||||
ast: &Ast,
|
|
||||||
source: &SourceCode,
|
|
||||||
w: &mut dyn fmt::Write,
|
|
||||||
mode: AstWriteMode,
|
|
||||||
node_id: NodeId,
|
|
||||||
) -> fmt::Result {
|
|
||||||
let node = ast.get(node_id);
|
|
||||||
match &node.kind {
|
|
||||||
NodeKind::Nil => write!(w, "()")?,
|
|
||||||
NodeKind::Eof => write!(w, "<eof>")?,
|
|
||||||
NodeKind::Ident | NodeKind::Number => write!(w, "{}", node.span.slice(source))?,
|
|
||||||
|
|
||||||
NodeKind::List(head, tail) => {
|
|
||||||
w.write_char('(')?;
|
|
||||||
write_list(ast, source, w, mode, *head, *tail, " ", " . ")?;
|
|
||||||
w.write_char(')')?;
|
|
||||||
}
|
|
||||||
|
|
||||||
NodeKind::Toplevel(list) => {
|
|
||||||
let NodeKind::List(head, tail) = ast.get(*list).kind else {
|
|
||||||
unreachable!("child of Toplevel must be a List");
|
|
||||||
};
|
|
||||||
|
|
||||||
write_list(ast, source, w, mode, head, tail, "\n", " . ")?;
|
|
||||||
}
|
|
||||||
|
|
||||||
NodeKind::Error(message) => write!(w, "#error({message})")?,
|
|
||||||
}
|
|
||||||
|
|
||||||
if mode == AstWriteMode::Spans {
|
|
||||||
write!(w, "@{}..{}", node.span.start, node.span.end)?;
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
write_rec(self, source, w, mode, node_id)?;
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
|
||||||
pub struct NodeAllocError;
|
|
||||||
|
|
||||||
pub struct Parser<'a> {
|
|
||||||
pub ast: Ast,
|
|
||||||
input: &'a SourceCode,
|
|
||||||
position: usize,
|
|
||||||
fuel: Cell<usize>,
|
|
||||||
alloc_error: NodeId,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> Parser<'a> {
|
|
||||||
const FUEL: usize = 256;
|
|
||||||
|
|
||||||
pub fn new(mut ast: Ast, input: &'a SourceCode) -> Self {
|
|
||||||
let alloc_error = ast
|
|
||||||
.alloc(Node {
|
|
||||||
span: Span::new(0, 0),
|
|
||||||
kind: NodeKind::Error("program is too big"),
|
|
||||||
})
|
|
||||||
.expect("there is not enough space in the arena for an error node");
|
|
||||||
|
|
||||||
Self {
|
|
||||||
ast,
|
|
||||||
input,
|
|
||||||
position: 0,
|
|
||||||
fuel: Cell::new(Self::FUEL),
|
|
||||||
alloc_error,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[track_caller]
|
|
||||||
pub fn current(&self) -> char {
|
|
||||||
assert_ne!(self.fuel.get(), 0, "parser is stuck");
|
|
||||||
self.fuel.set(self.fuel.get() - 1);
|
|
||||||
|
|
||||||
self.input[self.position..].chars().next().unwrap_or('\0')
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn advance(&mut self) {
|
|
||||||
self.position += self.current().len_utf8();
|
|
||||||
self.fuel.set(Self::FUEL);
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn alloc(&mut self, expr: Node) -> NodeId {
|
|
||||||
self.ast.alloc(expr).unwrap_or(self.alloc_error)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn skip_whitespace_and_comments(p: &mut Parser<'_>) {
|
|
||||||
loop {
|
|
||||||
match p.current() {
|
|
||||||
' ' | '\t' | '\n' => {
|
|
||||||
p.advance();
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
';' => {
|
|
||||||
while p.current() != '\n' && p.current() != '\0' {
|
|
||||||
p.advance();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
_ => break,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns whether `c` is one of the ASCII decimal digits `0` through `9`.
fn is_decimal_digit(c: char) -> bool {
    matches!(c, '0'..='9')
}
|
|
||||||
|
|
||||||
pub fn parse_number(p: &mut Parser<'_>) -> NodeKind {
|
|
||||||
while is_decimal_digit(p.current()) {
|
|
||||||
p.advance();
|
|
||||||
}
|
|
||||||
if p.current() == '.' {
|
|
||||||
p.advance();
|
|
||||||
if !is_decimal_digit(p.current()) {
|
|
||||||
return NodeKind::Error("missing digits after decimal point '.' in number literal");
|
|
||||||
}
|
|
||||||
while is_decimal_digit(p.current()) {
|
|
||||||
p.advance();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
NodeKind::Number
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Returns whether `c` may appear in an identifier.
///
/// The accepted character set is deliberately small, to leave room for easy expansion in the
/// future:
/// - ASCII letters and digits;
/// - `-` and `_`, usable as separators inside identifiers, whichever you prefer;
/// - `+`, `-`, `*`, `/`, `^` for arithmetic;
/// - `=`, `!`, `<`, `>` for comparison;
/// - `\` for builtin string constants, such as `\n`.
///
/// For other operators, it's generally clearer to use words (such as `and` and `or`).
fn is_ident(c: char) -> bool {
    // Every non-alphanumeric character that is allowed inside an identifier.
    const SYMBOLS: &str = "-_+*/\\^!=<>";

    c.is_ascii_alphanumeric() || SYMBOLS.contains(c)
}
|
|
||||||
|
|
||||||
pub fn parse_ident(p: &mut Parser<'_>) -> NodeKind {
|
|
||||||
while is_ident(p.current()) {
|
|
||||||
p.advance();
|
|
||||||
}
|
|
||||||
|
|
||||||
NodeKind::Ident
|
|
||||||
}
|
|
||||||
|
|
||||||
struct List {
|
|
||||||
head: NodeId,
|
|
||||||
tail: NodeId,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl List {
|
|
||||||
fn new() -> Self {
|
|
||||||
Self {
|
|
||||||
head: NodeId::NIL,
|
|
||||||
tail: NodeId::NIL,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fn append(&mut self, p: &mut Parser<'_>, node: NodeId) {
|
|
||||||
let node_span = p.ast.get(node).span;
|
|
||||||
|
|
||||||
let new_tail = p.alloc(Node {
|
|
||||||
span: node_span,
|
|
||||||
kind: NodeKind::List(node, NodeId::NIL),
|
|
||||||
});
|
|
||||||
if self.head == NodeId::NIL {
|
|
||||||
self.head = new_tail;
|
|
||||||
self.tail = new_tail;
|
|
||||||
} else {
|
|
||||||
let old_tail = p.ast.get_mut(self.tail);
|
|
||||||
let NodeKind::List(expr_before, _) = old_tail.kind else {
|
|
||||||
return;
|
|
||||||
};
|
|
||||||
*old_tail = Node {
|
|
||||||
span: Span::new(old_tail.span.start, node_span.end),
|
|
||||||
kind: NodeKind::List(expr_before, new_tail),
|
|
||||||
};
|
|
||||||
self.tail = new_tail;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn parse_list(p: &mut Parser<'_>) -> NodeId {
|
|
||||||
// This could've been a lot simpler if Rust supported tail recursion.
|
|
||||||
|
|
||||||
let start = p.position;
|
|
||||||
|
|
||||||
p.advance(); // skip past opening parenthesis
|
|
||||||
skip_whitespace_and_comments(p);
|
|
||||||
|
|
||||||
let mut list = List::new();
|
|
||||||
|
|
||||||
while p.current() != ')' {
|
|
||||||
if p.current() == '\0' {
|
|
||||||
return p.alloc(Node {
|
|
||||||
span: Span::new(start, p.position),
|
|
||||||
kind: NodeKind::Error("missing ')' to close '('"),
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
let expr = parse_expr(p);
|
|
||||||
skip_whitespace_and_comments(p);
|
|
||||||
|
|
||||||
list.append(p, expr);
|
|
||||||
}
|
|
||||||
p.advance(); // skip past closing parenthesis
|
|
||||||
|
|
||||||
// If we didn't have any elements, we must not modify the initial Nil with ID 0.
|
|
||||||
if list.head == NodeId::NIL {
|
|
||||||
list.head = p.alloc(Node {
|
|
||||||
span: Span::new(0, 0),
|
|
||||||
kind: NodeKind::Nil,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
let end = p.position;
|
|
||||||
p.ast.get_mut(list.head).span = Span::new(start, end);
|
|
||||||
|
|
||||||
list.head
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn parse_expr(p: &mut Parser<'_>) -> NodeId {
|
|
||||||
let start = p.position;
|
|
||||||
let kind = match p.current() {
|
|
||||||
'\0' => NodeKind::Eof,
|
|
||||||
c if is_decimal_digit(c) => parse_number(p),
|
|
||||||
// NOTE: Because of the `match` order, this prevents identifiers from starting with a digit.
|
|
||||||
c if is_ident(c) => parse_ident(p),
|
|
||||||
'(' => return parse_list(p),
|
|
||||||
_ => {
|
|
||||||
p.advance();
|
|
||||||
NodeKind::Error("unexpected character")
|
|
||||||
}
|
|
||||||
};
|
|
||||||
let end = p.position;
|
|
||||||
|
|
||||||
p.alloc(Node {
|
|
||||||
span: Span::new(start, end),
|
|
||||||
kind,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn parse_toplevel(p: &mut Parser<'_>) -> NodeId {
|
|
||||||
let start = p.position;
|
|
||||||
|
|
||||||
let mut nodes = List::new();
|
|
||||||
|
|
||||||
skip_whitespace_and_comments(p);
|
|
||||||
while p.current() != '\0' {
|
|
||||||
let expr = parse_expr(p);
|
|
||||||
skip_whitespace_and_comments(p);
|
|
||||||
|
|
||||||
nodes.append(p, expr);
|
|
||||||
}
|
|
||||||
|
|
||||||
let end = p.position;
|
|
||||||
|
|
||||||
p.alloc(Node {
|
|
||||||
span: Span::new(start, end),
|
|
||||||
kind: NodeKind::Toplevel(nodes.head),
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
|
|
||||||
mod tests {
|
|
||||||
use core::error::Error;
|
|
||||||
|
|
||||||
use alloc::{boxed::Box, string::String};
|
|
||||||
|
|
||||||
use super::*;
|
|
||||||
|
|
||||||
#[track_caller]
|
|
||||||
fn parse(
|
|
||||||
f: fn(&mut Parser<'_>) -> NodeId,
|
|
||||||
source: &str,
|
|
||||||
expected: &str,
|
|
||||||
) -> Result<(), Box<dyn Error>> {
|
|
||||||
let ast = Ast::new(16);
|
|
||||||
let code = SourceCode::unlimited_len(source);
|
|
||||||
let mut p = Parser::new(ast, code);
|
|
||||||
let node = f(&mut p);
|
|
||||||
let ast = p.ast;
|
|
||||||
|
|
||||||
let mut s = String::new();
|
|
||||||
ast.write(code, node, &mut s, AstWriteMode::Spans)?;
|
|
||||||
|
|
||||||
assert_eq!(s, expected);
|
|
||||||
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn parse_number() -> Result<(), Box<dyn Error>> {
|
|
||||||
parse(parse_expr, "123", "123@0..3")?;
|
|
||||||
parse(parse_expr, "123.456", "123.456@0..7")?;
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn parse_ident() -> Result<(), Box<dyn Error>> {
|
|
||||||
parse(parse_expr, "abc", "abc@0..3")?;
|
|
||||||
parse(parse_expr, "abcABC_01234", "abcABC_01234@0..12")?;
|
|
||||||
parse(parse_expr, "+-*/\\^!=<>", "+-*/\\^!=<>@0..10")?;
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn parse_list() -> Result<(), Box<dyn Error>> {
|
|
||||||
parse(parse_expr, "()", "()@0..2")?;
|
|
||||||
parse(parse_expr, "(a a)", "(a@1..2 a@3..4)@0..5")?;
|
|
||||||
parse(parse_expr, "(a a a)", "(a@1..2 a@3..4 a@5..6)@0..7")?;
|
|
||||||
parse(parse_expr, "(() ())", "(()@1..3 ()@4..6)@0..7")?;
|
|
||||||
parse(
|
|
||||||
parse_expr,
|
|
||||||
"(nestedy (nest OwO))",
|
|
||||||
"(nestedy@1..8 (nest@10..14 OwO@15..18)@9..19)@0..20",
|
|
||||||
)?;
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn oom() -> Result<(), Box<dyn Error>> {
|
|
||||||
parse(parse_expr, "(a a a a a a a a)", "(a@1..2 a@3..4 a@5..6 a@7..8 a@9..10 a@11..12 a@13..14 . #error(program is too big)@0..0)@0..17")?;
|
|
||||||
parse(parse_expr, "(a a a a a a a a a)", "(a@1..2 a@3..4 a@5..6 a@7..8 a@9..10 a@11..12 a@13..14 . #error(program is too big)@0..0)@0..19")?;
|
|
||||||
parse(parse_expr, "(a a a a a a a a a a)", "(a@1..2 a@3..4 a@5..6 a@7..8 a@9..10 a@11..12 a@13..14 . #error(program is too big)@0..0)@0..21")?;
|
|
||||||
parse(parse_expr, "(a a a a a a a a a a a)", "(a@1..2 a@3..4 a@5..6 a@7..8 a@9..10 a@11..12 a@13..14 . #error(program is too big)@0..0)@0..23")?;
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn toplevel() -> Result<(), Box<dyn Error>> {
|
|
||||||
parse(
|
|
||||||
parse_toplevel,
|
|
||||||
r#"
|
|
||||||
(hello world)
|
|
||||||
(abc)
|
|
||||||
"#,
|
|
||||||
"(hello@18..23 world@24..29)@17..30\n(abc@48..51)@47..52@0..65",
|
|
||||||
)?;
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
}
|
|
55
crates/haku/src/source.rs
Normal file
55
crates/haku/src/source.rs
Normal file
|
@ -0,0 +1,55 @@
|
||||||
|
use core::{fmt, ops::Deref};
|
||||||
|
|
||||||
|
#[derive(Clone, Copy, PartialEq, Eq)]
|
||||||
|
pub struct Span {
|
||||||
|
pub start: u32,
|
||||||
|
pub end: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Span {
|
||||||
|
pub fn new(start: u32, end: u32) -> Self {
|
||||||
|
Self { start, end }
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn slice<'a>(&self, source: &'a SourceCode) -> &'a str {
|
||||||
|
&source.code[self.start as usize..self.end as usize]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl fmt::Debug for Span {
|
||||||
|
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||||
|
write!(f, "{}..{}", self.start, self.end)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Source code string with a verified size limit.
/// An exact size limit is not enforced by this type - it only ensures the string isn't longer than
/// intended, to not stall the parser for an unexpected amount of time.
#[derive(Debug, PartialEq, Eq)]
#[repr(transparent)]
pub struct SourceCode {
    code: str,
}

impl SourceCode {
    /// Wraps `code` as source code if it is at most `max_len` bytes long.
    ///
    /// Returns `None` when `code` is longer than `max_len` bytes.
    pub fn limited_len(code: &str, max_len: u32) -> Option<&Self> {
        if code.len() <= max_len as usize {
            Some(Self::unlimited_len(code))
        } else {
            None
        }
    }

    /// Wraps `code` as source code without checking its length.
    ///
    /// The returned reference points at the same bytes as `code`; nothing is copied.
    pub fn unlimited_len(code: &str) -> &Self {
        // A pointer cast is used instead of `transmute`: the compiler checks that both pointee
        // types carry the same kind of pointer metadata, so the cast stops compiling if the
        // struct's layout ever stops matching `str`.
        //
        // SAFETY: SourceCode is a `#[repr(transparent)]` wrapper around str, so both types have
        // the same layout and validity requirements, and the resulting reference inherits the
        // lifetime of `code`.
        unsafe { &*(code as *const str as *const Self) }
    }
}
|
||||||
|
|
||||||
|
impl Deref for SourceCode {
|
||||||
|
type Target = str;
|
||||||
|
|
||||||
|
fn deref(&self) -> &Self::Target {
|
||||||
|
&self.code
|
||||||
|
}
|
||||||
|
}
|
|
@ -16,10 +16,17 @@ pub type SystemFn = fn(&mut Vm, FnArgs) -> Result<Value, Exception>;
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||||
pub struct ChunkId(u32);
|
pub struct ChunkId(u32);
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||||
|
pub enum SystemFnArity {
|
||||||
|
Unary,
|
||||||
|
Binary,
|
||||||
|
Nary,
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct System {
|
pub struct System {
|
||||||
/// Resolves a system function name to an index into `fn`s.
|
/// Resolves a system function name to an index into `fn`s.
|
||||||
pub resolve_fn: fn(&str) -> Option<u8>,
|
pub resolve_fn: fn(SystemFnArity, &str) -> Option<u8>,
|
||||||
pub fns: [Option<SystemFn>; 256],
|
pub fns: [Option<SystemFn>; 256],
|
||||||
pub chunks: Vec<Chunk>,
|
pub chunks: Vec<Chunk>,
|
||||||
}
|
}
|
||||||
|
@ -30,7 +37,7 @@ pub struct SystemImage {
|
||||||
}
|
}
|
||||||
|
|
||||||
macro_rules! def_fns {
|
macro_rules! def_fns {
|
||||||
($($index:tt $name:tt => $fnref:expr),* $(,)?) => {
|
($($index:tt $arity:tt $name:tt => $fnref:expr),* $(,)?) => {
|
||||||
pub(crate) fn init_fns(system: &mut System) {
|
pub(crate) fn init_fns(system: &mut System) {
|
||||||
$(
|
$(
|
||||||
debug_assert!(system.fns[$index].is_none());
|
debug_assert!(system.fns[$index].is_none());
|
||||||
|
@ -38,9 +45,9 @@ macro_rules! def_fns {
|
||||||
)*
|
)*
|
||||||
}
|
}
|
||||||
|
|
||||||
pub(crate) fn resolve(name: &str) -> Option<u8> {
|
pub(crate) fn resolve(arity: SystemFnArity, name: &str) -> Option<u8> {
|
||||||
match name {
|
match (arity, name){
|
||||||
$($name => Some($index),)*
|
$((SystemFnArity::$arity, $name) => Some($index),)*
|
||||||
_ => None,
|
_ => None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -106,43 +113,44 @@ pub mod fns {
|
||||||
vm::{Exception, FnArgs, Vm},
|
vm::{Exception, FnArgs, Vm},
|
||||||
};
|
};
|
||||||
|
|
||||||
use super::System;
|
use super::{System, SystemFnArity};
|
||||||
|
|
||||||
impl System {
|
impl System {
|
||||||
def_fns! {
|
def_fns! {
|
||||||
0x00 "+" => add,
|
0x00 Binary "+" => add,
|
||||||
0x01 "-" => sub,
|
0x01 Binary "-" => sub,
|
||||||
0x02 "*" => mul,
|
0x02 Binary "*" => mul,
|
||||||
0x03 "/" => div,
|
0x03 Binary "/" => div,
|
||||||
|
0x04 Unary "-" => neg,
|
||||||
|
|
||||||
0x40 "not" => not,
|
0x40 Unary "!" => not,
|
||||||
0x41 "=" => eq,
|
0x41 Binary "==" => eq,
|
||||||
0x42 "<>" => neq,
|
0x42 Binary "!=" => neq,
|
||||||
0x43 "<" => lt,
|
0x43 Binary "<" => lt,
|
||||||
0x44 "<=" => leq,
|
0x44 Binary "<=" => leq,
|
||||||
0x45 ">" => gt,
|
0x45 Binary ">" => gt,
|
||||||
0x46 ">=" => geq,
|
0x46 Binary ">=" => geq,
|
||||||
|
|
||||||
0x80 "vec" => vec,
|
0x80 Nary "vec" => vec,
|
||||||
0x81 ".x" => vec_x,
|
0x81 Nary "vecX" => vec_x,
|
||||||
0x82 ".y" => vec_y,
|
0x82 Nary "vecY" => vec_y,
|
||||||
0x83 ".z" => vec_z,
|
0x83 Nary "vecZ" => vec_z,
|
||||||
0x84 ".w" => vec_w,
|
0x84 Nary "vecW" => vec_w,
|
||||||
|
|
||||||
0x85 "rgba" => rgba,
|
0x85 Nary "rgba" => rgba,
|
||||||
0x86 ".r" => rgba_r,
|
0x86 Nary "rgbaR" => rgba_r,
|
||||||
0x87 ".g" => rgba_g,
|
0x87 Nary "rgbaG" => rgba_g,
|
||||||
0x88 ".b" => rgba_b,
|
0x88 Nary "rgbaB" => rgba_b,
|
||||||
0x89 ".a" => rgba_a,
|
0x89 Nary "rgbaA" => rgba_a,
|
||||||
|
|
||||||
0x90 "list" => list,
|
0x90 Nary "list" => list,
|
||||||
|
|
||||||
0xc0 "to-shape" => to_shape_f,
|
0xc0 Nary "toShape" => to_shape_f,
|
||||||
0xc1 "line" => line,
|
0xc1 Nary "line" => line,
|
||||||
0xc2 "rect" => rect,
|
0xc2 Nary "rect" => rect,
|
||||||
0xc3 "circle" => circle,
|
0xc3 Nary "circle" => circle,
|
||||||
0xe0 "stroke" => stroke,
|
0xe0 Nary "stroke" => stroke,
|
||||||
0xe1 "fill" => fill,
|
0xe1 Nary "fill" => fill,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -196,6 +204,11 @@ pub mod fns {
|
||||||
Ok(Value::Number(result))
|
Ok(Value::Number(result))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn neg(vm: &mut Vm, args: FnArgs) -> Result<Value, Exception> {
|
||||||
|
let x = args.get_number(vm, 0, "`-` can only work with numbers")?;
|
||||||
|
Ok(Value::Number(-x))
|
||||||
|
}
|
||||||
|
|
||||||
pub fn not(vm: &mut Vm, args: FnArgs) -> Result<Value, Exception> {
|
pub fn not(vm: &mut Vm, args: FnArgs) -> Result<Value, Exception> {
|
||||||
if args.num() != 1 {
|
if args.num() != 1 {
|
||||||
return Err(vm.create_exception("(not) expects a single argument to negate"));
|
return Err(vm.create_exception("(not) expects a single argument to negate"));
|
||||||
|
|
143
crates/haku/src/token.rs
Normal file
143
crates/haku/src/token.rs
Normal file
|
@ -0,0 +1,143 @@
|
||||||
|
use core::{error::Error, fmt::Display};
|
||||||
|
|
||||||
|
use alloc::vec::Vec;
|
||||||
|
|
||||||
|
use crate::source::Span;
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||||
|
pub enum TokenKind {
|
||||||
|
Eof,
|
||||||
|
|
||||||
|
Ident,
|
||||||
|
Tag,
|
||||||
|
Number,
|
||||||
|
Color,
|
||||||
|
|
||||||
|
// Operators
|
||||||
|
Plus,
|
||||||
|
Minus,
|
||||||
|
Star,
|
||||||
|
Slash,
|
||||||
|
EqualEqual,
|
||||||
|
NotEqual,
|
||||||
|
Less,
|
||||||
|
LessEqual,
|
||||||
|
Greater,
|
||||||
|
GreaterEqual,
|
||||||
|
Not,
|
||||||
|
|
||||||
|
// Punctuation
|
||||||
|
Newline,
|
||||||
|
LParen,
|
||||||
|
RParen,
|
||||||
|
LBrack,
|
||||||
|
RBrack,
|
||||||
|
Comma,
|
||||||
|
Equal,
|
||||||
|
Backslash,
|
||||||
|
RArrow,
|
||||||
|
|
||||||
|
// Keywords
|
||||||
|
Underscore,
|
||||||
|
And,
|
||||||
|
Or,
|
||||||
|
If,
|
||||||
|
Else,
|
||||||
|
Let,
|
||||||
|
|
||||||
|
// NOTE: This must be kept last for TokenKindSet to work correctly.
|
||||||
|
Error,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct Lexis {
|
||||||
|
pub kinds: Vec<TokenKind>,
|
||||||
|
pub spans: Vec<Span>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Lexis {
|
||||||
|
pub fn new(capacity: usize) -> Self {
|
||||||
|
assert!(capacity < u32::MAX as usize);
|
||||||
|
|
||||||
|
Self {
|
||||||
|
kinds: Vec::with_capacity(capacity),
|
||||||
|
spans: Vec::with_capacity(capacity),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn len(&self) -> u32 {
|
||||||
|
self.kinds.len() as u32
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn is_empty(&self) -> bool {
|
||||||
|
self.len() == 0
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn push(&mut self, kind: TokenKind, span: Span) -> Result<(), TokenAllocError> {
|
||||||
|
if self.kinds.len() >= self.kinds.capacity() {
|
||||||
|
return Err(TokenAllocError);
|
||||||
|
}
|
||||||
|
|
||||||
|
self.kinds.push(kind);
|
||||||
|
self.spans.push(span);
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn kind(&self, position: u32) -> TokenKind {
|
||||||
|
self.kinds[position as usize]
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn span(&self, position: u32) -> Span {
|
||||||
|
self.spans[position as usize]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||||
|
pub struct TokenAllocError;
|
||||||
|
|
||||||
|
impl Display for TokenAllocError {
|
||||||
|
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
|
||||||
|
f.write_str("too many tokens")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl Error for TokenAllocError {}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||||
|
pub struct TokenKindSet {
|
||||||
|
bits: [u32; Self::WORDS],
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TokenKindSet {
|
||||||
|
const WORDS: usize = ((TokenKind::Error as u32 + u32::BITS - 1) / (u32::BITS)) as usize;
|
||||||
|
|
||||||
|
const fn word(kind: TokenKind) -> usize {
|
||||||
|
(kind as u32 / u32::BITS) as usize
|
||||||
|
}
|
||||||
|
|
||||||
|
const fn bit(kind: TokenKind) -> u32 {
|
||||||
|
1 << (kind as u32 % u32::BITS)
|
||||||
|
}
|
||||||
|
|
||||||
|
pub const fn new(elems: &[TokenKind]) -> Self {
|
||||||
|
let mut set = Self {
|
||||||
|
bits: [0; Self::WORDS],
|
||||||
|
};
|
||||||
|
let mut i = 0;
|
||||||
|
while i < elems.len() {
|
||||||
|
set = set.include(elems[i]);
|
||||||
|
i += 1;
|
||||||
|
}
|
||||||
|
set
|
||||||
|
}
|
||||||
|
|
||||||
|
pub const fn include(mut self, kind: TokenKind) -> Self {
|
||||||
|
self.bits[Self::word(kind)] |= Self::bit(kind);
|
||||||
|
self
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn contains(&self, kind: TokenKind) -> bool {
|
||||||
|
self.bits[Self::word(kind)] & Self::bit(kind) != 0
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,6 +1,6 @@
|
||||||
use alloc::vec::Vec;
|
use alloc::vec::Vec;
|
||||||
|
|
||||||
use crate::system::ChunkId;
|
use crate::{compiler::ClosureSpec, system::ChunkId};
|
||||||
|
|
||||||
// TODO: Probably needs some pretty hardcore space optimization.
|
// TODO: Probably needs some pretty hardcore space optimization.
|
||||||
// Maybe when we have static typing.
|
// Maybe when we have static typing.
|
||||||
|
@ -156,9 +156,25 @@ pub struct Closure {
|
||||||
pub start: BytecodeLoc,
|
pub start: BytecodeLoc,
|
||||||
pub name: FunctionName,
|
pub name: FunctionName,
|
||||||
pub param_count: u8,
|
pub param_count: u8,
|
||||||
|
pub local_count: u8,
|
||||||
pub captures: Vec<Value>,
|
pub captures: Vec<Value>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl Closure {
|
||||||
|
pub fn chunk(chunk_id: ChunkId, spec: ClosureSpec) -> Self {
|
||||||
|
Self {
|
||||||
|
start: BytecodeLoc {
|
||||||
|
chunk_id,
|
||||||
|
offset: 0,
|
||||||
|
},
|
||||||
|
name: FunctionName::Anonymous,
|
||||||
|
param_count: 0,
|
||||||
|
local_count: spec.local_count,
|
||||||
|
captures: Vec::new(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq)]
|
#[derive(Debug, Clone, PartialEq)]
|
||||||
pub struct List {
|
pub struct List {
|
||||||
pub elements: Vec<Value>,
|
pub elements: Vec<Value>,
|
||||||
|
|
|
@ -123,8 +123,9 @@ impl Vm {
|
||||||
|
|
||||||
fn push(&mut self, value: Value) -> Result<(), Exception> {
|
fn push(&mut self, value: Value) -> Result<(), Exception> {
|
||||||
if self.stack.len() >= self.stack.capacity() {
|
if self.stack.len() >= self.stack.capacity() {
|
||||||
// TODO: can this error message be made clearer?
|
return Err(self.create_exception(
|
||||||
return Err(self.create_exception("too many local variables"));
|
"too many temporary values (local variables and expression operands)",
|
||||||
|
));
|
||||||
}
|
}
|
||||||
self.stack.push(value);
|
self.stack.push(value);
|
||||||
Ok(())
|
Ok(())
|
||||||
|
@ -136,6 +137,14 @@ impl Vm {
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns a mutable reference to the stack slot at `index`.
///
/// # Errors
///
/// Returns a "corrupted bytecode" exception when `index` is past the end of the stack.
fn get_mut(&mut self, index: usize) -> Result<&mut Value, Exception> {
    // The bounds check happens before the slot is borrowed, so the error path can still
    // use `self` to build the exception.
    if index >= self.stack.len() {
        return Err(self.create_exception("corrupted bytecode (set local variable out of bounds)"));
    }
    Ok(&mut self.stack[index])
}
|
||||||
|
|
||||||
fn pop(&mut self) -> Result<Value, Exception> {
|
fn pop(&mut self) -> Result<Value, Exception> {
|
||||||
self.stack
|
self.stack
|
||||||
.pop()
|
.pop()
|
||||||
|
@ -168,6 +177,11 @@ impl Vm {
|
||||||
let mut bottom = self.stack.len();
|
let mut bottom = self.stack.len();
|
||||||
let mut fuel = self.fuel;
|
let mut fuel = self.fuel;
|
||||||
|
|
||||||
|
let init_bottom = bottom;
|
||||||
|
for _ in 0..closure.local_count {
|
||||||
|
self.push(Value::Nil)?;
|
||||||
|
}
|
||||||
|
|
||||||
#[allow(unused)]
|
#[allow(unused)]
|
||||||
let closure = (); // Do not use `closure` after this! Use `get_ref` on `closure_id` instead.
|
let closure = (); // Do not use `closure` after this! Use `get_ref` on `closure_id` instead.
|
||||||
|
|
||||||
|
@ -200,6 +214,12 @@ impl Vm {
|
||||||
self.push(value)?;
|
self.push(value)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Opcode::SetLocal => {
|
||||||
|
let index = chunk.read_u8(&mut pc)? as usize;
|
||||||
|
let new_value = self.pop()?;
|
||||||
|
*self.get_mut(index)? = new_value;
|
||||||
|
}
|
||||||
|
|
||||||
Opcode::Capture => {
|
Opcode::Capture => {
|
||||||
let index = chunk.read_u8(&mut pc)? as usize;
|
let index = chunk.read_u8(&mut pc)? as usize;
|
||||||
let closure = self.get_ref(closure_id).as_closure().unwrap();
|
let closure = self.get_ref(closure_id).as_closure().unwrap();
|
||||||
|
@ -226,26 +246,14 @@ impl Vm {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Opcode::DropLet => {
|
|
||||||
let count = chunk.read_u8(&mut pc)? as usize;
|
|
||||||
if count != 0 {
|
|
||||||
let new_len = self.stack.len().checked_sub(count).ok_or_else(|| {
|
|
||||||
self.create_exception(
|
|
||||||
"corrupted bytecode (Drop tried to drop too many values off the stack)",
|
|
||||||
)
|
|
||||||
})?;
|
|
||||||
let value = self.pop()?;
|
|
||||||
self.stack.resize_with(new_len, || unreachable!());
|
|
||||||
self.push(value)?;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
Opcode::Function => {
|
Opcode::Function => {
|
||||||
let param_count = chunk.read_u8(&mut pc)?;
|
let param_count = chunk.read_u8(&mut pc)?;
|
||||||
let then = chunk.read_u16(&mut pc)? as usize;
|
let then = chunk.read_u16(&mut pc)? as usize;
|
||||||
let body = pc;
|
let body = pc;
|
||||||
pc = then;
|
pc = then;
|
||||||
|
|
||||||
|
let local_count = chunk.read_u8(&mut pc)?;
|
||||||
|
|
||||||
let capture_count = chunk.read_u8(&mut pc)? as usize;
|
let capture_count = chunk.read_u8(&mut pc)? as usize;
|
||||||
let mut captures = Vec::with_capacity(capture_count);
|
let mut captures = Vec::with_capacity(capture_count);
|
||||||
for _ in 0..capture_count {
|
for _ in 0..capture_count {
|
||||||
|
@ -272,6 +280,7 @@ impl Vm {
|
||||||
},
|
},
|
||||||
name: FunctionName::Anonymous,
|
name: FunctionName::Anonymous,
|
||||||
param_count,
|
param_count,
|
||||||
|
local_count,
|
||||||
captures,
|
captures,
|
||||||
}))?;
|
}))?;
|
||||||
self.push(Value::Ref(id))?;
|
self.push(Value::Ref(id))?;
|
||||||
|
@ -327,6 +336,11 @@ impl Vm {
|
||||||
)
|
)
|
||||||
})?;
|
})?;
|
||||||
|
|
||||||
|
// NOTE: Locals are only pushed _after_ we do any stack calculations.
|
||||||
|
for _ in 0..closure.local_count {
|
||||||
|
self.push(Value::Nil)?;
|
||||||
|
}
|
||||||
|
|
||||||
self.push_call(frame)?;
|
self.push_call(frame)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -381,10 +395,13 @@ impl Vm {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(self
|
let result = self
|
||||||
.stack
|
.stack
|
||||||
.pop()
|
.pop()
|
||||||
.expect("there should be a result at the top of the stack"))
|
.expect("there should be a result at the top of the stack");
|
||||||
|
self.stack.resize_with(init_bottom, || unreachable!());
|
||||||
|
|
||||||
|
Ok(result)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn store_context(&mut self, context: Context) {
|
fn store_context(&mut self, context: Context) {
|
||||||
|
|
|
@ -1,10 +1,14 @@
|
||||||
use std::error::Error;
|
use std::error::Error;
|
||||||
|
|
||||||
use haku::{
|
use haku::{
|
||||||
|
ast::{dump::dump, Ast},
|
||||||
bytecode::{Chunk, Defs},
|
bytecode::{Chunk, Defs},
|
||||||
compiler::{compile_expr, Compiler, Source},
|
compiler::{compile_expr, Compiler, Source},
|
||||||
sexp::{self, Ast, Parser, SourceCode},
|
lexer::{lex, Lexer},
|
||||||
|
parser::{self, Parser, ParserLimits},
|
||||||
|
source::SourceCode,
|
||||||
system::System,
|
system::System,
|
||||||
|
token::Lexis,
|
||||||
value::{BytecodeLoc, Closure, FunctionName, Ref, RefId, Value},
|
value::{BytecodeLoc, Closure, FunctionName, Ref, RefId, Value},
|
||||||
vm::{Vm, VmLimits},
|
vm::{Vm, VmLimits},
|
||||||
};
|
};
|
||||||
|
@ -12,11 +16,16 @@ use haku::{
|
||||||
fn eval(code: &str) -> Result<Value, Box<dyn Error>> {
|
fn eval(code: &str) -> Result<Value, Box<dyn Error>> {
|
||||||
let mut system = System::new(1);
|
let mut system = System::new(1);
|
||||||
|
|
||||||
let ast = Ast::new(1024);
|
|
||||||
let code = SourceCode::unlimited_len(code);
|
let code = SourceCode::unlimited_len(code);
|
||||||
let mut parser = Parser::new(ast, code);
|
|
||||||
let root = sexp::parse_toplevel(&mut parser);
|
let mut lexer = Lexer::new(Lexis::new(1024), code);
|
||||||
let ast = parser.ast;
|
lex(&mut lexer)?;
|
||||||
|
|
||||||
|
let mut ast = Ast::new(1024);
|
||||||
|
let mut parser = Parser::new(&lexer.lexis, &ParserLimits { max_events: 1024 });
|
||||||
|
parser::toplevel(&mut parser);
|
||||||
|
let (root, mut parser_diagnostics) = parser.into_ast(&mut ast)?;
|
||||||
|
println!("{}", dump(&ast, root, Some(code)));
|
||||||
let src = Source {
|
let src = Source {
|
||||||
code,
|
code,
|
||||||
ast: &ast,
|
ast: &ast,
|
||||||
|
@ -27,21 +36,29 @@ fn eval(code: &str) -> Result<Value, Box<dyn Error>> {
|
||||||
let mut chunk = Chunk::new(65536).unwrap();
|
let mut chunk = Chunk::new(65536).unwrap();
|
||||||
let mut compiler = Compiler::new(&mut defs, &mut chunk);
|
let mut compiler = Compiler::new(&mut defs, &mut chunk);
|
||||||
compile_expr(&mut compiler, &src, root)?;
|
compile_expr(&mut compiler, &src, root)?;
|
||||||
|
let closure_spec = compiler.closure_spec();
|
||||||
let defs = compiler.defs;
|
let defs = compiler.defs;
|
||||||
|
|
||||||
for diagnostic in &compiler.diagnostics {
|
let mut diagnostics = lexer.diagnostics;
|
||||||
|
diagnostics.append(&mut parser_diagnostics);
|
||||||
|
diagnostics.append(&mut compiler.diagnostics);
|
||||||
|
|
||||||
|
for diagnostic in &diagnostics {
|
||||||
println!(
|
println!(
|
||||||
"{}..{}: {}",
|
"{}..{} {:?}: {}",
|
||||||
diagnostic.span.start, diagnostic.span.end, diagnostic.message
|
diagnostic.span().start,
|
||||||
|
diagnostic.span().end,
|
||||||
|
diagnostic.span().slice(code),
|
||||||
|
diagnostic.message()
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
if !compiler.diagnostics.is_empty() {
|
if !diagnostics.is_empty() {
|
||||||
panic!("compiler diagnostics were emitted")
|
panic!("diagnostics were emitted")
|
||||||
}
|
}
|
||||||
|
|
||||||
let limits = VmLimits {
|
let limits = VmLimits {
|
||||||
stack_capacity: 256,
|
stack_capacity: 1024,
|
||||||
call_stack_capacity: 256,
|
call_stack_capacity: 256,
|
||||||
ref_capacity: 256,
|
ref_capacity: 256,
|
||||||
fuel: 32768,
|
fuel: 32768,
|
||||||
|
@ -50,16 +67,9 @@ fn eval(code: &str) -> Result<Value, Box<dyn Error>> {
|
||||||
let mut vm = Vm::new(defs, &limits);
|
let mut vm = Vm::new(defs, &limits);
|
||||||
let chunk_id = system.add_chunk(chunk)?;
|
let chunk_id = system.add_chunk(chunk)?;
|
||||||
println!("bytecode: {:?}", system.chunk(chunk_id));
|
println!("bytecode: {:?}", system.chunk(chunk_id));
|
||||||
|
println!("closure spec: {closure_spec:?}");
|
||||||
|
|
||||||
let closure = vm.create_ref(Ref::Closure(Closure {
|
let closure = vm.create_ref(Ref::Closure(Closure::chunk(chunk_id, closure_spec)))?;
|
||||||
start: BytecodeLoc {
|
|
||||||
chunk_id,
|
|
||||||
offset: 0,
|
|
||||||
},
|
|
||||||
name: FunctionName::Anonymous,
|
|
||||||
param_count: 0,
|
|
||||||
captures: Vec::new(),
|
|
||||||
}))?;
|
|
||||||
let result = vm.run(&system, closure)?;
|
let result = vm.run(&system, closure)?;
|
||||||
|
|
||||||
println!("used fuel: {}", limits.fuel - vm.remaining_fuel());
|
println!("used fuel: {}", limits.fuel - vm.remaining_fuel());
|
||||||
|
@ -87,49 +97,52 @@ fn literal_number() {
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn literal_bool() {
|
fn literal_bool() {
|
||||||
assert_eq!(eval("false").unwrap(), Value::False);
|
assert_eq!(eval("False").unwrap(), Value::False);
|
||||||
assert_eq!(eval("true").unwrap(), Value::True);
|
assert_eq!(eval("True").unwrap(), Value::True);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn function_nil() {
|
fn function_nil() {
|
||||||
assert_eq!(eval("(fn () ())").unwrap(), Value::Ref(RefId::from_u32(1)));
|
assert_eq!(
|
||||||
|
eval(r#" \_ -> () "#).unwrap(),
|
||||||
|
Value::Ref(RefId::from_u32(1))
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn function_nil_call() {
|
fn function_nil_call() {
|
||||||
assert_eq!(eval("((fn () ()))").unwrap(), Value::Nil);
|
assert_eq!(eval(r#"(\_ -> ()) ()"#).unwrap(), Value::Nil);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn function_arithmetic() {
|
fn function_arithmetic() {
|
||||||
expect_number("((fn (x) (+ x 2)) 2)", 4.0, 0.0001);
|
expect_number(r#"(\x -> x + 2) 2"#, 4.0, 0.0001);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn function_let() {
|
fn function_let() {
|
||||||
expect_number("((fn (add-two) (add-two 2)) (fn (x) (+ x 2)))", 4.0, 0.0001);
|
expect_number(r#"(\addTwo -> addTwo 2) \x -> x + 2"#, 4.0, 0.0001);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn function_closure() {
|
fn function_closure() {
|
||||||
expect_number("(((fn (x) (fn (y) (+ x y))) 2) 2)", 4.0, 0.0001);
|
expect_number(r#"((\x -> \y -> x + y) 2) 2"#, 4.0, 0.0001);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn if_literal() {
|
fn if_literal() {
|
||||||
expect_number("(if 1 1 2)", 1.0, 0.0001);
|
expect_number("if (1) 1 else 2", 1.0, 0.0001);
|
||||||
expect_number("(if () 1 2)", 2.0, 0.0001);
|
expect_number("if (()) 1 else 2", 2.0, 0.0001);
|
||||||
expect_number("(if false 1 2)", 2.0, 0.0001);
|
expect_number("if (False) 1 else 2", 2.0, 0.0001);
|
||||||
expect_number("(if true 1 2)", 1.0, 0.0001);
|
expect_number("if (True) 1 else 2", 1.0, 0.0001);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn def_simple() {
|
fn def_simple() {
|
||||||
let code = r#"
|
let code = r#"
|
||||||
(def x 1)
|
x = 1
|
||||||
(def y 2)
|
y = 2
|
||||||
(+ x y)
|
x + y
|
||||||
"#;
|
"#;
|
||||||
expect_number(code, 3.0, 0.0001);
|
expect_number(code, 3.0, 0.0001);
|
||||||
}
|
}
|
||||||
|
@ -137,13 +150,13 @@ fn def_simple() {
|
||||||
#[test]
|
#[test]
|
||||||
fn def_fib_recursive() {
|
fn def_fib_recursive() {
|
||||||
let code = r#"
|
let code = r#"
|
||||||
(def fib
|
fib = \n ->
|
||||||
(fn (n)
|
if (n < 2)
|
||||||
(if (< n 2)
|
|
||||||
n
|
n
|
||||||
(+ (fib (- n 1)) (fib (- n 2))))))
|
else
|
||||||
|
fib (n - 1) + fib (n - 2)
|
||||||
|
|
||||||
(fib 10)
|
fib 10
|
||||||
"#;
|
"#;
|
||||||
expect_number(code, 55.0, 0.0001);
|
expect_number(code, 55.0, 0.0001);
|
||||||
}
|
}
|
||||||
|
@ -151,27 +164,30 @@ fn def_fib_recursive() {
|
||||||
#[test]
|
#[test]
|
||||||
fn def_mutually_recursive() {
|
fn def_mutually_recursive() {
|
||||||
let code = r#"
|
let code = r#"
|
||||||
(def f
|
f = \x ->
|
||||||
(fn (x)
|
if (x < 10)
|
||||||
(if (< x 10)
|
g (x + 1)
|
||||||
(g (+ x 1))
|
else
|
||||||
x)))
|
x
|
||||||
|
|
||||||
(def g
|
g = \x ->
|
||||||
(fn (x)
|
if (x < 10)
|
||||||
(if (< x 10)
|
f (x * 2)
|
||||||
(f (* x 2))
|
else
|
||||||
x)))
|
x
|
||||||
|
|
||||||
(f 0)
|
f 0
|
||||||
"#;
|
"#;
|
||||||
expect_number(code, 14.0, 0.0001);
|
expect_number(code, 14.0, 0.0001);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn def_botsbuildbots() {
|
fn def_botsbuildbots() {
|
||||||
let result = eval("(def botsbuildbots (fn () (botsbuildbots))) (botsbuildbots)");
|
let code = r#"
|
||||||
if let Err(error) = result {
|
botsbuildbots = \_ -> botsbuildbots ()
|
||||||
|
botsbuildbots ()
|
||||||
|
"#;
|
||||||
|
if let Err(error) = eval(code) {
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
error.to_string(),
|
error.to_string(),
|
||||||
"Exception {\n message: \"too much recursion\",\n}"
|
"Exception {\n message: \"too much recursion\",\n}"
|
||||||
|
@ -184,8 +200,8 @@ fn def_botsbuildbots() {
|
||||||
#[test]
|
#[test]
|
||||||
fn let_single() {
|
fn let_single() {
|
||||||
let code = r#"
|
let code = r#"
|
||||||
(let ((x 1))
|
let x = 1
|
||||||
(+ x 1))
|
x + 1
|
||||||
"#;
|
"#;
|
||||||
expect_number(code, 2.0, 0.0001);
|
expect_number(code, 2.0, 0.0001);
|
||||||
}
|
}
|
||||||
|
@ -193,9 +209,9 @@ fn let_single() {
|
||||||
#[test]
|
#[test]
|
||||||
fn let_many() {
|
fn let_many() {
|
||||||
let code = r#"
|
let code = r#"
|
||||||
(let ((x 1)
|
let x = 1
|
||||||
(y 2))
|
let y = 2
|
||||||
(+ x y))
|
x + y
|
||||||
"#;
|
"#;
|
||||||
expect_number(code, 3.0, 0.0001);
|
expect_number(code, 3.0, 0.0001);
|
||||||
}
|
}
|
||||||
|
@ -203,9 +219,9 @@ fn let_many() {
|
||||||
#[test]
|
#[test]
|
||||||
fn let_sequence() {
|
fn let_sequence() {
|
||||||
let code = r#"
|
let code = r#"
|
||||||
(let ((x 1)
|
let x = 1
|
||||||
(y (+ x 1)))
|
let y = x + 1
|
||||||
(+ x y))
|
x + y
|
||||||
"#;
|
"#;
|
||||||
expect_number(code, 3.0, 0.0001);
|
expect_number(code, 3.0, 0.0001);
|
||||||
}
|
}
|
||||||
|
@ -213,59 +229,40 @@ fn let_sequence() {
|
||||||
#[test]
|
#[test]
|
||||||
fn let_subexpr() {
|
fn let_subexpr() {
|
||||||
let code = r#"
|
let code = r#"
|
||||||
(+
|
(let x = 1
|
||||||
(let ((x 1)
|
let y = 2
|
||||||
(y 2))
|
x * y) + 2
|
||||||
(* x y)))
|
|
||||||
"#;
|
"#;
|
||||||
expect_number(code, 2.0, 0.0001);
|
expect_number(code, 4.0, 0.0001);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn let_empty() {
|
fn let_subexpr_two() {
|
||||||
let code = r#"
|
let code = r#"
|
||||||
(let () 1)
|
(let x = 1
|
||||||
"#;
|
2) +
|
||||||
expect_number(code, 1.0, 0.0001);
|
(let x = 1
|
||||||
}
|
x)
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn let_subexpr_empty() {
|
|
||||||
let code = r#"
|
|
||||||
(+ (let () 1) (let () 1))
|
|
||||||
"#;
|
|
||||||
expect_number(code, 2.0, 0.0001);
|
|
||||||
}
|
|
||||||
|
|
||||||
#[test]
|
|
||||||
fn let_subexpr_many() {
|
|
||||||
let code = r#"
|
|
||||||
(+
|
|
||||||
(let ((x 1)
|
|
||||||
(y 2))
|
|
||||||
(* x y))
|
|
||||||
(let () 1)
|
|
||||||
(let ((x 1)) x))
|
|
||||||
"#;
|
"#;
|
||||||
expect_number(code, 3.0, 0.0001);
|
expect_number(code, 3.0, 0.0001);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn system_arithmetic() {
|
fn let_subexpr_many() {
|
||||||
expect_number("(+ 1 2 3 4)", 10.0, 0.0001);
|
let code = r#"
|
||||||
expect_number("(+ (* 2 1) 1 (/ 6 2) (- 10 3))", 13.0, 0.0001);
|
(let x = 1
|
||||||
|
let y = 2
|
||||||
|
x * y) +
|
||||||
|
(let x = 1
|
||||||
|
2) +
|
||||||
|
(let x = 1
|
||||||
|
x)
|
||||||
|
"#;
|
||||||
|
expect_number(code, 5.0, 0.0001);
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn practical_fib_recursive() {
|
fn system_arithmetic() {
|
||||||
let code = r#"
|
expect_number("1 + 2 + 3 + 4", 10.0, 0.0001);
|
||||||
((fn (fib)
|
expect_number("(2 * 1) + 1 + (6 / 2) + (10 - 3)", 13.0, 0.0001);
|
||||||
(fib fib 10))
|
|
||||||
|
|
||||||
(fn (fib n)
|
|
||||||
(if (< n 2)
|
|
||||||
n
|
|
||||||
(+ (fib fib (- n 1)) (fib fib (- n 2))))))
|
|
||||||
"#;
|
|
||||||
expect_number(code, 55.0, 0.0001);
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -5,11 +5,15 @@
|
||||||
|
|
||||||
use eyre::{bail, Context, OptionExt};
|
use eyre::{bail, Context, OptionExt};
|
||||||
use haku::{
|
use haku::{
|
||||||
|
ast::Ast,
|
||||||
bytecode::{Chunk, Defs, DefsImage},
|
bytecode::{Chunk, Defs, DefsImage},
|
||||||
compiler::{Compiler, Source},
|
compiler::{Compiler, Source},
|
||||||
|
lexer::{lex, Lexer},
|
||||||
|
parser::{self, Parser, ParserLimits},
|
||||||
render::{tiny_skia::Pixmap, Renderer, RendererLimits},
|
render::{tiny_skia::Pixmap, Renderer, RendererLimits},
|
||||||
sexp::{Ast, Parser, SourceCode},
|
source::SourceCode,
|
||||||
system::{ChunkId, System, SystemImage},
|
system::{ChunkId, System, SystemImage},
|
||||||
|
token::Lexis,
|
||||||
value::{BytecodeLoc, Closure, FunctionName, Ref, Value},
|
value::{BytecodeLoc, Closure, FunctionName, Ref, Value},
|
||||||
vm::{Vm, VmImage, VmLimits},
|
vm::{Vm, VmImage, VmLimits},
|
||||||
};
|
};
|
||||||
|
@ -22,9 +26,11 @@ use crate::schema::Vec2;
|
||||||
// because we do some dynamic typing magic over on the JavaScript side to automatically call all
|
// because we do some dynamic typing magic over on the JavaScript side to automatically call all
|
||||||
// the appropriate functions for setting these limits on the client side.
|
// the appropriate functions for setting these limits on the client side.
|
||||||
pub struct Limits {
|
pub struct Limits {
|
||||||
pub max_source_code_len: usize,
|
pub max_source_code_len: u32,
|
||||||
pub max_chunks: usize,
|
pub max_chunks: usize,
|
||||||
pub max_defs: usize,
|
pub max_defs: usize,
|
||||||
|
pub max_tokens: usize,
|
||||||
|
pub max_parser_events: usize,
|
||||||
pub ast_capacity: usize,
|
pub ast_capacity: usize,
|
||||||
pub chunk_capacity: usize,
|
pub chunk_capacity: usize,
|
||||||
pub stack_capacity: usize,
|
pub stack_capacity: usize,
|
||||||
|
@ -88,12 +94,21 @@ impl Haku {
|
||||||
pub fn set_brush(&mut self, code: &str) -> eyre::Result<()> {
|
pub fn set_brush(&mut self, code: &str) -> eyre::Result<()> {
|
||||||
self.reset();
|
self.reset();
|
||||||
|
|
||||||
let ast = Ast::new(self.limits.ast_capacity);
|
|
||||||
let code = SourceCode::limited_len(code, self.limits.max_source_code_len)
|
let code = SourceCode::limited_len(code, self.limits.max_source_code_len)
|
||||||
.ok_or_eyre("source code is too long")?;
|
.ok_or_eyre("source code is too long")?;
|
||||||
let mut parser = Parser::new(ast, code);
|
|
||||||
let root = haku::sexp::parse_toplevel(&mut parser);
|
let mut lexer = Lexer::new(Lexis::new(self.limits.max_tokens), code);
|
||||||
let ast = parser.ast;
|
lex(&mut lexer)?;
|
||||||
|
|
||||||
|
let mut parser = Parser::new(
|
||||||
|
&lexer.lexis,
|
||||||
|
&ParserLimits {
|
||||||
|
max_events: self.limits.max_parser_events,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
parser::toplevel(&mut parser);
|
||||||
|
let mut ast = Ast::new(self.limits.ast_capacity);
|
||||||
|
let (root, parser_diagnostics) = parser.into_ast(&mut ast)?;
|
||||||
|
|
||||||
let src = Source {
|
let src = Source {
|
||||||
code,
|
code,
|
||||||
|
@ -107,7 +122,10 @@ impl Haku {
|
||||||
haku::compiler::compile_expr(&mut compiler, &src, root)
|
haku::compiler::compile_expr(&mut compiler, &src, root)
|
||||||
.context("failed to compile the chunk")?;
|
.context("failed to compile the chunk")?;
|
||||||
|
|
||||||
if !compiler.diagnostics.is_empty() {
|
if !lexer.diagnostics.is_empty()
|
||||||
|
|| !parser_diagnostics.is_empty()
|
||||||
|
|| !compiler.diagnostics.is_empty()
|
||||||
|
{
|
||||||
bail!("diagnostics were emitted");
|
bail!("diagnostics were emitted");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -61,6 +61,12 @@ max_chunks = 2
|
||||||
# Maximum amount of defs across all source code chunks.
|
# Maximum amount of defs across all source code chunks.
|
||||||
max_defs = 256
|
max_defs = 256
|
||||||
|
|
||||||
|
# Maximum amount of tokens a single chunk can have.
|
||||||
|
max_tokens = 4096
|
||||||
|
|
||||||
|
# Maximum amount of events that the parser may emit in a single chunk.
|
||||||
|
max_parser_events = 4096
|
||||||
|
|
||||||
# Maximum amount of AST nodes in a single parse.
|
# Maximum amount of AST nodes in a single parse.
|
||||||
ast_capacity = 4096
|
ast_capacity = 4096
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue