syntax v2
Introduce a new, more ergonomic syntax for haku. Not all features are implemented just yet. Still missing: custom tags (non-True/False), color literals, and lists.
This commit is contained in:
parent
a3e5e8bd10
commit
2595bf0d82
21 changed files with 2844 additions and 1062 deletions
|
@ -1,71 +1,31 @@
|
|||
// NOTE: This is a very bad CLI.
|
||||
// Sorry!
|
||||
// NOTE: This is a very bad CLI. I only use it for debugging haku with LLDB.
|
||||
// Sorry that it doesn't actually do anything!
|
||||
|
||||
use std::{error::Error, fmt::Display, io::BufRead};
|
||||
|
||||
use haku::{
|
||||
bytecode::{Chunk, Defs},
|
||||
compiler::{compile_expr, Compiler, Source},
|
||||
sexp::{parse_toplevel, Ast, Parser, SourceCode},
|
||||
system::System,
|
||||
value::{BytecodeLoc, Closure, FunctionName, Ref, Value},
|
||||
vm::{Vm, VmLimits},
|
||||
ast::{dump::dump, Ast},
|
||||
lexer::{lex, Lexer},
|
||||
parser::{expr, Parser, ParserLimits},
|
||||
source::SourceCode,
|
||||
token::Lexis,
|
||||
value::Value,
|
||||
};
|
||||
|
||||
fn eval(code: &str) -> Result<Value, Box<dyn Error>> {
|
||||
let mut system = System::new(1);
|
||||
|
||||
let ast = Ast::new(1024);
|
||||
let code = SourceCode::unlimited_len(code);
|
||||
let mut parser = Parser::new(ast, code);
|
||||
let root = parse_toplevel(&mut parser);
|
||||
let ast = parser.ast;
|
||||
let src = Source {
|
||||
code,
|
||||
ast: &ast,
|
||||
system: &system,
|
||||
};
|
||||
let mut lexer = Lexer::new(Lexis::new(1024), code);
|
||||
lex(&mut lexer).expect("too many tokens");
|
||||
|
||||
let mut defs = Defs::new(256);
|
||||
let mut chunk = Chunk::new(65536).unwrap();
|
||||
let mut compiler = Compiler::new(&mut defs, &mut chunk);
|
||||
compile_expr(&mut compiler, &src, root)?;
|
||||
let diagnostics = compiler.diagnostics;
|
||||
let defs = compiler.defs;
|
||||
println!("{chunk:?}");
|
||||
let mut parser = Parser::new(&lexer.lexis, &ParserLimits { max_events: 1024 });
|
||||
expr(&mut parser);
|
||||
|
||||
for diagnostic in &diagnostics {
|
||||
eprintln!(
|
||||
"{}..{}: {}",
|
||||
diagnostic.span.start, diagnostic.span.end, diagnostic.message
|
||||
);
|
||||
}
|
||||
let mut ast = Ast::new(1024);
|
||||
let (root, _) = parser.into_ast(&mut ast).unwrap();
|
||||
|
||||
if !diagnostics.is_empty() {
|
||||
return Err(Box::new(DiagnosticsEmitted));
|
||||
}
|
||||
eprintln!("{}", dump(&ast, root, Some(code)));
|
||||
|
||||
let mut vm = Vm::new(
|
||||
defs,
|
||||
&VmLimits {
|
||||
stack_capacity: 256,
|
||||
call_stack_capacity: 256,
|
||||
ref_capacity: 256,
|
||||
fuel: 32768,
|
||||
memory: 1024,
|
||||
},
|
||||
);
|
||||
let chunk_id = system.add_chunk(chunk)?;
|
||||
let closure = vm.create_ref(Ref::Closure(Closure {
|
||||
start: BytecodeLoc {
|
||||
chunk_id,
|
||||
offset: 0,
|
||||
},
|
||||
name: FunctionName::Anonymous,
|
||||
param_count: 0,
|
||||
captures: Vec::new(),
|
||||
}))?;
|
||||
Ok(vm.run(&system, closure)?)
|
||||
Ok(Value::Nil)
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
||||
|
|
|
@ -2,18 +2,23 @@
|
|||
|
||||
extern crate alloc;
|
||||
|
||||
use core::{alloc::Layout, slice};
|
||||
use core::{alloc::Layout, num::Saturating, slice};
|
||||
|
||||
use alloc::{boxed::Box, vec::Vec};
|
||||
use haku::{
|
||||
ast::Ast,
|
||||
bytecode::{Chunk, Defs, DefsImage},
|
||||
compiler::{compile_expr, CompileError, Compiler, Diagnostic, Source},
|
||||
compiler::{compile_expr, CompileError, Compiler, Source},
|
||||
diagnostic::Diagnostic,
|
||||
lexer::{lex, Lexer},
|
||||
parser::{self, Parser},
|
||||
render::{
|
||||
tiny_skia::{Pixmap, PremultipliedColorU8},
|
||||
Renderer, RendererLimits,
|
||||
},
|
||||
sexp::{parse_toplevel, Ast, Parser, SourceCode},
|
||||
source::SourceCode,
|
||||
system::{ChunkId, System, SystemImage},
|
||||
token::Lexis,
|
||||
value::{BytecodeLoc, Closure, FunctionName, Ref, Value},
|
||||
vm::{Exception, Vm, VmImage, VmLimits},
|
||||
};
|
||||
|
@ -41,6 +46,8 @@ struct Limits {
|
|||
max_source_code_len: usize,
|
||||
max_chunks: usize,
|
||||
max_defs: usize,
|
||||
max_tokens: usize,
|
||||
max_parser_events: usize,
|
||||
ast_capacity: usize,
|
||||
chunk_capacity: usize,
|
||||
stack_capacity: usize,
|
||||
|
@ -58,6 +65,8 @@ impl Default for Limits {
|
|||
max_source_code_len: 65536,
|
||||
max_chunks: 2,
|
||||
max_defs: 256,
|
||||
max_tokens: 1024,
|
||||
max_parser_events: 1024,
|
||||
ast_capacity: 1024,
|
||||
chunk_capacity: 65536,
|
||||
stack_capacity: 1024,
|
||||
|
@ -101,6 +110,8 @@ macro_rules! limit_setter {
|
|||
limit_setter!(max_source_code_len);
|
||||
limit_setter!(max_chunks);
|
||||
limit_setter!(max_defs);
|
||||
limit_setter!(max_tokens);
|
||||
limit_setter!(max_parser_events);
|
||||
limit_setter!(ast_capacity);
|
||||
limit_setter!(chunk_capacity);
|
||||
limit_setter!(stack_capacity);
|
||||
|
@ -207,6 +218,8 @@ unsafe extern "C" fn haku_exception_message_len(instance: *const Instance) -> u3
|
|||
enum StatusCode {
|
||||
Ok,
|
||||
SourceCodeTooLong,
|
||||
TooManyTokens,
|
||||
TooManyAstNodes,
|
||||
ChunkTooBig,
|
||||
DiagnosticsEmitted,
|
||||
TooManyChunks,
|
||||
|
@ -238,6 +251,8 @@ extern "C" fn haku_status_string(code: StatusCode) -> *const i8 {
|
|||
match code {
|
||||
StatusCode::Ok => c"ok",
|
||||
StatusCode::SourceCodeTooLong => c"source code is too long",
|
||||
StatusCode::TooManyTokens => c"source code has too many tokens",
|
||||
StatusCode::TooManyAstNodes => c"source code has too many AST nodes",
|
||||
StatusCode::ChunkTooBig => c"compiled bytecode is too large",
|
||||
StatusCode::DiagnosticsEmitted => c"diagnostics were emitted",
|
||||
StatusCode::TooManyChunks => c"too many registered bytecode chunks",
|
||||
|
@ -281,22 +296,22 @@ unsafe extern "C" fn haku_num_diagnostics(brush: *const Brush) -> u32 {
|
|||
|
||||
#[no_mangle]
|
||||
unsafe extern "C" fn haku_diagnostic_start(brush: *const Brush, index: u32) -> u32 {
|
||||
(*brush).diagnostics[index as usize].span.start as u32
|
||||
(*brush).diagnostics[index as usize].span().start
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
unsafe extern "C" fn haku_diagnostic_end(brush: *const Brush, index: u32) -> u32 {
|
||||
(*brush).diagnostics[index as usize].span.end as u32
|
||||
(*brush).diagnostics[index as usize].span().end
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
unsafe extern "C" fn haku_diagnostic_message(brush: *const Brush, index: u32) -> *const u8 {
|
||||
(*brush).diagnostics[index as usize].message.as_ptr()
|
||||
(*brush).diagnostics[index as usize].message().as_ptr()
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
unsafe extern "C" fn haku_diagnostic_message_len(brush: *const Brush, index: u32) -> u32 {
|
||||
(*brush).diagnostics[index as usize].message.len() as u32
|
||||
(*brush).diagnostics[index as usize].message().len() as u32
|
||||
}
|
||||
|
||||
#[no_mangle]
|
||||
|
@ -315,15 +330,27 @@ unsafe extern "C" fn haku_compile_brush(
|
|||
|
||||
let code = core::str::from_utf8(slice::from_raw_parts(code, code_len as usize))
|
||||
.expect("invalid UTF-8");
|
||||
let code = match SourceCode::limited_len(code, instance.limits.max_source_code_len) {
|
||||
Some(code) => code,
|
||||
None => return StatusCode::SourceCodeTooLong,
|
||||
let Some(code) = SourceCode::limited_len(code, instance.limits.max_source_code_len as u32)
|
||||
else {
|
||||
return StatusCode::SourceCodeTooLong;
|
||||
};
|
||||
|
||||
let ast = Ast::new(instance.limits.ast_capacity);
|
||||
let mut parser = Parser::new(ast, code);
|
||||
let root = parse_toplevel(&mut parser);
|
||||
let ast = parser.ast;
|
||||
let mut lexer = Lexer::new(Lexis::new(instance.limits.max_tokens), code);
|
||||
if lex(&mut lexer).is_err() {
|
||||
return StatusCode::TooManyTokens;
|
||||
};
|
||||
|
||||
let mut ast = Ast::new(instance.limits.ast_capacity);
|
||||
let mut parser = Parser::new(
|
||||
&lexer.lexis,
|
||||
&haku::parser::ParserLimits {
|
||||
max_events: instance.limits.max_parser_events,
|
||||
},
|
||||
);
|
||||
parser::toplevel(&mut parser);
|
||||
let Ok((root, mut parser_diagnostics)) = parser.into_ast(&mut ast) else {
|
||||
return StatusCode::TooManyAstNodes;
|
||||
};
|
||||
|
||||
let src = Source {
|
||||
code,
|
||||
|
@ -339,8 +366,11 @@ unsafe extern "C" fn haku_compile_brush(
|
|||
}
|
||||
}
|
||||
|
||||
if !compiler.diagnostics.is_empty() {
|
||||
brush.diagnostics = compiler.diagnostics;
|
||||
let mut diagnostics = lexer.diagnostics;
|
||||
diagnostics.append(&mut parser_diagnostics);
|
||||
diagnostics.append(&mut compiler.diagnostics);
|
||||
if !diagnostics.is_empty() {
|
||||
brush.diagnostics = diagnostics;
|
||||
return StatusCode::DiagnosticsEmitted;
|
||||
}
|
||||
|
||||
|
|
125
crates/haku/src/ast.rs
Normal file
125
crates/haku/src/ast.rs
Normal file
|
@ -0,0 +1,125 @@
|
|||
use core::{error::Error, fmt::Display};
|
||||
|
||||
use alloc::vec::Vec;
|
||||
|
||||
use crate::source::Span;
|
||||
|
||||
pub mod dump;
|
||||
pub mod walk;
|
||||
|
||||
/// Identifier of a node stored inside an [`Ast`].
///
/// Internally this is the node's index into the AST's per-node vectors.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct NodeId(u32);

impl NodeId {
    /// The ID with index 0.
    pub const NIL: Self = Self(0);
}
|
||||
|
||||
/// The kind of a syntax tree node.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum NodeKind {
    /// The nil node. Not to be confused with nil literals, which use [`NodeKind::ParenEmpty`].
    Nil,

    /// A raw token kept in the tree.
    Token,

    // Literal-like leaves.
    /// An identifier.
    Ident,
    /// A tag, such as `True` or `False`.
    Tag,
    /// A number literal.
    Number,
    /// A color literal.
    Color,
    /// A list literal.
    List,

    // Expressions.
    /// The operator inside a [`NodeKind::Unary`] or [`NodeKind::Binary`] node.
    Op,
    /// A unary operator applied to an expression.
    Unary,
    /// A binary operator applied to two expressions.
    Binary,
    /// A function call.
    Call,
    /// An empty pair of parentheses.
    ParenEmpty,
    /// A parenthesized expression.
    Paren,
    /// A lambda.
    Lambda,
    /// The parameter list of a [`NodeKind::Lambda`].
    Params,
    /// A single parameter inside [`NodeKind::Params`].
    Param,
    /// An `if` expression.
    If,
    /// A `let` expression.
    Let,

    /// The top level of a program.
    Toplevel,

    /// A node produced in place of syntax that could not be parsed.
    Error,
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub struct Node {
|
||||
pub span: Span,
|
||||
pub kind: NodeKind,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Ast {
|
||||
kinds: Vec<NodeKind>,
|
||||
spans: Vec<Span>,
|
||||
children_spans: Vec<(u32, u32)>,
|
||||
children: Vec<NodeId>,
|
||||
}
|
||||
|
||||
impl Ast {
|
||||
pub fn new(capacity: usize) -> Self {
|
||||
assert!(capacity >= 1, "there must be space for at least a nil node");
|
||||
assert!(capacity <= u32::MAX as usize);
|
||||
|
||||
let mut ast = Self {
|
||||
kinds: Vec::with_capacity(capacity),
|
||||
spans: Vec::with_capacity(capacity),
|
||||
children_spans: Vec::with_capacity(capacity),
|
||||
children: Vec::new(),
|
||||
};
|
||||
|
||||
ast.alloc(NodeKind::Nil, Span::new(0, 0)).unwrap();
|
||||
|
||||
ast
|
||||
}
|
||||
|
||||
pub fn alloc(&mut self, kind: NodeKind, span: Span) -> Result<NodeId, NodeAllocError> {
|
||||
if self.kinds.len() >= self.kinds.capacity() {
|
||||
return Err(NodeAllocError);
|
||||
}
|
||||
|
||||
let index = self.kinds.len() as u32;
|
||||
self.kinds.push(kind);
|
||||
self.spans.push(span);
|
||||
self.children_spans.push((0, 0));
|
||||
Ok(NodeId(index))
|
||||
}
|
||||
|
||||
// NOTE: This never produces a NodeAllocError, because there can more or less only ever be as many children for
|
||||
// nodes as there are nodes.
|
||||
pub fn alloc_children(&mut self, for_node: NodeId, children: &[NodeId]) {
|
||||
let start = self.children.len();
|
||||
self.children.extend_from_slice(children);
|
||||
let end = self.children.len();
|
||||
self.children_spans[for_node.0 as usize] = (start as u32, end as u32);
|
||||
}
|
||||
|
||||
pub fn extend_span(&mut self, in_node: NodeId, end: u32) {
|
||||
self.spans[in_node.0 as usize].end = end;
|
||||
}
|
||||
|
||||
pub fn kind(&self, id: NodeId) -> NodeKind {
|
||||
self.kinds[id.0 as usize]
|
||||
}
|
||||
|
||||
pub fn span(&self, id: NodeId) -> Span {
|
||||
self.spans[id.0 as usize]
|
||||
}
|
||||
|
||||
pub fn children(&self, id: NodeId) -> &[NodeId] {
|
||||
let (start, end) = self.children_spans[id.0 as usize];
|
||||
&self.children[start as usize..end as usize]
|
||||
}
|
||||
}
|
||||
|
||||
/// Error returned when a node cannot be allocated because the AST is full.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct NodeAllocError;

impl Display for NodeAllocError {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        write!(f, "too many nodes")
    }
}

impl Error for NodeAllocError {}
|
34
crates/haku/src/ast/dump.rs
Normal file
34
crates/haku/src/ast/dump.rs
Normal file
|
@ -0,0 +1,34 @@
|
|||
use alloc::string::String;
|
||||
use core::fmt::Write;
|
||||
|
||||
use crate::{ast::NodeKind, source::SourceCode};
|
||||
|
||||
use super::{Ast, NodeId};
|
||||
|
||||
pub fn dump(ast: &Ast, node: NodeId, code: Option<&SourceCode>) -> String {
|
||||
let mut result = String::new();
|
||||
|
||||
fn rec(ast: &Ast, node: NodeId, code: Option<&SourceCode>, result: &mut String, depth: usize) {
|
||||
for _ in 0..depth {
|
||||
result.push_str(" ");
|
||||
}
|
||||
|
||||
write!(result, "{:?} @ {:?}", ast.kind(node), ast.span(node)).unwrap();
|
||||
if let Some(code) = code {
|
||||
if ast.kind(node) == NodeKind::Token {
|
||||
write!(result, " {:?}", ast.span(node).slice(code)).unwrap();
|
||||
}
|
||||
}
|
||||
writeln!(result).unwrap();
|
||||
for &child in ast.children(node) {
|
||||
rec(ast, child, code, result, depth + 1);
|
||||
}
|
||||
}
|
||||
|
||||
rec(ast, node, code, &mut result, 0);
|
||||
|
||||
// Remove the trailing newline.
|
||||
result.pop();
|
||||
|
||||
result
|
||||
}
|
73
crates/haku/src/ast/walk.rs
Normal file
73
crates/haku/src/ast/walk.rs
Normal file
|
@ -0,0 +1,73 @@
|
|||
use super::{Ast, NodeId, NodeKind};
|
||||
|
||||
impl Ast {
|
||||
pub fn child(&self, parent: NodeId, kind: NodeKind) -> Option<NodeId> {
|
||||
self.children(parent)
|
||||
.iter()
|
||||
.find(|&&child| self.kind(child) == kind)
|
||||
.copied()
|
||||
}
|
||||
|
||||
pub fn walk(&self, parent: NodeId) -> Walk<'_> {
|
||||
Walk {
|
||||
ast: self,
|
||||
parent,
|
||||
index: 0,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// An iterator over a node's children, with convenience methods for accessing those children.
|
||||
#[derive(Clone)]
|
||||
pub struct Walk<'a> {
|
||||
ast: &'a Ast,
|
||||
parent: NodeId,
|
||||
index: usize,
|
||||
}
|
||||
|
||||
impl<'a> Walk<'a> {
|
||||
/// Walk to the first non-Nil, non-Error, non-Token node.
|
||||
pub fn node(&mut self) -> Option<NodeId> {
|
||||
while let Some(id) = self.next() {
|
||||
if !matches!(
|
||||
self.ast.kind(id),
|
||||
NodeKind::Nil | NodeKind::Token | NodeKind::Error
|
||||
) {
|
||||
return Some(id);
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// Walk to the next [`node`][`Self::node`] of the given kind.
|
||||
pub fn node_of(&mut self, kind: NodeKind) -> Option<NodeId> {
|
||||
while let Some(id) = self.node() {
|
||||
if self.ast.kind(id) == kind {
|
||||
return Some(id);
|
||||
}
|
||||
}
|
||||
|
||||
None
|
||||
}
|
||||
|
||||
/// Find the first node of the given kind. This does not advance the iterator.
|
||||
pub fn get(&self, kind: NodeKind) -> Option<NodeId> {
|
||||
self.clone().find(|&id| self.ast.kind(id) == kind)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for Walk<'a> {
|
||||
type Item = NodeId;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
let children = self.ast.children(self.parent);
|
||||
if self.index < children.len() {
|
||||
let index = self.index;
|
||||
self.index += 1;
|
||||
Some(children[index])
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
|
@ -17,6 +17,8 @@ pub enum Opcode {
|
|||
// Duplicate existing values.
|
||||
/// Push a value relative to the bottom of the current stack window.
|
||||
Local, // (index: u8)
|
||||
/// Set the value of a value relative to the bottom of the current stack window.
|
||||
SetLocal, // (index: u8)
|
||||
/// Push a captured value.
|
||||
Capture, // (index: u8)
|
||||
/// Get the value of a definition.
|
||||
|
@ -24,12 +26,8 @@ pub enum Opcode {
|
|||
/// Set the value of a definition.
|
||||
SetDef, // (index: u16)
|
||||
|
||||
/// Drop `number` values from the stack.
|
||||
/// <!-- OwO -->
|
||||
DropLet, // (number: u8)
|
||||
|
||||
// Create literal functions.
|
||||
Function, // (params: u8, then: u16), at `then`: (capture_count: u8, captures: [(source: u8, index: u8); capture_count])
|
||||
Function, // (params: u8, then: u16), at `then`: (local_count: u8, capture_count: u8, captures: [(source: u8, index: u8); capture_count])
|
||||
|
||||
// Control flow.
|
||||
Jump, // (offset: u16)
|
||||
|
|
|
@ -6,9 +6,11 @@ use core::{
|
|||
use alloc::vec::Vec;
|
||||
|
||||
use crate::{
|
||||
ast::{Ast, NodeId, NodeKind},
|
||||
bytecode::{Chunk, DefError, Defs, EmitError, Opcode, CAPTURE_CAPTURE, CAPTURE_LOCAL},
|
||||
sexp::{Ast, NodeId, NodeKind, SourceCode, Span},
|
||||
system::System,
|
||||
diagnostic::Diagnostic,
|
||||
source::SourceCode,
|
||||
system::{System, SystemFnArity},
|
||||
};
|
||||
|
||||
pub struct Source<'a> {
|
||||
|
@ -17,12 +19,6 @@ pub struct Source<'a> {
|
|||
pub system: &'a System,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct Diagnostic {
|
||||
pub span: Span,
|
||||
pub message: &'static str,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
struct Local<'a> {
|
||||
name: &'a str,
|
||||
|
@ -46,6 +42,11 @@ pub struct Compiler<'a, 'b> {
|
|||
scopes: Vec<Scope<'a>>,
|
||||
}
|
||||
|
||||
/// Summary of a compiled function body, as produced by `Compiler::closure_spec`.
#[derive(Debug, Clone, Copy)]
pub struct ClosureSpec {
    /// Number of locals counted for the function.
    pub(crate) local_count: u8,
}
|
||||
|
||||
impl<'a, 'b> Compiler<'a, 'b> {
|
||||
pub fn new(defs: &'a mut Defs, chunk: &'b mut Chunk) -> Self {
|
||||
Self {
|
||||
|
@ -59,18 +60,22 @@ impl<'a, 'b> Compiler<'a, 'b> {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn diagnose(&mut self, diagnostic: Diagnostic) {
|
||||
if self.diagnostics.len() >= self.diagnostics.capacity() {
|
||||
return;
|
||||
/// Records `diagnostic`, unless the diagnostics list is already filled up to its capacity,
/// in which case the diagnostic is dropped.
fn emit(&mut self, diagnostic: Diagnostic) {
    if self.diagnostics.len() >= self.diagnostics.capacity() {
        return;
    }
    self.diagnostics.push(diagnostic);
}
|
||||
|
||||
if self.diagnostics.len() == self.diagnostics.capacity() - 1 {
|
||||
self.diagnostics.push(Diagnostic {
|
||||
span: Span::new(0, 0),
|
||||
message: "too many diagnostics emitted, stopping", // hello clangd!
|
||||
})
|
||||
} else {
|
||||
self.diagnostics.push(diagnostic);
|
||||
pub fn closure_spec(&self) -> ClosureSpec {
|
||||
ClosureSpec {
|
||||
local_count: self
|
||||
.scopes
|
||||
.last()
|
||||
.unwrap()
|
||||
.locals
|
||||
.len()
|
||||
.try_into()
|
||||
.unwrap_or_default(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -82,27 +87,51 @@ pub fn compile_expr<'a>(
|
|||
src: &Source<'a>,
|
||||
node_id: NodeId,
|
||||
) -> CompileResult {
|
||||
let node = src.ast.get(node_id);
|
||||
match node.kind {
|
||||
NodeKind::Eof => unreachable!("eof node should never be emitted"),
|
||||
match src.ast.kind(node_id) {
|
||||
// The nil node is special, as it inhabits node ID 0.
|
||||
NodeKind::Nil => {
|
||||
unreachable!("Nil node should never be emitted (ParenEmpty is used for nil literals)")
|
||||
}
|
||||
// Tokens are trivia and should never be emitted---they're only useful for error reporting.
|
||||
NodeKind::Token => unreachable!("Token node should never be emitted"),
|
||||
// Op nodes are only used to provide a searching anchor for the operator in Unary and Binary.
|
||||
NodeKind::Op => unreachable!("Op node should never be emitted"),
|
||||
// Params nodes are only used to provide a searching anchor for Lambda parameters.
|
||||
NodeKind::Params => unreachable!("Param node should never be emitted"),
|
||||
// Param nodes are only used to provide a searching anchor for identifiers in Params nodes,
|
||||
// as they may also contain commas and other trivia.
|
||||
NodeKind::Param => unreachable!("Param node should never be emitted"),
|
||||
|
||||
NodeKind::Color => unsupported(c, src, node_id, "color literals are not implemented yet"),
|
||||
|
||||
NodeKind::Nil => compile_nil(c),
|
||||
NodeKind::Ident => compile_ident(c, src, node_id),
|
||||
NodeKind::Number => compile_number(c, src, node_id),
|
||||
NodeKind::List(_, _) => compile_list(c, src, node_id),
|
||||
NodeKind::Toplevel(_) => compile_toplevel(c, src, node_id),
|
||||
NodeKind::Tag => compile_tag(c, src, node_id),
|
||||
NodeKind::List => unsupported(c, src, node_id, "list literals are not implemented yet"),
|
||||
|
||||
NodeKind::Error(message) => {
|
||||
c.diagnose(Diagnostic {
|
||||
span: node.span,
|
||||
message,
|
||||
});
|
||||
NodeKind::Unary => compile_unary(c, src, node_id),
|
||||
NodeKind::Binary => compile_binary(c, src, node_id),
|
||||
NodeKind::Call => compile_call(c, src, node_id),
|
||||
NodeKind::Paren => compile_paren(c, src, node_id),
|
||||
NodeKind::ParenEmpty => compile_nil(c),
|
||||
NodeKind::Lambda => compile_lambda(c, src, node_id),
|
||||
NodeKind::If => compile_if(c, src, node_id),
|
||||
NodeKind::Let => compile_let(c, src, node_id),
|
||||
|
||||
NodeKind::Toplevel => compile_toplevel(c, src, node_id),
|
||||
|
||||
// Error nodes are ignored, because for each error node an appropriate parser
|
||||
// diagnostic is emitted anyways.
|
||||
NodeKind::Error => Ok(()),
|
||||
}
|
||||
}
|
||||
|
||||
fn unsupported(c: &mut Compiler, src: &Source, node_id: NodeId, message: &str) -> CompileResult {
|
||||
c.emit(Diagnostic::error(src.ast.span(node_id), message));
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn compile_nil(c: &mut Compiler<'_, '_>) -> CompileResult {
|
||||
fn compile_nil(c: &mut Compiler) -> CompileResult {
|
||||
c.chunk.emit_opcode(Opcode::Nil)?;
|
||||
|
||||
Ok(())
|
||||
|
@ -144,13 +173,10 @@ fn find_variable(
|
|||
}
|
||||
|
||||
fn compile_ident<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, node_id: NodeId) -> CompileResult {
|
||||
let ident = src.ast.get(node_id);
|
||||
let name = ident.span.slice(src.code);
|
||||
let span = src.ast.span(node_id);
|
||||
let name = span.slice(src.code);
|
||||
|
||||
match name {
|
||||
"false" => _ = c.chunk.emit_opcode(Opcode::False)?,
|
||||
"true" => _ = c.chunk.emit_opcode(Opcode::True)?,
|
||||
_ => match find_variable(c, name, c.scopes.len() - 1) {
|
||||
match find_variable(c, name, c.scopes.len() - 1) {
|
||||
Ok(Some(Variable::Local(index))) => {
|
||||
c.chunk.emit_opcode(Opcode::Local)?;
|
||||
c.chunk.emit_u8(index)?;
|
||||
|
@ -164,28 +190,22 @@ fn compile_ident<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, node_id: NodeId
|
|||
c.chunk.emit_opcode(Opcode::Def)?;
|
||||
c.chunk.emit_u16(def_id.to_u16())?;
|
||||
} else {
|
||||
c.diagnose(Diagnostic {
|
||||
span: ident.span,
|
||||
message: "undefined variable",
|
||||
});
|
||||
c.emit(Diagnostic::error(span, "undefined variable"));
|
||||
}
|
||||
}
|
||||
Err(CaptureError) => {
|
||||
c.diagnose(Diagnostic {
|
||||
span: ident.span,
|
||||
message: "too many variables captured from outer functions in this scope",
|
||||
});
|
||||
}
|
||||
},
|
||||
c.emit(Diagnostic::error(
|
||||
span,
|
||||
"too many variables captured from outer functions in this scope",
|
||||
));
|
||||
}
|
||||
};
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn compile_number(c: &mut Compiler<'_, '_>, src: &Source<'_>, node_id: NodeId) -> CompileResult {
|
||||
let node = src.ast.get(node_id);
|
||||
|
||||
let literal = node.span.slice(src.code);
|
||||
let literal = src.ast.span(node_id).slice(src.code);
|
||||
let float: f32 = literal
|
||||
.parse()
|
||||
.expect("the parser should've gotten us a string parsable by the stdlib");
|
||||
|
@ -196,48 +216,130 @@ fn compile_number(c: &mut Compiler<'_, '_>, src: &Source<'_>, node_id: NodeId) -
|
|||
Ok(())
|
||||
}
|
||||
|
||||
fn compile_list<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, node_id: NodeId) -> CompileResult {
|
||||
let NodeKind::List(function_id, args) = src.ast.get(node_id).kind else {
|
||||
unreachable!("compile_list expects a List");
|
||||
};
|
||||
fn compile_tag(c: &mut Compiler<'_, '_>, src: &Source, node_id: NodeId) -> CompileResult {
|
||||
let tag = src.ast.span(node_id).slice(src.code);
|
||||
|
||||
let function = src.ast.get(function_id);
|
||||
let name = function.span.slice(src.code);
|
||||
|
||||
if function.kind == NodeKind::Ident {
|
||||
match name {
|
||||
"fn" => return compile_fn(c, src, args),
|
||||
"if" => return compile_if(c, src, args),
|
||||
"let" => return compile_let(c, src, args),
|
||||
_ => (),
|
||||
};
|
||||
match tag {
|
||||
"False" => {
|
||||
c.chunk.emit_opcode(Opcode::False)?;
|
||||
}
|
||||
"True" => {
|
||||
c.chunk.emit_opcode(Opcode::True)?;
|
||||
}
|
||||
_ => {
|
||||
c.emit(Diagnostic::error(src.ast.span(node_id), "uppercased identifiers are reserved for future use; please start your identifiers with a lowercase letter instead"));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn compile_unary<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, node_id: NodeId) -> CompileResult {
|
||||
let mut walk = src.ast.walk(node_id);
|
||||
let Some(op) = walk.node() else { return Ok(()) };
|
||||
let Some(expr) = walk.node() else {
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
if src.ast.kind(op) != NodeKind::Op {
|
||||
return Ok(());
|
||||
}
|
||||
let name = src.ast.span(op).slice(src.code);
|
||||
|
||||
compile_expr(c, src, expr)?;
|
||||
if let Some(index) = (src.system.resolve_fn)(SystemFnArity::Unary, name) {
|
||||
let argument_count = 1;
|
||||
c.chunk.emit_opcode(Opcode::System)?;
|
||||
c.chunk.emit_u8(index)?;
|
||||
c.chunk.emit_u8(argument_count)?;
|
||||
} else {
|
||||
c.emit(Diagnostic::error(
|
||||
src.ast.span(op),
|
||||
"this unary operator is currently unimplemented",
|
||||
));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn compile_binary<'a>(
|
||||
c: &mut Compiler<'a, '_>,
|
||||
src: &Source<'a>,
|
||||
node_id: NodeId,
|
||||
) -> CompileResult {
|
||||
let mut walk = src.ast.walk(node_id);
|
||||
let Some(left) = walk.node() else {
|
||||
return Ok(());
|
||||
};
|
||||
let Some(op) = walk.node() else { return Ok(()) };
|
||||
let Some(right) = walk.node() else {
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
if src.ast.kind(op) != NodeKind::Op {
|
||||
return Ok(());
|
||||
}
|
||||
let name = src.ast.span(op).slice(src.code);
|
||||
|
||||
if name == "=" {
|
||||
c.emit(Diagnostic::error(
|
||||
src.ast.span(op),
|
||||
"defs `a = b` may only appear at the top level",
|
||||
));
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
compile_expr(c, src, left)?;
|
||||
compile_expr(c, src, right)?;
|
||||
if let Some(index) = (src.system.resolve_fn)(SystemFnArity::Binary, name) {
|
||||
let argument_count = 2;
|
||||
c.chunk.emit_opcode(Opcode::System)?;
|
||||
c.chunk.emit_u8(index)?;
|
||||
c.chunk.emit_u8(argument_count)?;
|
||||
} else {
|
||||
c.emit(Diagnostic::error(
|
||||
src.ast.span(op),
|
||||
"this unary operator is currently unimplemented",
|
||||
));
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn compile_call<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, node_id: NodeId) -> CompileResult {
|
||||
let mut walk = src.ast.walk(node_id);
|
||||
let Some(func) = walk.node() else {
|
||||
return Ok(());
|
||||
};
|
||||
let name = src.ast.span(func).slice(src.code);
|
||||
|
||||
let mut argument_count = 0;
|
||||
let mut args = args;
|
||||
while let NodeKind::List(head, tail) = src.ast.get(args).kind {
|
||||
compile_expr(c, src, head)?;
|
||||
while let Some(arg) = walk.node() {
|
||||
compile_expr(c, src, arg)?;
|
||||
argument_count += 1;
|
||||
args = tail;
|
||||
}
|
||||
|
||||
let argument_count = u8::try_from(argument_count).unwrap_or_else(|_| {
|
||||
c.diagnose(Diagnostic {
|
||||
span: src.ast.get(args).span,
|
||||
message: "function call has too many arguments",
|
||||
});
|
||||
c.emit(Diagnostic::error(
|
||||
src.ast.span(node_id),
|
||||
"function call has too many arguments",
|
||||
));
|
||||
0
|
||||
});
|
||||
|
||||
if let (NodeKind::Ident, Some(index)) = (function.kind, (src.system.resolve_fn)(name)) {
|
||||
if let (NodeKind::Ident, Some(index)) = (
|
||||
src.ast.kind(func),
|
||||
(src.system.resolve_fn)(SystemFnArity::Nary, name),
|
||||
) {
|
||||
c.chunk.emit_opcode(Opcode::System)?;
|
||||
c.chunk.emit_u8(index)?;
|
||||
c.chunk.emit_u8(argument_count)?;
|
||||
} else {
|
||||
// This is a bit of an oddity: we only emit the function expression _after_ the arguments,
|
||||
// but since the language is effectless this doesn't matter in practice.
|
||||
// It makes for less code in the compiler and the VM.
|
||||
compile_expr(c, src, function_id)?;
|
||||
// It makes for a bit less code in the VM, since there's no need to find the function
|
||||
// down the stack - it's always on top.
|
||||
compile_expr(c, src, func)?;
|
||||
c.chunk.emit_opcode(Opcode::Call)?;
|
||||
c.chunk.emit_u8(argument_count)?;
|
||||
}
|
||||
|
@ -245,68 +347,29 @@ fn compile_list<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, node_id: NodeId)
|
|||
Ok(())
|
||||
}
|
||||
|
||||
struct WalkList {
|
||||
current: NodeId,
|
||||
ok: bool,
|
||||
}
|
||||
|
||||
impl WalkList {
|
||||
fn new(start: NodeId) -> Self {
|
||||
Self {
|
||||
current: start,
|
||||
ok: true,
|
||||
}
|
||||
}
|
||||
|
||||
fn expect_arg(
|
||||
&mut self,
|
||||
c: &mut Compiler<'_, '_>,
|
||||
src: &Source<'_>,
|
||||
message: &'static str,
|
||||
) -> NodeId {
|
||||
if !self.ok {
|
||||
return NodeId::NIL;
|
||||
}
|
||||
|
||||
if let NodeKind::List(expr, tail) = src.ast.get(self.current).kind {
|
||||
self.current = tail;
|
||||
expr
|
||||
} else {
|
||||
c.diagnose(Diagnostic {
|
||||
span: src.ast.get(self.current).span,
|
||||
message,
|
||||
});
|
||||
self.ok = false;
|
||||
NodeId::NIL
|
||||
}
|
||||
}
|
||||
|
||||
fn expect_nil(&mut self, c: &mut Compiler<'_, '_>, src: &Source<'_>, message: &'static str) {
|
||||
if src.ast.get(self.current).kind != NodeKind::Nil {
|
||||
c.diagnose(Diagnostic {
|
||||
span: src.ast.get(self.current).span,
|
||||
message,
|
||||
});
|
||||
// NOTE: Don't set self.ok to false, since this is not a fatal error.
|
||||
// The nodes returned previously are valid and therefore it's safe to operate on them.
|
||||
// Just having extra arguments shouldn't inhibit emitting additional diagnostics in
|
||||
// the expression.
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn compile_if<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, args: NodeId) -> CompileResult {
|
||||
let mut list = WalkList::new(args);
|
||||
|
||||
let condition = list.expect_arg(c, src, "missing `if` condition");
|
||||
let if_true = list.expect_arg(c, src, "missing `if` true branch");
|
||||
let if_false = list.expect_arg(c, src, "missing `if` false branch");
|
||||
list.expect_nil(c, src, "extra arguments after `if` false branch");
|
||||
|
||||
if !list.ok {
|
||||
fn compile_paren<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, node_id: NodeId) -> CompileResult {
|
||||
let Some(inner) = src.ast.walk(node_id).node() else {
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
compile_expr(c, src, inner)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn compile_if<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, node_id: NodeId) -> CompileResult {
|
||||
let mut walk = src.ast.walk(node_id);
|
||||
|
||||
let Some(condition) = walk.node() else {
|
||||
return Ok(());
|
||||
};
|
||||
let Some(if_true) = walk.node() else {
|
||||
return Ok(());
|
||||
};
|
||||
let Some(if_false) = walk.node() else {
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
compile_expr(c, src, condition)?;
|
||||
|
||||
c.chunk.emit_opcode(Opcode::JumpIfNot)?;
|
||||
|
@ -328,113 +391,70 @@ fn compile_if<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, args: NodeId) -> C
|
|||
Ok(())
|
||||
}
|
||||
|
||||
fn compile_let<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, args: NodeId) -> CompileResult {
|
||||
let mut list = WalkList::new(args);
|
||||
fn compile_let<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, node_id: NodeId) -> CompileResult {
|
||||
let mut walk = src.ast.walk(node_id);
|
||||
|
||||
let binding_list = list.expect_arg(c, src, "missing `let` binding list ((x 1) (y 2) ...)");
|
||||
let expr = list.expect_arg(c, src, "missing expression to `let` names into");
|
||||
list.expect_nil(c, src, "extra arguments after `let` expression");
|
||||
|
||||
if !list.ok {
|
||||
let Some(ident) = walk.node() else {
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// NOTE: Our `let` behaves like `let*` from Lisps.
|
||||
// This is because this is generally the more intuitive behaviour with how variable declarations
|
||||
// work in traditional imperative languages.
|
||||
// We do not offer an alternative to Lisp `let` to be as minimal as possible.
|
||||
|
||||
let mut current = binding_list;
|
||||
let mut local_count: usize = 0;
|
||||
while let NodeKind::List(head, tail) = src.ast.get(current).kind {
|
||||
if !matches!(src.ast.get(head).kind, NodeKind::List(_, _)) {
|
||||
c.diagnose(Diagnostic {
|
||||
span: src.ast.get(head).span,
|
||||
message: "`let` binding expected, like (x 1)",
|
||||
});
|
||||
current = tail;
|
||||
continue;
|
||||
}
|
||||
|
||||
let mut list = WalkList::new(head);
|
||||
let ident = list.expect_arg(c, src, "binding name expected");
|
||||
let value = list.expect_arg(c, src, "binding value expected");
|
||||
list.expect_nil(c, src, "extra expressions after `let` binding value");
|
||||
|
||||
if src.ast.get(ident).kind != NodeKind::Ident {
|
||||
c.diagnose(Diagnostic {
|
||||
span: src.ast.get(ident).span,
|
||||
message: "binding name must be an identifier",
|
||||
});
|
||||
}
|
||||
|
||||
// NOTE: Compile expression _before_ putting the value into scope.
|
||||
// This is so that the variable cannot refer to itself, as it is yet to be declared.
|
||||
compile_expr(c, src, value)?;
|
||||
|
||||
let name = src.ast.get(ident).span.slice(src.code);
|
||||
let scope = c.scopes.last_mut().unwrap();
|
||||
if scope.locals.len() >= u8::MAX as usize {
|
||||
c.diagnose(Diagnostic {
|
||||
span: src.ast.get(ident).span,
|
||||
message: "too many names bound in this function at a single time",
|
||||
});
|
||||
} else {
|
||||
scope.locals.push(Local { name });
|
||||
}
|
||||
|
||||
local_count += 1;
|
||||
current = tail;
|
||||
}
|
||||
};
|
||||
let Some(expr) = walk.node() else {
|
||||
return Ok(());
|
||||
};
|
||||
let Some(then) = walk.node() else {
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
compile_expr(c, src, expr)?;
|
||||
|
||||
let name = src.ast.span(ident).slice(src.code);
|
||||
let scope = c.scopes.last_mut().unwrap();
|
||||
scope
|
||||
.locals
|
||||
.resize_with(scope.locals.len() - local_count, || unreachable!());
|
||||
let index = if scope.locals.len() >= u8::MAX as usize {
|
||||
c.emit(Diagnostic::error(
|
||||
src.ast.span(ident),
|
||||
"too many names bound in this function at a single time",
|
||||
));
|
||||
|
||||
// NOTE: If we reach more than 255 locals declared in our `let`, we should've gotten
|
||||
// a diagnostic emitted in the `while` loop beforehand.
|
||||
let local_count = u8::try_from(local_count).unwrap_or(0);
|
||||
c.chunk.emit_opcode(Opcode::DropLet)?;
|
||||
c.chunk.emit_u8(local_count)?;
|
||||
// Don't emit the expression, because it will most likely contain errors due to this
|
||||
// `let` failing.
|
||||
return Ok(());
|
||||
} else {
|
||||
let index = scope.locals.len();
|
||||
scope.locals.push(Local { name });
|
||||
index as u8
|
||||
};
|
||||
c.chunk.emit_opcode(Opcode::SetLocal)?;
|
||||
c.chunk.emit_u8(index)?;
|
||||
|
||||
compile_expr(c, src, then)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn compile_fn<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, args: NodeId) -> CompileResult {
|
||||
let mut list = WalkList::new(args);
|
||||
|
||||
let param_list = list.expect_arg(c, src, "missing function parameters");
|
||||
let body = list.expect_arg(c, src, "missing function body");
|
||||
list.expect_nil(c, src, "extra arguments after function body");
|
||||
|
||||
if !list.ok {
|
||||
fn compile_lambda<'a>(
|
||||
c: &mut Compiler<'a, '_>,
|
||||
src: &Source<'a>,
|
||||
node_id: NodeId,
|
||||
) -> CompileResult {
|
||||
let mut walk = src.ast.walk(node_id);
|
||||
let Some(params) = walk.node() else {
|
||||
return Ok(());
|
||||
}
|
||||
};
|
||||
let Some(body) = walk.node() else {
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
let mut locals = Vec::new();
|
||||
let mut current = param_list;
|
||||
while let NodeKind::List(ident, tail) = src.ast.get(current).kind {
|
||||
if let NodeKind::Ident = src.ast.get(ident).kind {
|
||||
let mut params_walk = src.ast.walk(params);
|
||||
while let Some(param) = params_walk.node() {
|
||||
locals.push(Local {
|
||||
name: src.ast.get(ident).span.slice(src.code),
|
||||
})
|
||||
} else {
|
||||
c.diagnose(Diagnostic {
|
||||
span: src.ast.get(ident).span,
|
||||
message: "function parameters must be identifiers",
|
||||
})
|
||||
}
|
||||
current = tail;
|
||||
name: src.ast.span(param).slice(src.code),
|
||||
});
|
||||
}
|
||||
|
||||
let param_count = u8::try_from(locals.len()).unwrap_or_else(|_| {
|
||||
c.diagnose(Diagnostic {
|
||||
span: src.ast.get(param_list).span,
|
||||
message: "too many function parameters",
|
||||
});
|
||||
c.emit(Diagnostic::error(
|
||||
src.ast.span(params),
|
||||
"too many function parameters",
|
||||
));
|
||||
0
|
||||
});
|
||||
|
||||
|
@ -453,13 +473,21 @@ fn compile_fn<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, args: NodeId) -> C
|
|||
c.chunk.patch_u16(after_offset, after);
|
||||
|
||||
let scope = c.scopes.pop().unwrap();
|
||||
let capture_count = u8::try_from(scope.captures.len()).unwrap_or_else(|_| {
|
||||
c.diagnose(Diagnostic {
|
||||
span: src.ast.get(body).span,
|
||||
message: "function refers to too many variables from the outer function",
|
||||
});
|
||||
let local_count = u8::try_from(scope.locals.len()).unwrap_or_else(|_| {
|
||||
c.emit(Diagnostic::error(
|
||||
src.ast.span(body),
|
||||
"function contains too many local variables",
|
||||
));
|
||||
0
|
||||
});
|
||||
let capture_count = u8::try_from(scope.captures.len()).unwrap_or_else(|_| {
|
||||
c.emit(Diagnostic::error(
|
||||
src.ast.span(body),
|
||||
"function refers to too many variables from its outer functions",
|
||||
));
|
||||
0
|
||||
});
|
||||
c.chunk.emit_u8(local_count)?;
|
||||
c.chunk.emit_u8(capture_count)?;
|
||||
for capture in scope.captures {
|
||||
match capture {
|
||||
|
@ -484,31 +512,27 @@ fn compile_toplevel<'a>(
|
|||
src: &Source<'a>,
|
||||
node_id: NodeId,
|
||||
) -> CompileResult {
|
||||
let NodeKind::Toplevel(mut current) = src.ast.get(node_id).kind else {
|
||||
unreachable!("compile_toplevel expects a Toplevel");
|
||||
};
|
||||
def_prepass(c, src, node_id)?;
|
||||
|
||||
def_prepass(c, src, current)?;
|
||||
let mut walk = src.ast.walk(node_id);
|
||||
let mut result_expr = None;
|
||||
while let Some(toplevel_expr) = walk.node() {
|
||||
if let Some(result_expr) = result_expr {
|
||||
// TODO: This diagnostic should show you the expression after the result.
|
||||
c.emit(Diagnostic::error(
|
||||
src.ast.span(result_expr),
|
||||
"the result value must be the last thing in the program",
|
||||
));
|
||||
}
|
||||
|
||||
let mut had_result = false;
|
||||
while let NodeKind::List(expr, tail) = src.ast.get(current).kind {
|
||||
match compile_toplevel_expr(c, src, expr)? {
|
||||
match compile_toplevel_expr(c, src, toplevel_expr)? {
|
||||
ToplevelExpr::Def => (),
|
||||
ToplevelExpr::Result => had_result = true,
|
||||
ToplevelExpr::Result if result_expr.is_none() => result_expr = Some(toplevel_expr),
|
||||
ToplevelExpr::Result => (),
|
||||
}
|
||||
}
|
||||
|
||||
if had_result && src.ast.get(tail).kind != NodeKind::Nil {
|
||||
c.diagnose(Diagnostic {
|
||||
span: src.ast.get(tail).span,
|
||||
message: "result value may not be followed by anything else",
|
||||
});
|
||||
break;
|
||||
}
|
||||
|
||||
current = tail;
|
||||
}
|
||||
|
||||
if !had_result {
|
||||
if result_expr.is_none() {
|
||||
c.chunk.emit_opcode(Opcode::Nil)?;
|
||||
}
|
||||
c.chunk.emit_opcode(Opcode::Return)?;
|
||||
|
@ -516,38 +540,30 @@ fn compile_toplevel<'a>(
|
|||
Ok(())
|
||||
}
|
||||
|
||||
fn def_prepass<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, node_id: NodeId) -> CompileResult {
|
||||
fn def_prepass<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, toplevel: NodeId) -> CompileResult {
|
||||
let mut walk = src.ast.walk(toplevel);
|
||||
|
||||
// This is a bit of a pattern matching tapeworm, but Rust unfortunately doesn't have `if let`
|
||||
// chains yet to make this more readable.
|
||||
let mut current = node_id;
|
||||
while let NodeKind::List(expr, tail) = src.ast.get(current).kind {
|
||||
if let NodeKind::List(head_id, tail_id) = src.ast.get(expr).kind {
|
||||
let head = src.ast.get(head_id);
|
||||
let name = head.span.slice(src.code);
|
||||
if head.kind == NodeKind::Ident && name == "def" {
|
||||
if let NodeKind::List(ident_id, _) = src.ast.get(tail_id).kind {
|
||||
let ident = src.ast.get(ident_id);
|
||||
if ident.kind == NodeKind::Ident {
|
||||
let name = ident.span.slice(src.code);
|
||||
while let Some(binary) = walk.node_of(NodeKind::Binary) {
|
||||
let mut binary_walk = src.ast.walk(binary);
|
||||
if let (Some(ident), Some(op)) = (binary_walk.node(), binary_walk.get(NodeKind::Op)) {
|
||||
if src.ast.span(op).slice(src.code) == "=" {
|
||||
let name = src.ast.span(ident).slice(src.code);
|
||||
match c.defs.add(name) {
|
||||
Ok(_) => (),
|
||||
Err(DefError::Exists) => c.diagnose(Diagnostic {
|
||||
span: ident.span,
|
||||
message: "redefinitions of defs are not allowed",
|
||||
}),
|
||||
Err(DefError::OutOfSpace) => c.diagnose(Diagnostic {
|
||||
span: ident.span,
|
||||
message: "too many defs",
|
||||
}),
|
||||
Err(DefError::Exists) => c.emit(Diagnostic::error(
|
||||
src.ast.span(ident),
|
||||
"a def with this name already exists",
|
||||
)),
|
||||
Err(DefError::OutOfSpace) => {
|
||||
c.emit(Diagnostic::error(src.ast.span(binary), "too many defs"))
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
current = tail;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
|
@ -562,14 +578,10 @@ fn compile_toplevel_expr<'a>(
|
|||
src: &Source<'a>,
|
||||
node_id: NodeId,
|
||||
) -> CompileResult<ToplevelExpr> {
|
||||
let node = src.ast.get(node_id);
|
||||
|
||||
if let NodeKind::List(head_id, tail_id) = node.kind {
|
||||
let head = src.ast.get(head_id);
|
||||
if head.kind == NodeKind::Ident {
|
||||
let name = head.span.slice(src.code);
|
||||
if name == "def" {
|
||||
compile_def(c, src, tail_id)?;
|
||||
if src.ast.kind(node_id) == NodeKind::Binary {
|
||||
if let Some(op) = src.ast.walk(node_id).get(NodeKind::Op) {
|
||||
if src.ast.span(op).slice(src.code) == "=" {
|
||||
compile_def(c, src, node_id)?;
|
||||
return Ok(ToplevelExpr::Def);
|
||||
}
|
||||
}
|
||||
|
@ -579,24 +591,32 @@ fn compile_toplevel_expr<'a>(
|
|||
Ok(ToplevelExpr::Result)
|
||||
}
|
||||
|
||||
fn compile_def<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, args: NodeId) -> CompileResult {
|
||||
let mut list = WalkList::new(args);
|
||||
|
||||
let ident = list.expect_arg(c, src, "missing definition name");
|
||||
let value = list.expect_arg(c, src, "missing definition value");
|
||||
list.expect_nil(c, src, "extra arguments after definition");
|
||||
|
||||
if !list.ok {
|
||||
fn compile_def<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, node_id: NodeId) -> CompileResult {
|
||||
let mut walk = src.ast.walk(node_id);
|
||||
let Some(left) = walk.node() else {
|
||||
return Ok(());
|
||||
};
|
||||
let Some(_op) = walk.node() else {
|
||||
return Ok(());
|
||||
};
|
||||
let Some(right) = walk.node() else {
|
||||
return Ok(());
|
||||
};
|
||||
|
||||
if src.ast.kind(left) != NodeKind::Ident {
|
||||
c.emit(Diagnostic::error(
|
||||
src.ast.span(left),
|
||||
"def name (identifier) expected",
|
||||
));
|
||||
}
|
||||
|
||||
let name = src.ast.get(ident).span.slice(src.code);
|
||||
let name = src.ast.span(left).slice(src.code);
|
||||
// NOTE: def_prepass collects all definitions beforehand.
|
||||
// In case a def ends up not existing, that means we ran out of space for defs - so emit a
|
||||
// zero def instead.
|
||||
let def_id = c.defs.get(name).unwrap_or_default();
|
||||
|
||||
compile_expr(c, src, value)?;
|
||||
compile_expr(c, src, right)?;
|
||||
c.chunk.emit_opcode(Opcode::SetDef)?;
|
||||
c.chunk.emit_u16(def_id.to_u16())?;
|
||||
|
||||
|
|
26
crates/haku/src/diagnostic.rs
Normal file
26
crates/haku/src/diagnostic.rs
Normal file
|
@ -0,0 +1,26 @@
|
|||
use alloc::string::String;
|
||||
|
||||
use crate::source::Span;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Diagnostic {
|
||||
span: Span,
|
||||
message: String,
|
||||
}
|
||||
|
||||
impl Diagnostic {
|
||||
pub fn error(span: Span, message: impl Into<String>) -> Self {
|
||||
Self {
|
||||
span,
|
||||
message: message.into(),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn span(&self) -> Span {
|
||||
self.span
|
||||
}
|
||||
|
||||
pub fn message(&self) -> &str {
|
||||
&self.message
|
||||
}
|
||||
}
|
237
crates/haku/src/lexer.rs
Normal file
237
crates/haku/src/lexer.rs
Normal file
|
@ -0,0 +1,237 @@
|
|||
use alloc::vec::Vec;
|
||||
|
||||
use crate::{
|
||||
diagnostic::Diagnostic,
|
||||
source::{SourceCode, Span},
|
||||
token::{Lexis, TokenAllocError, TokenKind},
|
||||
};
|
||||
|
||||
pub struct Lexer<'a> {
|
||||
pub lexis: Lexis,
|
||||
pub diagnostics: Vec<Diagnostic>,
|
||||
input: &'a SourceCode,
|
||||
position: u32,
|
||||
}
|
||||
|
||||
impl<'a> Lexer<'a> {
|
||||
pub fn new(lexis: Lexis, input: &'a SourceCode) -> Self {
|
||||
Self {
|
||||
lexis,
|
||||
diagnostics: Vec::new(),
|
||||
input,
|
||||
position: 0,
|
||||
}
|
||||
}
|
||||
|
||||
fn current(&self) -> char {
|
||||
self.input[self.position as usize..]
|
||||
.chars()
|
||||
.next()
|
||||
.unwrap_or('\0')
|
||||
}
|
||||
|
||||
fn advance(&mut self) {
|
||||
self.position += self.current().len_utf8() as u32;
|
||||
}
|
||||
|
||||
fn emit(&mut self, diagnostic: Diagnostic) {
|
||||
if self.diagnostics.len() < self.diagnostics.capacity() {
|
||||
self.diagnostics.push(diagnostic);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Consumes the current character and returns `kind` for it.
fn one(l: &mut Lexer<'_>, kind: TokenKind) -> TokenKind {
    l.advance();
    kind
}
|
||||
|
||||
/// Lexes a token that is either one or two characters long.
///
/// The current character is always consumed. If the character after it is `c2`, that one is
/// consumed too and `kind2` is returned; otherwise `kind1` is returned.
fn one_or_two(l: &mut Lexer<'_>, kind1: TokenKind, c2: char, kind2: TokenKind) -> TokenKind {
    l.advance();
    if l.current() != c2 {
        return kind1;
    }

    l.advance();
    kind2
}
|
||||
|
||||
/// Returns whether `c` may appear in an identifier: an ASCII letter, an ASCII digit, or `_`.
fn is_ident_char(c: char) -> bool {
    c == '_' || c.is_ascii_alphanumeric()
}
|
||||
|
||||
fn ident(l: &mut Lexer<'_>) -> TokenKind {
|
||||
let start = l.position;
|
||||
while is_ident_char(l.current()) {
|
||||
l.advance();
|
||||
}
|
||||
let end = l.position;
|
||||
|
||||
match Span::new(start, end).slice(l.input) {
|
||||
"_" => TokenKind::Underscore,
|
||||
"and" => TokenKind::And,
|
||||
"or" => TokenKind::Or,
|
||||
"if" => TokenKind::If,
|
||||
"else" => TokenKind::Else,
|
||||
"let" => TokenKind::Let,
|
||||
_ => TokenKind::Ident,
|
||||
}
|
||||
}
|
||||
|
||||
fn tag(l: &mut Lexer<'_>) -> TokenKind {
|
||||
while is_ident_char(l.current()) {
|
||||
l.advance();
|
||||
}
|
||||
TokenKind::Tag
|
||||
}
|
||||
|
||||
// NOTE: You shouldn't expect that the numbers produced by the lexer are parsable.
|
||||
fn number(l: &mut Lexer<'_>) -> TokenKind {
|
||||
while l.current().is_ascii_digit() {
|
||||
l.advance();
|
||||
}
|
||||
|
||||
if l.current() == '.' {
|
||||
let dot = l.position;
|
||||
l.advance();
|
||||
if !l.current().is_ascii_digit() {
|
||||
l.emit(Diagnostic::error(
|
||||
Span::new(dot, l.position),
|
||||
"there must be at least a single digit after the decimal point",
|
||||
));
|
||||
}
|
||||
while l.current().is_ascii_digit() {
|
||||
l.advance();
|
||||
}
|
||||
}
|
||||
|
||||
TokenKind::Number
|
||||
}
|
||||
|
||||
// NOTE: You shouldn't expect that the color literals produced by the lexer are parsable.
|
||||
fn color(l: &mut Lexer<'_>) -> TokenKind {
|
||||
let hash = l.position;
|
||||
l.advance(); // #
|
||||
|
||||
if !l.current().is_ascii_hexdigit() {
|
||||
l.emit(Diagnostic::error(
|
||||
Span::new(hash, l.position),
|
||||
"hex digits expected after `#` (color literal)",
|
||||
));
|
||||
}
|
||||
|
||||
let start = l.position;
|
||||
while l.current().is_ascii_hexdigit() {
|
||||
l.advance();
|
||||
}
|
||||
let len = l.position - start;
|
||||
|
||||
if !matches!(len, 3 | 4 | 6 | 8) {
|
||||
l.emit(Diagnostic::error(Span::new(hash, l.position), "incorrect number of digits in color literal (must be #RGB, #RGBA, #RRGGBB, or #RRGGBBAA)"));
|
||||
}
|
||||
|
||||
TokenKind::Color
|
||||
}
|
||||
|
||||
fn whitespace_and_comments(l: &mut Lexer<'_>) {
|
||||
loop {
|
||||
match l.current() {
|
||||
'-' => {
|
||||
let position = l.position;
|
||||
l.advance();
|
||||
if l.current() == '-' {
|
||||
while l.current() != '\n' {
|
||||
l.advance();
|
||||
}
|
||||
} else {
|
||||
// An unfortunate little bit of backtracking here;
|
||||
// This seems like the simplest possible solution though.
|
||||
// We don't treat comments as a separate token to simplify the parsing phase,
|
||||
// and because of this, handling this at the "real" token level would complicate
|
||||
// things quite a bit.
|
||||
l.position = position;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
' ' | '\r' | '\t' => l.advance(),
|
||||
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn newline(l: &mut Lexer<'_>) -> (TokenKind, Span) {
|
||||
let start = l.position;
|
||||
l.advance(); // skip the initial newline
|
||||
let end = l.position;
|
||||
|
||||
// Skip additional newlines after this one, to only produce one token.
|
||||
// These do not count into this newline's span though.
|
||||
loop {
|
||||
whitespace_and_comments(l);
|
||||
if l.current() == '\n' {
|
||||
l.advance();
|
||||
continue;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
(TokenKind::Newline, Span::new(start, end))
|
||||
}
|
||||
|
||||
fn token(l: &mut Lexer<'_>) -> (TokenKind, Span) {
|
||||
whitespace_and_comments(l);
|
||||
|
||||
let start = l.position;
|
||||
let kind = match l.current() {
|
||||
'\0' => TokenKind::Eof,
|
||||
|
||||
// NOTE: Order matters here. Numbers and tags take priority over identifers.
|
||||
c if c.is_ascii_uppercase() => tag(l),
|
||||
c if c.is_ascii_digit() => number(l),
|
||||
c if is_ident_char(c) => ident(l),
|
||||
|
||||
'#' => color(l),
|
||||
|
||||
'+' => one(l, TokenKind::Plus),
|
||||
'-' => one_or_two(l, TokenKind::Minus, '>', TokenKind::RArrow),
|
||||
'*' => one(l, TokenKind::Star),
|
||||
'/' => one(l, TokenKind::Slash),
|
||||
'=' => one_or_two(l, TokenKind::Equal, '=', TokenKind::EqualEqual),
|
||||
'!' => one_or_two(l, TokenKind::Not, '=', TokenKind::NotEqual),
|
||||
'<' => one_or_two(l, TokenKind::Less, '=', TokenKind::LessEqual),
|
||||
'>' => one_or_two(l, TokenKind::Greater, '=', TokenKind::GreaterEqual),
|
||||
|
||||
'\n' => return newline(l),
|
||||
'(' => one(l, TokenKind::LParen),
|
||||
')' => one(l, TokenKind::RParen),
|
||||
'[' => one(l, TokenKind::LBrack),
|
||||
']' => one(l, TokenKind::RBrack),
|
||||
',' => one(l, TokenKind::Comma),
|
||||
'\\' => one(l, TokenKind::Backslash),
|
||||
|
||||
_ => {
|
||||
l.advance();
|
||||
l.emit(Diagnostic::error(
|
||||
Span::new(start, l.position),
|
||||
"unexpected character",
|
||||
));
|
||||
TokenKind::Error
|
||||
}
|
||||
};
|
||||
let end = l.position;
|
||||
(kind, Span::new(start, end))
|
||||
}
|
||||
|
||||
pub fn lex(l: &mut Lexer<'_>) -> Result<(), TokenAllocError> {
|
||||
loop {
|
||||
let (kind, span) = token(l);
|
||||
l.lexis.push(kind, span)?;
|
||||
if kind == TokenKind::Eof {
|
||||
break;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
|
@ -2,10 +2,15 @@
|
|||
|
||||
extern crate alloc;
|
||||
|
||||
pub mod ast;
|
||||
pub mod bytecode;
|
||||
pub mod compiler;
|
||||
pub mod diagnostic;
|
||||
pub mod lexer;
|
||||
pub mod parser;
|
||||
pub mod render;
|
||||
pub mod sexp;
|
||||
pub mod source;
|
||||
pub mod system;
|
||||
pub mod token;
|
||||
pub mod value;
|
||||
pub mod vm;
|
||||
|
|
607
crates/haku/src/parser.rs
Normal file
607
crates/haku/src/parser.rs
Normal file
|
@ -0,0 +1,607 @@
|
|||
use core::cell::Cell;
|
||||
|
||||
use alloc::vec::Vec;
|
||||
|
||||
use crate::{
|
||||
ast::{Ast, NodeAllocError, NodeId, NodeKind},
|
||||
diagnostic::Diagnostic,
|
||||
source::Span,
|
||||
token::{Lexis, TokenKind, TokenKindSet},
|
||||
};
|
||||
|
||||
/// Resource limits applied to a [`Parser`].
#[derive(Debug, Clone, Copy)]
pub struct ParserLimits {
    /// Capacity reserved for the parser's event buffer; must be below `u32::MAX`
    /// (checked by `Parser::new`).
    pub max_events: usize,
}
|
||||
|
||||
/// Event-based parser: parsing functions record `Open`/`Close`/`Advance` events, which
/// `into_ast` later replays to build the tree.
pub struct Parser<'a> {
    /// The tokens being parsed.
    tokens: &'a Lexis,
    /// Events recorded so far.
    events: Vec<Event>,
    /// Index of the current token in `tokens`.
    position: u32,
    /// Decremented by every `peek` and reset by every successful `advance`; `peek` panics
    /// with "parser is stuck" when it reaches zero.
    fuel: Cell<u32>,
    /// Diagnostics reported while parsing.
    pub diagnostics: Vec<Diagnostic>,
}
|
||||
|
||||
/// A single step recorded by the parser and replayed by `Parser::into_ast`.
#[derive(Debug)]
enum Event {
    /// Starts a node of the given kind.
    Open { kind: NodeKind },
    /// Ends the most recently opened node that is still open.
    Close,
    /// Consumes one token, which becomes a `Token` child of the currently open node.
    Advance,
}
|
||||
|
||||
/// Handle to a node that has been opened but not yet closed.
struct Open {
    /// Index of the node's `Open` event, or `None` if the event could not be recorded.
    index: Option<usize>,
}
|
||||
|
||||
/// Handle to a node that has been closed; `Parser::open_before` can wrap it in a new node.
struct Closed {
    /// Index of the node's `Open` event, or `None` if the node was never recorded.
    index: Option<usize>,
}
|
||||
|
||||
impl<'a> Parser<'a> {
|
||||
const FUEL: u32 = 256;
|
||||
|
||||
pub fn new(input: &'a Lexis, limits: &ParserLimits) -> Self {
|
||||
assert!(limits.max_events < u32::MAX as usize);
|
||||
|
||||
Self {
|
||||
tokens: input,
|
||||
events: Vec::with_capacity(limits.max_events),
|
||||
position: 0,
|
||||
diagnostics: Vec::with_capacity(16),
|
||||
fuel: Cell::new(Self::FUEL),
|
||||
}
|
||||
}
|
||||
|
||||
fn event(&mut self, event: Event) -> Option<usize> {
|
||||
if self.events.len() < self.events.capacity() {
|
||||
let index = self.events.len();
|
||||
self.events.push(event);
|
||||
Some(index)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
fn open(&mut self) -> Open {
|
||||
Open {
|
||||
index: self.event(Event::Open {
|
||||
kind: NodeKind::Error,
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
fn open_before(&mut self, closed: Closed) -> Open {
|
||||
if let Some(index) = closed.index {
|
||||
if self.events.len() < self.events.capacity() {
|
||||
self.events.insert(
|
||||
index,
|
||||
Event::Open {
|
||||
kind: NodeKind::Error,
|
||||
},
|
||||
);
|
||||
return Open { index: Some(index) };
|
||||
}
|
||||
}
|
||||
Open { index: None }
|
||||
}
|
||||
|
||||
fn close(&mut self, open: Open, kind: NodeKind) -> Closed {
|
||||
if let Some(index) = open.index {
|
||||
self.events[index] = Event::Open { kind };
|
||||
self.event(Event::Close);
|
||||
Closed { index: Some(index) }
|
||||
} else {
|
||||
Closed { index: None }
|
||||
}
|
||||
}
|
||||
|
||||
fn is_eof(&self) -> bool {
|
||||
self.peek() == TokenKind::Eof
|
||||
}
|
||||
|
||||
fn advance(&mut self) {
|
||||
if !self.is_eof() {
|
||||
self.position += 1;
|
||||
self.event(Event::Advance);
|
||||
self.fuel.set(Self::FUEL);
|
||||
}
|
||||
}
|
||||
|
||||
#[track_caller]
|
||||
fn peek(&self) -> TokenKind {
|
||||
assert_ne!(self.fuel.get(), 0, "parser is stuck");
|
||||
self.fuel.set(self.fuel.get() - 1);
|
||||
|
||||
self.tokens.kind(self.position)
|
||||
}
|
||||
|
||||
fn span(&self) -> Span {
|
||||
self.tokens.span(self.position)
|
||||
}
|
||||
|
||||
fn emit(&mut self, diagnostic: Diagnostic) {
|
||||
if self.diagnostics.len() < self.diagnostics.capacity() {
|
||||
self.diagnostics.push(diagnostic);
|
||||
}
|
||||
}
|
||||
|
||||
fn advance_with_error(&mut self) -> Closed {
|
||||
let opened = self.open();
|
||||
self.advance();
|
||||
self.close(opened, NodeKind::Error)
|
||||
}
|
||||
|
||||
fn optional_newline(&mut self) -> bool {
|
||||
if self.peek() == TokenKind::Newline {
|
||||
self.advance();
|
||||
true
|
||||
} else {
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
pub fn into_ast(self, ast: &mut Ast) -> Result<(NodeId, Vec<Diagnostic>), NodeAllocError> {
|
||||
let mut token = 0;
|
||||
let mut events = self.events;
|
||||
let mut stack = Vec::new();
|
||||
|
||||
struct StackEntry {
|
||||
node_id: NodeId,
|
||||
// TODO: This should probably be optimized to use a shared stack.
|
||||
children: Vec<NodeId>,
|
||||
}
|
||||
|
||||
// Remove the last Close to keep a single node on the stack.
|
||||
assert!(matches!(events.pop(), Some(Event::Close)));
|
||||
|
||||
for event in events {
|
||||
match event {
|
||||
Event::Open { kind } => {
|
||||
stack.push(StackEntry {
|
||||
node_id: ast.alloc(kind, self.tokens.span(token))?,
|
||||
children: Vec::new(),
|
||||
});
|
||||
}
|
||||
Event::Close => {
|
||||
let end_span = self.tokens.span(token.saturating_sub(1));
|
||||
let stack_entry = stack.pop().unwrap();
|
||||
ast.alloc_children(stack_entry.node_id, &stack_entry.children);
|
||||
ast.extend_span(stack_entry.node_id, end_span.end);
|
||||
stack.last_mut().unwrap().children.push(stack_entry.node_id);
|
||||
}
|
||||
Event::Advance => {
|
||||
let span = self.tokens.span(token);
|
||||
let node_id = ast.alloc(NodeKind::Token, span)?;
|
||||
stack
|
||||
.last_mut()
|
||||
.expect("advance() may only be used in an open node")
|
||||
.children
|
||||
.push(node_id);
|
||||
token += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if stack.len() != 1 {
|
||||
// This means we had too many events emitted and they are no longer balanced.
|
||||
return Err(NodeAllocError);
|
||||
}
|
||||
// assert_eq!(token, self.tokens.len());
|
||||
|
||||
let end_span = self.tokens.span(token.saturating_sub(1));
|
||||
let stack_entry = stack.pop().unwrap();
|
||||
ast.alloc_children(stack_entry.node_id, &stack_entry.children);
|
||||
ast.extend_span(stack_entry.node_id, end_span.end);
|
||||
|
||||
Ok((stack_entry.node_id, self.diagnostics))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> core::fmt::Debug for Parser<'a> {
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
|
||||
f.debug_struct("Parser")
|
||||
.field("events", &self.events)
|
||||
.finish_non_exhaustive()
|
||||
}
|
||||
}
|
||||
|
||||
/// Result of comparing two operators with `tighter`: which side binds more tightly.
enum Tighter {
    /// The left operator wins; `precedence_parse` stops extending the current expression.
    Left,
    /// The right operator wins; `precedence_parse` folds it into the current expression.
    Right,
}
|
||||
|
||||
fn tighter(left: TokenKind, right: TokenKind) -> Tighter {
|
||||
fn tightness(kind: TokenKind) -> Option<usize> {
|
||||
match kind {
|
||||
TokenKind::Equal => Some(0),
|
||||
TokenKind::EqualEqual
|
||||
| TokenKind::NotEqual
|
||||
| TokenKind::Less
|
||||
| TokenKind::LessEqual
|
||||
| TokenKind::Greater
|
||||
| TokenKind::GreaterEqual => Some(1),
|
||||
TokenKind::Plus | TokenKind::Minus => Some(2),
|
||||
TokenKind::Star | TokenKind::Slash => Some(3),
|
||||
_ if PREFIX_TOKENS.contains(kind) => Some(4),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
let Some(right_tightness) = tightness(right) else {
|
||||
return Tighter::Left;
|
||||
};
|
||||
let Some(left_tightness) = tightness(left) else {
|
||||
assert!(left == TokenKind::Eof);
|
||||
return Tighter::Right;
|
||||
};
|
||||
|
||||
if right_tightness > left_tightness {
|
||||
Tighter::Right
|
||||
} else {
|
||||
Tighter::Left
|
||||
}
|
||||
}
|
||||
|
||||
fn precedence_parse(p: &mut Parser, left: TokenKind) {
|
||||
let mut lhs = prefix(p);
|
||||
|
||||
loop {
|
||||
let right = p.peek();
|
||||
match tighter(left, right) {
|
||||
Tighter::Left => break,
|
||||
Tighter::Right => {
|
||||
let o = p.open_before(lhs);
|
||||
let kind = infix(p, right);
|
||||
lhs = p.close(o, kind);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Parses a single-token node: consumes the current token and wraps it in a node of `kind`.
fn one(p: &mut Parser, kind: NodeKind) -> Closed {
    let o = p.open();
    p.advance();
    p.close(o, kind)
}
|
||||
|
||||
/// Parses a list literal: `[`, then elements separated by `,` or new lines, then `]`.
///
/// Reports a diagnostic at the opening bracket if the input ends before the list is closed,
/// and at any token that follows an element but is neither a separator nor `]`.
fn list(p: &mut Parser) -> Closed {
    let o = p.open();
    // Remembered so that a missing `]` can be reported at the opening bracket.
    let lspan = p.span();
    p.advance(); // [
    p.optional_newline();

    loop {
        // Check for the end of the list before each element; this also handles `[]` and a
        // trailing separator.
        match p.peek() {
            TokenKind::Eof => {
                p.emit(Diagnostic::error(lspan, "missing `]` to close this list"));
                break;
            }

            TokenKind::RBrack => {
                p.advance();
                break;
            }

            _ => (),
        }

        expr(p);

        match p.peek() {
            TokenKind::Comma | TokenKind::Newline => {
                p.advance();
                continue;
            }

            TokenKind::RBrack => {
                p.advance();
                break;
            }

            _ => {
                // Skip the offending token as an error node and keep parsing elements.
                let span = p.span();
                p.emit(Diagnostic::error(
                    span,
                    "comma `,` or new line expected after list element",
                ));
                p.advance_with_error();
            }
        }
    }

    p.close(o, NodeKind::List)
}
|
||||
|
||||
/// Parses a unary expression: an `Op` node for the operator token followed by a prefix
/// expression as the operand.
fn unary(p: &mut Parser) -> Closed {
    let o = p.open();

    // The operator gets its own `Op` node.
    let op = p.open();
    p.advance();
    p.close(op, NodeKind::Op);

    prefix(p);

    p.close(o, NodeKind::Unary)
}
|
||||
|
||||
fn paren(p: &mut Parser) -> Closed {
|
||||
let o = p.open();
|
||||
let lspan = p.span();
|
||||
p.advance(); // (
|
||||
if p.peek() == TokenKind::RParen {
|
||||
p.advance(); // )
|
||||
p.close(o, NodeKind::ParenEmpty)
|
||||
} else {
|
||||
p.optional_newline();
|
||||
expr(p);
|
||||
p.optional_newline();
|
||||
if p.peek() != TokenKind::RParen {
|
||||
p.emit(Diagnostic::error(lspan, "missing closing parenthesis `)`"));
|
||||
p.advance_with_error()
|
||||
} else {
|
||||
p.advance();
|
||||
p.close(o, NodeKind::Paren)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Parses a single function parameter: an identifier or `_`, wrapped in a `Param` node.
///
/// Any other token is reported with a diagnostic and skipped as an error node inside the
/// `Param` node.
fn param(p: &mut Parser) {
    let o = p.open();

    if let TokenKind::Ident | TokenKind::Underscore = p.peek() {
        p.advance();
    } else {
        let span = p.span();
        p.emit(Diagnostic::error(
            span,
            "parameter names must be identifiers or `_`",
        ));
        p.advance_with_error();
    }

    p.close(o, NodeKind::Param);
}
|
||||
|
||||
/// Parses a lambda: `\`, comma-separated parameters, `->`, then the body expression.
///
/// The parameters are grouped under a `Params` node. At least one parameter is always
/// parsed, since `param` is called before the first check for `->`.
fn lambda(p: &mut Parser) -> Closed {
    let o = p.open();
    p.advance(); // backslash

    let params = p.open();
    loop {
        param(p);
        match p.peek() {
            TokenKind::Comma => {
                p.advance();
                continue;
            }

            // Left for the check below to consume.
            TokenKind::RArrow => break,

            _ => {
                let span = p.span();
                p.emit(Diagnostic::error(
                    span,
                    "`,` or `->` expected after function parameter",
                ));
                p.advance_with_error();
                break;
            }
        }
    }
    p.close(params, NodeKind::Params);

    // NOTE: Can be false if there are some stray tokens.
    // We prefer to bail early and let the rest of the program parse.
    if p.peek() == TokenKind::RArrow {
        p.advance();
        p.optional_newline();
        expr(p);
    }

    p.close(o, NodeKind::Lambda)
}
|
||||
|
||||
fn if_expr(p: &mut Parser) -> Closed {
|
||||
let o = p.open();
|
||||
|
||||
p.advance(); // if
|
||||
if p.peek() != TokenKind::LParen {
|
||||
let span = p.span();
|
||||
p.emit(Diagnostic::error(
|
||||
span,
|
||||
"the condition in an `if` expression must be surrounded with parentheses",
|
||||
));
|
||||
// NOTE: Don't advance, it's more likely the programmer expected no parentheses to be needed.
|
||||
}
|
||||
p.advance();
|
||||
expr(p); // Condition
|
||||
if p.peek() != TokenKind::RParen {
|
||||
let span = p.span();
|
||||
p.emit(Diagnostic::error(
|
||||
span,
|
||||
"missing closing parenthesis after `if` condition",
|
||||
));
|
||||
}
|
||||
p.advance();
|
||||
p.optional_newline();
|
||||
|
||||
expr(p); // True branch
|
||||
p.optional_newline();
|
||||
|
||||
if p.peek() != TokenKind::Else {
|
||||
let span = p.span();
|
||||
p.emit(Diagnostic::error(
|
||||
span,
|
||||
"`if` expression is missing an `else` clause",
|
||||
));
|
||||
}
|
||||
p.advance();
|
||||
p.optional_newline();
|
||||
|
||||
expr(p); // False branch
|
||||
|
||||
p.close(o, NodeKind::If)
|
||||
}
|
||||
|
||||
/// Parses `let name = value`, a new line, and then the expression that follows, into a
/// `Let` node.
///
/// A missing variable name, `=`, or new line is reported with a diagnostic, and the token
/// found in its place is skipped as an error node.
fn let_expr(p: &mut Parser) -> Closed {
    let o = p.open();

    p.advance(); // let

    if p.peek() == TokenKind::Ident {
        let ident = p.open();
        p.advance();
        p.close(ident, NodeKind::Ident);
    } else {
        let span = p.span();
        p.emit(Diagnostic::error(span, "`let` variable name expected"));
        p.advance_with_error();
    }

    if p.peek() == TokenKind::Equal {
        p.advance();
    } else {
        let span = p.span();
        p.emit(Diagnostic::error(span, "`=` expected after variable name"));
        p.advance_with_error();
    }

    // The value bound to the variable.
    expr(p);

    if p.peek() == TokenKind::Newline {
        p.advance();
    } else {
        let span = p.span();
        p.emit(Diagnostic::error(
            span,
            "new line expected after `let` expression",
        ));
        p.advance_with_error();
    }

    // The expression that follows the binding.
    expr(p);

    p.close(o, NodeKind::Let)
}
|
||||
|
||||
const PREFIX_TOKENS: TokenKindSet = TokenKindSet::new(&[
|
||||
TokenKind::Ident,
|
||||
TokenKind::Tag,
|
||||
TokenKind::Number,
|
||||
TokenKind::Color,
|
||||
// NOTE: This is ambiguous in function calls.
|
||||
// In that case, the infix operator takes precedence (because the `match` arms for the infix op
|
||||
// come first.)
|
||||
TokenKind::Minus,
|
||||
TokenKind::Not,
|
||||
TokenKind::LParen,
|
||||
TokenKind::Backslash,
|
||||
TokenKind::If,
|
||||
TokenKind::Let,
|
||||
TokenKind::LBrack,
|
||||
]);
|
||||
|
||||
fn prefix(p: &mut Parser) -> Closed {
|
||||
match p.peek() {
|
||||
TokenKind::Ident => one(p, NodeKind::Ident),
|
||||
TokenKind::Tag => one(p, NodeKind::Tag),
|
||||
TokenKind::Number => one(p, NodeKind::Number),
|
||||
TokenKind::Color => one(p, NodeKind::Color),
|
||||
TokenKind::LBrack => list(p),
|
||||
|
||||
TokenKind::Minus | TokenKind::Not => unary(p),
|
||||
TokenKind::LParen => paren(p),
|
||||
TokenKind::Backslash => lambda(p),
|
||||
TokenKind::If => if_expr(p),
|
||||
TokenKind::Let => let_expr(p),
|
||||
|
||||
_ => {
|
||||
assert!(
|
||||
!PREFIX_TOKENS.contains(p.peek()),
|
||||
"{:?} found in PREFIX_TOKENS",
|
||||
p.peek()
|
||||
);
|
||||
|
||||
let span = p.span();
|
||||
p.emit(Diagnostic::error(
|
||||
span,
|
||||
"an expression was expected, but this token does not start one",
|
||||
));
|
||||
p.advance_with_error()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn infix(p: &mut Parser, op: TokenKind) -> NodeKind {
|
||||
match op {
|
||||
TokenKind::Plus
|
||||
| TokenKind::Minus
|
||||
| TokenKind::Star
|
||||
| TokenKind::Slash
|
||||
| TokenKind::EqualEqual
|
||||
| TokenKind::NotEqual
|
||||
| TokenKind::Less
|
||||
| TokenKind::LessEqual
|
||||
| TokenKind::Greater
|
||||
| TokenKind::GreaterEqual
|
||||
| TokenKind::Equal => infix_binary(p, op),
|
||||
|
||||
_ if PREFIX_TOKENS.contains(op) => infix_call(p),
|
||||
|
||||
_ => panic!("unhandled infix operator {op:?}"),
|
||||
}
|
||||
}
|
||||
|
||||
fn infix_binary(p: &mut Parser, op: TokenKind) -> NodeKind {
|
||||
let o = p.open();
|
||||
p.advance();
|
||||
p.close(o, NodeKind::Op);
|
||||
|
||||
if p.peek() == TokenKind::Newline {
|
||||
p.advance();
|
||||
}
|
||||
|
||||
precedence_parse(p, op);
|
||||
NodeKind::Binary
|
||||
}
|
||||
|
||||
fn infix_call(p: &mut Parser) -> NodeKind {
|
||||
while PREFIX_TOKENS.contains(p.peek()) {
|
||||
prefix(p);
|
||||
}
|
||||
|
||||
NodeKind::Call
|
||||
}
|
||||
|
||||
pub fn expr(p: &mut Parser) {
|
||||
precedence_parse(p, TokenKind::Eof)
|
||||
}
|
||||
|
||||
pub fn toplevel(p: &mut Parser) {
|
||||
let o = p.open();
|
||||
p.optional_newline();
|
||||
while p.peek() != TokenKind::Eof {
|
||||
expr(p);
|
||||
|
||||
match p.peek() {
|
||||
TokenKind::Newline => {
|
||||
p.advance();
|
||||
continue;
|
||||
}
|
||||
|
||||
TokenKind::Eof => break,
|
||||
|
||||
_ => {
|
||||
let span = p.span();
|
||||
p.emit(Diagnostic::error(
|
||||
span,
|
||||
"newline expected after toplevel expression",
|
||||
))
|
||||
}
|
||||
}
|
||||
}
|
||||
p.close(o, NodeKind::Toplevel);
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests;
|
912
crates/haku/src/parser/tests.rs
Normal file
912
crates/haku/src/parser/tests.rs
Normal file
|
@ -0,0 +1,912 @@
|
|||
use alloc::{format, string::String};
|
||||
|
||||
use crate::{
|
||||
ast::{dump::dump, Ast, NodeId},
|
||||
lexer::{lex, Lexer},
|
||||
parser::expr,
|
||||
source::SourceCode,
|
||||
token::Lexis,
|
||||
};
|
||||
|
||||
use super::{toplevel, Parser, ParserLimits};
|
||||
|
||||
fn parse(s: &str, f: fn(&mut Parser)) -> (Ast, NodeId) {
|
||||
let mut lexer = Lexer::new(Lexis::new(1024), SourceCode::unlimited_len(s));
|
||||
lex(&mut lexer).expect("too many tokens");
|
||||
|
||||
let mut parser = Parser::new(&lexer.lexis, &ParserLimits { max_events: 1024 });
|
||||
f(&mut parser);
|
||||
|
||||
if !parser.diagnostics.is_empty() {
|
||||
panic!("parser emitted diagnostics: {:#?}", parser.diagnostics);
|
||||
}
|
||||
|
||||
let mut ast = Ast::new(1024);
|
||||
let (root, _) = parser.into_ast(&mut ast).unwrap();
|
||||
(ast, root)
|
||||
}
|
||||
|
||||
fn ast(s: &str, f: fn(&mut Parser)) -> String {
|
||||
let (ast, root) = parse(s, f);
|
||||
// The extra newline is mostly so that it's easier to make the string literals look nice.
|
||||
format!("\n{}", dump(&ast, root, None))
|
||||
}
|
||||
|
||||
#[track_caller]
|
||||
fn assert_ast_eq(s: &str, f: fn(&mut Parser), ast_s: &str) {
|
||||
let got = ast(s, f);
|
||||
if ast_s != got {
|
||||
panic!("AST mismatch. expected:\n{ast_s}\n\ngot:\n{got}\n");
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn one_literals() {
|
||||
assert_ast_eq(
|
||||
"1",
|
||||
expr,
|
||||
"
|
||||
Number @ 0..1
|
||||
Token @ 0..1",
|
||||
);
|
||||
|
||||
assert_ast_eq(
|
||||
"ExampleTag123",
|
||||
expr,
|
||||
"
|
||||
Tag @ 0..13
|
||||
Token @ 0..13",
|
||||
);
|
||||
|
||||
assert_ast_eq(
|
||||
"example_ident123",
|
||||
expr,
|
||||
"
|
||||
Ident @ 0..16
|
||||
Token @ 0..16",
|
||||
);
|
||||
|
||||
assert_ast_eq(
|
||||
"#000",
|
||||
expr,
|
||||
"
|
||||
Color @ 0..4
|
||||
Token @ 0..4",
|
||||
);
|
||||
|
||||
assert_ast_eq(
|
||||
"#000F",
|
||||
expr,
|
||||
"
|
||||
Color @ 0..5
|
||||
Token @ 0..5",
|
||||
);
|
||||
|
||||
assert_ast_eq(
|
||||
"#058EF0",
|
||||
expr,
|
||||
"
|
||||
Color @ 0..7
|
||||
Token @ 0..7",
|
||||
);
|
||||
|
||||
assert_ast_eq(
|
||||
"#058EF0FF",
|
||||
expr,
|
||||
"
|
||||
Color @ 0..9
|
||||
Token @ 0..9",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn list() {
|
||||
assert_ast_eq(
|
||||
"[]",
|
||||
expr,
|
||||
"
|
||||
List @ 0..2
|
||||
Token @ 0..1
|
||||
Token @ 1..2",
|
||||
);
|
||||
|
||||
assert_ast_eq(
|
||||
"[1]",
|
||||
expr,
|
||||
"
|
||||
List @ 0..3
|
||||
Token @ 0..1
|
||||
Number @ 1..2
|
||||
Token @ 1..2
|
||||
Token @ 2..3",
|
||||
);
|
||||
|
||||
assert_ast_eq(
|
||||
"[1, 2]",
|
||||
expr,
|
||||
"
|
||||
List @ 0..6
|
||||
Token @ 0..1
|
||||
Number @ 1..2
|
||||
Token @ 1..2
|
||||
Token @ 2..3
|
||||
Number @ 4..5
|
||||
Token @ 4..5
|
||||
Token @ 5..6",
|
||||
);
|
||||
|
||||
assert_ast_eq(
|
||||
"[
|
||||
1
|
||||
2
|
||||
]",
|
||||
expr,
|
||||
"
|
||||
List @ 0..42
|
||||
Token @ 0..1
|
||||
Token @ 1..2
|
||||
Number @ 15..16
|
||||
Token @ 15..16
|
||||
Token @ 16..17
|
||||
Number @ 30..31
|
||||
Token @ 30..31
|
||||
Token @ 31..32
|
||||
Token @ 41..42",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn unary() {
|
||||
assert_ast_eq(
|
||||
"-1",
|
||||
expr,
|
||||
"
|
||||
Unary @ 0..2
|
||||
Op @ 0..1
|
||||
Token @ 0..1
|
||||
Number @ 1..2
|
||||
Token @ 1..2",
|
||||
);
|
||||
|
||||
assert_ast_eq(
|
||||
"!1",
|
||||
expr,
|
||||
"
|
||||
Unary @ 0..2
|
||||
Op @ 0..1
|
||||
Token @ 0..1
|
||||
Number @ 1..2
|
||||
Token @ 1..2",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn binary_single() {
|
||||
assert_ast_eq(
|
||||
"1 + 1",
|
||||
expr,
|
||||
"
|
||||
Binary @ 0..5
|
||||
Number @ 0..1
|
||||
Token @ 0..1
|
||||
Op @ 2..3
|
||||
Token @ 2..3
|
||||
Number @ 4..5
|
||||
Token @ 4..5",
|
||||
);
|
||||
|
||||
assert_ast_eq(
|
||||
"1 - 1",
|
||||
expr,
|
||||
"
|
||||
Binary @ 0..5
|
||||
Number @ 0..1
|
||||
Token @ 0..1
|
||||
Op @ 2..3
|
||||
Token @ 2..3
|
||||
Number @ 4..5
|
||||
Token @ 4..5",
|
||||
);
|
||||
|
||||
assert_ast_eq(
|
||||
"1 * 1",
|
||||
expr,
|
||||
"
|
||||
Binary @ 0..5
|
||||
Number @ 0..1
|
||||
Token @ 0..1
|
||||
Op @ 2..3
|
||||
Token @ 2..3
|
||||
Number @ 4..5
|
||||
Token @ 4..5",
|
||||
);
|
||||
|
||||
assert_ast_eq(
|
||||
"1 / 1",
|
||||
expr,
|
||||
"
|
||||
Binary @ 0..5
|
||||
Number @ 0..1
|
||||
Token @ 0..1
|
||||
Op @ 2..3
|
||||
Token @ 2..3
|
||||
Number @ 4..5
|
||||
Token @ 4..5",
|
||||
);
|
||||
|
||||
assert_ast_eq(
|
||||
"1 < 1",
|
||||
expr,
|
||||
"
|
||||
Binary @ 0..5
|
||||
Number @ 0..1
|
||||
Token @ 0..1
|
||||
Op @ 2..3
|
||||
Token @ 2..3
|
||||
Number @ 4..5
|
||||
Token @ 4..5",
|
||||
);
|
||||
|
||||
assert_ast_eq(
|
||||
"1 > 1",
|
||||
expr,
|
||||
"
|
||||
Binary @ 0..5
|
||||
Number @ 0..1
|
||||
Token @ 0..1
|
||||
Op @ 2..3
|
||||
Token @ 2..3
|
||||
Number @ 4..5
|
||||
Token @ 4..5",
|
||||
);
|
||||
|
||||
assert_ast_eq(
|
||||
"1 == 1",
|
||||
expr,
|
||||
"
|
||||
Binary @ 0..6
|
||||
Number @ 0..1
|
||||
Token @ 0..1
|
||||
Op @ 2..4
|
||||
Token @ 2..4
|
||||
Number @ 5..6
|
||||
Token @ 5..6",
|
||||
);
|
||||
|
||||
assert_ast_eq(
|
||||
"1 != 1",
|
||||
expr,
|
||||
"
|
||||
Binary @ 0..6
|
||||
Number @ 0..1
|
||||
Token @ 0..1
|
||||
Op @ 2..4
|
||||
Token @ 2..4
|
||||
Number @ 5..6
|
||||
Token @ 5..6",
|
||||
);
|
||||
|
||||
assert_ast_eq(
|
||||
"1 <= 1",
|
||||
expr,
|
||||
"
|
||||
Binary @ 0..6
|
||||
Number @ 0..1
|
||||
Token @ 0..1
|
||||
Op @ 2..4
|
||||
Token @ 2..4
|
||||
Number @ 5..6
|
||||
Token @ 5..6",
|
||||
);
|
||||
|
||||
assert_ast_eq(
|
||||
"1 >= 1",
|
||||
expr,
|
||||
"
|
||||
Binary @ 0..6
|
||||
Number @ 0..1
|
||||
Token @ 0..1
|
||||
Op @ 2..4
|
||||
Token @ 2..4
|
||||
Number @ 5..6
|
||||
Token @ 5..6",
|
||||
);
|
||||
|
||||
assert_ast_eq(
|
||||
"1 = 1",
|
||||
expr,
|
||||
"
|
||||
Binary @ 0..5
|
||||
Number @ 0..1
|
||||
Token @ 0..1
|
||||
Op @ 2..3
|
||||
Token @ 2..3
|
||||
Number @ 4..5
|
||||
Token @ 4..5",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn binary_precedence() {
|
||||
assert_ast_eq(
|
||||
"1 + 1 + 1",
|
||||
expr,
|
||||
"
|
||||
Binary @ 0..9
|
||||
Binary @ 0..5
|
||||
Number @ 0..1
|
||||
Token @ 0..1
|
||||
Op @ 2..3
|
||||
Token @ 2..3
|
||||
Number @ 4..5
|
||||
Token @ 4..5
|
||||
Op @ 6..7
|
||||
Token @ 6..7
|
||||
Number @ 8..9
|
||||
Token @ 8..9",
|
||||
);
|
||||
|
||||
assert_ast_eq(
|
||||
"1 * 1 + 1",
|
||||
expr,
|
||||
"
|
||||
Binary @ 0..9
|
||||
Binary @ 0..5
|
||||
Number @ 0..1
|
||||
Token @ 0..1
|
||||
Op @ 2..3
|
||||
Token @ 2..3
|
||||
Number @ 4..5
|
||||
Token @ 4..5
|
||||
Op @ 6..7
|
||||
Token @ 6..7
|
||||
Number @ 8..9
|
||||
Token @ 8..9",
|
||||
);
|
||||
|
||||
assert_ast_eq(
|
||||
"1 + 1 * 1",
|
||||
expr,
|
||||
"
|
||||
Binary @ 0..9
|
||||
Number @ 0..1
|
||||
Token @ 0..1
|
||||
Op @ 2..3
|
||||
Token @ 2..3
|
||||
Binary @ 4..9
|
||||
Number @ 4..5
|
||||
Token @ 4..5
|
||||
Op @ 6..7
|
||||
Token @ 6..7
|
||||
Number @ 8..9
|
||||
Token @ 8..9",
|
||||
);
|
||||
|
||||
assert_ast_eq(
|
||||
"1 < 1 + 1",
|
||||
expr,
|
||||
"
|
||||
Binary @ 0..9
|
||||
Number @ 0..1
|
||||
Token @ 0..1
|
||||
Op @ 2..3
|
||||
Token @ 2..3
|
||||
Binary @ 4..9
|
||||
Number @ 4..5
|
||||
Token @ 4..5
|
||||
Op @ 6..7
|
||||
Token @ 6..7
|
||||
Number @ 8..9
|
||||
Token @ 8..9",
|
||||
);
|
||||
|
||||
assert_ast_eq(
|
||||
"1 + 1 < 1",
|
||||
expr,
|
||||
"
|
||||
Binary @ 0..9
|
||||
Binary @ 0..5
|
||||
Number @ 0..1
|
||||
Token @ 0..1
|
||||
Op @ 2..3
|
||||
Token @ 2..3
|
||||
Number @ 4..5
|
||||
Token @ 4..5
|
||||
Op @ 6..7
|
||||
Token @ 6..7
|
||||
Number @ 8..9
|
||||
Token @ 8..9",
|
||||
);
|
||||
|
||||
assert_ast_eq(
|
||||
"1 + 1 * 1 < 1",
|
||||
expr,
|
||||
"
|
||||
Binary @ 0..13
|
||||
Binary @ 0..9
|
||||
Number @ 0..1
|
||||
Token @ 0..1
|
||||
Op @ 2..3
|
||||
Token @ 2..3
|
||||
Binary @ 4..9
|
||||
Number @ 4..5
|
||||
Token @ 4..5
|
||||
Op @ 6..7
|
||||
Token @ 6..7
|
||||
Number @ 8..9
|
||||
Token @ 8..9
|
||||
Op @ 10..11
|
||||
Token @ 10..11
|
||||
Number @ 12..13
|
||||
Token @ 12..13",
|
||||
);
|
||||
|
||||
assert_ast_eq(
|
||||
"1 * 1 + 1 < 1",
|
||||
expr,
|
||||
"
|
||||
Binary @ 0..13
|
||||
Binary @ 0..9
|
||||
Binary @ 0..5
|
||||
Number @ 0..1
|
||||
Token @ 0..1
|
||||
Op @ 2..3
|
||||
Token @ 2..3
|
||||
Number @ 4..5
|
||||
Token @ 4..5
|
||||
Op @ 6..7
|
||||
Token @ 6..7
|
||||
Number @ 8..9
|
||||
Token @ 8..9
|
||||
Op @ 10..11
|
||||
Token @ 10..11
|
||||
Number @ 12..13
|
||||
Token @ 12..13",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn binary_cont() {
|
||||
assert_ast_eq(
|
||||
"1 +
|
||||
1",
|
||||
expr,
|
||||
"
|
||||
Binary @ 0..16
|
||||
Number @ 0..1
|
||||
Token @ 0..1
|
||||
Op @ 2..3
|
||||
Token @ 2..3
|
||||
Token @ 3..4
|
||||
Number @ 15..16
|
||||
Token @ 15..16",
|
||||
);
|
||||
|
||||
assert_ast_eq(
|
||||
"1 +
|
||||
|
||||
1",
|
||||
expr,
|
||||
"
|
||||
Binary @ 0..17
|
||||
Number @ 0..1
|
||||
Token @ 0..1
|
||||
Op @ 2..3
|
||||
Token @ 2..3
|
||||
Token @ 3..4
|
||||
Number @ 16..17
|
||||
Token @ 16..17",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn paren_empty() {
|
||||
assert_ast_eq(
|
||||
"()",
|
||||
expr,
|
||||
"
|
||||
ParenEmpty @ 0..2
|
||||
Token @ 0..1
|
||||
Token @ 1..2",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn paren() {
|
||||
assert_ast_eq(
|
||||
"(1)",
|
||||
expr,
|
||||
"
|
||||
Paren @ 0..3
|
||||
Token @ 0..1
|
||||
Number @ 1..2
|
||||
Token @ 1..2
|
||||
Token @ 2..3",
|
||||
);
|
||||
|
||||
assert_ast_eq(
|
||||
"(1 + 1) * 1",
|
||||
expr,
|
||||
"
|
||||
Binary @ 0..11
|
||||
Paren @ 0..7
|
||||
Token @ 0..1
|
||||
Binary @ 1..6
|
||||
Number @ 1..2
|
||||
Token @ 1..2
|
||||
Op @ 3..4
|
||||
Token @ 3..4
|
||||
Number @ 5..6
|
||||
Token @ 5..6
|
||||
Token @ 6..7
|
||||
Op @ 8..9
|
||||
Token @ 8..9
|
||||
Number @ 10..11
|
||||
Token @ 10..11",
|
||||
);
|
||||
|
||||
assert_ast_eq(
|
||||
"1 * (1 + 1)",
|
||||
expr,
|
||||
"
|
||||
Binary @ 0..11
|
||||
Number @ 0..1
|
||||
Token @ 0..1
|
||||
Op @ 2..3
|
||||
Token @ 2..3
|
||||
Paren @ 4..11
|
||||
Token @ 4..5
|
||||
Binary @ 5..10
|
||||
Number @ 5..6
|
||||
Token @ 5..6
|
||||
Op @ 7..8
|
||||
Token @ 7..8
|
||||
Number @ 9..10
|
||||
Token @ 9..10
|
||||
Token @ 10..11",
|
||||
);
|
||||
|
||||
assert_ast_eq(
|
||||
"(
|
||||
1 +
|
||||
1
|
||||
)",
|
||||
expr,
|
||||
"
|
||||
Paren @ 0..47
|
||||
Token @ 0..1
|
||||
Token @ 1..2
|
||||
Binary @ 15..33
|
||||
Number @ 15..16
|
||||
Token @ 15..16
|
||||
Op @ 17..18
|
||||
Token @ 17..18
|
||||
Token @ 18..19
|
||||
Number @ 32..33
|
||||
Token @ 32..33
|
||||
Token @ 36..37
|
||||
Token @ 46..47",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn infix_call() {
|
||||
assert_ast_eq(
|
||||
"f x y",
|
||||
toplevel,
|
||||
"
|
||||
Toplevel @ 0..5
|
||||
Call @ 0..5
|
||||
Ident @ 0..1
|
||||
Token @ 0..1
|
||||
Ident @ 2..3
|
||||
Token @ 2..3
|
||||
Ident @ 4..5
|
||||
Token @ 4..5",
|
||||
);
|
||||
|
||||
assert_ast_eq(
|
||||
"sin 1 + cos 2",
|
||||
toplevel,
|
||||
"
|
||||
Toplevel @ 0..13
|
||||
Binary @ 0..13
|
||||
Call @ 0..5
|
||||
Ident @ 0..3
|
||||
Token @ 0..3
|
||||
Number @ 4..5
|
||||
Token @ 4..5
|
||||
Op @ 6..7
|
||||
Token @ 6..7
|
||||
Call @ 8..13
|
||||
Ident @ 8..11
|
||||
Token @ 8..11
|
||||
Number @ 12..13
|
||||
Token @ 12..13",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn infix_call_unary_arg() {
|
||||
assert_ast_eq(
|
||||
// NOTE: The whitespace here is misleading.
|
||||
// This is a binary `-`.
|
||||
"f -1",
|
||||
toplevel,
|
||||
"
|
||||
Toplevel @ 0..4
|
||||
Binary @ 0..4
|
||||
Ident @ 0..1
|
||||
Token @ 0..1
|
||||
Op @ 2..3
|
||||
Token @ 2..3
|
||||
Number @ 3..4
|
||||
Token @ 3..4",
|
||||
);
|
||||
|
||||
assert_ast_eq(
|
||||
"f (-1)",
|
||||
toplevel,
|
||||
"
|
||||
Toplevel @ 0..6
|
||||
Call @ 0..6
|
||||
Ident @ 0..1
|
||||
Token @ 0..1
|
||||
Paren @ 2..6
|
||||
Token @ 2..3
|
||||
Unary @ 3..5
|
||||
Op @ 3..4
|
||||
Token @ 3..4
|
||||
Number @ 4..5
|
||||
Token @ 4..5
|
||||
Token @ 5..6",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn lambda() {
|
||||
assert_ast_eq(
|
||||
r#" \_ -> () "#,
|
||||
toplevel,
|
||||
"
|
||||
Toplevel @ 1..9
|
||||
Lambda @ 1..9
|
||||
Token @ 1..2
|
||||
Params @ 2..3
|
||||
Param @ 2..3
|
||||
Token @ 2..3
|
||||
Token @ 4..6
|
||||
ParenEmpty @ 7..9
|
||||
Token @ 7..8
|
||||
Token @ 8..9",
|
||||
);
|
||||
|
||||
assert_ast_eq(
|
||||
r#" \x -> x "#,
|
||||
toplevel,
|
||||
"
|
||||
Toplevel @ 1..8
|
||||
Lambda @ 1..8
|
||||
Token @ 1..2
|
||||
Params @ 2..3
|
||||
Param @ 2..3
|
||||
Token @ 2..3
|
||||
Token @ 4..6
|
||||
Ident @ 7..8
|
||||
Token @ 7..8",
|
||||
);
|
||||
|
||||
assert_ast_eq(
|
||||
r#" \x, y -> x + y "#,
|
||||
toplevel,
|
||||
"
|
||||
Toplevel @ 1..15
|
||||
Lambda @ 1..15
|
||||
Token @ 1..2
|
||||
Params @ 2..6
|
||||
Param @ 2..3
|
||||
Token @ 2..3
|
||||
Token @ 3..4
|
||||
Param @ 5..6
|
||||
Token @ 5..6
|
||||
Token @ 7..9
|
||||
Binary @ 10..15
|
||||
Ident @ 10..11
|
||||
Token @ 10..11
|
||||
Op @ 12..13
|
||||
Token @ 12..13
|
||||
Ident @ 14..15
|
||||
Token @ 14..15",
|
||||
);
|
||||
|
||||
assert_ast_eq(
|
||||
r#" \x, y ->
|
||||
x + y "#,
|
||||
toplevel,
|
||||
"
|
||||
Toplevel @ 1..29
|
||||
Lambda @ 1..29
|
||||
Token @ 1..2
|
||||
Params @ 2..6
|
||||
Param @ 2..3
|
||||
Token @ 2..3
|
||||
Token @ 3..4
|
||||
Param @ 5..6
|
||||
Token @ 5..6
|
||||
Token @ 7..9
|
||||
Token @ 9..10
|
||||
Binary @ 24..29
|
||||
Ident @ 24..25
|
||||
Token @ 24..25
|
||||
Op @ 26..27
|
||||
Token @ 26..27
|
||||
Ident @ 28..29
|
||||
Token @ 28..29",
|
||||
);
|
||||
|
||||
assert_ast_eq(
|
||||
r#" f \x -> g \y -> x + y "#,
|
||||
toplevel,
|
||||
"
|
||||
Toplevel @ 1..22
|
||||
Call @ 1..22
|
||||
Ident @ 1..2
|
||||
Token @ 1..2
|
||||
Lambda @ 3..22
|
||||
Token @ 3..4
|
||||
Params @ 4..5
|
||||
Param @ 4..5
|
||||
Token @ 4..5
|
||||
Token @ 6..8
|
||||
Call @ 9..22
|
||||
Ident @ 9..10
|
||||
Token @ 9..10
|
||||
Lambda @ 11..22
|
||||
Token @ 11..12
|
||||
Params @ 12..13
|
||||
Param @ 12..13
|
||||
Token @ 12..13
|
||||
Token @ 14..16
|
||||
Binary @ 17..22
|
||||
Ident @ 17..18
|
||||
Token @ 17..18
|
||||
Op @ 19..20
|
||||
Token @ 19..20
|
||||
Ident @ 21..22
|
||||
Token @ 21..22",
|
||||
);
|
||||
|
||||
assert_ast_eq(
|
||||
r#" f \x ->
|
||||
g \y ->
|
||||
x + y "#,
|
||||
toplevel,
|
||||
"
|
||||
Toplevel @ 1..48
|
||||
Call @ 1..48
|
||||
Ident @ 1..2
|
||||
Token @ 1..2
|
||||
Lambda @ 3..48
|
||||
Token @ 3..4
|
||||
Params @ 4..5
|
||||
Param @ 4..5
|
||||
Token @ 4..5
|
||||
Token @ 6..8
|
||||
Token @ 8..9
|
||||
Call @ 21..48
|
||||
Ident @ 21..22
|
||||
Token @ 21..22
|
||||
Lambda @ 23..48
|
||||
Token @ 23..24
|
||||
Params @ 24..25
|
||||
Param @ 24..25
|
||||
Token @ 24..25
|
||||
Token @ 26..28
|
||||
Token @ 28..29
|
||||
Binary @ 43..48
|
||||
Ident @ 43..44
|
||||
Token @ 43..44
|
||||
Op @ 45..46
|
||||
Token @ 45..46
|
||||
Ident @ 47..48
|
||||
Token @ 47..48",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn if_expr() {
|
||||
assert_ast_eq(
|
||||
r#" if (true) 1 else 2 "#,
|
||||
toplevel,
|
||||
"
|
||||
Toplevel @ 1..19
|
||||
If @ 1..19
|
||||
Token @ 1..3
|
||||
Token @ 4..5
|
||||
Ident @ 5..9
|
||||
Token @ 5..9
|
||||
Token @ 9..10
|
||||
Number @ 11..12
|
||||
Token @ 11..12
|
||||
Token @ 13..17
|
||||
Number @ 18..19
|
||||
Token @ 18..19",
|
||||
);
|
||||
|
||||
assert_ast_eq(
|
||||
r#" if (true)
|
||||
1
|
||||
else
|
||||
2 "#,
|
||||
toplevel,
|
||||
"
|
||||
Toplevel @ 1..63
|
||||
If @ 1..63
|
||||
Token @ 1..3
|
||||
Token @ 4..5
|
||||
Ident @ 5..9
|
||||
Token @ 5..9
|
||||
Token @ 9..10
|
||||
Token @ 10..11
|
||||
Number @ 27..28
|
||||
Token @ 27..28
|
||||
Token @ 28..29
|
||||
Token @ 41..45
|
||||
Token @ 45..46
|
||||
Number @ 62..63
|
||||
Token @ 62..63",
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn let_expr() {
|
||||
assert_ast_eq(
|
||||
r#" let x = 1
|
||||
x "#,
|
||||
toplevel,
|
||||
"
|
||||
Toplevel @ 1..24
|
||||
Let @ 1..24
|
||||
Token @ 1..4
|
||||
Ident @ 5..6
|
||||
Token @ 5..6
|
||||
Token @ 7..8
|
||||
Number @ 9..10
|
||||
Token @ 9..10
|
||||
Token @ 10..11
|
||||
Ident @ 23..24
|
||||
Token @ 23..24",
|
||||
);
|
||||
|
||||
assert_ast_eq(
|
||||
r#" let x = 1
|
||||
let y = 2
|
||||
x + y "#,
|
||||
toplevel,
|
||||
"
|
||||
Toplevel @ 1..50
|
||||
Let @ 1..50
|
||||
Token @ 1..4
|
||||
Ident @ 5..6
|
||||
Token @ 5..6
|
||||
Token @ 7..8
|
||||
Number @ 9..10
|
||||
Token @ 9..10
|
||||
Token @ 10..11
|
||||
Let @ 23..50
|
||||
Token @ 23..26
|
||||
Ident @ 27..28
|
||||
Token @ 27..28
|
||||
Token @ 29..30
|
||||
Number @ 31..32
|
||||
Token @ 31..32
|
||||
Token @ 32..33
|
||||
Binary @ 45..50
|
||||
Ident @ 45..46
|
||||
Token @ 45..46
|
||||
Op @ 47..48
|
||||
Token @ 47..48
|
||||
Ident @ 49..50
|
||||
Token @ 49..50",
|
||||
)
|
||||
}
|
|
@ -1,510 +0,0 @@
|
|||
use core::{cell::Cell, fmt, ops::Deref};
|
||||
|
||||
use alloc::vec::Vec;
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub struct Span {
|
||||
pub start: usize,
|
||||
pub end: usize,
|
||||
}
|
||||
|
||||
impl Span {
|
||||
pub fn new(start: usize, end: usize) -> Self {
|
||||
Self { start, end }
|
||||
}
|
||||
|
||||
pub fn slice<'a>(&self, source: &'a SourceCode) -> &'a str {
|
||||
&source.code[self.start..self.end]
|
||||
}
|
||||
}
|
||||
|
||||
/// A source code string whose length has been checked against a limit.
///
/// The type itself does not remember or enforce any particular limit; it only records that
/// somebody decided how long the string is allowed to be (see [`SourceCode::limited_len`] and
/// [`SourceCode::unlimited_len`]), so that the parser is not stalled by unexpectedly large input.
#[derive(Debug, PartialEq, Eq)]
#[repr(transparent)]
pub struct SourceCode {
    code: str,
}

impl SourceCode {
    /// Wraps `code` if it is at most `max_len` bytes long; returns `None` otherwise.
    pub fn limited_len(code: &str, max_len: usize) -> Option<&Self> {
        (code.len() <= max_len).then(|| Self::unlimited_len(code))
    }

    /// Wraps `code` without applying any length limit.
    pub fn unlimited_len(code: &str) -> &Self {
        // SAFETY: SourceCode is a `#[repr(transparent)]` wrapper around str, so a pointer to one
        // is a valid pointer to the other, and the returned reference keeps the lifetime of
        // `code`.
        unsafe { &*(code as *const str as *const SourceCode) }
    }
}

impl Deref for SourceCode {
    type Target = str;

    /// Gives access to the wrapped string.
    fn deref(&self) -> &str {
        &self.code
    }
}
|
||||
|
||||
/// Index of a node inside an `Ast`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct NodeId(usize);

impl NodeId {
    /// ID of the node at index 0.
    pub const NIL: Self = Self(0);
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub enum NodeKind {
|
||||
Nil,
|
||||
Eof,
|
||||
|
||||
// Atoms
|
||||
Ident,
|
||||
Number,
|
||||
|
||||
List(NodeId, NodeId),
|
||||
Toplevel(NodeId),
|
||||
|
||||
Error(&'static str),
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub struct Node {
|
||||
pub span: Span,
|
||||
pub kind: NodeKind,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct Ast {
|
||||
pub nodes: Vec<Node>,
|
||||
}
|
||||
|
||||
/// Controls how much detail `Ast::write` prints.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AstWriteMode {
    /// Print only the nodes themselves.
    Compact,
    /// Additionally print `@start..end` after every node.
    Spans,
}
|
||||
|
||||
impl Ast {
|
||||
pub fn new(capacity: usize) -> Self {
|
||||
assert!(capacity >= 1, "there must be space for at least a nil node");
|
||||
|
||||
let mut ast = Self {
|
||||
nodes: Vec::with_capacity(capacity),
|
||||
};
|
||||
|
||||
ast.alloc(Node {
|
||||
span: Span::new(0, 0),
|
||||
kind: NodeKind::Nil,
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
ast
|
||||
}
|
||||
|
||||
pub fn alloc(&mut self, node: Node) -> Result<NodeId, NodeAllocError> {
|
||||
if self.nodes.len() >= self.nodes.capacity() {
|
||||
return Err(NodeAllocError);
|
||||
}
|
||||
|
||||
let index = self.nodes.len();
|
||||
self.nodes.push(node);
|
||||
Ok(NodeId(index))
|
||||
}
|
||||
|
||||
pub fn get(&self, node_id: NodeId) -> &Node {
|
||||
&self.nodes[node_id.0]
|
||||
}
|
||||
|
||||
pub fn get_mut(&mut self, node_id: NodeId) -> &mut Node {
|
||||
&mut self.nodes[node_id.0]
|
||||
}
|
||||
|
||||
pub fn write(
|
||||
&self,
|
||||
source: &SourceCode,
|
||||
node_id: NodeId,
|
||||
w: &mut dyn fmt::Write,
|
||||
mode: AstWriteMode,
|
||||
) -> fmt::Result {
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn write_list(
|
||||
ast: &Ast,
|
||||
source: &SourceCode,
|
||||
w: &mut dyn fmt::Write,
|
||||
mode: AstWriteMode,
|
||||
mut head: NodeId,
|
||||
mut tail: NodeId,
|
||||
sep_element: &str,
|
||||
sep_tail: &str,
|
||||
) -> fmt::Result {
|
||||
loop {
|
||||
write_rec(ast, source, w, mode, head)?;
|
||||
match ast.get(tail).kind {
|
||||
NodeKind::Nil => break,
|
||||
NodeKind::List(head2, tail2) => {
|
||||
w.write_str(sep_element)?;
|
||||
(head, tail) = (head2, tail2);
|
||||
}
|
||||
_ => {
|
||||
w.write_str(sep_tail)?;
|
||||
write_rec(ast, source, w, mode, tail)?;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
// NOTE: Separated out to a separate function in case we ever want to introduce auto-indentation.
|
||||
fn write_rec(
|
||||
ast: &Ast,
|
||||
source: &SourceCode,
|
||||
w: &mut dyn fmt::Write,
|
||||
mode: AstWriteMode,
|
||||
node_id: NodeId,
|
||||
) -> fmt::Result {
|
||||
let node = ast.get(node_id);
|
||||
match &node.kind {
|
||||
NodeKind::Nil => write!(w, "()")?,
|
||||
NodeKind::Eof => write!(w, "<eof>")?,
|
||||
NodeKind::Ident | NodeKind::Number => write!(w, "{}", node.span.slice(source))?,
|
||||
|
||||
NodeKind::List(head, tail) => {
|
||||
w.write_char('(')?;
|
||||
write_list(ast, source, w, mode, *head, *tail, " ", " . ")?;
|
||||
w.write_char(')')?;
|
||||
}
|
||||
|
||||
NodeKind::Toplevel(list) => {
|
||||
let NodeKind::List(head, tail) = ast.get(*list).kind else {
|
||||
unreachable!("child of Toplevel must be a List");
|
||||
};
|
||||
|
||||
write_list(ast, source, w, mode, head, tail, "\n", " . ")?;
|
||||
}
|
||||
|
||||
NodeKind::Error(message) => write!(w, "#error({message})")?,
|
||||
}
|
||||
|
||||
if mode == AstWriteMode::Spans {
|
||||
write!(w, "@{}..{}", node.span.start, node.span.end)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
write_rec(self, source, w, mode, node_id)?;
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Error returned by `Ast::alloc` when there is no room left for another node.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct NodeAllocError;
|
||||
|
||||
pub struct Parser<'a> {
|
||||
pub ast: Ast,
|
||||
input: &'a SourceCode,
|
||||
position: usize,
|
||||
fuel: Cell<usize>,
|
||||
alloc_error: NodeId,
|
||||
}
|
||||
|
||||
impl<'a> Parser<'a> {
|
||||
const FUEL: usize = 256;
|
||||
|
||||
pub fn new(mut ast: Ast, input: &'a SourceCode) -> Self {
|
||||
let alloc_error = ast
|
||||
.alloc(Node {
|
||||
span: Span::new(0, 0),
|
||||
kind: NodeKind::Error("program is too big"),
|
||||
})
|
||||
.expect("there is not enough space in the arena for an error node");
|
||||
|
||||
Self {
|
||||
ast,
|
||||
input,
|
||||
position: 0,
|
||||
fuel: Cell::new(Self::FUEL),
|
||||
alloc_error,
|
||||
}
|
||||
}
|
||||
|
||||
#[track_caller]
|
||||
pub fn current(&self) -> char {
|
||||
assert_ne!(self.fuel.get(), 0, "parser is stuck");
|
||||
self.fuel.set(self.fuel.get() - 1);
|
||||
|
||||
self.input[self.position..].chars().next().unwrap_or('\0')
|
||||
}
|
||||
|
||||
pub fn advance(&mut self) {
|
||||
self.position += self.current().len_utf8();
|
||||
self.fuel.set(Self::FUEL);
|
||||
}
|
||||
|
||||
pub fn alloc(&mut self, expr: Node) -> NodeId {
|
||||
self.ast.alloc(expr).unwrap_or(self.alloc_error)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn skip_whitespace_and_comments(p: &mut Parser<'_>) {
|
||||
loop {
|
||||
match p.current() {
|
||||
' ' | '\t' | '\n' => {
|
||||
p.advance();
|
||||
continue;
|
||||
}
|
||||
';' => {
|
||||
while p.current() != '\n' && p.current() != '\0' {
|
||||
p.advance();
|
||||
}
|
||||
}
|
||||
_ => break,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns whether `c` is one of the ASCII digits `0` through `9`.
fn is_decimal_digit(c: char) -> bool {
    matches!(c, '0'..='9')
}
|
||||
|
||||
pub fn parse_number(p: &mut Parser<'_>) -> NodeKind {
|
||||
while is_decimal_digit(p.current()) {
|
||||
p.advance();
|
||||
}
|
||||
if p.current() == '.' {
|
||||
p.advance();
|
||||
if !is_decimal_digit(p.current()) {
|
||||
return NodeKind::Error("missing digits after decimal point '.' in number literal");
|
||||
}
|
||||
while is_decimal_digit(p.current()) {
|
||||
p.advance();
|
||||
}
|
||||
}
|
||||
|
||||
NodeKind::Number
|
||||
}
|
||||
|
||||
/// Returns whether `c` may appear in an identifier.
fn is_ident(c: char) -> bool {
    // The identifier character set is kept deliberately small, so that the syntax is easy to
    // expand in the future.
    // Rationale:
    // - letters and digits are pretty obvious.
    // - '-' and '_' can be used as identifier separators, whichever you prefer.
    // - '+', '-', '*', '/', '^' are for arithmetic.
    // - '=', '!', '<', '>' are for comparison.
    // - '\' is for builtin string constants, such as \n.
    // For other operators, it's generally clearer to use words (such as `and` and `or`.)
    c.is_ascii_alphanumeric()
        || matches!(
            c,
            '-' | '_' | '+' | '*' | '/' | '\\' | '^' | '!' | '=' | '<' | '>'
        )
}
|
||||
|
||||
pub fn parse_ident(p: &mut Parser<'_>) -> NodeKind {
|
||||
while is_ident(p.current()) {
|
||||
p.advance();
|
||||
}
|
||||
|
||||
NodeKind::Ident
|
||||
}
|
||||
|
||||
struct List {
|
||||
head: NodeId,
|
||||
tail: NodeId,
|
||||
}
|
||||
|
||||
impl List {
|
||||
fn new() -> Self {
|
||||
Self {
|
||||
head: NodeId::NIL,
|
||||
tail: NodeId::NIL,
|
||||
}
|
||||
}
|
||||
|
||||
fn append(&mut self, p: &mut Parser<'_>, node: NodeId) {
|
||||
let node_span = p.ast.get(node).span;
|
||||
|
||||
let new_tail = p.alloc(Node {
|
||||
span: node_span,
|
||||
kind: NodeKind::List(node, NodeId::NIL),
|
||||
});
|
||||
if self.head == NodeId::NIL {
|
||||
self.head = new_tail;
|
||||
self.tail = new_tail;
|
||||
} else {
|
||||
let old_tail = p.ast.get_mut(self.tail);
|
||||
let NodeKind::List(expr_before, _) = old_tail.kind else {
|
||||
return;
|
||||
};
|
||||
*old_tail = Node {
|
||||
span: Span::new(old_tail.span.start, node_span.end),
|
||||
kind: NodeKind::List(expr_before, new_tail),
|
||||
};
|
||||
self.tail = new_tail;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse_list(p: &mut Parser<'_>) -> NodeId {
|
||||
// This could've been a lot simpler if Rust supported tail recursion.
|
||||
|
||||
let start = p.position;
|
||||
|
||||
p.advance(); // skip past opening parenthesis
|
||||
skip_whitespace_and_comments(p);
|
||||
|
||||
let mut list = List::new();
|
||||
|
||||
while p.current() != ')' {
|
||||
if p.current() == '\0' {
|
||||
return p.alloc(Node {
|
||||
span: Span::new(start, p.position),
|
||||
kind: NodeKind::Error("missing ')' to close '('"),
|
||||
});
|
||||
}
|
||||
|
||||
let expr = parse_expr(p);
|
||||
skip_whitespace_and_comments(p);
|
||||
|
||||
list.append(p, expr);
|
||||
}
|
||||
p.advance(); // skip past closing parenthesis
|
||||
|
||||
// If we didn't have any elements, we must not modify the initial Nil with ID 0.
|
||||
if list.head == NodeId::NIL {
|
||||
list.head = p.alloc(Node {
|
||||
span: Span::new(0, 0),
|
||||
kind: NodeKind::Nil,
|
||||
});
|
||||
}
|
||||
|
||||
let end = p.position;
|
||||
p.ast.get_mut(list.head).span = Span::new(start, end);
|
||||
|
||||
list.head
|
||||
}
|
||||
|
||||
pub fn parse_expr(p: &mut Parser<'_>) -> NodeId {
|
||||
let start = p.position;
|
||||
let kind = match p.current() {
|
||||
'\0' => NodeKind::Eof,
|
||||
c if is_decimal_digit(c) => parse_number(p),
|
||||
// NOTE: Because of the `match` order, this prevents identifiers from starting with a digit.
|
||||
c if is_ident(c) => parse_ident(p),
|
||||
'(' => return parse_list(p),
|
||||
_ => {
|
||||
p.advance();
|
||||
NodeKind::Error("unexpected character")
|
||||
}
|
||||
};
|
||||
let end = p.position;
|
||||
|
||||
p.alloc(Node {
|
||||
span: Span::new(start, end),
|
||||
kind,
|
||||
})
|
||||
}
|
||||
|
||||
pub fn parse_toplevel(p: &mut Parser<'_>) -> NodeId {
|
||||
let start = p.position;
|
||||
|
||||
let mut nodes = List::new();
|
||||
|
||||
skip_whitespace_and_comments(p);
|
||||
while p.current() != '\0' {
|
||||
let expr = parse_expr(p);
|
||||
skip_whitespace_and_comments(p);
|
||||
|
||||
nodes.append(p, expr);
|
||||
}
|
||||
|
||||
let end = p.position;
|
||||
|
||||
p.alloc(Node {
|
||||
span: Span::new(start, end),
|
||||
kind: NodeKind::Toplevel(nodes.head),
|
||||
})
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use core::error::Error;
|
||||
|
||||
use alloc::{boxed::Box, string::String};
|
||||
|
||||
use super::*;
|
||||
|
||||
#[track_caller]
|
||||
fn parse(
|
||||
f: fn(&mut Parser<'_>) -> NodeId,
|
||||
source: &str,
|
||||
expected: &str,
|
||||
) -> Result<(), Box<dyn Error>> {
|
||||
let ast = Ast::new(16);
|
||||
let code = SourceCode::unlimited_len(source);
|
||||
let mut p = Parser::new(ast, code);
|
||||
let node = f(&mut p);
|
||||
let ast = p.ast;
|
||||
|
||||
let mut s = String::new();
|
||||
ast.write(code, node, &mut s, AstWriteMode::Spans)?;
|
||||
|
||||
assert_eq!(s, expected);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_number() -> Result<(), Box<dyn Error>> {
|
||||
parse(parse_expr, "123", "123@0..3")?;
|
||||
parse(parse_expr, "123.456", "123.456@0..7")?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_ident() -> Result<(), Box<dyn Error>> {
|
||||
parse(parse_expr, "abc", "abc@0..3")?;
|
||||
parse(parse_expr, "abcABC_01234", "abcABC_01234@0..12")?;
|
||||
parse(parse_expr, "+-*/\\^!=<>", "+-*/\\^!=<>@0..10")?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn parse_list() -> Result<(), Box<dyn Error>> {
|
||||
parse(parse_expr, "()", "()@0..2")?;
|
||||
parse(parse_expr, "(a a)", "(a@1..2 a@3..4)@0..5")?;
|
||||
parse(parse_expr, "(a a a)", "(a@1..2 a@3..4 a@5..6)@0..7")?;
|
||||
parse(parse_expr, "(() ())", "(()@1..3 ()@4..6)@0..7")?;
|
||||
parse(
|
||||
parse_expr,
|
||||
"(nestedy (nest OwO))",
|
||||
"(nestedy@1..8 (nest@10..14 OwO@15..18)@9..19)@0..20",
|
||||
)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn oom() -> Result<(), Box<dyn Error>> {
|
||||
parse(parse_expr, "(a a a a a a a a)", "(a@1..2 a@3..4 a@5..6 a@7..8 a@9..10 a@11..12 a@13..14 . #error(program is too big)@0..0)@0..17")?;
|
||||
parse(parse_expr, "(a a a a a a a a a)", "(a@1..2 a@3..4 a@5..6 a@7..8 a@9..10 a@11..12 a@13..14 . #error(program is too big)@0..0)@0..19")?;
|
||||
parse(parse_expr, "(a a a a a a a a a a)", "(a@1..2 a@3..4 a@5..6 a@7..8 a@9..10 a@11..12 a@13..14 . #error(program is too big)@0..0)@0..21")?;
|
||||
parse(parse_expr, "(a a a a a a a a a a a)", "(a@1..2 a@3..4 a@5..6 a@7..8 a@9..10 a@11..12 a@13..14 . #error(program is too big)@0..0)@0..23")?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn toplevel() -> Result<(), Box<dyn Error>> {
|
||||
parse(
|
||||
parse_toplevel,
|
||||
r#"
|
||||
(hello world)
|
||||
(abc)
|
||||
"#,
|
||||
"(hello@18..23 world@24..29)@17..30\n(abc@48..51)@47..52@0..65",
|
||||
)?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
55
crates/haku/src/source.rs
Normal file
55
crates/haku/src/source.rs
Normal file
|
@ -0,0 +1,55 @@
|
|||
use core::{fmt, ops::Deref};
|
||||
|
||||
#[derive(Clone, Copy, PartialEq, Eq)]
|
||||
pub struct Span {
|
||||
pub start: u32,
|
||||
pub end: u32,
|
||||
}
|
||||
|
||||
impl Span {
|
||||
pub fn new(start: u32, end: u32) -> Self {
|
||||
Self { start, end }
|
||||
}
|
||||
|
||||
pub fn slice<'a>(&self, source: &'a SourceCode) -> &'a str {
|
||||
&source.code[self.start as usize..self.end as usize]
|
||||
}
|
||||
}
|
||||
|
||||
impl fmt::Debug for Span {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
write!(f, "{}..{}", self.start, self.end)
|
||||
}
|
||||
}
|
||||
|
||||
/// Source code string with a verified size limit.
///
/// An exact size limit is not enforced by this type - it only ensures the string isn't longer than
/// intended, to not stall the parser for an unexpected amount of time.
///
/// The type is a `#[repr(transparent)]` wrapper around `str`, so it is unsized and only ever
/// handled behind a reference (`&SourceCode`), which dereferences to `&str`.
#[derive(Debug, PartialEq, Eq)]
#[repr(transparent)]
pub struct SourceCode {
    code: str,
}

impl SourceCode {
    /// Wraps `code` if it is at most `max_len` bytes long; returns `None` otherwise.
    pub fn limited_len(code: &str, max_len: u32) -> Option<&Self> {
        if code.len() <= max_len as usize {
            Some(Self::unlimited_len(code))
        } else {
            None
        }
    }

    /// Wraps `code` without checking its length.
    pub fn unlimited_len(code: &str) -> &Self {
        // A pointer cast is used instead of `mem::transmute`: it only reinterprets the pointee
        // type and keeps the (pointer, length) pair intact, without relying on two different
        // reference types having the same layout.
        //
        // SAFETY: `SourceCode` is `#[repr(transparent)]` over `str`, so a `*const str` is a valid
        // `*const SourceCode` with the same length metadata. The pointer comes from a live
        // `&str`, and the returned reference inherits that borrow's lifetime.
        unsafe { &*(code as *const str as *const SourceCode) }
    }
}

impl Deref for SourceCode {
    type Target = str;

    fn deref(&self) -> &Self::Target {
        &self.code
    }
}
|
|
@ -16,10 +16,17 @@ pub type SystemFn = fn(&mut Vm, FnArgs) -> Result<Value, Exception>;
|
|||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub struct ChunkId(u32);
|
||||
|
||||
/// Arity class of a system function.
///
/// It is passed together with the name when resolving a system function, which lets one name
/// refer to different functions depending on how it is used (`-` is registered both as a
/// `Binary` and as a `Unary` function).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum SystemFnArity {
    /// Resolved with exactly this tag for one-operand operators such as `!`.
    Unary,
    /// Resolved with exactly this tag for two-operand operators such as `+`.
    Binary,
    /// Tag used by named functions such as `vec` or `stroke`.
    Nary,
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct System {
|
||||
/// Resolves a system function name to an index into `fn`s.
|
||||
pub resolve_fn: fn(&str) -> Option<u8>,
|
||||
pub resolve_fn: fn(SystemFnArity, &str) -> Option<u8>,
|
||||
pub fns: [Option<SystemFn>; 256],
|
||||
pub chunks: Vec<Chunk>,
|
||||
}
|
||||
|
@ -30,7 +37,7 @@ pub struct SystemImage {
|
|||
}
|
||||
|
||||
macro_rules! def_fns {
|
||||
($($index:tt $name:tt => $fnref:expr),* $(,)?) => {
|
||||
($($index:tt $arity:tt $name:tt => $fnref:expr),* $(,)?) => {
|
||||
pub(crate) fn init_fns(system: &mut System) {
|
||||
$(
|
||||
debug_assert!(system.fns[$index].is_none());
|
||||
|
@ -38,9 +45,9 @@ macro_rules! def_fns {
|
|||
)*
|
||||
}
|
||||
|
||||
pub(crate) fn resolve(name: &str) -> Option<u8> {
|
||||
match name {
|
||||
$($name => Some($index),)*
|
||||
pub(crate) fn resolve(arity: SystemFnArity, name: &str) -> Option<u8> {
|
||||
match (arity, name){
|
||||
$((SystemFnArity::$arity, $name) => Some($index),)*
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
@ -106,43 +113,44 @@ pub mod fns {
|
|||
vm::{Exception, FnArgs, Vm},
|
||||
};
|
||||
|
||||
use super::System;
|
||||
use super::{System, SystemFnArity};
|
||||
|
||||
impl System {
|
||||
def_fns! {
|
||||
0x00 "+" => add,
|
||||
0x01 "-" => sub,
|
||||
0x02 "*" => mul,
|
||||
0x03 "/" => div,
|
||||
0x00 Binary "+" => add,
|
||||
0x01 Binary "-" => sub,
|
||||
0x02 Binary "*" => mul,
|
||||
0x03 Binary "/" => div,
|
||||
0x04 Unary "-" => neg,
|
||||
|
||||
0x40 "not" => not,
|
||||
0x41 "=" => eq,
|
||||
0x42 "<>" => neq,
|
||||
0x43 "<" => lt,
|
||||
0x44 "<=" => leq,
|
||||
0x45 ">" => gt,
|
||||
0x46 ">=" => geq,
|
||||
0x40 Unary "!" => not,
|
||||
0x41 Binary "==" => eq,
|
||||
0x42 Binary "!=" => neq,
|
||||
0x43 Binary "<" => lt,
|
||||
0x44 Binary "<=" => leq,
|
||||
0x45 Binary ">" => gt,
|
||||
0x46 Binary ">=" => geq,
|
||||
|
||||
0x80 "vec" => vec,
|
||||
0x81 ".x" => vec_x,
|
||||
0x82 ".y" => vec_y,
|
||||
0x83 ".z" => vec_z,
|
||||
0x84 ".w" => vec_w,
|
||||
0x80 Nary "vec" => vec,
|
||||
0x81 Nary "vecX" => vec_x,
|
||||
0x82 Nary "vecY" => vec_y,
|
||||
0x83 Nary "vecZ" => vec_z,
|
||||
0x84 Nary "vecW" => vec_w,
|
||||
|
||||
0x85 "rgba" => rgba,
|
||||
0x86 ".r" => rgba_r,
|
||||
0x87 ".g" => rgba_g,
|
||||
0x88 ".b" => rgba_b,
|
||||
0x89 ".a" => rgba_a,
|
||||
0x85 Nary "rgba" => rgba,
|
||||
0x86 Nary "rgbaR" => rgba_r,
|
||||
0x87 Nary "rgbaG" => rgba_g,
|
||||
0x88 Nary "rgbaB" => rgba_b,
|
||||
0x89 Nary "rgbaA" => rgba_a,
|
||||
|
||||
0x90 "list" => list,
|
||||
0x90 Nary "list" => list,
|
||||
|
||||
0xc0 "to-shape" => to_shape_f,
|
||||
0xc1 "line" => line,
|
||||
0xc2 "rect" => rect,
|
||||
0xc3 "circle" => circle,
|
||||
0xe0 "stroke" => stroke,
|
||||
0xe1 "fill" => fill,
|
||||
0xc0 Nary "toShape" => to_shape_f,
|
||||
0xc1 Nary "line" => line,
|
||||
0xc2 Nary "rect" => rect,
|
||||
0xc3 Nary "circle" => circle,
|
||||
0xe0 Nary "stroke" => stroke,
|
||||
0xe1 Nary "fill" => fill,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -196,6 +204,11 @@ pub mod fns {
|
|||
Ok(Value::Number(result))
|
||||
}
|
||||
|
||||
pub fn neg(vm: &mut Vm, args: FnArgs) -> Result<Value, Exception> {
|
||||
let x = args.get_number(vm, 0, "`-` can only work with numbers")?;
|
||||
Ok(Value::Number(-x))
|
||||
}
|
||||
|
||||
pub fn not(vm: &mut Vm, args: FnArgs) -> Result<Value, Exception> {
|
||||
if args.num() != 1 {
|
||||
return Err(vm.create_exception("(not) expects a single argument to negate"));
|
||||
|
|
143
crates/haku/src/token.rs
Normal file
143
crates/haku/src/token.rs
Normal file
|
@ -0,0 +1,143 @@
|
|||
use core::{error::Error, fmt::Display};
|
||||
|
||||
use alloc::vec::Vec;
|
||||
|
||||
use crate::source::Span;
|
||||
|
||||
/// Kind of a single lexical token.
///
/// The declaration order matters: `TokenKindSet` uses each variant's discriminant as a bit index
/// and sizes itself from `Error`, so new variants must be added before `Error`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TokenKind {
    Eof,

    Ident,
    Tag,
    Number,
    Color,

    // Operators
    Plus,
    Minus,
    Star,
    Slash,
    EqualEqual,
    NotEqual,
    Less,
    LessEqual,
    Greater,
    GreaterEqual,
    Not,

    // Punctuation
    Newline,
    LParen,
    RParen,
    LBrack,
    RBrack,
    Comma,
    Equal,
    Backslash,
    RArrow,

    // Keywords
    Underscore,
    And,
    Or,
    If,
    Else,
    Let,

    // NOTE: This must be kept last for TokenKindSet to work correctly.
    Error,
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Lexis {
|
||||
pub kinds: Vec<TokenKind>,
|
||||
pub spans: Vec<Span>,
|
||||
}
|
||||
|
||||
impl Lexis {
|
||||
pub fn new(capacity: usize) -> Self {
|
||||
assert!(capacity < u32::MAX as usize);
|
||||
|
||||
Self {
|
||||
kinds: Vec::with_capacity(capacity),
|
||||
spans: Vec::with_capacity(capacity),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn len(&self) -> u32 {
|
||||
self.kinds.len() as u32
|
||||
}
|
||||
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.len() == 0
|
||||
}
|
||||
|
||||
pub fn push(&mut self, kind: TokenKind, span: Span) -> Result<(), TokenAllocError> {
|
||||
if self.kinds.len() >= self.kinds.capacity() {
|
||||
return Err(TokenAllocError);
|
||||
}
|
||||
|
||||
self.kinds.push(kind);
|
||||
self.spans.push(span);
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub fn kind(&self, position: u32) -> TokenKind {
|
||||
self.kinds[position as usize]
|
||||
}
|
||||
|
||||
pub fn span(&self, position: u32) -> Span {
|
||||
self.spans[position as usize]
|
||||
}
|
||||
}
|
||||
|
||||
/// Error returned when a token cannot be stored because the token limit has been reached.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct TokenAllocError;

impl Display for TokenAllocError {
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
        write!(f, "too many tokens")
    }
}

impl Error for TokenAllocError {}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
pub struct TokenKindSet {
|
||||
bits: [u32; Self::WORDS],
|
||||
}
|
||||
|
||||
impl TokenKindSet {
|
||||
const WORDS: usize = ((TokenKind::Error as u32 + u32::BITS - 1) / (u32::BITS)) as usize;
|
||||
|
||||
const fn word(kind: TokenKind) -> usize {
|
||||
(kind as u32 / u32::BITS) as usize
|
||||
}
|
||||
|
||||
const fn bit(kind: TokenKind) -> u32 {
|
||||
1 << (kind as u32 % u32::BITS)
|
||||
}
|
||||
|
||||
pub const fn new(elems: &[TokenKind]) -> Self {
|
||||
let mut set = Self {
|
||||
bits: [0; Self::WORDS],
|
||||
};
|
||||
let mut i = 0;
|
||||
while i < elems.len() {
|
||||
set = set.include(elems[i]);
|
||||
i += 1;
|
||||
}
|
||||
set
|
||||
}
|
||||
|
||||
pub const fn include(mut self, kind: TokenKind) -> Self {
|
||||
self.bits[Self::word(kind)] |= Self::bit(kind);
|
||||
self
|
||||
}
|
||||
|
||||
pub fn contains(&self, kind: TokenKind) -> bool {
|
||||
self.bits[Self::word(kind)] & Self::bit(kind) != 0
|
||||
}
|
||||
}
|
|
@ -1,6 +1,6 @@
|
|||
use alloc::vec::Vec;
|
||||
|
||||
use crate::system::ChunkId;
|
||||
use crate::{compiler::ClosureSpec, system::ChunkId};
|
||||
|
||||
// TODO: Probably needs some pretty hardcore space optimization.
|
||||
// Maybe when we have static typing.
|
||||
|
@ -156,9 +156,25 @@ pub struct Closure {
|
|||
pub start: BytecodeLoc,
|
||||
pub name: FunctionName,
|
||||
pub param_count: u8,
|
||||
pub local_count: u8,
|
||||
pub captures: Vec<Value>,
|
||||
}
|
||||
|
||||
impl Closure {
|
||||
pub fn chunk(chunk_id: ChunkId, spec: ClosureSpec) -> Self {
|
||||
Self {
|
||||
start: BytecodeLoc {
|
||||
chunk_id,
|
||||
offset: 0,
|
||||
},
|
||||
name: FunctionName::Anonymous,
|
||||
param_count: 0,
|
||||
local_count: spec.local_count,
|
||||
captures: Vec::new(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub struct List {
|
||||
pub elements: Vec<Value>,
|
||||
|
|
|
@ -123,8 +123,9 @@ impl Vm {
|
|||
|
||||
fn push(&mut self, value: Value) -> Result<(), Exception> {
|
||||
if self.stack.len() >= self.stack.capacity() {
|
||||
// TODO: can this error message be made clearer?
|
||||
return Err(self.create_exception("too many local variables"));
|
||||
return Err(self.create_exception(
|
||||
"too many temporary values (local variables and expression operands)",
|
||||
));
|
||||
}
|
||||
self.stack.push(value);
|
||||
Ok(())
|
||||
|
@ -136,6 +137,14 @@ impl Vm {
|
|||
})
|
||||
}
|
||||
|
||||
/// Returns a mutable reference to the stack slot at `index`.
///
/// An out-of-bounds index yields a "corrupted bytecode" exception. The bounds test happens
/// before the mutable borrow of the stack is taken, so `self` is still free for building the
/// exception on the error path.
fn get_mut(&mut self, index: usize) -> Result<&mut Value, Exception> {
    if index >= self.stack.len() {
        return Err(
            self.create_exception("corrupted bytecode (set local variable out of bounds)")
        );
    }

    Ok(&mut self.stack[index])
}
|
||||
|
||||
fn pop(&mut self) -> Result<Value, Exception> {
|
||||
self.stack
|
||||
.pop()
|
||||
|
@ -168,6 +177,11 @@ impl Vm {
|
|||
let mut bottom = self.stack.len();
|
||||
let mut fuel = self.fuel;
|
||||
|
||||
let init_bottom = bottom;
|
||||
for _ in 0..closure.local_count {
|
||||
self.push(Value::Nil)?;
|
||||
}
|
||||
|
||||
#[allow(unused)]
|
||||
let closure = (); // Do not use `closure` after this! Use `get_ref` on `closure_id` instead.
|
||||
|
||||
|
@ -200,6 +214,12 @@ impl Vm {
|
|||
self.push(value)?;
|
||||
}
|
||||
|
||||
Opcode::SetLocal => {
|
||||
let index = chunk.read_u8(&mut pc)? as usize;
|
||||
let new_value = self.pop()?;
|
||||
*self.get_mut(index)? = new_value;
|
||||
}
|
||||
|
||||
Opcode::Capture => {
|
||||
let index = chunk.read_u8(&mut pc)? as usize;
|
||||
let closure = self.get_ref(closure_id).as_closure().unwrap();
|
||||
|
@ -226,26 +246,14 @@ impl Vm {
|
|||
}
|
||||
}
|
||||
|
||||
Opcode::DropLet => {
|
||||
let count = chunk.read_u8(&mut pc)? as usize;
|
||||
if count != 0 {
|
||||
let new_len = self.stack.len().checked_sub(count).ok_or_else(|| {
|
||||
self.create_exception(
|
||||
"corrupted bytecode (Drop tried to drop too many values off the stack)",
|
||||
)
|
||||
})?;
|
||||
let value = self.pop()?;
|
||||
self.stack.resize_with(new_len, || unreachable!());
|
||||
self.push(value)?;
|
||||
}
|
||||
}
|
||||
|
||||
Opcode::Function => {
|
||||
let param_count = chunk.read_u8(&mut pc)?;
|
||||
let then = chunk.read_u16(&mut pc)? as usize;
|
||||
let body = pc;
|
||||
pc = then;
|
||||
|
||||
let local_count = chunk.read_u8(&mut pc)?;
|
||||
|
||||
let capture_count = chunk.read_u8(&mut pc)? as usize;
|
||||
let mut captures = Vec::with_capacity(capture_count);
|
||||
for _ in 0..capture_count {
|
||||
|
@ -272,6 +280,7 @@ impl Vm {
|
|||
},
|
||||
name: FunctionName::Anonymous,
|
||||
param_count,
|
||||
local_count,
|
||||
captures,
|
||||
}))?;
|
||||
self.push(Value::Ref(id))?;
|
||||
|
@ -327,6 +336,11 @@ impl Vm {
|
|||
)
|
||||
})?;
|
||||
|
||||
// NOTE: Locals are only pushed _after_ we do any stack calculations.
|
||||
for _ in 0..closure.local_count {
|
||||
self.push(Value::Nil)?;
|
||||
}
|
||||
|
||||
self.push_call(frame)?;
|
||||
}
|
||||
|
||||
|
@ -381,10 +395,13 @@ impl Vm {
|
|||
}
|
||||
}
|
||||
|
||||
Ok(self
|
||||
let result = self
|
||||
.stack
|
||||
.pop()
|
||||
.expect("there should be a result at the top of the stack"))
|
||||
.expect("there should be a result at the top of the stack");
|
||||
self.stack.resize_with(init_bottom, || unreachable!());
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
fn store_context(&mut self, context: Context) {
|
||||
|
|
|
@ -1,10 +1,14 @@
|
|||
use std::error::Error;
|
||||
|
||||
use haku::{
|
||||
ast::{dump::dump, Ast},
|
||||
bytecode::{Chunk, Defs},
|
||||
compiler::{compile_expr, Compiler, Source},
|
||||
sexp::{self, Ast, Parser, SourceCode},
|
||||
lexer::{lex, Lexer},
|
||||
parser::{self, Parser, ParserLimits},
|
||||
source::SourceCode,
|
||||
system::System,
|
||||
token::Lexis,
|
||||
value::{BytecodeLoc, Closure, FunctionName, Ref, RefId, Value},
|
||||
vm::{Vm, VmLimits},
|
||||
};
|
||||
|
@ -12,11 +16,16 @@ use haku::{
|
|||
fn eval(code: &str) -> Result<Value, Box<dyn Error>> {
|
||||
let mut system = System::new(1);
|
||||
|
||||
let ast = Ast::new(1024);
|
||||
let code = SourceCode::unlimited_len(code);
|
||||
let mut parser = Parser::new(ast, code);
|
||||
let root = sexp::parse_toplevel(&mut parser);
|
||||
let ast = parser.ast;
|
||||
|
||||
let mut lexer = Lexer::new(Lexis::new(1024), code);
|
||||
lex(&mut lexer)?;
|
||||
|
||||
let mut ast = Ast::new(1024);
|
||||
let mut parser = Parser::new(&lexer.lexis, &ParserLimits { max_events: 1024 });
|
||||
parser::toplevel(&mut parser);
|
||||
let (root, mut parser_diagnostics) = parser.into_ast(&mut ast)?;
|
||||
println!("{}", dump(&ast, root, Some(code)));
|
||||
let src = Source {
|
||||
code,
|
||||
ast: &ast,
|
||||
|
@ -27,21 +36,29 @@ fn eval(code: &str) -> Result<Value, Box<dyn Error>> {
|
|||
let mut chunk = Chunk::new(65536).unwrap();
|
||||
let mut compiler = Compiler::new(&mut defs, &mut chunk);
|
||||
compile_expr(&mut compiler, &src, root)?;
|
||||
let closure_spec = compiler.closure_spec();
|
||||
let defs = compiler.defs;
|
||||
|
||||
for diagnostic in &compiler.diagnostics {
|
||||
let mut diagnostics = lexer.diagnostics;
|
||||
diagnostics.append(&mut parser_diagnostics);
|
||||
diagnostics.append(&mut compiler.diagnostics);
|
||||
|
||||
for diagnostic in &diagnostics {
|
||||
println!(
|
||||
"{}..{}: {}",
|
||||
diagnostic.span.start, diagnostic.span.end, diagnostic.message
|
||||
"{}..{} {:?}: {}",
|
||||
diagnostic.span().start,
|
||||
diagnostic.span().end,
|
||||
diagnostic.span().slice(code),
|
||||
diagnostic.message()
|
||||
);
|
||||
}
|
||||
|
||||
if !compiler.diagnostics.is_empty() {
|
||||
panic!("compiler diagnostics were emitted")
|
||||
if !diagnostics.is_empty() {
|
||||
panic!("diagnostics were emitted")
|
||||
}
|
||||
|
||||
let limits = VmLimits {
|
||||
stack_capacity: 256,
|
||||
stack_capacity: 1024,
|
||||
call_stack_capacity: 256,
|
||||
ref_capacity: 256,
|
||||
fuel: 32768,
|
||||
|
@ -50,16 +67,9 @@ fn eval(code: &str) -> Result<Value, Box<dyn Error>> {
|
|||
let mut vm = Vm::new(defs, &limits);
|
||||
let chunk_id = system.add_chunk(chunk)?;
|
||||
println!("bytecode: {:?}", system.chunk(chunk_id));
|
||||
println!("closure spec: {closure_spec:?}");
|
||||
|
||||
let closure = vm.create_ref(Ref::Closure(Closure {
|
||||
start: BytecodeLoc {
|
||||
chunk_id,
|
||||
offset: 0,
|
||||
},
|
||||
name: FunctionName::Anonymous,
|
||||
param_count: 0,
|
||||
captures: Vec::new(),
|
||||
}))?;
|
||||
let closure = vm.create_ref(Ref::Closure(Closure::chunk(chunk_id, closure_spec)))?;
|
||||
let result = vm.run(&system, closure)?;
|
||||
|
||||
println!("used fuel: {}", limits.fuel - vm.remaining_fuel());
|
||||
|
@ -87,49 +97,52 @@ fn literal_number() {
|
|||
|
||||
#[test]
|
||||
fn literal_bool() {
|
||||
assert_eq!(eval("false").unwrap(), Value::False);
|
||||
assert_eq!(eval("true").unwrap(), Value::True);
|
||||
assert_eq!(eval("False").unwrap(), Value::False);
|
||||
assert_eq!(eval("True").unwrap(), Value::True);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn function_nil() {
|
||||
assert_eq!(eval("(fn () ())").unwrap(), Value::Ref(RefId::from_u32(1)));
|
||||
assert_eq!(
|
||||
eval(r#" \_ -> () "#).unwrap(),
|
||||
Value::Ref(RefId::from_u32(1))
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn function_nil_call() {
|
||||
assert_eq!(eval("((fn () ()))").unwrap(), Value::Nil);
|
||||
assert_eq!(eval(r#"(\_ -> ()) ()"#).unwrap(), Value::Nil);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn function_arithmetic() {
|
||||
expect_number("((fn (x) (+ x 2)) 2)", 4.0, 0.0001);
|
||||
expect_number(r#"(\x -> x + 2) 2"#, 4.0, 0.0001);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn function_let() {
|
||||
expect_number("((fn (add-two) (add-two 2)) (fn (x) (+ x 2)))", 4.0, 0.0001);
|
||||
expect_number(r#"(\addTwo -> addTwo 2) \x -> x + 2"#, 4.0, 0.0001);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn function_closure() {
|
||||
expect_number("(((fn (x) (fn (y) (+ x y))) 2) 2)", 4.0, 0.0001);
|
||||
expect_number(r#"((\x -> \y -> x + y) 2) 2"#, 4.0, 0.0001);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn if_literal() {
|
||||
expect_number("(if 1 1 2)", 1.0, 0.0001);
|
||||
expect_number("(if () 1 2)", 2.0, 0.0001);
|
||||
expect_number("(if false 1 2)", 2.0, 0.0001);
|
||||
expect_number("(if true 1 2)", 1.0, 0.0001);
|
||||
expect_number("if (1) 1 else 2", 1.0, 0.0001);
|
||||
expect_number("if (()) 1 else 2", 2.0, 0.0001);
|
||||
expect_number("if (False) 1 else 2", 2.0, 0.0001);
|
||||
expect_number("if (True) 1 else 2", 1.0, 0.0001);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn def_simple() {
|
||||
let code = r#"
|
||||
(def x 1)
|
||||
(def y 2)
|
||||
(+ x y)
|
||||
x = 1
|
||||
y = 2
|
||||
x + y
|
||||
"#;
|
||||
expect_number(code, 3.0, 0.0001);
|
||||
}
|
||||
|
@ -137,13 +150,13 @@ fn def_simple() {
|
|||
#[test]
|
||||
fn def_fib_recursive() {
|
||||
let code = r#"
|
||||
(def fib
|
||||
(fn (n)
|
||||
(if (< n 2)
|
||||
fib = \n ->
|
||||
if (n < 2)
|
||||
n
|
||||
(+ (fib (- n 1)) (fib (- n 2))))))
|
||||
else
|
||||
fib (n - 1) + fib (n - 2)
|
||||
|
||||
(fib 10)
|
||||
fib 10
|
||||
"#;
|
||||
expect_number(code, 55.0, 0.0001);
|
||||
}
|
||||
|
@ -151,27 +164,30 @@ fn def_fib_recursive() {
|
|||
#[test]
|
||||
fn def_mutually_recursive() {
|
||||
let code = r#"
|
||||
(def f
|
||||
(fn (x)
|
||||
(if (< x 10)
|
||||
(g (+ x 1))
|
||||
x)))
|
||||
f = \x ->
|
||||
if (x < 10)
|
||||
g (x + 1)
|
||||
else
|
||||
x
|
||||
|
||||
(def g
|
||||
(fn (x)
|
||||
(if (< x 10)
|
||||
(f (* x 2))
|
||||
x)))
|
||||
g = \x ->
|
||||
if (x < 10)
|
||||
f (x * 2)
|
||||
else
|
||||
x
|
||||
|
||||
(f 0)
|
||||
f 0
|
||||
"#;
|
||||
expect_number(code, 14.0, 0.0001);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn def_botsbuildbots() {
|
||||
let result = eval("(def botsbuildbots (fn () (botsbuildbots))) (botsbuildbots)");
|
||||
if let Err(error) = result {
|
||||
let code = r#"
|
||||
botsbuildbots = \_ -> botsbuildbots ()
|
||||
botsbuildbots ()
|
||||
"#;
|
||||
if let Err(error) = eval(code) {
|
||||
assert_eq!(
|
||||
error.to_string(),
|
||||
"Exception {\n message: \"too much recursion\",\n}"
|
||||
|
@ -184,8 +200,8 @@ fn def_botsbuildbots() {
|
|||
#[test]
|
||||
fn let_single() {
|
||||
let code = r#"
|
||||
(let ((x 1))
|
||||
(+ x 1))
|
||||
let x = 1
|
||||
x + 1
|
||||
"#;
|
||||
expect_number(code, 2.0, 0.0001);
|
||||
}
|
||||
|
@ -193,9 +209,9 @@ fn let_single() {
|
|||
#[test]
|
||||
fn let_many() {
|
||||
let code = r#"
|
||||
(let ((x 1)
|
||||
(y 2))
|
||||
(+ x y))
|
||||
let x = 1
|
||||
let y = 2
|
||||
x + y
|
||||
"#;
|
||||
expect_number(code, 3.0, 0.0001);
|
||||
}
|
||||
|
@ -203,9 +219,9 @@ fn let_many() {
|
|||
#[test]
|
||||
fn let_sequence() {
|
||||
let code = r#"
|
||||
(let ((x 1)
|
||||
(y (+ x 1)))
|
||||
(+ x y))
|
||||
let x = 1
|
||||
let y = x + 1
|
||||
x + y
|
||||
"#;
|
||||
expect_number(code, 3.0, 0.0001);
|
||||
}
|
||||
|
@ -213,59 +229,40 @@ fn let_sequence() {
|
|||
#[test]
|
||||
fn let_subexpr() {
|
||||
let code = r#"
|
||||
(+
|
||||
(let ((x 1)
|
||||
(y 2))
|
||||
(* x y)))
|
||||
(let x = 1
|
||||
let y = 2
|
||||
x * y) + 2
|
||||
"#;
|
||||
expect_number(code, 2.0, 0.0001);
|
||||
expect_number(code, 4.0, 0.0001);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn let_empty() {
|
||||
fn let_subexpr_two() {
|
||||
let code = r#"
|
||||
(let () 1)
|
||||
"#;
|
||||
expect_number(code, 1.0, 0.0001);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn let_subexpr_empty() {
|
||||
let code = r#"
|
||||
(+ (let () 1) (let () 1))
|
||||
"#;
|
||||
expect_number(code, 2.0, 0.0001);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn let_subexpr_many() {
|
||||
let code = r#"
|
||||
(+
|
||||
(let ((x 1)
|
||||
(y 2))
|
||||
(* x y))
|
||||
(let () 1)
|
||||
(let ((x 1)) x))
|
||||
(let x = 1
|
||||
2) +
|
||||
(let x = 1
|
||||
x)
|
||||
"#;
|
||||
expect_number(code, 3.0, 0.0001);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn system_arithmetic() {
|
||||
expect_number("(+ 1 2 3 4)", 10.0, 0.0001);
|
||||
expect_number("(+ (* 2 1) 1 (/ 6 2) (- 10 3))", 13.0, 0.0001);
|
||||
fn let_subexpr_many() {
|
||||
let code = r#"
|
||||
(let x = 1
|
||||
let y = 2
|
||||
x * y) +
|
||||
(let x = 1
|
||||
2) +
|
||||
(let x = 1
|
||||
x)
|
||||
"#;
|
||||
expect_number(code, 5.0, 0.0001);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn practical_fib_recursive() {
|
||||
let code = r#"
|
||||
((fn (fib)
|
||||
(fib fib 10))
|
||||
|
||||
(fn (fib n)
|
||||
(if (< n 2)
|
||||
n
|
||||
(+ (fib fib (- n 1)) (fib fib (- n 2))))))
|
||||
"#;
|
||||
expect_number(code, 55.0, 0.0001);
|
||||
fn system_arithmetic() {
|
||||
expect_number("1 + 2 + 3 + 4", 10.0, 0.0001);
|
||||
expect_number("(2 * 1) + 1 + (6 / 2) + (10 - 3)", 13.0, 0.0001);
|
||||
}
|
||||
|
|
|
@ -5,11 +5,15 @@
|
|||
|
||||
use eyre::{bail, Context, OptionExt};
|
||||
use haku::{
|
||||
ast::Ast,
|
||||
bytecode::{Chunk, Defs, DefsImage},
|
||||
compiler::{Compiler, Source},
|
||||
lexer::{lex, Lexer},
|
||||
parser::{self, Parser, ParserLimits},
|
||||
render::{tiny_skia::Pixmap, Renderer, RendererLimits},
|
||||
sexp::{Ast, Parser, SourceCode},
|
||||
source::SourceCode,
|
||||
system::{ChunkId, System, SystemImage},
|
||||
token::Lexis,
|
||||
value::{BytecodeLoc, Closure, FunctionName, Ref, Value},
|
||||
vm::{Vm, VmImage, VmLimits},
|
||||
};
|
||||
|
@ -22,9 +26,11 @@ use crate::schema::Vec2;
|
|||
// because we do some dynamic typing magic over on the JavaScript side to automatically call all
|
||||
// the appropriate functions for setting these limits on the client side.
|
||||
pub struct Limits {
|
||||
pub max_source_code_len: usize,
|
||||
pub max_source_code_len: u32,
|
||||
pub max_chunks: usize,
|
||||
pub max_defs: usize,
|
||||
pub max_tokens: usize,
|
||||
pub max_parser_events: usize,
|
||||
pub ast_capacity: usize,
|
||||
pub chunk_capacity: usize,
|
||||
pub stack_capacity: usize,
|
||||
|
@ -88,12 +94,21 @@ impl Haku {
|
|||
pub fn set_brush(&mut self, code: &str) -> eyre::Result<()> {
|
||||
self.reset();
|
||||
|
||||
let ast = Ast::new(self.limits.ast_capacity);
|
||||
let code = SourceCode::limited_len(code, self.limits.max_source_code_len)
|
||||
.ok_or_eyre("source code is too long")?;
|
||||
let mut parser = Parser::new(ast, code);
|
||||
let root = haku::sexp::parse_toplevel(&mut parser);
|
||||
let ast = parser.ast;
|
||||
|
||||
let mut lexer = Lexer::new(Lexis::new(self.limits.max_tokens), code);
|
||||
lex(&mut lexer)?;
|
||||
|
||||
let mut parser = Parser::new(
|
||||
&lexer.lexis,
|
||||
&ParserLimits {
|
||||
max_events: self.limits.max_parser_events,
|
||||
},
|
||||
);
|
||||
parser::toplevel(&mut parser);
|
||||
let mut ast = Ast::new(self.limits.ast_capacity);
|
||||
let (root, parser_diagnostics) = parser.into_ast(&mut ast)?;
|
||||
|
||||
let src = Source {
|
||||
code,
|
||||
|
@ -107,7 +122,10 @@ impl Haku {
|
|||
haku::compiler::compile_expr(&mut compiler, &src, root)
|
||||
.context("failed to compile the chunk")?;
|
||||
|
||||
if !compiler.diagnostics.is_empty() {
|
||||
if !lexer.diagnostics.is_empty()
|
||||
|| !parser_diagnostics.is_empty()
|
||||
|| !compiler.diagnostics.is_empty()
|
||||
{
|
||||
bail!("diagnostics were emitted");
|
||||
}
|
||||
|
||||
|
|
|
@ -61,6 +61,12 @@ max_chunks = 2
|
|||
# Maximum number of defs across all source code chunks.
|
||||
max_defs = 256
|
||||
|
||||
# Maximum number of tokens a single chunk can have.
|
||||
max_tokens = 4096
|
||||
|
||||
# Maximum number of events that the parser may emit in a single chunk.
|
||||
max_parser_events = 4096
|
||||
|
||||
# Maximum number of AST nodes in a single parse.
|
||||
ast_capacity = 4096
|
||||
|
||||
|
|
Loading…
Reference in a new issue