initial commit

This commit is contained in:
liquidex 2024-08-10 23:10:03 +02:00
commit caec0b8ac9
27 changed files with 4786 additions and 0 deletions

15
.editorconfig Normal file
View file

@ -0,0 +1,15 @@
# EditorConfig is awesome: https://EditorConfig.org
# top-most EditorConfig file
root = true
[*]
indent_style = space
indent_size = 4
end_of_line = lf
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
[*.js]
max_line_length = 100

1
.gitignore vendored Normal file
View file

@ -0,0 +1 @@
/target

1058
Cargo.lock generated Normal file

File diff suppressed because it is too large Load diff

15
Cargo.toml Normal file
View file

@ -0,0 +1,15 @@
[workspace]
resolver = "2"
members = ["crates/*"]
[workspace.dependencies]
haku.path = "crates/haku"
log = "0.4.22"
[profile.wasm-dev]
inherits = "dev"
panic = "abort"
[profile.wasm-release]
inherits = "release"
panic = "abort"

5
Justfile Normal file
View file

@ -0,0 +1,5 @@
serve wasm_profile="wasm-dev": (wasm wasm_profile)
cargo run -p canvane
wasm profile="wasm-dev":
cargo build -p haku-wasm --target wasm32-unknown-unknown --profile {{profile}}

15
crates/canvane/Cargo.toml Normal file
View file

@ -0,0 +1,15 @@
[package]
name = "canvane"
version = "0.1.0"
edition = "2021"
[dependencies]
axum = "0.7.5"
color-eyre = "0.6.3"
copy_dir = "0.1.3"
eyre = "0.6.12"
haku.workspace = true
tokio = { version = "1.39.2", features = ["full"] }
tower-http = { version = "0.5.2", features = ["fs"] }
tracing = "0.1.40"
tracing-subscriber = { version = "0.3.18", features = ["fmt"] }

View file

@ -0,0 +1,23 @@
use std::time::Duration;
use axum::{routing::get, Router};
use tokio::time::sleep;
/// Builds the router exposing the live-reload endpoints.
///
/// The routes themselves need no state, so the router is finalised with a unit state and can
/// be nested into a parent router of any state type `S`.
pub fn router<S>() -> Router<S> {
    let routes = Router::new()
        .route("/stall", get(stall))
        .route("/back-up", get(back_up));
    routes.with_state(())
}
/// Handler that never produces a response: it keeps the connection open by sleeping in a loop.
async fn stall() -> String {
    // One day per nap; the exact length is unimportant, it only has to be long enough that the
    // task holds the connection open without using any significant resources.
    const NAP: Duration = Duration::from_secs(60 * 60 * 24);
    loop {
        sleep(NAP).await;
    }
}
/// Handler that immediately answers with an empty body.
async fn back_up() -> String {
    String::new()
}

View file

@ -0,0 +1,70 @@
use std::{
fs::{copy, create_dir_all, remove_dir_all},
path::Path,
};
use axum::Router;
use copy_dir::copy_dir;
use eyre::Context;
use tokio::net::TcpListener;
use tower_http::services::{ServeDir, ServeFile};
use tracing::{info, info_span};
use tracing_subscriber::fmt::format::FmtSpan;
#[cfg(debug_assertions)]
mod live_reload;
/// Filesystem locations used when building and serving the site.
struct Paths<'a> {
/// Output directory: `build` wipes and repopulates it, and `main` serves files out of it.
target_dir: &'a Path,
}
/// Rebuilds the served site inside `paths.target_dir`.
///
/// The target directory is wiped and recreated, the `static` directory is copied into it, and
/// the compiled `haku_wasm.wasm` is copied to `static/wasm/haku.wasm`.
///
/// # Errors
///
/// Returns an error (with context) if any directory cannot be created or any copy fails.
fn build(paths: &Paths<'_>) -> eyre::Result<()> {
    let _span = info_span!("build").entered();

    let site_dir = paths.target_dir;
    let static_dir = site_dir.join("static");
    let wasm_dir = site_dir.join("static/wasm");
    let wasm_file = site_dir.join("static/wasm/haku.wasm");

    // Best effort: the result of removing the old output is intentionally ignored.
    let _ = remove_dir_all(site_dir);
    create_dir_all(site_dir).context("cannot create target directory")?;

    copy_dir("static", static_dir).context("cannot copy static directory")?;

    create_dir_all(wasm_dir).context("cannot create static/wasm directory")?;
    // NOTE: the source path always points at the `wasm-dev` profile output.
    copy(
        "target/wasm32-unknown-unknown/wasm-dev/haku_wasm.wasm",
        wasm_file,
    )
    .context("cannot copy haku.wasm file")?;

    Ok(())
}
#[tokio::main]
async fn main() {
color_eyre::install().unwrap();
tracing_subscriber::fmt()
.with_span_events(FmtSpan::ACTIVE)
.init();
let paths = Paths {
target_dir: Path::new("target/site"),
};
match build(&paths) {
Ok(()) => (),
Err(error) => eprintln!("{error:?}"),
}
let app = Router::new()
.route_service(
"/",
ServeFile::new(paths.target_dir.join("static/index.html")),
)
.nest_service("/static", ServeDir::new(paths.target_dir.join("static")));
#[cfg(debug_assertions)]
let app = app.nest("/dev/live-reload", live_reload::router());
let listener = TcpListener::bind("0.0.0.0:8080")
.await
.expect("cannot bind to port");
info!("listening on port 8080");
axum::serve(listener, app).await.expect("cannot serve app");
}

View file

@ -0,0 +1,7 @@
[package]
name = "haku-cli"
version = "0.1.0"
edition = "2021"
[dependencies]
haku.workspace = true

View file

@ -0,0 +1,91 @@
// NOTE: This is a very bad CLI.
// Sorry!
use std::{error::Error, fmt::Display, io::BufRead};
use haku::{
bytecode::{Chunk, Defs},
compiler::{compile_expr, Compiler, Source},
sexp::{parse_toplevel, Ast, Parser},
system::System,
value::{BytecodeLoc, Closure, FunctionName, Ref, Value},
vm::{Vm, VmLimits},
};
/// Parses, compiles and runs one piece of haku source code and returns the resulting value.
///
/// The compiled chunk is printed to stdout and every diagnostic is printed to stderr as
/// `start..end: message`.
///
/// # Errors
///
/// Returns an error if compilation fails, if any diagnostics were emitted
/// (`DiagnosticsEmitted`), if the chunk cannot be registered with the system, if the closure
/// ref cannot be created, or if running the VM fails.
fn eval(code: &str) -> Result<Value, Box<dyn Error>> {
let mut system = System::new(1);
// Parse the source; the parser owns the AST while parsing and hands it back afterwards.
let ast = Ast::new(1024);
let mut parser = Parser::new(ast, code);
let root = parse_toplevel(&mut parser);
let ast = parser.ast;
let src = Source {
code,
ast: &ast,
system: &system,
};
// Compile the whole input into a single chunk.
let mut defs = Defs::new(256);
let mut chunk = Chunk::new(65536).unwrap();
let mut compiler = Compiler::new(&mut defs, &mut chunk);
compile_expr(&mut compiler, &src, root)?;
let diagnostics = compiler.diagnostics;
let defs = compiler.defs;
println!("{chunk:?}");
for diagnostic in &diagnostics {
eprintln!(
"{}..{}: {}",
diagnostic.span.start, diagnostic.span.end, diagnostic.message
);
}
// Any diagnostic means the chunk is not run.
if !diagnostics.is_empty() {
return Err(Box::new(DiagnosticsEmitted));
}
let mut vm = Vm::new(
defs,
&VmLimits {
stack_capacity: 256,
call_stack_capacity: 256,
ref_capacity: 256,
fuel: 32768,
},
);
// Wrap the chunk in a zero-parameter anonymous closure starting at offset 0 and run it.
let chunk_id = system.add_chunk(chunk)?;
let closure = vm.create_ref(Ref::Closure(Closure {
start: BytecodeLoc {
chunk_id,
offset: 0,
},
name: FunctionName::Anonymous,
param_count: 0,
captures: Vec::new(),
}))?;
Ok(vm.run(&system, closure)?)
}
/// Error returned by `eval` when compilation produced at least one diagnostic.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
struct DiagnosticsEmitted;

impl Display for DiagnosticsEmitted {
    fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(formatter, "diagnostics were emitted")
    }
}

impl Error for DiagnosticsEmitted {}
/// Reads stdin line by line, evaluating each line as a standalone haku program.
///
/// Results are printed to stdout and evaluation errors to stderr; only a failure to read from
/// stdin ends the loop with an error.
fn main() -> Result<(), Box<dyn Error>> {
    let stdin = std::io::stdin();
    let lines = stdin.lock().lines();
    for maybe_line in lines {
        let source = maybe_line?;
        let outcome = eval(&source);
        match outcome {
            Err(error) => eprintln!("error: {error}"),
            Ok(value) => println!("{value:?}"),
        }
    }
    Ok(())
}

View file

@ -0,0 +1,14 @@
[package]
name = "haku-wasm"
version = "0.1.0"
edition = "2021"
[lib]
crate-type = ["cdylib"]
[dependencies]
arrayvec = { version = "0.7.4", default-features = false }
dlmalloc = { version = "0.2.6", features = ["global"] }
haku.workspace = true
log.workspace = true

349
crates/haku-wasm/src/lib.rs Normal file
View file

@ -0,0 +1,349 @@
#![no_std]
extern crate alloc;
use core::{alloc::Layout, ffi::CStr, slice, str};
use alloc::{boxed::Box, vec::Vec};
use haku::{
bytecode::{Chunk, Defs, DefsImage},
compiler::{compile_expr, CompileError, Compiler, Diagnostic, Source},
render::{Bitmap, Renderer, RendererLimits},
sexp::{self, parse_toplevel, Ast, Parser},
system::{ChunkId, System, SystemImage},
value::{BytecodeLoc, Closure, FunctionName, Ref, Value},
vm::{Exception, Vm, VmImage, VmLimits},
};
use log::info;
pub mod logging;
mod panicking;
#[global_allocator]
static ALLOCATOR: dlmalloc::GlobalDlmalloc = dlmalloc::GlobalDlmalloc;
/// Allocates `size` bytes aligned to `align` for use by the host.
///
/// A zero-size request never reaches the global allocator (which must not be asked for
/// zero-sized layouts); it yields a non-null, suitably aligned dangling pointer that must not
/// be read or written, only passed back to `haku_free` with the same size and alignment.
///
/// # Panics
///
/// Panics if `size` and `align` do not form a valid `Layout`.
///
/// # Safety
///
/// The returned memory is uninitialised and must be released with `haku_free` using the same
/// `size` and `align`.
#[no_mangle]
unsafe extern "C" fn haku_alloc(size: usize, align: usize) -> *mut u8 {
    let layout = Layout::from_size_align(size, align).unwrap();
    if layout.size() == 0 {
        // Aligned, non-null, never dereferenceable: the conventional zero-size "allocation".
        return core::ptr::null_mut::<u8>().wrapping_add(layout.align());
    }
    alloc::alloc::alloc(layout)
}

/// Releases memory previously obtained from `haku_alloc`.
///
/// Zero-size "allocations" are dangling pointers that were never handed out by the allocator,
/// so freeing them is a no-op.
///
/// # Panics
///
/// Panics if `size` and `align` do not form a valid `Layout`.
///
/// # Safety
///
/// `ptr` must come from `haku_alloc(size, align)` with exactly these arguments and must not
/// have been freed already.
#[no_mangle]
unsafe extern "C" fn haku_free(ptr: *mut u8, size: usize, align: usize) {
    let layout = Layout::from_size_align(size, align).unwrap();
    if layout.size() == 0 {
        return;
    }
    alloc::alloc::dealloc(ptr, layout)
}
/// Resource limits applied to one `Instance` and everything created through it.
#[derive(Debug, Clone, Copy)]
struct Limits {
    /// Passed to `System::new` when the instance is created.
    max_chunks: usize,
    /// Passed to `Defs::new` when the instance is created.
    max_defs: usize,
    /// Passed to `Ast::new` for every compiled brush.
    ast_capacity: usize,
    /// Passed to `Chunk::new` for every compiled brush.
    chunk_capacity: usize,
    /// Forwarded to the VM as `VmLimits::stack_capacity`.
    stack_capacity: usize,
    /// Forwarded to the VM as `VmLimits::call_stack_capacity`.
    call_stack_capacity: usize,
    /// Forwarded to the VM as `VmLimits::ref_capacity`.
    ref_capacity: usize,
    /// Forwarded to the VM as `VmLimits::fuel`.
    fuel: usize,
    /// Forwarded to the renderer as `RendererLimits::bitmap_stack_capacity`.
    bitmap_stack_capacity: usize,
    /// Forwarded to the renderer as `RendererLimits::transform_stack_capacity`.
    transform_stack_capacity: usize,
}

impl Default for Limits {
    fn default() -> Self {
        Limits {
            // Compilation.
            max_chunks: 2,
            max_defs: 256,
            ast_capacity: 1024,
            chunk_capacity: 65536,
            // Evaluation.
            stack_capacity: 1024,
            call_stack_capacity: 256,
            ref_capacity: 2048,
            fuel: 65536,
            // Rendering.
            bitmap_stack_capacity: 4,
            transform_stack_capacity: 16,
        }
    }
}
/// All state owned by one haku instance handed out to the host.
///
/// Each `*_image` field is captured from its counterpart in `haku_instance_new`, right after
/// construction.
#[derive(Debug, Clone)]
struct Instance {
/// Limits this instance was created with.
limits: Limits,
/// Holds the compiled chunks registered by `haku_compile_brush`.
system: System,
system_image: SystemImage,
/// Definitions shared by every brush compiled with this instance.
defs: Defs,
defs_image: DefsImage,
/// The VM brushes are evaluated on.
vm: Vm,
/// Image the VM is restored to by `haku_render_brush`.
vm_image: VmImage,
/// Exception stored by `haku_render_brush` when evaluation or rendering fails.
exception: Option<Exception>,
}
/// Creates a new instance with default limits and returns an owning raw pointer to it.
///
/// The pointer must eventually be passed to `haku_instance_destroy`.
#[no_mangle]
unsafe extern "C" fn haku_instance_new() -> *mut Instance {
    // TODO: This should be a parameter.
    let limits = Limits::default();
    let vm_limits = VmLimits {
        stack_capacity: limits.stack_capacity,
        call_stack_capacity: limits.call_stack_capacity,
        ref_capacity: limits.ref_capacity,
        fuel: limits.fuel,
    };

    let system = System::new(limits.max_chunks);
    let defs = Defs::new(limits.max_defs);
    let vm = Vm::new(&defs, &vm_limits);

    // Capture images of the freshly created state before handing the instance out.
    let system_image = system.image();
    let defs_image = defs.image();
    let vm_image = vm.image();

    Box::into_raw(Box::new(Instance {
        limits,
        system,
        system_image,
        defs,
        defs_image,
        vm,
        vm_image,
        exception: None,
    }))
}
/// Destroys an instance created by `haku_instance_new`.
///
/// # Safety
///
/// `instance` must come from `haku_instance_new` and must not be used afterwards.
#[no_mangle]
unsafe extern "C" fn haku_instance_destroy(instance: *mut Instance) {
    // Re-box the pointer so the instance is dropped and its memory released.
    let owned = Box::from_raw(instance);
    drop(owned);
}
/// Reports whether the instance currently stores an exception.
///
/// # Safety
///
/// `instance` must point to a live `Instance`.
#[no_mangle]
unsafe extern "C" fn haku_has_exception(instance: *mut Instance) -> bool {
    let instance = &*instance;
    instance.exception.is_some()
}
/// Returns a pointer to the first byte of the stored exception's message.
///
/// # Panics
///
/// Panics if the instance stores no exception.
///
/// # Safety
///
/// `instance` must point to a live `Instance`.
#[no_mangle]
unsafe extern "C" fn haku_exception_message(instance: *const Instance) -> *const u8 {
    let exception = (*instance).exception.as_ref();
    exception.unwrap().message.as_ptr()
}
/// Returns the byte length of the stored exception's message.
///
/// # Panics
///
/// Panics if the instance stores no exception.
///
/// # Safety
///
/// `instance` must point to a live `Instance`.
#[no_mangle]
unsafe extern "C" fn haku_exception_message_len(instance: *const Instance) -> u32 {
    let exception = (*instance).exception.as_ref();
    exception.unwrap().message.len() as u32
}
/// Result of the fallible exported operations, passed across the C ABI by value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(C)]
enum StatusCode {
    /// The operation succeeded.
    Ok,
    /// The compiled bytecode did not fit into the chunk.
    ChunkTooBig,
    /// Compilation produced diagnostics; they are stored on the brush.
    DiagnosticsEmitted,
    /// The system could not register another chunk.
    TooManyChunks,
    /// The VM could not allocate the ref for the brush closure.
    OutOfRefSlots,
    /// Evaluating the brush failed; the exception is stored on the instance.
    EvalException,
    /// Rendering the brush failed; the exception is stored on the instance.
    RenderException,
}

/// Returns `true` only for `StatusCode::Ok`.
#[no_mangle]
extern "C" fn haku_is_ok(code: StatusCode) -> bool {
    matches!(code, StatusCode::Ok)
}

/// Returns a static, NUL-terminated, human-readable description of `code`.
///
/// The pointer type is spelled `c_char` because that is what `CStr::as_ptr` yields; whether
/// `c_char` is `i8` or `u8` depends on the target, so hard-coding `i8` is not portable.
#[no_mangle]
extern "C" fn haku_status_string(code: StatusCode) -> *const core::ffi::c_char {
    let description = match code {
        StatusCode::Ok => c"ok",
        StatusCode::ChunkTooBig => c"compiled bytecode is too large",
        StatusCode::DiagnosticsEmitted => c"diagnostics were emitted",
        StatusCode::TooManyChunks => c"too many registered bytecode chunks",
        StatusCode::OutOfRefSlots => c"out of ref slots (did you forget to restore the VM image?)",
        StatusCode::EvalException => c"an exception occurred while evaluating your code",
        StatusCode::RenderException => c"an exception occurred while rendering your brush",
    };
    description.as_ptr()
}
/// Compilation state of a `Brush`.
#[derive(Debug, Default)]
enum BrushState {
/// Nothing has been compiled into the brush yet (or the last compilation failed).
#[default]
Default,
/// The brush was compiled successfully into the chunk with this ID.
Ready(ChunkId),
}
/// A brush handed out to the host; filled in by `haku_compile_brush`.
#[derive(Debug, Default)]
struct Brush {
/// Diagnostics stored by the last compilation that emitted any.
diagnostics: Vec<Diagnostic>,
/// Whether the brush holds a compiled chunk that can be rendered.
state: BrushState,
}
/// Creates an empty brush and returns an owning raw pointer to it.
///
/// The pointer must eventually be passed to `haku_brush_destroy`.
#[no_mangle]
extern "C" fn haku_brush_new() -> *mut Brush {
    let brush = Box::new(Brush::default());
    Box::into_raw(brush)
}
/// Destroys a brush created by `haku_brush_new`.
///
/// # Safety
///
/// `brush` must come from `haku_brush_new` and must not be used afterwards.
#[no_mangle]
unsafe extern "C" fn haku_brush_destroy(brush: *mut Brush) {
    let owned = Box::from_raw(brush);
    drop(owned);
}
/// Returns how many diagnostics are stored on the brush.
///
/// # Safety
///
/// `brush` must point to a live `Brush`.
#[no_mangle]
unsafe extern "C" fn haku_num_diagnostics(brush: *const Brush) -> u32 {
    let brush = &*brush;
    brush.diagnostics.len() as u32
}
/// Returns the start of the span of the diagnostic at `index`.
///
/// # Panics
///
/// Panics if `index` is out of bounds.
///
/// # Safety
///
/// `brush` must point to a live `Brush`.
#[no_mangle]
unsafe extern "C" fn haku_diagnostic_start(brush: *const Brush, index: u32) -> u32 {
    let brush = &*brush;
    let diagnostic = &brush.diagnostics[index as usize];
    diagnostic.span.start as u32
}
/// Returns the end of the span of the diagnostic at `index`.
///
/// # Panics
///
/// Panics if `index` is out of bounds.
///
/// # Safety
///
/// `brush` must point to a live `Brush`.
#[no_mangle]
unsafe extern "C" fn haku_diagnostic_end(brush: *const Brush, index: u32) -> u32 {
    let brush = &*brush;
    let diagnostic = &brush.diagnostics[index as usize];
    diagnostic.span.end as u32
}
/// Returns a pointer to the first byte of the message of the diagnostic at `index`.
///
/// # Panics
///
/// Panics if `index` is out of bounds.
///
/// # Safety
///
/// `brush` must point to a live `Brush`.
#[no_mangle]
unsafe extern "C" fn haku_diagnostic_message(brush: *const Brush, index: u32) -> *const u8 {
    let brush = &*brush;
    let diagnostic = &brush.diagnostics[index as usize];
    diagnostic.message.as_ptr()
}
/// Returns the byte length of the message of the diagnostic at `index`.
///
/// # Panics
///
/// Panics if `index` is out of bounds.
///
/// # Safety
///
/// `brush` must point to a live `Brush`.
#[no_mangle]
unsafe extern "C" fn haku_diagnostic_message_len(brush: *const Brush, index: u32) -> u32 {
    let brush = &*brush;
    let diagnostic = &brush.diagnostics[index as usize];
    diagnostic.message.len() as u32
}
/// Compiles `code` into `out_brush`, registering the resulting chunk with the instance.
///
/// The brush is reset first. On `DiagnosticsEmitted` the diagnostics are stored on the brush;
/// on `Ok` the brush becomes ready and refers to the newly registered chunk.
///
/// # Panics
///
/// Panics if `code` is not valid UTF-8, or if the configured chunk capacity is rejected by
/// `Chunk::new`.
///
/// # Safety
///
/// `instance` and `out_brush` must point to live objects, and `code` must point to `code_len`
/// readable bytes.
#[no_mangle]
unsafe extern "C" fn haku_compile_brush(
    instance: *mut Instance,
    out_brush: *mut Brush,
    code_len: u32,
    code: *const u8,
) -> StatusCode {
    info!("compiling brush");

    let instance = &mut *instance;
    let brush = &mut *out_brush;
    *brush = Brush::default();

    let source_bytes = slice::from_raw_parts(code, code_len as usize);
    let code = core::str::from_utf8(source_bytes).expect("invalid UTF-8");

    // Parse; the parser owns the AST while parsing and hands it back afterwards.
    let mut parser = Parser::new(Ast::new(instance.limits.ast_capacity), code);
    let root = parse_toplevel(&mut parser);
    let ast = parser.ast;

    let src = Source {
        code,
        ast: &ast,
        system: &instance.system,
    };

    let mut chunk = Chunk::new(instance.limits.chunk_capacity).unwrap();
    let mut compiler = Compiler::new(&mut instance.defs, &mut chunk);
    match compile_expr(&mut compiler, &src, root) {
        Ok(()) => {}
        Err(CompileError::Emit) => return StatusCode::ChunkTooBig,
    }

    if !compiler.diagnostics.is_empty() {
        brush.diagnostics = compiler.diagnostics;
        return StatusCode::DiagnosticsEmitted;
    }

    let Ok(chunk_id) = instance.system.add_chunk(chunk) else {
        return StatusCode::TooManyChunks;
    };
    brush.state = BrushState::Ready(chunk_id);

    info!("brush compiled into {chunk_id:?}");

    StatusCode::Ok
}
/// A bitmap handed out to the host, wrapped so it can be temporarily moved out.
struct BitmapLock {
/// `haku_render_brush` takes the bitmap out (leaving `None`) while rendering and puts it back
/// after a successful render.
bitmap: Option<Bitmap>,
}
/// Creates a `width` x `height` bitmap and returns an owning raw pointer to its lock.
///
/// The pointer must eventually be passed to `haku_bitmap_destroy`.
#[no_mangle]
extern "C" fn haku_bitmap_new(width: u32, height: u32) -> *mut BitmapLock {
    let lock = BitmapLock {
        bitmap: Some(Bitmap::new(width, height)),
    };
    Box::into_raw(Box::new(lock))
}
/// Destroys a bitmap created by `haku_bitmap_new`.
///
/// # Safety
///
/// `bitmap` must come from `haku_bitmap_new` and must not be used afterwards.
#[no_mangle]
unsafe extern "C" fn haku_bitmap_destroy(bitmap: *mut BitmapLock) {
    let owned = Box::from_raw(bitmap);
    drop(owned);
}
/// Returns a byte pointer to the start of the bitmap's pixel storage.
///
/// # Panics
///
/// Panics if the bitmap is currently taken out of its lock.
///
/// # Safety
///
/// `bitmap` must point to a live `BitmapLock`.
#[no_mangle]
unsafe extern "C" fn haku_bitmap_data(bitmap: *mut BitmapLock) -> *mut u8 {
    let lock = &mut *bitmap;
    let unlocked = lock
        .bitmap
        .as_mut()
        .expect("bitmap is already being rendered to");

    let pixels = &mut unlocked.pixels[..];
    pixels.as_mut_ptr().cast()
}
#[no_mangle]
unsafe extern "C" fn haku_render_brush(
instance: *mut Instance,
brush: *const Brush,
bitmap: *mut BitmapLock,
) -> StatusCode {
let instance = &mut *instance;
let brush = &*brush;
let BrushState::Ready(chunk_id) = brush.state else {
panic!("brush is not compiled and ready to be used");
};
let Ok(closure_id) = instance.vm.create_ref(Ref::Closure(Closure {
start: BytecodeLoc {
chunk_id,
offset: 0,
},
name: FunctionName::Anonymous,
param_count: 0,
captures: Vec::new(),
})) else {
return StatusCode::OutOfRefSlots;
};
let scribble = match instance.vm.run(&instance.system, closure_id) {
Ok(value) => value,
Err(exn) => {
instance.exception = Some(exn);
return StatusCode::EvalException;
}
};
let bitmap_locked = (*bitmap)
.bitmap
.take()
.expect("bitmap is already being rendered to");
let mut renderer = Renderer::new(
bitmap_locked,
&RendererLimits {
bitmap_stack_capacity: instance.limits.bitmap_stack_capacity,
transform_stack_capacity: instance.limits.transform_stack_capacity,
},
);
match renderer.render(&instance.vm, scribble) {
Ok(()) => (),
Err(exn) => {
instance.exception = Some(exn);
return StatusCode::RenderException;
}
}
let bitmap_locked = renderer.finish();
(*bitmap).bitmap = Some(bitmap_locked);
instance.vm.restore_image(&instance.vm_image);
StatusCode::Ok
}

View file

@ -0,0 +1,44 @@
use alloc::format;
use log::{info, Log};
// Logging sinks defined outside this crate, one per log level.
// Each receives the message's byte length followed by a pointer to its first byte.
extern "C" {
fn trace(message_len: u32, message: *const u8);
fn debug(message_len: u32, message: *const u8);
fn info(message_len: u32, message: *const u8);
fn warn(message_len: u32, message: *const u8);
fn error(message_len: u32, message: *const u8);
}
struct ConsoleLogger;
impl Log for ConsoleLogger {
fn enabled(&self, _: &log::Metadata) -> bool {
true
}
fn log(&self, record: &log::Record) {
let s = record
.module_path()
.map(|module_path| format!("{module_path}: {}", record.args()))
.unwrap_or_else(|| format!("{}", record.args()));
unsafe {
match record.level() {
log::Level::Error => error(s.len() as u32, s.as_ptr()),
log::Level::Warn => warn(s.len() as u32, s.as_ptr()),
log::Level::Info => info(s.len() as u32, s.as_ptr()),
log::Level::Debug => debug(s.len() as u32, s.as_ptr()),
log::Level::Trace => trace(s.len() as u32, s.as_ptr()),
}
}
}
fn flush(&self) {}
}
#[no_mangle]
extern "C" fn haku_init_logging() {
log::set_logger(&ConsoleLogger).unwrap();
log::set_max_level(log::LevelFilter::Trace);
info!("enabled logging");
}

View file

@ -0,0 +1,20 @@
use core::fmt::Write;
use alloc::string::String;
// Panic sink defined outside this crate; declared as never returning.
// Receives the message's byte length followed by a pointer to its first byte.
extern "C" {
fn panic(message_len: u32, message: *const u8) -> !;
}
fn panic_impl(info: &core::panic::PanicInfo) -> ! {
let mut message = String::new();
_ = write!(&mut message, "{info}");
unsafe { panic(message.len() as u32, message.as_ptr()) };
}
/// The crate's panic handler (not compiled for tests); delegates to `panic_impl`.
#[cfg(not(test))]
#[panic_handler]
fn panic_handler(info: &core::panic::PanicInfo) -> ! {
panic_impl(info)
}

6
crates/haku/Cargo.toml Normal file
View file

@ -0,0 +1,6 @@
[package]
name = "haku"
version = "0.1.0"
edition = "2021"
[dependencies]

266
crates/haku/src/bytecode.rs Normal file
View file

@ -0,0 +1,266 @@
use core::{
fmt::{self, Display},
mem::transmute,
};
use alloc::{borrow::ToOwned, string::String, vec::Vec};
/// Bytecode instructions, each encoded as a single byte.
///
/// The trailing comment on a variant lists the operands that follow the opcode byte.
/// `Chunk::read_opcode` accepts any byte up to and including `Return`, so `Return` must stay
/// the last variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(u8)]
pub enum Opcode {
// Push literal values onto the stack.
Nil,
False,
True,
Number, // (float: f32)
// Duplicate existing values.
/// Push a value relative to the bottom of the current stack window.
Local, // (index: u8)
/// Push a captured value.
Capture, // (index: u8)
/// Get the value of a definition.
Def, // (index: u16)
/// Set the value of a definition.
SetDef, // (index: u16)
/// Drop `number` values from the stack.
/// <!-- OwO -->
DropLet, // (number: u8)
// Create literal functions.
Function, // (params: u8, then: u16), at `then`: (capture_count: u8, captures: [(source: u8, index: u8); capture_count])
// Control flow.
Jump, // (offset: u16)
JumpIfNot, // (offset: u16)
// Function calls.
Call, // (argc: u8)
/// This is a fast path for system calls, which are quite common (e.g. basic arithmetic.)
System, // (index: u8, argc: u8)
Return,
// NOTE: There must be no more opcodes after this.
// They will get treated as invalid.
}
// Constants used by the Function opcode to indicate capture sources.
// They are the possible values of the `source` byte in each capture entry.
pub const CAPTURE_LOCAL: u8 = 0;
pub const CAPTURE_CAPTURE: u8 = 1;
/// A buffer of compiled bytecode.
///
/// The emit functions refuse to grow the buffer beyond `bytecode.capacity()`.
#[derive(Debug, Clone)]
pub struct Chunk {
/// Raw bytecode: opcodes followed by their little-endian operands.
pub bytecode: Vec<u8>,
}
/// A 16-bit byte position within a `Chunk`'s bytecode.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Offset(u16);
impl Chunk {
    /// Creates an empty chunk able to hold `capacity` bytes.
    ///
    /// # Errors
    ///
    /// Returns `ChunkSizeError` if `capacity` exceeds 65536 bytes.
    pub fn new(capacity: usize) -> Result<Chunk, ChunkSizeError> {
        if capacity <= (1 << 16) {
            Ok(Chunk {
                bytecode: Vec::with_capacity(capacity),
            })
        } else {
            Err(ChunkSizeError)
        }
    }

    /// Returns the offset of the next byte to be emitted.
    ///
    /// The length is narrowed to 16 bits, so a completely full 65536-byte chunk reports 0.
    pub fn offset(&self) -> Offset {
        Offset(self.bytecode.len() as u16)
    }

    /// Appends `bytes` and returns the offset they were written at.
    ///
    /// # Errors
    ///
    /// Returns `EmitError` if the bytes would not fit into the buffer's current capacity; the
    /// buffer is never reallocated.
    pub fn emit_bytes(&mut self, bytes: &[u8]) -> Result<Offset, EmitError> {
        if self.bytecode.len() + bytes.len() > self.bytecode.capacity() {
            return Err(EmitError);
        }

        let offset = Offset(self.bytecode.len() as u16);
        self.bytecode.extend_from_slice(bytes);

        Ok(offset)
    }

    /// Appends a single opcode byte.
    pub fn emit_opcode(&mut self, opcode: Opcode) -> Result<Offset, EmitError> {
        self.emit_bytes(&[opcode as u8])
    }

    /// Appends a single byte.
    pub fn emit_u8(&mut self, x: u8) -> Result<Offset, EmitError> {
        self.emit_bytes(&[x])
    }

    /// Appends a `u16` in little-endian order.
    pub fn emit_u16(&mut self, x: u16) -> Result<Offset, EmitError> {
        self.emit_bytes(&x.to_le_bytes())
    }

    /// Appends a `u32` in little-endian order.
    pub fn emit_u32(&mut self, x: u32) -> Result<Offset, EmitError> {
        self.emit_bytes(&x.to_le_bytes())
    }

    /// Appends an `f32` in little-endian order.
    pub fn emit_f32(&mut self, x: f32) -> Result<Offset, EmitError> {
        self.emit_bytes(&x.to_le_bytes())
    }

    /// Overwrites the byte at `offset`.
    ///
    /// # Panics
    ///
    /// Panics if `offset` is outside the emitted bytecode.
    pub fn patch_u8(&mut self, offset: Offset, x: u8) {
        self.bytecode[offset.0 as usize] = x;
    }

    /// Overwrites the two bytes at `offset` with `x` in little-endian order.
    ///
    /// # Panics
    ///
    /// Panics if either byte is outside the emitted bytecode.
    pub fn patch_u16(&mut self, offset: Offset, x: u16) {
        let b = x.to_le_bytes();
        let i = offset.0 as usize;
        self.bytecode[i] = b[0];
        self.bytecode[i + 1] = b[1];
    }

    /// Overwrites the two bytes at `offset` with the offset `x`.
    pub fn patch_offset(&mut self, offset: Offset, x: Offset) {
        self.patch_u16(offset, x.0);
    }

    // NOTE: I'm aware these aren't the fastest implementations since they validate quite a lot
    // during runtime, but this is just an MVP. It doesn't have to be blazingly fast.

    /// Reads `N` bytes at `*pc`, advancing `pc` past them on success.
    ///
    /// Out-of-bounds reads (including a `pc` so large that `pc + N` overflows) are reported as
    /// `ReadError` instead of panicking.
    fn read_array<const N: usize>(&self, pc: &mut usize) -> Result<[u8; N], ReadError> {
        let end = pc.checked_add(N).ok_or(ReadError)?;
        let bytes = self.bytecode.get(*pc..end).ok_or(ReadError)?;
        let array = bytes.try_into().map_err(|_| ReadError)?;
        *pc = end;
        Ok(array)
    }

    /// Reads one byte at `*pc`. `pc` is advanced even when the read fails.
    pub fn read_u8(&self, pc: &mut usize) -> Result<u8, ReadError> {
        let x = self.bytecode.get(*pc).copied();
        *pc += 1;
        x.ok_or(ReadError)
    }

    /// Reads a little-endian `u16` at `*pc`.
    pub fn read_u16(&self, pc: &mut usize) -> Result<u16, ReadError> {
        self.read_array(pc).map(u16::from_le_bytes)
    }

    /// Reads a little-endian `u32` at `*pc`.
    pub fn read_u32(&self, pc: &mut usize) -> Result<u32, ReadError> {
        self.read_array(pc).map(u32::from_le_bytes)
    }

    /// Reads a little-endian `f32` at `*pc`.
    pub fn read_f32(&self, pc: &mut usize) -> Result<f32, ReadError> {
        self.read_array(pc).map(f32::from_le_bytes)
    }

    /// Reads one byte at `*pc` and decodes it as an opcode.
    ///
    /// # Errors
    ///
    /// Returns `ReadError` if the read is out of bounds or the byte is not a valid opcode.
    pub fn read_opcode(&self, pc: &mut usize) -> Result<Opcode, ReadError> {
        let x = self.read_u8(pc)?;
        if x <= Opcode::Return as u8 {
            // SAFETY: `Opcode` is `repr(u8)` with consecutive discriminants starting at 0 and
            // `Return` as its last variant, so every value in `0..=Return` is a valid `Opcode`.
            Ok(unsafe { transmute::<u8, Opcode>(x) })
        } else {
            Err(ReadError)
        }
    }
}
/// Error returned by `Chunk::new` when the requested capacity is too large.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ChunkSizeError;

impl Display for ChunkSizeError {
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        formatter.write_str("chunk size must be less than 64 KiB")
    }
}
/// Error returned by the `Chunk::emit_*` functions when the chunk has no room left.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct EmitError;

impl Display for EmitError {
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        formatter.write_str("out of space in chunk")
    }
}
/// Error returned by the `Chunk::read_*` functions for invalid bytecode.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ReadError;

impl Display for ReadError {
    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
        formatter.write_str("invalid bytecode: out of bounds read or invalid opcode")
    }
}
/// Identifier of a definition: its index in the `Defs` table it was added to.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
pub struct DefId(u16);

impl DefId {
    /// Returns the raw index.
    pub fn to_u16(self) -> u16 {
        self.0
    }
}

/// Table of definition names with a fixed upper bound on its size.
#[derive(Debug, Clone)]
pub struct Defs {
    defs: Vec<String>,
    /// Maximum number of definitions. Stored explicitly because `Vec::capacity` may be larger
    /// than what was requested and is not preserved by `Clone`.
    capacity: usize,
}

/// Snapshot of a `Defs` table: the number of definitions it held at the time.
#[derive(Debug, Clone, Copy)]
pub struct DefsImage {
    defs: usize,
}

impl Defs {
    /// Creates an empty table that accepts at most `capacity` definitions.
    ///
    /// # Panics
    ///
    /// Panics if `capacity` is greater than `u16::MAX`.
    pub fn new(capacity: usize) -> Self {
        assert!(capacity < u16::MAX as usize + 1);
        Self {
            defs: Vec::with_capacity(capacity),
            capacity,
        }
    }

    /// Returns the number of definitions.
    pub fn len(&self) -> u16 {
        self.defs.len() as u16
    }

    /// Returns `true` if the table holds no definitions.
    pub fn is_empty(&self) -> bool {
        self.defs.is_empty()
    }

    /// Looks up the ID of the definition called `name`.
    pub fn get(&self, name: &str) -> Option<DefId> {
        self.defs
            .iter()
            .position(|n| n == name)
            .map(|index| DefId(index as u16))
    }

    /// Adds a definition called `name` and returns its ID.
    ///
    /// # Errors
    ///
    /// Returns `DefError::Exists` if the name is already defined, or `DefError::OutOfSpace` if
    /// the table is full.
    pub fn add(&mut self, name: &str) -> Result<DefId, DefError> {
        if self.defs.iter().any(|n| n == name) {
            return Err(DefError::Exists);
        }
        if self.defs.len() >= self.capacity {
            return Err(DefError::OutOfSpace);
        }

        let id = DefId(self.defs.len() as u16);
        self.defs.push(name.to_owned());
        Ok(id)
    }

    /// Takes a snapshot that `restore_image` can later roll back to.
    pub fn image(&self) -> DefsImage {
        DefsImage {
            defs: self.defs.len(),
        }
    }

    /// Removes every definition added after `image` was taken.
    ///
    /// # Panics
    ///
    /// Panics if the table currently holds fewer definitions than the image.
    pub fn restore_image(&mut self, image: &DefsImage) {
        assert!(
            image.defs <= self.defs.len(),
            "image must be a subset of the current defs"
        );
        self.defs.truncate(image.defs);
    }
}

/// Reasons why `Defs::add` can fail.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DefError {
    /// A definition with the same name already exists.
    Exists,
    /// The table already holds as many definitions as it was created for.
    OutOfSpace,
}

impl Display for DefError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(match self {
            DefError::Exists => "definition already exists",
            DefError::OutOfSpace => "too many definitions",
        })
    }
}

625
crates/haku/src/compiler.rs Normal file
View file

@ -0,0 +1,625 @@
use core::{
error::Error,
fmt::{self, Display},
};
use alloc::vec::Vec;
use crate::{
bytecode::{Chunk, DefError, DefId, Defs, EmitError, Opcode, CAPTURE_CAPTURE, CAPTURE_LOCAL},
sexp::{Ast, NodeId, NodeKind, Span},
system::System,
};
/// Everything the compiler reads from while compiling: the source text, its AST and the system.
pub struct Source<'a> {
/// The source text; node spans are sliced out of it.
pub code: &'a str,
/// The AST whose nodes are compiled.
pub ast: &'a Ast,
/// Used to resolve system functions by name when compiling calls.
pub system: &'a System,
}
/// A problem found while compiling, attached to a span of the source.
#[derive(Debug, Clone, Copy)]
pub struct Diagnostic {
/// Where in the source the problem is.
pub span: Span,
/// Static description of the problem.
pub message: &'static str,
}
/// A local variable declared in a scope; its position in `Scope::locals` is its index.
#[derive(Debug, Clone, Copy)]
struct Local<'a> {
/// Name of the variable, borrowed from the source text.
name: &'a str,
}
/// Result of resolving a name with `find_variable`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Variable {
/// A local of the scope that was searched, identified by its index in `Scope::locals`.
Local(u8),
/// A variable found in an outer scope, identified by its index in `Scope::captures`.
Captured(u8),
}
/// Variables known to one entry of the compiler's scope stack.
struct Scope<'a> {
/// Locals currently declared in this scope, in declaration order.
locals: Vec<Local<'a>>,
/// Variables of the enclosing scope that this scope refers to; filled in by `find_variable`.
captures: Vec<Variable>,
}
/// State of one compilation: where output goes and what is currently in scope.
pub struct Compiler<'a, 'b> {
/// Table that definitions are looked up in.
pub defs: &'a mut Defs,
/// Chunk the bytecode is emitted into.
pub chunk: &'b mut Chunk,
/// Diagnostics emitted so far; see `Compiler::diagnose`.
pub diagnostics: Vec<Diagnostic>,
/// Stack of scopes; the last entry is the innermost one.
scopes: Vec<Scope<'a>>,
}
impl<'a, 'b> Compiler<'a, 'b> {
    /// Creates a compiler that emits into `chunk`, starting with a single empty scope.
    pub fn new(defs: &'a mut Defs, chunk: &'b mut Chunk) -> Self {
        let root_scope = Scope {
            locals: Vec::new(),
            captures: Vec::new(),
        };
        Self {
            defs,
            chunk,
            diagnostics: Vec::with_capacity(16),
            scopes: Vec::from_iter([root_scope]),
        }
    }

    /// Records a diagnostic without ever growing the diagnostics buffer.
    ///
    /// The last free slot is filled with a "too many diagnostics" marker instead of the given
    /// diagnostic; once the buffer is full, further diagnostics are dropped.
    pub fn diagnose(&mut self, diagnostic: Diagnostic) {
        let limit = self.diagnostics.capacity();
        let count = self.diagnostics.len();
        if count >= limit {
            return;
        }

        let is_last_slot = count == limit - 1;
        let entry = if is_last_slot {
            Diagnostic {
                span: Span::new(0, 0),
                message: "too many diagnostics emitted, stopping", // hello clangd!
            }
        } else {
            diagnostic
        };
        self.diagnostics.push(entry);
    }
}
/// Result type of the compile functions; the success type defaults to `()`.
type CompileResult<T = ()> = Result<T, CompileError>;
/// Compiles the node `node_id`, dispatching on its kind.
///
/// Error nodes produce a diagnostic instead of bytecode.
///
/// # Panics
///
/// Panics if the node is an `Eof` node.
pub fn compile_expr<'a>(
    c: &mut Compiler<'a, '_>,
    src: &Source<'a>,
    node_id: NodeId,
) -> CompileResult {
    let node = src.ast.get(node_id);
    match node.kind {
        NodeKind::Nil => compile_nil(c),
        NodeKind::Number => compile_number(c, src, node_id),
        NodeKind::Ident => compile_ident(c, src, node_id),
        NodeKind::List(_, _) => compile_list(c, src, node_id),
        NodeKind::Toplevel(_) => compile_toplevel(c, src, node_id),
        NodeKind::Error(message) => {
            let span = node.span;
            c.diagnose(Diagnostic { span, message });
            Ok(())
        }
        NodeKind::Eof => unreachable!("eof node should never be emitted"),
    }
}
/// Emits the instruction that pushes `nil`.
fn compile_nil(c: &mut Compiler<'_, '_>) -> CompileResult {
    let _offset = c.chunk.emit_opcode(Opcode::Nil)?;
    Ok(())
}
/// Returned by `find_variable` when a capture's index does not fit into a `u8`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct CaptureError;
/// Resolves `name` starting in the scope at `scope_index` and working outwards.
///
/// A name declared in the scope itself resolves to `Variable::Local` (the most recent
/// declaration wins). A name found in an outer scope is registered in this scope's capture
/// list, unless already present, and resolves to `Variable::Captured`. `Ok(None)` means the
/// name is not declared in any scope.
///
/// # Errors
///
/// Returns `CaptureError` if the capture's index does not fit into a `u8`.
fn find_variable(
    c: &mut Compiler<'_, '_>,
    name: &str,
    scope_index: usize,
) -> Result<Option<Variable>, CaptureError> {
    let local_index = c.scopes[scope_index]
        .locals
        .iter()
        .rposition(|l| l.name == name);
    if let Some(index) = local_index {
        let index = u8::try_from(index).expect("a function must not declare more than 256 locals");
        return Ok(Some(Variable::Local(index)));
    }

    // The outermost scope has nowhere else to look.
    if scope_index == 0 {
        return Ok(None);
    }

    // Search upper scope if not found.
    let Some(variable) = find_variable(c, name, scope_index - 1)? else {
        return Ok(None);
    };

    let scope = &mut c.scopes[scope_index];
    let capture_index = match scope.captures.iter().position(|c| c == &variable) {
        Some(existing) => existing,
        None => {
            scope.captures.push(variable);
            scope.captures.len() - 1
        }
    };
    let capture_index = u8::try_from(capture_index).map_err(|_| CaptureError)?;
    Ok(Some(Variable::Captured(capture_index)))
}
/// Compiles an identifier.
///
/// `false` and `true` become literals. Any other name is resolved as a local, then as a
/// capture, then as a definition; names that cannot be resolved produce a diagnostic and no
/// bytecode.
fn compile_ident<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, node_id: NodeId) -> CompileResult {
    let ident = src.ast.get(node_id);
    let name = ident.span.slice(src.code);

    if name == "false" {
        c.chunk.emit_opcode(Opcode::False)?;
        return Ok(());
    }
    if name == "true" {
        c.chunk.emit_opcode(Opcode::True)?;
        return Ok(());
    }

    let innermost_scope = c.scopes.len() - 1;
    // `Some(message)` when the name could not be compiled and needs a diagnostic.
    let problem = match find_variable(c, name, innermost_scope) {
        Ok(Some(Variable::Local(index))) => {
            c.chunk.emit_opcode(Opcode::Local)?;
            c.chunk.emit_u8(index)?;
            None
        }
        Ok(Some(Variable::Captured(index))) => {
            c.chunk.emit_opcode(Opcode::Capture)?;
            c.chunk.emit_u8(index)?;
            None
        }
        Ok(None) => match c.defs.get(name) {
            Some(def_id) => {
                c.chunk.emit_opcode(Opcode::Def)?;
                c.chunk.emit_u16(def_id.to_u16())?;
                None
            }
            None => Some("undefined variable"),
        },
        Err(CaptureError) => {
            Some("too many variables captured from outer functions in this scope")
        }
    };

    if let Some(message) = problem {
        c.diagnose(Diagnostic {
            span: ident.span,
            message,
        });
    }

    Ok(())
}
/// Compiles a number literal into a `Number` instruction followed by its `f32` value.
///
/// # Panics
///
/// Panics if the node's text cannot be parsed as an `f32`.
fn compile_number(c: &mut Compiler<'_, '_>, src: &Source<'_>, node_id: NodeId) -> CompileResult {
    let literal = src.ast.get(node_id).span.slice(src.code);
    let value = literal
        .parse::<f32>()
        .expect("the parser should've gotten us a string parsable by the stdlib");

    c.chunk.emit_opcode(Opcode::Number)?;
    c.chunk.emit_f32(value)?;

    Ok(())
}
/// Compiles a list: either one of the special forms `fn`, `if`, `let`, or a function call.
///
/// For calls the arguments are compiled first. A head identifier that resolves to a system
/// function is called via the `System` fast path; otherwise the head expression is compiled and
/// invoked with `Call`.
fn compile_list<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, node_id: NodeId) -> CompileResult {
    let NodeKind::List(function_id, args) = src.ast.get(node_id).kind else {
        unreachable!("compile_list expects a List");
    };

    let function = src.ast.get(function_id);
    let name = function.span.slice(src.code);

    // Special forms are only recognised when the head is a plain identifier.
    if function.kind == NodeKind::Ident {
        match name {
            "fn" => return compile_fn(c, src, args),
            "if" => return compile_if(c, src, args),
            "let" => return compile_let(c, src, args),
            _ => {}
        }
    }

    let mut argument_count = 0;
    let mut rest = args;
    while let NodeKind::List(head, tail) = src.ast.get(rest).kind {
        compile_expr(c, src, head)?;
        argument_count += 1;
        rest = tail;
    }

    let argument_count = match u8::try_from(argument_count) {
        Ok(count) => count,
        Err(_) => {
            c.diagnose(Diagnostic {
                span: src.ast.get(rest).span,
                message: "function call has too many arguments",
            });
            0
        }
    };

    match (function.kind, (src.system.resolve_fn)(name)) {
        (NodeKind::Ident, Some(index)) => {
            c.chunk.emit_opcode(Opcode::System)?;
            c.chunk.emit_u8(index)?;
            c.chunk.emit_u8(argument_count)?;
        }
        _ => {
            // This is a bit of an oddity: we only emit the function expression _after_ the
            // arguments, but since the language is effectless this doesn't matter in practice.
            // It makes for less code in the compiler and the VM.
            compile_expr(c, src, function_id)?;
            c.chunk.emit_opcode(Opcode::Call)?;
            c.chunk.emit_u8(argument_count)?;
        }
    }

    Ok(())
}
/// Cursor over the arguments of a special form that reports missing or extra arguments.
struct WalkList {
    /// The part of the list that has not been consumed yet.
    current: NodeId,
    /// Becomes `false` once a required argument turned out to be missing.
    ok: bool,
}

impl WalkList {
    /// Starts walking the list whose first cell is `start`.
    fn new(start: NodeId) -> Self {
        Self {
            current: start,
            ok: true,
        }
    }

    /// Consumes and returns the next argument.
    ///
    /// If there is none, `message` is emitted as a diagnostic, the walk is marked as failed and
    /// `NodeId::NIL` is returned. After a failure every further call returns `NodeId::NIL`
    /// without emitting anything.
    fn expect_arg(
        &mut self,
        c: &mut Compiler<'_, '_>,
        src: &Source<'_>,
        message: &'static str,
    ) -> NodeId {
        if !self.ok {
            return NodeId::NIL;
        }

        match src.ast.get(self.current).kind {
            NodeKind::List(expr, tail) => {
                self.current = tail;
                expr
            }
            _ => {
                c.diagnose(Diagnostic {
                    span: src.ast.get(self.current).span,
                    message,
                });
                self.ok = false;
                NodeId::NIL
            }
        }
    }

    /// Emits `message` as a diagnostic if the list has not been consumed completely.
    fn expect_nil(&mut self, c: &mut Compiler<'_, '_>, src: &Source<'_>, message: &'static str) {
        let rest = src.ast.get(self.current);
        if rest.kind == NodeKind::Nil {
            return;
        }

        c.diagnose(Diagnostic {
            span: rest.span,
            message,
        });
        // NOTE: Don't set self.ok to false, since this is not a fatal error.
        // The nodes returned previously are valid and therefore it's safe to operate on them.
        // Just having extra arguments shouldn't inhibit emitting additional diagnostics in
        // the expression.
    }
}
/// Compiles `(if condition if-true if-false)`.
///
/// Emits the condition, a `JumpIfNot` over the true branch, the true branch, a `Jump` over the
/// false branch, and the false branch. Both jump targets are emitted as placeholders and
/// patched once the target offsets are known.
fn compile_if<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, args: NodeId) -> CompileResult {
    let mut walk = WalkList::new(args);

    let condition = walk.expect_arg(c, src, "missing `if` condition");
    let then_branch = walk.expect_arg(c, src, "missing `if` true branch");
    let else_branch = walk.expect_arg(c, src, "missing `if` false branch");
    walk.expect_nil(c, src, "extra arguments after `if` false branch");

    if !walk.ok {
        return Ok(());
    }

    compile_expr(c, src, condition)?;

    c.chunk.emit_opcode(Opcode::JumpIfNot)?;
    let skip_then_operand = c.chunk.emit_u16(0)?;

    compile_expr(c, src, then_branch)?;
    c.chunk.emit_opcode(Opcode::Jump)?;
    let skip_else_operand = c.chunk.emit_u16(0)?;

    // The false branch starts here; this is where a failed condition jumps to.
    let else_start = c.chunk.offset();
    c.chunk.patch_offset(skip_then_operand, else_start);

    compile_expr(c, src, else_branch)?;

    // The end of the whole expression; this is where the true branch jumps to.
    let end = c.chunk.offset();
    c.chunk.patch_offset(skip_else_operand, end);

    Ok(())
}
/// Compiles `(let ((name value) ...) expr)`.
///
/// Every binding's value is compiled in order and its name is put into scope for the bindings
/// and the expression that follow. After the expression the names are taken out of scope again
/// and a `DropLet` is emitted for the bound values.
///
/// Malformed bindings and exceeding the limit on names in scope produce diagnostics.
fn compile_let<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, args: NodeId) -> CompileResult {
    let mut list = WalkList::new(args);

    let binding_list = list.expect_arg(c, src, "missing `let` binding list ((x 1) (y 2) ...)");
    let expr = list.expect_arg(c, src, "missing expression to `let` names into");
    list.expect_nil(c, src, "extra arguments after `let` expression");

    if !list.ok {
        return Ok(());
    }

    // NOTE: Our `let` behaves like `let*` from Lisps.
    // This is because this is generally the more intuitive behaviour with how variable declarations
    // work in traditional imperative languages.
    // We do not offer an alternative to Lisp `let` to be as minimal as possible.

    let mut current = binding_list;
    // Number of binding values compiled; this is what `DropLet` has to drop.
    let mut local_count: usize = 0;
    // Number of names actually put into scope. This can be smaller than `local_count` when the
    // limit on names is hit, and it is the only count that may be removed from the scope again:
    // removing `local_count` names would take away names that belong to an enclosing `let`, or
    // underflow.
    let mut declared_count: usize = 0;
    while let NodeKind::List(head, tail) = src.ast.get(current).kind {
        if !matches!(src.ast.get(head).kind, NodeKind::List(_, _)) {
            c.diagnose(Diagnostic {
                span: src.ast.get(head).span,
                message: "`let` binding expected, like (x 1)",
            });
            current = tail;
            continue;
        }

        let mut list = WalkList::new(head);
        let ident = list.expect_arg(c, src, "binding name expected");
        let value = list.expect_arg(c, src, "binding value expected");
        list.expect_nil(c, src, "extra expressions after `let` binding value");

        if src.ast.get(ident).kind != NodeKind::Ident {
            c.diagnose(Diagnostic {
                span: src.ast.get(ident).span,
                message: "binding name must be an identifier",
            });
        }

        // NOTE: Compile expression _before_ putting the value into scope.
        // This is so that the variable cannot refer to itself, as it is yet to be declared.
        compile_expr(c, src, value)?;

        let name = src.ast.get(ident).span.slice(src.code);
        let scope = c.scopes.last_mut().unwrap();
        if scope.locals.len() >= u8::MAX as usize {
            c.diagnose(Diagnostic {
                span: src.ast.get(ident).span,
                message: "too many names bound in this function at a single time",
            });
        } else {
            scope.locals.push(Local { name });
            declared_count += 1;
        }

        local_count += 1;
        current = tail;
    }

    compile_expr(c, src, expr)?;

    // Take exactly the names declared above out of scope again.
    let scope = c.scopes.last_mut().unwrap();
    let remaining = scope.locals.len() - declared_count;
    scope.locals.truncate(remaining);

    // NOTE: If we reach more than 255 locals declared in our `let`, we should've gotten
    // a diagnostic emitted in the `while` loop beforehand.
    let local_count = u8::try_from(local_count).unwrap_or(0);
    c.chunk.emit_opcode(Opcode::DropLet)?;
    c.chunk.emit_u8(local_count)?;

    Ok(())
}
fn compile_fn<'a>(c: &mut Compiler<'a, '_>, src: &Source<'a>, args: NodeId) -> CompileResult {
let mut list = WalkList::new(args);
let param_list = list.expect_arg(c, src, "missing function parameters");
let body = list.expect_arg(c, src, "missing function body");
list.expect_nil(c, src, "extra arguments after function body");
if !list.ok {
return Ok(());
}