From 6c88a041ea9896d995ceaa2e345e922239f66705 Mon Sep 17 00:00:00 2001
From: liquidev
Date: Thu, 22 Aug 2024 20:27:25 +0200
Subject: [PATCH] add SourceCode wrapping str for enforcing source code length
 limits at parsing boundaries

I'm actually quite happy with this API design. a little zero-cost wrapper that
makes you _think_ when you need to think.
---
 crates/haku-cli/src/main.rs   |  3 +-
 crates/haku-wasm/src/lib.rs   | 11 +++++++-
 crates/haku/src/compiler.rs   |  4 +--
 crates/haku/src/sexp.rs       | 53 ++++++++++++++++++++++++++++-------
 crates/haku/tests/language.rs |  3 +-
 crates/rkgk/src/haku.rs       |  5 +++-
 rkgk.toml                     |  3 ++
 7 files changed, 66 insertions(+), 16 deletions(-)

diff --git a/crates/haku-cli/src/main.rs b/crates/haku-cli/src/main.rs
index 02f2770..5c506c0 100644
--- a/crates/haku-cli/src/main.rs
+++ b/crates/haku-cli/src/main.rs
@@ -6,7 +6,7 @@ use std::{error::Error, fmt::Display, io::BufRead};
 use haku::{
     bytecode::{Chunk, Defs},
     compiler::{compile_expr, Compiler, Source},
-    sexp::{parse_toplevel, Ast, Parser},
+    sexp::{parse_toplevel, Ast, Parser, SourceCode},
     system::System,
     value::{BytecodeLoc, Closure, FunctionName, Ref, Value},
     vm::{Vm, VmLimits},
@@ -16,6 +16,7 @@ fn eval(code: &str) -> Result<Value, Box<dyn Error>> {
     let mut system = System::new(1);
 
     let ast = Ast::new(1024);
+    let code = SourceCode::unlimited_len(code);
     let mut parser = Parser::new(ast, code);
     let root = parse_toplevel(&mut parser);
     let ast = parser.ast;
diff --git a/crates/haku-wasm/src/lib.rs b/crates/haku-wasm/src/lib.rs
index 92dc725..b510c29 100644
--- a/crates/haku-wasm/src/lib.rs
+++ b/crates/haku-wasm/src/lib.rs
@@ -12,7 +12,7 @@ use haku::{
         tiny_skia::{Pixmap, PremultipliedColorU8},
         Renderer, RendererLimits,
     },
-    sexp::{parse_toplevel, Ast, Parser},
+    sexp::{parse_toplevel, Ast, Parser, SourceCode},
     system::{ChunkId, System, SystemImage},
     value::{BytecodeLoc, Closure, FunctionName, Ref, Value},
     vm::{Exception, Vm, VmImage, VmLimits},
@@ -37,6 +37,7 @@ unsafe extern "C" fn haku_free(ptr: *mut u8, size: usize, align: usize) {
 
 #[derive(Debug, Clone, Copy)]
 struct Limits {
+    max_source_code_len: usize,
     max_chunks: usize,
     max_defs: usize,
     ast_capacity: usize,
@@ -53,6 +54,7 @@ impl Default for Limits {
 
     fn default() -> Self {
         Self {
+            max_source_code_len: 65536,
             max_chunks: 2,
             max_defs: 256,
             ast_capacity: 1024,
@@ -92,6 +94,7 @@ macro_rules! limit_setter {
     };
 }
 
+limit_setter!(max_source_code_len);
 limit_setter!(max_chunks);
 limit_setter!(max_defs);
 limit_setter!(ast_capacity);
@@ -193,6 +196,7 @@ unsafe extern "C" fn haku_exception_message_len(instance: *const Instance) -> u3
 #[repr(C)]
 enum StatusCode {
     Ok,
+    SourceCodeTooLong,
     ChunkTooBig,
     DiagnosticsEmitted,
     TooManyChunks,
@@ -223,6 +227,7 @@ extern "C" fn haku_is_exception(code: StatusCode) -> bool {
 extern "C" fn haku_status_string(code: StatusCode) -> *const i8 {
     match code {
         StatusCode::Ok => c"ok",
+        StatusCode::SourceCodeTooLong => c"source code is too long",
         StatusCode::ChunkTooBig => c"compiled bytecode is too large",
         StatusCode::DiagnosticsEmitted => c"diagnostics were emitted",
         StatusCode::TooManyChunks => c"too many registered bytecode chunks",
@@ -297,6 +302,10 @@ unsafe extern "C" fn haku_compile_brush(
 
     let code = core::str::from_utf8(slice::from_raw_parts(code, code_len as usize))
         .expect("invalid UTF-8");
+    let code = match SourceCode::limited_len(code, instance.limits.max_source_code_len) {
+        Some(code) => code,
+        None => return StatusCode::SourceCodeTooLong,
+    };
 
     let ast = Ast::new(instance.limits.ast_capacity);
     let mut parser = Parser::new(ast, code);
diff --git a/crates/haku/src/compiler.rs b/crates/haku/src/compiler.rs
index 4a0e65c..a6966f3 100644
--- a/crates/haku/src/compiler.rs
+++ b/crates/haku/src/compiler.rs
@@ -7,12 +7,12 @@ use alloc::vec::Vec;
 
 use crate::{
     bytecode::{Chunk, DefError, Defs, EmitError, Opcode, CAPTURE_CAPTURE, CAPTURE_LOCAL},
-    sexp::{Ast, NodeId, NodeKind, Span},
+    sexp::{Ast, NodeId, NodeKind, SourceCode, Span},
     system::System,
 };
 
 pub struct Source<'a> {
-    pub code: &'a str,
+    pub code: &'a SourceCode,
     pub ast: &'a Ast,
     pub system: &'a System,
 }
diff --git a/crates/haku/src/sexp.rs b/crates/haku/src/sexp.rs
index 96892b3..9a2b8b2 100644
--- a/crates/haku/src/sexp.rs
+++ b/crates/haku/src/sexp.rs
@@ -1,4 +1,4 @@
-use core::{cell::Cell, fmt};
+use core::{cell::Cell, fmt, ops::Deref};
 
 use alloc::vec::Vec;
 
@@ -13,8 +13,40 @@ impl Span {
         Self { start, end }
     }
 
-    pub fn slice<'a>(&self, source: &'a str) -> &'a str {
-        &source[self.start..self.end]
+    pub fn slice<'a>(&self, source: &'a SourceCode) -> &'a str {
+        &source.code[self.start..self.end]
+    }
+}
+
+/// Source code string with a verified size limit.
+/// An exact size limit is not enforced by this type - it only ensures the string isn't longer than
+/// intended, to not stall the parser for an unexpected amount of time.
+#[derive(Debug, PartialEq, Eq)]
+#[repr(transparent)]
+pub struct SourceCode {
+    code: str,
+}
+
+impl SourceCode {
+    pub fn limited_len(code: &str, max_len: usize) -> Option<&Self> {
+        if code.len() <= max_len {
+            Some(Self::unlimited_len(code))
+        } else {
+            None
+        }
+    }
+
+    pub fn unlimited_len(code: &str) -> &Self {
+        // SAFETY: SourceCode is a transparent wrapper around str, so converting between them is safe.
+        unsafe { core::mem::transmute(code) }
+    }
+}
+
+impl Deref for SourceCode {
+    type Target = str;
+
+    fn deref(&self) -> &Self::Target {
+        &self.code
     }
 }
 
@@ -94,7 +126,7 @@ impl Ast {
 
     pub fn write(
         &self,
-        source: &str,
+        source: &SourceCode,
         node_id: NodeId,
         w: &mut dyn fmt::Write,
         mode: AstWriteMode,
@@ -102,7 +134,7 @@ impl Ast {
         #[allow(clippy::too_many_arguments)]
         fn write_list(
             ast: &Ast,
-            source: &str,
+            source: &SourceCode,
             w: &mut dyn fmt::Write,
             mode: AstWriteMode,
             mut head: NodeId,
@@ -131,7 +163,7 @@ impl Ast {
         // NOTE: Separated out to a separate function in case we ever want to introduce auto-indentation.
         fn write_rec(
             ast: &Ast,
-            source: &str,
+            source: &SourceCode,
             w: &mut dyn fmt::Write,
             mode: AstWriteMode,
             node_id: NodeId,
@@ -177,7 +209,7 @@ pub struct NodeAllocError;
 pub struct Parser<'a> {
     pub ast: Ast,
-    input: &'a str,
+    input: &'a SourceCode,
     position: usize,
     fuel: Cell<usize>,
     alloc_error: NodeId,
 }
@@ -186,7 +218,7 @@ impl<'a> Parser<'a> {
     const FUEL: usize = 256;
 
-    pub fn new(mut ast: Ast, input: &'a str) -> Self {
+    pub fn new(mut ast: Ast, input: &'a SourceCode) -> Self {
         let alloc_error = ast
             .alloc(Node {
                 span: Span::new(0, 0),
@@ -412,12 +444,13 @@ mod tests {
         expected: &str,
     ) -> Result<(), Box<dyn Error>> {
         let ast = Ast::new(16);
-        let mut p = Parser::new(ast, source);
+        let code = SourceCode::unlimited_len(source);
+        let mut p = Parser::new(ast, code);
         let node = f(&mut p);
         let ast = p.ast;
 
         let mut s = String::new();
-        ast.write(source, node, &mut s, AstWriteMode::Spans)?;
+        ast.write(code, node, &mut s, AstWriteMode::Spans)?;
 
         assert_eq!(s, expected);
 
diff --git a/crates/haku/tests/language.rs b/crates/haku/tests/language.rs
index 594d549..787c46f 100644
--- a/crates/haku/tests/language.rs
+++ b/crates/haku/tests/language.rs
@@ -3,7 +3,7 @@ use std::error::Error;
 use haku::{
     bytecode::{Chunk, Defs},
     compiler::{compile_expr, Compiler, Source},
-    sexp::{self, Ast, Parser},
+    sexp::{self, Ast, Parser, SourceCode},
     system::System,
     value::{BytecodeLoc, Closure, FunctionName, Ref, RefId, Value},
     vm::{Vm, VmLimits},
@@ -13,6 +13,7 @@ fn eval(code: &str) -> Result<Value, Box<dyn Error>> {
     let mut system = System::new(1);
 
     let ast = Ast::new(1024);
+    let code = SourceCode::unlimited_len(code);
     let mut parser = Parser::new(ast, code);
     let root = sexp::parse_toplevel(&mut parser);
     let ast = parser.ast;
diff --git a/crates/rkgk/src/haku.rs b/crates/rkgk/src/haku.rs
index 6a5e4ae..6f4aa19 100644
--- a/crates/rkgk/src/haku.rs
+++ b/crates/rkgk/src/haku.rs
@@ -8,7 +8,7 @@ use haku::{
     bytecode::{Chunk, Defs, DefsImage},
     compiler::{Compiler, Source},
     render::{tiny_skia::Pixmap, Renderer, RendererLimits},
-    sexp::{Ast, Parser},
+    sexp::{Ast, Parser, SourceCode},
     system::{ChunkId, System, SystemImage},
     value::{BytecodeLoc, Closure, FunctionName, Ref, Value},
     vm::{Vm, VmImage, VmLimits},
@@ -22,6 +22,7 @@ use crate::schema::Vec2;
 // because we do some dynamic typing magic over on the JavaScript side to automatically call all
 // the appropriate functions for setting these limits on the client side.
 pub struct Limits {
+    pub max_source_code_len: usize,
     pub max_chunks: usize,
     pub max_defs: usize,
     pub ast_capacity: usize,
@@ -88,6 +89,8 @@ impl Haku {
         self.reset();
 
         let ast = Ast::new(self.limits.ast_capacity);
+        let code = SourceCode::limited_len(code, self.limits.max_source_code_len)
+            .ok_or_eyre("source code is too long")?;
         let mut parser = Parser::new(ast, code);
         let root = haku::sexp::parse_toplevel(&mut parser);
         let ast = parser.ast;
diff --git a/rkgk.toml b/rkgk.toml
index 572feb2..92c2eb1 100644
--- a/rkgk.toml
+++ b/rkgk.toml
@@ -33,6 +33,9 @@ interval_seconds = 10
 # Technically clients may override these settings with some hackery, but then the server may not
 # register changes they make to the canvas.
 
+# Maximum length of source code.
+max_source_code_len = 65536
+
 # Maximum amount of source code chunks.
 # This should be at least 2, to allow for loading in a standard library chunk.
 max_chunks = 2
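
A minimal usage sketch of the new API, not part of the patch itself: parse_brush is a
hypothetical caller, the error type is arbitrary, and the 65536 limit only mirrors the
rkgk.toml default added above. The point of the design is that a parsing boundary has to
pick limited_len or unlimited_len explicitly before a Parser can even be constructed:

    use haku::sexp::{parse_toplevel, Ast, Parser, SourceCode};

    // Hypothetical parsing boundary: reject oversized input up front, then hand
    // the validated &SourceCode to the parser, as in the hunks above.
    fn parse_brush(code: &str) -> Result<Ast, &'static str> {
        let code = SourceCode::limited_len(code, 65536).ok_or("source code is too long")?;
        let ast = Ast::new(1024);
        let mut parser = Parser::new(ast, code);
        let _root = parse_toplevel(&mut parser);
        Ok(parser.ast)
    }

Trusted inputs (the CLI and the test suites above) opt out via unlimited_len, which keeps
the wrapper zero-cost while making the unchecked case a deliberate choice.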