add SourceCode wrapping str for enforcing source code length limits at parsing boundaries

I'm actually quite happy with this API design. A little zero-cost wrapper that makes you _think_ when you need to think.
This commit is contained in:
liquidex 2024-08-22 20:27:25 +02:00
parent 3913254215
commit 6c88a041ea
7 changed files with 66 additions and 16 deletions

View file

@ -6,7 +6,7 @@ use std::{error::Error, fmt::Display, io::BufRead};
use haku::{ use haku::{
bytecode::{Chunk, Defs}, bytecode::{Chunk, Defs},
compiler::{compile_expr, Compiler, Source}, compiler::{compile_expr, Compiler, Source},
sexp::{parse_toplevel, Ast, Parser}, sexp::{parse_toplevel, Ast, Parser, SourceCode},
system::System, system::System,
value::{BytecodeLoc, Closure, FunctionName, Ref, Value}, value::{BytecodeLoc, Closure, FunctionName, Ref, Value},
vm::{Vm, VmLimits}, vm::{Vm, VmLimits},
@ -16,6 +16,7 @@ fn eval(code: &str) -> Result<Value, Box<dyn Error>> {
let mut system = System::new(1); let mut system = System::new(1);
let ast = Ast::new(1024); let ast = Ast::new(1024);
let code = SourceCode::unlimited_len(code);
let mut parser = Parser::new(ast, code); let mut parser = Parser::new(ast, code);
let root = parse_toplevel(&mut parser); let root = parse_toplevel(&mut parser);
let ast = parser.ast; let ast = parser.ast;

View file

@ -12,7 +12,7 @@ use haku::{
tiny_skia::{Pixmap, PremultipliedColorU8}, tiny_skia::{Pixmap, PremultipliedColorU8},
Renderer, RendererLimits, Renderer, RendererLimits,
}, },
sexp::{parse_toplevel, Ast, Parser}, sexp::{parse_toplevel, Ast, Parser, SourceCode},
system::{ChunkId, System, SystemImage}, system::{ChunkId, System, SystemImage},
value::{BytecodeLoc, Closure, FunctionName, Ref, Value}, value::{BytecodeLoc, Closure, FunctionName, Ref, Value},
vm::{Exception, Vm, VmImage, VmLimits}, vm::{Exception, Vm, VmImage, VmLimits},
@ -37,6 +37,7 @@ unsafe extern "C" fn haku_free(ptr: *mut u8, size: usize, align: usize) {
#[derive(Debug, Clone, Copy)] #[derive(Debug, Clone, Copy)]
struct Limits { struct Limits {
max_source_code_len: usize,
max_chunks: usize, max_chunks: usize,
max_defs: usize, max_defs: usize,
ast_capacity: usize, ast_capacity: usize,
@ -53,6 +54,7 @@ struct Limits {
impl Default for Limits { impl Default for Limits {
fn default() -> Self { fn default() -> Self {
Self { Self {
max_source_code_len: 65536,
max_chunks: 2, max_chunks: 2,
max_defs: 256, max_defs: 256,
ast_capacity: 1024, ast_capacity: 1024,
@ -92,6 +94,7 @@ macro_rules! limit_setter {
}; };
} }
limit_setter!(max_source_code_len);
limit_setter!(max_chunks); limit_setter!(max_chunks);
limit_setter!(max_defs); limit_setter!(max_defs);
limit_setter!(ast_capacity); limit_setter!(ast_capacity);
@ -193,6 +196,7 @@ unsafe extern "C" fn haku_exception_message_len(instance: *const Instance) -> u3
#[repr(C)] #[repr(C)]
enum StatusCode { enum StatusCode {
Ok, Ok,
SourceCodeTooLong,
ChunkTooBig, ChunkTooBig,
DiagnosticsEmitted, DiagnosticsEmitted,
TooManyChunks, TooManyChunks,
@ -223,6 +227,7 @@ extern "C" fn haku_is_exception(code: StatusCode) -> bool {
extern "C" fn haku_status_string(code: StatusCode) -> *const i8 { extern "C" fn haku_status_string(code: StatusCode) -> *const i8 {
match code { match code {
StatusCode::Ok => c"ok", StatusCode::Ok => c"ok",
StatusCode::SourceCodeTooLong => c"source code is too long",
StatusCode::ChunkTooBig => c"compiled bytecode is too large", StatusCode::ChunkTooBig => c"compiled bytecode is too large",
StatusCode::DiagnosticsEmitted => c"diagnostics were emitted", StatusCode::DiagnosticsEmitted => c"diagnostics were emitted",
StatusCode::TooManyChunks => c"too many registered bytecode chunks", StatusCode::TooManyChunks => c"too many registered bytecode chunks",
@ -297,6 +302,10 @@ unsafe extern "C" fn haku_compile_brush(
let code = core::str::from_utf8(slice::from_raw_parts(code, code_len as usize)) let code = core::str::from_utf8(slice::from_raw_parts(code, code_len as usize))
.expect("invalid UTF-8"); .expect("invalid UTF-8");
let code = match SourceCode::limited_len(code, instance.limits.max_source_code_len) {
Some(code) => code,
None => return StatusCode::SourceCodeTooLong,
};
let ast = Ast::new(instance.limits.ast_capacity); let ast = Ast::new(instance.limits.ast_capacity);
let mut parser = Parser::new(ast, code); let mut parser = Parser::new(ast, code);

View file

@ -7,12 +7,12 @@ use alloc::vec::Vec;
use crate::{ use crate::{
bytecode::{Chunk, DefError, Defs, EmitError, Opcode, CAPTURE_CAPTURE, CAPTURE_LOCAL}, bytecode::{Chunk, DefError, Defs, EmitError, Opcode, CAPTURE_CAPTURE, CAPTURE_LOCAL},
sexp::{Ast, NodeId, NodeKind, Span}, sexp::{Ast, NodeId, NodeKind, SourceCode, Span},
system::System, system::System,
}; };
pub struct Source<'a> { pub struct Source<'a> {
pub code: &'a str, pub code: &'a SourceCode,
pub ast: &'a Ast, pub ast: &'a Ast,
pub system: &'a System, pub system: &'a System,
} }

View file

@ -1,4 +1,4 @@
use core::{cell::Cell, fmt}; use core::{cell::Cell, fmt, ops::Deref};
use alloc::vec::Vec; use alloc::vec::Vec;
@ -13,8 +13,40 @@ impl Span {
Self { start, end } Self { start, end }
} }
pub fn slice<'a>(&self, source: &'a str) -> &'a str { pub fn slice<'a>(&self, source: &'a SourceCode) -> &'a str {
&source[self.start..self.end] &source.code[self.start..self.end]
}
}
/// Source code string with a verified size limit.
///
/// An exact size limit is not enforced by this type - it only ensures the string isn't longer
/// than intended, to not stall the parser for an unexpected amount of time.
#[derive(Debug, PartialEq, Eq)]
#[repr(transparent)]
pub struct SourceCode {
    code: str,
}

impl SourceCode {
    /// Wraps `code` if it is at most `max_len` long; returns `None` otherwise.
    ///
    /// Note that the limit is checked against the *byte* length of the string, not the
    /// number of characters.
    pub fn limited_len(code: &str, max_len: usize) -> Option<&Self> {
        if code.len() <= max_len {
            Some(Self::unlimited_len(code))
        } else {
            None
        }
    }

    /// Wraps `code` without enforcing any length limit.
    ///
    /// Prefer [`SourceCode::limited_len`] at boundaries that receive untrusted input; this
    /// constructor is intended for tests and other call sites where the input size is known
    /// to be reasonable.
    pub fn unlimited_len(code: &str) -> &Self {
        // SAFETY: SourceCode is #[repr(transparent)] over str, so a *const str may be cast to
        // a *const SourceCode and dereferenced; the resulting reference shares `code`'s layout
        // and lifetime. A raw pointer cast is used instead of mem::transmute because
        // transmuting fat (unsized) references depends on their unspecified representation.
        unsafe { &*(code as *const str as *const Self) }
    }
}

impl Deref for SourceCode {
    type Target = str;

    fn deref(&self) -> &Self::Target {
        &self.code
    }
}
} }
@ -94,7 +126,7 @@ impl Ast {
pub fn write( pub fn write(
&self, &self,
source: &str, source: &SourceCode,
node_id: NodeId, node_id: NodeId,
w: &mut dyn fmt::Write, w: &mut dyn fmt::Write,
mode: AstWriteMode, mode: AstWriteMode,
@ -102,7 +134,7 @@ impl Ast {
#[allow(clippy::too_many_arguments)] #[allow(clippy::too_many_arguments)]
fn write_list( fn write_list(
ast: &Ast, ast: &Ast,
source: &str, source: &SourceCode,
w: &mut dyn fmt::Write, w: &mut dyn fmt::Write,
mode: AstWriteMode, mode: AstWriteMode,
mut head: NodeId, mut head: NodeId,
@ -131,7 +163,7 @@ impl Ast {
// NOTE: Separated out to a separate function in case we ever want to introduce auto-indentation. // NOTE: Separated out to a separate function in case we ever want to introduce auto-indentation.
fn write_rec( fn write_rec(
ast: &Ast, ast: &Ast,
source: &str, source: &SourceCode,
w: &mut dyn fmt::Write, w: &mut dyn fmt::Write,
mode: AstWriteMode, mode: AstWriteMode,
node_id: NodeId, node_id: NodeId,
@ -177,7 +209,7 @@ pub struct NodeAllocError;
pub struct Parser<'a> { pub struct Parser<'a> {
pub ast: Ast, pub ast: Ast,
input: &'a str, input: &'a SourceCode,
position: usize, position: usize,
fuel: Cell<usize>, fuel: Cell<usize>,
alloc_error: NodeId, alloc_error: NodeId,
@ -186,7 +218,7 @@ pub struct Parser<'a> {
impl<'a> Parser<'a> { impl<'a> Parser<'a> {
const FUEL: usize = 256; const FUEL: usize = 256;
pub fn new(mut ast: Ast, input: &'a str) -> Self { pub fn new(mut ast: Ast, input: &'a SourceCode) -> Self {
let alloc_error = ast let alloc_error = ast
.alloc(Node { .alloc(Node {
span: Span::new(0, 0), span: Span::new(0, 0),
@ -412,12 +444,13 @@ mod tests {
expected: &str, expected: &str,
) -> Result<(), Box<dyn Error>> { ) -> Result<(), Box<dyn Error>> {
let ast = Ast::new(16); let ast = Ast::new(16);
let mut p = Parser::new(ast, source); let code = SourceCode::unlimited_len(source);
let mut p = Parser::new(ast, code);
let node = f(&mut p); let node = f(&mut p);
let ast = p.ast; let ast = p.ast;
let mut s = String::new(); let mut s = String::new();
ast.write(source, node, &mut s, AstWriteMode::Spans)?; ast.write(code, node, &mut s, AstWriteMode::Spans)?;
assert_eq!(s, expected); assert_eq!(s, expected);

View file

@ -3,7 +3,7 @@ use std::error::Error;
use haku::{ use haku::{
bytecode::{Chunk, Defs}, bytecode::{Chunk, Defs},
compiler::{compile_expr, Compiler, Source}, compiler::{compile_expr, Compiler, Source},
sexp::{self, Ast, Parser}, sexp::{self, Ast, Parser, SourceCode},
system::System, system::System,
value::{BytecodeLoc, Closure, FunctionName, Ref, RefId, Value}, value::{BytecodeLoc, Closure, FunctionName, Ref, RefId, Value},
vm::{Vm, VmLimits}, vm::{Vm, VmLimits},
@ -13,6 +13,7 @@ fn eval(code: &str) -> Result<Value, Box<dyn Error>> {
let mut system = System::new(1); let mut system = System::new(1);
let ast = Ast::new(1024); let ast = Ast::new(1024);
let code = SourceCode::unlimited_len(code);
let mut parser = Parser::new(ast, code); let mut parser = Parser::new(ast, code);
let root = sexp::parse_toplevel(&mut parser); let root = sexp::parse_toplevel(&mut parser);
let ast = parser.ast; let ast = parser.ast;

View file

@ -8,7 +8,7 @@ use haku::{
bytecode::{Chunk, Defs, DefsImage}, bytecode::{Chunk, Defs, DefsImage},
compiler::{Compiler, Source}, compiler::{Compiler, Source},
render::{tiny_skia::Pixmap, Renderer, RendererLimits}, render::{tiny_skia::Pixmap, Renderer, RendererLimits},
sexp::{Ast, Parser}, sexp::{Ast, Parser, SourceCode},
system::{ChunkId, System, SystemImage}, system::{ChunkId, System, SystemImage},
value::{BytecodeLoc, Closure, FunctionName, Ref, Value}, value::{BytecodeLoc, Closure, FunctionName, Ref, Value},
vm::{Vm, VmImage, VmLimits}, vm::{Vm, VmImage, VmLimits},
@ -22,6 +22,7 @@ use crate::schema::Vec2;
// because we do some dynamic typing magic over on the JavaScript side to automatically call all // because we do some dynamic typing magic over on the JavaScript side to automatically call all
// the appropriate functions for setting these limits on the client side. // the appropriate functions for setting these limits on the client side.
pub struct Limits { pub struct Limits {
pub max_source_code_len: usize,
pub max_chunks: usize, pub max_chunks: usize,
pub max_defs: usize, pub max_defs: usize,
pub ast_capacity: usize, pub ast_capacity: usize,
@ -88,6 +89,8 @@ impl Haku {
self.reset(); self.reset();
let ast = Ast::new(self.limits.ast_capacity); let ast = Ast::new(self.limits.ast_capacity);
let code = SourceCode::limited_len(code, self.limits.max_source_code_len)
.ok_or_eyre("source code is too long")?;
let mut parser = Parser::new(ast, code); let mut parser = Parser::new(ast, code);
let root = haku::sexp::parse_toplevel(&mut parser); let root = haku::sexp::parse_toplevel(&mut parser);
let ast = parser.ast; let ast = parser.ast;

View file

@ -33,6 +33,9 @@ interval_seconds = 10
# Technically clients may override these settings with some hackery, but then the server may not # Technically clients may override these settings with some hackery, but then the server may not
# register changes they make to the canvas. # register changes they make to the canvas.
# Maximum length of source code.
max_source_code_len = 65536
# Maximum amount of source code chunks. # Maximum amount of source code chunks.
# This should be at least 2, to allow for loading in a standard library chunk. # This should be at least 2, to allow for loading in a standard library chunk.
max_chunks = 2 max_chunks = 2