add SourceCode wrapping str for enforcing source code length limits at parsing boundaries
I'm actually quite happy with this API design. A little zero-cost wrapper that makes you _think_ when you need to think.
parent 3913254215
commit 6c88a041ea
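As a quick orientation before the diff: a condensed sketch of how the new type is meant to be used at a parsing boundary, pieced together from the call sites shown below. The wrapper functions and their error handling here are illustrative only, not code from this commit.

use haku::sexp::{parse_toplevel, Ast, Parser, SourceCode};

// Hypothetical caller: untrusted input has to pass through limited_len,
// which refuses anything longer than max_len and hands back a &SourceCode.
fn parse_untrusted(code: &str, max_len: usize) -> Result<(), &'static str> {
    let code = SourceCode::limited_len(code, max_len).ok_or("source code is too long")?;

    // Parser::new now takes &SourceCode, so the parser cannot be reached
    // without an explicit decision about the length limit.
    let ast = Ast::new(1024);
    let mut parser = Parser::new(ast, code);
    let _root = parse_toplevel(&mut parser);
    Ok(())
}

// Trusted callers (the tests in this diff) opt out explicitly.
fn parse_trusted(code: &str) {
    let code = SourceCode::unlimited_len(code);
    let _len = code.len(); // str methods are available through Deref<Target = str>
    let mut parser = Parser::new(Ast::new(1024), code);
    let _root = parse_toplevel(&mut parser);
}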
@@ -6,7 +6,7 @@ use std::{error::Error, fmt::Display, io::BufRead};
 use haku::{
     bytecode::{Chunk, Defs},
     compiler::{compile_expr, Compiler, Source},
-    sexp::{parse_toplevel, Ast, Parser},
+    sexp::{parse_toplevel, Ast, Parser, SourceCode},
     system::System,
     value::{BytecodeLoc, Closure, FunctionName, Ref, Value},
     vm::{Vm, VmLimits},
@@ -16,6 +16,7 @@ fn eval(code: &str) -> Result<Value, Box<dyn Error>> {
     let mut system = System::new(1);

     let ast = Ast::new(1024);
+    let code = SourceCode::unlimited_len(code);
     let mut parser = Parser::new(ast, code);
     let root = parse_toplevel(&mut parser);
     let ast = parser.ast;

@@ -12,7 +12,7 @@ use haku::{
         tiny_skia::{Pixmap, PremultipliedColorU8},
         Renderer, RendererLimits,
     },
-    sexp::{parse_toplevel, Ast, Parser},
+    sexp::{parse_toplevel, Ast, Parser, SourceCode},
     system::{ChunkId, System, SystemImage},
     value::{BytecodeLoc, Closure, FunctionName, Ref, Value},
     vm::{Exception, Vm, VmImage, VmLimits},
@@ -37,6 +37,7 @@ unsafe extern "C" fn haku_free(ptr: *mut u8, size: usize, align: usize) {

 #[derive(Debug, Clone, Copy)]
 struct Limits {
+    max_source_code_len: usize,
     max_chunks: usize,
     max_defs: usize,
     ast_capacity: usize,
@@ -53,6 +54,7 @@ struct Limits {
 impl Default for Limits {
     fn default() -> Self {
         Self {
+            max_source_code_len: 65536,
             max_chunks: 2,
             max_defs: 256,
             ast_capacity: 1024,
@@ -92,6 +94,7 @@ macro_rules! limit_setter {
     };
 }

+limit_setter!(max_source_code_len);
 limit_setter!(max_chunks);
 limit_setter!(max_defs);
 limit_setter!(ast_capacity);
@@ -193,6 +196,7 @@ unsafe extern "C" fn haku_exception_message_len(instance: *const Instance) -> u3
 #[repr(C)]
 enum StatusCode {
     Ok,
+    SourceCodeTooLong,
     ChunkTooBig,
     DiagnosticsEmitted,
     TooManyChunks,
@@ -223,6 +227,7 @@ extern "C" fn haku_is_exception(code: StatusCode) -> bool {
 extern "C" fn haku_status_string(code: StatusCode) -> *const i8 {
     match code {
         StatusCode::Ok => c"ok",
+        StatusCode::SourceCodeTooLong => c"source code is too long",
         StatusCode::ChunkTooBig => c"compiled bytecode is too large",
         StatusCode::DiagnosticsEmitted => c"diagnostics were emitted",
         StatusCode::TooManyChunks => c"too many registered bytecode chunks",
@@ -297,6 +302,10 @@ unsafe extern "C" fn haku_compile_brush(

     let code = core::str::from_utf8(slice::from_raw_parts(code, code_len as usize))
         .expect("invalid UTF-8");
+    let code = match SourceCode::limited_len(code, instance.limits.max_source_code_len) {
+        Some(code) => code,
+        None => return StatusCode::SourceCodeTooLong,
+    };

     let ast = Ast::new(instance.limits.ast_capacity);
     let mut parser = Parser::new(ast, code);

@@ -7,12 +7,12 @@ use alloc::vec::Vec;

 use crate::{
     bytecode::{Chunk, DefError, Defs, EmitError, Opcode, CAPTURE_CAPTURE, CAPTURE_LOCAL},
-    sexp::{Ast, NodeId, NodeKind, Span},
+    sexp::{Ast, NodeId, NodeKind, SourceCode, Span},
     system::System,
 };

 pub struct Source<'a> {
-    pub code: &'a str,
+    pub code: &'a SourceCode,
     pub ast: &'a Ast,
     pub system: &'a System,
 }

@@ -1,4 +1,4 @@
-use core::{cell::Cell, fmt};
+use core::{cell::Cell, fmt, ops::Deref};

 use alloc::vec::Vec;

@@ -13,8 +13,40 @@ impl Span {
         Self { start, end }
     }

-    pub fn slice<'a>(&self, source: &'a str) -> &'a str {
-        &source[self.start..self.end]
+    pub fn slice<'a>(&self, source: &'a SourceCode) -> &'a str {
+        &source.code[self.start..self.end]
+    }
+}
+
+/// Source code string with a verified size limit.
+/// An exact size limit is not enforced by this type - it only ensures the string isn't longer than
+/// intended, to not stall the parser for an unexpected amount of time.
+#[derive(Debug, PartialEq, Eq)]
+#[repr(transparent)]
+pub struct SourceCode {
+    code: str,
+}
+
+impl SourceCode {
+    pub fn limited_len(code: &str, max_len: usize) -> Option<&Self> {
+        if code.len() <= max_len {
+            Some(Self::unlimited_len(code))
+        } else {
+            None
+        }
+    }
+
+    pub fn unlimited_len(code: &str) -> &Self {
+        // SAFETY: SourceCode is a transparent wrapper around str, so converting between them is safe.
+        unsafe { core::mem::transmute(code) }
+    }
+}
+
+impl Deref for SourceCode {
+    type Target = str;
+
+    fn deref(&self) -> &Self::Target {
+        &self.code
     }
 }

@@ -94,7 +126,7 @@ impl Ast {

     pub fn write(
         &self,
-        source: &str,
+        source: &SourceCode,
         node_id: NodeId,
         w: &mut dyn fmt::Write,
         mode: AstWriteMode,
@@ -102,7 +134,7 @@ impl Ast {
     #[allow(clippy::too_many_arguments)]
     fn write_list(
         ast: &Ast,
-        source: &str,
+        source: &SourceCode,
         w: &mut dyn fmt::Write,
         mode: AstWriteMode,
         mut head: NodeId,
@@ -131,7 +163,7 @@ impl Ast {
     // NOTE: Separated out to a separate function in case we ever want to introduce auto-indentation.
     fn write_rec(
         ast: &Ast,
-        source: &str,
+        source: &SourceCode,
         w: &mut dyn fmt::Write,
         mode: AstWriteMode,
         node_id: NodeId,
@@ -177,7 +209,7 @@ pub struct NodeAllocError;

 pub struct Parser<'a> {
     pub ast: Ast,
-    input: &'a str,
+    input: &'a SourceCode,
     position: usize,
     fuel: Cell<usize>,
     alloc_error: NodeId,
@@ -186,7 +218,7 @@ pub struct Parser<'a> {
 impl<'a> Parser<'a> {
     const FUEL: usize = 256;

-    pub fn new(mut ast: Ast, input: &'a str) -> Self {
+    pub fn new(mut ast: Ast, input: &'a SourceCode) -> Self {
         let alloc_error = ast
             .alloc(Node {
                 span: Span::new(0, 0),
@@ -412,12 +444,13 @@ mod tests {
         expected: &str,
     ) -> Result<(), Box<dyn Error>> {
         let ast = Ast::new(16);
-        let mut p = Parser::new(ast, source);
+        let code = SourceCode::unlimited_len(source);
+        let mut p = Parser::new(ast, code);
         let node = f(&mut p);
         let ast = p.ast;

         let mut s = String::new();
-        ast.write(source, node, &mut s, AstWriteMode::Spans)?;
+        ast.write(code, node, &mut s, AstWriteMode::Spans)?;

         assert_eq!(s, expected);


@@ -3,7 +3,7 @@ use std::error::Error;
 use haku::{
     bytecode::{Chunk, Defs},
     compiler::{compile_expr, Compiler, Source},
-    sexp::{self, Ast, Parser},
+    sexp::{self, Ast, Parser, SourceCode},
     system::System,
     value::{BytecodeLoc, Closure, FunctionName, Ref, RefId, Value},
     vm::{Vm, VmLimits},
@@ -13,6 +13,7 @@ fn eval(code: &str) -> Result<Value, Box<dyn Error>> {
     let mut system = System::new(1);

     let ast = Ast::new(1024);
+    let code = SourceCode::unlimited_len(code);
     let mut parser = Parser::new(ast, code);
     let root = sexp::parse_toplevel(&mut parser);
     let ast = parser.ast;

@@ -8,7 +8,7 @@ use haku::{
     bytecode::{Chunk, Defs, DefsImage},
     compiler::{Compiler, Source},
     render::{tiny_skia::Pixmap, Renderer, RendererLimits},
-    sexp::{Ast, Parser},
+    sexp::{Ast, Parser, SourceCode},
     system::{ChunkId, System, SystemImage},
     value::{BytecodeLoc, Closure, FunctionName, Ref, Value},
     vm::{Vm, VmImage, VmLimits},
@@ -22,6 +22,7 @@ use crate::schema::Vec2;
 // because we do some dynamic typing magic over on the JavaScript side to automatically call all
 // the appropriate functions for setting these limits on the client side.
 pub struct Limits {
+    pub max_source_code_len: usize,
     pub max_chunks: usize,
     pub max_defs: usize,
     pub ast_capacity: usize,
@@ -88,6 +89,8 @@ impl Haku {
         self.reset();

         let ast = Ast::new(self.limits.ast_capacity);
+        let code = SourceCode::limited_len(code, self.limits.max_source_code_len)
+            .ok_or_eyre("source code is too long")?;
         let mut parser = Parser::new(ast, code);
         let root = haku::sexp::parse_toplevel(&mut parser);
         let ast = parser.ast;

@@ -33,6 +33,9 @@ interval_seconds = 10
 # Technically clients may override these settings with some hackery, but then the server may not
 # register changes they make to the canvas.

+# Maximum length of source code.
+max_source_code_len = 65536
+
 # Maximum amount of source code chunks.
 # This should be at least 2, to allow for loading in a standard library chunk.
 max_chunks = 2
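The Deref impl is what keeps the wrapper cheap to live with: code that only reads the source can keep treating it as a plain str, while any function that accepts untrusted input has to name &SourceCode in its signature and therefore make a decision about the limit. A tiny hypothetical helper to illustrate (not part of the commit):

use haku::sexp::SourceCode;

// Hypothetical helper: &SourceCode derefs to str, so str methods work unchanged.
fn first_line(code: &SourceCode) -> &str {
    code.lines().next().unwrap_or("")
}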