From 0a185250da5b6c82a7aaed2a5e9b9815c28d5a34 Mon Sep 17 00:00:00 2001 From: lqdev Date: Fri, 18 Aug 2023 13:25:20 +0200 Subject: [PATCH] rewrite the parser to produce an AST --- Cargo.lock | 58 +++++++++++ Cargo.toml | 3 + crates/treehouse-format/Cargo.toml | 1 + crates/treehouse-format/src/ast.rs | 73 +++++++++++++ crates/treehouse-format/src/lib.rs | 103 ++----------------- crates/treehouse-format/src/pull.rs | 107 ++++++++++++++++++++ crates/treehouse-incubator/Cargo.toml | 1 + crates/treehouse-incubator/src/main.rs | 99 +++++++++++++----- crates/treehouse-incubator/src/tree_html.rs | 30 ------ static/index.hbs | 13 --- static/main.css | 0 template/index.hbs | 14 +++ treehouse.toml | 4 +- 13 files changed, 344 insertions(+), 162 deletions(-) create mode 100644 crates/treehouse-format/src/ast.rs create mode 100644 crates/treehouse-format/src/pull.rs delete mode 100644 crates/treehouse-incubator/src/tree_html.rs delete mode 100644 static/index.hbs create mode 100644 static/main.css create mode 100644 template/index.hbs diff --git a/Cargo.lock b/Cargo.lock index 22f201a..5232b1c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -23,6 +23,16 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "codespan-reporting" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3538270d33cc669650c4b093848450d380def10c331d38c768e34cac80576e6e" +dependencies = [ + "termcolor", + "unicode-width", +] + [[package]] name = "cpufeatures" version = "0.2.9" @@ -224,6 +234,15 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "termcolor" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be55cf8942feac5c765c2c993422806843c9a9a45d4d5c407ad6dd2ea95eb9b6" +dependencies = [ + "winapi-util", +] + [[package]] name = "thiserror" version = "1.0.47" @@ -248,6 +267,7 @@ dependencies = [ name = "treehouse-format" version = "0.1.0" dependencies = [ + "log", "thiserror", ] @@ -255,6 +275,7 @@ dependencies = [ name = "treehouse-incubator" version = "0.1.0" dependencies = [ + "codespan-reporting", "handlebars", "pulldown-cmark", "thiserror", @@ -288,8 +309,45 @@ version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c" +[[package]] +name = "unicode-width" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b" + [[package]] name = "version_check" version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" +dependencies = [ + "winapi", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/Cargo.toml b/Cargo.toml index 06237c5..06b4dc1 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,4 +3,7 @@ members = ["crates/*"] resolver = "2" [workspace.dependencies] + +log = "0.4.20" + treehouse-format = { path = "crates/treehouse-format" } diff --git a/crates/treehouse-format/Cargo.toml b/crates/treehouse-format/Cargo.toml index 7db6b26..cd10f3f 100644 --- a/crates/treehouse-format/Cargo.toml +++ b/crates/treehouse-format/Cargo.toml @@ -5,3 +5,4 @@ edition = "2021" [dependencies] thiserror = "1.0.47" +log = { workspace = true } diff --git a/crates/treehouse-format/src/ast.rs b/crates/treehouse-format/src/ast.rs new file mode 100644 index 0000000..a380a25 --- /dev/null +++ b/crates/treehouse-format/src/ast.rs @@ -0,0 +1,73 @@ +use std::ops::Range; + +use crate::{ + pull::{BranchEvent, BranchKind, Parser}, + ParseError, ParseErrorKind, +}; + +#[derive(Debug, Clone)] +pub struct Roots { + pub branches: Vec, +} + +impl Roots { + pub fn parse(parser: &mut Parser) -> Result { + let mut branches = vec![]; + while let Some((branch, indent_level)) = Branch::parse_with_indent_level(parser)? { + if indent_level != 0 { + return Err(ParseErrorKind::RootIndentLevel.at(branch.kind_span)); + } + branches.push(branch); + } + Ok(Self { branches }) + } +} + +#[derive(Debug, Clone)] +pub struct Branch { + pub attributes: Range, + pub kind: BranchKind, + pub kind_span: Range, + pub content: Range, + pub children: Vec, +} + +impl From for Branch { + fn from(branch: BranchEvent) -> Self { + Self { + attributes: branch.attributes, + kind: branch.kind, + kind_span: branch.kind_span, + content: branch.content, + children: vec![], + } + } +} + +impl Branch { + pub fn parse_with_indent_level( + parser: &mut Parser, + ) -> Result, ParseError> { + if let Some(branch_event) = parser.next_branch()? { + let own_indent_level = branch_event.indent_level; + let mut branch = Branch::from(branch_event); + let children_indent_level = parser.peek_indent_level(); + if children_indent_level > own_indent_level { + while parser.peek_indent_level() == children_indent_level { + if let Some(child) = Branch::parse(parser)? { + branch.children.push(child); + } else { + break; + } + } + } + Ok(Some((branch, own_indent_level))) + } else { + Ok(None) + } + } + + pub fn parse(parser: &mut Parser) -> Result, ParseError> { + Ok(Self::parse_with_indent_level(parser)?.map(|(branch, _)| branch)) + } +} diff --git a/crates/treehouse-format/src/lib.rs b/crates/treehouse-format/src/lib.rs index 722d226..62dde31 100644 --- a/crates/treehouse-format/src/lib.rs +++ b/crates/treehouse-format/src/lib.rs @@ -1,40 +1,15 @@ use std::ops::Range; -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -pub enum BranchKind { - /// Expanded by default. - Expanded, - /// Folded by default. - Collapsed, -} - -impl BranchKind { - pub fn char(&self) -> char { - match self { - BranchKind::Expanded => '-', - BranchKind::Collapsed => '+', - } - } -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct Branch { - pub indent_level: usize, - pub config: Range, - pub kind: BranchKind, - pub content: Range, -} - -#[derive(Debug, Clone, PartialEq, Eq)] -pub struct Parser<'a> { - pub input: &'a str, - pub position: usize, -} +pub mod ast; +pub mod pull; #[derive(Debug, thiserror::Error, Clone, PartialEq, Eq)] pub enum ParseErrorKind { #[error("branch kind (`+` or `-`) expected")] BranchKindExpected, + + #[error("root branches must not be indented")] + RootIndentLevel, } #[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)] @@ -44,70 +19,8 @@ pub struct ParseError { pub range: Range, } -impl<'a> Parser<'a> { - fn current(&self) -> Option { - self.input[self.position..].chars().next() - } - - fn advance(&mut self) { - self.position += self.current().map(|c| c.len_utf8()).unwrap_or(0); - } - - fn eat_as_long_as(&mut self, c: char) -> usize { - let mut count = 0; - while self.current() == Some(c) { - count += 1; - self.advance(); - } - count - } - - fn eat_until(&mut self, c: char) { - while self.current() != Some(c) { - self.advance(); - } - self.advance(); - } - - pub fn next_branch(&mut self) -> Result, ParseError> { - if self.current().is_none() { - return Ok(None); - } - - let indent_level = self.eat_as_long_as(' '); - - // TODO: Configs - let config_start = self.position; - let config_end = self.position; - - let branch_kind_start = self.position; - let branch_kind = match self.current() { - Some('-') => BranchKind::Expanded, - Some('+') => BranchKind::Collapsed, - _ => { - return Err(ParseError { - kind: ParseErrorKind::BranchKindExpected, - range: branch_kind_start..branch_kind_start + 1, - }) - } - }; - self.advance(); - - let content_start = self.position; - loop { - self.eat_until('\n'); - if let Some('\n') | None = self.current() { - self.advance(); - break; - } - } - let content_end = self.position; - - Ok(Some(Branch { - indent_level, - config: config_start..config_end, - kind: branch_kind, - content: content_start..content_end, - })) +impl ParseErrorKind { + pub fn at(self, range: Range) -> ParseError { + ParseError { kind: self, range } } } diff --git a/crates/treehouse-format/src/pull.rs b/crates/treehouse-format/src/pull.rs new file mode 100644 index 0000000..6400208 --- /dev/null +++ b/crates/treehouse-format/src/pull.rs @@ -0,0 +1,107 @@ +use std::ops::Range; + +use crate::{ParseError, ParseErrorKind}; + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum BranchKind { + /// Expanded by default. + Expanded, + /// Folded by default. + Collapsed, +} + +impl BranchKind { + pub fn char(&self) -> char { + match self { + BranchKind::Expanded => '-', + BranchKind::Collapsed => '+', + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct BranchEvent { + pub indent_level: usize, + pub attributes: Range, + pub kind: BranchKind, + pub kind_span: Range, + pub content: Range, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Parser<'a> { + pub input: &'a str, + pub position: usize, +} + +impl<'a> Parser<'a> { + fn current(&self) -> Option { + self.input[self.position..].chars().next() + } + + fn advance(&mut self) { + self.position += self.current().map(|c| c.len_utf8()).unwrap_or(0); + } + + fn eat_as_long_as(&mut self, c: char) -> usize { + let mut count = 0; + while self.current() == Some(c) { + count += 1; + self.advance(); + } + count + } + + fn eat_until(&mut self, c: char) { + while self.current() != Some(c) { + self.advance(); + } + self.advance(); + } + + pub fn peek_indent_level(&mut self) -> usize { + let position = self.position; + let indent_level = self.eat_as_long_as(' '); + self.position = position; + indent_level + } + + pub fn next_branch(&mut self) -> Result, ParseError> { + if self.current().is_none() { + return Ok(None); + } + + let indent_level = self.eat_as_long_as(' '); + + // TODO: Configs + let config_start = self.position; + let config_end = self.position; + + let kind_start = self.position; + let kind = match self.current() { + Some('-') => BranchKind::Expanded, + Some('+') => BranchKind::Collapsed, + _ => return Err(ParseErrorKind::BranchKindExpected.at(kind_start..kind_start + 1)), + }; + self.advance(); + let kind_end = self.position; + + let content_start = self.position; + loop { + self.eat_until('\n'); + if let Some('\n') | None = self.current() { + self.advance(); + break; + } + } + let content_end = self.position; + + Ok(Some(BranchEvent { + indent_level, + attributes: config_start..config_end, + kind, + kind_span: kind_start..kind_end, + content: content_start..content_end, + })) + } +} diff --git a/crates/treehouse-incubator/Cargo.toml b/crates/treehouse-incubator/Cargo.toml index d906aa5..d81a51f 100644 --- a/crates/treehouse-incubator/Cargo.toml +++ b/crates/treehouse-incubator/Cargo.toml @@ -4,6 +4,7 @@ version = "0.1.0" edition = "2021" [dependencies] +codespan-reporting = "0.11.1" handlebars = "4.3.7" pulldown-cmark = { version = "0.9.3", default-features = false } thiserror = "1.0.47" diff --git a/crates/treehouse-incubator/src/main.rs b/crates/treehouse-incubator/src/main.rs index 88d1c2b..a8a28b9 100644 --- a/crates/treehouse-incubator/src/main.rs +++ b/crates/treehouse-incubator/src/main.rs @@ -1,6 +1,12 @@ -use tree_html::HtmlGenerator; - -mod tree_html; +use codespan_reporting::{ + diagnostic::{Diagnostic, Label, LabelStyle, Severity}, + files::SimpleFile, + term::termcolor::{ColorChoice, StandardStream}, +}; +use treehouse_format::{ + ast::{Branch, Roots}, + pull::Parser, +}; #[derive(Debug, thiserror::Error)] enum Error { @@ -11,35 +17,82 @@ enum Error { Parse(#[from] treehouse_format::ParseError), } +fn print_branch(branch: &Branch, source: &str) { + fn inner(branch: &Branch, source: &str, indent_level: usize) { + for _ in 0..indent_level { + print!(" "); + } + println!( + "{} {:?}", + branch.kind.char(), + &source[branch.content.clone()] + ); + for child in &branch.children { + inner(child, source, indent_level + 1); + } + } + inner(branch, source, 0); +} + fn main() -> Result<(), Box> { let _ = std::fs::remove_dir_all("target/site"); std::fs::create_dir_all("target/site")?; let root_file = std::fs::read_to_string("content/tree/root.tree")?; - - let mut parser = treehouse_format::Parser { + let parse_result = Roots::parse(&mut Parser { input: &root_file, position: 0, - }; - let mut generator = HtmlGenerator::default(); - while let Some(branch) = parser.next_branch()? { - for _ in 0..branch.indent_level { - print!(" "); + }); + + match parse_result { + Ok(roots) => { + for root in &roots.branches { + print_branch(root, &root_file); + } + } + Err(error) => { + let writer = StandardStream::stderr(ColorChoice::Auto); + let config = codespan_reporting::term::Config::default(); + let files = SimpleFile::new("root.tree", &root_file); + let diagnostic = Diagnostic { + severity: Severity::Error, + code: None, + message: error.kind.to_string(), + labels: vec![Label { + style: LabelStyle::Primary, + file_id: (), + range: error.range, + message: String::new(), + }], + notes: vec![], + }; + codespan_reporting::term::emit(&mut writer.lock(), &config, &files, &diagnostic)?; } - println!( - "{} {:?}", - branch.kind.char(), - &root_file[branch.content.clone()] - ); - generator.add(&root_file, &branch); } - std::fs::write( - "target/site/index.html", - format!( - "{}", - generator.finish() - ), - )?; + + // let mut parser = treehouse_format::Parser { + // input: &root_file, + // position: 0, + // }; + // let mut generator = HtmlGenerator::default(); + // while let Some(branch) = parser.next_branch()? { + // for _ in 0..branch.indent_level { + // print!(" "); + // } + // println!( + // "{} {:?}", + // branch.kind.char(), + // &root_file[branch.content.clone()] + // ); + // generator.add(&root_file, &branch); + // } + // std::fs::write( + // "target/site/index.html", + // format!( + // "{}", + // generator.finish() + // ), + // )?; Ok(()) } diff --git a/crates/treehouse-incubator/src/tree_html.rs b/crates/treehouse-incubator/src/tree_html.rs deleted file mode 100644 index c692793..0000000 --- a/crates/treehouse-incubator/src/tree_html.rs +++ /dev/null @@ -1,30 +0,0 @@ -use treehouse_format::Branch; - -#[derive(Debug, Clone, Default)] -pub struct HtmlGenerator { - buffer: String, - indent_level_stack: Vec, -} - -impl HtmlGenerator { - pub fn add(&mut self, source: &str, branch: &Branch) { - if Some(&branch.indent_level) > self.indent_level_stack.last() { - self.indent_level_stack.push(branch.indent_level); - self.buffer.push_str("
    "); - } - while Some(&branch.indent_level) < self.indent_level_stack.last() { - self.indent_level_stack.pop(); - self.buffer.push_str("
"); - } - self.buffer.push_str("
  • "); - self.buffer.push_str(&source[branch.content.clone()]); - self.buffer.push_str("
  • "); - } - - pub fn finish(mut self) -> String { - for _ in self.indent_level_stack.drain(..) { - self.buffer.push_str(""); - } - self.buffer - } -} diff --git a/static/index.hbs b/static/index.hbs deleted file mode 100644 index 7af0dbf..0000000 --- a/static/index.hbs +++ /dev/null @@ -1,13 +0,0 @@ - - - - - - {{ config.treehouse.title }} - - - - {{ tree }} - - - diff --git a/static/main.css b/static/main.css new file mode 100644 index 0000000..e69de29 diff --git a/template/index.hbs b/template/index.hbs new file mode 100644 index 0000000..16366e0 --- /dev/null +++ b/template/index.hbs @@ -0,0 +1,14 @@ + + + + + + {{ config.user.title }} + + + + + {{{ tree }}} + + + diff --git a/treehouse.toml b/treehouse.toml index 453beb1..b316570 100644 --- a/treehouse.toml +++ b/treehouse.toml @@ -1,3 +1,5 @@ -[treehouse] +# User settings go here. These are (string, string) key-value pairs. +# They are available under `config.user`. +[user] title = "treehouse" author = "liquidex"