rewrite the parser to produce an AST

This commit is contained in:
liquidex 2023-08-18 13:25:20 +02:00
parent e69dcdc197
commit 0a185250da
13 changed files with 344 additions and 162 deletions

58
Cargo.lock generated
View file

@ -23,6 +23,16 @@ version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "codespan-reporting"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3538270d33cc669650c4b093848450d380def10c331d38c768e34cac80576e6e"
dependencies = [
"termcolor",
"unicode-width",
]
[[package]]
name = "cpufeatures"
version = "0.2.9"
@ -224,6 +234,15 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "termcolor"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "be55cf8942feac5c765c2c993422806843c9a9a45d4d5c407ad6dd2ea95eb9b6"
dependencies = [
"winapi-util",
]
[[package]]
name = "thiserror"
version = "1.0.47"
@ -248,6 +267,7 @@ dependencies = [
name = "treehouse-format"
version = "0.1.0"
dependencies = [
"log",
"thiserror",
]
@ -255,6 +275,7 @@ dependencies = [
name = "treehouse-incubator"
version = "0.1.0"
dependencies = [
"codespan-reporting",
"handlebars",
"pulldown-cmark",
"thiserror",
@ -288,8 +309,45 @@ version = "1.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "301abaae475aa91687eb82514b328ab47a211a533026cb25fc3e519b86adfc3c"
[[package]]
name = "unicode-width"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0edd1e5b14653f783770bce4a4dabb4a5108a5370a5f5d8cfe8710c361f6c8b"
[[package]]
name = "version_check"
version = "0.9.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f"
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-util"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178"
dependencies = [
"winapi",
]
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"

View file

@ -3,4 +3,7 @@ members = ["crates/*"]
resolver = "2"
[workspace.dependencies]
log = "0.4.20"
treehouse-format = { path = "crates/treehouse-format" }

View file

@ -5,3 +5,4 @@ edition = "2021"
[dependencies]
thiserror = "1.0.47"
log = { workspace = true }

View file

@ -0,0 +1,73 @@
use std::ops::Range;
use crate::{
pull::{BranchEvent, BranchKind, Parser},
ParseError, ParseErrorKind,
};
/// The top-level branches of a parsed tree.
#[derive(Debug, Clone)]
pub struct Roots {
/// Root branches, in the order they were parsed.
pub branches: Vec<Branch>,
}
impl Roots {
    /// Parses branches from `parser` until it reports no more input, treating
    /// each one as a root of the tree.
    ///
    /// # Errors
    ///
    /// Propagates any error raised while parsing a branch. Additionally
    /// returns [`ParseErrorKind::RootIndentLevel`], located at the branch's
    /// kind marker, when a root branch has a nonzero indent level.
    pub fn parse(parser: &mut Parser) -> Result<Self, ParseError> {
        let mut branches = Vec::new();
        loop {
            match Branch::parse_with_indent_level(parser)? {
                // Roots must sit at column zero.
                Some((branch, 0)) => branches.push(branch),
                Some((branch, _)) => {
                    return Err(ParseErrorKind::RootIndentLevel.at(branch.kind_span));
                }
                None => break,
            }
        }
        Ok(Self { branches })
    }
}
/// A branch of the tree together with its nested child branches.
///
/// All ranges are copied from the `BranchEvent` the branch was built from.
#[derive(Debug, Clone)]
pub struct Branch {
/// Range of the branch's attributes, as reported by the pull parser.
pub attributes: Range<usize>,
/// Whether the branch is expanded or collapsed by default.
pub kind: BranchKind,
/// Range of the kind marker; used to locate errors about this branch.
pub kind_span: Range<usize>,
/// Range of the branch's content, as reported by the pull parser.
pub content: Range<usize>,
/// Branches nested under this one, in the order they were parsed.
pub children: Vec<Branch>,
}
impl From<BranchEvent> for Branch {
fn from(branch: BranchEvent) -> Self {
Self {
attributes: branch.attributes,
kind: branch.kind,
kind_span: branch.kind_span,
content: branch.content,
children: vec![],
}
}
}
impl Branch {
    /// Parses the next branch and everything nested beneath it.
    ///
    /// Returns the branch paired with its own indent level, or `None` when the
    /// parser has no more branches. If the line following the branch is
    /// indented deeper than the branch itself, that indent level becomes the
    /// children's level, and branches are collected as children for as long as
    /// the upcoming line sits at exactly that level.
    ///
    /// # Errors
    ///
    /// Propagates any error from the underlying pull parser.
    pub fn parse_with_indent_level(
        parser: &mut Parser,
    ) -> Result<Option<(Self, usize)>, ParseError> {
        let event = match parser.next_branch()? {
            Some(event) => event,
            None => return Ok(None),
        };
        let own_indent_level = event.indent_level;
        let mut branch = Branch::from(event);

        let children_indent_level = parser.peek_indent_level();
        if children_indent_level > own_indent_level {
            while parser.peek_indent_level() == children_indent_level {
                match Branch::parse(parser)? {
                    Some(child) => branch.children.push(child),
                    None => break,
                }
            }
        }

        Ok(Some((branch, own_indent_level)))
    }

    /// Same as [`Branch::parse_with_indent_level`], discarding the indent level.
    pub fn parse(parser: &mut Parser) -> Result<Option<Self>, ParseError> {
        let parsed = Self::parse_with_indent_level(parser)?;
        Ok(parsed.map(|(branch, _indent_level)| branch))
    }
}

View file

@ -1,40 +1,15 @@
use std::ops::Range;
/// Default display state of a branch, selected by its `-` or `+` marker.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BranchKind {
/// Expanded by default.
Expanded,
/// Folded by default.
Collapsed,
}
impl BranchKind {
pub fn char(&self) -> char {
match self {
BranchKind::Expanded => '-',
BranchKind::Collapsed => '+',
}
}
}
/// A single branch as produced by `Parser::next_branch`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Branch {
/// Number of space characters preceding the branch.
pub indent_level: usize,
/// Byte range reserved for the branch's config; currently always empty.
pub config: Range<usize>,
/// Whether the branch is expanded or collapsed by default.
pub kind: BranchKind,
/// Byte range of the text following the kind marker.
pub content: Range<usize>,
}
/// Cursor over the source text of a tree file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Parser<'a> {
/// The text being parsed.
pub input: &'a str,
/// Byte offset into `input` of the next character to read.
pub position: usize,
}
pub mod ast;
pub mod pull;
/// The reasons parsing a tree can fail.
#[derive(Debug, thiserror::Error, Clone, PartialEq, Eq)]
pub enum ParseErrorKind {
/// The character where a branch marker was expected is neither `+` nor `-`.
#[error("branch kind (`+` or `-`) expected")]
BranchKindExpected,
/// A top-level branch has a nonzero indent level.
#[error("root branches must not be indented")]
RootIndentLevel,
}
#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
@ -44,70 +19,8 @@ pub struct ParseError {
pub range: Range<usize>,
}
impl<'a> Parser<'a> {
/// Returns the character at the current position, or `None` once the input
/// is exhausted.
fn current(&self) -> Option<char> {
    let remaining = &self.input[self.position..];
    remaining.chars().next()
}
/// Steps past the current character; does nothing at end of input.
fn advance(&mut self) {
    if let Some(c) = self.current() {
        self.position += c.len_utf8();
    }
}
/// Consumes consecutive occurrences of `c` and returns how many were consumed.
fn eat_as_long_as(&mut self, c: char) -> usize {
    let mut eaten = 0;
    loop {
        if self.current() != Some(c) {
            return eaten;
        }
        self.advance();
        eaten += 1;
    }
}
/// Consumes input up to and including the next occurrence of `c`.
///
/// If `c` does not occur in the remaining input, everything up to the end of
/// input is consumed. The loop is driven by `current()` returning `Some`, so
/// it cannot spin forever at end of input, where `advance` makes no progress.
fn eat_until(&mut self, c: char) {
    while let Some(current) = self.current() {
        self.advance();
        if current == c {
            break;
        }
    }
}
pub fn next_branch(&mut self) -> Result<Option<Branch>, ParseError> {
if self.current().is_none() {
return Ok(None);
}
let indent_level = self.eat_as_long_as(' ');
// TODO: Configs
let config_start = self.position;
let config_end = self.position;
let branch_kind_start = self.position;
let branch_kind = match self.current() {
Some('-') => BranchKind::Expanded,
Some('+') => BranchKind::Collapsed,
_ => {
return Err(ParseError {
kind: ParseErrorKind::BranchKindExpected,
range: branch_kind_start..branch_kind_start + 1,
})
}
};
self.advance();
let content_start = self.position;
loop {
self.eat_until('\n');
if let Some('\n') | None = self.current() {
self.advance();
break;
}
}
let content_end = self.position;
Ok(Some(Branch {
indent_level,
config: config_start..config_end,
kind: branch_kind,
content: content_start..content_end,
}))
impl ParseErrorKind {
    /// Attaches a source `range` to this error kind, producing a [`ParseError`].
    pub fn at(self, range: Range<usize>) -> ParseError {
        let kind = self;
        ParseError { range, kind }
    }
}

View file

@ -0,0 +1,107 @@
use std::ops::Range;
use crate::{ParseError, ParseErrorKind};
/// Default display state of a branch, selected by its `-` or `+` marker.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BranchKind {
/// Expanded by default.
Expanded,
/// Folded by default.
Collapsed,
}
impl BranchKind {
pub fn char(&self) -> char {
match self {
BranchKind::Expanded => '-',
BranchKind::Collapsed => '+',
}
}
}
/// A single branch as produced by `Parser::next_branch`, without any nesting.
///
/// All ranges are byte offsets into `Parser::input`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BranchEvent {
/// Number of space characters preceding the branch.
pub indent_level: usize,
/// Range reserved for the branch's attributes; currently always empty.
pub attributes: Range<usize>,
/// Whether the branch is expanded or collapsed by default.
pub kind: BranchKind,
/// Range of the `-` or `+` marker character.
pub kind_span: Range<usize>,
/// Range of the text following the kind marker.
pub content: Range<usize>,
}
/// Cursor over the source text of a tree file.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Parser<'a> {
/// The text being parsed.
pub input: &'a str,
/// Byte offset into `input` of the next character to read.
pub position: usize,
}
impl<'a> Parser<'a> {
    /// Returns the character at the current position, or `None` once the
    /// input is exhausted.
    fn current(&self) -> Option<char> {
        self.input[self.position..].chars().next()
    }

    /// Steps past the current character; does nothing at end of input.
    fn advance(&mut self) {
        if let Some(c) = self.current() {
            self.position += c.len_utf8();
        }
    }

    /// Consumes consecutive occurrences of `c` and returns how many were
    /// consumed.
    fn eat_as_long_as(&mut self, c: char) -> usize {
        let mut count = 0;
        while self.current() == Some(c) {
            count += 1;
            self.advance();
        }
        count
    }

    /// Consumes input up to and including the next occurrence of `c`.
    ///
    /// If `c` does not occur in the remaining input, everything up to the end
    /// of input is consumed. The loop is driven by `current()` returning
    /// `Some`, so it cannot spin forever at end of input, where `advance`
    /// makes no progress.
    fn eat_until(&mut self, c: char) {
        while let Some(current) = self.current() {
            self.advance();
            if current == c {
                break;
            }
        }
    }

    /// Returns the number of spaces at the current position without consuming
    /// them. Returns 0 at end of input.
    pub fn peek_indent_level(&mut self) -> usize {
        let saved_position = self.position;
        let indent_level = self.eat_as_long_as(' ');
        self.position = saved_position;
        indent_level
    }

    /// Reads the next branch from the input.
    ///
    /// A branch consists of optional leading spaces, a `-` or `+` marker, and
    /// content that runs until a blank line or the end of input. Returns
    /// `Ok(None)` when the parser is already at the end of input.
    ///
    /// # Errors
    ///
    /// Returns [`ParseErrorKind::BranchKindExpected`] when the character after
    /// the indentation is neither `-` nor `+`. The error range covers exactly
    /// the offending character, and is empty when the input ends there.
    pub fn next_branch(&mut self) -> Result<Option<BranchEvent>, ParseError> {
        if self.current().is_none() {
            return Ok(None);
        }

        let indent_level = self.eat_as_long_as(' ');

        // TODO: Configs
        let config_start = self.position;
        let config_end = self.position;

        let kind_start = self.position;
        let kind = match self.current() {
            Some('-') => BranchKind::Expanded,
            Some('+') => BranchKind::Collapsed,
            other => {
                // Size the range by the actual character so it never splits a
                // multi-byte character or reaches past the end of the input.
                let width = other.map_or(0, char::len_utf8);
                return Err(
                    ParseErrorKind::BranchKindExpected.at(kind_start..kind_start + width)
                );
            }
        };
        self.advance();
        let kind_end = self.position;

        let content_start = self.position;
        loop {
            self.eat_until('\n');
            // The content ends at a blank line (which is consumed as part of
            // the content) or at the end of input; otherwise the next line
            // still belongs to this branch.
            if let Some('\n') | None = self.current() {
                self.advance();
                break;
            }
        }
        let content_end = self.position;

        Ok(Some(BranchEvent {
            indent_level,
            attributes: config_start..config_end,
            kind,
            kind_span: kind_start..kind_end,
            content: content_start..content_end,
        }))
    }
}

View file

@ -4,6 +4,7 @@ version = "0.1.0"
edition = "2021"
[dependencies]
codespan-reporting = "0.11.1"
handlebars = "4.3.7"
pulldown-cmark = { version = "0.9.3", default-features = false }
thiserror = "1.0.47"

View file

@ -1,6 +1,12 @@
use tree_html::HtmlGenerator;
mod tree_html;
use codespan_reporting::{
diagnostic::{Diagnostic, Label, LabelStyle, Severity},
files::SimpleFile,
term::termcolor::{ColorChoice, StandardStream},
};
use treehouse_format::{
ast::{Branch, Roots},
pull::Parser,
};
#[derive(Debug, thiserror::Error)]
enum Error {
@ -11,35 +17,82 @@ enum Error {
Parse(#[from] treehouse_format::ParseError),
}
fn main() -> Result<(), Box<dyn std::error::Error>> {
let _ = std::fs::remove_dir_all("target/site");
std::fs::create_dir_all("target/site")?;
let root_file = std::fs::read_to_string("content/tree/root.tree")?;
let mut parser = treehouse_format::Parser {
input: &root_file,
position: 0,
};
let mut generator = HtmlGenerator::default();
while let Some(branch) = parser.next_branch()? {
for _ in 0..branch.indent_level {
fn print_branch(branch: &Branch, source: &str) {
fn inner(branch: &Branch, source: &str, indent_level: usize) {
for _ in 0..indent_level {
print!(" ");
}
println!(
"{} {:?}",
branch.kind.char(),
&root_file[branch.content.clone()]
&source[branch.content.clone()]
);
generator.add(&root_file, &branch);
for child in &branch.children {
inner(child, source, indent_level + 1);
}
std::fs::write(
"target/site/index.html",
format!(
"<!DOCTYPE html><html><head></head><body>{}</body></html>",
generator.finish()
),
)?;
}
inner(branch, source, 0);
}
/// Parses `content/tree/root.tree` and prints the resulting tree to stdout.
///
/// A parse failure is rendered as a diagnostic on stderr; the function still
/// returns `Ok(())` in that case. I/O failures (other than removing the old
/// output directory) and diagnostic emission failures are returned as errors.
fn main() -> Result<(), Box<dyn std::error::Error>> {
    // Start from a clean output directory. Failing to remove it is
    // deliberately ignored.
    let _ = std::fs::remove_dir_all("target/site");
    std::fs::create_dir_all("target/site")?;

    let root_file = std::fs::read_to_string("content/tree/root.tree")?;
    let mut parser = Parser {
        input: &root_file,
        position: 0,
    };

    match Roots::parse(&mut parser) {
        Ok(roots) => roots
            .branches
            .iter()
            .for_each(|root| print_branch(root, &root_file)),
        Err(error) => {
            let message = error.kind.to_string();
            let label = Label {
                style: LabelStyle::Primary,
                file_id: (),
                range: error.range,
                message: String::new(),
            };
            let diagnostic = Diagnostic {
                severity: Severity::Error,
                code: None,
                message,
                labels: vec![label],
                notes: vec![],
            };

            let files = SimpleFile::new("root.tree", &root_file);
            let config = codespan_reporting::term::Config::default();
            let writer = StandardStream::stderr(ColorChoice::Auto);
            codespan_reporting::term::emit(&mut writer.lock(), &config, &files, &diagnostic)?;
        }
    }

    // TODO: bring back HTML generation on top of the AST. The previous
    // implementation fed each flat branch to `HtmlGenerator` and wrote the
    // result, wrapped in a minimal HTML document, to `target/site/index.html`.

    Ok(())
}

View file

@ -1,30 +0,0 @@
use treehouse_format::Branch;
/// Incrementally builds nested `<ul>`/`<li>` markup from a flat sequence of
/// branches.
#[derive(Debug, Clone, Default)]
pub struct HtmlGenerator {
/// The HTML generated so far.
buffer: String,
/// Indent levels of the `<ul>` elements that are currently open, outermost first.
indent_level_stack: Vec<usize>,
}
impl HtmlGenerator {
    /// Appends `branch` as an `<li>` whose text is the branch's content range
    /// of `source`, inserted verbatim.
    ///
    /// A `<ul>` is opened first when the branch is indented deeper than the
    /// innermost open list (or when no list is open yet). Open lists that are
    /// indented deeper than the branch are closed before the item is written.
    pub fn add(&mut self, source: &str, branch: &Branch) {
        let level = branch.indent_level;

        let opens_list = self
            .indent_level_stack
            .last()
            .map_or(true, |&innermost| level > innermost);
        if opens_list {
            self.indent_level_stack.push(level);
            self.buffer.push_str("<ul>");
        }

        while self
            .indent_level_stack
            .last()
            .map_or(false, |&innermost| level < innermost)
        {
            self.indent_level_stack.pop();
            self.buffer.push_str("</ul>");
        }

        let content = &source[branch.content.clone()];
        self.buffer.push_str("<li>");
        self.buffer.push_str(content);
        self.buffer.push_str("</li>");
    }

    /// Closes every list that is still open and returns the generated HTML.
    pub fn finish(mut self) -> String {
        let open_lists = self.indent_level_stack.len();
        self.buffer.push_str(&"</ul>".repeat(open_lists));
        self.buffer
    }
}

View file

@ -1,13 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<title>{{ config.treehouse.title }}</title>
</head>
<body>
{{ tree }}
</body>
</html>

0
static/main.css Normal file
View file

14
template/index.hbs Normal file
View file

@ -0,0 +1,14 @@
<!DOCTYPE html>
<html>
<head>
<title>{{ config.user.title }}</title>
<link rel="stylesheet" href="{{ local 'static/main.css' }}">
</head>
<body>
{{{ tree }}}
</body>
</html>

View file

@ -1,3 +1,5 @@
[treehouse]
# User settings go here. These are (string, string) key-value pairs.
# They are available under `config.user`.
[user]
title = "treehouse"
author = "liquidex"