rewrite the parser to produce an AST
This commit is contained in:
parent
e69dcdc197
commit
0a185250da
13 changed files with 344 additions and 162 deletions
|
@ -5,3 +5,4 @@ edition = "2021"
|
|||
|
||||
[dependencies]
|
||||
thiserror = "1.0.47"
|
||||
log = { workspace = true }
|
||||
|
|
73
crates/treehouse-format/src/ast.rs
Normal file
73
crates/treehouse-format/src/ast.rs
Normal file
|
@ -0,0 +1,73 @@
|
|||
use std::ops::Range;
|
||||
|
||||
use crate::{
|
||||
pull::{BranchEvent, BranchKind, Parser},
|
||||
ParseError, ParseErrorKind,
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Roots {
|
||||
pub branches: Vec<Branch>,
|
||||
}
|
||||
|
||||
impl Roots {
|
||||
pub fn parse(parser: &mut Parser) -> Result<Self, ParseError> {
|
||||
let mut branches = vec![];
|
||||
while let Some((branch, indent_level)) = Branch::parse_with_indent_level(parser)? {
|
||||
if indent_level != 0 {
|
||||
return Err(ParseErrorKind::RootIndentLevel.at(branch.kind_span));
|
||||
}
|
||||
branches.push(branch);
|
||||
}
|
||||
Ok(Self { branches })
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Branch {
|
||||
pub attributes: Range<usize>,
|
||||
pub kind: BranchKind,
|
||||
pub kind_span: Range<usize>,
|
||||
pub content: Range<usize>,
|
||||
pub children: Vec<Branch>,
|
||||
}
|
||||
|
||||
impl From<BranchEvent> for Branch {
|
||||
fn from(branch: BranchEvent) -> Self {
|
||||
Self {
|
||||
attributes: branch.attributes,
|
||||
kind: branch.kind,
|
||||
kind_span: branch.kind_span,
|
||||
content: branch.content,
|
||||
children: vec![],
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Branch {
|
||||
pub fn parse_with_indent_level(
|
||||
parser: &mut Parser,
|
||||
) -> Result<Option<(Self, usize)>, ParseError> {
|
||||
if let Some(branch_event) = parser.next_branch()? {
|
||||
let own_indent_level = branch_event.indent_level;
|
||||
let mut branch = Branch::from(branch_event);
|
||||
let children_indent_level = parser.peek_indent_level();
|
||||
if children_indent_level > own_indent_level {
|
||||
while parser.peek_indent_level() == children_indent_level {
|
||||
if let Some(child) = Branch::parse(parser)? {
|
||||
branch.children.push(child);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(Some((branch, own_indent_level)))
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn parse(parser: &mut Parser) -> Result<Option<Self>, ParseError> {
|
||||
Ok(Self::parse_with_indent_level(parser)?.map(|(branch, _)| branch))
|
||||
}
|
||||
}
|
|
@ -1,40 +1,15 @@
|
|||
use std::ops::Range;
|
||||
|
||||
/// How a branch is displayed before the user interacts with it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BranchKind {
    /// Expanded by default.
    Expanded,
    /// Folded by default.
    Collapsed,
}

impl BranchKind {
    /// Returns the marker character for this kind: `-` for expanded, `+` for collapsed.
    pub fn char(&self) -> char {
        match *self {
            Self::Expanded => '-',
            Self::Collapsed => '+',
        }
    }
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct Branch {
|
||||
pub indent_level: usize,
|
||||
pub config: Range<usize>,
|
||||
pub kind: BranchKind,
|
||||
pub content: Range<usize>,
|
||||
}
|
||||
|
||||
/// Cursor over the source text being parsed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Parser<'a> {
    /// The complete source text.
    pub input: &'a str,
    /// Byte offset into `input` of the next character to read.
    pub position: usize,
}
|
||||
pub mod ast;
|
||||
pub mod pull;
|
||||
|
||||
#[derive(Debug, thiserror::Error, Clone, PartialEq, Eq)]
|
||||
pub enum ParseErrorKind {
|
||||
#[error("branch kind (`+` or `-`) expected")]
|
||||
BranchKindExpected,
|
||||
|
||||
#[error("root branches must not be indented")]
|
||||
RootIndentLevel,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
|
||||
|
@ -44,70 +19,8 @@ pub struct ParseError {
|
|||
pub range: Range<usize>,
|
||||
}
|
||||
|
||||
impl<'a> Parser<'a> {
|
||||
fn current(&self) -> Option<char> {
|
||||
self.input[self.position..].chars().next()
|
||||
}
|
||||
|
||||
fn advance(&mut self) {
|
||||
self.position += self.current().map(|c| c.len_utf8()).unwrap_or(0);
|
||||
}
|
||||
|
||||
fn eat_as_long_as(&mut self, c: char) -> usize {
|
||||
let mut count = 0;
|
||||
while self.current() == Some(c) {
|
||||
count += 1;
|
||||
self.advance();
|
||||
}
|
||||
count
|
||||
}
|
||||
|
||||
fn eat_until(&mut self, c: char) {
|
||||
while self.current() != Some(c) {
|
||||
self.advance();
|
||||
}
|
||||
self.advance();
|
||||
}
|
||||
|
||||
pub fn next_branch(&mut self) -> Result<Option<Branch>, ParseError> {
|
||||
if self.current().is_none() {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let indent_level = self.eat_as_long_as(' ');
|
||||
|
||||
// TODO: Configs
|
||||
let config_start = self.position;
|
||||
let config_end = self.position;
|
||||
|
||||
let branch_kind_start = self.position;
|
||||
let branch_kind = match self.current() {
|
||||
Some('-') => BranchKind::Expanded,
|
||||
Some('+') => BranchKind::Collapsed,
|
||||
_ => {
|
||||
return Err(ParseError {
|
||||
kind: ParseErrorKind::BranchKindExpected,
|
||||
range: branch_kind_start..branch_kind_start + 1,
|
||||
})
|
||||
}
|
||||
};
|
||||
self.advance();
|
||||
|
||||
let content_start = self.position;
|
||||
loop {
|
||||
self.eat_until('\n');
|
||||
if let Some('\n') | None = self.current() {
|
||||
self.advance();
|
||||
break;
|
||||
}
|
||||
}
|
||||
let content_end = self.position;
|
||||
|
||||
Ok(Some(Branch {
|
||||
indent_level,
|
||||
config: config_start..config_end,
|
||||
kind: branch_kind,
|
||||
content: content_start..content_end,
|
||||
}))
|
||||
impl ParseErrorKind {
|
||||
pub fn at(self, range: Range<usize>) -> ParseError {
|
||||
ParseError { kind: self, range }
|
||||
}
|
||||
}
|
||||
|
|
107
crates/treehouse-format/src/pull.rs
Normal file
107
crates/treehouse-format/src/pull.rs
Normal file
|
@ -0,0 +1,107 @@
|
|||
use std::ops::Range;
|
||||
|
||||
use crate::{ParseError, ParseErrorKind};
|
||||
|
||||
/// How a branch is displayed before the user interacts with it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BranchKind {
    /// Expanded by default.
    Expanded,
    /// Folded by default.
    Collapsed,
}

impl BranchKind {
    /// Returns the marker character for this kind: `-` for expanded, `+` for collapsed.
    pub fn char(&self) -> char {
        if matches!(self, Self::Expanded) {
            '-'
        } else {
            '+'
        }
    }
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
pub struct BranchEvent {
|
||||
pub indent_level: usize,
|
||||
pub attributes: Range<usize>,
|
||||
pub kind: BranchKind,
|
||||
pub kind_span: Range<usize>,
|
||||
pub content: Range<usize>,
|
||||
}
|
||||
|
||||
/// Cursor over the source text being parsed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Parser<'a> {
    /// The complete source text.
    pub input: &'a str,
    /// Byte offset into `input` of the next character to read.
    pub position: usize,
}
|
||||
|
||||
impl<'a> Parser<'a> {
|
||||
fn current(&self) -> Option<char> {
|
||||
self.input[self.position..].chars().next()
|
||||
}
|
||||
|
||||
fn advance(&mut self) {
|
||||
self.position += self.current().map(|c| c.len_utf8()).unwrap_or(0);
|
||||
}
|
||||
|
||||
fn eat_as_long_as(&mut self, c: char) -> usize {
|
||||
let mut count = 0;
|
||||
while self.current() == Some(c) {
|
||||
count += 1;
|
||||
self.advance();
|
||||
}
|
||||
count
|
||||
}
|
||||
|
||||
fn eat_until(&mut self, c: char) {
|
||||
while self.current() != Some(c) {
|
||||
self.advance();
|
||||
}
|
||||
self.advance();
|
||||
}
|
||||
|
||||
pub fn peek_indent_level(&mut self) -> usize {
|
||||
let position = self.position;
|
||||
let indent_level = self.eat_as_long_as(' ');
|
||||
self.position = position;
|
||||
indent_level
|
||||
}
|
||||
|
||||
pub fn next_branch(&mut self) -> Result<Option<BranchEvent>, ParseError> {
|
||||
if self.current().is_none() {
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let indent_level = self.eat_as_long_as(' ');
|
||||
|
||||
// TODO: Configs
|
||||
let config_start = self.position;
|
||||
let config_end = self.position;
|
||||
|
||||
let kind_start = self.position;
|
||||
let kind = match self.current() {
|
||||
Some('-') => BranchKind::Expanded,
|
||||
Some('+') => BranchKind::Collapsed,
|
||||
_ => return Err(ParseErrorKind::BranchKindExpected.at(kind_start..kind_start + 1)),
|
||||
};
|
||||
self.advance();
|
||||
let kind_end = self.position;
|
||||
|
||||
let content_start = self.position;
|
||||
loop {
|
||||
self.eat_until('\n');
|
||||
if let Some('\n') | None = self.current() {
|
||||
self.advance();
|
||||
break;
|
||||
}
|
||||
}
|
||||
let content_end = self.position;
|
||||
|
||||
Ok(Some(BranchEvent {
|
||||
indent_level,
|
||||
attributes: config_start..config_end,
|
||||
kind,
|
||||
kind_span: kind_start..kind_end,
|
||||
content: content_start..content_end,
|
||||
}))
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue