rewrite the parser to produce an AST

This commit is contained in:
りき萌 2023-08-18 13:25:20 +02:00
parent e69dcdc197
commit 0a185250da
13 changed files with 344 additions and 162 deletions

View file

@ -5,3 +5,4 @@ edition = "2021"
[dependencies]
thiserror = "1.0.47"
log = { workspace = true }

View file

@ -0,0 +1,73 @@
use std::ops::Range;
use crate::{
pull::{BranchEvent, BranchKind, Parser},
ParseError, ParseErrorKind,
};
/// The top level of a parsed document: the list of branches that sit at
/// indent level 0.
#[derive(Debug, Clone)]
pub struct Roots {
/// Root branches, in the order they were parsed.
pub branches: Vec<Branch>,
}
impl Roots {
    /// Parses branches from `parser` until it reports the end of input and
    /// collects them as the document roots.
    ///
    /// # Errors
    ///
    /// Propagates any error produced while parsing a branch. Additionally
    /// returns [`ParseErrorKind::RootIndentLevel`], located at the branch's
    /// kind marker, when a branch parsed at this level is indented.
    pub fn parse(parser: &mut Parser) -> Result<Self, ParseError> {
        let mut roots = Vec::new();
        loop {
            match Branch::parse_with_indent_level(parser)? {
                // Only unindented branches are valid roots.
                Some((root, 0)) => roots.push(root),
                Some((indented, _)) => {
                    return Err(ParseErrorKind::RootIndentLevel.at(indented.kind_span));
                }
                None => break,
            }
        }
        Ok(Self { branches: roots })
    }
}
/// A single branch of the tree together with its nested child branches.
///
/// All ranges are copied unchanged from the `BranchEvent` the branch was
/// built from.
#[derive(Debug, Clone)]
pub struct Branch {
/// Range of the branch's attributes, as reported by the pull parser.
pub attributes: Range<usize>,
/// Whether the branch is expanded or collapsed by default.
pub kind: BranchKind,
/// Range of the kind marker; also used to locate indentation errors.
pub kind_span: Range<usize>,
/// Range of the branch's content, as reported by the pull parser.
pub content: Range<usize>,
/// Child branches, in the order they were parsed.
pub children: Vec<Branch>,
}
impl From<BranchEvent> for Branch {
fn from(branch: BranchEvent) -> Self {
Self {
attributes: branch.attributes,
kind: branch.kind,
kind_span: branch.kind_span,
content: branch.content,
children: vec![],
}
}
}
impl Branch {
    /// Parses the next branch and all of its children, returning the branch
    /// together with its own indent level.
    ///
    /// Returns `Ok(None)` when the parser has no more branches. The indent
    /// level of the line following the branch decides whether it has
    /// children: if that line is indented deeper than the branch itself,
    /// every consecutive branch at exactly that deeper level becomes a child.
    ///
    /// # Errors
    ///
    /// Propagates any error reported by the pull parser.
    pub fn parse_with_indent_level(
        parser: &mut Parser,
    ) -> Result<Option<(Self, usize)>, ParseError> {
        let event = match parser.next_branch()? {
            Some(event) => event,
            None => return Ok(None),
        };

        let own_indent_level = event.indent_level;
        let mut branch = Branch::from(event);

        // The first deeper-indented line fixes the level all children must share.
        let children_indent_level = parser.peek_indent_level();
        if children_indent_level > own_indent_level {
            while parser.peek_indent_level() == children_indent_level {
                match Branch::parse(parser)? {
                    Some(child) => branch.children.push(child),
                    None => break,
                }
            }
        }

        Ok(Some((branch, own_indent_level)))
    }

    /// Parses the next branch and its children, discarding the indent level.
    ///
    /// Returns `Ok(None)` when the parser has no more branches.
    pub fn parse(parser: &mut Parser) -> Result<Option<Self>, ParseError> {
        let parsed = Self::parse_with_indent_level(parser)?;
        Ok(parsed.map(|(branch, _indent_level)| branch))
    }
}

View file

@ -1,40 +1,15 @@
use std::ops::Range;
/// How a branch is displayed before the user interacts with it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BranchKind {
    /// The branch starts out expanded.
    Expanded,
    /// The branch starts out folded.
    Collapsed,
}

impl BranchKind {
    /// Returns the marker character for this kind: `-` for expanded
    /// branches and `+` for collapsed ones.
    pub fn char(&self) -> char {
        match *self {
            Self::Collapsed => '+',
            Self::Expanded => '-',
        }
    }
}
/// A single branch as produced by `Parser::next_branch`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Branch {
/// Number of space characters preceding the branch.
pub indent_level: usize,
/// Range reserved for the branch's config; the parser currently always
/// produces an empty range here.
pub config: Range<usize>,
/// Whether the branch is expanded or collapsed by default.
pub kind: BranchKind,
/// Byte range of the content, starting right after the kind marker.
pub content: Range<usize>,
}
/// Cursor over the source text from which branches are read one at a time.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Parser<'a> {
/// The full source text being parsed.
pub input: &'a str,
/// Byte offset into `input` of the next character to read.
pub position: usize,
}
pub mod ast;
pub mod pull;
/// The reasons parsing can fail.
#[derive(Debug, thiserror::Error, Clone, PartialEq, Eq)]
pub enum ParseErrorKind {
/// A branch did not start with one of the kind markers `+` or `-`.
#[error("branch kind (`+` or `-`) expected")]
BranchKindExpected,
/// A branch parsed as a root had a non-zero indent level.
#[error("root branches must not be indented")]
RootIndentLevel,
}
#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
@ -44,70 +19,8 @@ pub struct ParseError {
pub range: Range<usize>,
}
impl<'a> Parser<'a> {
/// Returns the character at the current position, or `None` when the
/// position is at the end of the input.
fn current(&self) -> Option<char> {
    let rest = &self.input[self.position..];
    rest.chars().next()
}
/// Moves the position past the current character, by its UTF-8 length.
/// Does nothing at the end of the input.
fn advance(&mut self) {
    if let Some(ch) = self.current() {
        self.position += ch.len_utf8();
    }
}
/// Consumes consecutive occurrences of `c` starting at the current
/// position and returns how many were consumed.
fn eat_as_long_as(&mut self, c: char) -> usize {
    let mut eaten = 0;
    loop {
        if self.current() != Some(c) {
            return eaten;
        }
        self.advance();
        eaten += 1;
    }
}
/// Consumes characters up to and including the next occurrence of `c`.
///
/// If `c` does not occur in the rest of the input, everything up to the
/// end of the input is consumed. The loop is driven by `Some(..)` so that
/// reaching the end terminates it; comparing against `Some(c)` alone would
/// spin forever there, because `advance` cannot move past the end.
fn eat_until(&mut self, c: char) {
    while let Some(current) = self.current() {
        self.advance();
        if current == c {
            break;
        }
    }
}
pub fn next_branch(&mut self) -> Result<Option<Branch>, ParseError> {
if self.current().is_none() {
return Ok(None);
}
let indent_level = self.eat_as_long_as(' ');
// TODO: Configs
let config_start = self.position;
let config_end = self.position;
let branch_kind_start = self.position;
let branch_kind = match self.current() {
Some('-') => BranchKind::Expanded,
Some('+') => BranchKind::Collapsed,
_ => {
return Err(ParseError {
kind: ParseErrorKind::BranchKindExpected,
range: branch_kind_start..branch_kind_start + 1,
})
}
};
self.advance();
let content_start = self.position;
loop {
self.eat_until('\n');
if let Some('\n') | None = self.current() {
self.advance();
break;
}
}
let content_end = self.position;
Ok(Some(Branch {
indent_level,
config: config_start..config_end,
kind: branch_kind,
content: content_start..content_end,
}))
impl ParseErrorKind {
    /// Pairs this error kind with the source range it applies to, producing
    /// a complete [`ParseError`].
    pub fn at(self, range: Range<usize>) -> ParseError {
        let kind = self;
        ParseError { kind, range }
    }
}

View file

@ -0,0 +1,107 @@
use std::ops::Range;
use crate::{ParseError, ParseErrorKind};
/// How a branch is displayed before the user interacts with it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BranchKind {
    /// The branch starts out expanded.
    Expanded,
    /// The branch starts out folded.
    Collapsed,
}

impl BranchKind {
    /// Returns the marker character for this kind: `-` for expanded
    /// branches and `+` for collapsed ones.
    pub fn char(&self) -> char {
        match *self {
            Self::Collapsed => '+',
            Self::Expanded => '-',
        }
    }
}
/// A single branch as produced by `Parser::next_branch`, before any tree
/// structure has been built.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BranchEvent {
/// Number of space characters preceding the branch.
pub indent_level: usize,
/// Range reserved for the branch's attributes; the parser currently
/// always produces an empty range here.
pub attributes: Range<usize>,
/// Whether the branch is expanded or collapsed by default.
pub kind: BranchKind,
/// Byte range of the kind marker character.
pub kind_span: Range<usize>,
/// Byte range of the content, starting right after the kind marker.
pub content: Range<usize>,
}
/// Cursor over the source text from which branch events are pulled one at
/// a time.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Parser<'a> {
/// The full source text being parsed.
pub input: &'a str,
/// Byte offset into `input` of the next character to read.
pub position: usize,
}
impl<'a> Parser<'a> {
    /// Returns the character at the current position, or `None` when the
    /// position is at the end of the input.
    fn current(&self) -> Option<char> {
        self.input[self.position..].chars().next()
    }

    /// Moves the position past the current character, by its UTF-8 length.
    /// Does nothing at the end of the input.
    fn advance(&mut self) {
        self.position += self.current().map_or(0, char::len_utf8);
    }

    /// Consumes consecutive occurrences of `c` starting at the current
    /// position and returns how many were consumed.
    fn eat_as_long_as(&mut self, c: char) -> usize {
        let mut count = 0;
        while self.current() == Some(c) {
            count += 1;
            self.advance();
        }
        count
    }

    /// Consumes characters up to and including the next occurrence of `c`.
    ///
    /// If `c` does not occur in the rest of the input, everything up to the
    /// end of the input is consumed. The loop is driven by `Some(..)` so
    /// that reaching the end terminates it; comparing against `Some(c)`
    /// alone would spin forever there, because `advance` cannot move past
    /// the end.
    fn eat_until(&mut self, c: char) {
        while let Some(current) = self.current() {
            self.advance();
            if current == c {
                break;
            }
        }
    }

    /// Returns the number of space characters at the current position
    /// without consuming them.
    pub fn peek_indent_level(&mut self) -> usize {
        self.input[self.position..]
            .chars()
            .take_while(|&ch| ch == ' ')
            .count()
    }

    /// Reads the next branch from the input.
    ///
    /// Returns `Ok(None)` when the position is already at the end of the
    /// input. Otherwise the leading spaces are counted as the indent level,
    /// a kind marker (`-` or `+`) is read, and the content is consumed line
    /// by line until a line is directly followed by another newline or by
    /// the end of the input. The returned content range includes the
    /// newlines that were consumed.
    ///
    /// # Errors
    ///
    /// Returns [`ParseErrorKind::BranchKindExpected`] when the character
    /// after the indentation is neither `-` nor `+`. The error range covers
    /// the offending character, or is empty when the input ends there.
    pub fn next_branch(&mut self) -> Result<Option<BranchEvent>, ParseError> {
        if self.current().is_none() {
            return Ok(None);
        }

        let indent_level = self.eat_as_long_as(' ');

        // TODO: Configs
        let config_start = self.position;
        let config_end = self.position;

        let kind_start = self.position;
        let kind = match self.current() {
            Some('-') => BranchKind::Expanded,
            Some('+') => BranchKind::Collapsed,
            unexpected => {
                // Size the span by the actual character so it never points
                // past the input or into the middle of a multi-byte character.
                let width = unexpected.map_or(0, char::len_utf8);
                return Err(
                    ParseErrorKind::BranchKindExpected.at(kind_start..kind_start + width)
                );
            }
        };
        self.advance();
        let kind_end = self.position;

        let content_start = self.position;
        loop {
            self.eat_until('\n');
            // A second newline (blank line) or the end of input ends the content.
            if let Some('\n') | None = self.current() {
                self.advance();
                break;
            }
        }
        let content_end = self.position;

        Ok(Some(BranchEvent {
            indent_level,
            attributes: config_start..config_end,
            kind,
            kind_span: kind_start..kind_end,
            content: content_start..content_end,
        }))
    }
}