diff --git a/Cargo.lock b/Cargo.lock index 792b7b8..fb0ff22 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -312,6 +312,12 @@ dependencies = [ "termcolor", ] +[[package]] +name = "equivalent" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" + [[package]] name = "errno" version = "0.3.2" @@ -397,6 +403,17 @@ dependencies = [ "version_check", ] +[[package]] +name = "getrandom" +version = "0.2.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "be4136b2a15dd319360be1c07d9933517ccf0be8f16bf62a3bee4f0d618df427" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + [[package]] name = "gimli" version = "0.27.3" @@ -417,6 +434,12 @@ dependencies = [ "thiserror", ] +[[package]] +name = "hashbrown" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2c6201b9ff9fd90a5a3bac2e56a830d0caa509576f0e503818ee82c181b3437a" + [[package]] name = "heck" version = "0.4.1" @@ -498,6 +521,16 @@ dependencies = [ "want", ] +[[package]] +name = "indexmap" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d5477fe2230a79769d8dc68e0eabf5437907c0457a5614a9e8dddb67f65eb65d" +dependencies = [ + "equivalent", + "hashbrown", +] + [[package]] name = "is-terminal" version = "0.4.9" @@ -721,6 +754,12 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "ppv-lite86" +version = "0.2.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" + [[package]] name = "proc-macro2" version = "1.0.66" @@ -750,6 +789,36 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "rand" +version = "0.8.5" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom", +] + [[package]] name = "redox_syscall" version = "0.3.5" @@ -1028,6 +1097,23 @@ dependencies = [ "tokio", ] +[[package]] +name = "toml_datetime" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cda73e2f1397b1262d6dfdcef8aafae14d1de7748d66822d3bfeeb6d03e5e4b" + +[[package]] +name = "toml_edit" +version = "0.19.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8123f27e969974a3dfba720fdb560be359f57b44302d280ba72e76a74480e8a" +dependencies = [ + "indexmap", + "toml_datetime", + "winnow", +] + [[package]] name = "tower" version = "0.4.13" @@ -1129,11 +1215,14 @@ dependencies = [ "handlebars", "log", "pulldown-cmark", + "rand", "serde", "tokio", + "toml_edit", "tower-http", "tower-livereload", "treehouse-format", + "ulid", "walkdir", ] @@ -1162,6 +1251,15 @@ version = "0.1.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed646292ffc8188ef8ea4d1e0e0150fb15a5c2e12ad9b8fc191ae7a8a7f3c4b9" +[[package]] +name = "ulid" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13a3aaa69b04e5b66cc27309710a569ea23593612387d67daaf102e73aa974fd" +dependencies = [ + "rand", +] + [[package]] name = "unicase" version = "2.6.0" @@ -1316,3 +1414,12 @@ name = 
"windows_x86_64_msvc" version = "0.48.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6ade45bc8bf02ae2aa34a9d54ba660a1a58204da34ba793c00d83ca3730b5f1" + +[[package]] +name = "winnow" +version = "0.5.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d09770118a7eb1ccaf4a594a221334119a44a814fcb0d31c5b85e83e97227a97" +dependencies = [ + "memchr", +] diff --git a/content/index.tree b/content/index.tree index b86aed3..cd1216e 100644 --- a/content/index.tree +++ b/content/index.tree @@ -2,6 +2,7 @@ id = "treehouse" - # treehouse + % # example - welcome to the treehouse! - treehouse is a brand new static website generator, inspired by the likes of Jekyll and Hugo, but offering a writing experience more close to Logseq diff --git a/content/secret.tree b/content/secret.tree index 270dc86..c9caa46 100644 --- a/content/secret.tree +++ b/content/secret.tree @@ -1 +1,2 @@ +% id = "01H87RB4MB7RM6V2K1DR8XBV8N" - He is behind the tree. diff --git a/content/ulid-test.tree b/content/ulid-test.tree new file mode 100644 index 0000000..687c338 --- /dev/null +++ b/content/ulid-test.tree @@ -0,0 +1,35 @@ +% always_expanded = true + id = "treehouse" # this one has a custom id +- root + + % id = "01H87R133DNH9JRGS4ZWN586M4" + # this one is missing an ULID + - child + + % id = "01H87R133D75W92HRESGAXBTC5" + + # this one is formatted a bit weirdly + - another child + + % id = "custom-id" + - intertwined with one that has a custom ID + + % id = "01H87R133D4MXM01VGTEXHPVAR" + + # this is on a separate line and lacks indentation + - and another child + + % always_expanded = true + id = "01H87R133D9GDXRD7WA14ECDES" + - this one has a custom attribute but not an ID + + % id = "01H87R7PVRJ7FTWKC1SXHGQXDC" + - hellow + + % id = "01H87R133DHQPZWJVTMWPKVGP0" + - + +% id = "01H87R133DRJ7VKEMBSJKHYRJN" + + # comment +- diff --git a/crates/treehouse-format/src/pull.rs b/crates/treehouse-format/src/pull.rs index 16a3e68..66cacbe 100644 --- 
a/crates/treehouse-format/src/pull.rs +++ b/crates/treehouse-format/src/pull.rs @@ -108,6 +108,7 @@ impl<'a> Parser<'a> { self.advance(); let after_percent = self.position; self.eat_indented_lines_until(indent_level, |c| c == '-' || c == '+')?; + self.eat_as_long_as(' '); let end = self.position; Some(Attributes { percent: start..after_percent, diff --git a/crates/treehouse/Cargo.toml b/crates/treehouse/Cargo.toml index 6ba7d61..2d3c86d 100644 --- a/crates/treehouse/Cargo.toml +++ b/crates/treehouse/Cargo.toml @@ -18,8 +18,11 @@ handlebars = "4.3.7" pulldown-cmark = { version = "0.9.3", default-features = false } serde = { version = "1.0.183", features = ["derive"] } tokio = { version = "1.32.0", features = ["full"] } +toml_edit = "0.19.14" tower-http = { version = "0.4.3", features = ["fs"] } tower-livereload = "0.8.0" walkdir = "2.3.3" +ulid = "1.0.0" +rand = "0.8.5" diff --git a/crates/treehouse/src/cli/diagnostics.rs b/crates/treehouse/src/cli/diagnostics.rs new file mode 100644 index 0000000..8bc3983 --- /dev/null +++ b/crates/treehouse/src/cli/diagnostics.rs @@ -0,0 +1,42 @@ +use anyhow::Context; +use codespan_reporting::{ + diagnostic::Diagnostic, + files::SimpleFiles, + term::termcolor::{ColorChoice, StandardStream}, +}; + +pub type Files = SimpleFiles; +pub type FileId = >::FileId; + +pub struct Diagnosis { + pub files: Files, + pub diagnostics: Vec>, +} + +impl Diagnosis { + pub fn new() -> Self { + Self { + files: Files::new(), + diagnostics: vec![], + } + } + + /// Get the source code of a file, assuming it was previously registered. 
+ pub fn get_source(&self, file_id: FileId) -> &str { + self.files + .get(file_id) + .expect("file should have been registered previously") + .source() + } + + pub fn report(&self) -> anyhow::Result<()> { + let writer = StandardStream::stderr(ColorChoice::Auto); + let config = codespan_reporting::term::Config::default(); + for diagnostic in &self.diagnostics { + codespan_reporting::term::emit(&mut writer.lock(), &config, &self.files, diagnostic) + .context("could not emit diagnostic")?; + } + + Ok(()) + } +} diff --git a/crates/treehouse/src/cli/fix.rs b/crates/treehouse/src/cli/fix.rs new file mode 100644 index 0000000..f517319 --- /dev/null +++ b/crates/treehouse/src/cli/fix.rs @@ -0,0 +1,150 @@ +use std::ops::Range; + +use anyhow::Context; +use treehouse_format::ast::Branch; + +use super::{ + diagnostics::{Diagnosis, FileId}, + parse::{self, parse_toml_with_diagnostics, parse_tree_with_diagnostics}, + FixArgs, +}; + +struct Fix { + range: Range, + replacement: String, +} + +#[derive(Default)] +struct State { + fixes: Vec, +} + +fn dfs_fix_branch(diagnosis: &mut Diagnosis, file_id: FileId, state: &mut State, branch: &Branch) { + let mut rng = rand::thread_rng(); + let ulid = ulid::Generator::new() + .generate_with_source(&mut rng) + .expect("failed to generate ulid for block"); // (wtf moment. you know how big the 80-bit combination space is?) + + let indent = " ".repeat(branch.indent_level); + if let Some(attributes) = branch.attributes.clone() { + // Scenario: Attributes need to be parsed as TOML and the id attribute has to be added into + // the top-level table. Then we also need to pretty-print everything to match the right + // indentation level. 
+ if let Ok(mut toml) = + parse_toml_with_diagnostics(diagnosis, file_id, attributes.data.clone()) + { + if !toml.contains_key("id") { + toml["id"] = toml_edit::value(ulid.to_string()); + toml.key_decor_mut("id") + .unwrap() + .set_prefix(" ".repeat(branch.indent_level + 2)); + } + let mut toml_string = toml.to_string(); + + // This is incredibly janky and barely works. + let leading_spaces: usize = toml_string.chars().take_while(|&c| c == ' ').count(); + match leading_spaces { + 0 => toml_string.insert(0, ' '), + 1 => (), + _ => toml_string.replace_range(0..leading_spaces - 1, ""), + } + + let toml_string = fix_indent_in_generated_toml(&toml_string, branch.indent_level); + + state.fixes.push(Fix { + range: attributes.data.clone(), + replacement: toml_string, + }) + } + } else { + // Scenario: No attributes at all. + // In this case we can do a fast path where we generate the `% id = "whatever"` string + // directly, not going through toml_edit. + state.fixes.push(Fix { + range: branch.kind_span.start..branch.kind_span.start, + replacement: format!("% id = \"{ulid}\"\n{indent}"), + }); + } + + // Then we fix child branches. 
/// Re-indents TOML text so that it can be spliced into a tree file as a branch's attribute data.
///
/// Trailing whitespace of `toml` is discarded first. Then, line by line:
///
/// - an empty line is emitted as a bare `\n`;
/// - the first line is left-padded with spaces so that it starts with at least one space;
/// - every later line is left-padded so that it starts with at least `min_indent_level + 2`
///   spaces;
/// - a line that already has at least the desired number of leading spaces is copied unchanged
///   (indentation is only ever added, never removed).
///
/// Every emitted line is terminated with `\n`, and finally `min_indent_level` spaces are appended
/// without a newline (presumably so that whatever follows the attribute data in the file lands
/// on the branch's own indentation; confirm against the caller).
///
/// Note that a line consisting solely of spaces is not "empty" in the sense above, so it is
/// padded like any other line.
fn fix_indent_in_generated_toml(toml: &str, min_indent_level: usize) -> String {
    let toml = toml.trim_end();
    let continuation_indent = min_indent_level + 2;

    // The capacity is only a hint: the text itself, the final newline and the trailing
    // indentation. Per-line padding may still make the string grow.
    let mut result = String::with_capacity(toml.len() + 1 + min_indent_level);

    for (index, line) in toml.lines().enumerate() {
        if !line.is_empty() {
            let desired_indent = if index == 0 { 1 } else { continuation_indent };
            // Counting bytes is equivalent to counting chars here, because a space is a single
            // ASCII byte that never occurs inside a multi-byte UTF-8 sequence.
            let existing_indent = line.bytes().take_while(|&byte| byte == b' ').count();
            let missing_indent = desired_indent.saturating_sub(existing_indent);

            result.extend(std::iter::repeat(' ').take(missing_indent));
            result.push_str(line);
        }
        result.push('\n');
    }

    result.extend(std::iter::repeat(' ').take(min_indent_level));
    result
}
If writing that fails, bail out without overwriting + // the source file. + if let Some(backup_path) = fix_args.backup { + std::fs::write(backup_path, diagnosis.get_source(file_id)) + .context("cannot write backup; original file will not be overwritten")?; + } + std::fs::write(&fix_args.file, fixed).context("cannot overwrite original file")?; + } else { + println!("{fixed}"); + } + } else { + diagnosis.report()?; + } + + Ok(()) +} diff --git a/crates/treehouse/src/cli/mod.rs b/crates/treehouse/src/cli/mod.rs index c28572b..f2bece0 100644 --- a/crates/treehouse/src/cli/mod.rs +++ b/crates/treehouse/src/cli/mod.rs @@ -1,5 +1,10 @@ +pub mod diagnostics; +pub mod fix; +mod parse; pub mod regenerate; +use std::path::PathBuf; + use clap::{Args, Parser, Subcommand}; #[derive(Parser)] @@ -12,6 +17,9 @@ pub struct ProgramArgs { pub enum Command { /// Regenerate the website. Regenerate(#[clap(flatten)] RegenerateArgs), + + /// Populate missing metadata in blocks. + Fix(#[clap(flatten)] FixArgs), } #[derive(Args)] @@ -20,3 +28,19 @@ pub struct RegenerateArgs { #[clap(short, long)] pub serve: bool, } + +#[derive(Args)] +pub struct FixArgs { + /// Which file to fix. The fixed file will be printed into stdout so that you have a chance to + /// see the changes. + pub file: PathBuf, + + /// If you're happy with the suggested changes, specifying this will apply them to the file + /// (overwrite it in place.) + #[clap(long)] + pub apply: bool, + + /// Write the previous version back to the specified path. 
+ #[clap(long)] + pub backup: Option, +} diff --git a/crates/treehouse/src/cli/parse.rs b/crates/treehouse/src/cli/parse.rs new file mode 100644 index 0000000..5b12f31 --- /dev/null +++ b/crates/treehouse/src/cli/parse.rs @@ -0,0 +1,57 @@ +use std::{ops::Range, str::FromStr}; + +use codespan_reporting::diagnostic::{Diagnostic, Label, LabelStyle, Severity}; +use treehouse_format::ast::Roots; + +use super::diagnostics::{Diagnosis, FileId}; + +pub struct ErrorsEmitted; + +pub fn parse_tree_with_diagnostics( + diagnosis: &mut Diagnosis, + file_id: FileId, +) -> Result { + let input = diagnosis.get_source(file_id); + Roots::parse(&mut treehouse_format::pull::Parser { input, position: 0 }).map_err(|error| { + diagnosis.diagnostics.push(Diagnostic { + severity: Severity::Error, + code: Some("tree".into()), + message: error.kind.to_string(), + labels: vec![Label { + style: LabelStyle::Primary, + file_id, + range: error.range, + message: String::new(), + }], + notes: vec![], + }); + ErrorsEmitted + }) +} + +pub fn parse_toml_with_diagnostics( + diagnosis: &mut Diagnosis, + file_id: FileId, + range: Range, +) -> Result { + let input = &diagnosis.get_source(file_id)[range.clone()]; + toml_edit::Document::from_str(input).map_err(|error| { + diagnosis.diagnostics.push(Diagnostic { + severity: Severity::Error, + code: Some("toml".into()), + message: error.message().to_owned(), + labels: error + .span() + .map(|span| Label { + style: LabelStyle::Primary, + file_id, + range: range.start + span.start..range.start + span.end, + message: String::new(), + }) + .into_iter() + .collect(), + notes: vec![], + }); + ErrorsEmitted + }) +} diff --git a/crates/treehouse/src/cli/regenerate.rs b/crates/treehouse/src/cli/regenerate.rs index cf1f863..d68a133 100644 --- a/crates/treehouse/src/cli/regenerate.rs +++ b/crates/treehouse/src/cli/regenerate.rs @@ -7,8 +7,7 @@ use anyhow::{bail, Context}; use axum::Router; use codespan_reporting::{ diagnostic::{Diagnostic, Label, LabelStyle, Severity}, - 
files::{Files as _, SimpleFiles}, - term::termcolor::{ColorChoice, StandardStream}, + files::Files as _, }; use copy_dir::copy_dir; use handlebars::Handlebars; @@ -16,24 +15,17 @@ use log::{debug, info}; use serde::Serialize; use tower_http::services::ServeDir; use tower_livereload::LiveReloadLayer; -use treehouse_format::ast::Roots; use walkdir::WalkDir; -use crate::html::tree::branches_to_html; +use crate::{cli::parse::parse_tree_with_diagnostics, html::tree::branches_to_html}; + +use super::diagnostics::{Diagnosis, FileId}; #[derive(Default)] struct Generator { tree_files: Vec, } -type Files = SimpleFiles; -type FileId = >::FileId; - -pub struct Diagnosis { - pub files: Files, - pub diagnostics: Vec>, -} - impl Generator { fn add_directory_rec(&mut self, directory: &Path) -> anyhow::Result<()> { for entry in WalkDir::new(directory) { @@ -108,10 +100,7 @@ impl Generator { } fn generate_all_files(&self, dirs: &Dirs<'_>) -> anyhow::Result { - let mut diagnosis = Diagnosis { - files: Files::new(), - diagnostics: vec![], - }; + let mut diagnosis = Diagnosis::new(); let mut handlebars = Handlebars::new(); let tree_template = Self::register_template( @@ -145,51 +134,28 @@ impl Generator { } }; let file_id = diagnosis.files.add(utf8_filename.into_owned(), source); - let source = diagnosis - .files - .get(file_id) - .expect("file was just added to the list") - .source(); - let parse_result = Roots::parse(&mut treehouse_format::pull::Parser { - input: source, - position: 0, - }); + if let Ok(roots) = parse_tree_with_diagnostics(&mut diagnosis, file_id) { + let mut tree = String::new(); + let source = diagnosis.get_source(file_id); + branches_to_html(&mut tree, &roots.branches, source); - match parse_result { - Ok(roots) => { - let mut tree = String::new(); - branches_to_html(&mut tree, &roots.branches, source); + let template_data = TemplateData { tree }; + let templated_html = match handlebars.render("tree", &template_data) { + Ok(html) => html, + Err(error) => { + 
Self::wrangle_handlebars_error_into_diagnostic( + &mut diagnosis, + tree_template, + error.line_no, + error.column_no, + error.desc, + )?; + continue; + } + }; - let template_data = TemplateData { tree }; - let templated_html = match handlebars.render("tree", &template_data) { - Ok(html) => html, - Err(error) => { - Self::wrangle_handlebars_error_into_diagnostic( - &mut diagnosis, - tree_template, - error.line_no, - error.column_no, - error.desc, - )?; - continue; - } - }; - - std::fs::write(target_path, templated_html)?; - } - Err(error) => diagnosis.diagnostics.push(Diagnostic { - severity: Severity::Error, - code: Some("tree".into()), - message: error.kind.to_string(), - labels: vec![Label { - style: LabelStyle::Primary, - file_id, - range: error.range, - message: String::new(), - }], - notes: vec![], - }), + std::fs::write(target_path, templated_html)?; } } @@ -223,12 +189,7 @@ pub fn regenerate(dirs: &Dirs<'_>) -> anyhow::Result<()> { generator.add_directory_rec(dirs.content_dir)?; let diagnosis = generator.generate_all_files(dirs)?; - let writer = StandardStream::stderr(ColorChoice::Auto); - let config = codespan_reporting::term::Config::default(); - for diagnostic in &diagnosis.diagnostics { - codespan_reporting::term::emit(&mut writer.lock(), &config, &diagnosis.files, diagnostic) - .context("could not emit diagnostic")?; - } + diagnosis.report()?; Ok(()) } diff --git a/crates/treehouse/src/main.rs b/crates/treehouse/src/main.rs index 1c8d839..18ebdac 100644 --- a/crates/treehouse/src/main.rs +++ b/crates/treehouse/src/main.rs @@ -2,6 +2,7 @@ use std::path::Path; use clap::Parser; use cli::{ + fix::fix_file_cli, regenerate::{self, regenerate_or_report_error, Dirs}, Command, ProgramArgs, }; @@ -32,6 +33,8 @@ async fn fallible_main() -> anyhow::Result<()> { regenerate::web_server().await?; } } + + Command::Fix(fix_args) => fix_file_cli(fix_args)?, } Ok(())