remove treehouse-format crate and collapse everything into src

りき萌 2025-07-10 16:50:41 +02:00
parent ca127a9411
commit b792688776
66 changed files with 145 additions and 112 deletions

src/cli.rs Normal file

@@ -0,0 +1,88 @@
pub mod fix;
pub mod serve;
pub mod wc;
use std::path::PathBuf;
use clap::{Args, Parser, Subcommand};
use crate::vfs::VPathBuf;
#[derive(Parser)]
pub struct ProgramArgs {
/// Emit a Chrome trace to the given file.
#[clap(long)]
pub trace: Option<PathBuf>,
#[clap(subcommand)]
pub command: Command,
}
#[derive(Subcommand)]
pub enum Command {
/// Populate missing metadata in blocks.
Fix(#[clap(flatten)] FixArgs),
/// Populate missing metadata in blocks across all files.
///
/// By default only prints which files would be changed. To apply the changes, use `--apply`.
FixAll(#[clap(flatten)] FixAllArgs),
/// `generate` and start a treehouse server.
///
/// The server uses the generated files and provides extra functionality on top, such as
/// path redirects and the `/b` branch lookup endpoint.
Serve {
#[clap(flatten)]
generate: GenerateArgs,
#[clap(flatten)]
serve: ServeArgs,
},
/// Count words in the treehouse's branches.
Wc(#[clap(flatten)] WcArgs),
/// Generate a new ulid and print it to stdout.
Ulid,
}
#[derive(Args)]
pub struct GenerateArgs {}
#[derive(Args)]
pub struct FixArgs {
/// Which file to fix. The fixed file is printed to stdout so that you have a chance to
/// review the changes.
pub file: VPathBuf,
/// If you're happy with the suggested changes, specifying this will apply them to the file
/// (overwriting it in place).
#[clap(long)]
pub apply: bool,
/// Write the previous version back to the specified path.
#[clap(long)]
pub backup: Option<VPathBuf>,
}
#[derive(Args)]
pub struct FixAllArgs {
/// If you're happy with the suggested changes, specifying this will apply them to the files
/// (overwriting them in place).
#[clap(long)]
pub apply: bool,
}
#[derive(Args)]
pub struct ServeArgs {
/// The port under which to serve the treehouse.
#[clap(short, long, default_value_t = 8080)]
pub port: u16,
}
#[derive(Args)]
pub struct WcArgs {
/// A list of paths to report the word counts of.
/// If no paths are provided, the entire tree is word-counted.
pub paths: Vec<VPathBuf>,
}
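
A minimal sketch of how these clap definitions might be consumed (hypothetical entry point; the real main is not part of this excerpt):

```rust
use clap::Parser;

fn main() {
    match ProgramArgs::parse().command {
        // Ulid needs no further arguments; the other commands dispatch into
        // the submodules declared at the top of this file.
        Command::Ulid => println!("{}", ulid::Ulid::new()),
        Command::Fix(_) | Command::FixAll(_) | Command::Serve { .. } | Command::Wc(_) => {
            todo!("dispatch into cli::fix, cli::serve, cli::wc")
        }
    }
}
```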

src/cli/fix.rs Normal file

@@ -0,0 +1,235 @@
use std::ops::{ControlFlow, Range};
use anyhow::{anyhow, Context};
use codespan_reporting::diagnostic::Diagnostic;
use tracing::{error, info};
use crate::{
parse::{self, parse_toml_with_diagnostics, parse_tree_with_diagnostics},
state::{report_diagnostics, FileId, Source, Treehouse},
tree::ast::Branch,
vfs::{self, Content, Dir, Edit, EditPath, VPath},
};
use super::{FixAllArgs, FixArgs};
struct Fix {
range: Range<usize>,
replacement: String,
}
#[derive(Default)]
struct State {
fixes: Vec<Fix>,
}
fn dfs_fix_branch(treehouse: &mut Treehouse, file_id: FileId, state: &mut State, branch: &Branch) {
let mut rng = rand::thread_rng();
let ulid = ulid::Generator::new()
.generate_with_source(&mut rng)
.expect("failed to generate ulid for block"); // (wtf moment. you know how big the 80-bit combination space is?)
let indent = " ".repeat(branch.indent_level);
if let Some(attributes) = branch.attributes.clone() {
// Scenario: Attributes need to be parsed as TOML and the id attribute has to be added into
// the top-level table. Then we also need to pretty-print everything to match the right
// indentation level.
if let Ok(mut toml) =
parse_toml_with_diagnostics(treehouse, file_id, attributes.data.clone())
{
if !toml.contains_key("id") {
toml["id"] = toml_edit::value(ulid.to_string());
toml.key_decor_mut("id")
.unwrap()
.set_prefix(" ".repeat(branch.indent_level + 2));
}
let mut toml_string = toml.to_string();
// This is incredibly janky and barely works.
let leading_spaces: usize = toml_string.chars().take_while(|&c| c == ' ').count();
match leading_spaces {
0 => toml_string.insert(0, ' '),
1 => (),
_ => toml_string.replace_range(0..leading_spaces - 1, ""),
}
let toml_string = fix_indent_in_generated_toml(&toml_string, branch.indent_level);
state.fixes.push(Fix {
range: attributes.data.clone(),
replacement: toml_string,
})
}
} else {
// Scenario: No attributes at all.
// In this case we can do a fast path where we generate the `% id = "whatever"` string
// directly, not going through toml_edit.
state.fixes.push(Fix {
range: branch.kind_span.start..branch.kind_span.start,
replacement: format!("% id = \"{ulid}\"\n{indent}"),
});
}
// Then we fix child branches.
for child in &branch.children {
dfs_fix_branch(treehouse, file_id, state, child);
}
}
fn fix_indent_in_generated_toml(toml: &str, min_indent_level: usize) -> String {
let toml = toml.trim_end();
let mut result = String::with_capacity(toml.len());
for (i, line) in toml.lines().enumerate() {
if line.is_empty() {
result.push('\n');
} else {
let desired_line_indent_level = if i == 0 { 1 } else { min_indent_level + 2 };
let leading_spaces: usize = line.chars().take_while(|&c| c == ' ').count();
let needed_indentation = desired_line_indent_level.saturating_sub(leading_spaces);
for _ in 0..needed_indentation {
result.push(' ');
}
result.push_str(line);
result.push('\n');
}
}
for _ in 0..min_indent_level {
result.push(' ');
}
result
}
pub fn fix_file(
treehouse: &mut Treehouse,
diagnostics: &mut Vec<Diagnostic<FileId>>,
file_id: FileId,
) -> Result<String, parse::ErrorsEmitted> {
let source = treehouse.source(file_id).input();
parse_tree_with_diagnostics(file_id, source)
.map(|roots| {
let mut source = treehouse.source(file_id).input().to_owned();
let mut state = State::default();
for branch in &roots.branches {
dfs_fix_branch(treehouse, file_id, &mut state, branch);
}
// Doing a depth-first search of the branches yields fixes from the beginning of the file
// to its end. The most efficient way to apply all the fixes then is to reverse their order,
// which lets us modify the source string in place because the fix ranges always stay
// correct.
for fix in state.fixes.iter().rev() {
source.replace_range(fix.range.clone(), &fix.replacement);
}
source
})
.map_err(|mut new| {
diagnostics.append(&mut new);
parse::ErrorsEmitted
})
}
pub fn fix_file_cli(fix_args: FixArgs, root: &dyn Dir) -> anyhow::Result<Edit> {
let file = if &*fix_args.file == VPath::new("-") {
std::io::read_to_string(std::io::stdin().lock()).context("cannot read file from stdin")?
} else {
vfs::query::<Content>(root, &fix_args.file)
.ok_or_else(|| anyhow!("cannot read file to fix"))?
.string()?
};
let mut treehouse = Treehouse::new();
let mut diagnostics = vec![];
let file_id = treehouse.add_file(fix_args.file.clone(), Source::Other(file));
Ok(
if let Ok(fixed) = fix_file(&mut treehouse, &mut diagnostics, file_id) {
if fix_args.apply {
let edit_path = vfs::query::<EditPath>(root, &fix_args.file).ok_or_else(|| {
anyhow!(
"{} is not an editable file (perhaps it is not in a persistent path?)",
fix_args.file
)
})?;
// Try to write the backup first. If writing that fails, bail out without overwriting
// the source file.
if let Some(backup_path) = fix_args.backup {
let backup_edit_path =
vfs::query::<EditPath>(root, &backup_path).ok_or_else(|| {
anyhow!("backup file {backup_path} is not an editable file")
})?;
Edit::Seq(vec![
Edit::Write(
backup_edit_path,
treehouse.source(file_id).input().to_owned().into(),
),
Edit::Write(edit_path, fixed.into()),
])
} else {
Edit::Write(edit_path, fixed.into())
}
} else {
println!("{fixed}");
Edit::NoOp
}
} else {
report_diagnostics(&treehouse, &diagnostics)?;
Edit::NoOp
},
)
}
pub fn fix_all_cli(fix_all_args: FixAllArgs, dir: &dyn Dir) -> anyhow::Result<Edit> {
let mut edits = vec![];
fn fix_one(dir: &dyn Dir, path: &VPath) -> anyhow::Result<Edit> {
if path.extension() == Some("tree") {
let Some(content) = vfs::query::<Content>(dir, path).map(Content::bytes) else {
return Ok(Edit::NoOp);
};
let content = String::from_utf8(content).context("file is not valid UTF-8")?;
let mut treehouse = Treehouse::new();
let mut diagnostics = vec![];
let file_id = treehouse.add_file(path.to_owned(), Source::Other(content));
let edit_path = vfs::query::<EditPath>(dir, path).context("path is not editable")?;
if let Ok(fixed) = fix_file(&mut treehouse, &mut diagnostics, file_id) {
if fixed != treehouse.source(file_id).input() {
return Ok(Edit::Write(edit_path, fixed.into()));
}
} else {
report_diagnostics(&treehouse, &diagnostics)?;
}
}
Ok(Edit::NoOp)
}
info!("gathering edits");
vfs::walk_dir_rec(dir, VPath::ROOT, &mut |path| {
match fix_one(dir, path) {
Ok(Edit::NoOp) => (),
Ok(edit) => edits.push(edit),
Err(err) => error!("cannot fix {path}: {err:?}"),
}
ControlFlow::Continue(())
});
// NOTE: NoOp edits are filtered out above, so this counts only files that actually change.
info!("{} edits to apply", edits.len());
if !fix_all_args.apply {
info!("dry run; add `--apply` to apply changes");
Ok(Edit::Dry(Box::new(Edit::All(edits))))
} else {
Ok(Edit::All(edits))
}
}
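
A standalone sketch (invented input, not part of the commit) of why the fixes gathered by the DFS are applied back-to-front: replacing from the end of the string means earlier ranges are unaffected by later length changes.

```rust
fn main() {
    let mut source = String::from("abc def ghi");
    // Fixes as a DFS would yield them, front-to-back: (range, replacement).
    let fixes = [(0..3, "AAAA"), (8..11, "GG")];
    // Applying them in reverse keeps every not-yet-applied range valid even
    // though each replacement changes the string's length.
    for (range, replacement) in fixes.iter().rev() {
        source.replace_range(range.clone(), *replacement);
    }
    assert_eq!(source, "AAAA def GG");
}
```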

src/cli/serve.rs Normal file

@@ -0,0 +1,175 @@
#[cfg(debug_assertions)]
mod live_reload;
mod picture_upload;
use std::{net::Ipv4Addr, sync::Arc};
use axum::http::header::LOCATION;
use axum::{
extract::{Path, Query, RawQuery, State},
http::{
header::{CACHE_CONTROL, CONTENT_TYPE},
HeaderValue, StatusCode,
},
response::{Html, IntoResponse, Response},
routing::get,
Router,
};
use serde::Deserialize;
use tokio::net::TcpListener;
use tracing::{error, info, instrument};
use crate::dirs::Dirs;
use crate::sources::Sources;
use crate::state::Source;
use crate::vfs::asynch::AsyncDir;
use crate::vfs::{self, VPath};
mod system {
use crate::vfs::VPath;
pub const INDEX: &VPath = VPath::new_const("index");
pub const FOUR_OH_FOUR: &VPath = VPath::new_const("_treehouse/404");
pub const B_DOCS: &VPath = VPath::new_const("_treehouse/b");
}
struct Server {
sources: Arc<Sources>,
target: AsyncDir,
}
#[instrument(skip(sources, dirs, target))]
pub async fn serve(
sources: Arc<Sources>,
dirs: Arc<Dirs>,
target: AsyncDir,
port: u16,
) -> anyhow::Result<()> {
let app = Router::new()
.route("/", get(index)) // needed explicitly because * does not match empty paths
.route("/*path", get(vfs_entry))
.route("/b", get(branch))
.route("/treehouse/quit", get(quit))
.fallback(get(four_oh_four))
.with_state(Arc::new(Server {
sources: sources.clone(),
target,
}));
#[cfg(debug_assertions)]
let app = app
.nest("/dev/live-reload", live_reload::router())
.nest("/dev/picture-upload", picture_upload::router(dirs));
info!("serving on port {port}");
let listener = TcpListener::bind((Ipv4Addr::from([0u8, 0, 0, 0]), port)).await?;
Ok(axum::serve(listener, app).await?)
}
#[derive(Debug, Deserialize)]
struct VfsQuery {
#[serde(rename = "v")]
content_version: Option<String>,
}
#[instrument(skip(state))]
async fn get_static_file(path: &str, query: &VfsQuery, state: &Server) -> Option<Response> {
let vpath = VPath::try_new(path).ok()?;
if let Some(target) = state.sources.config.redirects.path.get(vpath) {
let url = vfs::url(&state.sources.config.site, state.target.sync(), target)?;
return Some((StatusCode::MOVED_PERMANENTLY, [(LOCATION, url)]).into_response());
}
let content = state.target.content(vpath).await?;
let content_type = HeaderValue::from_str(content.kind()).inspect_err(
|err| error!(?err, content_type = ?content.kind(), "content type cannot be used as an HTTP header"),
).ok()?;
let mut response = content.bytes().into_response();
response.headers_mut().insert(CONTENT_TYPE, content_type);
if query.content_version.is_some() {
response.headers_mut().insert(
CACHE_CONTROL,
HeaderValue::from_static("public, max-age=31536000, immutable"),
);
}
Some(response)
}
async fn vfs_entry(
Path(path): Path<String>,
Query(query): Query<VfsQuery>,
State(state): State<Arc<Server>>,
) -> Response {
if let Some(response) = get_static_file(&path, &query, &state).await {
response
} else {
four_oh_four(State(state)).await
}
}
async fn system_page(target: &AsyncDir, path: &VPath, status_code: StatusCode) -> Response {
if let Some(content) = target.content(path).await {
(status_code, Html(content.bytes())).into_response()
} else {
(
StatusCode::INTERNAL_SERVER_ERROR,
format!("500 Internal Server Error: system page {path} is not available"),
)
.into_response()
}
}
async fn index(State(state): State<Arc<Server>>) -> Response {
system_page(&state.target, system::INDEX, StatusCode::OK).await
}
async fn four_oh_four(State(state): State<Arc<Server>>) -> Response {
system_page(&state.target, system::FOUR_OH_FOUR, StatusCode::NOT_FOUND).await
}
#[instrument(skip(state))]
async fn branch(RawQuery(named_id): RawQuery, State(state): State<Arc<Server>>) -> Response {
if let Some(named_id) = named_id {
let branch_id = state
.sources
.treehouse
.branches_by_named_id
.get(&named_id)
.copied()
.or_else(|| {
state
.sources
.treehouse
.branch_redirects
.get(&named_id)
.copied()
});
if let Some(branch_id) = branch_id {
let branch = state.sources.treehouse.tree.branch(branch_id);
if let Source::Tree { tree_path, .. } = state.sources.treehouse.source(branch.file_id) {
if let Some(url) =
vfs::url(&state.sources.config.site, &state.target.sync(), tree_path)
{
let url = format!("{url}#{}", branch.html_id);
return (StatusCode::FOUND, [(LOCATION, url)]).into_response();
}
}
}
system_page(&state.target, system::FOUR_OH_FOUR, StatusCode::NOT_FOUND).await
} else {
system_page(&state.target, system::B_DOCS, StatusCode::OK).await
}
}
async fn quit() -> impl IntoResponse {
info!("somebody just quit the treehouse. congration to them!");
(
StatusCode::FOUND,
[(LOCATION, "https://www.youtube.com/watch?v=dQw4w9WgXcQ")],
)
}
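
Why `/` is routed explicitly: with this axum wildcard syntax, `/*path` requires at least one character after the slash, so the empty path would otherwise fall through to the 404 fallback. A stripped-down sketch (handlers invented) of the same routing shape:

```rust
use axum::{routing::get, Router};

#[tokio::main]
async fn main() {
    let app: Router = Router::new()
        .route("/", get(|| async { "index" }))        // the empty path
        .route("/*path", get(|| async { "a file" }))  // everything else
        .fallback(get(|| async { "404" }));
    let listener = tokio::net::TcpListener::bind(("0.0.0.0", 8080))
        .await
        .unwrap();
    axum::serve(listener, app).await.unwrap();
}
```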

src/cli/serve/live_reload.rs Normal file

@@ -0,0 +1,28 @@
use std::time::Duration;
use axum::{routing::get, Router};
use tokio::time::sleep;
pub fn router<S>() -> Router<S> {
let router = Router::new().route("/back-up", get(back_up));
// The endpoint for immediate reload is only enabled on debug builds.
// Release builds use the exponential backoff system that detects when the WebSocket is closed.
#[cfg(debug_assertions)]
let router = router.route("/stall", get(stall));
router.with_state(())
}
#[cfg(debug_assertions)]
async fn stall() -> String {
loop {
// Sleep for a day, I guess. Just to uphold the connection forever without really using any
// significant resources.
sleep(Duration::from_secs(60 * 60 * 24)).await;
}
}
async fn back_up() -> String {
"".into()
}

src/cli/serve/picture_upload.rs Normal file

@@ -0,0 +1,131 @@
use std::{io::Cursor, sync::Arc};
use axum::{
body::Bytes,
debug_handler,
extract::{DefaultBodyLimit, Query, State},
response::IntoResponse,
routing::post,
Json, Router,
};
use serde::{Deserialize, Serialize};
use tracing::info;
use crate::{
dirs::Dirs,
vfs::{self, Edit, EditPath, VPathBuf},
};
pub fn router<S>(dirs: Arc<Dirs>) -> Router<S> {
Router::new()
.route("/", post(picture_upload))
.layer(DefaultBodyLimit::disable())
.with_state(dirs)
}
#[derive(Debug, Deserialize)]
struct PictureUpload {
label: String,
format: String,
compression: Compression,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize)]
enum Compression {
Lossless,
GameScreenshot,
}
impl Compression {
pub fn output_format(self) -> Option<&'static str> {
match self {
Compression::Lossless => None,
Compression::GameScreenshot => Some("image/webp"),
}
}
}
#[derive(Serialize)]
#[serde(rename_all = "camelCase")]
enum Response {
Ulid(String),
Error(String),
}
fn compress(image_data: &[u8], compression: Compression) -> anyhow::Result<Vec<u8>> {
match compression {
Compression::Lossless => Ok(image_data.to_vec()),
Compression::GameScreenshot => {
info!("decompressing original image");
let decompressed = image::ImageReader::new(Cursor::new(image_data))
.with_guessed_format()?
.decode()?
.to_rgba8();
info!("compressing to webp");
let compressed = webp::Encoder::new(
&decompressed,
webp::PixelLayout::Rgba,
decompressed.width(),
decompressed.height(),
)
.encode(85.0)
.to_vec();
Ok(compressed)
}
}
}
async fn write_compressed(
image_data: &[u8],
compression: Compression,
edit_path: EditPath,
) -> anyhow::Result<()> {
let compressed = compress(image_data, compression)?;
Edit::Write(edit_path, compressed).apply().await?;
Ok(())
}
#[debug_handler]
async fn picture_upload(
State(dirs): State<Arc<Dirs>>,
Query(mut params): Query<PictureUpload>,
image: Bytes,
) -> impl IntoResponse {
info!(?params, "uploading picture");
let ulid = ulid::Generator::new()
.generate_with_source(&mut rand::thread_rng())
.expect("failed to generate ulid");
if params.label.is_empty() {
params.label = "untitled".into();
}
let file_name = VPathBuf::new(format!(
"{ulid}-{}.{}",
params.label,
get_extension(params.compression.output_format().unwrap_or(&params.format))
.unwrap_or("unknown")
));
let Some(edit_path) = vfs::query::<EditPath>(&dirs.pic, &file_name) else {
return Json(Response::Error(format!("{file_name} is not editable")));
};
let result = write_compressed(&image, params.compression, edit_path).await;
info!(?result, "done processing");
Json(match result {
Ok(()) => Response::Ulid(ulid.to_string()),
Err(error) => Response::Error(error.to_string()),
})
}
fn get_extension(content_type: &str) -> Option<&'static str> {
match content_type {
"image/png" => Some("png"),
"image/jpeg" => Some("jpg"),
"image/svg+xml" => Some("svg"),
"image/webp" => Some("webp"),
_ => None,
}
}
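
A hypothetical client for the endpoint above (assumes the `reqwest` crate; URL, label, and file are invented), showing how the query parameters and the raw request body fit together:

```rust
#[tokio::main]
async fn main() -> anyhow::Result<()> {
    let image = std::fs::read("screenshot.png")?;
    let response = reqwest::Client::new()
        .post("http://localhost:8080/dev/picture-upload")
        // Deserialized into PictureUpload; the Compression variant name is
        // passed verbatim as the value.
        .query(&[
            ("label", "my-screenshot"),
            ("format", "image/png"),
            ("compression", "GameScreenshot"),
        ])
        .body(image)
        .send()
        .await?;
    // The reply is the JSON-encoded Response: {"ulid": "..."} or {"error": "..."}.
    println!("{}", response.text().await?);
    Ok(())
}
```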

src/cli/wc.rs Normal file

@@ -0,0 +1,66 @@
use std::ops::ControlFlow;
use crate::{
parse::parse_tree_with_diagnostics,
state::{report_diagnostics, Source, Treehouse},
tree::ast::{Branch, Roots},
vfs::{self, Content, Dir, VPath},
};
use super::WcArgs;
fn wc_branch(source: &str, branch: &Branch) -> usize {
let word_count = source[branch.content.clone()].split_whitespace().count();
word_count
+ branch
.children
.iter()
.map(|branch| wc_branch(source, branch))
.sum::<usize>()
}
fn wc_roots(source: &str, roots: &Roots) -> usize {
roots
.branches
.iter()
.map(|branch| wc_branch(source, branch))
.sum()
}
pub fn wc_cli(content_dir: &dyn Dir, mut wc_args: WcArgs) -> anyhow::Result<()> {
if wc_args.paths.is_empty() {
vfs::walk_dir_rec(content_dir, VPath::ROOT, &mut |path| {
if path.extension() == Some("tree") {
wc_args.paths.push(path.to_owned());
}
ControlFlow::Continue(())
});
}
let mut treehouse = Treehouse::new();
let mut total = 0;
for path in &wc_args.paths {
if let Some(content) =
vfs::query::<Content>(content_dir, path).and_then(|b| b.string().ok())
{
let file_id = treehouse.add_file(path.clone(), Source::Other(content.clone()));
match parse_tree_with_diagnostics(file_id, &content) {
Ok(parsed) => {
let source = treehouse.source(file_id);
let word_count = wc_roots(source.input(), &parsed);
println!("{word_count:>8} {}", treehouse.path(file_id));
total += word_count;
}
Err(diagnostics) => {
report_diagnostics(&treehouse, &diagnostics)?;
}
}
}
}
println!("{total:>8} total");
Ok(())
}

src/config.rs Normal file

@@ -0,0 +1,225 @@
use std::{
collections::{HashMap, HashSet},
ops::ControlFlow,
};
use anyhow::{anyhow, Context};
use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
use serde::{Deserialize, Serialize};
use tracing::{error, info_span, instrument};
use crate::{
html::highlight::{
compiled::{compile_syntax, CompiledSyntax},
Syntax,
},
import_map::ImportRoot,
vfs::{self, Content, Dir, DynDir, ImageSize, VPath, VPathBuf},
};
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct VfsConfig {
/// Cache salt string. Passed to `Blake3ContentVersionCache` as a salt for content version hashes.
/// Can be changed to bust cached assets for all clients.
pub cache_salt: String,
}
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct Config {
/// Website root; used when generating links.
/// Can also be specified using the environment variable `$TREEHOUSE_SITE`. (This is the
/// preferred way of setting it in production, so as not to clobber treehouse.toml.)
pub site: String,
/// This is used to generate a link in the footer that links to the page's source commit.
/// The final URL is `{commit_base_url}/{commit}/content/{tree_path}.tree`.
pub commit_base_url: String,
/// User-defined keys.
pub user: HashMap<String, String>,
/// Links exported to Markdown for use with reference syntax `[text][def:key]`.
pub defs: HashMap<String, String>,
/// Config for syndication feeds.
pub feed: Feed,
/// Redirects for moving pages around. These are used solely by the treehouse server.
///
/// Note that redirects are only resolved _non-recursively_ by the server. For a configuration
/// like:
///
/// ```toml
/// page.redirects.foo = "bar"
/// page.redirects.bar = "baz"
/// ```
///
/// the user will be redirected from `foo` to `bar`, then from `bar` to `baz`. This isn't
/// optimal for UX and causes unnecessary latency. Therefore you should always make redirects
/// point to the newest version of the page.
///
/// ```toml
/// page.redirects.foo = "baz"
/// page.redirects.bar = "baz"
/// ```
pub redirects: Redirects,
/// How the treehouse should be built.
pub build: Build,
/// Overrides for emoji names. Useful for setting up aliases.
///
/// Paths are anchored within `static/emoji` and must not contain parent directories.
#[serde(default)]
pub emoji: HashMap<String, VPathBuf>,
/// Overrides for pic filenames. Useful for setting up aliases.
///
/// On top of this, pics are autodiscovered by walking the `static/pic` directory.
/// Only the part before the first dash is treated as the pic's id.
pub pics: HashMap<String, VPathBuf>,
/// Syntax definitions.
///
/// These are not part of the config file, but are loaded as part of site configuration from
/// `static/syntax`.
#[serde(skip)]
pub syntaxes: HashMap<String, CompiledSyntax>,
}
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct Feed {
/// Allowed tags in feed entries.
pub tags: HashSet<String>,
}
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct Redirects {
/// Path redirects. When a user requests a path, if they request `p`, they will be redirected
/// to `path[p]` with a `301 Moved Permanently` status code.
pub path: HashMap<VPathBuf, VPathBuf>,
}
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct Build {
/// Configuration for how JavaScript is compiled.
pub javascript: JavaScript,
}
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct JavaScript {
/// Import roots to generate in the project's import map.
pub import_roots: Vec<ImportRoot>,
}
impl Config {
#[instrument(name = "Config::autopopulate_emoji", skip(self))]
pub fn autopopulate_emoji(&mut self, dir: &dyn Dir) -> anyhow::Result<()> {
vfs::walk_dir_rec(dir, VPath::ROOT, &mut |path| {
if path.extension().is_some_and(is_image_file) {
if let Some(emoji_name) = path.file_stem() {
if !self.emoji.contains_key(emoji_name) {
self.emoji.insert(emoji_name.to_owned(), path.to_owned());
}
}
}
ControlFlow::Continue(())
});
Ok(())
}
#[instrument(name = "Config::autopopulate_pics", skip(self))]
pub fn autopopulate_pics(&mut self, dir: &dyn Dir) -> anyhow::Result<()> {
vfs::walk_dir_rec(dir, VPath::ROOT, &mut |path| {
if path.extension().is_some_and(is_image_file) {
if let Some(pic_name) = path.file_stem() {
let pic_id = pic_name
.split_once('-')
.map(|(before_dash, _after_dash)| before_dash)
.unwrap_or(pic_name);
if !self.pics.contains_key(pic_id) {
self.pics.insert(pic_id.to_owned(), path.to_owned());
}
}
}
ControlFlow::Continue(())
});
Ok(())
}
pub fn page_url(&self, page: &str) -> String {
format!("{}/{}", self.site, page)
}
pub fn pic_url(&self, pics_dir: &dyn Dir, id: &str) -> String {
vfs::url(
&self.site,
pics_dir,
self.pics
.get(id)
.map(|x| &**x)
.unwrap_or(VPath::new("404.png")),
)
.expect("pics_dir is not anchored anywhere")
}
pub fn pic_size(&self, pics_dir: &dyn Dir, id: &str) -> Option<ImageSize> {
self.pics
.get(id)
.and_then(|path| vfs::query::<ImageSize>(pics_dir, path))
}
/// Loads all syntax definition files.
#[instrument(name = "Config::load_syntaxes", skip(self))]
pub fn load_syntaxes(&mut self, dir: DynDir) -> anyhow::Result<()> {
let mut paths = vec![];
vfs::walk_dir_rec(&dir, VPath::ROOT, &mut |path| {
if path.extension() == Some("json") {
paths.push(path.to_owned());
}
ControlFlow::Continue(())
});
let syntaxes: Vec<_> = paths
.par_iter()
.flat_map(|path| {
let name = path
.file_stem()
.expect("syntax file name should have a stem due to the .json extension");
let result: Result<Syntax, _> = vfs::query::<Content>(&dir, path)
.ok_or_else(|| anyhow!("syntax .json is not a file"))
.and_then(|b| b.string().context("syntax .json contains invalid UTF-8"))
.and_then(|s| {
let _span = info_span!("Config::load_syntaxes::parse").entered();
serde_json::from_str(&s).context("could not deserialize syntax file")
});
match result {
Ok(syntax) => {
let _span = info_span!("Config::load_syntaxes::compile", ?name).entered();
let compiled = compile_syntax(&syntax);
Some((name.to_owned(), compiled))
}
Err(err) => {
error!("error while loading syntax file `{path}`: {err}");
None
}
}
})
.collect();
for (name, compiled) in syntaxes {
self.syntaxes.insert(name, compiled);
}
Ok(())
}
}
pub fn is_image_file(extension: &str) -> bool {
matches!(extension, "png" | "svg" | "jpg" | "jpeg" | "webp")
}
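
For orientation, a hypothetical `treehouse.toml` excerpt matching the fields above (all values invented; `syntaxes` is `#[serde(skip)]` and loaded from `static/syntax` instead):

```toml
site = "https://treehouse.example.com"
commit_base_url = "https://git.example.com/treehouse/src/commit"

[user]
title = "a treehouse"

[defs]
rust = "https://www.rust-lang.org"

[feed]
tags = ["programming"]

[redirects.path]
"old-page" = "new-page"

[build.javascript]
import_roots = []
```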

src/dirs.rs Normal file

@@ -0,0 +1,15 @@
use crate::vfs::DynDir;
#[derive(Debug, Clone)]
pub struct Dirs {
pub root: DynDir,
pub content: DynDir,
pub static_: DynDir,
pub template: DynDir,
// `static` directories
pub pic: DynDir,
pub emoji: DynDir,
pub syntax: DynDir,
}

src/fun.rs Normal file

@@ -0,0 +1 @@
pub mod seasons;

src/fun/seasons.rs Normal file

@@ -0,0 +1,78 @@
use chrono::{Datelike, Utc};
use serde::{Deserialize, Serialize};
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum Season {
Spring,
Summer,
Autumn,
Winter,
}
impl Season {
pub fn on(month: u32, day: u32) -> Option<Season> {
let md = (month, day);
Some(match () {
_ if ((1, 1)..=(3, 20)).contains(&md) => Season::Winter,
_ if ((3, 21)..=(6, 21)).contains(&md) => Season::Spring,
_ if ((6, 22)..=(9, 22)).contains(&md) => Season::Summer,
_ if ((9, 23)..=(12, 21)).contains(&md) => Season::Autumn,
_ if ((12, 22)..=(12, 31)).contains(&md) => Season::Winter,
// Just in case something really darn weird happens to the calendar.
_ => return None,
})
}
pub fn current() -> Option<Season> {
let now = Utc::now();
Self::on(now.month(), now.day())
}
}
#[cfg(test)]
mod tests {
use crate::fun::seasons::Season;
#[test]
fn all_the_seasons() {
assert_eq!(Season::on(0, 0), None);
assert_eq!(Season::on(1, 1), Some(Season::Winter));
assert_eq!(Season::on(1, 15), Some(Season::Winter));
assert_eq!(Season::on(1, 31), Some(Season::Winter));
assert_eq!(Season::on(2, 1), Some(Season::Winter));
assert_eq!(Season::on(2, 28), Some(Season::Winter));
assert_eq!(Season::on(2, 29), Some(Season::Winter));
assert_eq!(Season::on(3, 1), Some(Season::Winter));
assert_eq!(Season::on(3, 20), Some(Season::Winter));
assert_eq!(Season::on(3, 21), Some(Season::Spring));
assert_eq!(Season::on(3, 22), Some(Season::Spring));
assert_eq!(Season::on(4, 1), Some(Season::Spring));
assert_eq!(Season::on(4, 30), Some(Season::Spring));
assert_eq!(Season::on(5, 1), Some(Season::Spring));
assert_eq!(Season::on(5, 31), Some(Season::Spring));
assert_eq!(Season::on(6, 1), Some(Season::Spring));
assert_eq!(Season::on(6, 21), Some(Season::Spring));
assert_eq!(Season::on(6, 22), Some(Season::Summer));
assert_eq!(Season::on(6, 30), Some(Season::Summer));
assert_eq!(Season::on(7, 1), Some(Season::Summer));
assert_eq!(Season::on(7, 31), Some(Season::Summer));
assert_eq!(Season::on(8, 1), Some(Season::Summer));
assert_eq!(Season::on(8, 31), Some(Season::Summer));
assert_eq!(Season::on(9, 1), Some(Season::Summer));
assert_eq!(Season::on(9, 22), Some(Season::Summer));
assert_eq!(Season::on(9, 23), Some(Season::Autumn));
assert_eq!(Season::on(9, 30), Some(Season::Autumn));
assert_eq!(Season::on(10, 1), Some(Season::Autumn));
assert_eq!(Season::on(10, 31), Some(Season::Autumn));
assert_eq!(Season::on(11, 1), Some(Season::Autumn));
assert_eq!(Season::on(11, 30), Some(Season::Autumn));
assert_eq!(Season::on(12, 1), Some(Season::Autumn));
assert_eq!(Season::on(12, 21), Some(Season::Autumn));
assert_eq!(Season::on(12, 22), Some(Season::Winter));
assert_eq!(Season::on(12, 31), Some(Season::Winter));
assert_eq!(Season::on(12, 32), None);
assert_eq!(Season::on(21, 37), None);
}
}
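
The `(month, day)` ranges in `Season::on` lean on tuples comparing lexicographically: the month decides first, and the day only breaks ties. A standalone check of the boundary behavior:

```rust
fn main() {
    // (4, 30) is inside (3, 21)..=(6, 21) even though 30 > 21.
    assert!(((3u32, 21u32)..=(6, 21)).contains(&(4, 30)));
    // (6, 22) is out: equal month, day past the end of the range.
    assert!(!((3u32, 21u32)..=(6, 21)).contains(&(6, 22)));
}
```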

src/generate.rs Normal file

@@ -0,0 +1,238 @@
mod atom;
mod dir_helper;
mod include_static_helper;
mod simple_template;
mod tree;
use std::{collections::HashMap, fmt, ops::ControlFlow, sync::Arc};
use atom::FeedDir;
use dir_helper::DirHelper;
use handlebars::{handlebars_helper, Handlebars};
use include_static_helper::IncludeStaticHelper;
use serde::Serialize;
use tracing::{error, info_span, instrument};
use crate::{
config::Config,
dirs::Dirs,
fun::seasons::Season,
sources::Sources,
vfs::{
self, AnchoredAtExt, Cd, Content, ContentCache, Dir, DynDir, Entries, HtmlCanonicalize,
MemDir, Overlay, ToDynDir, VPath, VPathBuf,
},
};
#[derive(Serialize)]
struct BaseTemplateData<'a> {
config: &'a Config,
import_map: String,
season: Option<Season>,
dev: bool,
feeds: Vec<String>,
}
impl<'a> BaseTemplateData<'a> {
fn new(sources: &'a Sources) -> Self {
Self {
config: &sources.config,
import_map: serde_json::to_string_pretty(&sources.import_map)
.expect("import map should be serializable to JSON"),
season: Season::current(),
dev: cfg!(debug_assertions),
feeds: sources.treehouse.feeds_by_name.keys().cloned().collect(),
}
}
}
fn create_handlebars(site: &str, static_: DynDir) -> Handlebars<'static> {
let mut handlebars = Handlebars::new();
handlebars_helper!(cat: |a: String, b: String| a + &b);
handlebars.register_helper("cat", Box::new(cat));
handlebars.register_helper("asset", Box::new(DirHelper::new(site, static_.clone())));
handlebars.register_helper(
"include_static",
Box::new(IncludeStaticHelper::new(static_)),
);
handlebars
}
#[instrument(skip(handlebars))]
fn load_templates(handlebars: &mut Handlebars, dir: &dyn Dir) {
vfs::walk_dir_rec(dir, VPath::ROOT, &mut |path| {
if path.extension() == Some("hbs") {
if let Some(content) = vfs::query::<Content>(dir, path).and_then(|c| c.string().ok()) {
let _span = info_span!("register_template", ?path).entered();
if let Err(err) = handlebars.register_template_string(path.as_str(), content) {
error!("in template: {err}");
}
}
}
ControlFlow::Continue(())
});
}
struct TreehouseDir {
dirs: Arc<Dirs>,
sources: Arc<Sources>,
handlebars: Arc<Handlebars<'static>>,
dir_index: DirIndex,
}
impl TreehouseDir {
fn new(
dirs: Arc<Dirs>,
sources: Arc<Sources>,
handlebars: Arc<Handlebars<'static>>,
dir_index: DirIndex,
) -> Self {
Self {
dirs,
sources,
handlebars,
dir_index,
}
}
#[instrument("TreehouseDir::dir", skip(self))]
fn dir(&self, path: &VPath) -> Vec<VPathBuf> {
// NOTE: This does not include simple templates, because that's not really needed right now.
let mut index = &self.dir_index;
for component in path.segments() {
if let Some(child) = index.children.get(component) {
index = child;
} else {
// There cannot possibly be any entries under an invalid path.
// Bail early.
return vec![];
}
}
index
.children
.values()
.map(|child| child.full_path.clone())
.collect()
}
#[instrument("TreehouseDir::content", skip(self))]
fn content(&self, path: &VPath) -> Option<Content> {
let path = if path.is_root() {
VPath::new_const("index")
} else {
path
};
self.sources
.treehouse
.files_by_tree_path
.get(path)
.map(|&file_id| {
Content::new(
"text/html",
tree::generate_or_error(&self.sources, &self.dirs, &self.handlebars, file_id)
.into(),
)
})
.or_else(|| {
if path.file_name().is_some_and(|s| !s.starts_with('_')) {
let template_name = path.with_extension("hbs");
if self.handlebars.has_template(template_name.as_str()) {
return Some(Content::new(
"text/html",
simple_template::generate_or_error(
&self.sources,
&self.handlebars,
template_name.as_str(),
)
.into(),
));
}
}
None
})
}
}
impl Dir for TreehouseDir {
fn query(&self, path: &VPath, query: &mut vfs::Query) {
query.provide(|| Entries(self.dir(path)));
query.try_provide(|| self.content(path));
}
}
impl fmt::Debug for TreehouseDir {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str("TreehouseDir")
}
}
/// Acceleration structure for `dir` operations on [`TreehouseDir`]s.
#[derive(Debug, Default)]
struct DirIndex {
full_path: VPathBuf,
children: HashMap<VPathBuf, DirIndex>,
}
impl DirIndex {
#[instrument(name = "DirIndex::new", skip(paths))]
pub fn new<'a>(paths: impl Iterator<Item = &'a VPath>) -> Self {
let mut root = DirIndex::default();
for path in paths {
let mut parent = &mut root;
let mut full_path = VPath::ROOT.to_owned();
for segment in path.segments() {
full_path.push(segment);
let child = parent
.children
.entry(segment.to_owned())
.or_insert_with(|| DirIndex {
full_path: full_path.clone(),
children: HashMap::new(),
});
parent = child;
}
}
root
}
}
pub fn target(dirs: Arc<Dirs>, sources: Arc<Sources>) -> DynDir {
let mut handlebars = create_handlebars(&sources.config.site, dirs.static_.clone());
load_templates(&mut handlebars, &dirs.template);
let handlebars = Arc::new(handlebars);
let mut root = MemDir::new();
root.add(
VPath::new("feed"),
ContentCache::new(FeedDir::new(
dirs.clone(),
sources.clone(),
handlebars.clone(),
))
.to_dyn(),
);
root.add(VPath::new("static"), dirs.static_.clone());
root.add(
VPath::new("robots.txt"),
Cd::new(dirs.static_.clone(), VPathBuf::new("robots.txt")).to_dyn(),
);
let dir_index = DirIndex::new(sources.treehouse.files_by_tree_path.keys().map(|x| &**x));
let tree_view = TreehouseDir::new(dirs, sources, handlebars, dir_index);
let tree_view = ContentCache::new(tree_view);
tree_view.warm_up();
let tree_view = HtmlCanonicalize::new(tree_view);
Overlay::new(tree_view.to_dyn(), root.to_dyn())
.anchored_at(VPath::ROOT.to_owned())
.to_dyn()
}
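
A self-contained sketch (paths invented) of the lookup that `DirIndex` accelerates: `dir()` walks one segment at a time through a trie instead of filtering every known tree path on each request.

```rust
use std::collections::HashMap;

#[derive(Default)]
struct Node {
    children: HashMap<String, Node>,
}

fn main() {
    let mut root = Node::default();
    for path in ["philosophy/goals", "philosophy/rationale"] {
        let mut node = &mut root;
        for segment in path.split('/') {
            node = node.children.entry(segment.to_string()).or_default();
        }
    }
    // Listing a directory is now a short walk plus one pass over the children.
    let dir = &root.children["philosophy"];
    let mut entries: Vec<&str> = dir.children.keys().map(|s| s.as_str()).collect();
    entries.sort();
    assert_eq!(entries, ["goals", "rationale"]);
}
```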

src/generate/atom.rs Normal file

@@ -0,0 +1,304 @@
use std::{fmt, sync::Arc};
use anyhow::Context;
use chrono::{DateTime, Utc};
use handlebars::Handlebars;
use serde::Serialize;
use tracing::{info, info_span, instrument};
use ulid::Ulid;
use crate::{
dirs::Dirs,
html::djot::{self, resolve_link},
sources::Sources,
state::FileId,
tree::SemaBranchId,
vfs::{self, Content, Dir, Entries, VPath, VPathBuf},
};
use super::BaseTemplateData;
pub struct FeedDir {
dirs: Arc<Dirs>,
sources: Arc<Sources>,
handlebars: Arc<Handlebars<'static>>,
}
impl FeedDir {
pub fn new(
dirs: Arc<Dirs>,
sources: Arc<Sources>,
handlebars: Arc<Handlebars<'static>>,
) -> Self {
Self {
dirs,
sources,
handlebars,
}
}
fn entries(&self, path: &VPath) -> Vec<VPathBuf> {
if path == VPath::ROOT {
self.sources
.treehouse
.feeds_by_name
.keys()
.map(|name| VPathBuf::new(format!("{name}.atom")))
.collect()
} else {
vec![]
}
}
fn content(&self, path: &VPath) -> Option<Content> {
if path.extension() == Some("atom") {
let feed_name = path.with_extension("").to_string();
self.sources
.treehouse
.feeds_by_name
.get(&feed_name)
.map(|file_id| {
Content::new(
"application/atom+xml",
generate_or_error(&self.sources, &self.dirs, &self.handlebars, *file_id)
.into(),
)
})
} else {
None
}
}
}
impl Dir for FeedDir {
fn query(&self, path: &VPath, query: &mut vfs::Query) {
query.provide(|| Entries(self.entries(path)));
query.try_provide(|| self.content(path));
}
}
impl fmt::Debug for FeedDir {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str("FeedDir")
}
}
#[derive(Serialize)]
struct Feed {
name: String,
updated: DateTime<Utc>,
entries: Vec<Entry>,
}
#[derive(Serialize)]
struct Entry {
id: String,
updated: DateTime<Utc>,
url: String,
title: String,
categories: Vec<String>,
summary: String,
}
#[derive(Serialize)]
struct AtomTemplateData<'a> {
#[serde(flatten)]
base: &'a BaseTemplateData<'a>,
feed: Feed,
}
#[instrument(name = "atom::generate", skip(sources, handlebars))]
pub fn generate(
sources: &Sources,
dirs: &Dirs,
handlebars: &Handlebars,
file_id: FileId,
) -> anyhow::Result<String> {
let roots = &sources.treehouse.roots[&file_id];
let feed_name = roots.attributes.feed.clone().expect("page must be a feed");
let template_data = AtomTemplateData {
base: &BaseTemplateData::new(sources),
feed: Feed {
name: feed_name,
// The content cache layer should take care of sampling the current time only once,
// and then preserving it until the treehouse is deployed again.
updated: Utc::now(),
entries: extract_entries(sources, dirs, file_id),
},
};
let _span = info_span!("handlebars::render").entered();
handlebars
.render("_feed_atom.hbs", &template_data)
.context("template rendering failed")
}
pub fn generate_or_error(
sources: &Sources,
dirs: &Dirs,
handlebars: &Handlebars,
file_id: FileId,
) -> String {
match generate(sources, dirs, handlebars, file_id) {
Ok(html) => html,
Err(error) => format!("error: {error:?}"),
}
}
fn extract_entries(sources: &Sources, dirs: &Dirs, file_id: FileId) -> Vec<Entry> {
let roots = &sources.treehouse.roots[&file_id];
roots
.branches
.iter()
.flat_map(|&branch_id| {
let branch = sources.treehouse.tree.branch(branch_id);
let text = &sources.treehouse.source(file_id).input()[branch.content.clone()];
let parsed = parse_entry(sources, dirs, file_id, jotdown::Parser::new(text));
let mut summary = String::new();
branches_to_html_simple(&mut summary, sources, dirs, file_id, &branch.children);
let updated = branch
.attributes
.timestamp()
.unwrap_or(DateTime::UNIX_EPOCH); // if you see the Unix epoch... oops
parsed.link.map(|url| Entry {
id: branch.attributes.id.clone(),
updated,
url,
title: parsed.title.unwrap_or_else(|| "untitled".into()),
categories: branch.attributes.tags.clone(),
summary,
})
})
.collect()
}
#[derive(Debug, Clone)]
struct ParsedEntry {
title: Option<String>,
link: Option<String>,
}
fn parse_entry(
sources: &Sources,
dirs: &Dirs,
file_id: FileId,
parser: jotdown::Parser,
) -> ParsedEntry {
let mut parser = parser.into_offset_iter();
while let Some((event, span)) = parser.next() {
if let jotdown::Event::Start(jotdown::Container::Heading { .. }, _attrs) = &event {
let mut events = vec![(event, span)];
for (event, span) in parser.by_ref() {
// To my knowledge headings cannot nest, so it's okay not keeping a stack here.
let is_heading = matches!(
event,
jotdown::Event::End(jotdown::Container::Heading { .. })
);
events.push((event, span));
if is_heading {
break;
}
}
let title_events: Vec<_> = events
.iter()
.filter(|(event, _)| {
!matches!(
event,
// A little repetitive, but I don't mind.
// The point of this is not to include extra <h3> and <a> in the link text,
// but preserve other formatting such as bold, italic, code, etc.
jotdown::Event::Start(
jotdown::Container::Link(_, _) | jotdown::Container::Heading { .. },
_
) | jotdown::Event::End(
jotdown::Container::Link(_, _) | jotdown::Container::Heading { .. }
)
)
})
.cloned()
.collect();
let mut title = String::new();
let _render_diagnostics = djot::Renderer {
config: &sources.config,
dirs,
treehouse: &sources.treehouse,
file_id,
// How. Just, stop.
page_id: "liquidex-you-reeeeeal-dummy".into(),
}
.render(&title_events, &mut title);
let link = events.iter().find_map(|(event, _)| {
if let jotdown::Event::Start(jotdown::Container::Link(link, link_type), _) = event {
Some(link_url(sources, dirs, link, *link_type))
} else {
None
}
});
return ParsedEntry {
title: (!title.is_empty()).then_some(title),
link,
};
}
}
ParsedEntry {
title: None,
link: None,
}
}
fn link_url(sources: &Sources, dirs: &Dirs, url: &str, link_type: jotdown::LinkType) -> String {
if let jotdown::LinkType::Span(jotdown::SpanLinkType::Unresolved) = link_type {
if let Some(url) = resolve_link(&sources.config, &sources.treehouse, dirs, url) {
return url;
}
}
url.to_owned()
}
/// Extremely simple HTML renderer without the treehouse's fancy branch folding and linking features.
fn branches_to_html_simple(
s: &mut String,
sources: &Sources,
dirs: &Dirs,
file_id: FileId,
branches: &[SemaBranchId],
) {
s.push_str("<ul>");
for &branch_id in branches {
let branch = sources.treehouse.tree.branch(branch_id);
s.push_str("<li>");
let text = &sources.treehouse.source(file_id).input()[branch.content.clone()];
let events: Vec<_> = jotdown::Parser::new(text).into_offset_iter().collect();
// Ignore render diagnostics. Those should be reported by the main HTML generator.
let _render_diagnostics = djot::Renderer {
config: &sources.config,
dirs,
treehouse: &sources.treehouse,
file_id,
// Yeah, maybe don't include literate code in summaries...
page_id: "liquidex-is-a-dummy".into(),
}
.render(&events, s);
if !branch.children.is_empty() {
branches_to_html_simple(s, sources, dirs, file_id, &branch.children);
}
s.push_str("</li>");
}
s.push_str("</ul>");
}

src/generate/dir_helper.rs Normal file

@@ -0,0 +1,37 @@
use handlebars::{Context, Handlebars, Helper, HelperDef, RenderContext, RenderError, ScopedJson};
use serde_json::Value;
use crate::vfs::{self, DynDir, VPath};
pub struct DirHelper {
site: String,
dir: DynDir,
}
impl DirHelper {
pub fn new(site: &str, dir: DynDir) -> Self {
Self {
site: site.to_owned(),
dir,
}
}
}
impl HelperDef for DirHelper {
fn call_inner<'reg: 'rc, 'rc>(
&self,
h: &Helper<'reg, 'rc>,
_: &'reg Handlebars<'reg>,
_: &'rc Context,
_: &mut RenderContext<'reg, 'rc>,
) -> Result<ScopedJson<'reg, 'rc>, RenderError> {
if let Some(path) = h.param(0).and_then(|v| v.value().as_str()) {
let vpath = VPath::try_new(path).map_err(|e| RenderError::new(e.to_string()))?;
let url = vfs::url(&self.site, &self.dir, vpath)
.ok_or_else(|| RenderError::new("path is not anchored anywhere"))?;
Ok(ScopedJson::Derived(Value::String(url)))
} else {
Err(RenderError::new("missing path string"))
}
}
}

src/generate/include_static_helper.rs Normal file

@@ -0,0 +1,35 @@
use handlebars::{Context, Handlebars, Helper, HelperDef, RenderContext, RenderError, ScopedJson};
use serde_json::Value;
use crate::vfs::{self, Content, DynDir, VPath};
pub struct IncludeStaticHelper {
dir: DynDir,
}
impl IncludeStaticHelper {
pub fn new(dir: DynDir) -> Self {
Self { dir }
}
}
impl HelperDef for IncludeStaticHelper {
fn call_inner<'reg: 'rc, 'rc>(
&self,
h: &Helper<'reg, 'rc>,
_: &'reg Handlebars<'reg>,
_: &'rc Context,
_: &mut RenderContext<'reg, 'rc>,
) -> Result<ScopedJson<'reg, 'rc>, RenderError> {
if let Some(path) = h.param(0).and_then(|v| v.value().as_str()) {
let vpath = VPath::try_new(path).map_err(|e| RenderError::new(e.to_string()))?;
let content = vfs::query::<Content>(&self.dir, vpath)
.ok_or_else(|| RenderError::new("file does not exist"))?
.string()
.map_err(|_| RenderError::new("included file does not contain UTF-8 text"))?;
Ok(ScopedJson::Derived(Value::String(content)))
} else {
Err(RenderError::new("missing path string"))
}
}
}
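
Assumed template-side usage of the two helpers (they are registered as `asset` and `include_static` in `create_handlebars`; the paths are invented): `asset` resolves to a full URL on the configured site, while `include_static` pastes the file's contents inline.

```handlebars
<link rel="stylesheet" href="{{ asset "css/tree.css" }}">
<script type="module">{{ include_static "js/boot.js" }}</script>
```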

src/generate/simple_template.rs Normal file

@@ -0,0 +1,30 @@
use anyhow::Context;
use handlebars::Handlebars;
use tracing::instrument;
use crate::sources::Sources;
use super::BaseTemplateData;
#[instrument(name = "simple_template::generate", skip(sources, handlebars))]
pub fn generate(
sources: &Sources,
handlebars: &Handlebars,
template_name: &str,
) -> anyhow::Result<String> {
let base_template_data = BaseTemplateData::new(sources);
handlebars
.render(template_name, &base_template_data)
.context("failed to render template")
}
pub fn generate_or_error(
sources: &Sources,
handlebars: &Handlebars,
template_name: &str,
) -> String {
match generate(sources, handlebars, template_name) {
Ok(html) => html,
Err(error) => format!("error: {error:?}"),
}
}

src/generate/tree.rs Normal file

@@ -0,0 +1,111 @@
use anyhow::{ensure, Context};
use handlebars::Handlebars;
use serde::Serialize;
use tracing::{info_span, instrument};
use crate::{
dirs::Dirs,
generate::BaseTemplateData,
html::{breadcrumbs::breadcrumbs_to_html, tree},
sources::Sources,
state::FileId,
};
#[derive(Serialize)]
struct Page {
title: String,
thumbnail: Option<Thumbnail>,
scripts: Vec<String>,
styles: Vec<String>,
breadcrumbs: String,
tree_path: Option<String>,
tree: String,
}
#[derive(Serialize)]
struct Thumbnail {
url: String,
alt: Option<String>,
}
#[derive(Serialize)]
struct PageTemplateData<'a> {
#[serde(flatten)]
base: &'a BaseTemplateData<'a>,
page: Page,
}
#[instrument(skip(sources, dirs, handlebars))]
pub fn generate(
sources: &Sources,
dirs: &Dirs,
handlebars: &Handlebars,
file_id: FileId,
) -> anyhow::Result<String> {
let breadcrumbs = breadcrumbs_to_html(&sources.config, &sources.navigation_map, file_id);
let roots = sources
.treehouse
.roots
.get(&file_id)
.expect("tree should have been added to the treehouse");
let tree = {
let _span = info_span!("generate_tree::root_to_html").entered();
let renderer = tree::Renderer {
sources,
dirs,
file_id,
};
let mut tree = String::new();
renderer.root(&mut tree);
tree
};
let template_data = PageTemplateData {
base: &BaseTemplateData::new(sources),
page: Page {
title: roots.attributes.title.clone(),
thumbnail: roots
.attributes
.thumbnail
.as_ref()
.map(|thumbnail| Thumbnail {
url: sources.config.pic_url(&*dirs.pic, &thumbnail.id),
alt: thumbnail.alt.clone(),
}),
scripts: roots.attributes.scripts.clone(),
styles: roots.attributes.styles.clone(),
breadcrumbs,
tree_path: sources.treehouse.tree_path(file_id).map(|s| s.to_string()),
tree,
},
};
let template_name = roots
.attributes
.template
.clone()
.unwrap_or_else(|| "_tree.hbs".into());
ensure!(
handlebars.has_template(&template_name),
"template {template_name} does not exist"
);
let _span = info_span!("handlebars::render").entered();
handlebars
.render(&template_name, &template_data)
.context("template rendering failed")
}
pub fn generate_or_error(
sources: &Sources,
dirs: &Dirs,
handlebars: &Handlebars,
file_id: FileId,
) -> String {
match generate(sources, dirs, handlebars, file_id) {
Ok(html) => html,
Err(error) => format!("error: {error:?}"),
}
}

src/history.rs Normal file

@@ -0,0 +1,106 @@
use std::collections::HashMap;
use indexmap::IndexMap;
use tracing::debug;
#[derive(Debug, Default, Clone)]
pub struct History {
// Sorted from newest to oldest.
pub commits: IndexMap<git2::Oid, Commit>,
pub by_page: HashMap<String, PageHistory>,
}
#[derive(Debug, Clone)]
pub struct Commit {
pub summary: String,
pub body: String,
}
#[derive(Debug, Clone, Default)]
pub struct PageHistory {
// Sorted from newest to oldest, so revision 0 is the current version.
// On the website these are sorted differently: 1 is the oldest revision, succeeding numbers are later revisions.
pub revisions: Vec<Revision>,
}
#[derive(Debug, Clone)]
pub struct Revision {
pub commit_oid: git2::Oid,
pub blob_oid: git2::Oid,
}
impl History {
pub fn get(git: &git2::Repository) -> anyhow::Result<Self> {
debug!("reading git history");
let mut history = History::default();
let mut revwalk = git.revwalk()?;
revwalk.push_head()?;
for commit_oid in revwalk {
let commit_oid = commit_oid?;
let commit = git.find_commit(commit_oid)?;
history.commits.insert(
commit_oid,
Commit {
summary: String::from_utf8_lossy(commit.summary_bytes().unwrap_or(&[]))
.into_owned(),
body: String::from_utf8_lossy(commit.body_bytes().unwrap_or(&[])).into_owned(),
},
);
let tree = commit.tree()?;
tree.walk(git2::TreeWalkMode::PreOrder, |parent_path, entry| {
if parent_path.is_empty() && entry.name() != Some("content") {
// This is content-only history, so skip all directories that don't contain content.
git2::TreeWalkResult::Skip
} else if entry.kind() == Some(git2::ObjectType::Blob)
&& entry.name().is_some_and(|name| name.ends_with(".tree"))
{
let path = format!(
"{parent_path}{}",
String::from_utf8_lossy(entry.name_bytes())
);
let page_history = history.by_page.entry(path).or_default();
let unchanged = page_history
.revisions
.last()
.is_some_and(|rev| rev.blob_oid == entry.id());
if unchanged {
// Note again that the history is reversed as we're walking from HEAD
// backwards, so we need to find the _earliest_ commit with this revision.
// Therefore we update that current revision's commit oid with the
// current commit.
page_history.revisions.last_mut().unwrap().commit_oid = commit_oid;
} else {
page_history.revisions.push(Revision {
commit_oid,
blob_oid: entry.id(),
});
}
git2::TreeWalkResult::Ok
} else {
git2::TreeWalkResult::Ok
}
})?;
}
Ok(history)
}
pub fn read_revision(
&self,
git: &git2::Repository,
revision: &Revision,
) -> anyhow::Result<Vec<u8>> {
Ok(git.find_blob(revision.blob_oid)?.content().to_owned())
}
}
impl Revision {
pub fn commit_short(&self) -> String {
self.commit_oid.to_string()[0..6].to_owned()
}
}
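
A standalone simulation (invented commits and blobs) of the revision folding above: walking newest-to-oldest, the last revision's commit keeps being rewritten until it points at the earliest commit that still carries the same blob.

```rust
fn main() {
    // (commit, blob) pairs as a revwalk yields them, HEAD backwards:
    let walk = [("C3", "B2"), ("C2", "B1"), ("C1", "B1")];
    let mut revisions: Vec<(&str, &str)> = vec![];
    for (commit, blob) in walk {
        match revisions.last_mut() {
            // Same content as the previous revision: attribute it to the
            // earlier commit instead of adding a new revision.
            Some(last) if last.1 == blob => last.0 = commit,
            _ => revisions.push((commit, blob)),
        }
    }
    // One revision per distinct content, newest to oldest.
    assert_eq!(revisions, [("C3", "B2"), ("C1", "B1")]);
}
```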

src/html.rs Normal file

@@ -0,0 +1,40 @@
use std::fmt::{self, Display, Write};
pub mod breadcrumbs;
pub mod djot;
pub mod highlight;
pub mod navmap;
pub mod tree;
pub struct EscapeAttribute<'a>(pub &'a str);
impl Display for EscapeAttribute<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
for c in self.0.chars() {
if c == '"' {
f.write_str("&quot;")?;
} else {
f.write_char(c)?;
}
}
Ok(())
}
}
pub struct EscapeHtml<'a>(pub &'a str);
impl Display for EscapeHtml<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
for c in self.0.chars() {
match c {
'<' => f.write_str("&lt;")?,
'>' => f.write_str("&gt;")?,
'&' => f.write_str("&amp;")?,
'\'' => f.write_str("&apos;")?,
'"' => f.write_str("&quot;")?,
_ => f.write_char(c)?,
}
}
Ok(())
}
}
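
A quick check (assumed usage, with the two types above in scope): `EscapeAttribute` only needs to neutralize double quotes because it is written inside double-quoted attributes, while `EscapeHtml` covers the full set.

```rust
fn main() {
    assert_eq!(
        EscapeHtml("<a href=\"x\">").to_string(),
        "&lt;a href=&quot;x&quot;&gt;"
    );
    assert_eq!(EscapeAttribute("say \"hi\"").to_string(), "say &quot;hi&quot;");
}
```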

src/html/breadcrumbs.rs Normal file

@@ -0,0 +1,47 @@
use std::{borrow::Cow, fmt::Write};
use tracing::instrument;
use crate::{config::Config, state::FileId, vfs::VPath};
use super::{navmap::NavigationMap, EscapeAttribute};
#[instrument(skip(config, navigation_map))]
pub fn breadcrumbs_to_html(
config: &Config,
navigation_map: &NavigationMap,
file_id: FileId,
) -> String {
let mut s = String::new();
if let Some(path) = navigation_map.paths.get(&file_id) {
for (i, element) in path.iter().enumerate() {
// Skip the index because it's implied by the logo on the left.
if &**element != VPath::new_const("index") {
s.push_str("<li class=\"breadcrumb\">");
{
let short_element = i
.checked_sub(1) // avoid usize underflow when the first element isn't "index"
.and_then(|i| path.get(i))
.map(|p| format!("{p}/"))
.and_then(|prefix| {
element
.as_str()
.strip_prefix(prefix.as_str())
.map(Cow::Borrowed)
})
.unwrap_or_else(|| Cow::Owned(format!("/{element}")));
write!(
s,
"<a href=\"{site}/{element}\">{short_element}</a>",
site = EscapeAttribute(&config.site),
element = EscapeAttribute(element.as_str())
)
.unwrap();
}
s.push_str("</li>");
}
}
}
s
}

src/html/djot.rs Normal file

@@ -0,0 +1,692 @@
//! Djot -> HTML renderer adapted from the one in jotdown.
//! Made concrete to avoid generic hell, with added treehouse-specific features.
use std::fmt::Write;
use std::ops::Range;
use codespan_reporting::diagnostic::Diagnostic;
use codespan_reporting::diagnostic::Label;
use codespan_reporting::diagnostic::LabelStyle;
use codespan_reporting::diagnostic::Severity;
use jotdown::Alignment;
use jotdown::Container;
use jotdown::Event;
use jotdown::LinkType;
use jotdown::ListKind;
use jotdown::OrderedListNumbering::*;
use jotdown::SpanLinkType;
use crate::config::Config;
use crate::dirs::Dirs;
use crate::state::FileId;
use crate::state::Treehouse;
use crate::vfs;
use crate::vfs::ImageSize;
use super::highlight::highlight;
/// [`Render`] implementor that writes HTML output.
pub struct Renderer<'a> {
pub config: &'a Config,
pub dirs: &'a Dirs,
pub treehouse: &'a Treehouse,
pub file_id: FileId,
pub page_id: String,
}
impl Renderer<'_> {
#[must_use]
pub fn render(
self,
events: &[(Event, Range<usize>)],
out: &mut String,
) -> Vec<Diagnostic<FileId>> {
let mut writer = Writer {
renderer: self,
raw: Raw::None,
code_block: None,
img_alt_text: 0,
list_tightness: vec![],
not_first_line: false,
ignore_next_event: false,
diagnostics: vec![],
};
for (event, range) in events {
writer
.render_event(event, range.clone(), out)
.expect("formatting event into string should not fail");
}
writer.diagnostics
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
enum Raw {
#[default]
None,
Html,
Other,
}
#[derive(Debug, Clone, PartialEq, Eq)]
enum CodeBlockKind {
PlainText,
SyntaxHighlight,
LiterateProgram {
program_name: String,
placeholder_pic_id: Option<String>,
},
}
#[derive(Debug, Clone, PartialEq, Eq)]
struct CodeBlock<'a> {
kind: CodeBlockKind,
language: &'a str,
}
struct Writer<'a> {
renderer: Renderer<'a>,
raw: Raw,
code_block: Option<CodeBlock<'a>>,
img_alt_text: usize,
list_tightness: Vec<bool>,
not_first_line: bool,
ignore_next_event: bool,
diagnostics: Vec<Diagnostic<FileId>>,
}
impl<'a> Writer<'a> {
fn render_event(
&mut self,
e: &Event<'a>,
range: Range<usize>,
out: &mut String,
) -> std::fmt::Result {
if let Event::Start(Container::Footnote { label: _ }, ..) = e {
self.diagnostics.push(Diagnostic {
severity: Severity::Error,
code: Some("djot".into()),
message: "Djot footnotes are not supported".into(),
labels: vec![Label {
style: LabelStyle::Primary,
file_id: self.renderer.file_id,
range: range.clone(),
message: "".into(),
}],
notes: vec![],
})
}
if matches!(&e, Event::Start(Container::LinkDefinition { .. }, ..)) {
self.ignore_next_event = true;
return Ok(());
}
if matches!(&e, Event::End(Container::LinkDefinition { .. })) {
self.ignore_next_event = false;
return Ok(());
}
// Completely omit section events. The treehouse's structure contains linkable ids in
// branches instead.
if matches!(
&e,
Event::Start(Container::Section { .. }, _) | Event::End(Container::Section { .. })
) {
return Ok(());
}
if self.ignore_next_event {
return Ok(());
}
match e {
Event::Start(c, attrs) => {
if c.is_block() && self.not_first_line {
out.push('\n');
}
if self.img_alt_text > 0 && !matches!(c, Container::Image(..)) {
return Ok(());
}
match &c {
Container::Blockquote => out.push_str("<blockquote"),
Container::List { kind, tight } => {
self.list_tightness.push(*tight);
match kind {
ListKind::Unordered | ListKind::Task => out.push_str("<ul"),
ListKind::Ordered {
numbering, start, ..
} => {
out.push_str("<ol");
if *start > 1 {
write!(out, r#" start="{}""#, start)?;
}
if let Some(ty) = match numbering {
Decimal => None,
AlphaLower => Some('a'),
AlphaUpper => Some('A'),
RomanLower => Some('i'),
RomanUpper => Some('I'),
} {
write!(out, r#" type="{}""#, ty)?;
}
}
}
}
Container::ListItem | Container::TaskListItem { .. } => {
out.push_str("<li");
}
Container::DescriptionList => out.push_str("<dl"),
Container::DescriptionDetails => out.push_str("<dd"),
Container::Footnote { .. } => unreachable!(),
Container::Table => out.push_str("<table"),
Container::TableRow { .. } => out.push_str("<tr"),
Container::Section { .. } => {}
Container::Div { .. } => out.push_str("<div"),
Container::Paragraph => {
if matches!(self.list_tightness.last(), Some(true)) {
return Ok(());
}
out.push_str("<p");
}
Container::Heading { level, .. } => write!(out, "<h{}", level)?,
Container::TableCell { head: false, .. } => out.push_str("<td"),
Container::TableCell { head: true, .. } => out.push_str("<th"),
Container::Caption => out.push_str("<caption"),
Container::DescriptionTerm => out.push_str("<dt"),
Container::CodeBlock { language } => {
if let Some(program) = attrs.get(":program") {
self.code_block = Some(CodeBlock {
kind: CodeBlockKind::LiterateProgram {
program_name: program.parts().collect(),
placeholder_pic_id: attrs
.get(":placeholder")
.map(|value| value.parts().collect()),
},
language,
});
out.push_str("<th-literate-program");
} else {
self.code_block = Some(CodeBlock {
kind: match self.renderer.config.syntaxes.contains_key(*language) {
true => CodeBlockKind::SyntaxHighlight,
false => CodeBlockKind::PlainText,
},
language,
});
out.push_str("<pre");
}
}
Container::Span | Container::Math { .. } => out.push_str("<span"),
Container::Link(dst, ty) => {
if matches!(ty, LinkType::Span(SpanLinkType::Unresolved)) {
out.push_str("<a");
if let Some(resolved) = resolve_link(
self.renderer.config,
self.renderer.treehouse,
self.renderer.dirs,
dst,
) {
out.push_str(r#" href=""#);
write_attr(&resolved, out);
out.push('"');
}
} else {
out.push_str(r#"<a href=""#);
if matches!(ty, LinkType::Email) {
out.push_str("mailto:");
}
write_attr(dst, out);
out.push('"');
}
}
Container::Image(..) => {
self.img_alt_text += 1;
if self.img_alt_text == 1 {
out.push_str(r#"<img class="pic" loading="lazy""#);
} else {
return Ok(());
}
}
Container::Verbatim => out.push_str("<code"),
Container::RawBlock { format } | Container::RawInline { format } => {
self.raw = if format == &"html" {
Raw::Html
} else {
Raw::Other
};
return Ok(());
}
Container::Subscript => out.push_str("<sub"),
Container::Superscript => out.push_str("<sup"),
Container::Insert => out.push_str("<ins"),
Container::Delete => out.push_str("<del"),
Container::Strong => out.push_str("<strong"),
Container::Emphasis => out.push_str("<em"),
Container::Mark => out.push_str("<mark"),
Container::LinkDefinition { .. } => return Ok(()),
}
for (key, value) in attrs
.into_iter()
.filter(|(a, _)| !(*a == "class" || a.starts_with(':')))
{
write!(out, r#" {}=""#, key)?;
value.parts().for_each(|part| write_attr(part, out));
out.push('"');
}
if attrs.into_iter().any(|(a, _)| a == "class")
|| matches!(
c,
Container::Div { class } if !class.is_empty())
|| matches!(c, |Container::Math { .. }| Container::List {
kind: ListKind::Task,
..
} | Container::TaskListItem { .. })
{
out.push_str(r#" class=""#);
let mut first_written = false;
if let Some(cls) = match c {
Container::List {
kind: ListKind::Task,
..
} => Some("task-list"),
Container::TaskListItem { checked: false } => Some("unchecked"),
Container::TaskListItem { checked: true } => Some("checked"),
Container::Math { display: false } => Some("math inline"),
Container::Math { display: true } => Some("math display"),
_ => None,
} {
first_written = true;
out.push_str(cls);
}
for class in attrs
.into_iter()
.filter(|(a, _)| a == &"class")
.map(|(_, cls)| cls)
{
if first_written {
out.push(' ');
}
first_written = true;
class.parts().for_each(|part| write_attr(part, out));
}
// div class goes after classes from attrs
if let Container::Div { class } = c {
if !class.is_empty() {
if first_written {
out.push(' ');
}
out.push_str(class);
}
}
out.push('"');
}
match c {
Container::TableCell { alignment, .. }
if !matches!(alignment, Alignment::Unspecified) =>
{
let a = match alignment {
Alignment::Unspecified => unreachable!(),
Alignment::Left => "left",
Alignment::Center => "center",
Alignment::Right => "right",
};
write!(out, r#" style="text-align: {};">"#, a)?;
}
Container::CodeBlock { language } => {
if language.is_empty() {
out.push_str("><code>");
} else {
let code_block = self.code_block.as_ref().unwrap();
if let CodeBlockKind::LiterateProgram { program_name, .. } =
&code_block.kind
{
out.push_str(r#" data-program=""#);
write_attr(&self.renderer.page_id, out);
out.push(':');
write_attr(program_name, out);
out.push('"');
out.push_str(r#" data-language=""#);
write_attr(language, out);
out.push('"');
if *language == "output" {
out.push_str(r#" data-mode="output""#);
} else {
out.push_str(r#" data-mode="input""#);
}
}
out.push('>');
if let CodeBlockKind::LiterateProgram {
placeholder_pic_id: Some(placeholder_pic_id),
..
} = &code_block.kind
{
out.push_str(
r#"<img class="placeholder-image" loading="lazy" src=""#,
);
let pic_url = self
.renderer
.config
.pic_url(&*self.renderer.dirs.pic, placeholder_pic_id);
write_attr(&pic_url, out);
out.push('"');
if let Some(image_size) = self
.renderer
.config
.pic_size(&*self.renderer.dirs.pic, placeholder_pic_id)
{
write!(
out,
r#" width="{}" height="{}""#,
image_size.width, image_size.height
)?;
}
out.push('>');
}
if let (CodeBlockKind::LiterateProgram { .. }, "output") =
(&code_block.kind, *language)
{
out.push_str(r#"<pre class="placeholder-console">"#);
} else {
out.push_str(r#"<code class="language-"#);
write_attr(language, out);
if self.renderer.config.syntaxes.contains_key(*language) {
out.push_str(" th-syntax-highlighting");
}
out.push_str(r#"">"#);
}
}
}
Container::Image(..) => {
if self.img_alt_text == 1 {
out.push_str(r#" alt=""#);
}
}
Container::Math { display } => {
out.push_str(if *display { r#">\["# } else { r#">\("# });
}
_ => out.push('>'),
}
}
Event::End(c) => {
if c.is_block_container() {
out.push('\n');
}
if self.img_alt_text > 0 && !matches!(c, Container::Image(..)) {
return Ok(());
}
match c {
Container::Blockquote => out.push_str("</blockquote>"),
Container::List { kind, .. } => {
self.list_tightness.pop();
match kind {
ListKind::Unordered | ListKind::Task => out.push_str("</ul>"),
ListKind::Ordered { .. } => out.push_str("</ol>"),
}
}
Container::ListItem | Container::TaskListItem { .. } => {
out.push_str("</li>");
}
Container::DescriptionList => out.push_str("</dl>"),
Container::DescriptionDetails => out.push_str("</dd>"),
Container::Footnote { .. } => unreachable!(),
Container::Table => out.push_str("</table>"),
Container::TableRow { .. } => out.push_str("</tr>"),
Container::Section { .. } => {}
Container::Div { .. } => out.push_str("</div>"),
Container::Paragraph => {
if matches!(self.list_tightness.last(), Some(true)) {
return Ok(());
}
out.push_str("</p>");
}
Container::Heading { level, .. } => write!(out, "</h{}>", level)?,
Container::TableCell { head: false, .. } => out.push_str("</td>"),
Container::TableCell { head: true, .. } => out.push_str("</th>"),
Container::Caption => out.push_str("</caption>"),
Container::DescriptionTerm => out.push_str("</dt>"),
Container::CodeBlock { language } => {
let code_block = self.code_block.take().unwrap();
out.push_str(match &code_block.kind {
CodeBlockKind::PlainText | CodeBlockKind::SyntaxHighlight => {
"</code></pre>"
}
CodeBlockKind::LiterateProgram { .. } if *language == "output" => {
"</pre></th-literate-program>"
}
CodeBlockKind::LiterateProgram { .. } => {
"</code></th-literate-program>"
}
});
}
Container::Span => out.push_str("</span>"),
Container::Link(..) => out.push_str("</a>"),
Container::Image(src, link_type) => {
if self.img_alt_text == 1 {
if !src.is_empty() {
out.push_str(r#"" src=""#);
if let SpanLinkType::Unresolved = link_type {
// TODO: Image size.
if let Some(resolved) = resolve_link(
self.renderer.config,
self.renderer.treehouse,
self.renderer.dirs,
src,
) {
write_attr(&resolved, out);
} else {
write_attr(src, out);
}
} else {
write_attr(src, out);
}
}
out.push_str(r#"">"#);
}
self.img_alt_text -= 1;
}
Container::Verbatim => out.push_str("</code>"),
Container::Math { display } => {
out.push_str(if *display {
r#"\]</span>"#
} else {
r#"\)</span>"#
});
}
Container::RawBlock { .. } | Container::RawInline { .. } => {
self.raw = Raw::None;
}
Container::Subscript => out.push_str("</sub>"),
Container::Superscript => out.push_str("</sup>"),
Container::Insert => out.push_str("</ins>"),
Container::Delete => out.push_str("</del>"),
Container::Strong => out.push_str("</strong>"),
Container::Emphasis => out.push_str("</em>"),
Container::Mark => out.push_str("</mark>"),
Container::LinkDefinition { .. } => unreachable!(),
}
}
Event::Str(s) => match self.raw {
Raw::None if self.img_alt_text > 0 => write_attr(s, out),
Raw::None => {
let syntax = self.code_block.as_ref().and_then(|code_block| {
self.renderer.config.syntaxes.get(code_block.language)
});
if let Some(syntax) = syntax {
highlight(out, syntax, s);
} else {
write_text(s, out);
}
}
Raw::Html => out.push_str(s),
Raw::Other => {}
},
Event::FootnoteReference(_label) => {
self.diagnostics.push(Diagnostic {
severity: Severity::Error,
code: Some("djot".into()),
message: "Djot footnotes are unsupported".into(),
labels: vec![Label {
style: LabelStyle::Primary,
file_id: self.renderer.file_id,
range,
message: "".into(),
}],
notes: vec![],
});
}
Event::Symbol(sym) => {
if let Some(vpath) = self.renderer.config.emoji.get(sym.as_ref()) {
let branch_id = self
.renderer
.treehouse
.branches_by_named_id
.get(&format!("emoji/{sym}"))
.copied();
if let Some(branch) =
branch_id.map(|id| self.renderer.treehouse.tree.branch(id))
{
out.push_str(r#"<a href=""#);
write_attr(&self.renderer.config.site, out);
out.push_str("/b?");
write_attr(&branch.attributes.id, out);
out.push_str(r#"">"#)
}
let url = vfs::url(
&self.renderer.config.site,
&*self.renderer.dirs.emoji,
vpath,
)
.expect("emoji directory is not anchored anywhere");
// TODO: this could do with better alt text
write!(
out,
r#"<img data-cast="emoji" title=":{sym}:" alt="{sym}" src=""#
)?;
write_attr(&url, out);
out.push('"');
if let Some(image_size) =
vfs::query::<ImageSize>(&self.renderer.dirs.emoji, vpath)
{
write!(
out,
r#" width="{}" height="{}""#,
image_size.width, image_size.height
)?;
}
out.push('>');
if branch_id.is_some() {
out.push_str("</a>");
}
} else {
write!(
out,
r#"<span class="th-emoji-unknown" title="this emoji does not exist… yet!">:{sym}:</span>"#,
)?
}
}
            Event::LeftSingleQuote => out.push('‘'),
            Event::RightSingleQuote => out.push('’'),
Event::LeftDoubleQuote => out.push('“'),
Event::RightDoubleQuote => out.push('”'),
Event::Ellipsis => out.push('…'),
            Event::EnDash => out.push('–'),
Event::EmDash => out.push('—'),
Event::NonBreakingSpace => out.push_str("&nbsp;"),
Event::Hardbreak => out.push_str("<br>\n"),
Event::Softbreak => out.push('\n'),
Event::Escape | Event::Blankline => {}
Event::ThematicBreak(attrs) => {
if self.not_first_line {
out.push('\n');
}
out.push_str("<hr");
for (a, v) in attrs {
write!(out, r#" {}=""#, a)?;
v.parts().for_each(|part| write_attr(part, out));
out.push('"');
}
out.push('>');
}
}
self.not_first_line = true;
Ok(())
}
}
fn write_text(s: &str, out: &mut String) {
write_escape(s, false, out)
}
fn write_attr(s: &str, out: &mut String) {
write_escape(s, true, out)
}
fn write_escape(mut s: &str, escape_quotes: bool, out: &mut String) {
let mut ent = "";
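    // The `find` closure does double duty: besides matching the next escapable
    // character, it records that character's escape entity in `ent`, so the
    // replacement is already at hand once `find` returns.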
while let Some(i) = s.find(|c| {
match c {
'<' => Some("&lt;"),
'>' => Some("&gt;"),
'&' => Some("&amp;"),
'"' if escape_quotes => Some("&quot;"),
_ => None,
}
.map_or(false, |s| {
ent = s;
true
})
}) {
out.push_str(&s[..i]);
out.push_str(ent);
s = &s[i + 1..];
}
out.push_str(s);
}
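/// Resolves a treehouse link of the form `kind:target` to a URL.
/// Illustrative examples: `def:foo` looks up `foo` among the config's link definitions,
/// `branch:named-id` becomes a `{site}/b?{ulid}` permalink, and `page:` / `pic:` targets
/// go through the config's URL helpers. Unknown kinds yield `None`.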
pub fn resolve_link(
config: &Config,
treehouse: &Treehouse,
dirs: &Dirs,
link: &str,
) -> Option<String> {
link.split_once(':').and_then(|(kind, linked)| match kind {
"def" => config.defs.get(linked).cloned(),
"branch" => treehouse
.branches_by_named_id
.get(linked)
.map(|&branch_id| {
format!(
"{}/b?{}",
config.site,
treehouse.tree.branch(branch_id).attributes.id
)
}),
"page" => Some(config.page_url(linked)),
"pic" => Some(config.pic_url(&*dirs.pic, linked)),
_ => None,
})
}

94
src/html/highlight.rs Normal file
View file

@ -0,0 +1,94 @@
//! Tokenizer and syntax highlighter inspired by the one found in rxi's lite.
//! I highly recommend checking it out!
//! https://github.com/rxi/lite/blob/master/data/core/tokenizer.lua
//! There's also a mirror of it in JavaScript, used to power dynamically editable code blocks.
//!
//! Both of these syntax highlighters use the same JSON syntax definitions; however this one is
//! more limited, in that patterns do not support backtracking.
//! This restriction is effectively enforced for the dynamic highlighter as well, because this highlighter reports any
//! regex syntax errors upon site compilation.
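//!
//! As a rough, illustrative example (the token type names here are arbitrary), a syntax
//! definition deserializes into [`Syntax`] from JSON shaped like this:
//!
//! ```json
//! {
//!   "patterns": [
//!     { "regex": "//.*", "is": "comment" },
//!     { "regex": "[0-9]+", "is": "literal" },
//!     { "regex": "[a-zA-Z_][a-zA-Z0-9_]*", "is": "identifier" }
//!   ],
//!   "keywords": {
//!     "fn": { "into": "keyword", "onlyReplaces": "identifier" }
//!   }
//! }
//! ```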
pub mod compiled;
pub mod tokenize;
use std::{collections::HashMap, fmt::Write};
use serde::{Deserialize, Serialize};
use self::compiled::CompiledSyntax;
use super::EscapeHtml;
/// Syntax definition.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct Syntax {
/// Patterns, matched sequentially (patterns at the beginning of the list take precedence.)
pub patterns: Vec<Pattern>,
/// Map of replacements to use if a pattern matches a string exactly.
pub keywords: HashMap<String, Keyword>,
}
/// A pattern in a syntax definition.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct Pattern {
/// Regular expression to match.
pub regex: String,
/// Flags to pass to the regex engine to alter how strings are matched.
#[serde(default)]
pub flags: Vec<RegexFlag>,
/// Type to assign to the token. This can be any string, but only a select few have colors
/// assigned.
pub is: TokenTypes,
}
/// Assignable token types.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(untagged)]
pub enum TokenTypes {
/// Assign a single token type to the entire match.
FullMatch(String),
/// Assign individual token types to each capture.
Captures(CaptureTokenTypes),
}
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct CaptureTokenTypes {
/// Token type to use outside captures.
pub default: String,
/// Token type to use inside captures.
pub captures: Vec<String>,
}
/// Flag passed to the regex engine.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, Serialize)]
#[serde(rename_all = "camelCase")]
pub enum RegexFlag {
/// Make `.` match line separators.
DotMatchesNewline,
}
/// Keyword replacement.
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct Keyword {
/// What to replace the token type with.
pub into: String,
/// Only replace the token type if it matches this one. If this is not present, any token type
/// is replaced.
pub only_replaces: Option<String>,
}
pub fn highlight(out: &mut String, syntax: &CompiledSyntax, code: &str) {
let tokens = syntax.tokenize(code);
for token in tokens {
out.push_str("<span class=\"");
_ = write!(out, "{}", EscapeHtml(&syntax.token_names[token.id]));
out.push_str("\">");
_ = write!(out, "{}", EscapeHtml(&code[token.range]));
out.push_str("</span>");
}
}

119
src/html/highlight/compiled.rs Normal file
View file

@ -0,0 +1,119 @@
use std::collections::HashMap;
use regex::{Regex, RegexBuilder};
use tracing::{error, instrument};
use super::{RegexFlag, Syntax, TokenTypes};
/// During compilation, token names are converted to numeric IDs for performance.
pub type TokenId = usize;
pub const TOKEN_ID_DEFAULT: TokenId = 0;
#[derive(Debug, Clone)]
pub struct CompiledSyntax {
/// Lookup table which maps numeric IDs to token names.
pub token_names: Vec<String>,
pub patterns: Vec<CompiledPattern>,
pub keywords: HashMap<String, CompiledKeyword>,
}
#[derive(Debug, Clone)]
pub enum CompiledTokenTypes {
FullMatch(TokenId),
Captures(CompiledCaptureTokenTypes),
}
#[derive(Debug, Clone)]
pub struct CompiledCaptureTokenTypes {
pub default: TokenId,
pub captures: Vec<TokenId>,
}
#[derive(Debug, Clone)]
pub struct CompiledPattern {
pub regex: Regex,
pub is: CompiledTokenTypes,
}
#[derive(Debug, Clone)]
pub struct CompiledKeyword {
pub into: TokenId,
pub only_replaces: Option<TokenId>,
}
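// A minimal sketch of the whole pipeline, assuming a `Syntax` deserialized from the
// JSON definitions described in the parent module (`serde_json` and the inputs here
// are illustrative, not necessarily what the build actually uses):
//
//     let syntax: Syntax = serde_json::from_str(definition_json)?;
//     let compiled = compile_syntax(&syntax);
//     let tokens = compiled.tokenize("fn main() {}"); // defined in tokenize.rs
//
// A pattern whose regex fails to compile is logged via `error!` and skipped, so one bad
// pattern only degrades highlighting instead of failing the whole syntax.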
#[instrument(skip(syntax))]
pub fn compile_syntax(syntax: &Syntax) -> CompiledSyntax {
let mut token_names = vec!["default".into()];
let mut get_token_id = |name: &str| -> TokenId {
if let Some(id) = token_names.iter().position(|n| n == name) {
id
} else {
let id = token_names.len();
token_names.push(name.to_owned());
id
}
};
let patterns = syntax
.patterns
.iter()
.filter_map(|pattern| {
// NOTE: `regex` has no support for sticky flags, so we need to anchor the match to the
// start ourselves.
let regex = RegexBuilder::new(&format!(
"^{}",
// If there's an existing `^`, it should not cause compilation errors for the user.
pattern.regex.strip_prefix('^').unwrap_or(&pattern.regex)
))
.dot_matches_new_line(pattern.flags.contains(&RegexFlag::DotMatchesNewline))
.build()
.map_err(|e| {
// NOTE: This could probably use better diagnostics, but it's pretty much
// impossible to get a source span out of serde's output (because it forgoes
// source information, rightfully so.) Therefore we have to settle on
// a poor man's error log.
error!("regex compilation error in pattern {pattern:?}: {e}");
})
.ok()?;
Some(CompiledPattern {
regex,
is: match &pattern.is {
TokenTypes::FullMatch(name) => {
CompiledTokenTypes::FullMatch(get_token_id(name))
}
TokenTypes::Captures(types) => {
CompiledTokenTypes::Captures(CompiledCaptureTokenTypes {
default: get_token_id(&types.default),
captures: types
.captures
.iter()
.map(|name| get_token_id(name))
.collect(),
})
}
},
})
})
.collect();
let keywords = syntax
.keywords
.iter()
.map(|(text, keyword)| {
(
text.clone(),
CompiledKeyword {
into: get_token_id(&keyword.into),
only_replaces: keyword.only_replaces.as_deref().map(&mut get_token_id),
},
)
})
.collect();
CompiledSyntax {
token_names,
patterns,
keywords,
}
}

97
src/html/highlight/tokenize.rs Normal file
View file

@ -0,0 +1,97 @@
use std::ops::Range;
use super::compiled::{CompiledSyntax, CompiledTokenTypes, TokenId, TOKEN_ID_DEFAULT};
pub struct Token {
pub id: TokenId,
pub range: Range<usize>,
}
impl CompiledSyntax {
pub fn tokenize(&self, text: &str) -> Vec<Token> {
let mut tokens = vec![];
let mut i = 0;
while i < text.len() {
let mut had_match = false;
for pattern in &self.patterns {
match &pattern.is {
CompiledTokenTypes::FullMatch(id) => {
if let Some(regex_match) = pattern.regex.find(&text[i..]) {
push_token(&mut tokens, *id, i..i + regex_match.range().end);
i += regex_match.range().end;
had_match = true;
break;
}
}
CompiledTokenTypes::Captures(types) => {
if let Some(captures) = pattern.regex.captures(&text[i..]) {
let whole_match = captures.get(0).unwrap();
let mut last_match_end = 0;
for (index, capture) in captures
.iter()
.skip(1)
.enumerate()
.filter_map(|(i, m)| m.map(|m| (i, m)))
{
let id = types
.captures
.get(index)
.copied()
.unwrap_or(TOKEN_ID_DEFAULT);
push_token(
&mut tokens,
types.default,
i + last_match_end..i + capture.range().start,
);
push_token(
&mut tokens,
id,
i + capture.range().start..i + capture.range().end,
);
last_match_end = capture.range().end;
}
push_token(
&mut tokens,
types.default,
i + last_match_end..i + whole_match.range().end,
);
i += whole_match.range().end;
had_match = true;
break;
}
}
}
}
if !had_match {
push_token(&mut tokens, TOKEN_ID_DEFAULT, i..i + 1);
i += 1;
}
}
for token in &mut tokens {
if let Some(keyword) = self.keywords.get(&text[token.range.clone()]) {
if keyword.only_replaces.is_none() || Some(token.id) == keyword.only_replaces {
token.id = keyword.into;
}
}
}
tokens
}
}
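/// Pushes a token, merging it into the previous token when both share the same ID, so
/// that consecutive matches of one type form a single contiguous span; empty ranges are
/// dropped.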
fn push_token(tokens: &mut Vec<Token>, id: TokenId, range: Range<usize>) {
if range.is_empty() {
return;
}
if let Some(previous_token) = tokens.last_mut() {
if previous_token.id == id {
previous_token.range.end = range.end;
return;
}
}
tokens.push(Token { id, range });
}

84
src/html/navmap.rs Normal file
View file

@ -0,0 +1,84 @@
use std::collections::HashMap;
use tracing::instrument;
use crate::{
state::{FileId, Treehouse},
tree::{attributes::Content, SemaBranchId},
vfs::VPathBuf,
};
#[derive(Debug, Clone, Default)]
struct NavigationMapBuilder {
stack: Vec<VPathBuf>,
navigation_map: NavigationMap,
}
impl NavigationMapBuilder {
fn enter_tree(&mut self, file_id: FileId, tree_path: VPathBuf) {
self.stack.push(tree_path.clone());
self.navigation_map
.paths
.insert(file_id, self.stack.clone());
}
fn exit_tree(&mut self) {
self.stack.pop();
}
fn finish(self) -> NavigationMap {
self.navigation_map
}
}
#[derive(Debug, Clone, Default)]
pub struct NavigationMap {
/// Tells you which pages need to be opened to get to the key.
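    /// For example (paths illustrative): if `index.tree` embeds `philosophy.tree` via a
    /// linked branch, the entry for `philosophy.tree`'s file ID is the tree-path chain
    /// `["index", "philosophy"]`, ending at the page itself.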
pub paths: HashMap<FileId, Vec<VPathBuf>>,
}
impl NavigationMap {
#[instrument(name = "NavigationMap::build", skip(treehouse))]
pub fn build(treehouse: &Treehouse, root_file_id: FileId) -> Self {
let mut builder = NavigationMapBuilder::default();
fn rec_branch(
treehouse: &Treehouse,
builder: &mut NavigationMapBuilder,
branch_id: SemaBranchId,
) {
let branch = treehouse.tree.branch(branch_id);
if let Content::ResolvedLink(linked) = &branch.attributes.content {
rec_tree(treehouse, builder, *linked);
} else {
for &child_id in &branch.children {
rec_branch(treehouse, builder, child_id);
}
}
}
fn rec_tree(treehouse: &Treehouse, builder: &mut NavigationMapBuilder, file_id: FileId) {
if let Some(roots) = treehouse.roots.get(&file_id) {
                // Pages can link to each other, causing infinite recursion, so we need to
                // handle that case by skipping pages that have already been analyzed.
if !builder.navigation_map.paths.contains_key(&file_id) {
builder.enter_tree(
file_id,
treehouse
.tree_path(file_id)
.expect("tree files may only link to other tree files")
.to_owned(),
);
for &branch_id in &roots.branches {
rec_branch(treehouse, builder, branch_id);
}
builder.exit_tree();
}
}
}
rec_tree(treehouse, &mut builder, root_file_id);
builder.finish()
}
}

467
src/html/tree.rs Normal file
View file

@ -0,0 +1,467 @@
use std::fmt::Write;
use chrono::{DateTime, Utc};
use crate::{
config::Config,
dirs::Dirs,
html::EscapeAttribute,
sources::Sources,
state::{FileId, Treehouse},
tree::{
attributes::{Content, Stage, Visibility},
mini_template,
pull::BranchKind,
SemaBranchId,
},
vfs::{self, VPath, VPathBuf},
};
use super::{djot, EscapeHtml};
pub struct Renderer<'a> {
pub sources: &'a Sources,
pub dirs: &'a Dirs,
pub file_id: FileId,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum HasChildren {
No,
Yes,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum LinkButton {
Tree,
Branch,
}
struct OpenBranch {
has_children: HasChildren,
}
impl Renderer<'_> {
fn treehouse(&self) -> &Treehouse {
&self.sources.treehouse
}
fn config(&self) -> &Config {
&self.sources.config
}
fn open_branch(&self, s: &mut String, id: &str) {
write!(s, "<li id=\"{}\"", EscapeAttribute(id)).unwrap();
}
fn attr(&self, s: &mut String, key: &'static str, value: &str) {
write!(s, r#" {key}="{}""#, EscapeAttribute(value)).unwrap()
}
fn attr_class_begin(&self, s: &mut String, has_children: HasChildren) {
write!(
s,
r#" class="{}"#,
EscapeAttribute(match has_children {
HasChildren::Yes => "branch",
HasChildren::No => "leaf",
})
)
.unwrap();
}
fn attr_class_push(&self, s: &mut String, class: &str) {
write!(s, " {}", EscapeAttribute(class)).unwrap();
}
fn attr_class_end(&self, s: &mut String) {
s.push('"');
}
fn attr_cast_begin(&self, s: &mut String) {
s.push_str(r#" data-cast=""#);
}
fn attr_cast_push(&self, s: &mut String, spell: &str) {
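        // Spells are space-separated; skip the separator only right after the opening
        // quote written by `attr_cast_begin`.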
if s.as_bytes().last() != Some(&b'"') {
s.push(' ');
}
write!(s, "{}", EscapeAttribute(spell)).unwrap();
}
fn attr_cast_end(&self, s: &mut String) {
s.push('"');
}
fn attr_link(&self, s: &mut String, linked: &VPath) {
self.attr(s, "th-link", linked.as_str());
}
fn attr_ts(&self, s: &mut String, timestamp: DateTime<Utc>) {
self.attr(s, "th-ts", &timestamp.timestamp_millis().to_string())
}
fn attr_do_not_persist(&self, s: &mut String) {
s.push_str(" th-do-not-persist");
}
fn end_attrs(&self, s: &mut String) {
s.push('>');
}
fn begin_container(
&self,
s: &mut String,
has_children: HasChildren,
branch_kind: BranchKind,
) -> OpenBranch {
match has_children {
HasChildren::Yes => {
s.push_str(match branch_kind {
BranchKind::Expanded => "<details open>",
BranchKind::Collapsed => "<details>",
});
s.push_str("<summary class=\"branch-container\">");
}
HasChildren::No => {
s.push_str("<div class=\"branch-container\">");
}
}
OpenBranch { has_children }
}
fn begin_children(&self, s: &mut String, open: &OpenBranch) -> HasChildren {
if open.has_children == HasChildren::Yes {
s.push_str("</summary>");
}
open.has_children
}
fn close_branch(&self, s: &mut String, open: OpenBranch) {
match open.has_children {
HasChildren::Yes => {
s.push_str("</details>");
}
HasChildren::No => {
s.push_str("</div>");
}
}
s.push_str("</li>");
}
fn bullet_point(&self, s: &mut String) {
s.push_str("<th-bp></th-bp>");
}
fn branch_content(&self, s: &mut String, markup: &str, linked: Option<&VPath>) {
s.push_str("<th-bc>");
let events: Vec<_> = jotdown::Parser::new(markup).into_offset_iter().collect();
// TODO: Report rendering diagnostics.
let render_diagnostics = djot::Renderer {
page_id: self
.treehouse()
.tree_path(self.file_id)
.expect(".tree file expected")
.to_string(),
config: self.config(),
dirs: self.dirs,
treehouse: self.treehouse(),
file_id: self.file_id,
}
.render(&events, s);
if let Some(linked) = linked {
write!(
s,
"<noscript><a class=\"navigate icon-go\" href=\"{}/{}\">Go to linked tree: <code>{}</code></a></noscript>",
EscapeAttribute(&self.config().site),
EscapeAttribute(linked.as_str()),
EscapeHtml(linked.as_str()),
)
.unwrap();
}
s.push_str("</th-bc>");
}
fn button_bar(
&self,
s: &mut String,
date_time: Option<DateTime<Utc>>,
link_button: LinkButton,
link: &str,
) {
s.push_str("<th-bb>");
{
if let Some(date_time) = date_time {
write!(s, "<th-bd>{}</th-bd>", date_time.format("%F")).unwrap();
}
match link_button {
LinkButton::Tree => {
write!(
s,
"<a class=\"icon icon-go\" href=\"{}\" title=\"linked tree\"></a>",
EscapeAttribute(link)
)
.unwrap();
}
LinkButton::Branch => {
write!(
s,
"<a th-p class=\"icon icon-permalink\" href=\"{}\" title=\"permalink\"></a>",
EscapeAttribute(link)
)
.unwrap();
}
}
}
s.push_str("</th-bb>");
}
fn branch_children_empty(&self, s: &mut String) {
s.push_str("<ul></ul>");
}
fn branch_children(&self, s: &mut String, branch_id: SemaBranchId) {
let branch = self.treehouse().tree.branch(branch_id);
s.push_str("<ul");
if !branch.attributes.classes.branch_children.is_empty() {
write!(
s,
" class=\"{}\"",
EscapeAttribute(&branch.attributes.classes.branch_children)
)
.unwrap();
}
s.push('>');
let num_children = branch.children.len();
for i in 0..num_children {
let child_id = self.treehouse().tree.branch(branch_id).children[i];
self.branch(s, child_id);
}
s.push_str("</ul>");
}
fn preprocess_markup(&self, branch_id: SemaBranchId) -> String {
let branch = self.treehouse().tree.branch(branch_id);
let raw_block_content =
&self.treehouse().source(self.file_id).input()[branch.content.clone()];
let mut markup = String::with_capacity(raw_block_content.len());
for line in raw_block_content.lines() {
// Bit of a jank way to remove at most branch.indent_level spaces from the front.
let mut space_count = 0;
for i in 0..branch.indent_level {
if line.as_bytes().get(i).copied() == Some(b' ') {
space_count += 1;
} else {
break;
}
}
markup.push_str(&line[space_count..]);
markup.push('\n');
}
if branch.attributes.template {
markup = mini_template::render(self.config(), self.treehouse(), self.dirs, &markup);
}
markup
}
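    /// Renders a single branch into `s`. For a branch with children, the emitted HTML
    /// has roughly this shape (attributes elided, values illustrative):
    ///
    /// ```text
    /// <li id="b-..." data-cast="b" class="branch">
    ///   <details open>
    ///     <summary class="branch-container">
    ///       <th-bp></th-bp><th-bc>…content…</th-bc><th-bb>…buttons…</th-bb>
    ///     </summary>
    ///     <ul>…children…</ul>
    ///   </details>
    /// </li>
    /// ```
    ///
    /// Leaves use a plain `<div class="branch-container">` instead of `<details>`/`<summary>`.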
pub fn branch(&self, s: &mut String, branch_id: SemaBranchId) {
let branch = self.treehouse().tree.branch(branch_id);
if !cfg!(debug_assertions) && branch.attributes.stage == Stage::Draft {
return;
}
let has_children = match !branch.children.is_empty()
|| matches!(branch.attributes.content, Content::ResolvedLink(_))
{
true => HasChildren::Yes,
false => HasChildren::No,
};
let linked_tree = match branch.attributes.content {
Content::Inline | Content::Link(_) => None,
Content::ResolvedLink(file_id) => self.treehouse().tree_path(file_id),
};
self.open_branch(s, &branch.html_id);
{
// data-cast
self.attr_cast_begin(s);
self.attr_cast_push(
s,
match linked_tree {
Some(_) => "b-linked",
None => "b",
},
);
if !branch.attributes.cast.is_empty() {
self.attr_cast_push(s, &branch.attributes.cast);
}
self.attr_cast_end(s);
// th-link
if let Some(tree_path) = linked_tree {
self.attr_link(s, tree_path);
}
// class
self.attr_class_begin(s, has_children);
if !branch.attributes.classes.branch.is_empty() {
self.attr_class_push(s, &branch.attributes.classes.branch);
}
if branch.attributes.stage == Stage::Draft {
self.attr_class_push(s, "draft");
}
self.attr_class_end(s);
// th-do-not-persist
if branch.attributes.do_not_persist {
self.attr_do_not_persist(s);
}
}
self.end_attrs(s);
let open = self.begin_container(s, has_children, branch.kind);
{
self.bullet_point(s);
self.branch_content(s, &self.preprocess_markup(branch_id), linked_tree);
let date_time = branch.attributes.timestamp();
let link_button = match linked_tree {
Some(_) => LinkButton::Tree,
None => LinkButton::Branch,
};
let link = match linked_tree {
Some(tree_path) => format!("{}/{}", self.config().site, tree_path),
None => format!("{}/b?{}", self.config().site, &branch.named_id),
};
self.button_bar(s, date_time, link_button, &link);
if self.begin_children(s, &open) == HasChildren::Yes {
self.branch_children(s, branch_id);
}
}
self.close_branch(s, open);
}
pub fn root(&self, s: &mut String) {
let roots = self
.treehouse()
.roots
.get(&self.file_id)
.expect("tree should have been added to the treehouse");
s.push_str("<ul>");
for &child in &roots.branches {
self.branch(s, child);
}
let path = self.treehouse().path(self.file_id);
let children_path = if path == const { VPath::new_const("index.tree") } {
VPath::ROOT
} else {
path
};
let tree_path = children_path.with_extension("");
let child_pages = self.get_child_pages(&tree_path);
if !child_pages.is_empty() {
s.push_str(r#"<li class="child-pages">"#);
s.push_str("<ul>");
for child_page in &child_pages {
self.open_branch(s, &format!("p-{}", child_page.tree_path));
{
self.attr_cast_begin(s);
self.attr_cast_push(s, "b-linked");
self.attr_cast_end(s);
self.attr_link(s, &child_page.tree_path);
self.attr_class_begin(s, HasChildren::Yes);
self.attr_class_end(s);
if let Some(timestamp) = child_page.timestamp {
self.attr_ts(s, timestamp);
}
}
self.end_attrs(s);
let open = self.begin_container(s, HasChildren::Yes, BranchKind::Collapsed);
{
self.bullet_point(s);
self.branch_content(
s,
&format!(":{}: {}", child_page.icon, child_page.title),
Some(&child_page.tree_path),
);
self.button_bar(
s,
child_page.timestamp,
LinkButton::Tree,
&format!("{}/{}", self.config().site, child_page.tree_path),
);
self.begin_children(s, &open);
self.branch_children_empty(s);
}
self.close_branch(s, open);
}
s.push_str("</ul>");
s.push_str("</li>");
}
s.push_str("</ul>");
}
}
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
struct ChildPage {
timestamp: Option<DateTime<Utc>>,
title: String,
icon: String,
tree_path: VPathBuf,
}
impl Renderer<'_> {
fn get_child_pages(&self, parent_page: &VPath) -> Vec<ChildPage> {
let mut child_pages = vfs::entries(&self.dirs.content, parent_page);
child_pages.retain(|path| matches!(path.extension(), Some("tree")));
for child_page in &mut child_pages {
child_page.set_extension("");
}
child_pages.sort();
child_pages.dedup();
let mut child_pages: Vec<_> = child_pages
.into_iter()
.filter_map(|tree_path| {
self.treehouse()
.files_by_tree_path
.get(&tree_path)
.and_then(|file_id| {
let roots = &self.treehouse().roots[file_id];
let visible = roots.attributes.visibility == Visibility::Public;
visible.then(|| ChildPage {
tree_path,
title: roots.attributes.title.clone(),
icon: roots.attributes.icon.clone(),
timestamp: roots.attributes.timestamps.as_ref().map(|t| t.updated),
})
})
})
.collect();
child_pages.sort_by(|a, b| b.cmp(a));
child_pages
}
}

48
src/import_map.rs Normal file
View file

@ -0,0 +1,48 @@
use std::ops::ControlFlow;
use indexmap::IndexMap;
use serde::{Deserialize, Serialize};
use tracing::instrument;
use crate::vfs::{self, Dir, VPathBuf};
#[derive(Debug, Clone, Serialize)]
pub struct ImportMap {
pub imports: IndexMap<String, String>,
}
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct ImportRoot {
pub name: String,
pub path: VPathBuf,
}
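// Serialized, `ImportMap` matches the browser import-map format, e.g. (values
// illustrative): {"imports": {"treehouse/ulid.js": "https://…/static/js/ulid.js"}},
// where each key's prefix is an `ImportRoot::name` and the remainder is the script's
// path relative to `ImportRoot::path`.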
impl ImportMap {
#[instrument(name = "ImportMap::generate", skip(import_roots))]
pub fn generate(site: &str, root: &dyn Dir, import_roots: &[ImportRoot]) -> Self {
let mut import_map = ImportMap {
imports: IndexMap::new(),
};
for import_root in import_roots {
vfs::walk_dir_rec(root, &import_root.path, &mut |path| {
if path.extension() == Some("js") {
import_map.imports.insert(
format!(
"{}/{}",
import_root.name,
path.strip_prefix(&import_root.path).unwrap_or(path)
),
vfs::url(site, root, path)
.expect("import directory is not anchored anywhere"),
);
}
ControlFlow::Continue(())
});
}
import_map.imports.sort_unstable_keys();
import_map
}
}

14
src/lib.rs Normal file
View file

@ -0,0 +1,14 @@
pub mod cli;
pub mod config;
pub mod dirs;
pub mod fun;
pub mod generate;
pub mod history;
pub mod html;
pub mod import_map;
pub mod parse;
pub mod paths;
pub mod sources;
pub mod state;
pub mod tree;
pub mod vfs;

139
src/main.rs Normal file
View file

@ -0,0 +1,139 @@
use std::fs;
use std::path::PathBuf;
use std::sync::Arc;
use anyhow::Context;
use clap::Parser;
use tracing::{error, info_span};
use tracing_subscriber::layer::SubscriberExt as _;
use tracing_subscriber::util::SubscriberInitExt as _;
use treehouse::cli::serve::serve;
use treehouse::config::VfsConfig;
use treehouse::dirs::Dirs;
use treehouse::generate;
use treehouse::sources::Sources;
use treehouse::vfs::asynch::AsyncDir;
use treehouse::vfs::{
AnchoredAtExt, Blake3ContentVersionCache, Content, DynDir, ImageSizeCache, ToDynDir, VPathBuf,
};
use treehouse::vfs::{Cd, PhysicalDir};
use treehouse::{
cli::{
fix::{fix_all_cli, fix_file_cli},
wc::wc_cli,
Command, ProgramArgs,
},
vfs::{BufferedFile, MemDir, VPath},
};
fn vfs_sources(config: &VfsConfig) -> anyhow::Result<DynDir> {
let mut root = MemDir::new();
root.add(
VPath::new("treehouse.toml"),
BufferedFile::new(Content::new(
"application/toml",
fs::read("treehouse.toml")?,
))
.to_dyn(),
);
root.add(
VPath::new("static"),
PhysicalDir::new(PathBuf::from("static"))
.anchored_at(VPathBuf::new("static"))
.to_dyn(),
);
root.add(
VPath::new("template"),
PhysicalDir::new(PathBuf::from("template")).to_dyn(),
);
root.add(
VPath::new("content"),
PhysicalDir::new(PathBuf::from("content")).to_dyn(),
);
let root = Blake3ContentVersionCache::new(config.cache_salt.as_bytes().to_owned(), root);
let root = ImageSizeCache::new(root);
Ok(root.to_dyn())
}
async fn fallible_main(
args: ProgramArgs,
flush_guard: Option<tracing_chrome::FlushGuard>,
) -> anyhow::Result<()> {
let vfs_config = toml_edit::de::from_str(
&fs::read_to_string("vfs.toml").context("failed to read vfs.toml")?,
)
.context("failed to deserialize vfs.toml")?;
let src = vfs_sources(&vfs_config)?;
let dirs = Arc::new(Dirs {
root: src.clone(),
content: Cd::new(src.clone(), VPathBuf::new("content")).to_dyn(),
static_: Cd::new(src.clone(), VPathBuf::new("static")).to_dyn(),
template: Cd::new(src.clone(), VPathBuf::new("template")).to_dyn(),
pic: Cd::new(src.clone(), VPathBuf::new("static/pic")).to_dyn(),
emoji: Cd::new(src.clone(), VPathBuf::new("static/emoji")).to_dyn(),
syntax: Cd::new(src.clone(), VPathBuf::new("static/syntax")).to_dyn(),
});
match args.command {
Command::Serve {
generate: _,
serve: serve_args,
} => {
let _span = info_span!("load").entered();
let sources = Arc::new(Sources::load(&dirs).context("failed to load sources")?);
let target = generate::target(dirs.clone(), sources.clone());
drop(_span);
drop(flush_guard);
serve(sources, dirs, AsyncDir::new(target), serve_args.port).await?;
}
Command::Fix(fix_args) => fix_file_cli(fix_args, &*dirs.root)?.apply().await?,
Command::FixAll(fix_args) => fix_all_cli(fix_args, &*dirs.content)?.apply().await?,
Command::Wc(wc_args) => wc_cli(&dirs.content, wc_args)?,
Command::Ulid => {
let mut rng = rand::thread_rng();
let ulid = ulid::Generator::new()
.generate_with_source(&mut rng)
.expect("failed to generate ulid");
println!("{ulid}");
}
}
Ok(())
}
#[tokio::main]
async fn main() -> Result<(), Box<dyn std::error::Error>> {
let args = ProgramArgs::parse();
let (chrome_layer, flush_guard) = args
.trace
.as_ref()
.map(|path| {
tracing_chrome::ChromeLayerBuilder::new()
.file(path)
.include_args(true)
.build()
})
.unzip();
tracing_subscriber::registry()
.with(tracing_subscriber::EnvFilter::new("treehouse=trace"))
.with(tracing_subscriber::fmt::layer().with_writer(std::io::stderr))
.with(chrome_layer)
.init();
match fallible_main(args, flush_guard).await {
Ok(_) => (),
Err(error) => error!("fatal: {error:?}"),
}
Ok(())
}

48
src/parse.rs Normal file
View file

@ -0,0 +1,48 @@
use std::{ops::Range, str::FromStr};
use codespan_reporting::diagnostic::{Diagnostic, Label, LabelStyle, Severity};
use tracing::instrument;
use crate::{
state::{toml_error_to_diagnostic, FileId, TomlError, Treehouse},
tree::{self, ast::Roots},
};
pub struct ErrorsEmitted;
#[instrument(skip(input))]
pub fn parse_tree_with_diagnostics(
file_id: FileId,
input: &str,
) -> Result<Roots, Vec<Diagnostic<FileId>>> {
Roots::parse(&mut tree::pull::Parser { input, position: 0 }).map_err(|error| {
vec![Diagnostic {
severity: Severity::Error,
code: Some("tree".into()),
message: error.kind.to_string(),
labels: vec![Label {
style: LabelStyle::Primary,
file_id,
range: error.range,
message: String::new(),
}],
notes: vec![],
}]
})
}
pub fn parse_toml_with_diagnostics(
treehouse: &mut Treehouse,
file_id: FileId,
range: Range<usize>,
) -> Result<toml_edit::Document, Vec<Diagnostic<FileId>>> {
let input = &treehouse.source(file_id).input()[range.clone()];
toml_edit::Document::from_str(input).map_err(|error| {
vec![toml_error_to_diagnostic(TomlError {
message: error.message().to_owned(),
span: error.span(),
file_id,
input_range: range.clone(),
})]
})
}

0
src/paths.rs Normal file
View file

136
src/sources.rs Normal file
View file

@ -0,0 +1,136 @@
use std::{collections::HashMap, ops::ControlFlow};
use anyhow::{anyhow, Context};
use rayon::iter::{IndexedParallelIterator, IntoParallelIterator, ParallelIterator};
use tracing::{info_span, instrument};
use crate::{
config::Config,
dirs::Dirs,
html::navmap::NavigationMap,
import_map::ImportMap,
parse::parse_tree_with_diagnostics,
state::{report_diagnostics, Source, Treehouse},
tree::SemaRoots,
vfs::{self, Cd, Content, VPath, VPathBuf},
};
pub struct Sources {
pub config: Config,
pub treehouse: Treehouse,
pub navigation_map: NavigationMap,
pub import_map: ImportMap,
}
impl Sources {
pub fn load(dirs: &Dirs) -> anyhow::Result<Self> {
let config = {
let _span = info_span!("load_config").entered();
let mut config: Config = toml_edit::de::from_str(
&vfs::query::<Content>(&dirs.root, VPath::new_const("treehouse.toml"))
.map(Content::string)
.ok_or_else(|| anyhow!("config file does not exist"))??,
)
.context("failed to deserialize config")?;
config.site = std::env::var("TREEHOUSE_SITE").unwrap_or(config.site);
config.autopopulate_emoji(&*dirs.emoji)?;
config.autopopulate_pics(&*dirs.pic)?;
config.load_syntaxes(dirs.syntax.clone())?;
config
};
let treehouse = load_trees(&config, dirs)?;
let navigation_map = NavigationMap::build(
&treehouse,
treehouse.files_by_tree_path[VPath::new("index")],
);
let import_map = ImportMap::generate(
&config.site,
&Cd::new(dirs.static_.clone(), VPathBuf::new("js")),
&config.build.javascript.import_roots,
);
Ok(Sources {
config,
treehouse,
navigation_map,
import_map,
})
}
}
#[instrument(skip(config, dirs))]
fn load_trees(config: &Config, dirs: &Dirs) -> anyhow::Result<Treehouse> {
let mut treehouse = Treehouse::new();
let mut diagnostics = vec![];
let mut parsed_trees = HashMap::new();
let mut paths = vec![];
vfs::walk_dir_rec(&*dirs.content, VPath::ROOT, &mut |path| {
if path.extension() == Some("tree") {
paths.push(path.to_owned());
}
ControlFlow::Continue(())
});
// NOTE: Sources are filled in later; they can be left out until a call to report_diagnostics.
let file_ids: Vec<_> = paths
.iter()
.map(|path| treehouse.add_file(path.clone(), Source::Other(String::new())))
.collect();
let parse_results: Vec<_> = {
let _span = info_span!("load_trees::parse").entered();
paths
.into_par_iter()
.zip(&file_ids)
.flat_map(|(path, &file_id)| {
vfs::query::<Content>(&dirs.content, &path)
.and_then(|c| c.string().ok())
.map(|input| {
let parse_result = parse_tree_with_diagnostics(file_id, &input);
(path, file_id, input, parse_result)
})
})
.collect()
};
for (path, file_id, input, _) in &parse_results {
let tree_path = path.with_extension("");
treehouse
.files_by_tree_path
.insert(tree_path.clone(), *file_id);
treehouse.set_source(
*file_id,
Source::Tree {
input: input.clone(),
tree_path,
},
);
}
{
let _span = info_span!("load_trees::sema").entered();
for (path, file_id, _, result) in parse_results {
match result {
Ok(roots) => {
let roots = SemaRoots::from_roots(
&mut treehouse,
&mut diagnostics,
config,
file_id,
roots,
);
treehouse.roots.insert(file_id, roots);
parsed_trees.insert(path, file_id);
}
Err(mut parse_diagnostics) => diagnostics.append(&mut parse_diagnostics),
}
}
}
report_diagnostics(&treehouse, &diagnostics)?;
Ok(treehouse)
}

231
src/state.rs Normal file
View file

@ -0,0 +1,231 @@
use std::{collections::HashMap, ops::Range};
use anyhow::Context;
use codespan_reporting::{
diagnostic::{Diagnostic, Label, LabelStyle, Severity},
term::termcolor::{ColorChoice, StandardStream},
};
use tracing::instrument;
use ulid::Ulid;
use crate::{
tree::{SemaBranchId, SemaRoots, SemaTree},
vfs::{VPath, VPathBuf},
};
#[derive(Debug, Clone)]
pub enum Source {
Tree { input: String, tree_path: VPathBuf },
Other(String),
}
impl Source {
pub fn input(&self) -> &str {
match &self {
Source::Tree { input, .. } => input,
Source::Other(source) => source,
}
}
}
impl AsRef<str> for Source {
fn as_ref(&self) -> &str {
self.input()
}
}
#[derive(Debug, Clone)]
pub struct File {
pub path: VPathBuf,
pub source: Source,
pub line_starts: Vec<usize>,
}
impl File {
fn line_start(&self, line_index: usize) -> Result<usize, codespan_reporting::files::Error> {
use std::cmp::Ordering;
match line_index.cmp(&self.line_starts.len()) {
Ordering::Less => Ok(self
.line_starts
.get(line_index)
.cloned()
.expect("failed despite previous check")),
Ordering::Equal => Ok(self.source.as_ref().len()),
Ordering::Greater => Err(codespan_reporting::files::Error::LineTooLarge {
given: line_index,
max: self.line_starts.len() - 1,
}),
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct FileId(usize);
/// Treehouse compilation context.
pub struct Treehouse {
pub files: Vec<File>,
pub files_by_tree_path: HashMap<VPathBuf, FileId>,
pub feeds_by_name: HashMap<String, FileId>,
pub tree: SemaTree,
pub branches_by_named_id: HashMap<String, SemaBranchId>,
pub roots: HashMap<FileId, SemaRoots>,
pub branch_redirects: HashMap<String, SemaBranchId>,
pub missingno_generator: ulid::Generator,
}
impl Treehouse {
pub fn new() -> Self {
Self {
files: vec![],
files_by_tree_path: HashMap::new(),
feeds_by_name: HashMap::new(),
tree: SemaTree::default(),
branches_by_named_id: HashMap::new(),
roots: HashMap::new(),
branch_redirects: HashMap::new(),
missingno_generator: ulid::Generator::new(),
}
}
pub fn add_file(&mut self, path: VPathBuf, source: Source) -> FileId {
let id = FileId(self.files.len());
self.files.push(File {
line_starts: codespan_reporting::files::line_starts(source.input()).collect(),
path,
source,
});
id
}
/// Get the name of a file, assuming it was previously registered.
pub fn path(&self, file_id: FileId) -> &VPath {
&self.files[file_id.0].path
}
/// Get the source code of a file, assuming it was previously registered.
pub fn source(&self, file_id: FileId) -> &Source {
&self.files[file_id.0].source
}
pub fn set_source(&mut self, file_id: FileId, source: Source) {
self.files[file_id.0].line_starts =
codespan_reporting::files::line_starts(source.input()).collect();
self.files[file_id.0].source = source;
}
pub fn tree_path(&self, file_id: FileId) -> Option<&VPath> {
match self.source(file_id) {
Source::Tree { tree_path, .. } => Some(tree_path),
Source::Other(_) => None,
}
}
pub fn next_missingno(&mut self) -> Ulid {
self.missingno_generator
.generate()
.expect("just how much disk space do you have?")
}
}
impl Default for Treehouse {
fn default() -> Self {
Self::new()
}
}
impl<'a> codespan_reporting::files::Files<'a> for Treehouse {
type FileId = FileId;
type Name = &'a VPath;
type Source = &'a str;
fn name(&'a self, id: Self::FileId) -> Result<Self::Name, codespan_reporting::files::Error> {
Ok(self.path(id))
}
fn source(
&'a self,
id: Self::FileId,
) -> Result<Self::Source, codespan_reporting::files::Error> {
Ok(self.source(id).input())
}
fn line_index(
&'a self,
id: Self::FileId,
byte_index: usize,
) -> Result<usize, codespan_reporting::files::Error> {
let file = &self.files[id.0];
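        // `line_starts` is sorted, so a successful search is the line itself, and a
        // failed search's insertion point is one past the line containing the byte.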
Ok(file
.line_starts
.binary_search(&byte_index)
.unwrap_or_else(|next_line| next_line - 1))
}
fn line_range(
&'a self,
id: Self::FileId,
line_index: usize,
) -> Result<Range<usize>, codespan_reporting::files::Error> {
let file = &self.files[id.0];
let line_start = file.line_start(line_index)?;
let next_line_start = file.line_start(line_index + 1)?;
Ok(line_start..next_line_start)
}
}
pub struct TomlError {
pub message: String,
pub span: Option<Range<usize>>,
pub file_id: FileId,
pub input_range: Range<usize>,
}
pub fn toml_error_to_diagnostic(error: TomlError) -> Diagnostic<FileId> {
Diagnostic {
severity: Severity::Error,
code: Some("toml".into()),
message: error.message,
labels: error
.span
.map(|span| Label {
style: LabelStyle::Primary,
file_id: error.file_id,
range: error.input_range.start + span.start..error.input_range.start + span.end,
message: String::new(),
})
.into_iter()
.collect(),
notes: vec![],
}
}
#[instrument(skip(files, diagnostics))]
pub fn report_diagnostics(
files: &Treehouse,
diagnostics: &[Diagnostic<FileId>],
) -> anyhow::Result<()> {
let writer = StandardStream::stderr(ColorChoice::Auto);
let config = codespan_reporting::term::Config::default();
for diagnostic in diagnostics {
codespan_reporting::term::emit(&mut writer.lock(), &config, files, diagnostic)
.context("could not emit diagnostic")?;
}
Ok(())
}
pub fn has_errors(diagnostics: &[Diagnostic<FileId>]) -> bool {
diagnostics.iter().any(|d| d.severity == Severity::Error)
}

441
src/tree.rs Normal file
View file

@ -0,0 +1,441 @@
pub mod ast;
pub mod attributes;
pub mod mini_template;
pub mod pull;
use std::ops::Range;
use attributes::Timestamps;
use codespan_reporting::diagnostic::{Diagnostic, Label, LabelStyle, Severity};
use tracing::instrument;
use crate::{
config::Config,
state::{toml_error_to_diagnostic, FileId, Source, TomlError, Treehouse},
tree::{
ast::{Branch, Roots},
attributes::{Attributes, Content},
pull::BranchKind,
},
};
use self::attributes::RootAttributes;
#[derive(Debug, Default, Clone)]
pub struct SemaTree {
branches: Vec<SemaBranch>,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SemaBranchId(usize);
impl SemaTree {
pub fn add_branch(&mut self, branch: SemaBranch) -> SemaBranchId {
let id = self.branches.len();
self.branches.push(branch);
SemaBranchId(id)
}
pub fn branch(&self, id: SemaBranchId) -> &SemaBranch {
&self.branches[id.0]
}
}
#[derive(Debug, Clone)]
pub struct SemaRoots {
pub attributes: RootAttributes,
pub branches: Vec<SemaBranchId>,
}
impl SemaRoots {
#[instrument(
name = "SemaRoots::from_roots",
skip(treehouse, diagnostics, config, roots)
)]
pub fn from_roots(
treehouse: &mut Treehouse,
diagnostics: &mut Vec<Diagnostic<FileId>>,
config: &Config,
file_id: FileId,
roots: Roots,
) -> Self {
let mut sema_roots = Self {
attributes: Self::parse_attributes(treehouse, diagnostics, config, file_id, &roots),
branches: roots
.branches
.into_iter()
.map(|branch| {
SemaBranch::from_branch(treehouse, diagnostics, config, file_id, branch)
})
.collect(),
};
if sema_roots.attributes.timestamps.is_none() {
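            // No explicit timestamps: derive them from branch ULIDs, taking the oldest
            // branch as `created` and the newest as `updated`.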
let mut timestamps = None;
for &branch_id in &sema_roots.branches {
let branch = treehouse.tree.branch(branch_id);
if let Some(timestamp) = branch.attributes.timestamp() {
let timestamps = timestamps.get_or_insert(Timestamps {
created: timestamp,
updated: timestamp,
});
timestamps.created = timestamps.created.min(timestamp);
timestamps.updated = timestamps.updated.max(timestamp);
}
}
sema_roots.attributes.timestamps = timestamps;
}
sema_roots
}
fn parse_attributes(
treehouse: &mut Treehouse,
diagnostics: &mut Vec<Diagnostic<FileId>>,
config: &Config,
file_id: FileId,
roots: &Roots,
) -> RootAttributes {
let source = treehouse.source(file_id);
let mut successfully_parsed = true;
let mut attributes = if let Some(attributes) = &roots.attributes {
toml_edit::de::from_str(&source.input()[attributes.data.clone()]).unwrap_or_else(
|error| {
diagnostics.push(toml_error_to_diagnostic(TomlError {
message: error.message().to_owned(),
span: error.span(),
file_id,
input_range: attributes.data.clone(),
}));
successfully_parsed = false;
RootAttributes::default()
},
)
} else {
RootAttributes::default()
};
let successfully_parsed = successfully_parsed;
if successfully_parsed {
let _attribute_warning_span = roots
.attributes
.as_ref()
.map(|attributes| attributes.percent.clone())
.unwrap_or(0..1);
if attributes.title.is_empty() {
attributes.title = match treehouse.source(file_id) {
Source::Tree { tree_path, .. } => tree_path.to_string(),
_ => panic!("parse_attributes called for a non-.tree file"),
}
}
}
if let Some(thumbnail) = &attributes.thumbnail {
if thumbnail.alt.is_none() {
diagnostics.push(Diagnostic {
severity: Severity::Warning,
code: Some("sema".into()),
message: "thumbnail without alt text".into(),
labels: vec![Label {
style: LabelStyle::Primary,
file_id,
range: roots.attributes.as_ref().unwrap().percent.clone(),
message: "".into(),
}],
notes: vec![
"note: alt text is important for people using screen readers".into(),
"help: add alt text using the thumbnail.alt key".into(),
],
})
}
if !config.pics.contains_key(&thumbnail.id) {
diagnostics.push(Diagnostic {
severity: Severity::Warning,
code: Some("sema".into()),
message: format!(
"thumbnail picture with id '{}' does not exist",
thumbnail.id
),
labels: vec![Label {
style: LabelStyle::Primary,
file_id,
range: roots.attributes.as_ref().unwrap().percent.clone(),
message: "".into(),
}],
notes: vec!["note: check your id for typos".into()],
})
}
}
if let Some(feed_name) = &attributes.feed {
treehouse.feeds_by_name.insert(feed_name.clone(), file_id);
}
attributes
}
}
/// Analyzed branch.
#[derive(Debug, Clone)]
pub struct SemaBranch {
pub file_id: FileId,
pub indent_level: usize,
pub kind: BranchKind,
pub kind_span: Range<usize>,
pub content: Range<usize>,
pub html_id: String,
pub named_id: String,
pub attributes: Attributes,
pub children: Vec<SemaBranchId>,
}
impl SemaBranch {
pub fn from_branch(
treehouse: &mut Treehouse,
diagnostics: &mut Vec<Diagnostic<FileId>>,
config: &Config,
file_id: FileId,
branch: Branch,
) -> SemaBranchId {
let attributes = Self::parse_attributes(treehouse, diagnostics, config, file_id, &branch);
let named_id = attributes.id.to_owned();
let html_id = format!("b-{}", attributes.id);
let redirect_here = attributes.redirect_here.clone();
let branch = Self {
file_id,
indent_level: branch.indent_level,
kind: branch.kind,
kind_span: branch.kind_span,
content: branch.content,
html_id,
named_id: named_id.clone(),
attributes,
children: branch
.children
.into_iter()
.map(|child| Self::from_branch(treehouse, diagnostics, config, file_id, child))
.collect(),
};
let new_branch_id = treehouse.tree.add_branch(branch);
if let Some(old_branch_id) = treehouse
.branches_by_named_id
.insert(named_id.clone(), new_branch_id)
{
let new_branch = treehouse.tree.branch(new_branch_id);
let old_branch = treehouse.tree.branch(old_branch_id);
diagnostics.push(
Diagnostic::warning()
.with_code("sema")
.with_message(format!("two branches share the same id `{}`", named_id))
.with_labels(vec![
Label {
style: LabelStyle::Primary,
file_id,
range: new_branch.kind_span.clone(),
message: String::new(),
},
Label {
style: LabelStyle::Primary,
file_id: old_branch.file_id,
range: old_branch.kind_span.clone(),
message: String::new(),
},
]),
)
}
for source_branch_named_id in redirect_here {
if let Some(old_branch_id) = treehouse
.branch_redirects
.insert(source_branch_named_id.clone(), new_branch_id)
{
let new_branch = treehouse.tree.branch(new_branch_id);
let old_branch = treehouse.tree.branch(old_branch_id);
diagnostics.push(
Diagnostic::warning()
.with_code("sema")
.with_message(format!(
"two branches serve as redirect targets for `{source_branch_named_id}`"
))
.with_labels(vec![
Label {
style: LabelStyle::Primary,
file_id,
range: new_branch.kind_span.clone(),
message: String::new(),
},
Label {
style: LabelStyle::Primary,
file_id: old_branch.file_id,
range: old_branch.kind_span.clone(),
message: String::new(),
},
]),
)
}
}
new_branch_id
}
fn parse_attributes(
treehouse: &mut Treehouse,
diagnostics: &mut Vec<Diagnostic<FileId>>,
config: &Config,
file_id: FileId,
branch: &Branch,
) -> Attributes {
let source = treehouse.source(file_id);
let mut successfully_parsed = true;
let mut attributes = if let Some(attributes) = &branch.attributes {
toml_edit::de::from_str(&source.input()[attributes.data.clone()]).unwrap_or_else(
|error| {
diagnostics.push(toml_error_to_diagnostic(TomlError {
message: error.message().to_owned(),
span: error.span(),
file_id,
input_range: attributes.data.clone(),
}));
successfully_parsed = false;
Attributes::default()
},
)
} else {
Attributes::default()
};
let successfully_parsed = successfully_parsed;
// Only check for attribute validity if the attributes were parsed successfully.
if successfully_parsed {
let attribute_warning_span = branch
.attributes
.as_ref()
.map(|attributes| attributes.percent.clone())
.unwrap_or(branch.kind_span.clone());
// Check that every block has an ID.
if attributes.id.is_empty() {
attributes.id = format!("treehouse-missingno-{}", treehouse.next_missingno());
diagnostics.push(Diagnostic {
severity: Severity::Warning,
code: Some("attr".into()),
message: "branch does not have an `id` attribute".into(),
labels: vec![Label {
style: LabelStyle::Primary,
file_id,
range: attribute_warning_span.clone(),
message: String::new(),
}],
notes: vec![
format!(
"note: a generated id `{}` will be used, but this id is unstable and will not persist across generations",
attributes.id
),
format!("help: run `treehouse fix {}` to add missing ids to branches", treehouse.path(file_id)),
],
});
}
// Check that link-type blocks are `+`-type to facilitate lazy loading.
if let Content::Link(_) = &attributes.content {
if branch.kind == BranchKind::Expanded {
diagnostics.push(Diagnostic {
severity: Severity::Warning,
code: Some("attr".into()),
message: "`content.link` branch is expanded by default".into(),
labels: vec![Label {
style: LabelStyle::Primary,
file_id,
range: branch.kind_span.clone(),
message: String::new(),
}],
notes: vec![
"note: `content.link` branches should normally be collapsed to allow for lazy loading".into(),
],
});
}
}
// Resolve content.links.
if let Content::Link(tree_path) = &attributes.content {
if let Some(file_id) = treehouse.files_by_tree_path.get(tree_path) {
attributes.content = Content::ResolvedLink(*file_id);
} else {
diagnostics.push(Diagnostic {
severity: Severity::Error,
code: Some("attr".into()),
message: format!("linked tree `{tree_path}` does not exist"),
labels: vec![Label {
style: LabelStyle::Primary,
file_id,
range: attribute_warning_span.clone(),
message: "".into(),
}],
notes: vec![],
})
}
}
// Check that each tag belongs to the allowed set.
for tag in &attributes.tags {
if !config.feed.tags.contains(tag) {
diagnostics.push(Diagnostic {
severity: Severity::Warning,
code: Some("attr".into()),
message: format!("tag `{tag}` is not within the set of allowed tags"),
labels: vec![Label {
style: LabelStyle::Primary,
file_id,
range: attribute_warning_span.clone(),
message: "".into(),
}],
notes: vec![
"note: tag should be one from the set defined in `feed.tags` in treehouse.toml".into(),
],
})
}
}
}
attributes
}
}
#[derive(Debug, thiserror::Error, Clone, PartialEq, Eq)]
pub enum ParseErrorKind {
#[error("branch kind (`+` or `-`) expected")]
BranchKindExpected,
#[error("root branches must not be indented")]
RootIndentLevel,
#[error("at least {expected} spaces of indentation were expected, but got {got}")]
InconsistentIndentation { got: usize, expected: usize },
#[error("unterminated code block")]
UnterminatedCodeBlock,
}
#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
#[error("{range:?}: {kind}")]
pub struct ParseError {
pub kind: ParseErrorKind,
pub range: Range<usize>,
}
impl ParseErrorKind {
pub fn at(self, range: Range<usize>) -> ParseError {
ParseError { kind: self, range }
}
}

81
src/tree/ast.rs Normal file
View file

@ -0,0 +1,81 @@
use std::ops::Range;
use super::{
pull::{Attributes, BranchEvent, BranchKind, Parser},
ParseError, ParseErrorKind,
};
#[derive(Debug, Clone)]
pub struct Roots {
pub attributes: Option<Attributes>,
pub branches: Vec<Branch>,
}
impl Roots {
pub fn parse(parser: &mut Parser) -> Result<Self, ParseError> {
let attributes = parser.top_level_attributes()?;
let mut branches = vec![];
while let Some((branch, indent_level)) = Branch::parse_with_indent_level(parser)? {
if indent_level != 0 {
return Err(ParseErrorKind::RootIndentLevel.at(branch.kind_span));
}
branches.push(branch);
}
Ok(Self {
attributes,
branches,
})
}
}
#[derive(Debug, Clone)]
pub struct Branch {
pub indent_level: usize,
pub attributes: Option<Attributes>,
pub kind: BranchKind,
pub kind_span: Range<usize>,
pub content: Range<usize>,
pub children: Vec<Branch>,
}
impl From<BranchEvent> for Branch {
fn from(branch: BranchEvent) -> Self {
Self {
indent_level: branch.indent_level,
attributes: branch.attributes,
kind: branch.kind,
kind_span: branch.kind_span,
content: branch.content,
children: vec![],
}
}
}
impl Branch {
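    /// Parses a single branch together with its children, which are recognized purely
    /// by indentation: following branches indented deeper than this one become its
    /// children. Returns the branch along with its own indent level, or `None` at end
    /// of input.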
pub fn parse_with_indent_level(
parser: &mut Parser,
) -> Result<Option<(Self, usize)>, ParseError> {
if let Some(branch_event) = parser.next_branch()? {
let own_indent_level = branch_event.indent_level;
let mut branch = Branch::from(branch_event);
let children_indent_level = parser.peek_indent_level();
if children_indent_level > own_indent_level {
while parser.peek_indent_level() == children_indent_level {
if let Some(child) = Branch::parse(parser)? {
branch.children.push(child);
} else {
break;
}
}
}
Ok(Some((branch, own_indent_level)))
} else {
Ok(None)
}
}
pub fn parse(parser: &mut Parser) -> Result<Option<Self>, ParseError> {
Ok(Self::parse_with_indent_level(parser)?.map(|(branch, _)| branch))
}
}

202
src/tree/attributes.rs Normal file
View file

@ -0,0 +1,202 @@
use chrono::{DateTime, Utc};
use serde::{Deserialize, Serialize};
use ulid::Ulid;
use crate::{state::FileId, vfs::VPathBuf};
/// Top-level `%%` root attributes.
#[derive(Debug, Default, Clone, PartialEq, Eq, Deserialize, Serialize)]
pub struct RootAttributes {
/// Template to use for generating the page.
/// Defaults to `_tree.hbs`.
#[serde(default)]
pub template: Option<String>,
/// Title of the generated .html page.
///
/// The page's tree path is used if empty.
#[serde(default)]
pub title: String,
/// Page icon used in indexes.
/// This is an emoji name, such as `page` (default).
#[serde(default = "default_icon")]
pub icon: String,
/// Summary of the generated .html page.
#[serde(default)]
pub description: Option<String>,
/// ID of picture attached to the page, to be used as a thumbnail.
#[serde(default)]
pub thumbnail: Option<Picture>,
    /// Additional scripts to load into the page.
/// These are relative to the /static/js directory.
#[serde(default)]
pub scripts: Vec<String>,
    /// Additional styles to load into the page.
/// These are relative to the /static/css directory.
#[serde(default)]
pub styles: Vec<String>,
/// Visibility of a page in the parent page's index.
#[serde(default)]
pub visibility: Visibility,
/// The page's timestamps. These are automatically populated if a page has at least one branch
/// with an ID that includes a timestamp.
#[serde(default)]
pub timestamps: Option<Timestamps>,
/// When specified, this page will have a corresponding Atom feed under `rss/{feed}.xml`.
///
/// In feeds, top-level branches are expected to have a single heading containing the post title.
    /// Their children are turned into the post description.
#[serde(default)]
pub feed: Option<String>,
}
/// A picture reference.
#[derive(Debug, Default, Clone, PartialEq, Eq, Deserialize, Serialize)]
pub struct Picture {
/// ID of the picture.
pub id: String,
/// Optional alt text.
#[serde(default)]
pub alt: Option<String>,
}
/// Visibility of a page.
#[derive(Debug, Default, Clone, PartialEq, Eq, Deserialize, Serialize)]
pub enum Visibility {
#[default]
Public,
/// Hidden from the parent page's index.
Private,
}
/// Timestamps for a page.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Deserialize, Serialize)]
pub struct Timestamps {
/// When the page was created. By default, this is the timestamp of the least recent branch.
pub created: DateTime<Utc>,
/// When the page was last updated. By default, this is the timestamp of the most recent branch.
pub updated: DateTime<Utc>,
}
fn default_icon() -> String {
String::from("page")
}
/// Branch attributes.
#[derive(Debug, Default, Clone, PartialEq, Eq, Deserialize)]
pub struct Attributes {
/// Unique identifier of the branch.
///
/// Note that this must be unique to the _entire_ site, not just a single tree.
/// This is because trees may be embedded within each other using [`Content::Link`].
#[serde(default)]
pub id: String,
    /// Redirect the old or deleted branch IDs listed here to this branch.
///
/// This can be used to keep links permanent even in case the structure of the treehouse changes.
#[serde(default)]
pub redirect_here: Vec<String>,
/// Controls how the block should be presented.
#[serde(default)]
pub content: Content,
/// Do not persist the branch in localStorage.
#[serde(default)]
pub do_not_persist: bool,
/// Strings of extra CSS class names to include in the generated HTML.
#[serde(default)]
pub classes: Classes,
/// Enable `mini_template` templating in this branch.
#[serde(default)]
pub template: bool,
/// Publishing stage; if `Draft`, the branch is invisible unless treehouse is compiled in
/// debug mode.
#[serde(default)]
pub stage: Stage,
/// List of extra spells to cast on the branch.
#[serde(default)]
pub cast: String,
/// In feeds, specifies the list of tags to attach to an entry.
/// This only has an effect on top-level branches.
#[serde(default)]
pub tags: Vec<String>,
}
impl Attributes {
/// Parses the timestamp out of the branch's ID.
/// Returns `None` if the ID does not contain a timestamp.
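    ///
    /// A sketch, using the well-known example ULID from the ULID spec:
    ///
    /// ```ignore
    /// let attrs = Attributes {
    ///     id: String::from("01ARZ3NDEKTSV4RRFFQ69G5FAV"),
    ///     ..Default::default()
    /// };
    /// assert!(attrs.timestamp().is_some());
    /// assert_eq!(Attributes::default().timestamp(), None);
    /// ```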
pub fn timestamp(&self) -> Option<DateTime<Utc>> {
Ulid::from_string(&self.id)
.ok()
.as_ref()
.map(Ulid::timestamp_ms)
.and_then(|ms| DateTime::from_timestamp_millis(ms as i64))
}
}
/// Controls for block content presentation.
#[derive(Debug, Default, Clone, PartialEq, Eq, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum Content {
/// Children are stored inline in the block. Nothing special happens.
#[default]
Inline,
/// Link to another tree.
///
/// When JavaScript is enabled, the tree's roots will be embedded inline into the branch and
/// loaded lazily.
///
/// Without JavaScript, the tree will be linked with an `<a>` element.
///
/// The string provided as an argument is relative to the `content` root and should not contain
/// any file extensions. For example, to link to `content/my-article.tree`,
/// use `content.link = "my-article"`.
///
/// Note that `Link` branches must not contain any children. If a `Link` branch does contain
/// children, an `attribute`-type error is raised.
Link(VPathBuf),
/// Valid link to another tree.
/// This replaces `Content::Link` during semantic analysis.
#[serde(skip)]
ResolvedLink(FileId),
}
#[derive(Debug, Default, Clone, PartialEq, Eq, Deserialize)]
pub struct Classes {
    /// Classes to append to the branch itself (the `<li data-cast="b">` element).
#[serde(default)]
pub branch: String,
/// Classes to append to the branch's <ul> element containing its children.
#[serde(default)]
pub branch_children: String,
}
/// Publish stage of a branch.
///
/// Draft branches are not included in release builds of treehouse. In debug builds, they are also
/// marked with an extra "draft" before the content.
#[derive(Debug, Default, Clone, PartialEq, Eq, Deserialize)]
pub enum Stage {
#[default]
Public,
Draft,
}

1
src/tree/lib.rs Normal file
View file

@ -0,0 +1 @@

224
src/tree/mini_template.rs Normal file
View file

@ -0,0 +1,224 @@
//! Minimalistic templating engine that integrates with the .tree format and Markdown.
//!
//! Mostly to avoid pulling in Handlebars everywhere; mini_template, unlike Handlebars, also allows
//! for injecting *custom, stateful* context into the renderer, which is important for things like
//! the `pic` template to work.
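//!
//! Templates are delimited with `{% ... %}` (output is HTML-escaped) and `{%! ... %}`
//! (output is pasted in verbatim). A delimited run that doesn't parse as a known template
//! function falls back to verbatim emission. A sketch of the two functions the renderer
//! currently understands (the picture ID and file name here are illustrative):
//!
//! ```text
//! {% pic 01HPWJB4Y0V7JJ0AJ8F9XEF2ZA %}
//! {%! include_static icon.svg %}
//! ```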
use std::fmt::Write;
use std::ops::Range;
use crate::{
config::Config,
dirs::Dirs,
html::EscapeHtml,
state::Treehouse,
vfs::{self, Content, VPath},
};
struct Lexer<'a> {
input: &'a str,
position: usize,
// Despite this parser's intentional simplicity, a peekahead buffer needs to be used for
// performance because tokens are usually quite long and therefore reparsing them would be
// too expensive.
peek_buffer: Option<(Token, usize)>,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TokenKind {
/// Verbatim text, may be inside of a template.
Text,
Open(EscapingMode), // {%
Close, // %}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum EscapingMode {
EscapeHtml,
NoEscaping,
}
#[derive(Debug, Clone, PartialEq, Eq)]
struct Token {
kind: TokenKind,
range: Range<usize>,
}
impl<'a> Lexer<'a> {
fn new(input: &'a str) -> Self {
Self {
input,
position: 0,
peek_buffer: None,
}
}
fn current(&self) -> Option<char> {
self.input[self.position..].chars().next()
}
fn advance(&mut self) {
self.position += self.current().map(|c| c.len_utf8()).unwrap_or(0);
}
fn create_token(&self, start: usize, kind: TokenKind) -> Token {
Token {
kind,
range: start..self.position,
}
}
fn next_inner(&mut self) -> Option<Token> {
if let Some((token, after_token)) = self.peek_buffer.take() {
self.position = after_token;
return Some(token);
}
let start = self.position;
match self.current() {
Some('{') => {
self.advance();
if self.current() == Some('%') {
self.advance();
                    if self.current() == Some('!') {
                        self.advance();
                        Some(self.create_token(start, TokenKind::Open(EscapingMode::NoEscaping)))
                    } else {
                        Some(self.create_token(start, TokenKind::Open(EscapingMode::EscapeHtml)))
                    }
} else {
self.advance();
Some(self.create_token(start, TokenKind::Text))
}
}
Some('%') => {
self.advance();
if self.current() == Some('}') {
self.advance();
Some(self.create_token(start, TokenKind::Close))
} else {
self.advance();
Some(self.create_token(start, TokenKind::Text))
}
}
Some(_) => {
while !matches!(self.current(), Some('{' | '%') | None) {
self.advance();
}
Some(self.create_token(start, TokenKind::Text))
}
None => None,
}
}
fn peek_inner(&mut self) -> Option<Token> {
let position = self.position;
let token = self.next();
let after_token = self.position;
self.position = position;
if let Some(token) = token.clone() {
self.peek_buffer = Some((token, after_token));
}
token
}
fn next(&mut self) -> Option<Token> {
self.next_inner().map(|mut token| {
// Coalesce multiple Text tokens into one.
if token.kind == TokenKind::Text {
while let Some(Token {
kind: TokenKind::Text,
..
}) = self.peek_inner()
{
let next_token = self.next_inner().unwrap();
token.range.end = next_token.range.end;
}
}
token
})
}
}
struct Renderer<'a> {
lexer: Lexer<'a>,
output: String,
}
struct InvalidTemplate;
impl Renderer<'_> {
fn emit_token_verbatim(&mut self, token: &Token) {
self.output.push_str(&self.lexer.input[token.range.clone()]);
}
fn render(&mut self, config: &Config, treehouse: &Treehouse, dirs: &Dirs) {
let kind_of = |token: &Token| token.kind;
while let Some(token) = self.lexer.next() {
match token.kind {
TokenKind::Open(escaping) => {
let inside = self.lexer.next();
let close = self.lexer.next();
if let Some((TokenKind::Text, TokenKind::Close)) = inside
.as_ref()
.map(kind_of)
.zip(close.as_ref().map(kind_of))
{
match Self::render_template(
config,
treehouse,
dirs,
self.lexer.input[inside.as_ref().unwrap().range.clone()].trim(),
) {
Ok(s) => match escaping {
EscapingMode::EscapeHtml => {
_ = write!(self.output, "{}", EscapeHtml(&s));
}
EscapingMode::NoEscaping => self.output.push_str(&s),
},
Err(InvalidTemplate) => {
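                            // Fall back to emitting the inner and closing tokens verbatim.
                            // Note that the opening `{%` token is not re-emitted, so an
                            // invalid template loses its opening delimiter in the output.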
inside.inspect(|token| self.emit_token_verbatim(token));
close.inspect(|token| self.emit_token_verbatim(token));
}
}
} else {
inside.inspect(|token| self.emit_token_verbatim(token));
close.inspect(|token| self.emit_token_verbatim(token));
}
}
_ => self.emit_token_verbatim(&token),
}
}
}
fn render_template(
config: &Config,
_treehouse: &Treehouse,
dirs: &Dirs,
template: &str,
) -> Result<String, InvalidTemplate> {
let (function, arguments) = template.split_once(' ').unwrap_or((template, ""));
match function {
"pic" => Ok(config.pic_url(&*dirs.pic, arguments)),
"include_static" => VPath::try_new(arguments)
.ok()
.and_then(|vpath| vfs::query::<Content>(&dirs.static_, vpath))
.and_then(|c| c.string().ok())
.ok_or(InvalidTemplate),
_ => Err(InvalidTemplate),
}
}
}
pub fn render(config: &Config, treehouse: &Treehouse, dirs: &Dirs, input: &str) -> String {
let mut renderer = Renderer {
lexer: Lexer::new(input),
output: String::new(),
};
renderer.render(config, treehouse, dirs);
renderer.output
}

233
src/tree/pull.rs Normal file
View file

@ -0,0 +1,233 @@
use std::{convert::identity, ops::Range};
use super::{ParseError, ParseErrorKind};
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum BranchKind {
/// Expanded by default.
Expanded,
/// Folded by default.
Collapsed,
}
impl BranchKind {
pub fn char(&self) -> char {
match self {
BranchKind::Expanded => '-',
BranchKind::Collapsed => '+',
}
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct BranchEvent {
pub indent_level: usize,
pub kind: BranchKind,
pub kind_span: Range<usize>,
pub content: Range<usize>,
pub attributes: Option<Attributes>,
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Attributes {
pub percent: Range<usize>,
pub data: Range<usize>,
}
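/// A pull parser over `.tree` source text.
///
/// A sketch of the surface syntax it consumes; indentation is significant, `%%` opens
/// root attributes, `%` opens branch attributes, and `-`/`+` open branches that are
/// expanded/collapsed by default (the content here is illustrative):
///
/// ```text
/// %% title = "a page"
///
/// - an expanded branch
///   % id = "01HPWJB4Y0V7JJ0AJ8F9XEF2ZA"
///   + a collapsed child
/// ```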
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Parser<'a> {
pub input: &'a str,
pub position: usize,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum AllowCodeBlocks {
No,
Yes,
}
impl Parser<'_> {
fn current(&self) -> Option<char> {
self.input[self.position..].chars().next()
}
fn current_starts_with(&self, s: &str) -> bool {
self.input[self.position..].starts_with(s)
}
fn advance(&mut self) {
self.position += self.current().map(|c| c.len_utf8()).unwrap_or(0);
}
fn eat_as_long_as(&mut self, c: char) -> usize {
let mut count = 0;
while self.current() == Some(c) {
count += 1;
self.advance();
}
count
}
fn eat_while(&mut self, cond: impl Fn(char) -> bool) {
while self.current().map(&cond).is_some_and(|x| x) {
self.advance();
}
}
fn eat_until_line_break(&mut self) {
loop {
match self.current() {
Some('\r') => {
self.advance();
if self.current() == Some('\n') {
self.advance();
break;
}
}
Some('\n') => {
self.advance();
break;
}
Some(_) => self.advance(),
None => break,
}
}
}
pub fn peek_indent_level(&mut self) -> usize {
let position = self.position;
let indent_level = self.eat_as_long_as(' ');
self.position = position;
indent_level
}
fn eat_indented_lines_until(
&mut self,
indent_level: usize,
cond: impl Fn(char) -> bool,
allow_code_blocks: AllowCodeBlocks,
) -> Result<(), ParseError> {
let mut code_block: Option<Range<usize>> = None;
loop {
if let Some(range) = &code_block {
self.eat_while(|c| c == ' ');
if self.current_starts_with("```") {
code_block = None;
self.position += 3;
self.eat_until_line_break();
continue;
}
self.eat_until_line_break();
if self.current().is_none() {
return Err(ParseErrorKind::UnterminatedCodeBlock.at(range.clone()));
}
} else {
self.eat_while(|c| c == ' ');
if allow_code_blocks == AllowCodeBlocks::Yes && self.current_starts_with("```") {
code_block = Some(self.position..self.position + 3);
self.position += 3;
continue;
}
self.eat_until_line_break();
let before_indentation = self.position;
let line_indent_level = self.eat_as_long_as(' ');
let after_indentation = self.position;
if self.current().map(&cond).is_some_and(identity) || self.current().is_none() {
self.position = before_indentation;
break;
} else if !matches!(self.current(), Some('\n') | Some('\r'))
&& line_indent_level < indent_level
{
return Err(ParseErrorKind::InconsistentIndentation {
got: line_indent_level,
expected: indent_level,
}
.at(before_indentation..after_indentation));
}
}
}
Ok(())
}
pub fn top_level_attributes(&mut self) -> Result<Option<Attributes>, ParseError> {
let start = self.position;
match self.current() {
Some('%') => {
                // Remember where the `%` is, in case this turns out to be a branch
                // attribute block that `next_branch` should parse instead.
                let before_percent = self.position;
                self.advance();
if self.current() == Some('%') {
self.advance();
let after_two_percent = self.position;
self.eat_indented_lines_until(
0,
|c| c == '-' || c == '+' || c == '%',
AllowCodeBlocks::No,
)?;
let end = self.position;
Ok(Some(Attributes {
percent: start..after_two_percent,
data: after_two_percent..end,
}))
} else {
                    self.position = before_percent;
Ok(None)
}
}
_ => Ok(None),
}
}
pub fn next_branch(&mut self) -> Result<Option<BranchEvent>, ParseError> {
if self.current().is_none() {
return Ok(None);
}
let indent_level = self.eat_as_long_as(' ');
let attributes = if self.current() == Some('%') {
let start = self.position;
self.advance();
let after_percent = self.position;
self.eat_indented_lines_until(
indent_level,
|c| c == '-' || c == '+',
AllowCodeBlocks::No,
)?;
self.eat_as_long_as(' ');
let end = self.position;
Some(Attributes {
percent: start..after_percent,
data: after_percent..end,
})
} else {
None
};
let kind_start = self.position;
let kind = match self.current() {
Some('-') => BranchKind::Expanded,
Some('+') => BranchKind::Collapsed,
_ => return Err(ParseErrorKind::BranchKindExpected.at(kind_start..kind_start + 1)),
};
self.advance();
let kind_end = self.position;
let content_start = self.position;
self.eat_indented_lines_until(
indent_level,
|c| c == '-' || c == '+' || c == '%',
AllowCodeBlocks::Yes,
)?;
let content_end = self.position;
Ok(Some(BranchEvent {
indent_level,
attributes,
kind,
kind_span: kind_start..kind_end,
content: content_start..content_end,
}))
}
}

315
src/vfs.rs Normal file
View file

@ -0,0 +1,315 @@
//! The treehouse virtual file system.
//!
//! Unlike traditional file systems, there is no separation between directories and files.
//! Instead, our file system is based on _entries_, which may have specific, optional, well-typed
//! metadata attached to them.
//! A directory is formed by providing [`Entries`] for a path, and a file is formed by
//! providing [`Content`] for it.
//!
//! This makes using the file system simpler, as you do not have to differentiate between different
//! entry kinds. All paths act as if they _could_ return byte content, and all paths act as if they
//! _could_ have children.
//!
//! # Composability
//!
//! [`Dir`]s are composable. The [`Dir`] itself starts off with the root path ([`VPath::ROOT`]),
//! which may contain further [`Entries`], or content by itself.
//! This makes it possible to nest a [`Dir`] under another [`Dir`].
//!
//! Additionally, there's also the inverse operation, [`Cd`] (named after the `cd`
//! _change directory_ shell command), which returns a [`Dir`] viewing a subpath within another
//! [`Dir`].
//!
//! # Building directories
//!
//! In-memory directories can be composed using the following primitives:
//!
//! - [`BufferedFile`] - root path content is the provided [`Content`].
//! - [`MemDir`] - a [`Dir`] containing a single level of other [`Dir`]s inside.
//!
//! Additionally, for interfacing with the OS file system, [`PhysicalDir`] is available,
//! representing a directory stored on the disk.
//!
//! # Virtual paths
//!
//! Entries within directories are referenced using [`VPath`]s (**v**irtual **path**s).
//! A virtual path is composed out of any amount of `/`-separated components.
//!
//! There are no special directories like `.` and `..` (those are just normal entries, though using
//! them is discouraged). [`VPath`]s are always relative to the root of the [`Dir`] you're querying.
//!
//! Leading and trailing slashes are not allowed, because they would have no meaning.
//!
//! [`VPath`] also has an owned version, [`VPathBuf`].
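//!
//! # Example
//!
//! A minimal sketch tying the primitives together (assuming the crate is named
//! `treehouse`):
//!
//! ```ignore
//! use treehouse::vfs::{self, BufferedFile, Content, MemDir, ToDynDir, VPath};
//!
//! let mut root = MemDir::new();
//! root.add(
//!     VPath::new("hello.txt"),
//!     BufferedFile::new(Content::new("text/plain", b"hi".to_vec())).to_dyn(),
//! );
//! let content = vfs::query::<Content>(&root, VPath::new("hello.txt"));
//! assert_eq!(content.map(Content::bytes), Some(b"hi".to_vec()));
//! ```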
use std::{
any::TypeId,
fmt::{self, Debug},
ops::{ControlFlow, Deref},
string::FromUtf8Error,
sync::Arc,
};
mod anchored;
pub mod asynch;
mod cd;
mod content_cache;
mod content_version_cache;
mod edit;
mod file;
mod html_canonicalize;
mod image_size_cache;
mod mem_dir;
mod overlay;
mod path;
mod physical;
pub use anchored::*;
pub use cd::*;
pub use content_cache::*;
pub use content_version_cache::*;
pub use edit::*;
pub use file::*;
pub use html_canonicalize::*;
pub use image_size_cache::*;
pub use mem_dir::*;
pub use overlay::*;
pub use path::*;
pub use physical::*;
pub trait Dir: Debug {
fn query(&self, path: &VPath, query: &mut Query);
}
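/// Marker trait for value types that can be provided through a [`Query`].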
pub trait Fork {}
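/// Queries `dir` for a single well-typed value at `path`; returns `None` if nothing in
/// the chain of [`Dir`]s provides one.
///
/// A sketch of the providing side, with a hypothetical `HelloDir` that serves the same
/// content under every path:
///
/// ```ignore
/// #[derive(Debug)]
/// struct HelloDir;
///
/// impl Dir for HelloDir {
///     fn query(&self, _path: &VPath, query: &mut Query) {
///         query.provide(|| Content::new("text/plain", b"hello".to_vec()));
///     }
/// }
///
/// assert!(query::<Content>(&HelloDir, VPath::new("any/path")).is_some());
/// ```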
pub fn query<'a, T>(dir: &'a (impl Dir + ?Sized), path: &VPath) -> Option<T>
where
T: 'static + Fork,
{
let mut slot = TaggedOption::<'a, tags::Value<T>>(None);
dir.query(path, Query::new(&mut slot));
slot.0
}
#[repr(transparent)]
pub struct Query<'a> {
erased: dyn Erased<'a> + 'a,
}
impl<'a> Query<'a> {
fn new<'b>(erased: &'b mut (dyn Erased<'a> + 'a)) -> &'b mut Query<'a> {
unsafe { &mut *(erased as *mut dyn Erased<'a> as *mut Query<'a>) }
}
pub fn provide<T>(&mut self, f: impl FnOnce() -> T)
where
T: 'static + Fork,
{
if let Some(result @ TaggedOption(None)) = self.erased.downcast_mut::<tags::Value<T>>() {
result.0 = Some(f());
}
}
pub fn try_provide<T>(&mut self, f: impl FnOnce() -> Option<T>)
where
T: 'static + Fork,
{
if let Some(result @ TaggedOption(None)) = self.erased.downcast_mut::<tags::Value<T>>() {
result.0 = f();
}
}
}
mod tags {
use std::marker::PhantomData;
pub trait Type<'a>: Sized + 'static {
type Reified: 'a;
}
pub struct Value<T>(PhantomData<T>)
where
T: 'static;
impl<T> Type<'_> for Value<T>
where
T: 'static,
{
type Reified = T;
}
}
#[repr(transparent)]
struct TaggedOption<'a, I: tags::Type<'a>>(Option<I::Reified>);
#[expect(clippy::missing_safety_doc)]
unsafe trait Erased<'a>: 'a {
fn tag_id(&self) -> TypeId;
}
unsafe impl<'a, I: tags::Type<'a>> Erased<'a> for TaggedOption<'a, I> {
fn tag_id(&self) -> TypeId {
TypeId::of::<I>()
}
}
impl<'a> dyn Erased<'a> + 'a {
fn downcast_mut<I>(&mut self) -> Option<&mut TaggedOption<'a, I>>
where
I: tags::Type<'a>,
{
if self.tag_id() == TypeId::of::<I>() {
// SAFETY: Just checked whether we're pointing to an I.
Some(unsafe { &mut *(self as *mut Self).cast::<TaggedOption<'a, I>>() })
} else {
None
}
}
}
impl<T> Dir for &T
where
T: Dir,
{
fn query(&self, path: &VPath, query: &mut Query) {
(**self).query(path, query)
}
}
#[derive(Clone)]
pub struct DynDir {
arc: Arc<dyn Dir + Send + Sync>,
}
impl Dir for DynDir {
fn query(&self, path: &VPath, query: &mut Query) {
self.arc.query(path, query);
}
}
impl fmt::Debug for DynDir {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Debug::fmt(&*self.arc, f)
}
}
impl Deref for DynDir {
type Target = dyn Dir + Send + Sync;
fn deref(&self) -> &Self::Target {
&*self.arc
}
}
pub trait ToDynDir {
fn to_dyn(self) -> DynDir;
}
impl<T> ToDynDir for T
where
T: Dir + Send + Sync + 'static,
{
fn to_dyn(self) -> DynDir {
DynDir {
arc: Arc::new(self),
}
}
}
pub trait AnchoredAtExt {
fn anchored_at(self, at: VPathBuf) -> Anchored<Self>
where
Self: Sized;
}
impl<T> AnchoredAtExt for T
where
T: Dir,
{
fn anchored_at(self, at: VPathBuf) -> Anchored<Self> {
Anchored::new(self, at)
}
}
/// List of child entries under a directory.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct Entries(pub Vec<VPathBuf>);
/// Byte content in an entry.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct Content {
/// Media type string. <https://en.wikipedia.org/wiki/Media_type>
kind: String,
bytes: Vec<u8>,
}
/// Abstract version of an entry.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct ContentVersion {
pub string: String,
}
/// Path relative to `config.site` indicating where the file will be available once served.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct Anchor {
pub path: VPathBuf,
}
/// Size of image entries.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct ImageSize {
pub width: u32,
pub height: u32,
}
impl Content {
pub fn new(kind: impl Into<String>, bytes: Vec<u8>) -> Self {
Self {
kind: kind.into(),
bytes,
}
}
pub fn kind(&self) -> &str {
&self.kind
}
pub fn bytes(self) -> Vec<u8> {
self.bytes
}
pub fn string(self) -> Result<String, FromUtf8Error> {
String::from_utf8(self.bytes())
}
}
impl Fork for Entries {}
impl Fork for Content {}
impl Fork for ContentVersion {}
impl Fork for Anchor {}
impl Fork for ImageSize {}
impl Fork for EditPath {}
pub fn entries(dir: &dyn Dir, path: &VPath) -> Vec<VPathBuf> {
query::<Entries>(dir, path).map(|e| e.0).unwrap_or_default()
}
pub fn walk_dir_rec(dir: &dyn Dir, path: &VPath, f: &mut dyn FnMut(&VPath) -> ControlFlow<(), ()>) {
for entry in entries(dir, path) {
match f(&entry) {
ControlFlow::Continue(_) => (),
ControlFlow::Break(_) => return,
}
walk_dir_rec(dir, &entry, f);
}
}
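/// Computes the URL under which `path` is served, anchored at `site`.
///
/// When the directory also provides a [`ContentVersion`] (e.g. through
/// [`Blake3ContentVersionCache`]), a cache-busting query parameter is appended, yielding
/// something like `https://example.com/static/a.css?v=b3-01234567` (illustrative).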
pub fn url(site: &str, dir: &dyn Dir, path: &VPath) -> Option<String> {
let anchor = query::<Anchor>(dir, path)?;
if let Some(version) = query::<ContentVersion>(dir, path) {
Some(format!("{}/{}?v={}", site, anchor.path, version.string))
} else {
Some(format!("{}/{}", site, anchor.path))
}
}

36
src/vfs/anchored.rs Normal file
View file

@ -0,0 +1,36 @@
use std::fmt;
use super::{Anchor, Dir, Query, VPath, VPathBuf};
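/// Provides an [`Anchor`] for every path, rooting `inner`'s entries at `at` for URL
/// generation, while passing all other queries through.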
pub struct Anchored<T> {
inner: T,
at: VPathBuf,
}
impl<T> Anchored<T> {
pub fn new(inner: T, at: VPathBuf) -> Self {
Self { inner, at }
}
}
impl<T> Dir for Anchored<T>
where
T: Dir,
{
fn query(&self, path: &VPath, query: &mut Query) {
query.provide(|| Anchor {
path: self.at.join(path),
});
self.inner.query(path, query);
}
}
impl<T> fmt::Debug for Anchored<T>
where
T: fmt::Debug,
{
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "Anchored({:?}, {})", self.inner, self.at)
}
}

27
src/vfs/asynch.rs Normal file
View file

@ -0,0 +1,27 @@
use super::{query, Content, DynDir, VPath};
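/// An async wrapper over a [`DynDir`], offloading blocking queries onto the tokio
/// blocking thread pool.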
#[derive(Debug, Clone)]
pub struct AsyncDir {
inner: DynDir,
}
impl AsyncDir {
pub fn new(inner: DynDir) -> Self {
Self { inner }
}
pub fn sync(&self) -> &DynDir {
&self.inner
}
pub async fn content(&self, path: &VPath) -> Option<Content> {
let this = self.clone();
let path = path.to_owned();
        // NOTE: The performance impact of spawning a blocking task may be a bit high
        // once we add caching. Measure throughput here.
tokio::task::spawn_blocking(move || query::<Content>(&this.inner, &path))
.await
.unwrap()
}
}

54
src/vfs/cd.rs Normal file
View file

@ -0,0 +1,54 @@
use std::fmt;
use super::{entries, Dir, Entries, Query, VPath, VPathBuf};
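/// A view into a subpath of `parent`, named after the `cd` shell command.
///
/// Sketch: with `Cd::new(root, VPathBuf::new("content"))`, querying `"page.tree"`
/// resolves to `"content/page.tree"` inside `root`, and listed entries have the
/// `content/` prefix stripped back off.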
pub struct Cd<T> {
parent: T,
path: VPathBuf,
}
impl<T> Cd<T> {
pub fn new(parent: T, path: VPathBuf) -> Self {
Self { parent, path }
}
}
impl<T> Cd<T>
where
T: Dir,
{
fn dir(&self, path: &VPath) -> Vec<VPathBuf> {
entries(&self.parent, &self.path.join(path))
.into_iter()
.map(|entry| {
entry
.strip_prefix(&self.path)
.expect("all entries must be anchored within `self.path`")
.to_owned()
})
.collect()
}
}
impl<T> Dir for Cd<T>
where
T: Dir,
{
fn query(&self, path: &VPath, query: &mut Query) {
// The only query that meaningfully needs to return something else is `dir`, which must
// be modified to strip prefixes off of the parent's returned paths.
query.provide(|| Entries(self.dir(path)));
// Other queries can run unmodified, only passing them the right path.
self.parent.query(&self.path.join(path), query);
}
}
impl<T> fmt::Debug for Cd<T>
where
T: fmt::Debug,
{
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "{:?}/{:?}", self.parent, self.path)
}
}

78
src/vfs/content_cache.rs Normal file
View file

@ -0,0 +1,78 @@
use std::{
fmt::{self, Debug},
ops::ControlFlow,
};
use dashmap::DashMap;
use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
use tracing::{info_span, instrument};
use super::{query, walk_dir_rec, Content, Dir, Query, VPath, VPathBuf};
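/// Caches [`Content`] query results in memory, so that repeated reads of the same entry
/// don't hit the underlying [`Dir`] again.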
pub struct ContentCache<T> {
inner: T,
cache: DashMap<VPathBuf, Content>,
}
impl<T> ContentCache<T> {
pub fn new(inner: T) -> Self {
Self {
inner,
cache: DashMap::new(),
}
}
}
impl<T> ContentCache<T>
where
T: Dir + Send + Sync,
{
#[instrument(name = "ContentCache::warm_up", skip(self))]
pub fn warm_up(&self) {
let mut paths = vec![];
walk_dir_rec(&self.inner, VPath::ROOT, &mut |path| {
paths.push(path.to_owned());
ControlFlow::Continue(())
});
paths.par_iter().for_each(|path| _ = self.content(path));
}
}
impl<T> ContentCache<T>
where
T: Dir,
{
#[instrument(name = "ContentCache::content", skip(self))]
fn content(&self, path: &VPath) -> Option<Content> {
self.cache.get(path).map(|x| x.clone()).or_else(|| {
let _span = info_span!("cache_miss").entered();
let content = query::<Content>(&self.inner, path);
if let Some(content) = &content {
self.cache.insert(path.to_owned(), content.clone());
}
content
})
}
}
impl<T> Dir for ContentCache<T>
where
T: Dir,
{
fn query(&self, path: &VPath, query: &mut Query) {
query.try_provide(|| self.content(path));
self.inner.query(path, query);
}
}
impl<T> fmt::Debug for ContentCache<T>
where
T: Debug,
{
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "ContentCache({:?})", self.inner)
}
}

68
src/vfs/content_version_cache.rs Normal file
View file

@ -0,0 +1,68 @@
use std::fmt::{self, Debug};
use dashmap::DashMap;
use tracing::{info_span, instrument};
use super::{query, Content, ContentVersion, Dir, Query, VPath, VPathBuf};
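/// Derives a [`ContentVersion`] from a salted BLAKE3 hash of each entry's [`Content`],
/// caching the result per path. The version string looks like `b3-01234567` (the first
/// eight hex digits of the hash).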
pub struct Blake3ContentVersionCache<T> {
salt: Vec<u8>,
inner: T,
cache: DashMap<VPathBuf, ContentVersion>,
}
impl<T> Blake3ContentVersionCache<T> {
pub fn new(salt: Vec<u8>, inner: T) -> Self {
Self {
salt,
inner,
cache: DashMap::new(),
}
}
}
impl<T> Blake3ContentVersionCache<T>
where
T: Dir,
{
#[instrument(name = "Blake3ContentVersionCache::content_version", skip(self))]
fn content_version(&self, path: &VPath) -> Option<ContentVersion> {
self.cache.get(path).map(|x| x.clone()).or_else(|| {
let _span = info_span!("cache_miss").entered();
let version = query::<Content>(&self.inner, path).map(|content| {
let mut hasher = blake3::Hasher::new();
hasher.update(&self.salt);
hasher.update(&content.bytes());
let hash = hasher.finalize().to_hex();
ContentVersion {
string: format!("b3-{}", &hash[0..8]),
}
});
if let Some(version) = &version {
self.cache.insert(path.to_owned(), version.clone());
}
version
})
}
}
impl<T> Dir for Blake3ContentVersionCache<T>
where
T: Dir,
{
fn query(&self, path: &VPath, query: &mut Query) {
query.try_provide(|| self.content_version(path));
self.inner.query(path, query);
}
}
impl<T> fmt::Debug for Blake3ContentVersionCache<T>
where
T: Debug,
{
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "Blake3ContentVersionCache({:?})", self.inner)
}
}

101
src/vfs/edit.rs Normal file
View file

@ -0,0 +1,101 @@
use std::{error::Error, fmt, future::Future, path::PathBuf};
use tokio::task::JoinSet;
use tracing::{debug, error, info};
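/// A physical location that an [`Edit`] can write to, obtained by querying a
/// [`PhysicalDir`](super::PhysicalDir) for it.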
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord)]
pub struct EditPath {
pub(super) path: PathBuf,
}
/// Represents a pending edit operation that can be written to persistent storage later.
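///
/// Edits compose into a tree that performs no I/O until [`apply`][Edit::apply]ed.
/// A sketch, with `a`, `b`, and `c` being [`EditPath`]s obtained from a `PhysicalDir`:
///
/// ```ignore
/// let edit = Edit::Seq(vec![
///     Edit::Write(a, b"runs first".to_vec()),
///     Edit::All(vec![
///         Edit::Write(b, b"these two".to_vec()),
///         Edit::Write(c, b"run in parallel".to_vec()),
///     ]),
/// ]);
/// // Wrap in `Dry` to only log what would happen:
/// Edit::Dry(Box::new(edit)).apply().await.unwrap();
/// ```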
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Edit {
/// An edit that doesn't do anything.
NoOp,
/// Write the given content to a file.
Write(EditPath, Vec<u8>),
/// Execute a sequence of edits in order.
Seq(Vec<Edit>),
/// Execute the provided edits in parallel.
All(Vec<Edit>),
/// Makes an edit dry.
///
    /// A dry edit only logs what operations would be performed; it does not perform any I/O.
Dry(Box<Edit>),
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ApplyFailed;
impl Edit {
#[expect(clippy::manual_async_fn)]
pub fn apply(self) -> impl Future<Output = Result<(), ApplyFailed>> + Send {
async {
match self {
Edit::NoOp => debug!("no op edit"),
Edit::Write(edit_path, content) => {
tokio::fs::write(&edit_path.path, &content)
.await
.inspect_err(|err| error!("write to {edit_path:?} failed: {err:?}"))
.map_err(|_| ApplyFailed)?;
debug!("wrote {} bytes to {edit_path:?}", content.len())
}
Edit::Seq(vec) => {
debug!("begin sequence of {} edits", vec.len());
for edit in vec {
Box::pin(edit.apply()).await?;
}
debug!("end sequence");
}
Edit::All(vec) => {
debug!("begin parallel {} edits", vec.len());
let mut set = JoinSet::new();
for edit in vec {
set.spawn(edit.apply());
}
while let Some(result) = set.join_next().await {
result.map_err(|_| ApplyFailed)??;
}
debug!("end parallel");
}
Edit::Dry(edit) => edit.dry(),
}
Ok(())
}
}
pub fn dry(&self) {
match self {
Edit::NoOp => (),
Edit::Write(edit_path, content) => {
info!("{edit_path:?}: would write {:?} bytes", content.len());
}
Edit::Seq(edits) => edits.iter().for_each(Self::dry),
Edit::All(edits) => edits.iter().for_each(Self::dry),
Edit::Dry(edit) => edit.dry(),
}
}
}
impl fmt::Display for ApplyFailed {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str("failed to apply some edits")
}
}
impl Error for ApplyFailed {}
impl fmt::Debug for EditPath {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Debug::fmt(&self.path, f)
}
}

27
src/vfs/file.rs Normal file
View file

@ -0,0 +1,27 @@
use std::fmt;
use super::{Content, Dir, Query, VPath};
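/// A single in-memory file: serves the given [`Content`] at [`VPath::ROOT`] and nothing
/// anywhere else.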
pub struct BufferedFile {
pub content: Content,
}
impl BufferedFile {
pub fn new(content: Content) -> Self {
Self { content }
}
}
impl Dir for BufferedFile {
fn query(&self, path: &VPath, query: &mut Query) {
if path == VPath::ROOT {
query.provide(|| self.content.clone());
}
}
}
impl fmt::Debug for BufferedFile {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "BufferedFile")
}
}

36
src/vfs/html_canonicalize.rs Normal file
View file

@ -0,0 +1,36 @@
use core::fmt;
use super::{Dir, Query, VPath};
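/// Strips the `.html` extension off of queried paths, so that `page.html` and `page`
/// resolve to the same entry.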
pub struct HtmlCanonicalize<T> {
inner: T,
}
impl<T> HtmlCanonicalize<T> {
pub fn new(inner: T) -> Self {
Self { inner }
}
}
impl<T> Dir for HtmlCanonicalize<T>
where
T: Dir,
{
fn query(&self, path: &VPath, query: &mut Query) {
let mut path = path.to_owned();
if path.extension() == Some("html") {
path.set_extension("");
}
self.inner.query(&path, query);
}
}
impl<T> fmt::Debug for HtmlCanonicalize<T>
where
T: fmt::Debug,
{
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "HtmlCanonicalize({:?})", self.inner)
}
}

141
src/vfs/image_size_cache.rs Normal file
View file

@ -0,0 +1,141 @@
use std::{fmt, io::Cursor};
use anyhow::Context;
use dashmap::DashMap;
use tracing::{info_span, instrument, warn};
use crate::config;
use super::{query, Content, Dir, ImageSize, Query, VPath, VPathBuf};
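/// Computes and caches [`ImageSize`]s for image entries, reading raster dimensions via
/// the `image` crate and SVG dimensions via [`svg_size`].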
pub struct ImageSizeCache<T> {
inner: T,
cache: DashMap<VPathBuf, ImageSize>,
}
impl<T> ImageSizeCache<T> {
pub fn new(inner: T) -> Self {
Self {
inner,
cache: DashMap::new(),
}
}
}
impl<T> ImageSizeCache<T>
where
T: Dir,
{
fn compute_image_size(&self, path: &VPath) -> anyhow::Result<Option<ImageSize>> {
if path.extension().is_some_and(config::is_image_file) {
if let Some(content) = query::<Content>(&self.inner, path) {
if path.extension() == Some("svg") {
return Ok(svg_size(&content.string()?));
} else {
let _span = info_span!("raster_image_size").entered();
let reader = image::ImageReader::new(Cursor::new(content.bytes()))
.with_guessed_format()
.context("cannot guess image format")?;
let (width, height) = reader.into_dimensions()?;
return Ok(Some(ImageSize { width, height }));
}
}
}
Ok(None)
}
#[instrument("ImageSizeCache::image_size", skip(self))]
fn image_size(&self, path: &VPath) -> Option<ImageSize> {
self.cache.get(path).map(|x| *x).or_else(|| {
let _span = info_span!("cache_miss").entered();
let image_size = self
.compute_image_size(path)
.inspect_err(|err| warn!(%path, ?err, "compute_image_size failure"))
.ok()
.flatten();
if let Some(image_size) = image_size {
self.cache.insert(path.to_owned(), image_size);
}
image_size
})
}
}
impl<T> Dir for ImageSizeCache<T>
where
T: Dir,
{
fn query(&self, path: &VPath, query: &mut Query) {
query.try_provide(|| self.image_size(path));
self.inner.query(path, query);
}
}
impl<T> fmt::Debug for ImageSizeCache<T>
where
T: fmt::Debug,
{
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "ImageSizeCache({:?})", self.inner)
}
}
/// Quickly determine the size of an SVG without parsing it into a DOM.
///
/// This method is a tentative check; the point is to return an image size that's a _good
/// enough default_ rather than exactly what the size will be in the user's web browser.
#[instrument(skip(svg))]
fn svg_size(svg: &str) -> Option<ImageSize> {
let mut tokenizer = xmlparser::Tokenizer::from(svg);
fn parse_view_box(s: &str) -> Option<[u32; 4]> {
let mut iter = s.split_whitespace();
let min_x = iter.next()?.parse().ok()?;
let min_y = iter.next()?.parse().ok()?;
let width = iter.next()?.parse().ok()?;
let height = iter.next()?.parse().ok()?;
Some([min_x, min_y, width, height])
}
let mut in_svg = false;
let mut width: Option<u32> = None;
let mut height: Option<u32> = None;
let mut view_box: Option<[u32; 4]> = None;
while let Some(Ok(token)) = tokenizer.next() {
if let xmlparser::Token::ElementStart { local, .. } = &token {
if local == "svg" {
in_svg = true;
continue;
}
}
if in_svg {
// If another element starts, we're no longer in the root <svg>.
if let xmlparser::Token::ElementStart { .. } = &token {
break;
}
if let xmlparser::Token::Attribute { local, value, .. } = &token {
match local.as_str() {
"width" => width = value.parse().ok(),
"height" => height = value.parse().ok(),
"viewBox" => {
view_box = parse_view_box(value);
}
_ => (),
}
continue;
}
}
}
match (width, height, view_box) {
(Some(width), Some(height), _) | (_, _, Some([_, _, width, height])) => {
Some(ImageSize { width, height })
}
_ => None,
}
}

102
src/vfs/mem_dir.rs Normal file
View file

@ -0,0 +1,102 @@
use std::{collections::HashMap, fmt};
use super::{entries, Dir, DynDir, Entries, Query, VPath, VPathBuf};
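/// An in-memory directory composed of a single level of named mount points.
///
/// A sketch of mounting two subtrees under one root (`content_dir` and `static_dir` are
/// assumed to be [`DynDir`]s built elsewhere):
///
/// ```ignore
/// let mut root = MemDir::new();
/// root.add(VPath::new("content"), content_dir);
/// root.add(VPath::new("static"), static_dir);
/// ```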
pub struct MemDir {
mount_points: HashMap<String, DynDir>,
}
enum Resolved<'fs, 'path> {
Root,
MountPoint {
fs: &'fs dyn Dir,
fs_path: &'path VPath,
subpath: &'path VPath,
},
None,
}
impl MemDir {
pub fn new() -> Self {
Self {
mount_points: HashMap::new(),
}
}
pub fn add(&mut self, path: &VPath, dir: DynDir) {
assert_eq!(
path.depth(), 0,
"path must be situated at root. MountPoints does not support nested paths, but you can nest MountPoints within other MountPoints"
);
assert!(
self.mount_points
.insert(path.as_str().to_owned(), dir)
.is_none(),
"duplicate mount point at {path:?}"
);
}
fn resolve<'fs, 'path>(&'fs self, path: &'path VPath) -> Resolved<'fs, 'path> {
if path == VPath::ROOT {
return Resolved::Root;
} else {
let mount_point_name = path.as_str().split(VPath::SEPARATOR).next().unwrap();
if let Some(mount_point) = self.mount_points.get(mount_point_name) {
return Resolved::MountPoint {
fs: &**mount_point,
fs_path: VPath::new(mount_point_name),
subpath: path
.strip_prefix(VPath::new(mount_point_name))
.expect("path should have `mount_point_name` as its prefix"),
};
}
}
Resolved::None
}
fn dir(&self, path: &VPath) -> Vec<VPathBuf> {
match self.resolve(path) {
Resolved::Root => self.mount_points.keys().map(VPathBuf::new).collect(),
Resolved::MountPoint {
fs,
fs_path,
subpath,
} => entries(fs, subpath)
.into_iter()
.map(|path| fs_path.join(&path))
.collect(),
Resolved::None => vec![],
}
}
}
impl Default for MemDir {
fn default() -> Self {
Self::new()
}
}
impl Dir for MemDir {
fn query(&self, path: &VPath, query: &mut Query) {
query.provide(|| Entries(self.dir(path)));
match self.resolve(path) {
Resolved::Root | Resolved::None => (),
Resolved::MountPoint {
fs,
fs_path: _,
subpath,
} => fs.query(subpath, query),
}
}
}
impl fmt::Debug for MemDir {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str("MountPoints")
}
}

40
src/vfs/overlay.rs Normal file
View file

@ -0,0 +1,40 @@
use std::fmt;
use tracing::instrument;
use super::{entries, Dir, DynDir, Entries, Query, VPath, VPathBuf};
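/// Merges two directories. For value queries, `overlay` answers first and wins (a query
/// slot is only filled once); entry listings are the sorted, deduplicated union of both.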
pub struct Overlay {
base: DynDir,
overlay: DynDir,
}
impl Overlay {
pub fn new(base: DynDir, overlay: DynDir) -> Self {
Self { base, overlay }
}
#[instrument("Overlay::dir", skip(self))]
fn dir(&self, path: &VPath) -> Vec<VPathBuf> {
let mut dir = entries(&self.base, path);
dir.append(&mut entries(&self.overlay, path));
dir.sort();
dir.dedup();
dir
}
}
impl Dir for Overlay {
fn query(&self, path: &VPath, query: &mut Query) {
query.provide(|| Entries(self.dir(path)));
self.overlay.query(path, query);
self.base.query(path, query);
}
}
impl fmt::Debug for Overlay {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "Overlay({:?}, {:?})", self.base, self.overlay)
}
}

316
src/vfs/path.rs Normal file
View file

@ -0,0 +1,316 @@
use std::{borrow::Borrow, error::Error, fmt, ops::Deref, str::FromStr};
use serde::{Deserialize, Serialize};
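/// A borrowed virtual path; see the [module docs][super] for the path grammar.
///
/// A small sketch of the accessors:
///
/// ```ignore
/// let path = VPath::new("content/page.tree");
/// assert_eq!(path.extension(), Some("tree"));
/// assert_eq!(path.file_stem(), Some("page"));
/// assert_eq!(path.parent(), Some(VPath::new("content")));
/// assert_eq!(path.depth(), 1);
/// ```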
#[derive(PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct VPath {
path: str,
}
impl VPath {
pub const SEPARATOR_BYTE: u8 = b'/';
pub const SEPARATOR: char = Self::SEPARATOR_BYTE as char;
pub const ROOT: &Self = unsafe { Self::new_unchecked("") };
pub const fn try_new(s: &str) -> Result<&Self, InvalidPathError> {
if s.is_empty() {
return Ok(Self::ROOT);
}
let b = s.as_bytes();
if b[b.len() - 1] == Self::SEPARATOR_BYTE {
return Err(InvalidPathError::TrailingSlash);
}
if b[0] == Self::SEPARATOR_BYTE {
return Err(InvalidPathError::LeadingSlash);
}
Ok(unsafe { Self::new_unchecked(s) })
}
pub fn new(s: &str) -> &Self {
Self::try_new(s).expect("invalid path")
}
/// `const` version of [`new`][Self::new]. This has worse error messages, so prefer `new` whenever possible.
pub const fn new_const(s: &str) -> &Self {
match Self::try_new(s) {
Ok(p) => p,
Err(_) => panic!("invalid path"),
}
}
const unsafe fn new_unchecked(s: &str) -> &Self {
std::mem::transmute::<_, &Self>(s)
}
pub fn is_empty(&self) -> bool {
self.path.is_empty()
}
pub fn is_root(&self) -> bool {
self.is_empty()
}
pub fn join(&self, sub: &VPath) -> VPathBuf {
let mut buf = self.to_owned();
buf.push(sub);
buf
}
pub fn parent(&self) -> Option<&VPath> {
if self.is_root() {
None
} else if self.depth() == 0 {
Some(VPath::ROOT)
} else {
let (left, _right) = self
.path
                .rsplit_once(Self::SEPARATOR)
.expect("path with depth > 0 must have separators");
// SAFETY: We're splitting on a `/`, so there cannot be a trailing `/` in `left`.
Some(unsafe { VPath::new_unchecked(left) })
}
}
pub fn strip_prefix(&self, prefix: &VPath) -> Option<&Self> {
if self == prefix {
Some(VPath::ROOT)
} else {
self.path
.strip_prefix(&prefix.path)
.and_then(|p| p.strip_prefix(Self::SEPARATOR))
                // SAFETY: If `self` starts with `prefix`, `p` ends up without a leading slash
                // (it was stripped right above) and without a trailing slash (`self` has none).
.map(|p| unsafe { VPath::new_unchecked(p) })
}
}
pub fn depth(&self) -> usize {
self.path.chars().filter(|&c| c == Self::SEPARATOR).count()
}
pub fn segments(&self) -> impl Iterator<Item = &Self> {
if self.is_root() {
None.into_iter().flatten()
} else {
Some(self.as_str().split(Self::SEPARATOR).map(|s| unsafe {
// SAFETY: Since we're splitting on the separator, the path cannot start or end with it.
Self::new_unchecked(s)
}))
.into_iter()
.flatten()
}
}
pub fn rsegments(&self) -> impl Iterator<Item = &Self> {
self.as_str().rsplit(Self::SEPARATOR).map(|s| unsafe {
// SAFETY: Since we're splitting on the separator, the path cannot start or end with it.
Self::new_unchecked(s)
})
}
pub fn file_name(&self) -> Option<&str> {
self.rsegments().next().map(Self::as_str)
}
pub fn extension(&self) -> Option<&str> {
let file_name = self.file_name()?;
let (left, right) = file_name.rsplit_once('.')?;
if left.is_empty() {
None
} else {
Some(right)
}
}
pub fn with_extension(&self, extension: &str) -> VPathBuf {
let mut buf = self.to_owned();
buf.set_extension(extension);
buf
}
pub fn file_stem(&self) -> Option<&str> {
let file_name = self.file_name()?;
if let Some(extension) = self.extension() {
Some(&file_name[..file_name.len() - extension.len() - 1])
} else {
Some(file_name)
}
}
pub fn as_str(&self) -> &str {
&self.path
}
}
impl ToOwned for VPath {
type Owned = VPathBuf;
fn to_owned(&self) -> Self::Owned {
VPathBuf::from(self)
}
}
impl fmt::Debug for VPath {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str(&self.path)
}
}
impl fmt::Display for VPath {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str(&self.path)
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum InvalidPathError {
TrailingSlash,
LeadingSlash,
}
impl fmt::Display for InvalidPathError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
InvalidPathError::TrailingSlash => {
f.write_str("paths must not end with a trailing `/`")
}
InvalidPathError::LeadingSlash => {
f.write_str("paths are always absolute and must not start with `/`")
}
}
}
}
impl Error for InvalidPathError {}
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct VPathBuf {
path: String,
}
impl VPathBuf {
pub fn new(path: impl Into<String>) -> Self {
Self::try_new(path).expect("invalid path")
}
pub fn try_new(path: impl Into<String>) -> Result<Self, InvalidPathError> {
let path = path.into();
match VPath::try_new(&path) {
Ok(_) => Ok(Self { path }),
Err(e) => Err(e),
}
}
unsafe fn new_unchecked(path: String) -> Self {
Self { path }
}
pub fn push(&mut self, sub: &VPath) {
if !sub.is_empty() {
if !self.is_empty() {
self.path.push('/');
}
self.path.push_str(&sub.path);
}
}
pub fn set_extension(&mut self, new_extension: &str) {
if let Some(existing) = self.extension() {
let mut chop_len = existing.len();
if new_extension.is_empty() {
chop_len += 1; // also chop off the `.`
}
let range = self.path.len() - chop_len..;
self.path.replace_range(range, new_extension);
} else if !new_extension.is_empty() {
self.path.push('.');
self.path.push_str(new_extension);
}
}
}
impl Default for VPathBuf {
fn default() -> Self {
VPath::ROOT.to_owned()
}
}
impl Deref for VPathBuf {
type Target = VPath;
fn deref(&self) -> &Self::Target {
unsafe { VPath::new_unchecked(&self.path) }
}
}
impl fmt::Debug for VPathBuf {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str(&self.path)
}
}
impl fmt::Display for VPathBuf {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str(&self.path)
}
}
impl From<&VPath> for VPathBuf {
fn from(value: &VPath) -> Self {
unsafe { Self::new_unchecked(value.path.to_owned()) }
}
}
impl Borrow<VPath> for VPathBuf {
fn borrow(&self) -> &VPath {
self
}
}
impl<'de> Deserialize<'de> for VPathBuf {
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
use serde::de;
struct Visitor;
impl de::Visitor<'_> for Visitor {
type Value = VPathBuf;
fn expecting(&self, formatter: &mut fmt::Formatter) -> fmt::Result {
formatter.write_str("virtual path")
}
fn visit_str<E>(self, v: &str) -> Result<Self::Value, E>
where
E: de::Error,
{
VPathBuf::try_new(v).map_err(de::Error::custom)
}
}
deserializer.deserialize_str(Visitor)
}
}
impl Serialize for VPathBuf {
fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
where
S: serde::Serializer,
{
serializer.serialize_str(self.as_str())
}
}
impl FromStr for VPathBuf {
type Err = InvalidPathError;
fn from_str(s: &str) -> Result<Self, Self::Err> {
Self::try_new(s)
}
}

108
src/vfs/physical.rs Normal file
View file

@ -0,0 +1,108 @@
use std::path::{Path, PathBuf};
use tracing::{error, instrument};
use super::{Content, Dir, EditPath, Entries, Query, VPath, VPathBuf};
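/// A [`Dir`] backed by a directory on the OS file system, rooted at `root`.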
#[derive(Debug, Clone)]
pub struct PhysicalDir {
root: PathBuf,
}
impl PhysicalDir {
pub fn new(root: PathBuf) -> Self {
Self { root }
}
fn entries(&self, vpath: &VPath) -> Vec<VPathBuf> {
let physical = self.root.join(physical_path(vpath));
if !physical.is_dir() {
return vec![];
}
match std::fs::read_dir(physical) {
Ok(read_dir) => read_dir
.filter_map(|entry| {
entry
.inspect_err(|err| {
error!(
"{self:?} error while reading entries: {err:?}",
)
})
.ok()
.and_then(|entry| {
let path = entry.path();
let path_str = match path.strip_prefix(&self.root).unwrap_or(&path).to_str() {
Some(p) => p,
None => {
error!("{self:?} entry {path:?} has invalid UTF-8 (while reading vpath {vpath:?})");
return None;
},
};
let vpath_buf = VPathBuf::try_new(path_str.replace('\\', "/"))
.inspect_err(|err| {
error!("{self:?} error with vpath for {path_str:?}: {err:?}");
})
.ok()?;
Some(vpath_buf)
})
})
.collect(),
Err(err) => {
error!(
"{self:?} cannot read vpath {vpath:?}: {err:?}",
);
vec![]
}
}
}
#[instrument("PhysicalDir::content", skip(self))]
fn content(&self, path: &VPath) -> Option<Content> {
let physical_path = self.root.join(physical_path(path));
std::fs::read(&physical_path)
.inspect_err(|err| error!("{self:?} cannot read file at vpath {path:?} / physical {physical_path:?}: {err:?}",))
.ok()
.map(|bytes| {
Content::new(
path.extension()
.and_then(guess_content_type)
.unwrap_or("text/plain"),
bytes,
)
})
}
fn edit_path(&self, path: &VPath) -> EditPath {
EditPath {
path: self.root.join(physical_path(path)),
}
}
}
impl Dir for PhysicalDir {
fn query(&self, path: &VPath, query: &mut Query) {
query.provide(|| Entries(self.entries(path)));
query.try_provide(|| self.content(path));
query.provide(|| self.edit_path(path));
}
}
fn physical_path(path: &VPath) -> &Path {
Path::new(path.as_str())
}
fn guess_content_type(extension: &str) -> Option<&'static str> {
match extension {
"html" => Some("text/html"),
"js" => Some("text/javascript"),
"css" => Some("text/css"),
"woff" => Some("font/woff2"),
"svg" => Some("image/svg+xml"),
"atom" => Some("application/atom+xml"),
"png" => Some("image/png"),
"webp" => Some("image/webp"),
"jpg" | "jpeg" => Some("image/jpeg"),
_ => None,
}
}