remove treehouse-format crate and collapse everything into src

This commit is contained in:
りき萌 2025-07-10 16:50:41 +02:00
parent ca127a9411
commit b792688776
66 changed files with 145 additions and 112 deletions

47
src/html/breadcrumbs.rs Normal file
View file

@ -0,0 +1,47 @@
use std::{borrow::Cow, fmt::Write};
use tracing::instrument;
use crate::{config::Config, state::FileId, vfs::VPath};
use super::{navmap::NavigationMap, EscapeAttribute};
/// Renders the breadcrumb trail leading to `file_id` as a sequence of `<li class="breadcrumb">`
/// elements.
///
/// The trail is looked up in `navigation_map.paths`; if the file has no recorded path, an empty
/// string is returned. Path elements equal to `index` are skipped. Each remaining element links
/// to `{config.site}/{element}` and is labelled with the element's path relative to the previous
/// element in the trail (or `/{element}` when it is not nested under the previous element).
#[instrument(skip(config, navigation_map))]
pub fn breadcrumbs_to_html(
    config: &Config,
    navigation_map: &NavigationMap,
    file_id: FileId,
) -> String {
    let mut s = String::new();
    if let Some(path) = navigation_map.paths.get(&file_id) {
        for (i, element) in path.iter().enumerate() {
            // Skip the index because it's implied by the logo on the left.
            if &**element != VPath::new_const("index") {
                s.push_str("<li class=\"breadcrumb\">");
                {
                    // The first element has no predecessor. `checked_sub` avoids the `usize`
                    // underflow that `i - 1` would hit for `i == 0`, and falls through to the
                    // absolute `/{element}` label instead.
                    let short_element = i
                        .checked_sub(1)
                        .and_then(|previous| path.get(previous))
                        .map(|p| format!("{p}/"))
                        .and_then(|prefix| {
                            element
                                .as_str()
                                .strip_prefix(prefix.as_str())
                                .map(Cow::Borrowed)
                        })
                        .unwrap_or_else(|| Cow::Owned(format!("/{element}")));
                    write!(
                        s,
                        "<a href=\"{site}/{element}\">{short_element}</a>",
                        site = EscapeAttribute(&config.site),
                        element = EscapeAttribute(element.as_str())
                    )
                    .unwrap();
                }
                s.push_str("</li>");
            }
        }
    }
    s
}

692
src/html/djot.rs Normal file
View file

@ -0,0 +1,692 @@
//! Djot -> HTML renderer adapted from the one in jotdown.
//! Made concrete to avoid generic hell, with added treehouse-specific features.
use std::fmt::Write;
use std::ops::Range;
use codespan_reporting::diagnostic::Diagnostic;
use codespan_reporting::diagnostic::Label;
use codespan_reporting::diagnostic::LabelStyle;
use codespan_reporting::diagnostic::Severity;
use jotdown::Alignment;
use jotdown::Container;
use jotdown::Event;
use jotdown::LinkType;
use jotdown::ListKind;
use jotdown::OrderedListNumbering::*;
use jotdown::SpanLinkType;
use crate::config::Config;
use crate::dirs::Dirs;
use crate::state::FileId;
use crate::state::Treehouse;
use crate::vfs;
use crate::vfs::ImageSize;
use super::highlight::highlight;
/// Renders a stream of Djot events to HTML.
///
/// Consumed by [`Renderer::render`], which appends HTML to a caller-provided `String` and
/// returns the diagnostics collected along the way.
pub struct Renderer<'a> {
/// Site configuration; the renderer reads `site`, `syntaxes`, `emoji`, `defs` and the
/// page/picture URL helpers from it.
pub config: &'a Config,
/// Virtual directories; `pic` and `emoji` are used to resolve image URLs and sizes.
pub dirs: &'a Dirs,
/// Treehouse state, used to resolve named branch ids into branch links.
pub treehouse: &'a Treehouse,
/// File the events originate from; attached to every emitted diagnostic label.
pub file_id: FileId,
/// Identifier of the page being rendered; written as the prefix of `data-program` on
/// literate program blocks.
pub page_id: String,
}
impl Renderer<'_> {
    /// Renders every event in `events` to HTML, appending the markup to `out`.
    ///
    /// Each event is paired with its byte range in the source, which is used for diagnostic
    /// labels. Returns the diagnostics accumulated while rendering.
    ///
    /// # Panics
    ///
    /// Panics if rendering an event reports a formatting error.
    #[must_use]
    pub fn render(
        self,
        events: &[(Event, Range<usize>)],
        out: &mut String,
    ) -> Vec<Diagnostic<FileId>> {
        // Start from a clean slate: no raw mode, no open code block, no pending image alt text.
        let mut writer = Writer {
            diagnostics: Vec::new(),
            ignore_next_event: false,
            not_first_line: false,
            list_tightness: Vec::new(),
            img_alt_text: 0,
            code_block: None,
            raw: Raw::default(),
            renderer: self,
        };

        for (event, span) in events.iter() {
            let result = writer.render_event(event, span.clone(), out);
            result.expect("formatting event into string should not fail");
        }

        writer.diagnostics
    }
}
/// Tracks whether the writer is currently inside a raw block/inline, and of which format.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
enum Raw {
/// Not inside a raw container; text is escaped normally.
#[default]
None,
/// Inside a raw container whose format is `html`; text is written through verbatim.
Html,
/// Inside a raw container of any other format; text is dropped.
Other,
}
/// How the code block currently being rendered should be emitted.
#[derive(Debug, Clone, PartialEq, Eq)]
enum CodeBlockKind {
/// The block's language has no entry in `config.syntaxes`.
PlainText,
/// The block's language has an entry in `config.syntaxes`.
SyntaxHighlight,
/// The block carries a `:program` attribute and is emitted as `<th-literate-program>`.
LiterateProgram {
/// Value of the `:program` attribute.
program_name: String,
/// Value of the optional `:placeholder` attribute, used to look up a placeholder picture.
placeholder_pic_id: Option<String>,
},
}
/// State of the code block that is currently open, if any.
#[derive(Debug, Clone, PartialEq, Eq)]
struct CodeBlock<'a> {
/// How the block is emitted (plain, highlighted, or literate program).
kind: CodeBlockKind,
/// Language name taken from the Djot code block; used to look up the syntax definition.
language: &'a str,
}
/// Mutable rendering state threaded through successive `render_event` calls.
struct Writer<'a> {
/// The renderer configuration this writer was created from.
renderer: Renderer<'a>,
/// Whether we are inside a raw block/inline, and of which format.
raw: Raw,
/// The code block currently open, if any.
code_block: Option<CodeBlock<'a>>,
/// Nesting depth of image containers. While non-zero, text is written into the `alt`
/// attribute and non-image containers are not emitted.
img_alt_text: usize,
/// Stack of `tight` flags of the lists currently open; paragraphs directly inside a tight
/// list are emitted without `<p>` tags.
list_tightness: Vec<bool>,
/// Set once the first event has been rendered; enables the newline emitted before blocks.
not_first_line: bool,
/// While `true`, events are dropped without being rendered. Despite the name this stays set
/// until it is explicitly cleared by the end of the container that set it (e.g. a link
/// definition).
ignore_next_event: bool,
/// Diagnostics collected during rendering.
diagnostics: Vec<Diagnostic<FileId>>,
}
impl<'a> Writer<'a> {
    /// Renders a single Djot event as HTML, appending to `out`.
    ///
    /// `range` is the byte range of the event in the source and is only used for diagnostics.
    ///
    /// Events that are never rendered:
    /// - footnote definitions and references (an error diagnostic is recorded instead, and the
    ///   body of a footnote definition is suppressed),
    /// - link definitions and everything inside them,
    /// - section start/end events.
    ///
    /// # Errors
    ///
    /// Propagates formatting errors from `write!`.
    fn render_event(
        &mut self,
        e: &Event<'a>,
        range: Range<usize>,
        out: &mut String,
    ) -> std::fmt::Result {
        // Footnotes are unsupported. Report the definition and suppress its contents; falling
        // through to the container match below would hit the `unreachable!()` arms and panic on
        // perfectly ordinary user input.
        if let Event::Start(Container::Footnote { .. }, ..) = e {
            self.diagnostics.push(Diagnostic {
                severity: Severity::Error,
                code: Some("djot".into()),
                message: "Djot footnotes are not supported".into(),
                labels: vec![Label {
                    style: LabelStyle::Primary,
                    file_id: self.renderer.file_id,
                    range,
                    message: "".into(),
                }],
                notes: vec![],
            });
            self.ignore_next_event = true;
            return Ok(());
        }
        if matches!(e, Event::End(Container::Footnote { .. })) {
            self.ignore_next_event = false;
            return Ok(());
        }

        // Link definitions produce no output; drop everything between their start and end.
        if matches!(&e, Event::Start(Container::LinkDefinition { .. }, ..)) {
            self.ignore_next_event = true;
            return Ok(());
        }

        if matches!(&e, Event::End(Container::LinkDefinition { .. })) {
            self.ignore_next_event = false;
            return Ok(());
        }

        // Completely omit section events. The treehouse's structure contains linkable ids in
        // branches instead.
        if matches!(
            &e,
            Event::Start(Container::Section { .. }, _) | Event::End(Container::Section { .. })
        ) {
            return Ok(());
        }

        if self.ignore_next_event {
            return Ok(());
        }

        match e {
            Event::Start(c, attrs) => {
                if c.is_block() && self.not_first_line {
                    out.push('\n');
                }
                // While collecting image alt text, only nested images are tracked; every other
                // container is dropped so that only its text ends up in the `alt` attribute.
                if self.img_alt_text > 0 && !matches!(c, Container::Image(..)) {
                    return Ok(());
                }
                // First, write the opening tag's name (without the closing `>`).
                match &c {
                    Container::Blockquote => out.push_str("<blockquote"),
                    Container::List { kind, tight } => {
                        self.list_tightness.push(*tight);
                        match kind {
                            ListKind::Unordered | ListKind::Task => out.push_str("<ul"),
                            ListKind::Ordered {
                                numbering, start, ..
                            } => {
                                out.push_str("<ol");
                                if *start > 1 {
                                    write!(out, r#" start="{}""#, start)?;
                                }
                                if let Some(ty) = match numbering {
                                    Decimal => None,
                                    AlphaLower => Some('a'),
                                    AlphaUpper => Some('A'),
                                    RomanLower => Some('i'),
                                    RomanUpper => Some('I'),
                                } {
                                    write!(out, r#" type="{}""#, ty)?;
                                }
                            }
                        }
                    }
                    Container::ListItem | Container::TaskListItem { .. } => {
                        out.push_str("<li");
                    }
                    Container::DescriptionList => out.push_str("<dl"),
                    Container::DescriptionDetails => out.push_str("<dd"),
                    Container::Footnote { .. } => {
                        unreachable!("footnote events are consumed before the container match")
                    }
                    Container::Table => out.push_str("<table"),
                    Container::TableRow { .. } => out.push_str("<tr"),
                    Container::Section { .. } => {}
                    Container::Div { .. } => out.push_str("<div"),
                    Container::Paragraph => {
                        // Paragraphs directly inside a tight list are written without `<p>`.
                        if matches!(self.list_tightness.last(), Some(true)) {
                            return Ok(());
                        }
                        out.push_str("<p");
                    }
                    Container::Heading { level, .. } => write!(out, "<h{}", level)?,
                    Container::TableCell { head: false, .. } => out.push_str("<td"),
                    Container::TableCell { head: true, .. } => out.push_str("<th"),
                    Container::Caption => out.push_str("<caption"),
                    Container::DescriptionTerm => out.push_str("<dt"),
                    Container::CodeBlock { language } => {
                        // A `:program` attribute turns the block into a literate program.
                        if let Some(program) = attrs.get(":program") {
                            self.code_block = Some(CodeBlock {
                                kind: CodeBlockKind::LiterateProgram {
                                    program_name: program.parts().collect(),
                                    placeholder_pic_id: attrs
                                        .get(":placeholder")
                                        .map(|value| value.parts().collect()),
                                },
                                language,
                            });
                            out.push_str("<th-literate-program");
                        } else {
                            self.code_block = Some(CodeBlock {
                                kind: match self.renderer.config.syntaxes.contains_key(*language) {
                                    true => CodeBlockKind::SyntaxHighlight,
                                    false => CodeBlockKind::PlainText,
                                },
                                language,
                            });
                            out.push_str("<pre");
                        }
                    }
                    Container::Span | Container::Math { .. } => out.push_str("<span"),
                    Container::Link(dst, ty) => {
                        if matches!(ty, LinkType::Span(SpanLinkType::Unresolved)) {
                            // Unresolved links may use treehouse link schemes; if resolution
                            // fails the anchor is emitted without an `href`.
                            out.push_str("<a");
                            if let Some(resolved) = resolve_link(
                                self.renderer.config,
                                self.renderer.treehouse,
                                self.renderer.dirs,
                                dst,
                            ) {
                                out.push_str(r#" href=""#);
                                write_attr(&resolved, out);
                                out.push('"');
                            }
                        } else {
                            out.push_str(r#"<a href=""#);
                            if matches!(ty, LinkType::Email) {
                                out.push_str("mailto:");
                            }
                            write_attr(dst, out);
                            out.push('"');
                        }
                    }
                    Container::Image(..) => {
                        self.img_alt_text += 1;
                        // Only the outermost image produces an `<img>` tag.
                        if self.img_alt_text == 1 {
                            out.push_str(r#"<img class="pic" loading="lazy""#);
                        } else {
                            return Ok(());
                        }
                    }
                    Container::Verbatim => out.push_str("<code"),
                    Container::RawBlock { format } | Container::RawInline { format } => {
                        self.raw = if format == &"html" {
                            Raw::Html
                        } else {
                            Raw::Other
                        };
                        return Ok(());
                    }
                    Container::Subscript => out.push_str("<sub"),
                    Container::Superscript => out.push_str("<sup"),
                    Container::Insert => out.push_str("<ins"),
                    Container::Delete => out.push_str("<del"),
                    Container::Strong => out.push_str("<strong"),
                    Container::Emphasis => out.push_str("<em"),
                    Container::Mark => out.push_str("<mark"),
                    Container::LinkDefinition { .. } => return Ok(()),
                }

                // Second, write user attributes. `class` is merged separately below, and
                // attributes starting with `:` are treehouse directives rather than HTML.
                for (key, value) in attrs
                    .into_iter()
                    .filter(|(a, _)| !(*a == "class" || a.starts_with(':')))
                {
                    write!(out, r#" {}=""#, key)?;
                    value.parts().for_each(|part| write_attr(part, out));
                    out.push('"');
                }

                // Third, write the merged `class` attribute: implicit classes first, then classes
                // from attributes, then the div's own class.
                if attrs.into_iter().any(|(a, _)| a == "class")
                    || matches!(
                        c,
                        Container::Div { class } if !class.is_empty())
                    || matches!(
                        c,
                        Container::Math { .. }
                            | Container::List {
                                kind: ListKind::Task,
                                ..
                            }
                            | Container::TaskListItem { .. }
                    )
                {
                    out.push_str(r#" class=""#);
                    let mut first_written = false;
                    if let Some(cls) = match c {
                        Container::List {
                            kind: ListKind::Task,
                            ..
                        } => Some("task-list"),
                        Container::TaskListItem { checked: false } => Some("unchecked"),
                        Container::TaskListItem { checked: true } => Some("checked"),
                        Container::Math { display: false } => Some("math inline"),
                        Container::Math { display: true } => Some("math display"),
                        _ => None,
                    } {
                        first_written = true;
                        out.push_str(cls);
                    }
                    for class in attrs
                        .into_iter()
                        .filter(|(a, _)| a == &"class")
                        .map(|(_, cls)| cls)
                    {
                        if first_written {
                            out.push(' ');
                        }
                        first_written = true;
                        class.parts().for_each(|part| write_attr(part, out));
                    }
                    // div class goes after classes from attrs
                    if let Container::Div { class } = c {
                        if !class.is_empty() {
                            if first_written {
                                out.push(' ');
                            }
                            out.push_str(class);
                        }
                    }
                    out.push('"');
                }

                // Finally, close the opening tag and write container-specific extras.
                match c {
                    Container::TableCell { alignment, .. }
                        if !matches!(alignment, Alignment::Unspecified) =>
                    {
                        let a = match alignment {
                            Alignment::Unspecified => unreachable!(),
                            Alignment::Left => "left",
                            Alignment::Center => "center",
                            Alignment::Right => "right",
                        };
                        write!(out, r#" style="text-align: {};">"#, a)?;
                    }
                    Container::CodeBlock { language } => {
                        if language.is_empty() {
                            out.push_str("><code>");
                        } else {
                            // Always set by the `CodeBlock` arm of the tag-name match above.
                            let code_block = self.code_block.as_ref().unwrap();
                            if let CodeBlockKind::LiterateProgram { program_name, .. } =
                                &code_block.kind
                            {
                                out.push_str(r#" data-program=""#);
                                write_attr(&self.renderer.page_id, out);
                                out.push(':');
                                write_attr(program_name, out);
                                out.push('"');

                                out.push_str(r#" data-language=""#);
                                write_attr(language, out);
                                out.push('"');

                                if *language == "output" {
                                    out.push_str(r#" data-mode="output""#);
                                } else {
                                    out.push_str(r#" data-mode="input""#);
                                }
                            }

                            out.push('>');

                            if let CodeBlockKind::LiterateProgram {
                                placeholder_pic_id: Some(placeholder_pic_id),
                                ..
                            } = &code_block.kind
                            {
                                out.push_str(
                                    r#"<img class="placeholder-image" loading="lazy" src=""#,
                                );

                                let pic_url = self
                                    .renderer
                                    .config
                                    .pic_url(&*self.renderer.dirs.pic, placeholder_pic_id);
                                write_attr(&pic_url, out);
                                out.push('"');

                                if let Some(image_size) = self
                                    .renderer
                                    .config
                                    .pic_size(&*self.renderer.dirs.pic, placeholder_pic_id)
                                {
                                    write!(
                                        out,
                                        r#" width="{}" height="{}""#,
                                        image_size.width, image_size.height
                                    )?;
                                }

                                out.push('>');
                            }

                            if let (CodeBlockKind::LiterateProgram { .. }, "output") =
                                (&code_block.kind, *language)
                            {
                                out.push_str(r#"<pre class="placeholder-console">"#);
                            } else {
                                out.push_str(r#"<code class="language-"#);
                                write_attr(language, out);
                                if self.renderer.config.syntaxes.contains_key(*language) {
                                    out.push_str(" th-syntax-highlighting");
                                }
                                out.push_str(r#"">"#);
                            }
                        }
                    }
                    Container::Image(..) => {
                        // The `alt` attribute stays open; following text events fill it in and
                        // the matching end event closes it.
                        if self.img_alt_text == 1 {
                            out.push_str(r#" alt=""#);
                        }
                    }
                    Container::Math { display } => {
                        out.push_str(if *display { r#">\["# } else { r#">\("# });
                    }
                    _ => out.push('>'),
                }
            }
            Event::End(c) => {
                if c.is_block_container() {
                    out.push('\n');
                }
                if self.img_alt_text > 0 && !matches!(c, Container::Image(..)) {
                    return Ok(());
                }
                match c {
                    Container::Blockquote => out.push_str("</blockquote>"),
                    Container::List { kind, .. } => {
                        self.list_tightness.pop();
                        match kind {
                            ListKind::Unordered | ListKind::Task => out.push_str("</ul>"),
                            ListKind::Ordered { .. } => out.push_str("</ol>"),
                        }
                    }
                    Container::ListItem | Container::TaskListItem { .. } => {
                        out.push_str("</li>");
                    }
                    Container::DescriptionList => out.push_str("</dl>"),
                    Container::DescriptionDetails => out.push_str("</dd>"),
                    Container::Footnote { .. } => {
                        unreachable!("footnote events are consumed before the container match")
                    }
                    Container::Table => out.push_str("</table>"),
                    Container::TableRow { .. } => out.push_str("</tr>"),
                    Container::Section { .. } => {}
                    Container::Div { .. } => out.push_str("</div>"),
                    Container::Paragraph => {
                        if matches!(self.list_tightness.last(), Some(true)) {
                            return Ok(());
                        }
                        out.push_str("</p>");
                    }
                    Container::Heading { level, .. } => write!(out, "</h{}>", level)?,
                    Container::TableCell { head: false, .. } => out.push_str("</td>"),
                    Container::TableCell { head: true, .. } => out.push_str("</th>"),
                    Container::Caption => out.push_str("</caption>"),
                    Container::DescriptionTerm => out.push_str("</dt>"),
                    Container::CodeBlock { language } => {
                        let code_block = self.code_block.take().unwrap();

                        out.push_str(match &code_block.kind {
                            CodeBlockKind::PlainText | CodeBlockKind::SyntaxHighlight => {
                                "</code></pre>"
                            }
                            CodeBlockKind::LiterateProgram { .. } if *language == "output" => {
                                "</pre></th-literate-program>"
                            }
                            CodeBlockKind::LiterateProgram { .. } => {
                                "</code></th-literate-program>"
                            }
                        });
                    }
                    Container::Span => out.push_str("</span>"),
                    Container::Link(..) => out.push_str("</a>"),
                    Container::Image(src, link_type) => {
                        if self.img_alt_text == 1 {
                            if !src.is_empty() {
                                // Closes the `alt` attribute opened by the start event.
                                out.push_str(r#"" src=""#);
                                if let SpanLinkType::Unresolved = link_type {
                                    // TODO: Image size.
                                    if let Some(resolved) = resolve_link(
                                        self.renderer.config,
                                        self.renderer.treehouse,
                                        self.renderer.dirs,
                                        src,
                                    ) {
                                        write_attr(&resolved, out);
                                    } else {
                                        write_attr(src, out);
                                    }
                                } else {
                                    write_attr(src, out);
                                }
                            }
                            out.push_str(r#"">"#);
                        }
                        self.img_alt_text -= 1;
                    }
                    Container::Verbatim => out.push_str("</code>"),
                    Container::Math { display } => {
                        out.push_str(if *display {
                            r#"\]</span>"#
                        } else {
                            r#"\)</span>"#
                        });
                    }
                    Container::RawBlock { .. } | Container::RawInline { .. } => {
                        self.raw = Raw::None;
                    }
                    Container::Subscript => out.push_str("</sub>"),
                    Container::Superscript => out.push_str("</sup>"),
                    Container::Insert => out.push_str("</ins>"),
                    Container::Delete => out.push_str("</del>"),
                    Container::Strong => out.push_str("</strong>"),
                    Container::Emphasis => out.push_str("</em>"),
                    Container::Mark => out.push_str("</mark>"),
                    Container::LinkDefinition { .. } => unreachable!(),
                }
            }
            Event::Str(s) => match self.raw {
                // Inside an image, text becomes part of the `alt` attribute.
                Raw::None if self.img_alt_text > 0 => write_attr(s, out),
                Raw::None => {
                    // Inside a code block whose language has a syntax definition, highlight the
                    // text; otherwise write it as escaped text.
                    let syntax = self.code_block.as_ref().and_then(|code_block| {
                        self.renderer.config.syntaxes.get(code_block.language)
                    });
                    if let Some(syntax) = syntax {
                        highlight(out, syntax, s);
                    } else {
                        write_text(s, out);
                    }
                }
                Raw::Html => out.push_str(s),
                Raw::Other => {}
            },
            Event::FootnoteReference(_label) => {
                self.diagnostics.push(Diagnostic {
                    severity: Severity::Error,
                    code: Some("djot".into()),
                    message: "Djot footnotes are unsupported".into(),
                    labels: vec![Label {
                        style: LabelStyle::Primary,
                        file_id: self.renderer.file_id,
                        range,
                        message: "".into(),
                    }],
                    notes: vec![],
                });
            }
            Event::Symbol(sym) => {
                if let Some(vpath) = self.renderer.config.emoji.get(sym.as_ref()) {
                    // If a branch named `emoji/{sym}` exists, the emoji links to it.
                    let branch_id = self
                        .renderer
                        .treehouse
                        .branches_by_named_id
                        .get(&format!("emoji/{sym}"))
                        .copied();
                    if let Some(branch) =
                        branch_id.map(|id| self.renderer.treehouse.tree.branch(id))
                    {
                        out.push_str(r#"<a href=""#);
                        write_attr(&self.renderer.config.site, out);
                        out.push_str("/b?");
                        write_attr(&branch.attributes.id, out);
                        out.push_str(r#"">"#)
                    }

                    let url = vfs::url(
                        &self.renderer.config.site,
                        &*self.renderer.dirs.emoji,
                        vpath,
                    )
                    .expect("emoji directory is not anchored anywhere");

                    // TODO: this could do with better alt text
                    write!(
                        out,
                        r#"<img data-cast="emoji" title=":{sym}:" alt="{sym}" src=""#
                    )?;
                    write_attr(&url, out);
                    out.push('"');

                    if let Some(image_size) =
                        vfs::query::<ImageSize>(&self.renderer.dirs.emoji, vpath)
                    {
                        write!(
                            out,
                            r#" width="{}" height="{}""#,
                            image_size.width, image_size.height
                        )?;
                    }

                    out.push('>');

                    if branch_id.is_some() {
                        out.push_str("</a>");
                    }
                } else {
                    write!(
                        out,
                        r#"<span class="th-emoji-unknown" title="this emoji does not exist… yet!">:{sym}:</span>"#,
                    )?
                }
            }
            // Typographic replacements, spelled as escapes so they survive encoding mishaps.
            Event::LeftSingleQuote => out.push('\u{2018}'), // left single quotation mark
            Event::RightSingleQuote => out.push('\u{2019}'), // right single quotation mark
            Event::LeftDoubleQuote => out.push('\u{201C}'), // left double quotation mark
            Event::RightDoubleQuote => out.push('\u{201D}'), // right double quotation mark
            Event::Ellipsis => out.push('\u{2026}'),        // horizontal ellipsis
            Event::EnDash => out.push('\u{2013}'),          // en dash
            Event::EmDash => out.push('\u{2014}'),          // em dash
            Event::NonBreakingSpace => out.push_str("&nbsp;"),
            Event::Hardbreak => out.push_str("<br>\n"),
            Event::Softbreak => out.push('\n'),
            Event::Escape | Event::Blankline => {}
            Event::ThematicBreak(attrs) => {
                if self.not_first_line {
                    out.push('\n');
                }
                out.push_str("<hr");
                for (a, v) in attrs {
                    write!(out, r#" {}=""#, a)?;
                    v.parts().for_each(|part| write_attr(part, out));
                    out.push('"');
                }
                out.push('>');
            }
        }
        self.not_first_line = true;

        Ok(())
    }
}
/// Appends `s` to `out`, escaping `<`, `>` and `&` for use as HTML text content.
fn write_text(s: &str, out: &mut String) {
    write_escape(s, false, out)
}

/// Appends `s` to `out`, escaping `<`, `>`, `&` and `"` for use inside a double-quoted
/// attribute value.
fn write_attr(s: &str, out: &mut String) {
    write_escape(s, true, out)
}

/// Appends `s` to `out`, replacing HTML-special characters with entities.
///
/// `<`, `>` and `&` are always replaced; `"` is replaced only when `escape_quotes` is set.
fn write_escape(s: &str, escape_quotes: bool, out: &mut String) {
    // Start of the run of bytes that has not been copied to `out` yet.
    let mut unwritten_from = 0;
    // All special characters are ASCII, so scanning bytes is safe: an ASCII byte never occurs
    // inside a multi-byte UTF-8 sequence, which keeps every slice on a character boundary.
    for (index, byte) in s.bytes().enumerate() {
        let entity = match byte {
            b'<' => "&lt;",
            b'>' => "&gt;",
            b'&' => "&amp;",
            b'"' if escape_quotes => "&quot;",
            _ => continue,
        };
        out.push_str(&s[unwritten_from..index]);
        out.push_str(entity);
        unwritten_from = index + 1;
    }
    out.push_str(&s[unwritten_from..]);
}
/// Resolves a treehouse link of the form `kind:target` into a URL.
///
/// Supported kinds:
/// - `def` looks the target up in `config.defs`,
/// - `branch` looks the target up among named branch ids and yields `{site}/b?{id}`,
/// - `page` yields `config.page_url(target)`,
/// - `pic` yields `config.pic_url(dirs.pic, target)`.
///
/// Returns `None` when the link contains no `:`, the kind is unknown, or the lookup fails.
pub fn resolve_link(
    config: &Config,
    treehouse: &Treehouse,
    dirs: &Dirs,
    link: &str,
) -> Option<String> {
    let (kind, linked) = link.split_once(':')?;
    match kind {
        "def" => config.defs.get(linked).cloned(),
        "branch" => {
            let branch_id = *treehouse.branches_by_named_id.get(linked)?;
            let branch = treehouse.tree.branch(branch_id);
            Some(format!("{}/b?{}", config.site, branch.attributes.id))
        }
        "page" => Some(config.page_url(linked)),
        "pic" => Some(config.pic_url(&*dirs.pic, linked)),
        _ => None,
    }
}

94
src/html/highlight.rs Normal file
View file

@ -0,0 +1,94 @@
//! Tokenizer and syntax highlighter inspired by the one found in rxi's lite.
//! I highly recommend checking it out!
//! https://github.com/rxi/lite/blob/master/data/core/tokenizer.lua
//! There's also a mirror of it in the JavaScript, used to power dynamically editable code blocks.
//!
//! Both of these syntax highlighters use the same JSON syntax definitions; however this one is
//! more limited, in that patterns do not support backtracking.
//! This is effectively enforced in the dynamic highlighter because this highlighter reports any
//! regex syntax errors upon site compilation.
pub mod compiled;
pub mod tokenize;
use std::{collections::HashMap, fmt::Write};
use serde::{Deserialize, Serialize};
use self::compiled::CompiledSyntax;
use super::EscapeHtml;
/// Syntax definition, as (de)serialized with serde.
///
/// See [`Pattern`] for how tokens are recognized and [`Keyword`] for how they are
/// reclassified afterwards.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct Syntax {
/// Patterns, matched sequentially (patterns at the beginning of the list take precedence.)
pub patterns: Vec<Pattern>,
/// Map of replacements to use if a pattern matches a string exactly.
pub keywords: HashMap<String, Keyword>,
}
/// A pattern in a syntax definition: a regular expression plus the token type(s) it produces.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct Pattern {
/// Regular expression to match.
pub regex: String,
/// Flags to pass to the regex engine to alter how strings are matched.
/// Defaults to no flags when absent from the serialized definition.
#[serde(default)]
pub flags: Vec<RegexFlag>,
/// Type to assign to the token. This can be any string, but only a select few have colors
/// assigned.
pub is: TokenTypes,
}
/// Assignable token types.
///
/// Deserialized untagged: a plain string becomes [`TokenTypes::FullMatch`], while an object
/// shaped like [`CaptureTokenTypes`] becomes [`TokenTypes::Captures`].
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(untagged)]
pub enum TokenTypes {
/// Assign a single token type to the entire match.
FullMatch(String),
/// Assign individual token types to each capture.
Captures(CaptureTokenTypes),
}
/// Token types for a pattern that assigns a separate type to each of its capture groups.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct CaptureTokenTypes {
/// Token type to use outside captures.
pub default: String,
/// Token type to use inside captures, one entry per capture group, in order.
pub captures: Vec<String>,
}
/// Flag passed to the regex engine. Serialized in camelCase (e.g. `dotMatchesNewline`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, Serialize)]
#[serde(rename_all = "camelCase")]
pub enum RegexFlag {
/// Make `.` match line separators.
DotMatchesNewline,
}
/// Keyword replacement. Field names are serialized in camelCase (e.g. `onlyReplaces`).
#[derive(Debug, Clone, Deserialize, Serialize)]
#[serde(rename_all = "camelCase")]
pub struct Keyword {
/// What to replace the token type with.
pub into: String,
/// Only replace the token type if it matches this one. If this is not present, any token type
/// is replaced.
pub only_replaces: Option<String>,
}
/// Tokenizes `code` with `syntax` and appends one `<span class="…">` per token to `out`.
///
/// The span's class is the token's name from `syntax.token_names`; both the class and the
/// token text are passed through [`EscapeHtml`].
pub fn highlight(out: &mut String, syntax: &CompiledSyntax, code: &str) {
    for token in syntax.tokenize(code) {
        // Formatting errors are deliberately ignored, as with the individual writes this
        // replaces.
        _ = write!(
            out,
            "<span class=\"{}\">{}</span>",
            EscapeHtml(&syntax.token_names[token.id]),
            EscapeHtml(&code[token.range])
        );
    }
}

View file

@ -0,0 +1,119 @@
use std::collections::HashMap;
use regex::{Regex, RegexBuilder};
use tracing::{error, instrument};
use super::{RegexFlag, Syntax, TokenTypes};
/// During compilation, token names are converted to numeric IDs for performance.
/// An ID is an index into [`CompiledSyntax::token_names`].
pub type TokenId = usize;
/// ID of the `default` token type, which `compile_syntax` always registers first.
pub const TOKEN_ID_DEFAULT: TokenId = 0;
/// A [`Syntax`] with its regexes compiled and its token names interned as [`TokenId`]s.
#[derive(Debug, Clone)]
pub struct CompiledSyntax {
/// Lookup table which maps numeric IDs to token names.
pub token_names: Vec<String>,
/// Compiled patterns, in the same precedence order as in the source syntax.
pub patterns: Vec<CompiledPattern>,
/// Keyword replacements, keyed by the exact token text.
pub keywords: HashMap<String, CompiledKeyword>,
}
/// Compiled counterpart of `TokenTypes`, with token names replaced by IDs.
#[derive(Debug, Clone)]
pub enum CompiledTokenTypes {
/// A single token type for the entire match.
FullMatch(TokenId),
/// Individual token types per capture group.
Captures(CompiledCaptureTokenTypes),
}
/// Compiled counterpart of `CaptureTokenTypes`, with token names replaced by IDs.
#[derive(Debug, Clone)]
pub struct CompiledCaptureTokenTypes {
/// Token type used for text outside captures.
pub default: TokenId,
/// Token types used inside captures, one per capture group, in order.
pub captures: Vec<TokenId>,
}
/// A pattern whose regular expression has been compiled.
#[derive(Debug, Clone)]
pub struct CompiledPattern {
/// Compiled regex; `compile_syntax` anchors it to the start of the haystack.
pub regex: Regex,
/// Token type(s) produced when the regex matches.
pub is: CompiledTokenTypes,
}
/// Compiled counterpart of `Keyword`, with token names replaced by IDs.
#[derive(Debug, Clone)]
pub struct CompiledKeyword {
/// Token type to replace the matched token's type with.
pub into: TokenId,
/// If present, the replacement only applies to tokens of this type.
pub only_replaces: Option<TokenId>,
}
/// Compiles a [`Syntax`] definition into a [`CompiledSyntax`].
///
/// Token names are interned into numeric IDs; `default` is always ID 0. Every pattern's regex is
/// anchored to the start of the haystack so that it behaves like a sticky match at the
/// tokenizer's cursor. Patterns whose regex fails to compile are logged and skipped.
#[instrument(skip(syntax))]
pub fn compile_syntax(syntax: &Syntax) -> CompiledSyntax {
    let mut token_names = vec!["default".into()];
    // Interns `name`, returning the existing ID if the name was seen before.
    let mut get_token_id = |name: &str| -> TokenId {
        if let Some(id) = token_names.iter().position(|n| n == name) {
            id
        } else {
            let id = token_names.len();
            token_names.push(name.to_owned());
            id
        }
    };

    let patterns = syntax
        .patterns
        .iter()
        .filter_map(|pattern| {
            // If there's an existing `^`, it should not cause compilation errors for the user.
            let source = pattern.regex.strip_prefix('^').unwrap_or(&pattern.regex);
            // NOTE: `regex` has no support for sticky flags, so we need to anchor the match to the
            // start ourselves. The pattern is wrapped in a non-capturing group so that the anchor
            // applies to the whole pattern; a bare `^a|b` would only anchor the `a` alternative
            // and let `b` match anywhere further in the text. The group is non-capturing, so
            // capture indices are unaffected.
            let regex = RegexBuilder::new(&format!("^(?:{source})"))
                .dot_matches_new_line(pattern.flags.contains(&RegexFlag::DotMatchesNewline))
                .build()
                .map_err(|e| {
                    // NOTE: This could probably use better diagnostics, but it's pretty much
                    // impossible to get a source span out of serde's output (because it forgoes
                    // source information, rightfully so.) Therefore we have to settle on
                    // a poor man's error log.
                    error!("regex compilation error in pattern {pattern:?}: {e}");
                })
                .ok()?;
            Some(CompiledPattern {
                regex,
                is: match &pattern.is {
                    TokenTypes::FullMatch(name) => {
                        CompiledTokenTypes::FullMatch(get_token_id(name))
                    }
                    TokenTypes::Captures(types) => {
                        CompiledTokenTypes::Captures(CompiledCaptureTokenTypes {
                            default: get_token_id(&types.default),
                            captures: types
                                .captures
                                .iter()
                                .map(|name| get_token_id(name))
                                .collect(),
                        })
                    }
                },
            })
        })
        .collect();

    let keywords = syntax
        .keywords
        .iter()
        .map(|(text, keyword)| {
            (
                text.clone(),
                CompiledKeyword {
                    into: get_token_id(&keyword.into),
                    only_replaces: keyword.only_replaces.as_deref().map(&mut get_token_id),
                },
            )
        })
        .collect();

    CompiledSyntax {
        token_names,
        patterns,
        keywords,
    }
}

View file

@ -0,0 +1,97 @@
use std::ops::Range;
use super::compiled::{CompiledSyntax, CompiledTokenTypes, TokenId, TOKEN_ID_DEFAULT};
/// A classified slice of the tokenized text.
pub struct Token {
/// Token type, as an index into `CompiledSyntax::token_names`.
pub id: TokenId,
/// Byte range of the token within the text passed to `tokenize`.
pub range: Range<usize>,
}
impl CompiledSyntax {
    /// Splits `text` into tokens.
    ///
    /// At each position the patterns are tried in order and the first one that matches at the
    /// cursor wins. A match only counts if it starts exactly at the cursor and consumes at least
    /// one byte. If no pattern matches, a single character is emitted with the default token
    /// type. Adjacent tokens of the same type are merged. Afterwards, tokens whose text is a
    /// keyword are reclassified according to `self.keywords`.
    ///
    /// Token ranges are byte ranges into `text` and always fall on character boundaries.
    pub fn tokenize(&self, text: &str) -> Vec<Token> {
        let mut tokens = vec![];

        let mut i = 0;
        while i < text.len() {
            let rest = &text[i..];
            let mut had_match = false;
            for pattern in &self.patterns {
                match &pattern.is {
                    CompiledTokenTypes::FullMatch(id) => {
                        if let Some(regex_match) = pattern.regex.find(rest) {
                            // Reject matches that do not start at the cursor, and empty matches,
                            // which would never advance `i` and loop forever.
                            if regex_match.start() != 0 || regex_match.end() == 0 {
                                continue;
                            }
                            push_token(&mut tokens, *id, i..i + regex_match.end());
                            i += regex_match.end();
                            had_match = true;
                            break;
                        }
                    }

                    CompiledTokenTypes::Captures(types) => {
                        if let Some(captures) = pattern.regex.captures(rest) {
                            let whole_match = captures.get(0).unwrap();
                            // Same acceptance rule as for full matches.
                            if whole_match.start() != 0 || whole_match.end() == 0 {
                                continue;
                            }
                            let mut last_match_end = 0;
                            for (index, capture) in captures
                                .iter()
                                .skip(1)
                                .enumerate()
                                .filter_map(|(index, m)| m.map(|m| (index, m)))
                            {
                                let id = types
                                    .captures
                                    .get(index)
                                    .copied()
                                    .unwrap_or(TOKEN_ID_DEFAULT);
                                // Text between the previous capture and this one gets the
                                // pattern's default type.
                                push_token(
                                    &mut tokens,
                                    types.default,
                                    i + last_match_end..i + capture.range().start,
                                );
                                push_token(
                                    &mut tokens,
                                    id,
                                    i + capture.range().start..i + capture.range().end,
                                );
                                last_match_end = capture.range().end;
                            }
                            // Trailing text after the last capture.
                            push_token(
                                &mut tokens,
                                types.default,
                                i + last_match_end..i + whole_match.range().end,
                            );
                            i += whole_match.range().end;
                            had_match = true;
                            break;
                        }
                    }
                }
            }

            if !had_match {
                // Advance by one whole character rather than one byte, so that the cursor never
                // lands inside a multi-byte UTF-8 sequence (slicing there would panic).
                let step = rest.chars().next().map_or(1, char::len_utf8);
                push_token(&mut tokens, TOKEN_ID_DEFAULT, i..i + step);
                i += step;
            }
        }

        for token in &mut tokens {
            if let Some(keyword) = self.keywords.get(&text[token.range.clone()]) {
                if keyword.only_replaces.is_none() || Some(token.id) == keyword.only_replaces {
                    token.id = keyword.into;
                }
            }
        }

        tokens
    }
}
/// Appends a token covering `range` to `tokens`.
///
/// Empty ranges are ignored. If the last token already has the same `id`, it is extended to end
/// at `range.end` instead of pushing a new token.
fn push_token(tokens: &mut Vec<Token>, id: TokenId, range: Range<usize>) {
    if range.is_empty() {
        return;
    }

    match tokens.last_mut() {
        Some(tail) if tail.id == id => tail.range.end = range.end,
        _ => tokens.push(Token { id, range }),
    }
}

84
src/html/navmap.rs Normal file
View file

@ -0,0 +1,84 @@
use std::collections::HashMap;
use tracing::instrument;
use crate::{
state::{FileId, Treehouse},
tree::{attributes::Content, SemaBranchId},
vfs::VPathBuf,
};
/// Incrementally builds a [`NavigationMap`] while trees are being traversed.
#[derive(Debug, Clone, Default)]
struct NavigationMapBuilder {
/// Paths of the trees currently being traversed, outermost first.
stack: Vec<VPathBuf>,
/// The map being built.
navigation_map: NavigationMap,
}
impl NavigationMapBuilder {
    /// Enters the tree at `tree_path`, recording the current stack (including the tree itself)
    /// as the navigation path of `file_id`.
    fn enter_tree(&mut self, file_id: FileId, tree_path: VPathBuf) {
        // `tree_path` is owned and not used again, so it is moved rather than cloned.
        self.stack.push(tree_path);
        self.navigation_map
            .paths
            .insert(file_id, self.stack.clone());
    }

    /// Leaves the tree entered most recently.
    fn exit_tree(&mut self) {
        self.stack.pop();
    }

    /// Consumes the builder and returns the finished map.
    fn finish(self) -> NavigationMap {
        self.navigation_map
    }
}
/// Maps every reachable tree file to the chain of pages leading to it.
#[derive(Debug, Clone, Default)]
pub struct NavigationMap {
/// Tells you which pages need to be opened to get to the key.
/// The last element of each path is the key's own tree path.
pub paths: HashMap<FileId, Vec<VPathBuf>>,
}
impl NavigationMap {
    /// Builds the navigation map by walking the tree rooted at `root_file_id` and following
    /// resolved links into other trees.
    ///
    /// # Panics
    ///
    /// Panics if a visited file that has roots has no tree path.
    #[instrument(name = "NavigationMap::build", skip(treehouse))]
    pub fn build(treehouse: &Treehouse, root_file_id: FileId) -> Self {
        /// Visits a tree file, recording its path and descending into its root branches.
        fn visit_tree(treehouse: &Treehouse, builder: &mut NavigationMapBuilder, file_id: FileId) {
            let roots = match treehouse.roots.get(&file_id) {
                Some(roots) => roots,
                None => return,
            };

            // Pages can link to each other causing infinite recursion, so we need to handle that
            // case by skipping pages that already have been analyzed.
            if builder.navigation_map.paths.contains_key(&file_id) {
                return;
            }

            let tree_path = treehouse
                .tree_path(file_id)
                .expect("tree files may only link to other tree files")
                .to_owned();
            builder.enter_tree(file_id, tree_path);
            for &branch_id in &roots.branches {
                visit_branch(treehouse, builder, branch_id);
            }
            builder.exit_tree();
        }

        /// Visits a branch: a resolved link leads into the linked tree, anything else descends
        /// into the branch's children.
        fn visit_branch(
            treehouse: &Treehouse,
            builder: &mut NavigationMapBuilder,
            branch_id: SemaBranchId,
        ) {
            let branch = treehouse.tree.branch(branch_id);
            match &branch.attributes.content {
                Content::ResolvedLink(linked) => visit_tree(treehouse, builder, *linked),
                _ => {
                    for &child_id in &branch.children {
                        visit_branch(treehouse, builder, child_id);
                    }
                }
            }
        }

        let mut builder = NavigationMapBuilder::default();
        visit_tree(treehouse, &mut builder, root_file_id);
        builder.finish()
    }
}

467
src/html/tree.rs Normal file
View file

@ -0,0 +1,467 @@
use std::fmt::Write;
use chrono::{DateTime, Utc};
use crate::{
config::Config,
dirs::Dirs,
html::EscapeAttribute,
sources::Sources,
state::{FileId, Treehouse},
tree::{
attributes::{Content, Stage, Visibility},
mini_template,
pull::BranchKind,
SemaBranchId,
},
vfs::{self, VPath, VPathBuf},
};
use super::{djot, EscapeHtml};
/// Renders the branches of a tree file to HTML.
pub struct Renderer<'a> {
/// Loaded sources; provides the treehouse and the config.
pub sources: &'a Sources,
/// Virtual directories, passed on to the Djot renderer and the template renderer.
pub dirs: &'a Dirs,
/// The tree file being rendered.
pub file_id: FileId,
}
/// Whether a branch is rendered as an expandable `<details>` element (`Yes`) or as a plain
/// `<div>` leaf (`No`).
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum HasChildren {
No,
Yes,
}
/// Which kind of link button `button_bar` renders.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum LinkButton {
/// A "linked tree" button (`icon-go`).
Tree,
/// A "permalink" button (`icon-permalink`).
Branch,
}
/// Handle returned by `begin_container`, remembering which closing tags the branch needs.
struct OpenBranch {
/// Whether the container was opened as `<details>` (`Yes`) or `<div>` (`No`).
has_children: HasChildren,
}
impl Renderer<'_> {
/// Returns the treehouse from the renderer's sources.
fn treehouse(&self) -> &Treehouse {
&self.sources.treehouse
}
/// Returns the config from the renderer's sources.
fn config(&self) -> &Config {
&self.sources.config
}
/// Starts a branch's `<li>` tag with the given `id` attribute. The tag is left open so that
/// further attributes can be appended before `end_attrs`.
fn open_branch(&self, s: &mut String, id: &str) {
    s.push_str("<li id=\"");
    write!(s, "{}", EscapeAttribute(id)).unwrap();
    s.push('"');
}
/// Appends ` key="value"` to the open tag, escaping `value` for use in an attribute.
fn attr(&self, s: &mut String, key: &'static str, value: &str) {
    s.push(' ');
    s.push_str(key);
    s.push_str("=\"");
    write!(s, "{}", EscapeAttribute(value)).unwrap();
    s.push('"');
}
/// Opens the `class` attribute with `branch` or `leaf` as its first class. The attribute is
/// left unterminated; finish it with `attr_class_end`.
fn attr_class_begin(&self, s: &mut String, has_children: HasChildren) {
    let base_class = if has_children == HasChildren::Yes {
        "branch"
    } else {
        "leaf"
    };
    s.push_str(" class=\"");
    write!(s, "{}", EscapeAttribute(base_class)).unwrap();
}
/// Appends another space-separated class to the `class` attribute opened by
/// `attr_class_begin`.
fn attr_class_push(&self, s: &mut String, class: &str) {
    s.push(' ');
    write!(s, "{}", EscapeAttribute(class)).unwrap();
}
/// Writes the closing quote of the `class` attribute.
fn attr_class_end(&self, s: &mut String) {
s.push('"');
}
/// Opens the `data-cast` attribute, leaving its value unterminated.
fn attr_cast_begin(&self, s: &mut String) {
s.push_str(r#" data-cast=""#);
}
/// Appends `spell` to the open `data-cast` attribute, separating it from any previous entry
/// with a space.
fn attr_cast_push(&self, s: &mut String, spell: &str) {
    // Directly after the opening quote nothing has been written yet, so no separator is needed.
    let needs_separator = !s.ends_with('"');
    if needs_separator {
        s.push(' ');
    }
    write!(s, "{}", EscapeAttribute(spell)).unwrap();
}
/// Writes the closing quote of the `data-cast` attribute.
fn attr_cast_end(&self, s: &mut String) {
s.push('"');
}
/// Writes the `th-link` attribute with the linked tree's path as its value.
fn attr_link(&self, s: &mut String, linked: &VPath) {
self.attr(s, "th-link", linked.as_str());
}
/// Writes the `th-ts` attribute, whose value is `timestamp` as milliseconds since the Unix
/// epoch.
fn attr_ts(&self, s: &mut String, timestamp: DateTime<Utc>) {
self.attr(s, "th-ts", &timestamp.timestamp_millis().to_string())
}
/// Writes the valueless `th-do-not-persist` attribute.
fn attr_do_not_persist(&self, s: &mut String) {
s.push_str(" th-do-not-persist");
}
/// Closes the open tag by writing `>`.
fn end_attrs(&self, s: &mut String) {
s.push('>');
}
/// Opens the branch container.
///
/// Branches with children get a `<details>` element (opened or not, depending on
/// `branch_kind`) followed by an opening `<summary>`; leaves get a `<div>`. The returned
/// [`OpenBranch`] must be passed to `begin_children` / `close_branch` to emit matching closing
/// tags.
fn begin_container(
    &self,
    s: &mut String,
    has_children: HasChildren,
    branch_kind: BranchKind,
) -> OpenBranch {
    if has_children == HasChildren::Yes {
        let details_tag = match branch_kind {
            BranchKind::Expanded => "<details open>",
            BranchKind::Collapsed => "<details>",
        };
        s.push_str(details_tag);
        s.push_str("<summary class=\"branch-container\">");
    } else {
        s.push_str("<div class=\"branch-container\">");
    }

    OpenBranch { has_children }
}
/// Ends the branch's own content: closes the `<summary>` if the branch has children.
/// Returns whether the branch has children.
fn begin_children(&self, s: &mut String, open: &OpenBranch) -> HasChildren {
    let has_children = open.has_children;
    if let HasChildren::Yes = has_children {
        s.push_str("</summary>");
    }
    has_children
}
/// Closes the container opened by `begin_container`, then the branch's `<li>`.
fn close_branch(&self, s: &mut String, open: OpenBranch) {
    let container_end = match open.has_children {
        HasChildren::No => "</div>",
        HasChildren::Yes => "</details>",
    };
    s.push_str(container_end);
    s.push_str("</li>");
}
/// Writes an empty `<th-bp>` element.
fn bullet_point(&self, s: &mut String) {
s.push_str("<th-bp></th-bp>");
}
/// Renders a branch's Djot `markup` into a `<th-bc>` element.
///
/// If the branch links to another tree (`linked`), a `<noscript>` fallback link to that tree
/// is appended after the rendered markup.
///
/// # Panics
///
/// Panics if the file being rendered has no tree path.
fn branch_content(&self, s: &mut String, markup: &str, linked: Option<&VPath>) {
    s.push_str("<th-bc>");

    let events: Vec<_> = jotdown::Parser::new(markup).into_offset_iter().collect();
    let page_id = self
        .treehouse()
        .tree_path(self.file_id)
        .expect(".tree file expected")
        .to_string();
    let djot_renderer = djot::Renderer {
        page_id,
        config: self.config(),
        dirs: self.dirs,
        treehouse: self.treehouse(),
        file_id: self.file_id,
    };
    // TODO: Report rendering diagnostics.
    let _render_diagnostics = djot_renderer.render(&events, s);

    if let Some(linked) = linked {
        let site = &self.config().site;
        write!(
            s,
            "<noscript><a class=\"navigate icon-go\" href=\"{}/{}\">Go to linked tree: <code>{}</code></a></noscript>",
            EscapeAttribute(site),
            EscapeAttribute(linked.as_str()),
            EscapeHtml(linked.as_str()),
        )
        .unwrap();
    }

    s.push_str("</th-bc>");
}
/// Renders the button bar (`<th-bb>`): an optional date followed by a link button.
///
/// The date, when present, is formatted with `%F`. `link_button` selects between the
/// "linked tree" and "permalink" buttons; both point at `link`.
fn button_bar(
    &self,
    s: &mut String,
    date_time: Option<DateTime<Utc>>,
    link_button: LinkButton,
    link: &str,
) {
    s.push_str("<th-bb>");

    if let Some(date_time) = date_time {
        write!(s, "<th-bd>{}</th-bd>", date_time.format("%F")).unwrap();
    }

    let written = match link_button {
        LinkButton::Branch => write!(
            s,
            "<a th-p class=\"icon icon-permalink\" href=\"{}\" title=\"permalink\"></a>",
            EscapeAttribute(link)
        ),
        LinkButton::Tree => write!(
            s,
            "<a class=\"icon icon-go\" href=\"{}\" title=\"linked tree\"></a>",
            EscapeAttribute(link)
        ),
    };
    written.unwrap();

    s.push_str("</th-bb>");
}
/// Writes an empty `<ul>` element.
fn branch_children_empty(&self, s: &mut String) {
s.push_str("<ul></ul>");
}
/// Renders all children of `branch_id` inside a `<ul>`, which carries the branch's
/// `branch_children` class if one is set.
fn branch_children(&self, s: &mut String, branch_id: SemaBranchId) {
    let branch = self.treehouse().tree.branch(branch_id);
    let children_class = &branch.attributes.classes.branch_children;

    s.push_str("<ul");
    if !children_class.is_empty() {
        write!(s, " class=\"{}\"", EscapeAttribute(children_class)).unwrap();
    }
    s.push('>');

    for &child_id in &branch.children {
        self.branch(s, child_id);
    }

    s.push_str("</ul>");
}
/// Extracts a branch's markup from the source file and prepares it for Djot rendering.
///
/// Up to `branch.indent_level` leading spaces are stripped from every line, and every line is
/// terminated with `\n`. If the branch has the `template` attribute set, the result is then
/// passed through `mini_template::render`.
fn preprocess_markup(&self, branch_id: SemaBranchId) -> String {
    let branch = self.treehouse().tree.branch(branch_id);

    let raw_block_content =
        &self.treehouse().source(self.file_id).input()[branch.content.clone()];
    let mut markup = String::with_capacity(raw_block_content.len());
    for line in raw_block_content.lines() {
        // Count the leading spaces, looking at no more than `indent_level` bytes, so that
        // deeper indentation inside the branch is preserved.
        let leading_spaces = line
            .bytes()
            .take(branch.indent_level)
            .take_while(|&byte| byte == b' ')
            .count();
        markup.push_str(&line[leading_spaces..]);
        markup.push('\n');
    }

    if !branch.attributes.template {
        return markup;
    }
    mini_template::render(self.config(), self.treehouse(), self.dirs, &markup)
}
/// Renders the branch `branch_id`, including its button bar and children, into `s`.
///
/// Branches whose stage is `Stage::Draft` are skipped entirely unless the program was compiled
/// with debug assertions; when they are rendered, they receive the `draft` class.
///
/// A branch counts as having children when it has child branches or when its content is a
/// resolved link. If the resolved link maps to a tree path, the branch is cast as `b-linked`,
/// gets a link attribute, and its button bar points at the linked tree; otherwise it is cast as
/// `b` and its button bar carries a permalink built from the branch's `named_id`.
pub fn branch(&self, s: &mut String, branch_id: SemaBranchId) {
    let branch = self.treehouse().tree.branch(branch_id);

    let is_draft = branch.attributes.stage == Stage::Draft;
    if is_draft && !cfg!(debug_assertions) {
        return;
    }

    let has_children = if branch.children.is_empty()
        && !matches!(branch.attributes.content, Content::ResolvedLink(_))
    {
        HasChildren::No
    } else {
        HasChildren::Yes
    };

    let linked_tree = match branch.attributes.content {
        Content::ResolvedLink(file_id) => self.treehouse().tree_path(file_id),
        Content::Link(_) | Content::Inline => None,
    };

    self.open_branch(s, &branch.html_id);

    // data-cast
    self.attr_cast_begin(s);
    let base_cast = if linked_tree.is_some() {
        "b-linked"
    } else {
        "b"
    };
    self.attr_cast_push(s, base_cast);
    if !branch.attributes.cast.is_empty() {
        self.attr_cast_push(s, &branch.attributes.cast);
    }
    self.attr_cast_end(s);

    // th-link
    if let Some(tree_path) = linked_tree {
        self.attr_link(s, tree_path);
    }

    // class
    self.attr_class_begin(s, has_children);
    if !branch.attributes.classes.branch.is_empty() {
        self.attr_class_push(s, &branch.attributes.classes.branch);
    }
    if is_draft {
        self.attr_class_push(s, "draft");
    }
    self.attr_class_end(s);

    // th-do-not-persist
    if branch.attributes.do_not_persist {
        self.attr_do_not_persist(s);
    }

    self.end_attrs(s);

    let open = self.begin_container(s, has_children, branch.kind);

    self.bullet_point(s);
    let markup = self.preprocess_markup(branch_id);
    self.branch_content(s, &markup, linked_tree);

    let date_time = branch.attributes.timestamp();
    // Linked trees get a "go to tree" button; plain branches get a permalink to themselves.
    let (link_button, link) = match linked_tree {
        Some(tree_path) => (
            LinkButton::Tree,
            format!("{}/{}", self.config().site, tree_path),
        ),
        None => (
            LinkButton::Branch,
            format!("{}/b?{}", self.config().site, &branch.named_id),
        ),
    };
    self.button_bar(s, date_time, link_button, &link);

    if self.begin_children(s, &open) == HasChildren::Yes {
        self.branch_children(s, branch_id);
    }

    self.close_branch(s, open);
}
/// Renders the whole tree of `self.file_id` into `s` as a top-level `<ul>`.
///
/// The tree's root branches come first. If `get_child_pages` finds any pages for this tree's
/// path (with the extension removed; `index.tree` is looked up as `VPath::ROOT`), they are
/// appended inside a trailing `<li class="child-pages">` as collapsed, linked branches with an
/// empty child list.
///
/// # Panics
///
/// Panics if the treehouse holds no roots for `self.file_id`.
pub fn root(&self, s: &mut String) {
    let roots = self
        .treehouse()
        .roots
        .get(&self.file_id)
        .expect("tree should have been added to the treehouse");

    s.push_str("<ul>");
    for &root_branch in &roots.branches {
        self.branch(s, root_branch);
    }

    let own_path = self.treehouse().path(self.file_id);
    let is_index = own_path == const { VPath::new_const("index.tree") };
    let children_path = if is_index { VPath::ROOT } else { own_path };
    let tree_path = children_path.with_extension("");
    let child_pages = self.get_child_pages(&tree_path);

    // Nothing to list: close the top-level list and stop.
    if child_pages.is_empty() {
        s.push_str("</ul>");
        return;
    }

    s.push_str(r#"<li class="child-pages">"#);
    s.push_str("<ul>");
    for page in &child_pages {
        let html_id = format!("p-{}", page.tree_path);
        self.open_branch(s, &html_id);

        self.attr_cast_begin(s);
        self.attr_cast_push(s, "b-linked");
        self.attr_cast_end(s);
        self.attr_link(s, &page.tree_path);
        self.attr_class_begin(s, HasChildren::Yes);
        self.attr_class_end(s);
        if let Some(timestamp) = page.timestamp {
            self.attr_ts(s, timestamp);
        }

        self.end_attrs(s);

        let open = self.begin_container(s, HasChildren::Yes, BranchKind::Collapsed);

        self.bullet_point(s);
        let markup = format!(":{}: {}", page.icon, page.title);
        self.branch_content(s, &markup, Some(&page.tree_path));
        let href = format!("{}/{}", self.config().site, page.tree_path);
        self.button_bar(s, page.timestamp, LinkButton::Tree, &href);
        self.begin_children(s, &open);
        self.branch_children_empty(s);

        self.close_branch(s, open);
    }
    s.push_str("</ul>");
    s.push_str("</li>");

    s.push_str("</ul>");
}
}
/// A page listed in the "child pages" section that `Renderer::root` appends to a tree.
///
/// NOTE: the derived `Ord` compares fields in declaration order, so `timestamp` is the primary
/// sort key, followed by `title`, `icon` and `tree_path`. `get_child_pages` sorts by this
/// ordering; reordering the fields changes the order in which child pages are listed.
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
struct ChildPage {
/// The `updated` timestamp of the page's root attributes, if the page has timestamps.
timestamp: Option<DateTime<Utc>>,
/// Title taken from the page's root attributes.
title: String,
/// Icon taken from the page's root attributes; rendered as `:icon:` in front of the title.
icon: String,
/// Path of the page with its extension removed; used for the link and HTML id.
tree_path: VPathBuf,
}
impl Renderer<'_> {
    /// Collects the publicly visible child pages of `parent_page`.
    ///
    /// Candidates are the entries `vfs::entries` reports for `parent_page` in the content
    /// directory that have the `tree` extension. The extension is removed and duplicates are
    /// dropped. A candidate is kept only if the treehouse knows a file for that tree path and
    /// that file's root attributes mark it as `Visibility::Public`.
    ///
    /// The result is sorted in descending `ChildPage` order, which puts the most recently
    /// updated pages first and pages without a timestamp last.
    ///
    /// # Panics
    ///
    /// Panics if a file registered in `files_by_tree_path` has no entry in `roots`.
    fn get_child_pages(&self, parent_page: &VPath) -> Vec<ChildPage> {
        let mut tree_paths: Vec<_> = vfs::entries(&self.dirs.content, parent_page)
            .into_iter()
            .filter(|entry| entry.extension() == Some("tree"))
            .map(|mut entry| {
                entry.set_extension("");
                entry
            })
            .collect();
        tree_paths.sort();
        tree_paths.dedup();

        let mut pages = Vec::new();
        for tree_path in tree_paths {
            let Some(file_id) = self.treehouse().files_by_tree_path.get(&tree_path) else {
                continue;
            };
            let roots = &self.treehouse().roots[file_id];
            if roots.attributes.visibility != Visibility::Public {
                continue;
            }
            pages.push(ChildPage {
                timestamp: roots.attributes.timestamps.as_ref().map(|t| t.updated),
                title: roots.attributes.title.clone(),
                icon: roots.attributes.icon.clone(),
                tree_path,
            });
        }

        // Descending order: compare right-hand side against left-hand side.
        pages.sort_by(|lhs, rhs| rhs.cmp(lhs));
        pages
    }
}