static syntax highlighting WIP
This commit is contained in:
parent
7fd2d18b69
commit
5ab8ffdba2
13
Cargo.lock
generated
13
Cargo.lock
generated
|
@ -1160,9 +1160,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "regex"
|
name = "regex"
|
||||||
version = "1.9.3"
|
version = "1.10.3"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "81bc1d4caf89fac26a70747fe603c130093b53c773888797a6329091246d651a"
|
checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"aho-corasick",
|
"aho-corasick",
|
||||||
"memchr",
|
"memchr",
|
||||||
|
@ -1172,9 +1172,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "regex-automata"
|
name = "regex-automata"
|
||||||
version = "0.3.6"
|
version = "0.4.6"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "fed1ceff11a1dddaee50c9dc8e4938bd106e9d89ae372f192311e7da498e3b69"
|
checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"aho-corasick",
|
"aho-corasick",
|
||||||
"memchr",
|
"memchr",
|
||||||
|
@ -1183,9 +1183,9 @@ dependencies = [
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "regex-syntax"
|
name = "regex-syntax"
|
||||||
version = "0.7.4"
|
version = "0.8.2"
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2"
|
checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "rustc-demangle"
|
name = "rustc-demangle"
|
||||||
|
@ -1580,6 +1580,7 @@ dependencies = [
|
||||||
"log",
|
"log",
|
||||||
"pulldown-cmark",
|
"pulldown-cmark",
|
||||||
"rand",
|
"rand",
|
||||||
|
"regex",
|
||||||
"serde",
|
"serde",
|
||||||
"serde_json",
|
"serde_json",
|
||||||
"tokio",
|
"tokio",
|
||||||
|
|
36
content/treehouse/dev/syntax-highlighting.tree
Normal file
36
content/treehouse/dev/syntax-highlighting.tree
Normal file
|
@ -0,0 +1,36 @@
|
||||||
|
%% title = "syntax highlighting gallery"
|
||||||
|
|
||||||
|
- this is a page demonstrating syntaxes supported by the treehouse
|
||||||
|
|
||||||
|
- really there's not much more to it, but I use it for debugging + with it you can get a general feel for how I highlight things in the treehouse
|
||||||
|
|
||||||
|
- `javascript`
|
||||||
|
```javascript
|
||||||
|
// t is an existing tile index; variable name is short for brevity
|
||||||
|
export function removeRedundancies(t) {
|
||||||
|
if (isSet(t, SE) && (!isSet(t, S) || !isSet(t, E))) {
|
||||||
|
t &= ~SE;
|
||||||
|
}
|
||||||
|
if (isSet(t, SW) && (!isSet(t, S) || !isSet(t, W))) {
|
||||||
|
t &= ~SW;
|
||||||
|
}
|
||||||
|
if (isSet(t, NW) && (!isSet(t, N) || !isSet(t, W))) {
|
||||||
|
t &= ~NW;
|
||||||
|
}
|
||||||
|
if (isSet(t, NE) && (!isSet(t, N) || !isSet(t, E))) {
|
||||||
|
t &= ~NE;
|
||||||
|
}
|
||||||
|
return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This is
|
||||||
|
a multiline comment. */
|
||||||
|
|
||||||
|
ident Class CONSTANT funciton()
|
||||||
|
0b1010 0o01234567 0x0123456789ABCDEF
|
||||||
|
01234567
|
||||||
|
1.41e-3
|
||||||
|
'string' /**/ "string" /**/ `string`
|
||||||
|
+ - * / == != <= >= ! ~ || && . ? :
|
||||||
|
, ;
|
||||||
|
```
|
|
@ -29,3 +29,4 @@ ulid = "1.0.0"
|
||||||
url = "2.5.0"
|
url = "2.5.0"
|
||||||
base64 = "0.21.7"
|
base64 = "0.21.7"
|
||||||
chrono = "0.4.35"
|
chrono = "0.4.35"
|
||||||
|
regex = "1.10.3"
|
||||||
|
|
|
@ -361,6 +361,7 @@ pub fn generate(paths: &Paths<'_>) -> anyhow::Result<(Config, Treehouse)> {
|
||||||
config.site = std::env::var("TREEHOUSE_SITE").unwrap_or(config.site);
|
config.site = std::env::var("TREEHOUSE_SITE").unwrap_or(config.site);
|
||||||
config.autopopulate_emoji(&paths.static_dir.join("emoji"))?;
|
config.autopopulate_emoji(&paths.static_dir.join("emoji"))?;
|
||||||
config.autopopulate_pics(&paths.static_dir.join("pic"))?;
|
config.autopopulate_pics(&paths.static_dir.join("pic"))?;
|
||||||
|
config.load_syntaxes(&paths.static_dir.join("syntax"))?;
|
||||||
|
|
||||||
info!("cleaning target directory");
|
info!("cleaning target directory");
|
||||||
let _ = std::fs::remove_dir_all(paths.target_dir);
|
let _ = std::fs::remove_dir_all(paths.target_dir);
|
||||||
|
|
|
@ -1,9 +1,15 @@
|
||||||
use std::{collections::HashMap, ffi::OsStr, fs::File, io::BufReader, path::Path};
|
use std::{collections::HashMap, ffi::OsStr, fs::File, io::BufReader, path::Path};
|
||||||
|
|
||||||
use anyhow::Context;
|
use anyhow::Context;
|
||||||
|
use log::debug;
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use walkdir::WalkDir;
|
use walkdir::WalkDir;
|
||||||
|
|
||||||
|
use crate::html::highlight::{
|
||||||
|
compiled::{compile_syntax, CompiledSyntax},
|
||||||
|
Syntax,
|
||||||
|
};
|
||||||
|
|
||||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||||
pub struct Config {
|
pub struct Config {
|
||||||
/// Website root; used when generating links.
|
/// Website root; used when generating links.
|
||||||
|
@ -48,6 +54,13 @@ pub struct Config {
|
||||||
/// On top of this, pics are autodiscovered by walking the `static/pic` directory.
|
/// On top of this, pics are autodiscovered by walking the `static/pic` directory.
|
||||||
/// Only the part before the first dash is treated as the pic's id.
|
/// Only the part before the first dash is treated as the pic's id.
|
||||||
pub pics: HashMap<String, String>,
|
pub pics: HashMap<String, String>,
|
||||||
|
|
||||||
|
/// Syntax definitions.
|
||||||
|
///
|
||||||
|
/// These are not part of the config file, but are loaded as part of site configuration from
|
||||||
|
/// `static/syntax`.
|
||||||
|
#[serde(skip)]
|
||||||
|
pub syntaxes: HashMap<String, CompiledSyntax>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||||
|
@ -138,6 +151,30 @@ impl Config {
|
||||||
self.pics.get(id).map(|x| &**x).unwrap_or("404.png")
|
self.pics.get(id).map(|x| &**x).unwrap_or("404.png")
|
||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Loads all syntax definition files.
|
||||||
|
pub fn load_syntaxes(&mut self, dir: &Path) -> anyhow::Result<()> {
|
||||||
|
for entry in WalkDir::new(dir) {
|
||||||
|
let entry = entry?;
|
||||||
|
if entry.path().extension() == Some(OsStr::new("json")) {
|
||||||
|
let name = entry
|
||||||
|
.path()
|
||||||
|
.file_stem()
|
||||||
|
.expect("syntax file name should have a stem")
|
||||||
|
.to_string_lossy();
|
||||||
|
debug!("loading syntax {name:?}");
|
||||||
|
|
||||||
|
let syntax: Syntax = serde_json::from_reader(BufReader::new(
|
||||||
|
File::open(entry.path()).context("could not open syntax file")?,
|
||||||
|
))
|
||||||
|
.context("could not deserialize syntax file")?;
|
||||||
|
let compiled = compile_syntax(&syntax);
|
||||||
|
self.syntaxes.insert(name.into_owned(), compiled);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Data derived from the config.
|
/// Data derived from the config.
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
use std::fmt::{self, Display, Write};
|
use std::fmt::{self, Display, Write};
|
||||||
|
|
||||||
pub mod breadcrumbs;
|
pub mod breadcrumbs;
|
||||||
|
pub mod highlight;
|
||||||
mod markdown;
|
mod markdown;
|
||||||
pub mod navmap;
|
pub mod navmap;
|
||||||
pub mod tree;
|
pub mod tree;
|
||||||
|
|
94
crates/treehouse/src/html/highlight.rs
Normal file
94
crates/treehouse/src/html/highlight.rs
Normal file
|
@ -0,0 +1,94 @@
|
||||||
|
//! Tokenizer and syntax highlighter inspired by the one found in rxi's lite.
|
||||||
|
//! I highly recommend checking it out!
|
||||||
|
//! https://github.com/rxi/lite/blob/master/data/core/tokenizer.lua
|
||||||
|
//! There's also a mirror of it in the JavaScript, used to power dynamically editable code blocks.
|
||||||
|
//!
|
||||||
|
//! Both of these syntax highlighters use the same JSON syntax definitions; however this one is
|
||||||
|
//! more limited, in that patterns do not support backtracking.
|
||||||
|
//! This is effectively enforced in the dynamic highlighter because this highlighter reports any
|
||||||
|
//! regex syntax errors upon site compilation.
|
||||||
|
|
||||||
|
pub mod compiled;
|
||||||
|
pub mod tokenize;
|
||||||
|
|
||||||
|
use std::{collections::HashMap, io};
|
||||||
|
|
||||||
|
use pulldown_cmark::escape::{escape_html, StrWrite};
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
use self::compiled::CompiledSyntax;
|
||||||
|
|
||||||
|
/// Syntax definition.
|
||||||
|
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||||
|
pub struct Syntax {
|
||||||
|
/// Patterns, matched sequentially (patterns at the beginning of the list take precedence.)
|
||||||
|
pub patterns: Vec<Pattern>,
|
||||||
|
|
||||||
|
/// Map of replacements to use if a pattern matches a string exactly.
|
||||||
|
pub keywords: HashMap<String, Keyword>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// A pattern in a syntax definition.
|
||||||
|
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||||
|
pub struct Pattern {
|
||||||
|
/// Regular expression to match.
|
||||||
|
pub regex: String,
|
||||||
|
|
||||||
|
/// Flags to pass to the regex engine to alter how strings are matched.
|
||||||
|
#[serde(default)]
|
||||||
|
pub flags: Vec<RegexFlag>,
|
||||||
|
|
||||||
|
/// Type to assign to the token. This can be any string, but only a select few have colors
|
||||||
|
/// assigned.
|
||||||
|
pub is: TokenTypes,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Assignable token types.
|
||||||
|
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||||
|
#[serde(untagged)]
|
||||||
|
pub enum TokenTypes {
|
||||||
|
/// Assign a single token type to the entire match.
|
||||||
|
FullMatch(String),
|
||||||
|
/// Assign individual token types to each capture.
|
||||||
|
Captures(CaptureTokenTypes),
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||||
|
pub struct CaptureTokenTypes {
|
||||||
|
/// Token type to use outside captures.
|
||||||
|
pub default: String,
|
||||||
|
/// Token type to use inside captures.
|
||||||
|
pub captures: Vec<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Flag passed to the regex engine.
|
||||||
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, Serialize)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub enum RegexFlag {
|
||||||
|
/// Make `.` match line separators.
|
||||||
|
DotMatchesNewline,
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Keyword replacement.
|
||||||
|
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||||
|
#[serde(rename_all = "camelCase")]
|
||||||
|
pub struct Keyword {
|
||||||
|
/// What to replace the token type with.
|
||||||
|
pub into: String,
|
||||||
|
|
||||||
|
/// Only replace the token type if it matches this one. If this is not present, any token type
|
||||||
|
/// is replaced.
|
||||||
|
pub only_replaces: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn highlight(mut w: impl StrWrite, syntax: &CompiledSyntax, code: &str) -> io::Result<()> {
|
||||||
|
let tokens = syntax.tokenize(code);
|
||||||
|
for token in tokens {
|
||||||
|
w.write_str("<span class=\"")?;
|
||||||
|
escape_html(&mut w, &syntax.token_names[token.id])?;
|
||||||
|
w.write_str("\">")?;
|
||||||
|
escape_html(&mut w, &code[token.range])?;
|
||||||
|
w.write_str("</span>")?;
|
||||||
|
}
|
||||||
|
Ok(())
|
||||||
|
}
|
118
crates/treehouse/src/html/highlight/compiled.rs
Normal file
118
crates/treehouse/src/html/highlight/compiled.rs
Normal file
|
@ -0,0 +1,118 @@
|
||||||
|
use std::collections::HashMap;
|
||||||
|
|
||||||
|
use log::error;
|
||||||
|
use regex::{Regex, RegexBuilder};
|
||||||
|
|
||||||
|
use super::{RegexFlag, Syntax, TokenTypes};
|
||||||
|
|
||||||
|
/// Numeric ID of a token type. Token names are converted to these during compilation for
/// performance.
pub type TokenId = usize;

/// ID of the `"default"` token type, which `compile_syntax` always registers first.
pub const TOKEN_ID_DEFAULT: TokenId = 0;
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct CompiledSyntax {
|
||||||
|
/// Lookup table which maps numeric IDs to token names.
|
||||||
|
pub token_names: Vec<String>,
|
||||||
|
|
||||||
|
pub patterns: Vec<CompiledPattern>,
|
||||||
|
pub keywords: HashMap<String, CompiledKeyword>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub enum CompiledTokenTypes {
|
||||||
|
FullMatch(TokenId),
|
||||||
|
Captures(CompiledCaptureTokenTypes),
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct CompiledCaptureTokenTypes {
|
||||||
|
pub default: TokenId,
|
||||||
|
pub captures: Vec<TokenId>,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct CompiledPattern {
|
||||||
|
pub regex: Regex,
|
||||||
|
pub is: CompiledTokenTypes,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone)]
|
||||||
|
pub struct CompiledKeyword {
|
||||||
|
pub into: TokenId,
|
||||||
|
pub only_replaces: Option<TokenId>,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn compile_syntax(syntax: &Syntax) -> CompiledSyntax {
|
||||||
|
let mut token_names = vec!["default".into()];
|
||||||
|
let mut get_token_id = |name: &str| -> TokenId {
|
||||||
|
if let Some(id) = token_names.iter().position(|n| n == name) {
|
||||||
|
id
|
||||||
|
} else {
|
||||||
|
let id = token_names.len();
|
||||||
|
token_names.push(name.to_owned());
|
||||||
|
id
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
let patterns = syntax
|
||||||
|
.patterns
|
||||||
|
.iter()
|
||||||
|
.filter_map(|pattern| {
|
||||||
|
// NOTE: `regex` has no support for sticky flags, so we need to anchor the match to the
|
||||||
|
// start ourselves.
|
||||||
|
let regex = RegexBuilder::new(&format!(
|
||||||
|
"^{}",
|
||||||
|
// If there's an existing `^`, it should not cause compilation errors for the user.
|
||||||
|
pattern.regex.strip_prefix('^').unwrap_or(&pattern.regex)
|
||||||
|
))
|
||||||
|
.dot_matches_new_line(pattern.flags.contains(&RegexFlag::DotMatchesNewline))
|
||||||
|
.build()
|
||||||
|
.map_err(|e| {
|
||||||
|
// NOTE: This could probably use better diagnostics, but it's pretty much
|
||||||
|
// impossible to get a source span out of serde's output (because it forgoes
|
||||||
|
// source information, rightfully so.) Therefore we have to settle on
|
||||||
|
// a poor man's error log.
|
||||||
|
error!("regex compilation error in pattern {pattern:?}: {e}");
|
||||||
|
})
|
||||||
|
.ok()?;
|
||||||
|
Some(CompiledPattern {
|
||||||
|
regex,
|
||||||
|
is: match &pattern.is {
|
||||||
|
TokenTypes::FullMatch(name) => {
|
||||||
|
CompiledTokenTypes::FullMatch(get_token_id(name))
|
||||||
|
}
|
||||||
|
TokenTypes::Captures(types) => {
|
||||||
|
CompiledTokenTypes::Captures(CompiledCaptureTokenTypes {
|
||||||
|
default: get_token_id(&types.default),
|
||||||
|
captures: types
|
||||||
|
.captures
|
||||||
|
.iter()
|
||||||
|
.map(|name| get_token_id(name))
|
||||||
|
.collect(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
},
|
||||||
|
})
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
let keywords = syntax
|
||||||
|
.keywords
|
||||||
|
.iter()
|
||||||
|
.map(|(text, keyword)| {
|
||||||
|
(
|
||||||
|
text.clone(),
|
||||||
|
CompiledKeyword {
|
||||||
|
into: get_token_id(&keyword.into),
|
||||||
|
only_replaces: keyword.only_replaces.as_deref().map(&mut get_token_id),
|
||||||
|
},
|
||||||
|
)
|
||||||
|
})
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
CompiledSyntax {
|
||||||
|
token_names,
|
||||||
|
patterns,
|
||||||
|
keywords,
|
||||||
|
}
|
||||||
|
}
|
57
crates/treehouse/src/html/highlight/tokenize.rs
Normal file
57
crates/treehouse/src/html/highlight/tokenize.rs
Normal file
|
@ -0,0 +1,57 @@
|
||||||
|
use std::ops::Range;
|
||||||
|
|
||||||
|
use super::compiled::{CompiledSyntax, CompiledTokenTypes, TokenId, TOKEN_ID_DEFAULT};
|
||||||
|
|
||||||
|
pub struct Token {
|
||||||
|
pub id: TokenId,
|
||||||
|
pub range: Range<usize>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl CompiledSyntax {
|
||||||
|
pub fn tokenize(&self, text: &str) -> Vec<Token> {
|
||||||
|
let mut tokens = vec![];
|
||||||
|
|
||||||
|
let mut i = 0;
|
||||||
|
while i < text.len() {
|
||||||
|
let mut had_match = false;
|
||||||
|
for pattern in &self.patterns {
|
||||||
|
match &pattern.is {
|
||||||
|
CompiledTokenTypes::FullMatch(id) => {
|
||||||
|
if let Some(regex_match) = pattern.regex.find(&text[i..]) {
|
||||||
|
push_token(&mut tokens, *id, i..i + regex_match.range().end);
|
||||||
|
i += regex_match.range().end;
|
||||||
|
had_match = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
CompiledTokenTypes::Captures(types) => { /* TODO */ }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !had_match {
|
||||||
|
push_token(&mut tokens, TOKEN_ID_DEFAULT, i..i + 1);
|
||||||
|
i += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for token in &mut tokens {
|
||||||
|
if let Some(keyword) = self.keywords.get(&text[token.range.clone()]) {
|
||||||
|
if keyword.only_replaces.is_none() || Some(token.id) == keyword.only_replaces {
|
||||||
|
token.id = keyword.into;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
tokens
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn push_token(tokens: &mut Vec<Token>, id: TokenId, range: Range<usize>) {
|
||||||
|
if let Some(previous_token) = tokens.last_mut() {
|
||||||
|
if previous_token.id == id {
|
||||||
|
previous_token.range.end = range.end;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
tokens.push(Token { id, range });
|
||||||
|
}
|
|
@ -23,6 +23,7 @@
|
||||||
|
|
||||||
//! HTML renderer that takes an iterator of events as input.
|
//! HTML renderer that takes an iterator of events as input.
|
||||||
|
|
||||||
|
use std::borrow::Borrow;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
use std::io;
|
use std::io;
|
||||||
|
|
||||||
|
@ -31,6 +32,7 @@ use pulldown_cmark::{Alignment, CodeBlockKind, Event, LinkType, Tag};
|
||||||
use pulldown_cmark::{CowStr, Event::*};
|
use pulldown_cmark::{CowStr, Event::*};
|
||||||
|
|
||||||
use crate::config::{Config, ConfigDerivedData, PicSize};
|
use crate::config::{Config, ConfigDerivedData, PicSize};
|
||||||
|
use crate::html::highlight::highlight;
|
||||||
use crate::state::Treehouse;
|
use crate::state::Treehouse;
|
||||||
|
|
||||||
enum TableState {
|
enum TableState {
|
||||||
|
@ -38,6 +40,12 @@ enum TableState {
|
||||||
Body,
|
Body,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||||
|
enum CodeBlockState<'a> {
|
||||||
|
NotInCodeBlock,
|
||||||
|
InCodeBlock(Option<CowStr<'a>>),
|
||||||
|
}
|
||||||
|
|
||||||
struct HtmlWriter<'a, I, W> {
|
struct HtmlWriter<'a, I, W> {
|
||||||
treehouse: &'a Treehouse,
|
treehouse: &'a Treehouse,
|
||||||
config: &'a Config,
|
config: &'a Config,
|
||||||
|
@ -58,7 +66,7 @@ struct HtmlWriter<'a, I, W> {
|
||||||
table_cell_index: usize,
|
table_cell_index: usize,
|
||||||
numbers: HashMap<CowStr<'a>, usize>,
|
numbers: HashMap<CowStr<'a>, usize>,
|
||||||
|
|
||||||
in_code_block: bool,
|
code_block_state: CodeBlockState<'a>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'a, I, W> HtmlWriter<'a, I, W>
|
impl<'a, I, W> HtmlWriter<'a, I, W>
|
||||||
|
@ -87,7 +95,7 @@ where
|
||||||
table_alignments: vec![],
|
table_alignments: vec![],
|
||||||
table_cell_index: 0,
|
table_cell_index: 0,
|
||||||
numbers: HashMap::new(),
|
numbers: HashMap::new(),
|
||||||
in_code_block: false,
|
code_block_state: CodeBlockState::NotInCodeBlock,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -234,65 +242,71 @@ where
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Tag::CodeBlock(info) => {
|
Tag::CodeBlock(info) => {
|
||||||
self.in_code_block = true;
|
self.code_block_state = CodeBlockState::InCodeBlock(None);
|
||||||
if !self.end_newline {
|
if !self.end_newline {
|
||||||
self.write_newline()?;
|
self.write_newline()?;
|
||||||
}
|
}
|
||||||
match info {
|
match info {
|
||||||
CodeBlockKind::Fenced(language) => match CodeBlockMode::parse(&language) {
|
CodeBlockKind::Fenced(language) => {
|
||||||
CodeBlockMode::PlainText => self.write("<pre><code>"),
|
self.code_block_state = CodeBlockState::InCodeBlock(Some(language.clone()));
|
||||||
CodeBlockMode::SyntaxHighlightOnly { language } => {
|
match CodeBlockMode::parse(&language) {
|
||||||
self.write("<pre><code class=\"language-")?;
|
CodeBlockMode::PlainText => self.write("<pre><code>"),
|
||||||
escape_html(&mut self.writer, language)?;
|
CodeBlockMode::SyntaxHighlightOnly { language } => {
|
||||||
self.write("\">")
|
self.write("<pre><code class=\"language-")?;
|
||||||
}
|
escape_html(&mut self.writer, language)?;
|
||||||
CodeBlockMode::LiterateProgram {
|
if self.config.syntaxes.contains_key(language) {
|
||||||
language,
|
self.write(" th-syntax-highlighting")?;
|
||||||
kind,
|
|
||||||
program_name,
|
|
||||||
} => {
|
|
||||||
self.write(match &kind {
|
|
||||||
LiterateCodeKind::Input => {
|
|
||||||
"<th-literate-program data-mode=\"input\" "
|
|
||||||
}
|
|
||||||
LiterateCodeKind::Output { .. } => {
|
|
||||||
"<th-literate-program data-mode=\"output\" "
|
|
||||||
}
|
|
||||||
})?;
|
|
||||||
self.write("data-program=\"")?;
|
|
||||||
escape_href(&mut self.writer, self.page_id)?;
|
|
||||||
self.write(":")?;
|
|
||||||
escape_html(&mut self.writer, program_name)?;
|
|
||||||
self.write("\" data-language=\"")?;
|
|
||||||
escape_html(&mut self.writer, language)?;
|
|
||||||
self.write("\" role=\"code\">")?;
|
|
||||||
|
|
||||||
if let LiterateCodeKind::Output { placeholder_pic_id } = kind {
|
|
||||||
if !placeholder_pic_id.is_empty() {
|
|
||||||
self.write(
|
|
||||||
"<img class=\"placeholder-image\" loading=\"lazy\" src=\"",
|
|
||||||
)?;
|
|
||||||
escape_html(
|
|
||||||
&mut self.writer,
|
|
||||||
&self.config.pic_url(placeholder_pic_id),
|
|
||||||
)?;
|
|
||||||
self.write("\"")?;
|
|
||||||
if let Some(PicSize { width, height }) = self
|
|
||||||
.config_derived_data
|
|
||||||
.pic_size(self.config, placeholder_pic_id)
|
|
||||||
{
|
|
||||||
self.write(&format!(
|
|
||||||
" width=\"{width}\" height=\"{height}\""
|
|
||||||
))?;
|
|
||||||
}
|
|
||||||
self.write(">")?;
|
|
||||||
}
|
}
|
||||||
|
self.write("\">")
|
||||||
}
|
}
|
||||||
|
CodeBlockMode::LiterateProgram {
|
||||||
|
language,
|
||||||
|
kind,
|
||||||
|
program_name,
|
||||||
|
} => {
|
||||||
|
self.write(match &kind {
|
||||||
|
LiterateCodeKind::Input => {
|
||||||
|
"<th-literate-program data-mode=\"input\" "
|
||||||
|
}
|
||||||
|
LiterateCodeKind::Output { .. } => {
|
||||||
|
"<th-literate-program data-mode=\"output\" "
|
||||||
|
}
|
||||||
|
})?;
|
||||||
|
self.write("data-program=\"")?;
|
||||||
|
escape_href(&mut self.writer, self.page_id)?;
|
||||||
|
self.write(":")?;
|
||||||
|
escape_html(&mut self.writer, program_name)?;
|
||||||
|
self.write("\" data-language=\"")?;
|
||||||
|
escape_html(&mut self.writer, language)?;
|
||||||
|
self.write("\" role=\"code\">")?;
|
||||||
|
|
||||||
self.write("<pre class=\"placeholder-console\">")?;
|
if let LiterateCodeKind::Output { placeholder_pic_id } = kind {
|
||||||
Ok(())
|
if !placeholder_pic_id.is_empty() {
|
||||||
|
self.write(
|
||||||
|
"<img class=\"placeholder-image\" loading=\"lazy\" src=\"",
|
||||||
|
)?;
|
||||||
|
escape_html(
|
||||||
|
&mut self.writer,
|
||||||
|
&self.config.pic_url(placeholder_pic_id),
|
||||||
|
)?;
|
||||||
|
self.write("\"")?;
|
||||||
|
if let Some(PicSize { width, height }) = self
|
||||||
|
.config_derived_data
|
||||||
|
.pic_size(self.config, placeholder_pic_id)
|
||||||
|
{
|
||||||
|
self.write(&format!(
|
||||||
|
" width=\"{width}\" height=\"{height}\""
|
||||||
|
))?;
|
||||||
|
}
|
||||||
|
self.write(">")?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
self.write("<pre class=\"placeholder-console\">")?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
}
|
}
|
||||||
},
|
}
|
||||||
CodeBlockKind::Indented => self.write("<pre><code>"),
|
CodeBlockKind::Indented => self.write("<pre><code>"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -416,7 +430,7 @@ where
|
||||||
},
|
},
|
||||||
_ => "</code></pre>\n",
|
_ => "</code></pre>\n",
|
||||||
})?;
|
})?;
|
||||||
self.in_code_block = false;
|
self.code_block_state = CodeBlockState::NotInCodeBlock;
|
||||||
}
|
}
|
||||||
Tag::List(Some(_)) => {
|
Tag::List(Some(_)) => {
|
||||||
self.write("</ol>\n")?;
|
self.write("</ol>\n")?;
|
||||||
|
@ -505,8 +519,20 @@ where
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if self.in_code_block {
|
if let CodeBlockState::InCodeBlock(language) = &self.code_block_state {
|
||||||
escape_html(&mut self.writer, text)?;
|
let code_block_mode = language
|
||||||
|
.as_ref()
|
||||||
|
.map(|language| CodeBlockMode::parse(language));
|
||||||
|
let highlighting_language = code_block_mode
|
||||||
|
.as_ref()
|
||||||
|
.and_then(|mode| mode.highlighting_language());
|
||||||
|
let syntax =
|
||||||
|
highlighting_language.and_then(|language| self.config.syntaxes.get(language));
|
||||||
|
if let Some(syntax) = syntax {
|
||||||
|
highlight(&mut self.writer, syntax, text)?;
|
||||||
|
} else {
|
||||||
|
escape_html(&mut self.writer, text)?;
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
let mut parser = EmojiParser { text, position: 0 };
|
let mut parser = EmojiParser { text, position: 0 };
|
||||||
while let Some(token) = parser.next_token() {
|
while let Some(token) = parser.next_token() {
|
||||||
|
@ -623,6 +649,16 @@ impl<'a> CodeBlockMode<'a> {
|
||||||
CodeBlockMode::SyntaxHighlightOnly { language }
|
CodeBlockMode::SyntaxHighlightOnly { language }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn highlighting_language(&self) -> Option<&str> {
|
||||||
|
if let CodeBlockMode::LiterateProgram { language, .. }
|
||||||
|
| CodeBlockMode::SyntaxHighlightOnly { language } = self
|
||||||
|
{
|
||||||
|
Some(language)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Iterate over an `Iterator` of `Event`s, generate HTML for each `Event`, and
|
/// Iterate over an `Iterator` of `Event`s, generate HTML for each `Event`, and
|
||||||
|
|
|
@ -1,6 +1,8 @@
|
||||||
// This tokenizer is highly inspired by the one found in rxi's lite.
|
// This tokenizer is highly inspired by the one found in rxi's lite.
|
||||||
// I highly recommend checking it out!
|
// I highly recommend checking it out!
|
||||||
// https://github.com/rxi/lite/blob/master/data/core/tokenizer.lua
|
// https://github.com/rxi/lite/blob/master/data/core/tokenizer.lua
|
||||||
|
// There's also a mirror of it in the static generator, to enable highlighting of code blocks which
|
||||||
|
// are *not* JavaScript-powered.
|
||||||
|
|
||||||
export function compileSyntax(def) {
|
export function compileSyntax(def) {
|
||||||
for (let pattern of def.patterns) {
|
for (let pattern of def.patterns) {
|
||||||
|
@ -32,7 +34,7 @@ function tokenize(text, syntax) {
|
||||||
let match;
|
let match;
|
||||||
pattern.regex.lastIndex = i;
|
pattern.regex.lastIndex = i;
|
||||||
if ((match = pattern.regex.exec(text)) != null) {
|
if ((match = pattern.regex.exec(text)) != null) {
|
||||||
pushToken(tokens, pattern.as, match[0]); // TODO
|
pushToken(tokens, pattern.is, match[0]); // TODO
|
||||||
i = pattern.regex.lastIndex;
|
i = pattern.regex.lastIndex;
|
||||||
hadMatch = true;
|
hadMatch = true;
|
||||||
break;
|
break;
|
||||||
|
|
76
static/syntax/javascript.json
Normal file
76
static/syntax/javascript.json
Normal file
|
@ -0,0 +1,76 @@
|
||||||
|
{
|
||||||
|
"patterns": [
|
||||||
|
{ "regex": "\\/\\/.*", "is": "comment" },
|
||||||
|
{
|
||||||
|
"regex": "\\/\\*.*?\\*\\/",
|
||||||
|
"flags": ["dotMatchesNewline"],
|
||||||
|
"is": "comment"
|
||||||
|
},
|
||||||
|
{ "regex": "[A-Z_][a-zA-Z0-9_]*", "is": "keyword2" },
|
||||||
|
{
|
||||||
|
"regex": "[a-zA-Z_][a-zA-Z0-9_]*(\\()",
|
||||||
|
"is": { "default": "function", "captures": ["default"] }
|
||||||
|
},
|
||||||
|
{ "regex": "[a-zA-Z_][a-zA-Z0-9_]*", "is": "identifier" },
|
||||||
|
{ "regex": "0[bB][01_]+n?", "is": "literal" },
|
||||||
|
{ "regex": "0[oO][0-7_]+n?", "is": "literal" },
|
||||||
|
{ "regex": "0[xX][0-9a-fA-F_]+n?", "is": "literal" },
|
||||||
|
{ "regex": "[0-9_]+n", "is": "literal" },
|
||||||
|
{ "regex": "[0-9_]+(\\.[0-9_]*([eE][-+]?[0-9_]+)?)?", "is": "literal" },
|
||||||
|
{ "regex": "'(\\'|[^'])*'", "is": "string" },
|
||||||
|
{ "regex": "\"(\\\"|[^\"])*\"", "is": "string" },
|
||||||
|
{ "regex": "`(\\`|[^`])*`", "is": "string" },
|
||||||
|
{ "regex": "[+=/*^%<>!~|&\\.?:-]+", "is": "operator" },
|
||||||
|
{ "regex": "[,;]", "is": "punct" }
|
||||||
|
],
|
||||||
|
"keywords": {
|
||||||
|
"as": { "into": "keyword1", "onlyReplaces": "identifier" },
|
||||||
|
"async": { "into": "keyword1", "onlyReplaces": "identifier" },
|
||||||
|
"await": { "into": "keyword1" },
|
||||||
|
"break": { "into": "keyword1" },
|
||||||
|
"case": { "into": "keyword1" },
|
||||||
|
"catch": { "into": "keyword1" },
|
||||||
|
"class": { "into": "keyword1" },
|
||||||
|
"const": { "into": "keyword1" },
|
||||||
|
"continue": { "into": "keyword1" },
|
||||||
|
"debugger": { "into": "keyword1" },
|
||||||
|
"default": { "into": "keyword1" },
|
||||||
|
"delete": { "into": "keyword1" },
|
||||||
|
"do": { "into": "keyword1" },
|
||||||
|
"else": { "into": "keyword1" },
|
||||||
|
"export": { "into": "keyword1" },
|
||||||
|
"extends": { "into": "keyword1" },
|
||||||
|
"finally": { "into": "keyword1" },
|
||||||
|
"for": { "into": "keyword1" },
|
||||||
|
"from": { "into": "keyword1", "onlyReplaces": "identifier" },
|
||||||
|
"function": { "into": "keyword1" },
|
||||||
|
"get": { "into": "keyword1", "onlyReplaces": "identifier" },
|
||||||
|
"if": { "into": "keyword1" },
|
||||||
|
"import": { "into": "keyword1" },
|
||||||
|
"in": { "into": "keyword1" },
|
||||||
|
"instanceof": { "into": "keyword1" },
|
||||||
|
"let": { "into": "keyword1" },
|
||||||
|
"new": { "into": "keyword1" },
|
||||||
|
"of": { "into": "keyword1", "onlyReplaces": "identifier" },
|
||||||
|
"return": { "into": "keyword1" },
|
||||||
|
"set": { "into": "keyword1", "onlyReplaces": "identifier" },
|
||||||
|
"static": { "into": "keyword1" },
|
||||||
|
"switch": { "into": "keyword1" },
|
||||||
|
"throw": { "into": "keyword1" },
|
||||||
|
"try": { "into": "keyword1" },
|
||||||
|
"typeof": { "into": "keyword1" },
|
||||||
|
"var": { "into": "keyword1" },
|
||||||
|
"void": { "into": "keyword1" },
|
||||||
|
"while": { "into": "keyword1" },
|
||||||
|
"with": { "into": "keyword1" },
|
||||||
|
"yield": { "into": "keyword1" },
|
||||||
|
|
||||||
|
"super": { "into": "keyword2" },
|
||||||
|
"this": { "into": "keyword2" },
|
||||||
|
|
||||||
|
"false": { "into": "literal" },
|
||||||
|
"true": { "into": "literal" },
|
||||||
|
"undefined": { "into": "literal" },
|
||||||
|
"null": { "into": "literal" }
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in a new issue