static syntax highlighting WIP
This commit is contained in:
parent
7fd2d18b69
commit
5ab8ffdba2
13
Cargo.lock
generated
13
Cargo.lock
generated
|
@ -1160,9 +1160,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "regex"
|
||||
version = "1.9.3"
|
||||
version = "1.10.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "81bc1d4caf89fac26a70747fe603c130093b53c773888797a6329091246d651a"
|
||||
checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
|
@ -1172,9 +1172,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "regex-automata"
|
||||
version = "0.3.6"
|
||||
version = "0.4.6"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fed1ceff11a1dddaee50c9dc8e4938bd106e9d89ae372f192311e7da498e3b69"
|
||||
checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea"
|
||||
dependencies = [
|
||||
"aho-corasick",
|
||||
"memchr",
|
||||
|
@ -1183,9 +1183,9 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "regex-syntax"
|
||||
version = "0.7.4"
|
||||
version = "0.8.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e5ea92a5b6195c6ef2a0295ea818b312502c6fc94dde986c5553242e18fd4ce2"
|
||||
checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f"
|
||||
|
||||
[[package]]
|
||||
name = "rustc-demangle"
|
||||
|
@ -1580,6 +1580,7 @@ dependencies = [
|
|||
"log",
|
||||
"pulldown-cmark",
|
||||
"rand",
|
||||
"regex",
|
||||
"serde",
|
||||
"serde_json",
|
||||
"tokio",
|
||||
|
|
36
content/treehouse/dev/syntax-highlighting.tree
Normal file
36
content/treehouse/dev/syntax-highlighting.tree
Normal file
|
@ -0,0 +1,36 @@
|
|||
%% title = "syntax highlighting gallery"
|
||||
|
||||
- this is a page demonstrating syntaxes supported by the treehouse
|
||||
|
||||
- really there's not much more to it, but I use it for debugging + with it you can get a general feel for how I highlight things in the treehouse
|
||||
|
||||
- `javascript`
|
||||
```javascript
|
||||
// t is an existing tile index; variable name is short for brevity
|
||||
export function removeRedundancies(t) {
|
||||
if (isSet(t, SE) && (!isSet(t, S) || !isSet(t, E))) {
|
||||
t &= ~SE;
|
||||
}
|
||||
if (isSet(t, SW) && (!isSet(t, S) || !isSet(t, W))) {
|
||||
t &= ~SW;
|
||||
}
|
||||
if (isSet(t, NW) && (!isSet(t, N) || !isSet(t, W))) {
|
||||
t &= ~NW;
|
||||
}
|
||||
if (isSet(t, NE) && (!isSet(t, N) || !isSet(t, E))) {
|
||||
t &= ~NE;
|
||||
}
|
||||
return t;
|
||||
}
|
||||
|
||||
/* This is
|
||||
a multiline comment. */
|
||||
|
||||
ident Class CONSTANT funciton()
|
||||
0b1010 0o01234567 0x0123456789ABCDEF
|
||||
01234567
|
||||
1.41e-3
|
||||
'string' /**/ "string" /**/ `string`
|
||||
+ - * / == != <= >= ! ~ || && . ? :
|
||||
, ;
|
||||
```
|
|
@ -29,3 +29,4 @@ ulid = "1.0.0"
|
|||
url = "2.5.0"
|
||||
base64 = "0.21.7"
|
||||
chrono = "0.4.35"
|
||||
regex = "1.10.3"
|
||||
|
|
|
@ -361,6 +361,7 @@ pub fn generate(paths: &Paths<'_>) -> anyhow::Result<(Config, Treehouse)> {
|
|||
config.site = std::env::var("TREEHOUSE_SITE").unwrap_or(config.site);
|
||||
config.autopopulate_emoji(&paths.static_dir.join("emoji"))?;
|
||||
config.autopopulate_pics(&paths.static_dir.join("pic"))?;
|
||||
config.load_syntaxes(&paths.static_dir.join("syntax"))?;
|
||||
|
||||
info!("cleaning target directory");
|
||||
let _ = std::fs::remove_dir_all(paths.target_dir);
|
||||
|
|
|
@ -1,9 +1,15 @@
|
|||
use std::{collections::HashMap, ffi::OsStr, fs::File, io::BufReader, path::Path};
|
||||
|
||||
use anyhow::Context;
|
||||
use log::debug;
|
||||
use serde::{Deserialize, Serialize};
|
||||
use walkdir::WalkDir;
|
||||
|
||||
use crate::html::highlight::{
|
||||
compiled::{compile_syntax, CompiledSyntax},
|
||||
Syntax,
|
||||
};
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||
pub struct Config {
|
||||
/// Website root; used when generating links.
|
||||
|
@ -48,6 +54,13 @@ pub struct Config {
|
|||
/// On top of this, pics are autodiscovered by walking the `static/pic` directory.
|
||||
/// Only the part before the first dash is treated as the pic's id.
|
||||
pub pics: HashMap<String, String>,
|
||||
|
||||
/// Syntax definitions.
|
||||
///
|
||||
/// These are not part of the config file, but are loaded as part of site configuration from
|
||||
/// `static/syntax`.
|
||||
#[serde(skip)]
|
||||
pub syntaxes: HashMap<String, CompiledSyntax>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||
|
@ -138,6 +151,30 @@ impl Config {
|
|||
self.pics.get(id).map(|x| &**x).unwrap_or("404.png")
|
||||
)
|
||||
}
|
||||
|
||||
/// Loads all syntax definition files.
|
||||
pub fn load_syntaxes(&mut self, dir: &Path) -> anyhow::Result<()> {
|
||||
for entry in WalkDir::new(dir) {
|
||||
let entry = entry?;
|
||||
if entry.path().extension() == Some(OsStr::new("json")) {
|
||||
let name = entry
|
||||
.path()
|
||||
.file_stem()
|
||||
.expect("syntax file name should have a stem")
|
||||
.to_string_lossy();
|
||||
debug!("loading syntax {name:?}");
|
||||
|
||||
let syntax: Syntax = serde_json::from_reader(BufReader::new(
|
||||
File::open(entry.path()).context("could not open syntax file")?,
|
||||
))
|
||||
.context("could not deserialize syntax file")?;
|
||||
let compiled = compile_syntax(&syntax);
|
||||
self.syntaxes.insert(name.into_owned(), compiled);
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
/// Data derived from the config.
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
use std::fmt::{self, Display, Write};
|
||||
|
||||
pub mod breadcrumbs;
|
||||
pub mod highlight;
|
||||
mod markdown;
|
||||
pub mod navmap;
|
||||
pub mod tree;
|
||||
|
|
94
crates/treehouse/src/html/highlight.rs
Normal file
94
crates/treehouse/src/html/highlight.rs
Normal file
|
@ -0,0 +1,94 @@
|
|||
//! Tokenizer and syntax highlighter inspired by the one found in rxi's lite.
|
||||
//! I highly recommend checking it out!
|
||||
//! https://github.com/rxi/lite/blob/master/data/core/tokenizer.lua
|
||||
//! There's also a mirror of it in the JavaScript, used to power dynamically editable code blocks.
|
||||
//!
|
||||
//! Both of these syntax highlighters use the same JSON syntax definitions; however this one is
|
||||
//! more limited, in that patterns do not support backtracking.
|
||||
//! This is effectively enforced in the dynamic highlighter because this highlighter reports any
|
||||
//! regex syntax errors upon site compilation.
|
||||
|
||||
pub mod compiled;
|
||||
pub mod tokenize;
|
||||
|
||||
use std::{collections::HashMap, io};
|
||||
|
||||
use pulldown_cmark::escape::{escape_html, StrWrite};
|
||||
use serde::{Deserialize, Serialize};
|
||||
|
||||
use self::compiled::CompiledSyntax;
|
||||
|
||||
/// Syntax definition.
|
||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||
pub struct Syntax {
|
||||
/// Patterns, matched sequentially (patterns at the beginning of the list take precedence.)
|
||||
pub patterns: Vec<Pattern>,
|
||||
|
||||
/// Map of replacements to use if a pattern matches a string exactly.
|
||||
pub keywords: HashMap<String, Keyword>,
|
||||
}
|
||||
|
||||
/// A pattern in a syntax definition.
|
||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||
pub struct Pattern {
|
||||
/// Regular expression to match.
|
||||
pub regex: String,
|
||||
|
||||
/// Flags to pass to the regex engine to alter how strings are matched.
|
||||
#[serde(default)]
|
||||
pub flags: Vec<RegexFlag>,
|
||||
|
||||
/// Type to assign to the token. This can be any string, but only a select few have colors
|
||||
/// assigned.
|
||||
pub is: TokenTypes,
|
||||
}
|
||||
|
||||
/// Assignable token types.
|
||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||
#[serde(untagged)]
|
||||
pub enum TokenTypes {
|
||||
/// Assign a single token type to the entire match.
|
||||
FullMatch(String),
|
||||
/// Assign individual token types to each capture.
|
||||
Captures(CaptureTokenTypes),
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||
pub struct CaptureTokenTypes {
|
||||
/// Token type to use outside captures.
|
||||
pub default: String,
|
||||
/// Token type to use inside captures.
|
||||
pub captures: Vec<String>,
|
||||
}
|
||||
|
||||
/// Flag passed to the regex engine.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Deserialize, Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub enum RegexFlag {
|
||||
/// Make `.` match line separators.
|
||||
DotMatchesNewline,
|
||||
}
|
||||
|
||||
/// Keyword replacement.
|
||||
#[derive(Debug, Clone, Deserialize, Serialize)]
|
||||
#[serde(rename_all = "camelCase")]
|
||||
pub struct Keyword {
|
||||
/// What to replace the token type with.
|
||||
pub into: String,
|
||||
|
||||
/// Only replace the token type if it matches this one. If this is not present, any token type
|
||||
/// is replaced.
|
||||
pub only_replaces: Option<String>,
|
||||
}
|
||||
|
||||
pub fn highlight(mut w: impl StrWrite, syntax: &CompiledSyntax, code: &str) -> io::Result<()> {
|
||||
let tokens = syntax.tokenize(code);
|
||||
for token in tokens {
|
||||
w.write_str("<span class=\"")?;
|
||||
escape_html(&mut w, &syntax.token_names[token.id])?;
|
||||
w.write_str("\">")?;
|
||||
escape_html(&mut w, &code[token.range])?;
|
||||
w.write_str("</span>")?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
118
crates/treehouse/src/html/highlight/compiled.rs
Normal file
118
crates/treehouse/src/html/highlight/compiled.rs
Normal file
|
@ -0,0 +1,118 @@
|
|||
use std::collections::HashMap;
|
||||
|
||||
use log::error;
|
||||
use regex::{Regex, RegexBuilder};
|
||||
|
||||
use super::{RegexFlag, Syntax, TokenTypes};
|
||||
|
||||
/// Numeric stand-in for a token type name. Names are converted to IDs during compilation for
/// performance.
pub type TokenId = usize;

/// ID of the `"default"` token type, which always occupies the first slot of the name table.
pub const TOKEN_ID_DEFAULT: TokenId = 0;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct CompiledSyntax {
|
||||
/// Lookup table which maps numeric IDs to token names.
|
||||
pub token_names: Vec<String>,
|
||||
|
||||
pub patterns: Vec<CompiledPattern>,
|
||||
pub keywords: HashMap<String, CompiledKeyword>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum CompiledTokenTypes {
|
||||
FullMatch(TokenId),
|
||||
Captures(CompiledCaptureTokenTypes),
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct CompiledCaptureTokenTypes {
|
||||
pub default: TokenId,
|
||||
pub captures: Vec<TokenId>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct CompiledPattern {
|
||||
pub regex: Regex,
|
||||
pub is: CompiledTokenTypes,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct CompiledKeyword {
|
||||
pub into: TokenId,
|
||||
pub only_replaces: Option<TokenId>,
|
||||
}
|
||||
|
||||
pub fn compile_syntax(syntax: &Syntax) -> CompiledSyntax {
|
||||
let mut token_names = vec!["default".into()];
|
||||
let mut get_token_id = |name: &str| -> TokenId {
|
||||
if let Some(id) = token_names.iter().position(|n| n == name) {
|
||||
id
|
||||
} else {
|
||||
let id = token_names.len();
|
||||
token_names.push(name.to_owned());
|
||||
id
|
||||
}
|
||||
};
|
||||
|
||||
let patterns = syntax
|
||||
.patterns
|
||||
.iter()
|
||||
.filter_map(|pattern| {
|
||||
// NOTE: `regex` has no support for sticky flags, so we need to anchor the match to the
|
||||
// start ourselves.
|
||||
let regex = RegexBuilder::new(&format!(
|
||||
"^{}",
|
||||
// If there's an existing `^`, it should not cause compilation errors for the user.
|
||||
pattern.regex.strip_prefix('^').unwrap_or(&pattern.regex)
|
||||
))
|
||||
.dot_matches_new_line(pattern.flags.contains(&RegexFlag::DotMatchesNewline))
|
||||
.build()
|
||||
.map_err(|e| {
|
||||
// NOTE: This could probably use better diagnostics, but it's pretty much
|
||||
// impossible to get a source span out of serde's output (because it forgoes
|
||||
// source information, rightfully so.) Therefore we have to settle on
|
||||
// a poor man's error log.
|
||||
error!("regex compilation error in pattern {pattern:?}: {e}");
|
||||
})
|
||||
.ok()?;
|
||||
Some(CompiledPattern {
|
||||
regex,
|
||||
is: match &pattern.is {
|
||||
TokenTypes::FullMatch(name) => {
|
||||
CompiledTokenTypes::FullMatch(get_token_id(name))
|
||||
}
|
||||
TokenTypes::Captures(types) => {
|
||||
CompiledTokenTypes::Captures(CompiledCaptureTokenTypes {
|
||||
default: get_token_id(&types.default),
|
||||
captures: types
|
||||
.captures
|
||||
.iter()
|
||||
.map(|name| get_token_id(name))
|
||||
.collect(),
|
||||
})
|
||||
}
|
||||
},
|
||||
})
|
||||
})
|
||||
.collect();
|
||||
let keywords = syntax
|
||||
.keywords
|
||||
.iter()
|
||||
.map(|(text, keyword)| {
|
||||
(
|
||||
text.clone(),
|
||||
CompiledKeyword {
|
||||
into: get_token_id(&keyword.into),
|
||||
only_replaces: keyword.only_replaces.as_deref().map(&mut get_token_id),
|
||||
},
|
||||
)
|
||||
})
|
||||
.collect();
|
||||
|
||||
CompiledSyntax {
|
||||
token_names,
|
||||
patterns,
|
||||
keywords,
|
||||
}
|
||||
}
|
57
crates/treehouse/src/html/highlight/tokenize.rs
Normal file
57
crates/treehouse/src/html/highlight/tokenize.rs
Normal file
|
@ -0,0 +1,57 @@
|
|||
use std::ops::Range;
|
||||
|
||||
use super::compiled::{CompiledSyntax, CompiledTokenTypes, TokenId, TOKEN_ID_DEFAULT};
|
||||
|
||||
pub struct Token {
|
||||
pub id: TokenId,
|
||||
pub range: Range<usize>,
|
||||
}
|
||||
|
||||
impl CompiledSyntax {
|
||||
pub fn tokenize(&self, text: &str) -> Vec<Token> {
|
||||
let mut tokens = vec![];
|
||||
|
||||
let mut i = 0;
|
||||
while i < text.len() {
|
||||
let mut had_match = false;
|
||||
for pattern in &self.patterns {
|
||||
match &pattern.is {
|
||||
CompiledTokenTypes::FullMatch(id) => {
|
||||
if let Some(regex_match) = pattern.regex.find(&text[i..]) {
|
||||
push_token(&mut tokens, *id, i..i + regex_match.range().end);
|
||||
i += regex_match.range().end;
|
||||
had_match = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
CompiledTokenTypes::Captures(types) => { /* TODO */ }
|
||||
}
|
||||
}
|
||||
|
||||
if !had_match {
|
||||
push_token(&mut tokens, TOKEN_ID_DEFAULT, i..i + 1);
|
||||
i += 1;
|
||||
}
|
||||
}
|
||||
|
||||
for token in &mut tokens {
|
||||
if let Some(keyword) = self.keywords.get(&text[token.range.clone()]) {
|
||||
if keyword.only_replaces.is_none() || Some(token.id) == keyword.only_replaces {
|
||||
token.id = keyword.into;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
tokens
|
||||
}
|
||||
}
|
||||
|
||||
fn push_token(tokens: &mut Vec<Token>, id: TokenId, range: Range<usize>) {
|
||||
if let Some(previous_token) = tokens.last_mut() {
|
||||
if previous_token.id == id {
|
||||
previous_token.range.end = range.end;
|
||||
return;
|
||||
}
|
||||
}
|
||||
tokens.push(Token { id, range });
|
||||
}
|
|
@ -23,6 +23,7 @@
|
|||
|
||||
//! HTML renderer that takes an iterator of events as input.
|
||||
|
||||
use std::borrow::Borrow;
|
||||
use std::collections::HashMap;
|
||||
use std::io;
|
||||
|
||||
|
@ -31,6 +32,7 @@ use pulldown_cmark::{Alignment, CodeBlockKind, Event, LinkType, Tag};
|
|||
use pulldown_cmark::{CowStr, Event::*};
|
||||
|
||||
use crate::config::{Config, ConfigDerivedData, PicSize};
|
||||
use crate::html::highlight::highlight;
|
||||
use crate::state::Treehouse;
|
||||
|
||||
enum TableState {
|
||||
|
@ -38,6 +40,12 @@ enum TableState {
|
|||
Body,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq)]
|
||||
enum CodeBlockState<'a> {
|
||||
NotInCodeBlock,
|
||||
InCodeBlock(Option<CowStr<'a>>),
|
||||
}
|
||||
|
||||
struct HtmlWriter<'a, I, W> {
|
||||
treehouse: &'a Treehouse,
|
||||
config: &'a Config,
|
||||
|
@ -58,7 +66,7 @@ struct HtmlWriter<'a, I, W> {
|
|||
table_cell_index: usize,
|
||||
numbers: HashMap<CowStr<'a>, usize>,
|
||||
|
||||
in_code_block: bool,
|
||||
code_block_state: CodeBlockState<'a>,
|
||||
}
|
||||
|
||||
impl<'a, I, W> HtmlWriter<'a, I, W>
|
||||
|
@ -87,7 +95,7 @@ where
|
|||
table_alignments: vec![],
|
||||
table_cell_index: 0,
|
||||
numbers: HashMap::new(),
|
||||
in_code_block: false,
|
||||
code_block_state: CodeBlockState::NotInCodeBlock,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -234,16 +242,21 @@ where
|
|||
}
|
||||
}
|
||||
Tag::CodeBlock(info) => {
|
||||
self.in_code_block = true;
|
||||
self.code_block_state = CodeBlockState::InCodeBlock(None);
|
||||
if !self.end_newline {
|
||||
self.write_newline()?;
|
||||
}
|
||||
match info {
|
||||
CodeBlockKind::Fenced(language) => match CodeBlockMode::parse(&language) {
|
||||
CodeBlockKind::Fenced(language) => {
|
||||
self.code_block_state = CodeBlockState::InCodeBlock(Some(language.clone()));
|
||||
match CodeBlockMode::parse(&language) {
|
||||
CodeBlockMode::PlainText => self.write("<pre><code>"),
|
||||
CodeBlockMode::SyntaxHighlightOnly { language } => {
|
||||
self.write("<pre><code class=\"language-")?;
|
||||
escape_html(&mut self.writer, language)?;
|
||||
if self.config.syntaxes.contains_key(language) {
|
||||
self.write(" th-syntax-highlighting")?;
|
||||
}
|
||||
self.write("\">")
|
||||
}
|
||||
CodeBlockMode::LiterateProgram {
|
||||
|
@ -292,7 +305,8 @@ where
|
|||
self.write("<pre class=\"placeholder-console\">")?;
|
||||
Ok(())
|
||||
}
|
||||
},
|
||||
}
|
||||
}
|
||||
CodeBlockKind::Indented => self.write("<pre><code>"),
|
||||
}
|
||||
}
|
||||
|
@ -416,7 +430,7 @@ where
|
|||
},
|
||||
_ => "</code></pre>\n",
|
||||
})?;
|
||||
self.in_code_block = false;
|
||||
self.code_block_state = CodeBlockState::NotInCodeBlock;
|
||||
}
|
||||
Tag::List(Some(_)) => {
|
||||
self.write("</ol>\n")?;
|
||||
|
@ -505,8 +519,20 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
if self.in_code_block {
|
||||
if let CodeBlockState::InCodeBlock(language) = &self.code_block_state {
|
||||
let code_block_mode = language
|
||||
.as_ref()
|
||||
.map(|language| CodeBlockMode::parse(language));
|
||||
let highlighting_language = code_block_mode
|
||||
.as_ref()
|
||||
.and_then(|mode| mode.highlighting_language());
|
||||
let syntax =
|
||||
highlighting_language.and_then(|language| self.config.syntaxes.get(language));
|
||||
if let Some(syntax) = syntax {
|
||||
highlight(&mut self.writer, syntax, text)?;
|
||||
} else {
|
||||
escape_html(&mut self.writer, text)?;
|
||||
}
|
||||
} else {
|
||||
let mut parser = EmojiParser { text, position: 0 };
|
||||
while let Some(token) = parser.next_token() {
|
||||
|
@ -623,6 +649,16 @@ impl<'a> CodeBlockMode<'a> {
|
|||
CodeBlockMode::SyntaxHighlightOnly { language }
|
||||
}
|
||||
}
|
||||
|
||||
fn highlighting_language(&self) -> Option<&str> {
|
||||
if let CodeBlockMode::LiterateProgram { language, .. }
|
||||
| CodeBlockMode::SyntaxHighlightOnly { language } = self
|
||||
{
|
||||
Some(language)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Iterate over an `Iterator` of `Event`s, generate HTML for each `Event`, and
|
||||
|
|
|
@ -1,6 +1,8 @@
|
|||
// This tokenizer is highly inspired by the one found in rxi's lite.
|
||||
// I highly recommend checking it out!
|
||||
// https://github.com/rxi/lite/blob/master/data/core/tokenizer.lua
|
||||
// There's also a mirror of it in the static generator, to enable highlighting of code blocks which
|
||||
// are *not* JavaScript-powered.
|
||||
|
||||
export function compileSyntax(def) {
|
||||
for (let pattern of def.patterns) {
|
||||
|
@ -32,7 +34,7 @@ function tokenize(text, syntax) {
|
|||
let match;
|
||||
pattern.regex.lastIndex = i;
|
||||
if ((match = pattern.regex.exec(text)) != null) {
|
||||
pushToken(tokens, pattern.as, match[0]); // TODO
|
||||
pushToken(tokens, pattern.is, match[0]); // TODO
|
||||
i = pattern.regex.lastIndex;
|
||||
hadMatch = true;
|
||||
break;
|
||||
|
|
76
static/syntax/javascript.json
Normal file
76
static/syntax/javascript.json
Normal file
|
@ -0,0 +1,76 @@
|
|||
{
|
||||
"patterns": [
|
||||
{ "regex": "\\/\\/.*", "is": "comment" },
|
||||
{
|
||||
"regex": "\\/\\*.*?\\*\\/",
|
||||
"flags": ["dotMatchesNewline"],
|
||||
"is": "comment"
|
||||
},
|
||||
{ "regex": "[A-Z_][a-zA-Z0-9_]*", "is": "keyword2" },
|
||||
{
|
||||
"regex": "[a-zA-Z_][a-zA-Z0-9_]*(\\()",
|
||||
"is": { "default": "function", "captures": ["default"] }
|
||||
},
|
||||
{ "regex": "[a-zA-Z_][a-zA-Z0-9_]*", "is": "identifier" },
|
||||
{ "regex": "0[bB][01_]+n?", "is": "literal" },
|
||||
{ "regex": "0[oO][0-7_]+n?", "is": "literal" },
|
||||
{ "regex": "0[xX][0-9a-fA-F_]+n?", "is": "literal" },
|
||||
{ "regex": "[0-9_]+n", "is": "literal" },
|
||||
{ "regex": "[0-9_]+(\\.[0-9_]*([eE][-+]?[0-9_]+)?)?", "is": "literal" },
|
||||
{ "regex": "'(\\'|[^'])*'", "is": "string" },
|
||||
{ "regex": "\"(\\\"|[^\"])*\"", "is": "string" },
|
||||
{ "regex": "`(\\`|[^`])*`", "is": "string" },
|
||||
{ "regex": "[+=/*^%<>!~|&\\.?:-]+", "is": "operator" },
|
||||
{ "regex": "[,;]", "is": "punct" }
|
||||
],
|
||||
"keywords": {
|
||||
"as": { "into": "keyword1", "onlyReplaces": "identifier" },
|
||||
"async": { "into": "keyword1", "onlyReplaces": "identifier" },
|
||||
"await": { "into": "keyword1" },
|
||||
"break": { "into": "keyword1" },
|
||||
"case": { "into": "keyword1" },
|
||||
"catch": { "into": "keyword1" },
|
||||
"class": { "into": "keyword1" },
|
||||
"const": { "into": "keyword1" },
|
||||
"continue": { "into": "keyword1" },
|
||||
"debugger": { "into": "keyword1" },
|
||||
"default": { "into": "keyword1" },
|
||||
"delete": { "into": "keyword1" },
|
||||
"do": { "into": "keyword1" },
|
||||
"else": { "into": "keyword1" },
|
||||
"export": { "into": "keyword1" },
|
||||
"extends": { "into": "keyword1" },
|
||||
"finally": { "into": "keyword1" },
|
||||
"for": { "into": "keyword1" },
|
||||
"from": { "into": "keyword1", "onlyReplaces": "identifier" },
|
||||
"function": { "into": "keyword1" },
|
||||
"get": { "into": "keyword1", "onlyReplaces": "identifier" },
|
||||
"if": { "into": "keyword1" },
|
||||
"import": { "into": "keyword1" },
|
||||
"in": { "into": "keyword1" },
|
||||
"instanceof": { "into": "keyword1" },
|
||||
"let": { "into": "keyword1" },
|
||||
"new": { "into": "keyword1" },
|
||||
"of": { "into": "keyword1", "onlyReplaces": "identifier" },
|
||||
"return": { "into": "keyword1" },
|
||||
"set": { "into": "keyword1", "onlyReplaces": "identifier" },
|
||||
"static": { "into": "keyword1" },
|
||||
"switch": { "into": "keyword1" },
|
||||
"throw": { "into": "keyword1" },
|
||||
"try": { "into": "keyword1" },
|
||||
"typeof": { "into": "keyword1" },
|
||||
"var": { "into": "keyword1" },
|
||||
"void": { "into": "keyword1" },
|
||||
"while": { "into": "keyword1" },
|
||||
"with": { "into": "keyword1" },
|
||||
"yield": { "into": "keyword1" },
|
||||
|
||||
"super": { "into": "keyword2" },
|
||||
"this": { "into": "keyword2" },
|
||||
|
||||
"false": { "into": "literal" },
|
||||
"true": { "into": "literal" },
|
||||
"undefined": { "into": "literal" },
|
||||
"null": { "into": "literal" }
|
||||
}
|
||||
}
|
Loading…
Reference in a new issue