syntax highlighting

This commit is contained in:
liquidex 2024-02-17 18:01:17 +01:00
parent b9218c8ace
commit d9b351ad64
5 changed files with 213 additions and 9 deletions

View file

@ -288,7 +288,7 @@ styles = ["tairu.css"]
as I've already said, we represent each direction using a single bit.
% id = "01HPSY4Y19AW70YX8PPA7AS4DH"
- I'm using JavaScript by the way, because it's the native programming language of your web browser. read on to the end of this tangent to see why.
- I'm using JavaScript by the way, because it's the native programming language of your web browser. read on to see why.
- now I don't know about you, but I find the usual C-style way of checking whether a bit is set extremely hard to read, so let's take care of that:
@ -406,9 +406,6 @@ styles = ["tairu.css"]
TODO: The value from the previous output should not leak into this one. how do we do this? do we emit extra `pushMessage` calls in between the editors so that they know when to end?
maybe use a `classic` context instead of a module? or maybe have a way of sharing data between outputs? (return value?)
% id = "01HPD4XQPWT9N8X9BD9GKWD78F"
- bitwise autotiling is a really cool technique that I've used in plenty of games in the past.

View file

@ -175,6 +175,7 @@ th-literate-program {
--recursive-mono: 1.0;
--recursive-casl: 0.0;
--recursive-slnt: 0.0;
--recursive-wght: 450;
@ -548,3 +549,62 @@ th-literate-program[data-mode="output"] {
opacity: 50%;
/* Syntax highlighting */
/* Color palette used by the syntax highlighter. Each custom property corresponds to one token
   kind and is consumed by the `.th-syntax-highlighting span` rules. */

:root {
    /* TODO: Light mode syntax highlighting */
}

@media (prefers-color-scheme: dark) {
    :root {
        --syntax-comment: #aca8a4;
        --syntax-identifier: var(--text-color);
        --syntax-keyword1: #ffb06a;
        --syntax-keyword2: #9acfe3;
        --syntax-operator: #ec9f8d;
        --syntax-function: #fbd283;
        --syntax-literal: #e9b9f0;
        --syntax-string: #b0dd7a;
        --syntax-punct: #9d9a96;
    }
}
/* One nested rule per token kind; the class on each span selects its color variable. */
.th-syntax-highlighting span {
    &.comment {
        --recursive-slnt: -16.0;
        color: var(--syntax-comment);
    }

    &.identifier {
        color: var(--syntax-identifier);
    }

    &.keyword1 {
        color: var(--syntax-keyword1);
    }

    &.keyword2 {
        color: var(--syntax-keyword2);
    }

    &.operator {
        color: var(--syntax-operator);
    }

    &.function {
        color: var(--syntax-function);
    }

    &.literal {
        color: var(--syntax-literal);
    }

    &.string {
        color: var(--syntax-string);
    }

    &.punct {
        color: var(--syntax-punct);
    }
}

View file

@ -1,4 +1,5 @@
import { CodeJar } from "../vendor/codejar.js";
import { compileSyntax, highlight } from "./literate-programming/highlight.js";
let literatePrograms = new Map();
@ -34,9 +35,81 @@ function getLiterateProgramWorkerCommands(name) {
class InputMode {
static JAVASCRIPT = compileSyntax({
patterns: [
{ regex: /\/\/.*/, as: "comment" },
{ regex: /\/\*.*?\*\//ms, as: "comment" },
{ regex: /[A-Z_][a-zA-Z0-9_]*/, as: "keyword2" },
{ regex: /[a-zA-Z_][a-zA-Z0-9_]*(?=\()/, as: "function" },
{ regex: /[a-zA-Z_][a-zA-Z0-9_]*/, as: "identifier" },
{ regex: /0[bB][01_]+n?/, as: "literal" },
{ regex: /0[oO][0-7_]+n?/, as: "literal" },
{ regex: /0[xX][0-9a-fA-F_]+n?/, as: "literal" },
{ regex: /[0-9_]+n/, as: "literal" },
{ regex: /[0-9_]+(\.[0-9_]*([eE][-+]?[0-9_]+)?)?/, as: "literal" },
{ regex: /'(\\'|[^'])*'/, as: "string" },
{ regex: /"(\\"|[^"])*"/, as: "string" },
{ regex: /`(\\`|[^"])*`/, as: "string" },
// TODO: RegExp literals?
{ regex: /[+=/*^%<>!~|&\.-]+/, as: "operator" },
{ regex: /[,;]/, as: "punct" },
keywords: new Map([
["as", { into: "keyword1", onlyReplaces: "identifier" }],
["async", { into: "keyword1", onlyReplaces: "identifier" }],
["await", { into: "keyword1" }],
["break", { into: "keyword1" }],
["case", { into: "keyword1" }],
["catch", { into: "keyword1" }],
["class", { into: "keyword1" }],
["const", { into: "keyword1" }],
["continue", { into: "keyword1" }],
["debugger", { into: "keyword1" }],
["default", { into: "keyword1" }],
["delete", { into: "keyword1" }],
["do", { into: "keyword1" }],
["else", { into: "keyword1" }],
["export", { into: "keyword1" }],
["extends", { into: "keyword1" }],
["finally", { into: "keyword1" }],
["for", { into: "keyword1" }],
["from", { into: "keyword1", onlyReplaces: "identifier" }],
["function", { into: "keyword1" }],
["get", { into: "keyword1", onlyReplaces: "identifier" }],
["if", { into: "keyword1" }],
["import", { into: "keyword1" }],
["in", { into: "keyword1" }],
["instanceof", { into: "keyword1" }],
["let", { into: "keyword1" }],
["new", { into: "keyword1" }],
["of", { into: "keyword1", onlyReplaces: "identifier" }],
["return", { into: "keyword1" }],
["set", { into: "keyword1", onlyReplaces: "identifier" }],
["static", { into: "keyword1" }],
["switch", { into: "keyword1" }],
["throw", { into: "keyword1" }],
["try", { into: "keyword1" }],
["typeof", { into: "keyword1" }],
["var", { into: "keyword1" }],
["void", { into: "keyword1" }],
["while", { into: "keyword1" }],
["with", { into: "keyword1" }],
["yield", { into: "keyword1" }],
["super", { into: "keyword2" }],
["this", { into: "keyword2" }],
["false", { into: "literal" }],
["true", { into: "literal" }],
["undefined", { into: "literal" }],
["null", { into: "literal" }],
constructor(frame) {
this.frame = frame;
this.codeJar = CodeJar(frame, InputMode.highlight);
this.codeJar.onUpdate(() => {
for (let handler of frame.program.onChanged) {
@ -48,7 +121,7 @@ class InputMode {
static highlight(frame) {
// TODO: Syntax highlighting
highlight(frame, InputMode.JAVASCRIPT);

View file

@ -0,0 +1,72 @@
// This tokenizer is highly inspired by the one found in rxi's lite.
// I highly recommend checking it out!
/**
 * Prepares a syntax definition for tokenization by recompiling every pattern's regex as a
 * sticky regex. The definition is modified in place.
 *
 * @param {{ patterns: Array<{ regex: RegExp }> }} def - Syntax definition whose patterns
 *   should be compiled.
 * @returns The same `def` object that was passed in.
 */
export function compileSyntax(def) {
    for (const pattern of def.patterns) {
        // Remove g (global) flag as it would interfere with the lexing process. We only want to
        // match the first token at the cursor.
        // Also remove a pre-existing y flag, because passing the same flag twice to the RegExp
        // constructor throws a SyntaxError.
        const flags = pattern.regex.flags.replace(/[gy]/g, "");
        // Add y (sticky) flag so that matches are only attempted exactly at `lastIndex`.
        pattern.regex = new RegExp(pattern.regex, `y${flags}`);
    }
    return def;
}
/**
 * Adds text of the given kind to the end of the token list. If the last token already has the
 * same kind, the text is appended to that token instead of creating a new one.
 *
 * @param {Array<{ kind: string, string: string }>} tokens - Token list, modified in place.
 * @param {string} kind - Kind of the text being added.
 * @param {string} string - The text itself.
 */
function pushToken(tokens, kind, string) {
    const previousToken = tokens[tokens.length - 1];
    if (previousToken != null && previousToken.kind === kind) {
        previousToken.string += string;
    } else {
        tokens.push({ kind, string });
    }
}
/**
 * Splits `text` into a list of `{ kind, string }` tokens according to `syntax`.
 *
 * At each position the patterns are tried in order and the first one that matches wins.
 * Characters that no pattern matches are emitted with the kind "default". After lexing, any
 * token whose text is a key of `syntax.keywords` is re-tagged with the replacement's `into`
 * kind; if the replacement specifies `onlyReplaces`, that only happens when the token currently
 * has that kind.
 *
 * @param {string} text - Source text to tokenize.
 * @param {{ patterns: Array<{ regex: RegExp, as: string }>, keywords: Map<string, { into: string, onlyReplaces?: string }> }} syntax
 *   Syntax definition. The regexes are expected to be sticky, since matching relies on
 *   `lastIndex` to anchor each attempt at the cursor.
 * @returns {Array<{ kind: string, string: string }>} The tokens, in source order.
 */
function tokenize(text, syntax) {
    const tokens = [];
    let i = 0;

    while (i < text.length) {
        let hadMatch = false;
        for (const pattern of syntax.patterns) {
            pattern.regex.lastIndex = i;
            const match = pattern.regex.exec(text);
            // Empty matches are ignored: they would not advance the cursor, and the loop would
            // never terminate.
            if (match != null && match[0].length > 0) {
                pushToken(tokens, pattern.as, match[0]);
                i = pattern.regex.lastIndex;
                hadMatch = true;
                // First matching pattern wins; restart from the top at the new position.
                break;
            }
        }

        // Base case: no pattern matched, just add the current character to the output.
        if (!hadMatch) {
            pushToken(tokens, "default", text.substring(i, i + 1));
            ++i;
        }
    }

    for (const token of tokens) {
        const replacement = syntax.keywords.get(token.string);
        if (replacement == null) {
            continue;
        }
        if (replacement.onlyReplaces == null || token.kind === replacement.onlyReplaces) {
            token.kind = replacement.into;
        }
    }

    return tokens;
}
/**
 * Replaces the text content of `element` with one `<span>` per token, produced by tokenizing
 * the element's current text with `syntax`.
 *
 * @param {Element} element - Element whose text should be highlighted; its children are
 *   replaced.
 * @param syntax - Syntax definition passed through to `tokenize`.
 */
export function highlight(element, syntax) {
    const tokens = tokenize(element.textContent, syntax);

    // Rebuild the element's content from scratch.
    element.textContent = "";
    for (const token of tokens) {
        const span = document.createElement("span");
        span.textContent = token.string;
        // NOTE(review): tagging the span with its token kind and attaching it to the element is
        // reconstructed; without these two steps the created span is discarded and the element
        // stays empty. Confirm the class name scheme against the stylesheet.
        span.classList.add(token.kind);
        element.appendChild(span);
    }
}

View file

@ -34,11 +34,12 @@ export function CodeJar(editor, highlight, opt = {}) {
isLegacy = true;
if (isLegacy)
editor.setAttribute('contenteditable', 'true');
const debounceHighlight = debounce(() => {
// PATCH(liquidex): Remove debouncing here.
const debounceHighlight = () => {
const pos = save();
doHighlight(editor, pos);
}, 30);
let recording = false;
const shouldRecord = (event) => {
return !isUndo(event) && !isRedo(event)
@ -78,14 +79,15 @@ export function CodeJar(editor, highlight, opt = {}) {
if (isLegacy && !isCopy(event))
// PATCH(liquidex): Do highlighting on keypress for faster feedback.
on('keyup', event => {
if (event.defaultPrevented)
if (event.isComposing)
if (prev !== toString())