implement RSS feeds

りき萌 2024-11-27 19:02:30 +01:00
parent 1e3a1f3527
commit 55b6a0df96
16 changed files with 682 additions and 50 deletions

View file

@@ -11,7 +11,7 @@ anyhow = "1.0.75"
axum = "0.7.4"
base64 = "0.21.7"
blake3 = "1.5.3"
chrono = "0.4.35"
chrono = { version = "0.4.35", features = ["serde"] }
clap = { version = "4.3.22", features = ["derive"] }
codespan-reporting = "0.11.1"
dashmap = "6.1.0"

View file

@@ -59,6 +59,7 @@ fn get_content_type(extension: &str) -> Option<&'static str> {
"js" => Some("text/javascript"),
"woff" => Some("font/woff2"),
"svg" => Some("image/svg+xml"),
"atom" => Some("application/atom+xml"),
_ => None,
}
}
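
The new arm maps the `.atom` extension (used by the feed files added below) to the Atom media type. A minimal sanity check, as a sketch — `get_content_type` is the module-private function from the hunk above:

assert_eq!(get_content_type("atom"), Some("application/atom+xml"));
assert_eq!(get_content_type("unknown"), None);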

View file

@@ -1,4 +1,7 @@
use std::{collections::HashMap, ops::ControlFlow};
use std::{
collections::{HashMap, HashSet},
ops::ControlFlow,
};
use anyhow::{anyhow, Context};
use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
@@ -31,6 +34,9 @@ pub struct Config {
/// Links exported to Markdown for use with reference syntax `[text][def:key]`.
pub defs: HashMap<String, String>,
/// Config for syndication feeds.
pub feed: Feed,
/// Redirects for moving pages around. These are used solely by the treehouse server.
///
/// Note that redirects are only resolved _non-recursively_ by the server. For a configuration
@@ -74,6 +80,12 @@ pub struct Config {
pub syntaxes: HashMap<String, CompiledSyntax>,
}
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct Feed {
/// Allowed tags in feed entries.
pub tags: HashSet<String>,
}
#[derive(Debug, Clone, Deserialize, Serialize)]
pub struct Redirects {
/// Page redirects. When a user navigates to a page, if they navigate to `url`, they will
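
Stepping back from the diff for a moment: the new `Feed` struct above is deserialized from a `[feed]` table in treehouse.toml (the diagnostics added to the semantic pass below refer to `feed.tags` there). A minimal sketch of what that table might contain, assuming the `toml` crate is available for parsing the config; the tag names are purely illustrative:

// Hypothetical `[feed]` table contents; the real tags live in treehouse.toml.
let feed: Feed = toml::from_str(r#"tags = ["programming", "music", "meta"]"#)
    .expect("feed table should deserialize");
assert!(feed.tags.contains("programming"));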

View file

@@ -1,3 +1,4 @@
mod atom;
mod dir_helper;
mod include_static_helper;
mod simple_template;
@@ -5,6 +6,7 @@ mod tree;
use std::{collections::HashMap, fmt, ops::ControlFlow, sync::Arc};
use atom::FeedDir;
use dir_helper::DirHelper;
use handlebars::{handlebars_helper, Handlebars};
use include_static_helper::IncludeStaticHelper;
@@ -28,6 +30,7 @@ struct BaseTemplateData<'a> {
import_map: String,
season: Option<Season>,
dev: bool,
feeds: Vec<String>,
}
impl<'a> BaseTemplateData<'a> {
@@ -38,6 +41,7 @@ impl<'a> BaseTemplateData<'a> {
.expect("import map should be serializable to JSON"),
season: Season::current(),
dev: cfg!(debug_assertions),
feeds: sources.treehouse.feeds_by_name.keys().cloned().collect(),
}
}
}
@@ -45,20 +49,22 @@ impl<'a> BaseTemplateData<'a> {
struct TreehouseDir {
dirs: Arc<Dirs>,
sources: Arc<Sources>,
handlebars: Arc<Handlebars<'static>>,
dir_index: DirIndex,
handlebars: Handlebars<'static>,
}
impl TreehouseDir {
fn new(dirs: Arc<Dirs>, sources: Arc<Sources>, dir_index: DirIndex) -> Self {
let mut handlebars = create_handlebars(&sources.config.site, dirs.static_.clone());
load_templates(&mut handlebars, &dirs.template);
fn new(
dirs: Arc<Dirs>,
sources: Arc<Sources>,
handlebars: Arc<Handlebars<'static>>,
dir_index: DirIndex,
) -> Self {
Self {
dirs,
sources,
dir_index,
handlebars,
dir_index,
}
}
}
@@ -195,7 +201,20 @@ impl DirIndex {
}
pub fn target(dirs: Arc<Dirs>, sources: Arc<Sources>) -> DynDir {
let mut handlebars = create_handlebars(&sources.config.site, dirs.static_.clone());
load_templates(&mut handlebars, &dirs.template);
let handlebars = Arc::new(handlebars);
let mut root = MemDir::new();
root.add(
VPath::new("feed"),
ContentCache::new(FeedDir::new(
dirs.clone(),
sources.clone(),
handlebars.clone(),
))
.to_dyn(),
);
root.add(VPath::new("static"), dirs.static_.clone());
root.add(
VPath::new("robots.txt"),
@@ -203,7 +222,7 @@ pub fn target(dirs: Arc<Dirs>, sources: Arc<Sources>) -> DynDir {
);
let dir_index = DirIndex::new(sources.treehouse.files_by_tree_path.keys().map(|x| &**x));
let tree_view = TreehouseDir::new(dirs, sources, dir_index);
let tree_view = TreehouseDir::new(dirs, sources, handlebars, dir_index);
let tree_view = ContentCache::new(tree_view);
tree_view.warm_up();
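
The effect of the new `feed` mount is that every page with a `feed` root attribute gets a stable URL under `feed/`, wrapped in a `ContentCache` so the document is rendered once and reused. A lookup sketch, assuming some page declares `feed = "recent"` (an illustrative name) and that the `Dir` trait is in scope:

// FeedDir::content strips the `.atom` extension, looks the name up in
// treehouse.feeds_by_name, and renders the Atom document for that page.
let feed_dir = FeedDir::new(dirs.clone(), sources.clone(), handlebars.clone());
let atom_xml = feed_dir.content(VPath::new("recent.atom"));
assert!(atom_xml.is_some());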

View file

@@ -0,0 +1,302 @@
use std::{fmt, sync::Arc};
use anyhow::Context;
use chrono::{DateTime, Utc};
use handlebars::Handlebars;
use serde::Serialize;
use tracing::{info, info_span, instrument};
use ulid::Ulid;
use crate::{
dirs::Dirs,
html::djot::{self, resolve_link},
sources::Sources,
state::FileId,
tree::SemaBranchId,
vfs::{Dir, DirEntry, VPath, VPathBuf},
};
use super::BaseTemplateData;
pub struct FeedDir {
dirs: Arc<Dirs>,
sources: Arc<Sources>,
handlebars: Arc<Handlebars<'static>>,
}
impl FeedDir {
pub fn new(
dirs: Arc<Dirs>,
sources: Arc<Sources>,
handlebars: Arc<Handlebars<'static>>,
) -> Self {
Self {
dirs,
sources,
handlebars,
}
}
}
impl Dir for FeedDir {
fn dir(&self, path: &VPath) -> Vec<DirEntry> {
if path == VPath::ROOT {
self.sources
.treehouse
.feeds_by_name
.keys()
.map(|name| DirEntry {
path: VPathBuf::new(format!("{name}.atom")),
})
.collect()
} else {
vec![]
}
}
fn content(&self, path: &VPath) -> Option<Vec<u8>> {
info!("{path}");
if path.extension() == Some("atom") {
let feed_name = path.with_extension("").to_string();
self.sources
.treehouse
.feeds_by_name
.get(&feed_name)
.map(|file_id| {
generate_or_error(&self.sources, &self.dirs, &self.handlebars, *file_id).into()
})
} else {
None
}
}
fn content_version(&self, _path: &VPath) -> Option<String> {
None
}
}
impl fmt::Debug for FeedDir {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str("FeedDir")
}
}
#[derive(Serialize)]
struct Feed {
name: String,
updated: DateTime<Utc>,
entries: Vec<Entry>,
}
#[derive(Serialize)]
struct Entry {
id: String,
updated: DateTime<Utc>,
url: String,
title: String,
categories: Vec<String>,
summary: String,
}
#[derive(Serialize)]
struct AtomTemplateData<'a> {
#[serde(flatten)]
base: &'a BaseTemplateData<'a>,
feed: Feed,
}
#[instrument(name = "atom::generate", skip(sources, handlebars))]
pub fn generate(
sources: &Sources,
dirs: &Dirs,
handlebars: &Handlebars,
file_id: FileId,
) -> anyhow::Result<String> {
let roots = &sources.treehouse.roots[&file_id];
let feed_name = roots.attributes.feed.clone().expect("page must be a feed");
let template_data = AtomTemplateData {
base: &BaseTemplateData::new(sources),
feed: Feed {
name: feed_name,
// The content cache layer should take care of sampling the current time only once,
// and then preserving it until the treehouse is deployed again.
updated: Utc::now(),
entries: extract_entries(sources, dirs, file_id),
},
};
let _span = info_span!("handlebars::render").entered();
handlebars
.render("_feed_atom.hbs", &template_data)
.context("template rendering failed")
}
pub fn generate_or_error(
sources: &Sources,
dirs: &Dirs,
handlebars: &Handlebars,
file_id: FileId,
) -> String {
match generate(sources, dirs, handlebars, file_id) {
Ok(html) => html,
Err(error) => format!("error: {error:?}"),
}
}
fn extract_entries(sources: &Sources, dirs: &Dirs, file_id: FileId) -> Vec<Entry> {
let roots = &sources.treehouse.roots[&file_id];
roots
.branches
.iter()
.flat_map(|&branch_id| {
let branch = sources.treehouse.tree.branch(branch_id);
let text = &sources.treehouse.source(file_id).input()[branch.content.clone()];
let parsed = parse_entry(sources, dirs, file_id, jotdown::Parser::new(text));
let mut summary = String::new();
branches_to_html_simple(&mut summary, sources, dirs, file_id, &branch.children);
let updated = Ulid::from_string(&branch.attributes.id)
.ok()
.and_then(|ulid| DateTime::from_timestamp_millis(ulid.timestamp_ms() as i64))
.unwrap_or(DateTime::UNIX_EPOCH); // if you see the Unix epoch... oops
parsed.link.map(|url| Entry {
id: branch.attributes.id.clone(),
updated,
url,
title: parsed.title.unwrap_or_else(|| "untitled".into()),
categories: branch.attributes.tags.clone(),
summary,
})
})
.collect()
}
#[derive(Debug, Clone)]
struct ParsedEntry {
title: Option<String>,
link: Option<String>,
}
fn parse_entry(
sources: &Sources,
dirs: &Dirs,
file_id: FileId,
parser: jotdown::Parser,
) -> ParsedEntry {
let mut parser = parser.into_offset_iter();
while let Some((event, span)) = parser.next() {
if let jotdown::Event::Start(jotdown::Container::Heading { .. }, _attrs) = &event {
let mut events = vec![(event, span)];
for (event, span) in parser.by_ref() {
// To my knowledge headings cannot nest, so it's okay not to keep a stack here.
let is_heading = matches!(
event,
jotdown::Event::End(jotdown::Container::Heading { .. })
);
events.push((event, span));
if is_heading {
break;
}
}
let title_events: Vec<_> = events
.iter()
.filter(|(event, _)| {
!matches!(
event,
// A little repetitive, but I don't mind.
// The point of this is not to include extra <h3> and <a> in the link text,
// but to preserve other formatting such as bold, italic, code, etc.
jotdown::Event::Start(
jotdown::Container::Link(_, _) | jotdown::Container::Heading { .. },
_
) | jotdown::Event::End(
jotdown::Container::Link(_, _) | jotdown::Container::Heading { .. }
)
)
})
.cloned()
.collect();
let mut title = String::new();
let _render_diagnostics = djot::Renderer {
config: &sources.config,
dirs,
treehouse: &sources.treehouse,
file_id,
// How. Just, stop.
page_id: "liquidex-you-reeeeeal-dummy".into(),
}
.render(&title_events, &mut title);
let link = events.iter().find_map(|(event, _)| {
if let jotdown::Event::Start(jotdown::Container::Link(link, link_type), _) = event {
Some(link_url(sources, dirs, link, *link_type))
} else {
None
}
});
return ParsedEntry {
title: (!title.is_empty()).then_some(title),
link,
};
}
}
ParsedEntry {
title: None,
link: None,
}
}
fn link_url(sources: &Sources, dirs: &Dirs, url: &str, link_type: jotdown::LinkType) -> String {
if let jotdown::LinkType::Span(jotdown::SpanLinkType::Unresolved) = link_type {
if let Some(url) = resolve_link(&sources.config, &sources.treehouse, dirs, url) {
return url;
}
}
url.to_owned()
}
/// Extremely simple HTML renderer without the treehouse's fancy branch folding and linking features.
fn branches_to_html_simple(
s: &mut String,
sources: &Sources,
dirs: &Dirs,
file_id: FileId,
branches: &[SemaBranchId],
) {
s.push_str("<ul>");
for &branch_id in branches {
let branch = sources.treehouse.tree.branch(branch_id);
s.push_str("<li>");
let text = &sources.treehouse.source(file_id).input()[branch.content.clone()];
let events: Vec<_> = jotdown::Parser::new(text).into_offset_iter().collect();
// Ignore render diagnostics. Those should be reported by the main HTML generator.
let _render_diagnostics = djot::Renderer {
config: &sources.config,
dirs,
treehouse: &sources.treehouse,
file_id,
// Yeah, maybe don't include literate code in summaries...
page_id: "liquidex-is-a-dummy".into(),
}
.render(&events, s);
if !branch.children.is_empty() {
branches_to_html_simple(s, sources, dirs, file_id, &branch.children);
}
s.push_str("</li>");
}
s.push_str("</ul>");
}
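
A note on the `updated` field in `extract_entries`: branch ids are ULIDs, and a ULID carries its creation time in its upper 48 bits, so each entry's timestamp falls straight out of its permanent id — no separate date attribute is needed. A small sketch with a made-up id:

use chrono::{DateTime, Utc};
use ulid::Ulid;

// Not a real branch id; any syntactically valid ULID works for illustration.
let ulid = Ulid::from_string("01HVX5K9ZC8Q4R2T6Y8A0B3C5D").expect("valid ULID");
let updated: DateTime<Utc> = DateTime::from_timestamp_millis(ulid.timestamp_ms() as i64)
    .unwrap_or(DateTime::UNIX_EPOCH);
// This particular id decodes to an April 2024 timestamp.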

View file

@@ -1,7 +1,7 @@
use std::fmt::{self, Display, Write};
pub mod breadcrumbs;
mod djot;
pub mod djot;
pub mod highlight;
pub mod navmap;
pub mod tree;

View file

@@ -27,9 +27,7 @@ use super::highlight::highlight;
/// [`Render`] implementor that writes HTML output.
pub struct Renderer<'a> {
pub config: &'a Config,
pub dirs: &'a Dirs,
pub treehouse: &'a Treehouse,
pub file_id: FileId,
pub page_id: String,
@@ -226,7 +224,12 @@ impl<'a> Writer<'a> {
Container::Link(dst, ty) => {
if matches!(ty, LinkType::Span(SpanLinkType::Unresolved)) {
out.push_str("<a");
if let Some(resolved) = self.resolve_link(dst) {
if let Some(resolved) = resolve_link(
self.renderer.config,
self.renderer.treehouse,
self.renderer.dirs,
dst,
) {
out.push_str(r#" href=""#);
write_attr(&resolved, out);
out.push('"');
@@ -479,7 +482,12 @@ impl<'a> Writer<'a> {
out.push_str(r#"" src=""#);
if let SpanLinkType::Unresolved = link_type {
// TODO: Image size.
if let Some(resolved) = self.resolve_link(src) {
if let Some(resolved) = resolve_link(
self.renderer.config,
self.renderer.treehouse,
self.renderer.dirs,
src,
) {
write_attr(&resolved, out);
} else {
write_attr(src, out);
@@ -624,28 +632,6 @@ impl<'a> Writer<'a> {
Ok(())
}
fn resolve_link(&self, link: &str) -> Option<String> {
let Renderer {
config, treehouse, ..
} = &self.renderer;
link.split_once(':').and_then(|(kind, linked)| match kind {
"def" => config.defs.get(linked).cloned(),
"branch" => treehouse
.branches_by_named_id
.get(linked)
.map(|&branch_id| {
format!(
"{}/b?{}",
config.site,
treehouse.tree.branch(branch_id).attributes.id
)
}),
"page" => Some(config.page_url(linked)),
"pic" => Some(config.pic_url(&*self.renderer.dirs.pic, linked)),
_ => None,
})
}
}
fn write_text(s: &str, out: &mut String) {
@@ -677,3 +663,27 @@ fn write_escape(mut s: &str, escape_quotes: bool, out: &mut String) {
}
out.push_str(s);
}
pub fn resolve_link(
config: &Config,
treehouse: &Treehouse,
dirs: &Dirs,
link: &str,
) -> Option<String> {
link.split_once(':').and_then(|(kind, linked)| match kind {
"def" => config.defs.get(linked).cloned(),
"branch" => treehouse
.branches_by_named_id
.get(linked)
.map(|&branch_id| {
format!(
"{}/b?{}",
config.site,
treehouse.tree.branch(branch_id).attributes.id
)
}),
"page" => Some(config.page_url(linked)),
"pic" => Some(config.pic_url(&*dirs.pic, linked)),
_ => None,
})
}
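
With `resolve_link` extracted into a free function, the Atom generator can reuse the same link shorthands as the tree renderer. A usage sketch; the page name `about` is illustrative, and actual results depend on treehouse.toml and the loaded trees:

// "def:<key>"   -> looked up in config.defs
// "branch:<id>" -> "{site}/b?{branch ulid}" via branches_by_named_id
// "page:<path>" -> config.page_url(path)
// "pic:<name>"  -> config.pic_url(&dirs.pic, name)
let url = resolve_link(&sources.config, &sources.treehouse, &dirs, "page:about");
assert_eq!(url, Some(sources.config.page_url("about")));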

View file

@@ -67,6 +67,7 @@ pub struct FileId(usize);
pub struct Treehouse {
pub files: Vec<File>,
pub files_by_tree_path: HashMap<VPathBuf, FileId>,
pub feeds_by_name: HashMap<String, FileId>,
pub tree: SemaTree,
pub branches_by_named_id: HashMap<String, SemaBranchId>,
@@ -82,6 +83,7 @@ impl Treehouse {
Self {
files: vec![],
files_by_tree_path: HashMap::new(),
feeds_by_name: HashMap::new(),
tree: SemaTree::default(),
branches_by_named_id: HashMap::new(),

View file

@@ -61,7 +61,9 @@ impl SemaRoots {
branches: roots
.branches
.into_iter()
.map(|branch| SemaBranch::from_branch(treehouse, diagnostics, file_id, branch))
.map(|branch| {
SemaBranch::from_branch(treehouse, diagnostics, config, file_id, branch)
})
.collect(),
}
}
@@ -94,10 +96,40 @@ impl SemaRoots {
};
let successfully_parsed = successfully_parsed;
if successfully_parsed && attributes.title.is_empty() {
attributes.title = match treehouse.source(file_id) {
Source::Tree { tree_path, .. } => tree_path.to_string(),
_ => panic!("parse_attributes called for a non-.tree file"),
if successfully_parsed {
let attribute_warning_span = roots
.attributes
.as_ref()
.map(|attributes| attributes.percent.clone())
.unwrap_or(0..1);
if attributes.title.is_empty() {
attributes.title = match treehouse.source(file_id) {
Source::Tree { tree_path, .. } => tree_path.to_string(),
_ => panic!("parse_attributes called for a non-.tree file"),
}
}
if attributes.id.is_empty() {
attributes.id = format!("treehouse-missingno-{}", treehouse.next_missingno());
diagnostics.push(Diagnostic {
severity: Severity::Warning,
code: Some("attr".into()),
message: "page does not have an `id` attribute".into(),
labels: vec![Label {
style: LabelStyle::Primary,
file_id,
range: attribute_warning_span.clone(),
message: String::new(),
}],
notes: vec![
format!(
"note: a generated id `{}` will be used, but this id is unstable and will not persist across generations",
attributes.id
),
format!("help: run `treehouse fix {}` to add missing ids to pages", treehouse.path(file_id)),
],
});
}
}
@@ -139,6 +171,10 @@ impl SemaRoots {
}
}
if let Some(feed_name) = &attributes.feed {
treehouse.feeds_by_name.insert(feed_name.clone(), file_id);
}
attributes
}
}
@@ -163,10 +199,11 @@ impl SemaBranch {
pub fn from_branch(
treehouse: &mut Treehouse,
diagnostics: &mut Vec<Diagnostic<FileId>>,
config: &Config,
file_id: FileId,
branch: Branch,
) -> SemaBranchId {
let attributes = Self::parse_attributes(treehouse, diagnostics, file_id, &branch);
let attributes = Self::parse_attributes(treehouse, diagnostics, config, file_id, &branch);
let named_id = attributes.id.to_owned();
let html_id = format!(
@@ -189,7 +226,7 @@ impl SemaBranch {
children: branch
.children
.into_iter()
.map(|child| Self::from_branch(treehouse, diagnostics, file_id, child))
.map(|child| Self::from_branch(treehouse, diagnostics, config, file_id, child))
.collect(),
};
let new_branch_id = treehouse.tree.add_branch(branch);
@@ -260,6 +297,7 @@ impl SemaBranch {
fn parse_attributes(
treehouse: &mut Treehouse,
diagnostics: &mut Vec<Diagnostic<FileId>>,
config: &Config,
file_id: FileId,
branch: &Branch,
) -> Attributes {
@@ -354,6 +392,26 @@ impl SemaBranch {
})
}
}
// Check that each tag belongs to the allowed set.
for tag in &attributes.tags {
if !config.feed.tags.contains(tag) {
diagnostics.push(Diagnostic {
severity: Severity::Warning,
code: Some("attr".into()),
message: format!("tag `{tag}` is not within the set of allowed tags"),
labels: vec![Label {
style: LabelStyle::Primary,
file_id,
range: attribute_warning_span.clone(),
message: "".into(),
}],
notes: vec![
"note: tag should be one from the set defined in `feed.tags` in treehouse.toml".into(),
],
})
}
}
}
attributes
}

View file

@@ -7,6 +7,10 @@ use crate::{state::FileId, vfs::VPathBuf};
/// Top-level `%%` root attributes.
#[derive(Debug, Default, Clone, PartialEq, Eq, Deserialize, Serialize)]
pub struct RootAttributes {
/// Permanent ID of this page.
#[serde(default)]
pub id: String,
/// Template to use for generating the page.
/// Defaults to `_tree.hbs`.
#[serde(default)]
@@ -36,8 +40,10 @@ pub struct RootAttributes {
#[serde(default)]
pub styles: Vec<String>,
/// When specified, branches coming from this root will be added to a _feed_ with the given name.
/// Feeds can be read by Handlebars templates to generate content based on them.
/// When specified, this page will have a corresponding Atom feed under `feed/{feed}.atom`.
///
/// In feeds, top-level branches are expected to have a single heading containing the post title.
/// Their children are turned into the post description.
#[serde(default)]
pub feed: Option<String>,
}
@@ -97,6 +103,11 @@ pub struct Attributes {
/// List of extra `data` attributes to add to the block.
#[serde(default)]
pub data: HashMap<String, String>,
/// In feeds, specifies the list of tags to attach to an entry.
/// This only has an effect on top-level branches.
#[serde(default)]
pub tags: Vec<String>,
}
/// Controls for block content presentation.