treehouse/crates/treehouse/src/generate/atom.rs

use std::{fmt, sync::Arc};

use anyhow::Context;
use chrono::{DateTime, Utc};
use handlebars::Handlebars;
use serde::Serialize;
use tracing::{info, info_span, instrument};
use ulid::Ulid;

use crate::{
    dirs::Dirs,
    html::djot::{self, resolve_link},
    sources::Sources,
    state::FileId,
    tree::SemaBranchId,
    vfs::{self, Content, Dir, Entries, VPath, VPathBuf},
};

use super::BaseTemplateData;

/// Virtual directory that exposes the treehouse's feeds as Atom documents.
///
/// Listing the root yields one `<name>.atom` entry per feed registered in `sources`; querying
/// the content of such a path generates the corresponding feed document.
pub struct FeedDir {
    /// Passed through to the Atom generator.
    dirs: Arc<Dirs>,
    /// Holds the feed name -> file mapping and the content the feeds are generated from.
    sources: Arc<Sources>,
    /// Template registry used to render the feed document.
    handlebars: Arc<Handlebars<'static>>,
}

impl FeedDir {
    /// Creates a feed directory backed by the given shared state.
    pub fn new(
        dirs: Arc<Dirs>,
        sources: Arc<Sources>,
        handlebars: Arc<Handlebars<'static>>,
    ) -> Self {
        Self {
            dirs,
            sources,
            handlebars,
        }
    }

    /// Lists the children of `path`: one `<feed name>.atom` file per known feed at the root,
    /// and nothing for any other path.
    fn entries(&self, path: &VPath) -> Vec<VPathBuf> {
        if path != VPath::ROOT {
            return vec![];
        }

        let feed_names = self.sources.treehouse.feeds_by_name.keys();
        feed_names
            .map(|feed| VPathBuf::new(format!("{feed}.atom")))
            .collect()
    }

    /// Produces the Atom document served at `path`.
    ///
    /// Returns `None` unless `path` has the `atom` extension and, with that extension
    /// stripped, names a known feed. Generation failures do not yield `None`; they end up as
    /// error text in the returned content (see `generate_or_error`).
    fn content(&self, path: &VPath) -> Option<Content> {
        if path.extension() != Some("atom") {
            return None;
        }

        let feed_name = path.with_extension("").to_string();
        let file_id = *self.sources.treehouse.feeds_by_name.get(&feed_name)?;
        let atom = generate_or_error(&self.sources, &self.dirs, &self.handlebars, file_id);

        Some(Content::new("application/atom+xml", atom.into()))
    }
}

impl Dir for FeedDir {
    /// Answers a VFS query for `path` by offering the directory listing and, when `path` names
    /// a feed, the generated Atom content.
    fn query(&self, path: &VPath, query: &mut vfs::Query) {
        let listing = || Entries(self.entries(path));
        let atom_document = || self.content(path);

        query.provide(listing);
        query.try_provide(atom_document);
    }
}

impl fmt::Debug for FeedDir {
    /// Prints only the type name; none of the fields are shown.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "FeedDir")
    }
}

/// Feed-level data handed to the `_feed_atom.hbs` template.
#[derive(Serialize)]
struct Feed {
    /// Value of the feed page's `feed` attribute.
    name: String,
    /// Time at which the feed document was generated.
    updated: DateTime<Utc>,
    /// Entries extracted from the feed page's top-level branches.
    entries: Vec<Entry>,
}

/// A single feed entry, built from one top-level branch of the feed page.
#[derive(Serialize)]
struct Entry {
    /// The branch's `id` attribute.
    id: String,
    /// The branch's timestamp, or the Unix epoch when the branch has none.
    updated: DateTime<Utc>,
    /// Target of the first link found in the branch's first heading.
    url: String,
    /// The first heading rendered without its heading and link wrappers, or `"untitled"` when
    /// that rendering is empty.
    title: String,
    /// The branch's tags.
    categories: Vec<String>,
    /// The branch's children rendered as a nested `<ul>` list.
    summary: String,
}

/// Root object serialized for the `_feed_atom.hbs` template.
#[derive(Serialize)]
struct AtomTemplateData<'a> {
    /// Shared template data; flattened so its fields are serialized at the same level as
    /// `feed`.
    #[serde(flatten)]
    base: &'a BaseTemplateData<'a>,
    /// The feed being rendered.
    feed: Feed,
}

/// Renders the Atom document for the feed page `file_id` using the `_feed_atom.hbs` template.
///
/// # Errors
///
/// Returns an error when template rendering fails.
///
/// # Panics
///
/// Panics when the page has no `feed` attribute. Looking up `file_id` in the roots table is
/// done by indexing, which presumably panics as well for an unknown file.
#[instrument(name = "atom::generate", skip(sources, handlebars))]
pub fn generate(
    sources: &Sources,
    dirs: &Dirs,
    handlebars: &Handlebars,
    file_id: FileId,
) -> anyhow::Result<String> {
    let name = sources.treehouse.roots[&file_id]
        .attributes
        .feed
        .clone()
        .expect("page must be a feed");

    let base = BaseTemplateData::new(sources);
    // The timestamp is sampled on every call. It is the content cache layer's job to make sure
    // that happens only once per deployment of the treehouse.
    let updated = Utc::now();
    let entries = extract_entries(sources, dirs, file_id);
    let template_data = AtomTemplateData {
        base: &base,
        feed: Feed {
            name,
            updated,
            entries,
        },
    };

    let _span = info_span!("handlebars::render").entered();
    let rendered = handlebars.render("_feed_atom.hbs", &template_data);
    rendered.context("template rendering failed")
}

/// Like [`generate`], but never fails: a generation error is turned into a string of the form
/// `error: <debug representation of the error>` and returned in place of the document.
pub fn generate_or_error(
    sources: &Sources,
    dirs: &Dirs,
    handlebars: &Handlebars,
    file_id: FileId,
) -> String {
    generate(sources, dirs, handlebars, file_id)
        .unwrap_or_else(|error| format!("error: {error:?}"))
}

fn extract_entries(sources: &Sources, dirs: &Dirs, file_id: FileId) -> Vec<Entry> {
    let roots = &sources.treehouse.roots[&file_id];

    roots
        .branches
        .iter()
        .flat_map(|&branch_id| {
            let branch = sources.treehouse.tree.branch(branch_id);

            let text = &sources.treehouse.source(file_id).input()[branch.content.clone()];
            let parsed = parse_entry(sources, dirs, file_id, jotdown::Parser::new(text));

            let mut summary = String::new();
            branches_to_html_simple(&mut summary, sources, dirs, file_id, &branch.children);

            let updated = branch
                .attributes
                .timestamp()
                .unwrap_or(DateTime::UNIX_EPOCH); // if you see the Unix epoch... oops

            parsed.link.map(|url| Entry {
                id: branch.attributes.id.clone(),
                updated,
                url,
                title: parsed.title.unwrap_or_else(|| "untitled".into()),
                categories: branch.attributes.tags.clone(),
                summary,
            })
        })
        .collect()
}

/// What `parse_entry` extracts from the first heading of a branch.
#[derive(Debug, Clone)]
struct ParsedEntry {
    /// The heading rendered without its heading and link wrappers; `None` when there is no
    /// heading or the rendering is empty.
    title: Option<String>,
    /// Target of the first link inside the heading; `None` when there is no heading or the
    /// heading contains no link.
    link: Option<String>,
}

/// Extracts a feed entry's title and link from the first heading produced by `parser`.
///
/// Only the first heading is considered. Its events, minus the heading and link wrappers, are
/// rendered into the title; the target of the first link inside it becomes the link. When
/// there is no heading at all, both fields are `None`.
fn parse_entry(
    sources: &Sources,
    dirs: &Dirs,
    file_id: FileId,
    parser: jotdown::Parser,
) -> ParsedEntry {
    let mut stream = parser.into_offset_iter();

    // Skip everything in front of the first heading.
    let opening = stream.find(|(event, _)| {
        matches!(
            event,
            jotdown::Event::Start(jotdown::Container::Heading { .. }, _)
        )
    });
    let mut heading = match opening {
        Some(first) => vec![first],
        None => {
            return ParsedEntry {
                title: None,
                link: None,
            }
        }
    };

    // Collect up to and including the end of the heading. Headings are not expected to nest,
    // so the first heading end seen is taken to close the one that was just opened.
    for (event, span) in stream {
        let closes_heading = matches!(
            event,
            jotdown::Event::End(jotdown::Container::Heading { .. })
        );
        heading.push((event, span));
        if closes_heading {
            break;
        }
    }

    // The title keeps inline formatting (bold, italic, code, ...) but must not carry the
    // heading's own <h3> or the link's <a>, so those start/end events are left out.
    let mut title_events = Vec::new();
    for pair in &heading {
        let is_wrapper = matches!(
            &pair.0,
            jotdown::Event::Start(
                jotdown::Container::Link(_, _) | jotdown::Container::Heading { .. },
                _
            ) | jotdown::Event::End(
                jotdown::Container::Link(_, _) | jotdown::Container::Heading { .. }
            )
        );
        if !is_wrapper {
            title_events.push(pair.clone());
        }
    }

    let mut title = String::new();
    // Diagnostics from rendering the title are deliberately dropped.
    let _render_diagnostics = djot::Renderer {
        config: &sources.config,
        dirs,
        treehouse: &sources.treehouse,
        file_id,

        // Placeholder page ID; the title is not a real page.
        page_id: "liquidex-you-reeeeeal-dummy".into(),
    }
    .render(&title_events, &mut title);

    let link = heading.iter().find_map(|(event, _)| match event {
        jotdown::Event::Start(jotdown::Container::Link(target, kind), _) => {
            Some(link_url(sources, dirs, target, *kind))
        }
        _ => None,
    });

    ParsedEntry {
        title: if title.is_empty() { None } else { Some(title) },
        link,
    }
}

fn link_url(sources: &Sources, dirs: &Dirs, url: &str, link_type: jotdown::LinkType) -> String {
    if let jotdown::LinkType::Span(jotdown::SpanLinkType::Unresolved) = link_type {
        if let Some(url) = resolve_link(&sources.config, &sources.treehouse, dirs, url) {
            return url;
        }
    }
    url.to_owned()
}

/// Bare-bones HTML renderer that skips the treehouse's branch folding and linking features.
///
/// Appends `branches` to `s` as a `<ul>` with one `<li>` per branch, each holding the branch's
/// rendered content followed by a nested list of its children. Branches without children get
/// no nested list, but the outermost `<ul>` is emitted even when `branches` is empty.
fn branches_to_html_simple(
    s: &mut String,
    sources: &Sources,
    dirs: &Dirs,
    file_id: FileId,
    branches: &[SemaBranchId],
) {
    s.push_str("<ul>");

    for branch in branches
        .iter()
        .map(|&id| sources.treehouse.tree.branch(id))
    {
        s.push_str("<li>");

        let content = &sources.treehouse.source(file_id).input()[branch.content.clone()];
        let events = jotdown::Parser::new(content)
            .into_offset_iter()
            .collect::<Vec<_>>();
        // Render diagnostics are dropped here; reporting them is left to the main HTML
        // generator.
        let _render_diagnostics = djot::Renderer {
            config: &sources.config,
            dirs,
            treehouse: &sources.treehouse,
            file_id,

            // Placeholder page ID: summaries are not meant to include literate code.
            page_id: "liquidex-is-a-dummy".into(),
        }
        .render(&events, s);

        let has_children = !branch.children.is_empty();
        if has_children {
            branches_to_html_simple(s, sources, dirs, file_id, &branch.children);
        }

        s.push_str("</li>");
    }

    s.push_str("</ul>");
}