water closet (actually a word count)

This commit is contained in:
liquidex 2024-05-19 13:05:28 +02:00
parent 07d069e2ef
commit 3e5f17018c
4 changed files with 83 additions and 2 deletions

View file

@ -2,6 +2,7 @@ pub mod fix;
pub mod generate;
mod parse;
pub mod serve;
pub mod wc;
use std::path::{Path, PathBuf};
@ -37,6 +38,9 @@ pub enum Command {
serve: ServeArgs,
},
/// Count words in the treehouse's branches.
Wc(#[clap(flatten)] WcArgs),
/// Generates a new ulid and prints it to stdout.
Ulid,
}
@ -75,6 +79,13 @@ pub struct ServeArgs {
pub port: u16,
}
// Command-line arguments of the `wc` (word count) subcommand.
// NOTE: regular `//` comments are used here on purpose; `///` doc comments on clap-derived
// items are picked up as help text, so adding them would change the CLI's output.
#[derive(Args)]
pub struct WcArgs {
/// A list of paths to report the word counts of.
/// If no paths are provided, the entire tree is word-counted.
// Left empty by the user to request a word count of every `.tree` file in the content directory.
pub paths: Vec<PathBuf>,
}
#[derive(Debug, Clone, Copy)]
pub struct Paths<'a> {
pub target_dir: &'a Path,

View file

@ -0,0 +1,69 @@
use std::{ffi::OsStr, path::Path};
use anyhow::Context;
use treehouse_format::ast::{Branch, Roots};
use walkdir::WalkDir;
use crate::{
cli::parse::parse_tree_with_diagnostics,
state::{Source, Treehouse},
};
use super::WcArgs;
/// Counts the whitespace-separated words of `branch` and, recursively, of all its descendants.
///
/// `source` is the text the branch was parsed from; `branch.content` is used as a range into it.
fn wc_branch(source: &str, branch: &Branch) -> usize {
    // Words in this branch's own content.
    let own_words = source[branch.content.clone()].split_whitespace().count();

    // Words contributed by every child subtree.
    let mut descendant_words = 0;
    for child in branch.children.iter() {
        descendant_words += wc_branch(source, child);
    }

    own_words + descendant_words
}
/// Sums the word counts of every top-level branch in `roots`, including their descendants.
///
/// `source` is the text that `roots` was parsed from.
fn wc_roots(source: &str, roots: &Roots) -> usize {
    let mut words = 0;
    for root_branch in roots.branches.iter() {
        words += wc_branch(source, root_branch);
    }
    words
}
/// Runs the `wc` subcommand: prints a word count for each file, followed by a grand total.
///
/// If `wc_args.paths` is empty, `content_dir` is walked recursively and every regular file with
/// the `tree` extension is counted. Otherwise only the listed paths are counted.
///
/// Files for which parsing returns an error are left out of both the per-file output and the
/// total. Diagnostics gathered in the `Treehouse` are reported after the total is printed.
///
/// # Errors
///
/// Returns an error if walking `content_dir` fails, if a file cannot be read, if a path does not
/// lie inside `content_dir`, or if reporting diagnostics fails.
pub fn wc_cli(content_dir: &Path, mut wc_args: WcArgs) -> anyhow::Result<()> {
    if wc_args.paths.is_empty() {
        for entry in WalkDir::new(content_dir) {
            let entry = entry?;
            if entry.file_type().is_file() && entry.path().extension() == Some(OsStr::new("tree")) {
                wc_args.paths.push(entry.into_path());
            }
        }
    }

    let mut treehouse = Treehouse::new();

    let mut total = 0;

    for path in &wc_args.paths {
        let file = std::fs::read_to_string(path)
            .with_context(|| format!("cannot read file to word count: {path:?}"))?;

        // The name registered with the treehouse is the path relative to the content directory,
        // without its extension.
        let path_without_ext = path.with_extension("");
        // Paths may come straight from the command line, so a path outside of the content
        // directory is a user error to report rather than a reason to panic.
        let utf8_filename = path_without_ext
            .strip_prefix(content_dir)
            .with_context(|| {
                format!("path is not inside the content directory {content_dir:?}: {path:?}")
            })?
            .to_string_lossy();

        let file_id = treehouse.add_file(utf8_filename.into_owned(), Source::Other(file));
        if let Ok(parsed) = parse_tree_with_diagnostics(&mut treehouse, file_id) {
            let source = treehouse.source(file_id);
            let word_count = wc_roots(source.input(), &parsed);
            println!("{word_count:>8} {}", treehouse.filename(file_id));
            total += word_count;
        }
    }

    println!("{total:>8} total");

    treehouse.report_diagnostics()?;

    Ok(())
}

View file

@ -5,6 +5,7 @@ use cli::{
fix::{fix_all_cli, fix_file_cli},
generate::regenerate_or_report_error,
serve::serve,
wc::wc_cli,
Command, Paths, ProgramArgs,
};
use log::{error, info, warn};
@ -50,6 +51,8 @@ async fn fallible_main() -> anyhow::Result<()> {
Command::Fix(fix_args) => fix_file_cli(fix_args)?,
Command::FixAll(fix_args) => fix_all_cli(fix_args, &paths)?,
Command::Wc(wc_args) => wc_cli(paths.content_dir, wc_args)?,
Command::Ulid => {
let mut rng = rand::thread_rng();
let ulid = ulid::Generator::new()

View file

@ -144,7 +144,6 @@ pub struct SemaBranch {
pub file_id: FileId,
pub indent_level: usize,
pub raw_attributes: Option<treehouse_format::pull::Attributes>,
pub kind: BranchKind,
pub kind_span: Range<usize>,
pub content: Range<usize>,
@ -170,7 +169,6 @@ impl SemaBranch {
let branch = Self {
file_id,
indent_level: branch.indent_level,
raw_attributes: branch.attributes,
kind: branch.kind,
kind_span: branch.kind_span,
content: branch.content,