From 3e5f17018c459402bdcb6d2b8bd9e6217932d87d Mon Sep 17 00:00:00 2001
From: liquidev
Date: Sun, 19 May 2024 13:05:28 +0200
Subject: [PATCH] water closet (actually a word count)

---
Reviewer notes (this section is ignored when the patch is applied):

* This copy of the patch had lost its line structure and every generic
  argument. `Vec<PathBuf>`, `sum::<usize>()` and `Range<usize>` for `content`
  follow from how the values are used. `Range<usize>` for `kind_span` and
  `Option<treehouse_format::pull::Attributes>` for the removed `raw_attributes`
  field are assumptions - TODO confirm both against tree.rs before applying.
* The author e-mail on the `From:` line appears to have been lost as well and
  is not restored here.
* `wc` now returns an error instead of panicking when a path given on the
  command line is not inside the content directory, and the new functions are
  documented. The blob id on the wc.rs `index` line predates these edits.

 crates/treehouse/src/cli.rs    | 11 ++++++
 crates/treehouse/src/cli/wc.rs | 82 ++++++++++++++++++++++++++++++++++
 crates/treehouse/src/main.rs   |  3 ++
 crates/treehouse/src/tree.rs   |  2 -
 4 files changed, 96 insertions(+), 2 deletions(-)
 create mode 100644 crates/treehouse/src/cli/wc.rs

diff --git a/crates/treehouse/src/cli.rs b/crates/treehouse/src/cli.rs
index 31bfe0e..9cf0937 100644
--- a/crates/treehouse/src/cli.rs
+++ b/crates/treehouse/src/cli.rs
@@ -2,6 +2,7 @@ pub mod fix;
 pub mod generate;
 mod parse;
 pub mod serve;
+pub mod wc;
 
 use std::path::{Path, PathBuf};
 
@@ -37,6 +38,9 @@ pub enum Command {
         serve: ServeArgs,
     },
 
+    /// Count words in the treehouse's branches.
+    Wc(#[clap(flatten)] WcArgs),
+
     /// Generates a new ulid and prints it to stdout.
     Ulid,
 }
@@ -75,6 +79,13 @@ pub struct ServeArgs {
     pub port: u16,
 }
 
+#[derive(Args)]
+pub struct WcArgs {
+    /// A list of paths to report the word counts of.
+    /// If no paths are provided, the entire tree is word-counted.
+    pub paths: Vec<PathBuf>,
+}
+
 #[derive(Debug, Clone, Copy)]
 pub struct Paths<'a> {
     pub target_dir: &'a Path,
diff --git a/crates/treehouse/src/cli/wc.rs b/crates/treehouse/src/cli/wc.rs
new file mode 100644
index 0000000..bd9e2cc
--- /dev/null
+++ b/crates/treehouse/src/cli/wc.rs
@@ -0,0 +1,82 @@
+use std::{ffi::OsStr, path::Path};
+
+use anyhow::Context;
+use treehouse_format::ast::{Branch, Roots};
+use walkdir::WalkDir;
+
+use crate::{
+    cli::parse::parse_tree_with_diagnostics,
+    state::{Source, Treehouse},
+};
+
+use super::WcArgs;
+
+/// Counts the whitespace-separated words in `branch`'s content and, recursively, in all of its
+/// children. `source` is the text that the branch's `content` range indexes into.
+fn wc_branch(source: &str, branch: &Branch) -> usize {
+    let word_count = source[branch.content.clone()].split_whitespace().count();
+    word_count
+        + branch
+            .children
+            .iter()
+            .map(|branch| wc_branch(source, branch))
+            .sum::<usize>()
+}
+
+/// Sums the word counts of every top-level branch in `roots`.
+fn wc_roots(source: &str, roots: &Roots) -> usize {
+    roots
+        .branches
+        .iter()
+        .map(|branch| wc_branch(source, branch))
+        .sum()
+}
+
+/// Prints a per-file word count and a grand total for the given paths.
+///
+/// When `wc_args.paths` is empty, every `.tree` file found under `content_dir` is counted.
+/// Files that fail to parse are left out of the counts.
+///
+/// # Errors
+///
+/// Fails if the directory walk fails, a file cannot be read, a path does not lie within
+/// `content_dir`, or `report_diagnostics` returns an error.
+pub fn wc_cli(content_dir: &Path, mut wc_args: WcArgs) -> anyhow::Result<()> {
+    if wc_args.paths.is_empty() {
+        for entry in WalkDir::new(content_dir) {
+            let entry = entry?;
+            if entry.file_type().is_file() && entry.path().extension() == Some(OsStr::new("tree")) {
+                wc_args.paths.push(entry.into_path());
+            }
+        }
+    }
+
+    let mut treehouse = Treehouse::new();
+
+    let mut total = 0;
+
+    for path in &wc_args.paths {
+        let file = std::fs::read_to_string(path)
+            .with_context(|| format!("cannot read file to word count: {path:?}"))?;
+        let path_without_ext = path.with_extension("");
+        // Paths may come from the command line, so a foreign path is a user error, not a bug.
+        let utf8_filename = path_without_ext
+            .strip_prefix(content_dir)
+            .with_context(|| format!("path is not within the content directory: {path:?}"))?
+            .to_string_lossy();
+
+        let file_id = treehouse.add_file(utf8_filename.into_owned(), Source::Other(file));
+        if let Ok(parsed) = parse_tree_with_diagnostics(&mut treehouse, file_id) {
+            let source = treehouse.source(file_id);
+            let word_count = wc_roots(source.input(), &parsed);
+            println!("{word_count:>8} {}", treehouse.filename(file_id));
+            total += word_count;
+        }
+    }
+
+    println!("{total:>8} total");
+
+    treehouse.report_diagnostics()?;
+
+    Ok(())
+}
diff --git a/crates/treehouse/src/main.rs b/crates/treehouse/src/main.rs
index 7cd81b8..0e380d3 100644
--- a/crates/treehouse/src/main.rs
+++ b/crates/treehouse/src/main.rs
@@ -5,6 +5,7 @@ use cli::{
     fix::{fix_all_cli, fix_file_cli},
     generate::regenerate_or_report_error,
     serve::serve,
+    wc::wc_cli,
     Command, Paths, ProgramArgs,
 };
 use log::{error, info, warn};
@@ -50,6 +51,8 @@ async fn fallible_main() -> anyhow::Result<()> {
         Command::Fix(fix_args) => fix_file_cli(fix_args)?,
         Command::FixAll(fix_args) => fix_all_cli(fix_args, &paths)?,
 
+        Command::Wc(wc_args) => wc_cli(paths.content_dir, wc_args)?,
+
         Command::Ulid => {
             let mut rng = rand::thread_rng();
             let ulid = ulid::Generator::new()
diff --git a/crates/treehouse/src/tree.rs b/crates/treehouse/src/tree.rs
index fc6fac5..ee11f9a 100644
--- a/crates/treehouse/src/tree.rs
+++ b/crates/treehouse/src/tree.rs
@@ -144,7 +144,6 @@ pub struct SemaBranch {
     pub file_id: FileId,
 
     pub indent_level: usize,
-    pub raw_attributes: Option<treehouse_format::pull::Attributes>,
     pub kind: BranchKind,
     pub kind_span: Range<usize>,
     pub content: Range<usize>,
@@ -170,7 +169,6 @@ impl SemaBranch {
         let branch = Self {
             file_id,
             indent_level: branch.indent_level,
-            raw_attributes: branch.attributes,
             kind: branch.kind,
             kind_span: branch.kind_span,
             content: branch.content,