From 1e1b8df4575cf7477c429b0c26cc9b66d3fef6bd Mon Sep 17 00:00:00 2001 From: liquidev Date: Fri, 8 Nov 2024 14:52:32 +0100 Subject: [PATCH] refactor: introduce virtual file system as a central router for source and target data --- content/treehouse/vfs.tree | 17 ++ crates/treehouse/src/generate.rs | 3 +- crates/treehouse/src/lib.rs | 14 ++ crates/treehouse/src/main.rs | 20 +-- crates/treehouse/src/vfs.rs | 151 ++++++++++++++++++ crates/treehouse/src/vfs/empty.rs | 17 ++ crates/treehouse/src/vfs/file.rs | 30 ++++ crates/treehouse/src/vfs/mount_points.rs | 92 +++++++++++ crates/treehouse/src/vfs/physical.rs | 78 +++++++++ crates/treehouse/tests/it/main.rs | 1 + crates/treehouse/tests/it/vfs.rs | 4 + crates/treehouse/tests/it/vfs/empty.rs | 16 ++ crates/treehouse/tests/it/vfs/file.rs | 29 ++++ crates/treehouse/tests/it/vfs/mount_points.rs | 50 ++++++ crates/treehouse/tests/it/vfs/physical.rs | 37 +++++ .../treehouse/tests/it/vfs_physical/test.txt | 1 + 16 files changed, 541 insertions(+), 19 deletions(-) create mode 100644 content/treehouse/vfs.tree create mode 100644 crates/treehouse/src/lib.rs create mode 100644 crates/treehouse/src/vfs.rs create mode 100644 crates/treehouse/src/vfs/empty.rs create mode 100644 crates/treehouse/src/vfs/file.rs create mode 100644 crates/treehouse/src/vfs/mount_points.rs create mode 100644 crates/treehouse/src/vfs/physical.rs create mode 100644 crates/treehouse/tests/it/main.rs create mode 100644 crates/treehouse/tests/it/vfs.rs create mode 100644 crates/treehouse/tests/it/vfs/empty.rs create mode 100644 crates/treehouse/tests/it/vfs/file.rs create mode 100644 crates/treehouse/tests/it/vfs/mount_points.rs create mode 100644 crates/treehouse/tests/it/vfs/physical.rs create mode 100644 crates/treehouse/tests/it/vfs_physical/test.txt diff --git a/content/treehouse/vfs.tree b/content/treehouse/vfs.tree new file mode 100644 index 0000000..e4f9a31 --- /dev/null +++ b/content/treehouse/vfs.tree @@ -0,0 +1,17 @@ +%% title = "treehouse virtual 
file system design" + +- notes on the design; this is not an actual listing of the virtual file system + +- `content` - `GitDir(".", "content")` + + - `GitDir` is a special filesystem which makes all files have subpaths with commit data sourced from git. + their entries are ordered by how new/old a commit is + + - `inner/` - contains the file content and a revision info fork + + - `inner/latest` - same but for the latest revision, if applicable. + this may be the working tree + +- `template` - `PhysicalDir("template")` + +- `static` - `PhysicalDir("static")` diff --git a/crates/treehouse/src/generate.rs b/crates/treehouse/src/generate.rs index ad197bc..774c040 100644 --- a/crates/treehouse/src/generate.rs +++ b/crates/treehouse/src/generate.rs @@ -17,6 +17,7 @@ use serde::Serialize; use walkdir::WalkDir; use crate::{ + cli::Paths, config::{Config, ConfigDerivedData}, fun::seasons::Season, history::History, @@ -35,8 +36,6 @@ use crate::{ use crate::state::{FileId, Treehouse}; -use super::Paths; - #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum LatestRevision { /// The working tree is treated as the latest revision. 
diff --git a/crates/treehouse/src/lib.rs b/crates/treehouse/src/lib.rs
new file mode 100644
index 0000000..68fde5a
--- /dev/null
+++ b/crates/treehouse/src/lib.rs
@@ -0,0 +1,14 @@
+pub mod cli;
+pub mod config;
+pub mod fun;
+pub mod generate;
+pub mod history;
+pub mod html;
+pub mod import_map;
+pub mod include_static;
+pub mod parse;
+pub mod paths;
+pub mod state;
+pub mod static_urls;
+pub mod tree;
+pub mod vfs;
diff --git a/crates/treehouse/src/main.rs b/crates/treehouse/src/main.rs
index f38a216..eba04d1 100644
--- a/crates/treehouse/src/main.rs
+++ b/crates/treehouse/src/main.rs
@@ -1,28 +1,14 @@
 use std::path::Path;
 
 use clap::Parser;
-use cli::{
+use log::{error, info, warn};
+use treehouse::cli::{
     fix::{fix_all_cli, fix_file_cli},
     serve::serve,
     wc::wc_cli,
     Command, Paths, ProgramArgs,
 };
-use generate::{regenerate_or_report_error, LatestRevision};
-use log::{error, info, warn};
-
-mod cli;
-mod config;
-mod fun;
-mod generate;
-mod history;
-mod html;
-mod import_map;
-mod include_static;
-mod parse;
-mod paths;
-mod state;
-mod static_urls;
-mod tree;
+use treehouse::generate::{regenerate_or_report_error, LatestRevision};
 
 async fn fallible_main() -> anyhow::Result<()> {
     let args = ProgramArgs::parse();
diff --git a/crates/treehouse/src/vfs.rs b/crates/treehouse/src/vfs.rs
new file mode 100644
index 0000000..657093c
--- /dev/null
+++ b/crates/treehouse/src/vfs.rs
@@ -0,0 +1,219 @@
+use std::{borrow::Borrow, fmt, ops::Deref};
+
+use anyhow::ensure;
+
+pub mod empty;
+pub mod file;
+pub mod mount_points;
+pub mod physical;
+
+/// A borrowed path inside the virtual file system.
+///
+/// Segments are separated by [`VPath::SEPARATOR`]. A path never starts or ends with the
+/// separator; the empty string is the root, [`VPath::ROOT`].
+// `repr(transparent)` gives `VPath` the layout of `str`, which the reference cast in
+// `new_unchecked` relies on.
+#[derive(PartialEq, Eq, PartialOrd, Ord, Hash)]
+#[repr(transparent)]
+pub struct VPath {
+    path: str,
+}
+
+impl VPath {
+    pub const SEPARATOR: char = '/';
+    // SAFETY: The empty string neither starts nor ends with a separator.
+    pub const ROOT: &Self = unsafe { Self::new_unchecked("") };
+
+    /// Validates `s` and reinterprets it as a path.
+    ///
+    /// # Errors
+    ///
+    /// Fails if `s` starts or ends with [`VPath::SEPARATOR`].
+    pub fn try_new(s: &str) -> anyhow::Result<&Self> {
+        ensure!(
+            !s.ends_with(Self::SEPARATOR),
+            "path must not end with '{}'",
+            Self::SEPARATOR
+        );
+        ensure!(
+            !s.starts_with(Self::SEPARATOR),
+            "paths are always absolute and must not start with '{}'",
+            Self::SEPARATOR
+        );
+
+        // SAFETY: Both invariants were checked above.
+        Ok(unsafe { Self::new_unchecked(s) })
+    }
+
+    /// Like [`VPath::try_new`], for paths known to be valid.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `s` starts or ends with [`VPath::SEPARATOR`].
+    pub fn new(s: &str) -> &Self {
+        Self::try_new(s).expect("invalid path")
+    }
+
+    /// # Safety
+    ///
+    /// `s` must neither start nor end with [`VPath::SEPARATOR`].
+    const unsafe fn new_unchecked(s: &str) -> &Self {
+        // SAFETY: `VPath` is a `repr(transparent)` wrapper around `str`, so the two reference
+        // types have the same layout.
+        unsafe { &*(s as *const str as *const Self) }
+    }
+
+    /// Appends `sub` to this path, separated by [`VPath::SEPARATOR`] if both are non-empty.
+    ///
+    /// # Errors
+    ///
+    /// Fails if `sub` is not a valid path.
+    pub fn try_join(&self, sub: &str) -> anyhow::Result<VPathBuf> {
+        let sub = VPath::try_new(sub)?;
+        let mut buf = VPathBuf::from(self);
+        if !sub.path.is_empty() {
+            // Joining onto the root must not produce a leading separator.
+            if !buf.path.is_empty() {
+                buf.path.push(Self::SEPARATOR);
+            }
+            buf.path.push_str(&sub.path);
+        }
+        Ok(buf)
+    }
+
+    /// Like [`VPath::try_join`], for subpaths known to be valid.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `sub` is not a valid path.
+    pub fn join(&self, sub: &str) -> VPathBuf {
+        self.try_join(sub).expect("invalid subpath")
+    }
+
+    /// Returns this path relative to `prefix`, or `None` if it is not at or under `prefix`.
+    ///
+    /// A path equal to `prefix` yields [`VPath::ROOT`], and every path is under the root.
+    /// Prefixes match whole segments only: `a/bc` is not under `a/b`.
+    pub fn strip_prefix(&self, prefix: &VPath) -> Option<&Self> {
+        if self == prefix {
+            return Some(VPath::ROOT);
+        }
+        if prefix.path.is_empty() {
+            return Some(self);
+        }
+        self.path
+            .strip_prefix(&prefix.path)
+            .and_then(|p| p.strip_prefix(Self::SEPARATOR))
+            // Revalidate instead of assuming: a malformed `a//b` would otherwise leave a leading
+            // separator behind.
+            .and_then(|p| VPath::try_new(p).ok())
+    }
+
+    /// Counts the separators in this path; the root and single-segment paths have depth 0.
+    pub fn depth(&self) -> usize {
+        self.path.chars().filter(|&c| c == Self::SEPARATOR).count()
+    }
+
+    /// Returns the path as a string slice.
+    pub fn as_str(&self) -> &str {
+        &self.path
+    }
+}
+
+impl ToOwned for VPath {
+    type Owned = VPathBuf;
+
+    fn to_owned(&self) -> Self::Owned {
+        VPathBuf::from(self)
+    }
+}
+
+impl fmt::Debug for VPath {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.write_str(&self.path)
+    }
+}
+
+/// An owned [`VPath`].
+#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct VPathBuf {
+    path: String,
+}
+
+impl VPathBuf {
+    /// Like [`VPathBuf::try_new`], for paths known to be valid.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `path` starts or ends with [`VPath::SEPARATOR`].
+    pub fn new(path: impl Into<String>) -> Self {
+        Self::try_new(path).expect("invalid path")
+    }
+
+    /// Validates `path` and takes ownership of it.
+    ///
+    /// # Errors
+    ///
+    /// Fails if `path` starts or ends with [`VPath::SEPARATOR`].
+    pub fn try_new(path: impl Into<String>) -> anyhow::Result<Self> {
+        let path = path.into();
+        VPath::try_new(&path)?;
+        Ok(Self { path })
+    }
+
+    /// # Safety
+    ///
+    /// `path` must neither start nor end with [`VPath::SEPARATOR`].
+    unsafe fn new_unchecked(path: String) -> Self {
+        Self { path }
+    }
+}
+
+impl Deref for VPathBuf {
+    type Target = VPath;
+
+    fn deref(&self) -> &Self::Target {
+        // SAFETY: Every way of building a `VPathBuf` upholds the invariants of `VPath`.
+        unsafe { VPath::new_unchecked(&self.path) }
+    }
+}
+
+impl fmt::Debug for VPathBuf {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.write_str(&self.path)
+    }
+}
+
+impl From<&VPath> for VPathBuf {
+    fn from(value: &VPath) -> Self {
+        // SAFETY: `value` is already a valid path.
+        unsafe { Self::new_unchecked(value.path.to_owned()) }
+    }
+}
+
+impl Borrow<VPath> for VPathBuf {
+    fn borrow(&self) -> &VPath {
+        self
+    }
+}
+
+/// An entry returned by [`ReadFilesystem::dir`].
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct DirEntry {
+    pub path: VPathBuf,
+}
+
+/// A read-only file system addressed by [`VPath`]s.
+pub trait ReadFilesystem {
+    /// List all files under the provided path.
+    fn dir(&self, path: &VPath) -> Vec<DirEntry>;
+
+    /// Get a string signifying the current version of the provided path's content.
+    /// If the content changes, the version must also change.
+    ///
+    /// Returns None if there is no content or no version string is available.
+    fn content_version(&self, path: &VPath) -> Option<String>;
+
+    /// Return the byte content of the entry at the given path.
+    fn content(&self, path: &VPath) -> Option<Vec<u8>>;
+}
diff --git a/crates/treehouse/src/vfs/empty.rs b/crates/treehouse/src/vfs/empty.rs
new file mode 100644
index 0000000..f738900
--- /dev/null
+++ b/crates/treehouse/src/vfs/empty.rs
@@ -0,0 +1,18 @@
+use super::{DirEntry, ReadFilesystem, VPath};
+
+/// A file system with no entries and no content.
+pub struct EmptyFilesystem;
+
+impl ReadFilesystem for EmptyFilesystem {
+    fn dir(&self, _path: &VPath) -> Vec<DirEntry> {
+        vec![]
+    }
+
+    fn content_version(&self, _path: &VPath) -> Option<String> {
+        None
+    }
+
+    fn content(&self, _path: &VPath) -> Option<Vec<u8>> {
+        None
+    }
+}
diff --git a/crates/treehouse/src/vfs/file.rs b/crates/treehouse/src/vfs/file.rs
new file mode 100644
index 0000000..af4f79d
--- /dev/null
+++ b/crates/treehouse/src/vfs/file.rs
@@ -0,0 +1,28 @@
+use super::{DirEntry, ReadFilesystem, VPath};
+
+/// A single in-memory file, served at the root of its own file system.
+pub struct BufferedFile {
+    pub content: Vec<u8>,
+}
+
+impl BufferedFile {
+    pub fn new(content: Vec<u8>) -> Self {
+        Self { content }
+    }
+}
+
+impl ReadFilesystem for BufferedFile {
+    fn dir(&self, _path: &VPath) -> Vec<DirEntry> {
+        vec![]
+    }
+
+    fn content_version(&self, _path: &VPath) -> Option<String> {
+        // TODO: BufferedFile should _probably_ calculate a content_version.
+        None
+    }
+
+    fn content(&self, path: &VPath) -> Option<Vec<u8>> {
+        // The file itself sits at the root; it has nothing underneath.
+        (path == VPath::ROOT).then(|| self.content.clone())
+    }
+}
diff --git a/crates/treehouse/src/vfs/mount_points.rs b/crates/treehouse/src/vfs/mount_points.rs
new file mode 100644
index 0000000..7950f29
--- /dev/null
+++ b/crates/treehouse/src/vfs/mount_points.rs
@@ -0,0 +1,111 @@
+use std::collections::HashMap;
+
+use super::{DirEntry, ReadFilesystem, VPath, VPathBuf};
+
+/// Routes the first segment of a path to one of several named file systems.
+pub struct MountPoints {
+    mount_points: HashMap<String, Box<dyn ReadFilesystem>>,
+}
+
+/// The outcome of routing a path through [`MountPoints`].
+enum Resolved<'fs, 'path> {
+    /// The path is the root of the `MountPoints` itself.
+    Root,
+    /// The path is at or under a mount point; `subpath` is relative to that mount point.
+    MountPoint {
+        fs: &'fs dyn ReadFilesystem,
+        subpath: &'path VPath,
+    },
+    /// Nothing is mounted under the first segment of the path.
+    None,
+}
+
+impl MountPoints {
+    pub fn new() -> Self {
+        Self {
+            mount_points: HashMap::new(),
+        }
+    }
+
+    /// Mounts `fs` under the single-segment name `path`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `path` contains a separator, or if something is already mounted there.
+    pub fn add(&mut self, path: &VPath, fs: Box<dyn ReadFilesystem>) {
+        assert_eq!(
+            path.depth(), 0,
+            "path must be situated at root. MountPoints does not support nested paths, but you can nest MountPoints within other MountPoints"
+        );
+
+        assert!(
+            self.mount_points
+                .insert(path.as_str().to_owned(), fs)
+                .is_none(),
+            "duplicate mount point at {path:?}"
+        );
+    }
+
+    fn resolve<'fs, 'path>(&'fs self, path: &'path VPath) -> Resolved<'fs, 'path> {
+        if path == VPath::ROOT {
+            return Resolved::Root;
+        }
+
+        // `split` yields at least one item, so the fallback is never taken.
+        let mount_point_name = path
+            .as_str()
+            .split(VPath::SEPARATOR)
+            .next()
+            .unwrap_or_default();
+        let Some(fs) = self.mount_points.get(mount_point_name) else {
+            return Resolved::None;
+        };
+        // A malformed path with an empty segment right after the mount point (`a//b`) has no
+        // valid subpath; report it as unresolvable instead of panicking.
+        let Some(subpath) = path.strip_prefix(VPath::new(mount_point_name)) else {
+            return Resolved::None;
+        };
+        Resolved::MountPoint { fs: &**fs, subpath }
+    }
+}
+
+impl Default for MountPoints {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl ReadFilesystem for MountPoints {
+    fn dir(&self, path: &VPath) -> Vec<DirEntry> {
+        match self.resolve(path) {
+            Resolved::Root => {
+                let mut entries: Vec<_> = self
+                    .mount_points
+                    .keys()
+                    .map(|name| DirEntry {
+                        path: VPathBuf::new(name),
+                    })
+                    .collect();
+                // `HashMap` iteration order is unspecified; sort so listings are deterministic.
+                entries.sort_by(|a, b| a.path.cmp(&b.path));
+                entries
+            }
+            Resolved::MountPoint { fs, subpath } => fs.dir(subpath),
+            Resolved::None => vec![],
+        }
+    }
+
+    fn content_version(&self, path: &VPath) -> Option<String> {
+        match self.resolve(path) {
+            Resolved::MountPoint { fs, subpath } => fs.content_version(subpath),
+            Resolved::Root | Resolved::None => None,
+        }
+    }
+
+    fn content(&self, path: &VPath) -> Option<Vec<u8>> {
+        match self.resolve(path) {
+            Resolved::MountPoint { fs, subpath } => fs.content(subpath),
+            Resolved::Root | Resolved::None => None,
+        }
+    }
+}
diff --git a/crates/treehouse/src/vfs/physical.rs b/crates/treehouse/src/vfs/physical.rs
new file mode 100644
index 0000000..64c13f2
--- /dev/null
+++ b/crates/treehouse/src/vfs/physical.rs
@@ -0,0 +1,84 @@
+use std::path::{Path, PathBuf};
+
+use log::error;
+
+use super::{DirEntry, ReadFilesystem, VPath, VPathBuf};
+
+/// Exposes a directory of the real file system.
+#[derive(Debug, Clone)]
+pub struct PhysicalDir {
+    root: PathBuf,
+}
+
+impl PhysicalDir {
+    pub fn new(root: PathBuf) -> Self {
+        Self { root }
+    }
+}
+
+impl ReadFilesystem for PhysicalDir {
+    fn dir(&self, vpath: &VPath) -> Vec<DirEntry> {
+        let physical = self.root.join(physical_path(vpath));
+        let read_dir = match std::fs::read_dir(physical) {
+            Ok(read_dir) => read_dir,
+            Err(err) => {
+                error!(
+                    "PhysicalDir {:?} cannot read vpath {vpath:?}: {err:?}",
+                    self.root
+                );
+                return vec![];
+            }
+        };
+
+        // Entries that cannot be read or represented are logged and skipped.
+        let mut entries: Vec<_> = read_dir
+            .filter_map(|entry| {
+                let entry = entry
+                    .inspect_err(|err| {
+                        error!(
+                            "PhysicalDir {:?} error while reading entries in vpath {vpath:?}: {err:?}",
+                            self.root
+                        )
+                    })
+                    .ok()?;
+                let path = entry.path();
+                let Some(path_str) = path.strip_prefix(&self.root).unwrap_or(&path).to_str() else {
+                    error!("PhysicalDir {:?} entry {path:?} has invalid UTF-8 (while reading vpath {vpath:?})", self.root);
+                    return None;
+                };
+                // Virtual paths always use `/`, whatever the host's separator is.
+                let vpath_buf = VPathBuf::try_new(path_str.replace('\\', "/"))
+                    .inspect_err(|err| {
+                        error!("PhysicalDir {:?} error with vpath for {path_str:?}: {err:?}", self.root);
+                    })
+                    .ok()?;
+                Some(DirEntry { path: vpath_buf })
+            })
+            .collect();
+        // `read_dir` order is platform-dependent; sort so listings are deterministic.
+        entries.sort_by(|a, b| a.path.cmp(&b.path));
+        entries
+    }
+
+    fn content_version(&self, _path: &VPath) -> Option<String> {
+        None
+    }
+
+    fn content(&self, path: &VPath) -> Option<Vec<u8>> {
+        std::fs::read(self.root.join(physical_path(path)))
+            .inspect_err(|err| {
+                error!(
+                    "PhysicalDir {:?} cannot read file at vpath {path:?}: {err:?}",
+                    self.root
+                )
+            })
+            .ok()
+    }
+}
+
+/// Reinterprets a virtual path as a relative path on the host.
+// NOTE(review): `..` segments are not rejected, so a caller-controlled path can point outside
+// `root` - confirm whether callers ever pass untrusted paths.
+fn physical_path(path: &VPath) -> &Path {
+    Path::new(path.as_str())
+}
diff --git a/crates/treehouse/tests/it/main.rs b/crates/treehouse/tests/it/main.rs
new file mode 100644
index 0000000..03a9976
--- /dev/null
+++ b/crates/treehouse/tests/it/main.rs
@@ -0,0 +1 @@
+mod vfs;
diff --git a/crates/treehouse/tests/it/vfs.rs b/crates/treehouse/tests/it/vfs.rs
new file mode 100644
index 0000000..6c6651c
--- /dev/null
+++ b/crates/treehouse/tests/it/vfs.rs
@@ -0,0 +1,4 @@
+mod empty;
+mod file;
+mod mount_points;
+mod physical;
diff --git a/crates/treehouse/tests/it/vfs/empty.rs b/crates/treehouse/tests/it/vfs/empty.rs
new file mode 100644
index 0000000..48205d1
--- /dev/null
+++ b/crates/treehouse/tests/it/vfs/empty.rs
@@ -0,0 +1,16 @@
+use treehouse::vfs::{empty::EmptyFilesystem, ReadFilesystem, VPath};
+
+#[test]
+fn dir() {
+    assert!(EmptyFilesystem.dir(VPath::ROOT).is_empty());
+}
+
+#[test]
+fn content_version() {
+    assert!(EmptyFilesystem.content_version(VPath::ROOT).is_none());
+}
+
+#[test]
+fn content() {
+    assert!(EmptyFilesystem.content(VPath::ROOT).is_none());
+}
diff --git a/crates/treehouse/tests/it/vfs/file.rs b/crates/treehouse/tests/it/vfs/file.rs
new file mode 100644
index 0000000..8eeee4c
--- /dev/null
+++ b/crates/treehouse/tests/it/vfs/file.rs
@@ -0,0 +1,29 @@
+use treehouse::vfs::{file::BufferedFile, ReadFilesystem, VPath};
+
+fn vfs() -> BufferedFile {
+    BufferedFile::new(b"hewwo :3".to_vec())
+}
+
+#[test]
+fn dir() {
+    let vfs = vfs();
+    assert!(vfs.dir(VPath::ROOT).is_empty());
+}
+
+#[test]
+fn content_version() {
+    let vfs = vfs();
+    assert!(
+        vfs.content_version(VPath::ROOT).is_none(),
+        "content_version is not implemented for BufferedFile for now"
+    );
+}
+
+#[test]
+fn content() {
+    let vfs = vfs();
+    assert_eq!(
+        vfs.content(VPath::ROOT).as_deref(),
+        Some(b"hewwo :3".as_slice()),
+    );
+}
diff --git a/crates/treehouse/tests/it/vfs/mount_points.rs b/crates/treehouse/tests/it/vfs/mount_points.rs
new file mode 100644
index 0000000..ca6747f
--- /dev/null
+++ b/crates/treehouse/tests/it/vfs/mount_points.rs
@@ -0,0 +1,62 @@
+use treehouse::vfs::{
+    file::BufferedFile, mount_points::MountPoints, DirEntry, ReadFilesystem, VPath, VPathBuf,
+};
+
+fn vfs() -> MountPoints {
+    let file1 = BufferedFile::new(b"hewwo :3".to_vec());
+    let file2 = BufferedFile::new(b"fwoofee -w-".to_vec());
+    let file3 = BufferedFile::new(b"boop >w<".to_vec());
+
+    let mut inner = MountPoints::new();
+    inner.add(VPath::new("file3.txt"), Box::new(file3));
+
+    let mut vfs = MountPoints::new();
+    vfs.add(VPath::new("file1.txt"), Box::new(file1));
+    vfs.add(VPath::new("file2.txt"), Box::new(file2));
+    vfs.add(VPath::new("inner"), Box::new(inner));
+    vfs
+}
+
+#[test]
+fn dir() {
+    let vfs = vfs();
+
+    assert_eq!(
+        vfs.dir(VPath::new("")),
+        vec![
+            DirEntry {
+                path: VPathBuf::new("file1.txt"),
+            },
+            DirEntry {
+                path: VPathBuf::new("file2.txt"),
+            },
+            DirEntry {
+                path: VPathBuf::new("inner"),
+            }
+        ]
+    );
+
+    assert!(vfs.dir(VPath::new("file1.txt")).is_empty());
+    assert!(vfs.dir(VPath::new("file2.txt")).is_empty());
+    assert_eq!(
+        vfs.dir(VPath::new("inner")),
+        vec![DirEntry {
+            path: VPathBuf::new("file3.txt")
+        }]
+    );
+}
+
+#[test]
+fn content() {
+    let vfs = vfs();
+
+    assert_eq!(
+        vfs.content(VPath::new("file1.txt")).as_deref(),
+        Some(b"hewwo :3".as_slice()),
+    );
+    assert_eq!(
+        vfs.content(VPath::new("inner/file3.txt")).as_deref(),
+        Some(b"boop >w<".as_slice()),
+    );
+    assert!(vfs.content(VPath::new("nonexistent")).is_none());
+}
diff --git a/crates/treehouse/tests/it/vfs/physical.rs b/crates/treehouse/tests/it/vfs/physical.rs
new file mode 100644
index 0000000..6c6f857
--- /dev/null
+++ b/crates/treehouse/tests/it/vfs/physical.rs
@@ -0,0 +1,37 @@
+use std::path::Path;
+
+use treehouse::vfs::{physical::PhysicalDir, DirEntry, ReadFilesystem, VPath, VPathBuf};
+
+fn vfs() -> PhysicalDir {
+    let root = Path::new("tests/it/vfs_physical").to_path_buf();
+    PhysicalDir::new(root)
+}
+
+#[test]
+fn dir() {
+    let vfs = vfs();
+    let dir = vfs.dir(VPath::ROOT);
+    assert_eq!(
+        &dir[..],
+        &[DirEntry {
+            path: VPathBuf::new("test.txt"),
+        }]
+    );
+}
+
+#[test]
+fn content_version() {
+    let vfs = vfs();
+    let content_version = vfs.content_version(VPath::new("test.txt"));
+    assert_eq!(
+        content_version, None,
+        "content_version remains unimplemented for now"
+    );
+}
+
+#[test]
+fn content() {
+    let vfs = vfs();
+    let content = vfs.content(VPath::new("test.txt"));
+    assert_eq!(content.as_deref(), Some(b"hewwo :3\n".as_slice()));
+}
diff --git a/crates/treehouse/tests/it/vfs_physical/test.txt b/crates/treehouse/tests/it/vfs_physical/test.txt
new file mode 100644
index 0000000..73dca14
--- /dev/null
+++ b/crates/treehouse/tests/it/vfs_physical/test.txt
@@ -0,0 +1 @@
+hewwo :3