From 65807c40cd874514e97392e35d25b788b1a706d8 Mon Sep 17 00:00:00 2001
From: Ariel Davis
Date: Wed, 31 Jan 2024 17:15:14 -0800
Subject: [PATCH] Add str process

---
 README.md                     |   4 ++
 crates/str-process/Cargo.toml |  16 +++++
 crates/str-process/src/lib.rs | 117 ++++++++++++++++++++++++++++++++++
 3 files changed, 137 insertions(+)
 create mode 100644 crates/str-process/Cargo.toml
 create mode 100644 crates/str-process/src/lib.rs

diff --git a/README.md b/README.md
index 158879d..894f962 100644
--- a/README.md
+++ b/README.md
@@ -30,6 +30,10 @@ Turn `PascalCase` into `snake_case` and vice versa.
 
 Determine whether a sequence of patterns is exhaustive or if any of the patterns are unreachable.
 
+## `str-process`
+
+Process a string by each byte.
+
 ## `syntax-gen`
 
 Generates Rust code from an [ungrammar][].
diff --git a/crates/str-process/Cargo.toml b/crates/str-process/Cargo.toml
new file mode 100644
index 0000000..711687a
--- /dev/null
+++ b/crates/str-process/Cargo.toml
@@ -0,0 +1,16 @@
+[package]
+name = "str-process"
+version.workspace = true
+edition.workspace = true
+license.workspace = true
+publish.workspace = true
+
+[lints]
+workspace = true
+
+[lib]
+test = false
+doctest = false
+
+[dependencies]
+drop_bomb.workspace = true
diff --git a/crates/str-process/src/lib.rs b/crates/str-process/src/lib.rs
new file mode 100644
index 0000000..7ae7608
--- /dev/null
+++ b/crates/str-process/src/lib.rs
@@ -0,0 +1,117 @@
+//! Process a string by each byte.
+
+use drop_bomb::DebugDropBomb;
+
+/// The state for processing a string.
+#[derive(Debug, Default)]
+pub struct St<'a> {
+  s: &'a str,
+  idx: usize,
+}
+
+impl<'a> St<'a> {
+  /// Returns a new state for the string.
+  #[must_use]
+  pub fn new(s: &'a str) -> St<'a> {
+    St { s, idx: 0 }
+  }
+
+  /// Returns the current byte.
+  #[must_use]
+  pub fn cur(&self) -> Option<u8> {
+    self.s.as_bytes().get(self.idx).copied()
+  }
+
+  /// Returns the current byte index. Use this for marking where errors occur in the string.
+  #[must_use]
+  pub fn cur_idx(&self) -> usize {
+    self.idx
+  }
+
+  /// Advances the index by 1.
+  pub fn bump(&mut self) {
+    self.idx += 1;
+  }
+
+  /// Advances the index while `cond` holds true.
+  pub fn bump_while<F>(&mut self, mut cond: F)
+  where
+    F: FnMut(u8) -> bool,
+  {
+    while let Some(b) = self.cur() {
+      if cond(b) {
+        self.bump();
+      } else {
+        break;
+      }
+    }
+  }
+
+  /// Returns a marker that must be consumed later.
+  #[must_use]
+  pub fn mark(&self) -> Marker {
+    Marker { bomb: DebugDropBomb::new("must be passed to a `St` method"), idx: self.idx }
+  }
+
+  /// Returns a non-empty slice since the marker.
+  ///
+  /// # Panics
+  ///
+  /// If it would return an empty slice.
+  #[must_use]
+  pub fn non_empty_since(&self, m: Marker) -> &'a [u8] {
+    let start = m.idx;
+    assert!(self.did_bump_since(m));
+    &self.s.as_bytes()[start..self.idx]
+  }
+
+  /// Returns the slice since the marker.
+  ///
+  /// NOTE: allowed to return an empty slice.
+  #[must_use]
+  pub fn since(&self, mut m: Marker) -> &'a [u8] {
+    let start = m.idx;
+    m.bomb.defuse();
+    &self.s.as_bytes()[start..self.idx]
+  }
+
+  /// Returns whether the state was bumped since the marker.
+  #[must_use]
+  pub fn did_bump_since(&self, mut m: Marker) -> bool {
+    m.bomb.defuse();
+    self.idx > m.idx
+  }
+
+  /// If the next few bytes of the string are equal to prefix, advance by that much and return true.
+  /// Else return false.
+  pub fn eat_prefix(&mut self, prefix: &[u8]) -> bool {
+    let end = self.idx + prefix.len();
+    if self.s.as_bytes().get(self.idx..end).is_some_and(|bs| bs == prefix) {
+      self.idx = end;
+      true
+    } else {
+      false
+    }
+  }
+
+  /// Advances the index to the next char boundary.
+  pub fn next_str(&mut self) {
+    self.bump();
+    loop {
+      if self.s.is_char_boundary(self.idx) {
+        break;
+      }
+      match self.cur() {
+        Some(_) => self.bump(),
+        None => unreachable!("got to the end without a valid str"),
+      }
+    }
+  }
+}
+
+/// A marker for the current position.
+#[derive(Debug)]
+pub struct Marker {
+  bomb: DebugDropBomb,
+  idx: usize,
+}