Merge #167: decode: Add accessors

2fae3aa Add UncheckedHrpstring::remove_witness_version function (Tobin C. Harding) 0ee999d Add UncheckHrpstring::witness_version function (Tobin C. Harding) b91207c Add ascii accessor methods (Tobin C. Harding) 045b50a decode: Rename data field (Tobin C. Harding) Pull request description: This is a sexy little PR right here. Add an `ascii` accessor method to the `UncheckedHrpstring` and `CheckedHrpstring` types. - Patch 1 is preparation, renames the `data` field. - Patch 2 is the meat and potatoes. Fix: #160 ACKs for top commit: apoelstra: ACK 2fae3aa Tree-SHA512: ce706db35d1119d3a240ffdfc11c4205b7a6052eed57019fa730ccf4643b78b5058a067b112b7bdd0e8fecf5108fe2e0ef6d2c072caeac08a08b461256318cf8
rust-bitcoin · Jan 11, 2024 · cb2f61e · cb2f61e
2 parents 84b2c50 + 2fae3aa
commit cb2f61e
Showing 1 changed file with 154 additions and 32 deletions.
diff --git a/src/primitives/decode.rs b/src/primitives/decode.rs
@@ -117,8 +117,8 @@ pub struct UncheckedHrpstring<'s> {
     hrp: Hrp,
     /// This is ASCII byte values of the parsed string, guaranteed to be valid bech32 characters.
     ///
-    /// Contains the checksum if one was present in the parsed string.
-    data: &'s [u8],
+    /// The characters after the separator i.e., the "data part" defined by BIP-173.
+    data_part_ascii: &'s [u8],
     /// The length of the parsed hrpstring.
     hrpstring_length: usize,
 }
@@ -130,11 +130,11 @@ impl<'s> UncheckedHrpstring<'s> {
     #[inline]
     pub fn new(s: &'s str) -> Result<Self, UncheckedHrpstringError> {
         let sep_pos = check_characters(s)?;
-        let (hrp, data) = s.split_at(sep_pos);
+        let (hrp, rest) = s.split_at(sep_pos);
 
         let ret = UncheckedHrpstring {
             hrp: Hrp::parse(hrp)?,
-            data: data[1..].as_bytes(), // Skip the separator.
+            data_part_ascii: rest[1..].as_bytes(), // Skip the separator.
             hrpstring_length: s.len(),
         };
 
@@ -145,6 +145,85 @@ impl<'s> UncheckedHrpstring<'s> {
     #[inline]
     pub fn hrp(&self) -> Hrp { self.hrp }
 
+    /// Returns the data part as ASCII bytes i.e., everything after the separator '1'.
+    ///
+    /// The byte values are guaranteed to be valid bech32 characters. Includes the checksum
+    /// if one was present in the parsed string.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bech32::primitives::decode::UncheckedHrpstring;
+    ///
+    /// let addr = "bc1qar0srrr7xfkvy5l643lydnw9re59gtzzwf5mdq";
+    /// let ascii = "qar0srrr7xfkvy5l643lydnw9re59gtzzwf5mdq";
+    ///
+    /// let unchecked = UncheckedHrpstring::new(&addr).unwrap();
+    /// assert!(unchecked.data_part_ascii().iter().eq(ascii.as_bytes().iter()))
+    /// ```
+    #[inline]
+    pub fn data_part_ascii(&self) -> &[u8] { self.data_part_ascii }
+
+    /// Attempts to remove the first byte of the data part, treating it as a witness version.
+    ///
+    /// If [`Self::witness_version`] succeeds this function removes the first character (witness
+    /// version byte) from the internal ASCII data part buffer. Future calls to
+    /// [`Self::data_part_ascii`] will no longer include it.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bech32::{primitives::decode::UncheckedHrpstring, Fe32};
+    ///
+    /// let addr = "bc1qar0srrr7xfkvy5l643lydnw9re59gtzzwf5mdq";
+    /// let ascii = "ar0srrr7xfkvy5l643lydnw9re59gtzzwf5mdq";
+    ///
+    /// let mut unchecked = UncheckedHrpstring::new(&addr).unwrap();
+    /// let witness_version = unchecked.remove_witness_version().unwrap();
+    /// assert_eq!(witness_version, Fe32::Q);
+    /// assert!(unchecked.data_part_ascii().iter().eq(ascii.as_bytes().iter()))
+    /// ```
+    #[inline]
+    pub fn remove_witness_version(&mut self) -> Option<Fe32> {
+        self.witness_version().map(|witver| {
+            self.data_part_ascii = &self.data_part_ascii[1..]; // Remove the witness version byte.
+            witver
+        })
+    }
+
+    /// Returns the segwit witness version if there is one.
+    ///
+    /// Attempts to convert the first character of the data part to a witness version. If this
+    /// succeeds, and it is a valid version (0 to 16 inclusive), we return it; otherwise `None`.
+    ///
+    /// This function makes no guarantees on the validity of the checksum.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bech32::{primitives::decode::UncheckedHrpstring, Fe32};
+    ///
+    /// // Note the invalid checksum!
+    /// let addr = "bc1qar0srrr7xfkvy5l643lydnw9re59gtzzffffff";
+    ///
+    /// let unchecked = UncheckedHrpstring::new(&addr).unwrap();
+    /// assert_eq!(unchecked.witness_version(), Some(Fe32::Q));
+    /// ```
+    #[inline]
+    pub fn witness_version(&self) -> Option<Fe32> {
+        let data_part = self.data_part_ascii();
+        if data_part.is_empty() {
+            return None;
+        }
+
+        // Unwrap ok because the constructor checked that every character is valid bech32.
+        let witness_version = Fe32::from_char(data_part[0].into()).unwrap();
+        if witness_version.to_u8() > 16 {
+            return None;
+        }
+        Some(witness_version)
+    }
+
     /// Validates that data has a valid checksum for the `Ck` algorithm and returns a [`CheckedHrpstring`].
     #[inline]
     pub fn validate_and_remove_checksum<Ck: Checksum>(
@@ -183,15 +262,15 @@ impl<'s> UncheckedHrpstring<'s> {
             return Ok(());
         }
 
-        if self.data.len() < Ck::CHECKSUM_LENGTH {
+        if self.data_part_ascii.len() < Ck::CHECKSUM_LENGTH {
             return Err(InvalidLength);
         }
 
         let mut checksum_eng = checksum::Engine::<Ck>::new();
         checksum_eng.input_hrp(self.hrp());
 
         // Unchecked conversion ok since we checked all characters in our constructor.
-        for fe in self.data.iter().map(|&b| Fe32::from_char_unchecked(b)) {
+        for fe in self.data_part_ascii.iter().map(|&b| Fe32::from_char_unchecked(b)) {
             checksum_eng.input_fe(fe);
         }
 
@@ -213,20 +292,20 @@ impl<'s> UncheckedHrpstring<'s> {
     /// May panic if data is not valid.
     #[inline]
     pub fn remove_checksum<Ck: Checksum>(self) -> CheckedHrpstring<'s> {
-        let data_len = self.data.len() - Ck::CHECKSUM_LENGTH;
+        let end = self.data_part_ascii.len() - Ck::CHECKSUM_LENGTH;
 
         CheckedHrpstring {
             hrp: self.hrp(),
-            data: &self.data[..data_len],
+            ascii: &self.data_part_ascii[..end],
             hrpstring_length: self.hrpstring_length,
         }
     }
 }
 
 /// An HRP string that has been parsed and had the checksum validated.
 ///
-/// This type does not treat the first byte of the data in any special way i.e., as the witness
-/// version byte. If you are parsing Bitcoin segwit addresses you likely want to use [`SegwitHrpstring`].
+/// This type does not treat the first byte of the data part in any special way i.e., as the witness
+/// version byte. If you are parsing Bitcoin segwit addresses consider using [`SegwitHrpstring`].
 ///
 /// > We first describe the general checksummed base32 format called Bech32 and then
 /// > define Segregated Witness addresses using it.
@@ -250,9 +329,10 @@ impl<'s> UncheckedHrpstring<'s> {
 pub struct CheckedHrpstring<'s> {
     /// The human-readable part, guaranteed to be lowercase ASCII characters.
     hrp: Hrp,
-    /// This is ASCII byte values of the parsed string, guaranteed to be valid bech32 characters,
-    /// with the checksum removed.
-    data: &'s [u8],
+    /// This is ASCII byte values of the parsed string, guaranteed to be valid bech32 characters.
+    ///
+    /// The characters after the '1' separator and before the checksum.
+    ascii: &'s [u8],
     /// The length of the parsed hrpstring.
     hrpstring_length: usize, // Guaranteed to be <= CK::CODE_LENGTH
 }
@@ -274,19 +354,38 @@ impl<'s> CheckedHrpstring<'s> {
     #[inline]
     pub fn hrp(&self) -> Hrp { self.hrp }
 
+    /// Returns a partial slice of the data part, as ASCII bytes: everything after the separator '1'
+    /// and before the checksum.
+    ///
+    /// The byte values are guaranteed to be valid bech32 characters.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bech32::{Bech32, primitives::decode::CheckedHrpstring};
+    ///
+    /// let addr = "bc1qar0srrr7xfkvy5l643lydnw9re59gtzzwf5mdq";
+    /// let ascii = "qar0srrr7xfkvy5l643lydnw9re59gtzz";
+    ///
+    /// let checked = CheckedHrpstring::new::<Bech32>(&addr).unwrap();
+    /// assert!(checked.data_part_ascii_no_checksum().iter().eq(ascii.as_bytes().iter()))
+    /// ```
+    #[inline]
+    pub fn data_part_ascii_no_checksum(&self) -> &[u8] { self.ascii }
+
     /// Returns an iterator that yields the data part of the parsed bech32 encoded string.
     ///
     /// Converts the ASCII bytes representing field elements to the respective field elements, then
     /// converts the stream of field elements to a stream of bytes.
     #[inline]
     pub fn byte_iter(&self) -> ByteIter {
-        ByteIter { iter: AsciiToFe32Iter { iter: self.data.iter().copied() }.fes_to_bytes() }
+        ByteIter { iter: AsciiToFe32Iter { iter: self.ascii.iter().copied() }.fes_to_bytes() }
     }
 
     /// Converts this type to a [`SegwitHrpstring`] after validating the witness and HRP.
     #[inline]
     pub fn validate_segwit(mut self) -> Result<SegwitHrpstring<'s>, SegwitHrpstringError> {
-        if self.data.is_empty() {
+        if self.ascii.is_empty() {
             return Err(SegwitHrpstringError::NoData);
         }
 
@@ -295,28 +394,28 @@ impl<'s> CheckedHrpstring<'s> {
         }
 
         // Unwrap ok since check_characters checked the bech32-ness of this char.
-        let witness_version = Fe32::from_char(self.data[0].into()).unwrap();
-        self.data = &self.data[1..]; // Remove the witness version byte from data.
+        let witness_version = Fe32::from_char(self.ascii[0].into()).unwrap();
+        self.ascii = &self.ascii[1..]; // Remove the witness version byte.
 
         self.validate_padding()?;
         self.validate_witness_program_length(witness_version)?;
 
-        Ok(SegwitHrpstring { hrp: self.hrp(), witness_version, data: self.data })
+        Ok(SegwitHrpstring { hrp: self.hrp(), witness_version, ascii: self.ascii })
     }
 
     /// Validates the segwit padding rules.
     ///
-    /// Must be called after the witness version byte is removed from the data.
+    /// Must be called after the witness version byte is removed from the data part.
     ///
     /// From BIP-173:
     /// > Re-arrange those bits into groups of 8 bits. Any incomplete group at the
     /// > end MUST be 4 bits or less, MUST be all zeroes, and is discarded.
     fn validate_padding(&self) -> Result<(), PaddingError> {
-        if self.data.is_empty() {
+        if self.ascii.is_empty() {
             return Ok(()); // Empty data implies correct padding.
         }
 
-        let fe_iter = AsciiToFe32Iter { iter: self.data.iter().copied() };
+        let fe_iter = AsciiToFe32Iter { iter: self.ascii.iter().copied() };
         let padding_len = fe_iter.len() * 5 % 8;
 
         if padding_len > 4 {
@@ -343,7 +442,7 @@ impl<'s> CheckedHrpstring<'s> {
 
     /// Validates the segwit witness length rules.
     ///
-    /// Must be called after the witness version byte is removed from the data.
+    /// Must be called after the witness version byte is removed from the data part.
     fn validate_witness_program_length(
         &self,
         witness_version: Fe32,
@@ -372,11 +471,12 @@ impl<'s> CheckedHrpstring<'s> {
 pub struct SegwitHrpstring<'s> {
     /// The human-readable part, valid for segwit addresses.
     hrp: Hrp,
-    /// The first byte of the parsed data.
+    /// The first byte of the parsed data part.
     witness_version: Fe32,
-    /// This is ASCII byte values of the parsed string, guaranteed to be valid bech32 characters,
-    /// with the witness version and checksum removed.
-    data: &'s [u8],
+    /// This is ASCII byte values of the parsed string, guaranteed to be valid bech32 characters.
+    ///
+    /// The characters after the witness version and before the checksum.
+    ascii: &'s [u8],
 }
 
 impl<'s> SegwitHrpstring<'s> {
@@ -396,12 +496,14 @@ impl<'s> SegwitHrpstring<'s> {
 
         let unchecked = UncheckedHrpstring::new(s)?;
 
-        if unchecked.data.is_empty() {
+        let data_part = unchecked.data_part_ascii();
+
+        if data_part.is_empty() {
             return Err(SegwitHrpstringError::NoData);
         }
 
         // Unwrap ok since check_characters (in `UncheckedHrpstring::new`) checked the bech32-ness of this char.
-        let witness_version = Fe32::from_char(unchecked.data[0].into()).unwrap();
+        let witness_version = Fe32::from_char(data_part[0].into()).unwrap();
         if witness_version.to_u8() > 16 {
             return Err(SegwitHrpstringError::InvalidWitnessVersion(witness_version));
         }
@@ -429,9 +531,10 @@ impl<'s> SegwitHrpstring<'s> {
     #[inline]
     pub fn new_bech32(s: &'s str) -> Result<Self, SegwitHrpstringError> {
         let unchecked = UncheckedHrpstring::new(s)?;
+        let data_part = unchecked.data_part_ascii();
 
         // Unwrap ok since check_characters (in `UncheckedHrpstring::new`) checked the bech32-ness of this char.
-        let witness_version = Fe32::from_char(unchecked.data[0].into()).unwrap();
+        let witness_version = Fe32::from_char(data_part[0].into()).unwrap();
         if witness_version.to_u8() > 16 {
             return Err(SegwitHrpstringError::InvalidWitnessVersion(witness_version));
         }
@@ -456,6 +559,25 @@ impl<'s> SegwitHrpstring<'s> {
     #[inline]
     pub fn witness_version(&self) -> Fe32 { self.witness_version }
 
+    /// Returns a partial slice of the data part, as ASCII bytes, everything after the witness
+    /// version and before the checksum.
+    ///
+    /// The byte values are guaranteed to be valid bech32 characters.
+    ///
+    /// # Examples
+    ///
+    /// ```
+    /// use bech32::{Bech32, primitives::decode::SegwitHrpstring};
+    ///
+    /// let addr = "bc1qar0srrr7xfkvy5l643lydnw9re59gtzzwf5mdq";
+    /// let ascii = "ar0srrr7xfkvy5l643lydnw9re59gtzz";
+    ///
+    /// let segwit = SegwitHrpstring::new(&addr).unwrap();
+    /// assert!(segwit.data_part_ascii_no_witver_no_checksum().iter().eq(ascii.as_bytes().iter()))
+    /// ```
+    #[inline]
+    pub fn data_part_ascii_no_witver_no_checksum(&self) -> &[u8] { self.ascii }
+
     /// Returns an iterator that yields the data part, excluding the witness version, of the parsed
     /// bech32 encoded string.
     ///
@@ -465,12 +587,12 @@ impl<'s> SegwitHrpstring<'s> {
     /// Use `self.witness_version()` to get the witness version.
     #[inline]
     pub fn byte_iter(&self) -> ByteIter {
-        ByteIter { iter: AsciiToFe32Iter { iter: self.data.iter().copied() }.fes_to_bytes() }
+        ByteIter { iter: AsciiToFe32Iter { iter: self.ascii.iter().copied() }.fes_to_bytes() }
     }
 }
 
-/// Checks whether a given HRP string has data characters in the bech32 alphabet (incl. checksum
-/// characters), and that the whole string has consistent casing (hrp, data, and checksum).
+/// Checks whether a given HRP string has data part characters in the bech32 alphabet (incl.
+/// checksum characters), and that the whole string has consistent casing (hrp and data part).
 ///
 /// # Returns
 ///