Skip to content

Commit

Permalink
Merge #167: decode: Add accessors
Browse files Browse the repository at this point in the history
2fae3aa Add UncheckedHrpstring::remove_witness_version function (Tobin C. Harding)
0ee999d Add UncheckedHrpstring::witness_version function (Tobin C. Harding)
b91207c Add ascii accessor methods (Tobin C. Harding)
045b50a decode: Rename data field (Tobin C. Harding)

Pull request description:

  This is a sexy little PR right here.

  Add an `ascii` accessor method to the `UncheckedHrpstring` and `CheckedHrpstring` types.

  - Patch 1 is preparation, renames the `data` field.
  - Patch 2 is the meat and potatoes.

  Fix: #160

ACKs for top commit:
  apoelstra:
    ACK 2fae3aa

Tree-SHA512: ce706db35d1119d3a240ffdfc11c4205b7a6052eed57019fa730ccf4643b78b5058a067b112b7bdd0e8fecf5108fe2e0ef6d2c072caeac08a08b461256318cf8
  • Loading branch information
apoelstra committed Jan 11, 2024
2 parents 84b2c50 + 2fae3aa commit cb2f61e
Showing 1 changed file with 154 additions and 32 deletions.
186 changes: 154 additions & 32 deletions src/primitives/decode.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,8 @@ pub struct UncheckedHrpstring<'s> {
hrp: Hrp,
/// This is ASCII byte values of the parsed string, guaranteed to be valid bech32 characters.
///
/// Contains the checksum if one was present in the parsed string.
data: &'s [u8],
/// The characters after the separator i.e., the "data part" defined by BIP-173.
data_part_ascii: &'s [u8],
/// The length of the parsed hrpstring.
hrpstring_length: usize,
}
Expand All @@ -130,11 +130,11 @@ impl<'s> UncheckedHrpstring<'s> {
#[inline]
pub fn new(s: &'s str) -> Result<Self, UncheckedHrpstringError> {
let sep_pos = check_characters(s)?;
let (hrp, data) = s.split_at(sep_pos);
let (hrp, rest) = s.split_at(sep_pos);

let ret = UncheckedHrpstring {
hrp: Hrp::parse(hrp)?,
data: data[1..].as_bytes(), // Skip the separator.
data_part_ascii: rest[1..].as_bytes(), // Skip the separator.
hrpstring_length: s.len(),
};

Expand All @@ -145,6 +145,85 @@ impl<'s> UncheckedHrpstring<'s> {
#[inline]
pub fn hrp(&self) -> Hrp { self.hrp }

/// Returns the ASCII bytes of the data part, i.e., every character after the '1' separator.
///
/// Each byte in the returned slice is guaranteed to be a valid bech32 character. If the parsed
/// string carried a checksum, the checksum characters are part of the returned slice.
///
/// # Examples
///
/// ```
/// use bech32::primitives::decode::UncheckedHrpstring;
///
/// let addr = "bc1qar0srrr7xfkvy5l643lydnw9re59gtzzwf5mdq";
/// let ascii = "qar0srrr7xfkvy5l643lydnw9re59gtzzwf5mdq";
///
/// let unchecked = UncheckedHrpstring::new(addr).unwrap();
/// assert_eq!(unchecked.data_part_ascii(), ascii.as_bytes());
/// ```
#[inline]
pub fn data_part_ascii(&self) -> &[u8] {
    self.data_part_ascii
}

/// Strips the leading witness version character from the data part, if there is one.
///
/// The first character of the data part is interpreted via [`Self::witness_version`]. When that
/// yields a version, the character is dropped from the internal ASCII data part buffer and the
/// version is returned; later calls to [`Self::data_part_ascii`] no longer include it. When it
/// yields `None`, the buffer is left untouched and `None` is returned.
///
/// # Examples
///
/// ```
/// use bech32::{primitives::decode::UncheckedHrpstring, Fe32};
///
/// let addr = "bc1qar0srrr7xfkvy5l643lydnw9re59gtzzwf5mdq";
/// let ascii = "ar0srrr7xfkvy5l643lydnw9re59gtzzwf5mdq";
///
/// let mut unchecked = UncheckedHrpstring::new(addr).unwrap();
/// let witness_version = unchecked.remove_witness_version().unwrap();
/// assert_eq!(witness_version, Fe32::Q);
/// assert_eq!(unchecked.data_part_ascii(), ascii.as_bytes());
/// ```
#[inline]
pub fn remove_witness_version(&mut self) -> Option<Fe32> {
    // Bail out without touching the buffer when there is no valid witness version.
    let version = self.witness_version()?;

    // A version was found, so the data part is non-empty and skipping one byte cannot panic.
    self.data_part_ascii = &self.data_part_ascii[1..];
    Some(version)
}

/// Returns the segwit witness version, if the data part starts with one.
///
/// The first character of the data part is converted to a field element. It is returned only
/// when its value is a valid witness version (0 through 16 inclusive); an empty data part or a
/// larger value gives `None`.
///
/// The checksum is not inspected, so no guarantee is made about its validity.
///
/// # Examples
///
/// ```
/// use bech32::{primitives::decode::UncheckedHrpstring, Fe32};
///
/// // Note the invalid checksum!
/// let addr = "bc1qar0srrr7xfkvy5l643lydnw9re59gtzzffffff";
///
/// let unchecked = UncheckedHrpstring::new(addr).unwrap();
/// assert_eq!(unchecked.witness_version(), Some(Fe32::Q));
/// ```
#[inline]
pub fn witness_version(&self) -> Option<Fe32> {
    // An empty data part has no first character to interpret.
    let first = *self.data_part_ascii().first()?;

    // unwrap ok because the data part only ever holds valid bech32 characters.
    let candidate = Fe32::from_char(first.into()).unwrap();

    // Values above 16 are valid field elements but not valid witness versions.
    if candidate.to_u8() <= 16 {
        Some(candidate)
    } else {
        None
    }
}

/// Validates that data has a valid checksum for the `Ck` algorithm and returns a [`CheckedHrpstring`].
#[inline]
pub fn validate_and_remove_checksum<Ck: Checksum>(
Expand Down Expand Up @@ -183,15 +262,15 @@ impl<'s> UncheckedHrpstring<'s> {
return Ok(());
}

if self.data.len() < Ck::CHECKSUM_LENGTH {
if self.data_part_ascii.len() < Ck::CHECKSUM_LENGTH {
return Err(InvalidLength);
}

let mut checksum_eng = checksum::Engine::<Ck>::new();
checksum_eng.input_hrp(self.hrp());

// Unwrap ok since we checked all characters in our constructor.
for fe in self.data.iter().map(|&b| Fe32::from_char_unchecked(b)) {
for fe in self.data_part_ascii.iter().map(|&b| Fe32::from_char_unchecked(b)) {
checksum_eng.input_fe(fe);
}

Expand All @@ -213,20 +292,20 @@ impl<'s> UncheckedHrpstring<'s> {
/// May panic if data is not valid.
#[inline]
pub fn remove_checksum<Ck: Checksum>(self) -> CheckedHrpstring<'s> {
let data_len = self.data.len() - Ck::CHECKSUM_LENGTH;
let end = self.data_part_ascii.len() - Ck::CHECKSUM_LENGTH;

CheckedHrpstring {
hrp: self.hrp(),
data: &self.data[..data_len],
ascii: &self.data_part_ascii[..end],
hrpstring_length: self.hrpstring_length,
}
}
}

/// An HRP string that has been parsed and had the checksum validated.
///
/// This type does not treat the first byte of the data in any special way i.e., as the witness
/// version byte. If you are parsing Bitcoin segwit addresses you likely want to use [`SegwitHrpstring`].
/// This type does not treat the first byte of the data part in any special way i.e., as the witness
/// version byte. If you are parsing Bitcoin segwit addresses consider using [`SegwitHrpstring`].
///
/// > We first describe the general checksummed base32 format called Bech32 and then
/// > define Segregated Witness addresses using it.
Expand All @@ -250,9 +329,10 @@ impl<'s> UncheckedHrpstring<'s> {
pub struct CheckedHrpstring<'s> {
/// The human-readable part, guaranteed to be lowercase ASCII characters.
hrp: Hrp,
/// This is ASCII byte values of the parsed string, guaranteed to be valid bech32 characters,
/// with the checksum removed.
data: &'s [u8],
/// This is ASCII byte values of the parsed string, guaranteed to be valid bech32 characters.
///
/// The characters after the '1' separator and before the checksum.
ascii: &'s [u8],
/// The length of the parsed hrpstring.
hrpstring_length: usize, // Guaranteed to be <= CK::CODE_LENGTH
}
Expand All @@ -274,19 +354,38 @@ impl<'s> CheckedHrpstring<'s> {
#[inline]
pub fn hrp(&self) -> Hrp { self.hrp }

/// Returns the ASCII bytes of the data part with the checksum stripped, i.e., the characters
/// after the '1' separator and before the checksum.
///
/// Each byte in the returned slice is guaranteed to be a valid bech32 character.
///
/// # Examples
///
/// ```
/// use bech32::{Bech32, primitives::decode::CheckedHrpstring};
///
/// let addr = "bc1qar0srrr7xfkvy5l643lydnw9re59gtzzwf5mdq";
/// let ascii = "qar0srrr7xfkvy5l643lydnw9re59gtzz";
///
/// let checked = CheckedHrpstring::new::<Bech32>(addr).unwrap();
/// assert_eq!(checked.data_part_ascii_no_checksum(), ascii.as_bytes());
/// ```
#[inline]
pub fn data_part_ascii_no_checksum(&self) -> &[u8] {
    self.ascii
}

/// Returns an iterator that yields the data part of the parsed bech32 encoded string.
///
/// Converts the ASCII bytes representing field elements to the respective field elements, then
/// converts the stream of field elements to a stream of bytes.
#[inline]
pub fn byte_iter(&self) -> ByteIter {
ByteIter { iter: AsciiToFe32Iter { iter: self.data.iter().copied() }.fes_to_bytes() }
ByteIter { iter: AsciiToFe32Iter { iter: self.ascii.iter().copied() }.fes_to_bytes() }
}

/// Converts this type to a [`SegwitHrpstring`] after validating the witness and HRP.
#[inline]
pub fn validate_segwit(mut self) -> Result<SegwitHrpstring<'s>, SegwitHrpstringError> {
if self.data.is_empty() {
if self.ascii.is_empty() {
return Err(SegwitHrpstringError::NoData);
}

Expand All @@ -295,28 +394,28 @@ impl<'s> CheckedHrpstring<'s> {
}

// Unwrap ok since check_characters checked the bech32-ness of this char.
let witness_version = Fe32::from_char(self.data[0].into()).unwrap();
self.data = &self.data[1..]; // Remove the witness version byte from data.
let witness_version = Fe32::from_char(self.ascii[0].into()).unwrap();
self.ascii = &self.ascii[1..]; // Remove the witness version byte.

self.validate_padding()?;
self.validate_witness_program_length(witness_version)?;

Ok(SegwitHrpstring { hrp: self.hrp(), witness_version, data: self.data })
Ok(SegwitHrpstring { hrp: self.hrp(), witness_version, ascii: self.ascii })
}

/// Validates the segwit padding rules.
///
/// Must be called after the witness version byte is removed from the data.
/// Must be called after the witness version byte is removed from the data part.
///
/// From BIP-173:
/// > Re-arrange those bits into groups of 8 bits. Any incomplete group at the
/// > end MUST be 4 bits or less, MUST be all zeroes, and is discarded.
fn validate_padding(&self) -> Result<(), PaddingError> {
if self.data.is_empty() {
if self.ascii.is_empty() {
return Ok(()); // Empty data implies correct padding.
}

let fe_iter = AsciiToFe32Iter { iter: self.data.iter().copied() };
let fe_iter = AsciiToFe32Iter { iter: self.ascii.iter().copied() };
let padding_len = fe_iter.len() * 5 % 8;

if padding_len > 4 {
Expand All @@ -343,7 +442,7 @@ impl<'s> CheckedHrpstring<'s> {

/// Validates the segwit witness length rules.
///
/// Must be called after the witness version byte is removed from the data.
/// Must be called after the witness version byte is removed from the data part.
fn validate_witness_program_length(
&self,
witness_version: Fe32,
Expand Down Expand Up @@ -372,11 +471,12 @@ impl<'s> CheckedHrpstring<'s> {
pub struct SegwitHrpstring<'s> {
/// The human-readable part, valid for segwit addresses.
hrp: Hrp,
/// The first byte of the parsed data.
/// The first byte of the parsed data part.
witness_version: Fe32,
/// This is ASCII byte values of the parsed string, guaranteed to be valid bech32 characters,
/// with the witness version and checksum removed.
data: &'s [u8],
/// This is ASCII byte values of the parsed string, guaranteed to be valid bech32 characters.
///
/// The characters after the witness version and before the checksum.
ascii: &'s [u8],
}

impl<'s> SegwitHrpstring<'s> {
Expand All @@ -396,12 +496,14 @@ impl<'s> SegwitHrpstring<'s> {

let unchecked = UncheckedHrpstring::new(s)?;

if unchecked.data.is_empty() {
let data_part = unchecked.data_part_ascii();

if data_part.is_empty() {
return Err(SegwitHrpstringError::NoData);
}

// Unwrap ok since check_characters (in `Self::new`) checked the bech32-ness of this char.
let witness_version = Fe32::from_char(unchecked.data[0].into()).unwrap();
let witness_version = Fe32::from_char(data_part[0].into()).unwrap();
if witness_version.to_u8() > 16 {
return Err(SegwitHrpstringError::InvalidWitnessVersion(witness_version));
}
Expand Down Expand Up @@ -429,9 +531,10 @@ impl<'s> SegwitHrpstring<'s> {
#[inline]
pub fn new_bech32(s: &'s str) -> Result<Self, SegwitHrpstringError> {
let unchecked = UncheckedHrpstring::new(s)?;
let data_part = unchecked.data_part_ascii();

// Unwrap ok since check_characters (in `UncheckedHrpstring::new`) checked the bech32-ness of this char.
let witness_version = Fe32::from_char(unchecked.data[0].into()).unwrap();
let witness_version = Fe32::from_char(data_part[0].into()).unwrap();
if witness_version.to_u8() > 16 {
return Err(SegwitHrpstringError::InvalidWitnessVersion(witness_version));
}
Expand All @@ -456,6 +559,25 @@ impl<'s> SegwitHrpstring<'s> {
#[inline]
pub fn witness_version(&self) -> Fe32 { self.witness_version }

/// Returns the ASCII bytes of the data part with both the witness version and the checksum
/// stripped, i.e., the characters after the witness version and before the checksum.
///
/// Each byte in the returned slice is guaranteed to be a valid bech32 character.
///
/// # Examples
///
/// ```
/// use bech32::{Bech32, primitives::decode::SegwitHrpstring};
///
/// let addr = "bc1qar0srrr7xfkvy5l643lydnw9re59gtzzwf5mdq";
/// let ascii = "ar0srrr7xfkvy5l643lydnw9re59gtzz";
///
/// let segwit = SegwitHrpstring::new(addr).unwrap();
/// assert_eq!(segwit.data_part_ascii_no_witver_no_checksum(), ascii.as_bytes());
/// ```
#[inline]
pub fn data_part_ascii_no_witver_no_checksum(&self) -> &[u8] {
    self.ascii
}

/// Returns an iterator that yields the data part, excluding the witness version, of the parsed
/// bech32 encoded string.
///
Expand All @@ -465,12 +587,12 @@ impl<'s> SegwitHrpstring<'s> {
/// Use `self.witness_version()` to get the witness version.
#[inline]
pub fn byte_iter(&self) -> ByteIter {
ByteIter { iter: AsciiToFe32Iter { iter: self.data.iter().copied() }.fes_to_bytes() }
ByteIter { iter: AsciiToFe32Iter { iter: self.ascii.iter().copied() }.fes_to_bytes() }
}
}

/// Checks whether a given HRP string has data characters in the bech32 alphabet (incl. checksum
/// characters), and that the whole string has consistent casing (hrp, data, and checksum).
/// Checks whether a given HRP string has data part characters in the bech32 alphabet (incl.
/// checksum characters), and that the whole string has consistent casing (hrp and data part).
///
/// # Returns
///
Expand Down

0 comments on commit cb2f61e

Please sign in to comment.