Skip to content

Commit

Permalink
Fix Unicode handling in String and WString (#362)
Browse files Browse the repository at this point in the history
According to https://design.ros2.org/articles/wide_strings.html, the `string` and `wstring` types must use the UTF-8 and UTF-16 encodings, respectively (not necessarily enforced), cannot contain null bytes or null words, respectively (enforced), and, when bounded, have their length measured in bytes or 16-bit words, respectively.

Moreover, although the rosidl_runtime_c `U16String` type uses `uint_least16_t`, Rust guarantees the existence of an exact-width `u16` type, so we should use that instead of `c_ushort`, which is not guaranteed to be the same as `uint_least16_t` either. (Rust doesn't support platforms where `uint_least16_t != uint16_t`.)
  • Loading branch information
nwn authored Apr 26, 2024
1 parent 0e6939c commit 875bda3
Showing 1 changed file with 13 additions and 15 deletions.
28 changes: 13 additions & 15 deletions rosidl_runtime_rs/src/string.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ mod serde;
use crate::sequence::Sequence;
use crate::traits::SequenceAlloc;

/// A zero-terminated string of 8-bit characters.
/// A zero-terminated UTF-8 string.
///
/// The layout of this type is the same as `rosidl_runtime_c__String`. See the
/// [`Message`](crate::Message) trait for background information on this topic.
Expand All @@ -34,7 +34,7 @@ pub struct String {
capacity: usize,
}

/// A zero-terminated string of 16-bit characters.
/// A zero-terminated UTF-16 string.
///
/// The layout of this type is the same as `rosidl_runtime_c__U16String`. See the
/// [`Message`](crate::Message) trait for background information on this topic.
Expand All @@ -50,15 +50,15 @@ pub struct String {
/// ```
#[repr(C)]
pub struct WString {
data: *mut std::os::raw::c_ushort,
data: *mut u16,
size: usize,
capacity: usize,
}

/// A zero-terminated string of 8-bit characters with a length limit.
/// A zero-terminated UTF-8 string with a length limit.
///
/// The same as [`String`], but it cannot be constructed from a string that is too large.
/// The length is measured as the number of Unicode scalar values, not bytes.
/// The length is measured as the number of bytes.
///
/// # Example
///
Expand All @@ -77,10 +77,10 @@ pub struct BoundedString<const N: usize> {
inner: String,
}

/// A zero-terminated string of 16-bit characters with a length limit.
/// A zero-terminated UTF-16 string with a length limit.
///
/// The same as [`WString`], but it cannot be constructed from a string that is too large.
/// The length is measured as the number of Unicode scalar values, not bytes.
/// The length is measured as the number of 16-bit words.
///
/// # Example
///
Expand Down Expand Up @@ -290,7 +290,7 @@ string_impl!(
);
string_impl!(
WString,
std::os::raw::c_ushort,
u16,
u16,
from_utf16_lossy,
rosidl_runtime_c__U16String__init,
Expand Down Expand Up @@ -406,7 +406,7 @@ impl<const N: usize> SequenceAlloc for BoundedString<N> {
impl<const N: usize> TryFrom<&str> for BoundedString<N> {
type Error = StringExceedsBoundsError;
fn try_from(s: &str) -> Result<Self, Self::Error> {
let length = s.chars().count();
let length = s.len();
if length <= N {
Ok(Self {
inner: String::from(s),
Expand Down Expand Up @@ -472,14 +472,12 @@ impl<const N: usize> SequenceAlloc for BoundedWString<N> {
impl<const N: usize> TryFrom<&str> for BoundedWString<N> {
type Error = StringExceedsBoundsError;
fn try_from(s: &str) -> Result<Self, Self::Error> {
let length = s.chars().count();
if length <= N {
Ok(Self {
inner: WString::from(s),
})
let inner = WString::from(s);
if inner.size <= N {
Ok(Self { inner })
} else {
Err(StringExceedsBoundsError {
len: length,
len: inner.size,
upper_bound: N,
})
}
Expand Down

0 comments on commit 875bda3

Please sign in to comment.