From 57c2de81f2b53dc7cb93f8ba0d3e24f80cd61425 Mon Sep 17 00:00:00 2001
From: ash <97464181+Borgerr@users.noreply.github.com>
Date: Tue, 25 Jun 2024 23:58:43 -0600
Subject: [PATCH 1/2] set self.is_known_utf8 to false in extend_from_slice

---
 std/src/sys_common/wtf8.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/std/src/sys_common/wtf8.rs b/std/src/sys_common/wtf8.rs
index 708f62f476e73..117a3e23044ea 100644
--- a/std/src/sys_common/wtf8.rs
+++ b/std/src/sys_common/wtf8.rs
@@ -480,7 +480,7 @@ impl Wtf8Buf {
     #[inline]
     pub(crate) fn extend_from_slice(&mut self, other: &[u8]) {
         self.bytes.extend_from_slice(other);
-        self.is_known_utf8 = self.is_known_utf8 || self.next_surrogate(0).is_none();
+        self.is_known_utf8 = false;
     }
 }
 

From 4788a93eee24a3e90becec001fd921e5565330db Mon Sep 17 00:00:00 2001
From: Jubilee Young
Date: Tue, 25 Jun 2024 22:29:37 -0700
Subject: [PATCH 2/2] std: test a variety of ways to extend a Wtf8Buf

---
 std/src/sys_common/wtf8/tests.rs | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/std/src/sys_common/wtf8/tests.rs b/std/src/sys_common/wtf8/tests.rs
index 6a1cc41a8fb04..b57c99a8452a1 100644
--- a/std/src/sys_common/wtf8/tests.rs
+++ b/std/src/sys_common/wtf8/tests.rs
@@ -725,3 +725,27 @@ fn wtf8_utf8_boundary_between_surrogates() {
     string.push(CodePoint::from_u32(0xD800).unwrap());
     check_utf8_boundary(&string, 3);
 }
+
+#[test]
+fn wobbled_wtf8_plus_bytes_isnt_utf8() {
+    let mut string: Wtf8Buf = unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80").to_owned() };
+    assert!(!string.is_known_utf8);
+    string.extend_from_slice(b"some utf-8");
+    assert!(!string.is_known_utf8);
+}
+
+#[test]
+fn wobbled_wtf8_plus_str_isnt_utf8() {
+    let mut string: Wtf8Buf = unsafe { Wtf8::from_bytes_unchecked(b"\xED\xA0\x80").to_owned() };
+    assert!(!string.is_known_utf8);
+    string.push_str("some utf-8");
+    assert!(!string.is_known_utf8);
+}
+
+#[test]
+fn unwobbly_wtf8_plus_utf8_is_utf8() {
+    let mut string: Wtf8Buf = Wtf8Buf::from_str("hello world");
+    assert!(string.is_known_utf8);
+    string.push_str("some utf-8");
+    assert!(string.is_known_utf8);
+}