From 0cbbfcd56c9f0556e86884abeba39260db3dee4e Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Wed, 28 Aug 2024 09:27:02 +0000 Subject: [PATCH 1/2] Tighten unsafe block --- crates/polars-io/src/csv/read/splitfields.rs | 26 ++++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/crates/polars-io/src/csv/read/splitfields.rs b/crates/polars-io/src/csv/read/splitfields.rs index ea6558950576..b71a035e896a 100644 --- a/crates/polars-io/src/csv/read/splitfields.rs +++ b/crates/polars-io/src/csv/read/splitfields.rs @@ -266,21 +266,21 @@ mod inner { let bytes = unsafe { self.v.get_unchecked_release(total_idx..) }; if bytes.len() > SIMD_SIZE { - unsafe { - let lane: [u8; SIMD_SIZE] = bytes + let lane: [u8; SIMD_SIZE] = unsafe { + bytes .get_unchecked(0..SIMD_SIZE) .try_into() - .unwrap_unchecked_release(); - let simd_bytes = SimdVec::from(lane); - let has_eol_char = simd_bytes.simd_eq(self.simd_eol_char); - let has_separator = simd_bytes.simd_eq(self.simd_separator); - let has_any = has_separator.bitor(has_eol_char); - if let Some(idx) = has_any.first_set() { - total_idx += idx; - break; - } else { - total_idx += SIMD_SIZE; - } + .unwrap_unchecked_release() + }; + let simd_bytes = SimdVec::from(lane); + let has_eol_char = simd_bytes.simd_eq(self.simd_eol_char); + let has_separator = simd_bytes.simd_eq(self.simd_separator); + let has_any = has_separator.bitor(has_eol_char); + if let Some(idx) = has_any.first_set() { + total_idx += idx; + break; + } else { + total_idx += SIMD_SIZE; } } else { match bytes.iter().position(|&c| self.eof_oel(c)) { From 17ac49a9e0421b7cdd182090d7fc1f8be6259005 Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Wed, 28 Aug 2024 09:27:36 +0000 Subject: [PATCH 2/2] refactor(rust): Fix fencepost debug assertion in splitfields For some slice `v` of length `n`, it is UB to call `get_unchecked` with a range starting at `n+1`. 
Previously, we asserted that `pos <= n`, which is the requirement for `v.get_unchecked(..pos)`, but not for `v.get_unchecked(pos+1..)`, which has the tighter requirement `pos < n`. --- crates/polars-io/src/csv/read/splitfields.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/polars-io/src/csv/read/splitfields.rs b/crates/polars-io/src/csv/read/splitfields.rs index b71a035e896a..59f9bcd53bd8 100644 --- a/crates/polars-io/src/csv/read/splitfields.rs +++ b/crates/polars-io/src/csv/read/splitfields.rs @@ -302,7 +302,7 @@ mod inner { }; unsafe { - debug_assert!(pos <= self.v.len()); + debug_assert!(pos < self.v.len()); // SAFETY: // we are in bounds let ret = Some((self.v.get_unchecked(..pos), needs_escaping));