From f585e3e2dc09eeb061e2cfbb459f63d212c74232 Mon Sep 17 00:00:00 2001 From: Andrew Gallant Date: Tue, 28 Nov 2023 09:50:03 -0500 Subject: [PATCH] remove several uses of `unsafe` (#8600) This PR removes several uses of `unsafe`. I generally limited myself to low hanging fruit that I could see. There are still a few remaining uses of `unsafe` that looked a bit more difficult to remove (if possible at all). But this gets rid of a good chunk of them. I put each `unsafe` removal into its own commit with a justification for why I did it. So I would encourage reviewing this PR commit-by-commit. That way, we can legislate them independently. It's no problem to drop a commit if we feel the `unsafe` should stay in that case. --- crates/ruff_formatter/src/arguments.rs | 41 ++++--------------- crates/ruff_formatter/src/builders.rs | 30 +++++++------- crates/ruff_formatter/src/format_element.rs | 38 +++++++++++++---- crates/ruff_formatter/src/macros.rs | 6 +-- crates/ruff_macros/src/newtype_index.rs | 6 +++ .../ruff_python_formatter/src/comments/map.rs | 6 +-- .../src/expression/binary_like.rs | 5 +-- crates/ruff_python_literal/src/escape.rs | 8 ++-- crates/ruff_python_parser/src/string.rs | 6 +-- crates/ruff_source_file/src/newlines.rs | 6 +-- 10 files changed, 69 insertions(+), 83 deletions(-) diff --git a/crates/ruff_formatter/src/arguments.rs b/crates/ruff_formatter/src/arguments.rs index 8fa70d73db8d8..d26306c4ea502 100644 --- a/crates/ruff_formatter/src/arguments.rs +++ b/crates/ruff_formatter/src/arguments.rs @@ -1,23 +1,9 @@ use super::{Buffer, Format, Formatter}; use crate::FormatResult; -use std::ffi::c_void; -use std::marker::PhantomData; -/// Mono-morphed type to format an object. Used by the [`crate::format`!], [`crate::format_args`!], and -/// [`crate::write`!] macros. -/// -/// This struct is similar to a dynamic dispatch (using `dyn Format`) because it stores a pointer to the value. -/// However, it doesn't store the pointer to `dyn Format`'s vtable, instead it statically resolves the function -/// pointer of `Format::format` and stores it in `formatter`. +/// A convenience wrapper for representing a formattable argument. pub struct Argument<'fmt, Context> { - /// The value to format stored as a raw pointer where `lifetime` stores the value's lifetime. - value: *const c_void, - - /// Stores the lifetime of the value. To get the most out of our dear borrow checker. - lifetime: PhantomData<&'fmt ()>, - - /// The function pointer to `value`'s `Format::format` method - formatter: fn(*const c_void, &mut Formatter<'_, Context>) -> FormatResult<()>, + value: &'fmt dyn Format, } impl Clone for Argument<'_, Context> { @@ -28,32 +14,19 @@ impl Clone for Argument<'_, Context> { impl Copy for Argument<'_, Context> {} impl<'fmt, Context> Argument<'fmt, Context> { - /// Called by the [ruff_formatter::format_args] macro. Creates a mono-morphed value for formatting - /// an object. + /// Called by the [ruff_formatter::format_args] macro. #[doc(hidden)] #[inline] pub fn new>(value: &'fmt F) -> Self { - #[inline] - fn formatter, Context>( - ptr: *const c_void, - fmt: &mut Formatter, - ) -> FormatResult<()> { - // SAFETY: Safe because the 'fmt lifetime is captured by the 'lifetime' field. - #[allow(unsafe_code)] - F::fmt(unsafe { &*ptr.cast::() }, fmt) - } - - Self { - value: (value as *const F).cast::(), - lifetime: PhantomData, - formatter: formatter::, - } + Self { value } } /// Formats the value stored by this argument using the given formatter. #[inline] + // Seems to only be triggered on wasm32 and looks like a false positive? + #[allow(clippy::trivially_copy_pass_by_ref)] pub(super) fn format(&self, f: &mut Formatter) -> FormatResult<()> { - (self.formatter)(self.value, f) + self.value.fmt(f) } } diff --git a/crates/ruff_formatter/src/builders.rs b/crates/ruff_formatter/src/builders.rs index 7eadba123a207..fdf87e6327daf 100644 --- a/crates/ruff_formatter/src/builders.rs +++ b/crates/ruff_formatter/src/builders.rs @@ -2555,17 +2555,17 @@ pub struct BestFitting<'a, Context> { } impl<'a, Context> BestFitting<'a, Context> { - /// Creates a new best fitting IR with the given variants. The method itself isn't unsafe - /// but it is to discourage people from using it because the printer will panic if - /// the slice doesn't contain at least the least and most expanded variants. + /// Creates a new best fitting IR with the given variants. + /// + /// Callers are required to ensure that the number of variants given + /// is at least 2. /// /// You're looking for a way to create a `BestFitting` object, use the `best_fitting![least_expanded, most_expanded]` macro. /// - /// ## Safety - - /// The slice must contain at least two variants. - #[allow(unsafe_code)] - pub unsafe fn from_arguments_unchecked(variants: Arguments<'a, Context>) -> Self { + /// # Panics + /// + /// When the slice contains less than two variants. + pub fn from_arguments_unchecked(variants: Arguments<'a, Context>) -> Self { assert!( variants.0.len() >= 2, "Requires at least the least expanded and most expanded variants" @@ -2696,14 +2696,12 @@ impl Format for BestFitting<'_, Context> { buffer.write_element(FormatElement::Tag(EndBestFittingEntry)); } - // SAFETY: The constructor guarantees that there are always at least two variants. It's, therefore, - // safe to call into the unsafe `from_vec_unchecked` function - #[allow(unsafe_code)] - let element = unsafe { - FormatElement::BestFitting { - variants: BestFittingVariants::from_vec_unchecked(buffer.into_vec()), - mode: self.mode, - } + // OK because the constructor guarantees that there are always at + // least two variants. + let variants = BestFittingVariants::from_vec_unchecked(buffer.into_vec()); + let element = FormatElement::BestFitting { + variants, + mode: self.mode, }; f.write_element(element); diff --git a/crates/ruff_formatter/src/format_element.rs b/crates/ruff_formatter/src/format_element.rs index f9fe281df3fde..0adcf7dda4338 100644 --- a/crates/ruff_formatter/src/format_element.rs +++ b/crates/ruff_formatter/src/format_element.rs @@ -332,17 +332,14 @@ pub enum BestFittingMode { pub struct BestFittingVariants(Box<[FormatElement]>); impl BestFittingVariants { - /// Creates a new best fitting IR with the given variants. The method itself isn't unsafe - /// but it is to discourage people from using it because the printer will panic if - /// the slice doesn't contain at least the least and most expanded variants. + /// Creates a new best fitting IR with the given variants. /// - /// You're looking for a way to create a `BestFitting` object, use the `best_fitting![least_expanded, most_expanded]` macro. + /// Callers are required to ensure that the number of variants given + /// is at least 2 when using `most_expanded` or `most_flag`. /// - /// ## Safety - /// The slice must contain at least two variants. + /// You're looking for a way to create a `BestFitting` object, use the `best_fitting![least_expanded, most_expanded]` macro. #[doc(hidden)] - #[allow(unsafe_code)] - pub unsafe fn from_vec_unchecked(variants: Vec) -> Self { + pub fn from_vec_unchecked(variants: Vec) -> Self { debug_assert!( variants .iter() @@ -351,12 +348,23 @@ impl BestFittingVariants { >= 2, "Requires at least the least expanded and most expanded variants" ); - Self(variants.into_boxed_slice()) } /// Returns the most expanded variant + /// + /// # Panics + /// + /// When the number of variants is less than two. pub fn most_expanded(&self) -> &[FormatElement] { + assert!( + self.as_slice() + .iter() + .filter(|element| matches!(element, FormatElement::Tag(Tag::StartBestFittingEntry))) + .count() + >= 2, + "Requires at least the least expanded and most expanded variants" + ); self.into_iter().last().unwrap() } @@ -365,7 +373,19 @@ impl BestFittingVariants { } /// Returns the least expanded variant + /// + /// # Panics + /// + /// When the number of variants is less than two. pub fn most_flat(&self) -> &[FormatElement] { + assert!( + self.as_slice() + .iter() + .filter(|element| matches!(element, FormatElement::Tag(Tag::StartBestFittingEntry))) + .count() + >= 2, + "Requires at least the least expanded and most expanded variants" + ); self.into_iter().next().unwrap() } } diff --git a/crates/ruff_formatter/src/macros.rs b/crates/ruff_formatter/src/macros.rs index 97a4cc696115d..2e327739a9091 100644 --- a/crates/ruff_formatter/src/macros.rs +++ b/crates/ruff_formatter/src/macros.rs @@ -329,10 +329,8 @@ macro_rules! format { #[macro_export] macro_rules! best_fitting { ($least_expanded:expr, $($tail:expr),+ $(,)?) => {{ - #[allow(unsafe_code)] - unsafe { - $crate::BestFitting::from_arguments_unchecked($crate::format_args!($least_expanded, $($tail),+)) - } + // OK because the macro syntax requires at least two variants. + $crate::BestFitting::from_arguments_unchecked($crate::format_args!($least_expanded, $($tail),+)) }} } diff --git a/crates/ruff_macros/src/newtype_index.rs b/crates/ruff_macros/src/newtype_index.rs index 2c1f6e14eca05..4ddc76c2e6838 100644 --- a/crates/ruff_macros/src/newtype_index.rs +++ b/crates/ruff_macros/src/newtype_index.rs @@ -45,6 +45,9 @@ pub(super) fn generate_newtype_index(item: ItemStruct) -> syn::Result syn::Result Self { assert!(value < u32::MAX as usize); - // SAFETY: + // OK because: // * The `value < u32::MAX` guarantees that the add doesn't overflow. // * The `+ 1` guarantees that the index is not zero - #[allow(unsafe_code, clippy::cast_possible_truncation)] - Self(unsafe { std::num::NonZeroU32::new_unchecked((value as u32) + 1) }) + #[allow(clippy::cast_possible_truncation)] + Self(std::num::NonZeroU32::new((value as u32) + 1).expect("valid value")) } fn value(self) -> usize { diff --git a/crates/ruff_python_formatter/src/expression/binary_like.rs b/crates/ruff_python_formatter/src/expression/binary_like.rs index f77851e395cb5..6c88db34f6e0f 100644 --- a/crates/ruff_python_formatter/src/expression/binary_like.rs +++ b/crates/ruff_python_formatter/src/expression/binary_like.rs @@ -1105,9 +1105,8 @@ impl OperatorIndex { fn new(index: usize) -> Self { assert_eq!(index % 2, 1, "Operator indices must be odd positions"); - // SAFETY A value with a module 0 is guaranteed to never equal 0 - #[allow(unsafe_code)] - Self(unsafe { NonZeroUsize::new_unchecked(index) }) + // OK because a value with a modulo 1 is guaranteed to never equal 0 + Self(NonZeroUsize::new(index).expect("valid index")) } const fn value(self) -> usize { diff --git a/crates/ruff_python_literal/src/escape.rs b/crates/ruff_python_literal/src/escape.rs index 01de325f102d7..1894bbff952af 100644 --- a/crates/ruff_python_literal/src/escape.rs +++ b/crates/ruff_python_literal/src/escape.rs @@ -415,12 +415,10 @@ impl<'a> Escape for AsciiEscape<'a> { fn layout(&self) -> &EscapeLayout { &self.layout } - #[allow(unsafe_code)] fn write_source(&self, formatter: &mut impl std::fmt::Write) -> std::fmt::Result { - formatter.write_str(unsafe { - // SAFETY: this function must be called only when source is printable ascii characters - std::str::from_utf8_unchecked(self.source) - }) + // OK because function must be called only when source is printable ascii characters. + let string = std::str::from_utf8(self.source).expect("ASCII bytes"); + formatter.write_str(string) } #[cold] diff --git a/crates/ruff_python_parser/src/string.rs b/crates/ruff_python_parser/src/string.rs index 2d64e66bd2ec4..2d4f2c5df926a 100644 --- a/crates/ruff_python_parser/src/string.rs +++ b/crates/ruff_python_parser/src/string.rs @@ -120,10 +120,8 @@ impl<'a> StringParser<'a> { len += 1; } - // SAFETY: radix_bytes is always going to be in the ASCII range. - #[allow(unsafe_code)] - let radix_str = unsafe { std::str::from_utf8_unchecked(&radix_bytes[..len]) }; - + // OK because radix_bytes is always going to be in the ASCII range. + let radix_str = std::str::from_utf8(&radix_bytes[..len]).expect("ASCII bytes"); let value = u32::from_str_radix(radix_str, 8).unwrap(); char::from_u32(value).unwrap() } diff --git a/crates/ruff_source_file/src/newlines.rs b/crates/ruff_source_file/src/newlines.rs index 6a79878fe4399..4e4d4e09a4a3e 100644 --- a/crates/ruff_source_file/src/newlines.rs +++ b/crates/ruff_source_file/src/newlines.rs @@ -58,11 +58,7 @@ impl<'a> UniversalNewlineIterator<'a> { pub fn find_newline(text: &str) -> Option<(usize, LineEnding)> { let bytes = text.as_bytes(); if let Some(position) = memchr2(b'\n', b'\r', bytes) { - // SAFETY: memchr guarantees to return valid positions - #[allow(unsafe_code)] - let newline_character = unsafe { *bytes.get_unchecked(position) }; - - let line_ending = match newline_character { + let line_ending = match bytes[position] { // Explicit branch for `\n` as this is the most likely path b'\n' => LineEnding::Lf, // '\r\n'