Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Uplift clippy::invalid_utf8_in_unchecked lint #111543

Merged
merged 4 commits into from
May 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions compiler/rustc_lint/messages.ftl
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,14 @@ lint_improper_ctypes_union_layout_help = consider adding a `#[repr(C)]` or `#[re
lint_improper_ctypes_union_layout_reason = this union has unspecified layout
lint_improper_ctypes_union_non_exhaustive = this union is non-exhaustive

# FIXME: we should ordinalize $valid_up_to when we add support for doing so
lint_invalid_from_utf8_checked = calls to `{$method}` with a invalid literal always return an error
.label = the literal was valid UTF-8 up to the {$valid_up_to} bytes

# FIXME: we should ordinalize $valid_up_to when we add support for doing so
lint_invalid_from_utf8_unchecked = calls to `{$method}` with a invalid literal are undefined behavior
.label = the literal was valid UTF-8 up to the {$valid_up_to} bytes

lint_lintpass_by_hand = implementing `LintPass` by hand
.help = try using `declare_lint_pass!` or `impl_lint_pass!` instead

Expand Down
118 changes: 118 additions & 0 deletions compiler/rustc_lint/src/invalid_from_utf8.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
use std::str::Utf8Error;

use rustc_ast::{BorrowKind, LitKind};
use rustc_hir::{Expr, ExprKind};
use rustc_span::source_map::Spanned;
use rustc_span::sym;

use crate::lints::InvalidFromUtf8Diag;
use crate::{LateContext, LateLintPass, LintContext};

declare_lint! {
/// The `invalid_from_utf8_unchecked` lint checks for calls to
/// `std::str::from_utf8_unchecked` and `std::str::from_utf8_unchecked_mut`
/// with an invalid UTF-8 literal.
///
/// ### Example
///
/// ```rust,compile_fail
/// # #[allow(unused)]
/// unsafe {
/// std::str::from_utf8_unchecked(b"Ru\x82st");
/// }
/// ```
///
/// {{produces}}
///
/// ### Explanation
///
/// Creating such a `str` would result in undefined behavior as per documentation
/// for `std::str::from_utf8_unchecked` and `std::str::from_utf8_unchecked_mut`.
pub INVALID_FROM_UTF8_UNCHECKED,
Deny,
"using a non UTF-8 literal in `std::str::from_utf8_unchecked`"
}

declare_lint! {
/// The `invalid_from_utf8` lint checks for calls to
/// `std::str::from_utf8` and `std::str::from_utf8_mut`
/// with an invalid UTF-8 literal.
///
/// ### Example
///
/// ```rust
/// # #[allow(unused)]
/// std::str::from_utf8(b"Ru\x82st");
/// ```
///
/// {{produces}}
///
/// ### Explanation
///
/// Trying to create such a `str` would always return an error as per documentation
/// for `std::str::from_utf8` and `std::str::from_utf8_mut`.
pub INVALID_FROM_UTF8,
Warn,
"using a non UTF-8 literal in `std::str::from_utf8`"
}

declare_lint_pass!(InvalidFromUtf8 => [INVALID_FROM_UTF8_UNCHECKED, INVALID_FROM_UTF8]);

impl<'tcx> LateLintPass<'tcx> for InvalidFromUtf8 {
fn check_expr(&mut self, cx: &LateContext<'tcx>, expr: &'tcx Expr<'tcx>) {
if let ExprKind::Call(path, [arg]) = expr.kind
&& let ExprKind::Path(ref qpath) = path.kind
&& let Some(def_id) = cx.qpath_res(qpath, path.hir_id).opt_def_id()
&& let Some(diag_item) = cx.tcx.get_diagnostic_name(def_id)
&& [sym::str_from_utf8, sym::str_from_utf8_mut,
sym::str_from_utf8_unchecked, sym::str_from_utf8_unchecked_mut].contains(&diag_item)
{
let lint = |utf8_error: Utf8Error| {
let label = arg.span;
let method = diag_item.as_str().strip_prefix("str_").unwrap();
let method = format!("std::str::{method}");
let valid_up_to = utf8_error.valid_up_to();
let is_unchecked_variant = diag_item.as_str().contains("unchecked");

cx.emit_spanned_lint(
if is_unchecked_variant { INVALID_FROM_UTF8_UNCHECKED } else { INVALID_FROM_UTF8 },
expr.span,
if is_unchecked_variant {
InvalidFromUtf8Diag::Unchecked { method, valid_up_to, label }
} else {
InvalidFromUtf8Diag::Checked { method, valid_up_to, label }
}
)
};

match &arg.kind {
ExprKind::Lit(Spanned { node: lit, .. }) => {
if let LitKind::ByteStr(bytes, _) = &lit
&& let Err(utf8_error) = std::str::from_utf8(bytes)
{
lint(utf8_error);
}
},
ExprKind::AddrOf(BorrowKind::Ref, _, Expr { kind: ExprKind::Array(args), .. }) => {
let elements = args.iter().map(|e|{
match &e.kind {
ExprKind::Lit(Spanned { node: lit, .. }) => match lit {
LitKind::Byte(b) => Some(*b),
LitKind::Int(b, _) => Some(*b as u8),
_ => None
}
_ => None
}
}).collect::<Option<Vec<_>>>();

if let Some(elements) = elements
&& let Err(utf8_error) = std::str::from_utf8(&elements)
{
lint(utf8_error);
}
}
_ => {}
}
}
}
}
3 changes: 3 additions & 0 deletions compiler/rustc_lint/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ mod expect;
mod for_loops_over_fallibles;
pub mod hidden_unicode_codepoints;
mod internal;
mod invalid_from_utf8;
mod late;
mod let_underscore;
mod levels;
Expand Down Expand Up @@ -102,6 +103,7 @@ use enum_intrinsics_non_enums::EnumIntrinsicsNonEnums;
use for_loops_over_fallibles::*;
use hidden_unicode_codepoints::*;
use internal::*;
use invalid_from_utf8::*;
use let_underscore::*;
use map_unit_fn::*;
use methods::*;
Expand Down Expand Up @@ -207,6 +209,7 @@ late_lint_methods!(
HardwiredLints: HardwiredLints,
ImproperCTypesDeclarations: ImproperCTypesDeclarations,
ImproperCTypesDefinitions: ImproperCTypesDefinitions,
InvalidFromUtf8: InvalidFromUtf8,
VariantSizeDifferences: VariantSizeDifferences,
BoxPointers: BoxPointers,
PathStatements: PathStatements,
Expand Down
19 changes: 19 additions & 0 deletions compiler/rustc_lint/src/lints.rs
Original file line number Diff line number Diff line change
Expand Up @@ -699,6 +699,25 @@ pub struct ForgetCopyDiag<'a> {
pub label: Span,
}

// invalid_from_utf8.rs
#[derive(LintDiagnostic)]
pub enum InvalidFromUtf8Diag {
#[diag(lint_invalid_from_utf8_unchecked)]
Unchecked {
method: String,
valid_up_to: usize,
#[label]
label: Span,
},
#[diag(lint_invalid_from_utf8_checked)]
Checked {
method: String,
valid_up_to: usize,
#[label]
label: Span,
},
}

// hidden_unicode_codepoints.rs
#[derive(LintDiagnostic)]
#[diag(lint_hidden_unicode_codepoints)]
Expand Down
4 changes: 4 additions & 0 deletions compiler/rustc_span/src/symbol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1454,6 +1454,10 @@ symbols! {
stop_after_dataflow,
store,
str,
str_from_utf8,
str_from_utf8_mut,
str_from_utf8_unchecked,
str_from_utf8_unchecked_mut,
str_split_whitespace,
str_trim,
str_trim_end,
Expand Down
2 changes: 2 additions & 0 deletions library/alloc/tests/str.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#![cfg_attr(not(bootstrap), allow(invalid_from_utf8))]

use std::assert_matches::assert_matches;
use std::borrow::Cow;
use std::cmp::Ordering::{Equal, Greater, Less};
Expand Down
4 changes: 4 additions & 0 deletions library/core/src/str/converts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ use super::Utf8Error;
#[stable(feature = "rust1", since = "1.0.0")]
#[rustc_const_stable(feature = "const_str_from_utf8_shared", since = "1.63.0")]
#[rustc_allow_const_fn_unstable(str_internals)]
#[rustc_diagnostic_item = "str_from_utf8"]
pub const fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
// FIXME: This should use `?` again, once it's `const`
match run_utf8_validation(v) {
Expand Down Expand Up @@ -127,6 +128,7 @@ pub const fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
/// errors that can be returned.
#[stable(feature = "str_mut_extras", since = "1.20.0")]
#[rustc_const_unstable(feature = "const_str_from_utf8", issue = "91006")]
#[rustc_diagnostic_item = "str_from_utf8_mut"]
pub const fn from_utf8_mut(v: &mut [u8]) -> Result<&mut str, Utf8Error> {
// This should use `?` again, once it's `const`
match run_utf8_validation(v) {
Expand Down Expand Up @@ -167,6 +169,7 @@ pub const fn from_utf8_mut(v: &mut [u8]) -> Result<&mut str, Utf8Error> {
#[must_use]
#[stable(feature = "rust1", since = "1.0.0")]
#[rustc_const_stable(feature = "const_str_from_utf8_unchecked", since = "1.55.0")]
#[rustc_diagnostic_item = "str_from_utf8_unchecked"]
pub const unsafe fn from_utf8_unchecked(v: &[u8]) -> &str {
// SAFETY: the caller must guarantee that the bytes `v` are valid UTF-8.
// Also relies on `&str` and `&[u8]` having the same layout.
Expand Down Expand Up @@ -194,6 +197,7 @@ pub const unsafe fn from_utf8_unchecked(v: &[u8]) -> &str {
#[must_use]
#[stable(feature = "str_mut_extras", since = "1.20.0")]
#[rustc_const_unstable(feature = "const_str_from_utf8_unchecked_mut", issue = "91005")]
#[rustc_diagnostic_item = "str_from_utf8_unchecked_mut"]
pub const unsafe fn from_utf8_unchecked_mut(v: &mut [u8]) -> &mut str {
// SAFETY: the caller must guarantee that the bytes `v`
// are valid UTF-8, thus the cast to `*mut str` is safe.
Expand Down
1 change: 0 additions & 1 deletion src/tools/clippy/clippy_lints/src/declared_lints.rs
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,6 @@ pub(crate) static LINTS: &[&crate::LintInfo] = &[
crate::instant_subtraction::UNCHECKED_DURATION_SUBTRACTION_INFO,
crate::int_plus_one::INT_PLUS_ONE_INFO,
crate::invalid_upcast_comparisons::INVALID_UPCAST_COMPARISONS_INFO,
crate::invalid_utf8_in_unchecked::INVALID_UTF8_IN_UNCHECKED_INFO,
crate::items_after_statements::ITEMS_AFTER_STATEMENTS_INFO,
crate::items_after_test_module::ITEMS_AFTER_TEST_MODULE_INFO,
crate::iter_not_returning_iterator::ITER_NOT_RETURNING_ITERATOR_INFO,
Expand Down
74 changes: 0 additions & 74 deletions src/tools/clippy/clippy_lints/src/invalid_utf8_in_unchecked.rs

This file was deleted.

2 changes: 0 additions & 2 deletions src/tools/clippy/clippy_lints/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,6 @@ mod inline_fn_without_body;
mod instant_subtraction;
mod int_plus_one;
mod invalid_upcast_comparisons;
mod invalid_utf8_in_unchecked;
mod items_after_statements;
mod items_after_test_module;
mod iter_not_returning_iterator;
Expand Down Expand Up @@ -937,7 +936,6 @@ pub fn register_plugins(store: &mut rustc_lint::LintStore, sess: &Session, conf:
store.register_late_pass(move |_| Box::new(manual_retain::ManualRetain::new(msrv())));
let verbose_bit_mask_threshold = conf.verbose_bit_mask_threshold;
store.register_late_pass(move |_| Box::new(operators::Operators::new(verbose_bit_mask_threshold)));
store.register_late_pass(|_| Box::new(invalid_utf8_in_unchecked::InvalidUtf8InUnchecked));
store.register_late_pass(|_| Box::<std_instead_of_core::StdReexports>::default());
store.register_late_pass(move |_| Box::new(instant_subtraction::InstantSubtraction::new(msrv())));
store.register_late_pass(|_| Box::new(partialeq_to_none::PartialeqToNone));
Expand Down
1 change: 1 addition & 0 deletions src/tools/clippy/clippy_lints/src/renamed_lints.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ pub static RENAMED_LINTS: &[(&str, &str)] = &[
("clippy::into_iter_on_array", "array_into_iter"),
("clippy::invalid_atomic_ordering", "invalid_atomic_ordering"),
("clippy::invalid_ref", "invalid_value"),
("clippy::invalid_utf8_in_unchecked", "invalid_from_utf8_unchecked"),
("clippy::let_underscore_drop", "let_underscore_drop"),
("clippy::mem_discriminant_non_enum", "enum_intrinsics_non_enums"),
("clippy::panic_params", "non_fmt_panics"),
Expand Down
20 changes: 0 additions & 20 deletions src/tools/clippy/tests/ui/invalid_utf8_in_unchecked.rs

This file was deleted.

22 changes: 0 additions & 22 deletions src/tools/clippy/tests/ui/invalid_utf8_in_unchecked.stderr

This file was deleted.

2 changes: 2 additions & 0 deletions src/tools/clippy/tests/ui/rename.fixed
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
#![allow(array_into_iter)]
#![allow(invalid_atomic_ordering)]
#![allow(invalid_value)]
#![allow(invalid_from_utf8_unchecked)]
#![allow(let_underscore_drop)]
#![allow(enum_intrinsics_non_enums)]
#![allow(non_fmt_panics)]
Expand Down Expand Up @@ -87,6 +88,7 @@
#![warn(array_into_iter)]
#![warn(invalid_atomic_ordering)]
#![warn(invalid_value)]
#![warn(invalid_from_utf8_unchecked)]
#![warn(let_underscore_drop)]
#![warn(enum_intrinsics_non_enums)]
#![warn(non_fmt_panics)]
Expand Down
Loading