Skip to content

Commit

Permalink
Add invalid_from_utf8 analogous to invalid_from_utf8_unchecked
Browse files Browse the repository at this point in the history
  • Loading branch information
Urgau committed May 26, 2023
1 parent a0612d9 commit 7f99c7d
Show file tree
Hide file tree
Showing 7 changed files with 169 additions and 21 deletions.
4 changes: 4 additions & 0 deletions compiler/rustc_lint/messages.ftl
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,10 @@ lint_improper_ctypes_union_layout_help = consider adding a `#[repr(C)]` or `#[re
lint_improper_ctypes_union_layout_reason = this union has unspecified layout
lint_improper_ctypes_union_non_exhaustive = this union is non-exhaustive
# FIXME: we should ordinalize $valid_up_to when we add support for doing so
lint_invalid_from_utf8_checked = calls to `{$method}` with a invalid literal always return an error
.label = the literal was valid UTF-8 up to the {$valid_up_to} bytes
# FIXME: we should ordinalize $valid_up_to when we add support for doing so
lint_invalid_from_utf8_unchecked = calls to `{$method}` with a invalid literal are undefined behavior
.label = the literal was valid UTF-8 up to the {$valid_up_to} bytes
Expand Down
49 changes: 41 additions & 8 deletions compiler/rustc_lint/src/invalid_from_utf8.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ use rustc_hir::{Expr, ExprKind};
use rustc_span::source_map::Spanned;
use rustc_span::sym;

use crate::lints::InvalidFromUtf8UncheckedDiag;
use crate::lints::InvalidFromUtf8Diag;
use crate::{LateContext, LateLintPass, LintContext};

declare_lint! {
Expand Down Expand Up @@ -33,23 +33,56 @@ declare_lint! {
"using a non UTF-8 literal in `std::str::from_utf8_unchecked`"
}

declare_lint_pass!(InvalidFromUtf8 => [INVALID_FROM_UTF8_UNCHECKED]);
declare_lint! {
/// The `invalid_from_utf8` lint checks for calls to
/// `std::str::from_utf8` and `std::str::from_utf8_mut`
/// with an invalid UTF-8 literal.
///
/// ### Example
///
/// ```rust
/// # #[allow(unused)]
/// std::str::from_utf8(b"Ru\x82st");
/// ```
///
/// {{produces}}
///
/// ### Explanation
///
/// Trying to create such a `str` would always return an error as per documentation
/// for `std::str::from_utf8` and `std::str::from_utf8_mut`.
pub INVALID_FROM_UTF8,
Warn,
"using a non UTF-8 literal in `std::str::from_utf8`"
}

declare_lint_pass!(InvalidFromUtf8 => [INVALID_FROM_UTF8_UNCHECKED, INVALID_FROM_UTF8]);

impl<'tcx> LateLintPass<'tcx> for InvalidFromUtf8 {
fn check_expr(&mut self, cx: &LateContext<'tcx>, expr: &'tcx Expr<'tcx>) {
if let ExprKind::Call(path, [arg]) = expr.kind
&& let ExprKind::Path(ref qpath) = path.kind
&& let Some(def_id) = cx.qpath_res(qpath, path.hir_id).opt_def_id()
&& let Some(diag_item) = cx.tcx.get_diagnostic_name(def_id)
&& [sym::str_from_utf8_unchecked, sym::str_from_utf8_unchecked_mut].contains(&diag_item)
&& [sym::str_from_utf8, sym::str_from_utf8_mut,
sym::str_from_utf8_unchecked, sym::str_from_utf8_unchecked_mut].contains(&diag_item)
{
let lint = |utf8_error: Utf8Error| {
let label = arg.span;
let method = diag_item.as_str().strip_prefix("str_").unwrap();
cx.emit_spanned_lint(INVALID_FROM_UTF8_UNCHECKED, expr.span, InvalidFromUtf8UncheckedDiag {
method: format!("std::str::{method}"),
valid_up_to: utf8_error.valid_up_to(),
label: arg.span,
})
let method = format!("std::str::{method}");
let valid_up_to = utf8_error.valid_up_to();
let is_unchecked_variant = diag_item.as_str().contains("unchecked");

cx.emit_spanned_lint(
if is_unchecked_variant { INVALID_FROM_UTF8_UNCHECKED } else { INVALID_FROM_UTF8 },
expr.span,
if is_unchecked_variant {
InvalidFromUtf8Diag::Unchecked { method, valid_up_to, label }
} else {
InvalidFromUtf8Diag::Checked { method, valid_up_to, label }
}
)
};

match &arg.kind {
Expand Down
21 changes: 15 additions & 6 deletions compiler/rustc_lint/src/lints.rs
Original file line number Diff line number Diff line change
Expand Up @@ -701,12 +701,21 @@ pub struct ForgetCopyDiag<'a> {

// invalid_from_utf8.rs
#[derive(LintDiagnostic)]
#[diag(lint_invalid_from_utf8_unchecked)]
pub struct InvalidFromUtf8UncheckedDiag {
pub method: String,
pub valid_up_to: usize,
#[label]
pub label: Span,
pub enum InvalidFromUtf8Diag {
#[diag(lint_invalid_from_utf8_unchecked)]
Unchecked {
method: String,
valid_up_to: usize,
#[label]
label: Span,
},
#[diag(lint_invalid_from_utf8_checked)]
Checked {
method: String,
valid_up_to: usize,
#[label]
label: Span,
},
}

// hidden_unicode_codepoints.rs
Expand Down
2 changes: 2 additions & 0 deletions compiler/rustc_span/src/symbol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1454,6 +1454,8 @@ symbols! {
stop_after_dataflow,
store,
str,
str_from_utf8,
str_from_utf8_mut,
str_from_utf8_unchecked,
str_from_utf8_unchecked_mut,
str_split_whitespace,
Expand Down
2 changes: 2 additions & 0 deletions library/core/src/str/converts.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ use super::Utf8Error;
#[stable(feature = "rust1", since = "1.0.0")]
#[rustc_const_stable(feature = "const_str_from_utf8_shared", since = "1.63.0")]
#[rustc_allow_const_fn_unstable(str_internals)]
#[rustc_diagnostic_item = "str_from_utf8"]
pub const fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
// FIXME: This should use `?` again, once it's `const`
match run_utf8_validation(v) {
Expand Down Expand Up @@ -127,6 +128,7 @@ pub const fn from_utf8(v: &[u8]) -> Result<&str, Utf8Error> {
/// errors that can be returned.
#[stable(feature = "str_mut_extras", since = "1.20.0")]
#[rustc_const_unstable(feature = "const_str_from_utf8", issue = "91006")]
#[rustc_diagnostic_item = "str_from_utf8_mut"]
pub const fn from_utf8_mut(v: &mut [u8]) -> Result<&mut str, Utf8Error> {
// This should use `?` again, once it's `const`
match run_utf8_validation(v) {
Expand Down
44 changes: 44 additions & 0 deletions tests/ui/lint/invalid_from_utf8.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#![feature(concat_bytes)]
#![warn(invalid_from_utf8_unchecked)]
#![warn(invalid_from_utf8)]

pub fn from_utf8_unchecked_mut() {
// Valid
Expand Down Expand Up @@ -46,4 +47,47 @@ pub fn from_utf8_unchecked() {
}
}

pub fn from_utf8_mut() {
// Valid
{
std::str::from_utf8_mut(&mut [99, 108, 105, 112, 112, 121]);
std::str::from_utf8_mut(&mut [b'c', b'l', b'i', b'p', b'p', b'y']);

let x = 0xa0;
std::str::from_utf8_mut(&mut [0xc0, x]);
}

// Invalid
{
std::str::from_utf8_mut(&mut [99, 108, 130, 105, 112, 112, 121]);
//~^ WARN calls to `std::str::from_utf8_mut`
std::str::from_utf8_mut(&mut [b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']);
//~^ WARN calls to `std::str::from_utf8_mut`
}
}

pub fn from_utf8() {
// Valid
{
std::str::from_utf8(&[99, 108, 105, 112, 112, 121]);
std::str::from_utf8(&[b'c', b'l', b'i', b'p', b'p', b'y']);
std::str::from_utf8(b"clippy");

let x = 0xA0;
std::str::from_utf8(&[0xC0, x]);
}

// Invalid
{
std::str::from_utf8(&[99, 108, 130, 105, 112, 112, 121]);
//~^ WARN calls to `std::str::from_utf8`
std::str::from_utf8(&[b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']);
//~^ WARN calls to `std::str::from_utf8`
std::str::from_utf8(b"cl\x82ippy");
//~^ WARN calls to `std::str::from_utf8`
std::str::from_utf8(concat_bytes!(b"cl", b"\x82ippy"));
//~^ WARN calls to `std::str::from_utf8`
}
}

fn main() {}
68 changes: 61 additions & 7 deletions tests/ui/lint/invalid_from_utf8.stderr
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
warning: calls to `std::str::from_utf8_unchecked_mut` with a invalid literal are undefined behavior
--> $DIR/invalid_from_utf8.rs:18:9
--> $DIR/invalid_from_utf8.rs:19:9
|
LL | std::str::from_utf8_unchecked_mut(&mut [99, 108, 130, 105, 112, 112, 121]);
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^---------------------------------------^
Expand All @@ -13,44 +13,98 @@ LL | #![warn(invalid_from_utf8_unchecked)]
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^

warning: calls to `std::str::from_utf8_unchecked_mut` with a invalid literal are undefined behavior
--> $DIR/invalid_from_utf8.rs:20:9
--> $DIR/invalid_from_utf8.rs:21:9
|
LL | std::str::from_utf8_unchecked_mut(&mut [b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']);
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^--------------------------------------------------^
| |
| the literal was valid UTF-8 up to the 2 bytes

warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior
--> $DIR/invalid_from_utf8.rs:38:9
--> $DIR/invalid_from_utf8.rs:39:9
|
LL | std::str::from_utf8_unchecked(&[99, 108, 130, 105, 112, 112, 121]);
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^-----------------------------------^
| |
| the literal was valid UTF-8 up to the 2 bytes

warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior
--> $DIR/invalid_from_utf8.rs:40:9
--> $DIR/invalid_from_utf8.rs:41:9
|
LL | std::str::from_utf8_unchecked(&[b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']);
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^----------------------------------------------^
| |
| the literal was valid UTF-8 up to the 2 bytes

warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior
--> $DIR/invalid_from_utf8.rs:42:9
--> $DIR/invalid_from_utf8.rs:43:9
|
LL | std::str::from_utf8_unchecked(b"cl\x82ippy");
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^-------------^
| |
| the literal was valid UTF-8 up to the 2 bytes

warning: calls to `std::str::from_utf8_unchecked` with a invalid literal are undefined behavior
--> $DIR/invalid_from_utf8.rs:44:9
--> $DIR/invalid_from_utf8.rs:45:9
|
LL | std::str::from_utf8_unchecked(concat_bytes!(b"cl", b"\x82ippy"));
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^---------------------------------^
| |
| the literal was valid UTF-8 up to the 2 bytes

warning: 6 warnings emitted
warning: calls to `std::str::from_utf8_mut` with a invalid literal always return an error
--> $DIR/invalid_from_utf8.rs:62:9
|
LL | std::str::from_utf8_mut(&mut [99, 108, 130, 105, 112, 112, 121]);
| ^^^^^^^^^^^^^^^^^^^^^^^^---------------------------------------^
| |
| the literal was valid UTF-8 up to the 2 bytes
|
note: the lint level is defined here
--> $DIR/invalid_from_utf8.rs:5:9
|
LL | #![warn(invalid_from_utf8)]
| ^^^^^^^^^^^^^^^^^

warning: calls to `std::str::from_utf8_mut` with a invalid literal always return an error
--> $DIR/invalid_from_utf8.rs:64:9
|
LL | std::str::from_utf8_mut(&mut [b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']);
| ^^^^^^^^^^^^^^^^^^^^^^^^--------------------------------------------------^
| |
| the literal was valid UTF-8 up to the 2 bytes

warning: calls to `std::str::from_utf8` with a invalid literal always return an error
--> $DIR/invalid_from_utf8.rs:82:9
|
LL | std::str::from_utf8(&[99, 108, 130, 105, 112, 112, 121]);
| ^^^^^^^^^^^^^^^^^^^^-----------------------------------^
| |
| the literal was valid UTF-8 up to the 2 bytes

warning: calls to `std::str::from_utf8` with a invalid literal always return an error
--> $DIR/invalid_from_utf8.rs:84:9
|
LL | std::str::from_utf8(&[b'c', b'l', b'\x82', b'i', b'p', b'p', b'y']);
| ^^^^^^^^^^^^^^^^^^^^----------------------------------------------^
| |
| the literal was valid UTF-8 up to the 2 bytes

warning: calls to `std::str::from_utf8` with a invalid literal always return an error
--> $DIR/invalid_from_utf8.rs:86:9
|
LL | std::str::from_utf8(b"cl\x82ippy");
| ^^^^^^^^^^^^^^^^^^^^-------------^
| |
| the literal was valid UTF-8 up to the 2 bytes

warning: calls to `std::str::from_utf8` with a invalid literal always return an error
--> $DIR/invalid_from_utf8.rs:88:9
|
LL | std::str::from_utf8(concat_bytes!(b"cl", b"\x82ippy"));
| ^^^^^^^^^^^^^^^^^^^^---------------------------------^
| |
| the literal was valid UTF-8 up to the 2 bytes

warning: 12 warnings emitted

0 comments on commit 7f99c7d

Please sign in to comment.