From 0d88d7aa431e9858476f1b81d122ea1a1e8d7249 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Thu, 16 Feb 2023 12:45:02 -0800 Subject: [PATCH 1/3] (`c2rust-analyze`) Relaxed the transmutable checks from two-way to one-way, now allowing for arrays and slices to decay. This expands the definition of safe transmutability to be one-way. That is, it checks if `*T as *U` is safe, rather than also `*U as *T`. Thus, we can now allow for casts decaying pointers to arrays and slices to pointers to their element type. `do_unify` is modified to also be one-way, which it was already in all call sites. New tests are also added to `string_casts.rs` for all the types of ptr-to-ptr casts. Out of the full string cast, `b"" as *const u8 as *const core::ffi::c_char`, this adds support for the `as *const u8` (from `&[u8; _]`), so only support for the string literal itself remains. --- c2rust-analyze/src/context.rs | 4 +- c2rust-analyze/src/dataflow/type_check.rs | 20 +++++----- c2rust-analyze/src/util.rs | 40 ++++++++++++-------- c2rust-analyze/tests/analyze/string_casts.rs | 16 +++++++- 4 files changed, 50 insertions(+), 30 deletions(-) diff --git a/c2rust-analyze/src/context.rs b/c2rust-analyze/src/context.rs index 5ae0c58130..592fb5742e 100644 --- a/c2rust-analyze/src/context.rs +++ b/c2rust-analyze/src/context.rs @@ -3,7 +3,7 @@ use crate::pointer_id::{ GlobalPointerTable, LocalPointerTable, NextGlobalPointerId, NextLocalPointerId, PointerTable, PointerTableMut, }; -use crate::util::{self, are_transmutable_ptrs, describe_rvalue, RvalueDesc}; +use crate::util::{self, describe_rvalue, is_transmutable_ptr_cast, RvalueDesc}; use crate::AssignPointerIds; use bitflags::bitflags; use rustc_hir::def_id::DefId; @@ -358,7 +358,7 @@ impl<'a, 'tcx> AnalysisCtxt<'a, 'tcx> { // In particular, this allows casts from `*u8` to `*core::ffi::c_char`. 
let from_ty = op_lty.ty; let to_ty = ty; - match are_transmutable_ptrs(from_ty, to_ty) { + match is_transmutable_ptr_cast(from_ty, to_ty) { // Label the to type with the same [`PointerId`]s as the from type in all positions. // This works because the two types have the same structure. Some(true) => self.lcx().mk(ty, op_lty.args, op_lty.label), diff --git a/c2rust-analyze/src/dataflow/type_check.rs b/c2rust-analyze/src/dataflow/type_check.rs index df158b9829..78fb1aded0 100644 --- a/c2rust-analyze/src/dataflow/type_check.rs +++ b/c2rust-analyze/src/dataflow/type_check.rs @@ -1,6 +1,6 @@ use super::DataflowConstraints; use crate::context::{AnalysisCtxt, LTy, PermissionSet, PointerId}; -use crate::util::{self, are_transmutable, describe_rvalue, Callee, RvalueDesc}; +use crate::util::{self, describe_rvalue, is_transmutable_to, Callee, RvalueDesc}; use rustc_hir::def_id::DefId; use rustc_middle::mir::{ AggregateKind, BinOp, Body, Location, Mutability, Operand, Place, PlaceRef, ProjectionElem, @@ -202,19 +202,19 @@ impl<'tcx> TypeChecker<'tcx, '_> { /// Unify corresponding `PointerId`s in `lty1` and `lty2`. /// - /// The two inputs must have compatible ([safely transmutable](are_transmutable)) underlying types. + /// The two inputs must have compatible ([safely transmutable](is_transmutable_to)) underlying types. /// For any position where the underlying type has a pointer, /// this function unifies the `PointerId`s that `lty1` and `lty2` have at /// that position. For example, given `lty1 = *mut /*l1*/ *const /*l2*/ u8` and `lty2 = *mut /// /*l3*/ *const /*l4*/ u8`, this function will unify `l1` with `l3` and `l2` with `l4`. 
- fn do_unify(&mut self, lty1: LTy<'tcx>, lty2: LTy<'tcx>) { - let ty1 = lty1.ty; - let ty2 = lty2.ty; - assert!(are_transmutable( - self.acx.tcx().erase_regions(ty1), - self.acx.tcx().erase_regions(ty2), - ), "types not transmutable (compatible), so PointerId unification cannot be done: {ty1:?} !~ {ty2:?}"); - for (sub_lty1, sub_lty2) in lty1.iter().zip(lty2.iter()) { + fn do_unify(&mut self, pl_lty: LTy<'tcx>, rv_lty: LTy<'tcx>) { + let rv_ty = self.acx.tcx().erase_regions(rv_lty.ty); + let pl_ty = self.acx.tcx().erase_regions(pl_lty.ty); + assert!( + is_transmutable_to(rv_ty, pl_ty), + "types not transmutable (compatible), so PointerId unification cannot be done: *{rv_ty:?} as *{pl_ty:?}", + ); + for (sub_lty1, sub_lty2) in pl_lty.iter().zip(rv_lty.iter()) { eprintln!("equate {:?} = {:?}", sub_lty1, sub_lty2); if sub_lty1.label != PointerId::NONE || sub_lty2.label != PointerId::NONE { assert!(sub_lty1.label != PointerId::NONE); diff --git a/c2rust-analyze/src/util.rs b/c2rust-analyze/src/util.rs index 3f5aca913b..0552ac89d3 100644 --- a/c2rust-analyze/src/util.rs +++ b/c2rust-analyze/src/util.rs @@ -307,18 +307,22 @@ pub fn lty_project<'tcx, L: Debug>( } } -/// Determine if two types are safe to transmute to each other. +/// Determine if `from_ty` can be safely transmuted to `to_ty`. /// /// Safe transmutability is difficult to check abstractly, -/// so here it is limited to integer types of the same size -/// (but potentially different signedness). +/// so here it is limited to +/// * integer types of the same size (but potentially different signedness) +/// * e.x. `*u8 as *i8` +/// * decaying arrays and slices to their element type +/// * e.x. `*[u8; 0] as *u8` +/// * e.x. `*[u8] as *u8` /// /// Extra (but equal) levels of pointer/reference indirection are allowed, /// i.e. `u8 ~ i8` implies `**u8 ~ **i8`. /// /// Thus, [`true`] means it is definitely transmutable, /// while [`false`] means it may not be transmutable. 
-/// +/// /// Formally, safe transmutability defines /// an equivalence relation on types, named `~` here. /// `A ~ B` iff `*(a as *const B)` and `*(b as *const A)` are safe, @@ -328,11 +332,11 @@ pub fn lty_project<'tcx, L: Debug>( /// * `A = B => A ~ B` /// * `A ~ B => *A ~ *B` /// * `uN ~ iN`, where `N` is an integer width -pub fn are_transmutable<'tcx>(a: Ty<'tcx>, b: Ty<'tcx>) -> bool { +pub fn is_transmutable_to<'tcx>(from_ty: Ty<'tcx>, to_ty: Ty<'tcx>) -> bool { let transmutable_ints = || { use IntTy::*; use UintTy::*; - match (a.kind(), b.kind()) { + match (from_ty.kind(), to_ty.kind()) { (ty::Uint(u), ty::Int(i)) | (ty::Int(i), ty::Uint(u)) => { matches!((u, i), |(Usize, Isize)| (U8, I8) | (U16, I16) @@ -343,20 +347,24 @@ pub fn are_transmutable<'tcx>(a: Ty<'tcx>, b: Ty<'tcx>) -> bool { } }; - // only check for transmutable ints so far - a == b || are_transmutable_ptrs(a, b).unwrap_or(false) || transmutable_ints() + let one_way_transmutable = || match from_ty.kind() { + &ty::Array(from_ty, _) | &ty::Slice(from_ty) => is_transmutable_to(from_ty, to_ty), + _ => false, + }; + + from_ty == to_ty || is_transmutable_ptr_cast(from_ty, to_ty).unwrap_or(false) || transmutable_ints() || one_way_transmutable() } -/// Determine if two types (e.x. in a cast) are pointers, -/// and if they are, if the pointee types are compatible, -/// i.e. they are safely transmutable to each other. +/// Determine if the `from_ty as to_ty` is a ptr-to-ptr cast. +/// and if it is, if the pointee types are compatible, +/// i.e. they are safely transmutable. /// /// This returns [`Some`]`(is_transmutable)` if they're both pointers, /// and [`None`] if its some other types. /// -/// See [`are_transmutable`] for the definition of safe transmutability. 
-pub fn are_transmutable_ptrs<'tcx>(a: Ty<'tcx>, b: Ty<'tcx>) -> Option { - let a = a.builtin_deref(true)?.ty; - let b = b.builtin_deref(true)?.ty; - Some(are_transmutable(a, b)) +/// See [`is_transmutable_to`] for the definition of safe transmutability. +pub fn is_transmutable_ptr_cast<'tcx>(from_ty: Ty<'tcx>, to_ty: Ty<'tcx>) -> Option { + let from_ty = from_ty.builtin_deref(true)?.ty; + let to_ty = to_ty.builtin_deref(true)?.ty; + Some(is_transmutable_to(from_ty, to_ty)) } diff --git a/c2rust-analyze/tests/analyze/string_casts.rs b/c2rust-analyze/tests/analyze/string_casts.rs index 538183aaeb..abe0fbb867 100644 --- a/c2rust-analyze/tests/analyze/string_casts.rs +++ b/c2rust-analyze/tests/analyze/string_casts.rs @@ -1,11 +1,23 @@ -pub fn cast_only(s: *const u8) { +pub fn cast_ptr_to_ptr(s: *const u8) { s as *const core::ffi::c_char; } -pub fn deep_cast(x: *const *const u8) { +pub fn deep_cast_ptr_to_ptr(x: *const *const u8) { x as *const *const i8; } +pub fn cast_slice_ptr_to_ptr(s: *const [u8]) { + s as *const u8; +} + +pub fn cast_array_to_slice_ptr(s: &[u8; 0]) { + s as *const [u8]; +} + +pub fn cast_array_to_ptr(s: &[u8; 0]) { + s as *const u8; +} + #[cfg(any())] pub fn cast_from_literal() { b"" as *const u8 as *const core::ffi::c_char; From a148146e9c6e39642f3d2a81d2916e8db085a1db Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Thu, 16 Feb 2023 14:54:40 -0800 Subject: [PATCH 2/3] (`c2rust-analyze`) Updated `do_unify` docs after the renaming. --- c2rust-analyze/src/dataflow/type_check.rs | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/c2rust-analyze/src/dataflow/type_check.rs b/c2rust-analyze/src/dataflow/type_check.rs index 78fb1aded0..31909d9d7f 100644 --- a/c2rust-analyze/src/dataflow/type_check.rs +++ b/c2rust-analyze/src/dataflow/type_check.rs @@ -200,13 +200,21 @@ impl<'tcx> TypeChecker<'tcx, '_> { } } - /// Unify corresponding `PointerId`s in `lty1` and `lty2`. 
+ /// Unify corresponding [`PointerId`]s in `pl_lty` and `rv_lty`. /// /// The two inputs must have compatible ([safely transmutable](is_transmutable_to)) underlying types. /// For any position where the underlying type has a pointer, - /// this function unifies the `PointerId`s that `lty1` and `lty2` have at - /// that position. For example, given `lty1 = *mut /*l1*/ *const /*l2*/ u8` and `lty2 = *mut - /// /*l3*/ *const /*l4*/ u8`, this function will unify `l1` with `l3` and `l2` with `l4`. + /// this function unifies the [`PointerId`]s that `pl_lty` and `rv_lty` have at that position. + /// For example, given + /// + /// ``` + /// # fn( + /// pl_lty: *mut /*l1*/ *const /*l2*/ u8, + /// rv_lty: *mut /*l3*/ *const /*l4*/ u8, + /// # ) {} + /// ``` + /// + /// this function will unify `l1` with `l3` and `l2` with `l4`. fn do_unify(&mut self, pl_lty: LTy<'tcx>, rv_lty: LTy<'tcx>) { let rv_ty = self.acx.tcx().erase_regions(rv_lty.ty); let pl_ty = self.acx.tcx().erase_regions(pl_lty.ty); From 26a42754d07bed09659c3431cf748754571bb9c3 Mon Sep 17 00:00:00 2001 From: Khyber Sen Date: Fri, 17 Feb 2023 16:51:12 -0800 Subject: [PATCH 3/3] (`c2rust-analyze`) Fix the `is_transmutable_to` docs, formalizing the expanded definition of safe transmutability. --- c2rust-analyze/src/util.rs | 61 +++++++++++++++++++------------------- 1 file changed, 30 insertions(+), 31 deletions(-) diff --git a/c2rust-analyze/src/util.rs b/c2rust-analyze/src/util.rs index 0552ac89d3..c2efebf91a 100644 --- a/c2rust-analyze/src/util.rs +++ b/c2rust-analyze/src/util.rs @@ -307,36 +307,33 @@ pub fn lty_project<'tcx, L: Debug>( } } -/// Determine if `from_ty` can be safely transmuted to `to_ty`. +/// Determine if `from` can be safely transmuted to `to`, +/// which is defined as `*(from as *const To)` being a safe operation, +/// where `from: *const From` and assuming `*from` already was safe.
/// -/// Safe transmutability is difficult to check abstractly, -/// so here it is limited to -/// * integer types of the same size (but potentially different signedness) -/// * e.x. `*u8 as *i8` -/// * decaying arrays and slices to their element type -/// * e.x. `*[u8; 0] as *u8` -/// * e.x. `*[u8] as *u8` +/// Note that this is one-way, and is slightly different from [`core::mem::transmute`], +/// and more similar to [`core::mem::transmute_copy`]. /// -/// Extra (but equal) levels of pointer/reference indirection are allowed, -/// i.e. `u8 ~ i8` implies `**u8 ~ **i8`. +/// This forms a non-symmetric (one-way) relation, so not an equivalence relation, named `~` below. +/// Formally, `A ~ B` iff `*a` and `*(a as *const B)` are safe, where `a: *const A`. /// -/// Thus, [`true`] means it is definitely transmutable, -/// while [`false`] means it may not be transmutable. -/// -/// Formally, safe transmutability defines -/// an equivalence relation on types, named `~` here. -/// `A ~ B` iff `*(a as *const B)` and `*(b as *const A)` are safe, -/// where `a: *const A` and `b: *const B`. +/// However, safe transmutability is difficult to check completely, +/// so this function only checks a subset of it, +/// with these formal rules for all types `A`, `B`: /// -/// And the current incomplete implementation is defined as: /// * `A = B => A ~ B` /// * `A ~ B => *A ~ *B` -/// * `uN ~ iN`, where `N` is an integer width -pub fn is_transmutable_to<'tcx>(from_ty: Ty<'tcx>, to_ty: Ty<'tcx>) -> bool { +/// * `uN ~ iN`, `iN ~ uN`, where `N` is an integer width +/// * `[A] ~ A` +/// * `[A; N] ~ A`, where `const N: usize` +/// +/// Thus, [`true`] means it is definitely transmutable, +/// while [`false`] means it may not be transmutable.
+pub fn is_transmutable_to<'tcx>(from: Ty<'tcx>, to: Ty<'tcx>) -> bool { let transmutable_ints = || { use IntTy::*; use UintTy::*; - match (from_ty.kind(), to_ty.kind()) { + match (from.kind(), to.kind()) { (ty::Uint(u), ty::Int(i)) | (ty::Int(i), ty::Uint(u)) => { matches!((u, i), |(Usize, Isize)| (U8, I8) | (U16, I16) @@ -347,24 +344,26 @@ pub fn is_transmutable_to<'tcx>(from_ty: Ty<'tcx>, to_ty: Ty<'tcx>) -> bool { } }; - let one_way_transmutable = || match from_ty.kind() { - &ty::Array(from_ty, _) | &ty::Slice(from_ty) => is_transmutable_to(from_ty, to_ty), + let one_way_transmutable = || match from.kind() { + &ty::Array(from, _) | &ty::Slice(from) => is_transmutable_to(from, to), _ => false, }; - from_ty == to_ty || is_transmutable_ptr_cast(from_ty, to_ty).unwrap_or(false) || transmutable_ints() || one_way_transmutable() + from == to + || is_transmutable_ptr_cast(from, to).unwrap_or(false) + || transmutable_ints() + || one_way_transmutable() } -/// Determine if the `from_ty as to_ty` is a ptr-to-ptr cast. -/// and if it is, if the pointee types are compatible, -/// i.e. they are safely transmutable. +/// Determine if `from as to` is a ptr-to-ptr cast, +/// and if it is, whether the pointee types are [safely transmutable](is_transmutable_to). /// /// This returns [`Some`]`(is_transmutable)` if they're both pointers, /// and [`None`] if its some other types. /// /// See [`is_transmutable_to`] for the definition of safe transmutability. -pub fn is_transmutable_ptr_cast<'tcx>(from_ty: Ty<'tcx>, to_ty: Ty<'tcx>) -> Option { - let from_ty = from_ty.builtin_deref(true)?.ty; - let to_ty = to_ty.builtin_deref(true)?.ty; - Some(is_transmutable_to(from_ty, to_ty)) +pub fn is_transmutable_ptr_cast<'tcx>(from: Ty<'tcx>, to: Ty<'tcx>) -> Option { + let from = from.builtin_deref(true)?.ty; + let to = to.builtin_deref(true)?.ty; + Some(is_transmutable_to(from, to)) }