diff --git a/c2rust-analyze/src/labeled_ty.rs b/c2rust-analyze/src/labeled_ty.rs index d4b9824010..d32586ac40 100644 --- a/c2rust-analyze/src/labeled_ty.rs +++ b/c2rust-analyze/src/labeled_ty.rs @@ -20,7 +20,7 @@ use std::slice; /// constructor in the tree can have its own label. But maintaining a custom copy of /// `TyKind` would be annoying, so instead, we let labeled types form arbitrary trees, and /// make the `LabeledTyCtxt` responsible for making those trees match the `Ty`'s structure. -#[derive(Clone, Copy, PartialEq, Eq)] +#[derive(Clone, Copy, PartialEq, Eq, Hash)] pub struct LabeledTyS<'tcx, L: 'tcx> { /// The underlying type. pub ty: Ty<'tcx>, diff --git a/c2rust-analyze/src/main.rs b/c2rust-analyze/src/main.rs index 06f17d4ea3..a60ad5464d 100644 --- a/c2rust-analyze/src/main.rs +++ b/c2rust-analyze/src/main.rs @@ -25,6 +25,8 @@ use crate::equiv::{GlobalEquivSet, LocalEquivSet}; use crate::labeled_ty::LabeledTyCtxt; use crate::log::init_logger; use crate::panic_detail::PanicDetail; +use crate::pointee_type::PointeeTypes; +use crate::pointer_id::{GlobalPointerTable, LocalPointerTable, PointerTable}; use crate::type_desc::Ownership; use crate::util::{Callee, TestAttr}; use ::log::warn; @@ -60,6 +62,7 @@ mod known_fn; mod labeled_ty; mod log; mod panic_detail; +mod pointee_type; mod pointer_id; mod rewrite; mod trivial; @@ -180,10 +183,6 @@ fn label_rvalue_tys<'tcx>(acx: &mut AnalysisCtxt<'_, 'tcx>, mir: &Body<'tcx>) { block: bb, }; - if acx.c_void_casts.should_skip_stmt(loc) { - continue; - } - let _g = panic_detail::set_current_span(stmt.source_info.span); let lty = match rv { @@ -486,6 +485,10 @@ fn run(tcx: TyCtxt) { /// get a complete [`Assignment`] for this function, which maps every [`PointerId`] in this /// function to a [`PermissionSet`] and [`FlagSet`]. lasn: MaybeUnset, + /// Constraints on pointee types gathered from the body of this function. + pointee_constraints: MaybeUnset>, + /// Local part of pointee type sets. 
+ local_pointee_types: MaybeUnset>>, } // Follow a postorder traversal, so that callers are visited after their callees. This means @@ -549,15 +552,10 @@ fn run(tcx: TyCtxt) { gacx.construct_region_metadata(); // ---------------------------------- - // Compute dataflow constraints + // Infer pointee types // ---------------------------------- - // Initial pass to assign local `PointerId`s and gather equivalence constraints, which state - // that two pointer types must be converted to the same reference type. Some additional data - // computed during this the process is kept around for use in later passes. - let mut global_equiv = GlobalEquivSet::new(gacx.num_pointers()); for &ldid in &all_fn_ldids { - // The function might already be marked as failed if one of its callees previously failed. if gacx.fn_failed(ldid.to_def_id()) { continue; } @@ -594,6 +592,102 @@ fn run(tcx: TyCtxt) { label_rvalue_tys(&mut acx, &mir); update_pointer_info(&mut acx, &mir); + pointee_type::generate_constraints(&acx, &mir) + })); + + let pointee_constraints = match r { + Ok(x) => x, + Err(pd) => { + gacx.mark_fn_failed(ldid.to_def_id(), pd); + continue; + } + }; + + let local_pointee_types = LocalPointerTable::new(acx.num_pointers()); + + let mut info = FuncInfo::default(); + info.acx_data.set(acx.into_data()); + info.pointee_constraints.set(pointee_constraints); + info.local_pointee_types.set(local_pointee_types); + func_info.insert(ldid, info); + } + + // Iterate pointee constraints to a fixpoint. + let mut global_pointee_types = GlobalPointerTable::::new(gacx.num_pointers()); + let mut loop_count = 0; + loop { + // Loop until the global assignment reaches a fixpoint. The inner loop also runs until a + // fixpoint, but it only considers a single function at a time. The inner loop for one + // function can affect other functions by updating `global_pointee_types`, so we also need + // the outer loop, which runs until the global type sets converge as well. 
+ loop_count += 1; + // We shouldn't need more iterations than the longest acyclic path through the callgraph. + assert!(loop_count <= 1000); + let old_global_pointee_types = global_pointee_types.clone(); + + // Clear the `incomplete` flags for all global pointers. See comment in + // `pointee_type::solve::solve_constraints`. + for (_, tys) in global_pointee_types.iter_mut() { + tys.incomplete = false; + } + + for &ldid in &all_fn_ldids { + if gacx.fn_failed(ldid.to_def_id()) { + continue; + } + + let info = func_info.get_mut(&ldid).unwrap(); + + let pointee_constraints = info.pointee_constraints.get(); + let pointee_types = global_pointee_types.and_mut(info.local_pointee_types.get_mut()); + pointee_type::solve_constraints(pointee_constraints, pointee_types); + } + + if global_pointee_types == old_global_pointee_types { + break; + } + } + + // Print results for debugging + for &ldid in &all_fn_ldids { + if gacx.fn_failed(ldid.to_def_id()) { + continue; + } + + let ldid_const = WithOptConstParam::unknown(ldid); + let info = func_info.get_mut(&ldid).unwrap(); + let mir = tcx.mir_built(ldid_const); + let mir = mir.borrow(); + + let acx = gacx.function_context_with_data(&mir, info.acx_data.take()); + let name = tcx.item_name(ldid.to_def_id()); + let pointee_types = global_pointee_types.and(info.local_pointee_types.get()); + print_function_pointee_types(&acx, name, &mir, pointee_types); + + info.acx_data.set(acx.into_data()); + } + + // ---------------------------------- + // Compute dataflow constraints + // ---------------------------------- + + // Initial pass to assign local `PointerId`s and gather equivalence constraints, which state + // that two pointer types must be converted to the same reference type. Some additional data + // computed during this process is kept around for use in later passes. 
+ let mut global_equiv = GlobalEquivSet::new(gacx.num_pointers()); + for &ldid in &all_fn_ldids { + if gacx.fn_failed(ldid.to_def_id()) { + continue; + } + + let info = func_info.get_mut(&ldid).unwrap(); + let ldid_const = WithOptConstParam::unknown(ldid); + let mir = tcx.mir_built(ldid_const); + let mir = mir.borrow(); + + let acx = gacx.function_context_with_data(&mir, info.acx_data.take()); + + let r = panic_detail::catch_unwind(AssertUnwindSafe(|| { dataflow::generate_constraints(&acx, &mir) })); @@ -612,11 +706,9 @@ fn run(tcx: TyCtxt) { equiv.unify(a, b); } - let mut info = FuncInfo::default(); info.acx_data.set(acx.into_data()); info.dataflow.set(dataflow); info.local_equiv.set(local_equiv); - func_info.insert(ldid, info); } // ---------------------------------- @@ -1481,6 +1573,47 @@ fn print_labeling_for_var<'tcx>( eprintln!("{}: addr_of = {:?}, type = {:?}", desc, addr_of3, ty3); } +fn print_function_pointee_types<'tcx>( + acx: &AnalysisCtxt<'_, 'tcx>, + name: impl Display, + mir: &Body<'tcx>, + pointee_types: PointerTable>, +) { + eprintln!("\npointee types for {}", name); + for (local, decl) in mir.local_decls.iter_enumerated() { + eprintln!( + "{:?} ({}): addr_of = {:?}, type = {:?}", + local, + describe_local(acx.tcx(), decl), + acx.addr_of_local[local], + acx.local_tys[local] + ); + + let mut all_pointer_ids = Vec::new(); + if !acx.addr_of_local[local].is_none() { + all_pointer_ids.push(acx.addr_of_local[local]); + } + acx.local_tys[local].for_each_label(&mut |ptr| { + if !ptr.is_none() { + all_pointer_ids.push(ptr); + } + }); + + for ptr in all_pointer_ids { + let tys = &pointee_types[ptr]; + if tys.ltys.len() == 0 && !tys.incomplete { + continue; + } + eprintln!( + " pointer {:?}: {:?}{}", + ptr, + tys.ltys, + if tys.incomplete { " (INCOMPLETE)" } else { "" } + ); + } + } +} + /// Return `LocalDefId`s for all `static`s. 
fn all_static_items(tcx: TyCtxt) -> Vec { let mut order = Vec::new(); diff --git a/c2rust-analyze/src/pointee_type/constraint_set.rs b/c2rust-analyze/src/pointee_type/constraint_set.rs new file mode 100644 index 0000000000..470e5bdf57 --- /dev/null +++ b/c2rust-analyze/src/pointee_type/constraint_set.rs @@ -0,0 +1,151 @@ +use crate::context::LTy; +use crate::pointer_id::PointerId; +use std::cell::Cell; +use std::collections::HashSet; + +#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)] +pub enum Constraint<'tcx> { + /// The set of types for pointer `.0` must contain type `.1`. This is used for "uses" of a + /// pointer, where the pointer is dereferenced to load or store data of a certain type. + ContainsType(PointerId, CTy<'tcx>), + + /// All possible types for pointer `.0` must be compatible with type `.1`. This is used for + /// "definitions" of a pointer, where the pointer is constructed by taking the address of data + /// of a certain type. + AllTypesCompatibleWith(PointerId, CTy<'tcx>), + + /// All possible types for pointer `.0` must be mutually compatible. This is used for + /// "definitions" of a pointer where the type of the data is unknown. + AllTypesCompatible(PointerId), + + /// The set of types for pointer `.0` must be a subset of the set of types for pointer `.1`. + /// Among other things, this is used for pointer assignments like `p = q`, where `p`'s types + /// must be a subset of `q`'s (`Subset(p_ptr, q_ptr)`). 
+ Subset(PointerId, PointerId), +} + +#[derive(Debug, Default)] +pub struct ConstraintSet<'tcx> { + pub constraints: Vec>, + constraint_dedup: HashSet>, + pub var_table: VarTable<'tcx>, +} + +impl<'tcx> ConstraintSet<'tcx> { + fn add(&mut self, c: Constraint<'tcx>) { + if self.constraint_dedup.insert(c) { + self.constraints.push(c); + } + } + + pub fn contains_type(&mut self, p: PointerId, cty: CTy<'tcx>) { + self.add(Constraint::ContainsType(p, cty)); + } + + pub fn all_types_compatible_with(&mut self, p: PointerId, cty: CTy<'tcx>) { + self.add(Constraint::AllTypesCompatibleWith(p, cty)); + } + + pub fn all_types_compatible(&mut self, p: PointerId) { + self.add(Constraint::AllTypesCompatible(p)); + } + + pub fn subset(&mut self, p: PointerId, q: PointerId) { + self.add(Constraint::Subset(p, q)); + } + + pub fn fresh_var(&mut self) -> CTy<'tcx> { + self.var_table.fresh() + } +} + +/// A "constraint type", which is either an `LTy` or an inference variable. +/// +/// Our current implementation of type inference / unification is very primitive. In particular, +/// currently we allow inference variables only at top level, so constraints can refer to `T` but +/// not `*mut T` or `[T; 10]`. Eventually we may need to replace this implementation with +/// something more flexible. +#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)] +pub enum CTy<'tcx> { + Ty(LTy<'tcx>), + /// An inference variable. Note that inference variables are scoped to the local function; + /// there are no global inference variables. + Var(usize), +} + +impl<'tcx> From> for CTy<'tcx> { + fn from(x: LTy<'tcx>) -> CTy<'tcx> { + CTy::Ty(x) + } +} + +#[derive(Debug, Default)] +pub struct VarTable<'tcx> { + /// Equivalence class representative for each variable. This can be either a known type + /// (`CTy::Ty`) or an inference variable (`CTy::Var`). 
+ vars: Vec>>, +} + +impl<'tcx> VarTable<'tcx> { + pub fn fresh(&mut self) -> CTy<'tcx> { + let cty = CTy::Var(self.vars.len()); + // Initially, the new variable is its own representative. + self.vars.push(Cell::new(cty)); + cty + } + + /// Obtain the representative for variable `var`. + pub fn rep(&self, var: usize) -> CTy<'tcx> { + let cty = self.vars[var].get(); + match cty { + CTy::Var(var2) => { + if var2 == var { + // `var` is the representative of its own equivalence class. + cty + } else { + let rep = self.rep(var2); + // Do path compression. + self.vars[var].set(rep); + rep + } + } + CTy::Ty(_) => { + // `cty` is a concrete type, which is the representative of `var`'s class. + cty + } + } + } + + pub fn cty_rep(&self, cty: CTy<'tcx>) -> CTy<'tcx> { + match cty { + CTy::Ty(_) => cty, + CTy::Var(v) => self.rep(v), + } + } + + /// Unify two types. If both resolve to concrete types and those types are unequal, this + /// returns `Err` with the two concrete types. + pub fn unify(&self, cty1: CTy<'tcx>, cty2: CTy<'tcx>) -> Result<(), (LTy<'tcx>, LTy<'tcx>)> { + match (self.cty_rep(cty1), self.cty_rep(cty2)) { + (CTy::Var(v1), CTy::Var(v2)) => { + // Make one the representative for the other. 
+ debug_assert_eq!(self.vars[v1].get(), CTy::Var(v1)); + debug_assert_eq!(self.vars[v2].get(), CTy::Var(v2)); + self.vars[v1].set(CTy::Var(v2)); + Ok(()) + } + (CTy::Var(v), CTy::Ty(ty)) | (CTy::Ty(ty), CTy::Var(v)) => { + debug_assert_eq!(self.vars[v].get(), CTy::Var(v)); + self.vars[v].set(CTy::Ty(ty)); + Ok(()) + } + (CTy::Ty(ty1), CTy::Ty(ty2)) => { + if ty1 == ty2 { + Ok(()) + } else { + Err((ty1, ty2)) + } + } + } + } +} diff --git a/c2rust-analyze/src/pointee_type/mod.rs b/c2rust-analyze/src/pointee_type/mod.rs new file mode 100644 index 0000000000..ec2c25a2ca --- /dev/null +++ b/c2rust-analyze/src/pointee_type/mod.rs @@ -0,0 +1,16 @@ +use crate::context::AnalysisCtxt; +use rustc_middle::mir::Body; + +mod constraint_set; +mod solve; +mod type_check; + +pub use self::constraint_set::{CTy, Constraint, ConstraintSet}; +pub use self::solve::{solve_constraints, PointeeTypes}; + +pub fn generate_constraints<'tcx>( + acx: &AnalysisCtxt<'_, 'tcx>, + mir: &Body<'tcx>, +) -> ConstraintSet<'tcx> { + type_check::visit(acx, mir) +} diff --git a/c2rust-analyze/src/pointee_type/solve.rs b/c2rust-analyze/src/pointee_type/solve.rs new file mode 100644 index 0000000000..3370e31bbb --- /dev/null +++ b/c2rust-analyze/src/pointee_type/solve.rs @@ -0,0 +1,193 @@ +use super::constraint_set::{CTy, Constraint, ConstraintSet, VarTable}; +use crate::context::LTy; +use crate::pointer_id::{OwnedPointerTable, PointerId, PointerTable, PointerTableMut}; +use log::warn; +use std::collections::{HashMap, HashSet}; +use std::ptr; + +/// Add initial types to `ty_sets` based on the constraints in `cset`. 
+pub fn init_type_sets<'tcx>( + cset: &ConstraintSet<'tcx>, + mut ty_sets: PointerTableMut<HashSet<CTy<'tcx>>>, +) { + for constraint in &cset.constraints { + if let Constraint::ContainsType(ptr, cty) = *constraint { + ty_sets[ptr].insert(cty); + } + } +} + +fn index_both<'a, T>( + pt: &'a mut PointerTableMut<T>, + ptr1: PointerId, + ptr2: PointerId, +) -> (&'a mut T, &'a mut T) { + unsafe { + assert_ne!(ptr1, ptr2); + let x1 = ptr::addr_of_mut!(pt[ptr1]); + let x2 = ptr::addr_of_mut!(pt[ptr2]); + (&mut *x1, &mut *x2) + } +} + +/// Propagate pointee types through `Subset` relationships. For each unsatisfied `Subset` +/// constraint, we add all pointee types in the subset to the superset, which satisfies the +/// constraint by expanding the superset. +/// +/// The global portion of `ty_sets` is only the local view of the global pointee type sets, so it +/// can contain local `CTy::Var`s and refer to local `PointerId`s. +pub fn propagate_types<'tcx>( + cset: &ConstraintSet<'tcx>, + mut ty_sets: PointerTableMut<HashSet<CTy<'tcx>>>, +) { + // Map from each `PointerId` to the `PointerId`s whose `ty_sets` should be supersets. + let mut subset_graph = HashMap::<_, HashSet<_>>::new(); + // Set of `PointerId`s whose `ty_sets` were recently modified. The changes to these `ty_sets` + // need to be propagated to their supersets in `subset_graph`. + let mut work_set = HashSet::new(); + for constraint in &cset.constraints { + if let Constraint::Subset(ptr1, ptr2) = *constraint { + let new = subset_graph + .entry(ptr1) + .or_insert_with(HashSet::new) + .insert(ptr2); + + // Initial update: propagate pointee types from `ptr1` to `ptr2`. + if new && !ty_sets[ptr1].is_subset(&ty_sets[ptr2]) { + let (tys1, tys2) = index_both(&mut ty_sets, ptr1, ptr2); + for cty in tys1.iter() { + tys2.insert(cty.clone()); + } + // Since `ty_sets[ptr1]` was not a subset of `ty_sets[ptr2]`, we must have added at + // least one element to `ty_sets[ptr2]`. 
+ work_set.insert(ptr2); + } + } + } + + while let Some(&ptr1) = work_set.iter().next() { + work_set.remove(&ptr1); + let ptr2s = match subset_graph.get(&ptr1) { + Some(x) => x, + None => continue, + }; + for &ptr2 in ptr2s { + if !ty_sets[ptr1].is_subset(&ty_sets[ptr2]) { + let (tys1, tys2) = index_both(&mut ty_sets, ptr1, ptr2); + for cty in tys1.iter() { + tys2.insert(cty.clone()); + } + // Since `ty_sets[ptr1]` was not a subset of `ty_sets[ptr2]`, we must have added at + // least one element to `ty_sets[ptr2]`. + work_set.insert(ptr2); + } + } + } + + fn unify_types<'tcx>( + var_table: &VarTable<'tcx>, + ctys: &HashSet<CTy<'tcx>>, + extra_cty: Option<CTy<'tcx>>, + ) { + let mut prev = extra_cty; + for &cty in ctys { + if let Some(prev) = prev { + match var_table.unify(prev, cty) { + Ok(()) => {} + Err((ty1, ty2)) => { + warn!("unification failed: {ty1:?} != {ty2:?}"); + } + } + } + prev = Some(cty); + } + } + + // Currently, we just require all the types to unify. In the future perhaps we can extend this + // to do something smarter in cases where the set contains both `u8` and `[u8; 10]`, for + // example. + for constraint in &cset.constraints { + if let Constraint::AllTypesCompatibleWith(ptr, cty) = *constraint { + unify_types(&cset.var_table, &ty_sets[ptr], Some(cty)); + } + } + + for (_, ctys) in ty_sets.iter() { + unify_types(&cset.var_table, ctys, None); + } + + #[cfg(debug_assertions)] + { + for constraint in &cset.constraints { + if let Constraint::Subset(ptr1, ptr2) = *constraint { + assert!(ty_sets[ptr1].is_subset(&ty_sets[ptr2])); + } + } + } +} + +#[derive(Clone, PartialEq, Eq, Debug, Default)] +pub struct PointeeTypes<'tcx> { + /// The possible pointee types for this pointer. + pub ltys: HashSet<LTy<'tcx>>, + /// If set, `ltys` is incomplete - the analysis identified pointee types that couldn't be + /// exported into global scope. + pub incomplete: bool, +} + +/// Copy `LTy`s from `pointee_tys` into `ty_sets` for processing by the analysis. 
+fn import<'tcx>( + pointee_tys: PointerTable<PointeeTypes<'tcx>>, + mut ty_sets: PointerTableMut<HashSet<CTy<'tcx>>>, +) { + for (ptr, tys) in pointee_tys.iter() { + let ty_set = &mut ty_sets[ptr]; + for &lty in &tys.ltys { + ty_set.insert(CTy::Ty(lty)); + } + } +} + +/// Compute concrete `LTy`s for all the `CTy`s in `ty_sets`, and add them into `pointee_tys`. +fn export<'tcx>( + var_table: &VarTable<'tcx>, + ty_sets: PointerTable<HashSet<CTy<'tcx>>>, + mut pointee_tys: PointerTableMut<PointeeTypes<'tcx>>, +) { + for (ptr, ctys) in ty_sets.iter() { + let out = &mut pointee_tys[ptr]; + for &cty in ctys { + if let CTy::Ty(lty) = var_table.cty_rep(cty) { + let mut ok = true; + lty.for_each_label(&mut |p| { + if p.is_local() { + ok = false; + } + }); + if ok { + out.ltys.insert(lty); + continue; + } + } + // If we failed to export this `CTy`, mark the `PointeeTypes` incomplete. + out.incomplete = true; + } + } +} + +pub fn solve_constraints<'tcx>( + cset: &ConstraintSet<'tcx>, + mut pointee_tys: PointerTableMut<PointeeTypes<'tcx>>, +) { + // Clear the `incomplete` flags for all local pointers. If there are still non-exportable + // types for those pointers, the flag will be set again in `export()`. 
+ for (_, tys) in pointee_tys.local_mut().iter_mut() { + tys.incomplete = false; + } + + let mut ty_sets = OwnedPointerTable::with_len_of(&pointee_tys.borrow()); + import(pointee_tys.borrow(), ty_sets.borrow_mut()); + init_type_sets(cset, ty_sets.borrow_mut()); + propagate_types(cset, ty_sets.borrow_mut()); + export(&cset.var_table, ty_sets.borrow(), pointee_tys.borrow_mut()); +} diff --git a/c2rust-analyze/src/pointee_type/type_check.rs b/c2rust-analyze/src/pointee_type/type_check.rs new file mode 100644 index 0000000000..0be3df5344 --- /dev/null +++ b/c2rust-analyze/src/pointee_type/type_check.rs @@ -0,0 +1,365 @@ +use super::constraint_set::{CTy, ConstraintSet}; +use crate::context::{AnalysisCtxt, LTy, PointerId}; +use crate::panic_detail; +use crate::util::{describe_rvalue, ty_callee, Callee, RvalueDesc, UnknownDefCallee}; +use log::*; +use rustc_middle::mir::{ + BinOp, Body, Location, Operand, Place, PlaceRef, ProjectionElem, Rvalue, Statement, + StatementKind, Terminator, TerminatorKind, +}; +use rustc_middle::ty::{Ty, TyKind}; + +struct TypeChecker<'tcx, 'a> { + acx: &'a AnalysisCtxt<'a, 'tcx>, + mir: &'a Body<'tcx>, + constraints: ConstraintSet<'tcx>, +} + +impl<'tcx> TypeChecker<'tcx, '_> { + fn use_pointer_at_type(&mut self, ptr: PointerId, ty: impl Into>) { + if ptr.is_none() { + return; + } + let cty = ty.into(); + trace!("use_pointer_at_type({ptr:?}, {cty:?})"); + self.constraints.contains_type(ptr, cty); + } + + fn define_pointer(&mut self, ptr: PointerId) { + if ptr.is_none() { + return; + } + trace!("define_pointer({ptr:?})"); + self.constraints.all_types_compatible(ptr); + } + + fn define_pointer_with_type(&mut self, ptr: PointerId, ty: impl Into>) { + if ptr.is_none() { + return; + } + let cty = ty.into(); + trace!("define_pointer_with_type({ptr:?}, {cty:?})"); + self.constraints.all_types_compatible_with(ptr, cty); + } + + fn assign(&mut self, lhs: PointerId, rhs: PointerId) { + if lhs.is_none() || rhs.is_none() { + return; + } + 
trace!("assign({lhs:?}, {rhs:?})"); + // If `lhs` flows to a use at type `T`, then `rhs` also flows to a use at type `T`. + self.constraints.subset(lhs, rhs); + } + + /// Visit a `Place`, adding constraints as needed. + /// + /// As a convenience, this returns the `LTy` of the place, identical to `acx.type_of(pl)`. + pub fn visit_place(&mut self, pl: Place<'tcx>) -> LTy<'tcx> { + self.visit_place_ref(pl.as_ref()) + } + + /// Visit a `PlaceRef`, adding constraints as needed. + /// + /// As a convenience, this returns the `LTy` of the place, identical to `acx.type_of(pl)`. + pub fn visit_place_ref(&mut self, pl: PlaceRef<'tcx>) -> LTy<'tcx> { + trace!("visit_place_ref({pl:?})"); + let mut lty = self.acx.type_of(pl.local); + for proj in pl.projection { + match proj { + ProjectionElem::Deref => { + debug_assert!(matches!( + lty.ty.kind(), + TyKind::RawPtr(..) | TyKind::Ref(..) + )); + debug_assert_eq!(lty.args.len(), 1); + self.use_pointer_at_type(lty.label, lty.args[0]); + } + _ => {} + } + lty = self.acx.projection_lty(lty, &proj); + } + debug_assert_eq!(lty, self.acx.type_of(pl)); + lty + } + + /// Visit an `Rvalue`, adding constraints as needed. + pub fn visit_rvalue(&mut self, rv: &Rvalue<'tcx>, lty: LTy<'tcx>) { + trace!("visit_rvalue({rv:?}, {lty:?})"); + + if let Some(RvalueDesc::Project { base, proj: &[] }) = describe_rvalue(rv) { + // Special case for no-op projections like `&*p`. Since the pointer is passed through + // unchanged, we don't require the pointee type to actually match the type used for the + // paired deref and address-of operations. + let base_lty = self.visit_place_ref(base); + // Propagate the pointee types of `base` into `rv`, and from there into the LHS of the + // enclosing assignment. + self.assign(lty.label, base_lty.label); + return; + } + + // Aside from the above special case, there is no special handling for projections here + // (e.g. `util::describe_rvalue`). 
Instead, we treat projections like `&(*p).x` as a + // separate use (`*p`) and def (`&_.x`), using the concrete type of each part. `offset` + // projections are handled separately in a more sophisticated way that avoids overly + // constraining the pointee type. + + match *rv { + Rvalue::Use(ref op) => self.visit_operand(op), + Rvalue::Repeat(ref op, _) => self.visit_operand(op), + Rvalue::Ref(_rg, _kind, pl) => { + self.visit_place(pl); + debug_assert!(matches!(lty.ty.kind(), TyKind::Ref(..))); + debug_assert_eq!(lty.args.len(), 1); + self.define_pointer_with_type(lty.label, lty.args[0]); + } + Rvalue::ThreadLocalRef(_) => {} + Rvalue::AddressOf(_mutbl, pl) => { + self.visit_place(pl); + debug_assert!(matches!(lty.ty.kind(), TyKind::RawPtr(..))); + debug_assert_eq!(lty.args.len(), 1); + self.define_pointer_with_type(lty.label, lty.args[0]); + } + Rvalue::Len(pl) => { + self.visit_place(pl); + } + Rvalue::Cast(_kind, ref op, _ty) => { + self.visit_operand(op); + + let op_lty = self.acx.type_of(op); + self.assign(lty.label, op_lty.label); + } + Rvalue::BinaryOp(bin_op, ref ops) | Rvalue::CheckedBinaryOp(bin_op, ref ops) => { + assert_ne!(bin_op, BinOp::Offset, "BinOp::Offset special case NYI"); + let (ref op1, ref op2) = **ops; + self.visit_operand(op1); + self.visit_operand(op2); + } + Rvalue::NullaryOp(_, _) => {} + Rvalue::UnaryOp(_, ref op) => self.visit_operand(op), + Rvalue::Discriminant(pl) => { + self.visit_place(pl); + } + Rvalue::Aggregate(_, ref ops) => { + // FIXME: Needs dataflow edges between `ops` types and the rvalue's `lty`, similar + // to the corresponding case in `dataflow::type_check`. 
+ for op in ops { + self.visit_operand(op); + } + } + Rvalue::ShallowInitBox(ref op, _) => self.visit_operand(op), + Rvalue::CopyForDeref(pl) => { + self.visit_place(pl); + } + } + } + + pub fn visit_operand(&mut self, op: &Operand<'tcx>) { + trace!("visit_operand({op:?})"); + match *op { + Operand::Copy(pl) | Operand::Move(pl) => { + self.visit_place(pl); + } + Operand::Constant(ref _c) => { + // TODO: addr of static may show up as `Operand::Constant` + } + } + } + + pub fn visit_statement(&mut self, stmt: &Statement<'tcx>, loc: Location) { + trace!( + "visit_statement({:?} @ {:?})", + stmt.kind, + stmt.source_info.span + ); + let _g = panic_detail::set_current_span(stmt.source_info.span); + + match stmt.kind { + StatementKind::Assign(ref x) => { + let (pl, ref rv) = **x; + let pl_lty = self.visit_place(pl); + + let rv_lty = self.acx.type_of_rvalue(rv, loc); + self.visit_rvalue(rv, rv_lty); + + self.assign(pl_lty.label, rv_lty.label); + } + _ => (), + } + } + + pub fn visit_terminator(&mut self, term: &Terminator<'tcx>, _loc: Location) { + trace!( + "visit_terminator({:?} @ {:?})", + term.kind, + term.source_info.span + ); + let _g = panic_detail::set_current_span(term.source_info.span); + let tcx = self.acx.tcx(); + + match term.kind { + TerminatorKind::Call { + ref func, + ref args, + destination, + target: _, + .. 
+ } => { + for op in args { + self.visit_operand(op); + } + let dest_lty = self.visit_place(destination); + + let func = func.ty(self.mir, tcx); + self.visit_call(func, args, dest_lty); + } + _ => (), + } + } + + pub fn visit_call(&mut self, func: Ty<'tcx>, args: &[Operand<'tcx>], dest_lty: LTy<'tcx>) { + let tcx = self.acx.tcx(); + let callee = ty_callee(tcx, func); + eprintln!("callee = {callee:?}"); + match callee { + Callee::Trivial => {} + Callee::LocalDef { def_id, substs } => { + let sig = self + .acx + .gacx + .fn_sigs + .get(&def_id) + .unwrap_or_else(|| panic!("LFnSig not found for {def_id:?}")); + if substs.non_erasable_generics().next().is_some() { + todo!("call to generic function {def_id:?} {substs:?}"); + } + + // Process pseudo-assignments from `args` to the types declared in `sig`. + for (arg_op, &input_lty) in args.iter().zip(sig.inputs.iter()) { + let arg_lty = self.acx.type_of(arg_op); + self.assign(input_lty.label, arg_lty.label); + } + + // Process a pseudo-assignment from the return type declared in `sig` to `dest`. + let output_lty = sig.output; + self.assign(dest_lty.label, output_lty.label); + } + Callee::UnknownDef(UnknownDefCallee::Direct { + ty: _, + def_id, + substs: _, + is_foreign: true, + }) if self.acx.gacx.known_fn(def_id).is_some() => { + // TODO: no good handling for this currently - might need to expand KnownFn to + // include information about expected/required pointee types + } + Callee::UnknownDef(_) => { + log::error!("TODO: visit Callee::{callee:?}"); + } + + Callee::PtrOffset { .. } => { + // Normal uses of `offset` don't change the pointee type but only step + // forward/backward through a uniform array. We treat it as passing through any + // pointee types unchanged, like an assignment. + // + // In the future, we might want to make this handling more precise. 
When `offset` + // is called on `*mut T`, we could compare `size_of::()` and the offset amount + // to the size of the concrete pointee type we inferred to see whether this is an + // ordinary "step through the array" case or whether it's doing something unusual + // like stepping from a struct to a specific field within the struct. + assert_eq!(args.len(), 2); + let arg_lty = self.acx.type_of(&args[0]); + self.assign(dest_lty.label, arg_lty.label); + } + + Callee::SliceAsPtr { .. } => { + // The input is a `Ref`, so its underlying type is known precisely. + assert_eq!(args.len(), 1); + let arg_lty = self.acx.type_of(&args[0]); + assert!(matches!(arg_lty.ty.kind(), TyKind::Ref(..))); + assert_eq!(arg_lty.args.len(), 1); + let slice_lty = arg_lty.args[0]; + assert!(matches!(slice_lty.ty.kind(), TyKind::Slice(..))); + assert_eq!(slice_lty.args.len(), 1); + let elem_lty = slice_lty.args[0]; + self.define_pointer_with_type(dest_lty.label, elem_lty); + } + + Callee::Malloc | Callee::Calloc => { + // Currently, we just treat this as a definition of unknown type and assert that a + // single common pointee type can be found. In the future, we might expand this to + // assert that the inferred pointee type matches the size passed to `malloc`. + self.define_pointer(dest_lty.label); + } + Callee::Realloc => { + // Currently, we treat this as passing through the pointee type unchanged. + // + // In the future, we might check the new size as described for `malloc`. + assert_eq!(args.len(), 2); + let arg_lty = self.acx.type_of(&args[0]); + self.assign(dest_lty.label, arg_lty.label); + } + Callee::Free => { + // No constraints on `free`, since it doesn't reveal anything about the concrete + // type. + } + + Callee::Memcpy => { + // We treat the `memcpy` as loading from `*src` and then storing to `*dest`. 
The + // type of the load and store is unknown at this point (it definitely isn't the + // actual type of `*src`/`*dest`, which is `void`), so we introduce a new inference + // variable and solve for it later. + // + // In the future, we might check the copy length as described for `malloc`. + let var = self.constraints.fresh_var(); + assert_eq!(args.len(), 3); + let dest_arg_lty = self.acx.type_of(&args[0]); + let src_arg_lty = self.acx.type_of(&args[1]); + self.use_pointer_at_type(dest_arg_lty.label, var); + self.use_pointer_at_type(src_arg_lty.label, var); + self.assign(dest_lty.label, dest_arg_lty.label); + } + Callee::Memset => { + // We treat this much like `memcpy`, but with only a store, not a load. + // + // In the future, we might check the length as described for `malloc`. + let var = self.constraints.fresh_var(); + assert_eq!(args.len(), 3); + let dest_arg_lty = self.acx.type_of(&args[0]); + self.use_pointer_at_type(dest_lty.label, var); + self.assign(dest_lty.label, dest_arg_lty.label); + } + Callee::IsNull => { + // No constraints. 
+ } + } + } +} + +pub fn visit<'tcx>(acx: &AnalysisCtxt<'_, 'tcx>, mir: &Body<'tcx>) -> ConstraintSet<'tcx> { + let mut tc = TypeChecker { + acx, + mir, + constraints: ConstraintSet::default(), + }; + + for (bb, bb_data) in mir.basic_blocks().iter_enumerated() { + for (i, stmt) in bb_data.statements.iter().enumerate() { + tc.visit_statement( + stmt, + Location { + block: bb, + statement_index: i, + }, + ); + } + tc.visit_terminator( + bb_data.terminator(), + Location { + statement_index: bb_data.statements.len(), + block: bb, + }, + ); + } + + tc.constraints +} diff --git a/c2rust-analyze/src/pointer_id.rs b/c2rust-analyze/src/pointer_id.rs index 74d1e1337f..110a8fc9b6 100644 --- a/c2rust-analyze/src/pointer_id.rs +++ b/c2rust-analyze/src/pointer_id.rs @@ -331,6 +331,14 @@ impl<'a, T> PointerTable<'a, T> { self.global.len() + self.local.len() } + pub fn global(&self) -> &'a GlobalPointerTable { + &self.global + } + + pub fn local(&self) -> &'a LocalPointerTable { + &self.local + } + pub fn iter(&self) -> impl Iterator { self.global.iter().chain(self.local.iter()) } @@ -377,6 +385,22 @@ impl<'a, T> PointerTableMut<'a, T> { self.global.len() + self.local.len() } + pub fn global(&self) -> &GlobalPointerTable { + &self.global + } + + pub fn global_mut(&mut self) -> &mut GlobalPointerTable { + &mut self.global + } + + pub fn local(&self) -> &LocalPointerTable { + &self.local + } + + pub fn local_mut(&mut self) -> &mut LocalPointerTable { + &mut self.local + } + pub fn iter(&self) -> impl Iterator { self.global.iter().chain(self.local.iter()) }