Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

analyze: add pointee_type analysis #1029

Merged
merged 7 commits into from
Oct 13, 2023
2 changes: 1 addition & 1 deletion c2rust-analyze/src/labeled_ty.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ use std::slice;
/// constructor in the tree can have its own label. But maintaining a custom copy of
/// `TyKind` would be annoying, so instead, we let labeled types form arbitrary trees, and
/// make the `LabeledTyCtxt` responsible for making those trees match the `Ty`'s structure.
#[derive(Clone, Copy, PartialEq, Eq)]
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
pub struct LabeledTyS<'tcx, L: 'tcx> {
/// The underlying type.
pub ty: Ty<'tcx>,
Expand Down
157 changes: 145 additions & 12 deletions c2rust-analyze/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ use crate::equiv::{GlobalEquivSet, LocalEquivSet};
use crate::labeled_ty::LabeledTyCtxt;
use crate::log::init_logger;
use crate::panic_detail::PanicDetail;
use crate::pointee_type::PointeeTypes;
use crate::pointer_id::{GlobalPointerTable, LocalPointerTable, PointerTable};
use crate::type_desc::Ownership;
use crate::util::{Callee, TestAttr};
use ::log::warn;
Expand Down Expand Up @@ -60,6 +62,7 @@ mod known_fn;
mod labeled_ty;
mod log;
mod panic_detail;
mod pointee_type;
mod pointer_id;
mod rewrite;
mod trivial;
Expand Down Expand Up @@ -180,10 +183,6 @@ fn label_rvalue_tys<'tcx>(acx: &mut AnalysisCtxt<'_, 'tcx>, mir: &Body<'tcx>) {
block: bb,
};

if acx.c_void_casts.should_skip_stmt(loc) {
continue;
}

let _g = panic_detail::set_current_span(stmt.source_info.span);

let lty = match rv {
Expand Down Expand Up @@ -486,6 +485,10 @@ fn run(tcx: TyCtxt) {
/// get a complete [`Assignment`] for this function, which maps every [`PointerId`] in this
/// function to a [`PermissionSet`] and [`FlagSet`].
lasn: MaybeUnset<LocalAssignment>,
/// Constraints on pointee types gathered from the body of this function.
pointee_constraints: MaybeUnset<pointee_type::ConstraintSet<'tcx>>,
/// Local part of pointee type sets.
local_pointee_types: MaybeUnset<LocalPointerTable<PointeeTypes<'tcx>>>,
}

// Follow a postorder traversal, so that callers are visited after their callees. This means
Expand Down Expand Up @@ -549,15 +552,10 @@ fn run(tcx: TyCtxt) {
gacx.construct_region_metadata();

// ----------------------------------
// Compute dataflow constraints
// Infer pointee types
// ----------------------------------

// Initial pass to assign local `PointerId`s and gather equivalence constraints, which state
// that two pointer types must be converted to the same reference type. Some additional data
// computed during this process is kept around for use in later passes.
let mut global_equiv = GlobalEquivSet::new(gacx.num_pointers());
for &ldid in &all_fn_ldids {
// The function might already be marked as failed if one of its callees previously failed.
if gacx.fn_failed(ldid.to_def_id()) {
continue;
}
Expand Down Expand Up @@ -594,6 +592,102 @@ fn run(tcx: TyCtxt) {
label_rvalue_tys(&mut acx, &mir);
update_pointer_info(&mut acx, &mir);

pointee_type::generate_constraints(&acx, &mir)
}));

let pointee_constraints = match r {
Ok(x) => x,
Err(pd) => {
gacx.mark_fn_failed(ldid.to_def_id(), pd);
continue;
}
};

let local_pointee_types = LocalPointerTable::new(acx.num_pointers());

let mut info = FuncInfo::default();
info.acx_data.set(acx.into_data());
info.pointee_constraints.set(pointee_constraints);
info.local_pointee_types.set(local_pointee_types);
func_info.insert(ldid, info);
}

// Iterate pointee constraints to a fixpoint.
let mut global_pointee_types = GlobalPointerTable::<PointeeTypes>::new(gacx.num_pointers());
let mut loop_count = 0;
loop {
// Loop until the global assignment reaches a fixpoint. The inner loop also runs until a
// fixpoint, but it only considers a single function at a time. The inner loop for one
// function can affect other functions by updating `global_pointee_types`, so we also need
// the outer loop, which runs until the global type sets converge as well.
loop_count += 1;
// We shouldn't need more iterations than the longest acyclic path through the callgraph.
assert!(loop_count <= 1000);
let old_global_pointee_types = global_pointee_types.clone();

// Clear the `incomplete` flags for all global pointers. See comment in
// `pointee_type::solve::solve_constraints`.
aneksteind marked this conversation as resolved.
Show resolved Hide resolved
for (_, tys) in global_pointee_types.iter_mut() {
tys.incomplete = false;
}

for &ldid in &all_fn_ldids {
if gacx.fn_failed(ldid.to_def_id()) {
continue;
}

let info = func_info.get_mut(&ldid).unwrap();

let pointee_constraints = info.pointee_constraints.get();
let pointee_types = global_pointee_types.and_mut(info.local_pointee_types.get_mut());
pointee_type::solve_constraints(pointee_constraints, pointee_types);
}

if global_pointee_types == old_global_pointee_types {
break;
}
}

// Print results for debugging
for &ldid in &all_fn_ldids {
spernsteiner marked this conversation as resolved.
Show resolved Hide resolved
if gacx.fn_failed(ldid.to_def_id()) {
continue;
}

let ldid_const = WithOptConstParam::unknown(ldid);
let info = func_info.get_mut(&ldid).unwrap();
let mir = tcx.mir_built(ldid_const);
let mir = mir.borrow();

let acx = gacx.function_context_with_data(&mir, info.acx_data.take());
let name = tcx.item_name(ldid.to_def_id());
let pointee_types = global_pointee_types.and(info.local_pointee_types.get());
print_function_pointee_types(&acx, name, &mir, pointee_types);

info.acx_data.set(acx.into_data());
}

// ----------------------------------
// Compute dataflow constraints
// ----------------------------------

// Initial pass to assign local `PointerId`s and gather equivalence constraints, which state
// that two pointer types must be converted to the same reference type. Some additional data
// computed during this process is kept around for use in later passes.
let mut global_equiv = GlobalEquivSet::new(gacx.num_pointers());
for &ldid in &all_fn_ldids {
if gacx.fn_failed(ldid.to_def_id()) {
continue;
}

let info = func_info.get_mut(&ldid).unwrap();
let ldid_const = WithOptConstParam::unknown(ldid);
let mir = tcx.mir_built(ldid_const);
let mir = mir.borrow();

let acx = gacx.function_context_with_data(&mir, info.acx_data.take());

let r = panic_detail::catch_unwind(AssertUnwindSafe(|| {
dataflow::generate_constraints(&acx, &mir)
}));

Expand All @@ -612,11 +706,9 @@ fn run(tcx: TyCtxt) {
equiv.unify(a, b);
}

let mut info = FuncInfo::default();
info.acx_data.set(acx.into_data());
info.dataflow.set(dataflow);
info.local_equiv.set(local_equiv);
func_info.insert(ldid, info);
}

// ----------------------------------
Expand Down Expand Up @@ -1481,6 +1573,47 @@ fn print_labeling_for_var<'tcx>(
eprintln!("{}: addr_of = {:?}, type = {:?}", desc, addr_of3, ty3);
}

/// Debug helper: write the inferred pointee types for every local of `mir` to stderr.
///
/// For each local, a header line is printed (the local, its description, its `addr_of`
/// pointer, and its labeled type), followed by one line per pointer associated with that
/// local. Pointers for which `is_none()` holds are ignored, as are pointers whose type set is
/// empty and not flagged `incomplete`.
fn print_function_pointee_types<'tcx>(
    acx: &AnalysisCtxt<'_, 'tcx>,
    name: impl Display,
    mir: &Body<'tcx>,
    pointee_types: PointerTable<PointeeTypes<'tcx>>,
) {
    eprintln!("\npointee types for {}", name);
    for (local, decl) in mir.local_decls.iter_enumerated() {
        let description = describe_local(acx.tcx(), decl);
        eprintln!(
            "{:?} ({}): addr_of = {:?}, type = {:?}",
            local, description, acx.addr_of_local[local], acx.local_tys[local]
        );

        // Gather the local's own address pointer first, then every label found in its type,
        // and finally drop the "none" entries while preserving that order.
        let mut candidates = vec![acx.addr_of_local[local]];
        acx.local_tys[local].for_each_label(&mut |label| candidates.push(label));
        candidates.retain(|id| !id.is_none());

        for ptr in candidates {
            let entry = &pointee_types[ptr];
            // Only report pointers that have at least one type or are marked incomplete.
            let has_content = entry.ltys.len() != 0 || entry.incomplete;
            if has_content {
                let suffix = if entry.incomplete { " (INCOMPLETE)" } else { "" };
                eprintln!(" pointer {:?}: {:?}{}", ptr, entry.ltys, suffix);
            }
        }
    }
}

/// Return `LocalDefId`s for all `static`s.
fn all_static_items(tcx: TyCtxt) -> Vec<DefId> {
let mut order = Vec::new();
Expand Down
151 changes: 151 additions & 0 deletions c2rust-analyze/src/pointee_type/constraint_set.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
use crate::context::LTy;
use crate::pointer_id::PointerId;
use std::cell::Cell;
use std::collections::HashSet;

/// A single constraint on the sets of possible pointee types of one or two pointers.
///
/// In the variant docs below, `.0` and `.1` name the variant's positional fields, as in tuple
/// field access: for `ContainsType(p, cty)`, `.0` is `p` and `.1` is `cty`.
#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
pub enum Constraint<'tcx> {
/// The set of types for pointer `.0` must contain type `.1`. This is used for "uses" of a
Copy link
Contributor

@aneksteind aneksteind Sep 28, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what's the notation .N?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Referring to the anonymous fields of this enum variant, similar to field access in tuples and tuple structs (e.g. let foo = (1, 2); return foo.0;)

/// pointer, where the pointer is dereferenced to load or store data of a certain type.
ContainsType(PointerId, CTy<'tcx>),

/// All possible types for pointer `.0` must be compatible with type `.1`. This is used for
/// "definitions" of a pointer, where the pointer is constructed by taking the address of data
/// of a certain type.
AllTypesCompatibleWith(PointerId, CTy<'tcx>),

/// All possible types for pointer `.0` must be mutually compatible. This is used for
/// "definitions" of a pointer where the type of the data is unknown.
AllTypesCompatible(PointerId),

/// The set of types for pointer `.0` must be a subset of the set of types for pointer `.1`.
/// Among other things, this is used for pointer assignments like `p = q`, where `p`'s types
/// must be a subset of `q`'s (`Subset(p_ptr, q_ptr)`).
Subset(PointerId, PointerId),
}

/// The pointee-type constraints collected for one function, plus the inference variables
/// those constraints may refer to.
#[derive(Debug, Default)]
pub struct ConstraintSet<'tcx> {
/// All distinct constraints, in the order they were first added.
pub constraints: Vec<Constraint<'tcx>>,
/// Every constraint already present in `constraints`; `add` consults this to skip duplicates.
constraint_dedup: HashSet<Constraint<'tcx>>,
/// Table of inference variables; `fresh_var` allocates new variables from it.
pub var_table: VarTable<'tcx>,
}

impl<'tcx> ConstraintSet<'tcx> {
    /// Record constraint `c`, unless an equal constraint has already been recorded.
    fn add(&mut self, c: Constraint<'tcx>) {
        let is_new = self.constraint_dedup.insert(c);
        if !is_new {
            return;
        }
        self.constraints.push(c);
    }

    /// Add a `Constraint::ContainsType(p, cty)` constraint.
    pub fn contains_type(&mut self, p: PointerId, cty: CTy<'tcx>) {
        let constraint = Constraint::ContainsType(p, cty);
        self.add(constraint);
    }

    /// Add a `Constraint::AllTypesCompatibleWith(p, cty)` constraint.
    pub fn all_types_compatible_with(&mut self, p: PointerId, cty: CTy<'tcx>) {
        let constraint = Constraint::AllTypesCompatibleWith(p, cty);
        self.add(constraint);
    }

    /// Add a `Constraint::AllTypesCompatible(p)` constraint.
    pub fn all_types_compatible(&mut self, p: PointerId) {
        let constraint = Constraint::AllTypesCompatible(p);
        self.add(constraint);
    }

    /// Add a `Constraint::Subset(p, q)` constraint.
    pub fn subset(&mut self, p: PointerId, q: PointerId) {
        let constraint = Constraint::Subset(p, q);
        self.add(constraint);
    }

    /// Allocate a new inference variable in this set's `var_table` and return it.
    pub fn fresh_var(&mut self) -> CTy<'tcx> {
        let table = &mut self.var_table;
        table.fresh()
    }
}

/// A "constraint type", which is either an `LTy` or an inference variable.
///
/// An inference variable is a unification variable as used in type inference: a placeholder
/// for a type that is not known yet, which `VarTable::unify` may later bind to a concrete type
/// or merge with another variable.
///
/// Our current implementation of type inference / unification is very primitive. In particular,
/// currently we allow inference variables only at top level, so constraints can refer to `T` but
/// not `*mut T` or `[T; 10]`. Eventually we may need to replace this implementation with
/// something more flexible.
#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
pub enum CTy<'tcx> {
/// A known, concrete labeled type.
Ty(LTy<'tcx>),
/// An inference variable. Note that inference variables are scoped to the local function;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

what is an inference variable?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is it the state of some variable before its equivalence to a type is determined?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's a unification variable as used in type inference.

/// there are no global inference variables.
/// The payload is the variable's index in the `VarTable` that created it.
Var(usize),
}

impl<'tcx> From<LTy<'tcx>> for CTy<'tcx> {
fn from(x: LTy<'tcx>) -> CTy<'tcx> {
CTy::Ty(x)
}
}

/// Table of inference variables (`CTy::Var`), tracking which variables have been unified with
/// each other or with a concrete type.
///
/// Entries are stored in `Cell`s so that `rep` and `unify` can update them through `&self`.
#[derive(Debug, Default)]
pub struct VarTable<'tcx> {
/// Equivalence class representative for each variable. This can be either a known type
/// (`CTy::Ty`) or an inference variable (`CTy::Var`).
/// Indexed by the `usize` carried in `CTy::Var`.
vars: Vec<Cell<CTy<'tcx>>>,
}

impl<'tcx> VarTable<'tcx> {
    /// Allocate a new inference variable and return it as a `CTy::Var`.
    pub fn fresh(&mut self) -> CTy<'tcx> {
        let index = self.vars.len();
        let fresh_var = CTy::Var(index);
        // A brand-new variable starts out as the representative of its own class.
        self.vars.push(Cell::new(fresh_var));
        fresh_var
    }

    /// Look up the representative of variable `var`: either a concrete type or the root
    /// variable of `var`'s equivalence class.
    pub fn rep(&self, var: usize) -> CTy<'tcx> {
        let current = self.vars[var].get();
        let parent = match current {
            // A concrete type is itself the representative of `var`'s class.
            CTy::Ty(_) => return current,
            // `var` points at itself, so it is the root of its class.
            CTy::Var(parent) if parent == var => return current,
            CTy::Var(parent) => parent,
        };

        let root = self.rep(parent);
        // Path compression: point `var` straight at the root for future lookups.
        self.vars[var].set(root);
        root
    }

    /// Resolve `cty` to its representative. Concrete types are returned unchanged.
    pub fn cty_rep(&self, cty: CTy<'tcx>) -> CTy<'tcx> {
        if let CTy::Var(v) = cty {
            self.rep(v)
        } else {
            cty
        }
    }

    /// Unify two types. If both resolve to concrete types and those types are unequal, this
    /// returns `Err` with the two concrete types.
    pub fn unify(&self, cty1: CTy<'tcx>, cty2: CTy<'tcx>) -> Result<(), (LTy<'tcx>, LTy<'tcx>)> {
        let rep1 = self.cty_rep(cty1);
        let rep2 = self.cty_rep(cty2);
        match (rep1, rep2) {
            (CTy::Ty(ty1), CTy::Ty(ty2)) => {
                if ty1 == ty2 {
                    Ok(())
                } else {
                    Err((ty1, ty2))
                }
            }
            (CTy::Var(v), CTy::Ty(ty)) | (CTy::Ty(ty), CTy::Var(v)) => {
                // Bind the root variable to the concrete type.
                debug_assert_eq!(self.vars[v].get(), CTy::Var(v));
                self.vars[v].set(CTy::Ty(ty));
                Ok(())
            }
            (CTy::Var(v1), CTy::Var(v2)) => {
                // Both are roots; make the second the representative of the first.
                debug_assert_eq!(self.vars[v1].get(), CTy::Var(v1));
                debug_assert_eq!(self.vars[v2].get(), CTy::Var(v2));
                self.vars[v1].set(CTy::Var(v2));
                Ok(())
            }
        }
    }
}
16 changes: 16 additions & 0 deletions c2rust-analyze/src/pointee_type/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
use crate::context::AnalysisCtxt;
use rustc_middle::mir::Body;

mod constraint_set;
mod solve;
mod type_check;

pub use self::constraint_set::{CTy, Constraint, ConstraintSet};
pub use self::solve::{solve_constraints, PointeeTypes};

/// Generate the pointee-type constraints for the function body `mir`.
///
/// This is the public entry point of the module; the work is delegated to
/// `type_check::visit`, whose resulting `ConstraintSet` is returned unchanged.
pub fn generate_constraints<'tcx>(
acx: &AnalysisCtxt<'_, 'tcx>,
mir: &Body<'tcx>,
) -> ConstraintSet<'tcx> {
type_check::visit(acx, mir)
}
Loading