Skip to content

Commit

Permalink
Extra tests and bug fixes to the delayed UB instrumentation (#3419)
Browse files Browse the repository at this point in the history
This PR is a follow-up to #3374.

It introduces the following changes:
- Instrument more writes to avoid the case when points-to analysis
overapproximates too much.
- Add extra tests featuring safe abstractions from standard library.

By submitting this pull request, I confirm that my contribution is made
under the terms of the Apache 2.0 and MIT licenses.
  • Loading branch information
artemagvanian authored Aug 28, 2024
1 parent 5aad1a9 commit e28f3db
Show file tree
Hide file tree
Showing 12 changed files with 295 additions and 46 deletions.
78 changes: 53 additions & 25 deletions kani-compiler/src/kani_middle/points_to/points_to_graph.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,27 @@ impl<'tcx> MemLoc<'tcx> {
}
}

/// Data structure to keep track of both successors and ancestors of the node.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
struct NodeData<'tcx> {
successors: HashSet<MemLoc<'tcx>>,
ancestors: HashSet<MemLoc<'tcx>>,
}

impl<'tcx> NodeData<'tcx> {
/// Merge two instances of NodeData together, return true if the original one was updated and
/// false otherwise.
fn merge(&mut self, other: Self) -> bool {
let successors_before = self.successors.len();
let ancestors_before = self.ancestors.len();
self.successors.extend(other.successors);
self.ancestors.extend(other.ancestors);
let successors_after = self.successors.len();
let ancestors_after = self.ancestors.len();
successors_before != successors_after || ancestors_before != ancestors_after
}
}

/// Graph data structure that stores the current results of the point-to analysis. The graph is
/// directed, so having an edge between two places means that one is pointing to the other.
///
Expand All @@ -82,24 +103,39 @@ impl<'tcx> MemLoc<'tcx> {
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct PointsToGraph<'tcx> {
/// A hash map of node --> {nodes} edges.
edges: HashMap<MemLoc<'tcx>, HashSet<MemLoc<'tcx>>>,
nodes: HashMap<MemLoc<'tcx>, NodeData<'tcx>>,
}

impl<'tcx> PointsToGraph<'tcx> {
pub fn empty() -> Self {
Self { edges: HashMap::new() }
Self { nodes: HashMap::new() }
}

/// Collect all nodes which have incoming edges from `nodes`.
pub fn successors(&self, nodes: &HashSet<MemLoc<'tcx>>) -> HashSet<MemLoc<'tcx>> {
nodes.iter().flat_map(|node| self.edges.get(node).cloned().unwrap_or_default()).collect()
nodes
.iter()
.flat_map(|node| self.nodes.get(node).cloned().unwrap_or_default().successors)
.collect()
}

/// For each node in `from`, add an edge to each node in `to`.
/// Collect all nodes which have outgoing edges to `nodes`.
pub fn ancestors(&self, nodes: &HashSet<MemLoc<'tcx>>) -> HashSet<MemLoc<'tcx>> {
nodes
.iter()
.flat_map(|node| self.nodes.get(node).cloned().unwrap_or_default().ancestors)
.collect()
}

/// For each node in `from`, add an edge to each node in `to` (and the reverse for ancestors).
pub fn extend(&mut self, from: &HashSet<MemLoc<'tcx>>, to: &HashSet<MemLoc<'tcx>>) {
for node in from.iter() {
let node_pointees = self.edges.entry(*node).or_default();
node_pointees.extend(to.iter());
let node_pointees = self.nodes.entry(*node).or_default();
node_pointees.successors.extend(to.iter());
}
for node in to.iter() {
let node_pointees = self.nodes.entry(*node).or_default();
node_pointees.ancestors.extend(from.iter());
}
}

Expand Down Expand Up @@ -150,16 +186,16 @@ impl<'tcx> PointsToGraph<'tcx> {
/// Dump the graph into a file using the graphviz format for later visualization.
pub fn dump(&self, file_path: &str) {
let mut nodes: Vec<String> =
self.edges.keys().map(|from| format!("\t\"{:?}\"", from)).collect();
self.nodes.keys().map(|from| format!("\t\"{:?}\"", from)).collect();
nodes.sort();
let nodes_str = nodes.join("\n");

let mut edges: Vec<String> = self
.edges
.nodes
.iter()
.flat_map(|(from, to)| {
let from = format!("\"{:?}\"", from);
to.iter().map(move |to| {
to.successors.iter().map(move |to| {
let to = format!("\"{:?}\"", to);
format!("\t{} -> {}", from.clone(), to)
})
Expand All @@ -178,24 +214,18 @@ impl<'tcx> PointsToGraph<'tcx> {
// Working queue.
let mut queue = VecDeque::from_iter(targets);
// Add all statics, as they can be accessed at any point.
let statics = self.edges.keys().filter(|node| matches!(node, MemLoc::Static(_)));
let statics = self.nodes.keys().filter(|node| matches!(node, MemLoc::Static(_)));
queue.extend(statics);
// Add all entries.
while let Some(next_target) = queue.pop_front() {
result.edges.entry(next_target).or_insert_with(|| {
let outgoing_edges =
self.edges.get(&next_target).cloned().unwrap_or(HashSet::new());
queue.extend(outgoing_edges.iter());
result.nodes.entry(next_target).or_insert_with(|| {
let outgoing_edges = self.nodes.get(&next_target).cloned().unwrap_or_default();
queue.extend(outgoing_edges.successors.iter());
outgoing_edges.clone()
});
}
result
}

/// Retrieve all places to which a given place is pointing to.
pub fn pointees_of(&self, target: &MemLoc<'tcx>) -> HashSet<MemLoc<'tcx>> {
self.edges.get(&target).unwrap_or(&HashSet::new()).clone()
}
}

/// Since we are performing the analysis using a dataflow, we need to implement a proper monotonous
Expand All @@ -206,12 +236,10 @@ impl<'tcx> JoinSemiLattice for PointsToGraph<'tcx> {
fn join(&mut self, other: &Self) -> bool {
let mut updated = false;
// Check every node in the other graph.
for (from, to) in other.edges.iter() {
let existing_to = self.edges.entry(*from).or_default();
let initial_size = existing_to.len();
existing_to.extend(to);
let new_size = existing_to.len();
updated |= initial_size != new_size;
for (node, data) in other.nodes.iter() {
let existing_node = self.nodes.entry(*node).or_default();
let changed = existing_node.merge(data.clone());
updated |= changed;
}
updated
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -118,25 +118,70 @@ impl<'a, 'tcx> MirVisitor for InstrumentationVisitor<'a, 'tcx> {
}

fn visit_place(&mut self, place: &Place, ptx: PlaceContext, location: Location) {
// Match the place by whatever it is pointing to and find an intersection with the targets.
if self
.points_to
.resolve_place_stable(place.clone(), self.current_instance, self.tcx)
.intersection(&self.analysis_targets)
.next()
.is_some()
{
// If we are mutating the place, initialize it.
if ptx.is_mutating() {
self.push_target(MemoryInitOp::SetRef {
operand: Operand::Copy(place.clone()),
value: true,
position: InsertPosition::After,
});
} else {
// Otherwise, check its initialization.
self.push_target(MemoryInitOp::CheckRef { operand: Operand::Copy(place.clone()) });
// In order to check whether we should get-instrument the place, see if it resolves to the
// analysis target.
let needs_get = {
self.points_to
.resolve_place_stable(place.clone(), self.current_instance, self.tcx)
.intersection(&self.analysis_targets)
.next()
.is_some()
};

// In order to check whether we should set-instrument the place, we need to figure out if
// the place has a common ancestor of the same level with the target.
//
// This is needed because instrumenting the place only if it resolves to the target could give
// false positives in presence of some aliasing relations.
//
// Here is a simple example:
// ```
// fn foo(val_1: u32, val_2: u32, flag: bool) {
// let reference = if flag {
// &val_1
// } else {
// &val_2
// };
// let _ = *reference;
// }
// ```
// It yields the following aliasing graph:
//
// `val_1 <-- reference --> val_2`
//
// If `val_1` is a legitimate instrumentation target, we would get-instrument an instruction
// that reads from `*reference`, but that could mean that `val_2` is checked, too. Hence,
// if we don't set-instrument `val_2` we will get a false-positive.
//
// See `tests/expected/uninit/delayed-ub-overapprox.rs` for a more specific example.
let needs_set = {
let mut has_common_ancestor = false;
let mut self_ancestors =
self.points_to.resolve_place_stable(place.clone(), self.current_instance, self.tcx);
let mut target_ancestors = self.analysis_targets.clone();

while !self_ancestors.is_empty() || !target_ancestors.is_empty() {
if self_ancestors.intersection(&target_ancestors).next().is_some() {
has_common_ancestor = true;
break;
}
self_ancestors = self.points_to.ancestors(&self_ancestors);
target_ancestors = self.points_to.ancestors(&target_ancestors);
}

has_common_ancestor
};

// If we are mutating the place, initialize it.
if ptx.is_mutating() && needs_set {
self.push_target(MemoryInitOp::SetRef {
operand: Operand::Copy(place.clone()),
value: true,
position: InsertPosition::After,
});
} else if !ptx.is_mutating() && needs_get {
// Otherwise, check its initialization.
self.push_target(MemoryInitOp::CheckRef { operand: Operand::Copy(place.clone()) });
}
self.super_place(place, ptx, location)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,9 +100,7 @@ impl GlobalPass for DelayedUbPass {
}

// Since analysis targets are *pointers*, need to get its successors for instrumentation.
for target in targets.iter() {
analysis_targets.extend(global_points_to_graph.pointees_of(target));
}
analysis_targets.extend(global_points_to_graph.successors(&targets));

// If we are generating MIR, generate the points-to graph as well.
if tcx.sess.opts.output_types.contains_key(&OutputType::Mir) {
Expand Down
1 change: 1 addition & 0 deletions tests/expected/uninit/delayed-ub-overapprox.expected
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Complete - 1 successfully verified harnesses, 0 failures, 1 total.
32 changes: 32 additions & 0 deletions tests/expected/uninit/delayed-ub-overapprox.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// Copyright Kani Contributors
// SPDX-License-Identifier: Apache-2.0 OR MIT
// kani-flags: -Z uninit-checks

//! Make sure that no false positives are generated when points-to analysis overapproximates
//! aliasing.
#[kani::proof]
fn check_delayed_ub_overapprox() {
unsafe {
let mut value: u128 = 0;
let value_ref = &mut value;
// Perform a call to the helper before mutable pointer cast. This way, a check inserted into
// the helper will pass.
helper(value_ref);
// Cast between two pointers of different padding, which will mark `value` as a possible
// delayed UB analysis target.
let ptr = value_ref as *mut _ as *mut (u8, u32, u64);
*ptr = (4, 4, 4); // Note that since we never read from `value` after overwriting it, no delayed UB occurs.
// Create another `value` and call helper. Note that since helper could potentially
// dereference a delayed-UB pointer, an initialization check will be added to the helper.
// Hence, delayed UB analysis needs to mark the value as properly initialized in shadow
// memory to avoid the spurious failure.
let mut value2: u128 = 0;
helper(&value2);
}
}

/// A helper that could trigger delayed UB.
fn helper(reference: &u128) -> bool {
*reference == 42
}
13 changes: 13 additions & 0 deletions tests/std-checks/std/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright Kani Contributors
# SPDX-License-Identifier: Apache-2.0 OR MIT
[package]
name = "verify-std"
version = "0.1.0"
edition = "2021"
description = "This crate contains contracts and harnesses for std library"

[package.metadata.kani]
unstable = { function-contracts = true, mem-predicates = true, uninit-checks = true }

[package.metadata.kani.flags]
output-format = "terse"
1 change: 1 addition & 0 deletions tests/std-checks/std/atomic.expected
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Complete - 5 successfully verified harnesses, 0 failures, 5 total.
1 change: 1 addition & 0 deletions tests/std-checks/std/boxed.expected
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Complete - 2 successfully verified harnesses, 0 failures, 2 total.
40 changes: 40 additions & 0 deletions tests/std-checks/std/src/boxed.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// Copyright Kani Contributors
// SPDX-License-Identifier: Apache-2.0 OR MIT

extern crate kani;

/// Create wrapper functions to standard library functions that contains their contract.
pub mod contracts {
use kani::{mem::*, requires};

/// The actual pre-condition is more complicated:
///
/// "For non-zero-sized values, ... a value: *mut T that has been allocated with the Global
/// allocator with Layout::for_value(&*value) may be converted into a box using
/// Box::<T>::from_raw(value)."
///
/// "For zero-sized values, the Box pointer still has to be valid for reads and writes and
/// sufficiently aligned."
#[requires(can_dereference(raw))]
pub unsafe fn from_raw<T>(raw: *mut T) -> Box<T> {
std::boxed::Box::from_raw(raw)
}
}

#[cfg(kani)]
mod verify {
use super::*;

#[kani::proof_for_contract(contracts::from_raw)]
pub fn check_from_raw_u32() {
let ptr = unsafe { std::alloc::alloc(std::alloc::Layout::new::<u32>()) as *mut u32 };
unsafe { ptr.write(kani::any()) };
let _ = unsafe { contracts::from_raw(ptr) };
}

#[kani::proof_for_contract(contracts::from_raw)]
pub fn check_from_raw_unit() {
let ptr = kani::any::<usize>() as *mut ();
let _ = unsafe { contracts::from_raw(ptr) };
}
}
9 changes: 9 additions & 0 deletions tests/std-checks/std/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
// Copyright Kani Contributors
// SPDX-License-Identifier: Apache-2.0 OR MIT

//! Top file that includes all sub-modules mimicking std structure.
extern crate kani;

mod boxed;
mod sync;
Loading

0 comments on commit e28f3db

Please sign in to comment.