Skip to content

Commit

Permalink
Use StableHasher everywhere
Browse files Browse the repository at this point in the history
The standard implementations of Hasher have architecture-dependent
results when hashing integers. This causes problems when the hashes are
stored within metadata - metadata written by one host architecture can't
be read by another.

To fix that, implement an architecture-independent StableHasher and use
it in all places an architecture-independent hasher is needed.

Fixes #38177.
  • Loading branch information
arielb1 committed Dec 15, 2016
1 parent 01d53df commit e1d4b8f
Show file tree
Hide file tree
Showing 14 changed files with 269 additions and 229 deletions.
6 changes: 3 additions & 3 deletions src/librustc/hir/map/definitions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,9 @@

use hir::def_id::{CrateNum, DefId, DefIndex, LOCAL_CRATE};
use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::stable_hasher::StableHasher;
use std::fmt::Write;
use std::hash::{Hash, Hasher};
use std::collections::hash_map::DefaultHasher;
use syntax::ast;
use syntax::symbol::{Symbol, InternedString};
use ty::TyCtxt;
Expand Down Expand Up @@ -131,7 +131,8 @@ impl DefPath {
}

pub fn deterministic_hash(&self, tcx: TyCtxt) -> u64 {
let mut state = DefaultHasher::new();
debug!("deterministic_hash({:?})", self);
let mut state = StableHasher::new();
self.deterministic_hash_to(tcx, &mut state);
state.finish()
}
Expand Down Expand Up @@ -377,4 +378,3 @@ impl DefPathData {
self.as_interned_str().to_string()
}
}

104 changes: 16 additions & 88 deletions src/librustc/ty/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,11 @@ use util::nodemap::FxHashMap;
use middle::lang_items;

use rustc_const_math::{ConstInt, ConstIsize, ConstUsize};
use rustc_data_structures::stable_hasher::{StableHasher, StableHasherResult};

use std::cell::RefCell;
use std::cmp;
use std::hash::{Hash, Hasher};
use std::collections::hash_map::DefaultHasher;
use std::hash::Hash;
use std::intrinsics;
use syntax::ast::{self, Name};
use syntax::attr::{self, SignedInt, UnsignedInt};
Expand Down Expand Up @@ -349,7 +349,7 @@ impl<'a, 'gcx, 'tcx> TyCtxt<'a, 'gcx, 'tcx> {
/// Creates a hash of the type `Ty` which will be the same no matter what crate
/// context it's calculated within. This is used by the `type_id` intrinsic.
pub fn type_id_hash(self, ty: Ty<'tcx>) -> u64 {
let mut hasher = TypeIdHasher::new(self, DefaultHasher::default());
let mut hasher = TypeIdHasher::new(self);
hasher.visit_ty(ty);
hasher.finish()
}
Expand Down Expand Up @@ -395,96 +395,26 @@ impl<'a, 'gcx, 'tcx> TyCtxt<'a, 'gcx, 'tcx> {
}
}

/// When hashing a type this ends up affecting properties like symbol names. We
/// want these symbol names to be calculated independent of other factors like
/// what architecture you're compiling *from*.
///
/// The hashing just uses the standard `Hash` trait, but the implementations of
/// `Hash` for the `usize` and `isize` types are *not* architecture independent
/// (e.g. they hash 4 or 8 bytes). As a result we want to avoid `usize` and
/// `isize` completely when hashing. To ensure that these don't leak in we use a
/// custom hasher implementation here which inflates the size of these to a `u64`
/// and `i64`.
///
/// The same goes for endianness: We always convert multi-byte integers to little
/// endian before hashing.
#[derive(Debug)]
pub struct ArchIndependentHasher<H> {
inner: H,
}

impl<H> ArchIndependentHasher<H> {
pub fn new(inner: H) -> ArchIndependentHasher<H> {
ArchIndependentHasher { inner: inner }
}

pub fn into_inner(self) -> H {
self.inner
}
pub struct TypeIdHasher<'a, 'gcx: 'a+'tcx, 'tcx: 'a, W> {
tcx: TyCtxt<'a, 'gcx, 'tcx>,
state: StableHasher<W>,
}

impl<H: Hasher> Hasher for ArchIndependentHasher<H> {
fn write(&mut self, bytes: &[u8]) {
self.inner.write(bytes)
}

fn finish(&self) -> u64 {
self.inner.finish()
}

fn write_u8(&mut self, i: u8) {
self.inner.write_u8(i)
}
fn write_u16(&mut self, i: u16) {
self.inner.write_u16(i.to_le())
}
fn write_u32(&mut self, i: u32) {
self.inner.write_u32(i.to_le())
}
fn write_u64(&mut self, i: u64) {
self.inner.write_u64(i.to_le())
}
fn write_usize(&mut self, i: usize) {
self.inner.write_u64((i as u64).to_le())
impl<'a, 'gcx, 'tcx, W> TypeIdHasher<'a, 'gcx, 'tcx, W>
where W: StableHasherResult
{
pub fn new(tcx: TyCtxt<'a, 'gcx, 'tcx>) -> Self {
TypeIdHasher { tcx: tcx, state: StableHasher::new() }
}
fn write_i8(&mut self, i: i8) {
self.inner.write_i8(i)
}
fn write_i16(&mut self, i: i16) {
self.inner.write_i16(i.to_le())
}
fn write_i32(&mut self, i: i32) {
self.inner.write_i32(i.to_le())
}
fn write_i64(&mut self, i: i64) {
self.inner.write_i64(i.to_le())
}
fn write_isize(&mut self, i: isize) {
self.inner.write_i64((i as i64).to_le())
}
}

pub struct TypeIdHasher<'a, 'gcx: 'a+'tcx, 'tcx: 'a, H> {
tcx: TyCtxt<'a, 'gcx, 'tcx>,
state: ArchIndependentHasher<H>,
}

impl<'a, 'gcx, 'tcx, H: Hasher> TypeIdHasher<'a, 'gcx, 'tcx, H> {
pub fn new(tcx: TyCtxt<'a, 'gcx, 'tcx>, state: H) -> Self {
TypeIdHasher {
tcx: tcx,
state: ArchIndependentHasher::new(state),
}
pub fn finish(self) -> W {
self.state.finish()
}

pub fn hash<T: Hash>(&mut self, x: T) {
x.hash(&mut self.state);
}

pub fn finish(self) -> u64 {
self.state.finish()
}

fn hash_discriminant_u8<T>(&mut self, x: &T) {
let v = unsafe {
intrinsics::discriminant_value(x)
Expand All @@ -504,13 +434,11 @@ impl<'a, 'gcx, 'tcx, H: Hasher> TypeIdHasher<'a, 'gcx, 'tcx, H> {
pub fn def_path(&mut self, def_path: &ast_map::DefPath) {
def_path.deterministic_hash_to(self.tcx, &mut self.state);
}

pub fn into_inner(self) -> H {
self.state.inner
}
}

impl<'a, 'gcx, 'tcx, H: Hasher> TypeVisitor<'tcx> for TypeIdHasher<'a, 'gcx, 'tcx, H> {
impl<'a, 'gcx, 'tcx, W> TypeVisitor<'tcx> for TypeIdHasher<'a, 'gcx, 'tcx, W>
where W: StableHasherResult
{
fn visit_ty(&mut self, ty: Ty<'tcx>) -> bool {
// Distinguish between the Ty variants uniformly.
self.hash_discriminant_u8(&ty.sty);
Expand Down
3 changes: 3 additions & 0 deletions src/librustc_data_structures/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ extern crate serialize as rustc_serialize; // used by deriving
#[cfg(unix)]
extern crate libc;

pub use rustc_serialize::hex::ToHex;

pub mod array_vec;
pub mod accumulate_vec;
pub mod small_vec;
Expand All @@ -59,6 +61,7 @@ pub mod indexed_vec;
pub mod obligation_forest;
pub mod snapshot_map;
pub mod snapshot_vec;
pub mod stable_hasher;
pub mod transitive_relation;
pub mod unify;
pub mod fnv;
Expand Down
176 changes: 176 additions & 0 deletions src/librustc_data_structures/stable_hasher.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
// Copyright 2016 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

use std::hash::Hasher;
use std::marker::PhantomData;
use std::mem;
use blake2b::Blake2bHasher;
use rustc_serialize::leb128;

/// Encodes `value` as unsigned LEB128 into `buf` and returns the number of
/// bytes the encoder reported writing, so the encoded data is `buf[..len]`.
fn write_unsigned_leb128_to_buf(buf: &mut [u8; 16], value: u64) -> usize {
    // The encoder reports each output byte together with its position;
    // store it at that position in the caller's scratch buffer.
    let store = |position: usize, byte: u8| {
        buf[position] = byte;
    };
    leb128::write_unsigned_leb128_to(value, store)
}

/// Encodes `value` as signed LEB128 into `buf` and returns the number of
/// bytes the encoder reported writing, so the encoded data is `buf[..len]`.
fn write_signed_leb128_to_buf(buf: &mut [u8; 16], value: i64) -> usize {
    // The encoder reports each output byte together with its position;
    // store it at that position in the caller's scratch buffer.
    let store = |position: usize, byte: u8| {
        buf[position] = byte;
    };
    leb128::write_signed_leb128_to(value, store)
}

/// An architecture-independent hasher.
///
/// Hashes computed with it can end up affecting properties like symbol
/// names, and we want those to be calculated independently of other factors
/// like what architecture you're compiling *from*.
///
/// The hashing just uses the standard `Hash` trait, but the implementations of
/// `Hash` for the `usize` and `isize` types are *not* architecture independent
/// (e.g. they hash 4 or 8 bytes). As a result we want to avoid hashing `usize`
/// and `isize` in their native representation.
///
/// To do that, we encode all integers to be hashed with some
/// arch-independent encoding.
///
/// At the moment, we pass i8/u8 straight through and encode
/// all other integers using leb128.
///
/// This hasher currently always uses the stable Blake2b algorithm
/// and allows for variable output lengths through its type
/// parameter.
#[derive(Debug)]
pub struct StableHasher<W> {
/// Underlying Blake2b state that every (encoded) byte is written to.
state: Blake2bHasher,
/// Running count of bytes written to `state`, i.e. counted after any
/// leb128 encoding has been applied.
bytes_hashed: u64,
/// Zero-sized marker for the result type `W`; `new` passes
/// `size_of::<W>()` to the Blake2b constructor.
width: PhantomData<W>,
}

/// A type that can be produced as the final result of a `StableHasher`.
///
/// `StableHasher::<W>::new` passes `size_of::<W>()` to the Blake2b
/// constructor, and `StableHasher::<W>::finish` delegates to this trait.
/// Implementations are provided in this file for `[u8; 20]` and `u64`.
pub trait StableHasherResult: Sized {
/// Consumes `hasher` and extracts the result value from it.
fn finish(hasher: StableHasher<Self>) -> Self;
}

impl<W: StableHasherResult> StableHasher<W> {
    /// Creates a hasher that has not hashed any bytes yet.
    ///
    /// The Blake2b state is constructed with `size_of::<W>()` as its first
    /// argument and an empty slice as its second (presumably the output
    /// length and the key, respectively; confirm against `blake2b`).
    pub fn new() -> Self {
        let output_len = mem::size_of::<W>();
        let state = Blake2bHasher::new(output_len, &[]);
        StableHasher {
            state: state,
            bytes_hashed: 0,
            width: PhantomData,
        }
    }

    /// Consumes the hasher and produces the result, delegating to the
    /// `StableHasherResult` implementation of `W`.
    pub fn finish(self) -> W {
        <W as StableHasherResult>::finish(self)
    }
}

impl StableHasherResult for [u8; 20] {
    /// Finalizes the Blake2b state and copies the resulting bytes into a
    /// fixed 20-byte array.
    ///
    /// `copy_from_slice` panics if the finalized slice is not exactly
    /// 20 bytes long.
    fn finish(mut hasher: StableHasher<Self>) -> Self {
        let digest = hasher.state.finalize();
        let mut output = [0u8; 20];
        output.copy_from_slice(digest);
        output
    }
}

impl StableHasherResult for u64 {
    /// Finalizes the Blake2b state and then returns whatever its own
    /// `finish` reports as the 64-bit hash value.
    fn finish(hasher: StableHasher<Self>) -> Self {
        // Take ownership of the inner state; the byte counter and width
        // marker are not needed to produce the result.
        let mut blake = hasher.state;
        blake.finalize();
        blake.finish()
    }
}

impl<W> StableHasher<W> {
    /// Finalizes the underlying Blake2b state and returns the byte slice
    /// it yields.
    #[inline]
    pub fn finalize(&mut self) -> &[u8] {
        self.state.finalize()
    }

    /// Number of bytes written to the underlying state so far. Integers
    /// that are leb128-encoded count with their encoded length.
    #[inline]
    pub fn bytes_hashed(&self) -> u64 {
        self.bytes_hashed
    }

    /// Hashes `value` in its unsigned leb128 encoding and adds the encoded
    /// length to the byte counter.
    #[inline]
    fn write_uleb128(&mut self, value: u64) {
        let mut scratch = [0u8; 16];
        let written = write_unsigned_leb128_to_buf(&mut scratch, value);
        let encoded = &scratch[..written];
        self.state.write(encoded);
        self.bytes_hashed += written as u64;
    }

    /// Hashes `value` in its signed leb128 encoding and adds the encoded
    /// length to the byte counter.
    #[inline]
    fn write_ileb128(&mut self, value: i64) {
        let mut scratch = [0u8; 16];
        let written = write_signed_leb128_to_buf(&mut scratch, value);
        let encoded = &scratch[..written];
        self.state.write(encoded);
        self.bytes_hashed += written as u64;
    }
}

// Every integer other than u8/i8 is leb128-encoded before it reaches the
// underlying state. Because small integers dominate, this significantly and
// cheaply reduces the number of bytes hashed, which is good because blake2b
// is expensive.
impl<W> Hasher for StableHasher<W> {
    /// Always panics: the result must be obtained through the inherent,
    /// consuming `StableHasher::finish` instead.
    fn finish(&self) -> u64 {
        panic!("use StableHasher::finish instead");
    }

    /// Feeds raw bytes to the underlying state unchanged and counts them.
    #[inline]
    fn write(&mut self, bytes: &[u8]) {
        let count = bytes.len() as u64;
        self.state.write(bytes);
        self.bytes_hashed += count;
    }

    /// Single bytes are passed straight through without encoding.
    #[inline]
    fn write_u8(&mut self, i: u8) {
        self.state.write_u8(i);
        self.bytes_hashed += 1;
    }

    #[inline]
    fn write_u16(&mut self, i: u16) {
        self.write_uleb128(u64::from(i));
    }

    #[inline]
    fn write_u32(&mut self, i: u32) {
        self.write_uleb128(u64::from(i));
    }

    #[inline]
    fn write_u64(&mut self, i: u64) {
        self.write_uleb128(i);
    }

    /// Widened to `u64` before encoding so the bytes hashed do not depend
    /// on the host's pointer width.
    #[inline]
    fn write_usize(&mut self, i: usize) {
        self.write_uleb128(i as u64);
    }

    /// Single bytes are passed straight through without encoding.
    #[inline]
    fn write_i8(&mut self, i: i8) {
        self.state.write_i8(i);
        self.bytes_hashed += 1;
    }

    #[inline]
    fn write_i16(&mut self, i: i16) {
        self.write_ileb128(i64::from(i));
    }

    #[inline]
    fn write_i32(&mut self, i: i32) {
        self.write_ileb128(i64::from(i));
    }

    #[inline]
    fn write_i64(&mut self, i: i64) {
        self.write_ileb128(i);
    }

    /// Widened to `i64` before encoding so the bytes hashed do not depend
    /// on the host's pointer width.
    #[inline]
    fn write_isize(&mut self, i: isize) {
        self.write_ileb128(i as i64);
    }
}
Loading

0 comments on commit e1d4b8f

Please sign in to comment.