diff --git a/mk/crates.mk b/mk/crates.mk index c7abf271e5127..25192bfd27a44 100644 --- a/mk/crates.mk +++ b/mk/crates.mk @@ -121,7 +121,7 @@ DEPS_rustc_driver := arena flate getopts graphviz libc rustc rustc_back rustc_bo rustc_trans rustc_privacy rustc_lint rustc_plugin \ rustc_metadata syntax_ext proc_macro_plugin \ rustc_passes rustc_save_analysis rustc_const_eval \ - rustc_incremental syntax_pos rustc_errors proc_macro + rustc_incremental syntax_pos rustc_errors proc_macro rustc_data_structures DEPS_rustc_errors := log libc serialize syntax_pos DEPS_rustc_lint := rustc log syntax syntax_pos rustc_const_eval DEPS_rustc_llvm := native:rustllvm libc std rustc_bitflags diff --git a/src/Cargo.lock b/src/Cargo.lock index 64c8ec101916b..5826995cc3cfc 100644 --- a/src/Cargo.lock +++ b/src/Cargo.lock @@ -383,6 +383,7 @@ dependencies = [ "rustc_back 0.0.0", "rustc_borrowck 0.0.0", "rustc_const_eval 0.0.0", + "rustc_data_structures 0.0.0", "rustc_errors 0.0.0", "rustc_incremental 0.0.0", "rustc_lint 0.0.0", diff --git a/src/librustc/lib.rs b/src/librustc/lib.rs index 5f2ba9e68d74d..d17402d21342b 100644 --- a/src/librustc/lib.rs +++ b/src/librustc/lib.rs @@ -111,8 +111,6 @@ pub mod traits; pub mod ty; pub mod util { - pub use rustc_back::sha2; - pub mod common; pub mod ppaux; pub mod nodemap; diff --git a/src/librustc/ty/util.rs b/src/librustc/ty/util.rs index bb36fa1487eee..cca4069ba5a17 100644 --- a/src/librustc/ty/util.rs +++ b/src/librustc/ty/util.rs @@ -405,6 +405,7 @@ impl<'a, 'gcx, 'tcx> TyCtxt<'a, 'gcx, 'tcx> { /// /// The same goes for endianess: We always convert multi-byte integers to little /// endian before hashing. +#[derive(Debug)] pub struct ArchIndependentHasher { inner: H, } @@ -413,6 +414,10 @@ impl ArchIndependentHasher { pub fn new(inner: H) -> ArchIndependentHasher { ArchIndependentHasher { inner: inner } } + + pub fn into_inner(self) -> H { + self.inner + } } impl Hasher for ArchIndependentHasher { diff --git a/src/librustc_back/lib.rs b/src/librustc_back/lib.rs index e6d1982d31c2d..da5f787bdf31e 100644 --- a/src/librustc_back/lib.rs +++ b/src/librustc_back/lib.rs @@ -36,9 +36,8 @@ #![feature(rand)] #![feature(rustc_private)] #![feature(staged_api)] -#![feature(step_by)] #![cfg_attr(stage0, feature(question_mark))] -#![cfg_attr(test, feature(test, rand))] +#![cfg_attr(test, feature(rand))] extern crate syntax; extern crate libc; @@ -48,7 +47,6 @@ extern crate serialize; extern crate serialize as rustc_serialize; // used by deriving pub mod tempdir; -pub mod sha2; pub mod target; pub mod slice; pub mod dynamic_lib; diff --git a/src/librustc_back/sha2.rs b/src/librustc_back/sha2.rs deleted file mode 100644 index 97fb39c17ea0e..0000000000000 --- a/src/librustc_back/sha2.rs +++ /dev/null @@ -1,679 +0,0 @@ -// Copyright 2012-2014 The Rust Project Developers. See the COPYRIGHT -// file at the top-level directory of this distribution and at -// http://rust-lang.org/COPYRIGHT. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -//! This module implements only the Sha256 function since that is all that is needed for internal -//! use. This implementation is not intended for external use or for any use where security is -//! important. - -use serialize::hex::ToHex; - -/// Write a u32 into a vector, which must be 4 bytes long. The value is written in big-endian -/// format. -fn write_u32_be(dst: &mut[u8], input: u32) { - dst[0] = (input >> 24) as u8; - dst[1] = (input >> 16) as u8; - dst[2] = (input >> 8) as u8; - dst[3] = input as u8; -} - -/// Read the value of a vector of bytes as a u32 value in big-endian format. -fn read_u32_be(input: &[u8]) -> u32 { - (input[0] as u32) << 24 | - (input[1] as u32) << 16 | - (input[2] as u32) << 8 | - (input[3] as u32) -} - -/// Read a vector of bytes into a vector of u32s. The values are read in big-endian format. -fn read_u32v_be(dst: &mut[u32], input: &[u8]) { - assert!(dst.len() * 4 == input.len()); - let mut pos = 0; - for chunk in input.chunks(4) { - dst[pos] = read_u32_be(chunk); - pos += 1; - } -} - -trait ToBits: Sized { - /// Convert the value in bytes to the number of bits, a tuple where the 1st item is the - /// high-order value and the 2nd item is the low order value. - fn to_bits(self) -> (Self, Self); -} - -impl ToBits for u64 { - fn to_bits(self) -> (u64, u64) { - (self >> 61, self << 3) - } -} - -/// Adds the specified number of bytes to the bit count. panic!() if this would cause numeric -/// overflow. -fn add_bytes_to_bits(bits: u64, bytes: u64) -> u64 { - let (new_high_bits, new_low_bits) = bytes.to_bits(); - - if new_high_bits > 0 { - panic!("numeric overflow occurred.") - } - - match bits.checked_add(new_low_bits) { - Some(x) => x, - None => panic!("numeric overflow occurred.") - } -} - -/// A FixedBuffer, likes its name implies, is a fixed size buffer. When the buffer becomes full, it -/// must be processed. The input() method takes care of processing and then clearing the buffer -/// automatically. However, other methods do not and require the caller to process the buffer. Any -/// method that modifies the buffer directory or provides the caller with bytes that can be modified -/// results in those bytes being marked as used by the buffer. -trait FixedBuffer { - /// Input a vector of bytes. If the buffer becomes full, process it with the provided - /// function and then clear the buffer. - fn input(&mut self, input: &[u8], func: F) where - F: FnMut(&[u8]); - - /// Reset the buffer. - fn reset(&mut self); - - /// Zero the buffer up until the specified index. The buffer position currently must not be - /// greater than that index. - fn zero_until(&mut self, idx: usize); - - /// Get a slice of the buffer of the specified size. There must be at least that many bytes - /// remaining in the buffer. - fn next<'s>(&'s mut self, len: usize) -> &'s mut [u8]; - - /// Get the current buffer. The buffer must already be full. This clears the buffer as well. - fn full_buffer<'s>(&'s mut self) -> &'s [u8]; - - /// Get the current position of the buffer. - fn position(&self) -> usize; - - /// Get the number of bytes remaining in the buffer until it is full. - fn remaining(&self) -> usize; - - /// Get the size of the buffer - fn size(&self) -> usize; -} - -/// A FixedBuffer of 64 bytes useful for implementing Sha256 which has a 64 byte blocksize. -struct FixedBuffer64 { - buffer: [u8; 64], - buffer_idx: usize, -} - -impl FixedBuffer64 { - /// Create a new FixedBuffer64 - fn new() -> FixedBuffer64 { - FixedBuffer64 { - buffer: [0; 64], - buffer_idx: 0 - } - } -} - -impl FixedBuffer for FixedBuffer64 { - fn input(&mut self, input: &[u8], mut func: F) where - F: FnMut(&[u8]), - { - let mut i = 0; - - let size = self.size(); - - // If there is already data in the buffer, copy as much as we can into it and process - // the data if the buffer becomes full. - if self.buffer_idx != 0 { - let buffer_remaining = size - self.buffer_idx; - if input.len() >= buffer_remaining { - self.buffer[self.buffer_idx..size] - .copy_from_slice(&input[..buffer_remaining]); - self.buffer_idx = 0; - func(&self.buffer); - i += buffer_remaining; - } else { - self.buffer[self.buffer_idx..self.buffer_idx + input.len()] - .copy_from_slice(input); - self.buffer_idx += input.len(); - return; - } - } - - // While we have at least a full buffer size chunk's worth of data, process that data - // without copying it into the buffer - while input.len() - i >= size { - func(&input[i..i + size]); - i += size; - } - - // Copy any input data into the buffer. At this point in the method, the amount of - // data left in the input vector will be less than the buffer size and the buffer will - // be empty. - let input_remaining = input.len() - i; - self.buffer[..input_remaining].copy_from_slice(&input[i..]); - self.buffer_idx += input_remaining; - } - - fn reset(&mut self) { - self.buffer_idx = 0; - } - - fn zero_until(&mut self, idx: usize) { - assert!(idx >= self.buffer_idx); - for slot in self.buffer[self.buffer_idx..idx].iter_mut() { - *slot = 0; - } - self.buffer_idx = idx; - } - - fn next<'s>(&'s mut self, len: usize) -> &'s mut [u8] { - self.buffer_idx += len; - &mut self.buffer[self.buffer_idx - len..self.buffer_idx] - } - - fn full_buffer<'s>(&'s mut self) -> &'s [u8] { - assert!(self.buffer_idx == 64); - self.buffer_idx = 0; - &self.buffer[..64] - } - - fn position(&self) -> usize { self.buffer_idx } - - fn remaining(&self) -> usize { 64 - self.buffer_idx } - - fn size(&self) -> usize { 64 } -} - -/// The StandardPadding trait adds a method useful for Sha256 to a FixedBuffer struct. -trait StandardPadding { - /// Add padding to the buffer. The buffer must not be full when this method is called and is - /// guaranteed to have exactly rem remaining bytes when it returns. If there are not at least - /// rem bytes available, the buffer will be zero padded, processed, cleared, and then filled - /// with zeros again until only rem bytes are remaining. - fn standard_padding(&mut self, rem: usize, func: F) where F: FnMut(&[u8]); -} - -impl StandardPadding for T { - fn standard_padding(&mut self, rem: usize, mut func: F) where F: FnMut(&[u8]) { - let size = self.size(); - - self.next(1)[0] = 128; - - if self.remaining() < rem { - self.zero_until(size); - func(self.full_buffer()); - } - - self.zero_until(size - rem); - } -} - -/// The Digest trait specifies an interface common to digest functions, such as SHA-1 and the SHA-2 -/// family of digest functions. -pub trait Digest { - /// Provide message data. - /// - /// # Arguments - /// - /// * input - A vector of message data - fn input(&mut self, input: &[u8]); - - /// Retrieve the digest result. This method may be called multiple times. - /// - /// # Arguments - /// - /// * out - the vector to hold the result. Must be large enough to contain output_bits(). - fn result(&mut self, out: &mut [u8]); - - /// Reset the digest. This method must be called after result() and before supplying more - /// data. - fn reset(&mut self); - - /// Get the output size in bits. - fn output_bits(&self) -> usize; - - /// Convenience function that feeds a string into a digest. - /// - /// # Arguments - /// - /// * `input` The string to feed into the digest - fn input_str(&mut self, input: &str) { - self.input(input.as_bytes()); - } - - /// Convenience function that retrieves the result of a digest as a - /// newly allocated vec of bytes. - fn result_bytes(&mut self) -> Vec { - let mut buf = vec![0; (self.output_bits()+7)/8]; - self.result(&mut buf); - buf - } - - /// Convenience function that retrieves the result of a digest as a - /// String in hexadecimal format. - fn result_str(&mut self) -> String { - self.result_bytes().to_hex().to_string() - } -} - -// A structure that represents that state of a digest computation for the SHA-2 512 family of digest -// functions -struct Engine256State { - h0: u32, - h1: u32, - h2: u32, - h3: u32, - h4: u32, - h5: u32, - h6: u32, - h7: u32, -} - -impl Engine256State { - fn new(h: &[u32; 8]) -> Engine256State { - Engine256State { - h0: h[0], - h1: h[1], - h2: h[2], - h3: h[3], - h4: h[4], - h5: h[5], - h6: h[6], - h7: h[7] - } - } - - fn reset(&mut self, h: &[u32; 8]) { - self.h0 = h[0]; - self.h1 = h[1]; - self.h2 = h[2]; - self.h3 = h[3]; - self.h4 = h[4]; - self.h5 = h[5]; - self.h6 = h[6]; - self.h7 = h[7]; - } - - fn process_block(&mut self, data: &[u8]) { - fn ch(x: u32, y: u32, z: u32) -> u32 { - ((x & y) ^ ((!x) & z)) - } - - fn maj(x: u32, y: u32, z: u32) -> u32 { - ((x & y) ^ (x & z) ^ (y & z)) - } - - fn sum0(x: u32) -> u32 { - ((x >> 2) | (x << 30)) ^ ((x >> 13) | (x << 19)) ^ ((x >> 22) | (x << 10)) - } - - fn sum1(x: u32) -> u32 { - ((x >> 6) | (x << 26)) ^ ((x >> 11) | (x << 21)) ^ ((x >> 25) | (x << 7)) - } - - fn sigma0(x: u32) -> u32 { - ((x >> 7) | (x << 25)) ^ ((x >> 18) | (x << 14)) ^ (x >> 3) - } - - fn sigma1(x: u32) -> u32 { - ((x >> 17) | (x << 15)) ^ ((x >> 19) | (x << 13)) ^ (x >> 10) - } - - let mut a = self.h0; - let mut b = self.h1; - let mut c = self.h2; - let mut d = self.h3; - let mut e = self.h4; - let mut f = self.h5; - let mut g = self.h6; - let mut h = self.h7; - - let mut w = [0; 64]; - - // Sha-512 and Sha-256 use basically the same calculations which are implemented - // by these macros. Inlining the calculations seems to result in better generated code. - macro_rules! schedule_round { ($t:expr) => ( - w[$t] = sigma1(w[$t - 2]).wrapping_add(w[$t - 7]) - .wrapping_add(sigma0(w[$t - 15])).wrapping_add(w[$t - 16]); - ) - } - - macro_rules! sha2_round { - ($A:ident, $B:ident, $C:ident, $D:ident, - $E:ident, $F:ident, $G:ident, $H:ident, $K:ident, $t:expr) => ( - { - $H = $H.wrapping_add(sum1($E)).wrapping_add(ch($E, $F, $G)) - .wrapping_add($K[$t]).wrapping_add(w[$t]); - $D = $D.wrapping_add($H); - $H = $H.wrapping_add(sum0($A)).wrapping_add(maj($A, $B, $C)); - } - ) - } - - read_u32v_be(&mut w[0..16], data); - - // Putting the message schedule inside the same loop as the round calculations allows for - // the compiler to generate better code. - for t in (0..48).step_by(8) { - schedule_round!(t + 16); - schedule_round!(t + 17); - schedule_round!(t + 18); - schedule_round!(t + 19); - schedule_round!(t + 20); - schedule_round!(t + 21); - schedule_round!(t + 22); - schedule_round!(t + 23); - - sha2_round!(a, b, c, d, e, f, g, h, K32, t); - sha2_round!(h, a, b, c, d, e, f, g, K32, t + 1); - sha2_round!(g, h, a, b, c, d, e, f, K32, t + 2); - sha2_round!(f, g, h, a, b, c, d, e, K32, t + 3); - sha2_round!(e, f, g, h, a, b, c, d, K32, t + 4); - sha2_round!(d, e, f, g, h, a, b, c, K32, t + 5); - sha2_round!(c, d, e, f, g, h, a, b, K32, t + 6); - sha2_round!(b, c, d, e, f, g, h, a, K32, t + 7); - } - - for t in (48..64).step_by(8) { - sha2_round!(a, b, c, d, e, f, g, h, K32, t); - sha2_round!(h, a, b, c, d, e, f, g, K32, t + 1); - sha2_round!(g, h, a, b, c, d, e, f, K32, t + 2); - sha2_round!(f, g, h, a, b, c, d, e, K32, t + 3); - sha2_round!(e, f, g, h, a, b, c, d, K32, t + 4); - sha2_round!(d, e, f, g, h, a, b, c, K32, t + 5); - sha2_round!(c, d, e, f, g, h, a, b, K32, t + 6); - sha2_round!(b, c, d, e, f, g, h, a, K32, t + 7); - } - - self.h0 = self.h0.wrapping_add(a); - self.h1 = self.h1.wrapping_add(b); - self.h2 = self.h2.wrapping_add(c); - self.h3 = self.h3.wrapping_add(d); - self.h4 = self.h4.wrapping_add(e); - self.h5 = self.h5.wrapping_add(f); - self.h6 = self.h6.wrapping_add(g); - self.h7 = self.h7.wrapping_add(h); - } -} - -static K32: [u32; 64] = [ - 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, - 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, - 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, - 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, - 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, - 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, - 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, - 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, - 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, - 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, - 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, - 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, - 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, - 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, - 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, - 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 -]; - -// A structure that keeps track of the state of the Sha-256 operation and contains the logic -// necessary to perform the final calculations. -struct Engine256 { - length_bits: u64, - buffer: FixedBuffer64, - state: Engine256State, - finished: bool, -} - -impl Engine256 { - fn new(h: &[u32; 8]) -> Engine256 { - Engine256 { - length_bits: 0, - buffer: FixedBuffer64::new(), - state: Engine256State::new(h), - finished: false - } - } - - fn reset(&mut self, h: &[u32; 8]) { - self.length_bits = 0; - self.buffer.reset(); - self.state.reset(h); - self.finished = false; - } - - fn input(&mut self, input: &[u8]) { - assert!(!self.finished); - // Assumes that input.len() can be converted to u64 without overflow - self.length_bits = add_bytes_to_bits(self.length_bits, input.len() as u64); - let self_state = &mut self.state; - self.buffer.input(input, |input: &[u8]| { self_state.process_block(input) }); - } - - fn finish(&mut self) { - if !self.finished { - let self_state = &mut self.state; - self.buffer.standard_padding(8, |input: &[u8]| { self_state.process_block(input) }); - write_u32_be(self.buffer.next(4), (self.length_bits >> 32) as u32 ); - write_u32_be(self.buffer.next(4), self.length_bits as u32); - self_state.process_block(self.buffer.full_buffer()); - - self.finished = true; - } - } -} - -/// The SHA-256 hash algorithm -pub struct Sha256 { - engine: Engine256 -} - -impl Sha256 { - /// Construct a new instance of a SHA-256 digest. - /// Do not – under any circumstances – use this where timing attacks might be possible! - pub fn new() -> Sha256 { - Sha256 { - engine: Engine256::new(&H256) - } - } -} - -impl Digest for Sha256 { - fn input(&mut self, d: &[u8]) { - self.engine.input(d); - } - - fn result(&mut self, out: &mut [u8]) { - self.engine.finish(); - - write_u32_be(&mut out[0..4], self.engine.state.h0); - write_u32_be(&mut out[4..8], self.engine.state.h1); - write_u32_be(&mut out[8..12], self.engine.state.h2); - write_u32_be(&mut out[12..16], self.engine.state.h3); - write_u32_be(&mut out[16..20], self.engine.state.h4); - write_u32_be(&mut out[20..24], self.engine.state.h5); - write_u32_be(&mut out[24..28], self.engine.state.h6); - write_u32_be(&mut out[28..32], self.engine.state.h7); - } - - fn reset(&mut self) { - self.engine.reset(&H256); - } - - fn output_bits(&self) -> usize { 256 } -} - -static H256: [u32; 8] = [ - 0x6a09e667, - 0xbb67ae85, - 0x3c6ef372, - 0xa54ff53a, - 0x510e527f, - 0x9b05688c, - 0x1f83d9ab, - 0x5be0cd19 -]; - -#[cfg(test)] -mod tests { - #![allow(deprecated)] - extern crate rand; - - use self::rand::Rng; - use self::rand::isaac::IsaacRng; - use serialize::hex::FromHex; - use std::u64; - use super::{Digest, Sha256}; - - // A normal addition - no overflow occurs - #[test] - fn test_add_bytes_to_bits_ok() { - assert!(super::add_bytes_to_bits(100, 10) == 180); - } - - // A simple failure case - adding 1 to the max value - #[test] - #[should_panic] - fn test_add_bytes_to_bits_overflow() { - super::add_bytes_to_bits(u64::MAX, 1); - } - - struct Test { - input: String, - output_str: String, - } - - fn test_hash(sh: &mut D, tests: &[Test]) { - // Test that it works when accepting the message all at once - for t in tests { - sh.reset(); - sh.input_str(&t.input); - let out_str = sh.result_str(); - assert!(out_str == t.output_str); - } - - // Test that it works when accepting the message in pieces - for t in tests { - sh.reset(); - let len = t.input.len(); - let mut left = len; - while left > 0 { - let take = (left + 1) / 2; - sh.input_str(&t.input[len - left..take + len - left]); - left = left - take; - } - let out_str = sh.result_str(); - assert!(out_str == t.output_str); - } - } - - #[test] - fn test_sha256() { - // Examples from wikipedia - let wikipedia_tests = vec!( - Test { - input: "".to_string(), - output_str: "e3b0c44298fc1c149afb\ - f4c8996fb92427ae41e4649b934ca495991b7852b855".to_string() - }, - Test { - input: "The quick brown fox jumps over the lazy \ - dog".to_string(), - output_str: "d7a8fbb307d7809469ca\ - 9abcb0082e4f8d5651e46d3cdb762d02d0bf37c9e592".to_string() - }, - Test { - input: "The quick brown fox jumps over the lazy \ - dog.".to_string(), - output_str: "ef537f25c895bfa78252\ - 6529a9b63d97aa631564d5d789c2b765448c8635fb6c".to_string() - }); - - let tests = wikipedia_tests; - - let mut sh: Box<_> = box Sha256::new(); - - test_hash(&mut *sh, &tests); - } - - /// Feed 1,000,000 'a's into the digest with varying input sizes and check that the result is - /// correct. - fn test_digest_1million_random(digest: &mut D, blocksize: usize, expected: &str) { - let total_size = 1000000; - let buffer = vec![b'a'; blocksize * 2]; - let mut rng = IsaacRng::new_unseeded(); - let mut count = 0; - - digest.reset(); - - while count < total_size { - let next: usize = rng.gen_range(0, 2 * blocksize + 1); - let remaining = total_size - count; - let size = if next > remaining { remaining } else { next }; - digest.input(&buffer[..size]); - count += size; - } - - let result_str = digest.result_str(); - let result_bytes = digest.result_bytes(); - - assert_eq!(expected, result_str); - - let expected_vec: Vec = expected.from_hex() - .unwrap() - .into_iter() - .collect(); - assert_eq!(expected_vec, result_bytes); - } - - #[test] - fn test_1million_random_sha256() { - let mut sh = Sha256::new(); - test_digest_1million_random( - &mut sh, - 64, - "cdc76e5c9914fb9281a1c7e284d73e67f1809a48a497200e046d39ccc7112cd0"); - } -} - -#[cfg(test)] -mod bench { - extern crate test; - use self::test::Bencher; - use super::{Sha256, Digest}; - - #[bench] - pub fn sha256_10(b: &mut Bencher) { - let mut sh = Sha256::new(); - let bytes = [1; 10]; - b.iter(|| { - sh.input(&bytes); - }); - b.bytes = bytes.len() as u64; - } - - #[bench] - pub fn sha256_1k(b: &mut Bencher) { - let mut sh = Sha256::new(); - let bytes = [1; 1024]; - b.iter(|| { - sh.input(&bytes); - }); - b.bytes = bytes.len() as u64; - } - - #[bench] - pub fn sha256_64k(b: &mut Bencher) { - let mut sh = Sha256::new(); - let bytes = [1; 65536]; - b.iter(|| { - sh.input(&bytes); - }); - b.bytes = bytes.len() as u64; - } -} diff --git a/src/librustc_data_structures/blake2b.rs b/src/librustc_data_structures/blake2b.rs index 996df2e7fcfca..8c82c135dc426 100644 --- a/src/librustc_data_structures/blake2b.rs +++ b/src/librustc_data_structures/blake2b.rs @@ -20,17 +20,25 @@ // implementation. If you have the luxury of being able to use crates from // crates.io, you can go there and find still faster implementations. +use std::mem; +use std::slice; + pub struct Blake2bCtx { b: [u8; 128], h: [u64; 8], t: [u64; 2], c: usize, - outlen: usize, + outlen: u16, + finalized: bool } impl ::std::fmt::Debug for Blake2bCtx { fn fmt(&self, fmt: &mut ::std::fmt::Formatter) -> Result<(), ::std::fmt::Error> { - write!(fmt, "{:?}", self.h) + try!(write!(fmt, "hash: ")); + for v in &self.h[..] { + try!(write!(fmt, "{:x}", v)); + } + Ok(()) } } @@ -136,7 +144,7 @@ fn blake2b_compress(ctx: &mut Blake2bCtx, last: bool) { } } -pub fn blake2b_new(outlen: usize, key: &[u8]) -> Blake2bCtx { +fn blake2b_new(outlen: usize, key: &[u8]) -> Blake2bCtx { assert!(outlen > 0 && outlen <= 64 && key.len() <= 64); let mut ctx = Blake2bCtx { @@ -144,7 +152,8 @@ pub fn blake2b_new(outlen: usize, key: &[u8]) -> Blake2bCtx { h: BLAKE2B_IV, t: [0; 2], c: 0, - outlen: outlen, + outlen: outlen as u16, + finalized: false, }; ctx.h[0] ^= 0x01010000 ^ ((key.len() << 8) as u64) ^ (outlen as u64); @@ -157,8 +166,9 @@ pub fn blake2b_new(outlen: usize, key: &[u8]) -> Blake2bCtx { ctx } -pub fn blake2b_update(ctx: &mut Blake2bCtx, mut data: &[u8]) -{ +fn blake2b_update(ctx: &mut Blake2bCtx, mut data: &[u8]) { + assert!(!ctx.finalized, "Blake2bCtx already finalized"); + let mut bytes_to_copy = data.len(); let mut space_in_buffer = ctx.b.len() - ctx.c; @@ -183,8 +193,10 @@ pub fn blake2b_update(ctx: &mut Blake2bCtx, mut data: &[u8]) } } -pub fn blake2b_final(mut ctx: Blake2bCtx, out: &mut [u8]) +fn blake2b_final(ctx: &mut Blake2bCtx) { + assert!(!ctx.finalized, "Blake2bCtx already finalized"); + ctx.t[0] = ctx.t[0].wrapping_add(ctx.c as u64); if ctx.t[0] < ctx.c as u64 { ctx.t[1] += 1; @@ -195,7 +207,7 @@ pub fn blake2b_final(mut ctx: Blake2bCtx, out: &mut [u8]) ctx.c += 1; } - blake2b_compress(&mut ctx, true); + blake2b_compress(ctx, true); if cfg!(target_endian = "big") { // Make sure that the data is in memory in little endian format, as is @@ -205,13 +217,13 @@ pub fn blake2b_final(mut ctx: Blake2bCtx, out: &mut [u8]) } } - checked_mem_copy(&ctx.h, out, ctx.outlen); + ctx.finalized = true; } #[inline(always)] fn checked_mem_copy(from: &[T1], to: &mut [T2], byte_count: usize) { - let from_size = from.len() * ::std::mem::size_of::(); - let to_size = to.len() * ::std::mem::size_of::(); + let from_size = from.len() * mem::size_of::(); + let to_size = to.len() * mem::size_of::(); assert!(from_size >= byte_count); assert!(to_size >= byte_count); let from_byte_ptr = from.as_ptr() as * const u8; @@ -225,7 +237,45 @@ pub fn blake2b(out: &mut [u8], key: &[u8], data: &[u8]) { let mut ctx = blake2b_new(out.len(), key); blake2b_update(&mut ctx, data); - blake2b_final(ctx, out); + blake2b_final(&mut ctx); + checked_mem_copy(&ctx.h, out, ctx.outlen as usize); +} + +pub struct Blake2bHasher(Blake2bCtx); + +impl ::std::hash::Hasher for Blake2bHasher { + fn write(&mut self, bytes: &[u8]) { + blake2b_update(&mut self.0, bytes); + } + + fn finish(&self) -> u64 { + assert!(self.0.outlen == 8, + "Hasher initialized with incompatible output length"); + u64::from_le(self.0.h[0]) + } +} + +impl Blake2bHasher { + pub fn new(outlen: usize, key: &[u8]) -> Blake2bHasher { + Blake2bHasher(blake2b_new(outlen, key)) + } + + pub fn finalize(&mut self) -> &[u8] { + if !self.0.finalized { + blake2b_final(&mut self.0); + } + debug_assert!(mem::size_of_val(&self.0.h) >= self.0.outlen as usize); + let raw_ptr = (&self.0.h[..]).as_ptr() as * const u8; + unsafe { + slice::from_raw_parts(raw_ptr, self.0.outlen as usize) + } + } +} + +impl ::std::fmt::Debug for Blake2bHasher { + fn fmt(&self, fmt: &mut ::std::fmt::Formatter) -> Result<(), ::std::fmt::Error> { + write!(fmt, "{:?}", self.0) + } } #[cfg(test)] @@ -245,6 +295,8 @@ fn selftest_seq(out: &mut [u8], seed: u32) #[test] fn blake2b_selftest() { + use std::hash::Hasher; + // grand hash of hash results const BLAKE2B_RES: [u8; 32] = [ 0xC2, 0x3A, 0x78, 0x00, 0xD9, 0x81, 0x23, 0xBD, @@ -261,7 +313,7 @@ fn blake2b_selftest() let mut md = [0u8; 64]; let mut key = [0u8; 64]; - let mut ctx = blake2b_new(32, &[]); + let mut hasher = Blake2bHasher::new(32, &[]); for i in 0 .. 4 { let outlen = B2B_MD_LEN[i]; @@ -270,16 +322,16 @@ fn blake2b_selftest() selftest_seq(&mut data[.. inlen], inlen as u32); // unkeyed hash blake2b(&mut md[.. outlen], &[], &data[.. inlen]); - blake2b_update(&mut ctx, &md[.. outlen]); // hash the hash + hasher.write(&md[.. outlen]); // hash the hash selftest_seq(&mut key[0 .. outlen], outlen as u32); // keyed hash blake2b(&mut md[.. outlen], &key[.. outlen], &data[.. inlen]); - blake2b_update(&mut ctx, &md[.. outlen]); // hash the hash + hasher.write(&md[.. outlen]); // hash the hash } } // compute and compare the hash of hashes - blake2b_final(ctx, &mut md[..]); + let md = hasher.finalize(); for i in 0 .. 32 { assert_eq!(md[i], BLAKE2B_RES[i]); } diff --git a/src/librustc_data_structures/fmt_wrap.rs b/src/librustc_data_structures/fmt_wrap.rs new file mode 100644 index 0000000000000..50fd1d802b7ff --- /dev/null +++ b/src/librustc_data_structures/fmt_wrap.rs @@ -0,0 +1,31 @@ +// Copyright 2016 The Rust Project Developers. See the COPYRIGHT +// file at the top-level directory of this distribution and at +// http://rust-lang.org/COPYRIGHT. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use std::fmt; + +// Provide some more formatting options for some data types (at the moment +// that's just `{:x}` for slices of u8). + +pub struct FmtWrap(pub T); + +impl<'a> fmt::LowerHex for FmtWrap<&'a [u8]> { + fn fmt(&self, formatter: &mut fmt::Formatter) -> fmt::Result { + for byte in self.0.iter() { + try!(write!(formatter, "{:02x}", byte)); + } + Ok(()) + } +} + +#[test] +fn test_lower_hex() { + let bytes: &[u8] = &[0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef]; + assert_eq!("0123456789abcdef", &format!("{:x}", FmtWrap(bytes))); +} diff --git a/src/librustc_data_structures/lib.rs b/src/librustc_data_structures/lib.rs index 143c180f823d0..fc963dac9495f 100644 --- a/src/librustc_data_structures/lib.rs +++ b/src/librustc_data_structures/lib.rs @@ -49,6 +49,7 @@ pub mod accumulate_vec; pub mod bitslice; pub mod blake2b; pub mod bitvec; +pub mod fmt_wrap; pub mod graph; pub mod ivar; pub mod indexed_set; diff --git a/src/librustc_driver/Cargo.toml b/src/librustc_driver/Cargo.toml index 98e2aa881891d..99d3e155e8936 100644 --- a/src/librustc_driver/Cargo.toml +++ b/src/librustc_driver/Cargo.toml @@ -18,6 +18,7 @@ rustc = { path = "../librustc" } rustc_back = { path = "../librustc_back" } rustc_borrowck = { path = "../librustc_borrowck" } rustc_const_eval = { path = "../librustc_const_eval" } +rustc_data_structures = { path = "../librustc_data_structures" } rustc_errors = { path = "../librustc_errors" } rustc_incremental = { path = "../librustc_incremental" } rustc_lint = { path = "../librustc_lint" } diff --git a/src/librustc_driver/driver.rs b/src/librustc_driver/driver.rs index 318616726f417..da1d5ad2c4a9b 100644 --- a/src/librustc_driver/driver.rs +++ b/src/librustc_driver/driver.rs @@ -12,6 +12,9 @@ use rustc::hir; use rustc::hir::{map as hir_map, FreevarMap, TraitMap}; use rustc::hir::def::DefMap; use rustc::hir::lowering::lower_crate; +use rustc_data_structures::blake2b::Blake2bHasher; +use rustc_data_structures::fmt_wrap::FmtWrap; +use rustc::ty::util::ArchIndependentHasher; use rustc_mir as mir; use rustc::session::{Session, CompileResult, compile_result_from_err_count}; use rustc::session::config::{self, Input, OutputFilenames, OutputType, @@ -23,7 +26,6 @@ use rustc::middle::privacy::AccessLevels; use rustc::ty::{self, TyCtxt}; use rustc::util::common::time; use rustc::util::nodemap::NodeSet; -use rustc_back::sha2::{Sha256, Digest}; use rustc_borrowck as borrowck; use rustc_incremental::{self, IncrementalHashesMap}; use rustc_resolve::{MakeGlobMap, Resolver}; @@ -1221,7 +1223,16 @@ pub fn collect_crate_types(session: &Session, attrs: &[ast::Attribute]) -> Vec String { - let mut hasher = Sha256::new(); + use std::hash::Hasher; + + // The crate_disambiguator is a 128 bit hash. The disambiguator is fed + // into various other hashes quite a bit (symbol hashes, incr. comp. hashes, + // debuginfo type IDs, etc), so we don't want it to be too wide. 128 bits + // should still be safe enough to avoid collisions in practice. + // FIXME(mw): It seems that the crate_disambiguator is used everywhere as + // a hex-string instead of raw bytes. We should really use the + // smaller representation. + let mut hasher = ArchIndependentHasher::new(Blake2bHasher::new(128 / 8, &[])); let mut metadata = session.opts.cg.metadata.clone(); // We don't want the crate_disambiguator to dependent on the order @@ -1230,24 +1241,23 @@ pub fn compute_crate_disambiguator(session: &Session) -> String { // Every distinct -C metadata value is only incorporated once: metadata.dedup(); - hasher.input_str("metadata"); + hasher.write(b"metadata"); for s in &metadata { // Also incorporate the length of a metadata string, so that we generate // different values for `-Cmetadata=ab -Cmetadata=c` and // `-Cmetadata=a -Cmetadata=bc` - hasher.input_str(&format!("{}", s.len())[..]); - hasher.input_str(&s[..]); + hasher.write_usize(s.len()); + hasher.write(s.as_bytes()); } - let mut hash = hasher.result_str(); + let mut hash_state = hasher.into_inner(); + let hash_bytes = hash_state.finalize(); // If this is an executable, add a special suffix, so that we don't get // symbol conflicts when linking against a library of the same name. - if session.crate_types.borrow().contains(&config::CrateTypeExecutable) { - hash.push_str("-exe"); - } + let is_exe = session.crate_types.borrow().contains(&config::CrateTypeExecutable); - hash + format!("{:x}{}", FmtWrap(hash_bytes), if is_exe { "-exe" } else {""}) } pub fn build_output_filenames(input: &Input, diff --git a/src/librustc_driver/lib.rs b/src/librustc_driver/lib.rs index f4cae91289885..cb78baa12a6ad 100644 --- a/src/librustc_driver/lib.rs +++ b/src/librustc_driver/lib.rs @@ -42,6 +42,7 @@ extern crate rustc; extern crate rustc_back; extern crate rustc_borrowck; extern crate rustc_const_eval; +extern crate rustc_data_structures; extern crate rustc_errors as errors; extern crate rustc_passes; extern crate rustc_lint; diff --git a/src/librustc_incremental/calculate_svh/hasher.rs b/src/librustc_incremental/calculate_svh/hasher.rs index d92a8d375e0d3..49683a81227b1 100644 --- a/src/librustc_incremental/calculate_svh/hasher.rs +++ b/src/librustc_incremental/calculate_svh/hasher.rs @@ -8,21 +8,22 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -use std::hash::Hasher; use std::mem; -use rustc_data_structures::blake2b; +use rustc_data_structures::blake2b::Blake2bHasher; +use rustc::ty::util::ArchIndependentHasher; use ich::Fingerprint; #[derive(Debug)] pub struct IchHasher { - state: blake2b::Blake2bCtx, + state: ArchIndependentHasher, bytes_hashed: u64, } impl IchHasher { pub fn new() -> IchHasher { + let hash_size = mem::size_of::(); IchHasher { - state: blake2b::blake2b_new(mem::size_of::(), &[]), + state: ArchIndependentHasher::new(Blake2bHasher::new(hash_size, &[])), bytes_hashed: 0 } } @@ -33,40 +34,19 @@ impl IchHasher { pub fn finish(self) -> Fingerprint { let mut fingerprint = Fingerprint::zero(); - blake2b::blake2b_final(self.state, &mut fingerprint.0); + fingerprint.0.copy_from_slice(self.state.into_inner().finalize()); fingerprint } } -impl Hasher for IchHasher { +impl ::std::hash::Hasher for IchHasher { fn finish(&self) -> u64 { bug!("Use other finish() implementation to get the full 128-bit hash."); } #[inline] fn write(&mut self, bytes: &[u8]) { - blake2b::blake2b_update(&mut self.state, bytes); + self.state.write(bytes); self.bytes_hashed += bytes.len() as u64; } - - #[inline] - fn write_u16(&mut self, i: u16) { - self.write(&unsafe { mem::transmute::<_, [u8; 2]>(i.to_le()) }) - } - - #[inline] - fn write_u32(&mut self, i: u32) { - self.write(&unsafe { mem::transmute::<_, [u8; 4]>(i.to_le()) }) - } - - #[inline] - fn write_u64(&mut self, i: u64) { - self.write(&unsafe { mem::transmute::<_, [u8; 8]>(i.to_le()) }) - } - - #[inline] - fn write_usize(&mut self, i: usize) { - // always hash as u64, so we don't depend on the size of `usize` - self.write_u64(i as u64); - } } diff --git a/src/librustc_trans/back/symbol_names.rs b/src/librustc_trans/back/symbol_names.rs index f0661e03bc815..bf2a5d76c10d4 100644 --- a/src/librustc_trans/back/symbol_names.rs +++ b/src/librustc_trans/back/symbol_names.rs @@ -99,7 +99,8 @@ use common::SharedCrateContext; use monomorphize::Instance; -use util::sha2::{Digest, Sha256}; +use rustc_data_structures::fmt_wrap::FmtWrap; +use rustc_data_structures::blake2b::Blake2bHasher; use rustc::middle::weak_lang_items; use rustc::hir::def_id::LOCAL_CRATE; @@ -113,21 +114,6 @@ use rustc::util::common::record_time; use syntax::attr; use syntax::parse::token::{self, InternedString}; -use serialize::hex::ToHex; - -use std::hash::Hasher; - -struct Sha256Hasher<'a>(&'a mut Sha256); - -impl<'a> Hasher for Sha256Hasher<'a> { - fn write(&mut self, msg: &[u8]) { - self.0.input(msg) - } - - fn finish(&self) -> u64 { - bug!("Sha256Hasher::finish should not be called"); - } -} fn get_symbol_hash<'a, 'tcx>(scx: &SharedCrateContext<'a, 'tcx>, @@ -149,12 +135,9 @@ fn get_symbol_hash<'a, 'tcx>(scx: &SharedCrateContext<'a, 'tcx>, let tcx = scx.tcx(); - let mut hash_state = scx.symbol_hasher().borrow_mut(); - record_time(&tcx.sess.perf_stats.symbol_hash_time, || { - hash_state.reset(); - let hasher = Sha256Hasher(&mut hash_state); - let mut hasher = ty::util::TypeIdHasher::new(tcx, hasher); + let mut hasher = ty::util::TypeIdHasher::new(tcx, Blake2bHasher::new(8, &[])); + record_time(&tcx.sess.perf_stats.symbol_hash_time, || { // the main symbol name is not necessarily unique; hash in the // compiler's internal def-path, guaranteeing each symbol has a // truly unique path @@ -175,8 +158,9 @@ fn get_symbol_hash<'a, 'tcx>(scx: &SharedCrateContext<'a, 'tcx>, }); // 64 bits should be enough to avoid collisions. - let output = hash_state.result_bytes(); - format!("h{}", output[..8].to_hex()) + let mut hasher = hasher.into_inner(); + let hash_bytes = hasher.finalize(); + format!("h{:x}", FmtWrap(hash_bytes)) } impl<'a, 'tcx> Instance<'tcx> { diff --git a/src/librustc_trans/base.rs b/src/librustc_trans/base.rs index 54dff8c8bb3cf..977ababbf5688 100644 --- a/src/librustc_trans/base.rs +++ b/src/librustc_trans/base.rs @@ -79,7 +79,6 @@ use type_::Type; use type_of; use value::Value; use Disr; -use util::sha2::Sha256; use util::nodemap::{NodeSet, FnvHashMap, FnvHashSet}; use arena::TypedArena; @@ -1550,7 +1549,6 @@ pub fn trans_crate<'a, 'tcx>(tcx: TyCtxt<'a, 'tcx, 'tcx>, let shared_ccx = SharedCrateContext::new(tcx, export_map, - Sha256::new(), link_meta.clone(), reachable, check_overflow); diff --git a/src/librustc_trans/context.rs b/src/librustc_trans/context.rs index 42773a8cd2c44..fc75b1018ec35 100644 --- a/src/librustc_trans/context.rs +++ b/src/librustc_trans/context.rs @@ -32,7 +32,6 @@ use session::config::NoDebugInfo; use session::Session; use session::config; use symbol_map::SymbolMap; -use util::sha2::Sha256; use util::nodemap::{NodeSet, DefIdMap, FnvHashMap, FnvHashSet}; use std::ffi::{CStr, CString}; @@ -69,7 +68,6 @@ pub struct SharedCrateContext<'a, 'tcx: 'a> { export_map: ExportMap, reachable: NodeSet, link_meta: LinkMeta, - symbol_hasher: RefCell, tcx: TyCtxt<'a, 'tcx, 'tcx>, stats: Stats, check_overflow: bool, @@ -436,7 +434,6 @@ unsafe fn create_context_and_module(sess: &Session, mod_name: &str) -> (ContextR impl<'b, 'tcx> SharedCrateContext<'b, 'tcx> { pub fn new(tcx: TyCtxt<'b, 'tcx, 'tcx>, export_map: ExportMap, - symbol_hasher: Sha256, link_meta: LinkMeta, reachable: NodeSet, check_overflow: bool) @@ -496,7 +493,6 @@ impl<'b, 'tcx> SharedCrateContext<'b, 'tcx> { export_map: export_map, reachable: reachable, link_meta: link_meta, - symbol_hasher: RefCell::new(symbol_hasher), tcx: tcx, stats: Stats { n_glues_created: Cell::new(0), @@ -575,10 +571,6 @@ impl<'b, 'tcx> SharedCrateContext<'b, 'tcx> { }) } - pub fn symbol_hasher(&self) -> &RefCell { - &self.symbol_hasher - } - pub fn metadata_symbol_name(&self) -> String { format!("rust_metadata_{}_{}", self.link_meta().crate_name, @@ -877,10 +869,6 @@ impl<'b, 'tcx> CrateContext<'b, 'tcx> { &self.local().llsizingtypes } - pub fn symbol_hasher<'a>(&'a self) -> &'a RefCell { - &self.shared.symbol_hasher - } - pub fn type_hashcodes<'a>(&'a self) -> &'a RefCell, String>> { &self.local().type_hashcodes } diff --git a/src/librustc_trans/debuginfo/metadata.rs b/src/librustc_trans/debuginfo/metadata.rs index 2804e3ffe37dd..863aecc824433 100644 --- a/src/librustc_trans/debuginfo/metadata.rs +++ b/src/librustc_trans/debuginfo/metadata.rs @@ -30,7 +30,7 @@ use rustc::ty::fold::TypeVisitor; use rustc::ty::subst::Substs; use rustc::ty::util::TypeIdHasher; use rustc::hir; -use rustc_data_structures::blake2b; +use rustc_data_structures::blake2b::Blake2bHasher; use {type_of, machine, monomorphize}; use common::CrateContext; use type_::Type; @@ -149,10 +149,16 @@ impl<'tcx> TypeMap<'tcx> { None => { /* generate one */} }; + // The hasher we are using to generate the UniqueTypeId. We want + // something that provides more than the 64 bits of the DefaultHasher. + const TYPE_ID_HASH_LENGTH: usize = 20; + let mut type_id_hasher = TypeIdHasher::new(cx.tcx(), - DebugInfoTypeIdHasher::new()); + Blake2bHasher::new(TYPE_ID_HASH_LENGTH, &[])); type_id_hasher.visit_ty(type_); - let hash = type_id_hasher.into_inner().into_hash(); + let mut hash_state = type_id_hasher.into_inner(); + let hash: &[u8] = hash_state.finalize(); + debug_assert!(hash.len() == TYPE_ID_HASH_LENGTH); let mut unique_type_id = String::with_capacity(TYPE_ID_HASH_LENGTH * 2); @@ -164,39 +170,6 @@ impl<'tcx> TypeMap<'tcx> { self.type_to_unique_id.insert(type_, UniqueTypeId(key)); return UniqueTypeId(key); - - // The hasher we are using to generate the UniqueTypeId. We want - // something that provides more than the 64 bits of the DefaultHasher. - const TYPE_ID_HASH_LENGTH: usize = 20; - - struct DebugInfoTypeIdHasher { - state: blake2b::Blake2bCtx - } - - impl ::std::hash::Hasher for DebugInfoTypeIdHasher { - fn finish(&self) -> u64 { - unimplemented!() - } - - #[inline] - fn write(&mut self, bytes: &[u8]) { - blake2b::blake2b_update(&mut self.state, bytes); - } - } - - impl DebugInfoTypeIdHasher { - fn new() -> DebugInfoTypeIdHasher { - DebugInfoTypeIdHasher { - state: blake2b::blake2b_new(TYPE_ID_HASH_LENGTH, &[]) - } - } - - fn into_hash(self) -> [u8; TYPE_ID_HASH_LENGTH] { - let mut hash = [0u8; TYPE_ID_HASH_LENGTH]; - blake2b::blake2b_final(self.state, &mut hash); - hash - } - } } // Get the UniqueTypeId for an enum variant. Enum variants are not really