Skip to content

Commit

Permalink
simple_op_store: hash view/operation data directly
Browse files Browse the repository at this point in the history
Decouples view/operation IDs from serialized forms, which are not
necessarily stable. Not breaking as these IDs are persistent, never
recomputed or used for integrity checking.
  • Loading branch information
Ralith committed Nov 12, 2022
1 parent eb79076 commit 016eb75
Show file tree
Hide file tree
Showing 7 changed files with 293 additions and 79 deletions.
5 changes: 3 additions & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions lib/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ bytes = "1.2.1"
byteorder = "1.4.3"
chrono = { version = "0.4.22", default-features = false, features = ["std", "clock"] }
config = { version = "0.13.2", default-features = false, features = ["toml"] }
digest = "0.10.5"
git2 = "0.15.0"
hex = "0.4.3"
itertools = "0.10.5"
Expand Down
24 changes: 15 additions & 9 deletions lib/src/backend.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,10 @@ use thiserror::Error;

use crate::repo_path::{RepoPath, RepoPathComponent};

#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)]
pub struct CommitId(Vec<u8>);
// `content_hash!` re-emits the struct below as written and additionally
// generates a `ContentHash` impl for it that hashes the single wrapped field
// (here the `Vec<u8>`).
content_hash! {
#[derive(PartialEq, Eq, PartialOrd, Ord, Clone, Hash)]
pub struct CommitId(Vec<u8>);
}

impl Debug for CommitId {
fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> {
Expand Down Expand Up @@ -225,14 +227,18 @@ pub enum Phase {
Draft,
}

#[derive(Debug, PartialEq, Eq, Clone, PartialOrd, Ord)]
pub struct MillisSinceEpoch(pub i64);
// Newtype over an `i64`; presumably a count of milliseconds since the Unix
// epoch, going by the name (confirm against callers). `content_hash!` re-emits
// the struct as written and generates a `ContentHash` impl that hashes the
// wrapped `i64`.
content_hash! {
#[derive(Debug, PartialEq, Eq, Clone, PartialOrd, Ord)]
pub struct MillisSinceEpoch(pub i64);
}

#[derive(Debug, PartialEq, Eq, Clone, PartialOrd, Ord)]
pub struct Timestamp {
pub timestamp: MillisSinceEpoch,
// time zone offset in minutes
pub tz_offset: i32,
// `content_hash!` re-emits the struct as written and generates a `ContentHash`
// impl that hashes the fields in declaration order: `timestamp`, then
// `tz_offset`. Reordering the fields therefore changes the resulting hash.
content_hash! {
#[derive(Debug, PartialEq, Eq, Clone, PartialOrd, Ord)]
pub struct Timestamp {
pub timestamp: MillisSinceEpoch,
// time zone offset in minutes
pub tz_offset: i32,
}
}

impl Timestamp {
Expand Down
160 changes: 160 additions & 0 deletions lib/src/content_hash.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
use itertools::Itertools as _;

/// Portable, stable hashing suitable for identifying values
///
/// Implementations feed a byte encoding of the value into a
/// [`digest::Update`] sink. To keep that encoding unambiguous, implementations
/// should follow these conventions:
///
/// * Variable-length sequences should hash a 64-bit little-endian
///   representation of their length, then their elements in order.
/// * Unordered containers should order their elements according to their
///   `Ord` implementation.
/// * Enums should hash a 32-bit little-endian encoding of the ordinal number
///   of the enum variant, then the variant's fields in lexical order.
pub trait ContentHash {
/// Feeds the encoding of `self` into `state`.
///
/// The method only appends to `state`; producing the final digest is left
/// to the caller.
fn hash(&self, state: &mut impl digest::Update);
}

/// A byte is hashed as itself: exactly one byte is written to `state`.
impl ContentHash for u8 {
    fn hash(&self, state: &mut impl digest::Update) {
        // View the single byte as a one-element slice instead of copying it
        // into a temporary array.
        state.update(std::slice::from_ref(self));
    }
}

/// An `i32` is hashed as its four little-endian bytes.
impl ContentHash for i32 {
    fn hash(&self, state: &mut impl digest::Update) {
        let le_bytes: [u8; 4] = self.to_le_bytes();
        state.update(&le_bytes);
    }
}

/// An `i64` is hashed as its eight little-endian bytes.
impl ContentHash for i64 {
    fn hash(&self, state: &mut impl digest::Update) {
        let le_bytes: [u8; 8] = self.to_le_bytes();
        state.update(&le_bytes);
    }
}

// TODO: Specialize for [u8] once specialization exists
/// A slice is hashed as its length (64-bit little-endian) followed by each
/// element in order.
impl<T: ContentHash> ContentHash for [T] {
    fn hash(&self, state: &mut impl digest::Update) {
        let len_prefix = self.len() as u64;
        state.update(&len_prefix.to_le_bytes());
        self.iter().for_each(|element| element.hash(state));
    }
}

/// A `Vec` hashes exactly like the slice of its contents.
impl<T: ContentHash> ContentHash for Vec<T> {
    fn hash(&self, state: &mut impl digest::Update) {
        // `&Vec<T>` coerces to `&[T]`, so this defers to the slice impl.
        <[T] as ContentHash>::hash(self, state)
    }
}

/// A `String` hashes as the byte slice of its UTF-8 contents, i.e. with the
/// same length prefix the slice impl writes.
impl ContentHash for String {
    fn hash(&self, state: &mut impl digest::Update) {
        let utf8: &[u8] = self.as_bytes();
        utf8.hash(state);
    }
}

/// An `Option` is hashed as a one-byte tag (`0` for `None`, `1` for `Some`)
/// followed, for `Some`, by the contained value.
impl<T: ContentHash> ContentHash for Option<T> {
    fn hash(&self, state: &mut impl digest::Update) {
        // NOTE(review): the tag is a single byte, whereas the `ContentHash`
        // docs recommend a 32-bit little-endian ordinal for enums. Widening it
        // would change every hash that covers an `Option`, so it is kept as is.
        let tag = u8::from(self.is_some());
        state.update(&[tag]);
        if let Some(inner) = self {
            inner.hash(state);
        }
    }
}

/// A `HashMap` is hashed as its entry count (64-bit little-endian) followed by
/// every `(key, value)` pair, visited in ascending key order so the result
/// does not depend on the map's iteration order.
///
/// Only the keys need to be `Ord`: keys within a map are unique, so ordering
/// by key alone yields the same sequence as ordering whole `(key, value)`
/// tuples would.
impl<K, V> ContentHash for std::collections::HashMap<K, V>
where
    K: ContentHash + Ord,
    V: ContentHash,
{
    fn hash(&self, state: &mut impl digest::Update) {
        state.update(&(self.len() as u64).to_le_bytes());
        // Stable sort on the key only; values never take part in the ordering.
        for (k, v) in self.iter().sorted_by_key(|&(k, _)| k) {
            k.hash(state);
            v.hash(state);
        }
    }
}

/// A `HashSet` is hashed as its element count (64-bit little-endian) followed
/// by its elements in ascending `Ord` order, so the result does not depend on
/// the set's iteration order.
impl<K> ContentHash for std::collections::HashSet<K>
where
    K: ContentHash + Ord,
{
    fn hash(&self, state: &mut impl digest::Update) {
        let count = self.len() as u64;
        state.update(&count.to_le_bytes());
        self.iter().sorted().for_each(|member| member.hash(state));
    }
}

/// A `BTreeMap` is hashed as its entry count (64-bit little-endian) followed
/// by every `(key, value)` pair in the map's own iteration order.
impl<K, V> ContentHash for std::collections::BTreeMap<K, V>
where
    K: ContentHash,
    V: ContentHash,
{
    fn hash(&self, state: &mut impl digest::Update) {
        let entry_count = self.len() as u64;
        state.update(&entry_count.to_le_bytes());
        self.iter().for_each(|(key, value)| {
            key.hash(state);
            value.hash(state);
        });
    }
}

/// Declares a struct and implements `ContentHash` for it in one step.
///
/// The struct definition passed in is re-emitted with its attributes,
/// visibility and fields intact. The generated `hash` calls
/// `ContentHash::hash` on each field in declaration order, so every field type
/// must itself implement `ContentHash`.
///
/// Two shapes are accepted: a struct with named fields, and a tuple struct
/// with exactly one field. Generic structs and enums are not matched.
macro_rules! content_hash {
// Struct with named fields; a trailing comma after the last field is optional.
($(#[$meta:meta])* $vis:vis struct $name:ident {
$($(#[$field_meta:meta])* $field_vis:vis $field:ident : $ty:ty),* $(,)?
}) => {
$(#[$meta])*
$vis struct $name {
$($(#[$field_meta])* $field_vis $field : $ty),*
}

impl crate::content_hash::ContentHash for $name {
fn hash(&self, state: &mut impl digest::Update) {
// One call per field, in the order the fields were declared.
$(<$ty as crate::content_hash::ContentHash>::hash(&self.$field, state);)*
}
}
};
// Tuple struct wrapping a single field (newtype).
($(#[$meta:meta])* $vis:vis struct $name:ident($field_vis:vis $ty:ty);) => {
$(#[$meta])*
$vis struct $name($field_vis $ty);

impl crate::content_hash::ContentHash for $name {
fn hash(&self, state: &mut impl digest::Update) {
// The newtype hashes exactly like the value it wraps.
<$ty as crate::content_hash::ContentHash>::hash(&self.0, state);
}
}
};
}

#[cfg(test)]
mod tests {
    use std::collections::HashMap;

    use blake2::{Blake2b512, Digest};

    use super::*;

    /// Two single-entry maps whose key and value concatenate to the same text
    /// ("abcd") must still produce different digests.
    #[test]
    fn hash_map_uniqueness() {
        // Runs one map through a fresh hasher and returns the finished digest.
        let digest_of = |map: &HashMap<String, String>| {
            let mut hasher = Blake2b512::default();
            map.hash(&mut hasher);
            hasher.finalize()
        };

        let a = HashMap::from([("ab".to_string(), "cd".to_string())]);
        let b = HashMap::from([("a".to_string(), "bcd".to_string())]);

        let hash_a = digest_of(&a);
        let hash_b = digest_of(&b);

        assert_ne!(hash_a, hash_b);
    }
}
3 changes: 3 additions & 0 deletions lib/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@

#![deny(unused_must_use)]

#[macro_use]
mod content_hash;

pub mod backend;
pub mod commit;
pub mod commit_builder;
Expand Down
Loading

0 comments on commit 016eb75

Please sign in to comment.